From a2756db66b567d21b845a104874488348280b815 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Fri, 15 Dec 2017 16:42:06 -0700 Subject: [PATCH] Update to Kokkos library v2.5.00 --- lib/kokkos/CHANGELOG.md | 40 + lib/kokkos/CMakeLists.txt | 280 +--- lib/kokkos/Makefile.kokkos | 402 +++--- lib/kokkos/Makefile.targets | 4 +- lib/kokkos/README | 40 +- lib/kokkos/algorithms/CMakeLists.txt | 4 +- .../algorithms/unit_tests/CMakeLists.txt | 28 +- lib/kokkos/algorithms/unit_tests/Makefile | 3 +- .../benchmark_suite/scripts/build_code.bash | 84 ++ .../scripts/checkout_repos.bash | 37 + .../scripts/run_benchmark.bash | 14 + .../benchmark_suite/scripts/run_tests.bash | 44 + .../benchmarks/bytes_and_flops/Makefile | 28 +- .../policy_performance/policy_perf_test.hpp | 17 +- lib/kokkos/bin/hpcbind | 197 ++- lib/kokkos/bin/nvcc_wrapper | 54 +- .../cmake/Makefile.generate_cmake_settings | 8 + lib/kokkos/cmake/kokkos.cmake | 1202 ----------------- lib/kokkos/cmake/kokkos_build.cmake | 219 +++ lib/kokkos/cmake/kokkos_functions.cmake | 345 +++++ lib/kokkos/cmake/kokkos_options.cmake | 365 +++++ lib/kokkos/cmake/kokkos_settings.cmake | 257 ++++ lib/kokkos/cmake/tribits.cmake | 94 +- lib/kokkos/config/master_history.txt | 1 + lib/kokkos/config/nvcc_wrapper | 74 +- lib/kokkos/config/test_all_sandia | 68 +- lib/kokkos/containers/CMakeLists.txt | 5 +- .../performance_tests/CMakeLists.txt | 10 +- .../containers/performance_tests/Makefile | 3 +- .../performance_tests/TestDynRankView.hpp | 10 +- .../performance_tests/TestOpenMP.cpp | 13 + .../performance_tests/TestScatterView.hpp | 113 ++ lib/kokkos/containers/src/CMakeLists.txt | 52 +- .../containers/src/Kokkos_ScatterView.hpp | 999 ++++++++++++++ lib/kokkos/containers/src/Kokkos_Vector.hpp | 1 + .../containers/unit_tests/CMakeLists.txt | 16 +- lib/kokkos/containers/unit_tests/Makefile | 3 +- lib/kokkos/containers/unit_tests/TestCuda.cpp | 9 + .../containers/unit_tests/TestOpenMP.cpp | 11 + .../containers/unit_tests/TestScatterView.hpp | 156 
+++ .../containers/unit_tests/TestSerial.cpp | 10 + lib/kokkos/core/CMakeLists.txt | 4 +- lib/kokkos/core/perf_test/CMakeLists.txt | 10 +- lib/kokkos/core/perf_test/Makefile | 3 +- lib/kokkos/core/src/CMakeLists.txt | 149 +- lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp | 10 +- .../core/src/Cuda/Kokkos_Cuda_Parallel.hpp | 41 +- .../core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp | 32 +- .../core/src/Cuda/Kokkos_Cuda_UniqueToken.hpp | 8 +- .../src/Cuda/Kokkos_Cuda_Vectorization.hpp | 50 +- .../Kokkos_Cuda_Version_9_8_Compatibility.hpp | 12 + lib/kokkos/core/src/Kokkos_Macros.hpp | 12 +- lib/kokkos/core/src/Kokkos_MemoryPool.hpp | 61 +- lib/kokkos/core/src/Kokkos_NumericTraits.hpp | 2 + lib/kokkos/core/src/Kokkos_Pair.hpp | 4 +- lib/kokkos/core/src/Kokkos_Parallel.hpp | 4 +- .../src/Kokkos_Profiling_ProfileSection.hpp | 111 ++ lib/kokkos/core/src/Kokkos_Serial.hpp | 18 +- lib/kokkos/core/src/Makefile | 140 +- .../core/src/Makefile.generate_build_files | 100 ++ .../core/src/Makefile.generate_header_lists | 28 + .../src/OpenMP/Kokkos_OpenMP_Parallel.hpp | 21 +- .../Kokkos_OpenMPTarget_Parallel.hpp | 14 +- .../src/Qthreads/Kokkos_Qthreads_Parallel.hpp | 14 +- .../core/src/ROCm/Kokkos_ROCm_Reduce.hpp | 9 +- .../core/src/Threads/Kokkos_ThreadsExec.hpp | 1 - .../src/Threads/Kokkos_Threads_Parallel.hpp | 25 +- .../Kokkos_Atomic_Compare_Exchange_Strong.hpp | 8 +- .../core/src/impl/Kokkos_Atomic_Exchange.hpp | 8 +- .../core/src/impl/Kokkos_Atomic_Fetch_Add.hpp | 8 +- .../core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp | 8 +- .../core/src/impl/Kokkos_Atomic_Generic.hpp | 12 +- lib/kokkos/core/src/impl/Kokkos_BitOps.hpp | 4 + lib/kokkos/core/src/impl/Kokkos_Core.cpp | 17 +- .../core/src/impl/Kokkos_HostBarrier.cpp | 204 +++ .../core/src/impl/Kokkos_HostBarrier.hpp | 146 ++ .../core/src/impl/Kokkos_HostThreadTeam.cpp | 152 --- .../core/src/impl/Kokkos_HostThreadTeam.hpp | 47 +- .../core/src/impl/Kokkos_MemoryPool.cpp | 125 ++ .../core/src/impl/Kokkos_Memory_Fence.hpp | 10 +- 
.../src/impl/Kokkos_Profiling_Interface.cpp | 58 +- .../src/impl/Kokkos_Profiling_Interface.hpp | 17 +- .../core/src/impl/Kokkos_Rendezvous.cpp | 219 --- .../core/src/impl/Kokkos_Rendezvous.hpp | 87 -- lib/kokkos/core/src/impl/Kokkos_Serial.cpp | 10 +- lib/kokkos/core/src/impl/Kokkos_Traits.hpp | 16 +- lib/kokkos/core/src/impl/Kokkos_ViewTile.hpp | 6 +- lib/kokkos/core/unit_test/CMakeLists.txt | 26 +- lib/kokkos/core/unit_test/Makefile | 5 +- lib/kokkos/core/unit_test/TestReduce.hpp | 220 ++- lib/kokkos/core/unit_test/TestTeamVector.hpp | 4 +- .../unit_test/TestViewMapping_subview.hpp | 4 + lib/kokkos/core/unit_test/UnitTestConfig.make | 52 + .../core/unit_test/config/bin/hcc-config | 2 + lib/kokkos/core/unit_test/config/clang | 5 + .../unit_test/config/cmaketest/CMakeLists.txt | 80 ++ lib/kokkos/core/unit_test/config/cxx | 5 + lib/kokkos/core/unit_test/config/mpic++ | 5 + lib/kokkos/core/unit_test/config/nvcc | 5 + .../results/AMDAVX_Cuda_KokkosCore_config.h | 18 + .../results/AMDAVX_OpenMP_KokkosCore_config.h | 17 + .../AMDAVX_Pthread_KokkosCore_config.h | 17 + .../AMDAVX_Qthreads_KokkosCore_config.h | 17 + .../results/AMDAVX_ROCm_KokkosCore_config.h | 18 + .../results/AMDAVX_Serial_KokkosCore_config.h | 17 + .../ARMv8-ThunderX_Cuda_KokkosCore_config.h | 19 + .../ARMv8-ThunderX_OpenMP_KokkosCore_config.h | 18 + ...ARMv8-ThunderX_Pthread_KokkosCore_config.h | 18 + ...RMv8-ThunderX_Qthreads_KokkosCore_config.h | 18 + .../ARMv8-ThunderX_ROCm_KokkosCore_config.h | 19 + .../ARMv8-ThunderX_Serial_KokkosCore_config.h | 18 + .../results/ARMv80_Cuda_KokkosCore_config.h | 18 + .../results/ARMv80_OpenMP_KokkosCore_config.h | 17 + .../ARMv80_Pthread_KokkosCore_config.h | 17 + .../ARMv80_Qthreads_KokkosCore_config.h | 17 + .../results/ARMv80_ROCm_KokkosCore_config.h | 18 + .../results/ARMv80_Serial_KokkosCore_config.h | 17 + .../results/ARMv81_Cuda_KokkosCore_config.h | 18 + .../results/ARMv81_OpenMP_KokkosCore_config.h | 17 + .../ARMv81_Pthread_KokkosCore_config.h | 17 + 
.../ARMv81_Qthreads_KokkosCore_config.h | 17 + .../results/ARMv81_ROCm_KokkosCore_config.h | 18 + .../results/ARMv81_Serial_KokkosCore_config.h | 17 + .../results/BDW_Cuda_KokkosCore_config.h | 24 + .../results/BDW_OpenMP_KokkosCore_config.h | 23 + .../results/BDW_Pthread_KokkosCore_config.h | 23 + .../results/BDW_Qthreads_KokkosCore_config.h | 23 + .../results/BDW_ROCm_KokkosCore_config.h | 24 + .../results/BDW_Serial_KokkosCore_config.h | 23 + .../results/BGQ_Cuda_KokkosCore_config.h | 17 + .../results/BGQ_OpenMP_KokkosCore_config.h | 16 + .../results/BGQ_Pthread_KokkosCore_config.h | 16 + .../results/BGQ_Qthreads_KokkosCore_config.h | 16 + .../results/BGQ_ROCm_KokkosCore_config.h | 17 + .../results/BGQ_Serial_KokkosCore_config.h | 16 + .../results/HSW_Cuda_KokkosCore_config.h | 21 + .../results/HSW_OpenMP_KokkosCore_config.h | 20 + .../results/HSW_Pthread_KokkosCore_config.h | 20 + .../results/HSW_Qthreads_KokkosCore_config.h | 20 + .../results/HSW_ROCm_KokkosCore_config.h | 21 + .../results/HSW_Serial_KokkosCore_config.h | 20 + .../results/KNC_Cuda_KokkosCore_config.h | 21 + .../results/KNC_OpenMP_KokkosCore_config.h | 20 + .../results/KNC_Pthread_KokkosCore_config.h | 20 + .../results/KNC_Qthreads_KokkosCore_config.h | 20 + .../results/KNC_ROCm_KokkosCore_config.h | 21 + .../results/KNC_Serial_KokkosCore_config.h | 20 + .../results/KNL_Cuda_KokkosCore_config.h | 21 + .../results/KNL_OpenMP_KokkosCore_config.h | 20 + .../results/KNL_Pthread_KokkosCore_config.h | 20 + .../results/KNL_Qthreads_KokkosCore_config.h | 20 + .../results/KNL_ROCm_KokkosCore_config.h | 21 + .../results/KNL_Serial_KokkosCore_config.h | 20 + .../results/Kepler30_Cuda_KokkosCore_config.h | 19 + .../Kepler30_OpenMP_KokkosCore_config.h | 16 + .../Kepler30_Pthread_KokkosCore_config.h | 16 + .../Kepler30_Qthreads_KokkosCore_config.h | 16 + .../results/Kepler30_ROCm_KokkosCore_config.h | 17 + .../Kepler30_Serial_KokkosCore_config.h | 16 + .../results/Kepler32_Cuda_KokkosCore_config.h | 19 + 
.../Kepler32_OpenMP_KokkosCore_config.h | 16 + .../Kepler32_Pthread_KokkosCore_config.h | 16 + .../Kepler32_Qthreads_KokkosCore_config.h | 16 + .../results/Kepler32_ROCm_KokkosCore_config.h | 17 + .../Kepler32_Serial_KokkosCore_config.h | 16 + .../results/Kepler35_Cuda_KokkosCore_config.h | 19 + .../Kepler35_OpenMP_KokkosCore_config.h | 16 + .../Kepler35_Pthread_KokkosCore_config.h | 16 + .../Kepler35_Qthreads_KokkosCore_config.h | 16 + .../results/Kepler35_ROCm_KokkosCore_config.h | 17 + .../Kepler35_Serial_KokkosCore_config.h | 16 + .../results/Kepler37_Cuda_KokkosCore_config.h | 19 + .../Kepler37_OpenMP_KokkosCore_config.h | 16 + .../Kepler37_Pthread_KokkosCore_config.h | 16 + .../Kepler37_Qthreads_KokkosCore_config.h | 16 + .../results/Kepler37_ROCm_KokkosCore_config.h | 17 + .../Kepler37_Serial_KokkosCore_config.h | 16 + .../results/Kepler_Cuda_KokkosCore_config.h | 19 + .../results/Kepler_OpenMP_KokkosCore_config.h | 16 + .../Kepler_Pthread_KokkosCore_config.h | 16 + .../Kepler_Qthreads_KokkosCore_config.h | 16 + .../results/Kepler_ROCm_KokkosCore_config.h | 17 + .../results/Kepler_Serial_KokkosCore_config.h | 16 + .../Maxwell50_Cuda_KokkosCore_config.h | 19 + .../Maxwell50_OpenMP_KokkosCore_config.h | 16 + .../Maxwell50_Pthread_KokkosCore_config.h | 16 + .../Maxwell50_Qthreads_KokkosCore_config.h | 16 + .../Maxwell50_ROCm_KokkosCore_config.h | 17 + .../Maxwell50_Serial_KokkosCore_config.h | 16 + .../Maxwell52_Cuda_KokkosCore_config.h | 19 + .../Maxwell52_OpenMP_KokkosCore_config.h | 16 + .../Maxwell52_Pthread_KokkosCore_config.h | 16 + .../Maxwell52_Qthreads_KokkosCore_config.h | 16 + .../Maxwell52_ROCm_KokkosCore_config.h | 17 + .../Maxwell52_Serial_KokkosCore_config.h | 16 + .../Maxwell53_Cuda_KokkosCore_config.h | 19 + .../Maxwell53_OpenMP_KokkosCore_config.h | 16 + .../Maxwell53_Pthread_KokkosCore_config.h | 16 + .../Maxwell53_Qthreads_KokkosCore_config.h | 16 + .../Maxwell53_ROCm_KokkosCore_config.h | 17 + .../Maxwell53_Serial_KokkosCore_config.h | 16 + 
.../results/Maxwell_Cuda_KokkosCore_config.h | 19 + .../Maxwell_OpenMP_KokkosCore_config.h | 16 + .../Maxwell_Pthread_KokkosCore_config.h | 16 + .../Maxwell_Qthreads_KokkosCore_config.h | 16 + .../results/Maxwell_ROCm_KokkosCore_config.h | 17 + .../Maxwell_Serial_KokkosCore_config.h | 16 + .../results/None_Cuda_KokkosCore_config.h | 17 + .../results/None_OpenMP_KokkosCore_config.h | 16 + .../results/None_Pthread_KokkosCore_config.h | 16 + .../results/None_Qthreads_KokkosCore_config.h | 16 + .../results/None_ROCm_KokkosCore_config.h | 17 + .../results/None_Serial_KokkosCore_config.h | 16 + .../results/Pascal60_Cuda_KokkosCore_config.h | 19 + .../Pascal60_OpenMP_KokkosCore_config.h | 16 + .../Pascal60_Pthread_KokkosCore_config.h | 16 + .../Pascal60_Qthreads_KokkosCore_config.h | 16 + .../results/Pascal60_ROCm_KokkosCore_config.h | 17 + .../Pascal60_Serial_KokkosCore_config.h | 16 + .../results/Pascal61_Cuda_KokkosCore_config.h | 19 + .../Pascal61_OpenMP_KokkosCore_config.h | 16 + .../Pascal61_Pthread_KokkosCore_config.h | 16 + .../Pascal61_Qthreads_KokkosCore_config.h | 16 + .../results/Pascal61_ROCm_KokkosCore_config.h | 17 + .../Pascal61_Serial_KokkosCore_config.h | 16 + .../results/Power7_Cuda_KokkosCore_config.h | 21 + .../results/Power7_OpenMP_KokkosCore_config.h | 20 + .../Power7_Pthread_KokkosCore_config.h | 20 + .../Power7_Qthreads_KokkosCore_config.h | 20 + .../results/Power7_ROCm_KokkosCore_config.h | 21 + .../results/Power7_Serial_KokkosCore_config.h | 20 + .../results/Power8_Cuda_KokkosCore_config.h | 21 + .../results/Power8_OpenMP_KokkosCore_config.h | 20 + .../Power8_Pthread_KokkosCore_config.h | 20 + .../Power8_Qthreads_KokkosCore_config.h | 20 + .../results/Power8_ROCm_KokkosCore_config.h | 21 + .../results/Power8_Serial_KokkosCore_config.h | 20 + .../results/Power9_Cuda_KokkosCore_config.h | 21 + .../results/Power9_OpenMP_KokkosCore_config.h | 20 + .../Power9_Pthread_KokkosCore_config.h | 20 + .../Power9_Qthreads_KokkosCore_config.h | 20 + 
.../results/Power9_ROCm_KokkosCore_config.h | 21 + .../results/Power9_Serial_KokkosCore_config.h | 20 + .../results/SKX_Cuda_KokkosCore_config.h | 24 + .../results/SKX_OpenMP_KokkosCore_config.h | 23 + .../results/SKX_Pthread_KokkosCore_config.h | 23 + .../results/SKX_Qthreads_KokkosCore_config.h | 23 + .../results/SKX_ROCm_KokkosCore_config.h | 24 + .../results/SKX_Serial_KokkosCore_config.h | 23 + .../results/SNB_Cuda_KokkosCore_config.h | 21 + .../results/SNB_OpenMP_KokkosCore_config.h | 20 + .../results/SNB_Pthread_KokkosCore_config.h | 20 + .../results/SNB_Qthreads_KokkosCore_config.h | 20 + .../results/SNB_ROCm_KokkosCore_config.h | 21 + .../results/SNB_Serial_KokkosCore_config.h | 20 + .../results/WSM_Cuda_KokkosCore_config.h | 21 + .../results/WSM_OpenMP_KokkosCore_config.h | 20 + .../results/WSM_Pthread_KokkosCore_config.h | 20 + .../results/WSM_Qthreads_KokkosCore_config.h | 20 + .../results/WSM_ROCm_KokkosCore_config.h | 21 + .../results/WSM_Serial_KokkosCore_config.h | 20 + lib/kokkos/core/unit_test/diffconfig.sh | 18 + lib/kokkos/core/unit_test/testmake.sh | 18 + lib/kokkos/doc/develop_builds.md | 76 ++ lib/kokkos/example/cmake/Dependencies.cmake | 1 - lib/kokkos/example/cmake_build/CMakeLists.txt | 6 +- .../example/cmake_build/cmake_example.cpp | 4 + lib/kokkos/example/cmake_build/foo.f | 4 + .../example/tutorial/01_hello_world/Makefile | 6 +- .../tutorial/01_hello_world_lambda/Makefile | 6 +- .../tutorial/02_simple_reduce/Makefile | 8 +- .../tutorial/02_simple_reduce_lambda/Makefile | 8 +- .../example/tutorial/03_simple_view/Makefile | 8 +- .../tutorial/03_simple_view_lambda/Makefile | 8 +- .../tutorial/04_simple_memoryspaces/Makefile | 8 +- .../tutorial/05_simple_atomics/Makefile | 8 +- .../tutorial/06_simple_mdrangepolicy/Makefile | 6 +- .../Advanced_Views/01_data_layouts/Makefile | 8 +- .../Advanced_Views/02_memory_traits/Makefile | 8 +- .../Advanced_Views/03_subviews/Makefile | 8 +- .../Advanced_Views/04_dualviews/Makefile | 8 +- 
.../Advanced_Views/05_NVIDIA_UVM/Makefile | 6 +- .../Advanced_Views/06_AtomicViews/Makefile | 8 +- .../07_Overlapping_DeepCopy/Makefile | 6 +- .../Algorithms/01_random_numbers/Makefile | 8 +- .../01_thread_teams/Makefile | 8 +- .../01_thread_teams_lambda/Makefile | 8 +- .../02_nested_parallel_for/Makefile | 8 +- .../03_vectorization/Makefile | 8 +- .../04_team_scan/Makefile | 8 +- .../example/tutorial/launch_bounds/Makefile | 8 +- lib/kokkos/generate_makefile.bash | 8 +- 292 files changed, 8238 insertions(+), 2823 deletions(-) create mode 100755 lib/kokkos/benchmarks/benchmark_suite/scripts/build_code.bash create mode 100755 lib/kokkos/benchmarks/benchmark_suite/scripts/checkout_repos.bash create mode 100755 lib/kokkos/benchmarks/benchmark_suite/scripts/run_benchmark.bash create mode 100755 lib/kokkos/benchmarks/benchmark_suite/scripts/run_tests.bash create mode 100644 lib/kokkos/cmake/Makefile.generate_cmake_settings delete mode 100644 lib/kokkos/cmake/kokkos.cmake create mode 100644 lib/kokkos/cmake/kokkos_build.cmake create mode 100644 lib/kokkos/cmake/kokkos_functions.cmake create mode 100644 lib/kokkos/cmake/kokkos_options.cmake create mode 100644 lib/kokkos/cmake/kokkos_settings.cmake create mode 100644 lib/kokkos/containers/performance_tests/TestScatterView.hpp create mode 100644 lib/kokkos/containers/src/Kokkos_ScatterView.hpp create mode 100644 lib/kokkos/containers/unit_tests/TestScatterView.hpp create mode 100644 lib/kokkos/core/src/Cuda/Kokkos_Cuda_Version_9_8_Compatibility.hpp create mode 100644 lib/kokkos/core/src/Kokkos_Profiling_ProfileSection.hpp create mode 100644 lib/kokkos/core/src/Makefile.generate_build_files create mode 100644 lib/kokkos/core/src/Makefile.generate_header_lists create mode 100644 lib/kokkos/core/src/impl/Kokkos_HostBarrier.cpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_HostBarrier.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_MemoryPool.cpp delete mode 100644 lib/kokkos/core/src/impl/Kokkos_Rendezvous.cpp 
delete mode 100644 lib/kokkos/core/src/impl/Kokkos_Rendezvous.hpp create mode 100644 lib/kokkos/core/unit_test/UnitTestConfig.make create mode 100755 lib/kokkos/core/unit_test/config/bin/hcc-config create mode 100755 lib/kokkos/core/unit_test/config/clang create mode 100644 lib/kokkos/core/unit_test/config/cmaketest/CMakeLists.txt create mode 100755 lib/kokkos/core/unit_test/config/cxx create mode 100755 lib/kokkos/core/unit_test/config/mpic++ create mode 100755 lib/kokkos/core/unit_test/config/nvcc create mode 100644 lib/kokkos/core/unit_test/config/results/AMDAVX_Cuda_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/AMDAVX_OpenMP_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/AMDAVX_Pthread_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/AMDAVX_Qthreads_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/AMDAVX_ROCm_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/AMDAVX_Serial_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/ARMv8-ThunderX_Cuda_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/ARMv8-ThunderX_OpenMP_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/ARMv8-ThunderX_Pthread_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/ARMv8-ThunderX_Qthreads_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/ARMv8-ThunderX_ROCm_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/ARMv8-ThunderX_Serial_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/ARMv80_Cuda_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/ARMv80_OpenMP_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/ARMv80_Pthread_KokkosCore_config.h create mode 100644 
lib/kokkos/core/unit_test/config/results/ARMv80_Qthreads_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/ARMv80_ROCm_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/ARMv80_Serial_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/ARMv81_Cuda_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/ARMv81_OpenMP_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/ARMv81_Pthread_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/ARMv81_Qthreads_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/ARMv81_ROCm_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/ARMv81_Serial_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/BDW_Cuda_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/BDW_OpenMP_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/BDW_Pthread_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/BDW_Qthreads_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/BDW_ROCm_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/BDW_Serial_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/BGQ_Cuda_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/BGQ_OpenMP_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/BGQ_Pthread_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/BGQ_Qthreads_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/BGQ_ROCm_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/BGQ_Serial_KokkosCore_config.h create mode 100644 
lib/kokkos/core/unit_test/config/results/HSW_Cuda_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/HSW_OpenMP_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/HSW_Pthread_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/HSW_Qthreads_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/HSW_ROCm_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/HSW_Serial_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/KNC_Cuda_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/KNC_OpenMP_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/KNC_Pthread_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/KNC_Qthreads_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/KNC_ROCm_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/KNC_Serial_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/KNL_Cuda_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/KNL_OpenMP_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/KNL_Pthread_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/KNL_Qthreads_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/KNL_ROCm_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/KNL_Serial_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Kepler30_Cuda_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Kepler30_OpenMP_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Kepler30_Pthread_KokkosCore_config.h create mode 100644 
lib/kokkos/core/unit_test/config/results/Kepler30_Qthreads_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Kepler30_ROCm_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Kepler30_Serial_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Kepler32_Cuda_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Kepler32_OpenMP_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Kepler32_Pthread_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Kepler32_Qthreads_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Kepler32_ROCm_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Kepler32_Serial_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Kepler35_Cuda_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Kepler35_OpenMP_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Kepler35_Pthread_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Kepler35_Qthreads_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Kepler35_ROCm_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Kepler35_Serial_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Kepler37_Cuda_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Kepler37_OpenMP_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Kepler37_Pthread_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Kepler37_Qthreads_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Kepler37_ROCm_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Kepler37_Serial_KokkosCore_config.h 
create mode 100644 lib/kokkos/core/unit_test/config/results/Kepler_Cuda_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Kepler_OpenMP_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Kepler_Pthread_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Kepler_Qthreads_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Kepler_ROCm_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Kepler_Serial_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Maxwell50_Cuda_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Maxwell50_OpenMP_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Maxwell50_Pthread_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Maxwell50_Qthreads_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Maxwell50_ROCm_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Maxwell50_Serial_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Maxwell52_Cuda_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Maxwell52_OpenMP_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Maxwell52_Pthread_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Maxwell52_Qthreads_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Maxwell52_ROCm_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Maxwell52_Serial_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Maxwell53_Cuda_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Maxwell53_OpenMP_KokkosCore_config.h create mode 100644 
lib/kokkos/core/unit_test/config/results/Maxwell53_Pthread_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Maxwell53_Qthreads_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Maxwell53_ROCm_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Maxwell53_Serial_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Maxwell_Cuda_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Maxwell_OpenMP_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Maxwell_Pthread_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Maxwell_Qthreads_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Maxwell_ROCm_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Maxwell_Serial_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/None_Cuda_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/None_OpenMP_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/None_Pthread_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/None_Qthreads_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/None_ROCm_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/None_Serial_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Pascal60_Cuda_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Pascal60_OpenMP_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Pascal60_Pthread_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Pascal60_Qthreads_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Pascal60_ROCm_KokkosCore_config.h create mode 100644 
lib/kokkos/core/unit_test/config/results/Pascal60_Serial_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Pascal61_Cuda_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Pascal61_OpenMP_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Pascal61_Pthread_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Pascal61_Qthreads_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Pascal61_ROCm_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Pascal61_Serial_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Power7_Cuda_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Power7_OpenMP_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Power7_Pthread_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Power7_Qthreads_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Power7_ROCm_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Power7_Serial_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Power8_Cuda_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Power8_OpenMP_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Power8_Pthread_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Power8_Qthreads_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Power8_ROCm_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Power8_Serial_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Power9_Cuda_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Power9_OpenMP_KokkosCore_config.h create mode 100644 
lib/kokkos/core/unit_test/config/results/Power9_Pthread_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Power9_Qthreads_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Power9_ROCm_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/Power9_Serial_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/SKX_Cuda_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/SKX_OpenMP_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/SKX_Pthread_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/SKX_Qthreads_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/SKX_ROCm_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/SKX_Serial_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/SNB_Cuda_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/SNB_OpenMP_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/SNB_Pthread_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/SNB_Qthreads_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/SNB_ROCm_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/SNB_Serial_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/WSM_Cuda_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/WSM_OpenMP_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/WSM_Pthread_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/WSM_Qthreads_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/WSM_ROCm_KokkosCore_config.h create mode 100644 lib/kokkos/core/unit_test/config/results/WSM_Serial_KokkosCore_config.h create 
mode 100755 lib/kokkos/core/unit_test/diffconfig.sh create mode 100755 lib/kokkos/core/unit_test/testmake.sh create mode 100644 lib/kokkos/doc/develop_builds.md create mode 100644 lib/kokkos/example/cmake_build/foo.f diff --git a/lib/kokkos/CHANGELOG.md b/lib/kokkos/CHANGELOG.md index bd5ae780c9..e3de6b048d 100644 --- a/lib/kokkos/CHANGELOG.md +++ b/lib/kokkos/CHANGELOG.md @@ -1,4 +1,44 @@ # Change Log + +## [2.5.00](https://github.com/kokkos/kokkos/tree/2.5.00) (2017-12-15) +[Full Changelog](https://github.com/kokkos/kokkos/compare/2.04.11...2.5.00) + +**Part of the Kokkos C++ Performance Portability Programming EcoSystem 2.5** + +**Implemented enhancements:** + +- Provide Makefile.kokkos logic for CMake and TriBITS [\#878](https://github.com/kokkos/kokkos/issues/878) +- Add Scatter View [\#825](https://github.com/kokkos/kokkos/issues/825) +- Drop gcc 4.7 and intel 14 from supported compiler list [\#603](https://github.com/kokkos/kokkos/issues/603) +- Enable construction of unmanaged view using common\_view\_alloc\_prop [\#1170](https://github.com/kokkos/kokkos/issues/1170) +- Unused Function Warning with XL [\#1267](https://github.com/kokkos/kokkos/issues/1267) +- Add memory pool parameter check [\#1218](https://github.com/kokkos/kokkos/issues/1218) +- CUDA9: Fix warning for unsupported long double [\#1189](https://github.com/kokkos/kokkos/issues/1189) +- CUDA9: fix warning on defaulted function marking [\#1188](https://github.com/kokkos/kokkos/issues/1188) +- CUDA9: fix warnings for deprecated warp level functions [\#1187](https://github.com/kokkos/kokkos/issues/1187) +- Add CUDA 9.0 nightly testing [\#1174](https://github.com/kokkos/kokkos/issues/1174) +- {OMPI,MPICH}\_CXX hack breaks nvcc\_wrapper use case [\#1166](https://github.com/kokkos/kokkos/issues/1166) +- KOKKOS\_HAVE\_CUDA\_LAMBDA became KOKKOS\_CUDA\_USE\_LAMBDA [\#1274](https://github.com/kokkos/kokkos/issues/1274) + +**Fixed bugs:** + +- MinMax Reducer with tagged operator doesn't compile 
[\#1251](https://github.com/kokkos/kokkos/issues/1251) +- Reducers for Tagged operators give wrong answer [\#1250](https://github.com/kokkos/kokkos/issues/1250) +- Kokkos not Compatible with Big Endian Machines? [\#1235](https://github.com/kokkos/kokkos/issues/1235) +- Parallel Scan hangs forever on BG/Q [\#1234](https://github.com/kokkos/kokkos/issues/1234) +- Threads backend doesn't compile with Clang on OS X [\#1232](https://github.com/kokkos/kokkos/issues/1232) +- $\(shell date\) needs quote [\#1264](https://github.com/kokkos/kokkos/issues/1264) +- Unqualified parallel\_for call conflicts with user-defined parallel\_for [\#1219](https://github.com/kokkos/kokkos/issues/1219) +- KokkosAlgorithms: CMake issue in unit tests [\#1212](https://github.com/kokkos/kokkos/issues/1212) +- Intel 18 Error: "simd pragma has been deprecated" [\#1210](https://github.com/kokkos/kokkos/issues/1210) +- Memory leak in Kokkos::initialize [\#1194](https://github.com/kokkos/kokkos/issues/1194) +- CUDA9: compiler error with static assert template arguments [\#1190](https://github.com/kokkos/kokkos/issues/1190) +- Kokkos::Serial::is\_initialized returns always true [\#1184](https://github.com/kokkos/kokkos/issues/1184) +- Triple nested parallelism still fails on bowman [\#1093](https://github.com/kokkos/kokkos/issues/1093) +- OpenMP openmp.range on Develop Runs Forever on POWER7+ with RHEL7 and GCC4.8.5 [\#995](https://github.com/kokkos/kokkos/issues/995) +- Rendezvous performance at global scope [\#985](https://github.com/kokkos/kokkos/issues/985) + + ## [2.04.11](https://github.com/kokkos/kokkos/tree/2.04.11) (2017-10-28) [Full Changelog](https://github.com/kokkos/kokkos/compare/2.04.04...2.04.11) diff --git a/lib/kokkos/CMakeLists.txt b/lib/kokkos/CMakeLists.txt index 7795973a22..9c708ded4a 100644 --- a/lib/kokkos/CMakeLists.txt +++ b/lib/kokkos/CMakeLists.txt @@ -1,3 +1,5 @@ +# Is this a build as part of Trilinos? 
+ IF(COMMAND TRIBITS_PACKAGE_DECL) SET(KOKKOS_HAS_TRILINOS ON CACHE BOOL "") ELSE() @@ -6,13 +8,57 @@ ENDIF() IF(NOT KOKKOS_HAS_TRILINOS) cmake_minimum_required(VERSION 3.1 FATAL_ERROR) - project(Kokkos CXX) - INCLUDE(cmake/kokkos.cmake) + # Define Project Name if this is a standalone build + IF(NOT DEFINED ${PROJECT_NAME}) + project(Kokkos CXX) + ENDIF() + + # Basic initialization (Used in KOKKOS_SETTINGS) + set(KOKKOS_SRC_PATH ${Kokkos_SOURCE_DIR}) + set(KOKKOS_PATH ${KOKKOS_SRC_PATH}) + + #------------ COMPILER AND FEATURE CHECKS ------------------------------------ + include(${KOKKOS_SRC_PATH}/cmake/kokkos_functions.cmake) + set_kokkos_cxx_compiler() + set_kokkos_cxx_standard() + + #------------ GET OPTIONS AND KOKKOS_SETTINGS -------------------------------- + # Add Kokkos' modules to CMake's module path. + set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${Kokkos_SOURCE_DIR}/cmake/Modules/") + + set(KOKKOS_CMAKE_VERBOSE True) + include(${KOKKOS_SRC_PATH}/cmake/kokkos_options.cmake) + + include(${KOKKOS_SRC_PATH}/cmake/kokkos_settings.cmake) + + #------------ GENERATE HEADER AND SOURCE FILES ------------------------------- + execute_process( + COMMAND ${KOKKOS_SETTINGS} make -f ${KOKKOS_SRC_PATH}/cmake/Makefile.generate_cmake_settings CXX=${CMAKE_CXX_COMPILER} generate_build_settings + WORKING_DIRECTORY "${Kokkos_BINARY_DIR}" + OUTPUT_FILE ${Kokkos_BINARY_DIR}/core_src_make.out + RESULT_VARIABLE res + ) + include(${Kokkos_BINARY_DIR}/kokkos_generated_settings.cmake) + set_kokkos_srcs(KOKKOS_SRC ${KOKKOS_SRC}) + + #------------ NOW BUILD ------------------------------------------------------ + include(${KOKKOS_SRC_PATH}/cmake/kokkos_build.cmake) + + #------------ Add in Fake Tribits Handling to allow unit test builds- -------- + + include(${KOKKOS_SRC_PATH}/cmake/tribits.cmake) + + TRIBITS_PACKAGE_DECL(Kokkos) + + ADD_SUBDIRECTORY(core) + ADD_SUBDIRECTORY(containers) + ADD_SUBDIRECTORY(algorithms) + ELSE() 
#------------------------------------------------------------------------------ # -# A) Forward delcare the package so that certain options are also defined for +# A) Forward declare the package so that certain options are also defined for # subpackages # @@ -21,212 +67,28 @@ TRIBITS_PACKAGE_DECL(Kokkos) # ENABLE_SHADOWING_WARNINGS) #------------------------------------------------------------------------------ # -# B) Define the common options for Kokkos first so they can be used by -# subpackages as well. +# B) Install Kokkos' build files # +# If using the Makefile-generated files, then need to set things up. +# Here, assume that TriBITS has been run from ProjectCompilerPostConfig.cmake +# and already generated KokkosCore_config.h and kokkos_generated_settings.cmake +# in the previously defined Kokkos_GEN_DIR +# We need to copy them over to the correct place and source the cmake file -# mfh 01 Aug 2016: See Issue #61: -# -# https://github.com/kokkos/kokkos/issues/61 -# -# Don't use TRIBITS_ADD_DEBUG_OPTION() here, because that defines -# HAVE_KOKKOS_DEBUG. We define KOKKOS_HAVE_DEBUG here instead, -# for compatibility with Kokkos' Makefile build system. 
+if(NOT KOKKOS_LEGACY_TRIBITS) + set(Kokkos_GEN_DIR ${CMAKE_BINARY_DIR}) + file(COPY "${Kokkos_GEN_DIR}/KokkosCore_config.h" + DESTINATION "${CMAKE_CURRENT_BINARY_DIR}" USE_SOURCE_PERMISSIONS) + install(FILES "${Kokkos_GEN_DIR}/KokkosCore_config.h" + DESTINATION include) + file(COPY "${Kokkos_GEN_DIR}/kokkos_generated_settings.cmake" + DESTINATION "${CMAKE_CURRENT_BINARY_DIR}" USE_SOURCE_PERMISSIONS) -if (TPL_ENABLE_CUDA) - if (DEFINED CUDA_VERSION) - # there is a VERSION_GREATER_EQUAL, but only in CMake >= 3.7 - if (CUDA_VERSION VERSION_EQUAL "7.5") - set(KOKKOS_HAVE_CUDA_GEQ_75 TRUE) - endif() - if (CUDA_VERSION VERSION_GREATER "7.5") - set(KOKKOS_HAVE_CUDA_GEQ_75 TRUE) - endif() - if (CUDA_VERSION VERSION_EQUAL "8.0") - set(KOKKOS_HAVE_CUDA_GEQ_80 TRUE) - endif() - if (CUDA_VERSION VERSION_GREATER "8.0") - set(KOKKOS_HAVE_CUDA_GEQ_80 TRUE) - endif() - endif() -endif() - -TRIBITS_ADD_OPTION_AND_DEFINE( - Kokkos_ENABLE_DEBUG - KOKKOS_HAVE_DEBUG - "Enable run-time debug checks. These checks may be expensive, so they are disabled by default in a release build." - ${${PROJECT_NAME}_ENABLE_DEBUG} -) - -TRIBITS_ADD_OPTION_AND_DEFINE( - Kokkos_ENABLE_SIERRA_BUILD - KOKKOS_FOR_SIERRA - "Configure Kokkos for building within the Sierra build system." - OFF - ) - -TRIBITS_ADD_OPTION_AND_DEFINE( - Kokkos_ENABLE_Cuda - KOKKOS_HAVE_CUDA - "Enable CUDA support in Kokkos." - "${KOKKOS_HAVE_CUDA_TPL}" - ) - -TRIBITS_ADD_OPTION_AND_DEFINE( - Kokkos_ENABLE_Cuda_UVM - KOKKOS_USE_CUDA_UVM - "Enable CUDA Unified Virtual Memory as the default in Kokkos." - OFF - ) - -TRIBITS_ADD_OPTION_AND_DEFINE( - Kokkos_ENABLE_Cuda_RDC - KOKKOS_HAVE_CUDA_RDC - "Enable CUDA Relocatable Device Code support in Kokkos." 
- OFF - ) - -set(Kokkos_ENABLE_Cuda_Lambda_DEFAULT OFF) -if (Kokkos_ENABLE_Cuda) - if (KOKKOS_HAVE_CUDA_GEQ_75) - if (CMAKE_CXX_FLAGS MATCHES "-expt-extended-lambda") - set(Kokkos_ENABLE_Cuda_Lambda_DEFAULT ON) - message("-- CUDA version is >= 7.5 and CMAKE_CXX_FLAGS contains -expt-extended-lambda,") - message("-- Kokkos_ENABLE_Cuda_Lambda defaults to ON") - else() - message("-- CMAKE_CXX_FLAGS doesn't contain -expt-extended-lambda,") - message("-- Kokkos_ENABLE_Cuda_Lambda defaults to OFF") - endif() - else() - message("-- CUDA version is < 7.5, Kokkos_ENABLE_Cuda_Lambda defaults to OFF") - endif() -endif() - -TRIBITS_ADD_OPTION_AND_DEFINE( - Kokkos_ENABLE_Cuda_Lambda - KOKKOS_HAVE_CUDA_LAMBDA - "Enable CUDA LAMBDA support in Kokkos." - "${Kokkos_ENABLE_Cuda_Lambda_DEFAULT}" - ) - -TRIBITS_ADD_OPTION_AND_DEFINE( - Kokkos_ENABLE_Pthread - KOKKOS_HAVE_PTHREAD - "Enable Pthread support in Kokkos." - OFF - ) - -ASSERT_DEFINED(TPL_ENABLE_Pthread) -IF(Kokkos_ENABLE_Pthread AND NOT TPL_ENABLE_Pthread) - MESSAGE(FATAL_ERROR "You set Kokkos_ENABLE_Pthread=ON, but Trilinos' support for Pthread(s) is not enabled (TPL_ENABLE_Pthread=OFF). This is not allowed. Please enable Pthreads in Trilinos before attempting to enable Kokkos' support for Pthreads.") -ENDIF() -IF(NOT TPL_ENABLE_Pthread) - ADD_DEFINITIONS(-DGTEST_HAS_PTHREAD=0) -ENDIF() - -TRIBITS_ADD_OPTION_AND_DEFINE( - Kokkos_ENABLE_OpenMP - KOKKOS_HAVE_OPENMP - "Enable OpenMP support in Kokkos." - "${${PROJECT_NAME}_ENABLE_OpenMP}" - ) - -TRIBITS_ADD_OPTION_AND_DEFINE( - Kokkos_ENABLE_QTHREAD - KOKKOS_HAVE_QTHREADS - "Enable Qthreads support in Kokkos." - "${TPL_ENABLE_QTHREAD}" - ) - -# TODO: No longer an option in Kokkos. Needs to be removed. -TRIBITS_ADD_OPTION_AND_DEFINE( - Kokkos_ENABLE_CXX11 - KOKKOS_HAVE_CXX11 - "Enable C++11 support in Kokkos." - "${${PROJECT_NAME}_ENABLE_CXX11}" - ) - -TRIBITS_ADD_OPTION_AND_DEFINE( - Kokkos_ENABLE_HWLOC - KOKKOS_HAVE_HWLOC - "Enable HWLOC support in Kokkos." 
- "${TPL_ENABLE_HWLOC}" - ) - -# TODO: This is currently not used in Kokkos. Should it be removed? -TRIBITS_ADD_OPTION_AND_DEFINE( - Kokkos_ENABLE_MPI - KOKKOS_HAVE_MPI - "Enable MPI support in Kokkos." - "${TPL_ENABLE_MPI}" - ) - -# Set default value of Kokkos_ENABLE_Debug_Bounds_Check option -# -# CMake is case sensitive. The Kokkos_ENABLE_Debug_Bounds_Check -# option (defined below) is annoyingly not all caps, but we need to -# keep it that way for backwards compatibility. If users forget and -# try using an all-caps variable, then make it count by using the -# all-caps version as the default value of the original, not-all-caps -# option. Otherwise, the default value of this option comes from -# Kokkos_ENABLE_DEBUG (see Issue #367). - -ASSERT_DEFINED(${PACKAGE_NAME}_ENABLE_DEBUG) -IF(DEFINED Kokkos_ENABLE_DEBUG_BOUNDS_CHECK) - IF(Kokkos_ENABLE_DEBUG_BOUNDS_CHECK) - SET(Kokkos_ENABLE_Debug_Bounds_Check_DEFAULT ON) - ELSE() - SET(Kokkos_ENABLE_Debug_Bounds_Check_DEFAULT "${${PACKAGE_NAME}_ENABLE_DEBUG}") - ENDIF() -ELSE() - SET(Kokkos_ENABLE_Debug_Bounds_Check_DEFAULT "${${PACKAGE_NAME}_ENABLE_DEBUG}") -ENDIF() -ASSERT_DEFINED(Kokkos_ENABLE_Debug_Bounds_Check_DEFAULT) - -TRIBITS_ADD_OPTION_AND_DEFINE( - Kokkos_ENABLE_Debug_Bounds_Check - KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK - "Enable Kokkos::View run-time bounds checking." - "${Kokkos_ENABLE_Debug_Bounds_Check_DEFAULT}" - ) - -TRIBITS_ADD_OPTION_AND_DEFINE( - Kokkos_ENABLE_Debug_DualView_Modify_Check - KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK - "Enable abort when Kokkos::DualView modified on host and device without sync." - "${Kokkos_ENABLE_DEBUG}" - ) - -TRIBITS_ADD_OPTION_AND_DEFINE( - Kokkos_ENABLE_Profiling - KOKKOS_ENABLE_PROFILING - "Enable KokkosP profiling support for kernel data collections." - "${TPL_ENABLE_DLlib}" - ) - -TRIBITS_ADD_OPTION_AND_DEFINE( - Kokkos_ENABLE_Profiling_Load_Print - KOKKOS_ENABLE_PROFILING_LOAD_PRINT - "Print to standard output which profiling library was loaded." 
- OFF - ) - -# placeholder for future device... -TRIBITS_ADD_OPTION_AND_DEFINE( - Kokkos_ENABLE_Winthread - KOKKOS_HAVE_WINTHREAD - "Enable Winthread support in Kokkos." - "${TPL_ENABLE_Winthread}" - ) - -# TODO: No longer an option in Kokkos. Needs to be removed. -# use new/old View -TRIBITS_ADD_OPTION_AND_DEFINE( - Kokkos_USING_DEPRECATED_VIEW - KOKKOS_USING_DEPRECATED_VIEW - "Choose whether to use the old, deprecated Kokkos::View" - OFF - ) + include(${CMAKE_CURRENT_BINARY_DIR}/kokkos_generated_settings.cmake) + # Sources come from makefile-generated kokkos_generated_settings.cmake file + # Enable using the individual sources if needed + set_kokkos_srcs(KOKKOS_SRC ${KOKKOS_SRC}) +endif () #------------------------------------------------------------------------------ @@ -260,10 +122,6 @@ TRIBITS_PACKAGE_DEF() TRIBITS_EXCLUDE_AUTOTOOLS_FILES() -TRIBITS_EXCLUDE_FILES( - classic/doc - classic/LinAlg/doc/CrsRefactorNotesMay2012 - ) - TRIBITS_PACKAGE_POSTPROCESS() + ENDIF() diff --git a/lib/kokkos/Makefile.kokkos b/lib/kokkos/Makefile.kokkos index 4641232a1f..4315b009d5 100644 --- a/lib/kokkos/Makefile.kokkos +++ b/lib/kokkos/Makefile.kokkos @@ -28,33 +28,39 @@ KOKKOS_OPTIONS ?= "" # Options: force_uvm,use_ldg,rdc,enable_lambda KOKKOS_CUDA_OPTIONS ?= "enable_lambda" +# Return a 1 if a string contains a substring and 0 if not +# Note the search string should be without '"' +# Example: $(call kokkos_has_string,"hwloc,librt",hwloc) +# Will return a 1 +kokkos_has_string=$(if $(findstring $2,$1),1,0) + # Check for general settings. 
-KOKKOS_INTERNAL_ENABLE_DEBUG := $(strip $(shell echo $(KOKKOS_DEBUG) | grep "yes" | wc -l)) -KOKKOS_INTERNAL_ENABLE_CXX11 := $(strip $(shell echo $(KOKKOS_CXX_STANDARD) | grep "c++11" | wc -l)) -KOKKOS_INTERNAL_ENABLE_CXX1Z := $(strip $(shell echo $(KOKKOS_CXX_STANDARD) | grep "c++1z" | wc -l)) +KOKKOS_INTERNAL_ENABLE_DEBUG := $(call kokkos_has_string,$(KOKKOS_DEBUG),yes) +KOKKOS_INTERNAL_ENABLE_CXX11 := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++11) +KOKKOS_INTERNAL_ENABLE_CXX1Z := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++1z) # Check for external libraries. -KOKKOS_INTERNAL_USE_HWLOC := $(strip $(shell echo $(KOKKOS_USE_TPLS) | grep "hwloc" | wc -l)) -KOKKOS_INTERNAL_USE_LIBRT := $(strip $(shell echo $(KOKKOS_USE_TPLS) | grep "librt" | wc -l)) -KOKKOS_INTERNAL_USE_MEMKIND := $(strip $(shell echo $(KOKKOS_USE_TPLS) | grep "experimental_memkind" | wc -l)) +KOKKOS_INTERNAL_USE_HWLOC := $(call kokkos_has_string,$(KOKKOS_USE_TPLS),hwloc) +KOKKOS_INTERNAL_USE_LIBRT := $(call kokkos_has_string,$(KOKKOS_USE_TPLS),librt) +KOKKOS_INTERNAL_USE_MEMKIND := $(call kokkos_has_string,$(KOKKOS_USE_TPLS),experimental_memkind) # Check for advanced settings. 
-KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS := $(strip $(shell echo $(KOKKOS_OPTIONS) | grep "compiler_warnings" | wc -l)) -KOKKOS_INTERNAL_OPT_RANGE_AGGRESSIVE_VECTORIZATION := $(strip $(shell echo $(KOKKOS_OPTIONS) | grep "aggressive_vectorization" | wc -l)) -KOKKOS_INTERNAL_DISABLE_PROFILING := $(strip $(shell echo $(KOKKOS_OPTIONS) | grep "disable_profiling" | wc -l)) -KOKKOS_INTERNAL_DISABLE_DUALVIEW_MODIFY_CHECK := $(strip $(shell echo $(KOKKOS_OPTIONS) | grep "disable_dualview_modify_check" | wc -l)) -KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_PRINT := $(strip $(shell echo $(KOKKOS_OPTIONS) | grep "enable_profile_load_print" | wc -l)) -KOKKOS_INTERNAL_CUDA_USE_LDG := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "use_ldg" | wc -l)) -KOKKOS_INTERNAL_CUDA_USE_UVM := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "force_uvm" | wc -l)) -KOKKOS_INTERNAL_CUDA_USE_RELOC := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "rdc" | wc -l)) -KOKKOS_INTERNAL_CUDA_USE_LAMBDA := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "enable_lambda" | wc -l)) +KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS := $(call kokkos_has_string,$(KOKKOS_OPTIONS),compiler_warnings) +KOKKOS_INTERNAL_OPT_RANGE_AGGRESSIVE_VECTORIZATION := $(call kokkos_has_string,$(KOKKOS_OPTIONS),aggressive_vectorization) +KOKKOS_INTERNAL_DISABLE_PROFILING := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_profiling) +KOKKOS_INTERNAL_DISABLE_DUALVIEW_MODIFY_CHECK := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_dualview_modify_check) +KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_PRINT := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_profile_load_print) +KOKKOS_INTERNAL_CUDA_USE_LDG := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),use_ldg) +KOKKOS_INTERNAL_CUDA_USE_UVM := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),force_uvm) +KOKKOS_INTERNAL_CUDA_USE_RELOC := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),rdc) +KOKKOS_INTERNAL_CUDA_USE_LAMBDA := $(call 
kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),enable_lambda) # Check for Kokkos Host Execution Spaces one of which must be on. -KOKKOS_INTERNAL_USE_OPENMP := $(strip $(shell echo $(subst OpenMPTarget,,$(KOKKOS_DEVICES)) | grep OpenMP | wc -l)) -KOKKOS_INTERNAL_USE_PTHREADS := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Pthread | wc -l)) -KOKKOS_INTERNAL_USE_QTHREADS := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Qthreads | wc -l)) -KOKKOS_INTERNAL_USE_SERIAL := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Serial | wc -l)) +KOKKOS_INTERNAL_USE_OPENMP := $(call kokkos_has_string,$(subst OpenMPTarget,,$(KOKKOS_DEVICES)),OpenMP) +KOKKOS_INTERNAL_USE_PTHREADS := $(call kokkos_has_string,$(KOKKOS_DEVICES),Pthread) +KOKKOS_INTERNAL_USE_QTHREADS := $(call kokkos_has_string,$(KOKKOS_DEVICES),Qthreads) +KOKKOS_INTERNAL_USE_SERIAL := $(call kokkos_has_string,$(KOKKOS_DEVICES),Serial) ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 0) ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 0) @@ -65,9 +71,9 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 0) endif # Check for other Execution Spaces. -KOKKOS_INTERNAL_USE_CUDA := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Cuda | wc -l)) -KOKKOS_INTERNAL_USE_ROCM := $(strip $(shell echo $(KOKKOS_DEVICES) | grep ROCm | wc -l)) -KOKKOS_INTERNAL_USE_OPENMPTARGET := $(strip $(shell echo $(KOKKOS_DEVICES) | grep OpenMPTarget | wc -l)) +KOKKOS_INTERNAL_USE_CUDA := $(call kokkos_has_string,$(KOKKOS_DEVICES),Cuda) +KOKKOS_INTERNAL_USE_ROCM := $(call kokkos_has_string,$(KOKKOS_DEVICES),ROCm) +KOKKOS_INTERNAL_USE_OPENMPTARGET := $(call kokkos_has_string,$(KOKKOS_DEVICES),OpenMPTarget) ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) KOKKOS_INTERNAL_NVCC_PATH := $(shell which nvcc) @@ -77,25 +83,20 @@ endif # Check OS. 
KOKKOS_OS := $(strip $(shell uname -s)) -KOKKOS_INTERNAL_OS_CYGWIN := $(strip $(shell uname -s | grep CYGWIN | wc -l)) -KOKKOS_INTERNAL_OS_LINUX := $(strip $(shell uname -s | grep Linux | wc -l)) -KOKKOS_INTERNAL_OS_DARWIN := $(strip $(shell uname -s | grep Darwin | wc -l)) +KOKKOS_INTERNAL_OS_CYGWIN := $(call kokkos_has_string,$(KOKKOS_OS),CYGWIN) +KOKKOS_INTERNAL_OS_LINUX := $(call kokkos_has_string,$(KOKKOS_OS),Linux) +KOKKOS_INTERNAL_OS_DARWIN := $(call kokkos_has_string,$(KOKKOS_OS),Darwin) # Check compiler. -KOKKOS_INTERNAL_COMPILER_INTEL := $(strip $(shell $(CXX) --version 2>&1 | grep "Intel Corporation" | wc -l)) -KOKKOS_INTERNAL_COMPILER_PGI := $(strip $(shell $(CXX) --version 2>&1 | grep PGI | wc -l)) +KOKKOS_CXX_VERSION := $(strip $(shell $(CXX) --version 2>&1)) +KOKKOS_INTERNAL_COMPILER_INTEL := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),Intel Corporation) +KOKKOS_INTERNAL_COMPILER_PGI := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),PGI) KOKKOS_INTERNAL_COMPILER_XL := $(strip $(shell $(CXX) -qversion 2>&1 | grep XL | wc -l)) KOKKOS_INTERNAL_COMPILER_CRAY := $(strip $(shell $(CXX) -craype-verbose 2>&1 | grep "CC-" | wc -l)) -KOKKOS_INTERNAL_COMPILER_NVCC := $(strip $(shell $(CXX) --version 2>&1 | grep nvcc | wc -l)) -ifneq ($(OMPI_CXX),) - KOKKOS_INTERNAL_COMPILER_NVCC := $(strip $(shell $(OMPI_CXX) --version 2>&1 | grep nvcc | wc -l)) -endif -ifneq ($(MPICH_CXX),) - KOKKOS_INTERNAL_COMPILER_NVCC := $(strip $(shell $(MPICH_CXX) --version 2>&1 | grep nvcc | wc -l)) -endif -KOKKOS_INTERNAL_COMPILER_CLANG := $(strip $(shell $(CXX) --version 2>&1 | grep clang | wc -l)) -KOKKOS_INTERNAL_COMPILER_APPLE_CLANG := $(strip $(shell $(CXX) --version 2>&1 | grep "apple-darwin" | wc -l)) -KOKKOS_INTERNAL_COMPILER_HCC := $(strip $(shell $(CXX) --version 2>&1 | grep HCC | wc -l)) +KOKKOS_INTERNAL_COMPILER_NVCC := $(strip $(shell export OMPI_CXX=$(OMPI_CXX); export MPICH_CXX=$(MPICH_CXX); $(CXX) --version 2>&1 | grep nvcc | wc -l)) +KOKKOS_INTERNAL_COMPILER_CLANG 
:= $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),clang) +KOKKOS_INTERNAL_COMPILER_APPLE_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),apple-darwin) +KOKKOS_INTERNAL_COMPILER_HCC := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),HCC) ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 2) KOKKOS_INTERNAL_COMPILER_CLANG = 1 @@ -209,47 +210,48 @@ endif # Check for Kokkos Architecture settings. # Intel based. -KOKKOS_INTERNAL_USE_ARCH_KNC := $(strip $(shell echo $(KOKKOS_ARCH) | grep KNC | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_WSM := $(strip $(shell echo $(KOKKOS_ARCH) | grep WSM | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_SNB := $(strip $(shell echo $(KOKKOS_ARCH) | grep SNB | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_HSW := $(strip $(shell echo $(KOKKOS_ARCH) | grep HSW | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_BDW := $(strip $(shell echo $(KOKKOS_ARCH) | grep BDW | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_SKX := $(strip $(shell echo $(KOKKOS_ARCH) | grep SKX | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_KNL := $(strip $(shell echo $(KOKKOS_ARCH) | grep KNL | wc -l)) +KOKKOS_INTERNAL_USE_ARCH_KNC := $(call kokkos_has_string,$(KOKKOS_ARCH),KNC) +KOKKOS_INTERNAL_USE_ARCH_WSM := $(call kokkos_has_string,$(KOKKOS_ARCH),WSM) +KOKKOS_INTERNAL_USE_ARCH_SNB := $(call kokkos_has_string,$(KOKKOS_ARCH),SNB) +KOKKOS_INTERNAL_USE_ARCH_HSW := $(call kokkos_has_string,$(KOKKOS_ARCH),HSW) +KOKKOS_INTERNAL_USE_ARCH_BDW := $(call kokkos_has_string,$(KOKKOS_ARCH),BDW) +KOKKOS_INTERNAL_USE_ARCH_SKX := $(call kokkos_has_string,$(KOKKOS_ARCH),SKX) +KOKKOS_INTERNAL_USE_ARCH_KNL := $(call kokkos_has_string,$(KOKKOS_ARCH),KNL) # NVIDIA based. 
NVCC_WRAPPER := $(KOKKOS_PATH)/bin/nvcc_wrapper -KOKKOS_INTERNAL_USE_ARCH_KEPLER30 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler30 | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_KEPLER32 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler32 | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_KEPLER35 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler35 | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_KEPLER37 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler37 | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_MAXWELL50 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell50 | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_MAXWELL52 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell52 | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_MAXWELL53 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell53 | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_PASCAL61 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Pascal61 | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_PASCAL60 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Pascal60 | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \ - + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \ - + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \ - + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \ - + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61) \ - + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60) \ - + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \ - + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \ - + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) | bc)) +KOKKOS_INTERNAL_USE_ARCH_KEPLER30 := $(call kokkos_has_string,$(KOKKOS_ARCH),Kepler30) +KOKKOS_INTERNAL_USE_ARCH_KEPLER32 := $(call kokkos_has_string,$(KOKKOS_ARCH),Kepler32) +KOKKOS_INTERNAL_USE_ARCH_KEPLER35 := $(call kokkos_has_string,$(KOKKOS_ARCH),Kepler35) +KOKKOS_INTERNAL_USE_ARCH_KEPLER37 := $(call kokkos_has_string,$(KOKKOS_ARCH),Kepler37) +KOKKOS_INTERNAL_USE_ARCH_MAXWELL50 := $(call kokkos_has_string,$(KOKKOS_ARCH),Maxwell50) +KOKKOS_INTERNAL_USE_ARCH_MAXWELL52 := $(call kokkos_has_string,$(KOKKOS_ARCH),Maxwell52) +KOKKOS_INTERNAL_USE_ARCH_MAXWELL53 := $(call 
kokkos_has_string,$(KOKKOS_ARCH),Maxwell53) +KOKKOS_INTERNAL_USE_ARCH_PASCAL61 := $(call kokkos_has_string,$(KOKKOS_ARCH),Pascal61) +KOKKOS_INTERNAL_USE_ARCH_PASCAL60 := $(call kokkos_has_string,$(KOKKOS_ARCH),Pascal60) +KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \ + + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \ + + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \ + + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \ + + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61) \ + + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60) \ + + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \ + + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \ + + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53)) +#SEK: This seems like a bug to me ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0) - KOKKOS_INTERNAL_USE_ARCH_MAXWELL50 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell | wc -l)) - KOKKOS_INTERNAL_USE_ARCH_KEPLER35 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler | wc -l)) - KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \ - + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \ - + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \ - + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \ - + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61) \ - + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60) \ - + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \ - + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \ - + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) | bc)) + KOKKOS_INTERNAL_USE_ARCH_MAXWELL50 := $(call kokkos_has_string,$(KOKKOS_ARCH),Maxwell) + KOKKOS_INTERNAL_USE_ARCH_KEPLER35 := $(call kokkos_has_string,$(KOKKOS_ARCH),Kepler) + KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \ + + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \ + + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \ + + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \ + + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61) \ + + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60) \ + + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \ + + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \ + + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53)) endif 
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 1) @@ -262,43 +264,43 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 1) endif endif # ARM based. -KOKKOS_INTERNAL_USE_ARCH_ARMV80 := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv80 | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_ARMV81 := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv81 | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv8-ThunderX | wc -l)) +KOKKOS_INTERNAL_USE_ARCH_ARMV80 := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv80) +KOKKOS_INTERNAL_USE_ARCH_ARMV81 := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv81) +KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv8-ThunderX) KOKKOS_INTERNAL_USE_ARCH_ARM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_ARMV80)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV81)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX) | bc)) # IBM based. -KOKKOS_INTERNAL_USE_ARCH_BGQ := $(strip $(shell echo $(KOKKOS_ARCH) | grep BGQ | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_POWER7 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Power7 | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_POWER8 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Power8 | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_POWER9 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Power9 | wc -l)) +KOKKOS_INTERNAL_USE_ARCH_BGQ := $(call kokkos_has_string,$(KOKKOS_ARCH),BGQ) +KOKKOS_INTERNAL_USE_ARCH_POWER7 := $(call kokkos_has_string,$(KOKKOS_ARCH),Power7) +KOKKOS_INTERNAL_USE_ARCH_POWER8 := $(call kokkos_has_string,$(KOKKOS_ARCH),Power8) +KOKKOS_INTERNAL_USE_ARCH_POWER9 := $(call kokkos_has_string,$(KOKKOS_ARCH),Power9) KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_BGQ)+$(KOKKOS_INTERNAL_USE_ARCH_POWER7)+$(KOKKOS_INTERNAL_USE_ARCH_POWER8)+$(KOKKOS_INTERNAL_USE_ARCH_POWER9) | bc)) # AMD based. 
-KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(strip $(shell echo $(KOKKOS_ARCH) | grep AMDAVX | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_RYZEN := $(strip $(shell echo $(KOKKOS_ARCH) | grep Ryzen | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_EPYC := $(strip $(shell echo $(KOKKOS_ARCH) | grep Epyc | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_KAVERI := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kaveri | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_CARRIZO := $(strip $(shell echo $(KOKKOS_ARCH) | grep Carrizo | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_FIJI := $(strip $(shell echo $(KOKKOS_ARCH) | grep Fiji | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_VEGA := $(strip $(shell echo $(KOKKOS_ARCH) | grep Vega | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_GFX901 := $(strip $(shell echo $(KOKKOS_ARCH) | grep gfx901 | wc -l)) +KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(call kokkos_has_string,$(KOKKOS_ARCH),AMDAVX) +KOKKOS_INTERNAL_USE_ARCH_RYZEN := $(call kokkos_has_string,$(KOKKOS_ARCH),Ryzen) +KOKKOS_INTERNAL_USE_ARCH_EPYC := $(call kokkos_has_string,$(KOKKOS_ARCH),Epyc) +KOKKOS_INTERNAL_USE_ARCH_KAVERI := $(call kokkos_has_string,$(KOKKOS_ARCH),Kaveri) +KOKKOS_INTERNAL_USE_ARCH_CARRIZO := $(call kokkos_has_string,$(KOKKOS_ARCH),Carrizo) +KOKKOS_INTERNAL_USE_ARCH_FIJI := $(call kokkos_has_string,$(KOKKOS_ARCH),Fiji) +KOKKOS_INTERNAL_USE_ARCH_VEGA := $(call kokkos_has_string,$(KOKKOS_ARCH),Vega) +KOKKOS_INTERNAL_USE_ARCH_GFX901 := $(call kokkos_has_string,$(KOKKOS_ARCH),gfx901) # Any AVX? 
-KOKKOS_INTERNAL_USE_ARCH_SSE42 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_WSM) | bc )) -KOKKOS_INTERNAL_USE_ARCH_AVX := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_AMDAVX) | bc )) -KOKKOS_INTERNAL_USE_ARCH_AVX2 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_BDW) | bc )) -KOKKOS_INTERNAL_USE_ARCH_AVX512MIC := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNL) | bc )) -KOKKOS_INTERNAL_USE_ARCH_AVX512XEON := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SKX) | bc )) +KOKKOS_INTERNAL_USE_ARCH_SSE42 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM)) +KOKKOS_INTERNAL_USE_ARCH_AVX := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_AMDAVX)) +KOKKOS_INTERNAL_USE_ARCH_AVX2 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW)) +KOKKOS_INTERNAL_USE_ARCH_AVX512MIC := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KNL)) +KOKKOS_INTERNAL_USE_ARCH_AVX512XEON := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SKX)) # Decide what ISA level we are able to support. 
-KOKKOS_INTERNAL_USE_ISA_X86_64 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_WSM)+$(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_BDW)+$(KOKKOS_INTERNAL_USE_ARCH_KNL)+$(KOKKOS_INTERNAL_USE_ARCH_SKX) | bc )) -KOKKOS_INTERNAL_USE_ISA_KNC := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNC) | bc )) -KOKKOS_INTERNAL_USE_ISA_POWERPCLE := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_POWER8)+$(KOKKOS_INTERNAL_USE_ARCH_POWER9) | bc )) -KOKKOS_INTERNAL_USE_ISA_POWERPCBE := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_POWER7) | bc )) +KOKKOS_INTERNAL_USE_ISA_X86_64 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM) + $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_KNL) + $(KOKKOS_INTERNAL_USE_ARCH_SKX)) +KOKKOS_INTERNAL_USE_ISA_KNC := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KNC)) +KOKKOS_INTERNAL_USE_ISA_POWERPCLE := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_POWER8) + $(KOKKOS_INTERNAL_USE_ARCH_POWER9)) +KOKKOS_INTERNAL_USE_ISA_POWERPCBE := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_POWER7)) # Decide whether we can support transactional memory -KOKKOS_INTERNAL_USE_TM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_BDW)+$(KOKKOS_INTERNAL_USE_ARCH_SKX) | bc )) +KOKKOS_INTERNAL_USE_TM := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_SKX)) # Incompatible flags? 
KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_SSE42)+$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_ARM)>1" | bc )) @@ -320,94 +322,100 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS), 1) KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_WARNINGS) endif -KOKKOS_LIBS = -lkokkos -ldl +KOKKOS_LIBS = -ldl KOKKOS_LDFLAGS = -L$(shell pwd) KOKKOS_SRC = KOKKOS_HEADERS = # Generating the KokkosCore_config.h file. +KOKKOS_INTERNAL_CONFIG_TMP=KokkosCore_config.tmp +KOKKOS_CONFIG_HEADER=KokkosCore_config.h +# Functions for generating config header file +kokkos_append_header = $(shell echo $1 >> $(KOKKOS_INTERNAL_CONFIG_TMP)) + +# Do not append first line tmp := $(shell echo "/* ---------------------------------------------" > KokkosCore_config.tmp) -tmp := $(shell echo "Makefile constructed configuration:" >> KokkosCore_config.tmp) -tmp := $(shell date >> KokkosCore_config.tmp) -tmp := $(shell echo "----------------------------------------------*/" >> KokkosCore_config.tmp) +tmp := $(call kokkos_append_header,"Makefile constructed configuration:") +tmp := $(call kokkos_append_header,"$(shell date)") +tmp := $(call kokkos_append_header,"----------------------------------------------*/") -tmp := $(shell echo '\#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H)' >> KokkosCore_config.tmp) -tmp := $(shell echo '\#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead."' >> KokkosCore_config.tmp) -tmp := $(shell echo '\#else' >> KokkosCore_config.tmp) -tmp := $(shell echo '\#define KOKKOS_CORE_CONFIG_H' >> KokkosCore_config.tmp) -tmp := $(shell echo '\#endif' >> KokkosCore_config.tmp) - -tmp := $(shell echo "/* Execution Spaces */" >> KokkosCore_config.tmp) +tmp := $(call kokkos_append_header,'\#if 
!defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H)') +tmp := $(call kokkos_append_header,'\#error "Do not include $(KOKKOS_CONFIG_HEADER) directly; include Kokkos_Macros.hpp instead."') +tmp := $(call kokkos_append_header,'\#else') +tmp := $(call kokkos_append_header,'\#define KOKKOS_CORE_CONFIG_H') +tmp := $(call kokkos_append_header,'\#endif') + +tmp := $(call kokkos_append_header,"/* Execution Spaces */") ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) - tmp := $(shell echo "\#define KOKKOS_HAVE_CUDA 1" >> KokkosCore_config.tmp ) + tmp := $(call kokkos_append_header,"\#define KOKKOS_HAVE_CUDA") endif ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1) - tmp := $(shell echo '\#define KOKKOS_ENABLE_ROCM 1' >> KokkosCore_config.tmp) + tmp := $(call kokkos_append_header,'\#define KOKKOS_ENABLE_ROCM') endif ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) - tmp := $(shell echo '\#define KOKKOS_ENABLE_OPENMPTARGET 1' >> KokkosCore_config.tmp) + tmp := $(call kokkos_append_header,'\#define KOKKOS_ENABLE_OPENMPTARGET') endif ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) - tmp := $(shell echo '\#define KOKKOS_HAVE_OPENMP 1' >> KokkosCore_config.tmp) + tmp := $(call kokkos_append_header,'\#define KOKKOS_HAVE_OPENMP') endif ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) - tmp := $(shell echo "\#define KOKKOS_HAVE_PTHREAD 1" >> KokkosCore_config.tmp ) + tmp := $(call kokkos_append_header,"\#define KOKKOS_HAVE_PTHREAD") endif ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 1) - tmp := $(shell echo "\#define KOKKOS_HAVE_QTHREADS 1" >> KokkosCore_config.tmp ) + tmp := $(call kokkos_append_header,"\#define KOKKOS_HAVE_QTHREADS") endif ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) - tmp := $(shell echo "\#define KOKKOS_HAVE_SERIAL 1" >> KokkosCore_config.tmp ) + tmp := $(call kokkos_append_header,"\#define KOKKOS_HAVE_SERIAL") endif ifeq ($(KOKKOS_INTERNAL_USE_TM), 1) - tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ENABLE_TM" >> KokkosCore_config.tmp ) - 
tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp ) + tmp := $(call kokkos_append_header,"\#ifndef __CUDA_ARCH__") + tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_TM") + tmp := $(call kokkos_append_header,"\#endif") endif ifeq ($(KOKKOS_INTERNAL_USE_ISA_X86_64), 1) - tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_USE_ISA_X86_64" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp ) + tmp := $(call kokkos_append_header,"\#ifndef __CUDA_ARCH__") + tmp := $(call kokkos_append_header,"\#define KOKKOS_USE_ISA_X86_64") + tmp := $(call kokkos_append_header,"\#endif") endif ifeq ($(KOKKOS_INTERNAL_USE_ISA_KNC), 1) - tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_USE_ISA_KNC" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp ) + tmp := $(call kokkos_append_header,"\#ifndef __CUDA_ARCH__") + tmp := $(call kokkos_append_header,"\#define KOKKOS_USE_ISA_KNC") + tmp := $(call kokkos_append_header,"\#endif") endif ifeq ($(KOKKOS_INTERNAL_USE_ISA_POWERPCLE), 1) - tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_USE_ISA_POWERPCLE" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp ) + tmp := $(call kokkos_append_header,"\#ifndef __CUDA_ARCH__") + tmp := $(call kokkos_append_header,"\#define KOKKOS_USE_ISA_POWERPCLE") + tmp := $(call kokkos_append_header,"\#endif") endif ifeq ($(KOKKOS_INTERNAL_USE_ISA_POWERPCBE), 1) - tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_USE_ISA_POWERPCBE" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp ) + tmp := $(call kokkos_append_header,"\#ifndef __CUDA_ARCH__") + tmp := $(call kokkos_append_header,"\#define KOKKOS_USE_ISA_POWERPCBE") + tmp := $(call 
kokkos_append_header,"\#endif") endif -tmp := $(shell echo "/* General Settings */" >> KokkosCore_config.tmp) +tmp := $(call kokkos_append_header,"/* General Settings */") ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX11), 1) KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX11_FLAG) - tmp := $(shell echo "\#define KOKKOS_HAVE_CXX11 1" >> KokkosCore_config.tmp ) + tmp := $(call kokkos_append_header,"\#define KOKKOS_HAVE_CXX11") endif ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX1Z), 1) KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX1Z_FLAG) - tmp := $(shell echo "\#define KOKKOS_HAVE_CXX11 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_HAVE_CXX1Z 1" >> KokkosCore_config.tmp ) + tmp := $(call kokkos_append_header,"\#define KOKKOS_HAVE_CXX11") + tmp := $(call kokkos_append_header,"\#define KOKKOS_HAVE_CXX1Z") endif ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1) @@ -417,26 +425,26 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1) KOKKOS_CXXFLAGS += -g KOKKOS_LDFLAGS += -g -ldl - tmp := $(shell echo "\#define KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_HAVE_DEBUG 1" >> KokkosCore_config.tmp ) + tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK") + tmp := $(call kokkos_append_header,"\#define KOKKOS_HAVE_DEBUG") ifeq ($(KOKKOS_INTERNAL_DISABLE_DUALVIEW_MODIFY_CHECK), 0) - tmp := $(shell echo "\#define KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK 1" >> KokkosCore_config.tmp ) + tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK") endif endif ifeq ($(KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_PRINT), 1) - tmp := $(shell echo "\#define KOKKOS_ENABLE_PROFILING_LOAD_PRINT 1" >> KokkosCore_config.tmp ) + tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_PROFILING_LOAD_PRINT") endif ifeq ($(KOKKOS_INTERNAL_USE_HWLOC), 1) KOKKOS_CPPFLAGS += -I$(HWLOC_PATH)/include KOKKOS_LDFLAGS += -L$(HWLOC_PATH)/lib KOKKOS_LIBS += -lhwloc - tmp := $(shell echo "\#define KOKKOS_HAVE_HWLOC 1" >> 
KokkosCore_config.tmp ) + tmp := $(call kokkos_append_header,"\#define KOKKOS_HAVE_HWLOC") endif ifeq ($(KOKKOS_INTERNAL_USE_LIBRT), 1) - tmp := $(shell echo "\#define KOKKOS_USE_LIBRT 1" >> KokkosCore_config.tmp ) + tmp := $(call kokkos_append_header,"\#define KOKKOS_USE_LIBRT") KOKKOS_LIBS += -lrt endif @@ -444,36 +452,36 @@ ifeq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1) KOKKOS_CPPFLAGS += -I$(MEMKIND_PATH)/include KOKKOS_LDFLAGS += -L$(MEMKIND_PATH)/lib KOKKOS_LIBS += -lmemkind -lnuma - tmp := $(shell echo "\#define KOKKOS_HAVE_HBWSPACE 1" >> KokkosCore_config.tmp ) + tmp := $(call kokkos_append_header,"\#define KOKKOS_HAVE_HBWSPACE") endif ifeq ($(KOKKOS_INTERNAL_DISABLE_PROFILING), 0) - tmp := $(shell echo "\#define KOKKOS_ENABLE_PROFILING" >> KokkosCore_config.tmp ) + tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_PROFILING") endif -tmp := $(shell echo "/* Optimization Settings */" >> KokkosCore_config.tmp) +tmp := $(call kokkos_append_header,"/* Optimization Settings */") ifeq ($(KOKKOS_INTERNAL_OPT_RANGE_AGGRESSIVE_VECTORIZATION), 1) - tmp := $(shell echo "\#define KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION 1" >> KokkosCore_config.tmp ) + tmp := $(call kokkos_append_header,"\#define KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION") endif -tmp := $(shell echo "/* Cuda Settings */" >> KokkosCore_config.tmp) +tmp := $(call kokkos_append_header,"/* Cuda Settings */") ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LDG), 1) - tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LDG_INTRINSIC 1" >> KokkosCore_config.tmp ) + tmp := $(call kokkos_append_header,"\#define KOKKOS_CUDA_USE_LDG_INTRINSIC") else ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) - tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LDG_INTRINSIC 1" >> KokkosCore_config.tmp ) + tmp := $(call kokkos_append_header,"\#define KOKKOS_CUDA_USE_LDG_INTRINSIC") endif endif ifeq ($(KOKKOS_INTERNAL_CUDA_USE_UVM), 1) - tmp := $(shell echo "\#define KOKKOS_CUDA_USE_UVM 1" >> KokkosCore_config.tmp ) 
+ tmp := $(call kokkos_append_header,"\#define KOKKOS_CUDA_USE_UVM") endif ifeq ($(KOKKOS_INTERNAL_CUDA_USE_RELOC), 1) - tmp := $(shell echo "\#define KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE 1" >> KokkosCore_config.tmp ) + tmp := $(call kokkos_append_header,"\#define KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE") KOKKOS_CXXFLAGS += --relocatable-device-code=true KOKKOS_LDFLAGS += --relocatable-device-code=true endif @@ -481,7 +489,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LAMBDA), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) ifeq ($(shell test $(KOKKOS_INTERNAL_COMPILER_NVCC_VERSION) -gt 70; echo $$?),0) - tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LAMBDA 1" >> KokkosCore_config.tmp ) + tmp := $(call kokkos_append_header,"\#define KOKKOS_CUDA_USE_LAMBDA") KOKKOS_CXXFLAGS += -expt-extended-lambda else $(warning Warning: Cuda Lambda support was requested but NVCC version is too low. This requires NVCC for Cuda version 7.5 or higher. Disabling Lambda support now.) @@ -489,19 +497,19 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) endif ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) - tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LAMBDA 1" >> KokkosCore_config.tmp ) + tmp := $(call kokkos_append_header,"\#define KOKKOS_CUDA_USE_LAMBDA") endif endif ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) - tmp := $(shell echo "\#define KOKKOS_CUDA_CLANG_WORKAROUND" >> KokkosCore_config.tmp ) + tmp := $(call kokkos_append_header,"\#define KOKKOS_CUDA_CLANG_WORKAROUND") endif endif # Add Architecture flags. 
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV80), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV80 1" >> KokkosCore_config.tmp ) + tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ARMV80") ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) KOKKOS_CXXFLAGS += @@ -518,7 +526,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV80), 1) endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV81), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV81 1" >> KokkosCore_config.tmp ) + tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ARMV81") ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) KOKKOS_CXXFLAGS += @@ -535,8 +543,8 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV81), 1) endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV80 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV8_THUNDERX 1" >> KokkosCore_config.tmp ) + tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ARMV80") + tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ARMV8_THUNDERX") ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) KOKKOS_CXXFLAGS += @@ -553,7 +561,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX), 1) endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_SSE42), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_SSE42 1" >> KokkosCore_config.tmp ) + tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_SSE42") ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) KOKKOS_CXXFLAGS += -xSSE4.2 @@ -575,7 +583,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_SSE42), 1) endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_AVX 1" >> KokkosCore_config.tmp ) + tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_AVX") ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) KOKKOS_CXXFLAGS += -mavx @@ -597,7 +605,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX), 1) endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER7), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_POWER7 1" >> KokkosCore_config.tmp ) + tmp := $(call kokkos_append_header,"\#define 
KOKKOS_ARCH_POWER7") ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) @@ -609,7 +617,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER7), 1) endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER8), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_POWER8 1" >> KokkosCore_config.tmp ) + tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_POWER8") ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) @@ -630,7 +638,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER8), 1) endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER9), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_POWER9 1" >> KokkosCore_config.tmp ) + tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_POWER9") ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) @@ -651,7 +659,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER9), 1) endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_HSW), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_AVX2 1" >> KokkosCore_config.tmp ) + tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_AVX2") ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) KOKKOS_CXXFLAGS += -xCORE-AVX2 @@ -673,7 +681,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_HSW), 1) endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_BDW), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_AVX2 1" >> KokkosCore_config.tmp ) + tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_AVX2") ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) KOKKOS_CXXFLAGS += -xCORE-AVX2 @@ -695,7 +703,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_BDW), 1) endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_AVX512MIC 1" >> KokkosCore_config.tmp ) + tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_AVX512MIC") ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) KOKKOS_CXXFLAGS += -xMIC-AVX512 @@ -716,7 +724,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC), 1) endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_AVX512XEON 1" >> KokkosCore_config.tmp ) + tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_AVX512XEON") ifeq 
($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) KOKKOS_CXXFLAGS += -xCORE-AVX512 @@ -737,7 +745,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON), 1) endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KNC), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_KNC 1" >> KokkosCore_config.tmp ) + tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KNC") KOKKOS_CXXFLAGS += -mmic KOKKOS_LDFLAGS += -mmic endif @@ -753,48 +761,48 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER30), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER30 1" >> KokkosCore_config.tmp ) + tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER") + tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER30") KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_30 endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER32), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER32 1" >> KokkosCore_config.tmp ) + tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER") + tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER32") KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_32 endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER35), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER35 1" >> KokkosCore_config.tmp ) + tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER") + tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER35") KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_35 endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER37), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER37 1" >> KokkosCore_config.tmp ) + tmp := $(call kokkos_append_header,"\#define 
KOKKOS_ARCH_KEPLER") + tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER37") KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_37 endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL50 1" >> KokkosCore_config.tmp ) + tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_MAXWELL") + tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_MAXWELL50") KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_50 endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL52 1" >> KokkosCore_config.tmp ) + tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_MAXWELL") + tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_MAXWELL52") KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_52 endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL53 1" >> KokkosCore_config.tmp ) + tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_MAXWELL") + tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_MAXWELL53") KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_53 endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL60), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL60 1" >> KokkosCore_config.tmp ) + tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_PASCAL") + tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_PASCAL60") KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_60 endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL61), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL 
1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL61 1" >> KokkosCore_config.tmp ) + tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_PASCAL") + tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_PASCAL61") KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_61 endif @@ -811,28 +819,28 @@ endif ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1) # Lets start with adding architecture defines ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KAVERI), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_ROCM 701" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_KAVERI 1" >> KokkosCore_config.tmp ) + tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ROCM 701") + tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KAVERI") KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx701 endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_CARRIZO), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_ROCM 801" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_CARRIZO 1" >> KokkosCore_config.tmp ) + tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ROCM 801") + tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_CARRIZO") KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx801 endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_FIJI), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_ROCM 803" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_FIJI 1" >> KokkosCore_config.tmp ) + tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ROCM 803") + tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_FIJI") KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx803 endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_ROCM 900" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_VEGA 1" >> KokkosCore_config.tmp ) + tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ROCM 900") + tmp := $(call kokkos_append_header,"\#define 
KOKKOS_ARCH_VEGA") KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx900 endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_GFX901), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_ROCM 901" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_GFX901 1" >> KokkosCore_config.tmp ) + tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ROCM 901") + tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_GFX901") KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx901 endif @@ -952,6 +960,10 @@ ifeq ($(KOKKOS_INTERNAL_OS_CYGWIN), 1) KOKKOS_CXXFLAGS += -U__STRICT_ANSI__ endif +# Set KokkosExtraLibs and add -lkokkos to link line +KOKKOS_EXTRA_LIBS := ${KOKKOS_LIBS} +KOKKOS_LIBS := -lkokkos ${KOKKOS_LIBS} + # Setting up dependencies. KokkosCore_config.h: diff --git a/lib/kokkos/Makefile.targets b/lib/kokkos/Makefile.targets index 964ec966d5..a63598577c 100644 --- a/lib/kokkos/Makefile.targets +++ b/lib/kokkos/Makefile.targets @@ -22,8 +22,8 @@ Kokkos_HostThreadTeam.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokk $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostThreadTeam.cpp Kokkos_Spinwait.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Spinwait.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Spinwait.cpp -Kokkos_Rendezvous.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Rendezvous.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Rendezvous.cpp +Kokkos_HostBarrier.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostBarrier.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostBarrier.cpp Kokkos_Profiling_Interface.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling_Interface.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c 
$(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling_Interface.cpp Kokkos_SharedAlloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_SharedAlloc.cpp diff --git a/lib/kokkos/README b/lib/kokkos/README index e5ed39ef53..d6c66634dd 100644 --- a/lib/kokkos/README +++ b/lib/kokkos/README @@ -41,48 +41,44 @@ hcedwar(at)sandia.gov and crtrott(at)sandia.gov ============================================================================ Primary tested compilers on X86 are: - GCC 4.7.2 GCC 4.8.4 - GCC 4.9.2 + GCC 4.9.3 GCC 5.1.0 - GCC 5.2.0 - Intel 14.0.4 + GCC 5.3.0 + GCC 6.1.0 Intel 15.0.2 Intel 16.0.1 - Intel 17.0.098 - Intel 17.1.132 + Intel 17.1.043 + Intel 17.4.196 + Intel 18.0.128 Clang 3.5.2 Clang 3.6.1 Clang 3.7.1 Clang 3.8.1 Clang 3.9.0 - PGI 17.1 + Clang 4.0.0 + Clang 4.0.0 for CUDA (CUDA Toolkit 8.0.44) + PGI 17.10 + NVCC 7.0 for CUDA (with gcc 4.8.4) + NVCC 7.5 for CUDA (with gcc 4.8.4) + NVCC 8.0.44 for CUDA (with gcc 5.3.0) Primary tested compilers on Power 8 are: GCC 5.4.0 (OpenMP,Serial) - IBM XL 13.1.3 (OpenMP, Serial) (There is a workaround in place to avoid a compiler bug) + IBM XL 13.1.5 (OpenMP, Serial) (There is a workaround in place to avoid a compiler bug) + NVCC 8.0.44 for CUDA (with gcc 5.4.0) + NVCC 9.0.103 for CUDA (with gcc 6.3.0) Primary tested compilers on Intel KNL are: GCC 6.2.0 - Intel 16.2.181 (with gcc 4.7.2) - Intel 17.0.098 (with gcc 4.7.2) - Intel 17.1.132 (with gcc 4.9.3) + Intel 16.4.258 (with gcc 4.7.2) Intel 17.2.174 (with gcc 4.9.3) - Intel 18.0.061 (beta) (with gcc 4.9.3) - -Secondary tested compilers are: - CUDA 7.0 (with gcc 4.8.4) - CUDA 7.5 (with gcc 4.8.4) - CUDA 8.0 (with gcc 5.3.0 on X86 and gcc 5.4.0 on Power8) - CUDA/Clang 8.0 using Clang/Trunk compiler + Intel 18.0.128 (with gcc 4.9.3) Other compilers working: X86: Cygwin 2.1.0 64bit with gcc 4.9.3 -Limited testing of the following compilers on POWER7+ systems: - GCC 4.8.5 (on RHEL7.1 POWER7+) - Known non-working combinations: Power8: Pthreads backend @@ -96,8 +92,8 
@@ GCC: -Wall -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits -Wignored-qualifiers -Wempty-body -Wclobbered -Wuninitialized Intel: -Wall -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits -Wuninitialized Clang: -Wall -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits -Wuninitialized +NVCC: -Wall -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits -Wuninitialized -Secondary compilers are passing without -Werror. Other compilers are tested occasionally, in particular when pushing from develop to master branch, without -Werror and only for a select set of backends. diff --git a/lib/kokkos/algorithms/CMakeLists.txt b/lib/kokkos/algorithms/CMakeLists.txt index 7853184a54..507c9f2fdb 100644 --- a/lib/kokkos/algorithms/CMakeLists.txt +++ b/lib/kokkos/algorithms/CMakeLists.txt @@ -2,7 +2,9 @@ TRIBITS_SUBPACKAGE(Algorithms) -ADD_SUBDIRECTORY(src) +IF(KOKKOS_HAS_TRILINOS) + ADD_SUBDIRECTORY(src) +ENDIF() TRIBITS_ADD_TEST_DIRECTORIES(unit_tests) #TRIBITS_ADD_TEST_DIRECTORIES(performance_tests) diff --git a/lib/kokkos/algorithms/unit_tests/CMakeLists.txt b/lib/kokkos/algorithms/unit_tests/CMakeLists.txt index fde6b967e0..f5aa24e9be 100644 --- a/lib/kokkos/algorithms/unit_tests/CMakeLists.txt +++ b/lib/kokkos/algorithms/unit_tests/CMakeLists.txt @@ -3,6 +3,32 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR}) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src ) +IF(NOT KOKKOS_HAS_TRILINOS) + IF(KOKKOS_SEPARATE_LIBS) + set(TEST_LINK_TARGETS kokkoscore) + ELSE() + set(TEST_LINK_TARGETS kokkos) + ENDIF() +ENDIF() + +SET(GTEST_SOURCE_DIR ${${PARENT_PACKAGE_NAME}_SOURCE_DIR}/tpls/gtest) +INCLUDE_DIRECTORIES(${GTEST_SOURCE_DIR}) + +# mfh 03 Nov 2017: The gtest library used here must have a different +# name than that of the gtest library built in KokkosCore. 
We can't +# just refer to the library in KokkosCore's tests, because it's +# possible to build only (e.g.,) KokkosAlgorithms tests, without +# building KokkosCore tests. + +SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DGTEST_HAS_PTHREAD=0") + +TRIBITS_ADD_LIBRARY( + kokkosalgorithms_gtest + HEADERS ${GTEST_SOURCE_DIR}/gtest/gtest.h + SOURCES ${GTEST_SOURCE_DIR}/gtest/gtest-all.cc + TESTONLY + ) + SET(SOURCES UnitTestMain.cpp TestCuda.cpp @@ -34,5 +60,5 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST( COMM serial mpi NUM_MPI_PROCS 1 FAIL_REGULAR_EXPRESSION " FAILED " - TESTONLYLIBS kokkos_gtest + TESTONLYLIBS kokkosalgorithms_gtest ${TEST_LINK_TARGETS} ) diff --git a/lib/kokkos/algorithms/unit_tests/Makefile b/lib/kokkos/algorithms/unit_tests/Makefile index a5a10c82ee..b5848c451e 100644 --- a/lib/kokkos/algorithms/unit_tests/Makefile +++ b/lib/kokkos/algorithms/unit_tests/Makefile @@ -15,7 +15,8 @@ endif CXXFLAGS = -O3 LINK ?= $(CXX) -LDFLAGS ?= -lpthread +LDFLAGS ?= +override LDFLAGS += -lpthread include $(KOKKOS_PATH)/Makefile.kokkos diff --git a/lib/kokkos/benchmarks/benchmark_suite/scripts/build_code.bash b/lib/kokkos/benchmarks/benchmark_suite/scripts/build_code.bash new file mode 100755 index 0000000000..0b885293e2 --- /dev/null +++ b/lib/kokkos/benchmarks/benchmark_suite/scripts/build_code.bash @@ -0,0 +1,84 @@ +#!/bin/bash + +# ---- Default Settings ----- + +# Paths +KOKKOS_PATH=${PWD}/kokkos +KOKKOS_KERNELS_PATH=${PWD}/kokkos-kernels +MINIMD_PATH=${PWD}/miniMD/kokkos +MINIFE_PATH=${PWD}/miniFE/kokkos + +# Kokkos Configure Options +KOKKOS_DEVICES=OpenMP +KOKKOS_ARCH=SNB + +# Compiler Options +CXX=mpicxx +OPT_FLAG="-O3" + +while [[ $# > 0 ]] +do + key="$1" + + case $key in + --kokkos-path*) + KOKKOS_PATH="${key#*=}" + ;; + --kokkos-kernels-path*) + KOKKOS_KERNELS_PATH="${key#*=}" + ;; + --minimd-path*) + MINIMD_PATH="${key#*=}" + ;; + --minife-path*) + MINIFE_PATH="${key#*=}" + ;; + --device-list*) + KOKKOS_DEVICES="${key#*=}" + ;; + --arch*) + KOKKOS_ARCH="--arch=${key#*=}" 
+ ;; + --opt-flag*) + OPT_FLAG="${key#*=}" + ;; + --compiler*) + CXX="${key#*=}" + ;; + --with-cuda-options*) + KOKKOS_CUDA_OPTIONS="--with-cuda-options=${key#*=}" + ;; + --help*) + PRINT_HELP=True + ;; + *) + # args, just append + ARGS="$ARGS $1" + ;; + esac + + shift +done + +mkdir build + +# Build BytesAndFlops +mkdir build/bytes_and_flops +cd build/bytes_and_flops +make KOKKOS_ARCH=${KOKKOS_ARCH} KOKKOS_DEVICES=${KOKKOS_DEVICES} CXX=${CXX} KOKKOS_PATH=${KOKKOS_PATH}\ + CXXFLAGS=${OPT_FLAG} -f ${KOKKOS_PATH}/benchmarks/bytes_and_flops/Makefile -j 16 +cd ../.. + +mkdir build/miniMD +cd build/miniMD +make KOKKOS_ARCH=${KOKKOS_ARCH} KOKKOS_DEVICES=${KOKKOS_DEVICES} CXX=${CXX} KOKKOS_PATH=${KOKKOS_PATH} \ + CXXFLAGS=${OPT_FLAG} -f ${MINIMD_PATH}/Makefile -j 16 +cd ../../ + +mkdir build/miniFE +cd build/miniFE +make KOKKOS_ARCH=${KOKKOS_ARCH} KOKKOS_DEVICES=${KOKKOS_DEVICES} CXX=${CXX} KOKKOS_PATH=${KOKKOS_PATH} \ + CXXFLAGS=${OPT_FLAG} -f ${MINIFE_PATH}/src/Makefile -j 16 +cd ../../ + + diff --git a/lib/kokkos/benchmarks/benchmark_suite/scripts/checkout_repos.bash b/lib/kokkos/benchmarks/benchmark_suite/scripts/checkout_repos.bash new file mode 100755 index 0000000000..9b52a36d89 --- /dev/null +++ b/lib/kokkos/benchmarks/benchmark_suite/scripts/checkout_repos.bash @@ -0,0 +1,37 @@ +#!/bin/bash + +# Kokkos +if [ ! -d "kokkos" ]; then + git clone https://github.com/kokkos/kokkos +fi +cd kokkos +git checkout develop +git pull +cd .. + +# KokkosKernels +if [ ! -d "kokkos-kernels" ]; then +git clone https://github.com/kokkos/kokkos-kernels +fi +cd kokkos-kernels +git pull +cd .. + +# MiniMD +if [ ! -d "miniMD" ]; then + git clone https://github.com/mantevo/miniMD +fi +cd miniMD +git pull +cd .. + +# MiniFE +if [ ! -d "miniFE" ]; then + git clone https://github.com/mantevo/miniFE +fi +cd miniFE +git pull +cd .. 
+ + + diff --git a/lib/kokkos/benchmarks/benchmark_suite/scripts/run_benchmark.bash b/lib/kokkos/benchmarks/benchmark_suite/scripts/run_benchmark.bash new file mode 100755 index 0000000000..6afa05f5fc --- /dev/null +++ b/lib/kokkos/benchmarks/benchmark_suite/scripts/run_benchmark.bash @@ -0,0 +1,14 @@ +#!/bin/bash +SCRIPT_PATH=$1 +KOKKOS_DEVICES=$2 +KOKKOS_ARCH=$3 +COMPILER=$4 +if [[ $# < 4 ]]; then + echo "Usage: ./run_benchmark.bash PATH_TO_SCRIPTS KOKKOS_DEVICES KOKKOS_ARCH COMPILER" +else + +${SCRIPT_PATH}/checkout_repos.bash +${SCRIPT_PATH}/build_code.bash --arch=${KOKKOS_ARCH} --device-list=${KOKKOS_DEVICES} --compiler=${COMPILER} +${SCRIPT_PATH}/run_tests.bash + +fi \ No newline at end of file diff --git a/lib/kokkos/benchmarks/benchmark_suite/scripts/run_tests.bash b/lib/kokkos/benchmarks/benchmark_suite/scripts/run_tests.bash new file mode 100755 index 0000000000..63aaca9e40 --- /dev/null +++ b/lib/kokkos/benchmarks/benchmark_suite/scripts/run_tests.bash @@ -0,0 +1,44 @@ +#!/bin/bash + +# BytesAndFlops +cd build/bytes_and_flops + +USE_CUDA=`grep "_CUDA 1" KokkosCore_config.h | wc -l` + +if [[ ${USE_CUDA} > 0 ]]; then + BAF_EXE=bytes_and_flops.cuda + TEAM_SIZE=256 +else + BAF_EXE=bytes_and_flops.host + TEAM_SIZE=1 +fi + +BAF_PERF_1=`./${BAF_EXE} 2 100000 1024 1 1 1 1 ${TEAM_SIZE} 6000 | awk '{print $12/174.5}'` +BAF_PERF_2=`./${BAF_EXE} 2 100000 1024 16 1 8 64 ${TEAM_SIZE} 6000 | awk '{print $14/1142.65}'` + +echo "BytesAndFlops: ${BAF_PERF_1} ${BAF_PERF_2}" +cd ../.. + + +# MiniMD +cd build/miniMD +cp ../../miniMD/kokkos/Cu_u6.eam ./ +MD_PERF_1=`./miniMD --half_neigh 0 -s 60 --ntypes 1 -t ${OMP_NUM_THREADS} -i ../../miniMD/kokkos/in.eam.miniMD | grep PERF_SUMMARY | awk '{print $10/21163341}'` +MD_PERF_2=`./miniMD --half_neigh 0 -s 20 --ntypes 1 -t ${OMP_NUM_THREADS} -i ../../miniMD/kokkos/in.eam.miniMD | grep PERF_SUMMARY | awk '{print $10/13393417}'` + +echo "MiniMD: ${MD_PERF_1} ${MD_PERF_2}" +cd ../.. 
+ +# MiniFE +cd build/miniFE +rm *.yaml +./miniFE.x -nx 100 &> /dev/null +FE_PERF_1=`grep "CG Mflop" *.yaml | awk '{print $4/14174}'` +rm *.yaml +./miniFE.x -nx 50 &> /dev/null +FE_PERF_2=`grep "CG Mflop" *.yaml | awk '{print $4/11897}'` +cd ../.. +echo "MiniFE: ${FE_PERF_1} ${FE_PERF_2}" + +PERF_RESULT=`echo "${BAF_PERF_1} ${BAF_PERF_2} ${MD_PERF_1} ${MD_PERF_2} ${FE_PERF_1} ${FE_PERF_2}" | awk '{print ($1+$2+$3+$4+$5+$6)/6}'` +echo "Total Result: " ${PERF_RESULT} \ No newline at end of file diff --git a/lib/kokkos/benchmarks/bytes_and_flops/Makefile b/lib/kokkos/benchmarks/bytes_and_flops/Makefile index 5ddf78f28e..6cbef56ff0 100644 --- a/lib/kokkos/benchmarks/bytes_and_flops/Makefile +++ b/lib/kokkos/benchmarks/bytes_and_flops/Makefile @@ -1,7 +1,18 @@ -KOKKOS_PATH = ${HOME}/kokkos -SRC = $(wildcard *.cpp) KOKKOS_DEVICES=Cuda KOKKOS_CUDA_OPTIONS=enable_lambda +KOKKOS_ARCH = "SNB,Kepler35" + + +MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST)))) + +ifndef KOKKOS_PATH + KOKKOS_PATH = $(MAKEFILE_PATH)../.. 
+endif + +SRC = $(wildcard $(MAKEFILE_PATH)*.cpp) +HEADERS = $(wildcard $(MAKEFILE_PATH)*.hpp) + +vpath %.cpp $(sort $(dir $(SRC))) default: build echo "Start Build" @@ -9,22 +20,19 @@ default: build ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper EXE = bytes_and_flops.cuda -KOKKOS_DEVICES = "Cuda,OpenMP" -KOKKOS_ARCH = "SNB,Kepler35" else CXX = g++ EXE = bytes_and_flops.host -KOKKOS_DEVICES = "OpenMP" -KOKKOS_ARCH = "SNB" endif -CXXFLAGS = -O3 -g +CXXFLAGS ?= -O3 -g +override CXXFLAGS += -I$(MAKEFILE_PATH) DEPFLAGS = -M LINK = ${CXX} LINKFLAGS = -OBJ = $(SRC:.cpp=.o) +OBJ = $(notdir $(SRC:.cpp=.o)) LIB = include $(KOKKOS_PATH)/Makefile.kokkos @@ -39,5 +47,5 @@ clean: kokkos-clean # Compilation rules -%.o:%.cpp $(KOKKOS_CPP_DEPENDS) bench.hpp bench_unroll_stride.hpp bench_stride.hpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< +%.o:%.cpp $(KOKKOS_CPP_DEPENDS) $(HEADERS) + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@) diff --git a/lib/kokkos/benchmarks/policy_performance/policy_perf_test.hpp b/lib/kokkos/benchmarks/policy_performance/policy_perf_test.hpp index 8c79f3b88d..11576413e2 100644 --- a/lib/kokkos/benchmarks/policy_performance/policy_perf_test.hpp +++ b/lib/kokkos/benchmarks/policy_performance/policy_perf_test.hpp @@ -69,11 +69,11 @@ void test_policy(int team_range, int thread_range, int vector_range, int team_size, int vector_size, int test_type, ViewType1 &v1, ViewType2 &v2, ViewType3 &v3, double &result, double &result_expect, double &time) { - + typedef Kokkos::TeamPolicy t_policy; typedef typename t_policy::member_type t_team; Kokkos::Timer timer; - + for(int orep = 0; orep(v1) #if 0 // This does not compile with pre Cuda 8.0 - see Github Issue #913 for explanation diff --git a/lib/kokkos/bin/hpcbind b/lib/kokkos/bin/hpcbind index 4a6734262c..92f9f81ac9 100755 --- a/lib/kokkos/bin/hpcbind +++ b/lib/kokkos/bin/hpcbind @@ -26,6 +26,7 @@ fi # 
Get parent cpuset HPCBIND_HWLOC_PARENT_CPUSET="" if [[ ${HPCBIND_HAS_HWLOC} -eq 1 ]]; then + HPCBIND_HWLOC_VERSION="$(hwloc-ls --version | cut -d ' ' -f 2)" MY_PID="$BASHPID" HPCBIND_HWLOC_PARENT_CPUSET="$(hwloc-ps -a --cpuset | grep ${MY_PID} | cut -f 2)" fi @@ -45,8 +46,11 @@ declare -i NUM_GPUS=0 HPCBIND_VISIBLE_GPUS="" if [[ ${HPCBIND_HAS_NVIDIA} -eq 1 ]]; then NUM_GPUS=$(nvidia-smi -L | wc -l); - GPU_LIST="$( seq 0 $((NUM_GPUS-1)) )" - HPCBIND_VISIBLE_GPUS=${CUDA_VISIBLE_DEVICES:-${GPU_LIST}} + HPCBIND_HAS_NVIDIA=$((!$?)) + if [[ ${HPCBIND_HAS_NVIDIA} -eq 1 ]]; then + GPU_LIST="$( seq 0 $((NUM_GPUS-1)) )" + HPCBIND_VISIBLE_GPUS=${CUDA_VISIBLE_DEVICES:-${GPU_LIST}} + fi fi declare -i HPCBIND_ENABLE_GPU_MAPPING=$((NUM_GPUS > 0)) @@ -57,33 +61,38 @@ declare -i HPCBIND_ENABLE_GPU_MAPPING=$((NUM_GPUS > 0)) # supports sbatch, bsub, aprun ################################################################################ HPCBIND_QUEUE_NAME="" -declare -i HPCBIND_QUEUE_INDEX=0 +declare -i HPCBIND_QUEUE_RANK=0 +declare -i HPCBIND_QUEUE_SIZE=0 declare -i HPCBIND_QUEUE_MAPPING=0 if [[ ! -z "${PMI_RANK}" ]]; then HPCBIND_QUEUE_MAPPING=1 HPCBIND_QUEUE_NAME="mpich" - HPCBIND_QUEUE_INDEX=${PMI_RANK} + HPCBIND_QUEUE_RANK=${PMI_RANK} + HPCBIND_QUEUE_SIZE=${PMI_SIZE} elif [[ ! -z "${OMPI_COMM_WORLD_RANK}" ]]; then HPCBIND_QUEUE_MAPPING=1 HPCBIND_QUEUE_NAME="openmpi" - HPCBIND_QUEUE_INDEX=${OMPI_COMM_WORLD_RANK} + HPCBIND_QUEUE_RANK=${OMPI_COMM_WORLD_RANK} + HPCBIND_QUEUE_SIZE=${OMPI_COMM_WORLD_SIZE} elif [[ ! -z "${MV2_COMM_WORLD_RANK}" ]]; then HPCBIND_QUEUE_MAPPING=1 HPCBIND_QUEUE_NAME="mvapich2" - HPCBIND_QUEUE_INDEX=${MV2_COMM_WORLD_RANK} + HPCBIND_QUEUE_RANK=${MV2_COMM_WORLD_RANK} + HPCBIND_QUEUE_SIZE=${MV2_COMM_WORLD_SIZE} elif [[ ! -z "${SLURM_LOCAL_ID}" ]]; then HPCBIND_QUEUE_MAPPING=1 HPCBIND_QUEUE_NAME="slurm" - HPCBIND_QUEUE_INDEX=${SLURM_LOCAL_ID} -elif [[ ! 
-z "${LBS_JOBINDEX}" ]]; then - HPCBIND_QUEUE_MAPPING=1 - HPCBIND_QUEUE_NAME="bsub" - HPCBIND_QUEUE_INDEX=${LBS_JOBINDEX} + HPCBIND_QUEUE_RANK=${SLURM_PROCID} + HPCBIND_QUEUE_SIZE=${SLURM_NPROCS} elif [[ ! -z "${ALPS_APP_PE}" ]]; then HPCBIND_QUEUE_MAPPING=1 HPCBIND_QUEUE_NAME="aprun" - HPCBIND_QUEUE_INDEX=${ALPS_APP_PE} + HPCBIND_QUEUE_RANK=${ALPS_APP_PE} +elif [[ ! -z "${LBS_JOBINDEX}" ]]; then + HPCBIND_QUEUE_MAPPING=1 + HPCBIND_QUEUE_NAME="bsub" + HPCBIND_QUEUE_RANK=${LBS_JOBINDEX} fi ################################################################################ @@ -113,8 +122,8 @@ function show_help { echo " --no-gpu-mapping Do not set CUDA_VISIBLE_DEVICES" echo " --openmp=M.m Set env variables for the given OpenMP version" echo " Default: 4.0" - echo " --openmp-percent=N Integer percentage of cpuset to use for OpenMP" - echo " threads Default: 100" + echo " --openmp-ratio=N/D Ratio of the cpuset to use for OpenMP" + echo " Default: 1" echo " --openmp-places= Op=threads|cores|sockets. Default: threads" echo " --no-openmp-proc-bind Set OMP_PROC_BIND to false and unset OMP_PLACES" echo " --force-openmp-num-threads=N" @@ -123,8 +132,8 @@ function show_help { echo " Override logic for selecting OMP_PROC_BIND" echo " --no-openmp-nested Set OMP_NESTED to false" echo " --output-prefix=

Save the output to files of the form" - echo " P-N.log, P-N.out and P-N.err where P is the prefix" - echo " and N is the queue index or mpi rank (no spaces)" + echo " P.hpcbind.N, P.stdout.N and P.stderr.N where P is " + echo " the prefix and N is the rank (no spaces)" echo " --output-mode= How console output should be handled." echo " Options are all, rank0, and none. Default: rank0" echo " --lstopo Show bindings in lstopo" @@ -132,20 +141,27 @@ function show_help { echo " -h|--help Show this message" echo "" echo "Sample Usage:" + echo "" echo " Split the current process cpuset into 4 and use the 3rd partition" echo " ${cmd} --distribute=4 --distribute-partition=2 -v -- command ..." + echo "" echo " Launch 16 jobs over 4 nodes with 4 jobs per node using only the even pus" echo " and save the output to rank specific files" echo " mpiexec -N 16 -npernode 4 ${cmd} --whole-system --proc-bind=pu:even \\" echo " --distribute=4 -v --output-prefix=output -- command ..." + echo "" echo " Bind the process to all even cores" echo " ${cmd} --proc-bind=core:even -v -- command ..." + echo "" echo " Bind the the even cores of socket 0 and the odd cores of socket 1" echo " ${cmd} --proc-bind='socket:0.core:even socket:1.core:odd' -v -- command ..." + echo "" echo " Skip GPU 0 when mapping visible devices" echo " ${cmd} --distribute=4 --distribute-partition=0 --visible-gpus=1,2 -v -- command ..." 
+ echo "" echo " Display the current bindings" echo " ${cmd} --proc-bind=numa:0 -- command" + echo "" echo " Display the current bindings using lstopo" echo " ${cmd} --proc-bind=numa:0.core:odd --lstopo" echo "" @@ -167,12 +183,13 @@ declare -i HPCBIND_DISTRIBUTE=1 declare -i HPCBIND_PARTITION=-1 HPCBIND_PROC_BIND="all" HPCBIND_OPENMP_VERSION=4.0 -declare -i HPCBIND_OPENMP_PERCENT=100 +declare -i HPCBIND_OPENMP_RATIO_NUMERATOR=1 +declare -i HPCBIND_OPENMP_RATIO_DENOMINATOR=1 HPCBIND_OPENMP_PLACES=${OMP_PLACES:-threads} declare -i HPCBIND_OPENMP_PROC_BIND=1 -declare -i HPCBIND_OPENMP_FORCE_NUM_THREADS=-1 +HPCBIND_OPENMP_FORCE_NUM_THREADS="" HPCBIND_OPENMP_FORCE_PROC_BIND="" -HPCBIND_OPENMP_NESTED=${OMP_NESTED:-true} +declare -i HPCBIND_OPENMP_NESTED=1 declare -i HPCBIND_VERBOSE=0 declare -i HPCBIND_LSTOPO=0 @@ -199,6 +216,9 @@ for i in "$@"; do ;; --distribute=*) HPCBIND_DISTRIBUTE="${i#*=}" + if [[ ${HPCBIND_DISTRIBUTE} -le 0 ]]; then + HPCBIND_DISTRIBUTE=1 + fi shift ;; # which partition to use @@ -222,8 +242,18 @@ for i in "$@"; do HPCBIND_OPENMP_VERSION="${i#*=}" shift ;; - --openmp-percent=*) - HPCBIND_OPENMP_PERCENT="${i#*=}" + --openmp-ratio=*) + IFS=/ read HPCBIND_OPENMP_RATIO_NUMERATOR HPCBIND_OPENMP_RATIO_DENOMINATOR <<< "${i#*=}" + if [[ ${HPCBIND_OPENMP_RATIO_NUMERATOR} -le 0 ]]; then + HPCBIND_OPENMP_RATIO_NUMERATOR=1 + fi + if [[ ${HPCBIND_OPENMP_RATIO_DENOMINATOR} -le 0 ]]; then + HPCBIND_OPENMP_RATIO_DENOMINATOR=1 + fi + if [[ ${HPCBIND_OPENMP_RATIO_NUMERATOR} -gt ${HPCBIND_OPENMP_RATIO_DENOMINATOR} ]]; then + HPCBIND_OPENMP_RATIO_NUMERATOR=1 + HPCBIND_OPENMP_RATIO_DENOMINATOR=1 + fi shift ;; --openmp-places=*) @@ -243,7 +273,7 @@ for i in "$@"; do shift ;; --no-openmp-nested) - HPCBIND_OPENMP_NESTED="false" + HPCBIND_OPENMP_NESTED=0 shift ;; --output-prefix=*) @@ -292,7 +322,7 @@ if [[ "${HPCBIND_OUTPUT_MODE}" == "none" ]]; then HPCBIND_TEE=0 elif [[ "${HPCBIND_OUTPUT_MODE}" == "all" ]]; then HPCBIND_TEE=1 -elif [[ ${HPCBIND_QUEUE_INDEX} -eq 0 ]]; 
then +elif [[ ${HPCBIND_QUEUE_RANK} -eq 0 ]]; then #default to rank0 printing to screen HPCBIND_TEE=1 fi @@ -303,9 +333,18 @@ if [[ "${HPCBIND_OUTPUT_PREFIX}" == "" ]]; then HPCBIND_ERR=/dev/null HPCBIND_OUT=/dev/null else - HPCBIND_LOG="${HPCBIND_OUTPUT_PREFIX}.hpcbind.${HPCBIND_QUEUE_INDEX}" - HPCBIND_ERR="${HPCBIND_OUTPUT_PREFIX}.stderr.${HPCBIND_QUEUE_INDEX}" - HPCBIND_OUT="${HPCBIND_OUTPUT_PREFIX}.stdout.${HPCBIND_QUEUE_INDEX}" + if [[ ${HPCBIND_QUEUE_SIZE} -gt 0 ]]; then + HPCBIND_STR_QUEUE_SIZE="${HPCBIND_QUEUE_SIZE}" + HPCBIND_STR_QUEUE_RANK=$(printf %0*d ${#HPCBIND_STR_QUEUE_SIZE} ${HPCBIND_QUEUE_RANK}) + + HPCBIND_LOG="${HPCBIND_OUTPUT_PREFIX}.hpcbind.${HPCBIND_STR_QUEUE_RANK}" + HPCBIND_ERR="${HPCBIND_OUTPUT_PREFIX}.stderr.${HPCBIND_STR_QUEUE_RANK}" + HPCBIND_OUT="${HPCBIND_OUTPUT_PREFIX}.stdout.${HPCBIND_STR_QUEUE_RANK}" + else + HPCBIND_LOG="${HPCBIND_OUTPUT_PREFIX}.hpcbind.${HPCBIND_QUEUE_RANK}" + HPCBIND_ERR="${HPCBIND_OUTPUT_PREFIX}.stderr.${HPCBIND_QUEUE_RANK}" + HPCBIND_OUT="${HPCBIND_OUTPUT_PREFIX}.stdout.${HPCBIND_QUEUE_RANK}" + fi > ${HPCBIND_LOG} fi @@ -333,27 +372,12 @@ if [[ ${HPCBIND_ENABLE_GPU_MAPPING} -eq 1 ]]; then NUM_GPUS=${#HPCBIND_VISIBLE_GPUS[@]} fi -################################################################################ -# Check OpenMP percent -################################################################################ -if [[ ${HPCBIND_OPENMP_PERCENT} -lt 1 ]]; then - HPCBIND_OPENMP_PERCENT=1 -elif [[ ${HPCBIND_OPENMP_PERCENT} -gt 100 ]]; then - HPCBIND_OPENMP_PERCENT=100 -fi - -################################################################################ -# Check distribute -################################################################################ -if [[ ${HPCBIND_DISTRIBUTE} -le 0 ]]; then - HPCBIND_DISTRIBUTE=1 -fi ################################################################################ #choose the correct partition ################################################################################ 
if [[ ${HPCBIND_PARTITION} -lt 0 && ${HPCBIND_QUEUE_MAPPING} -eq 1 ]]; then - HPCBIND_PARTITION=${HPCBIND_QUEUE_INDEX} + HPCBIND_PARTITION=${HPCBIND_QUEUE_RANK} elif [[ ${HPCBIND_PARTITION} -lt 0 ]]; then HPCBIND_PARTITION=0 fi @@ -381,23 +405,40 @@ if [[ ${HPCBIND_ENABLE_HWLOC_BIND} -eq 1 ]]; then else HPCBIND_HWLOC_CPUSET="${BINDING}" fi - HPCBIND_NUM_PUS=$(hwloc-ls --restrict ${HPCBIND_HWLOC_CPUSET} --only pu | wc -l) + HPCBIND_NUM_PUS=$(hwloc-calc -q -N pu ${HPCBIND_HWLOC_CPUSET} ) + if [ $? -ne 0 ]; then + HPCBIND_NUM_PUS=1 + fi + HPCBIND_NUM_CORES=$(hwloc-calc -q -N core ${HPCBIND_HWLOC_CPUSET} ) + if [ $? -ne 0 ]; then + HPCBIND_NUM_CORES=1 + fi + HPCBIND_NUM_NUMAS=$(hwloc-calc -q -N numa ${HPCBIND_HWLOC_CPUSET} ) + if [ $? -ne 0 ]; then + HPCBIND_NUM_NUMAS=1 + fi + HPCBIND_NUM_SOCKETS=$(hwloc-calc -q -N socket ${HPCBIND_HWLOC_CPUSET} ) + if [ $? -ne 0 ]; then + HPCBIND_NUM_SOCKETS=1 + fi else HPCBIND_NUM_PUS=$(cat /proc/cpuinfo | grep -c processor) + HPCBIND_NUM_CORES=${HPCBIND_NUM_PUS} + HPCBIND_NUM_NUMAS=1 + HPCBIND_NUM_SOCKETS=1 fi -declare -i HPCBIND_OPENMP_NUM_THREADS=$((HPCBIND_NUM_PUS * HPCBIND_OPENMP_PERCENT)) -HPCBIND_OPENMP_NUM_THREADS=$((HPCBIND_OPENMP_NUM_THREADS / 100)) - -if [[ ${HPCBIND_OPENMP_NUM_THREADS} -lt 1 ]]; then - HPCBIND_OPENMP_NUM_THREADS=1 -elif [[ ${HPCBIND_OPENMP_NUM_THREADS} -gt ${HPCBIND_NUM_PUS} ]]; then - HPCBIND_OPENMP_NUM_THREADS=${HPCBIND_NUM_PUS} -fi - -if [[ ${HPCBIND_OPENMP_FORCE_NUM_THREADS} -gt 0 ]]; then +if [[ ${HPCBIND_OPENMP_FORCE_NUM_THREADS} != "" ]]; then HPCBIND_OPENMP_NUM_THREADS=${HPCBIND_OPENMP_FORCE_NUM_THREADS} +else + declare -i HPCBIND_OPENMP_NUM_THREADS=$((HPCBIND_NUM_PUS * HPCBIND_OPENMP_RATIO_NUMERATOR / HPCBIND_OPENMP_RATIO_DENOMINATOR)) + + if [[ ${HPCBIND_OPENMP_NUM_THREADS} -lt 1 ]]; then + HPCBIND_OPENMP_NUM_THREADS=1 + elif [[ ${HPCBIND_OPENMP_NUM_THREADS} -gt ${HPCBIND_NUM_PUS} ]]; then + HPCBIND_OPENMP_NUM_THREADS=${HPCBIND_NUM_PUS} + fi fi 
################################################################################ @@ -405,7 +446,11 @@ fi ################################################################################ # set OMP_NUM_THREADS -export OMP_NUM_THREADS=${HPCBIND_OPENMP_NUM_THREADS} +if [[ ${HPCBIND_OPENMP_NESTED} -eq 1 ]]; then + export OMP_NUM_THREADS="${HPCBIND_OPENMP_NUM_THREADS},1" +else + export OMP_NUM_THREADS=${HPCBIND_OPENMP_NUM_THREADS} +fi # set OMP_PROC_BIND and OMP_PLACES if [[ ${HPCBIND_OPENMP_PROC_BIND} -eq 1 ]]; then @@ -413,7 +458,11 @@ if [[ ${HPCBIND_OPENMP_PROC_BIND} -eq 1 ]]; then #default proc bind logic if [[ "${HPCBIND_OPENMP_VERSION}" == "4.0" || "${HPCBIND_OPENMP_VERSION}" > "4.0" ]]; then export OMP_PLACES="${HPCBIND_OPENMP_PLACES}" - export OMP_PROC_BIND="spread" + if [[ ${HPCBIND_OPENMP_NESTED} -eq 1 ]]; then + export OMP_PROC_BIND="spread,spread" + else + export OMP_PROC_BIND="spread" + fi else export OMP_PROC_BIND="true" unset OMP_PLACES @@ -429,9 +478,17 @@ else unset OMP_PROC_BIND fi -# set OMP_NESTED -export OMP_NESTED=${HPCBIND_OPENMP_NESTED} +# set up hot teams (intel specific) +if [[ ${HPCBIND_OPENMP_NESTED} -eq 1 ]]; then + export OMP_NESTED="true" + export OMP_MAX_ACTIVE_LEVELS=2 + export KMP_HOT_TEAMS=1 + export KMP_HOT_TEAMS_MAX_LEVEL=2 +else + export OMP_NESTED="false" +fi +# set OMP_NESTED ################################################################################ # Set CUDA environment variables @@ -442,7 +499,7 @@ if [[ ${HPCBIND_ENABLE_GPU_MAPPING} -eq 1 ]]; then declare -i GPU_ID=$((HPCBIND_PARTITION % NUM_GPUS)) export CUDA_VISIBLE_DEVICES="${HPCBIND_VISIBLE_GPUS[${GPU_ID}]}" else - declare -i MY_TASK_ID=$((HPCBIND_QUEUE_INDEX * HPCBIND_DISTRIBUTE + HPCBIND_PARTITION)) + declare -i MY_TASK_ID=$((HPCBIND_QUEUE_RANK * HPCBIND_DISTRIBUTE + HPCBIND_PARTITION)) declare -i GPU_ID=$((MY_TASK_ID % NUM_GPUS)) export CUDA_VISIBLE_DEVICES="${HPCBIND_VISIBLE_GPUS[${GPU_ID}]}" fi @@ -451,12 +508,17 @@ fi 
################################################################################ # Set hpcbind environment variables ################################################################################ +export HPCBIND_HWLOC_VERSION=${HPCBIND_HWLOC_VERSION} export HPCBIND_HAS_HWLOC=${HPCBIND_HAS_HWLOC} export HPCBIND_HAS_NVIDIA=${HPCBIND_HAS_NVIDIA} export HPCBIND_NUM_PUS=${HPCBIND_NUM_PUS} +export HPCBIND_NUM_CORES=${HPCBIND_NUM_CORES} +export HPCBIND_NUM_NUMAS=${HPCBIND_NUM_NUMAS} +export HPCBIND_NUM_SOCKETS=${HPCBIND_NUM_SOCKETS} export HPCBIND_HWLOC_CPUSET="${HPCBIND_HWLOC_CPUSET}" export HPCBIND_HWLOC_DISTRIBUTE=${HPCBIND_DISTRIBUTE} export HPCBIND_HWLOC_DISTRIBUTE_PARTITION=${HPCBIND_PARTITION} +export HPCBIND_OPENMP_RATIO="${HPCBIND_OPENMP_RATIO_NUMERATOR}/${HPCBIND_OPENMP_RATIO_DENOMINATOR}" if [[ "${HPCBIND_HWLOC_PARENT_CPUSET}" == "" ]]; then export HPCBIND_HWLOC_PARENT_CPUSET="all" else @@ -467,7 +529,8 @@ export HPCBIND_NVIDIA_ENABLE_GPU_MAPPING=${HPCBIND_ENABLE_GPU_MAPPING} export HPCBIND_NVIDIA_VISIBLE_GPUS=$(echo "${HPCBIND_VISIBLE_GPUS[*]}" | tr ' ' ',') export HPCBIND_OPENMP_VERSION="${HPCBIND_OPENMP_VERSION}" if [[ "${HPCBIND_QUEUE_NAME}" != "" ]]; then - export HPCBIND_QUEUE_INDEX=${HPCBIND_QUEUE_INDEX} + export HPCBIND_QUEUE_RANK=${HPCBIND_QUEUE_RANK} + export HPCBIND_QUEUE_SIZE=${HPCBIND_QUEUE_SIZE} export HPCBIND_QUEUE_NAME="${HPCBIND_QUEUE_NAME}" export HPCBIND_QUEUE_MAPPING=${HPCBIND_QUEUE_MAPPING} fi @@ -487,10 +550,16 @@ if [[ ${HPCBIND_TEE} -eq 0 || ${HPCBIND_VERBOSE} -eq 0 ]]; then echo "${TMP_ENV}" | grep -E "^CUDA_" >> ${HPCBIND_LOG} echo "[OPENMP]" >> ${HPCBIND_LOG} echo "${TMP_ENV}" | grep -E "^OMP_" >> ${HPCBIND_LOG} + echo "[GOMP] (gcc, g++, and gfortran)" >> ${HPCBIND_LOG} + echo "${TMP_ENV}" | grep -E "^GOMP_" >> ${HPCBIND_LOG} + echo "[KMP] (icc, icpc, and ifort)" >> ${HPCBIND_LOG} + echo "${TMP_ENV}" | grep -E "^KMP_" >> ${HPCBIND_LOG} + echo "[XLSMPOPTS] (xlc, xlc++, and xlf)" >> ${HPCBIND_LOG} + echo "${TMP_ENV}" | grep -E 
"^XLSMPOPTS" >> ${HPCBIND_LOG} if [[ ${HPCBIND_HAS_HWLOC} -eq 1 ]]; then echo "[BINDINGS]" >> ${HPCBIND_LOG} - hwloc-ls --restrict "${HPCBIND_HWLOC_CPUSET}" --only pu >> ${HPCBIND_LOG} + hwloc-ls --restrict "${HPCBIND_HWLOC_CPUSET}" >> ${HPCBIND_LOG} else echo "Unable to show bindings, hwloc not available." >> ${HPCBIND_LOG} fi @@ -503,10 +572,16 @@ else echo "${TMP_ENV}" | grep -E "^CUDA_" > >(tee -a ${HPCBIND_LOG}) echo "[OPENMP]" > >(tee -a ${HPCBIND_LOG}) echo "${TMP_ENV}" | grep -E "^OMP_" > >(tee -a ${HPCBIND_LOG}) + echo "[GOMP] (gcc, g++, and gfortran)" > >(tee -a ${HPCBIND_LOG}) + echo "${TMP_ENV}" | grep -E "^GOMP_" > >(tee -a ${HPCBIND_LOG}) + echo "[KMP] (icc, icpc, and ifort)" > >(tee -a ${HPCBIND_LOG}) + echo "${TMP_ENV}" | grep -E "^KMP_" > >(tee -a ${HPCBIND_LOG}) + echo "[XLSMPOPTS] (xlc, xlc++, and xlf)" > >(tee -a ${HPCBIND_LOG}) + echo "${TMP_ENV}" | grep -E "^XLSMPOPTS" > >(tee -a ${HPCBIND_LOG}) if [[ ${HPCBIND_HAS_HWLOC} -eq 1 ]]; then echo "[BINDINGS]" > >(tee -a ${HPCBIND_LOG}) - hwloc-ls --restrict "${HPCBIND_HWLOC_CPUSET}" --only pu > >(tee -a ${HPCBIND_LOG}) + hwloc-ls --restrict "${HPCBIND_HWLOC_CPUSET}" --no-io --no-bridges > >(tee -a ${HPCBIND_LOG}) else echo "Unable to show bindings, hwloc not available." 
> >(tee -a ${HPCBIND_LOG}) fi diff --git a/lib/kokkos/bin/nvcc_wrapper b/lib/kokkos/bin/nvcc_wrapper index 76e33f3c66..d339da4fcd 100755 --- a/lib/kokkos/bin/nvcc_wrapper +++ b/lib/kokkos/bin/nvcc_wrapper @@ -39,6 +39,12 @@ cuda_args="" # Arguments for both NVCC and Host compiler shared_args="" +# Argument -c +compile_arg="" + +# Argument -o +output_arg="" + # Linker arguments xlinker_args="" @@ -66,6 +72,7 @@ dry_run=0 # Skip NVCC compilation and use host compiler directly host_only=0 +host_only_args="" # Enable workaround for CUDA 6.5 for pragma ident replace_pragma_ident=0 @@ -81,6 +88,11 @@ optimization_applied=0 # Check if we have -std=c++X or --std=c++X already stdcxx_applied=0 +# Run nvcc a second time to generate dependencies if needed +depfile_separate=0 +depfile_output_arg="" +depfile_target_arg="" + #echo "Arguments: $# $@" while [ $# -gt 0 ] @@ -112,12 +124,31 @@ do fi ;; #Handle shared args (valid for both nvcc and the host compiler) - -D*|-c|-I*|-L*|-l*|-g|--help|--version|-E|-M|-shared) + -D*|-I*|-L*|-l*|-g|--help|--version|-E|-M|-shared) shared_args="$shared_args $1" ;; - #Handle shared args that have an argument - -o|-MT) - shared_args="$shared_args $1 $2" + #Handle compilation argument + -c) + compile_arg="$1" + ;; + #Handle output argument + -o) + output_arg="$output_arg $1 $2" + shift + ;; + # Handle depfile arguments. We map them to a separate call to nvcc. 
+ -MD|-MMD) + depfile_separate=1 + host_only_args="$host_only_args $1" + ;; + -MF) + depfile_output_arg="-o $2" + host_only_args="$host_only_args $1 $2" + shift + ;; + -MT) + depfile_target_arg="$1 $2" + host_only_args="$host_only_args $1 $2" shift ;; #Handle known nvcc args @@ -242,7 +273,7 @@ if [ $first_xcompiler_arg -eq 0 ]; then fi #Compose host only command -host_command="$host_compiler $shared_args $xcompiler_args $host_linker_args $shared_versioned_libraries_host" +host_command="$host_compiler $shared_args $host_only_args $compile_arg $output_arg $xcompiler_args $host_linker_args $shared_versioned_libraries_host" #nvcc does not accept '#pragma ident SOME_MACRO_STRING' but it does accept '#ident SOME_MACRO_STRING' if [ $replace_pragma_ident -eq 1 ]; then @@ -274,10 +305,21 @@ else host_command="$host_command $object_files" fi +if [ $depfile_separate -eq 1 ]; then + # run nvcc a second time to generate dependencies (without compiling) + nvcc_depfile_command="$nvcc_command -M $depfile_target_arg $depfile_output_arg" +else + nvcc_depfile_command="" +fi + +nvcc_command="$nvcc_command $compile_arg $output_arg" + #Print command for dryrun if [ $dry_run -eq 1 ]; then if [ $host_only -eq 1 ]; then echo $host_command + elif [ -n "$nvcc_depfile_command" ]; then + echo $nvcc_command "&&" $nvcc_depfile_command else echo $nvcc_command fi @@ -287,6 +329,8 @@ fi #Run compilation command if [ $host_only -eq 1 ]; then $host_command +elif [ -n "$nvcc_depfile_command" ]; then + $nvcc_command && $nvcc_depfile_command else $nvcc_command fi diff --git a/lib/kokkos/cmake/Makefile.generate_cmake_settings b/lib/kokkos/cmake/Makefile.generate_cmake_settings new file mode 100644 index 0000000000..da076b23db --- /dev/null +++ b/lib/kokkos/cmake/Makefile.generate_cmake_settings @@ -0,0 +1,8 @@ +ifndef KOKKOS_PATH + MAKEFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST))) + KOKKOS_PATH = $(subst Makefile,,$(MAKEFILE_PATH)).. 
+endif + +include $(KOKKOS_PATH)/Makefile.kokkos +include $(KOKKOS_PATH)/core/src/Makefile.generate_header_lists +include $(KOKKOS_PATH)/core/src/Makefile.generate_build_files diff --git a/lib/kokkos/cmake/kokkos.cmake b/lib/kokkos/cmake/kokkos.cmake deleted file mode 100644 index 396822c7fa..0000000000 --- a/lib/kokkos/cmake/kokkos.cmake +++ /dev/null @@ -1,1202 +0,0 @@ - - -# Set which Kokkos backend to use. -set(KOKKOS_ENABLE_CUDA OFF CACHE BOOL "Use Kokkos CUDA backend") -set(KOKKOS_ENABLE_OPENMP ON CACHE BOOL "Use Kokkos OpenMP backend") -set(KOKKOS_ENABLE_PTHREAD OFF CACHE BOOL "Use Kokkos Pthreads backend") -set(KOKKOS_ENABLE_QTHREADS OFF CACHE BOOL "Use Kokkos Qthreads backend") -set(KOKKOS_ENABLE_SERIAL ON CACHE BOOL "Use Kokkos Serial backend") - -# List of possible host architectures. -list(APPEND KOKKOS_HOST_ARCH_LIST - None # No architecture optimization - AMDAVX # AMD chip - ARMv80 # ARMv8.0 Compatible CPU - ARMv81 # ARMv8.1 Compatible CPU - ARMv8-ThunderX # ARMv8 Cavium ThunderX CPU - SNB # Intel Sandy/Ivy Bridge CPUs - HSW # Intel Haswell CPUs - BDW # Intel Broadwell Xeon E-class CPUs - SKX # Intel Sky Lake Xeon E-class HPC CPUs (AVX512) - KNC # Intel Knights Corner Xeon Phi - KNL # Intel Knights Landing Xeon Phi - BGQ # IBM Blue Gene Q - Power7 # IBM POWER7 CPUs - Power8 # IBM POWER8 CPUs - Power9 # IBM POWER9 CPUs - ) - -# Setting this variable to a value other than "None" can improve host -# performance by turning on architecture specific code. -set(KOKKOS_HOST_ARCH "None" CACHE STRING "Optimize for specific host architecture.") -set_property(CACHE KOKKOS_HOST_ARCH PROPERTY STRINGS ${KOKKOS_HOST_ARCH_LIST}) - -# List of possible GPU architectures. 
-list(APPEND KOKKOS_GPU_ARCH_LIST - None # No architecture optimization - Kepler # NVIDIA Kepler default (generation CC 3.5) - Kepler30 # NVIDIA Kepler generation CC 3.0 - Kepler32 # NVIDIA Kepler generation CC 3.2 - Kepler35 # NVIDIA Kepler generation CC 3.5 - Kepler37 # NVIDIA Kepler generation CC 3.7 - Maxwell # NVIDIA Maxwell default (generation CC 5.0) - Maxwell50 # NVIDIA Maxwell generation CC 5.0 - Maxwell52 # NVIDIA Maxwell generation CC 5.2 - Maxwell53 # NVIDIA Maxwell generation CC 5.3 - Pascal60 # NVIDIA Pascal generation CC 6.0 - Pascal61 # NVIDIA Pascal generation CC 6.1 - ) - -# Setting this variable to a value other than "None" can improve GPU -# performance by turning on architecture specific code. -set(KOKKOS_GPU_ARCH "None" CACHE STRING "Optimize for specific GPU architecture.") -set_property(CACHE KOKKOS_GPU_ARCH PROPERTY STRINGS ${KOKKOS_GPU_ARCH_LIST}) - -set(KOKKOS_SEPARATE_LIBS OFF CACHE BOOL "OFF = kokkos. ON = kokkoscore, kokkoscontainers, and kokkosalgorithms.") - -# Enable hwloc library. -set(KOKKOS_ENABLE_HWLOC OFF CACHE BOOL "Enable hwloc for better process placement.") -set(KOKKOS_HWLOC_DIR "" CACHE PATH "Location of hwloc library.") - -# Enable memkind library. -set(KOKKOS_ENABLE_MEMKIND OFF CACHE BOOL "Enable memkind.") -set(KOKKOS_MEMKIND_DIR "" CACHE PATH "Location of memkind library.") - -set(KOKKOS_ENABLE_LIBRT OFF CACHE BOOL "Enable librt for more precise timer.") - -# Enable debugging. -set(KOKKOS_DEBUG OFF CACHE BOOL "Enable debugging in Kokkos.") - -# Enable profiling. -set(KOKKOS_ENABLE_PROFILING ON CACHE BOOL "Enable profiling.") - -# Enable aggressive vectorization. -set(KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION OFF CACHE BOOL "Enable aggressive vectorization.") - -# Qthreads options. -set(KOKKOS_QTHREADS_DIR "" CACHE PATH "Location of Qthreads library.") - -# CUDA options. -set(KOKKOS_CUDA_DIR "" CACHE PATH "Location of CUDA library. 
Defaults to where nvcc installed.") -set(KOKKOS_ENABLE_CUDA_LDG_INTRINSIC OFF CACHE BOOL "Enable CUDA LDG.") -set(KOKKOS_ENABLE_CUDA_UVM OFF CACHE BOOL "Enable CUDA unified virtual memory.") -set(KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE OFF CACHE BOOL "Enable relocatable device code for CUDA.") -set(KOKKOS_ENABLE_CUDA_LAMBDA ON CACHE BOOL "Enable lambdas for CUDA.") - -################################### FUNCTIONS ################################## - -# Sets the following compiler variables that are analogous to the CMAKE_* -# versions. We add the ability to detect NVCC (really nvcc_wrapper). -# KOKKOS_CXX_COMPILER -# KOKKOS_CXX_COMPILER_ID -# KOKKOS_CXX_COMPILER_VERSION -# -# Also verifies the compiler version meets the minimum required by Kokkos. -function(set_kokkos_cxx_compiler) - # Since CMake doesn't recognize the nvcc compiler until 3.8, we use our own - # version of the CMake variables and detect nvcc ourselves. Initially set to - # the CMake variable values. - set(INTERNAL_CXX_COMPILER ${CMAKE_CXX_COMPILER}) - set(INTERNAL_CXX_COMPILER_ID ${CMAKE_CXX_COMPILER_ID}) - set(INTERNAL_CXX_COMPILER_VERSION ${CMAKE_CXX_COMPILER_VERSION}) - - # Check if the compiler is nvcc (which really means nvcc_wrapper). - execute_process(COMMAND ${INTERNAL_CXX_COMPILER} --version - COMMAND grep nvcc - COMMAND wc -l - OUTPUT_VARIABLE INTERNAL_HAVE_COMPILER_NVCC - OUTPUT_STRIP_TRAILING_WHITESPACE) - - string(REGEX REPLACE "^ +" "" - INTERNAL_HAVE_COMPILER_NVCC ${INTERNAL_HAVE_COMPILER_NVCC}) - - if(INTERNAL_HAVE_COMPILER_NVCC) - # Set the compiler id to nvcc. We use the value used by CMake 3.8. - set(INTERNAL_CXX_COMPILER_ID NVIDIA) - - # Set nvcc's compiler version. 
- execute_process(COMMAND ${INTERNAL_CXX_COMPILER} --version - COMMAND grep release - OUTPUT_VARIABLE INTERNAL_CXX_COMPILER_VERSION - OUTPUT_STRIP_TRAILING_WHITESPACE) - - string(REGEX MATCH "[0-9]+\.[0-9]+\.[0-9]+$" - INTERNAL_CXX_COMPILER_VERSION ${INTERNAL_CXX_COMPILER_VERSION}) - endif() - - # Enforce the minimum compilers supported by Kokkos. - set(KOKKOS_MESSAGE_TEXT "Compiler not supported by Kokkos. Required compiler versions:") - set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Clang 3.5.2 or higher") - set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n GCC 4.7.2 or higher") - set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Intel 14.0.4 or higher") - set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n NVCC 7.0.28 or higher") - set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n PGI 17.1 or higher\n") - - if(INTERNAL_CXX_COMPILER_ID STREQUAL Clang) - if(INTERNAL_CXX_COMPILER_VERSION VERSION_LESS 3.5.2) - message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") - endif() - elseif(INTERNAL_CXX_COMPILER_ID STREQUAL GNU) - if(INTERNAL_CXX_COMPILER_VERSION VERSION_LESS 4.7.2) - message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") - endif() - elseif(INTERNAL_CXX_COMPILER_ID STREQUAL Intel) - if(INTERNAL_CXX_COMPILER_VERSION VERSION_LESS 14.0.4) - message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") - endif() - elseif(INTERNAL_CXX_COMPILER_ID STREQUAL NVIDIA) - if(INTERNAL_CXX_COMPILER_VERSION VERSION_LESS 7.0.28) - message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") - endif() - elseif(INTERNAL_CXX_COMPILER_ID STREQUAL PGI) - if(INTERNAL_CXX_COMPILER_VERSION VERSION_LESS 17.1) - message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") - endif() - endif() - - # Enforce that extensions are turned off for nvcc_wrapper. - if(INTERNAL_CXX_COMPILER_ID STREQUAL NVIDIA) - if(NOT DEFINED CMAKE_CXX_EXTENSIONS OR CMAKE_CXX_EXTENSIONS STREQUAL ON) - message(FATAL_ERROR "NVCC doesn't support C++ extensions. 
Set CMAKE_CXX_EXTENSIONS to OFF in your CMakeLists.txt.") - endif() - endif() - - if(KOKKOS_ENABLE_CUDA) - # Enforce that the compiler can compile CUDA code. - if(INTERNAL_CXX_COMPILER_ID STREQUAL Clang) - if(INTERNAL_CXX_COMPILER_VERSION VERSION_LESS 4.0.0) - message(FATAL_ERROR "Compiling CUDA code directly with Clang requires version 4.0.0 or higher.") - endif() - elseif(NOT INTERNAL_CXX_COMPILER_ID STREQUAL NVIDIA) - message(FATAL_ERROR "Invalid compiler for CUDA. The compiler must be nvcc_wrapper or Clang.") - endif() - endif() - - set(KOKKOS_CXX_COMPILER ${INTERNAL_CXX_COMPILER} PARENT_SCOPE) - set(KOKKOS_CXX_COMPILER_ID ${INTERNAL_CXX_COMPILER_ID} PARENT_SCOPE) - set(KOKKOS_CXX_COMPILER_VERSION ${INTERNAL_CXX_COMPILER_VERSION} PARENT_SCOPE) -endfunction() - -# Transitively enforces that the appropriate CXX standard compile flags (C++11 -# or above) are added to targets that use the Kokkos library. Compile features -# are used if possible. Otherwise, the appropriate flags are added to -# KOKKOS_CXX_FLAGS. Values set by the user to CMAKE_CXX_STANDARD and -# CMAKE_CXX_EXTENSIONS are honored. -function(set_kokkos_compiler_standard) - # The following table lists the versions of CMake that supports CXX_STANDARD - # and the CXX compile features for different compilers. The versions are - # based on CMake documentation, looking at CMake code, and verifying by - # testing with specific CMake versions. - # - # COMPILER CXX_STANDARD Compile Features - # --------------------------------------------------------------- - # Clang 3.1 3.1 - # GNU 3.1 3.2 - # AppleClang 3.2 3.2 - # Intel 3.6 3.6 - # Cray No No - # PGI No No - # XL No No - # - # For compiling CUDA code using nvcc_wrapper, we will use the host compiler's - # flags for turning on C++11. Since for compiler ID and versioning purposes - # CMake recognizes the host compiler when calling nvcc_wrapper, this just - # works. 
Both NVCC and nvcc_wrapper only recognize '-std=c++11' which means - # that we can only use host compilers for CUDA builds that use those flags. - # It also means that extensions (gnu++11) can't be turned on for CUDA builds. - - # Check if we can use compile features. - if(NOT KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA) - if(CMAKE_CXX_COMPILER_ID STREQUAL Clang) - if(NOT CMAKE_VERSION VERSION_LESS 3.1) - set(INTERNAL_USE_COMPILE_FEATURES ON) - endif() - elseif(CMAKE_CXX_COMPILER_ID STREQUAL AppleClang OR CMAKE_CXX_COMPILER_ID STREQUAL GNU) - if(NOT CMAKE_VERSION VERSION_LESS 3.2) - set(INTERNAL_USE_COMPILE_FEATURES ON) - endif() - elseif(CMAKE_CXX_COMPILER_ID STREQUAL Intel) - if(NOT CMAKE_VERSION VERSION_LESS 3.6) - set(INTERNAL_USE_COMPILE_FEATURES ON) - endif() - endif() - endif() - - if(INTERNAL_USE_COMPILE_FEATURES) - # Use the compile features aspect of CMake to transitively cause C++ flags - # to populate to user code. - - # I'm using a hack by requiring features that I know force the lowest version - # of the compilers we want to support. Clang 3.3 and later support all of - # the C++11 standard. With CMake 3.8 and higher, we could switch to using - # cxx_std_11. - set(KOKKOS_CXX11_FEATURES - cxx_nonstatic_member_init # Forces GCC 4.7 or later and Intel 14.0 or later. - PARENT_SCOPE - ) - else() - # CXX compile features are not yet implemented for this combination of - # compiler and version of CMake. - - if(CMAKE_CXX_COMPILER_ID STREQUAL AppleClang) - # Versions of CMAKE before 3.2 don't support CXX_STANDARD or C++ compile - # features for the AppleClang compiler. Set compiler flags transitively - # here such that they trickle down to a call to target_compile_options(). - - # The following two blocks of code were copied from - # /Modules/Compiler/AppleClang-CXX.cmake from CMake 3.7.2 and then - # modified. 
- if(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.0) - set(INTERNAL_CXX11_STANDARD_COMPILE_OPTION "-std=c++11") - set(INTERNAL_CXX11_EXTENSION_COMPILE_OPTION "-std=gnu++11") - endif() - - if(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 6.1) - set(INTERNAL_CXX14_STANDARD_COMPILE_OPTION "-std=c++14") - set(INTERNAL_CXX14_EXTENSION_COMPILE_OPTION "-std=gnu++14") - elseif(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.1) - # AppleClang 5.0 knows this flag, but does not set a __cplusplus macro - # greater than 201103L. - set(INTERNAL_CXX14_STANDARD_COMPILE_OPTION "-std=c++1y") - set(INTERNAL_CXX14_EXTENSION_COMPILE_OPTION "-std=gnu++1y") - endif() - elseif(CMAKE_CXX_COMPILER_ID STREQUAL Intel) - # Versions of CMAKE before 3.6 don't support CXX_STANDARD or C++ compile - # features for the Intel compiler. Set compiler flags transitively here - # such that they trickle down to a call to target_compile_options(). - - # The following three blocks of code were copied from - # /Modules/Compiler/Intel-CXX.cmake from CMake 3.7.2 and then modified. - if("x${CMAKE_CXX_SIMULATE_ID}" STREQUAL "xMSVC") - set(_std -Qstd) - set(_ext c++) - else() - set(_std -std) - set(_ext gnu++) - endif() - - if(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 15.0.2) - set(INTERNAL_CXX14_STANDARD_COMPILE_OPTION "${_std}=c++14") - # TODO: There is no gnu++14 value supported; figure out what to do. - set(INTERNAL_CXX14_EXTENSION_COMPILE_OPTION "${_std}=c++14") - elseif(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 15.0.0) - set(INTERNAL_CXX14_STANDARD_COMPILE_OPTION "${_std}=c++1y") - # TODO: There is no gnu++14 value supported; figure out what to do. 
- set(INTERNAL_CXX14_EXTENSION_COMPILE_OPTION "${_std}=c++1y") - endif() - - if(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 13.0) - set(INTERNAL_CXX11_STANDARD_COMPILE_OPTION "${_std}=c++11") - set(INTERNAL_CXX11_EXTENSION_COMPILE_OPTION "${_std}=${_ext}11") - elseif(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 12.1) - set(INTERNAL_CXX11_STANDARD_COMPILE_OPTION "${_std}=c++0x") - set(INTERNAL_CXX11_EXTENSION_COMPILE_OPTION "${_std}=${_ext}0x") - endif() - elseif(CMAKE_CXX_COMPILER_ID STREQUAL Cray) - # CMAKE doesn't support CXX_STANDARD or C++ compile features for the Cray - # compiler. Set compiler options transitively here such that they trickle - # down to a call to target_compile_options(). - set(INTERNAL_CXX11_STANDARD_COMPILE_OPTION "-hstd=c++11") - set(INTERNAL_CXX11_EXTENSION_COMPILE_OPTION "-hstd=c++11") - set(INTERNAL_CXX14_STANDARD_COMPILE_OPTION "-hstd=c++11") - set(INTERNAL_CXX14_EXTENSION_COMPILE_OPTION "-hstd=c++11") - elseif(CMAKE_CXX_COMPILER_ID STREQUAL PGI) - # CMAKE doesn't support CXX_STANDARD or C++ compile features for the PGI - # compiler. Set compiler options transitively here such that they trickle - # down to a call to target_compile_options(). - set(INTERNAL_CXX11_STANDARD_COMPILE_OPTION "--c++11") - set(INTERNAL_CXX11_EXTENSION_COMPILE_OPTION "--c++11") - set(INTERNAL_CXX14_STANDARD_COMPILE_OPTION "--c++11") - set(INTERNAL_CXX14_EXTENSION_COMPILE_OPTION "--c++11") - elseif(CMAKE_CXX_COMPILER_ID STREQUAL XL) - # CMAKE doesn't support CXX_STANDARD or C++ compile features for the XL - # compiler. Set compiler options transitively here such that they trickle - # down to a call to target_compile_options(). - set(INTERNAL_CXX11_STANDARD_COMPILE_OPTION "-std=c++11") - set(INTERNAL_CXX11_EXTENSION_COMPILE_OPTION "-std=c++11") - set(INTERNAL_CXX14_STANDARD_COMPILE_OPTION "-std=c++11") - set(INTERNAL_CXX14_EXTENSION_COMPILE_OPTION "-std=c++11") - else() - # Assume GNU. 
CMAKE_CXX_STANDARD is handled correctly by CMake 3.1 and - # above for this compiler. If the user explicitly requests a C++ - # standard, CMake takes care of it. If not, transitively require C++11. - if(NOT CMAKE_CXX_STANDARD) - set(INTERNAL_CXX11_STANDARD_COMPILE_OPTION ${CMAKE_CXX11_STANDARD_COMPILE_OPTION}) - set(INTERNAL_CXX11_EXTENSION_COMPILE_OPTION ${CMAKE_CXX11_EXTENSION_COMPILE_OPTION}) - endif() - endif() - - # Set the C++ standard info for Kokkos respecting user set values for - # CMAKE_CXX_STANDARD and CMAKE_CXX_EXTENSIONS. - if(CMAKE_CXX_STANDARD EQUAL 14) - if(DEFINED CMAKE_CXX_EXTENSIONS AND CMAKE_CXX_EXTENSIONS STREQUAL OFF) - set(INTERNAL_CXX_FLAGS ${INTERNAL_CXX14_STANDARD_COMPILE_OPTION}) - else() - set(INTERNAL_CXX_FLAGS ${INTERNAL_CXX14_EXTENSION_COMPILE_OPTION}) - endif() - elseif(CMAKE_CXX_STANDARD EQUAL 11) - if(DEFINED CMAKE_CXX_EXTENSIONS AND CMAKE_CXX_EXTENSIONS STREQUAL OFF) - set(INTERNAL_CXX_FLAGS ${INTERNAL_CXX11_STANDARD_COMPILE_OPTION}) - else() - set(INTERNAL_CXX_FLAGS ${INTERNAL_CXX11_EXTENSION_COMPILE_OPTION}) - endif() - else() - # The user didn't explicitly request a standard, transitively require - # C++11 respecting CMAKE_CXX_EXTENSIONS. - if(DEFINED CMAKE_CXX_EXTENSIONS AND CMAKE_CXX_EXTENSIONS STREQUAL OFF) - set(INTERNAL_CXX_FLAGS ${INTERNAL_CXX11_STANDARD_COMPILE_OPTION}) - else() - set(INTERNAL_CXX_FLAGS ${INTERNAL_CXX11_EXTENSION_COMPILE_OPTION}) - endif() - endif() - - set(KOKKOS_CXX_FLAGS ${INTERNAL_CXX_FLAGS} PARENT_SCOPE) - endif() -endfunction() - -########################## COMPILER AND FEATURE CHECKS ######################### - -# TODO: We are assuming that nvcc_wrapper is using g++ as the host compiler. -# Should we allow the user the option to change this? The host compiler -# for nvcc_wrapper can be set via the NVCC_WRAPPER_DEFAULT_COMPILER -# environment variable or by passing a different host compiler with the -# -ccbin flag. - -# TODO: Fully add CUDA support for Clang. 
-set_kokkos_cxx_compiler() - -set_kokkos_compiler_standard() - -######################### INITIALIZE INTERNAL VARIABLES ######################## - -# Add Kokkos' modules to CMake's module path. -set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${Kokkos_SOURCE_DIR}/cmake/Modules/") - -# Start with all global variables set to false. This guarantees correct -# results with changes and multiple configures. -set(KOKKOS_HAVE_CUDA OFF CACHE INTERNAL "") -set(KOKKOS_USE_CUDA_UVM OFF CACHE INTERNAL "") -set(KOKKOS_HAVE_CUDA_RDC OFF CACHE INTERNAL "") -set(KOKKOS_HAVE_CUDA_LAMBDA OFF CACHE INTERNAL "") -set(KOKKOS_CUDA_CLANG_WORKAROUND OFF CACHE INTERNAL "") -set(KOKKOS_HAVE_OPENMP OFF CACHE INTERNAL "") -set(KOKKOS_HAVE_PTHREAD OFF CACHE INTERNAL "") -set(KOKKOS_HAVE_QTHREADS OFF CACHE INTERNAL "") -set(KOKKOS_HAVE_SERIAL OFF CACHE INTERNAL "") -set(KOKKOS_HAVE_HWLOC OFF CACHE INTERNAL "") -set(KOKKOS_ENABLE_HBWSPACE OFF CACHE INTERNAL "") -set(KOKKOS_HAVE_DEBUG OFF CACHE INTERNAL "") -set(KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK OFF CACHE INTERNAL "") -set(KOKKOS_ENABLE_ISA_X86_64 OFF CACHE INTERNAL "") -set(KOKKOS_ENABLE_ISA_KNC OFF CACHE INTERNAL "") -set(KOKKOS_ENABLE_ISA_POWERPCLE OFF CACHE INTERNAL "") -set(KOKKOS_ARCH_ARMV80 OFF CACHE INTERNAL "") -set(KOKKOS_ARCH_ARMV81 OFF CACHE INTERNAL "") -set(KOKKOS_ARCH_ARMV8_THUNDERX OFF CACHE INTERNAL "") -set(KOKKOS_ARCH_AVX OFF CACHE INTERNAL "") -set(KOKKOS_ARCH_AVX2 OFF CACHE INTERNAL "") -set(KOKKOS_ARCH_AVX512MIC OFF CACHE INTERNAL "") -set(KOKKOS_ARCH_AVX512XEON OFF CACHE INTERNAL "") -set(KOKKOS_ARCH_KNC OFF CACHE INTERNAL "") -set(KOKKOS_ARCH_POWER8 OFF CACHE INTERNAL "") -set(KOKKOS_ARCH_POWER9 OFF CACHE INTERNAL "") -set(KOKKOS_ARCH_KEPLER OFF CACHE INTERNAL "") -set(KOKKOS_ARCH_KEPLER30 OFF CACHE INTERNAL "") -set(KOKKOS_ARCH_KEPLER32 OFF CACHE INTERNAL "") -set(KOKKOS_ARCH_KEPLER35 OFF CACHE INTERNAL "") -set(KOKKOS_ARCH_KEPLER37 OFF CACHE INTERNAL "") -set(KOKKOS_ARCH_MAXWELL OFF CACHE INTERNAL "") 
-set(KOKKOS_ARCH_MAXWELL50 OFF CACHE INTERNAL "") -set(KOKKOS_ARCH_MAXWELL52 OFF CACHE INTERNAL "") -set(KOKKOS_ARCH_MAXWELL53 OFF CACHE INTERNAL "") -set(KOKKOS_ARCH_PASCAL OFF CACHE INTERNAL "") -set(KOKKOS_ARCH_PASCAL60 OFF CACHE INTERNAL "") -set(KOKKOS_ARCH_PASCAL61 OFF CACHE INTERNAL "") - -############################## SET BACKEND OPTIONS ############################# - -# Make sure at least one backend is selected. -if(NOT KOKKOS_ENABLE_CUDA AND NOT KOKKOS_ENABLE_OPENMP AND NOT KOKKOS_ENABLE_PTHREAD AND NOT KOKKOS_ENABLE_QTHREADS AND NOT KOKKOS_ENABLE_SERIAL) - message(FATAL_ERROR "Must set one of KOKKOS_ENABLE_CUDA, KOKKOS_ENABLE_OPENMP, KOKKOS_ENABLE_PTHREAD, KOKKOS_ENABLE_QTHREADS, or KOKKOS_ENABLE_SERIAL") -endif() - -# Only one of OpenMP, Pthreads, and Qthreads can be set. -set(KOKKOS_MESSAGE_TEXT "Only one of KOKKOS_ENABLE_OPENMP, KOKKOS_ENABLE_PTHREAD, and KOKKOS_ENABLE_QTHREADS can be selected") -if(KOKKOS_ENABLE_OPENMP AND KOKKOS_ENABLE_PTHREAD) - message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") -elseif(KOKKOS_ENABLE_OPENMP AND KOKKOS_ENABLE_QTHREADS) - message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") -elseif(KOKKOS_ENABLE_PTHREAD AND KOKKOS_ENABLE_QTHREADS) - message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") -endif() - -# Get source files. -file(GLOB KOKKOS_CORE_SRCS core/src/impl/*.cpp) -file(GLOB KOKKOS_CONTAINERS_SRCS containers/src/impl/*.cpp) - -# Set options if using CUDA backend. -if(KOKKOS_ENABLE_CUDA) - if(KOKKOS_CUDA_DIR) - set(CUDA_TOOLKIT_ROOT_DIR ${KOKKOS_CUDA_DIR}) - endif() - - find_package(CUDA) - - if(NOT CUDA_FOUND) - if(KOKKOS_CUDA_DIR) - message(FATAL_ERROR "Couldn't find CUDA in default locations, and KOKKOS_CUDA_DIR points to an invalid installation.") - else() - message(FATAL_ERROR "Couldn't find CUDA in default locations. 
Set KOKKOS_CUDA_DIR.") - endif() - endif() - - list(APPEND KOKKOS_INCLUDE_DIRS ${CUDA_INCLUDE_DIRS}) - list(APPEND KOKKOS_LD_FLAGS -L${CUDA_TOOLKIT_ROOT_DIR}/lib64) - list(APPEND KOKKOS_LIBS cudart cuda) - - set(KOKKOS_HAVE_CUDA ON CACHE INTERNAL "") - file(GLOB KOKKOS_CUDA_SRCS core/src/Cuda/*.cpp) - list(APPEND KOKKOS_CORE_SRCS ${KOKKOS_CUDA_SRCS}) - - # Set CUDA UVM if requested. - if(KOKKOS_ENABLE_CUDA_UVM) - set(KOKKOS_USE_CUDA_UVM ON CACHE INTERNAL "") - endif() - - # Set CUDA relocatable device code if requested. - if(KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE) - set(KOKKOS_HAVE_CUDA_RDC ON CACHE INTERNAL "") - list(APPEND KOKKOS_CXX_FLAGS --relocatable-device-code=true) - list(APPEND KOKKOS_LD_FLAGS --relocatable-device-code=true) - endif() - - # Set CUDA lambda if requested. - if(KOKKOS_ENABLE_CUDA_LAMBDA) - set(KOKKOS_HAVE_CUDA_LAMBDA ON CACHE INTERNAL "") - - if(KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA) - if(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 7.5) - message(FATAL_ERROR "CUDA lambda support requires CUDA 7.5 or higher. Disable it or use a 7.5 or later compiler.") - else() - list(APPEND KOKKOS_CXX_FLAGS -expt-extended-lambda) - endif() - endif() - endif() - - # Set Clang specific options. - if(KOKKOS_CXX_COMPILER_ID STREQUAL Clang) - list(APPEND KOKKOS_CXX_FLAGS --cuda-path=${CUDA_TOOLKIT_ROOT_DIR}) - - set(KOKKOS_CUDA_CLANG_WORKAROUND ON CACHE INTERNAL "") - - # Force CUDA_LDG_INTRINSIC on when using Clang. - set(KOKKOS_ENABLE_CUDA_LDG_INTRINSIC ON CACHE BOOL "Enable CUDA LDG." FORCE) - endif() -endif() - -# Set options if using OpenMP backend. 
-if(KOKKOS_ENABLE_OPENMP) - find_package(OpenMP REQUIRED) - - if(OPENMP_FOUND) - if(KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA) - list(APPEND KOKKOS_CXX_FLAGS -Xcompiler) - endif() - - list(APPEND KOKKOS_CXX_FLAGS ${OpenMP_CXX_FLAGS}) - list(APPEND KOKKOS_LD_FLAGS ${OpenMP_CXX_FLAGS}) - endif() - - set(KOKKOS_HAVE_OPENMP ON CACHE INTERNAL "") - file(GLOB KOKKOS_OPENMP_SRCS core/src/OpenMP/*.cpp) - list(APPEND KOKKOS_CORE_SRCS ${KOKKOS_OPENMP_SRCS}) -endif() - -# Set options if using Pthreads backend. -if(KOKKOS_ENABLE_PTHREAD) - find_package(Threads REQUIRED) - - list(APPEND KOKKOS_LIBS Threads::Threads) - - set(KOKKOS_HAVE_PTHREAD ON CACHE INTERNAL "") - file(GLOB KOKKOS_PTHREAD_SRCS core/src/Threads/*.cpp) - list(APPEND KOKKOS_CORE_SRCS ${KOKKOS_PTHREAD_SRCS}) -endif() - -# Set options if using Qthreads backend. -if(KOKKOS_ENABLE_QTHREADS) - if(KOKKOS_QTHREADS_DIR) - list(APPEND CMAKE_PREFIX_PATH ${KOKKOS_QTHREADS_DIR}) - endif() - - find_package(Qthreads) - - if(NOT QTHREADS_FOUND) - if(KOKKOS_QTHREADS_DIR) - message(FATAL_ERROR "Couldn't find Qthreads in default locations, and KOKKOS_QTHREADS_DIR points to an invalid installation.") - else() - message(FATAL_ERROR "Couldn't find Qthreads in default locations. Set KOKKOS_QTHREADS_DIR.") - endif() - endif() - - list(APPEND KOKKOS_INCLUDE_DIRS ${QTHREADS_INCLUDE_DIR}) - list(APPEND KOKKOS_LIBS ${QTHREADS_LIBRARIES}) - - set(KOKKOS_HAVE_QTHREADS ON CACHE INTERNAL "") - file(GLOB KOKKOS_QTHREADS_SRCS core/src/Threads/*.cpp) - list(APPEND KOKKOS_CORE_SRCS ${KOKKOS_QTHREADS_SRCS}) - - if(KOKKOS_QTHREADS_DIR) - list(REMOVE_AT CMAKE_PREFIX_PATH -1) - endif() -endif() - -# Set options if using Serial backend. -if(KOKKOS_ENABLE_SERIAL) - set(KOKKOS_HAVE_SERIAL ON CACHE INTERNAL "") -else() - # Remove serial source files. 
- list(REMOVE_ITEM KOKKOS_CORE_SRCS - "${Kokkos_SOURCE_DIR}/core/src/impl/Kokkos_Serial.cpp" - "${Kokkos_SOURCE_DIR}/core/src/impl/Kokkos_Serial_Task.cpp") -endif() - -########################### SET ARCHITECTURE OPTIONS ########################### - -# Make sure the host architecture option is valid. Need to verify in case user -# passes the option via the command line. -list(FIND KOKKOS_HOST_ARCH_LIST "${KOKKOS_HOST_ARCH}" KOKKOS_VALID_HOST_ARCH) -if(KOKKOS_VALID_HOST_ARCH EQUAL -1) - set(KOKKOS_ARCH_TEXT "\n ${KOKKOS_HOST_ARCH_LIST}") - string(REPLACE ";" "\n " KOKKOS_ARCH_TEXT "${KOKKOS_ARCH_TEXT}") - set(KOKKOS_MESSAGE_TEXT "Invalid architecture for KOKKOS_HOST_ARCH: '${KOKKOS_HOST_ARCH}'") - set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Choices:${KOKKOS_ARCH_TEXT}\n") - message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") -endif() - -# Make sure the GPU architecture option is valid. Need to verify in case user -# passes the option via the command line. -list(FIND KOKKOS_GPU_ARCH_LIST "${KOKKOS_GPU_ARCH}" KOKKOS_VALID_GPU_ARCH) -if(KOKKOS_VALID_GPU_ARCH EQUAL -1) - set(KOKKOS_ARCH_TEXT "\n ${KOKKOS_GPU_ARCH_LIST}") - string(REPLACE ";" "\n " KOKKOS_ARCH_TEXT "${KOKKOS_ARCH_TEXT}") - set(KOKKOS_MESSAGE_TEXT "Invalid architecture for KOKKOS_GPU_ARCH: '${KOKKOS_GPU_ARCH}'") - set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Choices:${KOKKOS_ARCH_TEXT}\n") - message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") -endif() - -# Decide what ISA level we are able to support. -if(KOKKOS_HOST_ARCH STREQUAL SNB OR KOKKOS_HOST_ARCH STREQUAL HSW OR KOKKOS_HOST_ARCH STREQUAL BDW OR - KOKKOS_HOST_ARCH STREQUAL SKX OR KOKKOS_HOST_ARCH STREQUAL KNL) - set(KOKKOS_ENABLE_ISA_X86_64 ON CACHE INTERNAL "") -endif() - -if(KOKKOS_HOST_ARCH STREQUAL KNC) - set(KOKKOS_ENABLE_ISA_KNC ON CACHE INTERNAL "") -endif() - -if(KOKKOS_HOST_ARCH STREQUAL Power8 OR KOKKOS_HOST_ARCH STREQUAL Power9) - set(KOKKOS_ENABLE_ISA_POWERPCLE ON CACHE INTERNAL "") -endif() - -# Add host architecture options. 
-if(KOKKOS_HOST_ARCH STREQUAL ARMv80) - set(KOKKOS_ARCH_ARMV80 ON CACHE INTERNAL "") - - if(KOKKOS_CXX_COMPILER_ID STREQUAL Cray) - elseif(KOKKOS_CXX_COMPILER_ID STREQUAL PGI) - else() - list(APPEND KOKKOS_CXX_FLAGS -march=armv8-a) - list(APPEND KOKKOS_LD_FLAGS -march=armv8-a) - endif() -elseif(KOKKOS_HOST_ARCH STREQUAL ARMv81) - set(KOKKOS_ARCH_ARMV81 ON CACHE INTERNAL "") - - if(KOKKOS_CXX_COMPILER_ID STREQUAL Cray) - elseif(KOKKOS_CXX_COMPILER_ID STREQUAL PGI) - else() - list(APPEND KOKKOS_CXX_FLAGS -march=armv8.1-a) - list(APPEND KOKKOS_LD_FLAGS -march=armv8.1-a) - endif() -elseif(KOKKOS_HOST_ARCH STREQUAL ARMv8-ThunderX) - set(KOKKOS_ARCH_ARMV80 ON CACHE INTERNAL "") - set(KOKKOS_ARCH_ARMV8_THUNDERX ON CACHE INTERNAL "") - - if(KOKKOS_CXX_COMPILER_ID STREQUAL Cray) - elseif(KOKKOS_CXX_COMPILER_ID STREQUAL PGI) - else() - list(APPEND KOKKOS_CXX_FLAGS -march=armv8-a -mtune=thunderx) - list(APPEND KOKKOS_LD_FLAGS -march=armv8-a -mtune=thunderx) - endif() -elseif(KOKKOS_HOST_ARCH STREQUAL SNB OR KOKKOS_HOST_ARCH STREQUAL AMDAVX) - set(KOKKOS_ARCH_AVX ON CACHE INTERNAL "") - - if(KOKKOS_CXX_COMPILER_ID STREQUAL Intel) - list(APPEND KOKKOS_CXX_FLAGS -mavx) - list(APPEND KOKKOS_LD_FLAGS -mavx) - elseif(KOKKOS_CXX_COMPILER_ID STREQUAL Cray) - elseif(KOKKOS_CXX_COMPILER_ID STREQUAL PGI) - list(APPEND KOKKOS_CXX_FLAGS -tp=sandybridge) - list(APPEND KOKKOS_LD_FLAGS -tp=sandybridge) - else() - list(APPEND KOKKOS_CXX_FLAGS -mavx) - list(APPEND KOKKOS_LD_FLAGS -mavx) - endif() -elseif(KOKKOS_HOST_ARCH STREQUAL HSW OR KOKKOS_HOST_ARCH STREQUAL BDW) - set(KOKKOS_ARCH_AVX2 ON CACHE INTERNAL "") - - if(KOKKOS_CXX_COMPILER_ID STREQUAL Intel) - list(APPEND KOKKOS_CXX_FLAGS -xCORE-AVX2) - list(APPEND KOKKOS_LD_FLAGS -xCORE-AVX2) - elseif(KOKKOS_CXX_COMPILER_ID STREQUAL Cray) - elseif(KOKKOS_CXX_COMPILER_ID STREQUAL PGI) - list(APPEND KOKKOS_CXX_FLAGS -tp=haswell) - list(APPEND KOKKOS_LD_FLAGS -tp=haswell) - else() - list(APPEND KOKKOS_CXX_FLAGS -march=core-avx2 -mtune=core-avx2) - 
list(APPEND KOKKOS_LD_FLAGS -march=core-avx2 -mtune=core-avx2) - endif() -elseif(KOKKOS_HOST_ARCH STREQUAL KNL) - set(KOKKOS_ARCH_AVX512MIC ON CACHE INTERNAL "") - - if(KOKKOS_CXX_COMPILER_ID STREQUAL Intel) - list(APPEND KOKKOS_CXX_FLAGS -xMIC-AVX512) - list(APPEND KOKKOS_LD_FLAGS -xMIC-AVX512) - elseif(KOKKOS_CXX_COMPILER_ID STREQUAL Cray) - elseif(KOKKOS_CXX_COMPILER_ID STREQUAL PGI) - else() - list(APPEND KOKKOS_CXX_FLAGS -march=knl) - list(APPEND KOKKOS_LD_FLAGS -march=knl) - endif() -elseif(KOKKOS_HOST_ARCH STREQUAL SKX) - set(KOKKOS_ARCH_AVX512XEON ON CACHE INTERNAL "") - - if(KOKKOS_CXX_COMPILER_ID STREQUAL Intel) - list(APPEND KOKKOS_CXX_FLAGS -xCORE-AVX512) - list(APPEND KOKKOS_LD_FLAGS -xCORE-AVX512) - elseif(KOKKOS_CXX_COMPILER_ID STREQUAL Cray) - elseif(KOKKOS_CXX_COMPILER_ID STREQUAL PGI) - else() - list(APPEND KOKKOS_CXX_FLAGS -march=skylake-avx512) - list(APPEND KOKKOS_LD_FLAGS -march=skylake-avx512) - endif() -elseif(KOKKOS_HOST_ARCH STREQUAL KNC) - set(KOKKOS_ARCH_KNC ON CACHE INTERNAL "") - list(APPEND KOKKOS_CXX_FLAGS -mmic) - list(APPEND KOKKOS_LD_FLAGS -mmic) -elseif(KOKKOS_HOST_ARCH STREQUAL Power8) - set(KOKKOS_ARCH_POWER8 ON CACHE INTERNAL "") - - if(KOKKOS_CXX_COMPILER_ID STREQUAL PGI) - else() - list(APPEND KOKKOS_CXX_FLAGS -mcpu=power8 -mtune=power8) - list(APPEND KOKKOS_LD_FLAGS -mcpu=power8 -mtune=power8) - endif() -elseif(KOKKOS_HOST_ARCH STREQUAL Power9) - set(KOKKOS_ARCH_POWER9 ON CACHE INTERNAL "") - - if(KOKKOS_CXX_COMPILER_ID STREQUAL PGI) - else() - list(APPEND KOKKOS_CXX_FLAGS -mcpu=power9 -mtune=power9) - list(APPEND KOKKOS_LD_FLAGS -mcpu=power9 -mtune=power9) - endif() -endif() - -# Add GPU architecture options. 
-if(KOKKOS_ENABLE_CUDA) - if(KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA) - set(KOKKOS_GPU_ARCH_FLAG -arch) - elseif(KOKKOS_CXX_COMPILER_ID STREQUAL Clang) - list(APPEND KOKKOS_CXX_FLAGS -x cuda) - set(KOKKOS_GPU_ARCH_FLAG --cuda-gpu-arch) - endif() - - if(KOKKOS_GPU_ARCH STREQUAL Kepler30) - set(KOKKOS_ARCH_KEPLER ON CACHE INTERNAL "") - set(KOKKOS_ARCH_KEPLER30 ON CACHE INTERNAL "") - set(KOKKOS_GPU_ARCH_FLAG ${KOKKOS_GPU_ARCH_FLAG}=sm_30) - elseif(KOKKOS_GPU_ARCH STREQUAL Kepler32) - set(KOKKOS_ARCH_KEPLER ON CACHE INTERNAL "") - set(KOKKOS_ARCH_KEPLER32 ON CACHE INTERNAL "") - set(KOKKOS_GPU_ARCH_FLAG ${KOKKOS_GPU_ARCH_FLAG}=sm_32) - elseif(KOKKOS_GPU_ARCH STREQUAL Kepler35 OR KOKKOS_GPU_ARCH STREQUAL Kepler) - set(KOKKOS_ARCH_KEPLER ON CACHE INTERNAL "") - set(KOKKOS_ARCH_KEPLER35 ON CACHE INTERNAL "") - set(KOKKOS_GPU_ARCH_FLAG ${KOKKOS_GPU_ARCH_FLAG}=sm_35) - elseif(KOKKOS_GPU_ARCH STREQUAL Kepler37) - set(KOKKOS_ARCH_KEPLER ON CACHE INTERNAL "") - set(KOKKOS_ARCH_KEPLER37 ON CACHE INTERNAL "") - set(KOKKOS_GPU_ARCH_FLAG ${KOKKOS_GPU_ARCH_FLAG}=sm_37) - elseif(KOKKOS_GPU_ARCH STREQUAL Maxwell50 OR KOKKOS_GPU_ARCH STREQUAL Maxwell) - set(KOKKOS_ARCH_MAXWELL ON CACHE INTERNAL "") - set(KOKKOS_ARCH_MAXWELL50 ON CACHE INTERNAL "") - set(KOKKOS_GPU_ARCH_FLAG ${KOKKOS_GPU_ARCH_FLAG}=sm_50) - elseif(KOKKOS_GPU_ARCH STREQUAL Maxwell52) - set(KOKKOS_ARCH_MAXWELL ON CACHE INTERNAL "") - set(KOKKOS_ARCH_MAXWELL52 ON CACHE INTERNAL "") - set(KOKKOS_GPU_ARCH_FLAG ${KOKKOS_GPU_ARCH_FLAG}=sm_52) - elseif(KOKKOS_GPU_ARCH STREQUAL Maxwell53) - set(KOKKOS_ARCH_MAXWELL ON CACHE INTERNAL "") - set(KOKKOS_ARCH_MAXWELL53 ON CACHE INTERNAL "") - set(KOKKOS_GPU_ARCH_FLAG ${KOKKOS_GPU_ARCH_FLAG}=sm_53) - elseif(KOKKOS_GPU_ARCH STREQUAL Pascal60) - set(KOKKOS_ARCH_PASCAL ON CACHE INTERNAL "") - set(KOKKOS_ARCH_PASCAL60 ON CACHE INTERNAL "") - set(KOKKOS_GPU_ARCH_FLAG ${KOKKOS_GPU_ARCH_FLAG}=sm_60) - elseif(KOKKOS_GPU_ARCH STREQUAL Pascal61) - set(KOKKOS_ARCH_PASCAL ON CACHE INTERNAL "") - 
set(KOKKOS_ARCH_PASCAL61 ON CACHE INTERNAL "") - set(KOKKOS_GPU_ARCH_FLAG ${KOKKOS_GPU_ARCH_FLAG}=sm_61) - endif() - - if(NOT KOKKOS_GPU_ARCH STREQUAL None) - list(APPEND KOKKOS_CXX_FLAGS ${KOKKOS_GPU_ARCH_FLAG}) - - if(KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA) - list(APPEND KOKKOS_LD_FLAGS ${KOKKOS_GPU_ARCH_FLAG}) - endif() - endif() -endif() - -############################### SET OTHER OPTIONS ############################## - -# Set options if using hwloc. -if(KOKKOS_ENABLE_HWLOC) - if(KOKKOS_HWLOC_DIR) - list(APPEND CMAKE_PREFIX_PATH ${KOKKOS_HWLOC_DIR}) - endif() - - find_package(HWLOC) - - if(NOT HWLOC_FOUND) - if(KOKKOS_HWLOC_DIR) - message(FATAL_ERROR "Couldn't find HWLOC in default locations, and KOKKOS_HWLOC_DIR points to an invalid installation.") - else() - message(FATAL_ERROR "Couldn't find HWLOC in default locations. Set KOKKOS_HWLOC_DIR.") - endif() - endif() - - list(APPEND KOKKOS_INCLUDE_DIRS ${HWLOC_INCLUDE_DIR}) - list(APPEND KOKKOS_LIBS ${HWLOC_LIBRARIES}) - - set(KOKKOS_HAVE_HWLOC ON CACHE INTERNAL "") - - if(KOKKOS_HWLOC_DIR) - list(REMOVE_AT CMAKE_PREFIX_PATH -1) - endif() -endif() - -# Set options if using memkind. -if(KOKKOS_ENABLE_MEMKIND) - if(KOKKOS_MEMKIND_DIR) - list(APPEND CMAKE_PREFIX_PATH ${KOKKOS_MEMKIND_DIR}) - endif() - - find_package(Memkind) - - if(NOT MEMKIND_FOUND) - if(KOKKOS_MEMKIND_DIR) - message(FATAL_ERROR "Couldn't find Memkind in default locations, and KOKKOS_MEMKIND_DIR points to an invalid installation.") - else() - message(FATAL_ERROR "Couldn't find Memkind in default locations. Set KOKKOS_MEMKIND_DIR.") - endif() - endif() - - set(KOKKOS_ENABLE_HBWSPACE ON CACHE INTERNAL "") - list(APPEND KOKKOS_INCLUDE_DIRS ${MEMKIND_INCLUDE_DIR}) - list(APPEND KOKKOS_LIBS ${MEMKIND_LIBRARIES}) - - if(KOKKOS_MEMKIND_DIR) - list(REMOVE_AT CMAKE_PREFIX_PATH -1) - endif() -else() - # Remove HBW source file. 
- list(REMOVE_ITEM KOKKOS_CORE_SRCS - "${Kokkos_SOURCE_DIR}/core/src/impl/Kokkos_HBWSpace.cpp") -endif() - -# Set options if using librt. -if(KOKKOS_ENABLE_LIBRT) - list(APPEND KOKKOS_LIBS rt) -endif() - -# Set debugging if requested. -if(KOKKOS_DEBUG) - set(KOKKOS_HAVE_DEBUG ON CACHE INTERNAL "") - set(KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK ON CACHE INTERNAL "") - - if(KOKKOS_CXX_COVIDIA) - list(APPEND KOKKOS_CXX_FLAGS -lineinfo) - endif() - - list(APPEND KOKKOS_CXX_FLAGS -g) - list(APPEND KOKKOS_LD_FLAGS -g) -endif() - -# Set profiling if requested. -if(KOKKOS_ENABLE_PROFILING) - list(APPEND KOKKOS_LIBS dl) -else() - # Remove profiling source file. - list(REMOVE_ITEM KOKKOS_CORE_SRCS - "${Kokkos_SOURCE_DIR}/core/src/impl/Kokkos_Profiling_Interface.cpp") -endif() - -# Use GCC toolchain with Clang. -if(KOKKOS_CXX_COMPILER_ID STREQUAL Clang AND NOT APPLE) - find_program(KOKKOS_GCC_PATH g++) - if(NOT KOKKOS_GCC_PATH) - message(FATAL_ERROR "Can't find GCC path to get toolchain for Clang.") - endif() - string(REPLACE "/bin/g++" "" KOKKOS_GCC_PATH ${KOKKOS_GCC_PATH}) - - list(APPEND KOKKOS_CXX_FLAGS --gcc-toolchain=${KOKKOS_GCC_PATH}) - list(APPEND KOKKOS_LD_FLAGS --gcc-toolchain=${KOKKOS_GCC_PATH}) -endif() - -############################ Detect if submodule ############################### -# -# With thanks to StackOverflow: -# http://stackoverflow.com/questions/25199677/how-to-detect-if-current-scope-has-a-parent-in-cmake -# -get_directory_property(HAS_PARENT PARENT_DIRECTORY) -if(HAS_PARENT) - message(STATUS "Submodule build") - SET(KOKKOS_HEADER_DIR "include/kokkos") -else() - message(STATUS "Standalone build") - SET(KOKKOS_HEADER_DIR "include") -endif() - -############################ PRINT CONFIGURE STATUS ############################ - -message(STATUS "") -message(STATUS "****************** Kokkos Settings ******************") -message(STATUS "Execution Spaces") - -if(KOKKOS_ENABLE_CUDA) - message(STATUS " Device Parallel: Cuda") -else() - message(STATUS " Device 
Parallel: None") -endif() - -if(KOKKOS_ENABLE_OPENMP) - message(STATUS " Host Parallel: OpenMP") -elseif(KOKKOS_ENABLE_PTHREAD) - message(STATUS " Host Parallel: Pthread") -elseif(KOKKOS_ENABLE_QTHREADS) - message(STATUS " Host Parallel: Qthreads") -else() - message(STATUS " Host Parallel: None") -endif() - -if(KOKKOS_ENABLE_SERIAL) - message(STATUS " Host Serial: Serial") -else() - message(STATUS " Host Serial: None") -endif() - -message(STATUS "") -message(STATUS "Architectures") -message(STATUS " Host Architecture: ${KOKKOS_HOST_ARCH}") -message(STATUS " Device Architecture: ${KOKKOS_GPU_ARCH}") - -message(STATUS "") -message(STATUS "Enabled options") - -if(KOKKOS_SEPARATE_LIBS) - message(STATUS " KOKKOS_SEPARATE_LIBS") -endif() - -if(KOKKOS_ENABLE_HWLOC) - message(STATUS " KOKKOS_ENABLE_HWLOC") -endif() - -if(KOKKOS_ENABLE_MEMKIND) - message(STATUS " KOKKOS_ENABLE_MEMKIND") -endif() - -if(KOKKOS_DEBUG) - message(STATUS " KOKKOS_DEBUG") -endif() - -if(KOKKOS_ENABLE_PROFILING) - message(STATUS " KOKKOS_ENABLE_PROFILING") -endif() - -if(KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION) - message(STATUS " KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION") -endif() - -if(KOKKOS_ENABLE_CUDA) - if(KOKKOS_ENABLE_CUDA_LDG_INTRINSIC) - message(STATUS " KOKKOS_ENABLE_CUDA_LDG_INTRINSIC") - endif() - - if(KOKKOS_ENABLE_CUDA_UVM) - message(STATUS " KOKKOS_ENABLE_CUDA_UVM") - endif() - - if(KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE) - message(STATUS " KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE") - endif() - - if(KOKKOS_ENABLE_CUDA_LAMBDA) - message(STATUS " KOKKOS_ENABLE_CUDA_LAMBDA") - endif() - - if(KOKKOS_CUDA_DIR) - message(STATUS " KOKKOS_CUDA_DIR: ${KOKKOS_CUDA_DIR}") - endif() -endif() - -if(KOKKOS_QTHREADS_DIR) - message(STATUS " KOKKOS_QTHREADS_DIR: ${KOKKOS_QTHREADS_DIR}") -endif() - -if(KOKKOS_HWLOC_DIR) - message(STATUS " KOKKOS_HWLOC_DIR: ${KOKKOS_HWLOC_DIR}") -endif() - -if(KOKKOS_MEMKIND_DIR) - message(STATUS " KOKKOS_MEMKIND_DIR: ${KOKKOS_MEMKIND_DIR}") -endif() - 
-message(STATUS "*****************************************************") -message(STATUS "") - -################################ SET UP PROJECT ################################ - -configure_file( - ${Kokkos_SOURCE_DIR}/core/cmake/KokkosCore_config.h.in - ${Kokkos_BINARY_DIR}/KokkosCore_config.h -) - -SET(INSTALL_LIB_DIR lib CACHE PATH "Installation directory for libraries") -SET(INSTALL_BIN_DIR bin CACHE PATH "Installation directory for executables") -SET(INSTALL_INCLUDE_DIR ${KOKKOS_HEADER_DIR} CACHE PATH - "Installation directory for header files") -IF(WIN32 AND NOT CYGWIN) - SET(DEF_INSTALL_CMAKE_DIR CMake) -ELSE() - SET(DEF_INSTALL_CMAKE_DIR lib/CMake/Kokkos) -ENDIF() - -SET(INSTALL_CMAKE_DIR ${DEF_INSTALL_CMAKE_DIR} CACHE PATH - "Installation directory for CMake files") - -# Make relative paths absolute (needed later on) -FOREACH(p LIB BIN INCLUDE CMAKE) - SET(var INSTALL_${p}_DIR) - IF(NOT IS_ABSOLUTE "${${var}}") - SET(${var} "${CMAKE_INSTALL_PREFIX}/${${var}}") - ENDIF() -ENDFOREACH() - -# set up include-directories -SET (Kokkos_INCLUDE_DIRS - ${Kokkos_SOURCE_DIR}/core/src - ${Kokkos_SOURCE_DIR}/containers/src - ${Kokkos_SOURCE_DIR}/algorithms/src - ${Kokkos_BINARY_DIR} # to find KokkosCore_config.h - ${KOKKOS_INCLUDE_DIRS} -) - -# pass include dirs back to parent scope -SET(Kokkos_INCLUDE_DIRS_RET ${Kokkos_INCLUDE_DIRS} PARENT_SCOPE) - -INCLUDE_DIRECTORIES(${Kokkos_INCLUDE_DIRS}) - -IF(KOKKOS_SEPARATE_LIBS) - # kokkoscore - ADD_LIBRARY( - kokkoscore - ${KOKKOS_CORE_SRCS} - ) - - target_compile_options( - kokkoscore - PUBLIC ${KOKKOS_CXX_FLAGS} - ) - - target_compile_features( - kokkoscore - PUBLIC ${KOKKOS_CXX11_FEATURES} - ) - - # Install the kokkoscore library - INSTALL (TARGETS kokkoscore - ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/lib - LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/lib - RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin - ) - - # Install the kokkoscore headers - INSTALL (DIRECTORY - ${Kokkos_SOURCE_DIR}/core/src/ - DESTINATION 
${KOKKOS_HEADER_DIR} - FILES_MATCHING PATTERN "*.hpp" - ) - - # Install KokkosCore_config.h header - INSTALL (FILES - ${Kokkos_BINARY_DIR}/KokkosCore_config.h - DESTINATION ${KOKKOS_HEADER_DIR} - ) - - TARGET_LINK_LIBRARIES( - kokkoscore - ${KOKKOS_LD_FLAGS} - ${KOKKOS_LIBS} - ) - - # kokkoscontainers - ADD_LIBRARY( - kokkoscontainers - ${KOKKOS_CONTAINERS_SRCS} - ) - - TARGET_LINK_LIBRARIES( - kokkoscontainers - kokkoscore - ) - - # Install the kokkocontainers library - INSTALL (TARGETS kokkoscontainers - ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/lib - LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/lib - RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin) - - # Install the kokkoscontainers headers - INSTALL (DIRECTORY - ${Kokkos_SOURCE_DIR}/containers/src/ - DESTINATION ${KOKKOS_HEADER_DIR} - FILES_MATCHING PATTERN "*.hpp" - ) - - # kokkosalgorithms - Build as interface library since no source files. - ADD_LIBRARY( - kokkosalgorithms - INTERFACE - ) - - target_include_directories( - kokkosalgorithms - INTERFACE ${Kokkos_SOURCE_DIR}/algorithms/src - ) - - TARGET_LINK_LIBRARIES( - kokkosalgorithms - INTERFACE kokkoscore - ) - - # Install the kokkoalgorithms library - INSTALL (TARGETS kokkosalgorithms - ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/lib - LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/lib - RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin) - - # Install the kokkosalgorithms headers - INSTALL (DIRECTORY - ${Kokkos_SOURCE_DIR}/algorithms/src/ - DESTINATION ${KOKKOS_INSTALL_INDLUDE_DIR} - FILES_MATCHING PATTERN "*.hpp" - ) - - SET (Kokkos_LIBRARIES_NAMES kokkoscore kokkoscontainers kokkosalgorithms) - -ELSE() - # kokkos - ADD_LIBRARY( - kokkos - ${KOKKOS_CORE_SRCS} - ${KOKKOS_CONTAINERS_SRCS} - ) - - target_compile_options( - kokkos - PUBLIC ${KOKKOS_CXX_FLAGS} - ) - - target_compile_features( - kokkos - PUBLIC ${KOKKOS_CXX11_FEATURES} - ) - - TARGET_LINK_LIBRARIES( - kokkos - ${KOKKOS_LD_FLAGS} - ${KOKKOS_LIBS} - ) - - # Install the kokkos library - INSTALL 
(TARGETS kokkos - EXPORT KokkosTargets - ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/lib - LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/lib - RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin) - - - # Install the kokkos headers - INSTALL (DIRECTORY - EXPORT KokkosTargets - ${Kokkos_SOURCE_DIR}/core/src/ - DESTINATION ${KOKKOS_HEADER_DIR} - FILES_MATCHING PATTERN "*.hpp" - ) - INSTALL (DIRECTORY - EXPORT KokkosTargets - ${Kokkos_SOURCE_DIR}/containers/src/ - DESTINATION ${KOKKOS_HEADER_DIR} - FILES_MATCHING PATTERN "*.hpp" - ) - INSTALL (DIRECTORY - EXPORT KokkosTargets - ${Kokkos_SOURCE_DIR}/algorithms/src/ - DESTINATION ${KOKKOS_HEADER_DIR} - FILES_MATCHING PATTERN "*.hpp" - ) - - INSTALL (FILES - ${Kokkos_BINARY_DIR}/KokkosCore_config.h - DESTINATION ${KOKKOS_HEADER_DIR} - ) - - include_directories(${Kokkos_BINARY_DIR}) - include_directories(${Kokkos_SOURCE_DIR}/core/src) - include_directories(${Kokkos_SOURCE_DIR}/containers/src) - include_directories(${Kokkos_SOURCE_DIR}/algorithms/src) - - - SET (Kokkos_LIBRARIES_NAMES kokkos) - -endif() - -# Add all targets to the build-tree export set -export(TARGETS ${Kokkos_LIBRARIES_NAMES} - FILE "${Kokkos_BINARY_DIR}/KokkosTargets.cmake") - -# Export the package for use from the build-tree -# (this registers the build-tree with a global CMake-registry) -export(PACKAGE Kokkos) - -# Create the KokkosConfig.cmake and KokkosConfigVersion files -file(RELATIVE_PATH REL_INCLUDE_DIR "${INSTALL_CMAKE_DIR}" - "${INSTALL_INCLUDE_DIR}") -# ... for the build tree -set(CONF_INCLUDE_DIRS "${Kokkos_SOURCE_DIR}" "${Kokkos_BINARY_DIR}") -configure_file(${Kokkos_SOURCE_DIR}/cmake/KokkosConfig.cmake.in - "${Kokkos_BINARY_DIR}/KokkosConfig.cmake" @ONLY) -# ... 
for the install tree -set(CONF_INCLUDE_DIRS "\${Kokkos_CMAKE_DIR}/${REL_INCLUDE_DIR}") -configure_file(${Kokkos_SOURCE_DIR}/cmake/KokkosConfig.cmake.in - "${Kokkos_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/KokkosConfig.cmake" @ONLY) - -# Install the KokkosConfig.cmake and KokkosConfigVersion.cmake -install(FILES - "${Kokkos_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/KokkosConfig.cmake" - DESTINATION "${INSTALL_CMAKE_DIR}") - -# Install the export set for use with the install-tree -INSTALL(EXPORT KokkosTargets DESTINATION - "${INSTALL_CMAKE_DIR}")
diff --git a/lib/kokkos/cmake/kokkos_build.cmake b/lib/kokkos/cmake/kokkos_build.cmake
new file mode 100644
index 0000000000..f31680d6e2
--- /dev/null
+++ b/lib/kokkos/cmake/kokkos_build.cmake
@@ -0,0 +1,219 @@
+# kokkos_generated_settings.cmake lists the kokkos library itself (-lkokkos) in KOKKOS_LIBS,
+# which the CMake build must not use, so strip it. Quoted so an empty or list value stays one argument.
+string(REGEX REPLACE "-lkokkos" "" KOKKOS_LIBS "${KOKKOS_LIBS}")
+
+############################ Detect if submodule ###############################
+# PARENT_DIRECTORY is empty in the top-level directory, so HAS_PARENT is false for a standalone build.
+# With thanks to StackOverflow:
+# http://stackoverflow.com/questions/25199677/how-to-detect-if-current-scope-has-a-parent-in-cmake
+#
+get_directory_property(HAS_PARENT PARENT_DIRECTORY)
+if(HAS_PARENT)
+  message(STATUS "Submodule build")
+  SET(KOKKOS_HEADER_DIR "include/kokkos")
+else()
+  message(STATUS "Standalone build")
+  SET(KOKKOS_HEADER_DIR "include")
+endif()
+
+################################ Handle the actual build #######################
+
+SET(INSTALL_LIB_DIR lib CACHE PATH "Installation directory for libraries")
+SET(INSTALL_BIN_DIR bin CACHE PATH "Installation directory for executables")
+SET(INSTALL_INCLUDE_DIR ${KOKKOS_HEADER_DIR} CACHE PATH
+  "Installation directory for header files")
+IF(WIN32 AND NOT CYGWIN)
+  SET(DEF_INSTALL_CMAKE_DIR CMake)
+ELSE()
+  SET(DEF_INSTALL_CMAKE_DIR lib/CMake/Kokkos)
+ENDIF()
+
+SET(INSTALL_CMAKE_DIR ${DEF_INSTALL_CMAKE_DIR} CACHE PATH
+  "Installation directory for CMake files")
+ +# Make relative paths absolute (needed later on) +FOREACH(p LIB BIN INCLUDE CMAKE) + SET(var INSTALL_${p}_DIR) + IF(NOT IS_ABSOLUTE "${${var}}") + SET(${var} "${CMAKE_INSTALL_PREFIX}/${${var}}") + ENDIF() +ENDFOREACH() + +# set up include-directories +SET (Kokkos_INCLUDE_DIRS + ${Kokkos_SOURCE_DIR}/core/src + ${Kokkos_SOURCE_DIR}/containers/src + ${Kokkos_SOURCE_DIR}/algorithms/src + ${Kokkos_BINARY_DIR} # to find KokkosCore_config.h + ${KOKKOS_INCLUDE_DIRS} +) + +# pass include dirs back to parent scope +if(HAS_PARENT) +SET(Kokkos_INCLUDE_DIRS_RET ${Kokkos_INCLUDE_DIRS} PARENT_SCOPE) +else() +SET(Kokkos_INCLUDE_DIRS_RET ${Kokkos_INCLUDE_DIRS}) +endif() + +INCLUDE_DIRECTORIES(${Kokkos_INCLUDE_DIRS}) + +IF(KOKKOS_SEPARATE_LIBS) + # Sources come from makefile-generated kokkos_generated_settings.cmake file + # Separate libs need to separate the sources + set_kokkos_srcs(KOKKOS_SRC ${KOKKOS_SRC}) + + # kokkoscore + ADD_LIBRARY( + kokkoscore + ${KOKKOS_CORE_SRCS} + ) + + target_compile_options( + kokkoscore + PUBLIC $<$:${KOKKOS_CXX_FLAGS}> + ) + + # Install the kokkoscore library + INSTALL (TARGETS kokkoscore + EXPORT KokkosTargets + ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/lib + LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/lib + RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin + ) + + TARGET_LINK_LIBRARIES( + kokkoscore + ${KOKKOS_LD_FLAGS} + ${KOKKOS_EXTRA_LIBS_LIST} + ) + + # kokkoscontainers + if (DEFINED KOKKOS_CONTAINERS_SRCS) + ADD_LIBRARY( + kokkoscontainers + ${KOKKOS_CONTAINERS_SRCS} + ) + endif() + + TARGET_LINK_LIBRARIES( + kokkoscontainers + kokkoscore + ) + + # Install the kokkocontainers library + INSTALL (TARGETS kokkoscontainers + EXPORT KokkosTargets + ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/lib + LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/lib + RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin) + + # kokkosalgorithms - Build as interface library since no source files. 
+ ADD_LIBRARY( + kokkosalgorithms + INTERFACE + ) + + target_include_directories( + kokkosalgorithms + INTERFACE ${Kokkos_SOURCE_DIR}/algorithms/src + ) + + TARGET_LINK_LIBRARIES( + kokkosalgorithms + INTERFACE kokkoscore + ) + + # Install the kokkoalgorithms library + INSTALL (TARGETS kokkosalgorithms + ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/lib + LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/lib + RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin) + + SET (Kokkos_LIBRARIES_NAMES kokkoscore kokkoscontainers kokkosalgorithms) + +ELSE() + # kokkos + ADD_LIBRARY( + kokkos + ${KOKKOS_CORE_SRCS} + ${KOKKOS_CONTAINERS_SRCS} + ) + + target_compile_options( + kokkos + PUBLIC $<$:${KOKKOS_CXX_FLAGS}> + ) + + TARGET_LINK_LIBRARIES( + kokkos + ${KOKKOS_LD_FLAGS} + ${KOKKOS_EXTRA_LIBS_LIST} + ) + + # Install the kokkos library + INSTALL (TARGETS kokkos + EXPORT KokkosTargets + ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/lib + LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/lib + RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin) + + + SET (Kokkos_LIBRARIES_NAMES kokkos) + +endif() # KOKKOS_SEPARATE_LIBS + +# Install the kokkos headers +INSTALL (DIRECTORY + EXPORT KokkosTargets + ${Kokkos_SOURCE_DIR}/core/src/ + DESTINATION ${KOKKOS_HEADER_DIR} + FILES_MATCHING PATTERN "*.hpp" +) +INSTALL (DIRECTORY + EXPORT KokkosTargets + ${Kokkos_SOURCE_DIR}/containers/src/ + DESTINATION ${KOKKOS_HEADER_DIR} + FILES_MATCHING PATTERN "*.hpp" +) +INSTALL (DIRECTORY + EXPORT KokkosTargets + ${Kokkos_SOURCE_DIR}/algorithms/src/ + DESTINATION ${KOKKOS_HEADER_DIR} + FILES_MATCHING PATTERN "*.hpp" +) + +INSTALL (FILES + ${Kokkos_BINARY_DIR}/KokkosCore_config.h + DESTINATION ${KOKKOS_HEADER_DIR} +) + +# Add all targets to the build-tree export set +export(TARGETS ${Kokkos_LIBRARIES_NAMES} + FILE "${Kokkos_BINARY_DIR}/KokkosTargets.cmake") + +# Export the package for use from the build-tree +# (this registers the build-tree with a global CMake-registry) +export(PACKAGE Kokkos) + +# Create the 
KokkosConfig.cmake and KokkosConfigVersion files +file(RELATIVE_PATH REL_INCLUDE_DIR "${INSTALL_CMAKE_DIR}" + "${INSTALL_INCLUDE_DIR}") +# ... for the build tree +set(CONF_INCLUDE_DIRS "${Kokkos_SOURCE_DIR}" "${Kokkos_BINARY_DIR}") +configure_file(${Kokkos_SOURCE_DIR}/cmake/KokkosConfig.cmake.in + "${Kokkos_BINARY_DIR}/KokkosConfig.cmake" @ONLY) +# ... for the install tree +set(CONF_INCLUDE_DIRS "\${Kokkos_CMAKE_DIR}/${REL_INCLUDE_DIR}") +configure_file(${Kokkos_SOURCE_DIR}/cmake/KokkosConfig.cmake.in + "${Kokkos_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/KokkosConfig.cmake" @ONLY) + +# Install the KokkosConfig.cmake and KokkosConfigVersion.cmake +install(FILES + "${Kokkos_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/KokkosConfig.cmake" + DESTINATION "${INSTALL_CMAKE_DIR}") + +#This seems not to do anything? +#message(STATUS "KokkosTargets: " ${KokkosTargets}) +# Install the export set for use with the install-tree +INSTALL(EXPORT KokkosTargets DESTINATION + "${INSTALL_CMAKE_DIR}") diff --git a/lib/kokkos/cmake/kokkos_functions.cmake b/lib/kokkos/cmake/kokkos_functions.cmake new file mode 100644 index 0000000000..c0c62ccb6a --- /dev/null +++ b/lib/kokkos/cmake/kokkos_functions.cmake @@ -0,0 +1,345 @@ +################################### FUNCTIONS ################################## +# List of functions +# set_kokkos_cxx_compiler +# set_kokkos_cxx_standard +# set_kokkos_srcs + +#------------------------------------------------------------------------------- +# function(set_kokkos_cxx_compiler) +# Sets the following compiler variables that are analogous to the CMAKE_* +# versions. We add the ability to detect NVCC (really nvcc_wrapper). +# KOKKOS_CXX_COMPILER +# KOKKOS_CXX_COMPILER_ID +# KOKKOS_CXX_COMPILER_VERSION +# +# Inputs: +# KOKKOS_ENABLE_CUDA +# CMAKE_CXX_COMPILER +# CMAKE_CXX_COMPILER_ID +# CMAKE_CXX_COMPILER_VERSION +# +# Also verifies the compiler version meets the minimum required by Kokkos. 
+function(set_kokkos_cxx_compiler)
+  # Since CMake doesn't recognize the nvcc compiler until 3.8, we use our own
+  # version of the CMake variables and detect nvcc ourselves.  Initially set to
+  # the CMake variable values.
+  set(INTERNAL_CXX_COMPILER ${CMAKE_CXX_COMPILER})
+  set(INTERNAL_CXX_COMPILER_ID ${CMAKE_CXX_COMPILER_ID})
+  set(INTERNAL_CXX_COMPILER_VERSION ${CMAKE_CXX_COMPILER_VERSION})
+
+  # Check if the compiler is nvcc (which really means nvcc_wrapper): count the
+  # lines of `<compiler> --version` that mention nvcc.
+  execute_process(COMMAND ${INTERNAL_CXX_COMPILER} --version
+                  COMMAND grep nvcc
+                  COMMAND wc -l
+                  OUTPUT_VARIABLE INTERNAL_HAVE_COMPILER_NVCC
+                  OUTPUT_STRIP_TRAILING_WHITESPACE)
+
+  string(REGEX REPLACE "^ +" ""
+         INTERNAL_HAVE_COMPILER_NVCC "${INTERNAL_HAVE_COMPILER_NVCC}")
+
+  if(INTERNAL_HAVE_COMPILER_NVCC)
+    # Set the compiler id to nvcc.  We use the value used by CMake 3.8.
+    set(INTERNAL_CXX_COMPILER_ID NVIDIA)
+
+    # Set nvcc's compiler version (the trailing X.Y.Z on the "release" line).
+    execute_process(COMMAND ${INTERNAL_CXX_COMPILER} --version
+                    COMMAND grep release
+                    OUTPUT_VARIABLE INTERNAL_CXX_COMPILER_VERSION
+                    OUTPUT_STRIP_TRAILING_WHITESPACE)
+
+    string(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+$"
+           INTERNAL_CXX_COMPILER_VERSION "${INTERNAL_CXX_COMPILER_VERSION}")
+  endif()
+
+  # Enforce the minimum compilers supported by Kokkos.
+  set(KOKKOS_MESSAGE_TEXT "Compiler not supported by Kokkos. Required compiler versions:")
+  set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Clang 3.5.2 or higher")
+  set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n GCC 4.8.4 or higher")
+  set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Intel 15.0.2 or higher")
+  set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n NVCC 7.0.28 or higher")
+  set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n PGI 17.1 or higher\n")
+
+  if(INTERNAL_CXX_COMPILER_ID STREQUAL Clang)
+    if(INTERNAL_CXX_COMPILER_VERSION VERSION_LESS 3.5.2)
+      message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
+    endif()
+  elseif(INTERNAL_CXX_COMPILER_ID STREQUAL GNU)
+    if(INTERNAL_CXX_COMPILER_VERSION VERSION_LESS 4.8.4)
+      message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
+    endif()
+  elseif(INTERNAL_CXX_COMPILER_ID STREQUAL Intel)
+    if(INTERNAL_CXX_COMPILER_VERSION VERSION_LESS 15.0.2)
+      message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
+    endif()
+  elseif(INTERNAL_CXX_COMPILER_ID STREQUAL NVIDIA)
+    if(INTERNAL_CXX_COMPILER_VERSION VERSION_LESS 7.0.28)
+      message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
+    endif()
+  elseif(INTERNAL_CXX_COMPILER_ID STREQUAL PGI)
+    if(INTERNAL_CXX_COMPILER_VERSION VERSION_LESS 17.1)
+      message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
+    endif()
+  endif()
+
+  # Enforce that extensions are turned off for nvcc_wrapper.
+  if(INTERNAL_CXX_COMPILER_ID STREQUAL NVIDIA)
+    if(DEFINED CMAKE_CXX_EXTENSIONS AND CMAKE_CXX_EXTENSIONS STREQUAL ON)
+      message(FATAL_ERROR "NVCC doesn't support C++ extensions. Set CMAKE_CXX_EXTENSIONS to OFF in your CMakeLists.txt.")
+    endif()
+  endif()
+
+  if(KOKKOS_ENABLE_CUDA)
+    # Enforce that the compiler can compile CUDA code.
+    if(INTERNAL_CXX_COMPILER_ID STREQUAL Clang)
+      if(INTERNAL_CXX_COMPILER_VERSION VERSION_LESS 4.0.0)
+        message(FATAL_ERROR "Compiling CUDA code directly with Clang requires version 4.0.0 or higher.")
+      endif()
+    elseif(NOT INTERNAL_CXX_COMPILER_ID STREQUAL NVIDIA)
+      message(FATAL_ERROR "Invalid compiler for CUDA. The compiler must be nvcc_wrapper or Clang.")
+    endif()
+  endif()
+
+  set(KOKKOS_CXX_COMPILER ${INTERNAL_CXX_COMPILER} PARENT_SCOPE)
+  set(KOKKOS_CXX_COMPILER_ID ${INTERNAL_CXX_COMPILER_ID} PARENT_SCOPE)
+  set(KOKKOS_CXX_COMPILER_VERSION ${INTERNAL_CXX_COMPILER_VERSION} PARENT_SCOPE)
+endfunction()
+
+#-------------------------------------------------------------------------------
+# function(set_kokkos_cxx_standard)
+#  Transitively enforces that the appropriate CXX standard compile flags (C++11
+#  or above) are added to targets that use the Kokkos library.  Compile features
+#  are used if possible.  Otherwise, the appropriate flags are added to
+#  KOKKOS_CXX_FLAGS.  Values set by the user to CMAKE_CXX_STANDARD and
+#  CMAKE_CXX_EXTENSIONS are honored.
+#
+# Outputs:
+#   KOKKOS_CXX11_FEATURES
+#   KOKKOS_CXX_FLAGS
+#
+# Inputs:
+#  KOKKOS_CXX_COMPILER
+#  KOKKOS_CXX_COMPILER_ID
+#  KOKKOS_CXX_COMPILER_VERSION
+#
+function(set_kokkos_cxx_standard)
+  # The following table lists the versions of CMake that supports CXX_STANDARD
+  # and the CXX compile features for different compilers.  The versions are
+  # based on CMake documentation, looking at CMake code, and verifying by
+  # testing with specific CMake versions.
+  #
+  #   COMPILER       CXX_STANDARD     Compile Features
+  #   ---------------------------------------------------------------
+  #   Clang          3.1              3.1
+  #   GNU            3.1              3.2
+  #   AppleClang     3.2              3.2
+  #   Intel          3.6              3.6
+  #   Cray           No               No
+  #   PGI            No               No
+  #   XL             No               No
+  #
+  # For compiling CUDA code using nvcc_wrapper, we will use the host compiler's
+  # flags for turning on C++11.  Since for compiler ID and versioning purposes
+  # CMake recognizes the host compiler when calling nvcc_wrapper, this just
+  # works.  Both NVCC and nvcc_wrapper only recognize '-std=c++11' which means
+  # that we can only use host compilers for CUDA builds that use those flags.
+  # It also means that extensions (gnu++11) can't be turned on for CUDA builds.
+
+  # Check if we can use compile features.
+  if(NOT KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA)
+    if(CMAKE_CXX_COMPILER_ID STREQUAL Clang)
+      if(NOT CMAKE_VERSION VERSION_LESS 3.1)
+        set(INTERNAL_USE_COMPILE_FEATURES ON)
+      endif()
+    elseif(CMAKE_CXX_COMPILER_ID STREQUAL AppleClang OR CMAKE_CXX_COMPILER_ID STREQUAL GNU)
+      if(NOT CMAKE_VERSION VERSION_LESS 3.2)
+        set(INTERNAL_USE_COMPILE_FEATURES ON)
+      endif()
+    elseif(CMAKE_CXX_COMPILER_ID STREQUAL Intel)
+      if(NOT CMAKE_VERSION VERSION_LESS 3.6)
+        set(INTERNAL_USE_COMPILE_FEATURES ON)
+      endif()
+    endif()
+  endif()
+
+  if(INTERNAL_USE_COMPILE_FEATURES)
+    # Use the compile features aspect of CMake to transitively cause C++ flags
+    # to populate to user code.
+
+    # I'm using a hack by requiring features that I know force the lowest version
+    # of the compilers we want to support.  Clang 3.3 and later support all of
+    # the C++11 standard.  With CMake 3.8 and higher, we could switch to using
+    # cxx_std_11.
+    set(KOKKOS_CXX11_FEATURES
+        cxx_nonstatic_member_init # Forces GCC 4.7 or later and Intel 14.0 or later.
+        PARENT_SCOPE
+       )
+  else()
+    # CXX compile features are not used for this combination of compiler and
+    # version of CMake; pick an explicit -std style flag instead.
+
+    if(CMAKE_CXX_COMPILER_ID STREQUAL AppleClang)
+      # Versions of CMAKE before 3.2 don't support CXX_STANDARD or C++ compile
+      # features for the AppleClang compiler.  Set compiler flags transitively
+      # here such that they trickle down to a call to target_compile_options().
+
+      # The following two blocks of code were copied from
+      # Modules/Compiler/AppleClang-CXX.cmake from CMake 3.7.2 and then
+      # modified.
+      if(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.0)
+        set(INTERNAL_CXX11_STANDARD_COMPILE_OPTION "-std=c++11")
+        set(INTERNAL_CXX11_EXTENSION_COMPILE_OPTION "-std=gnu++11")
+      endif()
+
+      if(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 6.1)
+        set(INTERNAL_CXX14_STANDARD_COMPILE_OPTION "-std=c++14")
+        set(INTERNAL_CXX14_EXTENSION_COMPILE_OPTION "-std=gnu++14")
+      elseif(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.1)
+        # AppleClang 5.0 knows this flag, but does not set a __cplusplus macro
+        # greater than 201103L.
+        set(INTERNAL_CXX14_STANDARD_COMPILE_OPTION "-std=c++1y")
+        set(INTERNAL_CXX14_EXTENSION_COMPILE_OPTION "-std=gnu++1y")
+      endif()
+    elseif(CMAKE_CXX_COMPILER_ID STREQUAL Intel)
+      # Versions of CMAKE before 3.6 don't support CXX_STANDARD or C++ compile
+      # features for the Intel compiler.  Set compiler flags transitively here
+      # such that they trickle down to a call to target_compile_options().
+
+      # The following three blocks of code were copied from
+      # Modules/Compiler/Intel-CXX.cmake from CMake 3.7.2 and then modified.
+      if("x${CMAKE_CXX_SIMULATE_ID}" STREQUAL "xMSVC")
+        set(_std -Qstd)
+        set(_ext c++)
+      else()
+        set(_std -std)
+        set(_ext gnu++)
+      endif()
+
+      if(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 15.0.2)
+        set(INTERNAL_CXX14_STANDARD_COMPILE_OPTION "${_std}=c++14")
+        # TODO: There is no gnu++14 value supported; figure out what to do.
+        set(INTERNAL_CXX14_EXTENSION_COMPILE_OPTION "${_std}=c++14")
+      elseif(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 15.0.0)
+        set(INTERNAL_CXX14_STANDARD_COMPILE_OPTION "${_std}=c++1y")
+        # TODO: There is no gnu++14 value supported; figure out what to do.
+        set(INTERNAL_CXX14_EXTENSION_COMPILE_OPTION "${_std}=c++1y")
+      endif()
+
+      if(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 13.0)
+        set(INTERNAL_CXX11_STANDARD_COMPILE_OPTION "${_std}=c++11")
+        set(INTERNAL_CXX11_EXTENSION_COMPILE_OPTION "${_std}=${_ext}11")
+      elseif(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 12.1)
+        set(INTERNAL_CXX11_STANDARD_COMPILE_OPTION "${_std}=c++0x")
+        set(INTERNAL_CXX11_EXTENSION_COMPILE_OPTION "${_std}=${_ext}0x")
+      endif()
+    elseif(CMAKE_CXX_COMPILER_ID STREQUAL Cray)
+      # CMAKE doesn't support CXX_STANDARD or C++ compile features for the Cray
+      # compiler.  Set compiler options transitively here such that they trickle
+      # down to a call to target_compile_options().  The C++14 options below reuse the C++11 flag.
+      set(INTERNAL_CXX11_STANDARD_COMPILE_OPTION "-hstd=c++11")
+      set(INTERNAL_CXX11_EXTENSION_COMPILE_OPTION "-hstd=c++11")
+      set(INTERNAL_CXX14_STANDARD_COMPILE_OPTION "-hstd=c++11")
+      set(INTERNAL_CXX14_EXTENSION_COMPILE_OPTION "-hstd=c++11")
+    elseif(CMAKE_CXX_COMPILER_ID STREQUAL PGI)
+      # CMAKE doesn't support CXX_STANDARD or C++ compile features for the PGI
+      # compiler.  Set compiler options transitively here such that they trickle
+      # down to a call to target_compile_options().  The C++14 options below reuse the C++11 flag.
+      set(INTERNAL_CXX11_STANDARD_COMPILE_OPTION "--c++11")
+      set(INTERNAL_CXX11_EXTENSION_COMPILE_OPTION "--c++11")
+      set(INTERNAL_CXX14_STANDARD_COMPILE_OPTION "--c++11")
+      set(INTERNAL_CXX14_EXTENSION_COMPILE_OPTION "--c++11")
+    elseif(CMAKE_CXX_COMPILER_ID STREQUAL XL)
+      # CMAKE doesn't support CXX_STANDARD or C++ compile features for the XL
+      # compiler.  Set compiler options transitively here such that they trickle
+      # down to a call to target_compile_options().  The C++14 options below reuse the C++11 flag.
+      set(INTERNAL_CXX11_STANDARD_COMPILE_OPTION "-std=c++11")
+      set(INTERNAL_CXX11_EXTENSION_COMPILE_OPTION "-std=c++11")
+      set(INTERNAL_CXX14_STANDARD_COMPILE_OPTION "-std=c++11")
+      set(INTERNAL_CXX14_EXTENSION_COMPILE_OPTION "-std=c++11")
+    else()
+      # Assume GNU.  CMAKE_CXX_STANDARD is handled correctly by CMake 3.1 and
+      # above for this compiler.  If the user explicitly requests a C++
+      # standard, CMake takes care of it.  If not, transitively require C++11.
+      if(NOT CMAKE_CXX_STANDARD)
+        set(INTERNAL_CXX11_STANDARD_COMPILE_OPTION ${CMAKE_CXX11_STANDARD_COMPILE_OPTION})
+        set(INTERNAL_CXX11_EXTENSION_COMPILE_OPTION ${CMAKE_CXX11_EXTENSION_COMPILE_OPTION})
+      endif()
+    endif()
+
+    # Set the C++ standard info for Kokkos respecting user set values for
+    # CMAKE_CXX_STANDARD and CMAKE_CXX_EXTENSIONS.
+    # Only use cxx extension if explicitly requested (the value must compare equal to ON)
+    if(CMAKE_CXX_STANDARD EQUAL 14)
+      if(DEFINED CMAKE_CXX_EXTENSIONS AND CMAKE_CXX_EXTENSIONS STREQUAL ON)
+        set(INTERNAL_CXX_FLAGS ${INTERNAL_CXX14_EXTENSION_COMPILE_OPTION})
+      else()
+        set(INTERNAL_CXX_FLAGS ${INTERNAL_CXX14_STANDARD_COMPILE_OPTION})
+      endif()
+    elseif(CMAKE_CXX_STANDARD EQUAL 11)
+      if(DEFINED CMAKE_CXX_EXTENSIONS AND CMAKE_CXX_EXTENSIONS STREQUAL ON)
+        set(INTERNAL_CXX_FLAGS ${INTERNAL_CXX11_EXTENSION_COMPILE_OPTION})
+      else()
+        set(INTERNAL_CXX_FLAGS ${INTERNAL_CXX11_STANDARD_COMPILE_OPTION})
+      endif()
+    else()
+      # The user didn't explicitly request a standard, transitively require
+      # C++11 respecting CMAKE_CXX_EXTENSIONS.
+      if(DEFINED CMAKE_CXX_EXTENSIONS AND CMAKE_CXX_EXTENSIONS STREQUAL ON)
+        set(INTERNAL_CXX_FLAGS ${INTERNAL_CXX11_EXTENSION_COMPILE_OPTION})
+      else()
+        set(INTERNAL_CXX_FLAGS ${INTERNAL_CXX11_STANDARD_COMPILE_OPTION})
+      endif()
+    endif()
+
+    set(KOKKOS_CXX_FLAGS ${INTERNAL_CXX_FLAGS} PARENT_SCOPE)
+  endif()
+endfunction()
+
+
+#-------------------------------------------------------------------------------
+# function(set_kokkos_srcs)
+# Takes a list of sources for kokkos (e.g., KOKKOS_SRC from Makefile.kokkos and
+# put it into kokkos_generated_settings.cmake) and sorts the files into the subpackages or
+# separate_libraries.  This is core and containers (algorithms is pure header
+# files).
+#
+# Inputs:
+#   KOKKOS_SRC
+#
+# Outputs:
+#   KOKKOS_CORE_SRCS
+#   KOKKOS_CONTAINERS_SRCS
+#
+function(set_kokkos_srcs)
+  set(opts ) # no-value args
+  set(oneValArgs )
+  set(multValArgs KOKKOS_SRC) # e.g., lists
+  cmake_parse_arguments(IN "${opts}" "${oneValArgs}" "${multValArgs}" ${ARGN})
+
+  foreach(sfile ${IN_KOKKOS_SRC})
+    string(REPLACE "${CMAKE_CURRENT_SOURCE_DIR}/" "" stripfile "${sfile}")
+    string(REPLACE "/" ";" striplist "${stripfile}")
+    list(GET striplist 0 firstdir)
+    if("${firstdir}" STREQUAL "core")
+      list(APPEND KOKKOS_CORE_SRCS ${sfile})
+    else()
+      list(APPEND KOKKOS_CONTAINERS_SRCS ${sfile})
+    endif()
+  endforeach()
+  set(KOKKOS_CORE_SRCS ${KOKKOS_CORE_SRCS} PARENT_SCOPE)
+  set(KOKKOS_CONTAINERS_SRCS ${KOKKOS_CONTAINERS_SRCS} PARENT_SCOPE)
+  return()
+endfunction()
+
+# Setting a default value if it is not already set
+macro(set_kokkos_default_default VARIABLE DEFAULT)
+  IF( "${KOKKOS_INTERNAL_ENABLE_${VARIABLE}_DEFAULT}" STREQUAL "" )
+    IF( "${KOKKOS_ENABLE_${VARIABLE}}" STREQUAL "" )
+      set(KOKKOS_INTERNAL_ENABLE_${VARIABLE}_DEFAULT ${DEFAULT})
+      # Nothing chosen yet: fall back to the DEFAULT passed by the caller.
+    ELSE()
+      set(KOKKOS_INTERNAL_ENABLE_${VARIABLE}_DEFAULT ${KOKKOS_ENABLE_${VARIABLE}})
+      # KOKKOS_ENABLE_<VARIABLE> already has a value: carry it over as the default.
+    ENDIF()
+  ENDIF()
+  UNSET(KOKKOS_ENABLE_${VARIABLE} CACHE)
+endmacro()
diff --git a/lib/kokkos/cmake/kokkos_options.cmake b/lib/kokkos/cmake/kokkos_options.cmake
new file mode 100644
index 0000000000..f17710a4ce
--- /dev/null
+++ b/lib/kokkos/cmake/kokkos_options.cmake
@@ -0,0 +1,365 @@
+########################## NOTES ###############################################
+# List the options for configuring kokkos using CMake method of doing it.
+# These options then get mapped onto KOKKOS_SETTINGS environment variable by
+# kokkos_settings.cmake.
It is separate to allow other packages to override
+# these variables (e.g., TriBITS).
+
+########################## AVAILABLE OPTIONS ###################################
+# Use lists for documentation, verification, and programming convenience
+
+# All CMake options of the type KOKKOS_ENABLE_* (CamelCase spelling; upper-cased below)
+set(KOKKOS_INTERNAL_ENABLE_OPTIONS_LIST)
+list(APPEND KOKKOS_INTERNAL_ENABLE_OPTIONS_LIST
+     Serial
+     OpenMP
+     Pthread
+     Qthread
+     Cuda
+     ROCm
+     HWLOC
+     MEMKIND
+     LIBRT
+     Cuda_Lambda
+     Cuda_Relocatable_Device_Code
+     Cuda_UVM
+     Cuda_LDG_Intrinsic
+     Debug
+     Debug_DualView_Modify_Check
+     Debug_Bounds_Check
+     Compiler_Warnings
+     Profiling
+     Profiling_Load_Print
+     Aggressive_Vectorization
+     )
+
+#-------------------------------------------------------------------------------
+#------------------------------- Recognize CamelCase Options ---------------------------
+#-------------------------------------------------------------------------------
+
+foreach(opt ${KOKKOS_INTERNAL_ENABLE_OPTIONS_LIST})
+  string(TOUPPER ${opt} OPT )
+  IF(DEFINED Kokkos_ENABLE_${opt})
+    IF(DEFINED KOKKOS_ENABLE_${OPT})
+      IF(NOT ("${KOKKOS_ENABLE_${OPT}}" STREQUAL "${Kokkos_ENABLE_${opt}}"))
+        IF(DEFINED KOKKOS_ENABLE_${OPT}_INTERNAL)
+          MESSAGE(WARNING "Defined both Kokkos_ENABLE_${opt}=[${Kokkos_ENABLE_${opt}}] and KOKKOS_ENABLE_${OPT}=[${KOKKOS_ENABLE_${OPT}}] and they differ! Could be caused by old CMakeCache Variable. Run CMake again and warning should disappear. If not you are truly setting both variables.")
+          IF(NOT ("${Kokkos_ENABLE_${opt}}" STREQUAL "${KOKKOS_ENABLE_${OPT}_INTERNAL}"))
+            UNSET(KOKKOS_ENABLE_${OPT} CACHE)
+            SET(KOKKOS_ENABLE_${OPT} ${Kokkos_ENABLE_${opt}})
+            MESSAGE(WARNING "SET BOTH VARIABLES KOKKOS_ENABLE_${OPT}: ${KOKKOS_ENABLE_${OPT}}")
+          ELSE()
+            SET(Kokkos_ENABLE_${opt} ${KOKKOS_ENABLE_${OPT}})
+          ENDIF()
+        ELSE()
+          MESSAGE(FATAL_ERROR "Defined both Kokkos_ENABLE_${opt}=[${Kokkos_ENABLE_${opt}}] and KOKKOS_ENABLE_${OPT}=[${KOKKOS_ENABLE_${OPT}}] and they differ!")
+        ENDIF()
+      ENDIF()
+    ELSE()
+      SET(KOKKOS_INTERNAL_ENABLE_${OPT}_DEFAULT ${Kokkos_ENABLE_${opt}})
+    ENDIF()
+  ENDIF()
+endforeach()
+
+IF(DEFINED Kokkos_Arch)
+  IF(DEFINED KOKKOS_ARCH)
+    IF(NOT ("${KOKKOS_ARCH}" STREQUAL "${Kokkos_Arch}"))
+      MESSAGE(FATAL_ERROR "Defined both Kokkos_Arch and KOKKOS_ARCH and they differ!")
+    ENDIF()
+  ELSE()
+    SET(KOKKOS_ARCH ${Kokkos_Arch})
+  ENDIF()
+ENDIF()
+
+#-------------------------------------------------------------------------------
+# List of possible host architectures.
+#-------------------------------------------------------------------------------
+set(KOKKOS_ARCH_LIST)
+list(APPEND KOKKOS_ARCH_LIST
+     None            # No architecture optimization
+     AMDAVX          # (HOST) AMD chip
+     ARMv80          # (HOST) ARMv8.0 Compatible CPU
+     ARMv81          # (HOST) ARMv8.1 Compatible CPU
+     ARMv8-ThunderX  # (HOST) ARMv8 Cavium ThunderX CPU
+     WSM             # (HOST) Intel Westmere CPU
+     SNB             # (HOST) Intel Sandy/Ivy Bridge CPUs
+     HSW             # (HOST) Intel Haswell CPUs
+     BDW             # (HOST) Intel Broadwell Xeon E-class CPUs
+     SKX             # (HOST) Intel Sky Lake Xeon E-class HPC CPUs (AVX512)
+     KNC             # (HOST) Intel Knights Corner Xeon Phi
+     KNL             # (HOST) Intel Knights Landing Xeon Phi
+     BGQ             # (HOST) IBM Blue Gene Q
+     Power7          # (HOST) IBM POWER7 CPUs
+     Power8          # (HOST) IBM POWER8 CPUs
+     Power9          # (HOST) IBM POWER9 CPUs
+     Kepler          # (GPU) NVIDIA Kepler default (generation CC 3.5)
+     Kepler30        # (GPU) NVIDIA Kepler generation CC 3.0
+     Kepler32        # (GPU) NVIDIA Kepler generation CC 3.2
+     Kepler35        # (GPU) NVIDIA Kepler generation CC 3.5
+     Kepler37        # (GPU) NVIDIA Kepler generation CC 3.7
+     Maxwell         # (GPU) NVIDIA Maxwell default (generation CC 5.0)
+     Maxwell50       # (GPU) NVIDIA Maxwell generation CC 5.0
+     Maxwell52       # (GPU) NVIDIA Maxwell generation CC 5.2
+     Maxwell53       # (GPU) NVIDIA Maxwell generation CC 5.3
+     Pascal60        # (GPU) NVIDIA Pascal generation CC 6.0
+     Pascal61        # (GPU) NVIDIA Pascal generation CC 6.1
+    )
+
+# List of possible device architectures.
+# The case and spelling here needs to match Makefile.kokkos
+set(KOKKOS_DEVICES_LIST)
+# Options: Cuda,ROCm,OpenMP,Pthread,Qthreads,Serial
+list(APPEND KOKKOS_DEVICES_LIST
+    Cuda          # NVIDIA GPU -- see below
+    OpenMP        # OpenMP
+    Pthread       # pthread
+    Qthreads      # qthreads
+    Serial        # serial
+    ROCm          # AMD GPU (ROCm)
+    )
+
+# List of possible TPLs for Kokkos
+# From Makefile.kokkos: Options: hwloc,librt,experimental_memkind
+set(KOKKOS_USE_TPLS_LIST)
+list(APPEND KOKKOS_USE_TPLS_LIST
+    HWLOC          # hwloc
+    LIBRT          # librt
+    MEMKIND        # experimental_memkind
+    )
+# Map of cmake variables to Makefile variables
+set(KOKKOS_INTERNAL_HWLOC hwloc)
+set(KOKKOS_INTERNAL_LIBRT librt)
+set(KOKKOS_INTERNAL_MEMKIND experimental_memkind)
+
+# List of possible Advanced options
+set(KOKKOS_OPTIONS_LIST)
+list(APPEND KOKKOS_OPTIONS_LIST
+       AGGRESSIVE_VECTORIZATION
+       DISABLE_PROFILING
+       DISABLE_DUALVIEW_MODIFY_CHECK
+       ENABLE_PROFILE_LOAD_PRINT
+    )
+# Map of cmake variables to Makefile variables (repeated in the CUDA section below; values kept identical)
+set(KOKKOS_INTERNAL_LDG_INTRINSIC use_ldg)
+set(KOKKOS_INTERNAL_UVM force_uvm)
+set(KOKKOS_INTERNAL_RELOCATABLE_DEVICE_CODE rdc)
+
+
+#-------------------------------------------------------------------------------
+# List of possible Options for CUDA
+#-------------------------------------------------------------------------------
+# From Makefile.kokkos: Options: use_ldg,force_uvm,rdc
+set(KOKKOS_CUDA_OPTIONS_LIST)
+list(APPEND KOKKOS_CUDA_OPTIONS_LIST
+    LDG_INTRINSIC              # use_ldg
+    UVM                        # force_uvm
+    RELOCATABLE_DEVICE_CODE    # rdc
+    LAMBDA                     # enable_lambda
+    )
+
+# Map of cmake variables to Makefile variables
+set(KOKKOS_INTERNAL_LDG_INTRINSIC use_ldg)
+set(KOKKOS_INTERNAL_UVM force_uvm)
+set(KOKKOS_INTERNAL_RELOCATABLE_DEVICE_CODE rdc)
+set(KOKKOS_INTERNAL_LAMBDA enable_lambda)
+
+
+#-------------------------------------------------------------------------------
+#------------------------------- Create doc strings ----------------------------
+#-------------------------------------------------------------------------------
+
+set(tmpr "\n ")
+string(REPLACE ";" ${tmpr} KOKKOS_INTERNAL_ARCH_DOCSTR "${KOKKOS_ARCH_LIST}")
+# This would be useful, but we use Foo_ENABLE mechanisms
+#string(REPLACE ";" ${tmpr} KOKKOS_INTERNAL_DEVICES_DOCSTR "${KOKKOS_DEVICES_LIST}")
+#string(REPLACE ";" ${tmpr} KOKKOS_INTERNAL_USE_TPLS_DOCSTR "${KOKKOS_USE_TPLS_LIST}")
+#string(REPLACE ";" ${tmpr} KOKKOS_INTERNAL_CUDA_OPTIONS_DOCSTR "${KOKKOS_CUDA_OPTIONS_LIST}")
+
+#-------------------------------------------------------------------------------
+#------------------------------- GENERAL OPTIONS -------------------------------
+#-------------------------------------------------------------------------------
+
+# Setting this variable to a value other than "None" can improve host
+# performance by turning on architecture specific code.
+# NOT_SET is used to determine if the option is passed in.  It is reset to
+# default "None" down below.
+set(KOKKOS_ARCH "NOT_SET" CACHE STRING
+      "Optimize for specific host architecture. Options are: ${KOKKOS_INTERNAL_ARCH_DOCSTR}")
+
+# Whether to build separate libraries or not
+set(KOKKOS_SEPARATE_LIBS OFF CACHE BOOL "OFF = kokkos. ON = kokkoscore, kokkoscontainers, and kokkosalgorithms.")
+
+# Qthreads options.
+set(KOKKOS_QTHREADS_DIR "" CACHE PATH "Location of Qthreads library.")
+
+
+#-------------------------------------------------------------------------------
+#------------------------------- KOKKOS_DEVICES --------------------------------
+#-------------------------------------------------------------------------------
+# Figure out default settings (each name must match the KOKKOS_INTERNAL_ENABLE_<NAME>_DEFAULT read below)
+IF(Trilinos_ENABLE_Kokkos)
+  set_kokkos_default_default(SERIAL ON)
+  set_kokkos_default_default(PTHREAD OFF)
+  IF(TPL_ENABLE_QTHREAD)
+    set_kokkos_default_default(QTHREADS ${TPL_ENABLE_QTHREAD})
+  ELSE()
+    set_kokkos_default_default(QTHREADS OFF)
+  ENDIF()
+  IF(Trilinos_ENABLE_OpenMP)
+    set_kokkos_default_default(OPENMP ${Trilinos_ENABLE_OpenMP})
+  ELSE()
+    set_kokkos_default_default(OPENMP OFF)
+  ENDIF()
+  IF(TPL_ENABLE_CUDA)
+    set_kokkos_default_default(CUDA ${TPL_ENABLE_CUDA})
+  ELSE()
+    set_kokkos_default_default(CUDA OFF)
+  ENDIF()
+  set_kokkos_default_default(ROCM OFF)
+ELSE()
+  set_kokkos_default_default(SERIAL ON)
+  set_kokkos_default_default(OPENMP OFF)
+  set_kokkos_default_default(PTHREAD OFF)
+  set_kokkos_default_default(QTHREADS OFF)
+  set_kokkos_default_default(CUDA OFF)
+  set_kokkos_default_default(ROCM OFF)
+ENDIF()
+
+# Set which Kokkos backend to use.
+# These are the actual options that define the settings.
+set(KOKKOS_ENABLE_SERIAL ${KOKKOS_INTERNAL_ENABLE_SERIAL_DEFAULT} CACHE BOOL "Whether to enable the Kokkos::Serial device. This device executes \"parallel\" kernels sequentially on a single CPU thread. It is enabled by default. If you disable this device, please enable at least one other CPU device, such as Kokkos::OpenMP or Kokkos::Threads.")
+set(KOKKOS_ENABLE_OPENMP ${KOKKOS_INTERNAL_ENABLE_OPENMP_DEFAULT} CACHE BOOL "Enable OpenMP support in Kokkos." FORCE)
+set(KOKKOS_ENABLE_PTHREAD ${KOKKOS_INTERNAL_ENABLE_PTHREAD_DEFAULT} CACHE BOOL "Enable Pthread support in Kokkos.")
+set(KOKKOS_ENABLE_QTHREADS ${KOKKOS_INTERNAL_ENABLE_QTHREADS_DEFAULT} CACHE BOOL "Enable Qthreads support in Kokkos.")
+set(KOKKOS_ENABLE_CUDA ${KOKKOS_INTERNAL_ENABLE_CUDA_DEFAULT} CACHE BOOL "Enable CUDA support in Kokkos.")
+set(KOKKOS_ENABLE_ROCM ${KOKKOS_INTERNAL_ENABLE_ROCM_DEFAULT} CACHE BOOL "Enable ROCm support in Kokkos.")
+
+
+
+#-------------------------------------------------------------------------------
+#------------------------------- KOKKOS DEBUG and PROFILING --------------------
+#-------------------------------------------------------------------------------
+
+# Debug related options enable compiler warnings
+
+set_kokkos_default_default(DEBUG OFF)
+set(KOKKOS_ENABLE_DEBUG ${KOKKOS_INTERNAL_ENABLE_DEBUG_DEFAULT} CACHE BOOL "Enable Kokkos Debug.")
+
+# From Makefile.kokkos: Advanced Options:
+#compiler_warnings, aggressive_vectorization, disable_profiling, disable_dualview_modify_check, enable_profile_load_print
+set_kokkos_default_default(COMPILER_WARNINGS OFF)
+set(KOKKOS_ENABLE_COMPILER_WARNINGS ${KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS_DEFAULT} CACHE BOOL "Enable compiler warnings.")
+
+set_kokkos_default_default(DEBUG_DUALVIEW_MODIFY_CHECK OFF)
+set(KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK ${KOKKOS_INTERNAL_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK_DEFAULT} CACHE BOOL "Enable dualview modify check.")
+
+# Enable aggressive vectorization.
+set_kokkos_default_default(AGGRESSIVE_VECTORIZATION OFF)
+set(KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION ${KOKKOS_INTERNAL_ENABLE_AGGRESSIVE_VECTORIZATION_DEFAULT} CACHE BOOL "Enable aggressive vectorization.")
+
+# Enable profiling.
+set_kokkos_default_default(PROFILING ON)
+set(KOKKOS_ENABLE_PROFILING ${KOKKOS_INTERNAL_ENABLE_PROFILING_DEFAULT} CACHE BOOL "Enable profiling.")
+
+set_kokkos_default_default(PROFILING_LOAD_PRINT OFF)
+set(KOKKOS_ENABLE_PROFILING_LOAD_PRINT ${KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_PRINT_DEFAULT} CACHE BOOL "Enable profile load print.")
+
+
+
+
+#-------------------------------------------------------------------------------
+#------------------------------- KOKKOS_USE_TPLS -------------------------------
+#-------------------------------------------------------------------------------
+# hwloc: the default is ON only for a Trilinos build that enabled the hwloc TPL,
+# and OFF in every other case.
+if(NOT (Trilinos_ENABLE_Kokkos AND TPL_ENABLE_HWLOC))
+  set_kokkos_default_default(HWLOC OFF)
+else()
+  set_kokkos_default_default(HWLOC ON)
+endif()
+set(KOKKOS_HWLOC_DIR "" CACHE PATH "Location of hwloc library. (kokkos tpl)")
+set(KOKKOS_ENABLE_HWLOC ${KOKKOS_INTERNAL_ENABLE_HWLOC_DEFAULT} CACHE BOOL "Enable hwloc for better process placement.")
+
+# memkind: off unless requested.
+set_kokkos_default_default(MEMKIND OFF)
+set(KOKKOS_MEMKIND_DIR "" CACHE PATH "Location of memkind library. (kokkos tpl)")
+set(KOKKOS_ENABLE_MEMKIND ${KOKKOS_INTERNAL_ENABLE_MEMKIND_DEFAULT} CACHE BOOL "Enable memkind. (kokkos tpl)")
+
+# librt: a standalone build defaults to ON; a Trilinos build follows
+# TPL_ENABLE_LIBRT when that variable is defined and is OFF otherwise.
+if(NOT Trilinos_ENABLE_Kokkos)
+  set_kokkos_default_default(LIBRT ON)
+elseif(DEFINED TPL_ENABLE_LIBRT)
+  set_kokkos_default_default(LIBRT ${TPL_ENABLE_LIBRT})
+else()
+  set_kokkos_default_default(LIBRT OFF)
+endif()
+set(KOKKOS_ENABLE_LIBRT ${KOKKOS_INTERNAL_ENABLE_LIBRT_DEFAULT} CACHE BOOL "Enable librt for more precise timer. (kokkos tpl)")
+
+
+#-------------------------------------------------------------------------------
+#------------------------------- KOKKOS_CUDA_OPTIONS ---------------------------
+#-------------------------------------------------------------------------------
+
+# CUDA options.
+# Set Defaults. The option name is passed without a _DEFAULT suffix so that it lines up with the KOKKOS_INTERNAL_ENABLE_<name>_DEFAULT variables read under "Set actual options" (NOTE(review): confirm against the set_kokkos_default_default macro). +set_kokkos_default_default(CUDA_LDG_INTRINSIC OFF) +set_kokkos_default_default(CUDA_UVM OFF) +set_kokkos_default_default(CUDA_RELOCATABLE_DEVICE_CODE OFF) +IF(Trilinos_ENABLE_Kokkos) + IF(KOKKOS_ENABLE_CUDA) + find_package(CUDA) + ENDIF() + IF (DEFINED CUDA_VERSION) + IF (CUDA_VERSION VERSION_GREATER "7.0") + set_kokkos_default_default(CUDA_LAMBDA ON) + ELSE() + set_kokkos_default_default(CUDA_LAMBDA OFF) + ENDIF() + ENDIF() +ELSE() + set_kokkos_default_default(CUDA_LAMBDA OFF) +ENDIF() + +# Set actual options +set(KOKKOS_CUDA_DIR "" CACHE PATH "Location of CUDA library. Defaults to where nvcc installed.") +set(KOKKOS_ENABLE_CUDA_LDG_INTRINSIC ${KOKKOS_INTERNAL_ENABLE_CUDA_LDG_INTRINSIC_DEFAULT} CACHE BOOL "Enable CUDA LDG. (cuda option)") +set(KOKKOS_ENABLE_CUDA_UVM ${KOKKOS_INTERNAL_ENABLE_CUDA_UVM_DEFAULT} CACHE BOOL "Enable CUDA unified virtual memory.") +set(KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE ${KOKKOS_INTERNAL_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE_DEFAULT} CACHE BOOL "Enable relocatable device code for CUDA. (cuda option)") +set(KOKKOS_ENABLE_CUDA_LAMBDA ${KOKKOS_INTERNAL_ENABLE_CUDA_LAMBDA_DEFAULT} CACHE BOOL "Enable lambdas for CUDA. (cuda option)") + + +#------------------------------------------------------------------------------- +#----------------------- HOST ARCH AND LEGACY TRIBITS -------------------------- +#------------------------------------------------------------------------------- + +# This defines the previous legacy TriBITS builds. 
+set(KOKKOS_LEGACY_TRIBITS False) +IF ("${KOKKOS_ARCH}" STREQUAL "NOT_SET") + set(KOKKOS_ARCH "None") + IF(KOKKOS_HAS_TRILINOS) + set(KOKKOS_LEGACY_TRIBITS True) + ENDIF() +ENDIF() +IF (KOKKOS_HAS_TRILINOS) + IF (KOKKOS_LEGACY_TRIBITS) + message(STATUS "Using the legacy tribits build because KOKKOS_ARCH not set") + ELSE() + message(STATUS "NOT using the legacy tribits build because KOKKOS_ARCH *is* set") + ENDIF() +ENDIF() + +#------------------------------------------------------------------------------- +#----------------------- Set CamelCase Options if they are not yet set --------- +#------------------------------------------------------------------------------- + +foreach(opt ${KOKKOS_INTERNAL_ENABLE_OPTIONS_LIST}) + string(TOUPPER ${opt} OPT ) + UNSET(KOKKOS_ENABLE_${OPT}_INTERNAL CACHE) + SET(KOKKOS_ENABLE_${OPT}_INTERNAL ${KOKKOS_ENABLE_${OPT}} CACHE BOOL INTERNAL) + IF(DEFINED KOKKOS_ENABLE_${OPT}) + UNSET(Kokkos_ENABLE_${opt} CACHE) + SET(Kokkos_ENABLE_${opt} ${KOKKOS_ENABLE_${OPT}} CACHE BOOL "CamelCase Compatibility setting for KOKKOS_ENABLE_${OPT}") + ENDIF() +endforeach() + diff --git a/lib/kokkos/cmake/kokkos_settings.cmake b/lib/kokkos/cmake/kokkos_settings.cmake new file mode 100644 index 0000000000..850a74a670 --- /dev/null +++ b/lib/kokkos/cmake/kokkos_settings.cmake @@ -0,0 +1,257 @@ +########################## NOTES ############################################### +# This files goal is to take CMake options found in kokkos_options.cmake but +# possibly set from elsewhere +# (see: trilinos/cmake/ProjectCOmpilerPostConfig.cmake) +# using CMake idioms and map them onto the KOKKOS_SETTINGS variables that gets +# passed to the kokkos makefile configuration: +# make -f ${CMAKE_SOURCE_DIR}/core/src/Makefile ${KOKKOS_SETTINGS} build-makefile-cmake-kokkos +# that generates KokkosCore_config.h and kokkos_generated_settings.cmake +# To understand how to form KOKKOS_SETTINGS, see +# /Makefile.kokkos + 
+#------------------------------------------------------------------------------- +#------------------------------- GENERAL OPTIONS ------------------------------- +#------------------------------------------------------------------------------- + +# Ensure that KOKKOS_ARCH is in the ARCH_LIST +foreach(arch ${KOKKOS_ARCH}) + list(FIND KOKKOS_ARCH_LIST ${arch} indx) + if (indx EQUAL -1) + message(FATAL_ERROR "${arch} is not an accepted value for KOKKOS_ARCH." + " Please pick from these choices: ${KOKKOS_INTERNAL_ARCH_DOCSTR}") + endif () +endforeach() + +# KOKKOS_SETTINGS uses KOKKOS_ARCH +string(REPLACE ";" "," KOKKOS_ARCH "${KOKKOS_ARCH}") +set(KOKKOS_ARCH ${KOKKOS_ARCH}) + +# From Makefile.kokkos: Options: yes,no +if(${KOKKOS_ENABLE_DEBUG}) + set(KOKKOS_DEBUG yes) +else() + set(KOKKOS_DEBUG no) +endif() + +#------------------------------- KOKKOS_DEVICES -------------------------------- +# Can have multiple devices +set(KOKKOS_DEVICESl) +foreach(devopt ${KOKKOS_DEVICES_LIST}) + string(TOUPPER ${devopt} devoptuc) + if (${KOKKOS_ENABLE_${devoptuc}}) + list(APPEND KOKKOS_DEVICESl ${devopt}) + endif () +endforeach() +# List needs to be comma-delimited +string(REPLACE ";" "," KOKKOS_DEVICES "${KOKKOS_DEVICESl}") + +#------------------------------- KOKKOS_OPTIONS -------------------------------- +# From Makefile.kokkos: Options (only these names are emitted below): +#compiler_warnings, aggressive_vectorization, disable_profiling, disable_dualview_modify_check, enable_profile_load_print + +set(KOKKOS_OPTIONSl) +if(${KOKKOS_ENABLE_COMPILER_WARNINGS}) + list(APPEND KOKKOS_OPTIONSl compiler_warnings) +endif() +if(${KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION}) + list(APPEND KOKKOS_OPTIONSl aggressive_vectorization) +endif() +if(NOT ${KOKKOS_ENABLE_PROFILING}) + list(APPEND KOKKOS_OPTIONSl disable_profiling) +endif() +if(NOT ${KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK}) + list(APPEND KOKKOS_OPTIONSl disable_dualview_modify_check) +endif() 
+if(${KOKKOS_ENABLE_PROFILING_LOAD_PRINT}) + list(APPEND KOKKOS_OPTIONSl enable_profile_load_print) +endif() +# List needs to be comma-delimited +string(REPLACE ";" "," KOKKOS_OPTIONS "${KOKKOS_OPTIONSl}") + + +#------------------------------- KOKKOS_USE_TPLS ------------------------------- +# Construct the Makefile options +set(KOKKOS_USE_TPLSl) +foreach(tplopt ${KOKKOS_USE_TPLS_LIST}) + if (${KOKKOS_ENABLE_${tplopt}}) + list(APPEND KOKKOS_USE_TPLSl ${KOKKOS_INTERNAL_${tplopt}}) + endif () +endforeach() +# List needs to be comma-delimited +string(REPLACE ";" "," KOKKOS_USE_TPLS "${KOKKOS_USE_TPLSl}") + + +#------------------------------- KOKKOS_CUDA_OPTIONS --------------------------- +# Construct the Makefile options (reset the accumulator list, then append each enabled option) +set(KOKKOS_CUDA_OPTIONSl) +foreach(cudaopt ${KOKKOS_CUDA_OPTIONS_LIST}) + if (${KOKKOS_ENABLE_CUDA_${cudaopt}}) + list(APPEND KOKKOS_CUDA_OPTIONSl ${KOKKOS_INTERNAL_${cudaopt}}) + endif () +endforeach() +# List needs to be comma-delimited +string(REPLACE ";" "," KOKKOS_CUDA_OPTIONS "${KOKKOS_CUDA_OPTIONSl}") + +#------------------------------- PATH VARIABLES -------------------------------- +# Want makefile to use same executables specified which means modifying +# the path so the $(shell ...) commands in the makefile see the right exec +# Also, the Makefiles use the FOO_PATH naming scheme for -I/-L construction +#TODO: Makefile.kokkos allows this to be overwritten? 
ROCM_HCC_PATH. NOTE: the foreach item list below is intentionally unquoted so CMake splits it into separate items; empty *_DIR values are skipped so no "FOO_PATH=" or "/bin" entry is produced. + +set(KOKKOS_INTERNAL_PATHS) +set(addpathl) +foreach(kvar CUDA QTHREADS ${KOKKOS_USE_TPLS_LIST}) + if(${KOKKOS_ENABLE_${kvar}}) + if(NOT "${KOKKOS_${kvar}_DIR}" STREQUAL "") + list(APPEND KOKKOS_INTERNAL_PATHS "${kvar}_PATH=${KOKKOS_${kvar}_DIR}") + if(IS_DIRECTORY ${KOKKOS_${kvar}_DIR}/bin) + list(APPEND addpathl ${KOKKOS_${kvar}_DIR}/bin) + endif() + endif() + endif() +endforeach() +# Path env is : delimited +string(REPLACE ";" ":" KOKKOS_INTERNAL_ADDTOPATH "${addpathl}") + + +######################### SET KOKKOS_SETTINGS ################################## +# Set the KOKKOS_SETTINGS String -- this is the primary communication with the +# makefile configuration. See Makefile.kokkos + +set(KOKKOS_SETTINGS KOKKOS_SRC_PATH=${KOKKOS_SRC_PATH}) +set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} KOKKOS_PATH=${KOKKOS_PATH}) +set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} KOKKOS_INSTALL_PATH=${CMAKE_INSTALL_PREFIX}) + +# Form of KOKKOS_foo=$KOKKOS_foo +foreach(kvar ARCH;DEVICES;DEBUG;OPTIONS;CUDA_OPTIONS;USE_TPLS) + set(KOKKOS_VAR KOKKOS_${kvar}) + if(DEFINED KOKKOS_${kvar}) + if (NOT "${${KOKKOS_VAR}}" STREQUAL "") + set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} ${KOKKOS_VAR}=${${KOKKOS_VAR}}) + endif() + endif() +endforeach() + +# Form of VAR=VAL +#TODO: Makefile supports MPICH_CXX, OMPI_CXX as well +foreach(ovar CXX;CXXFLAGS;LDFLAGS) + if(DEFINED ${ovar}) + if (NOT "${${ovar}}" STREQUAL "") + set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} ${ovar}=${${ovar}}) + endif() + endif() +endforeach() + +# Finally, do the paths (one FOO_PATH=dir list element per enabled package). NOTE(review): the PATH entry carries a literal ${PATH}; confirm the consumer of KOKKOS_SETTINGS runs it through a shell. +if (NOT "${KOKKOS_INTERNAL_PATHS}" STREQUAL "") + set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} ${KOKKOS_INTERNAL_PATHS}) +endif() +if (NOT "${KOKKOS_INTERNAL_ADDTOPATH}" STREQUAL "") + set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} PATH=${KOKKOS_INTERNAL_ADDTOPATH}:\${PATH}) +endif() + +# Final form that gets passed to make +set(KOKKOS_SETTINGS env ${KOKKOS_SETTINGS}) + + +############################ PRINT CONFIGURE STATUS ############################ + +if(KOKKOS_CMAKE_VERBOSE) 
+ message(STATUS "") + message(STATUS "****************** Kokkos Settings ******************") + message(STATUS "Execution Spaces") + + if(KOKKOS_ENABLE_CUDA) + message(STATUS " Device Parallel: Cuda") + else() + message(STATUS " Device Parallel: None") + endif() + + if(KOKKOS_ENABLE_OPENMP) + message(STATUS " Host Parallel: OpenMP") + elseif(KOKKOS_ENABLE_PTHREAD) + message(STATUS " Host Parallel: Pthread") + elseif(KOKKOS_ENABLE_QTHREADS) + message(STATUS " Host Parallel: Qthreads") + else() + message(STATUS " Host Parallel: None") + endif() + + if(KOKKOS_ENABLE_SERIAL) + message(STATUS " Host Serial: Serial") + else() + message(STATUS " Host Serial: None") + endif() + + message(STATUS "") + message(STATUS "Architectures:") + message(STATUS " ${KOKKOS_ARCH}") + + message(STATUS "") + message(STATUS "Enabled options") + + if(KOKKOS_SEPARATE_LIBS) + message(STATUS " KOKKOS_SEPARATE_LIBS") + endif() + + if(KOKKOS_ENABLE_HWLOC) + message(STATUS " KOKKOS_ENABLE_HWLOC") + endif() + + if(KOKKOS_ENABLE_MEMKIND) + message(STATUS " KOKKOS_ENABLE_MEMKIND") + endif() + + if(KOKKOS_ENABLE_DEBUG) + message(STATUS " KOKKOS_ENABLE_DEBUG") + endif() + + if(KOKKOS_ENABLE_PROFILING) + message(STATUS " KOKKOS_ENABLE_PROFILING") + endif() + + if(KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION) + message(STATUS " KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION") + endif() + + if(KOKKOS_ENABLE_CUDA) + if(KOKKOS_ENABLE_CUDA_LDG_INTRINSIC) + message(STATUS " KOKKOS_ENABLE_CUDA_LDG_INTRINSIC") + endif() + + if(KOKKOS_ENABLE_CUDA_UVM) + message(STATUS " KOKKOS_ENABLE_CUDA_UVM") + endif() + + if(KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE) + message(STATUS " KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE") + endif() + + if(KOKKOS_ENABLE_CUDA_LAMBDA) + message(STATUS " KOKKOS_ENABLE_CUDA_LAMBDA") + endif() + + if(KOKKOS_CUDA_DIR) + message(STATUS " KOKKOS_CUDA_DIR: ${KOKKOS_CUDA_DIR}") + endif() + endif() + + if(KOKKOS_QTHREADS_DIR) + message(STATUS " KOKKOS_QTHREADS_DIR: ${KOKKOS_QTHREADS_DIR}") + endif() + + 
if(KOKKOS_HWLOC_DIR) + message(STATUS " KOKKOS_HWLOC_DIR: ${KOKKOS_HWLOC_DIR}") + endif() + + if(KOKKOS_MEMKIND_DIR) + message(STATUS " KOKKOS_MEMKIND_DIR: ${KOKKOS_MEMKIND_DIR}") + endif() + + message(STATUS "") + message(STATUS "Final kokkos settings variable:") + message(STATUS " ${KOKKOS_SETTINGS}") + + message(STATUS "*****************************************************") + message(STATUS "") +endif() diff --git a/lib/kokkos/cmake/tribits.cmake b/lib/kokkos/cmake/tribits.cmake index 0f00f1dd2e..321704a1c8 100644 --- a/lib/kokkos/cmake/tribits.cmake +++ b/lib/kokkos/cmake/tribits.cmake @@ -3,10 +3,6 @@ INCLUDE(CTest) cmake_policy(SET CMP0054 NEW) -IF(NOT DEFINED ${PROJECT_NAME}) - project(KokkosCMake) -ENDIF() - MESSAGE(WARNING "The project name is: ${PROJECT_NAME}") IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_OpenMP) @@ -46,26 +42,26 @@ MACRO(PREPEND_GLOBAL_SET VARNAME) GLOBAL_SET(${VARNAME} ${ARGN} ${${VARNAME}}) ENDMACRO() -FUNCTION(REMOVE_GLOBAL_DUPLICATES VARNAME) - ASSERT_DEFINED(${VARNAME}) - IF (${VARNAME}) - SET(TMP ${${VARNAME}}) - LIST(REMOVE_DUPLICATES TMP) - GLOBAL_SET(${VARNAME} ${TMP}) - ENDIF() -ENDFUNCTION() +#FUNCTION(REMOVE_GLOBAL_DUPLICATES VARNAME) +# ASSERT_DEFINED(${VARNAME}) +# IF (${VARNAME}) +# SET(TMP ${${VARNAME}}) +# LIST(REMOVE_DUPLICATES TMP) +# GLOBAL_SET(${VARNAME} ${TMP}) +# ENDIF() +#ENDFUNCTION() -MACRO(TRIBITS_ADD_OPTION_AND_DEFINE USER_OPTION_NAME MACRO_DEFINE_NAME DOCSTRING DEFAULT_VALUE) - MESSAGE(STATUS "TRIBITS_ADD_OPTION_AND_DEFINE: '${USER_OPTION_NAME}' '${MACRO_DEFINE_NAME}' '${DEFAULT_VALUE}'") - SET( ${USER_OPTION_NAME} "${DEFAULT_VALUE}" CACHE BOOL "${DOCSTRING}" ) - IF(NOT ${MACRO_DEFINE_NAME} STREQUAL "") - IF(${USER_OPTION_NAME}) - GLOBAL_SET(${MACRO_DEFINE_NAME} ON) - ELSE() - GLOBAL_SET(${MACRO_DEFINE_NAME} OFF) - ENDIF() - ENDIF() -ENDMACRO() +#MACRO(TRIBITS_ADD_OPTION_AND_DEFINE USER_OPTION_NAME MACRO_DEFINE_NAME DOCSTRING DEFAULT_VALUE) +# MESSAGE(STATUS "TRIBITS_ADD_OPTION_AND_DEFINE: '${USER_OPTION_NAME}' 
'${MACRO_DEFINE_NAME}' '${DEFAULT_VALUE}'") +# SET( ${USER_OPTION_NAME} "${DEFAULT_VALUE}" CACHE BOOL "${DOCSTRING}" ) +# IF(NOT ${MACRO_DEFINE_NAME} STREQUAL "") +# IF(${USER_OPTION_NAME}) +# GLOBAL_SET(${MACRO_DEFINE_NAME} ON) +# ELSE() +# GLOBAL_SET(${MACRO_DEFINE_NAME} OFF) +# ENDIF() +# ENDIF() +#ENDMACRO() FUNCTION(TRIBITS_CONFIGURE_FILE PACKAGE_NAME_CONFIG_FILE) @@ -77,17 +73,20 @@ FUNCTION(TRIBITS_CONFIGURE_FILE PACKAGE_NAME_CONFIG_FILE) ENDFUNCTION() -MACRO(TRIBITS_ADD_DEBUG_OPTION) - TRIBITS_ADD_OPTION_AND_DEFINE( - ${PROJECT_NAME}_ENABLE_DEBUG - HAVE_${PROJECT_NAME_UC}_DEBUG - "Enable a host of runtime debug checking." - OFF - ) -ENDMACRO() +#MACRO(TRIBITS_ADD_DEBUG_OPTION) +# TRIBITS_ADD_OPTION_AND_DEFINE( +# ${PROJECT_NAME}_ENABLE_DEBUG +# HAVE_${PROJECT_NAME_UC}_DEBUG +# "Enable a host of runtime debug checking." +# OFF +# ) +#ENDMACRO() MACRO(TRIBITS_ADD_TEST_DIRECTORIES) + message(STATUS "ProjectName: " ${PROJECT_NAME}) + message(STATUS "Tests: " ${${PROJECT_NAME}_ENABLE_TESTS}) + IF(${${PROJECT_NAME}_ENABLE_TESTS}) FOREACH(TEST_DIR ${ARGN}) ADD_SUBDIRECTORY(${TEST_DIR}) @@ -387,17 +386,17 @@ FUNCTION(TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES TPL_NAME) ENDFUNCTION() -MACRO(TRIBITS_PROCESS_TPL_DEP_FILE TPL_FILE) - GET_FILENAME_COMPONENT(TPL_NAME ${TPL_FILE} NAME_WE) - INCLUDE("${TPL_FILE}") - IF(TARGET TPL_LIB_${TPL_NAME}) - MESSAGE(STATUS "Found tpl library: ${TPL_NAME}") - SET(TPL_ENABLE_${TPL_NAME} TRUE) - ELSE() - MESSAGE(STATUS "Tpl library not found: ${TPL_NAME}") - SET(TPL_ENABLE_${TPL_NAME} FALSE) - ENDIF() -ENDMACRO() +#MACRO(TRIBITS_PROCESS_TPL_DEP_FILE TPL_FILE) +# GET_FILENAME_COMPONENT(TPL_NAME ${TPL_FILE} NAME_WE) +# INCLUDE("${TPL_FILE}") +# IF(TARGET TPL_LIB_${TPL_NAME}) +# MESSAGE(STATUS "Found tpl library: ${TPL_NAME}") +# SET(TPL_ENABLE_${TPL_NAME} TRUE) +# ELSE() +# MESSAGE(STATUS "Tpl library not found: ${TPL_NAME}") +# SET(TPL_ENABLE_${TPL_NAME} FALSE) +# ENDIF() +#ENDMACRO() MACRO(PREPEND_TARGET_SET VARNAME TARGET_NAME TYPE) 
IF(TYPE STREQUAL "REQUIRED") @@ -475,6 +474,7 @@ MACRO(TRIBITS_SUBPACKAGE NAME) SET(PARENT_PACKAGE_NAME ${PACKAGE_NAME}) SET(PACKAGE_NAME ${PACKAGE_NAME}${NAME}) STRING(TOUPPER ${PACKAGE_NAME} PACKAGE_NAME_UC) + SET(${PACKAGE_NAME}_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) ADD_INTERFACE_LIBRARY(PACKAGE_${PACKAGE_NAME}) @@ -494,11 +494,11 @@ MACRO(TRIBITS_PACKAGE_DECL NAME) SET(${PACKAGE_NAME}_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) STRING(TOUPPER ${PACKAGE_NAME} PACKAGE_NAME_UC) - SET(TRIBITS_DEPS_DIR "${CMAKE_SOURCE_DIR}/cmake/deps") - FILE(GLOB TPLS_FILES "${TRIBITS_DEPS_DIR}/*.cmake") - FOREACH(TPL_FILE ${TPLS_FILES}) - TRIBITS_PROCESS_TPL_DEP_FILE(${TPL_FILE}) - ENDFOREACH() + #SET(TRIBITS_DEPS_DIR "${CMAKE_SOURCE_DIR}/cmake/deps") + #FILE(GLOB TPLS_FILES "${TRIBITS_DEPS_DIR}/*.cmake") + #FOREACH(TPL_FILE ${TPLS_FILES}) + # TRIBITS_PROCESS_TPL_DEP_FILE(${TPL_FILE}) + #ENDFOREACH() ENDMACRO() diff --git a/lib/kokkos/config/master_history.txt b/lib/kokkos/config/master_history.txt index c30817d43e..06c3f95a80 100644 --- a/lib/kokkos/config/master_history.txt +++ b/lib/kokkos/config/master_history.txt @@ -11,3 +11,4 @@ tag: 2.03.13 date: 07:27:2017 master: da314444 develop: 29ccb58a tag: 2.04.00 date: 08:16:2017 master: 54eb75c0 develop: 32fb8ee1 tag: 2.04.04 date: 09:11:2017 master: 2b7e9c20 develop: 51e7b25a tag: 2.04.11 date: 10:28:2017 master: 54a1330a develop: ed36c017 +tag: 2.5.11 date: 12:15:2017 master: dfe685f4 develop: ec7ad6d8 diff --git a/lib/kokkos/config/nvcc_wrapper b/lib/kokkos/config/nvcc_wrapper index cb206cf88b..d339da4fcd 100755 --- a/lib/kokkos/config/nvcc_wrapper +++ b/lib/kokkos/config/nvcc_wrapper @@ -39,6 +39,12 @@ cuda_args="" # Arguments for both NVCC and Host compiler shared_args="" +# Argument -c +compile_arg="" + +# Argument -o +output_arg="" + # Linker arguments xlinker_args="" @@ -66,6 +72,7 @@ dry_run=0 # Skip NVCC compilation and use host compiler directly host_only=0 +host_only_args="" # Enable workaround for CUDA 6.5 for pragma 
ident replace_pragma_ident=0 @@ -78,6 +85,14 @@ temp_dir=${TMPDIR:-/tmp} # Check if we have an optimization argument already optimization_applied=0 +# Check if we have -std=c++X or --std=c++X already +stdcxx_applied=0 + +# Run nvcc a second time to generate dependencies if needed +depfile_separate=0 +depfile_output_arg="" +depfile_target_arg="" + #echo "Arguments: $# $@" while [ $# -gt 0 ] @@ -109,12 +124,31 @@ do fi ;; #Handle shared args (valid for both nvcc and the host compiler) - -D*|-c|-I*|-L*|-l*|-g|--help|--version|-E|-M|-shared) + -D*|-I*|-L*|-l*|-g|--help|--version|-E|-M|-shared) shared_args="$shared_args $1" ;; - #Handle shared args that have an argument - -o|-MT) - shared_args="$shared_args $1 $2" + #Handle compilation argument + -c) + compile_arg="$1" + ;; + #Handle output argument + -o) + output_arg="$output_arg $1 $2" + shift + ;; + # Handle depfile arguments. We map them to a separate call to nvcc. + -MD|-MMD) + depfile_separate=1 + host_only_args="$host_only_args $1" + ;; + -MF) + depfile_output_arg="-o $2" + host_only_args="$host_only_args $1 $2" + shift + ;; + -MT) + depfile_target_arg="$1 $2" + host_only_args="$host_only_args $1 $2" shift ;; #Handle known nvcc args @@ -130,16 +164,25 @@ do cuda_args="$cuda_args $1 $2" shift ;; - #Handle c++11 setting - --std=c++11|-std=c++11) - shared_args="$shared_args $1" + #Handle C++ standard flags (c++11, c++14, c++1z): keep only the first one seen + --std=c++11|-std=c++11|--std=c++14|-std=c++14|--std=c++1z|-std=c++1z) + if [ $stdcxx_applied -eq 1 ]; then + echo "nvcc_wrapper - *warning* you have set multiple standard flags (-std=c++1* or --std=c++1*), only the first is used because nvcc can only accept a single std setting" + else + shared_args="$shared_args $1" + stdcxx_applied=1 + fi ;; + #strip of -std=c++98 due to nvcc warnings and Tribits will place both -std=c++11 and -std=c++98 -std=c++98|--std=c++98) ;; #strip of pedantic because it produces endless warnings about #LINE added by the preprocessor -pedantic|-Wpedantic|-ansi) ;; + #strip of -Woverloaded-virtual 
to avoid "cc1: warning: command line option ‘-Woverloaded-virtual’ is valid for C++/ObjC++ but not for C" + -Woverloaded-virtual) + ;; #strip -Xcompiler because we add it -Xcompiler) if [ $first_xcompiler_arg -eq 1 ]; then @@ -190,7 +233,7 @@ do object_files_xlinker="$object_files_xlinker -Xlinker $1" ;; #Handle object files which always need to use "-Xlinker": -x cu applies to all input files, so give them to linker, except if only linking - *.dylib) + @*|*.dylib) object_files="$object_files -Xlinker $1" object_files_xlinker="$object_files_xlinker -Xlinker $1" ;; @@ -230,7 +273,7 @@ if [ $first_xcompiler_arg -eq 0 ]; then fi #Compose host only command -host_command="$host_compiler $shared_args $xcompiler_args $host_linker_args $shared_versioned_libraries_host" +host_command="$host_compiler $shared_args $host_only_args $compile_arg $output_arg $xcompiler_args $host_linker_args $shared_versioned_libraries_host" #nvcc does not accept '#pragma ident SOME_MACRO_STRING' but it does accept '#ident SOME_MACRO_STRING' if [ $replace_pragma_ident -eq 1 ]; then @@ -262,10 +305,21 @@ else host_command="$host_command $object_files" fi +if [ $depfile_separate -eq 1 ]; then + # run nvcc a second time to generate dependencies (without compiling) + nvcc_depfile_command="$nvcc_command -M $depfile_target_arg $depfile_output_arg" +else + nvcc_depfile_command="" +fi + +nvcc_command="$nvcc_command $compile_arg $output_arg" + #Print command for dryrun if [ $dry_run -eq 1 ]; then if [ $host_only -eq 1 ]; then echo $host_command + elif [ -n "$nvcc_depfile_command" ]; then + echo $nvcc_command "&&" $nvcc_depfile_command else echo $nvcc_command fi @@ -275,6 +329,8 @@ fi #Run compilation command if [ $host_only -eq 1 ]; then $host_command +elif [ -n "$nvcc_depfile_command" ]; then + $nvcc_command && $nvcc_depfile_command else $nvcc_command fi diff --git a/lib/kokkos/config/test_all_sandia b/lib/kokkos/config/test_all_sandia index e6fcaad261..660ab91ff5 100755 --- 
a/lib/kokkos/config/test_all_sandia +++ b/lib/kokkos/config/test_all_sandia @@ -16,12 +16,12 @@ if [[ "$HOSTNAME" =~ (white|ride).* ]]; then MACHINE=white elif [[ "$HOSTNAME" =~ .*bowman.* ]]; then MACHINE=bowman -elif [[ "$HOSTNAME" =~ node.* ]]; then # Warning: very generic name +elif [[ "$HOSTNAME" =~ n.* ]]; then # Warning: very generic name if [[ "$PROCESSOR" = "aarch64" ]]; then MACHINE=sullivan - else - MACHINE=shepard fi +elif [[ "$HOSTNAME" =~ node.* ]]; then # Warning: very generic name + MACHINE=shepard elif [[ "$HOSTNAME" =~ apollo ]]; then MACHINE=apollo elif [[ "$HOSTNAME" =~ sullivan ]]; then @@ -45,7 +45,8 @@ GCC_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits IBM_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized" CLANG_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized" INTEL_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized" -CUDA_WARNING_FLAGS="" +CUDA_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized" +PGI_WARNING_FLAGS="" # Default. Machine specific can override. 
DEBUG=False @@ -61,6 +62,8 @@ SPOT_CHECK=False PRINT_HELP=False OPT_FLAG="" +CXX_FLAGS_EXTRA="" +LD_FLAGS_EXTRA="" KOKKOS_OPTIONS="" # @@ -111,6 +114,12 @@ do --with-cuda-options*) KOKKOS_CUDA_OPTIONS="--with-cuda-options=${key#*=}" ;; + --cxxflags-extra*) + CXX_FLAGS_EXTRA="${key#*=}" + ;; + --ldflags-extra*) + LD_FLAGS_EXTRA="${key#*=}" + ;; --help*) PRINT_HELP=True ;; @@ -150,20 +159,18 @@ if [ "$MACHINE" = "sems" ]; then if [ "$SPOT_CHECK" = "True" ]; then # Format: (compiler module-list build-list exe-name warning-flag) - COMPILERS=("gcc/4.7.2 $BASE_MODULE_LIST "OpenMP,Pthread" g++ $GCC_WARNING_FLAGS" - "gcc/5.1.0 $BASE_MODULE_LIST "Serial" g++ $GCC_WARNING_FLAGS" - "intel/16.0.1 $BASE_MODULE_LIST "OpenMP" icpc $INTEL_WARNING_FLAGS" + COMPILERS=("gcc/5.3.0 $BASE_MODULE_LIST "OpenMP" g++ $GCC_WARNING_FLAGS" + "gcc/6.1.0 $BASE_MODULE_LIST "Serial" g++ $GCC_WARNING_FLAGS" + "intel/17.0.1 $BASE_MODULE_LIST "OpenMP" icpc $INTEL_WARNING_FLAGS" "clang/3.9.0 $BASE_MODULE_LIST "Pthread_Serial" clang++ $CLANG_WARNING_FLAGS" "cuda/8.0.44 $CUDA8_MODULE_LIST "Cuda_OpenMP" $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" ) else # Format: (compiler module-list build-list exe-name warning-flag) - COMPILERS=("gcc/4.7.2 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" - "gcc/4.8.4 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + COMPILERS=("gcc/4.8.4 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" "gcc/4.9.3 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" "gcc/5.3.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" "gcc/6.1.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" - "intel/14.0.4 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" "intel/15.0.2 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" "intel/16.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" "intel/16.0.3 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" @@ -184,6 +191,7 @@ elif [ "$MACHINE" = 
"white" ]; then BASE_MODULE_LIST="/" IBM_MODULE_LIST="/xl/" CUDA_MODULE_LIST="/,gcc/5.4.0" + CUDA_MODULE_LIST2="/,gcc/6.3.0,ibm/xl/13.1.6-BETA" # Don't do pthread on white. GCC_BUILD_LIST="OpenMP,Serial,OpenMP_Serial" @@ -192,6 +200,7 @@ elif [ "$MACHINE" = "white" ]; then COMPILERS=("gcc/5.4.0 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS" "ibm/13.1.3 $IBM_MODULE_LIST $IBM_BUILD_LIST xlC $IBM_WARNING_FLAGS" "cuda/8.0.44 $CUDA_MODULE_LIST $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" + "cuda/9.0.103 $CUDA_MODULE_LIST2 $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" ) if [ -z "$ARCH_FLAG" ]; then @@ -210,8 +219,9 @@ elif [ "$MACHINE" = "bowman" ]; then OLD_INTEL_BUILD_LIST="Pthread,Serial,Pthread_Serial" # Format: (compiler module-list build-list exe-name warning-flag) - COMPILERS=("intel/16.2.181 $BASE_MODULE_LIST $OLD_INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" - "intel/17.0.098 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + COMPILERS=("intel/16.4.258 $BASE_MODULE_LIST $OLD_INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + "intel/17.2.174 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + "intel/18.0.128 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" ) if [ -z "$ARCH_FLAG" ]; then @@ -241,13 +251,13 @@ elif [ "$MACHINE" = "shepard" ]; then SKIP_HWLOC=True export SLURM_TASKS_PER_NODE=32 - BASE_MODULE_LIST="/compilers/" - - OLD_INTEL_BUILD_LIST="Pthread,Serial,Pthread_Serial" + BASE_MODULE_LIST="/" + BASE_MODULE_LIST_INTEL="/compilers/" # Format: (compiler module-list build-list exe-name warning-flag) - COMPILERS=("intel/16.2.181 $BASE_MODULE_LIST $OLD_INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" - "intel/17.0.098 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + COMPILERS=("intel/17.4.196 $BASE_MODULE_LIST_INTEL $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + "intel/18.0.128 $BASE_MODULE_LIST_INTEL $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + 
"pgi/17.10.0 $BASE_MODULE_LIST $GCC_BUILD_LIST pgc++ $PGI_WARNING_FLAGS" ) if [ -z "$ARCH_FLAG" ]; then @@ -280,7 +290,7 @@ elif [ "$MACHINE" = "apollo" ]; then if [ "$SPOT_CHECK" = "True" ]; then # Format: (compiler module-list build-list exe-name warning-flag) - COMPILERS=("gcc/4.7.2 $BASE_MODULE_LIST "OpenMP,Pthread" g++ $GCC_WARNING_FLAGS" + COMPILERS=("gcc/4.8.4 $BASE_MODULE_LIST "OpenMP,Pthread" g++ $GCC_WARNING_FLAGS" "gcc/5.1.0 $BASE_MODULE_LIST "Serial" g++ $GCC_WARNING_FLAGS" "intel/16.0.1 $BASE_MODULE_LIST "OpenMP" icpc $INTEL_WARNING_FLAGS" "clang/3.9.0 $BASE_MODULE_LIST "Pthread_Serial" clang++ $CLANG_WARNING_FLAGS" @@ -292,14 +302,13 @@ elif [ "$MACHINE" = "apollo" ]; then COMPILERS=("cuda/8.0.44 $CUDA8_MODULE_LIST $BUILD_LIST_CUDA_NVCC $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" "clang/4.0.0 $CLANG_MODULE_LIST $BUILD_LIST_CUDA_CLANG clang++ $CUDA_WARNING_FLAGS" "clang/3.9.0 $CLANG_MODULE_LIST $BUILD_LIST_CLANG clang++ $CLANG_WARNING_FLAGS" - "gcc/4.7.2 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" "gcc/4.8.4 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" - "gcc/4.9.2 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "gcc/4.9.3 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" "gcc/5.3.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" "gcc/6.1.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" - "intel/14.0.4 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" "intel/15.0.2 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" "intel/16.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + "intel/17.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" "clang/3.5.2 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" "clang/3.6.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" "cuda/7.0.28 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" @@ -336,6 +345,8 @@ if [ 
"$PRINT_HELP" = "True" ]; then echo "--dry-run: Just print what would be executed" echo "--build-only: Just do builds, don't run anything" echo "--opt-flag=FLAG: Optimization flag (default: -O3)" + echo "--cxxflags-extra=FLAGS: Extra flags to be added to CXX_FLAGS" + echo "--ldflags-extra=FLAGS: Extra flags to be added to LD_FLAGS" echo "--arch=ARCHITECTURE: overwrite architecture flags" echo "--with-cuda-options=OPT: set KOKKOS_CUDA_OPTIONS" echo "--build-list=BUILD,BUILD,BUILD..." @@ -361,14 +372,14 @@ if [ "$PRINT_HELP" = "True" ]; then echo " Run all gcc tests" echo " % test_all_sandia gcc" echo "" - echo " Run all gcc/4.7.2 and all intel tests" - echo " % test_all_sandia gcc/4.7.2 intel" + echo " Run all gcc/4.8.4 and all intel tests" + echo " % test_all_sandia gcc/4.8.4 intel" echo "" echo " Run all tests in debug" echo " % test_all_sandia --debug" echo "" - echo " Run gcc/4.7.2 and only do OpenMP and OpenMP_Serial builds" - echo " % test_all_sandia gcc/4.7.2 --build-list=OpenMP,OpenMP_Serial" + echo " Run gcc/4.8.4 and only do OpenMP and OpenMP_Serial builds" + echo " % test_all_sandia gcc/4.8.4 --build-list=OpenMP,OpenMP_Serial" echo "" echo "If you want to kill the tests, do:" echo " hit ctrl-z" @@ -566,10 +577,15 @@ single_build_and_test() { if [[ "$build_type" = *debug* ]]; then local extra_args="$extra_args --debug" local cxxflags="-g $compiler_warning_flags" + local ldflags="-g" else local cxxflags="$OPT_FLAG $compiler_warning_flags" + local ldflags="${OPT_FLAG}" fi + local cxxflags="${cxxflags} ${CXX_FLAGS_EXTRA}" + local ldflags="${ldflags} ${LD_FLAGS_EXTRA}" + if [[ "$KOKKOS_CUDA_OPTIONS" != "" ]]; then local extra_args="$extra_args $KOKKOS_CUDA_OPTIONS" fi @@ -586,7 +602,7 @@ single_build_and_test() { run_cmd ls fake_problem >& ${desc}.configure.log || { report_and_log_test_result 1 $desc configure && return 0; } fi else - run_cmd ${KOKKOS_PATH}/generate_makefile.bash --with-devices=$build $ARCH_FLAG --compiler=$(which $compiler_exe) 
--cxxflags=\"$cxxflags\" $extra_args &>> ${desc}.configure.log || { report_and_log_test_result 1 ${desc} configure && return 0; } + run_cmd ${KOKKOS_PATH}/generate_makefile.bash --with-devices=$build $ARCH_FLAG --compiler=$(which $compiler_exe) --cxxflags=\"$cxxflags\" --ldflags=\"$ldflags\" $extra_args &>> ${desc}.configure.log || { report_and_log_test_result 1 ${desc} configure && return 0; } local -i build_start_time=$(date +%s) run_cmd make -j 32 build-test >& ${desc}.build.log || { report_and_log_test_result 1 ${desc} build && return 0; } local -i build_end_time=$(date +%s) diff --git a/lib/kokkos/containers/CMakeLists.txt b/lib/kokkos/containers/CMakeLists.txt index 894935fa01..c37aa3e3e2 100644 --- a/lib/kokkos/containers/CMakeLists.txt +++ b/lib/kokkos/containers/CMakeLists.txt @@ -2,7 +2,10 @@ TRIBITS_SUBPACKAGE(Containers) -ADD_SUBDIRECTORY(src) + +IF(KOKKOS_HAS_TRILINOS) + ADD_SUBDIRECTORY(src) +ENDIF() TRIBITS_ADD_TEST_DIRECTORIES(unit_tests) TRIBITS_ADD_TEST_DIRECTORIES(performance_tests) diff --git a/lib/kokkos/containers/performance_tests/CMakeLists.txt b/lib/kokkos/containers/performance_tests/CMakeLists.txt index 403ac746f6..1203a8bd81 100644 --- a/lib/kokkos/containers/performance_tests/CMakeLists.txt +++ b/lib/kokkos/containers/performance_tests/CMakeLists.txt @@ -3,6 +3,14 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR}) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src ) +IF(NOT KOKKOS_HAS_TRILINOS) + IF(KOKKOS_SEPARATE_LIBS) + set(TEST_LINK_TARGETS kokkoscore) + ELSE() + set(TEST_LINK_TARGETS kokkos) + ENDIF() +ENDIF() + SET(SOURCES TestMain.cpp TestCuda.cpp @@ -24,7 +32,7 @@ TRIBITS_ADD_EXECUTABLE( PerfTestExec SOURCES ${SOURCES} COMM serial mpi - TESTONLYLIBS kokkos_gtest + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) TRIBITS_ADD_TEST( diff --git a/lib/kokkos/containers/performance_tests/Makefile 
b/lib/kokkos/containers/performance_tests/Makefile index f2f9b597e8..ebed75ccd6 100644 --- a/lib/kokkos/containers/performance_tests/Makefile +++ b/lib/kokkos/containers/performance_tests/Makefile @@ -15,7 +15,8 @@ endif CXXFLAGS = -O3 LINK ?= $(CXX) -LDFLAGS ?= -lpthread +LDFLAGS ?= +override LDFLAGS += -lpthread include $(KOKKOS_PATH)/Makefile.kokkos diff --git a/lib/kokkos/containers/performance_tests/TestDynRankView.hpp b/lib/kokkos/containers/performance_tests/TestDynRankView.hpp index 4c0ccb6b88..ced74c6f51 100644 --- a/lib/kokkos/containers/performance_tests/TestDynRankView.hpp +++ b/lib/kokkos/containers/performance_tests/TestDynRankView.hpp @@ -180,8 +180,8 @@ void test_dynrankview_op_perf( const int par_size ) typedef DeviceType execution_space; typedef typename execution_space::size_type size_type; - const size_type dim2 = 90; - const size_type dim3 = 30; + const size_type dim_2 = 90; + const size_type dim_3 = 30; double elapsed_time_view = 0; double elapsed_time_compview = 0; @@ -191,7 +191,7 @@ void test_dynrankview_op_perf( const int par_size ) double elapsed_time_compdrview = 0; Kokkos::Timer timer; { - Kokkos::View testview("testview",par_size,dim2,dim3); + Kokkos::View testview("testview",par_size,dim_2,dim_3); typedef InitViewFunctor FunctorType; timer.reset(); @@ -220,7 +220,7 @@ void test_dynrankview_op_perf( const int par_size ) std::cout << " Strided View time (init only): " << elapsed_time_strideview << std::endl; } { - Kokkos::View testview("testview",par_size,dim2,dim3,1,1,1,1); + Kokkos::View testview("testview",par_size,dim_2,dim_3,1,1,1,1); typedef InitViewRank7Functor FunctorType; timer.reset(); @@ -231,7 +231,7 @@ void test_dynrankview_op_perf( const int par_size ) std::cout << " View Rank7 time (init only): " << elapsed_time_view_rank7 << std::endl; } { - Kokkos::DynRankView testdrview("testdrview",par_size,dim2,dim3); + Kokkos::DynRankView testdrview("testdrview",par_size,dim_2,dim_3); typedef InitDynRankViewFunctor FunctorType; 
timer.reset(); diff --git a/lib/kokkos/containers/performance_tests/TestOpenMP.cpp b/lib/kokkos/containers/performance_tests/TestOpenMP.cpp index 6631184624..012f45bab7 100644 --- a/lib/kokkos/containers/performance_tests/TestOpenMP.cpp +++ b/lib/kokkos/containers/performance_tests/TestOpenMP.cpp @@ -54,6 +54,7 @@ #include #include +#include #include #include @@ -122,6 +123,18 @@ TEST_F( openmp, unordered_map_performance_far) Perf::run_performance_tests(base_file_name.str()); } +TEST_F( openmp, scatter_view) +{ + std::cout << "ScatterView data-duplicated test:\n"; + Perf::test_scatter_view(10, 1000 * 1000); +//std::cout << "ScatterView atomics test:\n"; +//Perf::test_scatter_view(10, 1000 * 1000); +} + } // namespace test #else void KOKKOS_CONTAINERS_PERFORMANCE_TESTS_TESTOPENMP_PREVENT_EMPTY_LINK_ERROR() {} diff --git a/lib/kokkos/containers/performance_tests/TestScatterView.hpp b/lib/kokkos/containers/performance_tests/TestScatterView.hpp new file mode 100644 index 0000000000..4fd69173c0 --- /dev/null +++ b/lib/kokkos/containers/performance_tests/TestScatterView.hpp @@ -0,0 +1,113 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_TEST_SCATTER_VIEW_HPP +#define KOKKOS_TEST_SCATTER_VIEW_HPP + +#include +#include + +namespace Perf { + +template +void test_scatter_view(int m, int n) +{ + Kokkos::View original_view("original_view", n); + { + auto scatter_view = Kokkos::Experimental::create_scatter_view + < Kokkos::Experimental::ScatterSum + , duplication + , contribution + > (original_view); + Kokkos::Experimental::UniqueToken< + ExecSpace, Kokkos::Experimental::UniqueTokenScope::Global> + unique_token{ExecSpace()}; + //auto internal_view = scatter_view.internal_view; + auto policy = Kokkos::RangePolicy(0, n); + for (int foo = 0; foo < 5; ++foo) { + { + auto num_threads = unique_token.size(); + std::cout << "num_threads " << num_threads << '\n'; + Kokkos::View hand_coded_duplicate_view("hand_coded_duplicate", num_threads, n); + auto f2 = KOKKOS_LAMBDA(int i) { + auto thread_id = unique_token.acquire(); + for (int j = 0; j < 10; ++j) { + auto k = (i + j) % n; + hand_coded_duplicate_view(thread_id, k, 0) += 4.2; + hand_coded_duplicate_view(thread_id, k, 1) += 2.0; + hand_coded_duplicate_view(thread_id, k, 2) += 1.0; + } + }; + Kokkos::Timer timer; + timer.reset(); + for (int k = 0; k < m; ++k) { + Kokkos::parallel_for(policy, f2, "hand_coded_duplicate_scatter_view_test"); + } + auto t = timer.seconds(); + std::cout << "hand-coded test took " << t << " seconds\n"; + } + { + auto f = KOKKOS_LAMBDA(int i) { + auto scatter_access = scatter_view.access(); + for (int j = 0; j < 10; ++j) { + auto k = (i + j) % n; + scatter_access(k, 0) += 4.2; + scatter_access(k, 1) += 2.0; + scatter_access(k, 2) += 1.0; + } + }; + Kokkos::Timer timer; + timer.reset(); + for (int k = 0; k < m; ++k) { + Kokkos::parallel_for(policy, f, "scatter_view_test"); + } + auto t = timer.seconds(); + std::cout << "test took " << t << " seconds\n"; + } + } + } +} + +} + +#endif diff 
--git a/lib/kokkos/containers/src/CMakeLists.txt b/lib/kokkos/containers/src/CMakeLists.txt index da5a791530..e68fcad5e9 100644 --- a/lib/kokkos/containers/src/CMakeLists.txt +++ b/lib/kokkos/containers/src/CMakeLists.txt @@ -6,26 +6,42 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) #----------------------------------------------------------------------------- -SET(HEADERS "") -SET(SOURCES "") - -SET(HEADERS_IMPL "") - -FILE(GLOB HEADERS *.hpp) -FILE(GLOB HEADERS_IMPL impl/*.hpp) -FILE(GLOB SOURCES impl/*.cpp) - SET(TRILINOS_INCDIR ${CMAKE_INSTALL_PREFIX}/${${PROJECT_NAME}_INSTALL_INCLUDE_DIR}) -INSTALL(FILES ${HEADERS_IMPL} DESTINATION ${TRILINOS_INCDIR}/impl/) +if(KOKKOS_LEGACY_TRIBITS) -TRIBITS_ADD_LIBRARY( - kokkoscontainers - HEADERS ${HEADERS} - NOINSTALLHEADERS ${HEADERS_IMPL} - SOURCES ${SOURCES} - DEPLIBS - ) + SET(HEADERS "") + SET(SOURCES "") + SET(HEADERS_IMPL "") + + FILE(GLOB HEADERS *.hpp) + FILE(GLOB HEADERS_IMPL impl/*.hpp) + FILE(GLOB SOURCES impl/*.cpp) + + INSTALL(FILES ${HEADERS_IMPL} DESTINATION ${TRILINOS_INCDIR}/impl/) + + TRIBITS_ADD_LIBRARY( + kokkoscontainers + HEADERS ${HEADERS} + NOINSTALLHEADERS ${HEADERS_IMPL} + SOURCES ${SOURCES} + DEPLIBS + ) + +else() + + INSTALL ( + DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/" + DESTINATION ${TRILINOS_INCDIR} + FILES_MATCHING PATTERN "*.hpp" + ) + + TRIBITS_ADD_LIBRARY( + kokkoscontainers + SOURCES ${KOKKOS_CONTAINERS_SRCS} + DEPLIBS + ) + +endif() #----------------------------------------------------------------------------- - diff --git a/lib/kokkos/containers/src/Kokkos_ScatterView.hpp b/lib/kokkos/containers/src/Kokkos_ScatterView.hpp new file mode 100644 index 0000000000..48c4709480 --- /dev/null +++ b/lib/kokkos/containers/src/Kokkos_ScatterView.hpp @@ -0,0 +1,999 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + + +/// \file Kokkos_ScatterView.hpp +/// \brief Declaration and definition of Kokkos::ScatterView. 
+/// +/// This header file declares and defines Kokkos::ScatterView and its +/// related nonmember functions. + +#ifndef KOKKOS_SCATTER_VIEW_HPP +#define KOKKOS_SCATTER_VIEW_HPP + +#include +#include + +namespace Kokkos { +namespace Experimental { + +//TODO: replace this enum with the Kokkos::Sum, etc reducers for parallel_reduce +enum : int { + ScatterSum, +}; + +enum : int { + ScatterNonDuplicated = 0, + ScatterDuplicated = 1 +}; + +enum : int { + ScatterNonAtomic = 0, + ScatterAtomic = 1 +}; + +}} // Kokkos::Experimental + +namespace Kokkos { +namespace Impl { +namespace Experimental { + +template +struct DefaultDuplication; + +template +struct DefaultContribution; + +#ifdef KOKKOS_ENABLE_SERIAL +template <> +struct DefaultDuplication { + enum : int { value = Kokkos::Experimental::ScatterNonDuplicated }; +}; +template <> +struct DefaultContribution { + enum : int { value = Kokkos::Experimental::ScatterNonAtomic }; +}; +template <> +struct DefaultContribution { + enum : int { value = Kokkos::Experimental::ScatterNonAtomic }; +}; +#endif + +#ifdef KOKKOS_ENABLE_OPENMP +template <> +struct DefaultDuplication { + enum : int { value = Kokkos::Experimental::ScatterDuplicated }; +}; +template <> +struct DefaultContribution { + enum : int { value = Kokkos::Experimental::ScatterAtomic }; +}; +template <> +struct DefaultContribution { + enum : int { value = Kokkos::Experimental::ScatterNonAtomic }; +}; +#endif + +#ifdef KOKKOS_ENABLE_THREADS +template <> +struct DefaultDuplication { + enum : int { value = Kokkos::Experimental::ScatterDuplicated }; +}; +template <> +struct DefaultContribution { + enum : int { value = Kokkos::Experimental::ScatterAtomic }; +}; +template <> +struct DefaultContribution { + enum : int { value = Kokkos::Experimental::ScatterNonAtomic }; +}; +#endif + +#ifdef KOKKOS_ENABLE_CUDA +template <> +struct DefaultDuplication { + enum : int { value = Kokkos::Experimental::ScatterNonDuplicated }; +}; +template <> +struct DefaultContribution { + enum : int 
{ value = Kokkos::Experimental::ScatterAtomic }; +}; +template <> +struct DefaultContribution { + enum : int { value = Kokkos::Experimental::ScatterAtomic }; +}; +#endif + +/* ScatterValue is the object returned by the access operator() of ScatterAccess, + similar to that returned by an Atomic View, it wraps Kokkos::atomic_add with convenient + operator+=, etc. */ +template +struct ScatterValue; + +template +struct ScatterValue { + public: + KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in) : value( value_in ) {} + KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other) : value( other.value ) {} + KOKKOS_FORCEINLINE_FUNCTION void operator+=(ValueType const& rhs) { + value += rhs; + } + KOKKOS_FORCEINLINE_FUNCTION void operator-=(ValueType const& rhs) { + value -= rhs; + } + private: + ValueType& value; +}; + +template +struct ScatterValue { + public: + KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in) : value( value_in ) {} + KOKKOS_FORCEINLINE_FUNCTION void operator+=(ValueType const& rhs) { + Kokkos::atomic_add(&value, rhs); + } + KOKKOS_FORCEINLINE_FUNCTION void operator-=(ValueType const& rhs) { + Kokkos::atomic_add(&value, -rhs); + } + private: + ValueType& value; +}; + +/* DuplicatedDataType, given a View DataType, will create a new DataType + that has a new runtime dimension which becomes the largest-stride dimension. + In the case of LayoutLeft, due to the limitation induced by the design of DataType + itself, it must convert any existing compile-time dimensions into runtime dimensions. 
*/ +template +struct DuplicatedDataType; + +template +struct DuplicatedDataType { + typedef T* value_type; // For LayoutRight, add a star all the way on the left +}; + +template +struct DuplicatedDataType { + typedef typename DuplicatedDataType::value_type value_type[N]; +}; + +template +struct DuplicatedDataType { + typedef typename DuplicatedDataType::value_type value_type[]; +}; + +template +struct DuplicatedDataType { + typedef typename DuplicatedDataType::value_type* value_type; +}; + +template +struct DuplicatedDataType { + typedef T* value_type; +}; + +template +struct DuplicatedDataType { + typedef typename DuplicatedDataType::value_type* value_type; +}; + +template +struct DuplicatedDataType { + typedef typename DuplicatedDataType::value_type* value_type; +}; + +template +struct DuplicatedDataType { + typedef typename DuplicatedDataType::value_type* value_type; +}; + +/* Slice is just responsible for stuffing the correct number of Kokkos::ALL + arguments on the correct side of the index in a call to subview() to get a + subview where the index specified is the largest-stride one. */ +template +struct Slice { + typedef Slice next; + typedef typename next::value_type value_type; + + static + value_type get(V const& src, const size_t i, Args ... args) { + return next::get(src, i, Kokkos::ALL, args...); + } +}; + +template +struct Slice { + typedef typename Kokkos::Impl::ViewMapping + < void + , V + , const size_t + , Args ... + >::type value_type; + static + value_type get(V const& src, const size_t i, Args ... args) { + return Kokkos::subview(src, i, args...); + } +}; + +template +struct Slice { + typedef typename Kokkos::Impl::ViewMapping + < void + , V + , Args ... + , const size_t + >::type value_type; + static + value_type get(V const& src, const size_t i, Args ... 
args) { + return Kokkos::subview(src, args..., i); + } +}; + +template +struct ReduceDuplicates; + +template +struct ReduceDuplicatesBase { + typedef ReduceDuplicates Derived; + ValueType const* src; + ValueType* dst; + size_t stride; + size_t start; + size_t n; + ReduceDuplicatesBase(ValueType const* src_in, ValueType* dest_in, size_t stride_in, size_t start_in, size_t n_in, std::string const& name) + : src(src_in) + , dst(dest_in) + , stride(stride_in) + , start(start_in) + , n(n_in) + { +#if defined(KOKKOS_ENABLE_PROFILING) + uint64_t kpID = 0; + if(Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Profiling::beginParallelFor(std::string("reduce_") + name, 0, &kpID); + } +#endif + typedef RangePolicy policy_type; + typedef Kokkos::Impl::ParallelFor closure_type; + const closure_type closure(*(static_cast(this)), policy_type(0, stride)); + closure.execute(); +#if defined(KOKKOS_ENABLE_PROFILING) + if(Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Profiling::endParallelFor(kpID); + } +#endif + } +}; + +template +struct ReduceDuplicates : + public ReduceDuplicatesBase +{ + typedef ReduceDuplicatesBase Base; + ReduceDuplicates(ValueType const* src_in, ValueType* dst_in, size_t stride_in, size_t start_in, size_t n_in, std::string const& name): + Base(src_in, dst_in, stride_in, start_in, n_in, name) + {} + KOKKOS_FORCEINLINE_FUNCTION void operator()(size_t i) const { + for (size_t j = Base::start; j < Base::n; ++j) { + Base::dst[i] += Base::src[i + Base::stride * j]; + } + } +}; + +template +struct ResetDuplicates; + +template +struct ResetDuplicatesBase { + typedef ResetDuplicates Derived; + ValueType* data; + ResetDuplicatesBase(ValueType* data_in, size_t size_in, std::string const& name) + : data(data_in) + { +#if defined(KOKKOS_ENABLE_PROFILING) + uint64_t kpID = 0; + if(Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Profiling::beginParallelFor(std::string("reduce_") + name, 0, &kpID); + } +#endif + typedef RangePolicy policy_type; + typedef 
Kokkos::Impl::ParallelFor closure_type; + const closure_type closure(*(static_cast(this)), policy_type(0, size_in)); + closure.execute(); +#if defined(KOKKOS_ENABLE_PROFILING) + if(Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Profiling::endParallelFor(kpID); + } +#endif + } +}; + +template +struct ResetDuplicates : + public ResetDuplicatesBase +{ + typedef ResetDuplicatesBase Base; + ResetDuplicates(ValueType* data_in, size_t size_in, std::string const& name): + Base(data_in, size_in, name) + {} + KOKKOS_FORCEINLINE_FUNCTION void operator()(size_t i) const { + Base::data[i] = Kokkos::reduction_identity::sum(); + } +}; + +}}} // Kokkos::Impl::Experimental + +namespace Kokkos { +namespace Experimental { + +template ::value + ,int contribution = Kokkos::Impl::Experimental::DefaultContribution::value + > +class ScatterView; + +template +class ScatterAccess; + +// non-duplicated implementation +template +class ScatterView +{ +public: + typedef Kokkos::View original_view_type; + typedef typename original_view_type::value_type original_value_type; + typedef typename original_view_type::reference_type original_reference_type; + friend class ScatterAccess; + friend class ScatterAccess; + + ScatterView() + { + } + + template + ScatterView(View const& original_view) + : internal_view(original_view) + { + } + + template + ScatterView(std::string const& name, Dims ... dims) + : internal_view(name, dims ...) 
+ { + } + + template + KOKKOS_FORCEINLINE_FUNCTION + ScatterAccess + access() const { + return ScatterAccess{*this}; + } + + original_view_type subview() const { + return internal_view; + } + + template + void contribute_into(View const& dest) const + { + typedef View dest_type; + static_assert(std::is_same< + typename dest_type::array_layout, + Layout>::value, + "ScatterView contribute destination has different layout"); + static_assert(Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< + typename ExecSpace::memory_space, + typename dest_type::memory_space>::value, + "ScatterView contribute destination memory space not accessible"); + if (dest.data() == internal_view.data()) return; + Kokkos::Impl::Experimental::ReduceDuplicates( + internal_view.data(), + dest.data(), + 0, + 0, + 1, + internal_view.label()); + } + + void reset() { + Kokkos::Impl::Experimental::ResetDuplicates( + internal_view.data(), + internal_view.size(), + internal_view.label()); + } + template + void reset_except(View const& view) { + if (view.data() != internal_view.data()) reset(); + } + + void resize(const size_t n0 = 0, + const size_t n1 = 0, + const size_t n2 = 0, + const size_t n3 = 0, + const size_t n4 = 0, + const size_t n5 = 0, + const size_t n6 = 0, + const size_t n7 = 0) { + ::Kokkos::resize(internal_view,n0,n1,n2,n3,n4,n5,n6,n7); + } + + void realloc(const size_t n0 = 0, + const size_t n1 = 0, + const size_t n2 = 0, + const size_t n3 = 0, + const size_t n4 = 0, + const size_t n5 = 0, + const size_t n6 = 0, + const size_t n7 = 0) { + ::Kokkos::realloc(internal_view,n0,n1,n2,n3,n4,n5,n6,n7); + } + +protected: + template + KOKKOS_FORCEINLINE_FUNCTION + original_reference_type at(Args ... 
args) const { + return internal_view(args...); + } +private: + typedef original_view_type internal_view_type; + internal_view_type internal_view; +}; + +template +class ScatterAccess +{ +public: + typedef ScatterView view_type; + typedef typename view_type::original_value_type original_value_type; + typedef Kokkos::Impl::Experimental::ScatterValue< + original_value_type, Op, override_contribution> value_type; + + KOKKOS_INLINE_FUNCTION + ScatterAccess(view_type const& view_in) + : view(view_in) + { + } + + template + KOKKOS_FORCEINLINE_FUNCTION + value_type operator()(Args ... args) const { + return view.at(args...); + } + + template + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if::value, value_type>::type + operator[](Arg arg) const { + return view.at(arg); + } + +private: + view_type const& view; +}; + +// duplicated implementation +// LayoutLeft and LayoutRight are different enough that we'll just specialize each + +template +class ScatterView +{ +public: + typedef Kokkos::View original_view_type; + typedef typename original_view_type::value_type original_value_type; + typedef typename original_view_type::reference_type original_reference_type; + friend class ScatterAccess; + friend class ScatterAccess; + typedef typename Kokkos::Impl::Experimental::DuplicatedDataType data_type_info; + typedef typename data_type_info::value_type internal_data_type; + typedef Kokkos::View internal_view_type; + + ScatterView() + { + } + + template + ScatterView(View const& original_view) + : unique_token() + , internal_view(Kokkos::ViewAllocateWithoutInitializing( + std::string("duplicated_") + original_view.label()), + unique_token.size(), + original_view.extent(0), + original_view.extent(1), + original_view.extent(2), + original_view.extent(3), + original_view.extent(4), + original_view.extent(5), + original_view.extent(6)) + { + reset(); + } + + template + ScatterView(std::string const& name, Dims ... 
dims) + : internal_view(Kokkos::ViewAllocateWithoutInitializing(name), unique_token.size(), dims ...) + { + reset(); + } + + template + inline + ScatterAccess + access() const { + return ScatterAccess{*this}; + } + + typename Kokkos::Impl::Experimental::Slice< + Kokkos::LayoutRight, internal_view_type::rank, internal_view_type>::value_type + subview() const + { + return Kokkos::Impl::Experimental::Slice< + Kokkos::LayoutRight, internal_view_type::Rank, internal_view_type>::get(internal_view, 0); + } + + template + void contribute_into(View const& dest) const + { + typedef View dest_type; + static_assert(std::is_same< + typename dest_type::array_layout, + Kokkos::LayoutRight>::value, + "ScatterView deep_copy destination has different layout"); + static_assert(Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< + typename ExecSpace::memory_space, + typename dest_type::memory_space>::value, + "ScatterView deep_copy destination memory space not accessible"); + size_t strides[8]; + internal_view.stride(strides); + bool is_equal = (dest.data() == internal_view.data()); + size_t start = is_equal ? 
1 : 0; + Kokkos::Impl::Experimental::ReduceDuplicates( + internal_view.data(), + dest.data(), + strides[0], + start, + internal_view.extent(0), + internal_view.label()); + } + + void reset() { + Kokkos::Impl::Experimental::ResetDuplicates( + internal_view.data(), + internal_view.size(), + internal_view.label()); + } + template + void reset_except(View const& view) { + if (view.data() != internal_view.data()) { + reset(); + return; + } + Kokkos::Impl::Experimental::ResetDuplicates( + internal_view.data() + view.size(), + internal_view.size() - view.size(), + internal_view.label()); + } + + void resize(const size_t n0 = 0, + const size_t n1 = 0, + const size_t n2 = 0, + const size_t n3 = 0, + const size_t n4 = 0, + const size_t n5 = 0, + const size_t n6 = 0) { + ::Kokkos::resize(internal_view,unique_token.size(),n0,n1,n2,n3,n4,n5,n6); + } + + void realloc(const size_t n0 = 0, + const size_t n1 = 0, + const size_t n2 = 0, + const size_t n3 = 0, + const size_t n4 = 0, + const size_t n5 = 0, + const size_t n6 = 0) { + ::Kokkos::realloc(internal_view,unique_token.size(),n0,n1,n2,n3,n4,n5,n6); + } + +protected: + template + KOKKOS_FORCEINLINE_FUNCTION + original_reference_type at(int rank, Args ... 
args) const { + return internal_view(rank, args...); + } + +protected: + typedef Kokkos::Experimental::UniqueToken< + ExecSpace, Kokkos::Experimental::UniqueTokenScope::Global> unique_token_type; + + unique_token_type unique_token; + internal_view_type internal_view; +}; + +template +class ScatterView +{ +public: + typedef Kokkos::View original_view_type; + typedef typename original_view_type::value_type original_value_type; + typedef typename original_view_type::reference_type original_reference_type; + friend class ScatterAccess; + friend class ScatterAccess; + typedef typename Kokkos::Impl::Experimental::DuplicatedDataType data_type_info; + typedef typename data_type_info::value_type internal_data_type; + typedef Kokkos::View internal_view_type; + + ScatterView() + { + } + + template + ScatterView(View const& original_view) + : unique_token() + { + size_t arg_N[8] = { + original_view.extent(0), + original_view.extent(1), + original_view.extent(2), + original_view.extent(3), + original_view.extent(4), + original_view.extent(5), + original_view.extent(6), + 0 + }; + arg_N[internal_view_type::rank - 1] = unique_token.size(); + internal_view = internal_view_type( + Kokkos::ViewAllocateWithoutInitializing( + std::string("duplicated_") + original_view.label()), + arg_N[0], arg_N[1], arg_N[2], arg_N[3], + arg_N[4], arg_N[5], arg_N[6], arg_N[7]); + reset(); + } + + template + ScatterView(std::string const& name, Dims ... 
dims) + : internal_view(Kokkos::ViewAllocateWithoutInitializing(name), dims ..., unique_token.size()) + { + reset(); + } + + template + inline + ScatterAccess + access() const { + return ScatterAccess{*this}; + } + + typename Kokkos::Impl::Experimental::Slice< + Kokkos::LayoutLeft, internal_view_type::rank, internal_view_type>::value_type + subview() const + { + return Kokkos::Impl::Experimental::Slice< + Kokkos::LayoutLeft, internal_view_type::rank, internal_view_type>::get(internal_view, 0); + } + + template + void contribute_into(View const& dest) const + { + typedef View dest_type; + static_assert(std::is_same< + typename dest_type::array_layout, + Kokkos::LayoutLeft>::value, + "ScatterView deep_copy destination has different layout"); + static_assert(Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< + typename ExecSpace::memory_space, + typename dest_type::memory_space>::value, + "ScatterView deep_copy destination memory space not accessible"); + size_t strides[8]; + internal_view.stride(strides); + size_t stride = strides[internal_view_type::rank - 1]; + auto extent = internal_view.extent( + internal_view_type::rank - 1); + bool is_equal = (dest.data() == internal_view.data()); + size_t start = is_equal ? 
1 : 0; + Kokkos::Impl::Experimental::ReduceDuplicates( + internal_view.data(), + dest.data(), + stride, + start, + extent, + internal_view.label()); + } + + void reset() { + Kokkos::Impl::Experimental::ResetDuplicates( + internal_view.data(), + internal_view.size(), + internal_view.label()); + } + template + void reset_except(View const& view) { + if (view.data() != internal_view.data()) { + reset(); + return; + } + Kokkos::Impl::Experimental::ResetDuplicates( + internal_view.data() + view.size(), + internal_view.size() - view.size(), + internal_view.label()); + } + + void resize(const size_t n0 = 0, + const size_t n1 = 0, + const size_t n2 = 0, + const size_t n3 = 0, + const size_t n4 = 0, + const size_t n5 = 0, + const size_t n6 = 0) { + + size_t arg_N[8] = {n0,n1,n2,n3,n4,n5,n6,0}; + const int i = internal_view.rank-1; + arg_N[i] = unique_token.size(); + + ::Kokkos::resize(internal_view, + arg_N[0], arg_N[1], arg_N[2], arg_N[3], + arg_N[4], arg_N[5], arg_N[6], arg_N[7]); + } + + void realloc(const size_t n0 = 0, + const size_t n1 = 0, + const size_t n2 = 0, + const size_t n3 = 0, + const size_t n4 = 0, + const size_t n5 = 0, + const size_t n6 = 0) { + + size_t arg_N[8] = {n0,n1,n2,n3,n4,n5,n6,0}; + const int i = internal_view.rank-1; + arg_N[i] = unique_token.size(); + + ::Kokkos::realloc(internal_view, + arg_N[0], arg_N[1], arg_N[2], arg_N[3], + arg_N[4], arg_N[5], arg_N[6], arg_N[7]); + } + +protected: + template + inline original_reference_type at(int thread_id, Args ... 
args) const { + return internal_view(args..., thread_id); + } + +protected: + typedef Kokkos::Experimental::UniqueToken< + ExecSpace, Kokkos::Experimental::UniqueTokenScope::Global> unique_token_type; + + unique_token_type unique_token; + internal_view_type internal_view; +}; + + +/* This object has to be separate in order to store the thread ID, which cannot + be obtained until one is inside a parallel construct, and may be relatively + expensive to obtain at every contribution + (calls a non-inlined function, looks up a thread-local variable). + Due to the expense, it is sensible to query it at most once per parallel iterate + (ideally once per thread, but parallel_for doesn't expose that) + and then store it in a stack variable. + ScatterAccess serves as a non-const object on the stack which can store the thread ID */ + +template +class ScatterAccess +{ +public: + typedef ScatterView view_type; + typedef typename view_type::original_value_type original_value_type; + typedef Kokkos::Impl::Experimental::ScatterValue< + original_value_type, Op, override_contribution> value_type; + + inline ScatterAccess(view_type const& view_in) + : view(view_in) + , thread_id(view_in.unique_token.acquire()) { + } + + inline ~ScatterAccess() { + if (thread_id != ~thread_id_type(0)) view.unique_token.release(thread_id); + } + + template + KOKKOS_FORCEINLINE_FUNCTION + value_type operator()(Args ... 
args) const { + return view.at(thread_id, args...); + } + + template + KOKKOS_FORCEINLINE_FUNCTION + typename std::enable_if::value, value_type>::type + operator[](Arg arg) const { + return view.at(thread_id, arg); + } + +private: + + view_type const& view; + + // simplify RAII by disallowing copies + ScatterAccess(ScatterAccess const& other) = delete; + ScatterAccess& operator=(ScatterAccess const& other) = delete; + ScatterAccess& operator=(ScatterAccess&& other) = delete; + +public: + // do need to allow moves though, for the common + // auto b = a.access(); + // that assignments turns into a move constructor call + inline ScatterAccess(ScatterAccess&& other) + : view(other.view) + , thread_id(other.thread_id) + { + other.thread_id = ~thread_id_type(0); + } + +private: + + typedef typename view_type::unique_token_type unique_token_type; + typedef typename unique_token_type::size_type thread_id_type; + thread_id_type thread_id; +}; + +template +ScatterView + < RT + , typename ViewTraits::array_layout + , typename ViewTraits::execution_space + , Op + /* just setting defaults if not specified... things got messy because the view type + does not come before the duplication/contribution settings in the + template parameter list */ + , duplication == -1 ? Kokkos::Impl::Experimental::DefaultDuplication::execution_space>::value : duplication + , contribution == -1 ? + Kokkos::Impl::Experimental::DefaultContribution< + typename ViewTraits::execution_space, + (duplication == -1 ? 
+ Kokkos::Impl::Experimental::DefaultDuplication< + typename ViewTraits::execution_space + >::value + : duplication + ) + >::value + : contribution + > +create_scatter_view(View const& original_view) { + return original_view; // implicit ScatterView constructor call +} + +}} // namespace Kokkos::Experimental + +namespace Kokkos { +namespace Experimental { + +template +void +contribute(View& dest, Kokkos::Experimental::ScatterView const& src) +{ + src.contribute_into(dest); +} + +}} // namespace Kokkos::Experimental + +namespace Kokkos { + +template +void +realloc(Kokkos::Experimental::ScatterView& scatter_view, IS ... is) +{ + scatter_view.realloc(is ...); +} + +template +void +resize(Kokkos::Experimental::ScatterView& scatter_view, IS ... is) +{ + scatter_view.resize(is ...); +} + +} // namespace Kokkos + +#endif diff --git a/lib/kokkos/containers/src/Kokkos_Vector.hpp b/lib/kokkos/containers/src/Kokkos_Vector.hpp index 15c677f7f8..03bbefab10 100644 --- a/lib/kokkos/containers/src/Kokkos_Vector.hpp +++ b/lib/kokkos/containers/src/Kokkos_Vector.hpp @@ -56,6 +56,7 @@ template< class Scalar, class Arg1Type = void> class vector : public DualView { +public: typedef Scalar value_type; typedef Scalar* pointer; typedef const Scalar* const_pointer; diff --git a/lib/kokkos/containers/unit_tests/CMakeLists.txt b/lib/kokkos/containers/unit_tests/CMakeLists.txt index 0c59c616d6..1162d2a6ba 100644 --- a/lib/kokkos/containers/unit_tests/CMakeLists.txt +++ b/lib/kokkos/containers/unit_tests/CMakeLists.txt @@ -3,7 +3,13 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR}) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src ) -SET(LIBRARIES kokkoscore) +IF(NOT KOKKOS_HAS_TRILINOS) + IF(KOKKOS_SEPARATE_LIBS) + set(TEST_LINK_TARGETS kokkoscore) + ELSE() + set(TEST_LINK_TARGETS kokkos) + ENDIF() +ENDIF() IF(Kokkos_ENABLE_Pthread) TRIBITS_ADD_EXECUTABLE_AND_TEST( @@ -12,7 +18,7 @@ 
TRIBITS_ADD_EXECUTABLE_AND_TEST( COMM serial mpi NUM_MPI_PROCS 1 FAIL_REGULAR_EXPRESSION " FAILED " - TESTONLYLIBS kokkos_gtest + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) ENDIF() @@ -23,7 +29,7 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST( COMM serial mpi NUM_MPI_PROCS 1 FAIL_REGULAR_EXPRESSION " FAILED " - TESTONLYLIBS kokkos_gtest + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) ENDIF() @@ -34,7 +40,7 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST( COMM serial mpi NUM_MPI_PROCS 1 FAIL_REGULAR_EXPRESSION " FAILED " - TESTONLYLIBS kokkos_gtest + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) ENDIF() @@ -45,7 +51,7 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST( COMM serial mpi NUM_MPI_PROCS 1 FAIL_REGULAR_EXPRESSION " FAILED " - TESTONLYLIBS kokkos_gtest + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) ENDIF() diff --git a/lib/kokkos/containers/unit_tests/Makefile b/lib/kokkos/containers/unit_tests/Makefile index 13da516b6c..2b6861f6d7 100644 --- a/lib/kokkos/containers/unit_tests/Makefile +++ b/lib/kokkos/containers/unit_tests/Makefile @@ -15,7 +15,8 @@ endif CXXFLAGS = -O3 LINK ?= $(CXX) -LDFLAGS ?= -lpthread +LDFLAGS ?= +override LDFLAGS += -lpthread include $(KOKKOS_PATH)/Makefile.kokkos diff --git a/lib/kokkos/containers/unit_tests/TestCuda.cpp b/lib/kokkos/containers/unit_tests/TestCuda.cpp index 651a4e7eb8..ddd6bdae6d 100644 --- a/lib/kokkos/containers/unit_tests/TestCuda.cpp +++ b/lib/kokkos/containers/unit_tests/TestCuda.cpp @@ -62,6 +62,7 @@ #include #include #include +#include #include #include @@ -201,10 +202,18 @@ void cuda_test_bitset() cuda_test_dualview_combinations(size); \ } +#define CUDA_SCATTERVIEW_TEST( size ) \ + TEST_F( cuda, scatterview_##size##x) { \ + test_scatter_view(size); \ + } + CUDA_DUALVIEW_COMBINE_TEST( 10 ) CUDA_VECTOR_COMBINE_TEST( 10 ) CUDA_VECTOR_COMBINE_TEST( 3057 ) +CUDA_SCATTERVIEW_TEST( 10 ) + +CUDA_SCATTERVIEW_TEST( 1000000 ) CUDA_INSERT_TEST(close, 100000, 90000, 100, 500) CUDA_INSERT_TEST(far, 100000, 90000, 100, 500) diff --git 
a/lib/kokkos/containers/unit_tests/TestOpenMP.cpp b/lib/kokkos/containers/unit_tests/TestOpenMP.cpp index 5365d91361..6b2223f418 100644 --- a/lib/kokkos/containers/unit_tests/TestOpenMP.cpp +++ b/lib/kokkos/containers/unit_tests/TestOpenMP.cpp @@ -63,6 +63,8 @@ #include #include +#include + #include #include @@ -152,6 +154,11 @@ TEST_F( openmp , staticcrsgraph ) test_dualview_combinations(size); \ } +#define OPENMP_SCATTERVIEW_TEST( size ) \ + TEST_F( openmp, scatterview_##size##x) { \ + test_scatter_view(size); \ + } + OPENMP_INSERT_TEST(close, 100000, 90000, 100, 500, true) OPENMP_INSERT_TEST(far, 100000, 90000, 100, 500, false) OPENMP_FAILED_INSERT_TEST( 10000, 1000 ) @@ -161,6 +168,10 @@ OPENMP_VECTOR_COMBINE_TEST( 10 ) OPENMP_VECTOR_COMBINE_TEST( 3057 ) OPENMP_DUALVIEW_COMBINE_TEST( 10 ) +OPENMP_SCATTERVIEW_TEST( 10 ) + +OPENMP_SCATTERVIEW_TEST( 1000000 ) + #undef OPENMP_INSERT_TEST #undef OPENMP_FAILED_INSERT_TEST #undef OPENMP_ASSIGNEMENT_TEST diff --git a/lib/kokkos/containers/unit_tests/TestScatterView.hpp b/lib/kokkos/containers/unit_tests/TestScatterView.hpp new file mode 100644 index 0000000000..42e6c09307 --- /dev/null +++ b/lib/kokkos/containers/unit_tests/TestScatterView.hpp @@ -0,0 +1,156 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_TEST_SCATTER_VIEW_HPP +#define KOKKOS_TEST_SCATTER_VIEW_HPP + +#include + +namespace Test { + +template +void test_scatter_view_config(int n) +{ + Kokkos::View original_view("original_view", n); + { + auto scatter_view = Kokkos::Experimental::create_scatter_view + < Kokkos::Experimental::ScatterSum + , duplication + , contribution + > (original_view); +#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA ) + auto policy = Kokkos::RangePolicy(0, n); + auto f = KOKKOS_LAMBDA(int i) { + auto scatter_access = scatter_view.access(); + auto scatter_access_atomic = scatter_view.template access(); + for (int j = 0; j < 10; ++j) { + auto k = (i + j) % n; + scatter_access(k, 0) += 4.2; + scatter_access_atomic(k, 1) += 2.0; + scatter_access(k, 2) += 1.0; + } + }; + Kokkos::parallel_for(policy, f, "scatter_view_test"); +#endif + Kokkos::Experimental::contribute(original_view, scatter_view); + scatter_view.reset_except(original_view); +#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA ) + Kokkos::parallel_for(policy, f, "scatter_view_test"); +#endif + Kokkos::Experimental::contribute(original_view, scatter_view); + } +#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA ) + auto host_view = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), original_view); + for (typename decltype(host_view)::size_type i = 0; i < host_view.dimension_0(); ++i) { + auto val0 = host_view(i, 0); + auto val1 = host_view(i, 1); + auto val2 = host_view(i, 2); + EXPECT_TRUE(std::fabs((val0 - 84.0) / 84.0) < 1e-15); + EXPECT_TRUE(std::fabs((val1 - 40.0) / 40.0) < 1e-15); + EXPECT_TRUE(std::fabs((val2 - 20.0) / 20.0) < 1e-15); + } +#endif + { + Kokkos::Experimental::ScatterView + < double*[3] + , Layout + , ExecSpace + , Kokkos::Experimental::ScatterSum + , duplication + , contribution + > + persistent_view("persistent", n); + auto result_view = 
persistent_view.subview(); + contribute(result_view, persistent_view); + } +} + +template +struct TestDuplicatedScatterView { + TestDuplicatedScatterView(int n) { + test_scatter_view_config(n); + test_scatter_view_config(n); + } +}; + +#ifdef KOKKOS_ENABLE_CUDA +// disable duplicated instantiation with CUDA until +// UniqueToken can support it +template <> +struct TestDuplicatedScatterView { + TestDuplicatedScatterView(int) { + } +}; +#endif + +template +void test_scatter_view(int n) +{ + // all of these configurations should compile okay, but only some of them are + // correct and/or sensible in terms of memory use + Kokkos::Experimental::UniqueToken unique_token{ExecSpace()}; + + // no atomics or duplication is only sensible if the execution space + // is running essentially in serial (doesn't have to be Serial though, + // we also test OpenMP with one thread: LAMMPS cares about that) + if (unique_token.size() == 1) { + test_scatter_view_config(n); + } + test_scatter_view_config(n); + + TestDuplicatedScatterView duptest(n); +} + +} // namespace Test + +#endif //KOKKOS_TEST_UNORDERED_MAP_HPP + + diff --git a/lib/kokkos/containers/unit_tests/TestSerial.cpp b/lib/kokkos/containers/unit_tests/TestSerial.cpp index 1b9b5a2da3..c9b7392d48 100644 --- a/lib/kokkos/containers/unit_tests/TestSerial.cpp +++ b/lib/kokkos/containers/unit_tests/TestSerial.cpp @@ -58,6 +58,7 @@ #include #include #include +#include #include @@ -148,6 +149,11 @@ TEST_F( serial, bitset ) test_dualview_combinations(size); \ } +#define SERIAL_SCATTERVIEW_TEST( size ) \ + TEST_F( serial, scatterview_##size##x) { \ + test_scatter_view(size); \ + } + SERIAL_INSERT_TEST(close, 100000, 90000, 100, 500, true) SERIAL_INSERT_TEST(far, 100000, 90000, 100, 500, false) SERIAL_FAILED_INSERT_TEST( 10000, 1000 ) @@ -157,6 +163,10 @@ SERIAL_VECTOR_COMBINE_TEST( 10 ) SERIAL_VECTOR_COMBINE_TEST( 3057 ) SERIAL_DUALVIEW_COMBINE_TEST( 10 ) +SERIAL_SCATTERVIEW_TEST( 10 ) + +SERIAL_SCATTERVIEW_TEST( 1000000 ) + #undef 
SERIAL_INSERT_TEST #undef SERIAL_FAILED_INSERT_TEST #undef SERIAL_ASSIGNEMENT_TEST diff --git a/lib/kokkos/core/CMakeLists.txt b/lib/kokkos/core/CMakeLists.txt index 42fce6b2f2..93db0d2ecf 100644 --- a/lib/kokkos/core/CMakeLists.txt +++ b/lib/kokkos/core/CMakeLists.txt @@ -2,7 +2,9 @@ TRIBITS_SUBPACKAGE(Core) -ADD_SUBDIRECTORY(src) +IF(KOKKOS_HAS_TRILINOS) + ADD_SUBDIRECTORY(src) +ENDIF() TRIBITS_ADD_TEST_DIRECTORIES(unit_test) TRIBITS_ADD_TEST_DIRECTORIES(perf_test) diff --git a/lib/kokkos/core/perf_test/CMakeLists.txt b/lib/kokkos/core/perf_test/CMakeLists.txt index 9f19a2a73e..84c49a7713 100644 --- a/lib/kokkos/core/perf_test/CMakeLists.txt +++ b/lib/kokkos/core/perf_test/CMakeLists.txt @@ -2,6 +2,14 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR}) +IF(NOT KOKKOS_HAS_TRILINOS) + IF(KOKKOS_SEPARATE_LIBS) + set(TEST_LINK_TARGETS kokkoscore) + ELSE() + set(TEST_LINK_TARGETS kokkos) + ENDIF() +ENDIF() + # warning: PerfTest_CustomReduction.cpp uses # ../../algorithms/src/Kokkos_Random.hpp # we'll just allow it to be included, but note @@ -23,7 +31,7 @@ TRIBITS_ADD_EXECUTABLE( PerfTestExec SOURCES ${SOURCES} COMM serial mpi - TESTONLYLIBS kokkos_gtest + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) TRIBITS_ADD_TEST( diff --git a/lib/kokkos/core/perf_test/Makefile b/lib/kokkos/core/perf_test/Makefile index 877b53020a..cdb8e03c1e 100644 --- a/lib/kokkos/core/perf_test/Makefile +++ b/lib/kokkos/core/perf_test/Makefile @@ -17,7 +17,8 @@ endif CXXFLAGS = -O3 #CXXFLAGS += -DGENERIC_REDUCER LINK ?= $(CXX) -LDFLAGS ?= -lpthread +LDFLAGS ?= +override LDFLAGS += -lpthread include $(KOKKOS_PATH)/Makefile.kokkos diff --git a/lib/kokkos/core/src/CMakeLists.txt b/lib/kokkos/core/src/CMakeLists.txt index 0d5d97a829..1914b6ba96 100644 --- a/lib/kokkos/core/src/CMakeLists.txt +++ b/lib/kokkos/core/src/CMakeLists.txt @@ -1,15 +1,4 @@ -TRIBITS_ADD_OPTION_AND_DEFINE( - Kokkos_ENABLE_Serial - 
KOKKOS_HAVE_SERIAL - "Whether to enable the Kokkos::Serial device. This device executes \"parallel\" kernels sequentially on a single CPU thread. It is enabled by default. If you disable this device, please enable at least one other CPU device, such as Kokkos::OpenMP or Kokkos::Threads." - ON - ) - -ASSERT_DEFINED(${PROJECT_NAME}_ENABLE_CXX11) -ASSERT_DEFINED(${PACKAGE_NAME}_ENABLE_CUDA) - -TRIBITS_CONFIGURE_FILE(${PACKAGE_NAME}_config.h) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) @@ -20,68 +9,90 @@ SET(TRILINOS_INCDIR ${CMAKE_INSTALL_PREFIX}/${${PROJECT_NAME}_INSTALL_INCLUDE_DI #----------------------------------------------------------------------------- -SET(HEADERS_PUBLIC "") -SET(HEADERS_PRIVATE "") -SET(SOURCES "") +IF(KOKKOS_LEGACY_TRIBITS) -FILE(GLOB HEADERS_PUBLIC Kokkos*.hpp) -LIST( APPEND HEADERS_PUBLIC ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME}_config.h ) + ASSERT_DEFINED(${PROJECT_NAME}_ENABLE_CXX11) + ASSERT_DEFINED(${PACKAGE_NAME}_ENABLE_CUDA) + + SET(HEADERS_PUBLIC "") + SET(HEADERS_PRIVATE "") + SET(SOURCES "") + + FILE(GLOB HEADERS_PUBLIC Kokkos*.hpp) + LIST( APPEND HEADERS_PUBLIC ${CMAKE_BINARY_DIR}/${PACKAGE_NAME}_config.h ) + + #----------------------------------------------------------------------------- + + FILE(GLOB HEADERS_IMPL impl/*.hpp) + FILE(GLOB SOURCES_IMPL impl/*.cpp) + + LIST(APPEND HEADERS_PRIVATE ${HEADERS_IMPL} ) + LIST(APPEND SOURCES ${SOURCES_IMPL} ) + + INSTALL(FILES ${HEADERS_IMPL} DESTINATION ${TRILINOS_INCDIR}/impl/) + + #----------------------------------------------------------------------------- + + FILE(GLOB HEADERS_THREADS Threads/*.hpp) + FILE(GLOB SOURCES_THREADS Threads/*.cpp) + + LIST(APPEND HEADERS_PRIVATE ${HEADERS_THREADS} ) + LIST(APPEND SOURCES ${SOURCES_THREADS} ) + + INSTALL(FILES ${HEADERS_THREADS} DESTINATION ${TRILINOS_INCDIR}/Threads/) + + #----------------------------------------------------------------------------- + + FILE(GLOB HEADERS_OPENMP 
OpenMP/*.hpp) + FILE(GLOB SOURCES_OPENMP OpenMP/*.cpp) + + LIST(APPEND HEADERS_PRIVATE ${HEADERS_OPENMP} ) + LIST(APPEND SOURCES ${SOURCES_OPENMP} ) + + INSTALL(FILES ${HEADERS_OPENMP} DESTINATION ${TRILINOS_INCDIR}/OpenMP/) + + #----------------------------------------------------------------------------- + + FILE(GLOB HEADERS_CUDA Cuda/*.hpp) + FILE(GLOB SOURCES_CUDA Cuda/*.cpp) + + LIST(APPEND HEADERS_PRIVATE ${HEADERS_CUDA} ) + LIST(APPEND SOURCES ${SOURCES_CUDA} ) + + INSTALL(FILES ${HEADERS_CUDA} DESTINATION ${TRILINOS_INCDIR}/Cuda/) + + #----------------------------------------------------------------------------- + FILE(GLOB HEADERS_QTHREADS Qthreads/*.hpp) + FILE(GLOB SOURCES_QTHREADS Qthreads/*.cpp) + + LIST(APPEND HEADERS_PRIVATE ${HEADERS_QTHREADS} ) + LIST(APPEND SOURCES ${SOURCES_QTHREADS} ) + + INSTALL(FILES ${HEADERS_QTHREADS} DESTINATION ${TRILINOS_INCDIR}/Qthreads/) + + TRIBITS_ADD_LIBRARY( + kokkoscore + HEADERS ${HEADERS_PUBLIC} + NOINSTALLHEADERS ${HEADERS_PRIVATE} + SOURCES ${SOURCES} + DEPLIBS + ) #----------------------------------------------------------------------------- +# In the new build system, sources are calculated by Makefile.kokkos +else() -FILE(GLOB HEADERS_IMPL impl/*.hpp) -FILE(GLOB SOURCES_IMPL impl/*.cpp) + INSTALL (DIRECTORY + "${CMAKE_CURRENT_SOURCE_DIR}/" + DESTINATION ${TRILINOS_INCDIR} + FILES_MATCHING PATTERN "*.hpp" + ) -LIST(APPEND HEADERS_PRIVATE ${HEADERS_IMPL} ) -LIST(APPEND SOURCES ${SOURCES_IMPL} ) - -INSTALL(FILES ${HEADERS_IMPL} DESTINATION ${TRILINOS_INCDIR}/impl/) + TRIBITS_ADD_LIBRARY( + kokkoscore + SOURCES ${KOKKOS_CORE_SRCS} + DEPLIBS + ) +endif() #----------------------------------------------------------------------------- - -FILE(GLOB HEADERS_THREADS Threads/*.hpp) -FILE(GLOB SOURCES_THREADS Threads/*.cpp) - -LIST(APPEND HEADERS_PRIVATE ${HEADERS_THREADS} ) -LIST(APPEND SOURCES ${SOURCES_THREADS} ) - -INSTALL(FILES ${HEADERS_THREADS} DESTINATION ${TRILINOS_INCDIR}/Threads/) - 
-#----------------------------------------------------------------------------- - -FILE(GLOB HEADERS_OPENMP OpenMP/*.hpp) -FILE(GLOB SOURCES_OPENMP OpenMP/*.cpp) - -LIST(APPEND HEADERS_PRIVATE ${HEADERS_OPENMP} ) -LIST(APPEND SOURCES ${SOURCES_OPENMP} ) - -INSTALL(FILES ${HEADERS_OPENMP} DESTINATION ${TRILINOS_INCDIR}/OpenMP/) - -#----------------------------------------------------------------------------- - -FILE(GLOB HEADERS_CUDA Cuda/*.hpp) -FILE(GLOB SOURCES_CUDA Cuda/*.cpp) - -LIST(APPEND HEADERS_PRIVATE ${HEADERS_CUDA} ) -LIST(APPEND SOURCES ${SOURCES_CUDA} ) - -INSTALL(FILES ${HEADERS_CUDA} DESTINATION ${TRILINOS_INCDIR}/Cuda/) - -#----------------------------------------------------------------------------- -FILE(GLOB HEADERS_QTHREADS Qthreads/*.hpp) -FILE(GLOB SOURCES_QTHREADS Qthreads/*.cpp) - -LIST(APPEND HEADERS_PRIVATE ${HEADERS_QTHREADS} ) -LIST(APPEND SOURCES ${SOURCES_QTHREADS} ) - -INSTALL(FILES ${HEADERS_QTHREADS} DESTINATION ${TRILINOS_INCDIR}/Qthreads/) - -#----------------------------------------------------------------------------- - -TRIBITS_ADD_LIBRARY( - kokkoscore - HEADERS ${HEADERS_PUBLIC} - NOINSTALLHEADERS ${HEADERS_PRIVATE} - SOURCES ${SOURCES} - DEPLIBS - ) diff --git a/lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp b/lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp index 33e247f14b..33f77ea835 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp @@ -366,7 +366,7 @@ SharedAllocationRecord< Kokkos::CudaSpace , void >:: if(Kokkos::Profiling::profileLibraryLoaded()) { SharedAllocationHeader header ; - Kokkos::Impl::DeepCopy::DeepCopy( & header , RecordBase::m_alloc_ptr , sizeof(SharedAllocationHeader) ); + Kokkos::Impl::DeepCopy( & header , RecordBase::m_alloc_ptr , sizeof(SharedAllocationHeader) ); Kokkos::Profiling::deallocateData( Kokkos::Profiling::SpaceHandle(Kokkos::CudaSpace::name()),header.m_label, @@ -446,7 +446,7 @@ SharedAllocationRecord( const Kokkos::CudaSpace & 
arg_space ); // Copy to device memory - Kokkos::Impl::DeepCopy::DeepCopy( RecordBase::m_alloc_ptr , & header , sizeof(SharedAllocationHeader) ); + Kokkos::Impl::DeepCopy( RecordBase::m_alloc_ptr , & header , sizeof(SharedAllocationHeader) ); } SharedAllocationRecord< Kokkos::CudaUVMSpace , void >:: @@ -655,7 +655,7 @@ SharedAllocationRecord< Kokkos::CudaSpace , void >::get_record( void * alloc_ptr Header const * const head_cuda = alloc_ptr ? Header::get_header( alloc_ptr ) : (Header*) 0 ; if ( alloc_ptr ) { - Kokkos::Impl::DeepCopy::DeepCopy( & head , head_cuda , sizeof(SharedAllocationHeader) ); + Kokkos::Impl::DeepCopy( & head , head_cuda , sizeof(SharedAllocationHeader) ); } RecordCuda * const record = alloc_ptr ? static_cast< RecordCuda * >( head.m_record ) : (RecordCuda *) 0 ; @@ -724,7 +724,7 @@ print_records( std::ostream & s , const Kokkos::CudaSpace & , bool detail ) if ( detail ) { do { if ( r->m_alloc_ptr ) { - Kokkos::Impl::DeepCopy::DeepCopy( & head , r->m_alloc_ptr , sizeof(SharedAllocationHeader) ); + Kokkos::Impl::DeepCopy( & head , r->m_alloc_ptr , sizeof(SharedAllocationHeader) ); } else { head.m_label[0] = 0 ; @@ -759,7 +759,7 @@ print_records( std::ostream & s , const Kokkos::CudaSpace & , bool detail ) do { if ( r->m_alloc_ptr ) { - Kokkos::Impl::DeepCopy::DeepCopy( & head , r->m_alloc_ptr , sizeof(SharedAllocationHeader) ); + Kokkos::Impl::DeepCopy( & head , r->m_alloc_ptr , sizeof(SharedAllocationHeader) ); //Formatting dependent on sizeof(uintptr_t) const char * format_string; diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp index 4e84ba236b..5fd442ffc9 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp @@ -648,10 +648,11 @@ private: typedef Kokkos::Impl::if_c< std::is_same::value, FunctorType, ReducerType> ReducerConditional; typedef typename ReducerConditional::type ReducerTypeFwd; + typedef typename 
Kokkos::Impl::if_c< std::is_same::value, WorkTag, void>::type WorkTagFwd; - typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd, WorkTag > ValueTraits ; - typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd, WorkTag > ValueInit ; - typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd, WorkTag > ValueJoin ; + typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd, WorkTagFwd > ValueTraits ; + typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd, WorkTagFwd > ValueInit ; + typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd, WorkTagFwd > ValueJoin ; public: @@ -721,7 +722,7 @@ public: } // Reduce with final value at blockDim.y - 1 location. - if ( cuda_single_inter_block_reduce_scan( + if ( cuda_single_inter_block_reduce_scan( ReducerConditional::select(m_functor , m_reducer) , blockIdx.x , gridDim.x , kokkos_impl_cuda_shared_memory() , m_scratch_space , m_scratch_flags ) ) { @@ -731,7 +732,7 @@ public: size_type * const global = m_unified_space ? m_unified_space : m_scratch_space ; if ( threadIdx.y == 0 ) { - Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer) , shared ); + Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTagFwd >::final( ReducerConditional::select(m_functor , m_reducer) , shared ); } if ( CudaTraits::WarpSize < word_count.value ) { __syncthreads(); } @@ -766,11 +767,11 @@ public: value_type init; ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , &init); - if(Impl::cuda_inter_block_reduction + if(Impl::cuda_inter_block_reduction (value,init,ValueJoin(ReducerConditional::select(m_functor , m_reducer)),m_scratch_space,result,m_scratch_flags,max_active_thread)) { const unsigned id = threadIdx.y*blockDim.x + threadIdx.x; if(id==0) { - Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer) , (void*) &value ); + Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTagFwd >::final( 
ReducerConditional::select(m_functor , m_reducer) , (void*) &value ); *result = value; } } @@ -875,10 +876,11 @@ private: typedef Kokkos::Impl::if_c< std::is_same::value, FunctorType, ReducerType> ReducerConditional; typedef typename ReducerConditional::type ReducerTypeFwd; + typedef typename Kokkos::Impl::if_c< std::is_same::value, WorkTag, void>::type WorkTagFwd; - typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd, WorkTag > ValueTraits ; - typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd, WorkTag > ValueInit ; - typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd, WorkTag > ValueJoin ; + typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd, WorkTagFwd > ValueTraits ; + typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd, WorkTagFwd > ValueInit ; + typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd, WorkTagFwd > ValueJoin ; public: @@ -942,7 +944,7 @@ public: // Reduce with final value at blockDim.y - 1 location. // Problem: non power-of-two blockDim - if ( cuda_single_inter_block_reduce_scan( + if ( cuda_single_inter_block_reduce_scan( ReducerConditional::select(m_functor , m_reducer) , blockIdx.x , gridDim.x , kokkos_impl_cuda_shared_memory() , m_scratch_space , m_scratch_flags ) ) { @@ -951,7 +953,7 @@ public: size_type * const global = m_unified_space ? 
m_unified_space : m_scratch_space ; if ( threadIdx.y == 0 ) { - Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer) , shared ); + Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTagFwd >::final( ReducerConditional::select(m_functor , m_reducer) , shared ); } if ( CudaTraits::WarpSize < word_count.value ) { __syncthreads(); } @@ -983,11 +985,11 @@ public: value_type init; ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , &init); - if(Impl::cuda_inter_block_reduction + if(Impl::cuda_inter_block_reduction (value,init,ValueJoin(ReducerConditional::select(m_functor , m_reducer)),m_scratch_space,result,m_scratch_flags,max_active_thread)) { const unsigned id = threadIdx.y*blockDim.x + threadIdx.x; if(id==0) { - Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer) , (void*) &value ); + Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTagFwd >::final( ReducerConditional::select(m_functor , m_reducer) , (void*) &value ); *result = value; } } @@ -1100,10 +1102,11 @@ private: typedef Kokkos::Impl::if_c< std::is_same::value, FunctorType, ReducerType> ReducerConditional; typedef typename ReducerConditional::type ReducerTypeFwd; + typedef typename Kokkos::Impl::if_c< std::is_same::value, WorkTag, void>::type WorkTagFwd; - typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd, WorkTag > ValueTraits ; - typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd, WorkTag > ValueInit ; - typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd, WorkTag > ValueJoin ; + typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd, WorkTagFwd > ValueTraits ; + typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd, WorkTagFwd > ValueInit ; + typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd, WorkTagFwd > ValueJoin ; typedef typename ValueTraits::pointer_type pointer_type ; typedef typename ValueTraits::reference_type reference_type ; @@ -1222,7 
+1225,7 @@ public: size_type * const global = m_unified_space ? m_unified_space : m_scratch_space ; if ( threadIdx.y == 0 ) { - Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer) , shared ); + Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTagFwd >::final( ReducerConditional::select(m_functor , m_reducer) , shared ); } if ( CudaTraits::WarpSize < word_count.value ) { __syncthreads(); } @@ -1260,7 +1263,7 @@ public: (value,init,ValueJoin(ReducerConditional::select(m_functor , m_reducer)),m_scratch_space,result,m_scratch_flags,blockDim.y)) { const unsigned id = threadIdx.y*blockDim.x + threadIdx.x; if(id==0) { - Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer) , (void*) &value ); + Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTagFwd >::final( ReducerConditional::select(m_functor , m_reducer) , (void*) &value ); *result = value; } } diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp index 709cbbd534..a478396910 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp @@ -69,7 +69,7 @@ void cuda_shfl( T & out , T const & in , int lane , typename std::enable_if< sizeof(int) == sizeof(T) , int >::type width ) { *reinterpret_cast(&out) = - __shfl( *reinterpret_cast(&in) , lane , width ); + KOKKOS_IMPL_CUDA_SHFL( *reinterpret_cast(&in) , lane , width ); } template< typename T > @@ -83,7 +83,7 @@ void cuda_shfl( T & out , T const & in , int lane , for ( int i = 0 ; i < N ; ++i ) { reinterpret_cast(&out)[i] = - __shfl( reinterpret_cast(&in)[i] , lane , width ); + KOKKOS_IMPL_CUDA_SHFL( reinterpret_cast(&in)[i] , lane , width ); } } @@ -95,7 +95,7 @@ void cuda_shfl_down( T & out , T const & in , int delta , typename std::enable_if< sizeof(int) == sizeof(T) , int >::type width ) { *reinterpret_cast(&out) = - 
__shfl_down( *reinterpret_cast(&in) , delta , width ); + KOKKOS_IMPL_CUDA_SHFL_DOWN( *reinterpret_cast(&in) , delta , width ); } template< typename T > @@ -109,7 +109,7 @@ void cuda_shfl_down( T & out , T const & in , int delta , for ( int i = 0 ; i < N ; ++i ) { reinterpret_cast(&out)[i] = - __shfl_down( reinterpret_cast(&in)[i] , delta , width ); + KOKKOS_IMPL_CUDA_SHFL_DOWN( reinterpret_cast(&in)[i] , delta , width ); } } @@ -121,7 +121,7 @@ void cuda_shfl_up( T & out , T const & in , int delta , typename std::enable_if< sizeof(int) == sizeof(T) , int >::type width ) { *reinterpret_cast(&out) = - __shfl_up( *reinterpret_cast(&in) , delta , width ); + KOKKOS_IMPL_CUDA_SHFL_UP( *reinterpret_cast(&in) , delta , width ); } template< typename T > @@ -135,7 +135,7 @@ void cuda_shfl_up( T & out , T const & in , int delta , for ( int i = 0 ; i < N ; ++i ) { reinterpret_cast(&out)[i] = - __shfl_up( reinterpret_cast(&in)[i] , delta , width ); + KOKKOS_IMPL_CUDA_SHFL_UP( reinterpret_cast(&in)[i] , delta , width ); } } @@ -268,31 +268,31 @@ bool cuda_inter_block_reduction( typename FunctorValueTraits< FunctorType , ArgT if( id + 1 < int(gridDim.x) ) join(value, tmp); } - int active = __ballot(1); + int active = KOKKOS_IMPL_CUDA_BALLOT(1); if (int(blockDim.x*blockDim.y) > 2) { value_type tmp = Kokkos::shfl_down(value, 2,32); if( id + 2 < int(gridDim.x) ) join(value, tmp); } - active += __ballot(1); + active += KOKKOS_IMPL_CUDA_BALLOT(1); if (int(blockDim.x*blockDim.y) > 4) { value_type tmp = Kokkos::shfl_down(value, 4,32); if( id + 4 < int(gridDim.x) ) join(value, tmp); } - active += __ballot(1); + active += KOKKOS_IMPL_CUDA_BALLOT(1); if (int(blockDim.x*blockDim.y) > 8) { value_type tmp = Kokkos::shfl_down(value, 8,32); if( id + 8 < int(gridDim.x) ) join(value, tmp); } - active += __ballot(1); + active += KOKKOS_IMPL_CUDA_BALLOT(1); if (int(blockDim.x*blockDim.y) > 16) { value_type tmp = Kokkos::shfl_down(value, 16,32); if( id + 16 < int(gridDim.x) ) join(value, tmp); } - 
active += __ballot(1); + active += KOKKOS_IMPL_CUDA_BALLOT(1); } } //The last block has in its thread=0 the global reduction value through "value" @@ -432,31 +432,31 @@ cuda_inter_block_reduction( const ReducerType& reducer, if( id + 1 < int(gridDim.x) ) reducer.join(value, tmp); } - int active = __ballot(1); + int active = KOKKOS_IMPL_CUDA_BALLOT(1); if (int(blockDim.x*blockDim.y) > 2) { value_type tmp = Kokkos::shfl_down(value, 2,32); if( id + 2 < int(gridDim.x) ) reducer.join(value, tmp); } - active += __ballot(1); + active += KOKKOS_IMPL_CUDA_BALLOT(1); if (int(blockDim.x*blockDim.y) > 4) { value_type tmp = Kokkos::shfl_down(value, 4,32); if( id + 4 < int(gridDim.x) ) reducer.join(value, tmp); } - active += __ballot(1); + active += KOKKOS_IMPL_CUDA_BALLOT(1); if (int(blockDim.x*blockDim.y) > 8) { value_type tmp = Kokkos::shfl_down(value, 8,32); if( id + 8 < int(gridDim.x) ) reducer.join(value, tmp); } - active += __ballot(1); + active += KOKKOS_IMPL_CUDA_BALLOT(1); if (int(blockDim.x*blockDim.y) > 16) { value_type tmp = Kokkos::shfl_down(value, 16,32); if( id + 16 < int(gridDim.x) ) reducer.join(value, tmp); } - active += __ballot(1); + active += KOKKOS_IMPL_CUDA_BALLOT(1); } } diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_UniqueToken.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_UniqueToken.hpp index e11ae4798f..1ff4ff3540 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_UniqueToken.hpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_UniqueToken.hpp @@ -73,16 +73,16 @@ public: KOKKOS_INLINE_FUNCTION UniqueToken() : m_buffer(0), m_count(0) {} - KOKKOS_INLINE_FUNCTION + KOKKOS_FUNCTION_DEFAULTED UniqueToken( const UniqueToken & ) = default; - KOKKOS_INLINE_FUNCTION + KOKKOS_FUNCTION_DEFAULTED UniqueToken( UniqueToken && ) = default; - KOKKOS_INLINE_FUNCTION + KOKKOS_FUNCTION_DEFAULTED UniqueToken & operator=( const UniqueToken & ) = default ; - KOKKOS_INLINE_FUNCTION + KOKKOS_FUNCTION_DEFAULTED UniqueToken & operator=( UniqueToken && ) = default ; /// \brief upper 
bound for acquired values, i.e. 0 <= value < size() diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Vectorization.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Vectorization.hpp index 99d8fcc999..264f77b3bc 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Vectorization.hpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Vectorization.hpp @@ -47,7 +47,7 @@ #ifdef KOKKOS_ENABLE_CUDA #include - +#include namespace Kokkos { @@ -91,12 +91,12 @@ namespace Impl { KOKKOS_INLINE_FUNCTION int shfl(const int &val, const int& srcLane, const int& width ) { - return __shfl(val,srcLane,width); + return KOKKOS_IMPL_CUDA_SHFL(val,srcLane,width); } KOKKOS_INLINE_FUNCTION float shfl(const float &val, const int& srcLane, const int& width ) { - return __shfl(val,srcLane,width); + return KOKKOS_IMPL_CUDA_SHFL(val,srcLane,width); } template @@ -105,7 +105,7 @@ namespace Impl { ) { Scalar tmp1 = val; float tmp = *reinterpret_cast(&tmp1); - tmp = __shfl(tmp,srcLane,width); + tmp = KOKKOS_IMPL_CUDA_SHFL(tmp,srcLane,width); return *reinterpret_cast(&tmp); } @@ -113,8 +113,8 @@ namespace Impl { double shfl(const double &val, const int& srcLane, const int& width) { int lo = __double2loint(val); int hi = __double2hiint(val); - lo = __shfl(lo,srcLane,width); - hi = __shfl(hi,srcLane,width); + lo = KOKKOS_IMPL_CUDA_SHFL(lo,srcLane,width); + hi = KOKKOS_IMPL_CUDA_SHFL(hi,srcLane,width); return __hiloint2double(hi,lo); } @@ -123,8 +123,8 @@ namespace Impl { Scalar shfl(const Scalar &val, const int& srcLane, const typename Impl::enable_if< (sizeof(Scalar) == 8) ,int>::type& width) { int lo = __double2loint(*reinterpret_cast(&val)); int hi = __double2hiint(*reinterpret_cast(&val)); - lo = __shfl(lo,srcLane,width); - hi = __shfl(hi,srcLane,width); + lo = KOKKOS_IMPL_CUDA_SHFL(lo,srcLane,width); + hi = KOKKOS_IMPL_CUDA_SHFL(hi,srcLane,width); const double tmp = __hiloint2double(hi,lo); return *(reinterpret_cast(&tmp)); } @@ -137,18 +137,18 @@ namespace Impl { s_val = val; for(int i = 0; i @@ -156,7 +156,7 
@@ namespace Impl { Scalar shfl_down(const Scalar &val, const int& delta, const typename Impl::enable_if< (sizeof(Scalar) == 4) , int >::type & width) { Scalar tmp1 = val; float tmp = *reinterpret_cast(&tmp1); - tmp = __shfl_down(tmp,delta,width); + tmp = KOKKOS_IMPL_CUDA_SHFL_DOWN(tmp,delta,width); return *reinterpret_cast(&tmp); } @@ -164,8 +164,8 @@ namespace Impl { double shfl_down(const double &val, const int& delta, const int& width) { int lo = __double2loint(val); int hi = __double2hiint(val); - lo = __shfl_down(lo,delta,width); - hi = __shfl_down(hi,delta,width); + lo = KOKKOS_IMPL_CUDA_SHFL_DOWN(lo,delta,width); + hi = KOKKOS_IMPL_CUDA_SHFL_DOWN(hi,delta,width); return __hiloint2double(hi,lo); } @@ -174,8 +174,8 @@ namespace Impl { Scalar shfl_down(const Scalar &val, const int& delta, const typename Impl::enable_if< (sizeof(Scalar) == 8) , int >::type & width) { int lo = __double2loint(*reinterpret_cast(&val)); int hi = __double2hiint(*reinterpret_cast(&val)); - lo = __shfl_down(lo,delta,width); - hi = __shfl_down(hi,delta,width); + lo = KOKKOS_IMPL_CUDA_SHFL_DOWN(lo,delta,width); + hi = KOKKOS_IMPL_CUDA_SHFL_DOWN(hi,delta,width); const double tmp = __hiloint2double(hi,lo); return *(reinterpret_cast(&tmp)); } @@ -188,18 +188,18 @@ namespace Impl { s_val = val; for(int i = 0; i @@ -207,7 +207,7 @@ namespace Impl { Scalar shfl_up(const Scalar &val, const int& delta, const typename Impl::enable_if< (sizeof(Scalar) == 4) , int >::type & width) { Scalar tmp1 = val; float tmp = *reinterpret_cast(&tmp1); - tmp = __shfl_up(tmp,delta,width); + tmp = KOKKOS_IMPL_CUDA_SHFL_UP(tmp,delta,width); return *reinterpret_cast(&tmp); } @@ -215,8 +215,8 @@ namespace Impl { double shfl_up(const double &val, const int& delta, const int& width ) { int lo = __double2loint(val); int hi = __double2hiint(val); - lo = __shfl_up(lo,delta,width); - hi = __shfl_up(hi,delta,width); + lo = KOKKOS_IMPL_CUDA_SHFL_UP(lo,delta,width); + hi = KOKKOS_IMPL_CUDA_SHFL_UP(hi,delta,width); return 
__hiloint2double(hi,lo); } @@ -225,8 +225,8 @@ namespace Impl { Scalar shfl_up(const Scalar &val, const int& delta, const typename Impl::enable_if< (sizeof(Scalar) == 8) , int >::type & width) { int lo = __double2loint(*reinterpret_cast(&val)); int hi = __double2hiint(*reinterpret_cast(&val)); - lo = __shfl_up(lo,delta,width); - hi = __shfl_up(hi,delta,width); + lo = KOKKOS_IMPL_CUDA_SHFL_UP(lo,delta,width); + hi = KOKKOS_IMPL_CUDA_SHFL_UP(hi,delta,width); const double tmp = __hiloint2double(hi,lo); return *(reinterpret_cast(&tmp)); } @@ -239,7 +239,7 @@ namespace Impl { s_val = val; for(int i = 0; i +#if ( CUDA_VERSION < 9000 ) +#define KOKKOS_IMPL_CUDA_BALLOT(x) __ballot(x) +#define KOKKOS_IMPL_CUDA_SHFL(x,y,z) __shfl(x,y,z) +#define KOKKOS_IMPL_CUDA_SHFL_UP(x,y,z) __shfl_up(x,y,z) +#define KOKKOS_IMPL_CUDA_SHFL_DOWN(x,y,z) __shfl_down(x,y,z) +#else +#define KOKKOS_IMPL_CUDA_BALLOT(x) __ballot_sync(0xffffffff,x) +#define KOKKOS_IMPL_CUDA_SHFL(x,y,z) __shfl_sync(0xffffffff,x,y,z) +#define KOKKOS_IMPL_CUDA_SHFL_UP(x,y,z) __shfl_up_sync(0xffffffff,x,y,z) +#define KOKKOS_IMPL_CUDA_SHFL_DOWN(x,y,z) __shfl_down_sync(0xffffffff,x,y,z) +#endif diff --git a/lib/kokkos/core/src/Kokkos_Macros.hpp b/lib/kokkos/core/src/Kokkos_Macros.hpp index d0d6e76e19..b51f38efb2 100644 --- a/lib/kokkos/core/src/Kokkos_Macros.hpp +++ b/lib/kokkos/core/src/Kokkos_Macros.hpp @@ -251,7 +251,7 @@ #endif #endif -#if defined( __PGIC__ ) && !defined( __GNUC__ ) +#if defined( __PGIC__ ) #define KOKKOS_COMPILER_PGI __PGIC__*100+__PGIC_MINOR__*10+__PGIC_PATCHLEVEL__ #if ( 1540 > KOKKOS_COMPILER_PGI ) @@ -268,7 +268,9 @@ #define KOKKOS_ENABLE_PRAGMA_UNROLL 1 #define KOKKOS_ENABLE_PRAGMA_LOOPCOUNT 1 #define KOKKOS_ENABLE_PRAGMA_VECTOR 1 - #define KOKKOS_ENABLE_PRAGMA_SIMD 1 + #if ( 1800 > KOKKOS_COMPILER_INTEL ) + #define KOKKOS_ENABLE_PRAGMA_SIMD 1 + #endif #if ( __INTEL_COMPILER > 1400 ) #define KOKKOS_ENABLE_PRAGMA_IVDEP 1 @@ -511,5 +513,11 @@ #define KOKKOS_ENABLE_TASKDAG #endif + +#if defined ( 
KOKKOS_ENABLE_CUDA ) + #if ( 9000 <= CUDA_VERSION ) + #define KOKKOS_IMPL_CUDA_VERSION_9_WORKAROUND + #endif +#endif #endif // #ifndef KOKKOS_MACROS_HPP diff --git a/lib/kokkos/core/src/Kokkos_MemoryPool.hpp b/lib/kokkos/core/src/Kokkos_MemoryPool.hpp index 023310cf00..9199725767 100644 --- a/lib/kokkos/core/src/Kokkos_MemoryPool.hpp +++ b/lib/kokkos/core/src/Kokkos_MemoryPool.hpp @@ -51,6 +51,27 @@ #include #include +namespace Kokkos { +namespace Impl { +/* Report violation of size constraints: + * min_block_alloc_size <= max_block_alloc_size + * max_block_alloc_size <= min_superblock_size + * min_superblock_size <= max_superblock_size + * min_superblock_size <= min_total_alloc_size + * min_superblock_size <= min_block_alloc_size * + * max_block_per_superblock + */ +void memory_pool_bounds_verification + ( size_t min_block_alloc_size + , size_t max_block_alloc_size + , size_t min_superblock_size + , size_t max_superblock_size + , size_t max_block_per_superblock + , size_t min_total_alloc_size + ); +} +} + namespace Kokkos { template< typename DeviceType > @@ -332,39 +353,23 @@ public: //-------------------------------------------------- - { - /* Enforce size constraints: - * min_block_alloc_size <= max_block_alloc_size - * max_block_alloc_size <= min_superblock_size - * min_superblock_size <= max_superblock_size - * min_superblock_size <= min_total_alloc_size - * min_superblock_size <= min_block_alloc_size * - * max_block_per_superblock - */ + /* Enforce size constraints: + * min_block_alloc_size <= max_block_alloc_size + * max_block_alloc_size <= min_superblock_size + * min_superblock_size <= max_superblock_size + * min_superblock_size <= min_total_alloc_size + * min_superblock_size <= min_block_alloc_size * + * max_block_per_superblock + */ - const size_t max_superblock = - min_block_alloc_size * max_block_per_superblock ; - - if ( ( size_t(max_superblock_size) < min_superblock_size ) || - ( min_total_alloc_size < min_superblock_size ) || - ( max_superblock < 
min_superblock_size ) || - ( min_superblock_size < max_block_alloc_size ) || - ( max_block_alloc_size < min_block_alloc_size ) ) { - -#if 1 - printf( " MemoryPool min_block_alloc_size(%ld) max_block_alloc_size(%ld) min_superblock_size(%ld) min_total_alloc_size(%ld) ; max_superblock_size(%ld) max_block_per_superblock(%ld)\n" - , min_block_alloc_size + Kokkos::Impl::memory_pool_bounds_verification + ( min_block_alloc_size , max_block_alloc_size , min_superblock_size + , max_superblock_size + , max_block_per_superblock , min_total_alloc_size - , size_t(max_superblock_size) - , size_t(max_block_per_superblock) ); -#endif - - Kokkos::abort("Kokkos MemoryPool size constraint violation"); - } - } //-------------------------------------------------- // Block and superblock size is power of two: diff --git a/lib/kokkos/core/src/Kokkos_NumericTraits.hpp b/lib/kokkos/core/src/Kokkos_NumericTraits.hpp index a825fd54d3..7264ba7f38 100644 --- a/lib/kokkos/core/src/Kokkos_NumericTraits.hpp +++ b/lib/kokkos/core/src/Kokkos_NumericTraits.hpp @@ -204,6 +204,7 @@ struct reduction_identity { KOKKOS_FORCEINLINE_FUNCTION constexpr static double min() {return DBL_MAX;} }; +#if !defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA ) template<> struct reduction_identity { KOKKOS_FORCEINLINE_FUNCTION constexpr static long double sum() {return static_cast(0.0);} @@ -211,6 +212,7 @@ struct reduction_identity { KOKKOS_FORCEINLINE_FUNCTION constexpr static long double max() {return -LDBL_MAX;} KOKKOS_FORCEINLINE_FUNCTION constexpr static long double min() {return LDBL_MAX;} }; +#endif } diff --git a/lib/kokkos/core/src/Kokkos_Pair.hpp b/lib/kokkos/core/src/Kokkos_Pair.hpp index 067767f2f8..b6b1596883 100644 --- a/lib/kokkos/core/src/Kokkos_Pair.hpp +++ b/lib/kokkos/core/src/Kokkos_Pair.hpp @@ -78,7 +78,7 @@ struct pair /// This calls the default constructors of T1 and T2. It won't /// compile if those default constructors are not defined and /// public. 
- KOKKOS_FORCEINLINE_FUNCTION constexpr + KOKKOS_FUNCTION_DEFAULTED constexpr pair() = default ; /// \brief Constructor that takes both elements of the pair. @@ -458,7 +458,7 @@ struct pair first_type first; enum { second = 0 }; - KOKKOS_FORCEINLINE_FUNCTION constexpr + KOKKOS_FUNCTION_DEFAULTED constexpr pair() = default ; KOKKOS_FORCEINLINE_FUNCTION constexpr diff --git a/lib/kokkos/core/src/Kokkos_Parallel.hpp b/lib/kokkos/core/src/Kokkos_Parallel.hpp index fc8d6bec81..0ceae866c4 100644 --- a/lib/kokkos/core/src/Kokkos_Parallel.hpp +++ b/lib/kokkos/core/src/Kokkos_Parallel.hpp @@ -241,7 +241,7 @@ void parallel_for( const std::string & str std::cout << "KOKKOS_DEBUG Start parallel_for kernel: " << str << std::endl; #endif - parallel_for(policy,functor,str); + ::Kokkos::parallel_for(policy,functor,str); #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES Kokkos::fence(); @@ -487,7 +487,7 @@ void parallel_scan( const std::string& str std::cout << "KOKKOS_DEBUG Start parallel_scan kernel: " << str << std::endl; #endif - parallel_scan(policy,functor,str); + ::Kokkos::parallel_scan(policy,functor,str); #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES Kokkos::fence(); diff --git a/lib/kokkos/core/src/Kokkos_Profiling_ProfileSection.hpp b/lib/kokkos/core/src/Kokkos_Profiling_ProfileSection.hpp new file mode 100644 index 0000000000..b5e58507d6 --- /dev/null +++ b/lib/kokkos/core/src/Kokkos_Profiling_ProfileSection.hpp @@ -0,0 +1,111 @@ +/* + //@HEADER + // ************************************************************************ + // + // Kokkos v. 2.0 + // Copyright (2014) Sandia Corporation + // + // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, + // the U.S. Government retains certain rights in this software. + // + // Redistribution and use in source and binary forms, with or without + // modification, are permitted provided that the following conditions are + // met: + // + // 1. 
Redistributions of source code must retain the above copyright + // notice, this list of conditions and the following disclaimer. + // + // 2. Redistributions in binary form must reproduce the above copyright + // notice, this list of conditions and the following disclaimer in the + // documentation and/or other materials provided with the distribution. + // + // 3. Neither the name of the Corporation nor the names of the + // contributors may be used to endorse or promote products derived from + // this software without specific prior written permission. + // + // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY + // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE + // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + // + // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) + // + // ************************************************************************ + //@HEADER + */ + +#ifndef KOKKOSP_PROFILE_SECTION_HPP +#define KOKKOSP_PROFILE_SECTION_HPP + +#include +#include + +#include + +namespace Kokkos { +namespace Profiling { + +class ProfilingSection { + +public: + ProfilingSection(const std::string& sectionName) : + secName(sectionName), secID(0) { // secID stays 0 unless a loaded profiling library assigns one + + #if defined( KOKKOS_ENABLE_PROFILING ) + if(Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Profiling::createProfileSection(secName, &secID); + } + #else + secID = 0; + #endif + } + + void start() { + #if defined( KOKKOS_ENABLE_PROFILING ) + if(Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Profiling::startSection(secID); + } + #endif + } + + void stop() { + #if defined( KOKKOS_ENABLE_PROFILING ) + if(Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Profiling::stopSection(secID); + } + #endif + } + + ~ProfilingSection() { + #if defined( KOKKOS_ENABLE_PROFILING ) + if(Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Profiling::destroyProfileSection(secID); + } + #endif + } + + std::string getName() { + return secName; + } + + uint32_t getSectionID() { + return secID; + } + +protected: + const std::string secName; + uint32_t secID; + +}; + +} +} + +#endif \ No newline at end of file diff --git a/lib/kokkos/core/src/Kokkos_Serial.hpp b/lib/kokkos/core/src/Kokkos_Serial.hpp index f81a17412e..e713461f8d 100644 --- a/lib/kokkos/core/src/Kokkos_Serial.hpp +++ b/lib/kokkos/core/src/Kokkos_Serial.hpp @@ -145,7 +145,7 @@ public: unsigned use_cores_per_numa = 0 , bool allow_asynchronous_threadpool = false); - static int is_initialized(); + static bool is_initialized(); /** \brief Return the maximum amount of concurrency.
*/ static int concurrency() {return 1;}; @@ -424,11 +424,13 @@ private: typedef typename Policy::work_tag WorkTag ; typedef Kokkos::Impl::if_c< std::is_same::value, FunctorType, ReducerType> ReducerConditional; + typedef typename ReducerConditional::type ReducerTypeFwd; + typedef typename Kokkos::Impl::if_c< std::is_same::value, WorkTag, void>::type WorkTagFwd; typedef FunctorAnalysis< FunctorPatternInterface::REDUCE , Policy , FunctorType > Analysis ; - typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTag > ValueInit ; + typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTagFwd > ValueInit ; typedef typename Analysis::pointer_type pointer_type ; typedef typename Analysis::reference_type reference_type ; @@ -488,7 +490,7 @@ public: this-> template exec< WorkTag >( update ); - Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >:: + Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTagFwd >:: final( ReducerConditional::select(m_functor , m_reducer) , ptr ); } @@ -675,12 +677,13 @@ private: typedef Kokkos::Impl::if_c< std::is_same::value, FunctorType, ReducerType> ReducerConditional; typedef typename ReducerConditional::type ReducerTypeFwd; + typedef typename Kokkos::Impl::if_c< std::is_same::value, WorkTag, void>::type WorkTagFwd; typedef typename ReducerTypeFwd::value_type ValueType; typedef FunctorAnalysis< FunctorPatternInterface::REDUCE , Policy , FunctorType > Analysis ; - typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTag > ValueInit ; + typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTagFwd > ValueInit ; typedef typename Analysis::pointer_type pointer_type ; typedef typename Analysis::reference_type reference_type ; @@ -735,7 +738,7 @@ public: this-> exec( update ); - Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >:: + Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTagFwd >:: final( ReducerConditional::select(m_functor , m_reducer) , ptr ); } @@ -878,8 +881,9 @@ private: typedef Kokkos::Impl::if_c< 
std::is_same::value, FunctorType, ReducerType> ReducerConditional; typedef typename ReducerConditional::type ReducerTypeFwd; + typedef typename Kokkos::Impl::if_c< std::is_same::value, WorkTag, void>::type WorkTagFwd; - typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTag > ValueInit ; + typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTagFwd > ValueInit ; typedef typename Analysis::pointer_type pointer_type ; typedef typename Analysis::reference_type reference_type ; @@ -940,7 +944,7 @@ public: this-> template exec< WorkTag >( data , update ); - Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >:: + Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTagFwd >:: final( ReducerConditional::select(m_functor , m_reducer) , ptr ); } diff --git a/lib/kokkos/core/src/Makefile b/lib/kokkos/core/src/Makefile index a917cf1656..6ee5fec716 100644 --- a/lib/kokkos/core/src/Makefile +++ b/lib/kokkos/core/src/Makefile @@ -5,51 +5,44 @@ endif PREFIX ?= /usr/local/lib/kokkos -default: messages build-lib - echo "End Build" +default: build-lib ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) - CXX = $(KOKKOS_PATH)/bin/nvcc_wrapper + CXX ?= $(KOKKOS_PATH)/bin/nvcc_wrapper else - CXX = g++ + CXX ?= g++ endif -CXXFLAGS = -O3 +CXXFLAGS ?= -O3 LINK ?= $(CXX) LDFLAGS ?= include $(KOKKOS_PATH)/Makefile.kokkos - -PWD = $(shell pwd) - -KOKKOS_HEADERS_INCLUDE = $(wildcard $(KOKKOS_PATH)/core/src/*.hpp) -KOKKOS_HEADERS_INCLUDE_IMPL = $(wildcard $(KOKKOS_PATH)/core/src/impl/*.hpp) -KOKKOS_HEADERS_INCLUDE += $(wildcard $(KOKKOS_PATH)/containers/src/*.hpp) -KOKKOS_HEADERS_INCLUDE_IMPL += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.hpp) -KOKKOS_HEADERS_INCLUDE += $(wildcard $(KOKKOS_PATH)/algorithms/src/*.hpp) +include $(KOKKOS_PATH)/core/src/Makefile.generate_header_lists +include $(KOKKOS_PATH)/core/src/Makefile.generate_build_files CONDITIONAL_COPIES = ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) - KOKKOS_HEADERS_CUDA += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp) 
CONDITIONAL_COPIES += copy-cuda endif ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) - KOKKOS_HEADERS_THREADS += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.hpp) CONDITIONAL_COPIES += copy-threads endif ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 1) - KOKKOS_HEADERS_QTHREADS += $(wildcard $(KOKKOS_PATH)/core/src/Qthreads/*.hpp) CONDITIONAL_COPIES += copy-qthreads endif ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) - KOKKOS_HEADERS_OPENMP += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.hpp) CONDITIONAL_COPIES += copy-openmp endif +ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1) + CONDITIONAL_COPIES += copy-rocm +endif + ifeq ($(KOKKOS_OS),CYGWIN) COPY_FLAG = -u endif @@ -66,104 +59,7 @@ else KOKKOS_DEBUG_CMAKE = ON endif -messages: - echo "Start Build" - -build-makefile-kokkos: - rm -f Makefile.kokkos - echo "#Global Settings used to generate this library" >> Makefile.kokkos - echo "KOKKOS_PATH = $(PREFIX)" >> Makefile.kokkos - echo "KOKKOS_DEVICES = $(KOKKOS_DEVICES)" >> Makefile.kokkos - echo "KOKKOS_ARCH = $(KOKKOS_ARCH)" >> Makefile.kokkos - echo "KOKKOS_DEBUG = $(KOKKOS_DEBUG)" >> Makefile.kokkos - echo "KOKKOS_USE_TPLS = $(KOKKOS_USE_TPLS)" >> Makefile.kokkos - echo "KOKKOS_CXX_STANDARD = $(KOKKOS_CXX_STANDARD)" >> Makefile.kokkos - echo "KOKKOS_OPTIONS = $(KOKKOS_OPTIONS)" >> Makefile.kokkos - echo "KOKKOS_CUDA_OPTIONS = $(KOKKOS_CUDA_OPTIONS)" >> Makefile.kokkos - echo "CXX ?= $(CXX)" >> Makefile.kokkos - echo "NVCC_WRAPPER ?= $(PREFIX)/bin/nvcc_wrapper" >> Makefile.kokkos - echo "" >> Makefile.kokkos - echo "#Source and Header files of Kokkos relative to KOKKOS_PATH" >> Makefile.kokkos - echo "KOKKOS_HEADERS = $(KOKKOS_HEADERS)" >> Makefile.kokkos - echo "KOKKOS_SRC = $(KOKKOS_SRC)" >> Makefile.kokkos - echo "" >> Makefile.kokkos - echo "#Variables used in application Makefiles" >> Makefile.kokkos - echo "KOKKOS_OS = $(KOKKOS_OS)" >> Makefile.kokkos - echo "KOKKOS_CPP_DEPENDS = $(KOKKOS_CPP_DEPENDS)" >> Makefile.kokkos - echo "KOKKOS_CXXFLAGS = $(KOKKOS_CXXFLAGS)" >> Makefile.kokkos 
- echo "KOKKOS_CPPFLAGS = $(KOKKOS_CPPFLAGS)" >> Makefile.kokkos - echo "KOKKOS_LINK_DEPENDS = $(KOKKOS_LINK_DEPENDS)" >> Makefile.kokkos - echo "KOKKOS_LIBS = $(KOKKOS_LIBS)" >> Makefile.kokkos - echo "KOKKOS_LDFLAGS = $(KOKKOS_LDFLAGS)" >> Makefile.kokkos - echo "" >> Makefile.kokkos - echo "#Internal settings which need to propagated for Kokkos examples" >> Makefile.kokkos - echo "KOKKOS_INTERNAL_USE_CUDA = ${KOKKOS_INTERNAL_USE_CUDA}" >> Makefile.kokkos - echo "KOKKOS_INTERNAL_USE_QTHREADS = ${KOKKOS_INTERNAL_USE_QTHREADS}" >> Makefile.kokkos - echo "KOKKOS_INTERNAL_USE_OPENMP = ${KOKKOS_INTERNAL_USE_OPENMP}" >> Makefile.kokkos - echo "KOKKOS_INTERNAL_USE_PTHREADS = ${KOKKOS_INTERNAL_USE_PTHREADS}" >> Makefile.kokkos - echo "" >> Makefile.kokkos - echo "#Fake kokkos-clean target" >> Makefile.kokkos - echo "kokkos-clean:" >> Makefile.kokkos - echo "" >> Makefile.kokkos - sed \ - -e 's|$(KOKKOS_PATH)/core/src|$(PREFIX)/include|g' \ - -e 's|$(KOKKOS_PATH)/containers/src|$(PREFIX)/include|g' \ - -e 's|$(KOKKOS_PATH)/algorithms/src|$(PREFIX)/include|g' \ - -e 's|-L$(PWD)|-L$(PREFIX)/lib|g' \ - -e 's|= libkokkos.a|= $(PREFIX)/lib/libkokkos.a|g' \ - -e 's|= KokkosCore_config.h|= $(PREFIX)/include/KokkosCore_config.h|g' Makefile.kokkos \ - > Makefile.kokkos.tmp - mv -f Makefile.kokkos.tmp Makefile.kokkos - -build-cmake-kokkos: - rm -f kokkos.cmake - echo "#Global Settings used to generate this library" >> kokkos.cmake - echo "set(KOKKOS_PATH $(PREFIX) CACHE PATH \"Kokkos installation path\")" >> kokkos.cmake - echo "set(KOKKOS_DEVICES $(KOKKOS_DEVICES) CACHE STRING \"Kokkos devices list\")" >> kokkos.cmake - echo "set(KOKKOS_ARCH $(KOKKOS_ARCH) CACHE STRING \"Kokkos architecture flags\")" >> kokkos.cmake - echo "set(KOKKOS_DEBUG $(KOKKOS_DEBUG_CMAKE) CACHE BOOL \"Kokkos debug enabled ?)\")" >> kokkos.cmake - echo "set(KOKKOS_USE_TPLS $(KOKKOS_USE_TPLS) CACHE STRING \"Kokkos templates list\")" >> kokkos.cmake - echo "set(KOKKOS_CXX_STANDARD $(KOKKOS_CXX_STANDARD) CACHE 
STRING \"Kokkos C++ standard\")" >> kokkos.cmake - echo "set(KOKKOS_OPTIONS $(KOKKOS_OPTIONS) CACHE STRING \"Kokkos options\")" >> kokkos.cmake - echo "set(KOKKOS_CUDA_OPTIONS $(KOKKOS_CUDA_OPTIONS) CACHE STRING \"Kokkos Cuda options\")" >> kokkos.cmake - echo "if(NOT $ENV{CXX})" >> kokkos.cmake - echo ' message(WARNING "You are currently using compiler $${CMAKE_CXX_COMPILER} while Kokkos was built with $(CXX) ; make sure this is the behavior you intended to be.")' >> kokkos.cmake - echo "endif()" >> kokkos.cmake - echo "if(NOT DEFINED ENV{NVCC_WRAPPER})" >> kokkos.cmake - echo " set(NVCC_WRAPPER \"$(NVCC_WRAPPER)\" CACHE FILEPATH \"Path to command nvcc_wrapper\")" >> kokkos.cmake - echo "else()" >> kokkos.cmake - echo ' set(NVCC_WRAPPER $$ENV{NVCC_WRAPPER} CACHE FILEPATH "Path to command nvcc_wrapper")' >> kokkos.cmake - echo "endif()" >> kokkos.cmake - echo "" >> kokkos.cmake - echo "#Source and Header files of Kokkos relative to KOKKOS_PATH" >> kokkos.cmake - echo "set(KOKKOS_HEADERS \"$(KOKKOS_HEADERS)\" CACHE STRING \"Kokkos headers list\")" >> kokkos.cmake - echo "set(KOKKOS_SRC \"$(KOKKOS_SRC)\" CACHE STRING \"Kokkos source list\")" >> kokkos.cmake - echo "" >> kokkos.cmake - echo "#Variables used in application Makefiles" >> kokkos.cmake - echo "set(KOKKOS_CPP_DEPENDS \"$(KOKKOS_CPP_DEPENDS)\" CACHE STRING \"\")" >> kokkos.cmake - echo "set(KOKKOS_CXXFLAGS \"$(KOKKOS_CXXFLAGS)\" CACHE STRING \"\")" >> kokkos.cmake - echo "set(KOKKOS_CPPFLAGS \"$(KOKKOS_CPPFLAGS)\" CACHE STRING \"\")" >> kokkos.cmake - echo "set(KOKKOS_LINK_DEPENDS \"$(KOKKOS_LINK_DEPENDS)\" CACHE STRING \"\")" >> kokkos.cmake - echo "set(KOKKOS_LIBS \"$(KOKKOS_LIBS)\" CACHE STRING \"\")" >> kokkos.cmake - echo "set(KOKKOS_LDFLAGS \"$(KOKKOS_LDFLAGS)\" CACHE STRING \"\")" >> kokkos.cmake - echo "" >> kokkos.cmake - echo "#Internal settings which need to propagated for Kokkos examples" >> kokkos.cmake - echo "set(KOKKOS_INTERNAL_USE_CUDA \"${KOKKOS_INTERNAL_USE_CUDA}\" CACHE STRING \"\")" >> 
kokkos.cmake - echo "set(KOKKOS_INTERNAL_USE_OPENMP \"${KOKKOS_INTERNAL_USE_OPENMP}\" CACHE STRING \"\")" >> kokkos.cmake - echo "set(KOKKOS_INTERNAL_USE_PTHREADS \"${KOKKOS_INTERNAL_USE_PTHREADS}\" CACHE STRING \"\")" >> kokkos.cmake - echo "mark_as_advanced(KOKKOS_HEADERS KOKKOS_SRC KOKKOS_INTERNAL_USE_CUDA KOKKOS_INTERNAL_USE_OPENMP KOKKOS_INTERNAL_USE_PTHREADS)" >> kokkos.cmake - echo "" >> kokkos.cmake - sed \ - -e 's|$(KOKKOS_PATH)/core/src|$(PREFIX)/include|g' \ - -e 's|$(KOKKOS_PATH)/containers/src|$(PREFIX)/include|g' \ - -e 's|$(KOKKOS_PATH)/algorithms/src|$(PREFIX)/include|g' \ - -e 's|-L$(PWD)|-L$(PREFIX)/lib|g' \ - -e 's|= libkokkos.a|= $(PREFIX)/lib/libkokkos.a|g' \ - -e 's|= KokkosCore_config.h|= $(PREFIX)/include/KokkosCore_config.h|g' kokkos.cmake \ - > kokkos.cmake.tmp - mv -f kokkos.cmake.tmp kokkos.cmake - -build-lib: build-makefile-kokkos build-cmake-kokkos $(KOKKOS_LINK_DEPENDS) +build-lib: $(KOKKOS_LINK_DEPENDS) mkdir: mkdir -p $(PREFIX) @@ -188,14 +84,18 @@ copy-openmp: mkdir mkdir -p $(PREFIX)/include/OpenMP cp $(COPY_FLAG) $(KOKKOS_HEADERS_OPENMP) $(PREFIX)/include/OpenMP -install: mkdir $(CONDITIONAL_COPIES) build-lib +copy-rocm: mkdir + mkdir -p $(PREFIX)/include/ROCm + cp $(COPY_FLAG) $(KOKKOS_HEADERS_ROCM) $(PREFIX)/include/ROCm + +install: mkdir $(CONDITIONAL_COPIES) build-lib generate_build_settings cp $(COPY_FLAG) $(NVCC_WRAPPER) $(PREFIX)/bin cp $(COPY_FLAG) $(KOKKOS_HEADERS_INCLUDE) $(PREFIX)/include cp $(COPY_FLAG) $(KOKKOS_HEADERS_INCLUDE_IMPL) $(PREFIX)/include/impl - cp $(COPY_FLAG) Makefile.kokkos $(PREFIX) - cp $(COPY_FLAG) kokkos.cmake $(PREFIX) + cp $(COPY_FLAG) $(KOKKOS_MAKEFILE) $(PREFIX) + cp $(COPY_FLAG) $(KOKKOS_CMAKEFILE) $(PREFIX) cp $(COPY_FLAG) libkokkos.a $(PREFIX)/lib - cp $(COPY_FLAG) KokkosCore_config.h $(PREFIX)/include + cp $(COPY_FLAG) $(KOKKOS_CONFIG_HEADER) $(PREFIX)/include clean: kokkos-clean - rm -f Makefile.kokkos + rm -f $(KOKKOS_MAKEFILE) $(KOKKOS_CMAKEFILE) diff --git 
a/lib/kokkos/core/src/Makefile.generate_build_files b/lib/kokkos/core/src/Makefile.generate_build_files new file mode 100644 index 0000000000..d55967f84f --- /dev/null +++ b/lib/kokkos/core/src/Makefile.generate_build_files @@ -0,0 +1,100 @@ +# This file is responsible for generating files which will be used +# by build system (make and cmake) in scenarios where the kokkos library +# gets installed before building the app + +# These files are generated by this makefile +KOKKOS_MAKEFILE=Makefile.kokkos +KOKKOS_CMAKEFILE=kokkos_generated_settings.cmake + +ifeq ($(KOKKOS_DEBUG),"no") + KOKKOS_DEBUG_CMAKE = OFF +else + KOKKOS_DEBUG_CMAKE = ON +endif + +# Functions for generating makefile and cmake file +# In calling these routines, do not put space after , +# e.g., $(call kokkos_append_var,KOKKOS_PATH,$(PREFIX)) +kokkos_append_makefile = echo $1 >> $(KOKKOS_MAKEFILE) +kokkos_append_cmakefile = echo $1 >> $(KOKKOS_CMAKEFILE) + +kokkos_setvar_cmakefile = echo set\($1 $2\) >> $(KOKKOS_CMAKEFILE) +kokkos_setlist_cmakefile = echo set\($1 \"$2\"\) >> $(KOKKOS_CMAKEFILE) + +kokkos_appendvar_makefile = echo $1 = $($(1)) >> $(KOKKOS_MAKEFILE) +kokkos_appendvar2_makefile = echo $1 ?= $($(1)) >> $(KOKKOS_MAKEFILE) +kokkos_appendvar_cmakefile = echo set\($1 $($(1)) CACHE $2 FORCE\) >> $(KOKKOS_CMAKEFILE) +kokkos_appendval_makefile = echo $1 = $2 >> $(KOKKOS_MAKEFILE) +kokkos_appendval_cmakefile = echo set\($1 $2 CACHE $3 FORCE\) >> $(KOKKOS_CMAKEFILE) + +kokkos_append_string = $(call kokkos_append_makefile,$1); $(call kokkos_append_cmakefile,$1) +kokkos_append_var = $(call kokkos_appendvar_makefile,$1); $(call kokkos_appendvar_cmakefile,$1,$2) +kokkos_append_var2 = $(call kokkos_appendvar2_makefile,$1); $(call kokkos_appendvar_cmakefile,$1,$2) +kokkos_append_varval = $(call kokkos_appendval_makefile,$1,$2); $(call kokkos_appendval_cmakefile,$1,$2,$3) + +generate_build_settings: $(KOKKOS_CONFIG_HEADER) + @rm -f $(KOKKOS_MAKEFILE) + @rm -f $(KOKKOS_CMAKEFILE) + @$(call 
kokkos_append_string, "#Global Settings used to generate this library") + @$(call kokkos_append_varval,KOKKOS_PATH,$(KOKKOS_INSTALL_PATH),'FILEPATH "Kokkos installation path"') + @$(call kokkos_append_var,KOKKOS_DEVICES,'STRING "Kokkos devices list"') + @$(call kokkos_append_var,KOKKOS_ARCH,'STRING "Kokkos architecture flags"') + @$(call kokkos_appendvar_makefile,KOKKOS_DEBUG) + @$(call kokkos_appendvar_cmakefile,KOKKOS_DEBUG_CMAKE,'BOOL "Kokkos debug enabled ?"') + @$(call kokkos_append_var,KOKKOS_USE_TPLS,'STRING "Kokkos templates list"') + @$(call kokkos_append_var,KOKKOS_CXX_STANDARD,'STRING "Kokkos C++ standard"') + @$(call kokkos_append_var,KOKKOS_OPTIONS,'STRING "Kokkos options"') + @$(call kokkos_append_var,KOKKOS_CUDA_OPTIONS,'STRING "Kokkos Cuda options"') + @$(call kokkos_appendvar2_makefile,CXX) # Overridable compiler default for the generated makefile only + @$(call kokkos_append_cmakefile,"if(NOT DEFINED ENV{NVCC_WRAPPER})") + @$(call kokkos_append_var2,NVCC_WRAPPER,'FILEPATH "Path to command nvcc_wrapper"') + @$(call kokkos_append_cmakefile,"else()") + @$(call kokkos_append_cmakefile,' set(NVCC_WRAPPER $$ENV{NVCC_WRAPPER} CACHE FILEPATH "Path to command nvcc_wrapper")') + @$(call kokkos_append_cmakefile,"endif()") + @$(call kokkos_append_string,"") + @$(call kokkos_append_string,"#Source and Header files of Kokkos relative to KOKKOS_PATH") + @$(call kokkos_append_var,KOKKOS_HEADERS,'STRING "Kokkos headers list"') + @$(call kokkos_append_var,KOKKOS_HEADERS_IMPL,'STRING "Kokkos headers impl list"') + @$(call kokkos_append_var,KOKKOS_HEADERS_CUDA,'STRING "Kokkos headers Cuda list"') + @$(call kokkos_append_var,KOKKOS_HEADERS_OPENMP,'STRING "Kokkos headers OpenMP list"') + @$(call kokkos_append_var,KOKKOS_HEADERS_ROCM,'STRING "Kokkos headers ROCm list"') + @$(call kokkos_append_var,KOKKOS_HEADERS_THREADS,'STRING "Kokkos headers Threads list"') + @$(call kokkos_append_var,KOKKOS_HEADERS_QTHREADS,'STRING "Kokkos headers QThreads list"') + @$(call kokkos_append_var,KOKKOS_SRC,'STRING "Kokkos source list"')
+ @$(call kokkos_append_string,"") + @$(call kokkos_append_string,"#Variables used in application Makefiles") + @$(call kokkos_append_var,KOKKOS_OS,'STRING ""') # This was not in original cmake gen + @$(call kokkos_append_var,KOKKOS_CPP_DEPENDS,'STRING ""') + @$(call kokkos_append_var,KOKKOS_LINK_DEPENDS,'STRING ""') + @$(call kokkos_append_var,KOKKOS_CXXFLAGS,'STRING ""') + @$(call kokkos_append_var,KOKKOS_CPPFLAGS,'STRING ""') + @$(call kokkos_append_var,KOKKOS_LDFLAGS,'STRING ""') + @$(call kokkos_append_var,KOKKOS_LIBS,'STRING ""') + @$(call kokkos_append_var,KOKKOS_EXTRA_LIBS,'STRING ""') + @$(call kokkos_append_string,"") + @$(call kokkos_append_string,"#Internal settings which need to propagated for Kokkos examples") + @$(call kokkos_append_var,KOKKOS_INTERNAL_USE_CUDA,'STRING ""') + @$(call kokkos_append_var,KOKKOS_INTERNAL_USE_OPENMP,'STRING ""') + @$(call kokkos_append_var,KOKKOS_INTERNAL_USE_PTHREADS,'STRING ""') + @$(call kokkos_append_var,KOKKOS_INTERNAL_USE_ROCM,'STRING ""') + @$(call kokkos_append_var,KOKKOS_INTERNAL_USE_QTHREADS,'STRING ""') # Not in original cmake gen + @$(call kokkos_append_cmakefile,"mark_as_advanced(KOKKOS_HEADERS KOKKOS_SRC KOKKOS_INTERNAL_USE_CUDA KOKKOS_INTERNAL_USE_OPENMP KOKKOS_INTERNAL_USE_PTHREADS)") # Mark these cache entries as advanced in the generated cmake file + @$(call kokkos_append_makefile,"") + @$(call kokkos_append_makefile,"#Fake kokkos-clean target") + @$(call kokkos_append_makefile,"kokkos-clean:") + @$(call kokkos_append_makefile,"") + @sed \ + -e 's|$(KOKKOS_PATH)/core/src|$(PREFIX)/include|g' \ + -e 's|$(KOKKOS_PATH)/containers/src|$(PREFIX)/include|g' \ + -e 's|$(KOKKOS_PATH)/algorithms/src|$(PREFIX)/include|g' \ + -e 's|-L$(PWD)|-L$(PREFIX)/lib|g' \ + -e 's|= libkokkos.a|= $(PREFIX)/lib/libkokkos.a|g' \ + -e 's|= $(KOKKOS_CONFIG_HEADER)|= $(PREFIX)/include/$(KOKKOS_CONFIG_HEADER)|g' $(KOKKOS_MAKEFILE) \ + > $(KOKKOS_MAKEFILE).tmp + @mv -f $(KOKKOS_MAKEFILE).tmp $(KOKKOS_MAKEFILE) + @$(call kokkos_setvar_cmakefile,KOKKOS_CXX_FLAGS,$(KOKKOS_CXXFLAGS)) + @$(call
kokkos_setvar_cmakefile,KOKKOS_CPP_FLAGS,$(KOKKOS_CPPFLAGS)) + @$(call kokkos_setvar_cmakefile,KOKKOS_LD_FLAGS,$(KOKKOS_LDFLAGS)) + @$(call kokkos_setlist_cmakefile,KOKKOS_LIBS_LIST,$(KOKKOS_LIBS)) + @$(call kokkos_setlist_cmakefile,KOKKOS_EXTRA_LIBS_LIST,$(KOKKOS_EXTRA_LIBS)) + diff --git a/lib/kokkos/core/src/Makefile.generate_header_lists b/lib/kokkos/core/src/Makefile.generate_header_lists new file mode 100644 index 0000000000..cd308bf8f4 --- /dev/null +++ b/lib/kokkos/core/src/Makefile.generate_header_lists @@ -0,0 +1,28 @@ +# Build a List of Header Files + +KOKKOS_HEADERS_INCLUDE = $(wildcard $(KOKKOS_PATH)/core/src/*.hpp) +KOKKOS_HEADERS_INCLUDE_IMPL = $(wildcard $(KOKKOS_PATH)/core/src/impl/*.hpp) +KOKKOS_HEADERS_INCLUDE += $(wildcard $(KOKKOS_PATH)/containers/src/*.hpp) +KOKKOS_HEADERS_INCLUDE_IMPL += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.hpp) +KOKKOS_HEADERS_INCLUDE += $(wildcard $(KOKKOS_PATH)/algorithms/src/*.hpp) + +ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) + KOKKOS_HEADERS_CUDA += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp) +endif + +ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) + KOKKOS_HEADERS_THREADS += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.hpp) +endif + +ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 1) + KOKKOS_HEADERS_QTHREADS += $(wildcard $(KOKKOS_PATH)/core/src/Qthreads/*.hpp) +endif + +ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) + KOKKOS_HEADERS_OPENMP += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.hpp) +endif + +ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1) + KOKKOS_HEADERS_ROCM += $(wildcard $(KOKKOS_PATH)/core/src/ROCm/*.hpp) +endif + diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel.hpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel.hpp index a424edc689..70115b4728 100644 --- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel.hpp +++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel.hpp @@ -292,11 +292,12 @@ private: typedef Kokkos::Impl::if_c< std::is_same::value, FunctorType, ReducerType> ReducerConditional; typedef 
typename ReducerConditional::type ReducerTypeFwd; + typedef typename Kokkos::Impl::if_c< std::is_same::value, WorkTag, void>::type WorkTagFwd; // Static Assert WorkTag void if ReducerType not InvalidType - typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd, WorkTag > ValueInit ; - typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd, WorkTag > ValueJoin ; + typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd, WorkTagFwd > ValueInit ; + typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd, WorkTagFwd > ValueJoin ; typedef typename Analysis::pointer_type pointer_type ; typedef typename Analysis::reference_type reference_type ; @@ -393,7 +394,7 @@ public: , m_instance->get_thread_data(i)->pool_reduce_local() ); } - Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer) , ptr ); + Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTagFwd >::final( ReducerConditional::select(m_functor , m_reducer) , ptr ); if ( m_result_ptr ) { const int n = Analysis::value_count( ReducerConditional::select(m_functor , m_reducer) ); @@ -463,11 +464,12 @@ private: typedef Kokkos::Impl::if_c< std::is_same::value, FunctorType, ReducerType> ReducerConditional; typedef typename ReducerConditional::type ReducerTypeFwd; + typedef typename Kokkos::Impl::if_c< std::is_same::value, WorkTag, void>::type WorkTagFwd; typedef typename ReducerTypeFwd::value_type ValueType; - typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd, WorkTag > ValueInit ; - typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd, WorkTag > ValueJoin ; + typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd, WorkTagFwd > ValueInit ; + typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd, WorkTagFwd > ValueJoin ; typedef typename Analysis::pointer_type pointer_type ; typedef typename Analysis::reference_type reference_type ; @@ -558,7 +560,7 @@ public: , m_instance->get_thread_data(i)->pool_reduce_local() ); } - Kokkos::Impl::FunctorFinal< ReducerTypeFwd , 
WorkTag >::final( ReducerConditional::select(m_functor , m_reducer) , ptr ); + Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTagFwd >::final( ReducerConditional::select(m_functor , m_reducer) , ptr ); if ( m_result_ptr ) { const int n = Analysis::value_count( ReducerConditional::select(m_functor , m_reducer) ); @@ -920,9 +922,10 @@ private: , FunctorType, ReducerType> ReducerConditional; typedef typename ReducerConditional::type ReducerTypeFwd; + typedef typename Kokkos::Impl::if_c< std::is_same::value, WorkTag, void>::type WorkTagFwd; - typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTag > ValueInit ; - typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd , WorkTag > ValueJoin ; + typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTagFwd > ValueInit ; + typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd , WorkTagFwd > ValueJoin ; typedef typename Analysis::pointer_type pointer_type ; typedef typename Analysis::reference_type reference_type ; @@ -1067,7 +1070,7 @@ public: , m_instance->get_thread_data(i)->pool_reduce_local() ); } - Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer) , ptr ); + Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTagFwd >::final( ReducerConditional::select(m_functor , m_reducer) , ptr ); if ( m_result_ptr ) { const int n = Analysis::value_count( ReducerConditional::select(m_functor , m_reducer) ); diff --git a/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Parallel.hpp b/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Parallel.hpp index b624384e7c..035ee2e7a6 100644 --- a/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Parallel.hpp +++ b/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Parallel.hpp @@ -248,12 +248,13 @@ private: typedef Kokkos::Impl::if_c< std::is_same::value, FunctorType, ReducerType> ReducerConditional; typedef typename ReducerConditional::type ReducerTypeFwd; + typedef typename Kokkos::Impl::if_c< 
std::is_same::value, WorkTag, void>::type WorkTagFwd; // Static Assert WorkTag void if ReducerType not InvalidType - typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd, WorkTag > ValueTraits ; - typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd, WorkTag > ValueInit ; - typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd, WorkTag > ValueJoin ; + typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd , WorkTagFwd > ValueTraits ; + typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTagFwd > ValueInit ; + typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd , WorkTagFwd > ValueJoin ; enum {HasJoin = ReduceFunctorHasJoin::value }; enum {UseReducer = is_reducer_type::value }; @@ -620,10 +621,11 @@ private: typedef Kokkos::Impl::if_c< std::is_same::value, FunctorType, ReducerType> ReducerConditional; typedef typename ReducerConditional::type ReducerTypeFwd; + typedef typename Kokkos::Impl::if_c< std::is_same::value, WorkTag, void>::type WorkTagFwd; - typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd , WorkTag > ValueTraits ; - typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTag > ValueInit ; - typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd , WorkTag > ValueJoin ; + typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd , WorkTagFwd > ValueTraits ; + typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTagFwd > ValueInit ; + typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd , WorkTagFwd > ValueJoin ; typedef typename ValueTraits::pointer_type pointer_type ; typedef typename ValueTraits::reference_type reference_type ; diff --git a/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_Parallel.hpp b/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_Parallel.hpp index b45c7114a3..6c94319004 100644 --- a/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_Parallel.hpp +++ b/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_Parallel.hpp @@ -150,11 +150,12 @@ private: typedef Kokkos::Impl::if_c< std::is_same::value, FunctorType, 
ReducerType > ReducerConditional; typedef typename ReducerConditional::type ReducerTypeFwd; + typedef typename Kokkos::Impl::if_c< std::is_same::value, WorkTag, void >::type WorkTagFwd; // Static Assert WorkTag void if ReducerType not InvalidType - typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd, WorkTag > ValueTraits ; - typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd, WorkTag > ValueInit ; + typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd , WorkTagFwd > ValueTraits ; + typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTagFwd > ValueInit ; typedef typename ValueTraits::pointer_type pointer_type ; typedef typename ValueTraits::reference_type reference_type ; @@ -213,7 +214,7 @@ public: const pointer_type data = (pointer_type) QthreadsExec::exec_all_reduce_result(); - Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer) , data ); + Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTagFwd >::final( ReducerConditional::select(m_functor , m_reducer) , data ); if ( m_result_ptr ) { const unsigned n = ValueTraits::value_count( ReducerConditional::select(m_functor , m_reducer) ); @@ -331,9 +332,10 @@ private: typedef Kokkos::Impl::if_c< std::is_same::value, FunctorType, ReducerType> ReducerConditional; typedef typename ReducerConditional::type ReducerTypeFwd; + typedef typename Kokkos::Impl::if_c< std::is_same::value, WorkTag, void >::type WorkTagFwd; - typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd , WorkTag > ValueTraits ; - typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTag > ValueInit ; + typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd , WorkTagFwd > ValueTraits ; + typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTagFwd > ValueInit ; typedef typename ValueTraits::pointer_type pointer_type ; typedef typename ValueTraits::reference_type reference_type ; @@ -394,7 +396,7 @@ public: const pointer_type data = (pointer_type) 
QthreadsExec::exec_all_reduce_result(); - Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer), data ); + Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTagFwd >::final( ReducerConditional::select(m_functor , m_reducer), data ); if ( m_result_ptr ) { const unsigned n = ValueTraits::value_count( ReducerConditional::select(m_functor , m_reducer) ); diff --git a/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Reduce.hpp b/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Reduce.hpp index f2674e5929..4e96aa6eaf 100644 --- a/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Reduce.hpp +++ b/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Reduce.hpp @@ -102,11 +102,12 @@ void reduce_enqueue( typedef Kokkos::Impl::if_c< std::is_same::value, F, ReducerType> ReducerConditional; typedef typename ReducerConditional::type ReducerTypeFwd; + typedef typename Kokkos::Impl::if_c< std::is_same::value, Tag, void >::type TagFwd; - typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd , Tag > ValueTraits ; - typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , Tag > ValueInit ; - typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd , Tag > ValueJoin ; - typedef Kokkos::Impl::FunctorFinal< ReducerTypeFwd , Tag > ValueFinal ; + typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd , TagFwd > ValueTraits ; + typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , TagFwd > ValueInit ; + typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd , TagFwd > ValueJoin ; + typedef Kokkos::Impl::FunctorFinal< ReducerTypeFwd , TagFwd > ValueFinal ; typedef typename ValueTraits::pointer_type pointer_type ; typedef typename ValueTraits::reference_type reference_type ; diff --git a/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.hpp b/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.hpp index 4f8e1bd7b7..71189cf7cc 100644 --- a/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.hpp +++ b/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.hpp @@ -50,7 +50,6 @@ #include #include 
-#include #include #include diff --git a/lib/kokkos/core/src/Threads/Kokkos_Threads_Parallel.hpp b/lib/kokkos/core/src/Threads/Kokkos_Threads_Parallel.hpp index 2b5802d840..be9f5a6f87 100644 --- a/lib/kokkos/core/src/Threads/Kokkos_Threads_Parallel.hpp +++ b/lib/kokkos/core/src/Threads/Kokkos_Threads_Parallel.hpp @@ -396,9 +396,10 @@ private: typedef Kokkos::Impl::if_c< std::is_same::value, FunctorType, ReducerType> ReducerConditional; typedef typename ReducerConditional::type ReducerTypeFwd; + typedef typename Kokkos::Impl::if_c< std::is_same::value, WorkTag, void>::type WorkTagFwd; - typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd, WorkTag > ValueTraits ; - typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd, WorkTag > ValueInit ; + typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd , WorkTagFwd > ValueTraits ; + typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTagFwd > ValueInit ; typedef typename ValueTraits::pointer_type pointer_type ; typedef typename ValueTraits::reference_type reference_type ; @@ -458,7 +459,7 @@ private: ( self.m_functor , range.begin() , range.end() , ValueInit::init( ReducerConditional::select(self.m_functor , self.m_reducer) , exec.reduce_memory() ) ); - exec.template fan_in_reduce< ReducerTypeFwd , WorkTag >( ReducerConditional::select(self.m_functor , self.m_reducer) ); + exec.template fan_in_reduce< ReducerTypeFwd , WorkTagFwd >( ReducerConditional::select(self.m_functor , self.m_reducer) ); } template @@ -484,7 +485,7 @@ private: work_index = exec.get_work_index(); } - exec.template fan_in_reduce< ReducerTypeFwd , WorkTag >( ReducerConditional::select(self.m_functor , self.m_reducer) ); + exec.template fan_in_reduce< ReducerTypeFwd , WorkTagFwd >( ReducerConditional::select(self.m_functor , self.m_reducer) ); } public: @@ -564,11 +565,12 @@ private: typedef Kokkos::Impl::if_c< std::is_same::value, FunctorType, ReducerType> ReducerConditional; typedef typename ReducerConditional::type ReducerTypeFwd; + 
typedef typename Kokkos::Impl::if_c< std::is_same::value, WorkTag, void>::type WorkTagFwd; typedef typename ReducerTypeFwd::value_type ValueType; - typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd, WorkTag > ValueTraits ; - typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd, WorkTag > ValueInit ; + typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd , WorkTagFwd > ValueTraits ; + typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTagFwd > ValueInit ; typedef typename ValueTraits::pointer_type pointer_type ; typedef typename ValueTraits::reference_type reference_type ; @@ -618,7 +620,7 @@ private: ( self.m_mdr_policy, self.m_functor , range.begin() , range.end() , ValueInit::init( ReducerConditional::select(self.m_functor , self.m_reducer) , exec.reduce_memory() ) ); - exec.template fan_in_reduce< ReducerTypeFwd , WorkTag >( ReducerConditional::select(self.m_functor , self.m_reducer) ); + exec.template fan_in_reduce< ReducerTypeFwd , WorkTagFwd >( ReducerConditional::select(self.m_functor , self.m_reducer) ); } template @@ -644,7 +646,7 @@ private: work_index = exec.get_work_index(); } - exec.template fan_in_reduce< ReducerTypeFwd , WorkTag >( ReducerConditional::select(self.m_functor , self.m_reducer) ); + exec.template fan_in_reduce< ReducerTypeFwd , WorkTagFwd >( ReducerConditional::select(self.m_functor , self.m_reducer) ); } public: @@ -725,9 +727,10 @@ private: typedef Kokkos::Impl::if_c< std::is_same::value, FunctorType, ReducerType> ReducerConditional; typedef typename ReducerConditional::type ReducerTypeFwd; + typedef typename Kokkos::Impl::if_c< std::is_same::value, WorkTag, void>::type WorkTagFwd; - typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd, WorkTag > ValueTraits ; - typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd, WorkTag > ValueInit ; + typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd , WorkTagFwd > ValueTraits ; + typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTagFwd > ValueInit ; 
typedef typename ValueTraits::pointer_type pointer_type ; typedef typename ValueTraits::reference_type reference_type ; @@ -767,7 +770,7 @@ private: ( self.m_functor , Member( & exec , self.m_policy , self.m_shared ) , ValueInit::init( ReducerConditional::select(self.m_functor , self.m_reducer) , exec.reduce_memory() ) ); - exec.template fan_in_reduce< ReducerTypeFwd , WorkTag >( ReducerConditional::select(self.m_functor , self.m_reducer) ); + exec.template fan_in_reduce< ReducerTypeFwd , WorkTagFwd >( ReducerConditional::select(self.m_functor , self.m_reducer) ); } public: diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp index 49fca9c855..bc0d969699 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp @@ -49,6 +49,10 @@ #if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_ATOMIC_COMPARE_EXCHANGE_STRONG_HPP ) #define KOKKOS_ATOMIC_COMPARE_EXCHANGE_STRONG_HPP +#if defined(KOKKOS_ENABLE_CUDA) +#include +#endif + namespace Kokkos { //---------------------------------------------------------------------------- @@ -103,7 +107,7 @@ T atomic_compare_exchange( volatile T * const dest , const T & compare , T return_val; // This is a way to (hopefully) avoid dead lock in a warp int done = 0; - unsigned int active = __ballot(1); + unsigned int active = KOKKOS_IMPL_CUDA_BALLOT(1); unsigned int done_active = 0; while (active!=done_active) { if(!done) { @@ -115,7 +119,7 @@ T atomic_compare_exchange( volatile T * const dest , const T & compare , done = 1; } } - done_active = __ballot(done); + done_active = KOKKOS_IMPL_CUDA_BALLOT(done); } return return_val; } diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Exchange.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Exchange.hpp index 9ba3cae9fc..2f5bfd44e8 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Atomic_Exchange.hpp +++ 
b/lib/kokkos/core/src/impl/Kokkos_Atomic_Exchange.hpp @@ -49,6 +49,10 @@ #if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_ATOMIC_EXCHANGE_HPP ) #define KOKKOS_ATOMIC_EXCHANGE_HPP +#if defined(KOKKOS_ENABLE_CUDA) +#include +#endif + namespace Kokkos { //---------------------------------------------------------------------------- @@ -126,7 +130,7 @@ T atomic_exchange( volatile T * const dest , #endif int done = 0; - unsigned int active = __ballot(1); + unsigned int active = KOKKOS_IMPL_CUDA_BALLOT(1); unsigned int done_active = 0; while (active!=done_active) { if(!done) { @@ -137,7 +141,7 @@ T atomic_exchange( volatile T * const dest , done = 1; } } - done_active = __ballot(done); + done_active = KOKKOS_IMPL_CUDA_BALLOT(done); } return return_val; } diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp index 2af1737c31..dfdd133a3c 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp @@ -49,6 +49,10 @@ #if defined( KOKKOS_ATOMIC_HPP ) && ! 
defined( KOKKOS_ATOMIC_FETCH_ADD_HPP ) #define KOKKOS_ATOMIC_FETCH_ADD_HPP +#if defined(KOKKOS_ENABLE_CUDA) +#include +#endif + namespace Kokkos { //---------------------------------------------------------------------------- @@ -139,7 +143,7 @@ T atomic_fetch_add( volatile T * const dest , T return_val; // This is a way to (hopefully) avoid dead lock in a warp int done = 0; - unsigned int active = __ballot(1); + unsigned int active = KOKKOS_IMPL_CUDA_BALLOT(1); unsigned int done_active = 0; while (active!=done_active) { if(!done) { @@ -151,7 +155,7 @@ T atomic_fetch_add( volatile T * const dest , done = 1; } } - done_active = __ballot(done); + done_active = KOKKOS_IMPL_CUDA_BALLOT(done); } return return_val; } diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp index b7c14052eb..fc8955d909 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp @@ -49,6 +49,10 @@ #if defined( KOKKOS_ATOMIC_HPP ) && ! 
defined( KOKKOS_ATOMIC_FETCH_SUB_HPP ) #define KOKKOS_ATOMIC_FETCH_SUB_HPP +#if defined(KOKKOS_ENABLE_CUDA) +#include +#endif + namespace Kokkos { //---------------------------------------------------------------------------- @@ -117,7 +121,7 @@ T atomic_fetch_sub( volatile T * const dest , T return_val; // This is a way to (hopefully) avoid dead lock in a warp int done = 0; - unsigned int active = __ballot(1); + unsigned int active = KOKKOS_IMPL_CUDA_BALLOT(1); unsigned int done_active = 0; while (active!=done_active) { if(!done) { @@ -128,7 +132,7 @@ T atomic_fetch_sub( volatile T * const dest , done = 1; } } - done_active = __ballot(done); + done_active = KOKKOS_IMPL_CUDA_BALLOT(done); } return return_val; } diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Generic.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Generic.hpp index f47ba1a98a..3a2a9e1f80 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Atomic_Generic.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Generic.hpp @@ -46,6 +46,10 @@ #define KOKKOS_ATOMIC_GENERIC_HPP #include +#if defined(KOKKOS_ENABLE_CUDA) +#include +#endif + // Combination operands to be used in an Compare and Exchange based atomic operation namespace Kokkos { namespace Impl { @@ -242,7 +246,7 @@ T atomic_fetch_oper( const Oper& op, volatile T * const dest , // This is a way to (hopefully) avoid dead lock in a warp T return_val; int done = 0; - unsigned int active = __ballot(1); + unsigned int active = KOKKOS_IMPL_CUDA_BALLOT(1); unsigned int done_active = 0; while (active!=done_active) { if(!done) { @@ -253,7 +257,7 @@ T atomic_fetch_oper( const Oper& op, volatile T * const dest , done=1; } } - done_active = __ballot(done); + done_active = KOKKOS_IMPL_CUDA_BALLOT(done); } return return_val; #endif @@ -281,7 +285,7 @@ T atomic_oper_fetch( const Oper& op, volatile T * const dest , T return_val; // This is a way to (hopefully) avoid dead lock in a warp int done = 0; - unsigned int active = __ballot(1); + unsigned int active = 
KOKKOS_IMPL_CUDA_BALLOT(1); unsigned int done_active = 0; while (active!=done_active) { if(!done) { @@ -292,7 +296,7 @@ T atomic_oper_fetch( const Oper& op, volatile T * const dest , done=1; } } - done_active = __ballot(done); + done_active = KOKKOS_IMPL_CUDA_BALLOT(done); } return return_val; #endif diff --git a/lib/kokkos/core/src/impl/Kokkos_BitOps.hpp b/lib/kokkos/core/src/impl/Kokkos_BitOps.hpp index c5e73c8b26..18c61a209c 100644 --- a/lib/kokkos/core/src/impl/Kokkos_BitOps.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_BitOps.hpp @@ -48,6 +48,10 @@ #include #include +#if defined( __HCC_ACCELERATOR__ ) +#include +#endif + namespace Kokkos { namespace Impl { diff --git a/lib/kokkos/core/src/impl/Kokkos_Core.cpp b/lib/kokkos/core/src/impl/Kokkos_Core.cpp index f59d349908..2d03cd2f72 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Core.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_Core.cpp @@ -132,10 +132,8 @@ setenv("MEMKIND_HBW_NODES", "1", 0); // struct, you may remove this line of code. (void) args; - if( std::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value || - std::is_same< Kokkos::Serial , Kokkos::HostSpace::execution_space >::value ) { - Kokkos::Serial::initialize(); - } + // Always initialize Serial if it is configure time enabled + Kokkos::Serial::initialize(); #endif #if defined( KOKKOS_ENABLE_OPENMPTARGET ) @@ -234,12 +232,8 @@ void finalize_internal( const bool all_spaces = false ) #endif #if defined( KOKKOS_ENABLE_SERIAL ) - if( std::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value || - std::is_same< Kokkos::Serial , Kokkos::HostSpace::execution_space >::value || - all_spaces ) { - if(Kokkos::Serial::is_initialized()) - Kokkos::Serial::finalize(); - } + if(Kokkos::Serial::is_initialized()) + Kokkos::Serial::finalize(); #endif g_is_initialized = false; @@ -383,6 +377,7 @@ void initialize(int& narg, char* arg[]) } if((strncmp(arg[iarg],"--kokkos-ndevices",17) == 0) || !kokkos_ndevices_found) ndevices = atoi(num1_only); + delete [] 
num1_only; if( num2 != NULL ) { if(( !Impl::is_unsigned_int(num2+1) ) || (strlen(num2)==1) ) @@ -439,7 +434,7 @@ void initialize(int& narg, char* arg[]) std::cout << "The following arguments exist also without prefix 'kokkos' (e.g. --help)." << std::endl; std::cout << "The prefixed arguments will be removed from the list by Kokkos::initialize()," << std::endl; std::cout << "the non-prefixed ones are not removed. Prefixed versions take precedence over " << std::endl; - std::cout << "non prefixed ones, and the last occurence of an argument overwrites prior" << std::endl; + std::cout << "non prefixed ones, and the last occurrence of an argument overwrites prior" << std::endl; std::cout << "settings." << std::endl; std::cout << std::endl; std::cout << "--kokkos-help : print this message" << std::endl; diff --git a/lib/kokkos/core/src/impl/Kokkos_HostBarrier.cpp b/lib/kokkos/core/src/impl/Kokkos_HostBarrier.cpp new file mode 100644 index 0000000000..e382acae32 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_HostBarrier.cpp @@ -0,0 +1,204 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include + +#include +#include + +namespace Kokkos { namespace Impl { + +namespace { + +enum : int { HEADER_SIZE = HostBarrier::HEADER / sizeof(uint64_t) }; + +inline constexpr int length64( const int nthreads ) noexcept +{ + return (nthreads-1 + sizeof(uint64_t)-1) / sizeof(uint64_t); +} + +} // namespace + +void rendezvous_initialize( volatile void * buffer + , const int size + , const int rank + ) noexcept +{ + Kokkos::store_fence(); + + // ensure that the buffer has been zero'd out + constexpr uint8_t zero8 = static_cast(0); + constexpr uint64_t zero64 = static_cast(0); + + volatile uint64_t * header = reinterpret_cast(buffer); + + if (rank > 0) { + volatile uint8_t * bytes = reinterpret_cast(buffer) + RENDEZVOUS_HEADER; + + bytes[rank-1] = zero8; + + // last thread is responsible for zeroing out the final bytes of the last uint64_t + if (rank == size-1) { + const int tmp = (size-1) % sizeof(uint64_t); + const int rem = tmp ? sizeof(uint64_t) - tmp : 0; + for (int i=0; i(buffer) + HEADER_SIZE; + + // wait for other threads to finish initializing + for (int i=0; i(step + 1u) + ? 
step + 1u + : step + 2u + ; + + // if size == 1, it is incorrect for rank 0 to check the tail value of the buffer + // this optimization prevents a potential read of uninitialized memory + if ( size == 1 ) { return true; } + + const uint8_t byte_value = static_cast(step); + + // byte that is set in the spin_value rotates every time + // this prevents threads from overtaking the master thread + const uint64_t spin_value = static_cast(byte_value) << (byte_value&7); + + if ( rank > 0 ) { + volatile uint64_t * header = reinterpret_cast(buffer); + volatile uint8_t * bytes = reinterpret_cast(buffer) + RENDEZVOUS_HEADER; + + bytes[ rank-1 ] = byte_value; + + if ( active_wait ) { + spinwait_until_equal( *header, spin_value ); + } + else { + yield_until_equal( *header, spin_value ); + } + } + else { // rank 0 + volatile uint64_t * buff = reinterpret_cast(buffer) + HEADER_SIZE; + const int n = length64(size); + + uint64_t comp = byte_value; + comp = comp | (comp << 8); + comp = comp | (comp << 16); + comp = comp | (comp << 32); + + const int rem = (size-1) % sizeof(uint64_t); + + union { + volatile uint64_t value; + volatile uint8_t array[sizeof(uint64_t)]; + } tmp{}; + + for (int i=0; i(step); + const uint64_t spin_value = static_cast(byte_value) << (byte_value&7); + volatile uint64_t * header = reinterpret_cast(buffer); + + // Force all outstanding stores from this thread to retire before releasing + // the other threads. This forces correctness on systems with out-of-order + // memory (Power and ARM) + Kokkos::store_fence(); + + *header = spin_value; + + Kokkos::memory_fence(); +} + +}} // namespace Kokkos::Impl + diff --git a/lib/kokkos/core/src/impl/Kokkos_HostBarrier.hpp b/lib/kokkos/core/src/impl/Kokkos_HostBarrier.hpp new file mode 100644 index 0000000000..733b69e79f --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_HostBarrier.hpp @@ -0,0 +1,146 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_HOST_BARRIER_HPP +#define KOKKOS_HOST_BARRIER_HPP + +#include +#include + +namespace Kokkos { namespace Impl { + +//------------------------------------------------------------------------------ +//------------------------------------------------------------------------------ + +enum : int { RENDEZVOUS_ALIGNMENT = 128 + , RENDEZVOUS_HEADER = RENDEZVOUS_ALIGNMENT + }; + +inline constexpr int rendezvous_buffer_size( const int nthreads ) noexcept +{ + return RENDEZVOUS_HEADER + ((nthreads-1 + RENDEZVOUS_ALIGNMENT-1) / RENDEZVOUS_ALIGNMENT) * RENDEZVOUS_ALIGNMENT; +} + +void rendezvous_initialize( volatile void * buffer + , const int size + , const int rank + ) noexcept; + + +bool rendezvous( volatile void * buffer + , uint64_t & step + , const int size + , const int rank + , bool active_wait = true + ) noexcept; + +void rendezvous_release( volatile void * buffer + , const uint64_t step + ) noexcept; + + +//------------------------------------------------------------------------------ +//------------------------------------------------------------------------------ + + +class HostBarrier +{ +public: + + enum : int { ALIGNMENT = RENDEZVOUS_ALIGNMENT }; + enum : int { HEADER = ALIGNMENT}; + + enum Policy : int { ACTIVE, PASSIVE }; + + inline static constexpr int buffer_size( const int nthreads ) noexcept + { + return rendezvous_buffer_size(nthreads); + } + + HostBarrier( volatile void * arg_buffer + , int arg_size + , int arg_rank + , Policy arg_policy + ) noexcept + : m_buffer{arg_buffer} + , m_size{arg_size} + , m_rank{arg_rank} + , m_policy{arg_policy} + , m_step{0} + { + rendezvous_initialize( m_buffer, m_size, m_rank ); + } + + bool rendezvous() const noexcept + { + return Kokkos::Impl::rendezvous( m_buffer + , m_step + , m_size + , m_rank + , m_policy == ACTIVE + ); + } + + void rendezvous_release() const noexcept + { 
+ Kokkos::Impl::rendezvous_release( m_buffer, m_step ); + } + +private: + volatile void * m_buffer ; + const int m_size ; + const int m_rank ; + const Policy m_policy ; + mutable uint64_t m_step ; + +private: + HostBarrier( const HostBarrier & ) = delete; + HostBarrier( HostBarrier && ) = delete; + HostBarrier & operator=( const HostBarrier & ) = delete; + HostBarrier & operator=( HostBarrier && ) = delete; +}; + +}} // namespace Kokkos::Impl + +#endif // KOKKOS_HOST_BARRIER_HPP + diff --git a/lib/kokkos/core/src/impl/Kokkos_HostThreadTeam.cpp b/lib/kokkos/core/src/impl/Kokkos_HostThreadTeam.cpp index ba15f870fc..c2c6e45ef8 100644 --- a/lib/kokkos/core/src/impl/Kokkos_HostThreadTeam.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_HostThreadTeam.cpp @@ -206,158 +206,6 @@ void HostThreadTeamData::disband_team() m_team_rendezvous_step = 0 ; } -//---------------------------------------------------------------------------- -/* pattern for rendezvous - * - * if ( rendezvous() ) { - * ... all other threads are still in team_rendezvous() ... - * rendezvous_release(); - * ... all other threads are released from team_rendezvous() ... - * } - */ - -int HostThreadTeamData::rendezvous( int64_t * const buffer - , int & rendezvous_step - , int const size - , int const rank ) noexcept -{ - enum : int { shift_byte = 3 }; - enum : int { size_byte = ( 01 << shift_byte ) }; // == 8 - enum : int { mask_byte = size_byte - 1 }; - - enum : int { shift_mem_cycle = 2 }; - enum : int { size_mem_cycle = ( 01 << shift_mem_cycle ) }; // == 4 - enum : int { mask_mem_cycle = size_mem_cycle - 1 }; - - // Cycle step values: 1 <= step <= size_val_cycle - // An odd multiple of memory cycle so that when a memory location - // is reused it has a different value. - // Must be representable within a single byte: size_val_cycle < 16 - - enum : int { size_val_cycle = 3 * size_mem_cycle }; - - // Requires: - // Called by rank = [ 0 .. 
size ) - // buffer aligned to int64_t[4] - - // A sequence of rendezvous uses four cycled locations in memory - // and non-equal cycled synchronization values to - // 1) prevent rendezvous from overtaking one another and - // 2) give each spin wait location an int64_t[4] span - // so that it has its own cache line. - - const int step = ( rendezvous_step % size_val_cycle ) + 1 ; - - rendezvous_step = step ; - - // The leading int64_t[4] span is for thread 0 to write - // and all other threads to read spin-wait. - // sync_offset is the index into this array for this step. - - const int sync_offset = ( step & mask_mem_cycle ) + size_mem_cycle ; - - if ( rank ) { - - const int group_begin = rank << shift_byte ; // == rank * size_byte - - if ( group_begin < size ) { - - // This thread waits for threads - // [ group_begin .. group_begin + 8 ) - // [ rank*8 .. rank*8 + 8 ) - // to write to their designated bytes. - - const int end = group_begin + size_byte < size - ? size_byte : size - group_begin ; - - int64_t value = 0 ; - - for ( int i = 0 ; i < end ; ++i ) { - ((int8_t*) & value )[i] = int8_t( step ); - } - // Do not REMOVE this store fence!!! - // Makes stuff hang on GCC with more than 8 threads - store_fence(); - spinwait_until_equal( buffer[ (rank << shift_mem_cycle) + sync_offset ] - , value ); - } - - { - // This thread sets its designated byte. 
- // ( rank % size_byte ) + - // ( ( rank / size_byte ) * size_byte * size_mem_cycle ) + - // ( sync_offset * size_byte ) - int offset = ( rank & mask_byte ) - + ( ( rank & ~mask_byte ) << shift_mem_cycle ) - + ( sync_offset << shift_byte ); - - // Switch designated byte if running on big endian machine - volatile uint16_t value = 1; - volatile uint8_t* byte = (uint8_t*) &value; - volatile bool is_big_endian = (!(byte[0] == 1)); - if (is_big_endian) { - int remainder = ((offset) % 8); - int base = offset - remainder; - int shift = 7 - remainder; - offset = base + shift; - } - - // All of this thread's previous memory stores must be complete before - // this thread stores the step value at this thread's designated byte - // in the shared synchronization array. - - Kokkos::memory_fence(); - - ((volatile int8_t*) buffer)[ offset ] = int8_t( step ); - - // Memory fence to push the previous store out - Kokkos::memory_fence(); - } - - // Wait for thread 0 to release all other threads - - spinwait_until_equal( buffer[ step & mask_mem_cycle ] , int64_t(step) ); - - } - else { - // Thread 0 waits for threads [1..7] - // to write to their designated bytes. - - const int end = size_byte < size ? 8 : size ; - - int64_t value = 0 ; - for ( int i = 1 ; i < end ; ++i ) { - ((int8_t *) & value)[i] = int8_t( step ); - } - - spinwait_until_equal( buffer[ sync_offset ], value ); - } - - return rank ? 
0 : 1 ; -} - -void HostThreadTeamData:: - rendezvous_release( int64_t * const buffer - , int const rendezvous_step ) noexcept -{ - enum : int { shift_mem_cycle = 2 }; - enum : int { size_mem_cycle = ( 01 << shift_mem_cycle ) }; // == 4 - enum : int { mask_mem_cycle = size_mem_cycle - 1 }; - - // Requires: - // Called after team_rendezvous - // Called only by true == team_rendezvous(root) - - // Memory fence to be sure all previous writes are complete: - Kokkos::memory_fence(); - - ((volatile int64_t*) buffer)[ rendezvous_step & mask_mem_cycle ] = - int64_t( rendezvous_step ); - - // Memory fence to push the store out - Kokkos::memory_fence(); -} - //---------------------------------------------------------------------------- int HostThreadTeamData::get_work_stealing() noexcept diff --git a/lib/kokkos/core/src/impl/Kokkos_HostThreadTeam.hpp b/lib/kokkos/core/src/impl/Kokkos_HostThreadTeam.hpp index 7facc0a410..dc3b89c7c7 100644 --- a/lib/kokkos/core/src/impl/Kokkos_HostThreadTeam.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_HostThreadTeam.hpp @@ -50,7 +50,7 @@ #include #include #include -#include +#include //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- @@ -113,50 +113,29 @@ private: int m_league_size ; int m_work_chunk ; int m_steal_rank ; // work stealing rank - int mutable m_team_rendezvous_step ; + uint64_t mutable m_pool_rendezvous_step ; + uint64_t mutable m_team_rendezvous_step ; HostThreadTeamData * team_member( int r ) const noexcept { return ((HostThreadTeamData**)(m_pool_scratch+m_pool_members))[m_team_base+r]; } - // Rendezvous pattern: - // if ( rendezvous(root) ) { - // ... only root thread here while all others wait ... - // rendezvous_release(); - // } - // else { - // ... all other threads release here ... 
- // } - // - // Requires: buffer[ ( max_threads / 8 ) * 4 + 4 ]; 0 == max_threads % 8 - // - static - int rendezvous( int64_t * const buffer - , int & rendezvous_step - , int const size - , int const rank ) noexcept ; - - static - void rendezvous_release( int64_t * const buffer - , int const rendezvous_step ) noexcept ; - public: inline int team_rendezvous( int const root ) const noexcept { return 1 == m_team_size ? 1 : - HostThreadTeamData:: rendezvous( m_team_scratch + m_team_rendezvous , m_team_rendezvous_step , m_team_size - , ( m_team_rank + m_team_size - root ) % m_team_size ); + , ( m_team_rank + m_team_size - root ) % m_team_size + ); } inline int team_rendezvous() const noexcept { return 1 == m_team_size ? 1 : - HostThreadTeamData:: rendezvous( m_team_scratch + m_team_rendezvous , m_team_rendezvous_step , m_team_size @@ -167,7 +146,6 @@ public: void team_rendezvous_release() const noexcept { if ( 1 < m_team_size ) { - HostThreadTeamData:: rendezvous_release( m_team_scratch + m_team_rendezvous , m_team_rendezvous_step ); } @@ -176,30 +154,30 @@ public: inline int pool_rendezvous() const noexcept { - static constexpr int yield_wait = + static constexpr bool active_wait = #if defined( KOKKOS_COMPILER_IBM ) // If running on IBM POWER architecture the global // level rendzvous should immediately yield when // waiting for other threads in the pool to arrive. - 1 + false #else - 0 + true #endif ; return 1 == m_pool_size ? 
1 : - Kokkos::Impl:: rendezvous( m_pool_scratch + m_pool_rendezvous + , m_pool_rendezvous_step , m_pool_size , m_pool_rank - , yield_wait ); + , active_wait + ); } inline void pool_rendezvous_release() const noexcept { if ( 1 < m_pool_size ) { - Kokkos::Impl:: - rendezvous_release( m_pool_scratch + m_pool_rendezvous ); + rendezvous_release( m_pool_scratch + m_pool_rendezvous, m_pool_rendezvous_step ); } } @@ -225,6 +203,7 @@ public: , m_league_size(1) , m_work_chunk(0) , m_steal_rank(0) + , m_pool_rendezvous_step(0) , m_team_rendezvous_step(0) {} diff --git a/lib/kokkos/core/src/impl/Kokkos_MemoryPool.cpp b/lib/kokkos/core/src/impl/Kokkos_MemoryPool.cpp new file mode 100644 index 0000000000..d7fe74a6d8 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_MemoryPool.cpp @@ -0,0 +1,125 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include +#include + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +/* Verify size constraints: + * min_block_alloc_size <= max_block_alloc_size + * max_block_alloc_size <= min_superblock_size + * min_superblock_size <= max_superblock_size + * min_superblock_size <= min_total_alloc_size + * min_superblock_size <= min_block_alloc_size * + * max_block_per_superblock + */ +void memory_pool_bounds_verification + ( size_t min_block_alloc_size + , size_t max_block_alloc_size + , size_t min_superblock_size + , size_t max_superblock_size + , size_t max_block_per_superblock + , size_t min_total_alloc_size + ) +{ + const size_t max_superblock = + min_block_alloc_size * max_block_per_superblock ; + + if ( ( size_t(max_superblock_size) < min_superblock_size ) || + ( min_total_alloc_size < min_superblock_size ) || + ( max_superblock < min_superblock_size ) || + ( min_superblock_size 
< max_block_alloc_size ) || + ( max_block_alloc_size < min_block_alloc_size ) ) { + + std::ostringstream msg ; + + msg << "Kokkos::MemoryPool size constraint violation" ; + + if ( size_t(max_superblock_size) < min_superblock_size ) { + msg << " : max_superblock_size(" + << max_superblock_size + << ") < min_superblock_size(" + << min_superblock_size << ")" ; + } + + if ( min_total_alloc_size < min_superblock_size ) { + msg << " : min_total_alloc_size(" + << min_total_alloc_size + << ") < min_superblock_size(" + << min_superblock_size << ")" ; + } + + if ( max_superblock < min_superblock_size ) { + msg << " : max_superblock(" + << max_superblock + << ") < min_superblock_size(" + << min_superblock_size << ")" ; + } + + if ( min_superblock_size < max_block_alloc_size ) { + msg << " : min_superblock_size(" + << min_superblock_size + << ") < max_block_alloc_size(" + << max_block_alloc_size << ")" ; + } + + if ( max_block_alloc_size < min_block_alloc_size ) { + msg << " : max_block_alloc_size(" + << max_block_alloc_size + << ") < min_block_alloc_size(" + << min_block_alloc_size << ")" ; + } + + Kokkos::Impl::throw_runtime_exception( msg.str() ); + } +} + +} +} + diff --git a/lib/kokkos/core/src/impl/Kokkos_Memory_Fence.hpp b/lib/kokkos/core/src/impl/Kokkos_Memory_Fence.hpp index 718e307517..abd9fe6724 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Memory_Fence.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Memory_Fence.hpp @@ -45,7 +45,9 @@ #if defined( KOKKOS_ATOMIC_HPP ) && ! 
defined( KOKKOS_MEMORY_FENCE_HPP ) #define KOKKOS_MEMORY_FENCE_HPP +#if !defined(_OPENMP) #include +#endif namespace Kokkos { @@ -54,8 +56,10 @@ namespace Kokkos { KOKKOS_FORCEINLINE_FUNCTION void memory_fence() { -#if defined( __CUDA_ARCH__ ) +#if defined( __CUDA_ARCH__ ) __threadfence(); +#elif defined( _OPENMP ) + #pragma omp flush #else std::atomic_thread_fence( std::memory_order_seq_cst ); #endif @@ -71,6 +75,8 @@ void store_fence() { #if defined( __CUDA_ARCH__ ) __threadfence(); +#elif defined( _OPENMP ) + #pragma omp flush #else std::atomic_thread_fence( std::memory_order_seq_cst ); #endif @@ -86,6 +92,8 @@ void load_fence() { #if defined( __CUDA_ARCH__ ) __threadfence(); +#elif defined( _OPENMP ) + #pragma omp flush #else std::atomic_thread_fence( std::memory_order_seq_cst ); #endif diff --git a/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.cpp b/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.cpp index 608d514c79..a90bd507d5 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.cpp @@ -69,6 +69,13 @@ static deallocateDataFunction deallocateDataCallee = nullptr; static beginDeepCopyFunction beginDeepCopyCallee = nullptr; static endDeepCopyFunction endDeepCopyCallee = nullptr; +static createProfileSectionFunction createSectionCallee = nullptr; +static startProfileSectionFunction startSectionCallee = nullptr; +static stopProfileSectionFunction stopSectionCallee = nullptr; +static destroyProfileSectionFunction destroySectionCallee = nullptr; + +static profileEventFunction profileEventCallee = nullptr; + SpaceHandle::SpaceHandle(const char* space_name) { strncpy(name,space_name,64); } @@ -162,6 +169,37 @@ void endDeepCopy() { } } +void createProfileSection(const std::string& sectionName, uint32_t* secID) { + + if(nullptr != createSectionCallee) { + (*createSectionCallee)(sectionName.c_str(), secID); + } +} + +void startSection(const uint32_t secID) { + if(nullptr != 
startSectionCallee) { + (*startSectionCallee)(secID); + } +} + +void stopSection(const uint32_t secID) { + if(nullptr != stopSectionCallee) { + (*stopSectionCallee)(secID); + } +} + +void destroyProfileSection(const uint32_t secID) { + if(nullptr != destroySectionCallee) { + (*destroySectionCallee)(secID); + } +} + +void markEvent(const std::string& eventName) { + if(nullptr != profileEventCallee) { + (*profileEventCallee)(eventName.c_str()); + } +} + void initialize() { // Make sure initialize calls happens only once @@ -230,7 +268,18 @@ void initialize() { beginDeepCopyCallee = *((beginDeepCopyFunction*) &p13); auto p14 = dlsym(firstProfileLibrary, "kokkosp_end_deep_copy"); endDeepCopyCallee = *((endDeepCopyFunction*) &p14); - + + auto p15 = dlsym(firstProfileLibrary, "kokkosp_create_profile_section"); + createSectionCallee = *((createProfileSectionFunction*) &p15); + auto p16 = dlsym(firstProfileLibrary, "kokkosp_start_profile_section"); + startSectionCallee = *((startProfileSectionFunction*) &p16); + auto p17 = dlsym(firstProfileLibrary, "kokkosp_stop_profile_section"); + stopSectionCallee = *((stopProfileSectionFunction*) &p17); + auto p18 = dlsym(firstProfileLibrary, "kokkosp_destroy_profile_section"); + destroySectionCallee = *((destroyProfileSectionFunction*) &p18); + + auto p19 = dlsym(firstProfileLibrary, "kokkosp_profile_event"); + profileEventCallee = *((profileEventFunction*) &p19); } } @@ -274,6 +323,13 @@ void finalize() { beginDeepCopyCallee = nullptr; endDeepCopyCallee = nullptr; + + createSectionCallee = nullptr; + startSectionCallee = nullptr; + stopSectionCallee = nullptr; + destroySectionCallee = nullptr; + + profileEventCallee = nullptr; } } } diff --git a/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp b/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp index 2c2e524d9d..f348239e08 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp @@ -45,6 +45,7 @@ 
#define KOKKOSP_INTERFACE_HPP #include + #if defined(KOKKOS_ENABLE_PROFILING) #include @@ -57,7 +58,7 @@ #include #include -#define KOKKOSP_INTERFACE_VERSION 20150628 +#define KOKKOSP_INTERFACE_VERSION 20171029 namespace Kokkos { namespace Profiling { @@ -81,6 +82,13 @@ typedef void (*popFunction)(); typedef void (*allocateDataFunction)(const SpaceHandle, const char*, const void*, const uint64_t); typedef void (*deallocateDataFunction)(const SpaceHandle, const char*, const void*, const uint64_t); +typedef void (*createProfileSectionFunction)(const char*, uint32_t*); +typedef void (*startProfileSectionFunction)(const uint32_t); +typedef void (*stopProfileSectionFunction)(const uint32_t); +typedef void (*destroyProfileSectionFunction)(const uint32_t); + +typedef void (*profileEventFunction)(const char*); + typedef void (*beginDeepCopyFunction)( SpaceHandle, const char*, const void*, SpaceHandle, const char*, const void*, @@ -99,6 +107,13 @@ void endParallelReduce(const uint64_t kernelID); void pushRegion(const std::string& kName); void popRegion(); +void createProfileSection(const std::string& sectionName, uint32_t* secID); +void startSection(const uint32_t secID); +void stopSection(const uint32_t secID); +void destroyProfileSection(const uint32_t secID); + +void markEvent(const std::string* evName); + void allocateData(const SpaceHandle space, const std::string label, const void* ptr, const uint64_t size); void deallocateData(const SpaceHandle space, const std::string label, const void* ptr, const uint64_t size); diff --git a/lib/kokkos/core/src/impl/Kokkos_Rendezvous.cpp b/lib/kokkos/core/src/impl/Kokkos_Rendezvous.cpp deleted file mode 100644 index 013aec03d3..0000000000 --- a/lib/kokkos/core/src/impl/Kokkos_Rendezvous.cpp +++ /dev/null @@ -1,219 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 
2.0 -// Copyright (2014) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include -#include -#include - -namespace Kokkos { namespace Impl { - -//---------------------------------------------------------------------------- -/* pattern for rendezvous - * - * if ( rendezvous() ) { - * ... all other threads are still in team_rendezvous() ... - * rendezvous_release(); - * ... all other threads are released from team_rendezvous() ... - * } - */ - -int rendezvous( volatile int64_t * const buffer - , int const size - , int const rank - , int const slow - ) noexcept -{ - enum : int { shift_byte = 3 }; - enum : int { size_byte = ( 01 << shift_byte ) }; // == 8 - enum : int { mask_byte = size_byte - 1 }; - - enum : int { shift_mem_cycle = 2 }; - enum : int { size_mem_cycle = ( 01 << shift_mem_cycle ) }; // == 4 - enum : int { mask_mem_cycle = size_mem_cycle - 1 }; - - // Cycle step values: 1 <= step <= size_val_cycle - // An odd multiple of memory cycle so that when a memory location - // is reused it has a different value. - // Must be representable within a single byte: size_val_cycle < 16 - - enum : int { size_val_cycle = 3 * size_mem_cycle }; - - // Requires: - // Called by rank = [ 0 .. size ) - // buffer aligned to int64_t[4] - - // A sequence of rendezvous uses four cycled locations in memory - // and non-equal cycled synchronization values to - // 1) prevent rendezvous from overtaking one another and - // 2) give each spin wait location an int64_t[4] span - // so that it has its own cache line. - - const int64_t step = (buffer[0] % size_val_cycle ) + 1 ; - - // The leading int64_t[4] span is for thread 0 to write - // and all other threads to read spin-wait. - // sync_offset is the index into this array for this step. 
- - const int sync_offset = ( step & mask_mem_cycle ) + size_mem_cycle + size_mem_cycle ; - - if ( rank ) { - - const int group_begin = rank << shift_byte ; // == rank * size_byte - - if ( group_begin < size ) { - - // This thread waits for threads - // [ group_begin .. group_begin + 8 ) - // [ rank*8 .. rank*8 + 8 ) - // to write to their designated bytes. - - const int end = group_begin + size_byte < size - ? size_byte : size - group_begin ; - - int64_t value = 0; - for ( int i = 0 ; i < end ; ++i ) { - value |= step << (i * size_byte ); - } - - store_fence(); // This should not be needed but fixes #742 - - if ( slow ) { - yield_until_equal( buffer[ (rank << shift_mem_cycle) + sync_offset ] - , value ); - } - else { - spinwait_until_equal( buffer[ (rank << shift_mem_cycle) + sync_offset ] - , value ); - } - } - - { - // This thread sets its designated byte. - // ( rank % size_byte ) + - // ( ( rank / size_byte ) * size_byte * size_mem_cycle ) + - // ( sync_offset * size_byte ) - int offset = ( rank & mask_byte ) - + ( ( rank & ~mask_byte ) << shift_mem_cycle ) - + ( sync_offset << shift_byte ); - - // Switch designated byte if running on big endian machine - volatile uint16_t value = 1; - volatile uint8_t* byte = (uint8_t*) &value; - volatile bool is_big_endian = (!(byte[0] == 1)); - if (is_big_endian) { - int remainder = ((offset) % 8); - int base = offset - remainder; - int shift = 7 - remainder; - offset = base + shift; - } - - // All of this thread's previous memory stores must be complete before - // this thread stores the step value at this thread's designated byte - // in the shared synchronization array. 
- - Kokkos::memory_fence(); - - ((volatile int8_t*) buffer)[ offset ] = int8_t( step ); - - // Memory fence to push the previous store out - Kokkos::memory_fence(); - } - - // Wait for thread 0 to release all other threads - - if ( slow ) { - yield_until_equal( buffer[ (step & mask_mem_cycle) + size_mem_cycle ] , int64_t(step) ); - } - else { - spinwait_until_equal( buffer[ (step & mask_mem_cycle) + size_mem_cycle ] , int64_t(step) ); - } - } - else { - // Thread 0 waits for threads [1..7] - // to write to their designated bytes. - - const int end = size_byte < size ? 8 : size ; - - int64_t value = 0; - for ( int i = 1 ; i < end ; ++i ) { - value |= step << (i * size_byte ); - } - - if ( slow ) { - yield_until_equal( buffer[ sync_offset ], value ); - } - else { - spinwait_until_equal( buffer[ sync_offset ], value ); - } - } - - return rank ? 0 : 1 ; -} - -void rendezvous_release( volatile int64_t * const buffer ) noexcept -{ - enum : int { shift_mem_cycle = 2 }; - enum : int { size_mem_cycle = ( 01 << shift_mem_cycle ) }; // == 4 - enum : int { mask_mem_cycle = size_mem_cycle - 1 }; - enum : int { size_val_cycle = 3 * size_mem_cycle }; - - // Requires: - // Called after team_rendezvous - // Called only by true == team_rendezvous(root) - - // update step - const int64_t step = (buffer[0] % size_val_cycle ) + 1; - buffer[0] = step; - - // Memory fence to be sure all previous writes are complete: - Kokkos::memory_fence(); - - buffer[ (step & mask_mem_cycle) + size_mem_cycle ] = step; - - // Memory fence to push the store out - Kokkos::memory_fence(); -} - -}} // namespace Kokkos::Impl - diff --git a/lib/kokkos/core/src/impl/Kokkos_Rendezvous.hpp b/lib/kokkos/core/src/impl/Kokkos_Rendezvous.hpp deleted file mode 100644 index 57f8633bca..0000000000 --- a/lib/kokkos/core/src/impl/Kokkos_Rendezvous.hpp +++ /dev/null @@ -1,87 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 
2.0 -// Copyright (2014) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_IMPL_RENDEZVOUS_HPP -#define KOKKOS_IMPL_RENDEZVOUS_HPP - -#include - -namespace Kokkos { namespace Impl { - -inline -constexpr int rendezvous_buffer_size( int max_members ) noexcept -{ - return (((max_members + 7) / 8) * 4) + 4 + 4; -} - -/** \brief Thread pool rendezvous - * - * Rendezvous pattern: - * if ( rendezvous(root) ) { - * ... only root thread here while all others wait ... - * rendezvous_release(); - * } - * else { - * ... all other threads release here ... - * } - * - * Requires: buffer[ rendezvous_buffer_size( max_threads ) ]; - * - * When slow != 0 the expectation is thread arrival will be - * slow so the threads that arrive early should quickly yield - * their core to the runtime thus possibly allowing the late - * arriving threads to have more resources - * (e.g., power and clock frequency). - */ -int rendezvous( volatile int64_t * const buffer - , int const size - , int const rank - , int const slow = 0 ) noexcept ; - -void rendezvous_release( volatile int64_t * const buffer ) noexcept ; - - -}} // namespace Kokkos::Impl - -#endif // KOKKOS_IMPL_RENDEZVOUS_HPP - diff --git a/lib/kokkos/core/src/impl/Kokkos_Serial.cpp b/lib/kokkos/core/src/impl/Kokkos_Serial.cpp index dfbeba461e..f3b048d58c 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Serial.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_Serial.cpp @@ -60,6 +60,8 @@ namespace { HostThreadTeamData g_serial_thread_team_data ; +bool g_serial_is_initialized = false; + } // Resize thread team data scratch memory @@ -136,9 +138,9 @@ HostThreadTeamData * serial_get_thread_team_data() namespace Kokkos { -int Serial::is_initialized() +bool Serial::is_initialized() { - return 1 ; + return Impl::g_serial_is_initialized ; } void Serial::initialize( unsigned threads_count @@ -158,6 +160,8 @@ void Serial::initialize( unsigned threads_count #if 
defined(KOKKOS_ENABLE_PROFILING) Kokkos::Profiling::initialize(); #endif + + Impl::g_serial_is_initialized = true; } void Serial::finalize() @@ -177,6 +181,8 @@ void Serial::finalize() #if defined(KOKKOS_ENABLE_PROFILING) Kokkos::Profiling::finalize(); #endif + + Impl::g_serial_is_initialized = false; } const char* Serial::name() { return "Serial"; } diff --git a/lib/kokkos/core/src/impl/Kokkos_Traits.hpp b/lib/kokkos/core/src/impl/Kokkos_Traits.hpp index 6300417576..b59548ea1d 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Traits.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Traits.hpp @@ -47,6 +47,7 @@ #include #include #include +#include #include #include @@ -435,21 +436,12 @@ struct power_of_two<1,true> /** \brief If power of two then return power, * otherwise return ~0u. */ -static KOKKOS_FORCEINLINE_FUNCTION +KOKKOS_FORCEINLINE_FUNCTION unsigned power_of_two_if_valid( const unsigned N ) { unsigned p = ~0u ; - if ( N && ! ( N & ( N - 1 ) ) ) { -#if defined( __CUDA_ARCH__ ) && defined( KOKKOS_ENABLE_CUDA ) - p = __ffs(N) - 1 ; -#elif defined( __GNUC__ ) || defined( __GNUG__ ) - p = __builtin_ffs(N) - 1 ; -#elif defined( __INTEL_COMPILER ) - p = _bit_scan_forward(N); -#else - p = 0 ; - for ( unsigned j = 1 ; ! 
( N & j ) ; j <<= 1 ) { ++p ; } -#endif + if ( is_integral_power_of_two ( N ) ) { + p = bit_scan_forward ( N ) ; } return p ; } diff --git a/lib/kokkos/core/src/impl/Kokkos_ViewTile.hpp b/lib/kokkos/core/src/impl/Kokkos_ViewTile.hpp index 37367f68e4..54d061a503 100644 --- a/lib/kokkos/core/src/impl/Kokkos_ViewTile.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_ViewTile.hpp @@ -144,9 +144,9 @@ public: //---------------------------------------- KOKKOS_FUNCTION_DEFAULTED ~ViewOffset() = default ; - KOKKOS_INLINE_FUNCTION ViewOffset() = default ; - KOKKOS_INLINE_FUNCTION ViewOffset( const ViewOffset & ) = default ; - KOKKOS_INLINE_FUNCTION ViewOffset & operator = ( const ViewOffset & ) = default ; + KOKKOS_FUNCTION_DEFAULTED ViewOffset() = default ; + KOKKOS_FUNCTION_DEFAULTED ViewOffset( const ViewOffset & ) = default ; + KOKKOS_FUNCTION_DEFAULTED ViewOffset & operator = ( const ViewOffset & ) = default ; template< unsigned TrivialScalarSize > KOKKOS_INLINE_FUNCTION diff --git a/lib/kokkos/core/unit_test/CMakeLists.txt b/lib/kokkos/core/unit_test/CMakeLists.txt index 573b7c9b9c..83d617b9a6 100644 --- a/lib/kokkos/core/unit_test/CMakeLists.txt +++ b/lib/kokkos/core/unit_test/CMakeLists.txt @@ -2,7 +2,16 @@ # Add test-only library for gtest to be reused by all the subpackages # +IF(NOT KOKKOS_HAS_TRILINOS) + IF(KOKKOS_SEPARATE_LIBS) + set(TEST_LINK_TARGETS kokkoscore) + ELSE() + set(TEST_LINK_TARGETS kokkos) + ENDIF() +ENDIF() + SET(GTEST_SOURCE_DIR ${${PARENT_PACKAGE_NAME}_SOURCE_DIR}/tpls/gtest) +SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DGTEST_HAS_PTHREAD=0") INCLUDE_DIRECTORIES(${GTEST_SOURCE_DIR}) TRIBITS_ADD_LIBRARY( @@ -63,7 +72,7 @@ IF(Kokkos_ENABLE_Serial) COMM serial mpi NUM_MPI_PROCS 1 FAIL_REGULAR_EXPRESSION " FAILED " - TESTONLYLIBS kokkos_gtest + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) ENDIF() @@ -111,7 +120,7 @@ IF(Kokkos_ENABLE_Pthread) COMM serial mpi NUM_MPI_PROCS 1 FAIL_REGULAR_EXPRESSION " FAILED " - TESTONLYLIBS kokkos_gtest + TESTONLYLIBS 
kokkos_gtest ${TEST_LINK_TARGETS} ) ENDIF() @@ -160,7 +169,7 @@ IF(Kokkos_ENABLE_OpenMP) COMM serial mpi NUM_MPI_PROCS 1 FAIL_REGULAR_EXPRESSION " FAILED " - TESTONLYLIBS kokkos_gtest + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) ENDIF() @@ -194,7 +203,7 @@ IF(Kokkos_ENABLE_Qthreads) COMM serial mpi NUM_MPI_PROCS 1 FAIL_REGULAR_EXPRESSION " FAILED " - TESTONLYLIBS kokkos_gtest + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) ENDIF() @@ -251,10 +260,11 @@ IF(Kokkos_ENABLE_Cuda) cuda/TestCuda_ViewOfClass.cpp cuda/TestCuda_Crs.cpp cuda/TestCuda_WorkGraph.cpp + cuda/TestCuda_UniqueToken.cpp COMM serial mpi NUM_MPI_PROCS 1 FAIL_REGULAR_EXPRESSION " FAILED " - TESTONLYLIBS kokkos_gtest + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) ENDIF() @@ -271,7 +281,7 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST( COMM serial mpi NUM_MPI_PROCS 1 FAIL_REGULAR_EXPRESSION " FAILED " - TESTONLYLIBS kokkos_gtest + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) foreach(INITTESTS_NUM RANGE 1 16) @@ -281,7 +291,7 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST( COMM serial mpi NUM_MPI_PROCS 1 FAIL_REGULAR_EXPRESSION " FAILED " - TESTONLYLIBS kokkos_gtest + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) endforeach(INITTESTS_NUM) @@ -291,5 +301,5 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST( COMM serial mpi NUM_MPI_PROCS 1 FAIL_REGULAR_EXPRESSION " FAILED " - TESTONLYLIBS kokkos_gtest + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) diff --git a/lib/kokkos/core/unit_test/Makefile b/lib/kokkos/core/unit_test/Makefile index 4103901894..ace6181ddf 100644 --- a/lib/kokkos/core/unit_test/Makefile +++ b/lib/kokkos/core/unit_test/Makefile @@ -27,7 +27,8 @@ endif CXXFLAGS = -O3 LINK ?= $(CXX) -LDFLAGS ?= -lpthread +LDFLAGS ?= +override LDFLAGS += -lpthread include $(KOKKOS_PATH)/Makefile.kokkos @@ -329,7 +330,7 @@ KokkosCore_UnitTest_HWLOC: $(OBJ_HWLOC) $(KOKKOS_LINK_DEPENDS) $(LINK) $(EXTRA_PATH) $(OBJ_HWLOC) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosCore_UnitTest_HWLOC 
KokkosCore_UnitTest_AllocationTracker: $(OBJ_ALLOCATIONTRACKER) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(EXTRA_PATH) $(OBJ_ALLOCATIONTRACKER) $(KOKKOS_LIBS) $( $(KOKKOS_LDFLAGS) $(LDFLAGS)LIB) -o KokkosCore_UnitTest_AllocationTracker + $(LINK) $(EXTRA_PATH) $(OBJ_ALLOCATIONTRACKER) $(KOKKOS_LIBS) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(LIB) -o KokkosCore_UnitTest_AllocationTracker KokkosCore_UnitTest_Default: $(OBJ_DEFAULT) $(KOKKOS_LINK_DEPENDS) $(LINK) $(EXTRA_PATH) $(OBJ_DEFAULT) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosCore_UnitTest_Default diff --git a/lib/kokkos/core/unit_test/TestReduce.hpp b/lib/kokkos/core/unit_test/TestReduce.hpp index 86982e6a55..56d6259b5a 100644 --- a/lib/kokkos/core/unit_test/TestReduce.hpp +++ b/lib/kokkos/core/unit_test/TestReduce.hpp @@ -475,6 +475,8 @@ public: namespace Test { +struct ReducerTag {}; + template< class Scalar, class ExecSpace = Kokkos::DefaultExecutionSpace > struct TestReducers { struct SumFunctor { @@ -590,6 +592,118 @@ struct TestReducers { } }; + struct SumFunctorTag { + Kokkos::View< const Scalar*, ExecSpace > values; + + KOKKOS_INLINE_FUNCTION + void operator()( const ReducerTag, const int & i, Scalar & value ) const { + value += values( i ); + } + }; + + struct ProdFunctorTag { + Kokkos::View< const Scalar*, ExecSpace > values; + + KOKKOS_INLINE_FUNCTION + void operator()( const ReducerTag, const int & i, Scalar & value ) const { + value *= values( i ); + } + }; + + struct MinFunctorTag { + Kokkos::View< const Scalar*, ExecSpace > values; + + KOKKOS_INLINE_FUNCTION + void operator()( const ReducerTag, const int & i, Scalar & value ) const { + if ( values( i ) < value ) value = values( i ); + } + }; + + struct MaxFunctorTag { + Kokkos::View< const Scalar*, ExecSpace > values; + + KOKKOS_INLINE_FUNCTION + void operator()( const ReducerTag, const int & i, Scalar & value ) const { + if ( values( i ) > value ) value = values( i ); + } + }; + + struct MinLocFunctorTag { + Kokkos::View< const Scalar*, ExecSpace 
> values; + + KOKKOS_INLINE_FUNCTION + void operator()( const ReducerTag, const int & i, typename Kokkos::Experimental::MinLoc< Scalar, int >::value_type & value ) const { + if ( values( i ) < value.val ) { + value.val = values( i ); + value.loc = i; + } + } + }; + + struct MaxLocFunctorTag { + Kokkos::View< const Scalar*, ExecSpace > values; + + KOKKOS_INLINE_FUNCTION + void operator()( const ReducerTag, const int & i, typename Kokkos::Experimental::MaxLoc< Scalar, int >::value_type & value ) const { + if ( values( i ) > value.val ) { + value.val = values( i ); + value.loc = i; + } + } + }; + + struct MinMaxLocFunctorTag { + Kokkos::View< const Scalar*, ExecSpace > values; + + KOKKOS_INLINE_FUNCTION + void operator()( const ReducerTag, const int & i, typename Kokkos::Experimental::MinMaxLoc< Scalar, int >::value_type & value ) const { + if ( values( i ) > value.max_val ) { + value.max_val = values( i ); + value.max_loc = i; + } + + if ( values( i ) < value.min_val ) { + value.min_val = values( i ); + value.min_loc = i; + } + } + }; + + struct BAndFunctorTag { + Kokkos::View< const Scalar*, ExecSpace > values; + + KOKKOS_INLINE_FUNCTION + void operator()( const ReducerTag, const int & i, Scalar & value ) const { + value = value & values( i ); + } + }; + + struct BOrFunctorTag { + Kokkos::View< const Scalar*, ExecSpace > values; + + KOKKOS_INLINE_FUNCTION + void operator()( const ReducerTag, const int & i, Scalar & value ) const { + value = value | values( i ); + } + }; + + struct LAndFunctorTag { + Kokkos::View< const Scalar*, ExecSpace > values; + + KOKKOS_INLINE_FUNCTION + void operator()( const ReducerTag, const int & i, Scalar & value ) const { + value = value && values( i ); + } + }; + + struct LOrFunctorTag { + Kokkos::View< const Scalar*, ExecSpace > values; + + KOKKOS_INLINE_FUNCTION + void operator()( const ReducerTag, const int & i, Scalar & value ) const { + value = value || values( i ); + } + }; static void test_sum( int N ) { Kokkos::View< Scalar*, 
ExecSpace > values( "Values", N ); auto h_values = Kokkos::create_mirror_view( values ); @@ -603,13 +717,19 @@ struct TestReducers { SumFunctor f; f.values = values; + SumFunctorTag f_tag; + f_tag.values = values; Scalar init = 0; { Scalar sum_scalar = init; Kokkos::Experimental::Sum< Scalar > reducer_scalar( sum_scalar ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); - + ASSERT_EQ( sum_scalar, reference_sum ); + + sum_scalar = init; + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace , ReducerTag >( 0, N ), f_tag, reducer_scalar ); ASSERT_EQ( sum_scalar, reference_sum ); Scalar sum_scalar_view = reducer_scalar.reference(); @@ -643,13 +763,19 @@ struct TestReducers { ProdFunctor f; f.values = values; + ProdFunctorTag f_tag; + f_tag.values = values; Scalar init = 1; { Scalar prod_scalar = init; Kokkos::Experimental::Prod< Scalar > reducer_scalar( prod_scalar ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); - + ASSERT_EQ( prod_scalar, reference_prod ); + + prod_scalar = init; + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace , ReducerTag >( 0, N ), f_tag, reducer_scalar ); ASSERT_EQ( prod_scalar, reference_prod ); Scalar prod_scalar_view = reducer_scalar.reference(); @@ -684,13 +810,19 @@ struct TestReducers { MinFunctor f; f.values = values; + MinFunctorTag f_tag; + f_tag.values = values; Scalar init = std::numeric_limits< Scalar >::max(); { Scalar min_scalar = init; Kokkos::Experimental::Min< Scalar > reducer_scalar( min_scalar ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); + ASSERT_EQ( min_scalar, reference_min ); + min_scalar = init; + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace , ReducerTag >( 0, N ), f_tag, reducer_scalar ); ASSERT_EQ( min_scalar, reference_min ); Scalar min_scalar_view = reducer_scalar.reference(); @@ -725,13 +857,19 @@ struct TestReducers { MaxFunctor f; f.values = values; + 
MaxFunctorTag f_tag; + f_tag.values = values; Scalar init = std::numeric_limits< Scalar >::min(); { Scalar max_scalar = init; Kokkos::Experimental::Max< Scalar > reducer_scalar( max_scalar ); - Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); + ASSERT_EQ( max_scalar, reference_max ); + + max_scalar = init; + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace , ReducerTag >( 0, N ), f_tag, reducer_scalar ); ASSERT_EQ( max_scalar, reference_max ); Scalar max_scalar_view = reducer_scalar.reference(); @@ -776,12 +914,19 @@ struct TestReducers { MinLocFunctor f; f.values = values; + MinLocFunctorTag f_tag; + f_tag.values = values; { value_type min_scalar; Kokkos::Experimental::MinLoc< Scalar, int > reducer_scalar( min_scalar ); - Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); + ASSERT_EQ( min_scalar.val, reference_min ); + ASSERT_EQ( min_scalar.loc, reference_loc ); + + min_scalar = value_type(); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace , ReducerTag >( 0, N ), f_tag, reducer_scalar ); ASSERT_EQ( min_scalar.val, reference_min ); ASSERT_EQ( min_scalar.loc, reference_loc ); @@ -829,12 +974,19 @@ struct TestReducers { MaxLocFunctor f; f.values = values; + MaxLocFunctorTag f_tag; + f_tag.values = values; { value_type max_scalar; Kokkos::Experimental::MaxLoc< Scalar, int > reducer_scalar( max_scalar ); - Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); + ASSERT_EQ( max_scalar.val, reference_max ); + ASSERT_EQ( max_scalar.loc, reference_loc ); + + max_scalar = value_type(); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace , ReducerTag >( 0, N ), f_tag, 
reducer_scalar ); ASSERT_EQ( max_scalar.val, reference_max ); ASSERT_EQ( max_scalar.loc, reference_loc ); @@ -898,12 +1050,35 @@ struct TestReducers { MinMaxLocFunctor f; f.values = values; + MinMaxLocFunctorTag f_tag; + f_tag.values = values; { value_type minmax_scalar; Kokkos::Experimental::MinMaxLoc< Scalar, int > reducer_scalar( minmax_scalar ); - Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); + ASSERT_EQ( minmax_scalar.min_val, reference_min ); + + for ( int i = 0; i < N; i++ ) { + if ( ( i == minmax_scalar.min_loc ) && ( h_values( i ) == reference_min ) ) { + reference_minloc = i; + } + } + + ASSERT_EQ( minmax_scalar.min_loc, reference_minloc ); + ASSERT_EQ( minmax_scalar.max_val, reference_max ); + + for ( int i = 0; i < N; i++ ) { + if ( ( i == minmax_scalar.max_loc ) && ( h_values( i ) == reference_max ) ) { + reference_maxloc = i; + } + } + + ASSERT_EQ( minmax_scalar.max_loc, reference_maxloc ); + + minmax_scalar = value_type(); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace , ReducerTag >( 0, N ), f_tag, reducer_scalar ); ASSERT_EQ( minmax_scalar.min_val, reference_min ); for ( int i = 0; i < N; i++ ) { @@ -962,14 +1137,21 @@ struct TestReducers { BAndFunctor f; f.values = values; + BAndFunctorTag f_tag; + f_tag.values = values; Scalar init = Scalar() | ( ~Scalar() ); { Scalar band_scalar = init; Kokkos::Experimental::BAnd< Scalar > reducer_scalar( band_scalar ); - Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); ASSERT_EQ( band_scalar, reference_band ); + + band_scalar = init; + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace , ReducerTag >( 0, N ), f_tag, reducer_scalar ); + ASSERT_EQ( band_scalar, reference_band ); + Scalar band_scalar_view = reducer_scalar.reference(); 
ASSERT_EQ( band_scalar_view, reference_band ); @@ -1002,13 +1184,19 @@ struct TestReducers { BOrFunctor f; f.values = values; + BOrFunctorTag f_tag; + f_tag.values = values; Scalar init = Scalar() & ( ~Scalar() ); { Scalar bor_scalar = init; Kokkos::Experimental::BOr< Scalar > reducer_scalar( bor_scalar ); - Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); + ASSERT_EQ( bor_scalar, reference_bor ); + + bor_scalar = init; + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace , ReducerTag >( 0, N ), f_tag, reducer_scalar ); ASSERT_EQ( bor_scalar, reference_bor ); Scalar bor_scalar_view = reducer_scalar.reference(); @@ -1042,13 +1230,19 @@ struct TestReducers { LAndFunctor f; f.values = values; + LAndFunctorTag f_tag; + f_tag.values = values; Scalar init = 1; { Scalar land_scalar = init; Kokkos::Experimental::LAnd< Scalar > reducer_scalar( land_scalar ); - Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); + ASSERT_EQ( land_scalar, reference_land ); + + land_scalar = init; + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace , ReducerTag >( 0, N ), f_tag, reducer_scalar ); ASSERT_EQ( land_scalar, reference_land ); Scalar land_scalar_view = reducer_scalar.reference(); @@ -1082,13 +1276,19 @@ struct TestReducers { LOrFunctor f; f.values = values; + LOrFunctorTag f_tag; + f_tag.values = values; Scalar init = 0; { Scalar lor_scalar = init; Kokkos::Experimental::LOr< Scalar > reducer_scalar( lor_scalar ); - Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); + ASSERT_EQ( lor_scalar, reference_lor ); + + lor_scalar = init; + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace , 
ReducerTag >( 0, N ), f_tag, reducer_scalar ); ASSERT_EQ( lor_scalar, reference_lor ); Scalar lor_scalar_view = reducer_scalar.reference(); diff --git a/lib/kokkos/core/unit_test/TestTeamVector.hpp b/lib/kokkos/core/unit_test/TestTeamVector.hpp index 7f4663d0f9..be048b19e4 100644 --- a/lib/kokkos/core/unit_test/TestTeamVector.hpp +++ b/lib/kokkos/core/unit_test/TestTeamVector.hpp @@ -46,6 +46,7 @@ #include #include #include +#include namespace TestTeamVector { @@ -840,7 +841,8 @@ public: const ScalarType solution = (ScalarType) nrows * (ScalarType) ncols; if ( int64_t(solution) != int64_t(result) ) { - printf( " TestTripleNestedReduce failed solution(%ld) != result(%ld), nrows(%d) ncols(%d) league_size(%d) team_size(%d)\n" + printf( " TestTripleNestedReduce failed solution(%" PRId64 ") != result(%" PRId64 ")," + " nrows(%" PRId32 ") ncols(%" PRId32 ") league_size(%" PRId32 ") team_size(%" PRId32 ")\n" , int64_t(solution) , int64_t(result) , int32_t(nrows) diff --git a/lib/kokkos/core/unit_test/TestViewMapping_subview.hpp b/lib/kokkos/core/unit_test/TestViewMapping_subview.hpp index 219a4d1f20..d6db548665 100644 --- a/lib/kokkos/core/unit_test/TestViewMapping_subview.hpp +++ b/lib/kokkos/core/unit_test/TestViewMapping_subview.hpp @@ -79,14 +79,18 @@ struct TestViewMappingSubview typedef Kokkos::View< int***[13][14], Kokkos::LayoutLeft, ExecSpace > DLT; typedef Kokkos::Subview< DLT, range, int, int, int, int > DLS1; + #if !defined(KOKKOS_IMPL_CUDA_VERSION_9_WORKAROUND) static_assert( DLS1::rank == 1 && std::is_same< typename DLS1::array_layout, Kokkos::LayoutLeft >::value , "Subview layout error for rank 1 subview of left-most range of LayoutLeft" ); + #endif typedef Kokkos::View< int***[13][14], Kokkos::LayoutRight, ExecSpace > DRT; typedef Kokkos::Subview< DRT, int, int, int, int, range > DRS1; + #if !defined(KOKKOS_IMPL_CUDA_VERSION_9_WORKAROUND) static_assert( DRS1::rank == 1 && std::is_same< typename DRS1::array_layout, Kokkos::LayoutRight >::value , "Subview 
layout error for rank 1 subview of right-most range of LayoutRight" ); + #endif AT Aa; AS Ab; diff --git a/lib/kokkos/core/unit_test/UnitTestConfig.make b/lib/kokkos/core/unit_test/UnitTestConfig.make new file mode 100644 index 0000000000..97f4af5a8b --- /dev/null +++ b/lib/kokkos/core/unit_test/UnitTestConfig.make @@ -0,0 +1,52 @@ +KOKKOS_PATH = ../.. + +# See $(KOKKOS_PATH)/Makefile.kokkos and $(KOKKOS_PATH)/generate_makefile.bash +KOKKOS_ARCH_OPTIONS="None AMDAVX ARMv80 ARMv81 ARMv8-ThunderX \ + BGQ Power7 Power8 Power9 \ + WSM SNB HSW BDW SKX KNC KNL \ + Kepler Kepler30 Kepler32 Kepler35 Kepler37 \ + Maxwell Maxwell50 Maxwell52 Maxwell53 Pascal60 Pascal61" +#KOKKOS_ARCH_OPTIONS="AMDAVX" + +KOKKOS_DEVICE_OPTIONS="Cuda ROCm OpenMP Pthread Serial Qthreads" +#KOKKOS_DEVICE_OPTIONS="Cuda" + +# Configure paths to enable environment query in Makefile.kokkos to work +ROCM_HCC_PATH="config" +CXX="./config/cxx" +ipath=env CXX=$(CXX) env PATH=./config:$$PATH env ROCM_HCC_PATH=$(ROCM_HCC_PATH) + +# Defined in core/src/Makefile -- this should be consistent +KOKKOS_MAKEFILE=Makefile.kokkos +KOKKOS_CMAKEFILE=kokkos_generated_settings.cmake + +# Defined in Makefile.kokkos -- this should be consistent +KOKKOS_INTERNAL_CONFIG_TMP=KokkosCore_config.tmp +KOKKOS_CONFIG_HEADER=KokkosCore_config.h + +d='\#' + +# diff => 0 is no difference. 
if => 0 is false +testmake=if test "`testmake.sh $1 $2 $3`" = 'Passed'; then echo OK $d $1; else echo not OK $d $1; fi +testconf=if test "`diffconfig.sh $1`" = 'Passed'; then echo OK $d $1; else echo not OK $d $1; fi + +# testing tmp and cmakefile files is unnecessary here +test: + @for karch in "$(KOKKOS_ARCH_OPTIONS)"; do \ + for device in "$(KOKKOS_DEVICE_OPTIONS)"; do \ + $(ipath) KOKKOS_DEVICES=$$device KOKKOS_ARCH=$$karch make -e -f ../src/Makefile build-makefile-cmake-kokkos; \ + rm -f $(KOKKOS_INTERNAL_CONFIG_TMP) $(KOKKOS_CMAKEFILE); \ + prfx="$$karch"_"$$device"_; \ + newmake="$$prfx"$(KOKKOS_MAKEFILE); \ + newconf="$$prfx"$(KOKKOS_CONFIG_HEADER); \ + mv $(KOKKOS_MAKEFILE) config/tmpstore/$$newmake; \ + mv $(KOKKOS_CONFIG_HEADER) config/tmpstore/$$newconf; \ + $(call testmake,$$newmake,$$karch,$$device); \ + $(call testconf,$$newconf); \ + done; \ + done + +test-cmake: + @cd config/cmaketest; \ + cmake . ; \ + make test diff --git a/lib/kokkos/core/unit_test/config/bin/hcc-config b/lib/kokkos/core/unit_test/config/bin/hcc-config new file mode 100755 index 0000000000..fc09138bcc --- /dev/null +++ b/lib/kokkos/core/unit_test/config/bin/hcc-config @@ -0,0 +1,2 @@ +#!/bin/sh +echo "--foo --bar" diff --git a/lib/kokkos/core/unit_test/config/clang b/lib/kokkos/core/unit_test/config/clang new file mode 100755 index 0000000000..34c6919410 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/clang @@ -0,0 +1,5 @@ +#!/bin/sh +echo="Apple LLVM version 8.1.0 (clang-802.0.42)" +echo="Target: x86_64-apple-darwin16.7.0" +echo="Thread model: posix" +echo="InstalledDir: /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin" diff --git a/lib/kokkos/core/unit_test/config/cmaketest/CMakeLists.txt b/lib/kokkos/core/unit_test/config/cmaketest/CMakeLists.txt new file mode 100644 index 0000000000..54a4c4a74a --- /dev/null +++ b/lib/kokkos/core/unit_test/config/cmaketest/CMakeLists.txt @@ -0,0 +1,80 @@ +cmake_minimum_required(VERSION 3.1 
FATAL_ERROR) +project(Kokkos CXX) + +enable_testing() + +# Initialization +get_filename_component(KOKKOS_TESTDIR ${CMAKE_SOURCE_DIR}/../.. REALPATH) +get_filename_component(KOKKOS_SRCDIR ${CMAKE_SOURCE_DIR}/../../../.. REALPATH) +set(KOKKOS_SRC_PATH ${KOKKOS_SRCDIR}) +set(KOKKOS_PATH ${KOKKOS_SRC_PATH}) + +set(CXX ${KOKKOS_TESTDIR}/config/cxx) + +# Defined in core/src/Makefile -- this should be consistent +set(KOKKOS_MAKEFILE Makefile.kokkos) +set(KOKKOS_CMAKEFILE kokkos_generated_settings.cmake) + +# Defined in Makefile.kokkos -- this should be consistent +set(KOKKOS_INTERNAL_CONFIG_TMP KokkosCore_config.tmp) +set(KOKKOS_CONFIG_HEADER KokkosCore_config.h) + +set(KOKKOS_CMAKE_VERBOSE False) +include(${KOKKOS_SRCDIR}/cmake/kokkos_options.cmake) +foreach(KOKKOS_DEV ${KOKKOS_DEVICES_LIST}) +# Do some initialization: Want to turn everything off for testing + string(TOUPPER ${KOKKOS_DEV} KOKKOS_DEVUC) + set(KOKKOS_ENABLE_${KOKKOS_DEVUC} OFF) +endforeach() + + +#TEST set(KOKKOS_HOST_ARCH_LIST ARMv80) +#TEST set(KOKKOS_DEVICES_LIST Cuda) +#set(KOKKOS_HOST_ARCH_LIST AMDAVX) +#set(KOKKOS_DEVICES_LIST Cuda) + +foreach(KOKKOS_HOST_ARCH ${KOKKOS_HOST_ARCH_LIST}) + foreach(KOKKOS_DEV ${KOKKOS_DEVICES_LIST}) + string(TOUPPER ${KOKKOS_DEV} KOKKOS_DEVUC) + set(KOKKOS_ENABLE_${KOKKOS_DEVUC} On) + + set(KOKKOS_CMAKE_VERBOSE True) + include(${KOKKOS_SRCDIR}/cmake/kokkos_options.cmake) + set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} ROCM_HCC_PATH=${KOKKOS_TESTDIR}/config) + + #message(STATUS "${KOKKOS_SETTINGS} make -f ${KOKKOS_SRCDIR}/core/src/Makefile build-makefile-cmake-kokkos") + execute_process( + COMMAND ${KOKKOS_SETTINGS} make -f ${KOKKOS_SRCDIR}/core/src/Makefile build-makefile-cmake-kokkos + WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" + OUTPUT_FILE ${CMAKE_BINARY_DIR}/core_src_make.out + RESULT_VARIABLE res + ) + #message(STATUS "RESULT ${res}") + + file(REMOVE ${KOKKOS_INTERNAL_CONFIG_TMP} ${KOKKOS_MAKEFILE}) + set(PREFIX "${KOKKOS_HOST_ARCH}_${KOKKOS_DEV}_") + set(NEWCMAKE 
${PREFIX}${KOKKOS_CMAKEFILE}) + set(NEWCONFH ${PREFIX}${KOKKOS_CONFIG_HEADER}) + file(RENAME ${KOKKOS_CMAKEFILE} ${NEWCMAKE}) + file(RENAME ${KOKKOS_CONFIG_HEADER} ${NEWCONFH}) + + add_test(NAME ${NEWCMAKE}-test + COMMAND ${KOKKOS_TESTDIR}/testmake.sh ${NEWCMAKE} ${KOKKOS_HOST_ARCH} ${KOKKOS_DEV} + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + ) + set_tests_properties(${NEWCMAKE}-test + PROPERTIES PASS_REGULAR_EXPRESSION Passed + TIMEOUT 15 + ) + add_test(NAME ${NEWCONFH}-test + COMMAND ${KOKKOS_TESTDIR}/diffconfig.sh ${NEWCONFH} + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + ) + set_tests_properties(${NEWCONFH}-test + PROPERTIES PASS_REGULAR_EXPRESSION Passed + TIMEOUT 15 + ) + set(KOKKOS_ENABLE_${KOKKOS_DEVUC} Off) + + endforeach() +endforeach() diff --git a/lib/kokkos/core/unit_test/config/cxx b/lib/kokkos/core/unit_test/config/cxx new file mode 100755 index 0000000000..f25d7714a5 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/cxx @@ -0,0 +1,5 @@ +#!/bin/sh +echo "g++ (GCC) 6.3.1 20161221 (Red Hat 6.3.1-1)" +echo "Copyright (C) 2016 Free Software Foundation, Inc." +echo "This is free software; see the source for copying conditions. There is NO" +echo "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." diff --git a/lib/kokkos/core/unit_test/config/mpic++ b/lib/kokkos/core/unit_test/config/mpic++ new file mode 100755 index 0000000000..f25d7714a5 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/mpic++ @@ -0,0 +1,5 @@ +#!/bin/sh +echo "g++ (GCC) 6.3.1 20161221 (Red Hat 6.3.1-1)" +echo "Copyright (C) 2016 Free Software Foundation, Inc." +echo "This is free software; see the source for copying conditions. There is NO" +echo "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." 
diff --git a/lib/kokkos/core/unit_test/config/nvcc b/lib/kokkos/core/unit_test/config/nvcc new file mode 100755 index 0000000000..b5bcbf234c --- /dev/null +++ b/lib/kokkos/core/unit_test/config/nvcc @@ -0,0 +1,5 @@ +#!/bin/sh +echo "nvcc: NVIDIA (R) Cuda compiler driver" +echo "Copyright (c) 2005-2016 NVIDIA Corporation" +echo "Built on Tue_Jan_10_13:22:03_CST_2017" +echo "Cuda compilation tools, release 8.0, V8.0.61" diff --git a/lib/kokkos/core/unit_test/config/results/AMDAVX_Cuda_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/AMDAVX_Cuda_KokkosCore_config.h new file mode 100644 index 0000000000..1a737a3b2f --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/AMDAVX_Cuda_KokkosCore_config.h @@ -0,0 +1,18 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:09 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_CUDA 1 +#define KOKKOS_HAVE_SERIAL 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_AVX 1 diff --git a/lib/kokkos/core/unit_test/config/results/AMDAVX_OpenMP_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/AMDAVX_OpenMP_KokkosCore_config.h new file mode 100644 index 0000000000..7a704e4185 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/AMDAVX_OpenMP_KokkosCore_config.h @@ -0,0 +1,17 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:10 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_OPENMP 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_AVX 1 diff --git a/lib/kokkos/core/unit_test/config/results/AMDAVX_Pthread_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/AMDAVX_Pthread_KokkosCore_config.h new file mode 100644 index 0000000000..c478a5c252 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/AMDAVX_Pthread_KokkosCore_config.h @@ -0,0 +1,17 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:10 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_PTHREAD 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_AVX 1 diff --git a/lib/kokkos/core/unit_test/config/results/AMDAVX_Qthreads_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/AMDAVX_Qthreads_KokkosCore_config.h new file mode 100644 index 0000000000..fb5d214630 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/AMDAVX_Qthreads_KokkosCore_config.h @@ -0,0 +1,17 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:11 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_QTHREADS 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_AVX 1 diff --git a/lib/kokkos/core/unit_test/config/results/AMDAVX_ROCm_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/AMDAVX_ROCm_KokkosCore_config.h new file mode 100644 index 0000000000..7b7e2b8153 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/AMDAVX_ROCm_KokkosCore_config.h @@ -0,0 +1,18 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:09 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_ENABLE_ROCM 1 +#define KOKKOS_HAVE_SERIAL 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_AVX 1 diff --git a/lib/kokkos/core/unit_test/config/results/AMDAVX_Serial_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/AMDAVX_Serial_KokkosCore_config.h new file mode 100644 index 0000000000..9930bacc47 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/AMDAVX_Serial_KokkosCore_config.h @@ -0,0 +1,17 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:11 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_SERIAL 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_AVX 1 diff --git a/lib/kokkos/core/unit_test/config/results/ARMv8-ThunderX_Cuda_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/ARMv8-ThunderX_Cuda_KokkosCore_config.h new file mode 100644 index 0000000000..7f172c00e4 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/ARMv8-ThunderX_Cuda_KokkosCore_config.h @@ -0,0 +1,19 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:17 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_CUDA 1 +#define KOKKOS_HAVE_SERIAL 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_ARMV80 1 +#define KOKKOS_ARCH_ARMV8_THUNDERX 1 diff --git a/lib/kokkos/core/unit_test/config/results/ARMv8-ThunderX_OpenMP_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/ARMv8-ThunderX_OpenMP_KokkosCore_config.h new file mode 100644 index 0000000000..d25b832ca2 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/ARMv8-ThunderX_OpenMP_KokkosCore_config.h @@ -0,0 +1,18 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:18 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_OPENMP 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_ARMV80 1 +#define KOKKOS_ARCH_ARMV8_THUNDERX 1 diff --git a/lib/kokkos/core/unit_test/config/results/ARMv8-ThunderX_Pthread_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/ARMv8-ThunderX_Pthread_KokkosCore_config.h new file mode 100644 index 0000000000..cd3a603092 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/ARMv8-ThunderX_Pthread_KokkosCore_config.h @@ -0,0 +1,18 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:19 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_PTHREAD 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_ARMV80 1 +#define KOKKOS_ARCH_ARMV8_THUNDERX 1 diff --git a/lib/kokkos/core/unit_test/config/results/ARMv8-ThunderX_Qthreads_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/ARMv8-ThunderX_Qthreads_KokkosCore_config.h new file mode 100644 index 0000000000..3865bc4a9a --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/ARMv8-ThunderX_Qthreads_KokkosCore_config.h @@ -0,0 +1,18 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:20 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_QTHREADS 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_ARMV80 1 +#define KOKKOS_ARCH_ARMV8_THUNDERX 1 diff --git a/lib/kokkos/core/unit_test/config/results/ARMv8-ThunderX_ROCm_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/ARMv8-ThunderX_ROCm_KokkosCore_config.h new file mode 100644 index 0000000000..86b9f84585 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/ARMv8-ThunderX_ROCm_KokkosCore_config.h @@ -0,0 +1,19 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:18 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_ENABLE_ROCM 1 +#define KOKKOS_HAVE_SERIAL 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_ARMV80 1 +#define KOKKOS_ARCH_ARMV8_THUNDERX 1 diff --git a/lib/kokkos/core/unit_test/config/results/ARMv8-ThunderX_Serial_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/ARMv8-ThunderX_Serial_KokkosCore_config.h new file mode 100644 index 0000000000..75ada8c01f --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/ARMv8-ThunderX_Serial_KokkosCore_config.h @@ -0,0 +1,18 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:19 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_SERIAL 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_ARMV80 1 +#define KOKKOS_ARCH_ARMV8_THUNDERX 1 diff --git a/lib/kokkos/core/unit_test/config/results/ARMv80_Cuda_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/ARMv80_Cuda_KokkosCore_config.h new file mode 100644 index 0000000000..796c0aab65 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/ARMv80_Cuda_KokkosCore_config.h @@ -0,0 +1,18 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:12 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_CUDA 1 +#define KOKKOS_HAVE_SERIAL 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_ARMV80 1 diff --git a/lib/kokkos/core/unit_test/config/results/ARMv80_OpenMP_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/ARMv80_OpenMP_KokkosCore_config.h new file mode 100644 index 0000000000..dcf7ff7ea2 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/ARMv80_OpenMP_KokkosCore_config.h @@ -0,0 +1,17 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:13 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_OPENMP 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_ARMV80 1 diff --git a/lib/kokkos/core/unit_test/config/results/ARMv80_Pthread_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/ARMv80_Pthread_KokkosCore_config.h new file mode 100644 index 0000000000..298966b6d4 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/ARMv80_Pthread_KokkosCore_config.h @@ -0,0 +1,17 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:14 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_PTHREAD 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_ARMV80 1 diff --git a/lib/kokkos/core/unit_test/config/results/ARMv80_Qthreads_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/ARMv80_Qthreads_KokkosCore_config.h new file mode 100644 index 0000000000..7259a9e964 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/ARMv80_Qthreads_KokkosCore_config.h @@ -0,0 +1,17 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:14 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_QTHREADS 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_ARMV80 1 diff --git a/lib/kokkos/core/unit_test/config/results/ARMv80_ROCm_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/ARMv80_ROCm_KokkosCore_config.h new file mode 100644 index 0000000000..c2b4f146cb --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/ARMv80_ROCm_KokkosCore_config.h @@ -0,0 +1,18 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:12 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_ENABLE_ROCM 1 +#define KOKKOS_HAVE_SERIAL 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_ARMV80 1 diff --git a/lib/kokkos/core/unit_test/config/results/ARMv80_Serial_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/ARMv80_Serial_KokkosCore_config.h new file mode 100644 index 0000000000..fe5fe66445 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/ARMv80_Serial_KokkosCore_config.h @@ -0,0 +1,17 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:14 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_SERIAL 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_ARMV80 1 diff --git a/lib/kokkos/core/unit_test/config/results/ARMv81_Cuda_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/ARMv81_Cuda_KokkosCore_config.h new file mode 100644 index 0000000000..3d02142438 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/ARMv81_Cuda_KokkosCore_config.h @@ -0,0 +1,18 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:15 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_CUDA 1 +#define KOKKOS_HAVE_SERIAL 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_ARMV81 1 diff --git a/lib/kokkos/core/unit_test/config/results/ARMv81_OpenMP_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/ARMv81_OpenMP_KokkosCore_config.h new file mode 100644 index 0000000000..aa194c77be --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/ARMv81_OpenMP_KokkosCore_config.h @@ -0,0 +1,17 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:16 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_OPENMP 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_ARMV81 1 diff --git a/lib/kokkos/core/unit_test/config/results/ARMv81_Pthread_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/ARMv81_Pthread_KokkosCore_config.h new file mode 100644 index 0000000000..6d2dbeeef4 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/ARMv81_Pthread_KokkosCore_config.h @@ -0,0 +1,17 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:16 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_PTHREAD 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_ARMV81 1 diff --git a/lib/kokkos/core/unit_test/config/results/ARMv81_Qthreads_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/ARMv81_Qthreads_KokkosCore_config.h new file mode 100644 index 0000000000..e9fc71ad9b --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/ARMv81_Qthreads_KokkosCore_config.h @@ -0,0 +1,17 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:17 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_QTHREADS 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_ARMV81 1 diff --git a/lib/kokkos/core/unit_test/config/results/ARMv81_ROCm_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/ARMv81_ROCm_KokkosCore_config.h new file mode 100644 index 0000000000..28a56596b4 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/ARMv81_ROCm_KokkosCore_config.h @@ -0,0 +1,18 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:15 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_ENABLE_ROCM 1 +#define KOKKOS_HAVE_SERIAL 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_ARMV81 1 diff --git a/lib/kokkos/core/unit_test/config/results/ARMv81_Serial_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/ARMv81_Serial_KokkosCore_config.h new file mode 100644 index 0000000000..1d29fd1390 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/ARMv81_Serial_KokkosCore_config.h @@ -0,0 +1,17 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:16 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_SERIAL 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_ARMV81 1 diff --git a/lib/kokkos/core/unit_test/config/results/BDW_Cuda_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/BDW_Cuda_KokkosCore_config.h new file mode 100644 index 0000000000..ce2582b23f --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/BDW_Cuda_KokkosCore_config.h @@ -0,0 +1,24 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:37 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_CUDA 1 +#define KOKKOS_HAVE_SERIAL 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_ENABLE_TM +#endif +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_X86_64 +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_AVX2 1 diff --git a/lib/kokkos/core/unit_test/config/results/BDW_OpenMP_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/BDW_OpenMP_KokkosCore_config.h new file mode 100644 index 0000000000..118d1b225f --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/BDW_OpenMP_KokkosCore_config.h @@ -0,0 +1,23 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:38 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_OPENMP 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_ENABLE_TM +#endif +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_X86_64 +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_AVX2 1 diff --git a/lib/kokkos/core/unit_test/config/results/BDW_Pthread_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/BDW_Pthread_KokkosCore_config.h new file mode 100644 index 0000000000..6d0215baf6 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/BDW_Pthread_KokkosCore_config.h @@ -0,0 +1,23 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:38 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_PTHREAD 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_ENABLE_TM +#endif +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_X86_64 +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_AVX2 1 diff --git a/lib/kokkos/core/unit_test/config/results/BDW_Qthreads_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/BDW_Qthreads_KokkosCore_config.h new file mode 100644 index 0000000000..e879e7e1fe --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/BDW_Qthreads_KokkosCore_config.h @@ -0,0 +1,23 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:39 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_QTHREADS 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_ENABLE_TM +#endif +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_X86_64 +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_AVX2 1 diff --git a/lib/kokkos/core/unit_test/config/results/BDW_ROCm_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/BDW_ROCm_KokkosCore_config.h new file mode 100644 index 0000000000..3f86d055af --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/BDW_ROCm_KokkosCore_config.h @@ -0,0 +1,24 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:37 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_ENABLE_ROCM 1 +#define KOKKOS_HAVE_SERIAL 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_ENABLE_TM +#endif +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_X86_64 +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_AVX2 1 diff --git a/lib/kokkos/core/unit_test/config/results/BDW_Serial_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/BDW_Serial_KokkosCore_config.h new file mode 100644 index 0000000000..fba671ab1a --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/BDW_Serial_KokkosCore_config.h @@ -0,0 +1,23 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:39 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_SERIAL 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_ENABLE_TM +#endif +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_X86_64 +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_AVX2 1 diff --git a/lib/kokkos/core/unit_test/config/results/BGQ_Cuda_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/BGQ_Cuda_KokkosCore_config.h new file mode 100644 index 0000000000..93c74d41e2 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/BGQ_Cuda_KokkosCore_config.h @@ -0,0 +1,17 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Tue Sep 26 15:19:43 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_CUDA 1 +#define KOKKOS_HAVE_SERIAL 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/BGQ_OpenMP_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/BGQ_OpenMP_KokkosCore_config.h new file mode 100644 index 0000000000..533da16028 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/BGQ_OpenMP_KokkosCore_config.h @@ -0,0 +1,16 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Tue Sep 26 15:19:43 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_OPENMP 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/BGQ_Pthread_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/BGQ_Pthread_KokkosCore_config.h new file mode 100644 index 0000000000..9524c94f2b --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/BGQ_Pthread_KokkosCore_config.h @@ -0,0 +1,16 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Tue Sep 26 15:19:44 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_PTHREAD 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/BGQ_Qthreads_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/BGQ_Qthreads_KokkosCore_config.h new file mode 100644 index 0000000000..60c7ddcdb5 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/BGQ_Qthreads_KokkosCore_config.h @@ -0,0 +1,16 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Tue Sep 26 15:19:44 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_QTHREADS 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/BGQ_ROCm_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/BGQ_ROCm_KokkosCore_config.h new file mode 100644 index 0000000000..f5bc1f54a9 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/BGQ_ROCm_KokkosCore_config.h @@ -0,0 +1,17 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Tue Sep 26 15:19:44 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_ENABLE_ROCM 1 +#define KOKKOS_HAVE_SERIAL 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/BGQ_Serial_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/BGQ_Serial_KokkosCore_config.h new file mode 100644 index 0000000000..8372c00699 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/BGQ_Serial_KokkosCore_config.h @@ -0,0 +1,16 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Tue Sep 26 15:19:44 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_SERIAL 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/HSW_Cuda_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/HSW_Cuda_KokkosCore_config.h new file mode 100644 index 0000000000..7bbe9fa84c --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/HSW_Cuda_KokkosCore_config.h @@ -0,0 +1,21 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:34 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_CUDA 1 +#define KOKKOS_HAVE_SERIAL 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_X86_64 +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_AVX2 1 diff --git a/lib/kokkos/core/unit_test/config/results/HSW_OpenMP_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/HSW_OpenMP_KokkosCore_config.h new file mode 100644 index 0000000000..17f75872f8 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/HSW_OpenMP_KokkosCore_config.h @@ -0,0 +1,20 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:35 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_OPENMP 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_X86_64 +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_AVX2 1 diff --git a/lib/kokkos/core/unit_test/config/results/HSW_Pthread_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/HSW_Pthread_KokkosCore_config.h new file mode 100644 index 0000000000..5df1be17ad --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/HSW_Pthread_KokkosCore_config.h @@ -0,0 +1,20 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:35 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_PTHREAD 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_X86_64 +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_AVX2 1 diff --git a/lib/kokkos/core/unit_test/config/results/HSW_Qthreads_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/HSW_Qthreads_KokkosCore_config.h new file mode 100644 index 0000000000..253dc35bdf --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/HSW_Qthreads_KokkosCore_config.h @@ -0,0 +1,20 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:36 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_QTHREADS 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_X86_64 +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_AVX2 1 diff --git a/lib/kokkos/core/unit_test/config/results/HSW_ROCm_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/HSW_ROCm_KokkosCore_config.h new file mode 100644 index 0000000000..8e04801b86 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/HSW_ROCm_KokkosCore_config.h @@ -0,0 +1,21 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:35 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_ENABLE_ROCM 1 +#define KOKKOS_HAVE_SERIAL 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_X86_64 +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_AVX2 1 diff --git a/lib/kokkos/core/unit_test/config/results/HSW_Serial_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/HSW_Serial_KokkosCore_config.h new file mode 100644 index 0000000000..99f76aff0b --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/HSW_Serial_KokkosCore_config.h @@ -0,0 +1,20 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:36 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_SERIAL 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_X86_64 +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_AVX2 1 diff --git a/lib/kokkos/core/unit_test/config/results/KNC_Cuda_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/KNC_Cuda_KokkosCore_config.h new file mode 100644 index 0000000000..bdc270fd0d --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/KNC_Cuda_KokkosCore_config.h @@ -0,0 +1,21 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:42 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_CUDA 1 +#define KOKKOS_HAVE_SERIAL 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_KNC +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_KNC 1 diff --git a/lib/kokkos/core/unit_test/config/results/KNC_OpenMP_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/KNC_OpenMP_KokkosCore_config.h new file mode 100644 index 0000000000..f9b79f552d --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/KNC_OpenMP_KokkosCore_config.h @@ -0,0 +1,20 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:43 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_OPENMP 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_KNC +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_KNC 1 diff --git a/lib/kokkos/core/unit_test/config/results/KNC_Pthread_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/KNC_Pthread_KokkosCore_config.h new file mode 100644 index 0000000000..15d9d01a0a --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/KNC_Pthread_KokkosCore_config.h @@ -0,0 +1,20 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:44 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_PTHREAD 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_KNC +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_KNC 1 diff --git a/lib/kokkos/core/unit_test/config/results/KNC_Qthreads_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/KNC_Qthreads_KokkosCore_config.h new file mode 100644 index 0000000000..5f95a83c27 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/KNC_Qthreads_KokkosCore_config.h @@ -0,0 +1,20 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:45 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_QTHREADS 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_KNC +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_KNC 1 diff --git a/lib/kokkos/core/unit_test/config/results/KNC_ROCm_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/KNC_ROCm_KokkosCore_config.h new file mode 100644 index 0000000000..5991d3065f --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/KNC_ROCm_KokkosCore_config.h @@ -0,0 +1,21 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:43 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_ENABLE_ROCM 1 +#define KOKKOS_HAVE_SERIAL 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_KNC +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_KNC 1 diff --git a/lib/kokkos/core/unit_test/config/results/KNC_Serial_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/KNC_Serial_KokkosCore_config.h new file mode 100644 index 0000000000..3a8ddecf14 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/KNC_Serial_KokkosCore_config.h @@ -0,0 +1,20 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:44 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_SERIAL 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_KNC +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_KNC 1 diff --git a/lib/kokkos/core/unit_test/config/results/KNL_Cuda_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/KNL_Cuda_KokkosCore_config.h new file mode 100644 index 0000000000..bd7e2ca330 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/KNL_Cuda_KokkosCore_config.h @@ -0,0 +1,21 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:45 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_CUDA 1 +#define KOKKOS_HAVE_SERIAL 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_X86_64 +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_AVX512MIC 1 diff --git a/lib/kokkos/core/unit_test/config/results/KNL_OpenMP_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/KNL_OpenMP_KokkosCore_config.h new file mode 100644 index 0000000000..0f567f241c --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/KNL_OpenMP_KokkosCore_config.h @@ -0,0 +1,20 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:46 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_OPENMP 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_X86_64 +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_AVX512MIC 1 diff --git a/lib/kokkos/core/unit_test/config/results/KNL_Pthread_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/KNL_Pthread_KokkosCore_config.h new file mode 100644 index 0000000000..1cf3f0997a --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/KNL_Pthread_KokkosCore_config.h @@ -0,0 +1,20 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:47 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_PTHREAD 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_X86_64 +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_AVX512MIC 1 diff --git a/lib/kokkos/core/unit_test/config/results/KNL_Qthreads_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/KNL_Qthreads_KokkosCore_config.h new file mode 100644 index 0000000000..6d179d82f8 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/KNL_Qthreads_KokkosCore_config.h @@ -0,0 +1,20 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:48 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_QTHREADS 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_X86_64 +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_AVX512MIC 1 diff --git a/lib/kokkos/core/unit_test/config/results/KNL_ROCm_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/KNL_ROCm_KokkosCore_config.h new file mode 100644 index 0000000000..ae2938e34a --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/KNL_ROCm_KokkosCore_config.h @@ -0,0 +1,21 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:46 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_ENABLE_ROCM 1 +#define KOKKOS_HAVE_SERIAL 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_X86_64 +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_AVX512MIC 1 diff --git a/lib/kokkos/core/unit_test/config/results/KNL_Serial_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/KNL_Serial_KokkosCore_config.h new file mode 100644 index 0000000000..21f6e7e434 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/KNL_Serial_KokkosCore_config.h @@ -0,0 +1,20 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:47 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_SERIAL 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_X86_64 +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_AVX512MIC 1 diff --git a/lib/kokkos/core/unit_test/config/results/Kepler30_Cuda_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Kepler30_Cuda_KokkosCore_config.h new file mode 100644 index 0000000000..78e9335e24 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Kepler30_Cuda_KokkosCore_config.h @@ -0,0 +1,19 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:48 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_CUDA 1 +#define KOKKOS_HAVE_SERIAL 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_KEPLER 1 +#define KOKKOS_ARCH_KEPLER30 1 diff --git a/lib/kokkos/core/unit_test/config/results/Kepler30_OpenMP_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Kepler30_OpenMP_KokkosCore_config.h new file mode 100644 index 0000000000..769d9c8789 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Kepler30_OpenMP_KokkosCore_config.h @@ -0,0 +1,16 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:49 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_OPENMP 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/Kepler30_Pthread_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Kepler30_Pthread_KokkosCore_config.h new file mode 100644 index 0000000000..2cc728a5e3 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Kepler30_Pthread_KokkosCore_config.h @@ -0,0 +1,16 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:49 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_PTHREAD 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/Kepler30_Qthreads_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Kepler30_Qthreads_KokkosCore_config.h new file mode 100644 index 0000000000..410ba5ea15 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Kepler30_Qthreads_KokkosCore_config.h @@ -0,0 +1,16 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:50 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_QTHREADS 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/Kepler30_ROCm_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Kepler30_ROCm_KokkosCore_config.h new file mode 100644 index 0000000000..34867aa91e --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Kepler30_ROCm_KokkosCore_config.h @@ -0,0 +1,17 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:48 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_ENABLE_ROCM 1 +#define KOKKOS_HAVE_SERIAL 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/Kepler30_Serial_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Kepler30_Serial_KokkosCore_config.h new file mode 100644 index 0000000000..54943b244f --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Kepler30_Serial_KokkosCore_config.h @@ -0,0 +1,16 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:50 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_SERIAL 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/Kepler32_Cuda_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Kepler32_Cuda_KokkosCore_config.h new file mode 100644 index 0000000000..c7e23d503c --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Kepler32_Cuda_KokkosCore_config.h @@ -0,0 +1,19 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:50 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_CUDA 1 +#define KOKKOS_HAVE_SERIAL 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_KEPLER 1 +#define KOKKOS_ARCH_KEPLER32 1 diff --git a/lib/kokkos/core/unit_test/config/results/Kepler32_OpenMP_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Kepler32_OpenMP_KokkosCore_config.h new file mode 100644 index 0000000000..fcfbf97ef2 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Kepler32_OpenMP_KokkosCore_config.h @@ -0,0 +1,16 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:51 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_OPENMP 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/Kepler32_Pthread_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Kepler32_Pthread_KokkosCore_config.h new file mode 100644 index 0000000000..5cea100aa4 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Kepler32_Pthread_KokkosCore_config.h @@ -0,0 +1,16 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:52 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_PTHREAD 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/Kepler32_Qthreads_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Kepler32_Qthreads_KokkosCore_config.h new file mode 100644 index 0000000000..f42d0cc5f2 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Kepler32_Qthreads_KokkosCore_config.h @@ -0,0 +1,16 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:53 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_QTHREADS 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/Kepler32_ROCm_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Kepler32_ROCm_KokkosCore_config.h new file mode 100644 index 0000000000..0ae47b6976 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Kepler32_ROCm_KokkosCore_config.h @@ -0,0 +1,17 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:51 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_ENABLE_ROCM 1 +#define KOKKOS_HAVE_SERIAL 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/Kepler32_Serial_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Kepler32_Serial_KokkosCore_config.h new file mode 100644 index 0000000000..0d20b1dc81 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Kepler32_Serial_KokkosCore_config.h @@ -0,0 +1,16 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:52 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_SERIAL 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/Kepler35_Cuda_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Kepler35_Cuda_KokkosCore_config.h new file mode 100644 index 0000000000..f7935927c3 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Kepler35_Cuda_KokkosCore_config.h @@ -0,0 +1,19 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:53 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_CUDA 1 +#define KOKKOS_HAVE_SERIAL 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_KEPLER 1 +#define KOKKOS_ARCH_KEPLER35 1 diff --git a/lib/kokkos/core/unit_test/config/results/Kepler35_OpenMP_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Kepler35_OpenMP_KokkosCore_config.h new file mode 100644 index 0000000000..02777df40a --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Kepler35_OpenMP_KokkosCore_config.h @@ -0,0 +1,16 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:54 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_OPENMP 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/Kepler35_Pthread_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Kepler35_Pthread_KokkosCore_config.h new file mode 100644 index 0000000000..f51f00ce95 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Kepler35_Pthread_KokkosCore_config.h @@ -0,0 +1,16 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:55 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_PTHREAD 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/Kepler35_Qthreads_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Kepler35_Qthreads_KokkosCore_config.h new file mode 100644 index 0000000000..429f5e9e28 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Kepler35_Qthreads_KokkosCore_config.h @@ -0,0 +1,16 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:55 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_QTHREADS 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/Kepler35_ROCm_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Kepler35_ROCm_KokkosCore_config.h new file mode 100644 index 0000000000..111bb09340 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Kepler35_ROCm_KokkosCore_config.h @@ -0,0 +1,17 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:54 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_ENABLE_ROCM 1 +#define KOKKOS_HAVE_SERIAL 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/Kepler35_Serial_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Kepler35_Serial_KokkosCore_config.h new file mode 100644 index 0000000000..da61dabb58 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Kepler35_Serial_KokkosCore_config.h @@ -0,0 +1,16 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:55 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_SERIAL 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/Kepler37_Cuda_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Kepler37_Cuda_KokkosCore_config.h new file mode 100644 index 0000000000..c70ce2e04c --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Kepler37_Cuda_KokkosCore_config.h @@ -0,0 +1,19 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:56 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_CUDA 1 +#define KOKKOS_HAVE_SERIAL 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_KEPLER 1 +#define KOKKOS_ARCH_KEPLER37 1 diff --git a/lib/kokkos/core/unit_test/config/results/Kepler37_OpenMP_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Kepler37_OpenMP_KokkosCore_config.h new file mode 100644 index 0000000000..d8c6c74832 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Kepler37_OpenMP_KokkosCore_config.h @@ -0,0 +1,16 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:57 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_OPENMP 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/Kepler37_Pthread_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Kepler37_Pthread_KokkosCore_config.h new file mode 100644 index 0000000000..b832ef36e5 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Kepler37_Pthread_KokkosCore_config.h @@ -0,0 +1,16 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:58 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_PTHREAD 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/Kepler37_Qthreads_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Kepler37_Qthreads_KokkosCore_config.h new file mode 100644 index 0000000000..2b8a7f8183 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Kepler37_Qthreads_KokkosCore_config.h @@ -0,0 +1,16 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:59 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_QTHREADS 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/Kepler37_ROCm_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Kepler37_ROCm_KokkosCore_config.h new file mode 100644 index 0000000000..6a661f8842 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Kepler37_ROCm_KokkosCore_config.h @@ -0,0 +1,17 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:57 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_ENABLE_ROCM 1 +#define KOKKOS_HAVE_SERIAL 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/Kepler37_Serial_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Kepler37_Serial_KokkosCore_config.h new file mode 100644 index 0000000000..469f3d96a7 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Kepler37_Serial_KokkosCore_config.h @@ -0,0 +1,16 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:58 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_SERIAL 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/Kepler_Cuda_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Kepler_Cuda_KokkosCore_config.h new file mode 100644 index 0000000000..1ccf1bef54 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Kepler_Cuda_KokkosCore_config.h @@ -0,0 +1,19 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Tue Sep 26 15:19:50 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_CUDA 1 +#define KOKKOS_HAVE_SERIAL 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_KEPLER 1 +#define KOKKOS_ARCH_KEPLER35 1 diff --git a/lib/kokkos/core/unit_test/config/results/Kepler_OpenMP_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Kepler_OpenMP_KokkosCore_config.h new file mode 100644 index 0000000000..9d87c958a2 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Kepler_OpenMP_KokkosCore_config.h @@ -0,0 +1,16 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Tue Sep 26 15:19:51 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_OPENMP 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/Kepler_Pthread_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Kepler_Pthread_KokkosCore_config.h new file mode 100644 index 0000000000..263870be9f --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Kepler_Pthread_KokkosCore_config.h @@ -0,0 +1,16 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Tue Sep 26 15:19:51 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_PTHREAD 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/Kepler_Qthreads_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Kepler_Qthreads_KokkosCore_config.h new file mode 100644 index 0000000000..021d18c002 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Kepler_Qthreads_KokkosCore_config.h @@ -0,0 +1,16 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Tue Sep 26 15:19:51 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_QTHREADS 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/Kepler_ROCm_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Kepler_ROCm_KokkosCore_config.h new file mode 100644 index 0000000000..2826fdfb88 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Kepler_ROCm_KokkosCore_config.h @@ -0,0 +1,17 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Tue Sep 26 15:19:52 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_ENABLE_ROCM 1 +#define KOKKOS_HAVE_SERIAL 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/Kepler_Serial_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Kepler_Serial_KokkosCore_config.h new file mode 100644 index 0000000000..69097e034d --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Kepler_Serial_KokkosCore_config.h @@ -0,0 +1,16 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Tue Sep 26 15:19:52 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_SERIAL 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/Maxwell50_Cuda_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Maxwell50_Cuda_KokkosCore_config.h new file mode 100644 index 0000000000..fac64e9e98 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Maxwell50_Cuda_KokkosCore_config.h @@ -0,0 +1,19 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:59 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_CUDA 1 +#define KOKKOS_HAVE_SERIAL 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_MAXWELL 1 +#define KOKKOS_ARCH_MAXWELL50 1 diff --git a/lib/kokkos/core/unit_test/config/results/Maxwell50_OpenMP_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Maxwell50_OpenMP_KokkosCore_config.h new file mode 100644 index 0000000000..3f5b3eea13 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Maxwell50_OpenMP_KokkosCore_config.h @@ -0,0 +1,16 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:23:00 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_OPENMP 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/Maxwell50_Pthread_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Maxwell50_Pthread_KokkosCore_config.h new file mode 100644 index 0000000000..b249c88be5 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Maxwell50_Pthread_KokkosCore_config.h @@ -0,0 +1,16 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:23:01 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_PTHREAD 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/Maxwell50_Qthreads_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Maxwell50_Qthreads_KokkosCore_config.h new file mode 100644 index 0000000000..be1353365c --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Maxwell50_Qthreads_KokkosCore_config.h @@ -0,0 +1,16 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:23:02 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_QTHREADS 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/Maxwell50_ROCm_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Maxwell50_ROCm_KokkosCore_config.h new file mode 100644 index 0000000000..ce9f67d5be --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Maxwell50_ROCm_KokkosCore_config.h @@ -0,0 +1,17 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:23:00 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_ENABLE_ROCM 1 +#define KOKKOS_HAVE_SERIAL 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/Maxwell50_Serial_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Maxwell50_Serial_KokkosCore_config.h new file mode 100644 index 0000000000..f8c6be139e --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Maxwell50_Serial_KokkosCore_config.h @@ -0,0 +1,16 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:23:02 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_SERIAL 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/Maxwell52_Cuda_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Maxwell52_Cuda_KokkosCore_config.h new file mode 100644 index 0000000000..ce28f3e4b7 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Maxwell52_Cuda_KokkosCore_config.h @@ -0,0 +1,19 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:23:03 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_CUDA 1 +#define KOKKOS_HAVE_SERIAL 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_MAXWELL 1 +#define KOKKOS_ARCH_MAXWELL52 1 diff --git a/lib/kokkos/core/unit_test/config/results/Maxwell52_OpenMP_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Maxwell52_OpenMP_KokkosCore_config.h new file mode 100644 index 0000000000..35635063a5 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Maxwell52_OpenMP_KokkosCore_config.h @@ -0,0 +1,16 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:23:04 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_OPENMP 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/Maxwell52_Pthread_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Maxwell52_Pthread_KokkosCore_config.h new file mode 100644 index 0000000000..140740f81f --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Maxwell52_Pthread_KokkosCore_config.h @@ -0,0 +1,16 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:23:04 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_PTHREAD 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/Maxwell52_Qthreads_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Maxwell52_Qthreads_KokkosCore_config.h new file mode 100644 index 0000000000..58a043c6a3 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Maxwell52_Qthreads_KokkosCore_config.h @@ -0,0 +1,16 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:23:05 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_QTHREADS 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/Maxwell52_ROCm_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Maxwell52_ROCm_KokkosCore_config.h new file mode 100644 index 0000000000..06ff6935ca --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Maxwell52_ROCm_KokkosCore_config.h @@ -0,0 +1,17 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:23:03 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_ENABLE_ROCM 1 +#define KOKKOS_HAVE_SERIAL 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/Maxwell52_Serial_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Maxwell52_Serial_KokkosCore_config.h new file mode 100644 index 0000000000..eac120d061 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Maxwell52_Serial_KokkosCore_config.h @@ -0,0 +1,16 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:23:05 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_SERIAL 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/Maxwell53_Cuda_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Maxwell53_Cuda_KokkosCore_config.h new file mode 100644 index 0000000000..ad8344a099 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Maxwell53_Cuda_KokkosCore_config.h @@ -0,0 +1,19 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:23:06 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_CUDA 1 +#define KOKKOS_HAVE_SERIAL 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_MAXWELL 1 +#define KOKKOS_ARCH_MAXWELL53 1 diff --git a/lib/kokkos/core/unit_test/config/results/Maxwell53_OpenMP_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Maxwell53_OpenMP_KokkosCore_config.h new file mode 100644 index 0000000000..ab1e801267 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Maxwell53_OpenMP_KokkosCore_config.h @@ -0,0 +1,16 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:23:06 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_OPENMP 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/Maxwell53_Pthread_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Maxwell53_Pthread_KokkosCore_config.h new file mode 100644 index 0000000000..0b1e3bf311 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Maxwell53_Pthread_KokkosCore_config.h @@ -0,0 +1,16 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:23:07 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_PTHREAD 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/Maxwell53_Qthreads_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Maxwell53_Qthreads_KokkosCore_config.h new file mode 100644 index 0000000000..96fdbef3dc --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Maxwell53_Qthreads_KokkosCore_config.h @@ -0,0 +1,16 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:23:08 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_QTHREADS 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/Maxwell53_ROCm_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Maxwell53_ROCm_KokkosCore_config.h new file mode 100644 index 0000000000..82414cf358 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Maxwell53_ROCm_KokkosCore_config.h @@ -0,0 +1,17 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:23:06 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_ENABLE_ROCM 1 +#define KOKKOS_HAVE_SERIAL 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/Maxwell53_Serial_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Maxwell53_Serial_KokkosCore_config.h new file mode 100644 index 0000000000..b10b80b3bc --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Maxwell53_Serial_KokkosCore_config.h @@ -0,0 +1,16 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:23:07 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_SERIAL 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/Maxwell_Cuda_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Maxwell_Cuda_KokkosCore_config.h new file mode 100644 index 0000000000..d81a715007 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Maxwell_Cuda_KokkosCore_config.h @@ -0,0 +1,19 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Tue Sep 26 15:20:00 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_CUDA 1 +#define KOKKOS_HAVE_SERIAL 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_MAXWELL 1 +#define KOKKOS_ARCH_MAXWELL50 1 diff --git a/lib/kokkos/core/unit_test/config/results/Maxwell_OpenMP_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Maxwell_OpenMP_KokkosCore_config.h new file mode 100644 index 0000000000..98e93c7b28 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Maxwell_OpenMP_KokkosCore_config.h @@ -0,0 +1,16 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Tue Sep 26 15:20:00 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_OPENMP 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/Maxwell_Pthread_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Maxwell_Pthread_KokkosCore_config.h new file mode 100644 index 0000000000..47a7ccb7a5 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Maxwell_Pthread_KokkosCore_config.h @@ -0,0 +1,16 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Tue Sep 26 15:20:00 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_PTHREAD 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/Maxwell_Qthreads_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Maxwell_Qthreads_KokkosCore_config.h new file mode 100644 index 0000000000..a7f1fd3803 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Maxwell_Qthreads_KokkosCore_config.h @@ -0,0 +1,16 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Tue Sep 26 15:20:01 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_QTHREADS 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/Maxwell_ROCm_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Maxwell_ROCm_KokkosCore_config.h new file mode 100644 index 0000000000..c438f4f7d5 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Maxwell_ROCm_KokkosCore_config.h @@ -0,0 +1,17 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Tue Sep 26 15:20:01 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_ENABLE_ROCM 1 +#define KOKKOS_HAVE_SERIAL 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/Maxwell_Serial_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Maxwell_Serial_KokkosCore_config.h new file mode 100644 index 0000000000..d66c569084 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Maxwell_Serial_KokkosCore_config.h @@ -0,0 +1,16 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Tue Sep 26 15:20:01 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_SERIAL 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/None_Cuda_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/None_Cuda_KokkosCore_config.h new file mode 100644 index 0000000000..6bf2755fd0 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/None_Cuda_KokkosCore_config.h @@ -0,0 +1,17 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Tue Sep 26 15:19:22 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_CUDA 1 +#define KOKKOS_HAVE_SERIAL 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/None_OpenMP_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/None_OpenMP_KokkosCore_config.h new file mode 100644 index 0000000000..4dd2eed180 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/None_OpenMP_KokkosCore_config.h @@ -0,0 +1,16 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Tue Sep 26 15:19:23 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_OPENMP 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/None_Pthread_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/None_Pthread_KokkosCore_config.h new file mode 100644 index 0000000000..1bdd29b6a5 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/None_Pthread_KokkosCore_config.h @@ -0,0 +1,16 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Tue Sep 26 15:19:23 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_PTHREAD 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/None_Qthreads_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/None_Qthreads_KokkosCore_config.h new file mode 100644 index 0000000000..6bd8addd97 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/None_Qthreads_KokkosCore_config.h @@ -0,0 +1,16 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Tue Sep 26 15:19:23 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_QTHREADS 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/None_ROCm_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/None_ROCm_KokkosCore_config.h new file mode 100644 index 0000000000..74b0d7335c --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/None_ROCm_KokkosCore_config.h @@ -0,0 +1,17 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Tue Sep 26 15:19:24 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_ENABLE_ROCM 1 +#define KOKKOS_HAVE_SERIAL 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/None_Serial_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/None_Serial_KokkosCore_config.h new file mode 100644 index 0000000000..a9d0b264b8 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/None_Serial_KokkosCore_config.h @@ -0,0 +1,16 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Tue Sep 26 15:19:23 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_SERIAL 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/Pascal60_Cuda_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Pascal60_Cuda_KokkosCore_config.h new file mode 100644 index 0000000000..8fe1aa698d --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Pascal60_Cuda_KokkosCore_config.h @@ -0,0 +1,19 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:23:08 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_CUDA 1 +#define KOKKOS_HAVE_SERIAL 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_PASCAL 1 +#define KOKKOS_ARCH_PASCAL60 1 diff --git a/lib/kokkos/core/unit_test/config/results/Pascal60_OpenMP_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Pascal60_OpenMP_KokkosCore_config.h new file mode 100644 index 0000000000..93173f4e11 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Pascal60_OpenMP_KokkosCore_config.h @@ -0,0 +1,16 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:23:09 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_OPENMP 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/Pascal60_Pthread_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Pascal60_Pthread_KokkosCore_config.h new file mode 100644 index 0000000000..a05d5729e0 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Pascal60_Pthread_KokkosCore_config.h @@ -0,0 +1,16 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:23:09 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_PTHREAD 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/Pascal60_Qthreads_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Pascal60_Qthreads_KokkosCore_config.h new file mode 100644 index 0000000000..c5a2d1d707 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Pascal60_Qthreads_KokkosCore_config.h @@ -0,0 +1,16 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:23:10 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_QTHREADS 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/Pascal60_ROCm_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Pascal60_ROCm_KokkosCore_config.h new file mode 100644 index 0000000000..9c04befef5 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Pascal60_ROCm_KokkosCore_config.h @@ -0,0 +1,17 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:23:09 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_ENABLE_ROCM 1 +#define KOKKOS_HAVE_SERIAL 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/Pascal60_Serial_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Pascal60_Serial_KokkosCore_config.h new file mode 100644 index 0000000000..c6038c2965 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Pascal60_Serial_KokkosCore_config.h @@ -0,0 +1,16 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:23:10 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_SERIAL 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/Pascal61_Cuda_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Pascal61_Cuda_KokkosCore_config.h new file mode 100644 index 0000000000..0de37df960 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Pascal61_Cuda_KokkosCore_config.h @@ -0,0 +1,19 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:23:11 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_CUDA 1 +#define KOKKOS_HAVE_SERIAL 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_PASCAL 1 +#define KOKKOS_ARCH_PASCAL61 1 diff --git a/lib/kokkos/core/unit_test/config/results/Pascal61_OpenMP_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Pascal61_OpenMP_KokkosCore_config.h new file mode 100644 index 0000000000..2c392cc0df --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Pascal61_OpenMP_KokkosCore_config.h @@ -0,0 +1,16 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:23:12 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_OPENMP 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/Pascal61_Pthread_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Pascal61_Pthread_KokkosCore_config.h new file mode 100644 index 0000000000..f704aa9c81 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Pascal61_Pthread_KokkosCore_config.h @@ -0,0 +1,16 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:23:12 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_PTHREAD 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/Pascal61_Qthreads_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Pascal61_Qthreads_KokkosCore_config.h new file mode 100644 index 0000000000..958aac11da --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Pascal61_Qthreads_KokkosCore_config.h @@ -0,0 +1,16 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:23:13 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_QTHREADS 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/Pascal61_ROCm_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Pascal61_ROCm_KokkosCore_config.h new file mode 100644 index 0000000000..4a4d8cc683 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Pascal61_ROCm_KokkosCore_config.h @@ -0,0 +1,17 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:23:11 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_ENABLE_ROCM 1 +#define KOKKOS_HAVE_SERIAL 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/Pascal61_Serial_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Pascal61_Serial_KokkosCore_config.h new file mode 100644 index 0000000000..6fb2cf9e9d --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Pascal61_Serial_KokkosCore_config.h @@ -0,0 +1,16 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:23:12 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_SERIAL 1 +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ diff --git a/lib/kokkos/core/unit_test/config/results/Power7_Cuda_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Power7_Cuda_KokkosCore_config.h new file mode 100644 index 0000000000..a78e1ffc8d --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Power7_Cuda_KokkosCore_config.h @@ -0,0 +1,21 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:20 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_CUDA 1 +#define KOKKOS_HAVE_SERIAL 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_POWERPCBE +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_POWER7 1 diff --git a/lib/kokkos/core/unit_test/config/results/Power7_OpenMP_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Power7_OpenMP_KokkosCore_config.h new file mode 100644 index 0000000000..bd856b80a5 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Power7_OpenMP_KokkosCore_config.h @@ -0,0 +1,20 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:21 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_OPENMP 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_POWERPCBE +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_POWER7 1 diff --git a/lib/kokkos/core/unit_test/config/results/Power7_Pthread_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Power7_Pthread_KokkosCore_config.h new file mode 100644 index 0000000000..8b3ac2aff9 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Power7_Pthread_KokkosCore_config.h @@ -0,0 +1,20 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:21 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_PTHREAD 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_POWERPCBE +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_POWER7 1 diff --git a/lib/kokkos/core/unit_test/config/results/Power7_Qthreads_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Power7_Qthreads_KokkosCore_config.h new file mode 100644 index 0000000000..dffa8a3f58 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Power7_Qthreads_KokkosCore_config.h @@ -0,0 +1,20 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:22 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_QTHREADS 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_POWERPCBE +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_POWER7 1 diff --git a/lib/kokkos/core/unit_test/config/results/Power7_ROCm_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Power7_ROCm_KokkosCore_config.h new file mode 100644 index 0000000000..e16cfb37bd --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Power7_ROCm_KokkosCore_config.h @@ -0,0 +1,21 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:20 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_ENABLE_ROCM 1 +#define KOKKOS_HAVE_SERIAL 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_POWERPCBE +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_POWER7 1 diff --git a/lib/kokkos/core/unit_test/config/results/Power7_Serial_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Power7_Serial_KokkosCore_config.h new file mode 100644 index 0000000000..6831f3ce25 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Power7_Serial_KokkosCore_config.h @@ -0,0 +1,20 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:22 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_SERIAL 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_POWERPCBE +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_POWER7 1 diff --git a/lib/kokkos/core/unit_test/config/results/Power8_Cuda_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Power8_Cuda_KokkosCore_config.h new file mode 100644 index 0000000000..1ab0b04c6c --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Power8_Cuda_KokkosCore_config.h @@ -0,0 +1,21 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:23 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_CUDA 1 +#define KOKKOS_HAVE_SERIAL 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_POWERPCLE +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_POWER8 1 diff --git a/lib/kokkos/core/unit_test/config/results/Power8_OpenMP_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Power8_OpenMP_KokkosCore_config.h new file mode 100644 index 0000000000..54750405ca --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Power8_OpenMP_KokkosCore_config.h @@ -0,0 +1,20 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:24 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_OPENMP 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_POWERPCLE +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_POWER8 1 diff --git a/lib/kokkos/core/unit_test/config/results/Power8_Pthread_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Power8_Pthread_KokkosCore_config.h new file mode 100644 index 0000000000..5d71338d23 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Power8_Pthread_KokkosCore_config.h @@ -0,0 +1,20 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:24 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_PTHREAD 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_POWERPCLE +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_POWER8 1 diff --git a/lib/kokkos/core/unit_test/config/results/Power8_Qthreads_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Power8_Qthreads_KokkosCore_config.h new file mode 100644 index 0000000000..9da90f4f7e --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Power8_Qthreads_KokkosCore_config.h @@ -0,0 +1,20 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:25 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_QTHREADS 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_POWERPCLE +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_POWER8 1 diff --git a/lib/kokkos/core/unit_test/config/results/Power8_ROCm_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Power8_ROCm_KokkosCore_config.h new file mode 100644 index 0000000000..f3fd70b0cf --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Power8_ROCm_KokkosCore_config.h @@ -0,0 +1,21 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:24 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_ENABLE_ROCM 1 +#define KOKKOS_HAVE_SERIAL 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_POWERPCLE +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_POWER8 1 diff --git a/lib/kokkos/core/unit_test/config/results/Power8_Serial_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Power8_Serial_KokkosCore_config.h new file mode 100644 index 0000000000..7c0ecc22d3 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Power8_Serial_KokkosCore_config.h @@ -0,0 +1,20 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:25 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_SERIAL 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_POWERPCLE +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_POWER8 1 diff --git a/lib/kokkos/core/unit_test/config/results/Power9_Cuda_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Power9_Cuda_KokkosCore_config.h new file mode 100644 index 0000000000..47d518f407 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Power9_Cuda_KokkosCore_config.h @@ -0,0 +1,21 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:26 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_CUDA 1 +#define KOKKOS_HAVE_SERIAL 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_POWERPCLE +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_POWER9 1 diff --git a/lib/kokkos/core/unit_test/config/results/Power9_OpenMP_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Power9_OpenMP_KokkosCore_config.h new file mode 100644 index 0000000000..106bf33e44 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Power9_OpenMP_KokkosCore_config.h @@ -0,0 +1,20 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:27 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_OPENMP 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_POWERPCLE +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_POWER9 1 diff --git a/lib/kokkos/core/unit_test/config/results/Power9_Pthread_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Power9_Pthread_KokkosCore_config.h new file mode 100644 index 0000000000..108e5eba47 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Power9_Pthread_KokkosCore_config.h @@ -0,0 +1,20 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:27 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_PTHREAD 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_POWERPCLE +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_POWER9 1 diff --git a/lib/kokkos/core/unit_test/config/results/Power9_Qthreads_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Power9_Qthreads_KokkosCore_config.h new file mode 100644 index 0000000000..5c5be2ed3c --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Power9_Qthreads_KokkosCore_config.h @@ -0,0 +1,20 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:28 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_QTHREADS 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_POWERPCLE +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_POWER9 1 diff --git a/lib/kokkos/core/unit_test/config/results/Power9_ROCm_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Power9_ROCm_KokkosCore_config.h new file mode 100644 index 0000000000..8b6a391d95 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Power9_ROCm_KokkosCore_config.h @@ -0,0 +1,21 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:26 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_ENABLE_ROCM 1 +#define KOKKOS_HAVE_SERIAL 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_POWERPCLE +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_POWER9 1 diff --git a/lib/kokkos/core/unit_test/config/results/Power9_Serial_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/Power9_Serial_KokkosCore_config.h new file mode 100644 index 0000000000..6f7aefe62e --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/Power9_Serial_KokkosCore_config.h @@ -0,0 +1,20 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:27 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_SERIAL 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_POWERPCLE +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_POWER9 1 diff --git a/lib/kokkos/core/unit_test/config/results/SKX_Cuda_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/SKX_Cuda_KokkosCore_config.h new file mode 100644 index 0000000000..8f4380d992 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/SKX_Cuda_KokkosCore_config.h @@ -0,0 +1,24 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:40 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_CUDA 1 +#define KOKKOS_HAVE_SERIAL 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_ENABLE_TM +#endif +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_X86_64 +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_AVX512XEON 1 diff --git a/lib/kokkos/core/unit_test/config/results/SKX_OpenMP_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/SKX_OpenMP_KokkosCore_config.h new file mode 100644 index 0000000000..0a907a2ae1 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/SKX_OpenMP_KokkosCore_config.h @@ -0,0 +1,23 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:40 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include 
Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_OPENMP 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_ENABLE_TM +#endif +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_X86_64 +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_AVX512XEON 1 diff --git a/lib/kokkos/core/unit_test/config/results/SKX_Pthread_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/SKX_Pthread_KokkosCore_config.h new file mode 100644 index 0000000000..50a95223c9 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/SKX_Pthread_KokkosCore_config.h @@ -0,0 +1,23 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:41 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_PTHREAD 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_ENABLE_TM +#endif +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_X86_64 +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_AVX512XEON 1 diff --git a/lib/kokkos/core/unit_test/config/results/SKX_Qthreads_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/SKX_Qthreads_KokkosCore_config.h new file mode 100644 index 0000000000..2e4b1d61ef --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/SKX_Qthreads_KokkosCore_config.h @@ -0,0 +1,23 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:42 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_QTHREADS 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_ENABLE_TM +#endif +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_X86_64 +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_AVX512XEON 1 diff --git a/lib/kokkos/core/unit_test/config/results/SKX_ROCm_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/SKX_ROCm_KokkosCore_config.h new file mode 100644 index 0000000000..12293350a1 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/SKX_ROCm_KokkosCore_config.h @@ -0,0 +1,24 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:40 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_ENABLE_ROCM 1 +#define KOKKOS_HAVE_SERIAL 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_ENABLE_TM +#endif +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_X86_64 +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_AVX512XEON 1 diff --git a/lib/kokkos/core/unit_test/config/results/SKX_Serial_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/SKX_Serial_KokkosCore_config.h new file mode 100644 index 0000000000..4ea457aacf --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/SKX_Serial_KokkosCore_config.h @@ -0,0 +1,23 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:41 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_SERIAL 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_ENABLE_TM +#endif +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_X86_64 +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_AVX512XEON 1 diff --git a/lib/kokkos/core/unit_test/config/results/SNB_Cuda_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/SNB_Cuda_KokkosCore_config.h new file mode 100644 index 0000000000..34c9537834 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/SNB_Cuda_KokkosCore_config.h @@ -0,0 +1,21 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:31 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_CUDA 1 +#define KOKKOS_HAVE_SERIAL 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_X86_64 +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_AVX 1 diff --git a/lib/kokkos/core/unit_test/config/results/SNB_OpenMP_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/SNB_OpenMP_KokkosCore_config.h new file mode 100644 index 0000000000..f7ed4d720c --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/SNB_OpenMP_KokkosCore_config.h @@ -0,0 +1,20 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:32 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_OPENMP 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_X86_64 +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_AVX 1 diff --git a/lib/kokkos/core/unit_test/config/results/SNB_Pthread_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/SNB_Pthread_KokkosCore_config.h new file mode 100644 index 0000000000..126c29ba77 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/SNB_Pthread_KokkosCore_config.h @@ -0,0 +1,20 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:33 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_PTHREAD 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_X86_64 +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_AVX 1 diff --git a/lib/kokkos/core/unit_test/config/results/SNB_Qthreads_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/SNB_Qthreads_KokkosCore_config.h new file mode 100644 index 0000000000..2f0216f9c4 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/SNB_Qthreads_KokkosCore_config.h @@ -0,0 +1,20 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:34 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_QTHREADS 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_X86_64 +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_AVX 1 diff --git a/lib/kokkos/core/unit_test/config/results/SNB_ROCm_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/SNB_ROCm_KokkosCore_config.h new file mode 100644 index 0000000000..5c68008bea --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/SNB_ROCm_KokkosCore_config.h @@ -0,0 +1,21 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:32 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_ENABLE_ROCM 1 +#define KOKKOS_HAVE_SERIAL 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_X86_64 +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_AVX 1 diff --git a/lib/kokkos/core/unit_test/config/results/SNB_Serial_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/SNB_Serial_KokkosCore_config.h new file mode 100644 index 0000000000..0278d0d079 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/SNB_Serial_KokkosCore_config.h @@ -0,0 +1,20 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:33 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_SERIAL 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_X86_64 +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_AVX 1 diff --git a/lib/kokkos/core/unit_test/config/results/WSM_Cuda_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/WSM_Cuda_KokkosCore_config.h new file mode 100644 index 0000000000..97389bb1bf --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/WSM_Cuda_KokkosCore_config.h @@ -0,0 +1,21 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:28 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_CUDA 1 +#define KOKKOS_HAVE_SERIAL 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_X86_64 +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_SSE42 1 diff --git a/lib/kokkos/core/unit_test/config/results/WSM_OpenMP_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/WSM_OpenMP_KokkosCore_config.h new file mode 100644 index 0000000000..dd5648f0c8 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/WSM_OpenMP_KokkosCore_config.h @@ -0,0 +1,20 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:29 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_OPENMP 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_X86_64 +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_SSE42 1 diff --git a/lib/kokkos/core/unit_test/config/results/WSM_Pthread_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/WSM_Pthread_KokkosCore_config.h new file mode 100644 index 0000000000..c8a7adbd89 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/WSM_Pthread_KokkosCore_config.h @@ -0,0 +1,20 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:30 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_PTHREAD 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_X86_64 +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_SSE42 1 diff --git a/lib/kokkos/core/unit_test/config/results/WSM_Qthreads_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/WSM_Qthreads_KokkosCore_config.h new file mode 100644 index 0000000000..d4a78790e3 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/WSM_Qthreads_KokkosCore_config.h @@ -0,0 +1,20 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:31 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_QTHREADS 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_X86_64 +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_SSE42 1 diff --git a/lib/kokkos/core/unit_test/config/results/WSM_ROCm_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/WSM_ROCm_KokkosCore_config.h new file mode 100644 index 0000000000..712b5686f0 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/WSM_ROCm_KokkosCore_config.h @@ -0,0 +1,21 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:29 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_ENABLE_ROCM 1 +#define KOKKOS_HAVE_SERIAL 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_X86_64 +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_SSE42 1 diff --git a/lib/kokkos/core/unit_test/config/results/WSM_Serial_KokkosCore_config.h b/lib/kokkos/core/unit_test/config/results/WSM_Serial_KokkosCore_config.h new file mode 100644 index 0000000000..5bac7c2660 --- /dev/null +++ b/lib/kokkos/core/unit_test/config/results/WSM_Serial_KokkosCore_config.h @@ -0,0 +1,20 @@ +/* --------------------------------------------- +Makefile constructed configuration: +Fri Sep 22 17:22:30 MDT 2017 +----------------------------------------------*/ +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." +#else +#define KOKKOS_CORE_CONFIG_H +#endif +/* Execution Spaces */ +#define KOKKOS_HAVE_SERIAL 1 +#ifndef __CUDA_ARCH__ +#define KOKKOS_USE_ISA_X86_64 +#endif +/* General Settings */ +#define KOKKOS_HAVE_CXX11 1 +#define KOKKOS_ENABLE_PROFILING +/* Optimization Settings */ +/* Cuda Settings */ +#define KOKKOS_ARCH_SSE42 1 diff --git a/lib/kokkos/core/unit_test/diffconfig.sh b/lib/kokkos/core/unit_test/diffconfig.sh new file mode 100755 index 0000000000..0c8836ff83 --- /dev/null +++ b/lib/kokkos/core/unit_test/diffconfig.sh @@ -0,0 +1,18 @@ +#!/bin/bash +# CMake and Make tests run in separate directories +# The mapping of ARCH to #define is very complicated +# so diff is used instead of grepping +if test "`basename $PWD`" = "cmaketest"; then + outfile=$1 + resfile=../results/$1 +else + outfile=config/tmpstore/$1 + resfile=config/results/$1 +fi + +diff=`diff $outfile $resfile 2>&1 | grep -e define -e "such file"` +if test -z "$diff"; then + echo Passed +else + echo Failed: $diff +fi diff 
--git a/lib/kokkos/core/unit_test/testmake.sh b/lib/kokkos/core/unit_test/testmake.sh new file mode 100755 index 0000000000..b5d4e8874d --- /dev/null +++ b/lib/kokkos/core/unit_test/testmake.sh @@ -0,0 +1,18 @@ +#!/bin/bash +if test "`basename $PWD`" = "cmaketest"; then + outfile=$1 +else + outfile=config/tmpstore/$1 +fi + +grep_arch=`grep KOKKOS_ARCH $outfile | grep $2 2>&1` +grep_devs=`grep KOKKOS_DEVICES $outfile | grep $3 2>&1` +if test -n "$grep_arch"; then + if test -n "$grep_devs"; then + echo Passed + else + echo Failed + fi +else + echo Failed +fi diff --git a/lib/kokkos/doc/develop_builds.md b/lib/kokkos/doc/develop_builds.md new file mode 100644 index 0000000000..9a211fa776 --- /dev/null +++ b/lib/kokkos/doc/develop_builds.md @@ -0,0 +1,76 @@ + +# Places to build options: architecture, device, advanced options, cuda options + +These are the files that need to be updated when a new architecture or device is +added: + + + generate_makefile.bash + * Interface for makefile system + + cmake/kokkos_options.cmake + * Interface for cmake system + + Makefile.kokkos + * Main logic for build (make and cmake) and defines (KokkosCore_config.h) + + core/unit_test/UnitTestConfig.make + * Unit test for Makefile.kokkos + +In general, an architecture is going to be from one of these platforms: + + AMD + + ARM + + IBM + + Intel + + Intel Xeon Phi + + NVIDIA +Although not strictly necessary, it is helpful to keep things organized by +grouping by platform. + +### generate_makefile.bash + +The bash code does not do any error checking on the `--arch=` or `--device=` +arguments thus strictly speaking you do not *need* to do anything to add a +device or architecture; however, you should add it to the help menu. For the +architectures, please group by one of the platforms listed above. + + +### cmake/kokkos_options.cmake and cmake/kokkos_settings.cmake + +The options for the CMake build system are: `-DKOKKOS_HOST_ARCH:STRING=` and +`-DKOKKOS_ENABLE_<OPTION>:BOOL=`.
Although any string can be passed into the +KOKKOS_HOST_ARCH option, it is checked against an accepted list. Likewise, the +`KOKKOS_ENABLE_<OPTION>` must have the option added AND it is formed using the +list. Thus: + + A new architecture should be added to the KOKKOS_HOST_ARCH_LIST variable. + + A new device should be added to the KOKKOS_DEVICES_LIST variable **AND** a + `KOKKOS_ENABLE_<DEVICE>` option specified (see KOKKOS_ENABLE_CUDA for + example). + + A new device should be added to the KOKKOS_DEVICES_LIST variable **AND** a + +The translation from option to the `KOKKOS_SETTINGS` is done in +`kokkos_settings.cmake`. This translation is automated for some types if you add +to the list, but for others, it may need to be hand coded. + + +### Makefile.kokkos + +This is the main coding used by both the make and cmake system for defining +the sources (generated makefile and cmake snippets by `core/src/Makefile`), for +setting the defines in KokkosCore_config.h, and defining various internal +variables. To understand how to add to this file, you should work closely with +the Kokkos development team. + + +### core/unit_test/UnitTestConfig.make + +This file is used to check the build system in a platform-independent way. It +works by looping over available architectures and devices; thus, you should add +your new architecture to KOKKOS_ARCH_OPTIONS and your new device to +KOKKOS_DEVICE_OPTIONS to be tested. The build system tests work by grepping the +generated build files (automatically). The header file tests work by diffing +the generated file with results that are stored in +`core/unit_test/config/results` (namespaced by ARCH_DEVICE_). Thus, you will +need to add accepted results to this directory for diffing. + +The CMake build system is also tested in `core/unit_test/config/cmaketest`. +Because it uses cmake/kokkos_options.cmake, it already has the tests to loop +over. It is diffed with the same files that the build system is tested with.
+Thus, if you are consistent in all of the files listed, the unit tests should +pass automatically. diff --git a/lib/kokkos/example/cmake/Dependencies.cmake b/lib/kokkos/example/cmake/Dependencies.cmake index ca50a45c11..ed1ec4c725 100644 --- a/lib/kokkos/example/cmake/Dependencies.cmake +++ b/lib/kokkos/example/cmake/Dependencies.cmake @@ -1,4 +1,3 @@ TRIBITS_PACKAGE_DEFINE_DEPENDENCIES( - LIB_REQUIRED_PACKAGES KokkosCore KokkosContainers KokkosAlgorithms TEST_OPTIONAL_TPLS CUSPARSE MKL ) diff --git a/lib/kokkos/example/cmake_build/CMakeLists.txt b/lib/kokkos/example/cmake_build/CMakeLists.txt index f92c5c6513..8e1aa04727 100644 --- a/lib/kokkos/example/cmake_build/CMakeLists.txt +++ b/lib/kokkos/example/cmake_build/CMakeLists.txt @@ -32,15 +32,13 @@ # 4. make cmake_minimum_required(VERSION 3.1) -project(Example CXX C) +project(Example CXX C Fortran) -set(CMAKE_CXX_STANDARD 11) -set(CMAKE_CXX_EXTENSIONS OFF) list(APPEND CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS} -O3) add_subdirectory(${Example_SOURCE_DIR}/../.. ${Example_BINARY_DIR}/kokkos) include_directories(${Kokkos_INCLUDE_DIRS_RET}) -add_executable(example cmake_example.cpp) +add_executable(example cmake_example.cpp foo.f) target_link_libraries(example kokkos) diff --git a/lib/kokkos/example/cmake_build/cmake_example.cpp b/lib/kokkos/example/cmake_build/cmake_example.cpp index 4786eeb859..bba4b7bd01 100644 --- a/lib/kokkos/example/cmake_build/cmake_example.cpp +++ b/lib/kokkos/example/cmake_build/cmake_example.cpp @@ -44,6 +44,8 @@ #include #include +extern "C" void print_fortran_(); + int main(int argc, char* argv[]) { Kokkos::initialize(argc, argv); Kokkos::DefaultExecutionSpace::print_configuration(std::cout); @@ -81,6 +83,8 @@ int main(int argc, char* argv[]) { count_time = timer.seconds(); printf("Sequential: %ld %10.6f\n", seq_count, count_time); + print_fortran_(); + Kokkos::finalize(); return (count == seq_count) ? 
0 : -1; diff --git a/lib/kokkos/example/cmake_build/foo.f b/lib/kokkos/example/cmake_build/foo.f new file mode 100644 index 0000000000..e618455283 --- /dev/null +++ b/lib/kokkos/example/cmake_build/foo.f @@ -0,0 +1,4 @@ + FUNCTION print_fortran() + PRINT *, 'Hello World from Fortran' + RETURN + END diff --git a/lib/kokkos/example/tutorial/01_hello_world/Makefile b/lib/kokkos/example/tutorial/01_hello_world/Makefile index 62ab22f17e..02a0fb10a0 100644 --- a/lib/kokkos/example/tutorial/01_hello_world/Makefile +++ b/lib/kokkos/example/tutorial/01_hello_world/Makefile @@ -10,7 +10,7 @@ ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 01_hello_world.cuda KOKKOS_DEVICES = "Cuda,OpenMP" KOKKOS_ARCH = "SNB,Kepler35" @@ -18,7 +18,7 @@ else CXX = g++ CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 01_hello_world.host KOKKOS_DEVICES = "OpenMP" KOKKOS_ARCH = "SNB" @@ -37,7 +37,7 @@ test: $(EXE) ./$(EXE) $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) clean: kokkos-clean rm -f *.o *.cuda *.host diff --git a/lib/kokkos/example/tutorial/01_hello_world_lambda/Makefile b/lib/kokkos/example/tutorial/01_hello_world_lambda/Makefile index 52d5fb07c4..4fe3765c52 100644 --- a/lib/kokkos/example/tutorial/01_hello_world_lambda/Makefile +++ b/lib/kokkos/example/tutorial/01_hello_world_lambda/Makefile @@ -10,7 +10,7 @@ ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 01_hello_world_lambda.cuda KOKKOS_DEVICES = "Cuda,OpenMP" KOKKOS_ARCH = "SNB,Kepler35" @@ -19,7 +19,7 @@ else CXX = g++ CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 01_hello_world_lambda.host KOKKOS_DEVICES = "OpenMP" KOKKOS_ARCH = "SNB" @@ -38,7 
+38,7 @@ test: $(EXE) ./$(EXE) $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) clean: kokkos-clean rm -f *.o *.cuda *.host diff --git a/lib/kokkos/example/tutorial/02_simple_reduce/Makefile b/lib/kokkos/example/tutorial/02_simple_reduce/Makefile index 7dbff1733b..bda28fbac0 100644 --- a/lib/kokkos/example/tutorial/02_simple_reduce/Makefile +++ b/lib/kokkos/example/tutorial/02_simple_reduce/Makefile @@ -10,7 +10,7 @@ ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 02_simple_reduce.cuda KOKKOS_DEVICES = "Cuda,OpenMP" KOKKOS_ARCH = "SNB,Kepler35" @@ -18,7 +18,7 @@ else CXX = g++ CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 02_simple_reduce.host KOKKOS_DEVICES = "OpenMP" KOKKOS_ARCH = "SNB" @@ -27,7 +27,7 @@ ifneq (,$(findstring ROCm,$(KOKKOS_DEVICES))) CXX = /opt/rocm/hcc/bin/clang++ CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 02_simple_reduce.rocm KOKKOS_DEVICES = "ROCm" KOKKOS_ARCH = "Fiji" @@ -46,7 +46,7 @@ test: $(EXE) ./$(EXE) $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) clean: kokkos-clean rm -f *.o *.cuda *.host *.rocm diff --git a/lib/kokkos/example/tutorial/02_simple_reduce_lambda/Makefile b/lib/kokkos/example/tutorial/02_simple_reduce_lambda/Makefile index b75c05e745..a9542c6a43 100644 --- a/lib/kokkos/example/tutorial/02_simple_reduce_lambda/Makefile +++ b/lib/kokkos/example/tutorial/02_simple_reduce_lambda/Makefile @@ -10,7 +10,7 @@ ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 
02_simple_reduce_lambda.cuda KOKKOS_DEVICES = "Cuda,OpenMP" KOKKOS_ARCH = "SNB,Kepler35" @@ -19,7 +19,7 @@ else CXX = g++ CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 02_simple_reduce_lambda.host KOKKOS_DEVICES = "OpenMP" KOKKOS_ARCH = "SNB" @@ -28,7 +28,7 @@ ifneq (,$(findstring ROCm,$(KOKKOS_DEVICES))) CXX = /opt/rocm/hcc/bin/clang++ CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 02_simple_reduce_lambda.rocm KOKKOS_DEVICES = "ROCm" KOKKOS_ARCH = "Fiji" @@ -48,7 +48,7 @@ test: $(EXE) ./$(EXE) $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) clean: kokkos-clean rm -f *.o *.cuda *.host *.rocm diff --git a/lib/kokkos/example/tutorial/03_simple_view/Makefile b/lib/kokkos/example/tutorial/03_simple_view/Makefile index d19d4b394a..de994a8df9 100644 --- a/lib/kokkos/example/tutorial/03_simple_view/Makefile +++ b/lib/kokkos/example/tutorial/03_simple_view/Makefile @@ -10,7 +10,7 @@ ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 03_simple_view.cuda KOKKOS_DEVICES = "Cuda,OpenMP" KOKKOS_ARCH = "SNB,Kepler35" @@ -18,7 +18,7 @@ else CXX = g++ CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 03_simple_view.host KOKKOS_DEVICES = "OpenMP" KOKKOS_ARCH = "SNB" @@ -27,7 +27,7 @@ ifneq (,$(findstring ROCm,$(KOKKOS_DEVICES))) CXX = /opt/rocm/hcc/bin/clang++ CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 03_simple_view.rocm KOKKOS_DEVICES = "ROCm" KOKKOS_ARCH = "Fiji" @@ -48,7 +48,7 @@ test: $(EXE) ./$(EXE) $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) clean: kokkos-clean rm -f *.o *.cuda *.host 
*.rocm diff --git a/lib/kokkos/example/tutorial/03_simple_view_lambda/Makefile b/lib/kokkos/example/tutorial/03_simple_view_lambda/Makefile index 89f8f0564b..81910a4571 100644 --- a/lib/kokkos/example/tutorial/03_simple_view_lambda/Makefile +++ b/lib/kokkos/example/tutorial/03_simple_view_lambda/Makefile @@ -10,7 +10,7 @@ ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 03_simple_view_lambda.cuda KOKKOS_DEVICES = "Cuda,OpenMP" KOKKOS_ARCH = "SNB,Kepler35" @@ -19,7 +19,7 @@ else CXX = g++ CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 03_simple_view_lambda.host KOKKOS_DEVICES = "OpenMP" KOKKOS_ARCH = "SNB" @@ -28,7 +28,7 @@ ifneq (,$(findstring ROCm,$(KOKKOS_DEVICES))) CXX = /opt/rocm/hcc/bin/clang++ CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 03_simple_view_lambda.rocm KOKKOS_DEVICES = "ROCm" KOKKOS_ARCH = "Fiji" @@ -48,7 +48,7 @@ test: $(EXE) ./$(EXE) $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) clean: kokkos-clean rm -f *.o *.cuda *.host *.rocm diff --git a/lib/kokkos/example/tutorial/04_simple_memoryspaces/Makefile b/lib/kokkos/example/tutorial/04_simple_memoryspaces/Makefile index 160dfb3d20..0e84ac9c68 100644 --- a/lib/kokkos/example/tutorial/04_simple_memoryspaces/Makefile +++ b/lib/kokkos/example/tutorial/04_simple_memoryspaces/Makefile @@ -10,7 +10,7 @@ ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 04_simple_memoryspaces.cuda KOKKOS_DEVICES = "Cuda,OpenMP" KOKKOS_ARCH = "SNB,Kepler35" @@ -18,7 +18,7 @@ else CXX = g++ CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 04_simple_memoryspaces.host KOKKOS_DEVICES = "OpenMP" KOKKOS_ARCH = "SNB" @@ -27,7 +27,7 @@ 
ifneq (,$(findstring ROCm,$(KOKKOS_DEVICES))) CXX = /opt/rocm/hcc/bin/clang++ CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 04_simple_memoryspaces.rocm KOKKOS_DEVICES = "ROCm" KOKKOS_ARCH = "Fiji" @@ -47,7 +47,7 @@ test: $(EXE) ./$(EXE) $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) clean: kokkos-clean rm -f *.o *.cuda *.host *.rocm diff --git a/lib/kokkos/example/tutorial/05_simple_atomics/Makefile b/lib/kokkos/example/tutorial/05_simple_atomics/Makefile index 2fdd89dead..67fbd90c55 100644 --- a/lib/kokkos/example/tutorial/05_simple_atomics/Makefile +++ b/lib/kokkos/example/tutorial/05_simple_atomics/Makefile @@ -10,7 +10,7 @@ ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 05_simple_atomics.cuda KOKKOS_DEVICES = "Cuda,OpenMP" KOKKOS_ARCH = "SNB,Kepler35" @@ -18,7 +18,7 @@ else CXX = g++ CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 05_simple_atomics.host KOKKOS_DEVICES = "OpenMP" KOKKOS_ARCH = "SNB" @@ -27,7 +27,7 @@ ifneq (,$(findstring ROCm,$(KOKKOS_DEVICES))) CXX = /opt/rocm/hcc/bin/clang++ CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 05_simple_atomics.rocm KOKKOS_DEVICES = "ROCm" KOKKOS_ARCH = "Fiji" @@ -47,7 +47,7 @@ test: $(EXE) ./$(EXE) $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) clean: kokkos-clean rm -f *.o *.cuda *.host *.rocm diff --git a/lib/kokkos/example/tutorial/06_simple_mdrangepolicy/Makefile b/lib/kokkos/example/tutorial/06_simple_mdrangepolicy/Makefile index b012ffe855..7d3498ed17 100644 --- a/lib/kokkos/example/tutorial/06_simple_mdrangepolicy/Makefile +++ 
b/lib/kokkos/example/tutorial/06_simple_mdrangepolicy/Makefile @@ -10,7 +10,7 @@ ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 06_simple_mdrangepolicy.cuda KOKKOS_DEVICES = "Cuda,OpenMP" KOKKOS_ARCH = "SNB,Kepler35" @@ -18,7 +18,7 @@ else CXX = g++ CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 06_simple_mdrangepolicy.host KOKKOS_DEVICES = "OpenMP" KOKKOS_ARCH = "SNB" @@ -37,7 +37,7 @@ test: $(EXE) ./$(EXE) $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) clean: kokkos-clean rm -f *.o *.cuda *.host diff --git a/lib/kokkos/example/tutorial/Advanced_Views/01_data_layouts/Makefile b/lib/kokkos/example/tutorial/Advanced_Views/01_data_layouts/Makefile index 009c2ad7e0..94ace811f3 100644 --- a/lib/kokkos/example/tutorial/Advanced_Views/01_data_layouts/Makefile +++ b/lib/kokkos/example/tutorial/Advanced_Views/01_data_layouts/Makefile @@ -10,7 +10,7 @@ ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 01_data_layouts.cuda KOKKOS_DEVICES = "Cuda,OpenMP" KOKKOS_ARCH = "SNB,Kepler35" @@ -18,7 +18,7 @@ else CXX = g++ CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 01_data_layouts.host KOKKOS_DEVICES = "OpenMP" KOKKOS_ARCH = "SNB" @@ -27,7 +27,7 @@ ifneq (,$(findstring ROCm,$(KOKKOS_DEVICES))) CXX = /opt/rocm/hcc/bin/clang++ CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 01_data_layouts.rocm KOKKOS_DEVICES = "ROCm" KOKKOS_ARCH = "Fiji" @@ -47,7 +47,7 @@ test: $(EXE) ./$(EXE) $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) 
-o $(EXE) clean: kokkos-clean rm -f *.o *.cuda *.host *.rocm diff --git a/lib/kokkos/example/tutorial/Advanced_Views/02_memory_traits/Makefile b/lib/kokkos/example/tutorial/Advanced_Views/02_memory_traits/Makefile index f924c26ec7..f64ee3540e 100644 --- a/lib/kokkos/example/tutorial/Advanced_Views/02_memory_traits/Makefile +++ b/lib/kokkos/example/tutorial/Advanced_Views/02_memory_traits/Makefile @@ -10,7 +10,7 @@ ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 02_memory_traits.cuda KOKKOS_DEVICES = "Cuda,OpenMP" KOKKOS_ARCH = "SNB,Kepler35" @@ -18,7 +18,7 @@ else CXX = g++ CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 02_memory_traits.host KOKKOS_DEVICES = "OpenMP" KOKKOS_ARCH = "SNB" @@ -27,7 +27,7 @@ ifneq (,$(findstring ROCm,$(KOKKOS_DEVICES))) CXX = /opt/rocm/hcc/bin/clang++ CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 02_memory_traits.rocm KOKKOS_DEVICES = "ROCm" KOKKOS_ARCH = "Fiji" @@ -47,7 +47,7 @@ test: $(EXE) ./$(EXE) $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) clean: kokkos-clean rm -f *.o *.cuda *.host *.rocm diff --git a/lib/kokkos/example/tutorial/Advanced_Views/03_subviews/Makefile b/lib/kokkos/example/tutorial/Advanced_Views/03_subviews/Makefile index e77d9e6588..ad70ee02d1 100644 --- a/lib/kokkos/example/tutorial/Advanced_Views/03_subviews/Makefile +++ b/lib/kokkos/example/tutorial/Advanced_Views/03_subviews/Makefile @@ -10,7 +10,7 @@ ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 03_subviews.cuda KOKKOS_DEVICES = "Cuda,OpenMP" KOKKOS_ARCH = "SNB,Kepler35" @@ -18,7 +18,7 @@ else CXX = g++ CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 
03_subviews.host KOKKOS_DEVICES = "OpenMP" KOKKOS_ARCH = "SNB" @@ -27,7 +27,7 @@ ifneq (,$(findstring ROCm,$(KOKKOS_DEVICES))) CXX = /opt/rocm/hcc/bin/clang++ CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 03_subviews.rocm KOKKOS_DEVICES = "ROCm" KOKKOS_ARCH = "Fiji" @@ -47,7 +47,7 @@ test: $(EXE) ./$(EXE) $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) clean: kokkos-clean rm -f *.o *.cuda *.host *.rocm diff --git a/lib/kokkos/example/tutorial/Advanced_Views/04_dualviews/Makefile b/lib/kokkos/example/tutorial/Advanced_Views/04_dualviews/Makefile index 566a1ecbd4..e08be5c1df 100644 --- a/lib/kokkos/example/tutorial/Advanced_Views/04_dualviews/Makefile +++ b/lib/kokkos/example/tutorial/Advanced_Views/04_dualviews/Makefile @@ -10,7 +10,7 @@ ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 04_dualviews.cuda KOKKOS_DEVICES = "Cuda,OpenMP" KOKKOS_ARCH = "SNB,Kepler35" @@ -18,7 +18,7 @@ else CXX = g++ CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 04_dualviews.host KOKKOS_DEVICES = "OpenMP" KOKKOS_ARCH = "SNB" @@ -27,7 +27,7 @@ ifneq (,$(findstring ROCm,$(KOKKOS_DEVICES))) CXX = /opt/rocm/hcc/bin/clang++ CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 04_dualviews.rocm KOKKOS_DEVICES = "ROCm" KOKKOS_ARCH = "Fiji" @@ -47,7 +47,7 @@ test: $(EXE) ./$(EXE) $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) clean: kokkos-clean rm -f *.o *.cuda *.host *.rocm diff --git a/lib/kokkos/example/tutorial/Advanced_Views/05_NVIDIA_UVM/Makefile b/lib/kokkos/example/tutorial/Advanced_Views/05_NVIDIA_UVM/Makefile index 
615ee2887a..ffd8184304 100644 --- a/lib/kokkos/example/tutorial/Advanced_Views/05_NVIDIA_UVM/Makefile +++ b/lib/kokkos/example/tutorial/Advanced_Views/05_NVIDIA_UVM/Makefile @@ -10,7 +10,7 @@ ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 05_NVIDIA_UVM.cuda KOKKOS_DEVICES = "Cuda,OpenMP" KOKKOS_ARCH = "SNB,Kepler35" @@ -18,7 +18,7 @@ else CXX = g++ CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 05_NVIDIA_UVM.host KOKKOS_DEVICES = "OpenMP" KOKKOS_ARCH = "SNB" @@ -37,7 +37,7 @@ test: $(EXE) ./$(EXE) $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) clean: kokkos-clean rm -f *.o *.cuda *.host diff --git a/lib/kokkos/example/tutorial/Advanced_Views/06_AtomicViews/Makefile b/lib/kokkos/example/tutorial/Advanced_Views/06_AtomicViews/Makefile index 752fe3cfb6..725d0de0e2 100644 --- a/lib/kokkos/example/tutorial/Advanced_Views/06_AtomicViews/Makefile +++ b/lib/kokkos/example/tutorial/Advanced_Views/06_AtomicViews/Makefile @@ -10,7 +10,7 @@ ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 06_AtomicViews.cuda KOKKOS_DEVICES = "Cuda,OpenMP" KOKKOS_ARCH = "SNB,Kepler35" @@ -18,7 +18,7 @@ else CXX = g++ CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 06_AtomicViews.host KOKKOS_DEVICES = "OpenMP" KOKKOS_ARCH = "SNB" @@ -27,7 +27,7 @@ ifneq (,$(findstring ROCm,$(KOKKOS_DEVICES))) CXX = /opt/rocm/hcc/bin/clang++ CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 06_AtomicViews.rocm KOKKOS_DEVICES = "ROCm" KOKKOS_ARCH = "Fiji" @@ -47,7 +47,7 @@ test: $(EXE) ./$(EXE) $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o 
$(EXE) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) clean: kokkos-clean rm -f *.o *.cuda *.host *.rocm diff --git a/lib/kokkos/example/tutorial/Advanced_Views/07_Overlapping_DeepCopy/Makefile b/lib/kokkos/example/tutorial/Advanced_Views/07_Overlapping_DeepCopy/Makefile index 432a90126d..8983b46d60 100644 --- a/lib/kokkos/example/tutorial/Advanced_Views/07_Overlapping_DeepCopy/Makefile +++ b/lib/kokkos/example/tutorial/Advanced_Views/07_Overlapping_DeepCopy/Makefile @@ -10,7 +10,7 @@ ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper CXXFLAGS = -O3 --default-stream per-thread LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 07_Overlapping_DeepCopy.cuda KOKKOS_DEVICES = "Cuda,OpenMP" KOKKOS_ARCH = "SNB,Kepler35" @@ -18,7 +18,7 @@ else CXX = g++ CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 07_Overlapping_DeepCopy.host KOKKOS_DEVICES = "OpenMP" KOKKOS_ARCH = "SNB" @@ -37,7 +37,7 @@ test: $(EXE) ./$(EXE) $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) clean: kokkos-clean rm -f *.o *.cuda *.host diff --git a/lib/kokkos/example/tutorial/Algorithms/01_random_numbers/Makefile b/lib/kokkos/example/tutorial/Algorithms/01_random_numbers/Makefile index d38d376eb7..386a87474d 100644 --- a/lib/kokkos/example/tutorial/Algorithms/01_random_numbers/Makefile +++ b/lib/kokkos/example/tutorial/Algorithms/01_random_numbers/Makefile @@ -10,7 +10,7 @@ ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 01_random_numbers.cuda KOKKOS_DEVICES = "Cuda,OpenMP" KOKKOS_ARCH = "SNB,Kepler35" @@ -18,7 +18,7 @@ else CXX = g++ CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 01_random_numbers.host KOKKOS_DEVICES = "OpenMP" KOKKOS_ARCH = 
"SNB" @@ -27,7 +27,7 @@ ifneq (,$(findstring ROCm,$(KOKKOS_DEVICES))) CXX = /opt/rocm/hcc/bin/clang++ CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 01_random_numbers.rocm KOKKOS_DEVICES = "ROCm" KOKKOS_ARCH = "Fiji" @@ -47,7 +47,7 @@ test: $(EXE) ./$(EXE) $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) clean: kokkos-clean rm -f *.o *.cuda *.host *.rocm diff --git a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams/Makefile b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams/Makefile index 32818e6d72..7282abc30c 100644 --- a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams/Makefile +++ b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams/Makefile @@ -10,7 +10,7 @@ ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 01_thread_teams.cuda KOKKOS_DEVICES = "Cuda,OpenMP" KOKKOS_ARCH = "SNB,Kepler35" @@ -18,7 +18,7 @@ else CXX = g++ CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 01_thread_teams.host KOKKOS_DEVICES = "OpenMP" KOKKOS_ARCH = "SNB" @@ -27,7 +27,7 @@ ifneq (,$(findstring ROCm,$(KOKKOS_DEVICES))) CXX = /opt/rocm/hcc/bin/clang++ CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 01_thread_teams.rocm KOKKOS_DEVICES = "ROCm" KOKKOS_ARCH = "Fiji" @@ -47,7 +47,7 @@ test: $(EXE) ./$(EXE) $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) clean: kokkos-clean rm -f *.o *.cuda *.host *.rocm diff --git a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/Makefile 
b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/Makefile index aa799ea3c5..4049dbde34 100644 --- a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/Makefile +++ b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/Makefile @@ -10,7 +10,7 @@ ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 01_thread_teams_lambda.cuda KOKKOS_DEVICES = "Cuda,OpenMP" KOKKOS_ARCH = "SNB,Kepler35" @@ -19,7 +19,7 @@ else CXX = g++ CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 01_thread_teams_lambda.host KOKKOS_DEVICES = "OpenMP" KOKKOS_ARCH = "SNB" @@ -28,7 +28,7 @@ ifneq (,$(findstring ROCm,$(KOKKOS_DEVICES))) CXX = /opt/rocm/hcc/bin/clang++ CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 01_thread_teams_lambda.rocm KOKKOS_DEVICES = "ROCm" KOKKOS_ARCH = "Fiji" @@ -48,7 +48,7 @@ test: $(EXE) ./$(EXE) $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) clean: kokkos-clean rm -f *.o *.cuda *.host *.rocm diff --git a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/02_nested_parallel_for/Makefile b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/02_nested_parallel_for/Makefile index 8685670a9f..fe882f36b8 100644 --- a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/02_nested_parallel_for/Makefile +++ b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/02_nested_parallel_for/Makefile @@ -10,7 +10,7 @@ ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 02_nested_parallel_for.cuda KOKKOS_DEVICES = "Cuda,OpenMP" KOKKOS_ARCH = "SNB,Kepler35" @@ -18,7 +18,7 @@ else CXX = g++ CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS 
= EXE = 02_nested_parallel_for.host KOKKOS_DEVICES = "OpenMP" KOKKOS_ARCH = "SNB" @@ -27,7 +27,7 @@ ifneq (,$(findstring ROCm,$(KOKKOS_DEVICES))) CXX = /opt/rocm/hcc/bin/clang++ CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 02_nested_parallel_for.rocm KOKKOS_DEVICES = "ROCm" KOKKOS_ARCH = "Fiji" @@ -47,7 +47,7 @@ test: $(EXE) ./$(EXE) $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) clean: kokkos-clean rm -f *.o *.cuda *.host *.rocm diff --git a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/03_vectorization/Makefile b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/03_vectorization/Makefile index 877897c9a3..4481889cdb 100644 --- a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/03_vectorization/Makefile +++ b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/03_vectorization/Makefile @@ -10,7 +10,7 @@ ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 03_vectorization.cuda KOKKOS_DEVICES = "Cuda,OpenMP" KOKKOS_ARCH = "SNB,Kepler35" @@ -18,7 +18,7 @@ else CXX = g++ CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 03_vectorization.host KOKKOS_DEVICES = "OpenMP" KOKKOS_ARCH = "SNB" @@ -27,7 +27,7 @@ ifneq (,$(findstring ROCm,$(KOKKOS_DEVICES))) CXX = /opt/rocm/hcc/bin/clang++ CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 03_vectorization.rocm KOKKOS_DEVICES = "ROCm" KOKKOS_ARCH = "Fiji" @@ -47,7 +47,7 @@ test: $(EXE) ./$(EXE) $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) clean: kokkos-clean rm -f *.o *.cuda *.host *.rocm diff --git 
a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/04_team_scan/Makefile b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/04_team_scan/Makefile index 6effcd7d77..0f0bcf70de 100644 --- a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/04_team_scan/Makefile +++ b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/04_team_scan/Makefile @@ -10,7 +10,7 @@ ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 04_team_scan.cuda KOKKOS_DEVICES = "Cuda,OpenMP" KOKKOS_ARCH = "SNB,Kepler35" @@ -18,7 +18,7 @@ else CXX = g++ CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 04_team_scan.host KOKKOS_DEVICES = "OpenMP" KOKKOS_ARCH = "SNB" @@ -27,7 +27,7 @@ ifneq (,$(findstring ROCm,$(KOKKOS_DEVICES))) CXX = /opt/rocm/hcc/bin/clang++ CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = 04_team_scan.rocm KOKKOS_DEVICES = "ROCm" KOKKOS_ARCH = "Fiji" @@ -47,7 +47,7 @@ test: $(EXE) ./$(EXE) $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) clean: kokkos-clean rm -f *.o *.cuda *.host *.rocm diff --git a/lib/kokkos/example/tutorial/launch_bounds/Makefile b/lib/kokkos/example/tutorial/launch_bounds/Makefile index 01d58dd7fb..4a1bf17344 100644 --- a/lib/kokkos/example/tutorial/launch_bounds/Makefile +++ b/lib/kokkos/example/tutorial/launch_bounds/Makefile @@ -10,7 +10,7 @@ ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = launch_bounds.cuda KOKKOS_DEVICES = "Cuda,OpenMP" KOKKOS_ARCH = "SNB,Kepler35" @@ -18,7 +18,7 @@ else CXX = g++ CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = launch_bounds.host KOKKOS_DEVICES = "OpenMP" KOKKOS_ARCH = "SNB" @@ -27,7 +27,7 @@ ifneq (,$(findstring 
ROCm,$(KOKKOS_DEVICES))) CXX = /opt/rocm/hcc/bin/clang++ CXXFLAGS = -O3 LINK = ${CXX} -LINKFLAGS = +LDFLAGS = EXE = launch_bounds.rocm KOKKOS_DEVICES = "ROCm" KOKKOS_ARCH = "Fiji" @@ -55,7 +55,7 @@ test: $(EXE) ./$(EXE) $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) clean: kokkos-clean rm -f *.o *.cuda *.rocm diff --git a/lib/kokkos/generate_makefile.bash b/lib/kokkos/generate_makefile.bash index b4a69d30fd..521a77d1af 100755 --- a/lib/kokkos/generate_makefile.bash +++ b/lib/kokkos/generate_makefile.bash @@ -123,6 +123,7 @@ do echo " ARMv81 = ARMv8.1 Compatible CPU" echo " ARMv8-ThunderX = ARMv8 Cavium ThunderX CPU" echo " [IBM]" + echo " BGQ = IBM Blue Gene Q" echo " Power7 = IBM POWER7 and POWER7+ CPUs" echo " Power8 = IBM POWER8 CPUs" echo " Power9 = IBM POWER9 CPUs" @@ -271,9 +272,10 @@ else fi mkdir -p install -echo "#Makefile to satisfy existens of target kokkos-clean before installing the library" > install/Makefile.kokkos -echo "kokkos-clean:" >> install/Makefile.kokkos -echo "" >> install/Makefile.kokkos +gen_makefile=Makefile.kokkos +echo "#Makefile to satisfy existens of target kokkos-clean before installing the library" > install/${gen_makefile} +echo "kokkos-clean:" >> install/${gen_makefile} +echo "" >> install/${gen_makefile} mkdir -p core mkdir -p core/unit_test mkdir -p core/perf_test