/* //@HEADER // ************************************************************************ // // Kokkos v. 3.0 // Copyright (2020) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // // 3. Neither the name of the Corporation nor the names of the // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact Christian R. Trott (crtrott@sandia.gov) // // ************************************************************************ //@HEADER */ #include #include // This file is largely duplicating TestRange.hpp but it applies // Kokkos::Experimental require at every place where a parallel // operation is executed. namespace Test { namespace { template struct TestRangeRequire { using value_type = int; ///< alias required for the parallel_reduce using view_type = Kokkos::View; view_type m_flags; struct VerifyInitTag {}; struct ResetTag {}; struct VerifyResetTag {}; struct OffsetTag {}; struct VerifyOffsetTag {}; int N; static const int offset = 13; TestRangeRequire(const size_t N_) : m_flags(Kokkos::view_alloc(Kokkos::WithoutInitializing, "flags"), N_), N(N_) {} void test_for() { typename view_type::HostMirror host_flags = Kokkos::create_mirror_view(m_flags); Kokkos::parallel_for( Kokkos::Experimental::require( Kokkos::RangePolicy(0, N), Property()), *this); { using ThisType = TestRangeRequire; std::string label("parallel_for"); Kokkos::Impl::ParallelConstructName pcn(label); ASSERT_EQ(pcn.get(), label); std::string empty_label(""); Kokkos::Impl::ParallelConstructName empty_pcn( empty_label); ASSERT_EQ(empty_pcn.get(), typeid(ThisType).name()); } Kokkos::parallel_for( Kokkos::Experimental::require( Kokkos::RangePolicy(0, N), Property()), *this); { using ThisType = TestRangeRequire; std::string label("parallel_for"); Kokkos::Impl::ParallelConstructName pcn(label); ASSERT_EQ(pcn.get(), label); std::string empty_label(""); Kokkos::Impl::ParallelConstructName empty_pcn( empty_label); ASSERT_EQ(empty_pcn.get(), std::string(typeid(ThisType).name()) + "/" + typeid(VerifyInitTag).name()); } Kokkos::deep_copy(host_flags, m_flags); int error_count = 0; for (int i = 0; i < N; ++i) { if (int(i) != host_flags(i)) ++error_count; } ASSERT_EQ(error_count, int(0)); Kokkos::parallel_for( Kokkos::Experimental::require( Kokkos::RangePolicy(0, N), Property()), *this); Kokkos::parallel_for( std::string("TestKernelFor"), Kokkos::Experimental::require( Kokkos::RangePolicy(0, N), Property()), *this); Kokkos::deep_copy(host_flags, m_flags); error_count = 0; for (int i = 0; i < N; ++i) { if (int(2 * i) != host_flags(i)) ++error_count; } ASSERT_EQ(error_count, int(0)); Kokkos::parallel_for( Kokkos::Experimental::require( Kokkos::RangePolicy(offset, N + offset), Property()), *this); Kokkos::parallel_for( std::string("TestKernelFor"), Kokkos::Experimental::require( Kokkos::RangePolicy, VerifyOffsetTag>(0, N), Property()), *this); Kokkos::deep_copy(host_flags, m_flags); error_count = 0; for (int i = 0; i < N; ++i) { if (i + offset != host_flags(i)) ++error_count; } ASSERT_EQ(error_count, int(0)); } KOKKOS_INLINE_FUNCTION void operator()(const int i) const { m_flags(i) = i; } KOKKOS_INLINE_FUNCTION void operator()(const VerifyInitTag &, const int i) const { if (i != m_flags(i)) { KOKKOS_IMPL_DO_NOT_USE_PRINTF( "TestRangeRequire::test_for error at %d != %d\n", i, m_flags(i)); } } KOKKOS_INLINE_FUNCTION void operator()(const ResetTag &, const int i) const { m_flags(i) = 2 * m_flags(i); } KOKKOS_INLINE_FUNCTION void operator()(const VerifyResetTag &, const int i) const { if (2 * i != m_flags(i)) { KOKKOS_IMPL_DO_NOT_USE_PRINTF( "TestRangeRequire::test_for error at %d != %d\n", i, m_flags(i)); } } KOKKOS_INLINE_FUNCTION void operator()(const OffsetTag &, const int i) const { m_flags(i - offset) = i; } KOKKOS_INLINE_FUNCTION void operator()(const VerifyOffsetTag &, const int i) const { if (i + offset != m_flags(i)) { KOKKOS_IMPL_DO_NOT_USE_PRINTF( "TestRangeRequire::test_for error at %d != %d\n", i + offset, m_flags(i)); } } //---------------------------------------- void test_reduce() { int total = 0; Kokkos::parallel_for( Kokkos::Experimental::require( Kokkos::RangePolicy(0, N), Property()), *this); Kokkos::parallel_reduce( "TestKernelReduce", Kokkos::Experimental::require( Kokkos::RangePolicy(0, N), Property()), *this, total); // sum( 0 .. N-1 ) ASSERT_EQ(size_t((N - 1) * (N) / 2), size_t(total)); Kokkos::parallel_reduce( Kokkos::Experimental::require( Kokkos::RangePolicy(offset, N + offset), Property()), *this, total); // sum( 1 .. N ) ASSERT_EQ(size_t((N) * (N + 1) / 2), size_t(total)); } KOKKOS_INLINE_FUNCTION void operator()(const int i, value_type &update) const { update += m_flags(i); } KOKKOS_INLINE_FUNCTION void operator()(const OffsetTag &, const int i, value_type &update) const { update += 1 + m_flags(i - offset); } //---------------------------------------- void test_scan() { Kokkos::parallel_for(Kokkos::RangePolicy(0, N), *this); Kokkos::parallel_scan( "TestKernelScan", Kokkos::RangePolicy(0, N), *this); int total = 0; Kokkos::parallel_scan( "TestKernelScanWithTotal", Kokkos::RangePolicy(0, N), *this, total); ASSERT_EQ(size_t((N - 1) * (N) / 2), size_t(total)); // sum( 0 .. N-1 ) } KOKKOS_INLINE_FUNCTION void operator()(const OffsetTag &, const int i, value_type &update, bool final) const { update += m_flags(i); if (final) { if (update != (i * (i + 1)) / 2) { KOKKOS_IMPL_DO_NOT_USE_PRINTF( "TestRangeRequire::test_scan error %d : %d != %d\n", i, (i * (i + 1)) / 2, m_flags(i)); } } } void test_dynamic_policy() { #if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) auto const N_no_implicit_capture = N; using policy_t = Kokkos::RangePolicy >; { Kokkos::View > count("Count", ExecSpace::concurrency()); Kokkos::View a("A", N); Kokkos::parallel_for( policy_t(0, N), KOKKOS_LAMBDA(const int &i) { for (int k = 0; k < (i < N_no_implicit_capture / 2 ? 1 : 10000); k++) { a(i)++; } count(ExecSpace::impl_hardware_thread_id())++; }); int error = 0; Kokkos::parallel_reduce( Kokkos::RangePolicy(0, N), KOKKOS_LAMBDA(const int &i, int &lsum) { lsum += (a(i) != (i < N_no_implicit_capture / 2 ? 1 : 10000)); }, error); ASSERT_EQ(error, 0); if ((ExecSpace::concurrency() > (int)1) && (N > static_cast(4 * ExecSpace::concurrency()))) { size_t min = N; size_t max = 0; for (int t = 0; t < ExecSpace::concurrency(); t++) { if (count(t) < min) min = count(t); if (count(t) > max) max = count(t); } ASSERT_LT(min, max); // if ( ExecSpace::concurrency() > 2 ) { // ASSERT_LT( 2 * min, max ); //} } } { Kokkos::View > count("Count", ExecSpace::concurrency()); Kokkos::View a("A", N); int sum = 0; Kokkos::parallel_reduce( policy_t(0, N), KOKKOS_LAMBDA(const int &i, int &lsum) { for (int k = 0; k < (i < N_no_implicit_capture / 2 ? 1 : 10000); k++) { a(i)++; } count(ExecSpace::impl_hardware_thread_id())++; lsum++; }, sum); ASSERT_EQ(sum, N); int error = 0; Kokkos::parallel_reduce( Kokkos::RangePolicy(0, N), KOKKOS_LAMBDA(const int &i, int &lsum) { lsum += (a(i) != (i < N_no_implicit_capture / 2 ? 1 : 10000)); }, error); ASSERT_EQ(error, 0); if ((ExecSpace::concurrency() > (int)1) && (N > static_cast(4 * ExecSpace::concurrency()))) { size_t min = N; size_t max = 0; for (int t = 0; t < ExecSpace::concurrency(); t++) { if (count(t) < min) min = count(t); if (count(t) > max) max = count(t); } ASSERT_LT(min, max); // if ( ExecSpace::concurrency() > 2 ) { // ASSERT_LT( 2 * min, max ); //} } } #endif } }; } // namespace TEST(TEST_CATEGORY, range_for_require) { using Property = Kokkos::Experimental::WorkItemProperty::HintLightWeight_t; { TestRangeRequire, Property> f(0); f.test_for(); } { TestRangeRequire, Property> f(0); f.test_for(); } { TestRangeRequire, Property> f(2); f.test_for(); } { TestRangeRequire, Property> f(3); f.test_for(); } { TestRangeRequire, Property> f(1000); f.test_for(); } { TestRangeRequire, Property> f(1001); f.test_for(); } } TEST(TEST_CATEGORY, range_reduce_require) { using Property = Kokkos::Experimental::WorkItemProperty::HintLightWeight_t; { TestRangeRequire, Property> f(0); f.test_reduce(); } { TestRangeRequire, Property> f(0); f.test_reduce(); } { TestRangeRequire, Property> f(2); f.test_reduce(); } { TestRangeRequire, Property> f(3); f.test_reduce(); } { TestRangeRequire, Property> f(1000); f.test_reduce(); } { TestRangeRequire, Property> f(1001); f.test_reduce(); } } #ifndef KOKKOS_ENABLE_OPENMPTARGET TEST(TEST_CATEGORY, range_scan_require) { using Property = Kokkos::Experimental::WorkItemProperty::HintLightWeight_t; { TestRangeRequire, Property> f(0); f.test_scan(); } { TestRangeRequire, Property> f(0); f.test_scan(); } #if !defined(KOKKOS_ENABLE_CUDA) && !defined(KOKKOS_ENABLE_HIP) && \ !defined(KOKKOS_ENABLE_SYCL) { TestRangeRequire, Property> f(0); f.test_dynamic_policy(); } #endif { TestRangeRequire, Property> f(2); f.test_scan(); } { TestRangeRequire, Property> f(3); f.test_scan(); } #if !defined(KOKKOS_ENABLE_CUDA) && !defined(KOKKOS_ENABLE_HIP) && \ !defined(KOKKOS_ENABLE_SYCL) { TestRangeRequire, Property> f(3); f.test_dynamic_policy(); } #endif { TestRangeRequire, Property> f(1000); f.test_scan(); } { TestRangeRequire, Property> f(1001); f.test_scan(); } #if !defined(KOKKOS_ENABLE_CUDA) && !defined(KOKKOS_ENABLE_HIP) && \ !defined(KOKKOS_ENABLE_SYCL) { TestRangeRequire, Property> f(1001); f.test_dynamic_policy(); } #endif } #endif } // namespace Test