//@HEADER // ************************************************************************ // // Kokkos v. 4.0 // Copyright (2022) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. // See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER #include namespace { // Struct for testing arbitrary size atomics. template struct SuperScalar { double val[N]; KOKKOS_INLINE_FUNCTION SuperScalar() { for (int i = 0; i < N; i++) { val[i] = 0.0; } } KOKKOS_INLINE_FUNCTION SuperScalar(const SuperScalar& src) { for (int i = 0; i < N; i++) { val[i] = src.val[i]; } } KOKKOS_INLINE_FUNCTION SuperScalar(const volatile SuperScalar& src) { for (int i = 0; i < N; i++) { val[i] = src.val[i]; } } KOKKOS_INLINE_FUNCTION SuperScalar& operator=(const SuperScalar& src) { for (int i = 0; i < N; i++) { val[i] = src.val[i]; } return *this; } KOKKOS_INLINE_FUNCTION SuperScalar& operator=(const volatile SuperScalar& src) { for (int i = 0; i < N; i++) { val[i] = src.val[i]; } return *this; } KOKKOS_INLINE_FUNCTION void operator=(const SuperScalar& src) volatile { for (int i = 0; i < N; i++) { val[i] = src.val[i]; } } KOKKOS_INLINE_FUNCTION SuperScalar operator+(const SuperScalar& src) const { SuperScalar tmp = *this; for (int i = 0; i < N; i++) { tmp.val[i] += src.val[i]; } return tmp; } KOKKOS_INLINE_FUNCTION SuperScalar& operator+=(const double& src) { for (int i = 0; i < N; i++) { val[i] += 1.0 * (i + 1) * src; } return *this; } KOKKOS_INLINE_FUNCTION SuperScalar& operator+=(const SuperScalar& src) { for (int i = 0; i < N; i++) { val[i] += src.val[i]; } return *this; } KOKKOS_INLINE_FUNCTION bool operator==(const SuperScalar& src) const { bool compare = true; for (int i = 0; i < N; i++) { compare = compare && (val[i] == src.val[i]); } return compare; } KOKKOS_INLINE_FUNCTION bool operator!=(const SuperScalar& src) const { bool compare = true; for (int i = 0; i < N; i++) { compare = compare && (val[i] == src.val[i]); } return !compare; } KOKKOS_INLINE_FUNCTION SuperScalar(const double& src) { for (int i = 0; i < N; i++) { val[i] = 1.0 * (i + 1) * src; } } }; template std::ostream& operator<<(std::ostream& os, const SuperScalar& dt) { os << "{ "; for (int i = 0; i < N - 1; i++) { os << dt.val[i] << ", "; } os << dt.val[N - 1] << "}"; return os; } template struct ZeroFunctor { using execution_space = DEVICE_TYPE; using type = typename Kokkos::View; using h_type = typename Kokkos::View::HostMirror; type data; KOKKOS_INLINE_FUNCTION void operator()(int) const { data() = 0; } }; //--------------------------------------------------- //--------------atomic_fetch_add--------------------- //--------------------------------------------------- template struct AddFunctor { using execution_space = DEVICE_TYPE; using type = Kokkos::View; type data; KOKKOS_INLINE_FUNCTION void operator()(int) const { Kokkos::atomic_fetch_add(&data(), (T)1); } }; template T AddLoop(int loop) { struct ZeroFunctor f_zero; typename ZeroFunctor::type data("Data"); typename ZeroFunctor::h_type h_data("HData"); f_zero.data = data; Kokkos::parallel_for(1, f_zero); execution_space().fence(); struct AddFunctor f_add; f_add.data = data; Kokkos::parallel_for(loop, f_add); execution_space().fence(); Kokkos::deep_copy(h_data, data); T val = h_data(); return val; } template T AddLoopSerial(int loop) { T* data = new T[1]; data[0] = 0; for (int i = 0; i < loop; i++) { *data += (T)1; } T val = *data; delete[] data; return val; } //------------------------------------------------------ //--------------atomic_compare_exchange----------------- //------------------------------------------------------ template struct CASFunctor { using execution_space = DEVICE_TYPE; using type = Kokkos::View; type data; KOKKOS_INLINE_FUNCTION void operator()(int) const { T old = data(); T newval, assumed; do { assumed = old; newval = assumed + (T)1; old = Kokkos::atomic_compare_exchange(&data(), assumed, newval); } while (old != assumed); } }; template T CASLoop(int loop) { struct ZeroFunctor f_zero; typename ZeroFunctor::type data("Data"); typename ZeroFunctor::h_type h_data("HData"); f_zero.data = data; Kokkos::parallel_for(1, f_zero); execution_space().fence(); struct CASFunctor f_cas; f_cas.data = data; Kokkos::parallel_for(loop, f_cas); execution_space().fence(); Kokkos::deep_copy(h_data, data); T val = h_data(); return val; } template T CASLoopSerial(int loop) { T* data = new T[1]; data[0] = 0; for (int i = 0; i < loop; i++) { T assumed; T newval; T old; do { assumed = *data; newval = assumed + (T)1; old = *data; *data = newval; } while (!(assumed == old)); } T val = *data; delete[] data; return val; } //---------------------------------------------- //--------------atomic_compare_exchange_strong-- //---------------------------------------------- #ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 #ifdef KOKKOS_ENABLE_DEPRECATION_WARNINGS KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_PUSH() #endif template struct DeprecatedCASFunctor { using execution_space = DEVICE_TYPE; using type = Kokkos::View; type data; KOKKOS_INLINE_FUNCTION void operator()(int) const { T newval, assumed; do { assumed = Kokkos::volatile_load(&data()); newval = assumed + (T)1; } while (!Kokkos::atomic_compare_exchange_strong(&data(), assumed, newval)); } }; #ifdef KOKKOS_ENABLE_DEPRECATION_WARNINGS KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_POP() #endif template T DeprecatedCASLoop(int loop) { struct ZeroFunctor f_zero; typename ZeroFunctor::type data("Data"); typename ZeroFunctor::h_type h_data("HData"); f_zero.data = data; Kokkos::parallel_for(1, f_zero); execution_space().fence(); struct DeprecatedCASFunctor f_cas; f_cas.data = data; Kokkos::parallel_for(loop, f_cas); execution_space().fence(); Kokkos::deep_copy(h_data, data); T val = h_data(); return val; } #endif //---------------------------------------------- //--------------atomic_exchange----------------- //---------------------------------------------- template struct ExchFunctor { using execution_space = DEVICE_TYPE; using type = Kokkos::View; type data, data2; KOKKOS_INLINE_FUNCTION void operator()(int i) const { T old = Kokkos::atomic_exchange(&data(), (T)i); Kokkos::atomic_fetch_add(&data2(), old); } }; template T ExchLoop(int loop) { struct ZeroFunctor f_zero; typename ZeroFunctor::type data("Data"); typename ZeroFunctor::h_type h_data("HData"); f_zero.data = data; Kokkos::parallel_for(1, f_zero); execution_space().fence(); typename ZeroFunctor::type data2("Data"); typename ZeroFunctor::h_type h_data2("HData"); f_zero.data = data2; Kokkos::parallel_for(1, f_zero); execution_space().fence(); struct ExchFunctor f_exch; f_exch.data = data; f_exch.data2 = data2; Kokkos::parallel_for(loop, f_exch); execution_space().fence(); Kokkos::deep_copy(h_data, data); Kokkos::deep_copy(h_data2, data2); T val = h_data() + h_data2(); return val; } template T ExchLoopSerial( std::conditional_t >, int, void> loop) { T* data = new T[1]; T* data2 = new T[1]; data[0] = 0; data2[0] = 0; for (int i = 0; i < loop; i++) { T old = *data; *data = (T)i; *data2 += old; } T val = *data2 + *data; delete[] data; delete[] data2; return val; } template T ExchLoopSerial( std::conditional_t >, int, void> loop) { T* data = new T[1]; T* data2 = new T[1]; data[0] = 0; data2[0] = 0; for (int i = 0; i < loop; i++) { T old = *data; data->real() = (static_cast(i)); data->imag() = 0; *data2 += old; } T val = *data2 + *data; delete[] data; delete[] data2; return val; } template T LoopVariant(int loop, int test) { switch (test) { case 1: return AddLoop(loop); case 2: return CASLoop(loop); case 3: return ExchLoop(loop); #ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 case 4: return DeprecatedCASLoop(loop); #endif } Kokkos::abort("unreachable"); return 0; } template T LoopVariantSerial(int loop, int test) { switch (test) { case 1: return AddLoopSerial(loop); case 2: return CASLoopSerial(loop); case 3: return ExchLoopSerial(loop); #ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 case 4: return CASLoopSerial(loop); #endif } Kokkos::abort("unreachable"); return 0; } template void Loop(int loop, int test) { T res = LoopVariant(loop, test); T resSerial = LoopVariantSerial(loop, test); ASSERT_EQ(res, resSerial) << "Loop<" << Kokkos::Impl::TypeInfo::name() << ">(loop=" << loop << ",test=" << test << ")"; } TEST(TEST_CATEGORY, atomics) { const int loop_count = 1e4; Loop(loop_count, 1); Loop(loop_count, 2); Loop(loop_count, 3); #ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 Loop(loop_count, 4); #endif Loop(loop_count, 1); Loop(loop_count, 2); Loop(loop_count, 3); #ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 Loop(loop_count, 4); #endif Loop(loop_count, 1); Loop(loop_count, 2); Loop(loop_count, 3); #ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 Loop(loop_count, 4); #endif Loop(loop_count, 1); Loop(loop_count, 2); Loop(loop_count, 3); #ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 Loop(loop_count, 4); #endif Loop(loop_count, 1); Loop(loop_count, 2); Loop(loop_count, 3); #ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 Loop(loop_count, 4); #endif Loop(loop_count, 1); Loop(loop_count, 2); Loop(loop_count, 3); #ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 Loop(loop_count, 4); #endif Loop(100, 1); Loop(100, 2); Loop(100, 3); #ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 Loop(100, 4); #endif // FIXME_OPENMPTARGET // FIXME_OPENACC: atomic operations on composite types are not supported. #if !defined(KOKKOS_ENABLE_OPENMPTARGET) && !defined(KOKKOS_ENABLE_OPENACC) Loop, TEST_EXECSPACE>(1, 1); Loop, TEST_EXECSPACE>(1, 2); Loop, TEST_EXECSPACE>(1, 3); #ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 Loop, TEST_EXECSPACE>(1, 4); #endif Loop, TEST_EXECSPACE>(100, 1); Loop, TEST_EXECSPACE>(100, 2); Loop, TEST_EXECSPACE>(100, 3); #ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 Loop, TEST_EXECSPACE>(100, 4); #endif // FIXME_SYCL Replace macro by SYCL_EXT_ONEAPI_DEVICE_GLOBAL or remove // condition alltogether when possible. #if defined(KOKKOS_ENABLE_SYCL) && \ !defined(KOKKOS_IMPL_SYCL_DEVICE_GLOBAL_SUPPORTED) if (std::is_same_v) return; #endif Loop, TEST_EXECSPACE>(1, 1); Loop, TEST_EXECSPACE>(1, 2); Loop, TEST_EXECSPACE>(1, 3); #ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 Loop, TEST_EXECSPACE>(1, 4); #endif Loop, TEST_EXECSPACE>(100, 1); Loop, TEST_EXECSPACE>(100, 2); Loop, TEST_EXECSPACE>(100, 3); #ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 Loop, TEST_EXECSPACE>(100, 4); #endif // WORKAROUND MSVC #ifndef _WIN32 Loop, TEST_EXECSPACE>(100, 1); Loop, TEST_EXECSPACE>(100, 2); Loop, TEST_EXECSPACE>(100, 3); #ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 Loop, TEST_EXECSPACE>(100, 4); #endif #endif #endif } // see https://github.com/trilinos/Trilinos/pull/11506 struct TpetraUseCase { template struct AbsMaxHelper { Scalar value; KOKKOS_FUNCTION AbsMaxHelper& operator+=(AbsMaxHelper const& rhs) { Scalar lhs_abs_value = Kokkos::abs(value); Scalar rhs_abs_value = Kokkos::abs(rhs.value); value = lhs_abs_value > rhs_abs_value ? lhs_abs_value : rhs_abs_value; return *this; } KOKKOS_FUNCTION AbsMaxHelper operator+(AbsMaxHelper const& rhs) const { AbsMaxHelper ret = *this; ret += rhs; return ret; } }; using T = int; Kokkos::View d_{"lbl"}; KOKKOS_FUNCTION void operator()(int i) const { // 0, -1, 2, -3, ... auto v_i = static_cast(i); if (i % 2 == 1) v_i = -v_i; Kokkos::atomic_add(reinterpret_cast*>(&d_()), AbsMaxHelper{v_i}); } TpetraUseCase() { Kokkos::parallel_for(Kokkos::RangePolicy(0, 10), *this); } void check() { T v; Kokkos::deep_copy(v, d_); ASSERT_EQ(v, 9); } }; TEST(TEST_CATEGORY, atomics_tpetra_max_abs) { TpetraUseCase().check(); } } // namespace