/* //@HEADER // ************************************************************************ // // Kokkos v. 3.0 // Copyright (2020) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // // 3. Neither the name of the Corporation nor the names of the // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact Christian R. Trott (crtrott@sandia.gov) // // ************************************************************************ //@HEADER */ #include namespace TestAtomicOperations { //----------------------------------------------- //--------------zero_functor--------------------- //----------------------------------------------- template struct ZeroFunctor { using execution_space = DEVICE_TYPE; using type = typename Kokkos::View; using h_type = typename Kokkos::View::HostMirror; type data; KOKKOS_INLINE_FUNCTION void operator()(int) const { data() = 0; } }; //----------------------------------------------- //--------------init_functor--------------------- //----------------------------------------------- template struct InitFunctor { using execution_space = DEVICE_TYPE; using type = typename Kokkos::View; using h_type = typename Kokkos::View::HostMirror; type data; T init_value; KOKKOS_INLINE_FUNCTION void operator()(int) const { data() = init_value; } InitFunctor(T _init_value) : init_value(_init_value) {} }; //--------------------------------------------------- //--------------atomic_load/store/assign--------------------- //--------------------------------------------------- #ifdef KOKKOS_ENABLE_IMPL_DESUL_ATOMICS template struct LoadStoreFunctor { using execution_space = DEVICE_TYPE; using type = Kokkos::View; type data; T i0; T i1; KOKKOS_INLINE_FUNCTION void operator()(int) const { T old = Kokkos::atomic_load(&data()); if (old != i0) Kokkos::abort("Kokkos Atomic Load didn't get the right value"); Kokkos::atomic_store(&data(), i1); Kokkos::atomic_assign(&data(), old); } LoadStoreFunctor(T _i0, T _i1) : i0(_i0), i1(_i1) {} }; #endif template bool LoadStoreAtomicTest(T i0, T i1) { using execution_space = typename DeviceType::execution_space; struct InitFunctor f_init(i0); typename InitFunctor::type data("Data"); typename InitFunctor::h_type h_data("HData"); f_init.data = data; Kokkos::parallel_for(1, f_init); execution_space().fence(); #ifdef KOKKOS_ENABLE_DESUL_ATOMICS struct LoadStoreFunctor f(i0, i1); f.data = data; Kokkos::parallel_for(1, f); #else h_data() = i1; #endif Kokkos::deep_copy(h_data, data); return h_data() == i0; } //--------------------------------------------------- //--------------atomic_fetch_max--------------------- //--------------------------------------------------- template struct MaxFunctor { using execution_space = DEVICE_TYPE; using type = Kokkos::View; type data; T i0; T i1; KOKKOS_INLINE_FUNCTION void operator()(int) const { // Kokkos::atomic_fetch_max( &data(), (T) 1 ); Kokkos::atomic_fetch_max(&data(), (T)i1); } MaxFunctor(T _i0, T _i1) : i0(_i0), i1(_i1) {} }; template T MaxAtomic(T i0, T i1) { struct InitFunctor f_init(i0); typename InitFunctor::type data("Data"); typename InitFunctor::h_type h_data("HData"); f_init.data = data; Kokkos::parallel_for(1, f_init); execution_space().fence(); struct MaxFunctor f(i0, i1); f.data = data; Kokkos::parallel_for(1, f); execution_space().fence(); Kokkos::deep_copy(h_data, data); T val = h_data(); return val; } template T MaxAtomicCheck(T i0, T i1) { T* data = new T[1]; data[0] = 0; *data = (i0 > i1 ? i0 : i1); T val = *data; delete[] data; return val; } template bool MaxAtomicTest(T i0, T i1) { T res = MaxAtomic(i0, i1); T resSerial = MaxAtomicCheck(i0, i1); bool passed = true; if (resSerial != res) { passed = false; std::cout << "Loop<" << typeid(T).name() << ">( test = MaxAtomicTest" << " FAILED : " << resSerial << " != " << res << std::endl; } return passed; } //--------------------------------------------------- //--------------atomic_fetch_min--------------------- //--------------------------------------------------- template struct MinFunctor { using execution_space = DEVICE_TYPE; using type = Kokkos::View; type data; T i0; T i1; KOKKOS_INLINE_FUNCTION void operator()(int) const { Kokkos::atomic_fetch_min(&data(), (T)i1); } MinFunctor(T _i0, T _i1) : i0(_i0), i1(_i1) {} }; template T MinAtomic(T i0, T i1) { struct InitFunctor f_init(i0); typename InitFunctor::type data("Data"); typename InitFunctor::h_type h_data("HData"); f_init.data = data; Kokkos::parallel_for(1, f_init); execution_space().fence(); struct MinFunctor f(i0, i1); f.data = data; Kokkos::parallel_for(1, f); execution_space().fence(); Kokkos::deep_copy(h_data, data); T val = h_data(); return val; } template T MinAtomicCheck(T i0, T i1) { T* data = new T[1]; data[0] = 0; *data = (i0 < i1 ? i0 : i1); T val = *data; delete[] data; return val; } template bool MinAtomicTest(T i0, T i1) { T res = MinAtomic(i0, i1); T resSerial = MinAtomicCheck(i0, i1); bool passed = true; if (resSerial != res) { passed = false; std::cout << "Loop<" << typeid(T).name() << ">( test = MinAtomicTest" << " FAILED : " << resSerial << " != " << res << std::endl; } return passed; } //--------------------------------------------------- //--------------atomic_increment--------------------- //--------------------------------------------------- template struct IncFunctor { using execution_space = DEVICE_TYPE; using type = Kokkos::View; type data; T i0; KOKKOS_INLINE_FUNCTION void operator()(int) const { Kokkos::atomic_increment(&data()); } IncFunctor(T _i0) : i0(_i0) {} }; template T IncAtomic(T i0) { struct InitFunctor f_init(i0); typename InitFunctor::type data("Data"); typename InitFunctor::h_type h_data("HData"); f_init.data = data; Kokkos::parallel_for(1, f_init); execution_space().fence(); struct IncFunctor f(i0); f.data = data; Kokkos::parallel_for(1, f); execution_space().fence(); Kokkos::deep_copy(h_data, data); T val = h_data(); return val; } template T IncAtomicCheck(T i0) { T* data = new T[1]; data[0] = 0; *data = i0 + 1; T val = *data; delete[] data; return val; } template bool IncAtomicTest(T i0) { T res = IncAtomic(i0); T resSerial = IncAtomicCheck(i0); bool passed = true; if (resSerial != res) { passed = false; std::cout << "Loop<" << typeid(T).name() << ">( test = IncAtomicTest" << " FAILED : " << resSerial << " != " << res << std::endl; } return passed; } //--------------------------------------------------- //--------------atomic_decrement--------------------- //--------------------------------------------------- template struct DecFunctor { using execution_space = DEVICE_TYPE; using type = Kokkos::View; type data; T i0; KOKKOS_INLINE_FUNCTION void operator()(int) const { Kokkos::atomic_decrement(&data()); } DecFunctor(T _i0) : i0(_i0) {} }; template T DecAtomic(T i0) { struct InitFunctor f_init(i0); typename InitFunctor::type data("Data"); typename InitFunctor::h_type h_data("HData"); f_init.data = data; Kokkos::parallel_for(1, f_init); execution_space().fence(); struct DecFunctor f(i0); f.data = data; Kokkos::parallel_for(1, f); execution_space().fence(); Kokkos::deep_copy(h_data, data); T val = h_data(); return val; } template T DecAtomicCheck(T i0) { T* data = new T[1]; data[0] = 0; *data = i0 - 1; T val = *data; delete[] data; return val; } template bool DecAtomicTest(T i0) { T res = DecAtomic(i0); T resSerial = DecAtomicCheck(i0); bool passed = true; if (resSerial != res) { passed = false; std::cout << "Loop<" << typeid(T).name() << ">( test = DecAtomicTest" << " FAILED : " << resSerial << " != " << res << std::endl; } return passed; } //--------------------------------------------------- //--------------atomic_fetch_mul--------------------- //--------------------------------------------------- template struct MulFunctor { using execution_space = DEVICE_TYPE; using type = Kokkos::View; type data; T i0; T i1; KOKKOS_INLINE_FUNCTION void operator()(int) const { Kokkos::atomic_fetch_mul(&data(), (T)i1); } MulFunctor(T _i0, T _i1) : i0(_i0), i1(_i1) {} }; template T MulAtomic(T i0, T i1) { struct InitFunctor f_init(i0); typename InitFunctor::type data("Data"); typename InitFunctor::h_type h_data("HData"); f_init.data = data; Kokkos::parallel_for(1, f_init); execution_space().fence(); struct MulFunctor f(i0, i1); f.data = data; Kokkos::parallel_for(1, f); execution_space().fence(); Kokkos::deep_copy(h_data, data); T val = h_data(); return val; } template T MulAtomicCheck(T i0, T i1) { T* data = new T[1]; data[0] = 0; *data = i0 * i1; T val = *data; delete[] data; return val; } template bool MulAtomicTest(T i0, T i1) { T res = MulAtomic(i0, i1); T resSerial = MulAtomicCheck(i0, i1); bool passed = true; if (resSerial != res) { passed = false; std::cout << "Loop<" << typeid(T).name() << ">( test = MulAtomicTest" << " FAILED : " << resSerial << " != " << res << std::endl; } return passed; } //--------------------------------------------------- //--------------atomic_fetch_div--------------------- //--------------------------------------------------- template struct DivFunctor { using execution_space = DEVICE_TYPE; using type = Kokkos::View; type data; T i0; T i1; KOKKOS_INLINE_FUNCTION void operator()(int) const { Kokkos::atomic_fetch_div(&data(), (T)i1); } DivFunctor(T _i0, T _i1) : i0(_i0), i1(_i1) {} }; template T DivAtomic(T i0, T i1) { struct InitFunctor f_init(i0); typename InitFunctor::type data("Data"); typename InitFunctor::h_type h_data("HData"); f_init.data = data; Kokkos::parallel_for(1, f_init); execution_space().fence(); struct DivFunctor f(i0, i1); f.data = data; Kokkos::parallel_for(1, f); execution_space().fence(); Kokkos::deep_copy(h_data, data); T val = h_data(); return val; } template T DivAtomicCheck(T i0, T i1) { T* data = new T[1]; data[0] = 0; *data = i0 / i1; T val = *data; delete[] data; return val; } template bool DivAtomicTest(T i0, T i1) { T res = DivAtomic(i0, i1); T resSerial = DivAtomicCheck(i0, i1); bool passed = true; using Kokkos::abs; using std::abs; if (abs((resSerial - res) * 1.) > 1e-5) { passed = false; std::cout << "Loop<" << typeid(T).name() << ">( test = DivAtomicTest" << " FAILED : " << resSerial << " != " << res << std::endl; } return passed; } //--------------------------------------------------- //--------------atomic_fetch_mod--------------------- //--------------------------------------------------- template struct ModFunctor { using execution_space = DEVICE_TYPE; using type = Kokkos::View; type data; T i0; T i1; KOKKOS_INLINE_FUNCTION void operator()(int) const { Kokkos::atomic_fetch_mod(&data(), (T)i1); } ModFunctor(T _i0, T _i1) : i0(_i0), i1(_i1) {} }; template T ModAtomic(T i0, T i1) { struct InitFunctor f_init(i0); typename InitFunctor::type data("Data"); typename InitFunctor::h_type h_data("HData"); f_init.data = data; Kokkos::parallel_for(1, f_init); execution_space().fence(); struct ModFunctor f(i0, i1); f.data = data; Kokkos::parallel_for(1, f); execution_space().fence(); Kokkos::deep_copy(h_data, data); T val = h_data(); return val; } template T ModAtomicCheck(T i0, T i1) { T* data = new T[1]; data[0] = 0; *data = i0 % i1; T val = *data; delete[] data; return val; } template bool ModAtomicTest(T i0, T i1) { T res = ModAtomic(i0, i1); T resSerial = ModAtomicCheck(i0, i1); bool passed = true; if (resSerial != res) { passed = false; std::cout << "Loop<" << typeid(T).name() << ">( test = ModAtomicTest" << " FAILED : " << resSerial << " != " << res << std::endl; } return passed; } //--------------------------------------------------- //--------------atomic_fetch_and--------------------- //--------------------------------------------------- template struct AndFunctor { using execution_space = DEVICE_TYPE; using type = Kokkos::View; type data; T i0; T i1; KOKKOS_INLINE_FUNCTION void operator()(int) const { T result = Kokkos::atomic_fetch_and(&data(), (T)i1); Kokkos::atomic_and(&data(), result); } AndFunctor(T _i0, T _i1) : i0(_i0), i1(_i1) {} }; template T AndAtomic(T i0, T i1) { struct InitFunctor f_init(i0); typename InitFunctor::type data("Data"); typename InitFunctor::h_type h_data("HData"); f_init.data = data; Kokkos::parallel_for(1, f_init); execution_space().fence(); struct AndFunctor f(i0, i1); f.data = data; Kokkos::parallel_for(1, f); execution_space().fence(); Kokkos::deep_copy(h_data, data); T val = h_data(); return val; } template T AndAtomicCheck(T i0, T i1) { T* data = new T[1]; data[0] = 0; *data = i0 & i1; T val = *data; delete[] data; return val; } template bool AndAtomicTest(T i0, T i1) { T res = AndAtomic(i0, i1); T resSerial = AndAtomicCheck(i0, i1); bool passed = true; if (resSerial != res) { passed = false; std::cout << "Loop<" << typeid(T).name() << ">( test = AndAtomicTest" << " FAILED : " << resSerial << " != " << res << std::endl; } return passed; } //--------------------------------------------------- //--------------atomic_fetch_or---------------------- //--------------------------------------------------- template struct OrFunctor { using execution_space = DEVICE_TYPE; using type = Kokkos::View; type data; T i0; T i1; KOKKOS_INLINE_FUNCTION void operator()(int) const { T result = Kokkos::atomic_fetch_or(&data(), (T)i1); Kokkos::atomic_or(&data(), result); } OrFunctor(T _i0, T _i1) : i0(_i0), i1(_i1) {} }; template T OrAtomic(T i0, T i1) { struct InitFunctor f_init(i0); typename InitFunctor::type data("Data"); typename InitFunctor::h_type h_data("HData"); f_init.data = data; Kokkos::parallel_for(1, f_init); execution_space().fence(); struct OrFunctor f(i0, i1); f.data = data; Kokkos::parallel_for(1, f); execution_space().fence(); Kokkos::deep_copy(h_data, data); T val = h_data(); return val; } template T OrAtomicCheck(T i0, T i1) { T* data = new T[1]; data[0] = 0; *data = i0 | i1; T val = *data; delete[] data; return val; } template bool OrAtomicTest(T i0, T i1) { T res = OrAtomic(i0, i1); T resSerial = OrAtomicCheck(i0, i1); bool passed = true; if (resSerial != res) { passed = false; std::cout << "Loop<" << typeid(T).name() << ">( test = OrAtomicTest" << " FAILED : " << resSerial << " != " << res << std::endl; } return passed; } //--------------------------------------------------- //--------------atomic_fetch_xor--------------------- //--------------------------------------------------- template struct XorFunctor { using execution_space = DEVICE_TYPE; using type = Kokkos::View; type data; T i0; T i1; KOKKOS_INLINE_FUNCTION void operator()(int) const { Kokkos::atomic_fetch_xor(&data(), (T)i1); } XorFunctor(T _i0, T _i1) : i0(_i0), i1(_i1) {} }; template T XorAtomic(T i0, T i1) { struct InitFunctor f_init(i0); typename InitFunctor::type data("Data"); typename InitFunctor::h_type h_data("HData"); f_init.data = data; Kokkos::parallel_for(1, f_init); execution_space().fence(); struct XorFunctor f(i0, i1); f.data = data; Kokkos::parallel_for(1, f); execution_space().fence(); Kokkos::deep_copy(h_data, data); T val = h_data(); return val; } template T XorAtomicCheck(T i0, T i1) { T* data = new T[1]; data[0] = 0; *data = i0 ^ i1; T val = *data; delete[] data; return val; } template bool XorAtomicTest(T i0, T i1) { T res = XorAtomic(i0, i1); T resSerial = XorAtomicCheck(i0, i1); bool passed = true; if (resSerial != res) { passed = false; std::cout << "Loop<" << typeid(T).name() << ">( test = XorAtomicTest" << " FAILED : " << resSerial << " != " << res << std::endl; } return passed; } //--------------------------------------------------- //--------------atomic_fetch_lshift--------------------- //--------------------------------------------------- template struct LShiftFunctor { using execution_space = DEVICE_TYPE; using type = Kokkos::View; type data; T i0; T i1; KOKKOS_INLINE_FUNCTION void operator()(int) const { Kokkos::atomic_fetch_lshift(&data(), (T)i1); } LShiftFunctor(T _i0, T _i1) : i0(_i0), i1(_i1) {} }; template T LShiftAtomic(T i0, T i1) { struct InitFunctor f_init(i0); typename InitFunctor::type data("Data"); typename InitFunctor::h_type h_data("HData"); f_init.data = data; Kokkos::parallel_for(1, f_init); execution_space().fence(); struct LShiftFunctor f(i0, i1); f.data = data; Kokkos::parallel_for(1, f); execution_space().fence(); Kokkos::deep_copy(h_data, data); T val = h_data(); return val; } template T LShiftAtomicCheck(T i0, T i1) { T* data = new T[1]; data[0] = 0; *data = i0 << i1; T val = *data; delete[] data; return val; } template bool LShiftAtomicTest(T i0, T i1) { T res = LShiftAtomic(i0, i1); T resSerial = LShiftAtomicCheck(i0, i1); bool passed = true; if (resSerial != res) { passed = false; std::cout << "Loop<" << typeid(T).name() << ">( test = LShiftAtomicTest" << " FAILED : " << resSerial << " != " << res << std::endl; } return passed; } //--------------------------------------------------- //--------------atomic_fetch_rshift--------------------- //--------------------------------------------------- template struct RShiftFunctor { using execution_space = DEVICE_TYPE; using type = Kokkos::View; type data; T i0; T i1; KOKKOS_INLINE_FUNCTION void operator()(int) const { Kokkos::atomic_fetch_rshift(&data(), (T)i1); } RShiftFunctor(T _i0, T _i1) : i0(_i0), i1(_i1) {} }; template T RShiftAtomic(T i0, T i1) { struct InitFunctor f_init(i0); typename InitFunctor::type data("Data"); typename InitFunctor::h_type h_data("HData"); f_init.data = data; Kokkos::parallel_for(1, f_init); execution_space().fence(); struct RShiftFunctor f(i0, i1); f.data = data; Kokkos::parallel_for(1, f); execution_space().fence(); Kokkos::deep_copy(h_data, data); T val = h_data(); return val; } template T RShiftAtomicCheck(T i0, T i1) { T* data = new T[1]; data[0] = 0; *data = i0 >> i1; T val = *data; delete[] data; return val; } template bool RShiftAtomicTest(T i0, T i1) { T res = RShiftAtomic(i0, i1); T resSerial = RShiftAtomicCheck(i0, i1); bool passed = true; if (resSerial != res) { passed = false; std::cout << "Loop<" << typeid(T).name() << ">( test = RShiftAtomicTest" << " FAILED : " << resSerial << " != " << res << std::endl; } return passed; } //--------------------------------------------------- //--------------atomic_test_control------------------ //--------------------------------------------------- template bool AtomicOperationsTestIntegralType(int i0, int i1, int test) { switch (test) { case 1: return MaxAtomicTest((T)i0, (T)i1); case 2: return MinAtomicTest((T)i0, (T)i1); case 3: return MulAtomicTest((T)i0, (T)i1); case 4: return DivAtomicTest((T)i0, (T)i1); case 5: return ModAtomicTest((T)i0, (T)i1); case 6: return AndAtomicTest((T)i0, (T)i1); case 7: return OrAtomicTest((T)i0, (T)i1); case 8: return XorAtomicTest((T)i0, (T)i1); case 9: return LShiftAtomicTest((T)i0, (T)i1); case 10: return RShiftAtomicTest((T)i0, (T)i1); case 11: return IncAtomicTest((T)i0); case 12: return DecAtomicTest((T)i0); case 13: return LoadStoreAtomicTest((T)i0, (T)i1); } return 0; } template bool AtomicOperationsTestNonIntegralType(int i0, int i1, int test) { switch (test) { case 1: return MaxAtomicTest((T)i0, (T)i1); case 2: return MinAtomicTest((T)i0, (T)i1); case 3: return MulAtomicTest((T)i0, (T)i1); case 4: return DivAtomicTest((T)i0, (T)i1); case 5: return LoadStoreAtomicTest((T)i0, (T)i1); } return 0; } } // namespace TestAtomicOperations