Update Kokkos library in LAMMPS to v4.5.0
This commit is contained in:
@ -16,7 +16,7 @@
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
namespace TestAtomic {
|
||||
namespace {
|
||||
|
||||
// Struct for testing arbitrary size atomics.
|
||||
|
||||
@ -157,17 +157,6 @@ struct AddFunctor {
|
||||
void operator()(int) const { Kokkos::atomic_fetch_add(&data(), (T)1); }
|
||||
};
|
||||
|
||||
// Functor whose call operator has the (index, reduction value) signature and
// atomically adds one to the single value held by `data`.
// Both the index and the reduction argument are ignored; only the atomic
// update of `data()` is performed.
template <class T, class DEVICE_TYPE>
|
||||
struct AddFunctorReduce {
|
||||
using execution_space = DEVICE_TYPE;
|
||||
using type = Kokkos::View<T, execution_space>;
|
||||
|
||||
// View holding the value that every invocation increments.
type data;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
// The unnamed int& is the reduction slot; it is never written here.
void operator()(int, int&) const { Kokkos::atomic_fetch_add(&data(), (T)1); }
|
||||
};
|
||||
|
||||
template <class T, class execution_space>
|
||||
T AddLoop(int loop) {
|
||||
struct ZeroFunctor<T, execution_space> f_zero;
|
||||
@ -188,12 +177,6 @@ T AddLoop(int loop) {
|
||||
Kokkos::deep_copy(h_data, data);
|
||||
T val = h_data();
|
||||
|
||||
struct AddFunctorReduce<T, execution_space> f_add_red;
|
||||
f_add_red.data = data;
|
||||
int dummy_result;
|
||||
Kokkos::parallel_reduce(loop, f_add_red, dummy_result);
|
||||
execution_space().fence();
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
@ -236,26 +219,6 @@ struct CASFunctor {
|
||||
}
|
||||
};
|
||||
|
||||
// Functor with the (index, reduction value) call signature that increments
// the value held by `data` by one using a compare-exchange retry loop.
// Both the index and the reduction argument are ignored.
template <class T, class DEVICE_TYPE>
|
||||
struct CASFunctorReduce {
|
||||
using execution_space = DEVICE_TYPE;
|
||||
using type = Kokkos::View<T, execution_space>;
|
||||
|
||||
// View holding the value that every invocation increments.
type data;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(int, int&) const {
|
||||
// Initial snapshot of the current value (a plain read of data()).
T old = data();
|
||||
T newval, assumed;
|
||||
|
||||
// Retry until the value returned by atomic_compare_exchange equals the
// value the increment was computed from.
do {
|
||||
assumed = old;
|
||||
newval = assumed + (T)1;
|
||||
// The returned value becomes the snapshot for the next attempt.
old = Kokkos::atomic_compare_exchange(&data(), assumed, newval);
|
||||
} while (old != assumed);
|
||||
}
|
||||
};
|
||||
|
||||
template <class T, class execution_space>
|
||||
T CASLoop(int loop) {
|
||||
struct ZeroFunctor<T, execution_space> f_zero;
|
||||
@ -274,12 +237,6 @@ T CASLoop(int loop) {
|
||||
Kokkos::deep_copy(h_data, data);
|
||||
T val = h_data();
|
||||
|
||||
struct CASFunctorReduce<T, execution_space> f_cas_red;
|
||||
f_cas_red.data = data;
|
||||
int dummy_result;
|
||||
Kokkos::parallel_reduce(loop, f_cas_red, dummy_result);
|
||||
execution_space().fence();
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
@ -307,6 +264,58 @@ T CASLoopSerial(int loop) {
|
||||
return val;
|
||||
}
|
||||
|
||||
//----------------------------------------------
|
||||
//--------------atomic_compare_exchange_strong--
|
||||
//----------------------------------------------
|
||||
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATION_WARNINGS
|
||||
KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_PUSH()
|
||||
#endif
|
||||
// Functor that increments the value held by `data` by one using
// Kokkos::atomic_compare_exchange_strong in a retry loop.
// In this file it sits inside the KOKKOS_ENABLE_DEPRECATED_CODE_4 guard with
// deprecation warnings suppressed around it.
template <class T, class DEVICE_TYPE>
|
||||
struct DeprecatedCASFunctor {
|
||||
using execution_space = DEVICE_TYPE;
|
||||
using type = Kokkos::View<T, execution_space>;
|
||||
|
||||
// View holding the value that every invocation increments.
type data;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
// The index argument is ignored.
void operator()(int) const {
|
||||
T newval, assumed;
|
||||
|
||||
// Re-read the current value on each attempt and retry while the
// compare-exchange call evaluates to false.
// NOTE(review): presumably false means the swap did not happen - confirm
// against the Kokkos API.
do {
|
||||
assumed = Kokkos::volatile_load(&data());
|
||||
newval = assumed + (T)1;
|
||||
} while (!Kokkos::atomic_compare_exchange_strong(&data(), assumed, newval));
|
||||
}
|
||||
};
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATION_WARNINGS
|
||||
KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_POP()
|
||||
#endif
|
||||
|
||||
// Runs DeprecatedCASFunctor `loop` times over a single value and returns the
// resulting value copied back through `h_data`.
//
// loop: iteration count passed to Kokkos::parallel_for for the CAS functor.
// Returns the content of `h_data()` after the deep copy from `data`.
template <class T, class execution_space>
|
||||
T DeprecatedCASLoop(int loop) {
|
||||
// ZeroFunctor is defined outside this view; presumably it resets the value
// to zero - TODO confirm.
struct ZeroFunctor<T, execution_space> f_zero;
|
||||
typename ZeroFunctor<T, execution_space>::type data("Data");
|
||||
typename ZeroFunctor<T, execution_space>::h_type h_data("HData");
|
||||
|
||||
// Run the zero functor once, then fence before launching the CAS kernel.
f_zero.data = data;
|
||||
Kokkos::parallel_for(1, f_zero);
|
||||
execution_space().fence();
|
||||
|
||||
// Launch `loop` invocations that each increment the same value.
struct DeprecatedCASFunctor<T, execution_space> f_cas;
|
||||
f_cas.data = data;
|
||||
Kokkos::parallel_for(loop, f_cas);
|
||||
execution_space().fence();
|
||||
|
||||
// Copy the result into h_data so it can be read and returned.
Kokkos::deep_copy(h_data, data);
|
||||
T val = h_data();
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
//----------------------------------------------
|
||||
//--------------atomic_exchange-----------------
|
||||
//----------------------------------------------
|
||||
@ -325,20 +334,6 @@ struct ExchFunctor {
|
||||
}
|
||||
};
|
||||
|
||||
// Functor with the (index, reduction value) call signature that exchanges the
// value in `data` with the iteration index and accumulates the value it
// displaced into `data2`. The reduction argument is ignored.
template <class T, class DEVICE_TYPE>
|
||||
struct ExchFunctorReduce {
|
||||
using execution_space = DEVICE_TYPE;
|
||||
using type = Kokkos::View<T, execution_space>;
|
||||
|
||||
// data: target of the atomic exchange; data2: running sum of displaced
// values.
type data, data2;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(int i, int&) const {
|
||||
// Store the index (converted to T) and keep what atomic_exchange returns.
T old = Kokkos::atomic_exchange(&data(), (T)i);
|
||||
// Add the returned value into the accumulator.
Kokkos::atomic_fetch_add(&data2(), old);
|
||||
}
|
||||
};
|
||||
|
||||
template <class T, class execution_space>
|
||||
T ExchLoop(int loop) {
|
||||
struct ZeroFunctor<T, execution_space> f_zero;
|
||||
@ -366,20 +361,13 @@ T ExchLoop(int loop) {
|
||||
Kokkos::deep_copy(h_data2, data2);
|
||||
T val = h_data() + h_data2();
|
||||
|
||||
struct ExchFunctorReduce<T, execution_space> f_exch_red;
|
||||
f_exch_red.data = data;
|
||||
f_exch_red.data2 = data2;
|
||||
int dummy_result;
|
||||
Kokkos::parallel_reduce(loop, f_exch_red, dummy_result);
|
||||
execution_space().fence();
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
T ExchLoopSerial(std::conditional_t<
|
||||
!std::is_same<T, Kokkos::complex<double> >::value, int, void>
|
||||
loop) {
|
||||
T ExchLoopSerial(
|
||||
std::conditional_t<!std::is_same_v<T, Kokkos::complex<double> >, int, void>
|
||||
loop) {
|
||||
T* data = new T[1];
|
||||
T* data2 = new T[1];
|
||||
data[0] = 0;
|
||||
@ -399,9 +387,9 @@ T ExchLoopSerial(std::conditional_t<
|
||||
}
|
||||
|
||||
template <class T>
|
||||
T ExchLoopSerial(std::conditional_t<
|
||||
std::is_same<T, Kokkos::complex<double> >::value, int, void>
|
||||
loop) {
|
||||
T ExchLoopSerial(
|
||||
std::conditional_t<std::is_same_v<T, Kokkos::complex<double> >, int, void>
|
||||
loop) {
|
||||
T* data = new T[1];
|
||||
T* data2 = new T[1];
|
||||
data[0] = 0;
|
||||
@ -427,8 +415,12 @@ T LoopVariant(int loop, int test) {
|
||||
case 1: return AddLoop<T, DeviceType>(loop);
|
||||
case 2: return CASLoop<T, DeviceType>(loop);
|
||||
case 3: return ExchLoop<T, DeviceType>(loop);
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
|
||||
case 4: return DeprecatedCASLoop<T, DeviceType>(loop);
|
||||
#endif
|
||||
}
|
||||
|
||||
Kokkos::abort("unreachable");
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -438,108 +430,121 @@ T LoopVariantSerial(int loop, int test) {
|
||||
case 1: return AddLoopSerial<T>(loop);
|
||||
case 2: return CASLoopSerial<T>(loop);
|
||||
case 3: return ExchLoopSerial<T>(loop);
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
|
||||
case 4: return CASLoopSerial<T>(loop);
|
||||
#endif
|
||||
}
|
||||
|
||||
Kokkos::abort("unreachable");
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <class T, class DeviceType>
|
||||
bool Loop(int loop, int test) {
|
||||
void Loop(int loop, int test) {
|
||||
T res = LoopVariant<T, DeviceType>(loop, test);
|
||||
T resSerial = LoopVariantSerial<T>(loop, test);
|
||||
|
||||
bool passed = true;
|
||||
|
||||
if (resSerial != res) {
|
||||
passed = false;
|
||||
|
||||
std::cout << "Loop<" << typeid(T).name() << ">( test = " << test
|
||||
<< " FAILED : " << resSerial << " != " << res << std::endl;
|
||||
}
|
||||
|
||||
return passed;
|
||||
ASSERT_EQ(res, resSerial) << "Loop<" << Kokkos::Impl::TypeInfo<T>::name()
|
||||
<< ">(loop=" << loop << ",test=" << test << ")";
|
||||
}
|
||||
|
||||
} // namespace TestAtomic
|
||||
|
||||
namespace Test {
|
||||
|
||||
TEST(TEST_CATEGORY, atomics) {
|
||||
const int loop_count = 1e4;
|
||||
|
||||
ASSERT_TRUE((TestAtomic::Loop<int, TEST_EXECSPACE>(loop_count, 1)));
|
||||
ASSERT_TRUE((TestAtomic::Loop<int, TEST_EXECSPACE>(loop_count, 2)));
|
||||
ASSERT_TRUE((TestAtomic::Loop<int, TEST_EXECSPACE>(loop_count, 3)));
|
||||
Loop<int, TEST_EXECSPACE>(loop_count, 1);
|
||||
Loop<int, TEST_EXECSPACE>(loop_count, 2);
|
||||
Loop<int, TEST_EXECSPACE>(loop_count, 3);
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
|
||||
Loop<int, TEST_EXECSPACE>(loop_count, 4);
|
||||
#endif
|
||||
|
||||
ASSERT_TRUE((TestAtomic::Loop<unsigned int, TEST_EXECSPACE>(loop_count, 1)));
|
||||
ASSERT_TRUE((TestAtomic::Loop<unsigned int, TEST_EXECSPACE>(loop_count, 2)));
|
||||
ASSERT_TRUE((TestAtomic::Loop<unsigned int, TEST_EXECSPACE>(loop_count, 3)));
|
||||
Loop<unsigned int, TEST_EXECSPACE>(loop_count, 1);
|
||||
Loop<unsigned int, TEST_EXECSPACE>(loop_count, 2);
|
||||
Loop<unsigned int, TEST_EXECSPACE>(loop_count, 3);
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
|
||||
Loop<unsigned int, TEST_EXECSPACE>(loop_count, 4);
|
||||
#endif
|
||||
|
||||
ASSERT_TRUE((TestAtomic::Loop<long int, TEST_EXECSPACE>(loop_count, 1)));
|
||||
ASSERT_TRUE((TestAtomic::Loop<long int, TEST_EXECSPACE>(loop_count, 2)));
|
||||
ASSERT_TRUE((TestAtomic::Loop<long int, TEST_EXECSPACE>(loop_count, 3)));
|
||||
Loop<long int, TEST_EXECSPACE>(loop_count, 1);
|
||||
Loop<long int, TEST_EXECSPACE>(loop_count, 2);
|
||||
Loop<long int, TEST_EXECSPACE>(loop_count, 3);
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
|
||||
Loop<long int, TEST_EXECSPACE>(loop_count, 4);
|
||||
#endif
|
||||
|
||||
ASSERT_TRUE(
|
||||
(TestAtomic::Loop<unsigned long int, TEST_EXECSPACE>(loop_count, 1)));
|
||||
ASSERT_TRUE(
|
||||
(TestAtomic::Loop<unsigned long int, TEST_EXECSPACE>(loop_count, 2)));
|
||||
ASSERT_TRUE(
|
||||
(TestAtomic::Loop<unsigned long int, TEST_EXECSPACE>(loop_count, 3)));
|
||||
Loop<unsigned long int, TEST_EXECSPACE>(loop_count, 1);
|
||||
Loop<unsigned long int, TEST_EXECSPACE>(loop_count, 2);
|
||||
Loop<unsigned long int, TEST_EXECSPACE>(loop_count, 3);
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
|
||||
Loop<unsigned long int, TEST_EXECSPACE>(loop_count, 4);
|
||||
#endif
|
||||
|
||||
ASSERT_TRUE((TestAtomic::Loop<long long int, TEST_EXECSPACE>(loop_count, 1)));
|
||||
ASSERT_TRUE((TestAtomic::Loop<long long int, TEST_EXECSPACE>(loop_count, 2)));
|
||||
ASSERT_TRUE((TestAtomic::Loop<long long int, TEST_EXECSPACE>(loop_count, 3)));
|
||||
Loop<long long int, TEST_EXECSPACE>(loop_count, 1);
|
||||
Loop<long long int, TEST_EXECSPACE>(loop_count, 2);
|
||||
Loop<long long int, TEST_EXECSPACE>(loop_count, 3);
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
|
||||
Loop<long long int, TEST_EXECSPACE>(loop_count, 4);
|
||||
#endif
|
||||
|
||||
ASSERT_TRUE((TestAtomic::Loop<double, TEST_EXECSPACE>(loop_count, 1)));
|
||||
ASSERT_TRUE((TestAtomic::Loop<double, TEST_EXECSPACE>(loop_count, 2)));
|
||||
ASSERT_TRUE((TestAtomic::Loop<double, TEST_EXECSPACE>(loop_count, 3)));
|
||||
Loop<double, TEST_EXECSPACE>(loop_count, 1);
|
||||
Loop<double, TEST_EXECSPACE>(loop_count, 2);
|
||||
Loop<double, TEST_EXECSPACE>(loop_count, 3);
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
|
||||
Loop<double, TEST_EXECSPACE>(loop_count, 4);
|
||||
#endif
|
||||
|
||||
ASSERT_TRUE((TestAtomic::Loop<float, TEST_EXECSPACE>(100, 1)));
|
||||
ASSERT_TRUE((TestAtomic::Loop<float, TEST_EXECSPACE>(100, 2)));
|
||||
ASSERT_TRUE((TestAtomic::Loop<float, TEST_EXECSPACE>(100, 3)));
|
||||
Loop<float, TEST_EXECSPACE>(100, 1);
|
||||
Loop<float, TEST_EXECSPACE>(100, 2);
|
||||
Loop<float, TEST_EXECSPACE>(100, 3);
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
|
||||
Loop<float, TEST_EXECSPACE>(100, 4);
|
||||
#endif
|
||||
|
||||
// FIXME_OPENMPTARGET
|
||||
// FIXME_OPENACC: atomic operations on composite types are not supported.
|
||||
#if !defined(KOKKOS_ENABLE_OPENMPTARGET) && !defined(KOKKOS_ENABLE_OPENACC)
|
||||
ASSERT_TRUE((TestAtomic::Loop<Kokkos::complex<float>, TEST_EXECSPACE>(1, 1)));
|
||||
ASSERT_TRUE((TestAtomic::Loop<Kokkos::complex<float>, TEST_EXECSPACE>(1, 2)));
|
||||
ASSERT_TRUE((TestAtomic::Loop<Kokkos::complex<float>, TEST_EXECSPACE>(1, 3)));
|
||||
Loop<Kokkos::complex<float>, TEST_EXECSPACE>(1, 1);
|
||||
Loop<Kokkos::complex<float>, TEST_EXECSPACE>(1, 2);
|
||||
Loop<Kokkos::complex<float>, TEST_EXECSPACE>(1, 3);
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
|
||||
Loop<Kokkos::complex<float>, TEST_EXECSPACE>(1, 4);
|
||||
#endif
|
||||
|
||||
ASSERT_TRUE(
|
||||
(TestAtomic::Loop<Kokkos::complex<float>, TEST_EXECSPACE>(100, 1)));
|
||||
ASSERT_TRUE(
|
||||
(TestAtomic::Loop<Kokkos::complex<float>, TEST_EXECSPACE>(100, 2)));
|
||||
ASSERT_TRUE(
|
||||
(TestAtomic::Loop<Kokkos::complex<float>, TEST_EXECSPACE>(100, 3)));
|
||||
Loop<Kokkos::complex<float>, TEST_EXECSPACE>(100, 1);
|
||||
Loop<Kokkos::complex<float>, TEST_EXECSPACE>(100, 2);
|
||||
Loop<Kokkos::complex<float>, TEST_EXECSPACE>(100, 3);
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
|
||||
Loop<Kokkos::complex<float>, TEST_EXECSPACE>(100, 4);
|
||||
#endif
|
||||
|
||||
// FIXME_SYCL Replace macro by SYCL_EXT_ONEAPI_DEVICE_GLOBAL or remove
|
||||
// condition altogether when possible.
|
||||
#if defined(KOKKOS_ENABLE_SYCL) && \
|
||||
!defined(KOKKOS_IMPL_SYCL_DEVICE_GLOBAL_SUPPORTED)
|
||||
if (std::is_same_v<TEST_EXECSPACE, Kokkos::Experimental::SYCL>) return;
|
||||
if (std::is_same_v<TEST_EXECSPACE, Kokkos::SYCL>) return;
|
||||
#endif
|
||||
Loop<Kokkos::complex<double>, TEST_EXECSPACE>(1, 1);
|
||||
Loop<Kokkos::complex<double>, TEST_EXECSPACE>(1, 2);
|
||||
Loop<Kokkos::complex<double>, TEST_EXECSPACE>(1, 3);
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
|
||||
Loop<Kokkos::complex<double>, TEST_EXECSPACE>(1, 4);
|
||||
#endif
|
||||
ASSERT_TRUE(
|
||||
(TestAtomic::Loop<Kokkos::complex<double>, TEST_EXECSPACE>(1, 1)));
|
||||
ASSERT_TRUE(
|
||||
(TestAtomic::Loop<Kokkos::complex<double>, TEST_EXECSPACE>(1, 2)));
|
||||
ASSERT_TRUE(
|
||||
(TestAtomic::Loop<Kokkos::complex<double>, TEST_EXECSPACE>(1, 3)));
|
||||
|
||||
ASSERT_TRUE(
|
||||
(TestAtomic::Loop<Kokkos::complex<double>, TEST_EXECSPACE>(100, 1)));
|
||||
ASSERT_TRUE(
|
||||
(TestAtomic::Loop<Kokkos::complex<double>, TEST_EXECSPACE>(100, 2)));
|
||||
ASSERT_TRUE(
|
||||
(TestAtomic::Loop<Kokkos::complex<double>, TEST_EXECSPACE>(100, 3)));
|
||||
Loop<Kokkos::complex<double>, TEST_EXECSPACE>(100, 1);
|
||||
Loop<Kokkos::complex<double>, TEST_EXECSPACE>(100, 2);
|
||||
Loop<Kokkos::complex<double>, TEST_EXECSPACE>(100, 3);
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
|
||||
Loop<Kokkos::complex<double>, TEST_EXECSPACE>(100, 4);
|
||||
#endif
|
||||
|
||||
// WORKAROUND MSVC
|
||||
#ifndef _WIN32
|
||||
ASSERT_TRUE(
|
||||
(TestAtomic::Loop<TestAtomic::SuperScalar<4>, TEST_EXECSPACE>(100, 1)));
|
||||
ASSERT_TRUE(
|
||||
(TestAtomic::Loop<TestAtomic::SuperScalar<4>, TEST_EXECSPACE>(100, 2)));
|
||||
ASSERT_TRUE(
|
||||
(TestAtomic::Loop<TestAtomic::SuperScalar<4>, TEST_EXECSPACE>(100, 3)));
|
||||
Loop<SuperScalar<4>, TEST_EXECSPACE>(100, 1);
|
||||
Loop<SuperScalar<4>, TEST_EXECSPACE>(100, 2);
|
||||
Loop<SuperScalar<4>, TEST_EXECSPACE>(100, 3);
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
|
||||
Loop<SuperScalar<4>, TEST_EXECSPACE>(100, 4);
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
@ -587,4 +592,4 @@ struct TpetraUseCase {
|
||||
|
||||
TEST(TEST_CATEGORY, atomics_tpetra_max_abs) { TpetraUseCase().check(); }
|
||||
|
||||
} // namespace Test
|
||||
} // namespace
|
||||
|
||||
Reference in New Issue
Block a user