Update Kokkos library in LAMMPS to v4.5.0

This commit is contained in:
Stan Moore
2024-12-13 09:23:03 -07:00
parent a78aee5731
commit 7f68aeb6d5
617 changed files with 21499 additions and 17255 deletions

View File

@ -16,7 +16,7 @@
#include <Kokkos_Core.hpp>
namespace TestAtomic {
namespace {
// Struct for testing arbitrary size atomics.
@ -157,17 +157,6 @@ struct AddFunctor {
void operator()(int) const { Kokkos::atomic_fetch_add(&data(), (T)1); }
};
// Functor with a reduction-style call operator (index, update reference).
// Every invocation atomically adds one to the single value held by `data`;
// the update reference is never read or written.
template <class T, class DEVICE_TYPE>
struct AddFunctorReduce {
  using execution_space = DEVICE_TYPE;
  using type            = Kokkos::View<T, execution_space>;

  // Value that every invocation increments.
  type data;

  KOKKOS_INLINE_FUNCTION
  void operator()(int /*index*/, int& /*unused_update*/) const {
    Kokkos::atomic_fetch_add(&data(), static_cast<T>(1));
  }
};
template <class T, class execution_space>
T AddLoop(int loop) {
struct ZeroFunctor<T, execution_space> f_zero;
@ -188,12 +177,6 @@ T AddLoop(int loop) {
Kokkos::deep_copy(h_data, data);
T val = h_data();
struct AddFunctorReduce<T, execution_space> f_add_red;
f_add_red.data = data;
int dummy_result;
Kokkos::parallel_reduce(loop, f_add_red, dummy_result);
execution_space().fence();
return val;
}
@ -236,26 +219,6 @@ struct CASFunctor {
}
};
// Functor with a reduction-style call operator (index, update reference).
// Every invocation increments the single value held by `data` by one using a
// compare-and-exchange retry loop; the update reference is never touched.
template <class T, class DEVICE_TYPE>
struct CASFunctorReduce {
  using execution_space = DEVICE_TYPE;
  using type            = Kokkos::View<T, execution_space>;

  // Value that every invocation increments.
  type data;

  KOKKOS_INLINE_FUNCTION
  void operator()(int /*index*/, int& /*unused_update*/) const {
    T observed = data();
    T desired, expected;
    bool swapped = false;
    // Runs at least once; retries until the value returned by the exchange
    // matches what we expected to find, i.e. our update was the one applied.
    while (!swapped) {
      expected = observed;
      desired  = expected + static_cast<T>(1);
      observed = Kokkos::atomic_compare_exchange(&data(), expected, desired);
      swapped  = !(observed != expected);
    }
  }
};
template <class T, class execution_space>
T CASLoop(int loop) {
struct ZeroFunctor<T, execution_space> f_zero;
@ -274,12 +237,6 @@ T CASLoop(int loop) {
Kokkos::deep_copy(h_data, data);
T val = h_data();
struct CASFunctorReduce<T, execution_space> f_cas_red;
f_cas_red.data = data;
int dummy_result;
Kokkos::parallel_reduce(loop, f_cas_red, dummy_result);
execution_space().fence();
return val;
}
@ -307,6 +264,58 @@ T CASLoopSerial(int loop) {
return val;
}
//----------------------------------------------
//--------------atomic_compare_exchange_strong--
//----------------------------------------------
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
#ifdef KOKKOS_ENABLE_DEPRECATION_WARNINGS
KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_PUSH()
#endif
// Functor that increments the single value held by `data` by one per
// invocation, retrying Kokkos::atomic_compare_exchange_strong until it
// reports success.
template <class T, class DEVICE_TYPE>
struct DeprecatedCASFunctor {
  using execution_space = DEVICE_TYPE;
  using type            = Kokkos::View<T, execution_space>;

  // Value that every invocation increments.
  type data;

  KOKKOS_INLINE_FUNCTION
  void operator()(int /*index*/) const {
    T desired, expected;
    bool swapped = false;
    // Runs at least once; each attempt re-reads the current value first.
    while (!swapped) {
      expected = Kokkos::volatile_load(&data());
      desired  = expected + static_cast<T>(1);
      swapped =
          Kokkos::atomic_compare_exchange_strong(&data(), expected, desired);
    }
  }
};
#ifdef KOKKOS_ENABLE_DEPRECATION_WARNINGS
KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_POP()
#endif
template <class T, class execution_space>
T DeprecatedCASLoop(int loop) {
struct ZeroFunctor<T, execution_space> f_zero;
typename ZeroFunctor<T, execution_space>::type data("Data");
typename ZeroFunctor<T, execution_space>::h_type h_data("HData");
f_zero.data = data;
Kokkos::parallel_for(1, f_zero);
execution_space().fence();
struct DeprecatedCASFunctor<T, execution_space> f_cas;
f_cas.data = data;
Kokkos::parallel_for(loop, f_cas);
execution_space().fence();
Kokkos::deep_copy(h_data, data);
T val = h_data();
return val;
}
#endif
//----------------------------------------------
//--------------atomic_exchange-----------------
//----------------------------------------------
@ -325,20 +334,6 @@ struct ExchFunctor {
}
};
// Functor with a reduction-style call operator (index, update reference).
// Every invocation atomically swaps the index (converted to T) into `data`
// and atomically adds the value it displaced to `data2`; the update
// reference is never touched.
template <class T, class DEVICE_TYPE>
struct ExchFunctorReduce {
  using execution_space = DEVICE_TYPE;
  using type            = Kokkos::View<T, execution_space>;

  // Target of the exchange.
  type data;
  // Accumulates the values displaced from `data`.
  type data2;

  KOKKOS_INLINE_FUNCTION
  void operator()(int i, int& /*unused_update*/) const {
    T displaced = Kokkos::atomic_exchange(&data(), static_cast<T>(i));
    Kokkos::atomic_fetch_add(&data2(), displaced);
  }
};
template <class T, class execution_space>
T ExchLoop(int loop) {
struct ZeroFunctor<T, execution_space> f_zero;
@ -366,20 +361,13 @@ T ExchLoop(int loop) {
Kokkos::deep_copy(h_data2, data2);
T val = h_data() + h_data2();
struct ExchFunctorReduce<T, execution_space> f_exch_red;
f_exch_red.data = data;
f_exch_red.data2 = data2;
int dummy_result;
Kokkos::parallel_reduce(loop, f_exch_red, dummy_result);
execution_space().fence();
return val;
}
template <class T>
T ExchLoopSerial(std::conditional_t<
!std::is_same<T, Kokkos::complex<double> >::value, int, void>
loop) {
T ExchLoopSerial(
std::conditional_t<!std::is_same_v<T, Kokkos::complex<double> >, int, void>
loop) {
T* data = new T[1];
T* data2 = new T[1];
data[0] = 0;
@ -399,9 +387,9 @@ T ExchLoopSerial(std::conditional_t<
}
template <class T>
T ExchLoopSerial(std::conditional_t<
std::is_same<T, Kokkos::complex<double> >::value, int, void>
loop) {
T ExchLoopSerial(
std::conditional_t<std::is_same_v<T, Kokkos::complex<double> >, int, void>
loop) {
T* data = new T[1];
T* data2 = new T[1];
data[0] = 0;
@ -427,8 +415,12 @@ T LoopVariant(int loop, int test) {
case 1: return AddLoop<T, DeviceType>(loop);
case 2: return CASLoop<T, DeviceType>(loop);
case 3: return ExchLoop<T, DeviceType>(loop);
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
case 4: return DeprecatedCASLoop<T, DeviceType>(loop);
#endif
}
Kokkos::abort("unreachable");
return 0;
}
@ -438,108 +430,121 @@ T LoopVariantSerial(int loop, int test) {
case 1: return AddLoopSerial<T>(loop);
case 2: return CASLoopSerial<T>(loop);
case 3: return ExchLoopSerial<T>(loop);
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
case 4: return CASLoopSerial<T>(loop);
#endif
}
Kokkos::abort("unreachable");
return 0;
}
// Forwards `loop` and `test` to both LoopVariant<T, DeviceType> and
// LoopVariantSerial<T>, then checks that the two results are equal,
// reporting the type and the test number on mismatch.
template <class T, class DeviceType>
bool Loop(int loop, int test) {
void Loop(int loop, int test) {
T res = LoopVariant<T, DeviceType>(loop, test);
T resSerial = LoopVariantSerial<T>(loop, test);
bool passed = true;
if (resSerial != res) {
passed = false;
std::cout << "Loop<" << typeid(T).name() << ">( test = " << test
<< " FAILED : " << resSerial << " != " << res << std::endl;
}
return passed;
ASSERT_EQ(res, resSerial) << "Loop<" << Kokkos::Impl::TypeInfo<T>::name()
<< ">(loop=" << loop << ",test=" << test << ")";
}
} // namespace TestAtomic
namespace Test {
TEST(TEST_CATEGORY, atomics) {
const int loop_count = 1e4;
ASSERT_TRUE((TestAtomic::Loop<int, TEST_EXECSPACE>(loop_count, 1)));
ASSERT_TRUE((TestAtomic::Loop<int, TEST_EXECSPACE>(loop_count, 2)));
ASSERT_TRUE((TestAtomic::Loop<int, TEST_EXECSPACE>(loop_count, 3)));
Loop<int, TEST_EXECSPACE>(loop_count, 1);
Loop<int, TEST_EXECSPACE>(loop_count, 2);
Loop<int, TEST_EXECSPACE>(loop_count, 3);
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
Loop<int, TEST_EXECSPACE>(loop_count, 4);
#endif
ASSERT_TRUE((TestAtomic::Loop<unsigned int, TEST_EXECSPACE>(loop_count, 1)));
ASSERT_TRUE((TestAtomic::Loop<unsigned int, TEST_EXECSPACE>(loop_count, 2)));
ASSERT_TRUE((TestAtomic::Loop<unsigned int, TEST_EXECSPACE>(loop_count, 3)));
Loop<unsigned int, TEST_EXECSPACE>(loop_count, 1);
Loop<unsigned int, TEST_EXECSPACE>(loop_count, 2);
Loop<unsigned int, TEST_EXECSPACE>(loop_count, 3);
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
Loop<unsigned int, TEST_EXECSPACE>(loop_count, 4);
#endif
ASSERT_TRUE((TestAtomic::Loop<long int, TEST_EXECSPACE>(loop_count, 1)));
ASSERT_TRUE((TestAtomic::Loop<long int, TEST_EXECSPACE>(loop_count, 2)));
ASSERT_TRUE((TestAtomic::Loop<long int, TEST_EXECSPACE>(loop_count, 3)));
Loop<long int, TEST_EXECSPACE>(loop_count, 1);
Loop<long int, TEST_EXECSPACE>(loop_count, 2);
Loop<long int, TEST_EXECSPACE>(loop_count, 3);
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
Loop<long int, TEST_EXECSPACE>(loop_count, 4);
#endif
ASSERT_TRUE(
(TestAtomic::Loop<unsigned long int, TEST_EXECSPACE>(loop_count, 1)));
ASSERT_TRUE(
(TestAtomic::Loop<unsigned long int, TEST_EXECSPACE>(loop_count, 2)));
ASSERT_TRUE(
(TestAtomic::Loop<unsigned long int, TEST_EXECSPACE>(loop_count, 3)));
Loop<unsigned long int, TEST_EXECSPACE>(loop_count, 1);
Loop<unsigned long int, TEST_EXECSPACE>(loop_count, 2);
Loop<unsigned long int, TEST_EXECSPACE>(loop_count, 3);
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
Loop<unsigned long int, TEST_EXECSPACE>(loop_count, 4);
#endif
ASSERT_TRUE((TestAtomic::Loop<long long int, TEST_EXECSPACE>(loop_count, 1)));
ASSERT_TRUE((TestAtomic::Loop<long long int, TEST_EXECSPACE>(loop_count, 2)));
ASSERT_TRUE((TestAtomic::Loop<long long int, TEST_EXECSPACE>(loop_count, 3)));
Loop<long long int, TEST_EXECSPACE>(loop_count, 1);
Loop<long long int, TEST_EXECSPACE>(loop_count, 2);
Loop<long long int, TEST_EXECSPACE>(loop_count, 3);
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
Loop<long long int, TEST_EXECSPACE>(loop_count, 4);
#endif
ASSERT_TRUE((TestAtomic::Loop<double, TEST_EXECSPACE>(loop_count, 1)));
ASSERT_TRUE((TestAtomic::Loop<double, TEST_EXECSPACE>(loop_count, 2)));
ASSERT_TRUE((TestAtomic::Loop<double, TEST_EXECSPACE>(loop_count, 3)));
Loop<double, TEST_EXECSPACE>(loop_count, 1);
Loop<double, TEST_EXECSPACE>(loop_count, 2);
Loop<double, TEST_EXECSPACE>(loop_count, 3);
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
Loop<double, TEST_EXECSPACE>(loop_count, 4);
#endif
ASSERT_TRUE((TestAtomic::Loop<float, TEST_EXECSPACE>(100, 1)));
ASSERT_TRUE((TestAtomic::Loop<float, TEST_EXECSPACE>(100, 2)));
ASSERT_TRUE((TestAtomic::Loop<float, TEST_EXECSPACE>(100, 3)));
Loop<float, TEST_EXECSPACE>(100, 1);
Loop<float, TEST_EXECSPACE>(100, 2);
Loop<float, TEST_EXECSPACE>(100, 3);
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
Loop<float, TEST_EXECSPACE>(100, 4);
#endif
// FIXME_OPENMPTARGET
// FIXME_OPENACC: atomic operations on composite types are not supported.
#if !defined(KOKKOS_ENABLE_OPENMPTARGET) && !defined(KOKKOS_ENABLE_OPENACC)
ASSERT_TRUE((TestAtomic::Loop<Kokkos::complex<float>, TEST_EXECSPACE>(1, 1)));
ASSERT_TRUE((TestAtomic::Loop<Kokkos::complex<float>, TEST_EXECSPACE>(1, 2)));
ASSERT_TRUE((TestAtomic::Loop<Kokkos::complex<float>, TEST_EXECSPACE>(1, 3)));
Loop<Kokkos::complex<float>, TEST_EXECSPACE>(1, 1);
Loop<Kokkos::complex<float>, TEST_EXECSPACE>(1, 2);
Loop<Kokkos::complex<float>, TEST_EXECSPACE>(1, 3);
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
Loop<Kokkos::complex<float>, TEST_EXECSPACE>(1, 4);
#endif
ASSERT_TRUE(
(TestAtomic::Loop<Kokkos::complex<float>, TEST_EXECSPACE>(100, 1)));
ASSERT_TRUE(
(TestAtomic::Loop<Kokkos::complex<float>, TEST_EXECSPACE>(100, 2)));
ASSERT_TRUE(
(TestAtomic::Loop<Kokkos::complex<float>, TEST_EXECSPACE>(100, 3)));
Loop<Kokkos::complex<float>, TEST_EXECSPACE>(100, 1);
Loop<Kokkos::complex<float>, TEST_EXECSPACE>(100, 2);
Loop<Kokkos::complex<float>, TEST_EXECSPACE>(100, 3);
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
Loop<Kokkos::complex<float>, TEST_EXECSPACE>(100, 4);
#endif
// FIXME_SYCL Replace macro by SYCL_EXT_ONEAPI_DEVICE_GLOBAL or remove
// condition altogether when possible.
#if defined(KOKKOS_ENABLE_SYCL) && \
!defined(KOKKOS_IMPL_SYCL_DEVICE_GLOBAL_SUPPORTED)
if (std::is_same_v<TEST_EXECSPACE, Kokkos::Experimental::SYCL>) return;
if (std::is_same_v<TEST_EXECSPACE, Kokkos::SYCL>) return;
#endif
Loop<Kokkos::complex<double>, TEST_EXECSPACE>(1, 1);
Loop<Kokkos::complex<double>, TEST_EXECSPACE>(1, 2);
Loop<Kokkos::complex<double>, TEST_EXECSPACE>(1, 3);
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
Loop<Kokkos::complex<double>, TEST_EXECSPACE>(1, 4);
#endif
ASSERT_TRUE(
(TestAtomic::Loop<Kokkos::complex<double>, TEST_EXECSPACE>(1, 1)));
ASSERT_TRUE(
(TestAtomic::Loop<Kokkos::complex<double>, TEST_EXECSPACE>(1, 2)));
ASSERT_TRUE(
(TestAtomic::Loop<Kokkos::complex<double>, TEST_EXECSPACE>(1, 3)));
ASSERT_TRUE(
(TestAtomic::Loop<Kokkos::complex<double>, TEST_EXECSPACE>(100, 1)));
ASSERT_TRUE(
(TestAtomic::Loop<Kokkos::complex<double>, TEST_EXECSPACE>(100, 2)));
ASSERT_TRUE(
(TestAtomic::Loop<Kokkos::complex<double>, TEST_EXECSPACE>(100, 3)));
Loop<Kokkos::complex<double>, TEST_EXECSPACE>(100, 1);
Loop<Kokkos::complex<double>, TEST_EXECSPACE>(100, 2);
Loop<Kokkos::complex<double>, TEST_EXECSPACE>(100, 3);
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
Loop<Kokkos::complex<double>, TEST_EXECSPACE>(100, 4);
#endif
// WORKAROUND MSVC
#ifndef _WIN32
ASSERT_TRUE(
(TestAtomic::Loop<TestAtomic::SuperScalar<4>, TEST_EXECSPACE>(100, 1)));
ASSERT_TRUE(
(TestAtomic::Loop<TestAtomic::SuperScalar<4>, TEST_EXECSPACE>(100, 2)));
ASSERT_TRUE(
(TestAtomic::Loop<TestAtomic::SuperScalar<4>, TEST_EXECSPACE>(100, 3)));
Loop<SuperScalar<4>, TEST_EXECSPACE>(100, 1);
Loop<SuperScalar<4>, TEST_EXECSPACE>(100, 2);
Loop<SuperScalar<4>, TEST_EXECSPACE>(100, 3);
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
Loop<SuperScalar<4>, TEST_EXECSPACE>(100, 4);
#endif
#endif
#endif
}
@ -587,4 +592,4 @@ struct TpetraUseCase {
TEST(TEST_CATEGORY, atomics_tpetra_max_abs) { TpetraUseCase().check(); }
} // namespace Test
} // namespace