Update Kokkos library in LAMMPS to v4.1.0
This commit is contained in:
@ -28,8 +28,11 @@ using value_type = double;
|
||||
constexpr double value = 0.5;
|
||||
|
||||
struct ReduceFunctor {
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const int i, double &UpdateSum) const {
|
||||
// The functor is templated on purpose to check that the value_type deduction
|
||||
// in parallel_reduce even works in this case.
|
||||
template <typename IndexType, typename ValueType>
|
||||
KOKKOS_INLINE_FUNCTION void operator()(const IndexType i,
|
||||
ValueType &UpdateSum) const {
|
||||
UpdateSum += (i + 1) * value;
|
||||
}
|
||||
};
|
||||
@ -45,6 +48,7 @@ struct NonTrivialReduceFunctor {
|
||||
NonTrivialReduceFunctor(NonTrivialReduceFunctor &&) = default;
|
||||
NonTrivialReduceFunctor &operator=(NonTrivialReduceFunctor &&) = default;
|
||||
NonTrivialReduceFunctor &operator=(NonTrivialReduceFunctor const &) = default;
|
||||
// Also make sure that it's OK if the destructor is not device-callable.
|
||||
~NonTrivialReduceFunctor() {}
|
||||
};
|
||||
|
||||
|
||||
@ -98,6 +98,10 @@ struct ThreadScratch {
|
||||
|
||||
TEST(TEST_CATEGORY, IncrTest_12a_ThreadScratch) {
|
||||
ThreadScratch<TEST_EXECSPACE> test;
|
||||
#ifdef KOKKOS_ENABLE_OPENACC // FIXME_OPENACC
|
||||
GTEST_SKIP() << "skipping since scratch memory is not yet implemented in the "
|
||||
"OpenACC backend";
|
||||
#endif
|
||||
// FIXME_OPENMPTARGET - team_size has to be a multiple of 32 for the tests to
|
||||
// pass in the Release and RelWithDebInfo builds. Does not need the team_size
|
||||
// to be a multiple of 32 for the Debug builds.
|
||||
|
||||
@ -88,6 +88,10 @@ struct TeamScratch {
|
||||
|
||||
TEST(TEST_CATEGORY, IncrTest_12b_TeamScratch) {
|
||||
TeamScratch<TEST_EXECSPACE> test;
|
||||
#ifdef KOKKOS_ENABLE_OPENACC // FIXME_OPENACC
|
||||
GTEST_SKIP() << "skipping since scratch memory is not yet implemented in the "
|
||||
"OpenACC backend";
|
||||
#endif
|
||||
// FIXME_OPENMPTARGET - team_size has to be a multiple of 32 for the tests to
|
||||
// pass in the Release and RelWithDebInfo builds. Does not need the team_size
|
||||
// to be a multiple of 32 for the Debug builds.
|
||||
|
||||
@ -38,6 +38,13 @@ struct MyComplex {
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
MyComplex(const MyComplex& src) : _re(src._re), _im(src._im) {}
|
||||
|
||||
// Copy assignment: takes over both components of src and returns a reference
// to this object so that assignments can be chained.
KOKKOS_INLINE_FUNCTION
MyComplex& operator=(const MyComplex& src) {
  this->_re = src._re;
  this->_im = src._im;
  return *this;
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator+=(const MyComplex& src) {
|
||||
_re += src._re;
|
||||
@ -93,6 +100,8 @@ struct TestMDRangeReduce {
|
||||
},
|
||||
d_result);
|
||||
|
||||
// FIXME_OPENACC: scalar reduction variable on the device is not yet supported.
|
||||
#if !defined(KOKKOS_ENABLE_OPENACC)
|
||||
// Parallel reduce on a view.
|
||||
Kokkos::parallel_reduce(
|
||||
mdPolicy_2D,
|
||||
@ -100,16 +109,23 @@ struct TestMDRangeReduce {
|
||||
update_value += d_data(i, j);
|
||||
},
|
||||
d_resultView);
|
||||
#endif
|
||||
|
||||
// Check correctness.
|
||||
ASSERT_EQ(h_result, d_result);
|
||||
|
||||
// FIXME_OPENACC: scalar reduction variable on the device is not yet supported.
|
||||
#if !defined(KOKKOS_ENABLE_OPENACC)
|
||||
// Copy view back to host.
|
||||
value_type view_result = 0.0;
|
||||
Kokkos::deep_copy(view_result, d_resultView);
|
||||
ASSERT_EQ(h_result, view_result);
|
||||
#endif
|
||||
}
|
||||
|
||||
// FIXME_OPENACC: custom reductions are not yet supported in the
|
||||
// OpenACC backend.
|
||||
#if !defined(KOKKOS_ENABLE_OPENACC)
|
||||
// Custom Reduction
|
||||
void reduce_custom() {
|
||||
Complex_View_1D d_data("complex array", N);
|
||||
@ -136,6 +152,7 @@ struct TestMDRangeReduce {
|
||||
ASSERT_EQ(result._re, sum * 0.5);
|
||||
ASSERT_EQ(result._im, -sum * 0.5);
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
// Reductions tests for MDRange policy and customized reduction.
|
||||
@ -144,9 +161,13 @@ TEST(TEST_CATEGORY, incr_14_MDrangeReduce) {
|
||||
test.reduce_MDRange();
|
||||
// FIXME_OPENMPTARGET: custom reductions are not yet supported in the
|
||||
// OpenMPTarget backend.
|
||||
// FIXME_OPENACC: custom reductions are not yet supported in the
|
||||
// OpenACC backend.
|
||||
#if !defined(KOKKOS_ENABLE_OPENMPTARGET)
|
||||
#if !defined(KOKKOS_ENABLE_OPENACC)
|
||||
test.reduce_custom();
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
} // namespace Test
|
||||
|
||||
@ -26,11 +26,60 @@ namespace Test {
|
||||
using value_type = double;
|
||||
const int N = 10;
|
||||
|
||||
template <typename ExecSpace>
|
||||
struct TrivialScanFunctor {
|
||||
Kokkos::View<value_type *, ExecSpace> d_data;
|
||||
|
||||
KOKKOS_FUNCTION
|
||||
void operator()(const int i, value_type &update_value,
|
||||
const bool final) const {
|
||||
const value_type val_i = d_data(i);
|
||||
if (final) d_data(i) = update_value;
|
||||
update_value += val_i;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename ExecSpace>
|
||||
struct NonTrivialScanFunctor {
|
||||
Kokkos::View<value_type *, ExecSpace> d_data;
|
||||
|
||||
KOKKOS_FUNCTION
|
||||
void operator()(const int i, value_type &update_value,
|
||||
const bool final) const {
|
||||
const value_type val_i = d_data(i);
|
||||
if (final) d_data(i) = update_value;
|
||||
update_value += val_i;
|
||||
}
|
||||
|
||||
NonTrivialScanFunctor(const Kokkos::View<value_type *, ExecSpace> &data)
|
||||
: d_data(data) {}
|
||||
NonTrivialScanFunctor(NonTrivialScanFunctor const &) = default;
|
||||
NonTrivialScanFunctor(NonTrivialScanFunctor &&) = default;
|
||||
NonTrivialScanFunctor &operator=(NonTrivialScanFunctor &&) = default;
|
||||
NonTrivialScanFunctor &operator=(NonTrivialScanFunctor const &) = default;
|
||||
// Also make sure that it's OK if the destructor is not device-callable.
|
||||
~NonTrivialScanFunctor() {}
|
||||
};
|
||||
|
||||
template <typename ExecSpace>
|
||||
struct GenericExclusiveScanFunctor {
|
||||
Kokkos::View<value_type *, ExecSpace> d_data;
|
||||
|
||||
template <typename IndexType, typename ValueType>
|
||||
KOKKOS_FUNCTION void operator()(const IndexType i, ValueType &update_value,
|
||||
const bool final) const {
|
||||
const ValueType val_i = d_data(i);
|
||||
if (final) d_data(i) = update_value;
|
||||
update_value += val_i;
|
||||
}
|
||||
};
|
||||
|
||||
template <class ExecSpace>
|
||||
struct TestScan {
|
||||
// 1D View of double
|
||||
using View_1D = typename Kokkos::View<value_type *, ExecSpace>;
|
||||
|
||||
template <typename FunctorType>
|
||||
void parallel_scan() {
|
||||
View_1D d_data("data", N);
|
||||
|
||||
@ -39,15 +88,9 @@ struct TestScan {
|
||||
Kokkos::RangePolicy<ExecSpace>(0, N),
|
||||
KOKKOS_LAMBDA(const int i) { d_data(i) = i * 0.5; });
|
||||
|
||||
// Exclusive parallel_scan call.
|
||||
Kokkos::parallel_scan(
|
||||
Kokkos::RangePolicy<ExecSpace>(0, N),
|
||||
KOKKOS_LAMBDA(const int i, value_type &update_value, const bool final) {
|
||||
const value_type val_i = d_data(i);
|
||||
if (final) d_data(i) = update_value;
|
||||
|
||||
update_value += val_i;
|
||||
});
|
||||
// Exclusive parallel_scan call
|
||||
Kokkos::parallel_scan(Kokkos::RangePolicy<ExecSpace>(0, N),
|
||||
FunctorType{d_data});
|
||||
|
||||
// Copy back the data.
|
||||
auto h_data =
|
||||
@ -63,9 +106,50 @@ struct TestScan {
|
||||
}
|
||||
};
|
||||
|
||||
template <class ExecSpace>
|
||||
struct TestScanWithTotal {
|
||||
// 1D View of double
|
||||
using View_1D = typename Kokkos::View<value_type *, ExecSpace>;
|
||||
View_1D d_data = View_1D("data", N);
|
||||
|
||||
template <typename IndexType>
|
||||
KOKKOS_FUNCTION void operator()(IndexType i) const {
|
||||
d_data(i) = i * 0.5;
|
||||
}
|
||||
|
||||
template <typename FunctorType>
|
||||
void parallel_scan() {
|
||||
// Initialize data.
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<ExecSpace>(0, N), *this);
|
||||
|
||||
value_type total;
|
||||
// Exclusive parallel_scan call
|
||||
Kokkos::parallel_scan(Kokkos::RangePolicy<ExecSpace>(0, N),
|
||||
FunctorType{d_data}, total);
|
||||
|
||||
// Copy back the data.
|
||||
auto h_data =
|
||||
Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), d_data);
|
||||
|
||||
// Check Correctness
|
||||
ASSERT_EQ(h_data(0), 0.0);
|
||||
value_type upd = h_data(0);
|
||||
for (int i = 1; i < N; ++i) {
|
||||
upd += (i - 1) * 0.5;
|
||||
ASSERT_EQ(h_data(i), upd);
|
||||
}
|
||||
ASSERT_EQ(total, N * (N - 1) * 0.25);
|
||||
}
|
||||
};
|
||||
|
||||
// Runs the exclusive scan test with each functor, first without and then with
// the total being returned. Every call names its functor type explicitly
// because parallel_scan's FunctorType template parameter cannot be deduced.
TEST(TEST_CATEGORY, IncrTest_16_parallelscan) {
  TestScan<TEST_EXECSPACE> test;
  test.parallel_scan<TrivialScanFunctor<TEST_EXECSPACE>>();
  test.parallel_scan<NonTrivialScanFunctor<TEST_EXECSPACE>>();

  TestScanWithTotal<TEST_EXECSPACE> test_total;
  test_total.parallel_scan<TrivialScanFunctor<TEST_EXECSPACE>>();
  test_total.parallel_scan<NonTrivialScanFunctor<TEST_EXECSPACE>>();
  test_total.parallel_scan<GenericExclusiveScanFunctor<TEST_EXECSPACE>>();
}
|
||||
|
||||
} // namespace Test
|
||||
|
||||
Reference in New Issue
Block a user