Update Kokkos library in LAMMPS to v3.3.0
This commit is contained in:
@ -126,7 +126,8 @@ struct TestParallel_For {
|
||||
init();
|
||||
|
||||
// parallel-for functor called for num_elements number of iterations.
|
||||
Kokkos::parallel_for("parallel_for", num_elements,
|
||||
Kokkos::parallel_for("parallel_for",
|
||||
Kokkos::RangePolicy<ExecSpace>(0, num_elements),
|
||||
ParallelForFunctor(deviceData, value));
|
||||
|
||||
Kokkos::fence();
|
||||
|
||||
@ -54,33 +54,38 @@ namespace Test {
|
||||
|
||||
using value_type = double;
|
||||
constexpr double value = 0.5;
|
||||
const int num_elements = 10;
|
||||
|
||||
struct ReduceFunctor {
|
||||
value_type *_data;
|
||||
|
||||
ReduceFunctor(value_type *data) : _data(data) {}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const int i, double &UpdateSum) const {
|
||||
_data[i] = (i + 1) * value;
|
||||
UpdateSum += _data[i];
|
||||
UpdateSum += (i + 1) * value;
|
||||
}
|
||||
};
|
||||
|
||||
struct NonTrivialReduceFunctor {
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const int i, double &UpdateSum) const {
|
||||
UpdateSum += (i + 1) * value;
|
||||
}
|
||||
|
||||
NonTrivialReduceFunctor() = default;
|
||||
NonTrivialReduceFunctor(NonTrivialReduceFunctor const &) = default;
|
||||
NonTrivialReduceFunctor(NonTrivialReduceFunctor &&) = default;
|
||||
NonTrivialReduceFunctor &operator=(NonTrivialReduceFunctor &&) = default;
|
||||
NonTrivialReduceFunctor &operator=(NonTrivialReduceFunctor const &) = default;
|
||||
~NonTrivialReduceFunctor() {}
|
||||
};
|
||||
|
||||
template <class ExecSpace>
|
||||
struct TestReduction {
|
||||
// Memory space type for Device and Host data
|
||||
using d_memspace_type = typename ExecSpace::memory_space;
|
||||
using h_memspace_type = Kokkos::HostSpace;
|
||||
|
||||
value_type *deviceData, *hostData;
|
||||
value_type sum = 0.0;
|
||||
const int m_num_elements;
|
||||
|
||||
TestReduction(int num_elements) : m_num_elements(num_elements) {}
|
||||
|
||||
// compare and equal
|
||||
void check_correctness() {
|
||||
int sum_local = 0;
|
||||
for (int i = 0; i < num_elements; ++i) sum_local += (i + 1);
|
||||
const int sum_local = (m_num_elements * (m_num_elements + 1)) / 2;
|
||||
|
||||
ASSERT_EQ(sum, sum_local * value)
|
||||
<< "The reduced value does not match the expected answer";
|
||||
@ -99,39 +104,8 @@ struct TestReduction {
|
||||
Kokkos::kokkos_free<MemSpace>(data);
|
||||
}
|
||||
|
||||
// Free the allocated memory
|
||||
void free_mem() {
|
||||
Kokkos::kokkos_free<d_memspace_type>(deviceData);
|
||||
Kokkos::kokkos_free<h_memspace_type>(hostData);
|
||||
}
|
||||
|
||||
// Allocate Memory for both device and host memory spaces
|
||||
void init() {
|
||||
// Allocate memory on Device space.
|
||||
deviceData = allocate_mem<d_memspace_type>(num_elements);
|
||||
ASSERT_NE(deviceData, nullptr);
|
||||
|
||||
// Allocate memory on Host space.
|
||||
hostData = allocate_mem<h_memspace_type>(num_elements);
|
||||
ASSERT_NE(hostData, nullptr);
|
||||
|
||||
// Initialize the sum value to zero.
|
||||
sum = 0.0;
|
||||
}
|
||||
|
||||
void check_correctness_and_cleanup() {
|
||||
// Check if reduction has produced correct results
|
||||
check_correctness();
|
||||
|
||||
// free the allocated memory
|
||||
free_mem<d_memspace_type>(deviceData);
|
||||
free_mem<h_memspace_type>(hostData);
|
||||
}
|
||||
|
||||
void sum_reduction() {
|
||||
// Allocates memory for num_elements number of value_type elements in the
|
||||
// host and device memory spaces.
|
||||
init();
|
||||
sum = 0.0;
|
||||
|
||||
// Creates a range policy that uses dynamic schedule.
|
||||
using range_policy =
|
||||
@ -139,16 +113,34 @@ struct TestReduction {
|
||||
|
||||
// parallel_reduce call with range policy over num_elements number of
|
||||
// iterations
|
||||
Kokkos::parallel_reduce("Reduction", range_policy(0, num_elements),
|
||||
ReduceFunctor(deviceData), sum);
|
||||
Kokkos::parallel_reduce("Reduction", range_policy(0, m_num_elements),
|
||||
ReduceFunctor{}, sum);
|
||||
|
||||
check_correctness_and_cleanup();
|
||||
check_correctness();
|
||||
}
|
||||
|
||||
void non_trivial_sum_reduction() {
|
||||
sum = 0.0;
|
||||
|
||||
// Creates a range policy that uses dynamic schedule.
|
||||
using range_policy =
|
||||
Kokkos::RangePolicy<ExecSpace, Kokkos::Schedule<Kokkos::Dynamic> >;
|
||||
|
||||
// parallel_reduce call with range policy over num_elements number of
|
||||
// iterations
|
||||
Kokkos::parallel_reduce("Reduction", range_policy(0, m_num_elements),
|
||||
NonTrivialReduceFunctor{}, sum);
|
||||
|
||||
check_correctness();
|
||||
}
|
||||
};
|
||||
|
||||
TEST(TEST_CATEGORY, IncrTest_05_reduction) {
|
||||
TestReduction<TEST_EXECSPACE> test;
|
||||
test.sum_reduction();
|
||||
for (unsigned int i = 0; i < 100; ++i) {
|
||||
TestReduction<TEST_EXECSPACE> test(i);
|
||||
test.sum_reduction();
|
||||
test.non_trivial_sum_reduction();
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Test
|
||||
|
||||
@ -64,6 +64,7 @@ struct HierarchicalBasics {
|
||||
|
||||
ASSERT_EQ(pol.league_size(), nP);
|
||||
ASSERT_LE(pol.team_size(), nT);
|
||||
|
||||
nT = pol.team_size();
|
||||
|
||||
Kokkos::View<int **, ExecSpace> v("Array_A", nP, nT);
|
||||
@ -93,9 +94,17 @@ struct HierarchicalBasics {
|
||||
|
||||
TEST(TEST_CATEGORY, IncrTest_10_Hierarchical_Basics) {
|
||||
HierarchicalBasics<TEST_EXECSPACE> test;
|
||||
|
||||
// OpenMPTarget backend only accepts >= 32 threads per team
|
||||
#if defined(KOKKOS_ENABLE_OPENMPTARGET)
|
||||
test.run(1, 32);
|
||||
test.run(8, 64);
|
||||
test.run(11, 128);
|
||||
#else
|
||||
test.run(1, 4);
|
||||
test.run(8, 16);
|
||||
test.run(11, 13);
|
||||
#endif
|
||||
}
|
||||
|
||||
} // namespace Test
|
||||
|
||||
@ -81,8 +81,8 @@ struct Hierarchical_ForLoop_A {
|
||||
Kokkos::fence();
|
||||
auto v_H = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), v);
|
||||
|
||||
int check = 0;
|
||||
const int s = sY * sX;
|
||||
long long int check = 0;
|
||||
const long long int s = sY * sX;
|
||||
for (int i = 0; i < sX; ++i)
|
||||
for (int j = 0; j < sY; ++j) check += v_H(i, j);
|
||||
ASSERT_EQ(check, s * (s - 1) / 2);
|
||||
|
||||
@ -81,8 +81,8 @@ struct Hierarchical_ForLoop_B {
|
||||
Kokkos::fence();
|
||||
auto v_H = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), v);
|
||||
|
||||
int check = 0;
|
||||
const int s = sY * sX;
|
||||
long long int check = 0;
|
||||
const long long int s = sY * sX;
|
||||
for (int i = 0; i < sX; ++i)
|
||||
for (int j = 0; j < sY; ++j) check += v_H(i, j);
|
||||
ASSERT_EQ(check, s * (s - 1) / 2);
|
||||
|
||||
@ -42,7 +42,7 @@
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
// @Kokkos_Feature_Level_Required:12
|
||||
// @Kokkos_Feature_Level_Required:13
|
||||
// Unit test for hierarchical parallelism
|
||||
// Create concurrent work hierarchically and verify if
|
||||
// contributions of participating processing units correspond to expected value
|
||||
|
||||
@ -42,7 +42,7 @@
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
// @Kokkos_Feature_Level_Required:12
|
||||
// @Kokkos_Feature_Level_Required:13
|
||||
// Unit test for hierarchical parallelism
|
||||
// Create concurrent work hierarchically and verify if
|
||||
// contributions of participating processing units correspond to expected value
|
||||
|
||||
@ -42,7 +42,7 @@
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
// @Kokkos_Feature_Level_Required:13
|
||||
// @Kokkos_Feature_Level_Required:12
|
||||
// Unit test for hierarchical parallelism
|
||||
// Create concurrent work hierarchically and verify if
|
||||
// sum of created processing units corresponds to expected value
|
||||
|
||||
@ -42,7 +42,7 @@
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
// @Kokkos_Feature_Level_Required:13
|
||||
// @Kokkos_Feature_Level_Required:12
|
||||
// Unit test for hierarchical parallelism
|
||||
// Create concurrent work hierarchically and verify if
|
||||
// sum of created processing units corresponds to expected value
|
||||
|
||||
@ -42,7 +42,7 @@
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
// @Kokkos_Feature_Level_Required:13
|
||||
// @Kokkos_Feature_Level_Required:12
|
||||
// Unit test for hierarchical parallelism
|
||||
// Create concurrent work hierarchically and verify if
|
||||
// sum of created processing units corresponds to expected value
|
||||
|
||||
Reference in New Issue
Block a user