Update Kokkos library in LAMMPS to v3.7.0

This commit is contained in:
Stan Gerald Moore
2022-10-04 14:04:40 -06:00
parent dd072f7e08
commit f9f9e44f2d
653 changed files with 41432 additions and 33597 deletions

View File

@ -43,7 +43,6 @@
*/
#include <cstdio>
#include <stdexcept>
#include <sstream>
#include <iostream>
@ -62,17 +61,20 @@ struct TestTeamPolicy {
view_type m_flags;
TestTeamPolicy(const size_t league_size)
: m_flags(
Kokkos::view_alloc(Kokkos::WithoutInitializing, "flags"),
: m_flags(Kokkos::view_alloc(Kokkos::WithoutInitializing, "flags"),
// FIXME_OPENMPTARGET temporary restriction for team size to be at least 32
#ifdef KOKKOS_ENABLE_OPENMPTARGET
Kokkos::TeamPolicy<ScheduleType, ExecSpace>(1, 32).team_size_max(
*this, Kokkos::ParallelReduceTag()),
Kokkos::TeamPolicy<ScheduleType, ExecSpace>(
1, std::is_same<ExecSpace,
Kokkos::Experimental::OpenMPTarget>::value
? 32
: 1)
.team_size_max(*this, Kokkos::ParallelReduceTag()),
#else
Kokkos::TeamPolicy<ScheduleType, ExecSpace>(1, 1).team_size_max(
*this, Kokkos::ParallelReduceTag()),
Kokkos::TeamPolicy<ScheduleType, ExecSpace>(1, 1).team_size_max(
*this, Kokkos::ParallelReduceTag()),
#endif
league_size) {
league_size) {
}
struct VerifyInitTag {};
@ -131,8 +133,12 @@ struct TestTeamPolicy {
constexpr const int smallest_work = 1;
// FIXME_OPENMPTARGET temporary restriction for team size to be at least 32
#ifdef KOKKOS_ENABLE_OPENMPTARGET
Kokkos::TeamPolicy<ExecSpace, NoOpTag> none_auto(smallest_work, 32,
smallest_work);
Kokkos::TeamPolicy<ExecSpace, NoOpTag> none_auto(
smallest_work,
std::is_same<ExecSpace, Kokkos::Experimental::OpenMPTarget>::value
? 32
: smallest_work,
smallest_work);
#else
Kokkos::TeamPolicy<ExecSpace, NoOpTag> none_auto(
smallest_work, smallest_work, smallest_work);
@ -143,8 +149,12 @@ struct TestTeamPolicy {
(void)both_auto;
// FIXME_OPENMPTARGET temporary restriction for team size to be at least 32
#ifdef KOKKOS_ENABLE_OPENMPTARGET
Kokkos::TeamPolicy<ExecSpace, NoOpTag> auto_vector(smallest_work, 32,
Kokkos::AUTO());
Kokkos::TeamPolicy<ExecSpace, NoOpTag> auto_vector(
smallest_work,
std::is_same<ExecSpace, Kokkos::Experimental::OpenMPTarget>::value
? 32
: smallest_work,
Kokkos::AUTO());
#else
Kokkos::TeamPolicy<ExecSpace, NoOpTag> auto_vector(
smallest_work, smallest_work, Kokkos::AUTO());
@ -166,10 +176,18 @@ struct TestTeamPolicy {
// 32
#ifdef KOKKOS_ENABLE_OPENMPTARGET
const int team_size =
policy_type(league_size, 32)
policy_type(
league_size,
std::is_same<ExecSpace, Kokkos::Experimental::OpenMPTarget>::value
? 32
: 1)
.team_size_max(functor, Kokkos::ParallelForTag());
const int team_size_init =
policy_type_init(league_size, 32)
policy_type_init(
league_size,
std::is_same<ExecSpace, Kokkos::Experimental::OpenMPTarget>::value
? 32
: 1)
.team_size_max(functor, Kokkos::ParallelForTag());
#else
const int team_size =
@ -215,7 +233,11 @@ struct TestTeamPolicy {
// FIXME_OPENMPTARGET temporary restriction for team size to be at least 32
#ifdef KOKKOS_ENABLE_OPENMPTARGET
const int team_size =
policy_type_reduce(league_size, 32)
policy_type_reduce(
league_size,
std::is_same<ExecSpace, Kokkos::Experimental::OpenMPTarget>::value
? 32
: 1)
.team_size_max(functor, Kokkos::ParallelReduceTag());
#else
const int team_size =
@ -272,7 +294,7 @@ class ReduceTeamFunctor {
}
KOKKOS_INLINE_FUNCTION
void join(volatile value_type &dst, const volatile value_type &src) const {
void join(value_type &dst, const value_type &src) const {
dst.value[0] += src.value[0];
dst.value[1] += src.value[1];
dst.value[2] += src.value[2];
@ -371,8 +393,7 @@ class ScanTeamFunctor {
void init(value_type &error) const { error = 0; }
KOKKOS_INLINE_FUNCTION
void join(value_type volatile &error,
value_type volatile const &input) const {
void join(value_type &error, value_type const &input) const {
if (input) error = 1;
}
@ -380,8 +401,7 @@ class ScanTeamFunctor {
using value_type = int64_t;
KOKKOS_INLINE_FUNCTION
void join(value_type volatile &dst,
value_type volatile const &input) const {
void join(value_type &dst, value_type const &input) const {
if (dst < input) dst = input;
}
};
@ -571,11 +591,17 @@ struct TestSharedTeam {
#ifdef KOKKOS_ENABLE_OPENMPTARGET
const size_t team_size =
Kokkos::TeamPolicy<ScheduleType, ExecSpace>(64, 32).team_size_max(
Functor(), Kokkos::ParallelReduceTag());
std::is_same<ExecSpace, Kokkos::Experimental::OpenMPTarget>::value
? Kokkos::TeamPolicy<ScheduleType, ExecSpace>(64, 32).team_size_max(
Functor(), Kokkos::ParallelReduceTag())
: Kokkos::TeamPolicy<ScheduleType, ExecSpace>(8192, 1)
.team_size_max(Functor(), Kokkos::ParallelReduceTag());
Kokkos::TeamPolicy<ScheduleType, ExecSpace> team_exec(32 / team_size,
team_size);
Kokkos::TeamPolicy<ScheduleType, ExecSpace> team_exec(
std::is_same<ExecSpace, Kokkos::Experimental::OpenMPTarget>::value
? 32 / team_size
: 8192 / team_size,
team_size);
#else
const size_t team_size =
Kokkos::TeamPolicy<ScheduleType, ExecSpace>(8192, 1).team_size_max(
@ -616,7 +642,9 @@ struct TestLambdaSharedTeam {
const int SHARED_COUNT = 1000;
#ifdef KOKKOS_ENABLE_OPENMPTARGET
int team_size = 32;
int team_size =
std::is_same<ExecSpace, Kokkos::Experimental::OpenMPTarget>::value ? 32
: 1;
#else
int team_size = 1;
#endif
@ -776,11 +804,18 @@ struct TestScratchTeam {
Functor::SHARED_THREAD_COUNT);
#ifdef KOKKOS_ENABLE_OPENMPTARGET
p_type team_exec = p_type(64, 32).set_scratch_size(
1,
Kokkos::PerTeam(Functor::shared_int_array_type::shmem_size(
Functor::SHARED_TEAM_COUNT)),
Kokkos::PerThread(thread_scratch_size + 3 * sizeof(int)));
p_type team_exec =
std::is_same<ExecSpace, Kokkos::Experimental::OpenMPTarget>::value
? p_type(64, 32).set_scratch_size(
1,
Kokkos::PerTeam(Functor::shared_int_array_type::shmem_size(
Functor::SHARED_TEAM_COUNT)),
Kokkos::PerThread(thread_scratch_size + 3 * sizeof(int)))
: p_type(8192, 1).set_scratch_size(
1,
Kokkos::PerTeam(Functor::shared_int_array_type::shmem_size(
Functor::SHARED_TEAM_COUNT)),
Kokkos::PerThread(thread_scratch_size + 3 * sizeof(int)));
#else
p_type team_exec = p_type(8192, 1).set_scratch_size(
1,
@ -797,7 +832,10 @@ struct TestScratchTeam {
Functor::shared_int_array_type::shmem_size(3 * team_size);
#ifdef KOKKOS_ENABLE_OPENMPTARGET
team_exec = p_type(64 / team_size, team_size);
team_exec =
std::is_same<ExecSpace, Kokkos::Experimental::OpenMPTarget>::value
? p_type(64 / team_size, team_size)
: p_type(8192 / team_size, team_size);
#else
team_exec = p_type(8192 / team_size, team_size);
#endif
@ -825,31 +863,31 @@ namespace Test {
template <class ExecSpace>
KOKKOS_INLINE_FUNCTION int test_team_mulit_level_scratch_loop_body(
const typename Kokkos::TeamPolicy<ExecSpace>::member_type &team) {
Kokkos::View<double *, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
Kokkos::View<double *, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged>>
a_team1(team.team_scratch(0), 128);
Kokkos::View<double *, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
Kokkos::View<double *, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged>>
a_thread1(team.thread_scratch(0), 16);
Kokkos::View<double *, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
Kokkos::View<double *, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged>>
a_team2(team.team_scratch(0), 128);
Kokkos::View<double *, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
Kokkos::View<double *, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged>>
a_thread2(team.thread_scratch(0), 16);
Kokkos::View<double *, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
Kokkos::View<double *, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged>>
b_team1(team.team_scratch(1), 12800);
Kokkos::View<double *, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
Kokkos::View<double *, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged>>
b_thread1(team.thread_scratch(1), 1600);
Kokkos::View<double *, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
Kokkos::View<double *, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged>>
b_team2(team.team_scratch(1), 12800);
Kokkos::View<double *, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
Kokkos::View<double *, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged>>
b_thread2(team.thread_scratch(1), 1600);
Kokkos::View<double *, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
Kokkos::View<double *, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged>>
a_team3(team.team_scratch(0), 128);
Kokkos::View<double *, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
Kokkos::View<double *, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged>>
a_thread3(team.thread_scratch(0), 16);
Kokkos::View<double *, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
Kokkos::View<double *, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged>>
b_team3(team.team_scratch(1), 12800);
Kokkos::View<double *, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
Kokkos::View<double *, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged>>
b_thread3(team.thread_scratch(1), 1600);
// The explicit types for 0 and 128 are here to test TeamThreadRange accepting
@ -945,7 +983,7 @@ struct ClassNoShmemSizeFunction {
using member_type =
typename Kokkos::TeamPolicy<ExecSpace, ScheduleType>::member_type;
Kokkos::View<int, ExecSpace, Kokkos::MemoryTraits<Kokkos::Atomic> > errors;
Kokkos::View<int, ExecSpace, Kokkos::MemoryTraits<Kokkos::Atomic>> errors;
KOKKOS_INLINE_FUNCTION
void operator()(const TagFor &, const member_type &team) const {
@ -967,20 +1005,20 @@ struct ClassNoShmemSizeFunction {
const int per_team0 =
3 *
Kokkos::View<double *, ExecSpace,
Kokkos::MemoryTraits<Kokkos::Unmanaged> >::shmem_size(128);
Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(128);
const int per_thread0 =
3 *
Kokkos::View<double *, ExecSpace,
Kokkos::MemoryTraits<Kokkos::Unmanaged> >::shmem_size(16);
Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(16);
const int per_team1 =
3 * Kokkos::View<
double *, ExecSpace,
Kokkos::MemoryTraits<Kokkos::Unmanaged> >::shmem_size(12800);
Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(12800);
const int per_thread1 =
3 * Kokkos::View<
double *, ExecSpace,
Kokkos::MemoryTraits<Kokkos::Unmanaged> >::shmem_size(1600);
3 *
Kokkos::View<double *, ExecSpace,
Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(1600);
#ifdef KOKKOS_ENABLE_SYCL
int team_size = 4;
@ -1031,7 +1069,7 @@ struct ClassWithShmemSizeFunction {
using member_type =
typename Kokkos::TeamPolicy<ExecSpace, ScheduleType>::member_type;
Kokkos::View<int, ExecSpace, Kokkos::MemoryTraits<Kokkos::Atomic> > errors;
Kokkos::View<int, ExecSpace, Kokkos::MemoryTraits<Kokkos::Atomic>> errors;
KOKKOS_INLINE_FUNCTION
void operator()(const TagFor &, const member_type &team) const {
@ -1053,11 +1091,11 @@ struct ClassWithShmemSizeFunction {
const int per_team1 =
3 * Kokkos::View<
double *, ExecSpace,
Kokkos::MemoryTraits<Kokkos::Unmanaged> >::shmem_size(12800);
Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(12800);
const int per_thread1 =
3 * Kokkos::View<
double *, ExecSpace,
Kokkos::MemoryTraits<Kokkos::Unmanaged> >::shmem_size(1600);
3 *
Kokkos::View<double *, ExecSpace,
Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(1600);
int team_size = 8;
if (team_size > ExecSpace::concurrency())
@ -1097,11 +1135,11 @@ struct ClassWithShmemSizeFunction {
const int per_team0 =
3 *
Kokkos::View<double *, ExecSpace,
Kokkos::MemoryTraits<Kokkos::Unmanaged> >::shmem_size(128);
Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(128);
const int per_thread0 =
3 *
Kokkos::View<double *, ExecSpace,
Kokkos::MemoryTraits<Kokkos::Unmanaged> >::shmem_size(16);
Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(16);
return per_team0 + team_size * per_thread0;
}
};
@ -1109,27 +1147,26 @@ struct ClassWithShmemSizeFunction {
template <class ExecSpace, class ScheduleType>
void test_team_mulit_level_scratch_test_lambda() {
#ifdef KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA
Kokkos::View<int, ExecSpace, Kokkos::MemoryTraits<Kokkos::Atomic> > errors;
Kokkos::View<int, ExecSpace, Kokkos::MemoryTraits<Kokkos::Atomic>> errors;
Kokkos::View<int, ExecSpace> d_errors("Errors");
errors = d_errors;
const int per_team0 =
3 *
Kokkos::View<double *, ExecSpace,
Kokkos::MemoryTraits<Kokkos::Unmanaged> >::shmem_size(128);
Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(128);
const int per_thread0 =
3 *
Kokkos::View<double *, ExecSpace,
Kokkos::MemoryTraits<Kokkos::Unmanaged> >::shmem_size(16);
3 * Kokkos::View<double *, ExecSpace,
Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(16);
const int per_team1 =
3 *
Kokkos::View<double *, ExecSpace,
Kokkos::MemoryTraits<Kokkos::Unmanaged> >::shmem_size(12800);
Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(12800);
const int per_thread1 =
3 *
Kokkos::View<double *, ExecSpace,
Kokkos::MemoryTraits<Kokkos::Unmanaged> >::shmem_size(1600);
Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(1600);
#ifdef KOKKOS_ENABLE_SYCL
int team_size = 4;
@ -1244,9 +1281,8 @@ template <class ExecSpace, class ScheduleType, class T, class Enabled = void>
struct TestTeamBroadcast;
template <class ExecSpace, class ScheduleType, class T>
struct TestTeamBroadcast<
ExecSpace, ScheduleType, T,
typename std::enable_if<(sizeof(T) == sizeof(char)), void>::type> {
struct TestTeamBroadcast<ExecSpace, ScheduleType, T,
std::enable_if_t<(sizeof(T) == sizeof(char)), void>> {
using team_member =
typename Kokkos::TeamPolicy<ScheduleType, ExecSpace>::member_type;
using memory_space = typename ExecSpace::memory_space;
@ -1358,9 +1394,8 @@ struct TestTeamBroadcast<
};
template <class ExecSpace, class ScheduleType, class T>
struct TestTeamBroadcast<
ExecSpace, ScheduleType, T,
typename std::enable_if<(sizeof(T) > sizeof(char)), void>::type> {
struct TestTeamBroadcast<ExecSpace, ScheduleType, T,
std::enable_if_t<(sizeof(T) > sizeof(char)), void>> {
using team_member =
typename Kokkos::TeamPolicy<ScheduleType, ExecSpace>::member_type;
using value_type = T;
@ -1434,9 +1469,8 @@ struct TestTeamBroadcast<
}
template <class ScalarType>
static inline
typename std::enable_if<!std::is_integral<ScalarType>::value, void>::type
compare_test(ScalarType A, ScalarType B, double epsilon_factor) {
static inline std::enable_if_t<!std::is_integral<ScalarType>::value, void>
compare_test(ScalarType A, ScalarType B, double epsilon_factor) {
if (std::is_same<ScalarType, double>::value ||
std::is_same<ScalarType, float>::value) {
ASSERT_NEAR((double)A, (double)B,
@ -1448,9 +1482,8 @@ struct TestTeamBroadcast<
}
template <class ScalarType>
static inline
typename std::enable_if<std::is_integral<ScalarType>::value, void>::type
compare_test(ScalarType A, ScalarType B, double) {
static inline std::enable_if_t<std::is_integral<ScalarType>::value, void>
compare_test(ScalarType A, ScalarType B, double) {
ASSERT_EQ(A, B);
}
@ -1528,7 +1561,9 @@ struct TestScratchAlignment {
void test(bool allocate_small) {
int shmem_size = ScratchView::shmem_size(11);
#ifdef KOKKOS_ENABLE_OPENMPTARGET
int team_size = 32;
int team_size =
std::is_same<ExecSpace, Kokkos::Experimental::OpenMPTarget>::value ? 32
: 1;
#else
int team_size = 1;
#endif