Update Kokkos library in LAMMPS to v3.4.0

This commit is contained in:
Stan Gerald Moore
2021-04-26 16:28:19 -06:00
parent 39f3c1684f
commit 692da3bf88
358 changed files with 16375 additions and 10003 deletions

View File

@ -85,7 +85,14 @@ struct hello_world {
// (as well as on the host). If not building with CUDA, the macro
// is unnecessary but harmless.
KOKKOS_INLINE_FUNCTION
void operator()(const int i) const { printf("Hello from i = %i\n", i); }
void operator()(const int i) const {
// FIXME_SYCL needs workaround for printf
#ifndef __SYCL_DEVICE_ONLY__
printf("Hello from i = %i\n", i);
#else
(void)i;
#endif
}
};
int main(int argc, char* argv[]) {

View File

@ -104,8 +104,13 @@ int main(int argc, char* argv[]) {
#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA)
Kokkos::parallel_for(
15, KOKKOS_LAMBDA(const int i) {
// FIXME_SYCL needs workaround for printf
#ifndef __SYCL_DEVICE_ONLY__
// printf works in a CUDA parallel kernel; std::ostream does not.
printf("Hello from i = %i\n", i);
#else
(void)i;
#endif
});
#endif
// You must call finalize() after you are done using Kokkos.

View File

@ -122,7 +122,7 @@ int main() {
// Fill the 'data' array on the host with random numbers. We assume
// that they come from some process which is only implemented on the
// host, via some library. (That's true in this case.)
for (size_type i = 0; i < data.extent(0); ++i) {
for (size_type i = 0; i < static_cast<size_type>(data.extent(0)); ++i) {
h_data(i) = rand() % nnumbers;
}
Kokkos::deep_copy(data, h_data); // copy from host to device

View File

@ -7,4 +7,3 @@ KOKKOS_ADD_EXECUTABLE(
tutorial_06_simple_mdrangepolicy
SOURCES simple_mdrangepolicy.cpp
)

View File

@ -68,13 +68,15 @@ struct init_view {
ViewType a;
init_view(ViewType a_) : a(a_) {}
using size_type = typename ViewType::size_type;
KOKKOS_INLINE_FUNCTION
void operator()(const typename ViewType::size_type i) const {
// On CPUs this loop could be vectorized so j should do stride 1
// access on a for optimal performance. I.e. a should be LayoutRight.
// On GPUs threads should do coalesced loads and stores. That means
// that i should be the stride one access for optimal performance.
for (typename ViewType::size_type j = 0; j < a.extent(1); ++j) {
for (size_type j = 0; j < static_cast<size_type>(a.extent(1)); ++j) {
a(i, j) = 1.0 * a.extent(0) * i + 1.0 * j;
}
}
@ -95,6 +97,8 @@ struct contraction {
contraction(view_type a_, ViewType1 v1_, ViewType2 v2_)
: a(a_), v1(v1_), v2(v2_) {}
using size_type = typename view_type::size_type;
// As with the initialization functor the performance of this operator
// depends on the architecture and the chosen data layouts.
// On CPUs optimal would be to vectorize the inner loop, so j should be the
@ -104,7 +108,7 @@ struct contraction {
// LayoutLeft and v2 LayoutRight.
KOKKOS_INLINE_FUNCTION
void operator()(const view_type::size_type i) const {
for (view_type::size_type j = 0; j < v1.extent(1); ++j) {
// j indexes v1's second dimension (and v2's first), so bound the loop by v1, not by a, which is only indexed as a(i).
for (size_type j = 0; j < static_cast<size_type>(v1.extent(1)); ++j) {
a(i) = v1(i, j) * v2(j, i);
}
}

View File

@ -113,8 +113,9 @@ int main(int narg, char* arg[]) {
srand(134231);
using size_type = view_type::size_type;
for (int i = 0; i < size; i++) {
for (view_type::size_type j = 0; j < h_idx.extent(1); ++j) {
for (size_type j = 0; j < static_cast<size_type>(h_idx.extent(1)); ++j) {
h_idx(i, j) = (size + i + (rand() % 500 - 250)) % size;
}
}

View File

@ -78,9 +78,11 @@ struct set_boundary {
set_boundary(ViewType a_, double value_) : a(a_), value(value_) {}
using size_type = typename ViewType::size_type;
KOKKOS_INLINE_FUNCTION
void operator()(const typename ViewType::size_type i) const {
for (typename ViewType::size_type j = 0; j < a.extent(1); ++j) {
void operator()(const size_type i) const {
for (size_type j = 0; j < static_cast<size_type>(a.extent(1)); ++j) {
a(i, j) = value;
}
}
@ -96,11 +98,12 @@ struct set_inner {
set_inner(ViewType a_, double value_) : a(a_), value(value_) {}
using size_type = typename ViewType::size_type;
KOKKOS_INLINE_FUNCTION
void operator()(const typename ViewType::size_type i) const {
using size_type = typename ViewType::size_type;
for (size_type j = 0; j < a.extent(1); ++j) {
for (size_type k = 0; k < a.extent(2); ++k) {
void operator()(const size_type i) const {
for (size_type j = 0; j < static_cast<size_type>(a.extent(1)); ++j) {
for (size_type k = 0; k < static_cast<size_type>(a.extent(2)); ++k) {
a(i, j, k) = value;
}
}
@ -116,12 +119,13 @@ struct update {
update(ViewType a_, const double dt_) : a(a_), dt(dt_) {}
using size_type = typename ViewType::size_type;
KOKKOS_INLINE_FUNCTION
void operator()(typename ViewType::size_type i) const {
using size_type = typename ViewType::size_type;
void operator()(size_type i) const {
i++;
for (size_type j = 1; j < a.extent(1) - 1; j++) {
for (size_type k = 1; k < a.extent(2) - 1; k++) {
for (size_type j = 1; j < static_cast<size_type>(a.extent(1) - 1); j++) {
for (size_type k = 1; k < static_cast<size_type>(a.extent(2) - 1); k++) {
a(i, j, k) += dt * (a(i, j, k + 1) - a(i, j, k - 1) + a(i, j + 1, k) -
a(i, j - 1, k) + a(i + 1, j, k) - a(i - 1, j, k));
}

View File

@ -175,8 +175,9 @@ int main(int narg, char* arg[]) {
// Get a reference to the host view of idx directly (equivalent to
// idx.view<idx_type::host_mirror_space>() )
idx_type::t_host h_idx = idx.h_view;
using size_type = view_type::size_type;
for (int i = 0; i < size; ++i) {
for (view_type::size_type j = 0; j < h_idx.extent(1); ++j) {
for (size_type j = 0; j < static_cast<size_type>(h_idx.extent(1)); ++j) {
h_idx(i, j) = (size + i + (rand() % 500 - 250)) % size;
}
}

View File

@ -75,8 +75,13 @@ struct hello_world {
// The TeamPolicy<>::member_type provides functions to query the multi
// dimensional index of a thread as well as the number of thread-teams and
// the size of each team.
#ifndef __SYCL_DEVICE_ONLY__
// FIXME_SYCL needs printf workaround
printf("Hello World: %i %i // %i %i\n", thread.league_rank(),
thread.team_rank(), thread.league_size(), thread.team_size());
#else
(void)thread;
#endif
}
};

View File

@ -85,11 +85,16 @@ int main(int narg, char* args[]) {
policy,
KOKKOS_LAMBDA(const team_member& thread, int& lsum) {
lsum += 1;
// TeamPolicy<>::member_type provides functions to query the
// multidimensional index of a thread, as well as the number of
// thread teams and the size of each team.
// TeamPolicy<>::member_type provides functions to query the
// multidimensional index of a thread, as well as the number of
// thread teams and the size of each team.
#ifndef __SYCL_DEVICE_ONLY__
// FIXME_SYCL needs workaround for printf
printf("Hello World: %i %i // %i %i\n", thread.league_rank(),
thread.team_rank(), thread.league_size(), thread.team_size());
#else
(void)thread;
#endif
},
sum);
#endif

View File

@ -73,8 +73,13 @@ struct hello_world {
// also executed by all threads of the team.
Kokkos::parallel_for(Kokkos::TeamThreadRange(thread, 31),
[&](const int& i) {
#ifndef __SYCL_DEVICE_ONLY__
// FIXME_SYCL needs printf workaround
printf("Hello World: (%i , %i) executed loop %i \n",
thread.league_rank(), thread.team_rank(), i);
#else
(void) i;
#endif
});
}
};