Update Kokkos library in LAMMPS to v3.4.0
This commit is contained in:
@ -85,7 +85,14 @@ struct hello_world {
|
||||
// (as well as on the host). If not building with CUDA, the macro
|
||||
// is unnecessary but harmless.
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const int i) const { printf("Hello from i = %i\n", i); }
|
||||
void operator()(const int i) const {
|
||||
// FIXME_SYCL needs workaround for printf
|
||||
#ifndef __SYCL_DEVICE_ONLY__
|
||||
printf("Hello from i = %i\n", i);
|
||||
#else
|
||||
(void)i;
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
|
||||
@ -104,8 +104,13 @@ int main(int argc, char* argv[]) {
|
||||
#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA)
|
||||
Kokkos::parallel_for(
|
||||
15, KOKKOS_LAMBDA(const int i) {
|
||||
// FIXME_SYCL needs workaround for printf
|
||||
#ifndef __SYCL_DEVICE_ONLY__
|
||||
// printf works in a CUDA parallel kernel; std::ostream does not.
|
||||
printf("Hello from i = %i\n", i);
|
||||
#else
|
||||
(void)i;
|
||||
#endif
|
||||
});
|
||||
#endif
|
||||
// You must call finalize() after you are done using Kokkos.
|
||||
|
||||
@ -122,7 +122,7 @@ int main() {
|
||||
// Fill the 'data' array on the host with random numbers. We assume
|
||||
// that they come from some process which is only implemented on the
|
||||
// host, via some library. (That's true in this case.)
|
||||
for (size_type i = 0; i < data.extent(0); ++i) {
|
||||
for (size_type i = 0; i < static_cast<size_type>(data.extent(0)); ++i) {
|
||||
h_data(i) = rand() % nnumbers;
|
||||
}
|
||||
Kokkos::deep_copy(data, h_data); // copy from host to device
|
||||
|
||||
@ -7,4 +7,3 @@ KOKKOS_ADD_EXECUTABLE(
|
||||
tutorial_06_simple_mdrangepolicy
|
||||
SOURCES simple_mdrangepolicy.cpp
|
||||
)
|
||||
|
||||
|
||||
@ -68,13 +68,15 @@ struct init_view {
|
||||
ViewType a;
|
||||
init_view(ViewType a_) : a(a_) {}
|
||||
|
||||
using size_type = typename ViewType::size_type;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const typename ViewType::size_type i) const {
|
||||
// On CPUs this loop could be vectorized, so j should be the stride-1
|
||||
// index into 'a' for optimal performance, i.e. 'a' should be LayoutRight.
|
||||
// On GPUs threads should do coalesced loads and stores. That means
|
||||
// that i should be the stride-1 index for optimal performance.
|
||||
for (typename ViewType::size_type j = 0; j < a.extent(1); ++j) {
|
||||
for (size_type j = 0; j < static_cast<size_type>(a.extent(1)); ++j) {
|
||||
a(i, j) = 1.0 * a.extent(0) * i + 1.0 * j;
|
||||
}
|
||||
}
|
||||
@ -95,6 +97,8 @@ struct contraction {
|
||||
contraction(view_type a_, ViewType1 v1_, ViewType2 v2_)
|
||||
: a(a_), v1(v1_), v2(v2_) {}
|
||||
|
||||
using size_type = typename view_type::size_type;
|
||||
|
||||
// As with the initialization functor the performance of this operator
|
||||
// depends on the architecture and the chosen data layouts.
|
||||
// On CPUs optimal would be to vectorize the inner loop, so j should be the
|
||||
@ -104,7 +108,7 @@ struct contraction {
|
||||
// LayoutLeft and v2 LayoutRight.
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const view_type::size_type i) const {
|
||||
for (view_type::size_type j = 0; j < v1.extent(1); ++j) {
|
||||
for (size_type j = 0; j < static_cast<size_type>(a.extent(1)); ++j) {
|
||||
a(i) = v1(i, j) * v2(j, i);
|
||||
}
|
||||
}
|
||||
|
||||
@ -113,8 +113,9 @@ int main(int narg, char* arg[]) {
|
||||
|
||||
srand(134231);
|
||||
|
||||
using size_type = view_type::size_type;
|
||||
for (int i = 0; i < size; i++) {
|
||||
for (view_type::size_type j = 0; j < h_idx.extent(1); ++j) {
|
||||
for (size_type j = 0; j < static_cast<size_type>(h_idx.extent(1)); ++j) {
|
||||
h_idx(i, j) = (size + i + (rand() % 500 - 250)) % size;
|
||||
}
|
||||
}
|
||||
|
||||
@ -78,9 +78,11 @@ struct set_boundary {
|
||||
|
||||
set_boundary(ViewType a_, double value_) : a(a_), value(value_) {}
|
||||
|
||||
using size_type = typename ViewType::size_type;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const typename ViewType::size_type i) const {
|
||||
for (typename ViewType::size_type j = 0; j < a.extent(1); ++j) {
|
||||
void operator()(const size_type i) const {
|
||||
for (size_type j = 0; j < static_cast<size_type>(a.extent(1)); ++j) {
|
||||
a(i, j) = value;
|
||||
}
|
||||
}
|
||||
@ -96,11 +98,12 @@ struct set_inner {
|
||||
|
||||
set_inner(ViewType a_, double value_) : a(a_), value(value_) {}
|
||||
|
||||
using size_type = typename ViewType::size_type;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const typename ViewType::size_type i) const {
|
||||
using size_type = typename ViewType::size_type;
|
||||
for (size_type j = 0; j < a.extent(1); ++j) {
|
||||
for (size_type k = 0; k < a.extent(2); ++k) {
|
||||
void operator()(const size_type i) const {
|
||||
for (size_type j = 0; j < static_cast<size_type>(a.extent(1)); ++j) {
|
||||
for (size_type k = 0; k < static_cast<size_type>(a.extent(2)); ++k) {
|
||||
a(i, j, k) = value;
|
||||
}
|
||||
}
|
||||
@ -116,12 +119,13 @@ struct update {
|
||||
|
||||
update(ViewType a_, const double dt_) : a(a_), dt(dt_) {}
|
||||
|
||||
using size_type = typename ViewType::size_type;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(typename ViewType::size_type i) const {
|
||||
using size_type = typename ViewType::size_type;
|
||||
void operator()(size_type i) const {
|
||||
i++;
|
||||
for (size_type j = 1; j < a.extent(1) - 1; j++) {
|
||||
for (size_type k = 1; k < a.extent(2) - 1; k++) {
|
||||
for (size_type j = 1; j < static_cast<size_type>(a.extent(1) - 1); j++) {
|
||||
for (size_type k = 1; k < static_cast<size_type>(a.extent(2) - 1); k++) {
|
||||
a(i, j, k) += dt * (a(i, j, k + 1) - a(i, j, k - 1) + a(i, j + 1, k) -
|
||||
a(i, j - 1, k) + a(i + 1, j, k) - a(i - 1, j, k));
|
||||
}
|
||||
|
||||
@ -175,8 +175,9 @@ int main(int narg, char* arg[]) {
|
||||
// Get a reference to the host view of idx directly (equivalent to
|
||||
// idx.view<idx_type::host_mirror_space>() )
|
||||
idx_type::t_host h_idx = idx.h_view;
|
||||
using size_type = view_type::size_type;
|
||||
for (int i = 0; i < size; ++i) {
|
||||
for (view_type::size_type j = 0; j < h_idx.extent(1); ++j) {
|
||||
for (size_type j = 0; j < static_cast<size_type>(h_idx.extent(1)); ++j) {
|
||||
h_idx(i, j) = (size + i + (rand() % 500 - 250)) % size;
|
||||
}
|
||||
}
|
||||
|
||||
@ -75,8 +75,13 @@ struct hello_world {
|
||||
// The TeamPolicy<>::member_type provides functions to query the multi
|
||||
// dimensional index of a thread as well as the number of thread-teams and
|
||||
// the size of each team.
|
||||
#ifndef __SYCL_DEVICE_ONLY__
|
||||
// FIXME_SYCL needs printf workaround
|
||||
printf("Hello World: %i %i // %i %i\n", thread.league_rank(),
|
||||
thread.team_rank(), thread.league_size(), thread.team_size());
|
||||
#else
|
||||
(void)thread;
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@ -85,11 +85,16 @@ int main(int narg, char* args[]) {
|
||||
policy,
|
||||
KOKKOS_LAMBDA(const team_member& thread, int& lsum) {
|
||||
lsum += 1;
|
||||
// TeamPolicy<>::member_type provides functions to query the
|
||||
// multidimensional index of a thread, as well as the number of
|
||||
// thread teams and the size of each team.
|
||||
// TeamPolicy<>::member_type provides functions to query the
|
||||
// multidimensional index of a thread, as well as the number of
|
||||
// thread teams and the size of each team.
|
||||
#ifndef __SYCL_DEVICE_ONLY__
|
||||
// FIXME_SYCL needs workaround for printf
|
||||
printf("Hello World: %i %i // %i %i\n", thread.league_rank(),
|
||||
thread.team_rank(), thread.league_size(), thread.team_size());
|
||||
#else
|
||||
(void)thread;
|
||||
#endif
|
||||
},
|
||||
sum);
|
||||
#endif
|
||||
|
||||
@ -73,8 +73,13 @@ struct hello_world {
|
||||
// also executed by all threads of the team.
|
||||
Kokkos::parallel_for(Kokkos::TeamThreadRange(thread, 31),
|
||||
[&](const int& i) {
|
||||
#ifndef __SYCL_DEVICE_ONLY__
|
||||
// FIXME_SYCL needs printf workaround
|
||||
printf("Hello World: (%i , %i) executed loop %i \n",
|
||||
thread.league_rank(), thread.team_rank(), i);
|
||||
#else
|
||||
(void) i;
|
||||
#endif
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user