Update Kokkos library in LAMMPS to v3.5.0

This commit is contained in:
Stan Gerald Moore
2021-11-04 12:45:59 -06:00
parent 515ef7bece
commit 564098e629
396 changed files with 21892 additions and 8508 deletions

View File

@ -52,35 +52,33 @@
#define HLINE "-------------------------------------------------------------\n"
#if defined(KOKKOS_ENABLE_CUDA)
using StreamHostArray = Kokkos::View<double*, Kokkos::CudaSpace>::HostMirror;
using StreamDeviceArray = Kokkos::View<double*, Kokkos::CudaSpace>;
#else
using StreamHostArray = Kokkos::View<double*, Kokkos::HostSpace>::HostMirror;
using StreamDeviceArray = Kokkos::View<double*, Kokkos::HostSpace>;
#endif
using StreamDeviceArray =
Kokkos::View<double*, Kokkos::MemoryTraits<Kokkos::Restrict>>;
using StreamHostArray = typename StreamDeviceArray::HostMirror;
using StreamIndex = int;
using Policy = Kokkos::RangePolicy<Kokkos::IndexType<StreamIndex>>;
double now() {
struct timeval now;
gettimeofday(&now, nullptr);
return (double)now.tv_sec + ((double)now.tv_usec * 1.0e-6);
}
void perform_copy(StreamDeviceArray& a, StreamDeviceArray& b,
StreamDeviceArray& c) {
void perform_set(StreamDeviceArray& a, const double scalar) {
Kokkos::parallel_for(
"copy", a.extent(0), KOKKOS_LAMBDA(const StreamIndex i) { c[i] = a[i]; });
"set", Policy(0, a.extent(0)),
KOKKOS_LAMBDA(const StreamIndex i) { a[i] = scalar; });
Kokkos::fence();
}
void perform_scale(StreamDeviceArray& a, StreamDeviceArray& b,
StreamDeviceArray& c, const double scalar) {
void perform_copy(StreamDeviceArray& a, StreamDeviceArray& b) {
Kokkos::parallel_for(
"copy", a.extent(0),
"copy", Policy(0, a.extent(0)),
KOKKOS_LAMBDA(const StreamIndex i) { b[i] = a[i]; });
Kokkos::fence();
}
void perform_scale(StreamDeviceArray& b, StreamDeviceArray& c,
const double scalar) {
Kokkos::parallel_for(
"scale", Policy(0, b.extent(0)),
KOKKOS_LAMBDA(const StreamIndex i) { b[i] = scalar * c[i]; });
Kokkos::fence();
@ -89,7 +87,7 @@ void perform_scale(StreamDeviceArray& a, StreamDeviceArray& b,
void perform_add(StreamDeviceArray& a, StreamDeviceArray& b,
StreamDeviceArray& c) {
Kokkos::parallel_for(
"add", a.extent(0),
"add", Policy(0, a.extent(0)),
KOKKOS_LAMBDA(const StreamIndex i) { c[i] = a[i] + b[i]; });
Kokkos::fence();
@ -98,7 +96,7 @@ void perform_add(StreamDeviceArray& a, StreamDeviceArray& b,
void perform_triad(StreamDeviceArray& a, StreamDeviceArray& b,
StreamDeviceArray& c, const double scalar) {
Kokkos::parallel_for(
"triad", a.extent(0),
"triad", Policy(0, a.extent(0)),
KOKKOS_LAMBDA(const StreamIndex i) { a[i] = b[i] + scalar * c[i]; });
Kokkos::fence();
@ -184,6 +182,7 @@ int run_benchmark() {
const double scalar = 3.0;
double setTime = std::numeric_limits<double>::max();
double copyTime = std::numeric_limits<double>::max();
double scaleTime = std::numeric_limits<double>::max();
double addTime = std::numeric_limits<double>::max();
@ -191,13 +190,10 @@ int run_benchmark() {
printf("Initializing Views...\n");
#if defined(KOKKOS_HAVE_OPENMP)
Kokkos::parallel_for(
"init", Kokkos::RangePolicy<Kokkos::OpenMP>(0, STREAM_ARRAY_SIZE),
#else
Kokkos::parallel_for(
"init", Kokkos::RangePolicy<Kokkos::Serial>(0, STREAM_ARRAY_SIZE),
#endif
"init",
Kokkos::RangePolicy<Kokkos::DefaultHostExecutionSpace>(0,
STREAM_ARRAY_SIZE),
KOKKOS_LAMBDA(const int i) {
a[i] = 1.0;
b[i] = 2.0;
@ -209,26 +205,30 @@ int run_benchmark() {
Kokkos::deep_copy(dev_b, b);
Kokkos::deep_copy(dev_c, c);
double start;
printf("Starting benchmarking...\n");
Kokkos::Timer timer;
for (StreamIndex k = 0; k < STREAM_NTIMES; ++k) {
start = now();
perform_copy(dev_a, dev_b, dev_c);
copyTime = std::min(copyTime, (now() - start));
timer.reset();
perform_set(dev_c, 1.5);
setTime = std::min(setTime, timer.seconds());
start = now();
perform_scale(dev_a, dev_b, dev_c, scalar);
scaleTime = std::min(scaleTime, (now() - start));
timer.reset();
perform_copy(dev_a, dev_c);
copyTime = std::min(copyTime, timer.seconds());
start = now();
timer.reset();
perform_scale(dev_b, dev_c, scalar);
scaleTime = std::min(scaleTime, timer.seconds());
timer.reset();
perform_add(dev_a, dev_b, dev_c);
addTime = std::min(addTime, (now() - start));
addTime = std::min(addTime, timer.seconds());
start = now();
timer.reset();
perform_triad(dev_a, dev_b, dev_c, scalar);
triadTime = std::min(triadTime, (now() - start));
triadTime = std::min(triadTime, timer.seconds());
}
Kokkos::deep_copy(a, dev_a);
@ -240,6 +240,9 @@ int run_benchmark() {
printf(HLINE);
printf("Set %11.2f MB/s\n",
(1.0e-06 * 1.0 * (double)sizeof(double) * (double)STREAM_ARRAY_SIZE) /
setTime);
printf("Copy %11.2f MB/s\n",
(1.0e-06 * 2.0 * (double)sizeof(double) * (double)STREAM_ARRAY_SIZE) /
copyTime);