//@HEADER
// ************************************************************************
//
// Kokkos v. 4.0
// Copyright (2022) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
// See https://kokkos.org/LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//@HEADER

#include <Kokkos_Core.hpp>
|
|
#include "policy_perf_test.hpp"
|
|
|
|
int main(int argc, char* argv[]) {
|
|
Kokkos::initialize(argc, argv);
|
|
|
|
if (argc < 10) {
|
|
printf(" Ten arguments are needed to run this program:\n");
|
|
printf(
|
|
" (1)team_range, (2)thread_range, (3)vector_range, (4)outer_repeat, "
|
|
"(5)thread_repeat, (6)vector_repeat, (7)team_size, (8)vector_size, "
|
|
"(9)schedule, (10)test_type\n");
|
|
printf(" team_range: number of teams (league_size)\n");
|
|
printf(" thread_range: range for nested TeamThreadRange parallel_*\n");
|
|
printf(" vector_range: range for nested ThreadVectorRange parallel_*\n");
|
|
printf(" outer_repeat: number of repeats for outer parallel_* call\n");
|
|
printf(
|
|
" thread_repeat: number of repeats for TeamThreadRange parallel_* "
|
|
"call\n");
|
|
printf(
|
|
" vector_repeat: number of repeats for ThreadVectorRange parallel_* "
|
|
"call\n");
|
|
printf(" team_size: number of team members (team_size)\n");
|
|
printf(" vector_size: desired vectorization (if possible)\n");
|
|
printf(" schedule: 1 == Static 2 == Dynamic\n");
|
|
printf(
|
|
" test_type: 3-digit code XYZ for testing (nested) parallel_*\n");
|
|
printf(
|
|
" code key: XYZ X in {1,2,3,4,5}, Y in {0,1,2}, Z in "
|
|
"{0,1,2}\n");
|
|
printf(" TeamPolicy:\n");
|
|
printf(
|
|
" X: 0 = none (never used, makes no sense); 1 = "
|
|
"parallel_for; 2 = parallel_reduce\n");
|
|
printf(
|
|
" Y: 0 = none; 1 = parallel_for; 2 = "
|
|
"parallel_reduce\n");
|
|
printf(
|
|
" Z: 0 = none; 1 = parallel_for; 2 = "
|
|
"parallel_reduce\n");
|
|
printf(" RangePolicy:\n");
|
|
printf(
|
|
" X: 3 = parallel_for; 4 = parallel_reduce; 5 = "
|
|
"parallel_scan\n");
|
|
printf(" Y: 0 = none\n");
|
|
printf(" Z: 0 = none\n");
|
|
printf(" Example Input:\n");
|
|
printf(" 100000 32 32 100 100 100 8 1 1 100\n");
|
|
Kokkos::finalize();
|
|
return 0;
|
|
}
|
|
|
|
int team_range = std::stoi(argv[1]);
|
|
int thread_range = std::stoi(argv[2]);
|
|
int vector_range = std::stoi(argv[3]);
|
|
|
|
int outer_repeat = std::stoi(argv[4]);
|
|
int thread_repeat = std::stoi(argv[5]);
|
|
int vector_repeat = std::stoi(argv[6]);
|
|
|
|
int team_size = std::stoi(argv[7]);
|
|
int vector_size = std::stoi(argv[8]);
|
|
int schedule = std::stoi(argv[9]);
|
|
int test_type = std::stoi(argv[10]);
|
|
|
|
int disable_verbose_output = 0;
|
|
if (argc > 11) {
|
|
disable_verbose_output = std::stoi(argv[11]);
|
|
}
|
|
|
|
if (schedule != 1 && schedule != 2) {
|
|
printf("schedule: %d\n", schedule);
|
|
printf("Options for schedule are: 1 == Static 2 == Dynamic\n");
|
|
Kokkos::finalize();
|
|
return -1;
|
|
}
|
|
|
|
if (test_type != 100 && test_type != 110 && test_type != 111 &&
|
|
test_type != 112 && test_type != 120 && test_type != 121 &&
|
|
test_type != 122 && test_type != 200 && test_type != 210 &&
|
|
test_type != 211 && test_type != 212 && test_type != 220 &&
|
|
test_type != 221 && test_type != 222 && test_type != 300 &&
|
|
test_type != 400 && test_type != 500) {
|
|
printf("Incorrect test_type option\n");
|
|
Kokkos::finalize();
|
|
return -2;
|
|
}
|
|
|
|
double result = 0.0;
|
|
|
|
Kokkos::parallel_reduce(
|
|
"parallel_reduce warmup", Kokkos::TeamPolicy<>(10, 1),
|
|
KOKKOS_LAMBDA(const Kokkos::TeamPolicy<>::member_type&, double& lval) {
|
|
lval += 1;
|
|
},
|
|
result);
|
|
|
|
using view_type_1d = Kokkos::View<double*, Kokkos::LayoutRight>;
|
|
using view_type_2d = Kokkos::View<double**, Kokkos::LayoutRight>;
|
|
using view_type_3d = Kokkos::View<double***, Kokkos::LayoutRight>;
|
|
|
|
// Allocate view without initializing
|
|
// Call a 'warmup' test with 1 repeat - this will initialize the corresponding
|
|
// view appropriately for test and should obey first-touch etc Second call to
|
|
// test is the one we actually care about and time
|
|
view_type_1d v_1(Kokkos::view_alloc(Kokkos::WithoutInitializing, "v_1"),
|
|
static_cast<size_t>(team_range) * team_size);
|
|
view_type_2d v_2(Kokkos::view_alloc(Kokkos::WithoutInitializing, "v_2"),
|
|
static_cast<size_t>(team_range) * team_size, thread_range);
|
|
view_type_3d v_3(Kokkos::view_alloc(Kokkos::WithoutInitializing, "v_3"),
|
|
static_cast<size_t>(team_range) * team_size, thread_range,
|
|
vector_range);
|
|
|
|
double result_computed = 0.0;
|
|
double result_expect = 0.0;
|
|
double time = 0.0;
|
|
|
|
if (schedule == 1) {
|
|
if (test_type != 500) {
|
|
// warmup - no repeat of loops
|
|
test_policy<Kokkos::Schedule<Kokkos::Static>, int>(
|
|
team_range, thread_range, vector_range, 1, 1, 1, team_size,
|
|
vector_size, test_type, v_1, v_2, v_3, result_computed, result_expect,
|
|
time);
|
|
test_policy<Kokkos::Schedule<Kokkos::Static>, int>(
|
|
team_range, thread_range, vector_range, outer_repeat, thread_repeat,
|
|
vector_repeat, team_size, vector_size, test_type, v_1, v_2, v_3,
|
|
result_computed, result_expect, time);
|
|
} else {
|
|
// parallel_scan: initialize 1d view for parallel_scan
|
|
test_policy<Kokkos::Schedule<Kokkos::Static>, int>(
|
|
team_range, thread_range, vector_range, 1, 1, 1, team_size,
|
|
vector_size, 100, v_1, v_2, v_3, result_computed, result_expect,
|
|
time);
|
|
test_policy<Kokkos::Schedule<Kokkos::Static>, int>(
|
|
team_range, thread_range, vector_range, outer_repeat, thread_repeat,
|
|
vector_repeat, team_size, vector_size, test_type, v_1, v_2, v_3,
|
|
result_computed, result_expect, time);
|
|
}
|
|
}
|
|
if (schedule == 2) {
|
|
if (test_type != 500) {
|
|
// warmup - no repeat of loops
|
|
test_policy<Kokkos::Schedule<Kokkos::Dynamic>, int>(
|
|
team_range, thread_range, vector_range, 1, 1, 1, team_size,
|
|
vector_size, test_type, v_1, v_2, v_3, result_computed, result_expect,
|
|
time);
|
|
test_policy<Kokkos::Schedule<Kokkos::Dynamic>, int>(
|
|
team_range, thread_range, vector_range, outer_repeat, thread_repeat,
|
|
vector_repeat, team_size, vector_size, test_type, v_1, v_2, v_3,
|
|
result_computed, result_expect, time);
|
|
} else {
|
|
// parallel_scan: initialize 1d view for parallel_scan
|
|
test_policy<Kokkos::Schedule<Kokkos::Static>, int>(
|
|
team_range, thread_range, vector_range, 1, 1, 1, team_size,
|
|
vector_size, 100, v_1, v_2, v_3, result_computed, result_expect,
|
|
time);
|
|
test_policy<Kokkos::Schedule<Kokkos::Static>, int>(
|
|
team_range, thread_range, vector_range, outer_repeat, thread_repeat,
|
|
vector_repeat, team_size, vector_size, test_type, v_1, v_2, v_3,
|
|
result_computed, result_expect, time);
|
|
}
|
|
}
|
|
|
|
if (disable_verbose_output == 0) {
|
|
printf("%7i %4i %2i %9i %4i %4i %4i %2i %1i %3i %e %e %lf\n", team_range,
|
|
thread_range, vector_range, outer_repeat, thread_repeat,
|
|
vector_repeat, team_size, vector_size, schedule, test_type,
|
|
result_computed, result_expect, time);
|
|
} else {
|
|
printf("%lf\n", time);
|
|
}
|
|
|
|
Kokkos::finalize();
|
|
|
|
return 0;
|
|
}
|