Update Kokkos library in LAMMPS to v3.0
This commit is contained in:
@ -1,124 +1,120 @@
|
||||
#include<Kokkos_Core.hpp>
|
||||
#include<impl/Kokkos_Timer.hpp>
|
||||
#include<Kokkos_Random.hpp>
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <impl/Kokkos_Timer.hpp>
|
||||
#include <Kokkos_Random.hpp>
|
||||
|
||||
template<class Scalar>
|
||||
double test_atomic(int L, int N, int M,int K,int R,Kokkos::View<const int*> offsets) {
|
||||
Kokkos::View<Scalar*> output("Output",N);
|
||||
template <class Scalar>
|
||||
double test_atomic(int L, int N, int M, int K, int R,
|
||||
Kokkos::View<const int*> offsets) {
|
||||
Kokkos::View<Scalar*> output("Output", N);
|
||||
Kokkos::Impl::Timer timer;
|
||||
|
||||
for(int r = 0; r<R; r++)
|
||||
Kokkos::parallel_for(L, KOKKOS_LAMBDA (const int&i) {
|
||||
Scalar s = 2;
|
||||
for(int m=0;m<M;m++) {
|
||||
for(int k=0;k<K;k++)
|
||||
s=s*s+s;
|
||||
const int idx = (i+offsets(i,m))%N;
|
||||
Kokkos::atomic_add(&output(idx),s);
|
||||
}
|
||||
});
|
||||
for (int r = 0; r < R; r++)
|
||||
Kokkos::parallel_for(
|
||||
L, KOKKOS_LAMBDA(const int& i) {
|
||||
Scalar s = 2;
|
||||
for (int m = 0; m < M; m++) {
|
||||
for (int k = 0; k < K; k++) s = s * s + s;
|
||||
const int idx = (i + offsets(i, m)) % N;
|
||||
Kokkos::atomic_add(&output(idx), s);
|
||||
}
|
||||
});
|
||||
Kokkos::fence();
|
||||
double time = timer.seconds();
|
||||
|
||||
|
||||
return time;
|
||||
}
|
||||
|
||||
template<class Scalar>
|
||||
double test_no_atomic(int L, int N, int M,int K,int R,Kokkos::View<const int*> offsets) {
|
||||
Kokkos::View<Scalar*> output("Output",N);
|
||||
template <class Scalar>
|
||||
double test_no_atomic(int L, int N, int M, int K, int R,
|
||||
Kokkos::View<const int*> offsets) {
|
||||
Kokkos::View<Scalar*> output("Output", N);
|
||||
Kokkos::Impl::Timer timer;
|
||||
for(int r = 0; r<R; r++)
|
||||
Kokkos::parallel_for(L, KOKKOS_LAMBDA (const int&i) {
|
||||
Scalar s = 2;
|
||||
for(int m=0;m<M;m++) {
|
||||
for(int k=0;k<K;k++)
|
||||
s=s*s+s;
|
||||
const int idx = (i+offsets(i,m))%N;
|
||||
output(idx) += s;
|
||||
}
|
||||
});
|
||||
for (int r = 0; r < R; r++)
|
||||
Kokkos::parallel_for(
|
||||
L, KOKKOS_LAMBDA(const int& i) {
|
||||
Scalar s = 2;
|
||||
for (int m = 0; m < M; m++) {
|
||||
for (int k = 0; k < K; k++) s = s * s + s;
|
||||
const int idx = (i + offsets(i, m)) % N;
|
||||
output(idx) += s;
|
||||
}
|
||||
});
|
||||
Kokkos::fence();
|
||||
double time = timer.seconds();
|
||||
double time = timer.seconds();
|
||||
return time;
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
Kokkos::initialize(argc,argv);
|
||||
{
|
||||
if(argc<8) {
|
||||
printf("Arguments: L N M D K R T\n");
|
||||
printf(" L: Number of iterations to run\n");
|
||||
printf(" N: Length of array to do atomics into\n");
|
||||
printf(" M: Number of atomics per iteration to do\n");
|
||||
printf(" D: Distance from index i to do atomics into (randomly)\n");
|
||||
printf(" K: Number of FMAD per atomic\n");
|
||||
printf(" R: Number of repeats of the experiments\n");
|
||||
printf(" T: Type of atomic\n");
|
||||
printf(" 1 - int\n");
|
||||
printf(" 2 - long\n");
|
||||
printf(" 3 - float\n");
|
||||
printf(" 4 - double\n");
|
||||
printf(" 5 - complex<double>\n");
|
||||
printf("Example Input GPU:\n");
|
||||
printf(" Histogram : 1000000 1000 1 1000 1 10 1\n");
|
||||
printf(" MD Force : 100000 100000 100 1000 20 10 4\n");
|
||||
printf(" Matrix Assembly : 100000 1000000 50 1000 20 10 4\n");
|
||||
Kokkos::finalize();
|
||||
return 0;
|
||||
Kokkos::initialize(argc, argv);
|
||||
{
|
||||
if (argc < 8) {
|
||||
printf("Arguments: L N M D K R T\n");
|
||||
printf(" L: Number of iterations to run\n");
|
||||
printf(" N: Length of array to do atomics into\n");
|
||||
printf(" M: Number of atomics per iteration to do\n");
|
||||
printf(" D: Distance from index i to do atomics into (randomly)\n");
|
||||
printf(" K: Number of FMAD per atomic\n");
|
||||
printf(" R: Number of repeats of the experiments\n");
|
||||
printf(" T: Type of atomic\n");
|
||||
printf(" 1 - int\n");
|
||||
printf(" 2 - long\n");
|
||||
printf(" 3 - float\n");
|
||||
printf(" 4 - double\n");
|
||||
printf(" 5 - complex<double>\n");
|
||||
printf("Example Input GPU:\n");
|
||||
printf(" Histogram : 1000000 1000 1 1000 1 10 1\n");
|
||||
printf(" MD Force : 100000 100000 100 1000 20 10 4\n");
|
||||
printf(" Matrix Assembly : 100000 1000000 50 1000 20 10 4\n");
|
||||
Kokkos::finalize();
|
||||
return 0;
|
||||
}
|
||||
|
||||
int L = atoi(argv[1]);
|
||||
int N = atoi(argv[2]);
|
||||
int M = atoi(argv[3]);
|
||||
int D = atoi(argv[4]);
|
||||
int K = atoi(argv[5]);
|
||||
int R = atoi(argv[6]);
|
||||
int type = atoi(argv[7]);
|
||||
|
||||
Kokkos::View<int*> offsets("Offsets", L, M);
|
||||
Kokkos::Random_XorShift64_Pool<> pool(12371);
|
||||
Kokkos::fill_random(offsets, pool, D);
|
||||
double time = 0;
|
||||
if (type == 1) time = test_atomic<int>(L, N, M, K, R, offsets);
|
||||
if (type == 2) time = test_atomic<long>(L, N, M, K, R, offsets);
|
||||
if (type == 3) time = test_atomic<float>(L, N, M, K, R, offsets);
|
||||
if (type == 4) time = test_atomic<double>(L, N, M, K, R, offsets);
|
||||
if (type == 5)
|
||||
time = test_atomic<Kokkos::complex<double> >(L, N, M, K, R, offsets);
|
||||
|
||||
double time2 = 1;
|
||||
if (type == 1) time2 = test_no_atomic<int>(L, N, M, K, R, offsets);
|
||||
if (type == 2) time2 = test_no_atomic<long>(L, N, M, K, R, offsets);
|
||||
if (type == 3) time2 = test_no_atomic<float>(L, N, M, K, R, offsets);
|
||||
if (type == 4) time2 = test_no_atomic<double>(L, N, M, K, R, offsets);
|
||||
if (type == 5)
|
||||
time2 = test_no_atomic<Kokkos::complex<double> >(L, N, M, K, R, offsets);
|
||||
|
||||
int size = 0;
|
||||
if (type == 1) size = sizeof(int);
|
||||
if (type == 2) size = sizeof(long);
|
||||
if (type == 3) size = sizeof(float);
|
||||
if (type == 4) size = sizeof(double);
|
||||
if (type == 5) size = sizeof(Kokkos::complex<double>);
|
||||
|
||||
printf("%i\n", size);
|
||||
printf(
|
||||
"Time: %s %i %i %i %i %i %i (t_atomic: %e t_nonatomic: %e ratio: %lf "
|
||||
")( GUpdates/s: %lf GB/s: %lf )\n",
|
||||
(type == 1)
|
||||
? "int"
|
||||
: ((type == 2)
|
||||
? "long"
|
||||
: ((type == 3) ? "float"
|
||||
: ((type == 4) ? "double" : "complex"))),
|
||||
L, N, M, D, K, R, time, time2, time / time2, 1.e-9 * L * R * M / time,
|
||||
1.0 * L * R * M * 2 * size / time / 1024 / 1024 / 1024);
|
||||
}
|
||||
|
||||
|
||||
int L = atoi(argv[1]);
|
||||
int N = atoi(argv[2]);
|
||||
int M = atoi(argv[3]);
|
||||
int D = atoi(argv[4]);
|
||||
int K = atoi(argv[5]);
|
||||
int R = atoi(argv[6]);
|
||||
int type = atoi(argv[7]);
|
||||
|
||||
Kokkos::View<int*> offsets("Offsets",L,M);
|
||||
Kokkos::Random_XorShift64_Pool<> pool(12371);
|
||||
Kokkos::fill_random(offsets,pool,D);
|
||||
double time = 0;
|
||||
if(type==1)
|
||||
time = test_atomic<int>(L,N,M,K,R,offsets);
|
||||
if(type==2)
|
||||
time = test_atomic<long>(L,N,M,K,R,offsets);
|
||||
if(type==3)
|
||||
time = test_atomic<float>(L,N,M,K,R,offsets);
|
||||
if(type==4)
|
||||
time = test_atomic<double>(L,N,M,K,R,offsets);
|
||||
if(type==5)
|
||||
time = test_atomic<Kokkos::complex<double> >(L,N,M,K,R,offsets);
|
||||
|
||||
double time2 = 1;
|
||||
if(type==1)
|
||||
time2 = test_no_atomic<int>(L,N,M,K,R,offsets);
|
||||
if(type==2)
|
||||
time2 = test_no_atomic<long>(L,N,M,K,R,offsets);
|
||||
if(type==3)
|
||||
time2 = test_no_atomic<float>(L,N,M,K,R,offsets);
|
||||
if(type==4)
|
||||
time2 = test_no_atomic<double>(L,N,M,K,R,offsets);
|
||||
if(type==5)
|
||||
time2 = test_no_atomic<Kokkos::complex<double> >(L,N,M,K,R,offsets);
|
||||
|
||||
int size = 0;
|
||||
if(type==1) size = sizeof(int);
|
||||
if(type==2) size = sizeof(long);
|
||||
if(type==3) size = sizeof(float);
|
||||
if(type==4) size = sizeof(double);
|
||||
if(type==5) size = sizeof(Kokkos::complex<double>);
|
||||
|
||||
printf("%i\n",size);
|
||||
printf("Time: %s %i %i %i %i %i %i (t_atomic: %e t_nonatomic: %e ratio: %lf )( GUpdates/s: %lf GB/s: %lf )\n",
|
||||
(type==1)?"int": (
|
||||
(type==2)?"long": (
|
||||
(type==3)?"float": (
|
||||
(type==4)?"double":"complex"))),
|
||||
L,N,M,D,K,R,time,time2,time/time2,
|
||||
1.e-9*L*R*M/time, 1.0*L*R*M*2*size/time/1024/1024/1024);
|
||||
}
|
||||
Kokkos::finalize();
|
||||
}
|
||||
|
||||
|
||||
@ -2,10 +2,11 @@
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
// Kokkos v. 3.0
|
||||
// Copyright (2020) National Technology & Engineering
|
||||
// Solutions of Sandia, LLC (NTESS).
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@ -23,10 +24,10 @@
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
@ -41,59 +42,52 @@
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include<Kokkos_Core.hpp>
|
||||
#include<impl/Kokkos_Timer.hpp>
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <impl/Kokkos_Timer.hpp>
|
||||
|
||||
template<class Scalar, int Unroll,int Stride>
|
||||
template <class Scalar, int Unroll, int Stride>
|
||||
struct Run {
|
||||
static void run(int N, int K, int R, int F, int T, int S);
|
||||
static void run(int N, int K, int R, int F, int T, int S);
|
||||
};
|
||||
|
||||
template<class Scalar, int Stride>
|
||||
template <class Scalar, int Stride>
|
||||
struct RunStride {
|
||||
static void run_1(int N, int K, int R, int F, int T, int S);
|
||||
static void run_2(int N, int K, int R, int F, int T, int S);
|
||||
static void run_3(int N, int K, int R, int F, int T, int S);
|
||||
static void run_4(int N, int K, int R, int F, int T, int S);
|
||||
static void run_5(int N, int K, int R, int F, int T, int S);
|
||||
static void run_6(int N, int K, int R, int F, int T, int S);
|
||||
static void run_7(int N, int K, int R, int F, int T, int S);
|
||||
static void run_8(int N, int K, int R, int F, int T, int S);
|
||||
static void run(int N, int K, int R, int U, int F, int T, int S);
|
||||
static void run_1(int N, int K, int R, int F, int T, int S);
|
||||
static void run_2(int N, int K, int R, int F, int T, int S);
|
||||
static void run_3(int N, int K, int R, int F, int T, int S);
|
||||
static void run_4(int N, int K, int R, int F, int T, int S);
|
||||
static void run_5(int N, int K, int R, int F, int T, int S);
|
||||
static void run_6(int N, int K, int R, int F, int T, int S);
|
||||
static void run_7(int N, int K, int R, int F, int T, int S);
|
||||
static void run_8(int N, int K, int R, int F, int T, int S);
|
||||
static void run(int N, int K, int R, int U, int F, int T, int S);
|
||||
};
|
||||
|
||||
#define STRIDE 1
|
||||
#include<bench_stride.hpp>
|
||||
#include <bench_stride.hpp>
|
||||
#undef STRIDE
|
||||
#define STRIDE 2
|
||||
#include<bench_stride.hpp>
|
||||
#include <bench_stride.hpp>
|
||||
#undef STRIDE
|
||||
#define STRIDE 4
|
||||
#include<bench_stride.hpp>
|
||||
#include <bench_stride.hpp>
|
||||
#undef STRIDE
|
||||
#define STRIDE 8
|
||||
#include<bench_stride.hpp>
|
||||
#include <bench_stride.hpp>
|
||||
#undef STRIDE
|
||||
#define STRIDE 16
|
||||
#include<bench_stride.hpp>
|
||||
#include <bench_stride.hpp>
|
||||
#undef STRIDE
|
||||
#define STRIDE 32
|
||||
#include<bench_stride.hpp>
|
||||
#include <bench_stride.hpp>
|
||||
#undef STRIDE
|
||||
|
||||
template<class Scalar>
|
||||
template <class Scalar>
|
||||
void run_stride_unroll(int N, int K, int R, int D, int U, int F, int T, int S) {
|
||||
if(D == 1)
|
||||
RunStride<Scalar,1>::run(N,K,R,U,F,T,S);
|
||||
if(D == 2)
|
||||
RunStride<Scalar,2>::run(N,K,R,U,F,T,S);
|
||||
if(D == 4)
|
||||
RunStride<Scalar,4>::run(N,K,R,U,F,T,S);
|
||||
if(D == 8)
|
||||
RunStride<Scalar,8>::run(N,K,R,U,F,T,S);
|
||||
if(D == 16)
|
||||
RunStride<Scalar,16>::run(N,K,R,U,F,T,S);
|
||||
if(D == 32)
|
||||
RunStride<Scalar,32>::run(N,K,R,U,F,T,S);
|
||||
if (D == 1) RunStride<Scalar, 1>::run(N, K, R, U, F, T, S);
|
||||
if (D == 2) RunStride<Scalar, 2>::run(N, K, R, U, F, T, S);
|
||||
if (D == 4) RunStride<Scalar, 4>::run(N, K, R, U, F, T, S);
|
||||
if (D == 8) RunStride<Scalar, 8>::run(N, K, R, U, F, T, S);
|
||||
if (D == 16) RunStride<Scalar, 16>::run(N, K, R, U, F, T, S);
|
||||
if (D == 32) RunStride<Scalar, 32>::run(N, K, R, U, F, T, S);
|
||||
}
|
||||
|
||||
|
||||
@ -2,10 +2,11 @@
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
// Kokkos v. 3.0
|
||||
// Copyright (2020) National Technology & Engineering
|
||||
// Solutions of Sandia, LLC (NTESS).
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@ -23,10 +24,10 @@
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
@ -41,84 +42,82 @@
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
|
||||
#define UNROLL 1
|
||||
#include<bench_unroll_stride.hpp>
|
||||
#include <bench_unroll_stride.hpp>
|
||||
#undef UNROLL
|
||||
#define UNROLL 2
|
||||
#include<bench_unroll_stride.hpp>
|
||||
#include <bench_unroll_stride.hpp>
|
||||
#undef UNROLL
|
||||
#define UNROLL 3
|
||||
#include<bench_unroll_stride.hpp>
|
||||
#include <bench_unroll_stride.hpp>
|
||||
#undef UNROLL
|
||||
#define UNROLL 4
|
||||
#include<bench_unroll_stride.hpp>
|
||||
#include <bench_unroll_stride.hpp>
|
||||
#undef UNROLL
|
||||
#define UNROLL 5
|
||||
#include<bench_unroll_stride.hpp>
|
||||
#include <bench_unroll_stride.hpp>
|
||||
#undef UNROLL
|
||||
#define UNROLL 6
|
||||
#include<bench_unroll_stride.hpp>
|
||||
#include <bench_unroll_stride.hpp>
|
||||
#undef UNROLL
|
||||
#define UNROLL 7
|
||||
#include<bench_unroll_stride.hpp>
|
||||
#include <bench_unroll_stride.hpp>
|
||||
#undef UNROLL
|
||||
#define UNROLL 8
|
||||
#include<bench_unroll_stride.hpp>
|
||||
#include <bench_unroll_stride.hpp>
|
||||
#undef UNROLL
|
||||
|
||||
template<class Scalar>
|
||||
struct RunStride<Scalar,STRIDE> {
|
||||
static void run_1(int N, int K, int R, int F, int T, int S) {
|
||||
Run<Scalar,1,STRIDE>::run(N,K,R,F,T,S);
|
||||
}
|
||||
static void run_2(int N, int K, int R, int F, int T, int S) {
|
||||
Run<Scalar,2,STRIDE>::run(N,K,R,F,T,S);
|
||||
}
|
||||
static void run_3(int N, int K, int R, int F, int T, int S) {
|
||||
Run<Scalar,3,STRIDE>::run(N,K,R,F,T,S);
|
||||
}
|
||||
static void run_4(int N, int K, int R, int F, int T, int S) {
|
||||
Run<Scalar,4,STRIDE>::run(N,K,R,F,T,S);
|
||||
}
|
||||
static void run_5(int N, int K, int R, int F, int T, int S) {
|
||||
Run<Scalar,5,STRIDE>::run(N,K,R,F,T,S);
|
||||
}
|
||||
static void run_6(int N, int K, int R, int F, int T, int S) {
|
||||
Run<Scalar,6,STRIDE>::run(N,K,R,F,T,S);
|
||||
}
|
||||
static void run_7(int N, int K, int R, int F, int T, int S) {
|
||||
Run<Scalar,7,STRIDE>::run(N,K,R,F,T,S);
|
||||
}
|
||||
static void run_8(int N, int K, int R, int F, int T, int S) {
|
||||
Run<Scalar,8,STRIDE>::run(N,K,R,F,T,S);
|
||||
}
|
||||
template <class Scalar>
|
||||
struct RunStride<Scalar, STRIDE> {
|
||||
static void run_1(int N, int K, int R, int F, int T, int S) {
|
||||
Run<Scalar, 1, STRIDE>::run(N, K, R, F, T, S);
|
||||
}
|
||||
static void run_2(int N, int K, int R, int F, int T, int S) {
|
||||
Run<Scalar, 2, STRIDE>::run(N, K, R, F, T, S);
|
||||
}
|
||||
static void run_3(int N, int K, int R, int F, int T, int S) {
|
||||
Run<Scalar, 3, STRIDE>::run(N, K, R, F, T, S);
|
||||
}
|
||||
static void run_4(int N, int K, int R, int F, int T, int S) {
|
||||
Run<Scalar, 4, STRIDE>::run(N, K, R, F, T, S);
|
||||
}
|
||||
static void run_5(int N, int K, int R, int F, int T, int S) {
|
||||
Run<Scalar, 5, STRIDE>::run(N, K, R, F, T, S);
|
||||
}
|
||||
static void run_6(int N, int K, int R, int F, int T, int S) {
|
||||
Run<Scalar, 6, STRIDE>::run(N, K, R, F, T, S);
|
||||
}
|
||||
static void run_7(int N, int K, int R, int F, int T, int S) {
|
||||
Run<Scalar, 7, STRIDE>::run(N, K, R, F, T, S);
|
||||
}
|
||||
static void run_8(int N, int K, int R, int F, int T, int S) {
|
||||
Run<Scalar, 8, STRIDE>::run(N, K, R, F, T, S);
|
||||
}
|
||||
|
||||
static void run(int N, int K, int R, int U, int F, int T, int S) {
|
||||
if(U==1) {
|
||||
run_1(N,K,R,F,T,S);
|
||||
static void run(int N, int K, int R, int U, int F, int T, int S) {
|
||||
if (U == 1) {
|
||||
run_1(N, K, R, F, T, S);
|
||||
}
|
||||
if (U == 2) {
|
||||
run_2(N, K, R, F, T, S);
|
||||
}
|
||||
if (U == 3) {
|
||||
run_3(N, K, R, F, T, S);
|
||||
}
|
||||
if (U == 4) {
|
||||
run_4(N, K, R, F, T, S);
|
||||
}
|
||||
if (U == 5) {
|
||||
run_5(N, K, R, F, T, S);
|
||||
}
|
||||
if (U == 6) {
|
||||
run_6(N, K, R, F, T, S);
|
||||
}
|
||||
if (U == 7) {
|
||||
run_7(N, K, R, F, T, S);
|
||||
}
|
||||
if (U == 8) {
|
||||
run_8(N, K, R, F, T, S);
|
||||
}
|
||||
}
|
||||
if(U==2) {
|
||||
run_2(N,K,R,F,T,S);
|
||||
}
|
||||
if(U==3) {
|
||||
run_3(N,K,R,F,T,S);
|
||||
}
|
||||
if(U==4) {
|
||||
run_4(N,K,R,F,T,S);
|
||||
}
|
||||
if(U==5) {
|
||||
run_5(N,K,R,F,T,S);
|
||||
}
|
||||
if(U==6) {
|
||||
run_6(N,K,R,F,T,S);
|
||||
}
|
||||
if(U==7) {
|
||||
run_7(N,K,R,F,T,S);
|
||||
}
|
||||
if(U==8) {
|
||||
run_8(N,K,R,F,T,S);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@ -2,10 +2,11 @@
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
// Kokkos v. 3.0
|
||||
// Copyright (2020) National Technology & Engineering
|
||||
// Solutions of Sandia, LLC (NTESS).
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@ -23,10 +24,10 @@
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
@ -41,108 +42,110 @@
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
template<class Scalar>
|
||||
struct Run<Scalar,UNROLL,STRIDE> {
|
||||
static void run(int N, int K, int R, int F, int T, int S) {
|
||||
Kokkos::View<Scalar**[STRIDE],Kokkos::LayoutRight> A("A",N,K);
|
||||
Kokkos::View<Scalar**[STRIDE],Kokkos::LayoutRight> B("B",N,K);
|
||||
Kokkos::View<Scalar**[STRIDE],Kokkos::LayoutRight> C("C",N,K);
|
||||
template <class Scalar>
|
||||
struct Run<Scalar, UNROLL, STRIDE> {
|
||||
static void run(int N, int K, int R, int F, int T, int S) {
|
||||
Kokkos::View<Scalar* * [STRIDE], Kokkos::LayoutRight> A("A", N, K);
|
||||
Kokkos::View<Scalar* * [STRIDE], Kokkos::LayoutRight> B("B", N, K);
|
||||
Kokkos::View<Scalar* * [STRIDE], Kokkos::LayoutRight> C("C", N, K);
|
||||
|
||||
Kokkos::deep_copy(A,Scalar(1.5));
|
||||
Kokkos::deep_copy(B,Scalar(2.5));
|
||||
Kokkos::deep_copy(C,Scalar(3.5));
|
||||
Kokkos::deep_copy(A, Scalar(1.5));
|
||||
Kokkos::deep_copy(B, Scalar(2.5));
|
||||
Kokkos::deep_copy(C, Scalar(3.5));
|
||||
|
||||
Kokkos::Timer timer;
|
||||
Kokkos::parallel_for("BenchmarkKernel",Kokkos::TeamPolicy<>(N,T).set_scratch_size(0,Kokkos::PerTeam(S)),
|
||||
KOKKOS_LAMBDA ( const Kokkos::TeamPolicy<>::member_type& team) {
|
||||
const int n = team.league_rank();
|
||||
for(int r=0; r<R; r++) {
|
||||
Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,K), [&] (const int& i) {
|
||||
Scalar a1 = A(n,i,0);
|
||||
const Scalar b = B(n,i,0);
|
||||
#if(UNROLL>1)
|
||||
Scalar a2 = a1*1.3;
|
||||
Kokkos::Timer timer;
|
||||
Kokkos::parallel_for(
|
||||
"BenchmarkKernel",
|
||||
Kokkos::TeamPolicy<>(N, T).set_scratch_size(0, Kokkos::PerTeam(S)),
|
||||
KOKKOS_LAMBDA(const Kokkos::TeamPolicy<>::member_type& team) {
|
||||
const int n = team.league_rank();
|
||||
for (int r = 0; r < R; r++) {
|
||||
Kokkos::parallel_for(
|
||||
Kokkos::TeamThreadRange(team, 0, K), [&](const int& i) {
|
||||
Scalar a1 = A(n, i, 0);
|
||||
const Scalar b = B(n, i, 0);
|
||||
#if (UNROLL > 1)
|
||||
Scalar a2 = a1 * 1.3;
|
||||
#endif
|
||||
#if(UNROLL>2)
|
||||
Scalar a3 = a2*1.1;
|
||||
#if (UNROLL > 2)
|
||||
Scalar a3 = a2 * 1.1;
|
||||
#endif
|
||||
#if(UNROLL>3)
|
||||
Scalar a4 = a3*1.1;
|
||||
#if (UNROLL > 3)
|
||||
Scalar a4 = a3 * 1.1;
|
||||
#endif
|
||||
#if(UNROLL>4)
|
||||
Scalar a5 = a4*1.3;
|
||||
#if (UNROLL > 4)
|
||||
Scalar a5 = a4 * 1.3;
|
||||
#endif
|
||||
#if(UNROLL>5)
|
||||
Scalar a6 = a5*1.1;
|
||||
#if (UNROLL > 5)
|
||||
Scalar a6 = a5 * 1.1;
|
||||
#endif
|
||||
#if(UNROLL>6)
|
||||
Scalar a7 = a6*1.1;
|
||||
#if (UNROLL > 6)
|
||||
Scalar a7 = a6 * 1.1;
|
||||
#endif
|
||||
#if(UNROLL>7)
|
||||
Scalar a8 = a7*1.1;
|
||||
#if (UNROLL > 7)
|
||||
Scalar a8 = a7 * 1.1;
|
||||
#endif
|
||||
|
||||
for (int f = 0; f < F; f++) {
|
||||
a1 += b * a1;
|
||||
#if (UNROLL > 1)
|
||||
a2 += b * a2;
|
||||
#endif
|
||||
#if (UNROLL > 2)
|
||||
a3 += b * a3;
|
||||
#endif
|
||||
#if (UNROLL > 3)
|
||||
a4 += b * a4;
|
||||
#endif
|
||||
#if (UNROLL > 4)
|
||||
a5 += b * a5;
|
||||
#endif
|
||||
#if (UNROLL > 5)
|
||||
a6 += b * a6;
|
||||
#endif
|
||||
#if (UNROLL > 6)
|
||||
a7 += b * a7;
|
||||
#endif
|
||||
#if (UNROLL > 7)
|
||||
a8 += b * a8;
|
||||
#endif
|
||||
}
|
||||
#if (UNROLL == 1)
|
||||
C(n, i, 0) = a1;
|
||||
#endif
|
||||
#if (UNROLL == 2)
|
||||
C(n, i, 0) = a1 + a2;
|
||||
#endif
|
||||
#if (UNROLL == 3)
|
||||
C(n, i, 0) = a1 + a2 + a3;
|
||||
#endif
|
||||
#if (UNROLL == 4)
|
||||
C(n, i, 0) = a1 + a2 + a3 + a4;
|
||||
#endif
|
||||
#if (UNROLL == 5)
|
||||
C(n, i, 0) = a1 + a2 + a3 + a4 + a5;
|
||||
#endif
|
||||
#if (UNROLL == 6)
|
||||
C(n, i, 0) = a1 + a2 + a3 + a4 + a5 + a6;
|
||||
#endif
|
||||
#if (UNROLL == 7)
|
||||
C(n, i, 0) = a1 + a2 + a3 + a4 + a5 + a6 + a7;
|
||||
#endif
|
||||
#if (UNROLL == 8)
|
||||
C(n, i, 0) = a1 + a2 + a3 + a4 + a5 + a6 + a7 + a8;
|
||||
#endif
|
||||
});
|
||||
}
|
||||
});
|
||||
Kokkos::fence();
|
||||
double seconds = timer.seconds();
|
||||
|
||||
for(int f = 0; f<F; f++) {
|
||||
a1 += b*a1;
|
||||
#if(UNROLL>1)
|
||||
a2 += b*a2;
|
||||
#endif
|
||||
#if(UNROLL>2)
|
||||
a3 += b*a3;
|
||||
#endif
|
||||
#if(UNROLL>3)
|
||||
a4 += b*a4;
|
||||
#endif
|
||||
#if(UNROLL>4)
|
||||
a5 += b*a5;
|
||||
#endif
|
||||
#if(UNROLL>5)
|
||||
a6 += b*a6;
|
||||
#endif
|
||||
#if(UNROLL>6)
|
||||
a7 += b*a7;
|
||||
#endif
|
||||
#if(UNROLL>7)
|
||||
a8 += b*a8;
|
||||
#endif
|
||||
|
||||
|
||||
}
|
||||
#if(UNROLL==1)
|
||||
C(n,i,0) = a1;
|
||||
#endif
|
||||
#if(UNROLL==2)
|
||||
C(n,i,0) = a1+a2;
|
||||
#endif
|
||||
#if(UNROLL==3)
|
||||
C(n,i,0) = a1+a2+a3;
|
||||
#endif
|
||||
#if(UNROLL==4)
|
||||
C(n,i,0) = a1+a2+a3+a4;
|
||||
#endif
|
||||
#if(UNROLL==5)
|
||||
C(n,i,0) = a1+a2+a3+a4+a5;
|
||||
#endif
|
||||
#if(UNROLL==6)
|
||||
C(n,i,0) = a1+a2+a3+a4+a5+a6;
|
||||
#endif
|
||||
#if(UNROLL==7)
|
||||
C(n,i,0) = a1+a2+a3+a4+a5+a6+a7;
|
||||
#endif
|
||||
#if(UNROLL==8)
|
||||
C(n,i,0) = a1+a2+a3+a4+a5+a6+a7+a8;
|
||||
#endif
|
||||
|
||||
});
|
||||
}
|
||||
});
|
||||
Kokkos::fence();
|
||||
double seconds = timer.seconds();
|
||||
|
||||
double bytes = 1.0*N*K*R*3*sizeof(Scalar);
|
||||
double flops = 1.0*N*K*R*(F*2*UNROLL + 2*(UNROLL-1));
|
||||
printf("NKRUFTS: %i %i %i %i %i %i %i Time: %lfs Bandwidth: %lfGiB/s GFlop/s: %lf\n",N,K,R,UNROLL,F,T,S,seconds,1.0*bytes/seconds/1024/1024/1024,1.e-9*flops/seconds);
|
||||
}
|
||||
double bytes = 1.0 * N * K * R * 3 * sizeof(Scalar);
|
||||
double flops = 1.0 * N * K * R * (F * 2 * UNROLL + 2 * (UNROLL - 1));
|
||||
printf(
|
||||
"NKRUFTS: %i %i %i %i %i %i %i Time: %lfs Bandwidth: %lfGiB/s GFlop/s: "
|
||||
"%lf\n",
|
||||
N, K, R, UNROLL, F, T, S, seconds,
|
||||
1.0 * bytes / seconds / 1024 / 1024 / 1024, 1.e-9 * flops / seconds);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@ -2,10 +2,11 @@
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
// Kokkos v. 3.0
|
||||
// Copyright (2020) National Technology & Engineering
|
||||
// Solutions of Sandia, LLC (NTESS).
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@ -23,10 +24,10 @@
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
@ -41,25 +42,27 @@
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include<Kokkos_Core.hpp>
|
||||
#include<impl/Kokkos_Timer.hpp>
|
||||
#include<bench.hpp>
|
||||
#include<cstdlib>
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <impl/Kokkos_Timer.hpp>
|
||||
#include <bench.hpp>
|
||||
#include <cstdlib>
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
Kokkos::initialize();
|
||||
|
||||
|
||||
if(argc<10) {
|
||||
if (argc < 10) {
|
||||
printf("Arguments: N K R D U F T S\n");
|
||||
printf(" P: Precision (1==float, 2==double)\n");
|
||||
printf(" N,K: dimensions of the 2D array to allocate\n");
|
||||
printf(" R: how often to loop through the K dimension with each team\n");
|
||||
printf(" D: distance between loaded elements (stride)\n");
|
||||
printf(" U: how many independent flops to do per load\n");
|
||||
printf(" F: how many times to repeat the U unrolled operations before reading next element\n");
|
||||
printf(
|
||||
" F: how many times to repeat the U unrolled operations before "
|
||||
"reading next element\n");
|
||||
printf(" T: team size\n");
|
||||
printf(" S: shared memory per team (used to control occupancy on GPUs)\n");
|
||||
printf(
|
||||
" S: shared memory per team (used to control occupancy on GPUs)\n");
|
||||
printf("Example Input GPU:\n");
|
||||
printf(" Bandwidth Bound : 2 100000 1024 1 1 1 1 256 6000\n");
|
||||
printf(" Cache Bound : 2 100000 1024 64 1 1 1 512 20000\n");
|
||||
@ -70,7 +73,6 @@ int main(int argc, char* argv[]) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int P = atoi(argv[1]);
|
||||
int N = atoi(argv[2]);
|
||||
int K = atoi(argv[3]);
|
||||
@ -81,17 +83,25 @@ int main(int argc, char* argv[]) {
|
||||
int T = atoi(argv[8]);
|
||||
int S = atoi(argv[9]);
|
||||
|
||||
if(U>8) {printf("U must be 1-8\n"); return 0;}
|
||||
if( (D!=1) && (D!=2) && (D!=4) && (D!=8) && (D!=16) && (D!=32)) {printf("D must be one of 1,2,4,8,16,32\n"); return 0;}
|
||||
if( (P!=1) && (P!=2) ) {printf("P must be one of 1,2\n"); return 0;}
|
||||
|
||||
if(P==1) {
|
||||
run_stride_unroll<float>(N,K,R,D,U,F,T,S);
|
||||
if (U > 8) {
|
||||
printf("U must be 1-8\n");
|
||||
return 0;
|
||||
}
|
||||
if(P==2) {
|
||||
run_stride_unroll<double>(N,K,R,D,U,F,T,S);
|
||||
if ((D != 1) && (D != 2) && (D != 4) && (D != 8) && (D != 16) && (D != 32)) {
|
||||
printf("D must be one of 1,2,4,8,16,32\n");
|
||||
return 0;
|
||||
}
|
||||
if ((P != 1) && (P != 2)) {
|
||||
printf("P must be one of 1,2\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (P == 1) {
|
||||
run_stride_unroll<float>(N, K, R, D, U, F, T, S);
|
||||
}
|
||||
if (P == 2) {
|
||||
run_stride_unroll<double>(N, K, R, D, U, F, T, S);
|
||||
}
|
||||
|
||||
Kokkos::finalize();
|
||||
}
|
||||
|
||||
|
||||
@ -2,10 +2,11 @@
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
// Kokkos v. 3.0
|
||||
// Copyright (2020) National Technology & Engineering
|
||||
// Solutions of Sandia, LLC (NTESS).
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@ -23,10 +24,10 @@
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
@ -41,52 +42,44 @@
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
/// Primary template of the gather benchmark runner.
///
/// Only the interface is declared here: a single static entry point `run`
/// taking the problem parameters N, K, D, R and F. No definition of `run` is
/// provided by the primary template itself, so a partial specialization for
/// the requested UNROLL value must be visible before `run` is called.
///
/// @tparam Scalar  element type the benchmark operates on
/// @tparam UNROLL  compile-time unroll factor selecting the specialization
template <class Scalar, int UNROLL>
struct RunGather {
  static void run(int N, int K, int D, int R, int F);
};
|
||||
|
||||
#define UNROLL 1
|
||||
#include<gather_unroll.hpp>
|
||||
#include <gather_unroll.hpp>
|
||||
#undef UNROLL
|
||||
#define UNROLL 2
|
||||
#include<gather_unroll.hpp>
|
||||
#include <gather_unroll.hpp>
|
||||
#undef UNROLL
|
||||
#define UNROLL 3
|
||||
#include<gather_unroll.hpp>
|
||||
#include <gather_unroll.hpp>
|
||||
#undef UNROLL
|
||||
#define UNROLL 4
|
||||
#include<gather_unroll.hpp>
|
||||
#include <gather_unroll.hpp>
|
||||
#undef UNROLL
|
||||
#define UNROLL 5
|
||||
#include<gather_unroll.hpp>
|
||||
#include <gather_unroll.hpp>
|
||||
#undef UNROLL
|
||||
#define UNROLL 6
|
||||
#include<gather_unroll.hpp>
|
||||
#include <gather_unroll.hpp>
|
||||
#undef UNROLL
|
||||
#define UNROLL 7
|
||||
#include<gather_unroll.hpp>
|
||||
#include <gather_unroll.hpp>
|
||||
#undef UNROLL
|
||||
#define UNROLL 8
|
||||
#include<gather_unroll.hpp>
|
||||
#include <gather_unroll.hpp>
|
||||
#undef UNROLL
|
||||
|
||||
template<class Scalar>
|
||||
template <class Scalar>
|
||||
void run_gather_test(int N, int K, int D, int R, int U, int F) {
|
||||
if(U == 1)
|
||||
RunGather<Scalar,1>::run(N,K,D,R,F);
|
||||
if(U == 2)
|
||||
RunGather<Scalar,2>::run(N,K,D,R,F);
|
||||
if(U == 3)
|
||||
RunGather<Scalar,3>::run(N,K,D,R,F);
|
||||
if(U == 4)
|
||||
RunGather<Scalar,4>::run(N,K,D,R,F);
|
||||
if(U == 5)
|
||||
RunGather<Scalar,5>::run(N,K,D,R,F);
|
||||
if(U == 6)
|
||||
RunGather<Scalar,6>::run(N,K,D,R,F);
|
||||
if(U == 7)
|
||||
RunGather<Scalar,7>::run(N,K,D,R,F);
|
||||
if(U == 8)
|
||||
RunGather<Scalar,8>::run(N,K,D,R,F);
|
||||
if (U == 1) RunGather<Scalar, 1>::run(N, K, D, R, F);
|
||||
if (U == 2) RunGather<Scalar, 2>::run(N, K, D, R, F);
|
||||
if (U == 3) RunGather<Scalar, 3>::run(N, K, D, R, F);
|
||||
if (U == 4) RunGather<Scalar, 4>::run(N, K, D, R, F);
|
||||
if (U == 5) RunGather<Scalar, 5>::run(N, K, D, R, F);
|
||||
if (U == 6) RunGather<Scalar, 6>::run(N, K, D, R, F);
|
||||
if (U == 7) RunGather<Scalar, 7>::run(N, K, D, R, F);
|
||||
if (U == 8) RunGather<Scalar, 8>::run(N, K, D, R, F);
|
||||
}
|
||||
|
||||
@ -2,10 +2,11 @@
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
// Kokkos v. 3.0
|
||||
// Copyright (2020) National Technology & Engineering
|
||||
// Solutions of Sandia, LLC (NTESS).
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@ -23,10 +24,10 @@
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
@ -41,129 +42,132 @@
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include<Kokkos_Core.hpp>
|
||||
#include<Kokkos_Random.hpp>
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <Kokkos_Random.hpp>
|
||||
|
||||
template<class Scalar>
|
||||
struct RunGather<Scalar,UNROLL> {
|
||||
static void run(int N, int K, int D, int R, int F) {
|
||||
Kokkos::View<int**> connectivity("Connectivity",N,K);
|
||||
Kokkos::View<Scalar*> A_in("Input",N);
|
||||
Kokkos::View<Scalar*> B_in("Input",N);
|
||||
Kokkos::View<Scalar*> C("Output",N);
|
||||
template <class Scalar>
|
||||
struct RunGather<Scalar, UNROLL> {
|
||||
static void run(int N, int K, int D, int R, int F) {
|
||||
Kokkos::View<int**> connectivity("Connectivity", N, K);
|
||||
Kokkos::View<Scalar*> A_in("Input", N);
|
||||
Kokkos::View<Scalar*> B_in("Input", N);
|
||||
Kokkos::View<Scalar*> C("Output", N);
|
||||
|
||||
Kokkos::Random_XorShift64_Pool<> rand_pool(12313);
|
||||
Kokkos::Random_XorShift64_Pool<> rand_pool(12313);
|
||||
|
||||
Kokkos::deep_copy(A_in,1.5);
|
||||
Kokkos::deep_copy(B_in,2.0);
|
||||
Kokkos::deep_copy(A_in, 1.5);
|
||||
Kokkos::deep_copy(B_in, 2.0);
|
||||
|
||||
Kokkos::View<const Scalar*, Kokkos::MemoryTraits<Kokkos::RandomAccess> > A(A_in);
|
||||
Kokkos::View<const Scalar*, Kokkos::MemoryTraits<Kokkos::RandomAccess> > B(B_in);
|
||||
Kokkos::View<const Scalar*, Kokkos::MemoryTraits<Kokkos::RandomAccess> > A(
|
||||
A_in);
|
||||
Kokkos::View<const Scalar*, Kokkos::MemoryTraits<Kokkos::RandomAccess> > B(
|
||||
B_in);
|
||||
|
||||
Kokkos::parallel_for("InitKernel",N,
|
||||
KOKKOS_LAMBDA (const int& i) {
|
||||
auto rand_gen = rand_pool.get_state();
|
||||
for( int jj=0; jj<K; jj++) {
|
||||
connectivity(i,jj) = (rand_gen.rand(D) + i - D/2 + N)%N;
|
||||
Kokkos::parallel_for(
|
||||
"InitKernel", N, KOKKOS_LAMBDA(const int& i) {
|
||||
auto rand_gen = rand_pool.get_state();
|
||||
for (int jj = 0; jj < K; jj++) {
|
||||
connectivity(i, jj) = (rand_gen.rand(D) + i - D / 2 + N) % N;
|
||||
}
|
||||
rand_pool.free_state(rand_gen);
|
||||
});
|
||||
Kokkos::fence();
|
||||
|
||||
Kokkos::Timer timer;
|
||||
for (int r = 0; r < R; r++) {
|
||||
Kokkos::parallel_for(
|
||||
"BenchmarkKernel", N, KOKKOS_LAMBDA(const int& i) {
|
||||
Scalar c = Scalar(0.0);
|
||||
for (int jj = 0; jj < K; jj++) {
|
||||
const int j = connectivity(i, jj);
|
||||
Scalar a1 = A(j);
|
||||
const Scalar b = B(j);
|
||||
#if (UNROLL > 1)
|
||||
Scalar a2 = a1 * Scalar(1.3);
|
||||
#endif
|
||||
#if (UNROLL > 2)
|
||||
Scalar a3 = a2 * Scalar(1.1);
|
||||
#endif
|
||||
#if (UNROLL > 3)
|
||||
Scalar a4 = a3 * Scalar(1.1);
|
||||
#endif
|
||||
#if (UNROLL > 4)
|
||||
Scalar a5 = a4 * Scalar(1.3);
|
||||
#endif
|
||||
#if (UNROLL > 5)
|
||||
Scalar a6 = a5 * Scalar(1.1);
|
||||
#endif
|
||||
#if (UNROLL > 6)
|
||||
Scalar a7 = a6 * Scalar(1.1);
|
||||
#endif
|
||||
#if (UNROLL > 7)
|
||||
Scalar a8 = a7 * Scalar(1.1);
|
||||
#endif
|
||||
|
||||
for (int f = 0; f < F; f++) {
|
||||
a1 += b * a1;
|
||||
#if (UNROLL > 1)
|
||||
a2 += b * a2;
|
||||
#endif
|
||||
#if (UNROLL > 2)
|
||||
a3 += b * a3;
|
||||
#endif
|
||||
#if (UNROLL > 3)
|
||||
a4 += b * a4;
|
||||
#endif
|
||||
#if (UNROLL > 4)
|
||||
a5 += b * a5;
|
||||
#endif
|
||||
#if (UNROLL > 5)
|
||||
a6 += b * a6;
|
||||
#endif
|
||||
#if (UNROLL > 6)
|
||||
a7 += b * a7;
|
||||
#endif
|
||||
#if (UNROLL > 7)
|
||||
a8 += b * a8;
|
||||
#endif
|
||||
}
|
||||
#if (UNROLL == 1)
|
||||
c += a1;
|
||||
#endif
|
||||
#if (UNROLL == 2)
|
||||
c += a1 + a2;
|
||||
#endif
|
||||
#if (UNROLL == 3)
|
||||
c += a1 + a2 + a3;
|
||||
#endif
|
||||
#if (UNROLL == 4)
|
||||
c += a1 + a2 + a3 + a4;
|
||||
#endif
|
||||
#if (UNROLL == 5)
|
||||
c += a1 + a2 + a3 + a4 + a5;
|
||||
#endif
|
||||
#if (UNROLL == 6)
|
||||
c += a1 + a2 + a3 + a4 + a5 + a6;
|
||||
#endif
|
||||
#if (UNROLL == 7)
|
||||
c += a1 + a2 + a3 + a4 + a5 + a6 + a7;
|
||||
#endif
|
||||
#if (UNROLL == 8)
|
||||
c += a1 + a2 + a3 + a4 + a5 + a6 + a7 + a8;
|
||||
#endif
|
||||
}
|
||||
C(i) = c;
|
||||
});
|
||||
Kokkos::fence();
|
||||
}
|
||||
rand_pool.free_state(rand_gen);
|
||||
});
|
||||
Kokkos::fence();
|
||||
double seconds = timer.seconds();
|
||||
|
||||
|
||||
Kokkos::Timer timer;
|
||||
for(int r = 0; r<R; r++) {
|
||||
Kokkos::parallel_for("BenchmarkKernel",N,
|
||||
KOKKOS_LAMBDA (const int& i) {
|
||||
Scalar c = Scalar(0.0);
|
||||
for( int jj=0; jj<K; jj++) {
|
||||
const int j = connectivity(i,jj);
|
||||
Scalar a1 = A(j);
|
||||
const Scalar b = B(j);
|
||||
#if(UNROLL>1)
|
||||
Scalar a2 = a1*Scalar(1.3);
|
||||
#endif
|
||||
#if(UNROLL>2)
|
||||
Scalar a3 = a2*Scalar(1.1);
|
||||
#endif
|
||||
#if(UNROLL>3)
|
||||
Scalar a4 = a3*Scalar(1.1);
|
||||
#endif
|
||||
#if(UNROLL>4)
|
||||
Scalar a5 = a4*Scalar(1.3);
|
||||
#endif
|
||||
#if(UNROLL>5)
|
||||
Scalar a6 = a5*Scalar(1.1);
|
||||
#endif
|
||||
#if(UNROLL>6)
|
||||
Scalar a7 = a6*Scalar(1.1);
|
||||
#endif
|
||||
#if(UNROLL>7)
|
||||
Scalar a8 = a7*Scalar(1.1);
|
||||
#endif
|
||||
|
||||
|
||||
for(int f = 0; f<F; f++) {
|
||||
a1 += b*a1;
|
||||
#if(UNROLL>1)
|
||||
a2 += b*a2;
|
||||
#endif
|
||||
#if(UNROLL>2)
|
||||
a3 += b*a3;
|
||||
#endif
|
||||
#if(UNROLL>3)
|
||||
a4 += b*a4;
|
||||
#endif
|
||||
#if(UNROLL>4)
|
||||
a5 += b*a5;
|
||||
#endif
|
||||
#if(UNROLL>5)
|
||||
a6 += b*a6;
|
||||
#endif
|
||||
#if(UNROLL>6)
|
||||
a7 += b*a7;
|
||||
#endif
|
||||
#if(UNROLL>7)
|
||||
a8 += b*a8;
|
||||
#endif
|
||||
|
||||
|
||||
}
|
||||
#if(UNROLL==1)
|
||||
c += a1;
|
||||
#endif
|
||||
#if(UNROLL==2)
|
||||
c += a1+a2;
|
||||
#endif
|
||||
#if(UNROLL==3)
|
||||
c += a1+a2+a3;
|
||||
#endif
|
||||
#if(UNROLL==4)
|
||||
c += a1+a2+a3+a4;
|
||||
#endif
|
||||
#if(UNROLL==5)
|
||||
c += a1+a2+a3+a4+a5;
|
||||
#endif
|
||||
#if(UNROLL==6)
|
||||
c += a1+a2+a3+a4+a5+a6;
|
||||
#endif
|
||||
#if(UNROLL==7)
|
||||
c += a1+a2+a3+a4+a5+a6+a7;
|
||||
#endif
|
||||
#if(UNROLL==8)
|
||||
c += a1+a2+a3+a4+a5+a6+a7+a8;
|
||||
#endif
|
||||
|
||||
}
|
||||
C(i) = c ;
|
||||
});
|
||||
Kokkos::fence();
|
||||
double bytes = 1.0 * N * K * R * (2 * sizeof(Scalar) + sizeof(int)) +
|
||||
1.0 * N * R * sizeof(Scalar);
|
||||
double flops = 1.0 * N * K * R * (F * 2 * UNROLL + 2 * (UNROLL - 1));
|
||||
double gather_ops = 1.0 * N * K * R * 2;
|
||||
printf(
|
||||
"SNKDRUF: %i %i %i %i %i %i %i Time: %lfs Bandwidth: %lfGiB/s GFlop/s: "
|
||||
"%lf GGather/s: %lf\n",
|
||||
sizeof(Scalar) / 4, N, K, D, R, UNROLL, F, seconds,
|
||||
1.0 * bytes / seconds / 1024 / 1024 / 1024, 1.e-9 * flops / seconds,
|
||||
1.e-9 * gather_ops / seconds);
|
||||
}
|
||||
double seconds = timer.seconds();
|
||||
|
||||
double bytes = 1.0*N*K*R*(2*sizeof(Scalar)+sizeof(int)) + 1.0*N*R*sizeof(Scalar);
|
||||
double flops = 1.0*N*K*R*(F*2*UNROLL + 2*(UNROLL-1));
|
||||
double gather_ops = 1.0*N*K*R*2;
|
||||
printf("SNKDRUF: %i %i %i %i %i %i %i Time: %lfs Bandwidth: %lfGiB/s GFlop/s: %lf GGather/s: %lf\n",sizeof(Scalar)/4,N,K,D,R,UNROLL,F,seconds,1.0*bytes/seconds/1024/1024/1024,1.e-9*flops/seconds,1.e-9*gather_ops/seconds);
|
||||
}
|
||||
};
|
||||
|
||||
@ -2,10 +2,11 @@
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
// Kokkos v. 3.0
|
||||
// Copyright (2020) National Technology & Engineering
|
||||
// Solutions of Sandia, LLC (NTESS).
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@ -23,10 +24,10 @@
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
@ -41,23 +42,26 @@
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include<Kokkos_Core.hpp>
|
||||
#include<impl/Kokkos_Timer.hpp>
|
||||
#include<gather.hpp>
|
||||
#include<cstdlib>
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <impl/Kokkos_Timer.hpp>
|
||||
#include <gather.hpp>
|
||||
#include <cstdlib>
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
Kokkos::initialize(argc,argv);
|
||||
Kokkos::initialize(argc, argv);
|
||||
|
||||
if(argc<8) {
|
||||
if (argc < 8) {
|
||||
printf("Arguments: S N K D\n");
|
||||
printf(" S: Scalar Type Size (1==float, 2==double, 4=complex<double>)\n");
|
||||
printf(
|
||||
" S: Scalar Type Size (1==float, 2==double, 4=complex<double>)\n");
|
||||
printf(" N: Number of entities\n");
|
||||
printf(" K: Number of things to gather per entity\n");
|
||||
printf(" D: Max distance of gathered things of an entity\n");
|
||||
printf(" R: how often to loop through the K dimension with each team\n");
|
||||
printf(" U: how many independent flops to do per load\n");
|
||||
printf(" F: how many times to repeat the U unrolled operations before reading next element\n");
|
||||
printf(
|
||||
" F: how many times to repeat the U unrolled operations before "
|
||||
"reading next element\n");
|
||||
printf("Example Input GPU:\n");
|
||||
printf(" Bandwidth Bound : 2 10000000 1 1 10 1 1\n");
|
||||
printf(" Cache Bound : 2 10000000 64 1 10 1 1\n");
|
||||
@ -68,7 +72,6 @@ int main(int argc, char* argv[]) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int S = atoi(argv[1]);
|
||||
int N = atoi(argv[2]);
|
||||
int K = atoi(argv[3]);
|
||||
@ -77,17 +80,22 @@ int main(int argc, char* argv[]) {
|
||||
int U = atoi(argv[6]);
|
||||
int F = atoi(argv[7]);
|
||||
|
||||
if( (S!=1) && (S!=2) && (S!=4)) {printf("S must be one of 1,2,4\n"); return 0;}
|
||||
if( N<D ) {printf("N must be larger or equal to D\n"); return 0; }
|
||||
if(S==1) {
|
||||
run_gather_test<float>(N,K,D,R,U,F);
|
||||
if ((S != 1) && (S != 2) && (S != 4)) {
|
||||
printf("S must be one of 1,2,4\n");
|
||||
return 0;
|
||||
}
|
||||
if(S==2) {
|
||||
run_gather_test<double>(N,K,D,R,U,F);
|
||||
if (N < D) {
|
||||
printf("N must be larger or equal to D\n");
|
||||
return 0;
|
||||
}
|
||||
if(S==4) {
|
||||
run_gather_test<Kokkos::complex<double> >(N,K,D,R,U,F);
|
||||
if (S == 1) {
|
||||
run_gather_test<float>(N, K, D, R, U, F);
|
||||
}
|
||||
if (S == 2) {
|
||||
run_gather_test<double>(N, K, D, R, U, F);
|
||||
}
|
||||
if (S == 4) {
|
||||
run_gather_test<Kokkos::complex<double> >(N, K, D, R, U, F);
|
||||
}
|
||||
Kokkos::finalize();
|
||||
}
|
||||
|
||||
|
||||
@ -2,10 +2,11 @@
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
// Kokkos v. 3.0
|
||||
// Copyright (2020) National Technology & Engineering
|
||||
// Solutions of Sandia, LLC (NTESS).
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@ -23,10 +24,10 @@
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
|
||||
@ -2,10 +2,11 @@
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
// Kokkos v. 3.0
|
||||
// Copyright (2020) National Technology & Engineering
|
||||
// Solutions of Sandia, LLC (NTESS).
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@ -23,10 +24,10 @@
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
@ -44,67 +45,86 @@
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include "policy_perf_test.hpp"
|
||||
|
||||
int main(int argc, char* argv[] ) {
|
||||
Kokkos::initialize(argc,argv);
|
||||
int main(int argc, char* argv[]) {
|
||||
Kokkos::initialize(argc, argv);
|
||||
|
||||
if(argc<10) {
|
||||
if (argc < 10) {
|
||||
printf(" Ten arguments are needed to run this program:\n");
|
||||
printf(" (1)team_range, (2)thread_range, (3)vector_range, (4)outer_repeat, (5)thread_repeat, (6)vector_repeat, (7)team_size, (8)vector_size, (9)schedule, (10)test_type\n");
|
||||
printf(
|
||||
" (1)team_range, (2)thread_range, (3)vector_range, (4)outer_repeat, "
|
||||
"(5)thread_repeat, (6)vector_repeat, (7)team_size, (8)vector_size, "
|
||||
"(9)schedule, (10)test_type\n");
|
||||
printf(" team_range: number of teams (league_size)\n");
|
||||
printf(" thread_range: range for nested TeamThreadRange parallel_*\n");
|
||||
printf(" vector_range: range for nested ThreadVectorRange parallel_*\n");
|
||||
printf(" outer_repeat: number of repeats for outer parallel_* call\n");
|
||||
printf(" thread_repeat: number of repeats for TeamThreadRange parallel_* call\n");
|
||||
printf(" vector_repeat: number of repeats for ThreadVectorRange parallel_* call\n");
|
||||
printf(
|
||||
" thread_repeat: number of repeats for TeamThreadRange parallel_* "
|
||||
"call\n");
|
||||
printf(
|
||||
" vector_repeat: number of repeats for ThreadVectorRange parallel_* "
|
||||
"call\n");
|
||||
printf(" team_size: number of team members (team_size)\n");
|
||||
printf(" vector_size: desired vectorization (if possible)\n");
|
||||
printf(" schedule: 1 == Static 2 == Dynamic\n");
|
||||
printf(" test_type: 3-digit code XYZ for testing (nested) parallel_*\n");
|
||||
printf(" code key: XYZ X in {1,2,3,4,5}, Y in {0,1,2}, Z in {0,1,2}\n");
|
||||
printf(
|
||||
" test_type: 3-digit code XYZ for testing (nested) parallel_*\n");
|
||||
printf(
|
||||
" code key: XYZ X in {1,2,3,4,5}, Y in {0,1,2}, Z in "
|
||||
"{0,1,2}\n");
|
||||
printf(" TeamPolicy:\n");
|
||||
printf(" X: 0 = none (never used, makes no sense); 1 = parallel_for; 2 = parallel_reduce\n");
|
||||
printf(" Y: 0 = none; 1 = parallel_for; 2 = parallel_reduce\n");
|
||||
printf(" Z: 0 = none; 1 = parallel_for; 2 = parallel_reduce\n");
|
||||
printf(
|
||||
" X: 0 = none (never used, makes no sense); 1 = "
|
||||
"parallel_for; 2 = parallel_reduce\n");
|
||||
printf(
|
||||
" Y: 0 = none; 1 = parallel_for; 2 = "
|
||||
"parallel_reduce\n");
|
||||
printf(
|
||||
" Z: 0 = none; 1 = parallel_for; 2 = "
|
||||
"parallel_reduce\n");
|
||||
printf(" RangePolicy:\n");
|
||||
printf(" X: 3 = parallel_for; 4 = parallel_reduce; 5 = parallel_scan\n");
|
||||
printf(
|
||||
" X: 3 = parallel_for; 4 = parallel_reduce; 5 = "
|
||||
"parallel_scan\n");
|
||||
printf(" Y: 0 = none\n");
|
||||
printf(" Z: 0 = none\n");
|
||||
printf(" Example Input:\n");
|
||||
printf(" 100000 32 32 100 100 100 8 1 1 100\n");
|
||||
printf(" 100000 32 32 100 100 100 8 1 1 100\n");
|
||||
Kokkos::finalize();
|
||||
return 0;
|
||||
}
|
||||
|
||||
int team_range = atoi(argv[1]);
|
||||
int team_range = atoi(argv[1]);
|
||||
int thread_range = atoi(argv[2]);
|
||||
int vector_range = atoi(argv[3]);
|
||||
|
||||
int outer_repeat = atoi(argv[4]);
|
||||
int outer_repeat = atoi(argv[4]);
|
||||
int thread_repeat = atoi(argv[5]);
|
||||
int vector_repeat = atoi(argv[6]);
|
||||
|
||||
int team_size = atoi(argv[7]);
|
||||
int team_size = atoi(argv[7]);
|
||||
int vector_size = atoi(argv[8]);
|
||||
int schedule = atoi(argv[9]);
|
||||
int test_type = atoi(argv[10]);
|
||||
int schedule = atoi(argv[9]);
|
||||
int test_type = atoi(argv[10]);
|
||||
|
||||
int disable_verbose_output = 0;
|
||||
if ( argc > 11 ) {
|
||||
int disable_verbose_output = 0;
|
||||
if (argc > 11) {
|
||||
disable_verbose_output = atoi(argv[11]);
|
||||
}
|
||||
|
||||
if ( schedule != 1 && schedule != 2 ) {
|
||||
if (schedule != 1 && schedule != 2) {
|
||||
printf("schedule: %d\n", schedule);
|
||||
printf("Options for schedule are: 1 == Static 2 == Dynamic\n");
|
||||
Kokkos::finalize();
|
||||
return -1;
|
||||
}
|
||||
|
||||
if ( test_type != 100 && test_type != 110 && test_type != 111 && test_type != 112 && test_type != 120 && test_type != 121 && test_type != 122
|
||||
&& test_type != 200 && test_type != 210 && test_type != 211 && test_type != 212 && test_type != 220 && test_type != 221 && test_type != 222
|
||||
&& test_type != 300 && test_type != 400 && test_type != 500
|
||||
)
|
||||
{
|
||||
if (test_type != 100 && test_type != 110 && test_type != 111 &&
|
||||
test_type != 112 && test_type != 120 && test_type != 121 &&
|
||||
test_type != 122 && test_type != 200 && test_type != 210 &&
|
||||
test_type != 211 && test_type != 212 && test_type != 220 &&
|
||||
test_type != 221 && test_type != 222 && test_type != 300 &&
|
||||
test_type != 400 && test_type != 500) {
|
||||
printf("Incorrect test_type option\n");
|
||||
Kokkos::finalize();
|
||||
return -2;
|
||||
@ -112,56 +132,85 @@ int main(int argc, char* argv[] ) {
|
||||
|
||||
double result = 0.0;
|
||||
|
||||
Kokkos::parallel_reduce( "parallel_reduce warmup", Kokkos::TeamPolicy<>(10,1),
|
||||
KOKKOS_LAMBDA(const Kokkos::TeamPolicy<>::member_type team, double& lval) {
|
||||
lval += 1;
|
||||
}, result);
|
||||
Kokkos::parallel_reduce(
|
||||
"parallel_reduce warmup", Kokkos::TeamPolicy<>(10, 1),
|
||||
KOKKOS_LAMBDA(const Kokkos::TeamPolicy<>::member_type team,
|
||||
double& lval) { lval += 1; },
|
||||
result);
|
||||
|
||||
typedef Kokkos::View<double*, Kokkos::LayoutRight> view_type_1d;
|
||||
typedef Kokkos::View<double**, Kokkos::LayoutRight> view_type_2d;
|
||||
typedef Kokkos::View<double*, Kokkos::LayoutRight> view_type_1d;
|
||||
typedef Kokkos::View<double**, Kokkos::LayoutRight> view_type_2d;
|
||||
typedef Kokkos::View<double***, Kokkos::LayoutRight> view_type_3d;
|
||||
|
||||
// Allocate view without initializing
|
||||
// Call a 'warmup' test with 1 repeat - this will initialize the corresponding view appropriately for test and should obey first-touch etc
|
||||
// Second call to test is the one we actually care about and time
|
||||
view_type_1d v_1( Kokkos::ViewAllocateWithoutInitializing("v_1"), team_range*team_size);
|
||||
view_type_2d v_2( Kokkos::ViewAllocateWithoutInitializing("v_2"), team_range*team_size, thread_range);
|
||||
view_type_3d v_3( Kokkos::ViewAllocateWithoutInitializing("v_3"), team_range*team_size, thread_range, vector_range);
|
||||
// Call a 'warmup' test with 1 repeat - this will initialize the corresponding
|
||||
// view appropriately for test and should obey first-touch etc. Second call to
|
||||
// test is the one we actually care about and time
|
||||
view_type_1d v_1(Kokkos::ViewAllocateWithoutInitializing("v_1"),
|
||||
team_range * team_size);
|
||||
view_type_2d v_2(Kokkos::ViewAllocateWithoutInitializing("v_2"),
|
||||
team_range * team_size, thread_range);
|
||||
view_type_3d v_3(Kokkos::ViewAllocateWithoutInitializing("v_3"),
|
||||
team_range * team_size, thread_range, vector_range);
|
||||
|
||||
double result_computed = 0.0;
|
||||
double result_expect = 0.0;
|
||||
double time = 0.0;
|
||||
double result_expect = 0.0;
|
||||
double time = 0.0;
|
||||
|
||||
if(schedule==1) {
|
||||
if ( test_type != 500 ) {
|
||||
if (schedule == 1) {
|
||||
if (test_type != 500) {
|
||||
// warmup - no repeat of loops
|
||||
test_policy<Kokkos::Schedule<Kokkos::Static>,int>(team_range,thread_range,vector_range,1,1,1,team_size,vector_size,test_type,v_1,v_2,v_3,result_computed,result_expect,time);
|
||||
test_policy<Kokkos::Schedule<Kokkos::Static>,int>(team_range,thread_range,vector_range,outer_repeat,thread_repeat,vector_repeat,team_size,vector_size,test_type,v_1,v_2,v_3,result_computed,result_expect,time);
|
||||
}
|
||||
else {
|
||||
test_policy<Kokkos::Schedule<Kokkos::Static>, int>(
|
||||
team_range, thread_range, vector_range, 1, 1, 1, team_size,
|
||||
vector_size, test_type, v_1, v_2, v_3, result_computed, result_expect,
|
||||
time);
|
||||
test_policy<Kokkos::Schedule<Kokkos::Static>, int>(
|
||||
team_range, thread_range, vector_range, outer_repeat, thread_repeat,
|
||||
vector_repeat, team_size, vector_size, test_type, v_1, v_2, v_3,
|
||||
result_computed, result_expect, time);
|
||||
} else {
|
||||
// parallel_scan: initialize 1d view for parallel_scan
|
||||
test_policy<Kokkos::Schedule<Kokkos::Static>,int>(team_range,thread_range,vector_range,1,1,1,team_size,vector_size,100,v_1,v_2,v_3,result_computed,result_expect,time);
|
||||
test_policy<Kokkos::Schedule<Kokkos::Static>,int>(team_range,thread_range,vector_range,outer_repeat,thread_repeat,vector_repeat,team_size,vector_size,test_type,v_1,v_2,v_3,result_computed,result_expect,time);
|
||||
test_policy<Kokkos::Schedule<Kokkos::Static>, int>(
|
||||
team_range, thread_range, vector_range, 1, 1, 1, team_size,
|
||||
vector_size, 100, v_1, v_2, v_3, result_computed, result_expect,
|
||||
time);
|
||||
test_policy<Kokkos::Schedule<Kokkos::Static>, int>(
|
||||
team_range, thread_range, vector_range, outer_repeat, thread_repeat,
|
||||
vector_repeat, team_size, vector_size, test_type, v_1, v_2, v_3,
|
||||
result_computed, result_expect, time);
|
||||
}
|
||||
}
|
||||
if(schedule==2) {
|
||||
if ( test_type != 500 ) {
|
||||
if (schedule == 2) {
|
||||
if (test_type != 500) {
|
||||
// warmup - no repeat of loops
|
||||
test_policy<Kokkos::Schedule<Kokkos::Dynamic>,int>(team_range,thread_range,vector_range,1,1,1,team_size,vector_size,test_type,v_1,v_2,v_3,result_computed,result_expect,time);
|
||||
test_policy<Kokkos::Schedule<Kokkos::Dynamic>,int>(team_range,thread_range,vector_range,outer_repeat,thread_repeat,vector_repeat,team_size,vector_size,test_type,v_1,v_2,v_3,result_computed,result_expect,time);
|
||||
}
|
||||
else {
|
||||
test_policy<Kokkos::Schedule<Kokkos::Dynamic>, int>(
|
||||
team_range, thread_range, vector_range, 1, 1, 1, team_size,
|
||||
vector_size, test_type, v_1, v_2, v_3, result_computed, result_expect,
|
||||
time);
|
||||
test_policy<Kokkos::Schedule<Kokkos::Dynamic>, int>(
|
||||
team_range, thread_range, vector_range, outer_repeat, thread_repeat,
|
||||
vector_repeat, team_size, vector_size, test_type, v_1, v_2, v_3,
|
||||
result_computed, result_expect, time);
|
||||
} else {
|
||||
// parallel_scan: initialize 1d view for parallel_scan
|
||||
test_policy<Kokkos::Schedule<Kokkos::Static>,int>(team_range,thread_range,vector_range,1,1,1,team_size,vector_size,100,v_1,v_2,v_3,result_computed,result_expect,time);
|
||||
test_policy<Kokkos::Schedule<Kokkos::Static>,int>(team_range,thread_range,vector_range,outer_repeat,thread_repeat,vector_repeat,team_size,vector_size,test_type,v_1,v_2,v_3,result_computed,result_expect,time);
|
||||
test_policy<Kokkos::Schedule<Kokkos::Static>, int>(
|
||||
team_range, thread_range, vector_range, 1, 1, 1, team_size,
|
||||
vector_size, 100, v_1, v_2, v_3, result_computed, result_expect,
|
||||
time);
|
||||
test_policy<Kokkos::Schedule<Kokkos::Static>, int>(
|
||||
team_range, thread_range, vector_range, outer_repeat, thread_repeat,
|
||||
vector_repeat, team_size, vector_size, test_type, v_1, v_2, v_3,
|
||||
result_computed, result_expect, time);
|
||||
}
|
||||
}
|
||||
|
||||
if ( disable_verbose_output == 0 ) {
|
||||
printf("%7i %4i %2i %9i %4i %4i %4i %2i %1i %3i %e %e %lf\n",team_range,thread_range,vector_range,outer_repeat,thread_repeat,vector_repeat,team_size,vector_size,schedule,test_type,result_computed,result_expect,time);
|
||||
}
|
||||
else {
|
||||
printf("%lf\n",time);
|
||||
if (disable_verbose_output == 0) {
|
||||
printf("%7i %4i %2i %9i %4i %4i %4i %2i %1i %3i %e %e %lf\n", team_range,
|
||||
thread_range, vector_range, outer_repeat, thread_repeat,
|
||||
vector_repeat, team_size, vector_size, schedule, test_type,
|
||||
result_computed, result_expect, time);
|
||||
} else {
|
||||
printf("%lf\n", time);
|
||||
}
|
||||
|
||||
Kokkos::finalize();
|
||||
|
||||
@ -2,10 +2,11 @@
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
// Kokkos v. 3.0
|
||||
// Copyright (2020) National Technology & Engineering
|
||||
// Solutions of Sandia, LLC (NTESS).
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@ -23,10 +24,10 @@
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
@ -43,297 +44,375 @@
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
// ParallelScanFunctor: scan functor over a view `v`, passed to
// Kokkos::parallel_scan by test_policy (test_type 500).
// The running value type is double. The removed and added spellings of the
// same lines appear back to back in this rendering.
template < class ViewType >
|
||||
template <class ViewType>
|
||||
struct ParallelScanFunctor {
|
||||
using value_type = double;
|
||||
ViewType v;
|
||||
|
||||
ParallelScanFunctor( const ViewType & v_ )
|
||||
: v(v_)
|
||||
{}
|
||||
ParallelScanFunctor(const ViewType& v_) : v(v_) {}
|
||||
|
||||
// Scan step: adds v(idx) to the running value `val`; when `final` is true
// the running value (which already includes v(idx)) is stored back into
// v(idx).
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const int idx, value_type& val, const bool& final ) const
|
||||
{
|
||||
// inclusive scan
|
||||
val += v(idx);
|
||||
if ( final ) {
|
||||
v(idx) = val;
|
||||
}
|
||||
void operator()(const int idx, value_type& val, const bool& final) const {
|
||||
// inclusive scan
|
||||
val += v(idx);
|
||||
if (final) {
|
||||
v(idx) = val;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<class ScheduleType,class IndexType,class ViewType1, class ViewType2, class ViewType3>
|
||||
template <class ScheduleType, class IndexType, class ViewType1, class ViewType2,
|
||||
class ViewType3>
|
||||
void test_policy(int team_range, int thread_range, int vector_range,
|
||||
int outer_repeat, int thread_repeat, int inner_repeat,
|
||||
int team_size, int vector_size, int test_type,
|
||||
ViewType1 &v1, ViewType2 &v2, ViewType3 &v3,
|
||||
double &result, double &result_expect, double &time) {
|
||||
|
||||
typedef Kokkos::TeamPolicy<ScheduleType,IndexType> t_policy;
|
||||
int outer_repeat, int thread_repeat, int inner_repeat,
|
||||
int team_size, int vector_size, int test_type, ViewType1& v1,
|
||||
ViewType2& v2, ViewType3& v3, double& result,
|
||||
double& result_expect, double& time) {
|
||||
typedef Kokkos::TeamPolicy<ScheduleType, IndexType> t_policy;
|
||||
typedef typename t_policy::member_type t_team;
|
||||
Kokkos::Timer timer;
|
||||
|
||||
for(int orep = 0; orep<outer_repeat; orep++) {
|
||||
|
||||
for (int orep = 0; orep < outer_repeat; orep++) {
|
||||
if (test_type == 100) {
|
||||
Kokkos::parallel_for("100 outer for", t_policy(team_range,team_size),
|
||||
KOKKOS_LAMBDA (const t_team& team) {
|
||||
long idx = team.league_rank()*team.team_size() + team.team_rank();
|
||||
v1(idx) = idx;
|
||||
// prevent compiler optimizing loop away
|
||||
});
|
||||
Kokkos::parallel_for(
|
||||
"100 outer for", t_policy(team_range, team_size),
|
||||
KOKKOS_LAMBDA(const t_team& team) {
|
||||
long idx = team.league_rank() * team.team_size() + team.team_rank();
|
||||
v1(idx) = idx;
|
||||
// prevent compiler optimizing loop away
|
||||
});
|
||||
}
|
||||
|
||||
if (test_type == 110) {
|
||||
Kokkos::parallel_for("110 outer for", t_policy(team_range,team_size),
|
||||
KOKKOS_LAMBDA (const t_team& team) {
|
||||
long idx = team.league_rank()*team.team_size() + team.team_rank();
|
||||
for (int tr = 0; tr<thread_repeat; ++tr) {
|
||||
// Each team launches a parallel_for; thread_range is partitioned among team members
|
||||
Kokkos::parallel_for(Kokkos::TeamThreadRange(team,thread_range), [&] (const int t) {
|
||||
v2( idx, t ) = t;
|
||||
// prevent compiler optimizing loop away
|
||||
});
|
||||
}
|
||||
});
|
||||
Kokkos::parallel_for(
|
||||
"110 outer for", t_policy(team_range, team_size),
|
||||
KOKKOS_LAMBDA(const t_team& team) {
|
||||
long idx = team.league_rank() * team.team_size() + team.team_rank();
|
||||
for (int tr = 0; tr < thread_repeat; ++tr) {
|
||||
// Each team launches a parallel_for; thread_range is partitioned
|
||||
// among team members
|
||||
Kokkos::parallel_for(Kokkos::TeamThreadRange(team, thread_range),
|
||||
[&](const int t) {
|
||||
v2(idx, t) = t;
|
||||
// prevent compiler optimizing loop away
|
||||
});
|
||||
}
|
||||
});
|
||||
}
|
||||
if (test_type == 111) {
|
||||
Kokkos::parallel_for("111 outer for", t_policy(team_range,team_size,vector_size),
|
||||
KOKKOS_LAMBDA (const t_team& team) {
|
||||
long idx = team.league_rank()*team.team_size() + team.team_rank();
|
||||
for (int tr = 0; tr<thread_repeat; ++tr) {
|
||||
// Each team launches a parallel_for; thread_range is partitioned among team members
|
||||
Kokkos::parallel_for(Kokkos::TeamThreadRange(team,thread_range), [&] (const int t) {
|
||||
for (int vr = 0; vr<inner_repeat; ++vr)
|
||||
Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,vector_range), [&] (const int vi) {
|
||||
v3( idx, t, vi ) = vi;
|
||||
// prevent compiler optimizing loop away
|
||||
});
|
||||
});
|
||||
}
|
||||
});
|
||||
Kokkos::parallel_for(
|
||||
"111 outer for", t_policy(team_range, team_size, vector_size),
|
||||
KOKKOS_LAMBDA(const t_team& team) {
|
||||
long idx = team.league_rank() * team.team_size() + team.team_rank();
|
||||
for (int tr = 0; tr < thread_repeat; ++tr) {
|
||||
// Each team launches a parallel_for; thread_range is partitioned
|
||||
// among team members
|
||||
Kokkos::parallel_for(
|
||||
Kokkos::TeamThreadRange(team, thread_range),
|
||||
[&](const int t) {
|
||||
for (int vr = 0; vr < inner_repeat; ++vr)
|
||||
Kokkos::parallel_for(
|
||||
Kokkos::ThreadVectorRange(team, vector_range),
|
||||
[&](const int vi) {
|
||||
v3(idx, t, vi) = vi;
|
||||
// prevent compiler optimizing loop away
|
||||
});
|
||||
});
|
||||
}
|
||||
});
|
||||
}
|
||||
if (test_type == 112) {
|
||||
Kokkos::parallel_for("112 outer for", t_policy(team_range,team_size,vector_size),
|
||||
KOKKOS_LAMBDA (const t_team& team) {
|
||||
long idx = team.league_rank()*team.team_size() + team.team_rank();
|
||||
for (int tr = 0; tr<thread_repeat; ++tr) {
|
||||
// Each team launches a parallel_for; thread_range is partitioned among team members
|
||||
Kokkos::parallel_for(Kokkos::TeamThreadRange(team,thread_range), [&] (const int t) {
|
||||
double vector_result = 0.0;
|
||||
for (int vr = 0; vr<inner_repeat; ++vr) {
|
||||
vector_result = 0.0;
|
||||
Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,vector_range), [&] (const int vi, double &vval) {
|
||||
vval += 1;
|
||||
}, vector_result);
|
||||
}
|
||||
v2( idx, t ) = vector_result;
|
||||
// prevent compiler optimizing loop away
|
||||
});
|
||||
}
|
||||
});
|
||||
Kokkos::parallel_for(
|
||||
"112 outer for", t_policy(team_range, team_size, vector_size),
|
||||
KOKKOS_LAMBDA(const t_team& team) {
|
||||
long idx = team.league_rank() * team.team_size() + team.team_rank();
|
||||
for (int tr = 0; tr < thread_repeat; ++tr) {
|
||||
// Each team launches a parallel_for; thread_range is partitioned
|
||||
// among team members
|
||||
Kokkos::parallel_for(
|
||||
Kokkos::TeamThreadRange(team, thread_range),
|
||||
[&](const int t) {
|
||||
double vector_result = 0.0;
|
||||
for (int vr = 0; vr < inner_repeat; ++vr) {
|
||||
vector_result = 0.0;
|
||||
Kokkos::parallel_reduce(
|
||||
Kokkos::ThreadVectorRange(team, vector_range),
|
||||
[&](const int vi, double& vval) { vval += 1; },
|
||||
vector_result);
|
||||
}
|
||||
v2(idx, t) = vector_result;
|
||||
// prevent compiler optimizing loop away
|
||||
});
|
||||
}
|
||||
});
|
||||
}
|
||||
if (test_type == 120) {
|
||||
Kokkos::parallel_for("120 outer for", t_policy(team_range,team_size),
|
||||
KOKKOS_LAMBDA (const t_team& team) {
|
||||
long idx = team.league_rank()*team.team_size() + team.team_rank();
|
||||
double team_result = 0.0;
|
||||
for (int tr = 0; tr<thread_repeat; ++tr) {
|
||||
team_result = 0.0;
|
||||
Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team,thread_range), [&] (const int t, double &lval) {
|
||||
lval += 1;
|
||||
}, team_result);
|
||||
}
|
||||
v1(idx) = team_result;
|
||||
// prevent compiler optimizing loop away
|
||||
});
|
||||
Kokkos::parallel_for(
|
||||
"120 outer for", t_policy(team_range, team_size),
|
||||
KOKKOS_LAMBDA(const t_team& team) {
|
||||
long idx = team.league_rank() * team.team_size() + team.team_rank();
|
||||
double team_result = 0.0;
|
||||
for (int tr = 0; tr < thread_repeat; ++tr) {
|
||||
team_result = 0.0;
|
||||
Kokkos::parallel_reduce(
|
||||
Kokkos::TeamThreadRange(team, thread_range),
|
||||
[&](const int t, double& lval) { lval += 1; }, team_result);
|
||||
}
|
||||
v1(idx) = team_result;
|
||||
// prevent compiler optimizing loop away
|
||||
});
|
||||
}
|
||||
if (test_type == 121) {
|
||||
Kokkos::parallel_for("121 outer for", t_policy(team_range,team_size,vector_size),
|
||||
KOKKOS_LAMBDA (const t_team& team) {
|
||||
long idx = team.league_rank()*team.team_size() + team.team_rank();
|
||||
double team_result = 0.0;
|
||||
for (int tr = 0; tr<thread_repeat; ++tr) {
|
||||
team_result = 0.0;
|
||||
Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team,thread_range), [&] (const int t, double &lval) {
|
||||
lval += 1;
|
||||
for (int vr = 0; vr<inner_repeat; ++vr) {
|
||||
Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,vector_range), [&] (const int vi) {
|
||||
v3( idx, t, vi ) = vi;
|
||||
// prevent compiler optimizing loop away
|
||||
});
|
||||
}
|
||||
}, team_result);
|
||||
}
|
||||
v3( idx, 0, 0 ) = team_result;
|
||||
// prevent compiler optimizing loop away
|
||||
});
|
||||
Kokkos::parallel_for(
|
||||
"121 outer for", t_policy(team_range, team_size, vector_size),
|
||||
KOKKOS_LAMBDA(const t_team& team) {
|
||||
long idx = team.league_rank() * team.team_size() + team.team_rank();
|
||||
double team_result = 0.0;
|
||||
for (int tr = 0; tr < thread_repeat; ++tr) {
|
||||
team_result = 0.0;
|
||||
Kokkos::parallel_reduce(
|
||||
Kokkos::TeamThreadRange(team, thread_range),
|
||||
[&](const int t, double& lval) {
|
||||
lval += 1;
|
||||
for (int vr = 0; vr < inner_repeat; ++vr) {
|
||||
Kokkos::parallel_for(
|
||||
Kokkos::ThreadVectorRange(team, vector_range),
|
||||
[&](const int vi) {
|
||||
v3(idx, t, vi) = vi;
|
||||
// prevent compiler optimizing loop away
|
||||
});
|
||||
}
|
||||
},
|
||||
team_result);
|
||||
}
|
||||
v3(idx, 0, 0) = team_result;
|
||||
// prevent compiler optimizing loop away
|
||||
});
|
||||
}
|
||||
if (test_type == 122) {
|
||||
Kokkos::parallel_for("122 outer for", t_policy(team_range,team_size,vector_size),
|
||||
KOKKOS_LAMBDA (const t_team& team) {
|
||||
long idx = team.league_rank()*team.team_size() + team.team_rank();
|
||||
double team_result = 0.0;
|
||||
for (int tr = 0; tr<thread_repeat; ++tr) {
|
||||
Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team,thread_range), [&] (const int t, double &lval) {
|
||||
double vector_result = 0.0;
|
||||
for (int vr = 0; vr<inner_repeat; ++vr) {
|
||||
vector_result = 0.0;
|
||||
Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,vector_range), [&] (const int vi, double &vval) {
|
||||
vval += 1;
|
||||
}, vector_result);
|
||||
lval += vector_result;
|
||||
}
|
||||
}, team_result);
|
||||
}
|
||||
v1(idx) = team_result;
|
||||
// prevent compiler optimizing loop away
|
||||
});
|
||||
Kokkos::parallel_for(
|
||||
"122 outer for", t_policy(team_range, team_size, vector_size),
|
||||
KOKKOS_LAMBDA(const t_team& team) {
|
||||
long idx = team.league_rank() * team.team_size() + team.team_rank();
|
||||
double team_result = 0.0;
|
||||
for (int tr = 0; tr < thread_repeat; ++tr) {
|
||||
Kokkos::parallel_reduce(
|
||||
Kokkos::TeamThreadRange(team, thread_range),
|
||||
[&](const int t, double& lval) {
|
||||
double vector_result = 0.0;
|
||||
for (int vr = 0; vr < inner_repeat; ++vr) {
|
||||
vector_result = 0.0;
|
||||
Kokkos::parallel_reduce(
|
||||
Kokkos::ThreadVectorRange(team, vector_range),
|
||||
[&](const int vi, double& vval) { vval += 1; },
|
||||
vector_result);
|
||||
lval += vector_result;
|
||||
}
|
||||
},
|
||||
team_result);
|
||||
}
|
||||
v1(idx) = team_result;
|
||||
// prevent compiler optimizing loop away
|
||||
});
|
||||
}
|
||||
if (test_type == 200) {
|
||||
Kokkos::parallel_reduce("200 outer reduce", t_policy(team_range,team_size),
|
||||
KOKKOS_LAMBDA (const t_team& team, double& lval) {
|
||||
lval+=team.team_size()*team.league_rank() + team.team_rank();
|
||||
},result);
|
||||
result_expect = 0.5* (team_range*team_size)*(team_range*team_size-1);
|
||||
Kokkos::parallel_reduce(
|
||||
"200 outer reduce", t_policy(team_range, team_size),
|
||||
KOKKOS_LAMBDA(const t_team& team, double& lval) {
|
||||
lval += team.team_size() * team.league_rank() + team.team_rank();
|
||||
},
|
||||
result);
|
||||
result_expect =
|
||||
0.5 * (team_range * team_size) * (team_range * team_size - 1);
|
||||
// sum ( seq( [0, team_range*team_size) )
|
||||
}
|
||||
if (test_type == 210) {
|
||||
Kokkos::parallel_reduce("210 outer reduce", t_policy(team_range,team_size),
|
||||
KOKKOS_LAMBDA (const t_team& team, double& lval) {
|
||||
long idx = team.league_rank()*team.team_size() + team.team_rank();
|
||||
double thread_for = 1.0;
|
||||
for(int tr = 0; tr<thread_repeat; tr++) {
|
||||
Kokkos::parallel_for(Kokkos::TeamThreadRange(team,thread_range), [&] (const int t) {
|
||||
v2(idx,t) = t;
|
||||
// prevent compiler optimizing loop away
|
||||
});
|
||||
}
|
||||
lval+=(team.team_size()*team.league_rank() + team.team_rank() + thread_for);
|
||||
},result);
|
||||
result_expect = 0.5* (team_range*team_size)*(team_range*team_size-1) + (team_range*team_size);
|
||||
// sum ( seq( [0, team_range*team_size) + 1 per team_member (total of team_range*team_size) )
|
||||
Kokkos::parallel_reduce(
|
||||
"210 outer reduce", t_policy(team_range, team_size),
|
||||
KOKKOS_LAMBDA(const t_team& team, double& lval) {
|
||||
long idx = team.league_rank() * team.team_size() + team.team_rank();
|
||||
double thread_for = 1.0;
|
||||
for (int tr = 0; tr < thread_repeat; tr++) {
|
||||
Kokkos::parallel_for(Kokkos::TeamThreadRange(team, thread_range),
|
||||
[&](const int t) {
|
||||
v2(idx, t) = t;
|
||||
// prevent compiler optimizing loop away
|
||||
});
|
||||
}
|
||||
lval += (team.team_size() * team.league_rank() + team.team_rank() +
|
||||
thread_for);
|
||||
},
|
||||
result);
|
||||
result_expect =
|
||||
0.5 * (team_range * team_size) * (team_range * team_size - 1) +
|
||||
(team_range * team_size);
|
||||
// sum ( seq( [0, team_range*team_size) + 1 per team_member (total of
|
||||
// team_range*team_size) )
|
||||
}
|
||||
if (test_type == 211) {
|
||||
Kokkos::parallel_reduce("211 outer reduce", t_policy(team_range,team_size,vector_size),
|
||||
KOKKOS_LAMBDA (const t_team& team, double& lval) {
|
||||
long idx = team.league_rank()*team.team_size() + team.team_rank();
|
||||
double thread_for = 1.0;
|
||||
for(int tr = 0; tr<thread_repeat; tr++) {
|
||||
Kokkos::parallel_for(Kokkos::TeamThreadRange(team,thread_range), [&] (const int t) {
|
||||
for (int vr = 0; vr<inner_repeat; ++vr)
|
||||
Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, vector_range), [&] (const int vi) {
|
||||
v3(idx, t, vi) = vi;
|
||||
// prevent compiler optimizing loop away
|
||||
});
|
||||
});
|
||||
}
|
||||
lval+=idx+thread_for;
|
||||
},result);
|
||||
result_expect = 0.5*(team_range*team_size)*(team_range*team_size-1) + (team_range*team_size);
|
||||
// sum ( seq( [0, team_range*team_size) + 1 per team_member (total of team_range*team_size) )
|
||||
Kokkos::parallel_reduce(
|
||||
"211 outer reduce", t_policy(team_range, team_size, vector_size),
|
||||
KOKKOS_LAMBDA(const t_team& team, double& lval) {
|
||||
long idx = team.league_rank() * team.team_size() + team.team_rank();
|
||||
double thread_for = 1.0;
|
||||
for (int tr = 0; tr < thread_repeat; tr++) {
|
||||
Kokkos::parallel_for(
|
||||
Kokkos::TeamThreadRange(team, thread_range),
|
||||
[&](const int t) {
|
||||
for (int vr = 0; vr < inner_repeat; ++vr)
|
||||
Kokkos::parallel_for(
|
||||
Kokkos::ThreadVectorRange(team, vector_range),
|
||||
[&](const int vi) {
|
||||
v3(idx, t, vi) = vi;
|
||||
// prevent compiler optimizing loop away
|
||||
});
|
||||
});
|
||||
}
|
||||
lval += idx + thread_for;
|
||||
},
|
||||
result);
|
||||
result_expect =
|
||||
0.5 * (team_range * team_size) * (team_range * team_size - 1) +
|
||||
(team_range * team_size);
|
||||
// sum ( seq( [0, team_range*team_size) + 1 per team_member (total of
|
||||
// team_range*team_size) )
|
||||
}
|
||||
if (test_type == 212) {
|
||||
Kokkos::parallel_reduce("212 outer reduce", t_policy(team_range,team_size,vector_size),
|
||||
KOKKOS_LAMBDA (const t_team& team, double& lval) {
|
||||
long idx = team.league_rank()*team.team_size() + team.team_rank();
|
||||
double vector_result = 0.0;
|
||||
for(int tr = 0; tr<thread_repeat; tr++) {
|
||||
// This parallel_for is executed by each team; the thread_range is partitioned among the team members
|
||||
Kokkos::parallel_for(Kokkos::TeamThreadRange(team,thread_range), [&] (const int t) {
|
||||
v2(idx,t) = t;
|
||||
// prevent compiler optimizing loop away
|
||||
for (int vr = 0; vr<inner_repeat; ++vr) {
|
||||
vector_result = 0.0;
|
||||
Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team, vector_range), [&] (const int vi, double &vval) {
|
||||
vval += vi;
|
||||
}, vector_result );
|
||||
Kokkos::parallel_reduce(
|
||||
"212 outer reduce", t_policy(team_range, team_size, vector_size),
|
||||
KOKKOS_LAMBDA(const t_team& team, double& lval) {
|
||||
long idx = team.league_rank() * team.team_size() + team.team_rank();
|
||||
double vector_result = 0.0;
|
||||
for (int tr = 0; tr < thread_repeat; tr++) {
|
||||
// This parallel_for is executed by each team; the thread_range is
|
||||
// partitioned among the team members
|
||||
Kokkos::parallel_for(
|
||||
Kokkos::TeamThreadRange(team, thread_range),
|
||||
[&](const int t) {
|
||||
v2(idx, t) = t;
|
||||
// prevent compiler optimizing loop away
|
||||
for (int vr = 0; vr < inner_repeat; ++vr) {
|
||||
vector_result = 0.0;
|
||||
Kokkos::parallel_reduce(
|
||||
Kokkos::ThreadVectorRange(team, vector_range),
|
||||
[&](const int vi, double& vval) { vval += vi; },
|
||||
vector_result);
|
||||
}
|
||||
});
|
||||
}
|
||||
});
|
||||
}
|
||||
lval+= idx + vector_result;
|
||||
},result);
|
||||
result_expect = 0.5*(team_range*team_size)*(team_range*team_size-1) + (0.5*vector_range*(vector_range-1)*team_range*team_size);
|
||||
// sum ( seq( [0, team_range*team_size) + sum( seq( [0, vector_range) ) per team_member (total of team_range*team_size) )
|
||||
lval += idx + vector_result;
|
||||
},
|
||||
result);
|
||||
result_expect =
|
||||
0.5 * (team_range * team_size) * (team_range * team_size - 1) +
|
||||
(0.5 * vector_range * (vector_range - 1) * team_range * team_size);
|
||||
// sum ( seq( [0, team_range*team_size) + sum( seq( [0, vector_range) )
|
||||
// per team_member (total of team_range*team_size) )
|
||||
}
|
||||
if (test_type == 220) {
|
||||
Kokkos::parallel_reduce("220 outer reduce", t_policy(team_range,team_size),
|
||||
KOKKOS_LAMBDA (const t_team& team, double& lval) {
|
||||
double team_result = 0.0;
|
||||
for(int tr = 0; tr<thread_repeat; tr++) {
|
||||
Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team,thread_range), [&] (const int t, double& tval) {
|
||||
tval += t;
|
||||
},team_result);
|
||||
}
|
||||
lval+=team_result*team.league_rank(); // constant * league_rank
|
||||
},result);
|
||||
result_expect = 0.5*(team_range)*(team_range-1) * team_size * 0.5*(thread_range)*(thread_range-1);
|
||||
// sum ( seq( [0, team_range) * constant ); constant = sum( seq( [0, thread_range) )*team_size (1 per member, result for each team)
|
||||
Kokkos::parallel_reduce(
|
||||
"220 outer reduce", t_policy(team_range, team_size),
|
||||
KOKKOS_LAMBDA(const t_team& team, double& lval) {
|
||||
double team_result = 0.0;
|
||||
for (int tr = 0; tr < thread_repeat; tr++) {
|
||||
Kokkos::parallel_reduce(
|
||||
Kokkos::TeamThreadRange(team, thread_range),
|
||||
[&](const int t, double& tval) { tval += t; }, team_result);
|
||||
}
|
||||
lval += team_result * team.league_rank(); // constant * league_rank
|
||||
},
|
||||
result);
|
||||
result_expect = 0.5 * (team_range) * (team_range - 1) * team_size * 0.5 *
|
||||
(thread_range) * (thread_range - 1);
|
||||
// sum ( seq( [0, team_range) * constant ); constant = sum( seq( [0,
|
||||
// thread_range) )*team_size (1 per member, result for each team)
|
||||
}
|
||||
if (test_type == 221) {
|
||||
Kokkos::parallel_reduce("221 outer reduce", t_policy(team_range,team_size,vector_size),
|
||||
KOKKOS_LAMBDA (const t_team& team, double& lval) {
|
||||
long idx = team.league_rank()*team.team_size() + team.team_rank();
|
||||
double team_result = 0;
|
||||
for(int tr = 0; tr<thread_repeat; tr++) {
|
||||
Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team,thread_range), [&] (const int t, double& tval) {
|
||||
double vector_for = 1.0;
|
||||
for (int vr = 0; vr<inner_repeat; ++vr) {
|
||||
Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, vector_range), [&] (const int vi) {
|
||||
v3(idx, t, vi) = vi;
|
||||
// prevent compiler optimizing loop away
|
||||
});
|
||||
Kokkos::parallel_reduce(
|
||||
"221 outer reduce", t_policy(team_range, team_size, vector_size),
|
||||
KOKKOS_LAMBDA(const t_team& team, double& lval) {
|
||||
long idx = team.league_rank() * team.team_size() + team.team_rank();
|
||||
double team_result = 0;
|
||||
for (int tr = 0; tr < thread_repeat; tr++) {
|
||||
Kokkos::parallel_reduce(
|
||||
Kokkos::TeamThreadRange(team, thread_range),
|
||||
[&](const int t, double& tval) {
|
||||
double vector_for = 1.0;
|
||||
for (int vr = 0; vr < inner_repeat; ++vr) {
|
||||
Kokkos::parallel_for(
|
||||
Kokkos::ThreadVectorRange(team, vector_range),
|
||||
[&](const int vi) {
|
||||
v3(idx, t, vi) = vi;
|
||||
// prevent compiler optimizing loop away
|
||||
});
|
||||
}
|
||||
tval += t + vector_for;
|
||||
},
|
||||
team_result);
|
||||
}
|
||||
tval += t + vector_for;
|
||||
},team_result);
|
||||
}
|
||||
lval+=team_result*team.league_rank();
|
||||
},result);
|
||||
result_expect = 0.5* (team_range)*(team_range-1) * team_size * (0.5*(thread_range) * (thread_range-1) + thread_range);
|
||||
// sum ( seq( [0, team_range) * constant ) + 1 per member per team; constant = sum( seq( [0, thread_range) )*team_size (1 per member, result for each team)
|
||||
lval += team_result * team.league_rank();
|
||||
},
|
||||
result);
|
||||
result_expect =
|
||||
0.5 * (team_range) * (team_range - 1) * team_size *
|
||||
(0.5 * (thread_range) * (thread_range - 1) + thread_range);
|
||||
// sum ( seq( [0, team_range) * constant ) + 1 per member per team;
|
||||
// constant = sum( seq( [0, thread_range) )*team_size (1 per member,
|
||||
// result for each team)
|
||||
}
|
||||
if (test_type == 222) {
|
||||
Kokkos::parallel_reduce("222 outer reduce", t_policy(team_range,team_size,vector_size),
|
||||
KOKKOS_LAMBDA (const t_team& team, double& lval) {
|
||||
double team_result = 0.0;
|
||||
for(int tr = 0; tr<thread_repeat; tr++) {
|
||||
Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team,thread_range), [&] (const int t, double& tval) {
|
||||
double vector_result = 0.0;
|
||||
for (int vr = 0; vr<inner_repeat; ++vr) {
|
||||
Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team, vector_range), [&] (const int vi, double& vval) {
|
||||
vval += vi;
|
||||
}, vector_result);
|
||||
Kokkos::parallel_reduce(
|
||||
"222 outer reduce", t_policy(team_range, team_size, vector_size),
|
||||
KOKKOS_LAMBDA(const t_team& team, double& lval) {
|
||||
double team_result = 0.0;
|
||||
for (int tr = 0; tr < thread_repeat; tr++) {
|
||||
Kokkos::parallel_reduce(
|
||||
Kokkos::TeamThreadRange(team, thread_range),
|
||||
[&](const int t, double& tval) {
|
||||
double vector_result = 0.0;
|
||||
for (int vr = 0; vr < inner_repeat; ++vr) {
|
||||
Kokkos::parallel_reduce(
|
||||
Kokkos::ThreadVectorRange(team, vector_range),
|
||||
[&](const int vi, double& vval) { vval += vi; },
|
||||
vector_result);
|
||||
}
|
||||
tval += t + vector_result;
|
||||
},
|
||||
team_result);
|
||||
}
|
||||
tval += t + vector_result;
|
||||
},team_result);
|
||||
}
|
||||
lval+=team_result*team.league_rank();
|
||||
},result);
|
||||
result_expect = 0.5* (team_range)*(team_range-1) * team_size * (0.5*(thread_range) * (thread_range-1) + thread_range*0.5*(vector_range)*(vector_range-1));
|
||||
// sum ( seq( [0, team_range) * constant ) + 1 + sum( seq([0,vector_range) ) per member per team; constant = sum( seq( [0, thread_range) )*team_size (1 per member, result for each team)
|
||||
lval += team_result * team.league_rank();
|
||||
},
|
||||
result);
|
||||
result_expect =
|
||||
0.5 * (team_range) * (team_range - 1) * team_size *
|
||||
(0.5 * (thread_range) * (thread_range - 1) +
|
||||
thread_range * 0.5 * (vector_range) * (vector_range - 1));
|
||||
// sum ( seq( [0, team_range) * constant ) + 1 + sum( seq([0,vector_range)
|
||||
// ) per member per team; constant = sum( seq( [0, thread_range)
|
||||
// )*team_size (1 per member, result for each team)
|
||||
}
|
||||
|
||||
// parallel_for RangePolicy: range = team_size*team_range
|
||||
if (test_type == 300) {
|
||||
Kokkos::parallel_for("300 outer for", team_size*team_range,
|
||||
KOKKOS_LAMBDA (const int idx) {
|
||||
v1(idx) = idx;
|
||||
// prevent compiler from optimizing away the loop
|
||||
});
|
||||
Kokkos::parallel_for(
|
||||
"300 outer for", team_size * team_range,
|
||||
KOKKOS_LAMBDA(const int idx) {
|
||||
v1(idx) = idx;
|
||||
// prevent compiler from optimizing away the loop
|
||||
});
|
||||
}
|
||||
// parallel_reduce RangePolicy: range = team_size*team_range
|
||||
if (test_type == 400) {
|
||||
Kokkos::parallel_reduce("400 outer reduce", team_size*team_range,
|
||||
KOKKOS_LAMBDA (const int idx, double& val) {
|
||||
val += idx;
|
||||
}, result);
|
||||
result_expect = 0.5*(team_size*team_range)*(team_size*team_range-1);
|
||||
Kokkos::parallel_reduce(
|
||||
"400 outer reduce", team_size * team_range,
|
||||
KOKKOS_LAMBDA(const int idx, double& val) { val += idx; }, result);
|
||||
result_expect =
|
||||
0.5 * (team_size * team_range) * (team_size * team_range - 1);
|
||||
}
|
||||
// parallel_scan RangePolicy: range = team_size*team_range
|
||||
if (test_type == 500) {
|
||||
Kokkos::parallel_scan("500 outer scan", team_size*team_range,
|
||||
ParallelScanFunctor<ViewType1>(v1)
|
||||
Kokkos::parallel_scan("500 outer scan", team_size * team_range,
|
||||
ParallelScanFunctor<ViewType1>(v1)
|
||||
#if 0
|
||||
// This does not compile with pre Cuda 8.0 - see Github Issue #913 for explanation
|
||||
KOKKOS_LAMBDA (const int idx, double& val, const bool& final) {
|
||||
@ -345,11 +424,12 @@ void test_policy(int team_range, int thread_range, int vector_range,
|
||||
}
|
||||
#endif
|
||||
);
|
||||
// result = v1( team_size*team_range - 1 ); // won't work with Cuda - need to copy result back to host to print
|
||||
// result_expect = 0.5*(team_size*team_range)*(team_size*team_range-1);
|
||||
// result = v1( team_size*team_range - 1 ); // won't work with Cuda - need
|
||||
// to copy result back to host to print.
|
||||
// result_expect = 0.5*(team_size*team_range)*(team_size*team_range-1);
|
||||
}
|
||||
|
||||
} // end outer for loop
|
||||
} // end outer for loop
|
||||
|
||||
time = timer.seconds();
|
||||
} //end test_policy
|
||||
} // end test_policy
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
|
||||
# Sample script for benchmarking policy performance
|
||||
|
||||
# Suggested environment variables to export prior to executing script:
|
||||
# Suggested environment variables to export prior to executing script:
|
||||
# KNL:
|
||||
# OMP_NUM_THREADS=256 KMP_AFFINITY=compact
|
||||
# Power:
|
||||
|
||||
@ -2,10 +2,11 @@
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
// Kokkos v. 3.0
|
||||
// Copyright (2020) National Technology & Engineering
|
||||
// Solutions of Sandia, LLC (NTESS).
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@ -23,10 +24,10 @@
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
|
||||
Reference in New Issue
Block a user