/* //@HEADER // ************************************************************************ // // Kokkos v. 3.0 // Copyright (2020) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // // 3. Neither the name of the Corporation nor the names of the // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // ************************************************************************ //@HEADER */ #include "Kokkos_Core.hpp" #include #include #include #include #define HLINE "-------------------------------------------------------------\n" #if defined(KOKKOS_ENABLE_CUDA) using GUPSHostArray = Kokkos::View::HostMirror; using GUPSDeviceArray = Kokkos::View; #else using GUPSHostArray = Kokkos::View::HostMirror; using GUPSDeviceArray = Kokkos::View; #endif using GUPSIndex = int; double now() { struct timeval now; gettimeofday(&now, nullptr); return (double)now.tv_sec + ((double)now.tv_usec * 1.0e-6); } void randomize_indices(GUPSHostArray& indices, GUPSDeviceArray& dev_indices, const int64_t dataCount) { for (GUPSIndex i = 0; i < indices.extent(0); ++i) { indices[i] = lrand48() % dataCount; } Kokkos::deep_copy(dev_indices, indices); } void run_gups(GUPSDeviceArray& indices, GUPSDeviceArray& data, const int64_t datum, const bool performAtomics) { if (performAtomics) { Kokkos::parallel_for( "bench-gups-atomic", indices.extent(0), KOKKOS_LAMBDA(const GUPSIndex i) { Kokkos::atomic_fetch_xor(&data[indices[i]], datum); }); } else { Kokkos::parallel_for( "bench-gups-non-atomic", indices.extent(0), KOKKOS_LAMBDA(const GUPSIndex i) { data[indices[i]] ^= datum; }); } Kokkos::fence(); } int run_benchmark(const GUPSIndex indicesCount, const GUPSIndex dataCount, const int repeats, const bool useAtomics) { printf("Reports fastest timing per kernel\n"); printf("Creating Views...\n"); printf("Memory Sizes:\n"); printf("- Elements: %15" PRIu64 " (%12.4f MB)\n", static_cast(dataCount), 1.0e-6 * ((double)dataCount * (double)sizeof(int64_t))); printf("- Indices: %15" PRIu64 " (%12.4f MB)\n", static_cast(indicesCount), 1.0e-6 * ((double)indicesCount * (double)sizeof(int64_t))); printf(" - Atomics: %15s\n", (useAtomics ? "Yes" : "No")); printf("Benchmark kernels will be performed for %d iterations.\n", repeats); printf(HLINE); GUPSDeviceArray dev_indices("indices", indicesCount); GUPSDeviceArray dev_data("data", dataCount); int64_t datum = -1; GUPSHostArray indices = Kokkos::create_mirror_view(dev_indices); GUPSHostArray data = Kokkos::create_mirror_view(dev_data); double gupsTime = 0.0; printf("Initializing Views...\n"); #if defined(KOKKOS_HAVE_OPENMP) Kokkos::parallel_for( "init-data", Kokkos::RangePolicy(0, dataCount), #else Kokkos::parallel_for( "init-data", Kokkos::RangePolicy(0, dataCount), #endif KOKKOS_LAMBDA(const int i) { data[i] = 10101010101; }); #if defined(KOKKOS_HAVE_OPENMP) Kokkos::parallel_for( "init-indices", Kokkos::RangePolicy(0, indicesCount), #else Kokkos::parallel_for( "init-indices", Kokkos::RangePolicy(0, indicesCount), #endif KOKKOS_LAMBDA(const int i) { indices[i] = 0; }); Kokkos::deep_copy(dev_data, data); Kokkos::deep_copy(dev_indices, indices); double start; printf("Starting benchmarking...\n"); for (GUPSIndex k = 0; k < repeats; ++k) { randomize_indices(indices, dev_indices, data.extent(0)); start = now(); run_gups(dev_indices, dev_data, datum, useAtomics); gupsTime += now() - start; } Kokkos::deep_copy(indices, dev_indices); Kokkos::deep_copy(data, dev_data); printf(HLINE); printf( "GUP/s Random: %18.6f\n", (1.0e-9 * ((double)repeats) * (double)dev_indices.extent(0)) / gupsTime); printf(HLINE); return 0; } int main(int argc, char* argv[]) { printf(HLINE); printf("Kokkos GUPS Benchmark\n"); printf(HLINE); srand48(1010101); Kokkos::initialize(argc, argv); int64_t indices = 8192; int64_t data = 33554432; int64_t repeats = 10; bool useAtomics = false; for (int i = 1; i < argc; ++i) { if (strcmp(argv[i], "--indices") == 0) { indices = std::atoll(argv[i + 1]); ++i; } else if (strcmp(argv[i], "--data") == 0) { data = std::atoll(argv[i + 1]); ++i; } else if (strcmp(argv[i], "--repeats") == 0) { repeats = std::atoll(argv[i + 1]); ++i; } else if (strcmp(argv[i], "--atomics") == 0) { useAtomics = true; } } const int rc = run_benchmark(indices, data, repeats, useAtomics); Kokkos::finalize(); return rc; }