Update Kokkos library in LAMMPS to v3.3.0
This commit is contained in:
@ -48,17 +48,10 @@ SET(SOURCES
|
||||
PerfTest_ViewResize_8.cpp
|
||||
)
|
||||
|
||||
IF(Kokkos_ENABLE_HIP)
|
||||
# FIXME HIP requires TeamPolicy
|
||||
LIST(REMOVE_ITEM SOURCES
|
||||
PerfTest_CustomReduction.cpp
|
||||
PerfTest_ExecSpacePartitioning.cpp
|
||||
)
|
||||
ENDIF()
|
||||
|
||||
IF(Kokkos_ENABLE_OPENMPTARGET)
|
||||
# FIXME OPENMPTARGET requires TeamPolicy Reductions and Custom Reduction
|
||||
LIST(REMOVE_ITEM SOURCES
|
||||
PerfTestGramSchmidt.cpp
|
||||
PerfTest_CustomReduction.cpp
|
||||
PerfTest_ExecSpacePartitioning.cpp
|
||||
)
|
||||
@ -75,7 +68,8 @@ KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
|
||||
KOKKOS_INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
|
||||
# This test currently times out for MSVC
|
||||
IF(NOT KOKKOS_CXX_COMPILER_ID STREQUAL "MSVC")
|
||||
# FIXME_SYCL these tests don't compile yet (require parallel_for).
|
||||
IF(NOT KOKKOS_CXX_COMPILER_ID STREQUAL "MSVC" AND NOT Kokkos_ENABLE_SYCL)
|
||||
KOKKOS_ADD_EXECUTABLE_AND_TEST(
|
||||
PerfTestExec
|
||||
SOURCES ${SOURCES}
|
||||
@ -83,17 +77,28 @@ IF(NOT KOKKOS_CXX_COMPILER_ID STREQUAL "MSVC")
|
||||
)
|
||||
ENDIF()
|
||||
|
||||
KOKKOS_ADD_EXECUTABLE_AND_TEST(
|
||||
PerformanceTest_Atomic
|
||||
SOURCES test_atomic.cpp
|
||||
CATEGORIES PERFORMANCE
|
||||
)
|
||||
# FIXME_SYCL
|
||||
IF(NOT Kokkos_ENABLE_SYCL)
|
||||
KOKKOS_ADD_EXECUTABLE_AND_TEST(
|
||||
PerformanceTest_Atomic
|
||||
SOURCES test_atomic.cpp
|
||||
CATEGORIES PERFORMANCE
|
||||
)
|
||||
|
||||
IF(NOT KOKKOS_ENABLE_CUDA OR KOKKOS_ENABLE_CUDA_LAMBDA)
|
||||
KOKKOS_ADD_EXECUTABLE_AND_TEST(
|
||||
PerformanceTest_Atomic_MinMax
|
||||
SOURCES test_atomic_minmax_simple.cpp
|
||||
CATEGORIES PERFORMANCE
|
||||
)
|
||||
ENDIF()
|
||||
|
||||
KOKKOS_ADD_EXECUTABLE_AND_TEST(
|
||||
PerformanceTest_Mempool
|
||||
SOURCES test_mempool.cpp
|
||||
CATEGORIES PERFORMANCE
|
||||
)
|
||||
ENDIF()
|
||||
|
||||
IF(NOT Kokkos_ENABLE_OPENMPTARGET)
|
||||
# FIXME OPENMPTARGET needs tasking
|
||||
|
||||
@ -65,6 +65,12 @@ TEST_TARGETS += test-taskdag
|
||||
|
||||
#
|
||||
|
||||
OBJ_ATOMICS_MINMAX = test_atomic_minmax_simple.o
|
||||
TARGETS += KokkosCore_PerformanceTest_Atomics_MinMax
|
||||
TEST_TARGETS += test-atomic-minmax
|
||||
|
||||
#
|
||||
|
||||
KokkosCore_PerformanceTest: $(OBJ_PERF) $(KOKKOS_LINK_DEPENDS)
|
||||
$(LINK) $(EXTRA_PATH) $(OBJ_PERF) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosCore_PerformanceTest
|
||||
|
||||
@ -77,6 +83,9 @@ KokkosCore_PerformanceTest_Mempool: $(OBJ_MEMPOOL) $(KOKKOS_LINK_DEPENDS)
|
||||
KokkosCore_PerformanceTest_TaskDAG: $(OBJ_TASKDAG) $(KOKKOS_LINK_DEPENDS)
|
||||
$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_TASKDAG) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_PerformanceTest_TaskDAG
|
||||
|
||||
KokkosCore_PerformanceTest_Atomics_MinMax: $(OBJ_ATOMICS_MINMAX) $(KOKKOS_LINK_DEPENDS)
|
||||
$(LINK) $(EXTRA_PATH) $(OBJ_ATOMICS_MINMAX) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosCore_PerformanceTest_Atomics_MinMax
|
||||
|
||||
test-performance: KokkosCore_PerformanceTest
|
||||
./KokkosCore_PerformanceTest
|
||||
|
||||
@ -89,6 +98,9 @@ test-mempool: KokkosCore_PerformanceTest_Mempool
|
||||
test-taskdag: KokkosCore_PerformanceTest_TaskDAG
|
||||
./KokkosCore_PerformanceTest_TaskDAG
|
||||
|
||||
test-atomic-minmax: KokkosCore_PerformanceTest_Atomics_MinMax
|
||||
./KokkosCore_PerformanceTest_Atomics_MinMax
|
||||
|
||||
build_all: $(TARGETS)
|
||||
|
||||
test: $(TEST_TARGETS)
|
||||
|
||||
@ -120,7 +120,7 @@ void run_resizeview_tests123(int N, int R) {
|
||||
Kokkos::Timer timer;
|
||||
for (int r = 0; r < R; r++) {
|
||||
Kokkos::View<double*, Layout> a1(
|
||||
Kokkos::ViewAllocateWithoutInitializing("A1"), int(N8 * 1.1));
|
||||
Kokkos::view_alloc(Kokkos::WithoutInitializing, "A1"), int(N8 * 1.1));
|
||||
double* a1_ptr = a1.data();
|
||||
Kokkos::parallel_for(
|
||||
N8, KOKKOS_LAMBDA(const int& i) { a1_ptr[i] = a_ptr[i]; });
|
||||
@ -201,7 +201,7 @@ void run_resizeview_tests45(int N, int R) {
|
||||
Kokkos::Timer timer;
|
||||
for (int r = 0; r < R; r++) {
|
||||
Kokkos::View<double*, Layout> a1(
|
||||
Kokkos::ViewAllocateWithoutInitializing("A1"), int(N8 * 1.1));
|
||||
Kokkos::view_alloc(Kokkos::WithoutInitializing, "A1"), int(N8 * 1.1));
|
||||
double* a1_ptr = a1.data();
|
||||
Kokkos::parallel_for(
|
||||
N8, KOKKOS_LAMBDA(const int& i) { a1_ptr[i] = a_ptr[i]; });
|
||||
@ -258,7 +258,7 @@ void run_resizeview_tests6(int N, int R) {
|
||||
Kokkos::Timer timer;
|
||||
for (int r = 0; r < R; r++) {
|
||||
Kokkos::View<double*, Layout> a1(
|
||||
Kokkos::ViewAllocateWithoutInitializing("A1"), int(N8 * 1.1));
|
||||
Kokkos::view_alloc(Kokkos::WithoutInitializing, "A1"), int(N8 * 1.1));
|
||||
double* a1_ptr = a1.data();
|
||||
Kokkos::parallel_for(
|
||||
N8, KOKKOS_LAMBDA(const int& i) { a1_ptr[i] = a_ptr[i]; });
|
||||
@ -311,7 +311,7 @@ void run_resizeview_tests7(int N, int R) {
|
||||
Kokkos::Timer timer;
|
||||
for (int r = 0; r < R; r++) {
|
||||
Kokkos::View<double*, Layout> a1(
|
||||
Kokkos::ViewAllocateWithoutInitializing("A1"), int(N8 * 1.1));
|
||||
Kokkos::view_alloc(Kokkos::WithoutInitializing, "A1"), int(N8 * 1.1));
|
||||
double* a1_ptr = a1.data();
|
||||
Kokkos::parallel_for(
|
||||
N8, KOKKOS_LAMBDA(const int& i) { a1_ptr[i] = a_ptr[i]; });
|
||||
@ -366,7 +366,7 @@ void run_resizeview_tests8(int N, int R) {
|
||||
Kokkos::Timer timer;
|
||||
for (int r = 0; r < R; r++) {
|
||||
Kokkos::View<double*, Layout> a1(
|
||||
Kokkos::ViewAllocateWithoutInitializing("A1"), int(N8 * 1.1));
|
||||
Kokkos::view_alloc(Kokkos::WithoutInitializing, "A1"), int(N8 * 1.1));
|
||||
double* a1_ptr = a1.data();
|
||||
Kokkos::parallel_for(
|
||||
N8, KOKKOS_LAMBDA(const int& i) { a1_ptr[i] = a_ptr[i]; });
|
||||
|
||||
244
lib/kokkos/core/perf_test/test_atomic_minmax_simple.cpp
Normal file
244
lib/kokkos/core/perf_test/test_atomic_minmax_simple.cpp
Normal file
@ -0,0 +1,244 @@
|
||||
// export OMP_PROC_BIND=spread ; export OMP_PLACES=threads
|
||||
// c++ -O2 -g -DNDEBUG -fopenmp
|
||||
// ../core/perf_test/test_atomic_minmax_simple.cpp -I../core/src/ -I. -o
|
||||
// test_atomic_minmax_simple.x containers/src/libkokkoscontainers.a
|
||||
// core/src/libkokkoscore.a -ldl && OMP_NUM_THREADS=1
|
||||
// ./test_atomic_minmax_simple.x 10000000
|
||||
|
||||
#include <cstdio>
#include <cstdlib>

#include <exception>
#include <iostream>
#include <limits>
#include <string>
#include <typeinfo>

#include <Kokkos_Core.hpp>
#include <impl/Kokkos_Timer.hpp>
|
||||
|
||||
using exec_space = Kokkos::DefaultExecutionSpace;
|
||||
|
||||
template <typename T>
|
||||
void test(const int length) {
|
||||
Kokkos::Impl::Timer timer;
|
||||
|
||||
using vector = Kokkos::View<T*, exec_space>;
|
||||
|
||||
vector inp("input", length);
|
||||
T max = std::numeric_limits<T>::max();
|
||||
T min = std::numeric_limits<T>::lowest();
|
||||
|
||||
// input is max values - all min atomics will replace
|
||||
{
|
||||
Kokkos::parallel_for(
|
||||
length, KOKKOS_LAMBDA(const int i) { inp(i) = max; });
|
||||
Kokkos::fence();
|
||||
|
||||
timer.reset();
|
||||
Kokkos::parallel_for(
|
||||
length, KOKKOS_LAMBDA(const int i) {
|
||||
(void)Kokkos::atomic_fetch_min(&(inp(i)), (T)i);
|
||||
});
|
||||
Kokkos::fence();
|
||||
double time = timer.seconds();
|
||||
|
||||
int errors(0);
|
||||
Kokkos::parallel_reduce(
|
||||
length,
|
||||
KOKKOS_LAMBDA(const int i, int& inner) { inner += (inp(i) != (T)i); },
|
||||
errors);
|
||||
Kokkos::fence();
|
||||
|
||||
if (errors) {
|
||||
std::cerr << "Error in 100% min replacements: " << errors << std::endl;
|
||||
std::cerr << "inp(0)=" << inp(0) << std::endl;
|
||||
}
|
||||
std::cout << "Time for 100% min replacements: " << time << std::endl;
|
||||
}
|
||||
|
||||
// input is min values - all max atomics will replace
|
||||
{
|
||||
Kokkos::parallel_for(
|
||||
length, KOKKOS_LAMBDA(const int i) { inp(i) = min; });
|
||||
Kokkos::fence();
|
||||
|
||||
timer.reset();
|
||||
Kokkos::parallel_for(
|
||||
length, KOKKOS_LAMBDA(const int i) {
|
||||
(void)Kokkos::atomic_max_fetch(&(inp(i)), (T)i);
|
||||
});
|
||||
Kokkos::fence();
|
||||
double time = timer.seconds();
|
||||
|
||||
int errors(0);
|
||||
Kokkos::parallel_reduce(
|
||||
length,
|
||||
KOKKOS_LAMBDA(const int i, int& inner) { inner += (inp(i) != (T)i); },
|
||||
errors);
|
||||
Kokkos::fence();
|
||||
|
||||
if (errors) {
|
||||
std::cerr << "Error in 100% max replacements: " << errors << std::endl;
|
||||
std::cerr << "inp(0)=" << inp(0) << std::endl;
|
||||
}
|
||||
std::cout << "Time for 100% max replacements: " << time << std::endl;
|
||||
}
|
||||
|
||||
// input is max values - all max atomics will early exit
|
||||
{
|
||||
Kokkos::parallel_for(
|
||||
length, KOKKOS_LAMBDA(const int i) { inp(i) = max; });
|
||||
Kokkos::fence();
|
||||
|
||||
timer.reset();
|
||||
Kokkos::parallel_for(
|
||||
length, KOKKOS_LAMBDA(const int i) {
|
||||
(void)Kokkos::atomic_max_fetch(&(inp(i)), (T)i);
|
||||
});
|
||||
Kokkos::fence();
|
||||
double time = timer.seconds();
|
||||
|
||||
int errors(0);
|
||||
Kokkos::parallel_reduce(
|
||||
length,
|
||||
KOKKOS_LAMBDA(const int i, int& inner) {
|
||||
T ref = max;
|
||||
inner += (inp(i) != ref);
|
||||
},
|
||||
errors);
|
||||
Kokkos::fence();
|
||||
|
||||
if (errors) {
|
||||
std::cerr << "Error in 100% max early exits: " << errors << std::endl;
|
||||
std::cerr << "inp(0)=" << inp(0) << std::endl;
|
||||
}
|
||||
std::cout << "Time for 100% max early exits: " << time << std::endl;
|
||||
}
|
||||
|
||||
// input is min values - all min atomics will early exit
|
||||
{
|
||||
Kokkos::parallel_for(
|
||||
length, KOKKOS_LAMBDA(const int i) { inp(i) = min; });
|
||||
Kokkos::fence();
|
||||
|
||||
timer.reset();
|
||||
Kokkos::parallel_for(
|
||||
length, KOKKOS_LAMBDA(const int i) {
|
||||
(void)Kokkos::atomic_min_fetch(&(inp(i)), (T)i);
|
||||
});
|
||||
Kokkos::fence();
|
||||
double time = timer.seconds();
|
||||
|
||||
int errors(0);
|
||||
Kokkos::parallel_reduce(
|
||||
length,
|
||||
KOKKOS_LAMBDA(const int i, int& inner) {
|
||||
T ref = min;
|
||||
inner += (inp(i) != ref);
|
||||
},
|
||||
errors);
|
||||
Kokkos::fence();
|
||||
|
||||
if (errors) {
|
||||
std::cerr << "Error in 100% min early exits: " << errors << std::endl;
|
||||
std::cerr << "inp(0)=" << inp(0) << std::endl;
|
||||
if (length > 9) std::cout << "inp(9)=" << inp(9) << std::endl;
|
||||
}
|
||||
std::cout << "Time for 100% min early exits: " << time << std::endl;
|
||||
}
|
||||
|
||||
// limit iterations for contentious test, takes ~50x longer for same length
|
||||
auto con_length = length / 5;
|
||||
// input is min values - some max atomics will replace
|
||||
{
|
||||
Kokkos::parallel_for(
|
||||
1, KOKKOS_LAMBDA(const int i) { inp(i) = min; });
|
||||
Kokkos::fence();
|
||||
|
||||
T current(0);
|
||||
timer.reset();
|
||||
Kokkos::parallel_reduce(
|
||||
con_length,
|
||||
KOKKOS_LAMBDA(const int i, T& inner) {
|
||||
inner = Kokkos::atomic_max_fetch(&(inp(0)), inner + 1);
|
||||
if (i == con_length - 1) {
|
||||
Kokkos::atomic_max_fetch(&(inp(0)), max);
|
||||
inner = max;
|
||||
}
|
||||
},
|
||||
Kokkos::Max<T>(current));
|
||||
Kokkos::fence();
|
||||
double time = timer.seconds();
|
||||
|
||||
if (current < max) {
|
||||
std::cerr << "Error in contentious max replacements: " << std::endl;
|
||||
std::cerr << "final=" << current << " inp(0)=" << inp(0) << " max=" << max
|
||||
<< std::endl;
|
||||
}
|
||||
std::cout << "Time for contentious max " << con_length
|
||||
<< " replacements: " << time << std::endl;
|
||||
}
|
||||
|
||||
// input is max values - some min atomics will replace
|
||||
{
|
||||
Kokkos::parallel_for(
|
||||
1, KOKKOS_LAMBDA(const int i) { inp(i) = max; });
|
||||
Kokkos::fence();
|
||||
|
||||
timer.reset();
|
||||
T current(100000000);
|
||||
Kokkos::parallel_reduce(
|
||||
con_length,
|
||||
KOKKOS_LAMBDA(const int i, T& inner) {
|
||||
inner = Kokkos::atomic_min_fetch(&(inp(0)), inner - 1);
|
||||
if (i == con_length - 1) {
|
||||
Kokkos::atomic_min_fetch(&(inp(0)), min);
|
||||
inner = min;
|
||||
}
|
||||
},
|
||||
Kokkos::Min<T>(current));
|
||||
Kokkos::fence();
|
||||
double time = timer.seconds();
|
||||
|
||||
if (current > min) {
|
||||
std::cerr << "Error in contentious min replacements: " << std::endl;
|
||||
std::cerr << "final=" << current << " inp(0)=" << inp(0) << " min=" << min
|
||||
<< std::endl;
|
||||
}
|
||||
std::cout << "Time for contentious min " << con_length
|
||||
<< " replacements: " << time << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
Kokkos::initialize(argc, argv);
|
||||
{
|
||||
int length = 1000000;
|
||||
if (argc == 2) {
|
||||
length = std::stoi(argv[1]);
|
||||
}
|
||||
|
||||
if (length < 1) {
|
||||
throw std::invalid_argument("");
|
||||
}
|
||||
|
||||
std::cout << "================ int" << std::endl;
|
||||
test<int>(length);
|
||||
std::cout << "================ long" << std::endl;
|
||||
test<long>(length);
|
||||
std::cout << "================ long long" << std::endl;
|
||||
test<long long>(length);
|
||||
|
||||
std::cout << "================ unsigned int" << std::endl;
|
||||
test<unsigned int>(length);
|
||||
std::cout << "================ unsigned long" << std::endl;
|
||||
test<unsigned long>(length);
|
||||
std::cout << "================ unsigned long long" << std::endl;
|
||||
test<unsigned long long>(length);
|
||||
|
||||
std::cout << "================ float" << std::endl;
|
||||
test<float>(length);
|
||||
std::cout << "================ double" << std::endl;
|
||||
test<double>(length);
|
||||
}
|
||||
Kokkos::finalize();
|
||||
return 0;
|
||||
}
|
||||
Reference in New Issue
Block a user