Update Kokkos library in LAMMPS to v4.5.0

This commit is contained in:
Stan Moore
2024-12-13 09:23:03 -07:00
parent a78aee5731
commit 7f68aeb6d5
617 changed files with 21499 additions and 17255 deletions

View File

@ -1,10 +1,5 @@
KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
kokkos_include_directories(${CMAKE_CURRENT_BINARY_DIR})
kokkos_include_directories(${CMAKE_CURRENT_SOURCE_DIR})
# This is a tutorial, not a test, so we don't ask CTest to run it.
KOKKOS_ADD_EXECUTABLE(
tutorial_01_hello_world
SOURCES hello_world.cpp
)
kokkos_add_executable(tutorial_01_hello_world SOURCES hello_world.cpp)

View File

@ -1,10 +1,5 @@
KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
kokkos_include_directories(${CMAKE_CURRENT_BINARY_DIR})
kokkos_include_directories(${CMAKE_CURRENT_SOURCE_DIR})
# This is a tutorial, not a test, so we don't ask CTest to run it.
KOKKOS_ADD_EXECUTABLE(
tutorial_01_hello_world_lambda
SOURCES hello_world_lambda.cpp
)
kokkos_add_executable(tutorial_01_hello_world_lambda SOURCES hello_world_lambda.cpp)

View File

@ -67,16 +67,13 @@ int main(int argc, char* argv[]) {
//
// You may notice that the printed numbers do not print out in
// order. Parallel for loops may execute in any order.
// We also need to protect the usage of a lambda against compiling
// with a backend which doesn't support it (i.e. Cuda 6.5/7.0).
#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA)
Kokkos::parallel_for(
15, KOKKOS_LAMBDA(const int i) {
// Kokkos::printf works for all backends in a parallel kernel;
// std::ostream does not.
Kokkos::printf("Hello from i = %i\n", i);
});
#endif
// You must call finalize() after you are done using Kokkos.
Kokkos::finalize();
}

View File

@ -1,9 +1,5 @@
KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
kokkos_include_directories(${CMAKE_CURRENT_BINARY_DIR})
kokkos_include_directories(${CMAKE_CURRENT_SOURCE_DIR})
# This is a tutorial, not a test, so we don't ask CTest to run it.
KOKKOS_ADD_EXECUTABLE(
tutorial_02_simple_reduce
SOURCES simple_reduce.cpp
)
kokkos_add_executable(tutorial_02_simple_reduce SOURCES simple_reduce.cpp)

View File

@ -1,9 +1,4 @@
kokkos_include_directories(${CMAKE_CURRENT_BINARY_DIR})
kokkos_include_directories(${CMAKE_CURRENT_SOURCE_DIR})
KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
KOKKOS_ADD_EXECUTABLE(
tutorial_02_simple_reduce_lambda
SOURCES simple_reduce_lambda.cpp
)
kokkos_add_executable(tutorial_02_simple_reduce_lambda SOURCES simple_reduce_lambda.cpp)

View File

@ -37,14 +37,11 @@ int main(int argc, char* argv[]) {
// functor. The lambda takes the same arguments as the functor's
// operator().
int sum = 0;
// The KOKKOS_LAMBDA macro replaces the capture-by-value clause [=].
// It also handles any other syntax needed for CUDA.
// We also need to protect the usage of a lambda against compiling
// with a backend which doesn't support it (i.e. Cuda 6.5/7.0).
#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA)
// The KOKKOS_LAMBDA macro replaces the capture-by-value clause [=].
// It also handles any other syntax needed for CUDA.
Kokkos::parallel_reduce(
n, KOKKOS_LAMBDA(const int i, int& lsum) { lsum += i * i; }, sum);
#endif
printf(
"Sum of squares of integers from 0 to %i, "
"computed in parallel, is %i\n",
@ -60,9 +57,6 @@ int main(int argc, char* argv[]) {
"computed sequentially, is %i\n",
n - 1, seqSum);
Kokkos::finalize();
#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA)
return (sum == seqSum) ? 0 : -1;
#else
return 0;
#endif
}

View File

@ -1,9 +1,5 @@
KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
kokkos_include_directories(${CMAKE_CURRENT_BINARY_DIR})
kokkos_include_directories(${CMAKE_CURRENT_SOURCE_DIR})
# This is a tutorial, not a test, so we don't ask CTest to run it.
KOKKOS_ADD_EXECUTABLE(
tutorial_03_simple_view
SOURCES simple_view.cpp
)
kokkos_add_executable(tutorial_03_simple_view SOURCES simple_view.cpp)

View File

@ -39,7 +39,7 @@
//
// The first dimension of the View is the dimension over which it is
// efficient for Kokkos to parallelize.
using view_type = Kokkos::View<double * [3]>;
using view_type = Kokkos::View<double* [3]>;
// parallel_for functor that fills the View given to its constructor.
// The View must already have been allocated.

View File

@ -1,9 +1,5 @@
KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
kokkos_include_directories(${CMAKE_CURRENT_BINARY_DIR})
kokkos_include_directories(${CMAKE_CURRENT_SOURCE_DIR})
# This is a tutorial, not a test, so we don't ask CTest to run it.
KOKKOS_ADD_EXECUTABLE(
tutorial_03_simple_view_lambda
SOURCES simple_view_lambda.cpp
)
kokkos_add_executable(tutorial_03_simple_view_lambda SOURCES simple_view_lambda.cpp)

View File

@ -38,7 +38,7 @@
//
// The first dimension of the View is the dimension over which it is
// efficient for Kokkos to parallelize.
using view_type = Kokkos::View<double * [3]>;
using view_type = Kokkos::View<double* [3]>;
int main(int argc, char* argv[]) {
Kokkos::initialize(argc, argv);
@ -61,19 +61,16 @@ int main(int argc, char* argv[]) {
// Different Views may have the same label.
view_type a("A", 10);
// Fill the View with some data. The parallel_for loop will iterate
// over the View's first dimension N.
//
// Note that the View is passed by value into the lambda. The macro
// KOKKOS_LAMBDA includes the "capture by value" clause [=]. This
// tells the lambda to "capture all variables in the enclosing scope
// by value." Views have "view semantics"; they behave like
// pointers, not like std::vector. Passing them by value does a
// shallow copy. A deep copy never happens unless you explicitly
// ask for one.
// We also need to protect the usage of a lambda against compiling
// with a backend which doesn't support it (i.e. Cuda 6.5/7.0).
#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA)
// Fill the View with some data. The parallel_for loop will iterate
// over the View's first dimension N.
//
// Note that the View is passed by value into the lambda. The macro
// KOKKOS_LAMBDA includes the "capture by value" clause [=]. This
// tells the lambda to "capture all variables in the enclosing scope
// by value." Views have "view semantics"; they behave like
// pointers, not like std::vector. Passing them by value does a
// shallow copy. A deep copy never happens unless you explicitly
// ask for one.
Kokkos::parallel_for(
10, KOKKOS_LAMBDA(const int i) {
// Access the View just like a Fortran array. The layout depends
@ -92,7 +89,6 @@ int main(int argc, char* argv[]) {
},
sum);
printf("Result: %f\n", sum);
#endif
}
Kokkos::finalize();
}

View File

@ -1,9 +1,5 @@
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
include_directories(${CMAKE_CURRENT_BINARY_DIR})
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
# This is a tutorial, not a test, so we don't ask CTest to run it.
KOKKOS_ADD_EXECUTABLE(
tutorial_04_simple_memoryspaces
SOURCES simple_memoryspaces.cpp
)
kokkos_add_executable(tutorial_04_simple_memoryspaces SOURCES simple_memoryspaces.cpp)

View File

@ -19,7 +19,7 @@
// The type of a two-dimensional N x 3 array of double.
// It lives in Kokkos' default memory space.
using view_type = Kokkos::View<double * [3]>;
using view_type = Kokkos::View<double *[3]>;
// The "HostMirror" type corresponding to view_type above is also a
// two-dimensional N x 3 array of double. However, it lives in the

View File

@ -1,10 +1,5 @@
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
include_directories(${CMAKE_CURRENT_BINARY_DIR})
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
# This is a tutorial, not a test, so we don't ask CTest to run it.
KOKKOS_ADD_EXECUTABLE(
tutorial_05_simple_atomics
SOURCES simple_atomics.cpp
)
kokkos_add_executable(tutorial_05_simple_atomics SOURCES simple_atomics.cpp)

View File

@ -1,9 +1,5 @@
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
include_directories(${CMAKE_CURRENT_BINARY_DIR})
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
# This is a tutorial, not a test, so we don't ask CTest to run it.
KOKKOS_ADD_EXECUTABLE(
tutorial_06_simple_mdrangepolicy
SOURCES simple_mdrangepolicy.cpp
)
kokkos_add_executable(tutorial_06_simple_mdrangepolicy SOURCES simple_mdrangepolicy.cpp)

View File

@ -1,9 +1,5 @@
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
include_directories(${CMAKE_CURRENT_BINARY_DIR})
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
# This is a tutorial, not a test, so we don't ask CTest to run it.
KOKKOS_ADD_EXECUTABLE(
tutorial_advancedviews_01_data_layouts
SOURCES data_layouts.cpp
)
kokkos_add_executable(tutorial_advancedviews_01_data_layouts SOURCES data_layouts.cpp)

View File

@ -1,9 +1,5 @@
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
include_directories(${CMAKE_CURRENT_BINARY_DIR})
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
# This is a tutorial, not a test, so we don't ask CTest to run it.
KOKKOS_ADD_EXECUTABLE(
tutorial_advancedviews_02_memory_traits
SOURCES memory_traits.cpp
)
kokkos_add_executable(tutorial_advancedviews_02_memory_traits SOURCES memory_traits.cpp)

View File

@ -1,9 +1,5 @@
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
include_directories(${CMAKE_CURRENT_BINARY_DIR})
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
# This is a tutorial, not a test, so we don't ask CTest to run it.
KOKKOS_ADD_EXECUTABLE(
tutorial_advancedviews_03_subviews
SOURCES subviews.cpp
)
kokkos_add_executable(tutorial_advancedviews_03_subviews SOURCES subviews.cpp)

View File

@ -1,9 +1,5 @@
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
include_directories(${CMAKE_CURRENT_BINARY_DIR})
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
# This is a tutorial, not a test, so we don't ask CTest to run it.
KOKKOS_ADD_EXECUTABLE(
tutorial_advancedviews_04_dualviews
SOURCES dual_view.cpp
)
kokkos_add_executable(tutorial_advancedviews_04_dualviews SOURCES dual_view.cpp)

View File

@ -48,9 +48,9 @@ struct localsum {
// overrides Kokkos' default execution space.
using execution_space = ExecutionSpace;
using memory_space = typename Kokkos::Impl::if_c<
std::is_same<ExecutionSpace, Kokkos::DefaultExecutionSpace>::value,
idx_type::memory_space, idx_type::host_mirror_space>::type;
using memory_space = std::conditional_t<
std::is_same_v<ExecutionSpace, Kokkos::DefaultExecutionSpace>,
idx_type::memory_space, idx_type::host_mirror_space>;
// Get the view types on the particular device for which the functor
// is instantiated.

View File

@ -1,11 +1,7 @@
include_directories(${CMAKE_CURRENT_BINARY_DIR})
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
IF (Kokkos_ENABLE_CUDA_UVM)
# This is a tutorial, not a test, so we don't ask CTest to run it.
KOKKOS_ADD_EXECUTABLE(
tutorial_advancedviews_05_nvidia_uvm
SOURCES uvm_example.cpp
)
ENDIF ()
if(Kokkos_ENABLE_CUDA_UVM)
# This is a tutorial, not a test, so we don't ask CTest to run it.
kokkos_add_executable(tutorial_advancedviews_05_nvidia_uvm SOURCES uvm_example.cpp)
endif()

View File

@ -1,9 +1,8 @@
kokkos_add_example_directories(01_data_layouts)
kokkos_add_example_directories(02_memory_traits)
kokkos_add_example_directories(03_subviews)
kokkos_add_example_directories(04_dualviews)
KOKKOS_ADD_EXAMPLE_DIRECTORIES(01_data_layouts)
KOKKOS_ADD_EXAMPLE_DIRECTORIES(02_memory_traits)
KOKKOS_ADD_EXAMPLE_DIRECTORIES(03_subviews)
KOKKOS_ADD_EXAMPLE_DIRECTORIES(04_dualviews)
IF (Kokkos_ENABLE_CUDA_UVM)
KOKKOS_ADD_EXAMPLE_DIRECTORIES(05_NVIDIA_UVM)
ENDIF ()
if(Kokkos_ENABLE_CUDA_UVM)
kokkos_add_example_directories(05_NVIDIA_UVM)
endif()

View File

@ -0,0 +1,5 @@
kokkos_include_directories(${CMAKE_CURRENT_BINARY_DIR})
kokkos_include_directories(${CMAKE_CURRENT_SOURCE_DIR})
# This is a tutorial, not a test, so we don't ask CTest to run it.
kokkos_add_executable(tutorial_algorithms_01_random_numbers SOURCES random_numbers.cpp)

View File

@ -0,0 +1 @@
kokkos_add_example_directories(01_random_numbers)

View File

@ -1,14 +1,14 @@
kokkos_add_example_directories(01_hello_world)
kokkos_add_example_directories(02_simple_reduce)
kokkos_add_example_directories(03_simple_view)
kokkos_add_example_directories(04_simple_memoryspaces)
kokkos_add_example_directories(05_simple_atomics)
kokkos_add_example_directories(06_simple_mdrangepolicy)
kokkos_add_example_directories(Advanced_Views)
kokkos_add_example_directories(Algorithms)
kokkos_add_example_directories(Hierarchical_Parallelism)
kokkos_add_example_directories(launch_bounds)
KOKKOS_ADD_EXAMPLE_DIRECTORIES(01_hello_world)
KOKKOS_ADD_EXAMPLE_DIRECTORIES(02_simple_reduce)
KOKKOS_ADD_EXAMPLE_DIRECTORIES(03_simple_view)
KOKKOS_ADD_EXAMPLE_DIRECTORIES(04_simple_memoryspaces)
KOKKOS_ADD_EXAMPLE_DIRECTORIES(05_simple_atomics)
KOKKOS_ADD_EXAMPLE_DIRECTORIES(06_simple_mdrangepolicy)
KOKKOS_ADD_EXAMPLE_DIRECTORIES(Advanced_Views)
KOKKOS_ADD_EXAMPLE_DIRECTORIES(Hierarchical_Parallelism)
KOKKOS_ADD_EXAMPLE_DIRECTORIES(01_hello_world_lambda)
KOKKOS_ADD_EXAMPLE_DIRECTORIES(02_simple_reduce_lambda)
KOKKOS_ADD_EXAMPLE_DIRECTORIES(03_simple_view_lambda)
kokkos_add_example_directories(01_hello_world_lambda)
kokkos_add_example_directories(02_simple_reduce_lambda)
kokkos_add_example_directories(03_simple_view_lambda)

View File

@ -1,9 +1,5 @@
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
include_directories(${CMAKE_CURRENT_BINARY_DIR})
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
# This is a tutorial, not a test, so we don't ask CTest to run it.
KOKKOS_ADD_EXECUTABLE(
tutorial_hierarchicalparallelism_01_thread_teams
SOURCES thread_teams.cpp
)
kokkos_add_executable(tutorial_hierarchicalparallelism_01_thread_teams SOURCES thread_teams.cpp)

View File

@ -1,10 +1,5 @@
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
include_directories(${CMAKE_CURRENT_BINARY_DIR})
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
# This is a tutorial, not a test, so we don't ask CTest to run it.
KOKKOS_ADD_EXECUTABLE(
tutorial_hierarchical_01_thread_teams_lambda
SOURCES thread_teams_lambda.cpp
)
kokkos_add_executable(tutorial_hierarchical_01_thread_teams_lambda SOURCES thread_teams_lambda.cpp)

View File

@ -50,9 +50,6 @@ int main(int narg, char* args[]) {
// region." That is, every team member is active and will execute
// the body of the lambda.
int sum = 0;
// We also need to protect the usage of a lambda against compiling
// with a backend which doesn't support it (i.e. Cuda 6.5/7.0).
#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA)
parallel_reduce(
policy,
KOKKOS_LAMBDA(const team_member& thread, int& lsum) {
@ -65,7 +62,7 @@ int main(int narg, char* args[]) {
thread.team_size());
},
sum);
#endif
// The result will be 12*team_policy::team_size_max([=]{})
printf("Result %i\n", sum);

View File

@ -1,9 +1,5 @@
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
include_directories(${CMAKE_CURRENT_BINARY_DIR})
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
# This is a tutorial, not a test, so we don't ask CTest to run it.
KOKKOS_ADD_EXECUTABLE(
tutorial_hierarchicalparallelism_02_nested_parallel_for
SOURCES nested_parallel_for.cpp
)
kokkos_add_executable(tutorial_hierarchicalparallelism_02_nested_parallel_for SOURCES nested_parallel_for.cpp)

View File

@ -1,10 +1,5 @@
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
include_directories(${CMAKE_CURRENT_BINARY_DIR})
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
# This is a tutorial, not a test, so we don't ask CTest to run it.
KOKKOS_ADD_EXECUTABLE(
tutorial_hierarchicalparallelism_03_vectorization
SOURCES vectorization.cpp
)
kokkos_add_executable(tutorial_hierarchicalparallelism_03_vectorization SOURCES vectorization.cpp)

View File

@ -1,10 +1,5 @@
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
include_directories(${CMAKE_CURRENT_BINARY_DIR})
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
# This is a tutorial, not a test, so we don't ask CTest to run it.
KOKKOS_ADD_EXECUTABLE(
tutorial_hierarchicalparallelism_04_team_scan
SOURCES team_scan.cpp
)
kokkos_add_executable(tutorial_hierarchicalparallelism_04_team_scan SOURCES team_scan.cpp)

View File

@ -1,6 +1,4 @@
KOKKOS_ADD_EXAMPLE_DIRECTORIES(01_thread_teams)
KOKKOS_ADD_EXAMPLE_DIRECTORIES(01_thread_teams_lambda)
KOKKOS_ADD_EXAMPLE_DIRECTORIES(02_nested_parallel_for)
KOKKOS_ADD_EXAMPLE_DIRECTORIES(03_vectorization)
kokkos_add_example_directories(01_thread_teams)
kokkos_add_example_directories(01_thread_teams_lambda)
kokkos_add_example_directories(02_nested_parallel_for)
kokkos_add_example_directories(03_vectorization)

View File

@ -1,9 +1,5 @@
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
include_directories(${CMAKE_CURRENT_BINARY_DIR})
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
# This is a tutorial, not a test, so we don't ask CTest to run it.
KOKKOS_ADD_EXECUTABLE(
tutorial_02_simple_reduce
SOURCES simple_reduce.cpp
)
kokkos_add_executable(launch_bounds_reduce SOURCES launch_bounds_reduce.cpp)

View File

@ -38,8 +38,9 @@ struct collision {
int hash(int q) const {
// A simple hash by Justin Sobel
// Thanks to Arash Partow (partow.net)
char* fourchars = (char*)&q;
int hash = 1315423911;
char* fourchars =
(char*)&q; // NOLINT(cppcoreguidelines-pro-type-cstyle-cast)
int hash = 1315423911;
for (int i = 0; i < 4; fourchars++, i++) {
hash ^= ((hash << 5) + *fourchars + (hash >> 2));
}