Merge branch 'develop' into deprecate_pylammps

This commit is contained in:
Richard Berger
2025-01-10 16:01:06 -07:00
committed by GitHub
917 changed files with 28628 additions and 26724 deletions

View File

@ -1078,12 +1078,15 @@ if(BUILD_TOOLS)
message(STATUS "<<< Building Tools >>>") message(STATUS "<<< Building Tools >>>")
endif() endif()
if(BUILD_LAMMPS_GUI) if(BUILD_LAMMPS_GUI)
message(STATUS "<<< Building LAMMPS GUI >>>") message(STATUS "<<< Building LAMMPS-GUI >>>")
if(LAMMPS_GUI_USE_PLUGIN) if(LAMMPS_GUI_USE_PLUGIN)
message(STATUS "Loading LAMMPS library as plugin at run time") message(STATUS "Loading LAMMPS library as plugin at run time")
else() else()
message(STATUS "Linking LAMMPS library at compile time") message(STATUS "Linking LAMMPS library at compile time")
endif() endif()
if(BUILD_WHAM)
message(STATUS "<<< Building WHAM >>>")
endif()
endif() endif()
if(ENABLE_TESTING) if(ENABLE_TESTING)
message(STATUS "<<< Building Unit Tests >>>") message(STATUS "<<< Building Unit Tests >>>")

View File

@ -7,26 +7,13 @@ endif()
######################################################################## ########################################################################
# consistency checks and Kokkos options/settings required by LAMMPS # consistency checks and Kokkos options/settings required by LAMMPS
if(Kokkos_ENABLE_CUDA)
option(Kokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC "CUDA asynchronous malloc support" OFF)
mark_as_advanced(Kokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC)
if(Kokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC)
message(STATUS "KOKKOS: CUDA malloc async support enabled")
else()
message(STATUS "KOKKOS: CUDA malloc async support disabled")
endif()
endif()
if(Kokkos_ENABLE_HIP) if(Kokkos_ENABLE_HIP)
option(Kokkos_ENABLE_HIP_MULTIPLE_KERNEL_INSTANTIATIONS "Enable multiple kernel instantiations with HIP" ON) option(Kokkos_ENABLE_HIP_MULTIPLE_KERNEL_INSTANTIATIONS "Enable multiple kernel instantiations with HIP" ON)
mark_as_advanced(Kokkos_ENABLE_HIP_MULTIPLE_KERNEL_INSTANTIATIONS) mark_as_advanced(Kokkos_ENABLE_HIP_MULTIPLE_KERNEL_INSTANTIATIONS)
option(Kokkos_ENABLE_ROCTHRUST "Use RoCThrust library" ON) option(Kokkos_ENABLE_ROCTHRUST "Use RoCThrust library" ON)
mark_as_advanced(Kokkos_ENABLE_ROCTHRUST) mark_as_advanced(Kokkos_ENABLE_ROCTHRUST)
if(Kokkos_ARCH_AMD_GFX942 OR Kokkos_ARCH_AMD_GFX940)
option(Kokkos_ENABLE_IMPL_HIP_UNIFIED_MEMORY "Enable unified memory with HIP" ON)
mark_as_advanced(Kokkos_ENABLE_IMPL_HIP_UNIFIED_MEMORY)
endif()
endif() endif()
# Adding OpenMP compiler flags without the checks done for # Adding OpenMP compiler flags without the checks done for
# BUILD_OMP can result in compile failures. Enforce consistency. # BUILD_OMP can result in compile failures. Enforce consistency.
if(Kokkos_ENABLE_OPENMP) if(Kokkos_ENABLE_OPENMP)
@ -70,8 +57,8 @@ if(DOWNLOAD_KOKKOS)
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_CXX_EXTENSIONS=${CMAKE_CXX_EXTENSIONS}") list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_CXX_EXTENSIONS=${CMAKE_CXX_EXTENSIONS}")
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}") list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}")
include(ExternalProject) include(ExternalProject)
set(KOKKOS_URL "https://github.com/kokkos/kokkos/archive/4.4.01.tar.gz" CACHE STRING "URL for KOKKOS tarball") set(KOKKOS_URL "https://github.com/kokkos/kokkos/archive/4.5.01.tar.gz" CACHE STRING "URL for KOKKOS tarball")
set(KOKKOS_MD5 "de6ee80d00b6212b02bfb7f1e71a8392" CACHE STRING "MD5 checksum of KOKKOS tarball") set(KOKKOS_MD5 "4d832aa0284169d9e3fbae3165286bc6" CACHE STRING "MD5 checksum of KOKKOS tarball")
mark_as_advanced(KOKKOS_URL) mark_as_advanced(KOKKOS_URL)
mark_as_advanced(KOKKOS_MD5) mark_as_advanced(KOKKOS_MD5)
GetFallbackURL(KOKKOS_URL KOKKOS_FALLBACK) GetFallbackURL(KOKKOS_URL KOKKOS_FALLBACK)
@ -96,7 +83,7 @@ if(DOWNLOAD_KOKKOS)
add_dependencies(LAMMPS::KOKKOSCORE kokkos_build) add_dependencies(LAMMPS::KOKKOSCORE kokkos_build)
add_dependencies(LAMMPS::KOKKOSCONTAINERS kokkos_build) add_dependencies(LAMMPS::KOKKOSCONTAINERS kokkos_build)
elseif(EXTERNAL_KOKKOS) elseif(EXTERNAL_KOKKOS)
find_package(Kokkos 4.4.01 REQUIRED CONFIG) find_package(Kokkos 4.5.01 REQUIRED CONFIG)
target_link_libraries(lammps PRIVATE Kokkos::kokkos) target_link_libraries(lammps PRIVATE Kokkos::kokkos)
else() else()
set(LAMMPS_LIB_KOKKOS_SRC_DIR ${LAMMPS_LIB_SOURCE_DIR}/kokkos) set(LAMMPS_LIB_KOKKOS_SRC_DIR ${LAMMPS_LIB_SOURCE_DIR}/kokkos)

View File

@ -1,50 +1,62 @@
# PACE library support for ML-PACE package # PACE library support for ML-PACE package
find_package(pace QUIET)
# set policy to silence warnings about timestamps of downloaded files. review occasionally if it may be set to NEW if(pace_FOUND)
if(POLICY CMP0135) find_package(pace)
cmake_policy(SET CMP0135 OLD) target_link_libraries(lammps PRIVATE pace::pace)
endif()
set(PACELIB_URL "https://github.com/ICAMS/lammps-user-pace/archive/refs/tags/v.2023.11.25.fix.tar.gz" CACHE STRING "URL for PACE evaluator library sources")
set(PACELIB_MD5 "b45de9a633f42ed65422567e3ce56f9f" CACHE STRING "MD5 checksum of PACE evaluator library tarball")
mark_as_advanced(PACELIB_URL)
mark_as_advanced(PACELIB_MD5)
GetFallbackURL(PACELIB_URL PACELIB_FALLBACK)
# LOCAL_ML-PACE points to top-level dir with local lammps-user-pace repo,
# to make it easier to check local build without going through the public github releases
if(LOCAL_ML-PACE)
set(lib-pace "${LOCAL_ML-PACE}")
else() else()
# download library sources to build folder # set policy to silence warnings about timestamps of downloaded files. review occasionally if it may be set to NEW
if(EXISTS ${CMAKE_BINARY_DIR}/libpace.tar.gz) if(POLICY CMP0135)
file(MD5 ${CMAKE_BINARY_DIR}/libpace.tar.gz DL_MD5) cmake_policy(SET CMP0135 OLD)
endif()
if(NOT "${DL_MD5}" STREQUAL "${PACELIB_MD5}")
message(STATUS "Downloading ${PACELIB_URL}")
file(DOWNLOAD ${PACELIB_URL} ${CMAKE_BINARY_DIR}/libpace.tar.gz STATUS DL_STATUS SHOW_PROGRESS)
file(MD5 ${CMAKE_BINARY_DIR}/libpace.tar.gz DL_MD5)
if((NOT DL_STATUS EQUAL 0) OR (NOT "${DL_MD5}" STREQUAL "${PACELIB_MD5}"))
message(WARNING "Download from primary URL ${PACELIB_URL} failed\nTrying fallback URL ${PACELIB_FALLBACK}")
file(DOWNLOAD ${PACELIB_FALLBACK} ${CMAKE_BINARY_DIR}/libpace.tar.gz EXPECTED_HASH MD5=${PACELIB_MD5} SHOW_PROGRESS)
endif() endif()
else()
message(STATUS "Using already downloaded archive ${CMAKE_BINARY_DIR}/libpace.tar.gz") set(PACELIB_URL "https://github.com/ICAMS/lammps-user-pace/archive/refs/tags/v.2023.11.25.fix2.tar.gz" CACHE STRING "URL for PACE evaluator library sources")
endif() set(PACELIB_MD5 "a53bd87cfee8b07d9f44bc17aad69c3f" CACHE STRING "MD5 checksum of PACE evaluator library tarball")
mark_as_advanced(PACELIB_URL)
mark_as_advanced(PACELIB_MD5)
GetFallbackURL(PACELIB_URL PACELIB_FALLBACK)
# LOCAL_ML-PACE points to top-level dir with local lammps-user-pace repo,
# to make it easier to check local build without going through the public github releases
if(LOCAL_ML-PACE)
set(lib-pace "${LOCAL_ML-PACE}")
else()
# download library sources to build folder
if(EXISTS ${CMAKE_BINARY_DIR}/libpace.tar.gz)
file(MD5 ${CMAKE_BINARY_DIR}/libpace.tar.gz DL_MD5)
endif()
if(NOT "${DL_MD5}" STREQUAL "${PACELIB_MD5}")
message(STATUS "Downloading ${PACELIB_URL}")
file(DOWNLOAD ${PACELIB_URL} ${CMAKE_BINARY_DIR}/libpace.tar.gz STATUS DL_STATUS SHOW_PROGRESS)
file(MD5 ${CMAKE_BINARY_DIR}/libpace.tar.gz DL_MD5)
if((NOT DL_STATUS EQUAL 0) OR (NOT "${DL_MD5}" STREQUAL "${PACELIB_MD5}"))
message(WARNING "Download from primary URL ${PACELIB_URL} failed\nTrying fallback URL ${PACELIB_FALLBACK}")
file(DOWNLOAD ${PACELIB_FALLBACK} ${CMAKE_BINARY_DIR}/libpace.tar.gz EXPECTED_HASH MD5=${PACELIB_MD5} SHOW_PROGRESS)
endif()
else()
message(STATUS "Using already downloaded archive ${CMAKE_BINARY_DIR}/libpace.tar.gz")
endif()
# uncompress downloaded sources # uncompress downloaded sources
execute_process( execute_process(
COMMAND ${CMAKE_COMMAND} -E remove_directory lammps-user-pace* COMMAND ${CMAKE_COMMAND} -E remove_directory lammps-user-pace*
COMMAND ${CMAKE_COMMAND} -E tar xzf libpace.tar.gz COMMAND ${CMAKE_COMMAND} -E tar xzf libpace.tar.gz
WORKING_DIRECTORY ${CMAKE_BINARY_DIR} WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
) )
get_newest_file(${CMAKE_BINARY_DIR}/lammps-user-pace-* lib-pace) get_newest_file(${CMAKE_BINARY_DIR}/lammps-user-pace-* lib-pace)
endif() endif()
add_subdirectory(${lib-pace} build-pace) # some preinstalled yaml-cpp versions don't provide a namespaced target
set_target_properties(pace PROPERTIES CXX_EXTENSIONS ON OUTPUT_NAME lammps_pace${LAMMPS_MACHINE}) find_package(yaml-cpp QUIET)
if(TARGET yaml-cpp AND NOT TARGET yaml-cpp::yaml-cpp)
if(CMAKE_PROJECT_NAME STREQUAL "lammps") add_library(yaml-cpp::yaml-cpp ALIAS yaml-cpp)
target_link_libraries(lammps PRIVATE pace) endif()
add_subdirectory(${lib-pace} build-pace)
set_target_properties(pace PROPERTIES CXX_EXTENSIONS ON OUTPUT_NAME lammps_pace${LAMMPS_MACHINE})
if(CMAKE_PROJECT_NAME STREQUAL "lammps")
target_link_libraries(lammps PRIVATE pace)
endif()
endif() endif()

View File

@ -1,3 +1,5 @@
# FindVTK requires that C support is enabled when looking for MPI support
enable_language(C)
find_package(VTK REQUIRED NO_MODULE) find_package(VTK REQUIRED NO_MODULE)
target_compile_definitions(lammps PRIVATE -DLAMMPS_VTK) target_compile_definitions(lammps PRIVATE -DLAMMPS_VTK)
if (VTK_MAJOR_VERSION VERSION_LESS 9.0) if (VTK_MAJOR_VERSION VERSION_LESS 9.0)

View File

@ -2,7 +2,7 @@
DOXYFILE_ENCODING = UTF-8 DOXYFILE_ENCODING = UTF-8
PROJECT_NAME = "LAMMPS Programmer's Guide" PROJECT_NAME = "LAMMPS Programmer's Guide"
PROJECT_NUMBER = "4 May 2022" PROJECT_NUMBER = "19 November 2024"
PROJECT_BRIEF = "Documentation of the LAMMPS library interface and Python wrapper" PROJECT_BRIEF = "Documentation of the LAMMPS library interface and Python wrapper"
PROJECT_LOGO = lammps-logo.png PROJECT_LOGO = lammps-logo.png
CREATE_SUBDIRS = NO CREATE_SUBDIRS = NO

View File

@ -160,7 +160,7 @@ with the OpenMP 3.1 semantics used in LAMMPS for maximal compatibility
with compiler versions in use. If compilation with OpenMP enabled fails with compiler versions in use. If compilation with OpenMP enabled fails
because of your compiler requiring strict OpenMP 4.0 semantics, you can because of your compiler requiring strict OpenMP 4.0 semantics, you can
change the behavior by adding ``-D LAMMPS_OMP_COMPAT=4`` to the change the behavior by adding ``-D LAMMPS_OMP_COMPAT=4`` to the
``LMP_INC`` variable in your makefile, or add it to the command line ``LMP_INC`` variable in your makefile, or add it to the command-line flags
while configuring with CMake. LAMMPS will auto-detect a suitable setting while configuring with CMake. LAMMPS will auto-detect a suitable setting
for most GNU, Clang, and Intel compilers. for most GNU, Clang, and Intel compilers.
@ -502,6 +502,8 @@ using CMake or Make.
# chain.x, micelle2d.x, msi2lmp, phana, # chain.x, micelle2d.x, msi2lmp, phana,
# stl_bin2txt # stl_bin2txt
-D BUILD_LAMMPS_GUI=value # yes or no (default). Build LAMMPS-GUI -D BUILD_LAMMPS_GUI=value # yes or no (default). Build LAMMPS-GUI
-D BUILD_WHAM=value # yes (default). Download and build WHAM;
# only available for BUILD_LAMMPS_GUI=yes
The generated binaries will also become part of the LAMMPS installation The generated binaries will also become part of the LAMMPS installation
(see below). (see below).

View File

@ -8,7 +8,7 @@ packages. Links to those pages on the :doc:`Build overview <Build>`
page. page.
The following text assumes some familiarity with CMake and focuses on The following text assumes some familiarity with CMake and focuses on
using the command line tool ``cmake`` and what settings are supported using the command-line tool ``cmake`` and what settings are supported
for building LAMMPS. A more detailed tutorial on how to use CMake for building LAMMPS. A more detailed tutorial on how to use CMake
itself, the text mode or graphical user interface, to change the itself, the text mode or graphical user interface, to change the
generated output files for different build tools and development generated output files for different build tools and development
@ -42,9 +42,9 @@ that want to modify or extend LAMMPS.
and adapt the LAMMPS default build configuration accordingly. and adapt the LAMMPS default build configuration accordingly.
- CMake can generate files for different build tools and integrated - CMake can generate files for different build tools and integrated
development environments (IDE). development environments (IDE).
- CMake supports customization of settings with a command line, text - CMake supports customization of settings with a command-line, text
mode, or graphical user interface. No manual editing of files, mode, or graphical user interface. No manual editing of files,
knowledge of file formats or complex command line syntax is required. knowledge of file formats or complex command-line syntax is required.
- All enabled components are compiled in a single build operation. - All enabled components are compiled in a single build operation.
- Automated dependency tracking for all files and configuration options. - Automated dependency tracking for all files and configuration options.
- Support for true out-of-source compilation. Multiple configurations - Support for true out-of-source compilation. Multiple configurations
@ -68,7 +68,7 @@ that purpose you can use either the command-line utility ``cmake`` (or
graphical utility ``cmake-gui``, or use them interchangeably. The graphical utility ``cmake-gui``, or use them interchangeably. The
second step is then the compilation and linking of all objects, second step is then the compilation and linking of all objects,
libraries, and executables using the selected build tool. Here is a libraries, and executables using the selected build tool. Here is a
minimal example using the command line version of CMake to build LAMMPS minimal example using the command-line version of CMake to build LAMMPS
with no add-on packages enabled and no customization: with no add-on packages enabled and no customization:
.. code-block:: bash .. code-block:: bash
@ -131,7 +131,7 @@ file called ``CMakeLists.txt`` (for LAMMPS it is located in the
configuration step. The cache file contains all current CMake settings. configuration step. The cache file contains all current CMake settings.
To modify settings, enable or disable features, you need to set To modify settings, enable or disable features, you need to set
*variables* with either the ``-D`` command line flag (``-D *variables* with either the ``-D`` command-line flag (``-D
VARIABLE1_NAME=value``) or change them in the text mode of the graphical VARIABLE1_NAME=value``) or change them in the text mode of the graphical
user interface. The ``-D`` flag can be used several times in one command. user interface. The ``-D`` flag can be used several times in one command.
@ -141,11 +141,11 @@ a different compiler tool chain. Those are loaded with the ``-C`` flag
(``-C ../cmake/presets/basic.cmake``). This step would only be needed (``-C ../cmake/presets/basic.cmake``). This step would only be needed
once, as the settings from the preset files are stored in the once, as the settings from the preset files are stored in the
``CMakeCache.txt`` file. It is also possible to customize the build ``CMakeCache.txt`` file. It is also possible to customize the build
by adding one or more ``-D`` flags to the CMake command line. by adding one or more ``-D`` flags to the CMake command.
Generating files for alternate build tools (e.g. Ninja) and project files Generating files for alternate build tools (e.g. Ninja) and project files
for IDEs like Eclipse, CodeBlocks, or Kate can be selected using the ``-G`` for IDEs like Eclipse, CodeBlocks, or Kate can be selected using the ``-G``
command line flag. A list of available generator settings for your command-line flag. A list of available generator settings for your
specific CMake version is given when running ``cmake --help``. specific CMake version is given when running ``cmake --help``.
.. _cmake_multiconfig: .. _cmake_multiconfig:

View File

@ -263,9 +263,9 @@ will be skipped if prerequisite features are not available in LAMMPS.
time. Preference is given to parts of the code base that are easy to time. Preference is given to parts of the code base that are easy to
test or commonly used. test or commonly used.
Tests as shown by the ``ctest`` program are command lines defined in the Tests as shown by the ``ctest`` program are commands defined in the
``CMakeLists.txt`` files in the ``unittest`` directory tree. A few ``CMakeLists.txt`` files in the ``unittest`` directory tree. A few
tests simply execute LAMMPS with specific command line flags and check tests simply execute LAMMPS with specific command-line flags and check
the output to the screen for expected content. A large number of unit the output to the screen for expected content. A large number of unit
tests are special tests programs using the `GoogleTest framework tests are special tests programs using the `GoogleTest framework
<https://github.com/google/googletest/>`_ and linked to the LAMMPS <https://github.com/google/googletest/>`_ and linked to the LAMMPS
@ -420,7 +420,7 @@ during MD timestepping and manipulate per-atom properties like
positions, velocities, and forces. For those fix styles, testing can be positions, velocities, and forces. For those fix styles, testing can be
done in a very similar fashion as for force fields and thus there is a done in a very similar fashion as for force fields and thus there is a
test program `test_fix_timestep` that shares a lot of code, properties, test program `test_fix_timestep` that shares a lot of code, properties,
and command line flags with the force field style testers described in and command-line flags with the force field style testers described in
the previous section. the previous section.
This tester will set up a small molecular system run with verlet run This tester will set up a small molecular system run with verlet run
@ -642,10 +642,10 @@ The following target are available for both, GNU make and CMake:
.. _gh-cli: .. _gh-cli:
GitHub command line interface GitHub command-line interface
----------------------------- -----------------------------
GitHub has developed a `command line tool <https://cli.github.com>`_ GitHub has developed a `command-line tool <https://cli.github.com>`_
to interact with the GitHub website via a command called ``gh``. to interact with the GitHub website via a command called ``gh``.
This is extremely convenient when working with a Git repository hosted This is extremely convenient when working with a Git repository hosted
on GitHub (like LAMMPS). It is thus highly recommended to install it on GitHub (like LAMMPS). It is thus highly recommended to install it

View File

@ -209,7 +209,7 @@ necessary for ``hipcc`` and the linker to work correctly.
Using the CHIP-SPV implementation of HIP is supported. It allows one to Using the CHIP-SPV implementation of HIP is supported. It allows one to
run HIP code on Intel GPUs via the OpenCL or Level Zero back ends. To use run HIP code on Intel GPUs via the OpenCL or Level Zero back ends. To use
CHIP-SPV, you must set ``-DHIP_USE_DEVICE_SORT=OFF`` in your CMake CHIP-SPV, you must set ``-DHIP_USE_DEVICE_SORT=OFF`` in your CMake
command line as CHIP-SPV does not yet support hipCUB. As of Summer 2022, command line as CHIP-SPV does not yet support hipCUB. As of Summer 2022,
the use of HIP for Intel GPUs is experimental. You should only use this the use of HIP for Intel GPUs is experimental. You should only use this
option in preparations to run on Aurora system at Argonne. option in preparations to run on Aurora system at Argonne.
@ -232,7 +232,7 @@ option in preparations to run on Aurora system at Argonne.
.. code:: bash .. code:: bash
# CUDA target (not recommended, use GPU_ARCH=cuda) # CUDA target (not recommended, use GPU_API=cuda)
# !!! DO NOT set CMAKE_CXX_COMPILER !!! # !!! DO NOT set CMAKE_CXX_COMPILER !!!
export HIP_PLATFORM=nvcc export HIP_PLATFORM=nvcc
export HIP_PATH=/path/to/HIP/install export HIP_PATH=/path/to/HIP/install
@ -421,9 +421,10 @@ minutes to hours) to build. Of course you only need to do that once.)
cmake build system. The ``lib/kim/Install.py`` script supports a cmake build system. The ``lib/kim/Install.py`` script supports a
``CMAKE`` environment variable if the cmake executable is named other ``CMAKE`` environment variable if the cmake executable is named other
than ``cmake`` on your system. Additional environment variables may be than ``cmake`` on your system. Additional environment variables may be
provided on the command line for use by cmake. For example, to use the set with the ``make`` command for use by cmake. For example, to use the
``cmake3`` executable and tell it to use the gnu version 11 compilers ``cmake3`` executable and tell it to use the GNU version 11 compilers
to build KIM, one could use the following command line. called ``g++-11``, ``gcc-11`` and ``gfortran-11`` to build KIM, one
could use the following command.
.. code-block:: bash .. code-block:: bash
@ -546,16 +547,7 @@ They must be specified in uppercase.
- Local machine - Local machine
* - AMDAVX * - AMDAVX
- HOST - HOST
- AMD 64-bit x86 CPU (AVX 1) - AMD chip
* - ZEN
- HOST
- AMD Zen class CPU (AVX 2)
* - ZEN2
- HOST
- AMD Zen2 class CPU (AVX 2)
* - ZEN3
- HOST
- AMD Zen3 class CPU (AVX 2)
* - ARMV80 * - ARMV80
- HOST - HOST
- ARMv8.0 Compatible CPU - ARMv8.0 Compatible CPU
@ -571,105 +563,126 @@ They must be specified in uppercase.
* - A64FX * - A64FX
- HOST - HOST
- ARMv8.2 with SVE Support - ARMv8.2 with SVE Support
* - ARMV9_GRACE
- HOST
- ARMv9 NVIDIA Grace CPU
* - SNB * - SNB
- HOST - HOST
- Intel Sandy/Ivy Bridge CPU (AVX 1) - Intel Sandy/Ivy Bridge CPUs
* - HSW * - HSW
- HOST - HOST
- Intel Haswell CPU (AVX 2) - Intel Haswell CPUs
* - BDW * - BDW
- HOST - HOST
- Intel Broadwell Xeon E-class CPU (AVX 2 + transactional mem) - Intel Broadwell Xeon E-class CPUs
* - SKL
- HOST
- Intel Skylake Client CPU
* - SKX
- HOST
- Intel Skylake Xeon Server CPU (AVX512)
* - ICL * - ICL
- HOST - HOST
- Intel Ice Lake Client CPU (AVX512) - Intel Ice Lake Client CPUs (AVX512)
* - ICX * - ICX
- HOST - HOST
- Intel Ice Lake Xeon Server CPU (AVX512) - Intel Ice Lake Xeon Server CPUs (AVX512)
* - SPR * - SKL
- HOST - HOST
- Intel Sapphire Rapids Xeon Server CPU (AVX512) - Intel Skylake Client CPUs
* - SKX
- HOST
- Intel Skylake Xeon Server CPUs (AVX512)
* - KNC * - KNC
- HOST - HOST
- Intel Knights Corner Xeon Phi - Intel Knights Corner Xeon Phi
* - KNL * - KNL
- HOST - HOST
- Intel Knights Landing Xeon Phi - Intel Knights Landing Xeon Phi
* - SPR
- HOST
- Intel Sapphire Rapids Xeon Server CPUs (AVX512)
* - POWER8 * - POWER8
- HOST - HOST
- IBM POWER8 CPU - IBM POWER8 CPUs
* - POWER9 * - POWER9
- HOST - HOST
- IBM POWER9 CPU - IBM POWER9 CPUs
* - ZEN
- HOST
- AMD Zen architecture
* - ZEN2
- HOST
- AMD Zen2 architecture
* - ZEN3
- HOST
- AMD Zen3 architecture
* - RISCV_SG2042 * - RISCV_SG2042
- HOST - HOST
- SG2042 (RISC-V) CPU - SG2042 (RISC-V) CPUs
* - RISCV_RVA22V
- HOST
- RVA22V (RISC-V) CPUs
* - KEPLER30 * - KEPLER30
- GPU - GPU
- NVIDIA Kepler generation CC 3.0 GPU - NVIDIA Kepler generation CC 3.0
* - KEPLER32 * - KEPLER32
- GPU - GPU
- NVIDIA Kepler generation CC 3.2 GPU - NVIDIA Kepler generation CC 3.2
* - KEPLER35 * - KEPLER35
- GPU - GPU
- NVIDIA Kepler generation CC 3.5 GPU - NVIDIA Kepler generation CC 3.5
* - KEPLER37 * - KEPLER37
- GPU - GPU
- NVIDIA Kepler generation CC 3.7 GPU - NVIDIA Kepler generation CC 3.7
* - MAXWELL50 * - MAXWELL50
- GPU - GPU
- NVIDIA Maxwell generation CC 5.0 GPU - NVIDIA Maxwell generation CC 5.0
* - MAXWELL52 * - MAXWELL52
- GPU - GPU
- NVIDIA Maxwell generation CC 5.2 GPU - NVIDIA Maxwell generation CC 5.2
* - MAXWELL53 * - MAXWELL53
- GPU - GPU
- NVIDIA Maxwell generation CC 5.3 GPU - NVIDIA Maxwell generation CC 5.3
* - PASCAL60 * - PASCAL60
- GPU - GPU
- NVIDIA Pascal generation CC 6.0 GPU - NVIDIA Pascal generation CC 6.0
* - PASCAL61 * - PASCAL61
- GPU - GPU
- NVIDIA Pascal generation CC 6.1 GPU - NVIDIA Pascal generation CC 6.1
* - VOLTA70 * - VOLTA70
- GPU - GPU
- NVIDIA Volta generation CC 7.0 GPU - NVIDIA Volta generation CC 7.0
* - VOLTA72 * - VOLTA72
- GPU - GPU
- NVIDIA Volta generation CC 7.2 GPU - NVIDIA Volta generation CC 7.2
* - TURING75 * - TURING75
- GPU - GPU
- NVIDIA Turing generation CC 7.5 GPU - NVIDIA Turing generation CC 7.5
* - AMPERE80 * - AMPERE80
- GPU - GPU
- NVIDIA Ampere generation CC 8.0 GPU - NVIDIA Ampere generation CC 8.0
* - AMPERE86 * - AMPERE86
- GPU - GPU
- NVIDIA Ampere generation CC 8.6 GPU - NVIDIA Ampere generation CC 8.6
* - ADA89 * - ADA89
- GPU - GPU
- NVIDIA Ada Lovelace generation CC 8.9 GPU - NVIDIA Ada generation CC 8.9
* - HOPPER90 * - HOPPER90
- GPU - GPU
- NVIDIA Hopper generation CC 9.0 GPU - NVIDIA Hopper generation CC 9.0
* - AMD_GFX906 * - AMD_GFX906
- GPU - GPU
- AMD GPU MI50/MI60 - AMD GPU MI50/60
* - AMD_GFX908 * - AMD_GFX908
- GPU - GPU
- AMD GPU MI100 - AMD GPU MI100
* - AMD_GFX90A * - AMD_GFX90A
- GPU - GPU
- AMD GPU MI200 - AMD GPU MI200
* - AMD_GFX940
- GPU
- AMD GPU MI300
* - AMD_GFX942 * - AMD_GFX942
- GPU - GPU
- AMD GPU MI300 - AMD GPU MI300
* - AMD_GFX942_APU
- GPU
- AMD APU MI300A
* - AMD_GFX1030 * - AMD_GFX1030
- GPU - GPU
- AMD GPU V620/W6800 - AMD GPU V620/W6800
@ -678,7 +691,7 @@ They must be specified in uppercase.
- AMD GPU RX7900XTX - AMD GPU RX7900XTX
* - AMD_GFX1103 * - AMD_GFX1103
- GPU - GPU
- AMD Phoenix APU with Radeon 740M/760M/780M/880M/890M - AMD APU Phoenix
* - INTEL_GEN * - INTEL_GEN
- GPU - GPU
- SPIR64-based devices, e.g. Intel GPUs, using JIT - SPIR64-based devices, e.g. Intel GPUs, using JIT
@ -701,7 +714,7 @@ They must be specified in uppercase.
- GPU - GPU
- Intel GPU Ponte Vecchio - Intel GPU Ponte Vecchio
This list was last updated for version 4.3.0 of the Kokkos library. This list was last updated for version 4.5.1 of the Kokkos library.
.. tabs:: .. tabs::
@ -2191,7 +2204,7 @@ verified to work in February 2020 with Quantum Espresso versions 6.3 to
from the sources in the *lib* folder (including the essential from the sources in the *lib* folder (including the essential
libqmmm.a) are not included in the static LAMMPS library and libqmmm.a) are not included in the static LAMMPS library and
(currently) not installed, while their code is included in the (currently) not installed, while their code is included in the
shared LAMMPS library. Thus a typical command line to configure shared LAMMPS library. Thus a typical command to configure
building LAMMPS for QMMM would be: building LAMMPS for QMMM would be:
.. code-block:: bash .. code-block:: bash

View File

@ -100,9 +100,9 @@ procedure.
It is possible to use both the integrated CMake support of the Visual It is possible to use both the integrated CMake support of the Visual
Studio IDE or use an external CMake installation (e.g. downloaded from Studio IDE or use an external CMake installation (e.g. downloaded from
cmake.org) to create build files and compile LAMMPS from the command line. cmake.org) to create build files and compile LAMMPS from the command line.
Compilation via command line and unit tests are checked automatically Compilation via command line and unit tests are checked automatically
for the LAMMPS development branch through for the LAMMPS development branch through
`GitHub Actions <https://github.com/lammps/lammps/actions/workflows/compile-msvc.yml>`_. `GitHub Actions <https://github.com/lammps/lammps/actions/workflows/compile-msvc.yml>`_.
@ -115,7 +115,7 @@ for the LAMMPS development branch through
Please note, that for either approach CMake will create a so-called Please note, that for either approach CMake will create a so-called
:ref:`"multi-configuration" build environment <cmake_multiconfig>`, and :ref:`"multi-configuration" build environment <cmake_multiconfig>`, and
the command lines for building and testing LAMMPS must be adjusted the commands for building and testing LAMMPS must be adjusted
accordingly. accordingly.
The LAMMPS cmake folder contains a ``CMakeSettings.json`` file with The LAMMPS cmake folder contains a ``CMakeSettings.json`` file with

View File

@ -4,7 +4,7 @@ LAMMPS Class
The LAMMPS class is encapsulating an MD simulation state and thus it is The LAMMPS class is encapsulating an MD simulation state and thus it is
the class that needs to be created when starting a new simulation system the class that needs to be created when starting a new simulation system
state. The LAMMPS executable essentially creates one instance of this state. The LAMMPS executable essentially creates one instance of this
class and passes the command line flags and tells it to process the class and passes the command-line flags and tells it to process the
provided input (a file or ``stdin``). It shuts the class down when provided input (a file or ``stdin``). It shuts the class down when
control is returned to it and then exits. When using LAMMPS as a control is returned to it and then exits. When using LAMMPS as a
library from another code it is required to create an instance of this library from another code it is required to create an instance of this

View File

@ -69,7 +69,7 @@ WARNING message is printed. The :doc:`Errors <Errors>` page gives
more information on what errors mean. The documentation for each more information on what errors mean. The documentation for each
command lists restrictions on how the command can be used. command lists restrictions on how the command can be used.
You can use the :ref:`-skiprun <skiprun>` command line flag You can use the :ref:`-skiprun <skiprun>` command-line flag
to have LAMMPS skip the execution of any ``run``, ``minimize``, or similar to have LAMMPS skip the execution of any ``run``, ``minimize``, or similar
commands to check the entire input for correct syntax to avoid crashes commands to check the entire input for correct syntax to avoid crashes
on typos or syntax errors in long runs. on typos or syntax errors in long runs.

View File

@ -18,7 +18,7 @@ LAMMPS executable directly instead of having a separate tool. A
combination of the commands :doc:`read_restart <read_restart>` and combination of the commands :doc:`read_restart <read_restart>` and
:doc:`write_data <write_data>` can be used to the same effect. For :doc:`write_data <write_data>` can be used to the same effect. For
added convenience this conversion can also be triggered by added convenience this conversion can also be triggered by
:doc:`command line flags <Run_options>` :doc:`command-line flags <Run_options>`
Fix ave/spatial and fix ave/spatial/sphere Fix ave/spatial and fix ave/spatial/sphere
------------------------------------------ ------------------------------------------

View File

@ -94,12 +94,12 @@ represents what is generally referred to as an "instance of LAMMPS". It
is a composite holding pointers to instances of other core classes is a composite holding pointers to instances of other core classes
providing the core functionality of the MD engine in LAMMPS and through providing the core functionality of the MD engine in LAMMPS and through
them abstractions of the required operations. The constructor of the them abstractions of the required operations. The constructor of the
LAMMPS class will instantiate those instances, process the command line LAMMPS class will instantiate those instances, process the command-line
flags, initialize MPI (if not already done) and set up file pointers for flags, initialize MPI (if not already done) and set up file pointers for
input and output. The destructor will shut everything down and free all input and output. The destructor will shut everything down and free all
associated memory. Thus code for the standalone LAMMPS executable in associated memory. Thus code for the standalone LAMMPS executable in
``main.cpp`` simply initializes MPI, instantiates a single instance of ``main.cpp`` simply initializes MPI, instantiates a single instance of
LAMMPS while passing it the command line flags and input script. It LAMMPS while passing it the command-line flags and input script. It
deletes the LAMMPS instance after the method reading the input returns deletes the LAMMPS instance after the method reading the input returns
and shuts down the MPI environment before it exits the executable. and shuts down the MPI environment before it exits the executable.

View File

@ -227,12 +227,12 @@ Tests for the C-style library interface
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Tests for validating the LAMMPS C-style library interface are in the Tests for validating the LAMMPS C-style library interface are in the
``unittest/c-library`` folder. They are implemented either to be used ``unittest/c-library`` folder. They test either utility functions or
for utility functions or for LAMMPS commands, but use the functions LAMMPS commands, but use the functions implemented in
implemented in the ``src/library.cpp`` file as much as possible. There ``src/library.cpp`` as much as possible. There may be some overlap with
may be some overlap with other tests, but only in as much as is required other tests as far as the LAMMPS functionality is concerned, but the
to test the C-style library API. The tests are distributed over focus is on testing the C-style library API. The tests are distributed
multiple test programs which try to match the grouping of the over multiple test programs which try to match the grouping of the
functions in the source code and :ref:`in the manual <lammps_c_api>`. functions in the source code and :ref:`in the manual <lammps_c_api>`.
This group of tests also includes tests invoking LAMMPS in parallel This group of tests also includes tests invoking LAMMPS in parallel
@ -258,7 +258,7 @@ Tests for the Python module and package
The ``unittest/python`` folder contains primarily tests for classes and The ``unittest/python`` folder contains primarily tests for classes and
functions in the LAMMPS python module but also for commands in the functions in the LAMMPS python module but also for commands in the
PYTHON package. These tests are only enabled if the necessary PYTHON package. These tests are only enabled, if the necessary
prerequisites are detected or enabled during configuration and prerequisites are detected or enabled during configuration and
compilation of LAMMPS (shared library build enabled, Python interpreter compilation of LAMMPS (shared library build enabled, Python interpreter
found, Python development files found). found, Python development files found).
@ -272,29 +272,30 @@ Tests for the Fortran interface
Tests for using the Fortran module are in the ``unittest/fortran`` Tests for using the Fortran module are in the ``unittest/fortran``
folder. Since they are also using the GoogleTest library, they require folder. Since they are also using the GoogleTest library, they require
implementing test wrappers in C++ that will call fortran functions test wrappers written in C++ that will call fortran functions with a C
which provide a C function interface through ISO_C_BINDINGS that will in function interface through ISO_C_BINDINGS which will in turn call the
turn call the functions in the LAMMPS Fortran module. functions in the LAMMPS Fortran module.
Tests for the C++-style library interface Tests for the C++-style library interface
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The tests in the ``unittest/cplusplus`` folder are somewhat similar to The tests in the ``unittest/cplusplus`` folder are somewhat similar to
the tests for the C-style library interface, but do not need to test the the tests for the C-style library interface, but do not need to test the
several convenience and utility functions that are only available through convenience and utility functions that are only available through the
the C-style interface. Instead it can focus on the more generic features C-style library interface. Instead they focus on the more generic
that are used internally. This part of the unit tests is currently still features that are used in LAMMPS internally. This part of the unit
mostly in the planning stage. tests is currently still mostly in the planning stage.
Tests for reading and writing file formats Tests for reading and writing file formats
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The ``unittest/formats`` folder contains test programs for reading and The ``unittest/formats`` folder contains test programs for reading and
writing files like data files, restart files, potential files or dump files. writing files like data files, restart files, potential files or dump
This covers simple things like the file i/o convenience functions in the files. This covers simple things like the file i/o convenience
``utils::`` namespace to complex tests of atom styles where creating and functions in the ``utils::`` namespace to complex tests of atom styles
deleting atoms with different properties is tested in different ways where creating and deleting of atoms with different properties is tested
and through script commands or reading and writing of data or restart files. in different ways and through script commands or reading and writing of
data or restart files.
Tests for styles computing or modifying forces Tests for styles computing or modifying forces
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@ -443,7 +444,7 @@ file for a style that is similar to one to be tested. The file name should
follow the naming conventions described above and after copying the file, follow the naming conventions described above and after copying the file,
the first step is to replace the style names where needed. The coefficient the first step is to replace the style names where needed. The coefficient
values do not have to be meaningful, just in a reasonable range for the values do not have to be meaningful, just in a reasonable range for the
given system. It does not matter if some forces are large, as long as given system. It does not matter if some forces are large, for as long as
they do not diverge. they do not diverge.
The template input files define a large number of index variables at the top The template input files define a large number of index variables at the top
@ -476,7 +477,7 @@ the tabulated coulomb, to test both code paths. The reference results in the YA
files then should be compared manually, if they agree well enough within the limits files then should be compared manually, if they agree well enough within the limits
of those two approximations. of those two approximations.
The ``test_pair_style`` and equivalent programs have special command line options The ``test_pair_style`` and equivalent programs have special command-line options
to update the YAML files. Running a command like to update the YAML files. Running a command like
.. code-block:: bash .. code-block:: bash
@ -531,19 +532,20 @@ Python module.
Troubleshooting failed unit tests Troubleshooting failed unit tests
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The are by default no unit tests for newly added features (e.g. pair, fix, There are by default no unit tests for newly added features (e.g. pair,
or compute styles) unless your pull request also includes tests for the fix, or compute styles) unless your pull request also includes tests for
added features. If you are modifying some features, you may see failures these added features. If you are modifying some existing LAMMPS
for existing tests, if your modifications have some unexpected side effects features, you may see failures for existing tests, if your modifications
or your changes render the existing test invalid. If you are adding an have some unexpected side effects or your changes render the existing
accelerated version of an existing style, then only tests for INTEL, test invalid. If you are adding an accelerated version of an existing
KOKKOS (with OpenMP only), OPENMP, and OPT will be run automatically. style, then only tests for INTEL, KOKKOS (with OpenMP only), OPENMP, and
Tests for the GPU package are time consuming and thus are only run OPT will be run automatically. Tests for the GPU package are time
*after* a merge, or when a special label, ``gpu_unit_tests`` is added consuming and thus are only run *after* a merge, or when a special
to the pull request. After the test has started, it is often best to label, ``gpu_unit_tests`` is added to the pull request. After the test
remove the label since every PR activity will re-trigger the test (that has started, it is often best to remove the label since every PR
is a limitation of triggering a test with a label). Support for unit activity will re-trigger the test (that is a limitation of triggering a
tests when using KOKKOS with GPU acceleration is currently not supported. test with a label). Unit tests using KOKKOS with GPU
acceleration are currently not supported.
When you see a failed build on GitHub, click on ``Details`` to be taken When you see a failed build on GitHub, click on ``Details`` to be taken
to the corresponding LAMMPS Jenkins CI web page. Click on the "Exit" to the corresponding LAMMPS Jenkins CI web page. Click on the "Exit"
@ -588,7 +590,7 @@ While the epsilon (relative precision) for a single, `IEEE 754 compliant
<https://en.wikipedia.org/wiki/IEEE_754>`_, double precision floating <https://en.wikipedia.org/wiki/IEEE_754>`_, double precision floating
point operation is at about 2.2e-16, the achievable precision for the point operation is at about 2.2e-16, the achievable precision for the
tests is lower due to most numbers being sums over intermediate results tests is lower due to most numbers being sums over intermediate results
and the non-associativity of floating point math leading to larger for which the non-associativity of floating point math leads to larger
errors. As a rule of thumb, the test epsilon can often be in the range errors. As a rule of thumb, the test epsilon can often be in the range
5.0e-14 to 1.0e-13. But for "noisy" force kernels, e.g. those with a larger 5.0e-14 to 1.0e-13. But for "noisy" force kernels, e.g. those with a larger
amount of arithmetic operations involving `exp()`, `log()` or `sin()` amount of arithmetic operations involving `exp()`, `log()` or `sin()`
@ -602,14 +604,14 @@ of floating point operations or that some or most intermediate operations
may be done using approximations or with single precision floating point may be done using approximations or with single precision floating point
math. math.
To rerun the failed unit test individually, change to the ``build`` directory To rerun a failed unit test individually, change to the ``build`` directory
and run the test with verbose output. For example, and run the test with verbose output. For example,
.. code-block:: bash .. code-block:: bash
env TEST_ARGS=-v ctest -R ^MolPairStyle:lj_cut_coul_long -V env TEST_ARGS=-v ctest -R ^MolPairStyle:lj_cut_coul_long -V
``ctest`` with the ``-V`` flag also shows the exact command line ``ctest`` with the ``-V`` flag also shows the exact command
of the test. One can then use ``gdb --args`` to further debug and of the test. One can then use ``gdb --args`` to further debug and
catch exceptions with the test command, for example, catch exceptions with the test command, for example,

View File

@ -310,7 +310,7 @@ the constructor and the destructor.
Pair styles are different from most classes in LAMMPS that define a Pair styles are different from most classes in LAMMPS that define a
"style", as their constructor only uses the LAMMPS class instance "style", as their constructor only uses the LAMMPS class instance
pointer as an argument, but **not** the command line arguments of the pointer as an argument, but **not** the arguments of the
:doc:`pair_style command <pair_style>`. Instead, those arguments are :doc:`pair_style command <pair_style>`. Instead, those arguments are
processed in the ``Pair::settings()`` function (or rather the version in processed in the ``Pair::settings()`` function (or rather the version in
the derived class). The constructor is the place where global defaults the derived class). The constructor is the place where global defaults
@ -891,7 +891,7 @@ originally created from mixing or not).
These data file output functions are only useful for true pair-wise These data file output functions are only useful for true pair-wise
additive potentials, where the potential parameters can be entered additive potentials, where the potential parameters can be entered
through *multiple* :doc:`pair_coeff commands <pair_coeff>`. Pair styles through *multiple* :doc:`pair_coeff commands <pair_coeff>`. Pair styles
that require a single "pair_coeff \* \*" command line are not compatible that require a single "pair_coeff \* \*" command are not compatible
with reading their parameters from data files. For pair styles like with reading their parameters from data files. For pair styles like
*born/gauss* that do support writing to data files, the potential *born/gauss* that do support writing to data files, the potential
parameters will be read from the data file, if present, and parameters will be read from the data file, if present, and
@ -1122,7 +1122,7 @@ once. Thus, the ``coeff()`` function has to do three tasks, each of
which is delegated to a function in the ``PairTersoff`` class: which is delegated to a function in the ``PairTersoff`` class:
#. map elements to atom types. Those follow the potential file name in the #. map elements to atom types. Those follow the potential file name in the
command line arguments and are processed by the ``map_element2type()`` function. command arguments and are processed by the ``map_element2type()`` function.
#. read and parse the potential parameter file in the ``read_file()`` function. #. read and parse the potential parameter file in the ``read_file()`` function.
#. Build data structures where the original and derived parameters are #. Build data structures where the original and derived parameters are
indexed by all possible triples of atom types and thus can be looked indexed by all possible triples of atom types and thus can be looked
@ -1356,8 +1356,8 @@ either 0 or 1.
The ``morseflag`` variable defaults to 0 and is set to 1 in the The ``morseflag`` variable defaults to 0 and is set to 1 in the
``PairAIREBOMorse::settings()`` function which is called by the ``PairAIREBOMorse::settings()`` function which is called by the
:doc:`pair_style <pair_style>` command. This function delegates :doc:`pair_style <pair_style>` command. This function delegates all
all command line processing and setting of other parameters to the command argument processing and setting of other parameters to the
``PairAIREBO::settings()`` function of the base class. ``PairAIREBO::settings()`` function of the base class.
.. code-block:: c++ .. code-block:: c++

View File

@ -83,7 +83,7 @@ Run LAMMPS from within the debugger
Running LAMMPS under the control of the debugger as shown below only Running LAMMPS under the control of the debugger as shown below only
works for a single MPI rank (for debugging a program running in parallel works for a single MPI rank (for debugging a program running in parallel
you usually need a parallel debugger program). A simple way to launch you usually need a parallel debugger program). A simple way to launch
GDB is to prefix the LAMMPS command line with ``gdb --args`` and then GDB is to prefix the LAMMPS command-line with ``gdb --args`` and then
type the command "run" at the GDB prompt. This will launch the type the command "run" at the GDB prompt. This will launch the
debugger, load the LAMMPS executable and its debug info, and then run debugger, load the LAMMPS executable and its debug info, and then run
it. When it reaches the code causing the segmentation fault, it will it. When it reaches the code causing the segmentation fault, it will
@ -180,7 +180,7 @@ inspect the behavior of a compiled program by essentially emulating a
CPU and instrumenting the program while running. This slows down CPU and instrumenting the program while running. This slows down
execution quite significantly, but can also report issues that are not execution quite significantly, but can also report issues that are not
resulting in a crash. The default valgrind tool is a memory checker and resulting in a crash. The default valgrind tool is a memory checker and
you can use it by prefixing the normal command line with ``valgrind``. you can use it by prefixing the normal command-line with ``valgrind``.
Unlike GDB, this will also work for parallel execution, but it is Unlike GDB, this will also work for parallel execution, but it is
recommended to redirect the valgrind output to a file (e.g. with recommended to redirect the valgrind output to a file (e.g. with
``--log-file=crash-%p.txt``, the %p will be substituted with the ``--log-file=crash-%p.txt``, the %p will be substituted with the
@ -235,3 +235,53 @@ from GDB. In addition you get a more specific hint about what cause the
segmentation fault, i.e. that it is a NULL pointer dereference. To find segmentation fault, i.e. that it is a NULL pointer dereference. To find
out which pointer exactly was NULL, you need to use the debugger, though. out which pointer exactly was NULL, you need to use the debugger, though.
Debugging when LAMMPS appears to be stuck
=========================================
Sometimes the LAMMPS calculation appears to be stuck, that is the LAMMPS
process or processes are active, but there is no visible progress. This
can have multiple reasons:
- The selected styles are slow and require a lot of CPU time and the
system is large. When extrapolating the expected speed from smaller
systems, one has to factor in that not all models scale linearly with
system size, e.g. :doc:`kspace styles like ewald or pppm
<kspace_style>`. There is very little that can be done in this case.
- The output interval is not set or set to a large value with the
:doc:`thermo <thermo>` command. In the first case, there will be output
only at the first and last step.
- The output is block-buffered instead of line-buffered. The output
will only be written to the screen after 4096 or 8192 characters of
output have accumulated. This most often happens for files but also
with MPI parallel executables for output to the screen, since the
output to the screen is handled by the MPI library so that output from
all processes can be shown. This can be suppressed by using the
``-nonblock`` or ``-nb`` command-line flag, which turns off buffering
for screen and logfile output.
- An MPI parallel calculation has a bug where a collective MPI function
is called (e.g. ``MPI_Barrier()``, ``MPI_Bcast()``,
``MPI_Allreduce()`` and so on) before pending point-to-point
communications are completed or when the collective function is only
called from a subset of the MPI processes. This also applies to some
internal LAMMPS functions like ``Error::all()`` which uses
``MPI_Barrier()`` and thus ``Error::one()`` must be called, if the
error condition does not happen on all MPI processes simultaneously.
- Some function in LAMMPS has a bug where a ``for`` or ``while`` loop
does not trigger the exit condition and thus will loop forever. This
can happen when the wrong variable is incremented or when one value in
a comparison becomes ``NaN`` due to an overflow.
In the latter two cases, further information and stack traces (see above)
can be obtained by attaching a debugger to a running process. For that the
process ID (PID) is needed; this can be found on Linux machines with the
``top``, ``htop``, ``ps``, or ``pstree`` commands.
Then running the (GNU) debugger ``gdb`` with the ``-p`` flag followed by
the process id will attach the process to the debugger and stop
execution of that specific process. From there on it is possible to
issue all debugger commands in the same way as when LAMMPS was started
from the debugger (see above). Most importantly it is possible to
obtain a stack trace with the ``where`` command and thus determine where
in the execution of a timestep this process is. Also internal data can
be printed and execution single stepped or continued. When the debugger
is exited, the calculation will resume normally.

View File

@ -7774,7 +7774,7 @@ Doc page with :doc:`WARNING messages <Errors_warnings>`
*Too few values in body section of molecule file* *Too few values in body section of molecule file*
Self-explanatory. Self-explanatory.
*Too many -pk arguments in command line* *Too many -pk arguments in command-line*
The string formed by concatenating the arguments is too long. Use a The string formed by concatenating the arguments is too long. Use a
package command in the input script instead. package command in the input script instead.

View File

@ -146,6 +146,8 @@ Lowercase directories
+-------------+------------------------------------------------------------------+ +-------------+------------------------------------------------------------------+
| streitz | use of Streitz/Mintmire potential with charge equilibration | | streitz | use of Streitz/Mintmire potential with charge equilibration |
+-------------+------------------------------------------------------------------+ +-------------+------------------------------------------------------------------+
| stress_vcm | removing binned rigid body motion from binned stress profile |
+-------------+------------------------------------------------------------------+
| tad | temperature-accelerated dynamics of vacancy diffusion in bulk Si | | tad | temperature-accelerated dynamics of vacancy diffusion in bulk Si |
+-------------+------------------------------------------------------------------+ +-------------+------------------------------------------------------------------+
| threebody | regression test input for a variety of manybody potentials | | threebody | regression test input for a variety of manybody potentials |

View File

@ -16,7 +16,7 @@ compiled alongside the code using it from the source code in
``fortran/lammps.f90`` *and* with the same compiler used to build the ``fortran/lammps.f90`` *and* with the same compiler used to build the
rest of the Fortran code that interfaces to LAMMPS. When linking, you rest of the Fortran code that interfaces to LAMMPS. When linking, you
also need to :doc:`link to the LAMMPS library <Build_link>`. A typical also need to :doc:`link to the LAMMPS library <Build_link>`. A typical
command line for a simple program using the Fortran interface would be: command for a simple program using the Fortran interface would be:
.. code-block:: bash .. code-block:: bash
@ -91,12 +91,12 @@ function and triggered with the optional logical argument set to
CALL lmp%close(.TRUE.) CALL lmp%close(.TRUE.)
END PROGRAM testlib END PROGRAM testlib
It is also possible to pass command line flags from Fortran to C/C++ and It is also possible to pass command-line flags from Fortran to C/C++ and
thus make the resulting executable behave similarly to the standalone thus make the resulting executable behave similarly to the standalone
executable (it will ignore the `-in/-i` flag, though). This allows executable (it will ignore the `-in/-i` flag, though). This allows
using the command line to configure accelerator and suffix settings, using the command-line to configure accelerator and suffix settings,
configure screen and logfile output, or to set index style variables configure screen and logfile output, or to set index style variables
from the command line and more. Here is a correspondingly adapted from the command-line and more. Here is a correspondingly adapted
version of the previous example: version of the previous example:
.. code-block:: fortran .. code-block:: fortran
@ -108,7 +108,7 @@ version of the previous example:
CHARACTER(LEN=128), ALLOCATABLE :: command_args(:) CHARACTER(LEN=128), ALLOCATABLE :: command_args(:)
INTEGER :: i, argc INTEGER :: i, argc
! copy command line flags to `command_args()` ! copy command-line flags to `command_args()`
argc = COMMAND_ARGUMENT_COUNT() argc = COMMAND_ARGUMENT_COUNT()
ALLOCATE(command_args(0:argc)) ALLOCATE(command_args(0:argc))
DO i=0, argc DO i=0, argc
@ -448,7 +448,7 @@ of the contents of the :f:mod:`LIBLAMMPS` Fortran interface to LAMMPS.
compiled with MPI support, it will also initialize MPI, if it has compiled with MPI support, it will also initialize MPI, if it has
not already been initialized before. not already been initialized before.
The *args* argument with the list of command line parameters is The *args* argument with the list of command-line parameters is
optional and so is the *comm* argument with the MPI communicator. optional and so is the *comm* argument with the MPI communicator.
If *comm* is not provided, ``MPI_COMM_WORLD`` is assumed. For If *comm* is not provided, ``MPI_COMM_WORLD`` is assumed. For
more details please see the documentation of :cpp:func:`lammps_open`. more details please see the documentation of :cpp:func:`lammps_open`.

View File

@ -56,7 +56,7 @@ using a shell like Bash or Zsh.
Visual Studio IDE with the bundled CMake or from the Windows command prompt using Visual Studio IDE with the bundled CMake or from the Windows command prompt using
a separately installed CMake package, both using the native Microsoft Visual C++ a separately installed CMake package, both using the native Microsoft Visual C++
compilers and (optionally) the Microsoft MPI SDK. This tutorial, however, only compilers and (optionally) the Microsoft MPI SDK. This tutorial, however, only
covers unix-like command line interfaces. covers unix-like command-line interfaces.
We also assume that you have downloaded and unpacked a recent LAMMPS source code package We also assume that you have downloaded and unpacked a recent LAMMPS source code package
or used Git to create a clone of the LAMMPS sources on your compilation machine. or used Git to create a clone of the LAMMPS sources on your compilation machine.
@ -277,7 +277,7 @@ Setting options
--------------- ---------------
Options that enable, disable or modify settings are modified by setting Options that enable, disable or modify settings are modified by setting
the value of CMake variables. This is done on the command line with the the value of CMake variables. This is done on the command-line with the
*-D* flag in the format ``-D VARIABLE=value``, e.g. ``-D *-D* flag in the format ``-D VARIABLE=value``, e.g. ``-D
CMAKE_BUILD_TYPE=Release`` or ``-D BUILD_MPI=on``. There is one quirk: CMAKE_BUILD_TYPE=Release`` or ``-D BUILD_MPI=on``. There is one quirk:
when used before the CMake directory, there may be a space between the when used before the CMake directory, there may be a space between the
@ -376,7 +376,7 @@ Using presets
------------- -------------
Since LAMMPS has a lot of optional features and packages, specifying Since LAMMPS has a lot of optional features and packages, specifying
them all on the command line can be tedious. Or when selecting a them all on the command-line can be tedious. Or when selecting a
different compiler toolchain, multiple options have to be changed different compiler toolchain, multiple options have to be changed
consistently and that is rather error prone. Or when enabling certain consistently and that is rather error prone. Or when enabling certain
packages, they require consistent settings to be operated in a packages, they require consistent settings to be operated in a
@ -384,7 +384,7 @@ particular mode. For this purpose, we are providing a selection of
"preset files" for CMake in the folder ``cmake/presets``. They "preset files" for CMake in the folder ``cmake/presets``. They
represent a way to pre-load or override the CMake configuration cache by represent a way to pre-load or override the CMake configuration cache by
setting or changing CMake variables. Preset files are loaded using the setting or changing CMake variables. Preset files are loaded using the
*-C* command line flag. You can combine loading multiple preset files or *-C* command-line flag. You can combine loading multiple preset files or
change some variables later with additional *-D* flags. A few examples: change some variables later with additional *-D* flags. A few examples:
.. code-block:: bash .. code-block:: bash

View File

@ -163,7 +163,7 @@ After everything is done, add the files to the branch and commit them:
*git rm*, *git mv* for adding, removing, renaming individual files, *git rm*, *git mv* for adding, removing, renaming individual files,
respectively, and then *git commit* to finalize the commit. respectively, and then *git commit* to finalize the commit.
Carefully check all pending changes with *git status* before Carefully check all pending changes with *git status* before
committing them. If you find doing this on the command line too committing them. If you find doing this on the command-line too
tedious, consider using a GUI, for example the one included in git tedious, consider using a GUI, for example the one included in git
distributions written in Tk, i.e. use *git gui* (on some Linux distributions written in Tk, i.e. use *git gui* (on some Linux
distributions it may be required to install an additional package to distributions it may be required to install an additional package to

View File

@ -20,8 +20,11 @@ to the online LAMMPS documentation for known LAMMPS commands and styles.
(Ubuntu 20.04LTS or later and compatible), macOS (version 11 aka Big (Ubuntu 20.04LTS or later and compatible), macOS (version 11 aka Big
Sur or later), and Windows (version 10 or later) :ref:`are available Sur or later), and Windows (version 10 or later) :ref:`are available
<lammps_gui_install>` for download. Non-MPI LAMMPS executables (as <lammps_gui_install>` for download. Non-MPI LAMMPS executables (as
``lmp``) for running LAMMPS from the command line and :doc:`some ``lmp``) for running LAMMPS from the command-line and :doc:`some
LAMMPS tools <Tools>` compiled executables are also included. LAMMPS tools <Tools>` compiled executables are also included.
Also, the pre-compiled LAMMPS-GUI packages include the WHAM executables
from http://membrane.urmc.rochester.edu/content/wham/ for use with
LAMMPS tutorials.
The source code for LAMMPS-GUI is included in the LAMMPS source code The source code for LAMMPS-GUI is included in the LAMMPS source code
distribution and can be found in the ``tools/lammps-gui`` folder. It distribution and can be found in the ``tools/lammps-gui`` folder. It
@ -29,16 +32,16 @@ to the online LAMMPS documentation for known LAMMPS commands and styles.
<Build_cmake>`. <Build_cmake>`.
LAMMPS-GUI tries to provide an experience similar to what people LAMMPS-GUI tries to provide an experience similar to what people
traditionally would have running LAMMPS using a command line window and traditionally would have running LAMMPS using a command-line window and
the console LAMMPS executable but just rolled into a single executable: the console LAMMPS executable but just rolled into a single executable:
- writing & editing LAMMPS input files with a text editor - writing & editing LAMMPS input files with a text editor
- run LAMMPS on those input files with selected command line flags - run LAMMPS on those input files with selected command-line flags
- extract data from the created files and visualize it with an - extract data from the created files and visualize it with an
external software external software
That procedure is quite effective for people proficient in using the That procedure is quite effective for people proficient in using the
command line, as that allows them to use tools for the individual steps command-line, as that allows them to use tools for the individual steps
that they are most comfortable with. In fact, it is often *required* to that they are most comfortable with. In fact, it is often *required* to
adopt this workflow when running LAMMPS simulations on high-performance adopt this workflow when running LAMMPS simulations on high-performance
computing facilities. computing facilities.
@ -100,10 +103,11 @@ MacOS 11 and later
^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^
After downloading the ``LAMMPS-macOS-multiarch-GUI-<version>.dmg`` After downloading the ``LAMMPS-macOS-multiarch-GUI-<version>.dmg``
installer package, you need to double-click it and then, in the window application bundle disk image, you need to double-click it and then, in
that opens, drag the app bundle as indicated into the "Applications" the window that opens, drag the app bundle as indicated into the
folder. The follow the instructions in the "README.txt" file to "Applications" folder. Afterwards, the disk image can be unmounted.
get access to the other included executables. Then follow the instructions in the "README.txt" file to get access to
the other included command-line executables.
Linux on x86\_64 Linux on x86\_64
^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^
@ -117,15 +121,25 @@ into the "LAMMPS_GUI" folder and execute "./lammps-gui" directly.
The second variant uses `flatpak <https://www.flatpak.org>`_ and The second variant uses `flatpak <https://www.flatpak.org>`_ and
requires the flatpak management and runtime software to be installed. requires the flatpak management and runtime software to be installed.
After downloading the ``LAMMPS-GUI-Linux-x86_64-GUI-<version>.tar.gz`` After downloading the ``LAMMPS-GUI-Linux-x86_64-GUI-<version>.flatpak``
flatpak bundle, you can install it with ``flatpak install --user flatpak bundle, you can install it with ``flatpak install --user
LAMMPS-GUI-Linux-x86_64-GUI-<version>.tar.gz``. After installation, LAMMPS-GUI-Linux-x86_64-GUI-<version>.flatpak``. After installation,
LAMMPS-GUI should be integrated into your desktop environment under LAMMPS-GUI should be integrated into your desktop environment under
"Applications > Science" but also can be launched from the console with "Applications > Science" but also can be launched from the console with
``flatpak run org.lammps.lammps-gui``. The flatpak bundle also includes ``flatpak run org.lammps.lammps-gui``. The flatpak bundle also includes
the console LAMMPS executable ``lmp`` which can be launched to run the console LAMMPS executable ``lmp`` which can be launched to run
simulations with, for example: ``flatpak run --command=lmp simulations, for example with:
org.lammps.lammps-gui -in in.melt``.
.. code-block:: sh
flatpak run --command=lmp org.lammps.lammps-gui -in in.melt
Other bundled command-line executables are run the same way and can be
listed with:
.. code-block:: sh
ls $(flatpak info --show-location org.lammps.lammps-gui )/files/bin
Compiling from Source Compiling from Source
@ -165,9 +179,9 @@ window is stored when exiting and restored when starting again.
Opening Files Opening Files
^^^^^^^^^^^^^ ^^^^^^^^^^^^^
The LAMMPS-GUI application can be launched without command line arguments The LAMMPS-GUI application can be launched without command-line arguments
and then starts with an empty buffer in the *Editor* window. If arguments and then starts with an empty buffer in the *Editor* window. If arguments
are given LAMMPS will use the first command line argument as the file name for are given LAMMPS will use the first command-line argument as the file name for
the *Editor* buffer and read its contents into the buffer, if the file the *Editor* buffer and read its contents into the buffer, if the file
exists. All further arguments are ignored. Files can also be opened via exists. All further arguments are ignored. Files can also be opened via
the *File* menu, the `Ctrl-O` (`Command-O` on macOS) keyboard shortcut the *File* menu, the `Ctrl-O` (`Command-O` on macOS) keyboard shortcut
@ -261,14 +275,21 @@ Output Window
By default, when starting a run, an *Output* window opens that displays By default, when starting a run, an *Output* window opens that displays
the screen output of the running LAMMPS calculation, as shown below. the screen output of the running LAMMPS calculation, as shown below.
This text would normally be seen in the command line window. This text would normally be seen in the command-line window.
.. image:: JPG/lammps-gui-log.png .. image:: JPG/lammps-gui-log.png
:align: center :align: center
:scale: 50% :scale: 50%
LAMMPS-GUI captures the screen output from LAMMPS as it is generated and LAMMPS-GUI captures the screen output from LAMMPS as it is generated and
updates the *Output* window regularly during a run. updates the *Output* window regularly during a run. If there are any
warnings or errors in the LAMMPS output, they are highlighted by using
bold text colored in red. There is a small panel at the bottom center
of the *Output* window showing how many warnings and errors were
detected and how many lines the entire output has. By clicking on the
button on the right with the warning symbol or by using the keyboard
shortcut `Ctrl-N` (`Command-N` on macOS), you can jump to the next
line with a warning or error.
By default, the *Output* window is replaced each time a run is started. By default, the *Output* window is replaced each time a run is started.
The runs are counted and the run number for the current run is displayed The runs are counted and the run number for the current run is displayed
@ -398,7 +419,7 @@ below.
Like for the *Output* and *Charts* windows, its content is continuously Like for the *Output* and *Charts* windows, its content is continuously
updated during a run. It will show "(none)" if there are no variables updated during a run. It will show "(none)" if there are no variables
defined. Note that it is also possible to *set* :doc:`index style defined. Note that it is also possible to *set* :doc:`index style
variables <variable>`, that would normally be set via command line variables <variable>`, that would normally be set via command-line
flags, via the "Set Variables..." dialog from the *Run* menu. flags, via the "Set Variables..." dialog from the *Run* menu.
LAMMPS-GUI automatically defines the variable "gui_run" to the current LAMMPS-GUI automatically defines the variable "gui_run" to the current
value of the run counter. That way it is possible to automatically value of the run counter. That way it is possible to automatically
@ -775,11 +796,11 @@ General Settings:
- *Echo input to log:* when checked, all input commands, including - *Echo input to log:* when checked, all input commands, including
variable expansions, are echoed to the *Output* window. This is variable expansions, are echoed to the *Output* window. This is
equivalent to using `-echo screen` at the command line. There is no equivalent to using `-echo screen` at the command-line. There is no
log *file* produced by default, since LAMMPS-GUI uses `-log none`. log *file* produced by default, since LAMMPS-GUI uses `-log none`.
- *Include citation details:* when checked full citation info will be - *Include citation details:* when checked full citation info will be
included in the log window. This is equivalent to using `-cite included in the log window. This is equivalent to using `-cite
screen` on the command line. screen` on the command-line.
- *Show log window by default:* when checked, the screen output of a - *Show log window by default:* when checked, the screen output of a
LAMMPS run will be collected in a log window during the run LAMMPS run will be collected in a log window during the run
- *Show chart window by default:* when checked, the thermodynamic - *Show chart window by default:* when checked, the thermodynamic
@ -828,7 +849,7 @@ Accelerators:
This tab enables selection of an accelerator package for LAMMPS to use This tab enables selection of an accelerator package for LAMMPS to use
and is equivalent to using the `-suffix` and `-package` flags on the and is equivalent to using the `-suffix` and `-package` flags on the
command line. Only settings supported by the LAMMPS library and local command-line. Only settings supported by the LAMMPS library and local
hardware are available. The `Number of threads` field allows setting hardware are available. The `Number of threads` field allows setting
the maximum number of threads for the accelerator packages that use the maximum number of threads for the accelerator packages that use
threads. threads.

View File

@ -738,8 +738,8 @@ command.
This can be done, for example, by using the built-in visualizer of the This can be done, for example, by using the built-in visualizer of the
:doc:`dump image or dump movie <dump_image>` command to create snapshot :doc:`dump image or dump movie <dump_image>` command to create snapshot
images or a movie. Below are example command lines for using dump image images or a movie. Below are example commands for using dump image with
with the :ref:`example listed below <periexample>` and a set of images the :ref:`example listed below <periexample>` and a set of images
created for steps 300, 600, and 2000 this way. created for steps 300, 600, and 2000 this way.
.. code-block:: LAMMPS .. code-block:: LAMMPS

View File

@ -260,7 +260,7 @@ Switch into the :code:`examples/melt` folder:
cd ../examples/melt cd ../examples/melt
To run this example in serial, use the following command line: To run this example in serial, use the following command:
.. code-block:: .. code-block::

View File

@ -60,7 +60,7 @@ between them at any time using "git checkout <branch name>".)
files (mostly by accident). If you do not need access to the entire files (mostly by accident). If you do not need access to the entire
commit history (most people don't), you can speed up the "cloning" commit history (most people don't), you can speed up the "cloning"
process and reduce local disk space requirements by using the process and reduce local disk space requirements by using the
``--depth`` git command line flag. That will create a "shallow clone" ``--depth`` git command-line flag. That will create a "shallow clone"
of the repository, which contains only a subset of the git history. of the repository, which contains only a subset of the git history.
Using a depth of 1000 is usually sufficient to include the head Using a depth of 1000 is usually sufficient to include the head
commits of the *develop*, the *release*, and the *maintenance* commits of the *develop*, the *release*, and the *maintenance*

View File

@ -8,6 +8,8 @@ send an email to all of them at this address: "developers at
lammps.org". General questions about LAMMPS should be posted in the lammps.org". General questions about LAMMPS should be posted in the
`LAMMPS forum on MatSci <https://matsci.org/lammps/>`_. `LAMMPS forum on MatSci <https://matsci.org/lammps/>`_.
.. We need to keep this file in sync with https://www.lammps.org/authors.html
.. raw:: latex .. raw:: latex
\small \small
@ -27,7 +29,7 @@ lammps.org". General questions about LAMMPS should be posted in the
* - `Steve Plimpton <sjp_>`_ * - `Steve Plimpton <sjp_>`_
- SNL (retired) - SNL (retired)
- sjplimp at gmail.com - sjplimp at gmail.com
- MD kernels, parallel algorithms & scalability, code structure and design - original author, MD kernels, parallel algorithms & scalability, code structure and design
* - `Aidan Thompson <at_>`_ * - `Aidan Thompson <at_>`_
- SNL - SNL
- athomps at sandia.gov - athomps at sandia.gov

Binary file not shown.

Before

Width:  |  Height:  |  Size: 103 KiB

After

Width:  |  Height:  |  Size: 78 KiB

View File

@ -46,7 +46,7 @@ Include files (varied)
but instead should be initialized either in the initializer list of but instead should be initialized either in the initializer list of
the constructor or explicitly assigned in the body of the constructor. the constructor or explicitly assigned in the body of the constructor.
If the member variable is relevant to the functionality of a class If the member variable is relevant to the functionality of a class
(for example when it stores a value from a command line argument), the (for example when it stores a value from a command-line argument), the
member variable declaration is followed by a brief comment explaining member variable declaration is followed by a brief comment explaining
its purpose and what its values can be. Class members that are its purpose and what its values can be. Class members that are
pointers should always be initialized to ``nullptr`` in the pointers should always be initialized to ``nullptr`` in the

View File

@ -2171,8 +2171,8 @@ the :doc:`Build extras <Build_extras>` page.
* ``src/OPENMP/README`` * ``src/OPENMP/README``
* :doc:`Accelerator packages <Speed_packages>` * :doc:`Accelerator packages <Speed_packages>`
* :doc:`OPENMP package <Speed_omp>` * :doc:`OPENMP package <Speed_omp>`
* :doc:`Command line option -suffix/-sf omp <Run_options>` * :doc:`Command-line option -suffix/-sf omp <Run_options>`
* :doc:`Command line option -package/-pk omp <Run_options>` * :doc:`Command-line option -package/-pk omp <Run_options>`
* :doc:`package omp <package>` * :doc:`package omp <package>`
* Search the :doc:`commands <Commands_all>` pages (:doc:`fix <Commands_fix>`, :doc:`compute <Commands_compute>`, * Search the :doc:`commands <Commands_all>` pages (:doc:`fix <Commands_fix>`, :doc:`compute <Commands_compute>`,
:doc:`pair <Commands_pair>`, :doc:`bond, angle, dihedral, improper <Commands_bond>`, :doc:`pair <Commands_pair>`, :doc:`bond, angle, dihedral, improper <Commands_bond>`,
@ -2789,14 +2789,15 @@ implements smoothed particle hydrodynamics (SPH) for liquids. See the
related :ref:`MACHDYN package <PKG-MACHDYN>` package for smooth Mach dynamics related :ref:`MACHDYN package <PKG-MACHDYN>` package for smooth Mach dynamics
(SMD) for solids. (SMD) for solids.
This package contains ideal gas, Lennard-Jones equations of state, This package contains ideal gas, Lennard-Jones equations of state, Tait,
Tait, and full support for complete (i.e. internal-energy dependent) and full support for complete (i.e. internal-energy dependent) equations
equations of state. It allows for plain or Monaghan's XSPH integration of state. It allows for plain or Monaghan's XSPH integration of the
of the equations of motion. It has options for density continuity or equations of motion. It has options for density continuity or density
density summation to propagate the density field. It has summation to propagate the density field. It has :doc:`set <set>`
:doc:`set <set>` command options to set the internal energy and density command options to set the internal energy and density of particles from
of particles from the input script and allows the same quantities to the input script and allows the same quantities to be output with
be output with thermodynamic output or to dump files via the :doc:`compute property/atom <compute_property_atom>` command. thermodynamic output or to dump files via the :doc:`compute
property/atom <compute_property_atom>` command.
**Author:** Georg Ganzenmuller (Fraunhofer-Institute for High-Speed **Author:** Georg Ganzenmuller (Fraunhofer-Institute for High-Speed
Dynamics, Ernst Mach Institute, Germany). Dynamics, Ernst Mach Institute, Germany).
@ -2809,6 +2810,17 @@ Dynamics, Ernst Mach Institute, Germany).
* ``examples/PACKAGES/sph`` * ``examples/PACKAGES/sph``
* https://www.lammps.org/movies.html#sph * https://www.lammps.org/movies.html#sph
.. note::
Please note that the SPH PDF guide file has not been updated for
many years and thus does not reflect the current *syntax* of the
SPH package commands. For that please refer to the LAMMPS manual.
.. note::
Please also note that the :ref:`RHEO package <PKG-RHEO>` offers
similar functionality in a more modern and flexible implementation.
---------- ----------
.. _PKG-SPIN: .. _PKG-SPIN:

View File

@ -1,8 +1,8 @@
Basics of running LAMMPS Basics of running LAMMPS
======================== ========================
LAMMPS is run from the command line, reading commands from a file via LAMMPS is run from the command-line, reading commands from a file via
the ``-in`` command line flag, or from standard input. Using the ``-in the ``-in`` command-line flag, or from standard input. Using the ``-in
in.file`` variant is recommended (see note below). The name of the in.file`` variant is recommended (see note below). The name of the
LAMMPS executable is either ``lmp`` or ``lmp_<machine>`` with LAMMPS executable is either ``lmp`` or ``lmp_<machine>`` with
`<machine>` being the machine string used when compiling LAMMPS. This `<machine>` being the machine string used when compiling LAMMPS. This
@ -25,7 +25,7 @@ build LAMMPS:
You normally run the LAMMPS command in the directory where your input You normally run the LAMMPS command in the directory where your input
script is located. That is also where output files are produced by script is located. That is also where output files are produced by
default, unless you provide specific other paths in your input script or default, unless you provide specific other paths in your input script or
on the command line. As in some of the examples above, the LAMMPS on the command-line. As in some of the examples above, the LAMMPS
executable itself can be placed elsewhere. executable itself can be placed elsewhere.
.. note:: .. note::

View File

@ -632,7 +632,7 @@ the ``-package omp`` command-line switch or the :doc:`package omp <package>` com
The :doc:`suffix <suffix>` command can also be used within an input The :doc:`suffix <suffix>` command can also be used within an input
script to set a suffix, or to turn off or back on any suffix setting script to set a suffix, or to turn off or back on any suffix setting
made via the command line. made via the command-line.
---------- ----------

View File

@ -20,7 +20,7 @@ To run with 4 threads, you can type this:
lmp -in in.lj.lmp -k on t 4 -sf kk lmp -in in.lj.lmp -k on t 4 -sf kk
Alternately, you can also install a package with LAMMPS-GUI included and Alternately, you can also install a package with LAMMPS-GUI included and
open the LAMMPS-GUI app (the package includes the command line version open the LAMMPS-GUI app (the package includes the command-line version
of LAMMPS as well) and open the input file in the GUI and run it from of LAMMPS as well) and open the input file in the GUI and run it from
there. For details on LAMMPS-GUI, see :doc:`Howto_lammps_gui`. there. For details on LAMMPS-GUI, see :doc:`Howto_lammps_gui`.

View File

@ -31,7 +31,8 @@ Coulombics. It has the following general features:
(for Nvidia GPUs, AMD GPUs, Intel GPUs, and multicore CPUs). (for Nvidia GPUs, AMD GPUs, Intel GPUs, and multicore CPUs).
so that the same functionality is supported on a variety of hardware. so that the same functionality is supported on a variety of hardware.
**Required hardware/software:** Required hardware/software
""""""""""""""""""""""""""
To compile and use this package in CUDA mode, you currently need To compile and use this package in CUDA mode, you currently need
to have an NVIDIA GPU and install the corresponding NVIDIA CUDA to have an NVIDIA GPU and install the corresponding NVIDIA CUDA
@ -69,12 +70,14 @@ To compile and use this package in HIP mode, you have to have the AMD ROCm
software installed. Versions of ROCm older than 3.5 are currently deprecated software installed. Versions of ROCm older than 3.5 are currently deprecated
by AMD. by AMD.
**Building LAMMPS with the GPU package:** Building LAMMPS with the GPU package
""""""""""""""""""""""""""""""""""""
See the :ref:`Build extras <gpu>` page for See the :ref:`Build extras <gpu>` page for
instructions. instructions.
**Run with the GPU package from the command line:** Run with the GPU package from the command-line
""""""""""""""""""""""""""""""""""""""""""""""
The ``mpirun`` or ``mpiexec`` command sets the total number of MPI tasks The ``mpirun`` or ``mpiexec`` command sets the total number of MPI tasks
used by LAMMPS (one or multiple per compute node) and the number of MPI used by LAMMPS (one or multiple per compute node) and the number of MPI
@ -133,7 +136,8 @@ affect the setting for bonded interactions (LAMMPS default is "on").
The "off" setting for pairwise interaction is currently required for The "off" setting for pairwise interaction is currently required for
GPU package pair styles. GPU package pair styles.
**Or run with the GPU package by editing an input script:** Run with the GPU package by editing an input script
"""""""""""""""""""""""""""""""""""""""""""""""""""
The discussion above for the ``mpirun`` or ``mpiexec`` command, MPI The discussion above for the ``mpirun`` or ``mpiexec`` command, MPI
tasks/node, and use of multiple MPI tasks/GPU is the same. tasks/node, and use of multiple MPI tasks/GPU is the same.
@ -149,7 +153,8 @@ You must also use the :doc:`package gpu <package>` command to enable the
GPU package, unless the ``-sf gpu`` or ``-pk gpu`` :doc:`command-line switches <Run_options>` were used. It specifies the number of GPU package, unless the ``-sf gpu`` or ``-pk gpu`` :doc:`command-line switches <Run_options>` were used. It specifies the number of
GPUs/node to use, as well as other options. GPUs/node to use, as well as other options.
**Speed-ups to expect:** Speed-up to expect
""""""""""""""""""
The performance of a GPU versus a multicore CPU is a function of your The performance of a GPU versus a multicore CPU is a function of your
hardware, which pair style is used, the number of atoms/GPU, and the hardware, which pair style is used, the number of atoms/GPU, and the
@ -176,10 +181,13 @@ better with multiple OMP threads because the inter-process communication
is higher for these styles with the GPU package in order to allow is higher for these styles with the GPU package in order to allow
deterministic results. deterministic results.
**Guidelines for best performance:** Guidelines for best performance
"""""""""""""""""""""""""""""""
* Using multiple MPI tasks per GPU will often give the best performance, * Using multiple MPI tasks (2-10) per GPU will often give the best
as allowed by most multicore CPU/GPU configurations. performance, as allowed by most multicore CPU/GPU configurations.
Using too many MPI tasks will result in worse performance due to
growing overhead with the growing number of MPI tasks.
* If the number of particles per MPI task is small (e.g. 100s of * If the number of particles per MPI task is small (e.g. 100s of
particles), it can be more efficient to run with fewer MPI tasks per particles), it can be more efficient to run with fewer MPI tasks per
GPU, even if you do not use all the cores on the compute node. GPU, even if you do not use all the cores on the compute node.
@ -199,12 +207,13 @@ deterministic results.
:doc:`angle <angle_style>`, :doc:`dihedral <dihedral_style>`, :doc:`angle <angle_style>`, :doc:`dihedral <dihedral_style>`,
:doc:`improper <improper_style>`, and :doc:`long-range <kspace_style>` :doc:`improper <improper_style>`, and :doc:`long-range <kspace_style>`
calculations will not be included in the "Pair" time. calculations will not be included in the "Pair" time.
* Since only part of the pppm kspace style is GPU accelerated, it * Since only part of the pppm kspace style is GPU accelerated, it may be
may be faster to only use GPU acceleration for Pair styles with faster to only use GPU acceleration for Pair styles with long-range
long-range electrostatics. See the "pair/only" keyword of the electrostatics. See the "pair/only" keyword of the :doc:`package
package command for a shortcut to do that. The work between kspace command <package>` for a shortcut to do that. The distribution of
on the CPU and non-bonded interactions on the GPU can be balanced work between kspace on the CPU and non-bonded interactions on the GPU
through adjusting the coulomb cutoff without loss of accuracy. can be balanced through adjusting the coulomb cutoff without loss of
accuracy.
* When the *mode* setting for the package gpu command is force/neigh, * When the *mode* setting for the package gpu command is force/neigh,
the time for neighbor list calculations on the GPU will be added into the time for neighbor list calculations on the GPU will be added into
the "Pair" time, not the "Neigh" time. An additional breakdown of the the "Pair" time, not the "Neigh" time. An additional breakdown of the
@ -220,4 +229,6 @@ deterministic results.
Restrictions Restrictions
"""""""""""" """"""""""""
None. When using :doc:`hybrid pair styles <pair_hybrid>`, the neighbor list
must be generated on the host instead of the GPU and thus the potential
GPU acceleration is reduced.

View File

@ -1,5 +1,5 @@
INTEL package INTEL package
================== =============
The INTEL package is maintained by Mike Brown at Intel The INTEL package is maintained by Mike Brown at Intel
Corporation. It provides two methods for accelerating simulations, Corporation. It provides two methods for accelerating simulations,
@ -13,18 +13,18 @@ twice, once on the CPU and once with an offload flag. This allows
LAMMPS to run on the CPU cores and co-processor cores simultaneously. LAMMPS to run on the CPU cores and co-processor cores simultaneously.
Currently Available INTEL Styles Currently Available INTEL Styles
""""""""""""""""""""""""""""""""""""" """"""""""""""""""""""""""""""""
* Angle Styles: charmm, harmonic * Angle Styles: charmm, harmonic
* Bond Styles: fene, fourier, harmonic * Bond Styles: fene, harmonic
* Dihedral Styles: charmm, fourier, harmonic, opls * Dihedral Styles: charmm, fourier, harmonic, opls
* Fixes: nve, npt, nvt, nvt/sllod, nve/asphere * Fixes: nve, npt, nvt, nvt/sllod, nve/asphere, electrode/conp, electrode/conq, electrode/thermo
* Improper Styles: cvff, harmonic * Improper Styles: cvff, harmonic
* Pair Styles: airebo, airebo/morse, buck/coul/cut, buck/coul/long, * Pair Styles: airebo, airebo/morse, buck/coul/cut, buck/coul/long,
buck, dpd, eam, eam/alloy, eam/fs, gayberne, lj/charmm/coul/charmm, buck, dpd, eam, eam/alloy, eam/fs, gayberne, lj/charmm/coul/charmm,
lj/charmm/coul/long, lj/cut, lj/cut/coul/long, lj/long/coul/long, lj/charmm/coul/long, lj/cut, lj/cut/coul/long, lj/long/coul/long,
rebo, sw, tersoff rebo, snap, sw, tersoff
* K-Space Styles: pppm, pppm/disp * K-Space Styles: pppm, pppm/disp, pppm/electrode
.. warning:: .. warning::
@ -33,7 +33,7 @@ Currently Available INTEL Styles
input requires it, LAMMPS will abort with an error message. input requires it, LAMMPS will abort with an error message.
Speed-up to expect Speed-up to expect
""""""""""""""""""" """"""""""""""""""
The speedup will depend on your simulation, the hardware, which The speedup will depend on your simulation, the hardware, which
styles are used, the number of atoms, and the floating-point styles are used, the number of atoms, and the floating-point
@ -312,21 +312,21 @@ almost all cases.
recommended, especially when running on a machine with Intel recommended, especially when running on a machine with Intel
Hyper-Threading technology disabled. Hyper-Threading technology disabled.
Run with the INTEL package from the command line Run with the INTEL package from the command-line
""""""""""""""""""""""""""""""""""""""""""""""""""""" """"""""""""""""""""""""""""""""""""""""""""""""
To enable INTEL optimizations for all available styles used in To enable INTEL optimizations for all available styles used in the input
the input script, the ``-sf intel`` :doc:`command-line switch <Run_options>` can be used without any requirement for script, the ``-sf intel`` :doc:`command-line switch <Run_options>` can
editing the input script. This switch will automatically append be used without any requirement for editing the input script. This
"intel" to styles that support it. It also invokes a default command: switch will automatically append "intel" to styles that support it. It
:doc:`package intel 1 <package>`. This package command is used to set also invokes a default command: :doc:`package intel 1 <package>`. This
options for the INTEL package. The default package command will package command is used to set options for the INTEL package. The
specify that INTEL calculations are performed in mixed precision, default package command will specify that INTEL calculations are
that the number of OpenMP threads is specified by the OMP_NUM_THREADS performed in mixed precision, that the number of OpenMP threads is
environment variable, and that if co-processors are present and the specified by the OMP_NUM_THREADS environment variable, and that if
binary was built with offload support, that 1 co-processor per node co-processors are present and the binary was built with offload support,
will be used with automatic balancing of work between the CPU and the that 1 co-processor per node will be used with automatic balancing of
co-processor. work between the CPU and the co-processor.
You can specify different options for the INTEL package by using You can specify different options for the INTEL package by using
the ``-pk intel Nphi`` :doc:`command-line switch <Run_options>` with the ``-pk intel Nphi`` :doc:`command-line switch <Run_options>` with

View File

@ -77,7 +77,7 @@ version 23 November 2023 and Kokkos version 4.2.
rank. When running with multiple MPI ranks, you may see segmentation rank. When running with multiple MPI ranks, you may see segmentation
faults without GPU-aware MPI support. These can be avoided by adding faults without GPU-aware MPI support. These can be avoided by adding
the flags :doc:`-pk kokkos gpu/aware off <Run_options>` to the the flags :doc:`-pk kokkos gpu/aware off <Run_options>` to the
LAMMPS command line or by using the command :doc:`package kokkos LAMMPS command-line or by using the command :doc:`package kokkos
gpu/aware off <package>` in the input file. gpu/aware off <package>` in the input file.
.. admonition:: Intel Data Center GPU support .. admonition:: Intel Data Center GPU support
@ -423,7 +423,7 @@ in the ``kokkos-cuda.cmake`` CMake preset file.
cmake -DKokkos_ENABLE_CUDA=yes -DKokkos_ENABLE_OPENMP=yes ../cmake cmake -DKokkos_ENABLE_CUDA=yes -DKokkos_ENABLE_OPENMP=yes ../cmake
The suffix "/kk" is equivalent to "/kk/device", and for Kokkos CUDA, The suffix "/kk" is equivalent to "/kk/device", and for Kokkos CUDA,
using the ``-sf kk`` in the command line gives the default CUDA version using the ``-sf kk`` in the command-line gives the default CUDA version
everywhere. However, if the "/kk/host" suffix is added to a specific everywhere. However, if the "/kk/host" suffix is added to a specific
style in the input script, the Kokkos OpenMP (CPU) version of that style in the input script, the Kokkos OpenMP (CPU) version of that
specific style will be used instead. Set the number of OpenMP threads specific style will be used instead. Set the number of OpenMP threads
@ -439,7 +439,7 @@ For example, the command to run with 1 GPU and 8 OpenMP threads is then:
mpiexec -np 1 lmp_kokkos_cuda_openmpi -in in.lj -k on g 1 t 8 -sf kk mpiexec -np 1 lmp_kokkos_cuda_openmpi -in in.lj -k on g 1 t 8 -sf kk
Conversely, if the ``-sf kk/host`` is used in the command line and then Conversely, if the ``-sf kk/host`` is used in the command-line and then
the "/kk" or "/kk/device" suffix is added to a specific style in your the "/kk" or "/kk/device" suffix is added to a specific style in your
input script, then only that specific style will run on the GPU while input script, then only that specific style will run on the GPU while
everything else will run on the CPU in OpenMP mode. Note that the everything else will run on the CPU in OpenMP mode. Note that the
@ -451,7 +451,7 @@ on the host CPU can overlap with a pair style running on the
GPU. First compile with ``--default-stream per-thread`` added to ``CCFLAGS`` GPU. First compile with ``--default-stream per-thread`` added to ``CCFLAGS``
in the Kokkos CUDA Makefile. Then explicitly use the "/kk/host" in the Kokkos CUDA Makefile. Then explicitly use the "/kk/host"
suffix for kspace and bonds, angles, etc. in the input file and the suffix for kspace and bonds, angles, etc. in the input file and the
"kk" suffix (equal to "kk/device") on the command line. Also make "kk" suffix (equal to "kk/device") on the command-line. Also make
sure the environment variable ``CUDA_LAUNCH_BLOCKING`` is not set to "1" sure the environment variable ``CUDA_LAUNCH_BLOCKING`` is not set to "1"
so CPU/GPU overlap can occur. so CPU/GPU overlap can occur.

View File

@ -21,7 +21,7 @@ Building LAMMPS with the OPENMP package
See the :ref:`Build extras <openmp>` page for See the :ref:`Build extras <openmp>` page for
instructions. instructions.
Run with the OPENMP package from the command line Run with the OPENMP package from the command-line
""""""""""""""""""""""""""""""""""""""""""""""""""" """""""""""""""""""""""""""""""""""""""""""""""""""
These examples assume one or more 16-core nodes. These examples assume one or more 16-core nodes.

View File

@ -17,7 +17,7 @@ Building LAMMPS with the OPT package
See the :ref:`Build extras <opt>` page for instructions. See the :ref:`Build extras <opt>` page for instructions.
Run with the OPT package from the command line Run with the OPT package from the command-line
"""""""""""""""""""""""""""""""""""""""""""""" """"""""""""""""""""""""""""""""""""""""""""""
.. code-block:: bash .. code-block:: bash

View File

@ -501,7 +501,7 @@ Here are a few highlights of LAMMPS-GUI
- Indicator for line that caused an error - Indicator for line that caused an error
- Visualization of current state in Image Viewer (via calling :doc:`write_dump image <dump_image>`) - Visualization of current state in Image Viewer (via calling :doc:`write_dump image <dump_image>`)
- Capture of images created via :doc:`dump image <dump_image>` in Slide show window - Capture of images created via :doc:`dump image <dump_image>` in Slide show window
- Dialog to set variables, similar to the LAMMPS command line flag '-v' / '-var' - Dialog to set variables, similar to the LAMMPS command-line flag '-v' / '-var'
- Support for the GPU, INTEL, KOKKOS/OpenMP, OPENMP, and OPT accelerator packages - Support for the GPU, INTEL, KOKKOS/OpenMP, OPENMP, and OPT accelerator packages
Parallelization Parallelization
@ -550,7 +550,7 @@ will be found automatically. 2) you can download the `Flatpak file
*flatpak* command: ``flatpak install --user *flatpak* command: ``flatpak install --user
LAMMPS-Linux-x86_64-GUI-<version>.flatpak`` and run it with ``flatpak LAMMPS-Linux-x86_64-GUI-<version>.flatpak`` and run it with ``flatpak
run org.lammps.lammps-gui``. The flatpak bundle also includes the run org.lammps.lammps-gui``. The flatpak bundle also includes the
command line version of LAMMPS and some LAMMPS tools like msi2lmp. These command-line version of LAMMPS and some LAMMPS tools like msi2lmp. These
can be launched by using the ``--command`` flag. For example to run can be launched by using the ``--command`` flag. For example to run
LAMMPS directly on the ``in.lj`` benchmark input you would type in the LAMMPS directly on the ``in.lj`` benchmark input you would type in the
``bench`` folder: ``flatpak run --command=lmp -in in.lj`` The flatpak ``bench`` folder: ``flatpak run --command=lmp -in in.lj`` The flatpak
@ -608,10 +608,10 @@ would be the ``examples/COUPLE/plugin`` folder of the LAMMPS
distribution. distribution.
When compiling LAMMPS-GUI with plugin support, there is an additional When compiling LAMMPS-GUI with plugin support, there is an additional
command line flag (``-p <path>`` or ``--pluginpath <path>``) which command-line flag (``-p <path>`` or ``--pluginpath <path>``) which
allows overriding the path to the LAMMPS shared library used by the GUI. allows overriding the path to the LAMMPS shared library used by the GUI.
This is usually auto-detected on the first run and can be changed in the This is usually auto-detected on the first run and can be changed in the
LAMMPS-GUI *Preferences* dialog. The command line flag allows resetting LAMMPS-GUI *Preferences* dialog. The command-line flag allows resetting
this path to a valid value in case the original setting has become this path to a valid value in case the original setting has become
invalid. An empty path ("") as argument restores the default setting. invalid. An empty path ("") as argument restores the default setting.
@ -656,7 +656,7 @@ it will create a compressed ``LAMMPS-Win10-amd64.zip`` zip file with the
executables and required dependent .dll files. This zip file can be executables and required dependent .dll files. This zip file can be
uncompressed and ``lammps-gui.exe`` run directly from there. The uncompressed and ``lammps-gui.exe`` run directly from there. The
uncompressed folder can be added to the ``PATH`` environment and LAMMPS uncompressed folder can be added to the ``PATH`` environment and LAMMPS
and LAMMPS-GUI can be launched from anywhere from the command line. and LAMMPS-GUI can be launched from anywhere from the command-line.
**MinGW64 Cross-compiler** **MinGW64 Cross-compiler**
@ -876,7 +876,7 @@ the same ``LAMMPS_CACHING_DIR``. This script does the following:
#. Start a simple local HTTP server using Python to host files for CMake #. Start a simple local HTTP server using Python to host files for CMake
Afterwards, it will print out instructions on how to modify the CMake Afterwards, it will print out instructions on how to modify the CMake
command line to make sure it uses the local HTTP server. commands to make sure it uses the local HTTP server.
To undo the environment changes and shut down the local HTTP server, To undo the environment changes and shut down the local HTTP server,
run the ``deactivate_caches`` command. run the ``deactivate_caches`` command.
@ -1025,7 +1025,7 @@ with those in the provided log file with the same number of processors
in the same subdirectory. If the differences between the actual and in the same subdirectory. If the differences between the actual and
reference values are within specified tolerances, the test is considered reference values are within specified tolerances, the test is considered
passed. For each test batch, that is, a set of example input scripts, passed. For each test batch, that is, a set of example input scripts,
the mpirun command, the LAMMPS command line arguments, and the the mpirun command, the LAMMPS command-line arguments, and the
tolerances for individual thermo quantities can be specified in a tolerances for individual thermo quantities can be specified in a
configuration file in YAML format. configuration file in YAML format.

View File

@ -33,6 +33,12 @@ particle.
See `this PDF guide <PDF/SPH_LAMMPS_userguide.pdf>`_ to using SPH in See `this PDF guide <PDF/SPH_LAMMPS_userguide.pdf>`_ to using SPH in
LAMMPS. LAMMPS.
.. note::
Please note that the SPH PDF guide file has not been updated for
many years and thus does not reflect the current *syntax* of the
SPH package commands. For that please refer to the LAMMPS manual.
The value of the internal energy will be 0.0 for atoms not in the The value of the internal energy will be 0.0 for atoms not in the
specified compute group. specified compute group.

View File

@ -32,6 +32,12 @@ kernel function interpolation using "pair style sph/rhosum".
See `this PDF guide <PDF/SPH_LAMMPS_userguide.pdf>`_ to using SPH in See `this PDF guide <PDF/SPH_LAMMPS_userguide.pdf>`_ to using SPH in
LAMMPS. LAMMPS.
.. note::
Please note that the SPH PDF guide file has not been updated for
many years and thus does not reflect the current *syntax* of the
SPH package commands. For that please refer to the LAMMPS manual.
The value of the SPH density will be 0.0 for atoms not in the The value of the SPH density will be 0.0 for atoms not in the
specified compute group. specified compute group.

View File

@ -37,6 +37,12 @@ particles, i.e. a Smooth-Particle Hydrodynamics particle.
See `this PDF guide <PDF/SPH_LAMMPS_userguide.pdf>`_ to using SPH in See `this PDF guide <PDF/SPH_LAMMPS_userguide.pdf>`_ to using SPH in
LAMMPS. LAMMPS.
.. note::
Please note that the SPH PDF guide file has not been updated for
many years and thus does not reflect the current *syntax* of the
SPH package commands. For that please refer to the LAMMPS manual.
The value of the internal energy will be 0.0 for atoms not in the The value of the internal energy will be 0.0 for atoms not in the
specified compute group. specified compute group.

View File

@ -184,11 +184,24 @@ temp/chunk calculation to a file is to use the
The keyword/value option pairs are used in the following ways. The keyword/value option pairs are used in the following ways.
The *com* keyword can be used with a value of *yes* to subtract the The *com* keyword can be used with a value of *yes* to subtract the
velocity of the center-of-mass for each chunk from the velocity of the velocity of the center-of-mass (VCM) for each chunk from the velocity of
atoms in that chunk, before calculating either the global or per-chunk the atoms in that chunk, before calculating either the global or per-chunk
temperature. This can be useful if the atoms are streaming or temperature. This can be useful if the atoms are streaming or
otherwise moving collectively, and you wish to calculate only the otherwise moving collectively, and you wish to calculate only the
thermal temperature. thermal temperature. This per-chunk VCM bias can be used in other fixes and
computes that can incorporate a temperature bias. If this compute is used
as a temperature bias in other commands then this bias is subtracted from
each atom, the command runs with the remaining thermal velocities, and
then the bias is added back in. This includes thermostatting
fixes like :doc:`fix nvt <fix_nh>`,
:doc:`fix temp/rescale <fix_temp_rescale>`,
:doc:`fix temp/berendsen <fix_temp_berendsen>`, and
:doc:`fix langevin <fix_langevin>`, and computes like
:doc:`compute stress/atom <compute_stress_atom>` and
:doc:`compute pressure <compute_pressure>`. See the input script in
examples/stress_vcm for an example of how to use the *com* keyword in
conjunction with compute stress/atom to create a stress profile of a rigid
body while removing the overall motion of the rigid body.
For the *bias* keyword, *bias-ID* refers to the ID of a temperature For the *bias* keyword, *bias-ID* refers to the ID of a temperature
compute that removes a "bias" velocity from each atom. This also compute that removes a "bias" velocity from each atom. This also

View File

@ -681,7 +681,7 @@ MPEG or other movie file you can use:
* c) Use FFmpeg * c) Use FFmpeg
FFmpeg is a command line tool that is available on many platforms and FFmpeg is a command-line tool that is available on many platforms and
allows extremely flexible encoding and decoding of movies. allows extremely flexible encoding and decoding of movies.
.. code-block:: bash .. code-block:: bash

View File

@ -97,7 +97,7 @@ adjustments.
To connect VMD to a listening LAMMPS simulation on the same machine To connect VMD to a listening LAMMPS simulation on the same machine
with fix imd enabled, one needs to start VMD and load a coordinate or with fix imd enabled, one needs to start VMD and load a coordinate or
topology file that matches the fix group. When the VMD command topology file that matches the fix group. When the VMD command
prompt appears, one types the command line: prompt appears, one types the command:
.. parsed-literal:: .. parsed-literal::

View File

@ -135,7 +135,7 @@ directions for the forces. Only the direction of the vector is
important; its length is ignored (the entered vectors are important; its length is ignored (the entered vectors are
normalized). normalized).
Those styles can be combined within one single command line. Those styles can be combined within one single command.
.. note:: .. note::

View File

@ -32,6 +32,12 @@ Hydrodynamics.
See `this PDF guide <PDF/SPH_LAMMPS_userguide.pdf>`_ to using SPH in See `this PDF guide <PDF/SPH_LAMMPS_userguide.pdf>`_ to using SPH in
LAMMPS. LAMMPS.
.. note::
Please note that the SPH PDF guide file has not been updated for
many years and thus does not reflect the current *syntax* of the
SPH package commands. For that please refer to the LAMMPS manual.
Restart, fix_modify, output, run start/stop, minimize info Restart, fix_modify, output, run start/stop, minimize info
""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" """""""""""""""""""""""""""""""""""""""""""""""""""""""""""

View File

@ -32,6 +32,12 @@ space. SPH stands for Smoothed Particle Hydrodynamics.
See `this PDF guide <PDF/SPH_LAMMPS_userguide.pdf>`_ to using SPH in See `this PDF guide <PDF/SPH_LAMMPS_userguide.pdf>`_ to using SPH in
LAMMPS. LAMMPS.
.. note::
Please note that the SPH PDF guide file has not been updated for
many years and thus does not reflect the current *syntax* of the
SPH package commands. For that please refer to the LAMMPS manual.
Restart, fix_modify, output, run start/stop, minimize info Restart, fix_modify, output, run start/stop, minimize info
""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" """""""""""""""""""""""""""""""""""""""""""""""""""""""""""

View File

@ -1084,10 +1084,11 @@ the form of *key_name_key*-*key_name_value* pairs). For example,
kim property modify 1 key mass source-value 26.98154 kim property modify 1 key mass source-value 26.98154
kim property modify 1 key mass source-unit amu kim property modify 1 key mass source-unit amu
where the special keyword "key" is followed by a *key_name* ("species" or where the special keyword "key" is followed by a *key_name* ("species"
"mass" in the above) and one or more key-value pairs. These key-value pairs or "mass" in the above) and one or more key-value pairs. These
may continue until either another "key" keyword is given or the end of the key-value pairs may continue until either another "key" keyword is given
command line is reached. Thus, the above could equivalently be written as or the end of the line is reached. Thus, the above could equivalently
be written as
.. code-block:: LAMMPS .. code-block:: LAMMPS

View File

@ -24,12 +24,12 @@ Description
""""""""""" """""""""""
Label this line of the input script with the chosen ID. Unless a jump Label this line of the input script with the chosen ID. Unless a jump
command was used previously, this does nothing. But if a command was used previously, this does nothing. But if a :doc:`jump
:doc:`jump <jump>` command was used with a label argument to begin <jump>` command was used with a label argument to begin invoking this
invoking this script file, then all command lines in the script prior script file, then all commands in the script prior to this line will be
to this line will be ignored. I.e. execution of the script will begin ignored. I.e. execution of the script will begin at this line. This is
at this line. This is useful for looping over a section of the input useful for looping over a section of the input script as discussed in
script as discussed in the :doc:`jump <jump>` command. the :doc:`jump <jump>` command.
Restrictions Restrictions
"""""""""""" """"""""""""

View File

@ -504,7 +504,7 @@ as it is for non-accelerated pair styles
The *binsize* keyword sets the size of bins used to bin atoms during The *binsize* keyword sets the size of bins used to bin atoms during
neighbor list builds. The same value can be set by the neighbor list builds. The same value can be set by the
:doc:`neigh_modify binsize <neigh_modify>` command. Making it an option :doc:`neigh_modify binsize <neigh_modify>` command. Making it an option
in the package kokkos command allows it to be set from the command line. in the package kokkos command allows it to be set from the command-line.
The default value for CPUs is 0.0, which means the LAMMPS default will be The default value for CPUs is 0.0, which means the LAMMPS default will be
used, which is bins = 1/2 the size of the pairwise cutoff + neighbor skin used, which is bins = 1/2 the size of the pairwise cutoff + neighbor skin
distance. This is fine when neighbor lists are built on the CPU. For GPU distance. This is fine when neighbor lists are built on the CPU. For GPU
@ -664,7 +664,7 @@ too.
Also note that if the :doc:`-sf hybrid intel omp command-line switch <Run_options>` is used, it invokes a "package intel" command, followed by a Also note that if the :doc:`-sf hybrid intel omp command-line switch <Run_options>` is used, it invokes a "package intel" command, followed by a
"package omp" command, both with a setting of *Nthreads* = 0. Likewise "package omp" command, both with a setting of *Nthreads* = 0. Likewise
for a hybrid suffix for gpu and omp. Note that KOKKOS also supports for a hybrid suffix for gpu and omp. Note that KOKKOS also supports
setting the number of OpenMP threads from the command line using the setting the number of OpenMP threads from the command-line using the
"-k on" :doc:`command-line switch <Run_options>`. The default for "-k on" :doc:`command-line switch <Run_options>`. The default for
KOKKOS is 1 thread per MPI task, so any other number of threads should KOKKOS is 1 thread per MPI task, so any other number of threads should
be explicitly set using the "-k on" command-line switch (and this be explicitly set using the "-k on" command-line switch (and this

View File

@ -111,8 +111,8 @@ routines. For x-86 machines, there is a provided Makefile.mgptfast
which enables the fast algebra routines, i.e. build LAMMPS with "make which enables the fast algebra routines, i.e. build LAMMPS with "make
mgptfast". The user will be informed in the output files of the mgptfast". The user will be informed in the output files of the
matrix kernels in use. To further improve speed, on x86 the option matrix kernels in use. To further improve speed, on x86 the option
precision single can be added to the :doc:`pair_coeff <pair_coeff>` *precision single* can be added to the :doc:`pair_coeff <pair_coeff>`
command line, which improves speed (up to a factor of two) at the cost command, which improves speed (up to a factor of two) at the cost
of doing matrix calculations with 7 digit precision instead of the of doing matrix calculations with 7 digit precision instead of the
default 16. For consistency the default option can be specified default 16. For consistency the default option can be specified
explicitly by the option precision double. explicitly by the option precision double.

View File

@ -30,6 +30,12 @@ The transport model is the diffusion equation for the internal energy.
See `this PDF guide <PDF/SPH_LAMMPS_userguide.pdf>`_ to using SPH in See `this PDF guide <PDF/SPH_LAMMPS_userguide.pdf>`_ to using SPH in
LAMMPS. LAMMPS.
.. note::
Please note that the SPH PDF guide file has not been updated for
many years and thus does not reflect the current *syntax* of the
SPH package commands. For that please refer to the LAMMPS manual.
The following coefficients must be defined for each pair of atoms The following coefficients must be defined for each pair of atoms
types via the :doc:`pair_coeff <pair_coeff>` command as in the examples types via the :doc:`pair_coeff <pair_coeff>` command as in the examples
above. above.

View File

@ -36,6 +36,12 @@ particles from interpenetrating :ref:`(Monaghan) <ideal-Monoghan>`.
See `this PDF guide <PDF/SPH_LAMMPS_userguide.pdf>`_ to using SPH in See `this PDF guide <PDF/SPH_LAMMPS_userguide.pdf>`_ to using SPH in
LAMMPS. LAMMPS.
.. note::
Please note that the SPH PDF guide file has not been updated for
many years and thus does not reflect the current *syntax* of the
SPH package commands. For that please refer to the LAMMPS manual.
The following coefficients must be defined for each pair of atoms The following coefficients must be defined for each pair of atoms
types via the :doc:`pair_coeff <pair_coeff>` command as in the examples types via the :doc:`pair_coeff <pair_coeff>` command as in the examples
above. above.

View File

@ -34,6 +34,12 @@ interpenetrating :ref:`(Monaghan) <Monoghan>`.
See `this PDF guide <PDF/SPH_LAMMPS_userguide.pdf>`_ to using SPH in See `this PDF guide <PDF/SPH_LAMMPS_userguide.pdf>`_ to using SPH in
LAMMPS. LAMMPS.
.. note::
Please note that the SPH PDF guide file has not been updated for
many years and thus does not reflect the current *syntax* of the
SPH package commands. For that please refer to the LAMMPS manual.
The following coefficients must be defined for each pair of atoms The following coefficients must be defined for each pair of atoms
types via the :doc:`pair_coeff <pair_coeff>` command as in the examples types via the :doc:`pair_coeff <pair_coeff>` command as in the examples
above. above.

View File

@ -29,6 +29,12 @@ SPH particles by kernel function interpolation, every Nstep timesteps.
See `this PDF guide <PDF/SPH_LAMMPS_userguide.pdf>`_ to using SPH in See `this PDF guide <PDF/SPH_LAMMPS_userguide.pdf>`_ to using SPH in
LAMMPS. LAMMPS.
.. note::
Please note that the SPH PDF guide file has not been updated for
many years and thus does not reflect the current *syntax* of the
SPH package commands. For that please refer to the LAMMPS manual.
The following coefficients must be defined for each pair of atoms The following coefficients must be defined for each pair of atoms
types via the :doc:`pair_coeff <pair_coeff>` command as in the examples types via the :doc:`pair_coeff <pair_coeff>` command as in the examples
above. above.

View File

@ -41,6 +41,12 @@ prevent particles from interpenetrating :ref:`(Monaghan) <Monaghan>`.
See `this PDF guide <PDF/SPH_LAMMPS_userguide.pdf>`_ to using SPH in See `this PDF guide <PDF/SPH_LAMMPS_userguide.pdf>`_ to using SPH in
LAMMPS. LAMMPS.
.. note::
Please note that the SPH PDF guide file has not been updated for
many years and thus does not reflect the current *syntax* of the
SPH package commands. For that please refer to the LAMMPS manual.
The following coefficients must be defined for each pair of atoms The following coefficients must be defined for each pair of atoms
types via the :doc:`pair_coeff <pair_coeff>` command as in the examples types via the :doc:`pair_coeff <pair_coeff>` command as in the examples
above. above.

View File

@ -37,6 +37,12 @@ This pair style also computes laminar viscosity :ref:`(Morris) <Morris>`.
See `this PDF guide <PDF/SPH_LAMMPS_userguide.pdf>`_ to using SPH in See `this PDF guide <PDF/SPH_LAMMPS_userguide.pdf>`_ to using SPH in
LAMMPS. LAMMPS.
.. note::
Please note that the SPH PDF guide file has not been updated for
many years and thus does not reflect the current *syntax* of the
SPH package commands. For that please refer to the LAMMPS manual.
The following coefficients must be defined for each pair of atoms The following coefficients must be defined for each pair of atoms
types via the :doc:`pair_coeff <pair_coeff>` command as in the examples types via the :doc:`pair_coeff <pair_coeff>` command as in the examples
above. above.

View File

@ -131,7 +131,7 @@ command.
* LJ cutoff (distance units) * LJ cutoff (distance units)
The last two coefficients are optional and default to the global values from The last two coefficients are optional and default to the global values from
the *pair_style* command line. the *pair_style* command.
---------- ----------

View File

@ -48,9 +48,9 @@ meaning that the trajectories of a restarted run will precisely match
those produced by the original run had it continued on. those produced by the original run had it continued on.
Some information about a restart file can be gathered directly from the Some information about a restart file can be gathered directly from the
command line when using LAMMPS with the :ref:`-restart2info command-line when using LAMMPS with the :ref:`-restart2info
<restart2info>` command line flag. On Unix-like operating systems (like <restart2info>` command-line flag. On Unix-like operating systems (like
Linux or macOS), one can also :ref:`configure the "file" command line Linux or macOS), one can also :ref:`configure the "file" command-line
program <magic>` to display basic information about a restart file. program <magic>` to display basic information about a restart file.
The binary restart file format was not designed with backward, forward, The binary restart file format was not designed with backward, forward,
@ -60,9 +60,9 @@ Changes to the architecture, compilation settings, or LAMMPS version can
render a restart file unreadable or it may read the data incorrectly. render a restart file unreadable or it may read the data incorrectly.
If you want a more portable format, you can use the data file format as If you want a more portable format, you can use the data file format as
created by the :doc:`write_data <write_data>` command. Binary restart created by the :doc:`write_data <write_data>` command. Binary restart
files can also be converted into a data file from the command line by files can also be converted into a data file from the command-line by
the LAMMPS executable that wrote them using the :ref:`-restart2data the LAMMPS executable that wrote them using the :ref:`-restart2data
<restart2data>` command line flag. <restart2data>` command-line flag.
Several things can prevent exact restarts due to round-off effects, in Several things can prevent exact restarts due to round-off effects, in
which case the trajectories in the 2 runs will slowly diverge. These which case the trajectories in the 2 runs will slowly diverge. These

View File

@ -516,6 +516,12 @@ Keywords *sph/e*, *sph/cv*, and *sph/rho* set the energy, heat capacity,
and density of smoothed particle hydrodynamics (SPH) particles. See and density of smoothed particle hydrodynamics (SPH) particles. See
`this PDF guide <PDF/SPH_LAMMPS_userguide.pdf>`_ to using SPH in LAMMPS. `this PDF guide <PDF/SPH_LAMMPS_userguide.pdf>`_ to using SPH in LAMMPS.
.. note::
Please note that the SPH PDF guide file has not been updated for
many years and thus does not reflect the current *syntax* of the
SPH package commands. For that please refer to the LAMMPS manual.
Keyword *smd/mass/density* sets the mass of all selected particles, but Keyword *smd/mass/density* sets the mass of all selected particles, but
it is only applicable to the Smooth Mach Dynamics package MACHDYN. It it is only applicable to the Smooth Mach Dynamics package MACHDYN. It
assumes that the particle volume has already been correctly set and assumes that the particle volume has already been correctly set and

View File

@ -30,7 +30,7 @@ Description
This command allows you to use variants of various styles if they This command allows you to use variants of various styles if they
exist. In that respect it operates the same as the :doc:`-suffix command-line switch <Run_options>`. It also has options to turn exist. In that respect it operates the same as the :doc:`-suffix command-line switch <Run_options>`. It also has options to turn
off or back on any suffix setting made via the command line. off or back on any suffix setting made via the command-line.
The specified style can be *gpu*, *intel*, *kk*, *omp*, *opt* or The specified style can be *gpu*, *intel*, *kk*, *omp*, *opt* or
*hybrid*\ . These refer to optional packages that LAMMPS can be built *hybrid*\ . These refer to optional packages that LAMMPS can be built

View File

@ -71,9 +71,9 @@ Syntax
feature functions = is_available(category,feature), is_active(category,feature), is_defined(category,id) feature functions = is_available(category,feature), is_active(category,feature), is_defined(category,id)
atom value = id[i], mass[i], type[i], mol[i], x[i], y[i], z[i], vx[i], vy[i], vz[i], fx[i], fy[i], fz[i], q[i] atom value = id[i], mass[i], type[i], mol[i], x[i], y[i], z[i], vx[i], vy[i], vz[i], fx[i], fy[i], fz[i], q[i]
atom vector = id, mass, type, mol, radius, q, x, y, z, vx, vy, vz, fx, fy, fz atom vector = id, mass, type, mol, radius, q, x, y, z, vx, vy, vz, fx, fy, fz
custom atom property = i_name, d_name, i_name[i], d_name[i], i2_name[i], d2_name[i], i2_name[i][j], d_name[i][j] custom atom property = i_name, d_name, i_name[i], d_name[i], i2_name[i], d2_name[i], i2_name[i][j], d2_name[i][j]
compute references = c_ID, c_ID[i], c_ID[i][j], C_ID, C_ID[i] compute references = c_ID, c_ID[i], c_ID[i][j], C_ID, C_ID[i], C_ID[i][j]
fix references = f_ID, f_ID[i], f_ID[i][j], F_ID, F_ID[i] fix references = f_ID, f_ID[i], f_ID[i][j], F_ID, F_ID[i], F_ID[i][j]
variable references = v_name, v_name[i] variable references = v_name, v_name[i]
vector initialization = [1,3,7,10] (for *vector* variables only) vector initialization = [1,3,7,10] (for *vector* variables only)

View File

@ -112,6 +112,7 @@ snap: examples for using several bundled SNAP potentials
srd: stochastic rotation dynamics (SRD) particles as solvent srd: stochastic rotation dynamics (SRD) particles as solvent
steinhardt: Steinhardt-Nelson Q_l and W_l parameters using orientorder/atom steinhardt: Steinhardt-Nelson Q_l and W_l parameters using orientorder/atom
streitz: Streitz-Mintmire potential for Al2O3 streitz: Streitz-Mintmire potential for Al2O3
stress_vcm: removing binned rigid body motion from binned stress profile
tad: temperature-accelerated dynamics of vacancy diffusion in bulk Si tad: temperature-accelerated dynamics of vacancy diffusion in bulk Si
template: examples for using atom_style template and comparing to atom style molecular template: examples for using atom_style template and comparing to atom style molecular
tersoff: regression test input for Tersoff variants tersoff: regression test input for Tersoff variants

View File

@ -20,5 +20,3 @@ Examples:
4. in.comb.Cu2O.elastic: Cu2O crystal, qeq on, minimizes, then calculates 4. in.comb.Cu2O.elastic: Cu2O crystal, qeq on, minimizes, then calculates
elastic constants elastic constants
5. in.comb.HfO2: HfO2 polymorphs: Monoclinic HfO2 NVT @ 300K 5. in.comb.HfO2: HfO2 polymorphs: Monoclinic HfO2 NVT @ 300K
6. in.comb.CuaS: Metallic Cu and amorphous silica interface, qeq on,
five step NVE run

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,32 @@
README stress_vcm
=================
Contents:
- in.stress_vcm: Example script showing how to remove binned
velocities of center of mass (VCM) from stress calculations.
- stress_comparison.19Nov24.png: Plot shows the stress
calculated in bars on the y axis for each positional bin on
the x axis. Plotted are three different time steps from
stress profiles with and without the VCM removed. Plot
generated using Python.
- stress_xx.19Nov24.out: Output file generated by fix ave/time.
- log.19Nov24.stress_vcm.g++.1: LAMMPS log file with 1 proc.
- log.19Nov24.stress_vcm.g++.4: LAMMPS log file with 4 procs.
Notes:
- Running this script as-is will generate two files: a log
file with thermodynamic data and a stress_xx.out file
containing the binned stress profile with the VCM removed.
- To generate the binned stress profile without removing the
VCM, replace the last keyword "ch_temp_vcm" of the compute
stress/atom command at step three with "NULL".
- Uncommenting the line under "Atom dump" will generate an
all atom dump file every 50 time steps containing atom ID,
type, and xyz coordinates.
- Uncommenting the lines under "Image dumps" will generate
.jpg image files every 250 timesteps.
- Uncommenting lines under "Movie dump" will generate a .avi
movie file showing timesteps every 125 timesteps.

View File

@ -0,0 +1,113 @@
# Removing Binned Velocities of Center of Mass (VCM) from Stress
# This example shows how to remove rigid body motion from
# binned stress calculations. This uses a combination of commands
# from compute chunk/atom, compute temp/chunk, compute
# stress/atom and fix ave/time. We'll show how these commands
# work in the context of a shockwave experiment on a cube of
# atoms. To shock the cube, a rectangular region of atoms is
# frozen, moved into the cube with a constant velocity along the
# x direction, and then unfrozen. As the shockwave begins
# propagating, the body of the cube also moves along the x
# direction. To better understand the stress dynamics of the
# cube we remove the velocity component belonging to the overall
# motion of each bin.
#
# Script outline:
#   a) build the box and the cube of atoms, then run 500 steps with plain NVE
#   b) define the binned xx stress with the per-bin COM velocity removed
#   c) push a slab of atoms (the piston) along +x for 500 steps
#   d) release the piston and run 1500 more steps
units metal
boundary p p p
atom_style atomic
lattice fcc 5.3589
# Exactly one processor along x; the y and z processor counts are chosen by LAMMPS.
processors 1 * *
# Defining regions for box and atoms.
# In this experiment an elongated simulation cell is
# defined in the x direction to allow for non-periodic
# motion of the atoms.
# box1 is the simulation cell (-3 to 24 lattice units in x),
# box2 is the cube (0 to 12 lattice units per side) that gets filled with atoms.
region box1 block -3 24 0 12 0 12 units lattice
region box2 block 0 12 0 12 0 12 units lattice
# Creating box and atoms
create_box 1 box1
create_atoms 1 region box2
mass 1 40.00
# Adding energy to the system: random initial velocities
# for a temperature of 600.0 (random seed 9999).
velocity all create 600.0 9999
# Lennard-Jones interactions with a cutoff of 10 (distance units of "units metal").
pair_style lj/cut 10
pair_coeff 1 1 0.04 3.405
# Begin time integration
timestep 2e-3
fix fix_nve all nve
thermo 100
# Initial 500 step run before any of the chunk/stress computes or the piston are defined.
run 500
#--------------------------------------#
# Chunk, Stress, and VCM removal steps #
#--------------------------------------#
# 1. Create 20 equispaced bins sliced along the x direction.
# -"units reduced" normalizes the distance from 0.0 to 1.0
# -"fraction" is therefore the bin width as a fraction of the box length in x.
variable nbins index 20
variable fraction equal 1.0/v_nbins
# NOTE(review): volfrac is not referenced anywhere below; step 4 divides
# by vol times the bin fraction inline instead.
variable volfrac equal 1/(vol*${fraction})
compute ch_id all chunk/atom bin/1d x lower ${fraction} units reduced
# 2. Calculate temperature bins with VCM aka COM velocities removed.
compute ch_temp_vcm all temp/chunk ch_id com yes
# 3. Compute per atom stress with VCM removed via temp-ID.
# -The velocities from specified temp-ID are used to compute stress.
# -Stress/atom units are pressure*volume! Optionally handled next step.
compute atom_stress_vcm all stress/atom ch_temp_vcm
# 4. Divide out bin volume from xx stress component.
# -Index 1 of the per-atom stress array is the xx component.
# -vol times the bin fraction is the volume of one bin (whole box volume / 20).
# -The leading minus sign presumably converts the per-atom stress to the
#  pressure sign convention; confirm against the compute stress/atom docs.
variable stress atom -(c_atom_stress_vcm[1])/(vol*${fraction})
# 5. Sum the per atom stresses in each bin.
compute ch_stress_vcm all reduce/chunk ch_id sum v_stress
# 6. Average and output to file.
# -Every 100 steps, the average of 20 samples taken 5 steps apart
#  is written to stress_xx.out, one row per bin.
fix ave_stress_vcm all ave/time 5 20 100 c_ch_stress_vcm mode vector file stress_xx.out
#--------------------------------------#
# Piston compressing along x direction
# The piston is the slab of atoms between -1 and 1 lattice units in x.
region piston block -1 1 INF INF INF INF units lattice
group piston region piston
# Move the piston atoms at a constant velocity of 5 (box units) along +x.
# NOTE(review): the piston atoms are also part of group "all" used by fix_nve,
# so both fixes act on them; the reference logs for this example show the
# warning "One or more atoms are time integrated more than once".
fix fix_piston piston move linear 5 0 0 units box # strain rate ~ 8e10 1/s
# Report box lengths and the diagonal pressure components next to the energies.
thermo_style custom step temp ke pe lx ly lz pxx pyy pzz econserve
# Atom dump
# dump atom_dump all atom 50 dump.vcm
# # Image dumps
# dump 2 all image 250 image.*.jpg type type &
# axes yes 0.8 0.02 view 60 -30
# dump_modify 2 pad 1
# # Movie dump
# dump 3 all movie 125 movie.avi type type &
# axes yes 0.8 0.02 view 60 -30
# dump_modify 3 pad 1
# Drive the piston for 500 steps ...
run 500
# ... then release it and let the system evolve for another 1500 steps.
unfix fix_piston
run 1500

View File

@ -0,0 +1,253 @@
LAMMPS (19 Nov 2024)
OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:99)
using 1 OpenMP thread(s) per MPI task
# Removing Binned Velocities of Center of Mass (VCM) from Stress
# This example shows how to remove rigid body motion from
# binned stress calculations. This uses a combination of commands
# from compute chunk/atom, compute temp/chunk, compute
# stress/atom and fix ave/time. We'll show how these commands
# work in the context of a shockwave experiment on a cube of
# atoms. To shock the cube, a rectangular region of atoms is
# frozen, moved into the cube with a constant velocity along the
# x direction, and then unfrozen. As the shockwave begins
# propagating, the body of the cube also moves along the x
# direction. To better understand the stress dynamics of the
# cube we remove the velocity component belonging to the overall
# motion of each bin.
units metal
boundary p p p
atom_style atomic
lattice fcc 5.3589
Lattice spacing in x,y,z = 5.3589 5.3589 5.3589
processors 1 * *
# Defining regions for box and atoms.
# In this experiment an elongated simulation cell is
# defined in the x direction to allow for non-periodic
# motion of the atoms.
region box1 block -3 24 0 12 0 12 units lattice
region box2 block 0 12 0 12 0 12 units lattice
# Creating box and atoms
create_box 1 box1
Created orthogonal box = (-16.0767 0 0) to (128.6136 64.3068 64.3068)
1 by 1 by 1 MPI processor grid
create_atoms 1 region box2
Created 7200 atoms
using lattice units in orthogonal box = (-16.0767 0 0) to (128.6136 64.3068 64.3068)
create_atoms CPU = 0.002 seconds
mass 1 40.00
# Adding energy to the system
velocity all create 600.0 9999
pair_style lj/cut 10
pair_coeff 1 1 0.04 3.405
# Begin time integration
timestep 2e-3
fix fix_nve all nve
thermo 100
run 500
Generated 0 of 0 mixed pair_coeff terms from geometric mixing rule
Neighbor list info ...
update: every = 1 steps, delay = 0 steps, check = yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 12
ghost atom cutoff = 12
binsize = 6, bins = 25 11 11
1 neighbor lists, perpetual/occasional/extra = 1 0 0
(1) pair lj/cut, perpetual
attributes: half, newton on
pair build: half/bin/atomonly/newton
stencil: half/bin/3d
bin: standard
Per MPI rank memory allocation (min/avg/max) = 5.721 | 5.721 | 5.721 Mbytes
Step Temp E_pair E_mol TotEng Press
0 600 -2252.7567 0 -1694.4304 -974.62456
100 284.72172 -1977.4291 0 -1712.483 2453.7429
200 304.44519 -1994.7937 0 -1711.4941 1822.2699
300 304.28012 -1993.2958 0 -1710.1498 1498.3794
400 296.76492 -1985.1364 0 -1708.9836 1259.9474
500 295.00895 -1982.4224 0 -1707.9036 964.9526
Loop time of 3.01696 on 1 procs for 500 steps with 7200 atoms
Performance: 28.638 ns/day, 0.838 hours/ns, 165.730 timesteps/s, 1.193 Matom-step/s
99.4% CPU use with 1 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 2.8439 | 2.8439 | 2.8439 | 0.0 | 94.26
Neigh | 0.11212 | 0.11212 | 0.11212 | 0.0 | 3.72
Comm | 0.015585 | 0.015585 | 0.015585 | 0.0 | 0.52
Output | 0.003747 | 0.003747 | 0.003747 | 0.0 | 0.12
Modify | 0.026097 | 0.026097 | 0.026097 | 0.0 | 0.87
Other | | 0.01551 | | | 0.51
Nlocal: 7200 ave 7200 max 7200 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 6410 ave 6410 max 6410 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 615095 ave 615095 max 615095 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 615095
Ave neighs/atom = 85.429861
Neighbor list builds = 9
Dangerous builds = 0
#------------------------------------#
# Chunk, Stress, and VCM removal steps
#------------------------------------#
# 1. Create 20 equispaced bins sliced along the x direction.
# "units reduced" normalizes the distance from 0 to 1
variable nbins index 20
variable fraction equal 1.0/v_nbins
variable volfrac equal 1/(vol*${fraction})
variable volfrac equal 1/(vol*0.05)
compute ch_id all chunk/atom bin/1d x lower ${fraction} units reduced
compute ch_id all chunk/atom bin/1d x lower 0.05 units reduced
# 2. Calculate temperature bins with VCM aka COM velocities removed.
compute ch_temp_vcm all temp/chunk ch_id com yes
# 3. Compute per atom stress with VCM removed via temp-ID.
# The velocities from specified temp-ID are used to compute stress
# Stress/atom units are pressure*volume! Optionally handled next step.
compute atom_stress_vcm all stress/atom ch_temp_vcm
# 4. Divide out bin volume from xx stress component.
variable stress atom -(c_atom_stress_vcm[1])/(vol*${fraction})
variable stress atom -(c_atom_stress_vcm[1])/(vol*0.05)
# 5. Sum the per atom stresses in each bin.
compute ch_stress_vcm all reduce/chunk ch_id sum v_stress
# 6. Average and output to file.
# The average output is every 100 steps with samples collected 20 times with 5 step intervals
# fix ave_stress_vcm all ave/time 5 20 100 c_ch_stress_vcm mode vector file stress_xx.out
#------------------------------------#
# Piston compressing along x direction
region piston block -1 1 INF INF INF INF units lattice
group piston region piston
863 atoms in group piston
fix fix_piston piston move linear 5 0 0 units box # strain rate ~ 8e10 1/s
thermo_style custom step temp ke pe lx ly lz pxx pyy pzz econserve
# Atom dump
# dump atom_dump all atom 50 dump.vcm
# # Image dumps
# dump 2 all image 250 image.*.jpg type type # axes yes 0.8 0.02 view 60 -30
# dump_modify 2 pad 1
# # Movie dump
# dump 3 all movie 125 movie.avi type type # axes yes 0.8 0.02 view 60 -30
# dump_modify 3 pad 1
run 500
Generated 0 of 0 mixed pair_coeff terms from geometric mixing rule
WARNING: One or more atoms are time integrated more than once (src/modify.cpp:296)
Per MPI rank memory allocation (min/avg/max) = 6.975 | 6.975 | 6.975 Mbytes
Step Temp KinEng PotEng Lx Ly Lz Pxx Pyy Pzz Econserve
500 295.00895 274.51875 -1982.4224 144.6903 64.3068 64.3068 631.89976 1127.2965 1135.6616 -1707.9036
600 357.38902 332.56613 -1951.3422 144.6903 64.3068 64.3068 2236.6706 2003.2726 1943.6815 -1618.7761
700 420.30268 391.11005 -1911.8178 144.6903 64.3068 64.3068 3761.5011 3065.4699 3140.3169 -1520.7077
800 484.96279 451.27911 -1875.379 144.6903 64.3068 64.3068 5362.254 4174.4201 4166.0818 -1424.0999
900 587.78954 546.96391 -1871.217 144.6903 64.3068 64.3068 6481.4714 4875.705 4676.6083 -1324.2531
1000 684.07997 636.56636 -1868.1639 144.6903 64.3068 64.3068 7734.6158 5271.3524 5272.1276 -1231.5975
Loop time of 3.09383 on 1 procs for 500 steps with 7200 atoms
Performance: 27.927 ns/day, 0.859 hours/ns, 161.612 timesteps/s, 1.164 Matom-step/s
100.0% CPU use with 1 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 2.8485 | 2.8485 | 2.8485 | 0.0 | 92.07
Neigh | 0.18767 | 0.18767 | 0.18767 | 0.0 | 6.07
Comm | 0.011533 | 0.011533 | 0.011533 | 0.0 | 0.37
Output | 0.003323 | 0.003323 | 0.003323 | 0.0 | 0.11
Modify | 0.031777 | 0.031777 | 0.031777 | 0.0 | 1.03
Other | | 0.01107 | | | 0.36
Nlocal: 7200 ave 7200 max 7200 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 6409 ave 6409 max 6409 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 646408 ave 646408 max 646408 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 646408
Ave neighs/atom = 89.778889
Neighbor list builds = 15
Dangerous builds = 0
unfix fix_piston
run 1500
Generated 0 of 0 mixed pair_coeff terms from geometric mixing rule
Per MPI rank memory allocation (min/avg/max) = 6.6 | 6.6 | 6.6 Mbytes
Step Temp KinEng PotEng Lx Ly Lz Pxx Pyy Pzz Econserve
1000 684.07997 636.56636 -1868.1639 144.6903 64.3068 64.3068 7734.6158 5271.3524 5272.1276 -1231.5975
1100 710.19886 660.87113 -1894.0485 144.6903 64.3068 64.3068 8048.3485 5396.6668 5376.5956 -1233.1774
1200 717.16487 667.35331 -1901.3849 144.6903 64.3068 64.3068 8009.7984 5634.5121 5349.4113 -1234.0316
1300 710.26037 660.92837 -1894.9802 144.6903 64.3068 64.3068 8063.4125 5572.1245 5530.174 -1234.0519
1400 715.93921 666.21278 -1898.8885 144.6903 64.3068 64.3068 7752.0927 5293.5463 5322.2312 -1232.6757
1500 748.85411 696.84154 -1926.4891 144.6903 64.3068 64.3068 6030.5428 4076.8886 4012.7653 -1229.6475
1600 767.98982 714.64815 -1939.8556 144.6903 64.3068 64.3068 4200.3475 2532.5711 2530.5518 -1225.2075
1700 757.22042 704.62675 -1925.553 144.6903 64.3068 64.3068 2686.7843 1482.2796 1505.8073 -1220.9262
1800 727.30327 676.78754 -1894.6635 144.6903 64.3068 64.3068 1764.2793 781.37451 801.18668 -1217.8759
1900 688.82146 640.97853 -1856.5007 144.6903 64.3068 64.3068 1022.805 417.32394 359.74951 -1215.5221
2000 655.91228 610.35509 -1823.954 144.6903 64.3068 64.3068 551.98825 -20.148643 -56.976652 -1213.5989
2100 620.22468 577.14622 -1789.1761 144.6903 64.3068 64.3068 264.05975 -266.8323 -314.45533 -1212.0299
2200 589.13325 548.21428 -1758.9252 144.6903 64.3068 64.3068 41.369707 -533.503 -525.69401 -1210.7109
2300 563.20394 524.08593 -1733.6036 144.6903 64.3068 64.3068 -220.99189 -810.90513 -774.65084 -1209.5176
2400 540.44236 502.90528 -1711.3384 144.6903 64.3068 64.3068 -358.01508 -962.31635 -977.3253 -1208.4332
2500 523.5718 487.20648 -1694.7088 144.6903 64.3068 64.3068 -521.87444 -1152.8386 -1231.7615 -1207.5023
Loop time of 9.34327 on 1 procs for 1500 steps with 7200 atoms
Performance: 27.742 ns/day, 0.865 hours/ns, 160.543 timesteps/s, 1.156 Matom-step/s
98.5% CPU use with 1 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 8.4692 | 8.4692 | 8.4692 | 0.0 | 90.65
Neigh | 0.7512 | 0.7512 | 0.7512 | 0.0 | 8.04
Comm | 0.031189 | 0.031189 | 0.031189 | 0.0 | 0.33
Output | 0.010584 | 0.010584 | 0.010584 | 0.0 | 0.11
Modify | 0.053052 | 0.053052 | 0.053052 | 0.0 | 0.57
Other | | 0.02803 | | | 0.30
Nlocal: 7200 ave 7200 max 7200 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 6380 ave 6380 max 6380 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 515773 ave 515773 max 515773 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 515773
Ave neighs/atom = 71.635139
Neighbor list builds = 57
Dangerous builds = 0
Total wall time: 0:00:15

View File

@ -0,0 +1,253 @@
LAMMPS (19 Nov 2024)
OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:99)
using 1 OpenMP thread(s) per MPI task
# Removing Binned Velocities of Center of Mass (VCM) from Stress
# This example shows how to remove rigid body motion from
# binned stress calculations. This uses a combination of commands
# from compute chunk/atom, compute temp/chunk, compute
# stress/atom and fix ave/time. We'll show how these commands
# work in the context of a shockwave experiment on a cube of
# atoms. To shock the cube, a rectangular region of atoms is
# frozen, moved into the cube with a constant velocity along the
# x direction, and then unfrozen. As the shockwave begins
# propagating, the body of the cube also moves along the x
# direction. To better understand the stress dynamics of the
# cube we remove the velocity component belonging to the overall
# motion of each bin.
units metal
boundary p p p
atom_style atomic
lattice fcc 5.3589
Lattice spacing in x,y,z = 5.3589 5.3589 5.3589
processors 1 * *
# Defining regions for box and atoms.
# In this experiment an elongated simulation cell is
# defined in the x direction to allow for non-periodic
# motion of the atoms.
region box1 block -3 24 0 12 0 12 units lattice
region box2 block 0 12 0 12 0 12 units lattice
# Creating box and atoms
create_box 1 box1
Created orthogonal box = (-16.0767 0 0) to (128.6136 64.3068 64.3068)
1 by 2 by 2 MPI processor grid
create_atoms 1 region box2
Created 7200 atoms
using lattice units in orthogonal box = (-16.0767 0 0) to (128.6136 64.3068 64.3068)
create_atoms CPU = 0.001 seconds
mass 1 40.00
# Adding energy to the system
velocity all create 600.0 9999
pair_style lj/cut 10
pair_coeff 1 1 0.04 3.405
# Begin time integration
timestep 2e-3
fix fix_nve all nve
thermo 100
run 500
Generated 0 of 0 mixed pair_coeff terms from geometric mixing rule
Neighbor list info ...
update: every = 1 steps, delay = 0 steps, check = yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 12
ghost atom cutoff = 12
binsize = 6, bins = 25 11 11
1 neighbor lists, perpetual/occasional/extra = 1 0 0
(1) pair lj/cut/opt, perpetual
attributes: half, newton on
pair build: half/bin/atomonly/newton
stencil: half/bin/3d
bin: standard
Per MPI rank memory allocation (min/avg/max) = 3.662 | 3.662 | 3.662 Mbytes
Step Temp E_pair E_mol TotEng Press
0 600 -2252.7567 0 -1694.4304 -974.62456
100 284.1896 -1976.961 0 -1712.5101 2462.6396
200 308.58965 -1998.6349 0 -1711.4787 1789.0033
300 300.55093 -1989.9838 0 -1710.308 1545.8576
400 297.91491 -1986.2519 0 -1709.029 1247.7121
500 294.66041 -1982.1097 0 -1707.9153 961.03073
Loop time of 0.942408 on 4 procs for 500 steps with 7200 atoms
Performance: 91.680 ns/day, 0.262 hours/ns, 530.556 timesteps/s, 3.820 Matom-step/s
82.1% CPU use with 4 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 0.61287 | 0.63781 | 0.65858 | 2.1 | 67.68
Neigh | 0.030246 | 0.031529 | 0.034546 | 1.0 | 3.35
Comm | 0.23074 | 0.25145 | 0.27819 | 3.7 | 26.68
Output | 0.000282 | 0.0003735 | 0.000463 | 0.0 | 0.04
Modify | 0.005566 | 0.0057635 | 0.005989 | 0.2 | 0.61
Other | | 0.01548 | | | 1.64
Nlocal: 1800 ave 1814 max 1787 min
Histogram: 1 0 1 0 0 0 0 1 0 1
Nghost: 3713.5 ave 3727 max 3699 min
Histogram: 1 0 1 0 0 0 0 1 0 1
Neighs: 153532 ave 154995 max 152312 min
Histogram: 1 0 1 0 0 1 0 0 0 1
Total # of neighbors = 614128
Ave neighs/atom = 85.295556
Neighbor list builds = 9
Dangerous builds = 0
#------------------------------------#
# Chunk, Stress, and VCM removal steps
#------------------------------------#
# 1. Create 20 equispaced bins sliced along the x direction.
# "units reduced" normalizes the distance from 0 to 1
variable nbins index 20
variable fraction equal 1.0/v_nbins
variable volfrac equal 1/(vol*${fraction})
variable volfrac equal 1/(vol*0.05)
compute ch_id all chunk/atom bin/1d x lower ${fraction} units reduced
compute ch_id all chunk/atom bin/1d x lower 0.05 units reduced
# 2. Calculate temperature bins with VCM aka COM velocities removed.
compute ch_temp_vcm all temp/chunk ch_id com yes
# 3. Compute per atom stress with VCM removed via temp-ID.
# The velocities from specified temp-ID are used to compute stress
# Stress/atom units are pressure*volume! Optionally handled next step.
compute atom_stress_vcm all stress/atom ch_temp_vcm
# 4. Divide out bin volume from xx stress component.
variable stress atom -(c_atom_stress_vcm[1])/(vol*${fraction})
variable stress atom -(c_atom_stress_vcm[1])/(vol*0.05)
# 5. Sum the per atom stresses in each bin.
compute ch_stress_vcm all reduce/chunk ch_id sum v_stress
# 6. Average and output to file.
# The average output is every 100 steps with samples collected 20 times with 5 step intervals
# fix ave_stress_vcm all ave/time 5 20 100 c_ch_stress_vcm mode vector file stress_xx.out
#------------------------------------#
# Piston compressing along x direction
region piston block -1 1 INF INF INF INF units lattice
group piston region piston
864 atoms in group piston
fix fix_piston piston move linear 5 0 0 units box # strain rate ~ 8e10 1/s
thermo_style custom step temp ke pe lx ly lz pxx pyy pzz econserve
# Atom dump
# dump atom_dump all atom 50 dump.vcm
# # Image dumps
# dump 2 all image 250 image.*.jpg type type # axes yes 0.8 0.02 view 60 -30
# dump_modify 2 pad 1
# # Movie dump
# dump 3 all movie 125 movie.avi type type # axes yes 0.8 0.02 view 60 -30
# dump_modify 3 pad 1
run 500
Generated 0 of 0 mixed pair_coeff terms from geometric mixing rule
WARNING: One or more atoms are time integrated more than once (src/modify.cpp:296)
Per MPI rank memory allocation (min/avg/max) = 4.916 | 4.916 | 4.916 Mbytes
Step Temp KinEng PotEng Lx Ly Lz Pxx Pyy Pzz Econserve
500 294.66041 274.19441 -1982.1097 144.6903 64.3068 64.3068 645.25795 1119.5337 1118.3006 -1707.9153
600 357.88641 333.02897 -1951.8158 144.6903 64.3068 64.3068 2176.0343 1929.2787 1981.8479 -1618.7869
700 418.41159 389.3503 -1912.8337 144.6903 64.3068 64.3068 3702.2875 3043.7607 3081.1607 -1523.4834
800 483.71102 450.11428 -1875.7955 144.6903 64.3068 64.3068 5254.3875 4190.9789 4158.3561 -1425.6813
900 586.0893 545.38176 -1870.9313 144.6903 64.3068 64.3068 6509.1439 4756.2216 4724.7086 -1325.5495
1000 686.32946 638.65962 -1874.811 144.6903 64.3068 64.3068 7515.1606 5193.049 5261.8688 -1236.1514
Loop time of 0.656417 on 4 procs for 500 steps with 7200 atoms
Performance: 131.624 ns/day, 0.182 hours/ns, 761.711 timesteps/s, 5.484 Matom-step/s
92.8% CPU use with 4 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 0.51672 | 0.52334 | 0.53259 | 0.8 | 79.73
Neigh | 0.045091 | 0.045915 | 0.047402 | 0.4 | 6.99
Comm | 0.060735 | 0.071794 | 0.079302 | 2.6 | 10.94
Output | 0.000208 | 0.000389 | 0.000926 | 0.0 | 0.06
Modify | 0.006007 | 0.0061595 | 0.00626 | 0.1 | 0.94
Other | | 0.008815 | | | 1.34
Nlocal: 1800 ave 1811 max 1785 min
Histogram: 1 0 0 1 0 0 0 0 0 2
Nghost: 3713.25 ave 3727 max 3702 min
Histogram: 2 0 0 0 0 0 0 1 0 1
Neighs: 161477 ave 162958 max 159732 min
Histogram: 1 0 0 0 1 0 0 1 0 1
Total # of neighbors = 645909
Ave neighs/atom = 89.709583
Neighbor list builds = 15
Dangerous builds = 0
unfix fix_piston
run 1500
Generated 0 of 0 mixed pair_coeff terms from geometric mixing rule
Per MPI rank memory allocation (min/avg/max) = 4.541 | 4.541 | 4.541 Mbytes
Step Temp KinEng PotEng Lx Ly Lz Pxx Pyy Pzz Econserve
1000 686.32946 638.65962 -1874.811 144.6903 64.3068 64.3068 7515.1606 5193.049 5261.8688 -1236.1514
1100 709.7333 660.43791 -1898.2844 144.6903 64.3068 64.3068 7932.8638 5334.6171 5364.5335 -1237.8465
1200 713.27253 663.73132 -1902.4588 144.6903 64.3068 64.3068 7957.2574 5500.6231 5538.0516 -1238.7275
1300 705.44796 656.45022 -1895.1575 144.6903 64.3068 64.3068 7996.7265 5584.6233 5538.2494 -1238.7072
1400 711.86463 662.42121 -1899.8416 144.6903 64.3068 64.3068 7674.2462 5292.4915 5294.5366 -1237.4204
1500 742.18946 690.63979 -1924.9562 144.6903 64.3068 64.3068 6047.915 4056.6156 4014.4446 -1234.3164
1600 762.81764 709.83522 -1939.8563 144.6903 64.3068 64.3068 4185.5873 2530.0572 2576.1943 -1230.0211
1700 754.40428 702.00621 -1927.7337 144.6903 64.3068 64.3068 2662.7604 1509.1985 1484.7252 -1225.7275
1800 721.03504 670.95468 -1893.5556 144.6903 64.3068 64.3068 1765.8783 835.89765 861.9432 -1222.6009
1900 689.64162 641.74172 -1861.8886 144.6903 64.3068 64.3068 941.58148 312.93205 409.79901 -1220.1469
2000 650.79664 605.59477 -1823.9889 144.6903 64.3068 64.3068 543.39234 28.48735 80.396505 -1218.3941
2100 616.04072 573.25286 -1790.1764 144.6903 64.3068 64.3068 308.16444 -235.20997 -248.22531 -1216.9235
2200 587.18712 546.40333 -1761.8878 144.6903 64.3068 64.3068 37.044801 -476.50396 -470.83059 -1215.4845
2300 562.84178 523.74892 -1738.2239 144.6903 64.3068 64.3068 -139.28348 -711.17273 -730.80877 -1214.475
2400 540.48362 502.94367 -1716.3529 144.6903 64.3068 64.3068 -320.98222 -951.2066 -943.93966 -1213.4093
2500 519.80431 483.70067 -1696.1896 144.6903 64.3068 64.3068 -471.61317 -1088.8457 -1131.5396 -1212.4889
Loop time of 1.97213 on 4 procs for 1500 steps with 7200 atoms
Performance: 131.431 ns/day, 0.183 hours/ns, 760.598 timesteps/s, 5.476 Matom-step/s
95.3% CPU use with 4 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 1.5455 | 1.5599 | 1.5723 | 0.8 | 79.10
Neigh | 0.16844 | 0.1704 | 0.17237 | 0.4 | 8.64
Comm | 0.19002 | 0.2047 | 0.22068 | 2.4 | 10.38
Output | 0.000525 | 0.0006785 | 0.001077 | 0.0 | 0.03
Modify | 0.012434 | 0.012601 | 0.012777 | 0.1 | 0.64
Other | | 0.02388 | | | 1.21
Nlocal: 1800 ave 1833 max 1776 min
Histogram: 1 0 1 0 1 0 0 0 0 1
Nghost: 3702 ave 3732 max 3674 min
Histogram: 1 0 0 1 0 0 1 0 0 1
Neighs: 129380 ave 132578 max 127003 min
Histogram: 1 0 0 2 0 0 0 0 0 1
Total # of neighbors = 517520
Ave neighs/atom = 71.877778
Neighbor list builds = 54
Dangerous builds = 0
Total wall time: 0:00:03

Binary file not shown.

After

Width:  |  Height:  |  Size: 89 KiB

View File

@ -0,0 +1,423 @@
# Time-averaged data for fix ave_stress_vcm
# TimeStep Number-of-rows
# Row c_ch_stress_vcm
600 20
1 0
2 -142.965
3 2142.79
4 12968.3
5 -336.7
6 2638.09
7 4214.83
8 3187.61
9 -488.891
10 -49.3553
11 151.373
12 -317.663
13 0
14 0
15 0
16 0
17 0
18 0
19 0
20 0
700 20
1 0
2 -14.3195
3 -1238.9
4 30664.3
5 18805.2
6 498.562
7 930.874
8 660.655
9 -266.903
10 -317.877
11 -386.989
12 -304.697
13 0
14 0
15 0
16 0
17 0
18 0
19 0
20 0
800 20
1 0
2 0
3 -1656.7
4 30424.3
5 37003.5
6 15562.5
7 -2441.9
8 -1766.09
9 272.718
10 -664.774
11 -72.6933
12 -469.765
13 0
14 0
15 0
16 0
17 0
18 0
19 0
20 0
900 20
1 0
2 0
3 -1567.21
4 24987.6
5 38068.9
6 31595
7 8864.94
8 -3423.99
9 -753.063
10 125.21
11 -50.4895
12 -172.14
13 0
14 0
15 0
16 0
17 0
18 0
19 0
20 0
1000 20
1 0
2 0
3 -893.168
4 15591.6
5 32690.6
6 30183
7 27172
8 9459.75
9 -1416.35
10 -432.731
11 444.323
12 -424.357
13 0
14 0
15 0
16 0
17 0
18 0
19 0
20 0
1100 20
1 0
2 0
3 -601.805
4 8890.79
5 23345.1
6 28529.2
7 29111.9
8 25846.2
9 7451.83
10 -1624.2
11 320.704
12 -50.9865
13 -5.50481
14 0
15 0
16 0
17 0
18 0
19 0
20 0
1200 20
1 0
2 0
3 1435.39
4 8818.29
5 7129.61
6 20281.7
7 28026.1
8 28327.7
9 26918.6
10 8277.12
11 -249.644
12 -171.806
13 -7.19065
14 0
15 0
16 0
17 0
18 0
19 0
20 0
1300 20
1 0
2 0
3 -718.118
4 3021.9
5 9010.51
6 9500.87
7 19432.8
8 27254.3
9 28638.5
10 25568.5
11 8094.66
12 -368.293
13 -2.20997
14 0
15 0
16 0
17 0
18 0
19 0
20 0
1400 20
1 0
2 0
3 -650.581
4 190.19
5 5465.38
6 7489.23
7 7575.16
8 18433.5
9 26975.3
10 28981.5
11 26987.9
12 7502.07
13 0.117312
14 0
15 0
16 0
17 0
18 0
19 0
20 0
1500 20
1 0
2 0
3 -619.311
4 561.257
5 461.5
6 4105.68
7 9272.68
8 10445.6
9 18826.1
10 25434.8
11 25653.8
12 10981.2
13 33.682
14 0
15 0
16 0
17 0
18 0
19 0
20 0
1600 20
1 0
2 0
3 -349.345
4 513.579
5 -471.384
6 1257.81
7 7122.9
8 8659.35
9 8452.08
10 16013.5
11 17091
12 5476.24
13 -136.183
14 0
15 0
16 0
17 0
18 0
19 0
20 0
1700 20
1 0
2 0
3 -273.839
4 -907.407
5 -272.136
6 594.363
7 3302.77
8 5564.07
9 8689.92
10 6446.06
11 1779.37
12 338.998
13 -171.408
14 -1.21548
15 0
16 0
17 0
18 0
19 0
20 0
1800 20
1 0
2 0
3 -164.819
4 383.877
5 -140.681
6 -10.0153
7 907.937
8 3269.05
9 5325.22
10 395.73
11 -4103.73
12 -2787.16
13 -1357.04
14 -35.2044
15 0
16 0
17 0
18 0
19 0
20 0
1900 20
1 0
2 0
3 -80.813
4 334.225
5 248.55
6 82.0566
7 207.763
8 185.714
9 -55.8635
10 -2758.51
11 -4619.33
12 -5521.92
13 -2346.36
14 -415.324
15 0
16 0
17 0
18 0
19 0
20 0
2000 20
1 0
2 0
3 -83.1832
4 264.023
5 596.087
6 40.8157
7 -267.093
8 -2288.15
9 -3387.64
10 -5566.79
11 -5640.76
12 -4925.74
13 -3096.01
14 -757.817
15 -1.13042
16 0
17 0
18 0
19 0
20 0
2100 20
1 0
2 0
3 -17.4378
4 62.1251
5 740.988
6 357.467
7 -1137.61
8 -4266.83
9 -4962.9
10 -5322.45
11 -5437.58
12 -4846.56
13 -3651.28
14 -1151.01
15 -28.3074
16 0
17 0
18 0
19 0
20 0
2200 20
1 0
2 0
3 -10.8779
4 -56.7926
5 400.261
6 -568.63
7 -2193.36
8 -3856.71
9 -6603
10 -5717.11
11 -4868.64
12 -4173.5
13 -3402.64
14 -1712.44
15 -80.6771
16 -0.123189
17 0
18 0
19 0
20 0
2300 20
1 0
2 0
3 -22.8402
4 -44.5496
5 -365.476
6 -1285.6
7 -2887.76
8 -4022.77
9 -6280.86
10 -6055.26
11 -4921.51
12 -4445.37
13 -3531.69
14 -1360.49
15 -258.99
16 0.196931
17 0
18 0
19 0
20 0
2400 20
1 0
2 0
3 -0.594396
4 -148.921
5 -1118.18
6 -2071.85
7 -3989.41
8 -4567.01
9 -4939.36
10 -5170.94
11 -4922.25
12 -4587.5
13 -3748.19
14 -1785.46
15 -460.491
16 2.54038
17 0
18 0
19 0
20 0
2500 20
1 0
2 0
3 5.64755
4 -485.854
5 -2525.68
6 -2642.35
7 -5066.15
8 -4546.03
9 -4429.45
10 -4579.15
11 -4829.56
12 -4384.77
13 -3525.99
14 -1708.9
15 -627.176
16 -23.5581
17 0
18 0
19 0
20 0

View File

@ -1,7 +1,112 @@
# CHANGELOG # CHANGELOG
## 4.5.01
[Full Changelog](https://github.com/kokkos/kokkos/compare/4.5.00...4.5.01)
### Bug Fixes
* Fix re-builds after cleaning the binary tree when doing `add_subdirectory` on the Kokkos source [\#7557](https://github.com/kokkos/kokkos/pull/7557)
* Update mdspan to include fix for submdspan and bracket operator with clang 15&16 [\#7559](https://github.com/kokkos/kokkos/pull/7559)
* Fix DynRankView performance regression by re-introducing shortcut operator() impls [\#7606](https://github.com/kokkos/kokkos/pull/7606)
* Add missing MI300A (`GFX942_APU`) option to Makefile build-system
## 4.5.00
[Full Changelog](https://github.com/kokkos/kokkos/compare/4.4.01...4.5.00)
### Features
* SYCL backend graduated to production ready
* Introduce new `SequentialHostInit` view allocation property [\#7229](https://github.com/kokkos/kokkos/pull/7229) (backported in 4.4.01)
* Support building with Run-Time Type Information (RTTI) disabled
* Add new `KOKKOS_RELOCATABLE_FUNCTION` function annotation macro [\#5993](https://github.com/kokkos/kokkos/pull/5993)
### Backend and Architecture Enhancements
#### CUDA
* Adding occupancy tuning for CUDA architectures [\#6788](https://github.com/kokkos/kokkos/pull/6788)
* By default disable `cudaMallocAsync` (i.e., revert the change made in version 4.2) [\#7353](https://github.com/kokkos/kokkos/pull/7353)
#### HIP
* Add support for AMD Phoenix APUs with Radeon 740M/760M/780M/880M/890M [\#7162](https://github.com/kokkos/kokkos/pull/7162)
* Update maximum waves per CU values for consumer card [\#7347](https://github.com/kokkos/kokkos/pull/7347)
* Check that Kokkos is running on the architecture it was compiled for [\#7379](https://github.com/kokkos/kokkos/pull/7379)
* Add opt-in option to use `hipMallocAsync` instead of `hipMalloc` [\#7324](https://github.com/kokkos/kokkos/pull/7324)
* Introduce new architecture option `AMD_GFX942_APU` for MI300A [\#7462](https://github.com/kokkos/kokkos/pull/7462)
#### SYCL
* Move the `SYCL` backend out of the `Experimental` namespace [\#7171](https://github.com/kokkos/kokkos/pull/7171)
* Introduce `KOKKOS_ENABLE_SYCL_RELOCATABLE_DEVICE_CODE` as CMake option [\#5993](https://github.com/kokkos/kokkos/pull/5993)
#### OpenACC
* Add support for building with the Clacc compiler [\#7198](https://github.com/kokkos/kokkos/pull/7198)
* Workaround NVHPC collapse clause bug for `MDRangePolicy` [\#7425](https://github.com/kokkos/kokkos/pull/7425)
#### HPX
* Implement `Experimental::partition_space` to produce truly independent execution spaces [\#7287](https://github.com/kokkos/kokkos/pull/7287)
#### Threads
* Fix compilation for `parallel_reduce` `MDRange` with `Dynamic` scheduling [\#7478](https://github.com/kokkos/kokkos/pull/7478)
* Fix race conditions on ARM architectures [\#7498](https://github.com/kokkos/kokkos/pull/7498)
#### OpenMP
* Fix run time behavior when compiling with `-fvisibility=hidden` [\#7284](https://github.com/kokkos/kokkos/pull/7284) (backported in 4.4.01)
* Fix linking with Cray Clang compiler [\#7341](https://github.com/kokkos/kokkos/pull/7341)
#### Serial
* Allow `Kokkos_ENABLE_ATOMICS_BYPASS` to skip mutexes to remediate performance regression in 4.4 [\#7369](https://github.com/kokkos/kokkos/pull/7369)
### General Enhancements
* Improve `View` initialization/destruction for non-scalar trivial and trivially-destructible types [\#7219](https://github.com/kokkos/kokkos/pull/7219) [\#7225](https://github.com/kokkos/kokkos/pull/7225)
* Add getters for default tile sizes used in `MDRangePolicy` [\#6839](https://github.com/kokkos/kokkos/pull/6839)
* Improve performance of `Kokkos::sort` when `std::sort` is used [\#7264](https://github.com/kokkos/kokkos/pull/7264)
* Add range-based for loop support for `Array<T, N>` [\#7293](https://github.com/kokkos/kokkos/pull/7293)
* Allow functors as reducers for nested team parallel reduce [\#6921](https://github.com/kokkos/kokkos/pull/6921)
* Avoid making copies of string rvalue reference arguments to `view_alloc()` [\#7364](https://github.com/kokkos/kokkos/pull/7364)
* Add `atomic_{mod,xor,nand,lshift,rshift}` [\#7458](https://github.com/kokkos/kokkos/pull/7458)
* Allow using `SequentialHostInit` with `Kokkos::DualView` [\#7456](https://github.com/kokkos/kokkos/pull/7456)
* Add `Graph::instantiate()` [\#7240](https://github.com/kokkos/kokkos/pull/7240)
* Allow an arbitrary execution space instance to be used in `Kokkos::Graph::submit()` [\#7249](https://github.com/kokkos/kokkos/pull/7249)
* Enable compile-time diagnostic of illegal reduction target for graphs [\#7460](https://github.com/kokkos/kokkos/pull/7460)
### Build System Changes
* Make sure backend-specific options such as `IMPL_CUDA_MALLOC_ASYNC` only show when that backend is actually enabled [\#7228](https://github.com/kokkos/kokkos/pull/7228)
* Major refactoring removing `TriBITS` paths [\#6164](https://github.com/kokkos/kokkos/pull/6164)
* Add support for SpacemiT K60 (RISC-V) [\#7160](https://github.com/kokkos/kokkos/pull/7160)
### Deprecations
* Deprecate Tasking interface [\#7393](https://github.com/kokkos/kokkos/pull/7393)
* Deprecate `atomic_query_version`, `atomic_assign`, `atomic_compare_exchange_strong`, `atomic_{inc, dec}rement` [\#7458](https://github.com/kokkos/kokkos/pull/7458)
* Deprecate `{OpenMP,HPX}::is_asynchronous()` [\#7322](https://github.com/kokkos/kokkos/pull/7322)
### Bug Fixes
* Fix undefined behavior in `BinSort` when sorting within bins on host [\#7223](https://github.com/kokkos/kokkos/pull/7223)
* Using CUDA limits to set extents for blocks, grids [\#7235](https://github.com/kokkos/kokkos/pull/7235)
* Fix `deep_copy (serial_exec, dst, src)` with multiple host backends [\#7245](https://github.com/kokkos/kokkos/pull/7245)
* Skip `RangePolicy` bounds conversion checks if roundtrip convertibility is not provided [\#7172](https://github.com/kokkos/kokkos/pull/7172)
* Allow extracting host and device views from `DualView` with `const` value type [\#7242](https://github.com/kokkos/kokkos/pull/7242)
* Fix `TeamPolicy` array reduction for CUDA and HIP [\#6296](https://github.com/kokkos/kokkos/pull/6296)
* Fix implicit copy assignment operators in few AVX2 masks being deleted [\#7296](https://github.com/kokkos/kokkos/pull/7296)
* Fix configuring without architecture flags for SYCL [\#7303](https://github.com/kokkos/kokkos/pull/7303)
* Set an initial value index during join of `MinLoc`, `MaxLoc` or `MinMaxLoc` [\#7330](https://github.com/kokkos/kokkos/pull/7330)
* Fix storage lifetime of driver for global launch of graph nodes for CUDA and HIP [\#7365](https://github.com/kokkos/kokkos/pull/7365)
* Make `value_type` for `RandomAccessIterator` non-`const` [\#7485](https://github.com/kokkos/kokkos/pull/7485)
## [4.4.01](https://github.com/kokkos/kokkos/tree/4.4.01) ## [4.4.01](https://github.com/kokkos/kokkos/tree/4.4.01)
[Full Changelog](https://github.com/kokkos/kokkos/compare/4.0.00...4.4.01) [Full Changelog](https://github.com/kokkos/kokkos/compare/4.4.00...4.4.01)
### Features: ### Features:
* Introduce new SequentialHostInit view allocation property [\#7229](https://github.com/kokkos/kokkos/pull/7229) * Introduce new SequentialHostInit view allocation property [\#7229](https://github.com/kokkos/kokkos/pull/7229)
@ -13,7 +118,7 @@
### Bug Fixes ### Bug Fixes
* OpenMP: Fix issue related to the visibility of an internal symbol with shared libraries that affected `ScatterView` in particular [\#7284](https://github.com/kokkos/kokkos/pull/7284) * OpenMP: Fix issue related to the visibility of an internal symbol with shared libraries that affected `ScatterView` in particular [\#7284](https://github.com/kokkos/kokkos/pull/7284)
* Fix implicit copy assignment operators in few AVX2 masks being deleted [#7296](https://github.com/kokkos/kokkos/pull/7296) * Fix implicit copy assignment operators in few AVX2 masks being deleted [\#7296](https://github.com/kokkos/kokkos/pull/7296)
## [4.4.00](https://github.com/kokkos/kokkos/tree/4.4.00) ## [4.4.00](https://github.com/kokkos/kokkos/tree/4.4.00)
[Full Changelog](https://github.com/kokkos/kokkos/compare/4.3.01...4.4.00) [Full Changelog](https://github.com/kokkos/kokkos/compare/4.3.01...4.4.00)
@ -57,6 +162,7 @@
* SIMD: Allow flexible vector width for 32 bit types [\#6802](https://github.com/kokkos/kokkos/pull/6802) * SIMD: Allow flexible vector width for 32 bit types [\#6802](https://github.com/kokkos/kokkos/pull/6802)
* Updates for `Kokkos::Array`: add `kokkos_swap(Array<T, N>)` specialization [\#6943](https://github.com/kokkos/kokkos/pull/6943), add `Kokkos::to_array` [\#6375](https://github.com/kokkos/kokkos/pull/6375), make `Kokkos::Array` equality-comparable [\#7148](https://github.com/kokkos/kokkos/pull/7148) * Updates for `Kokkos::Array`: add `kokkos_swap(Array<T, N>)` specialization [\#6943](https://github.com/kokkos/kokkos/pull/6943), add `Kokkos::to_array` [\#6375](https://github.com/kokkos/kokkos/pull/6375), make `Kokkos::Array` equality-comparable [\#7148](https://github.com/kokkos/kokkos/pull/7148)
* Structured binding support for `Kokkos::complex` [\#7040](https://github.com/kokkos/kokkos/pull/7040) * Structured binding support for `Kokkos::complex` [\#7040](https://github.com/kokkos/kokkos/pull/7040)
* Introduce `KOKKOS_DEDUCTION_GUIDE` macro to allow for portable user-defined deduction guides [\#6954](https://github.com/kokkos/kokkos/pull/6954)
### Build System Changes ### Build System Changes
* Do not require OpenMP support for languages other than CXX [\#6965](https://github.com/kokkos/kokkos/pull/6965) * Do not require OpenMP support for languages other than CXX [\#6965](https://github.com/kokkos/kokkos/pull/6965)
@ -1388,7 +1494,7 @@
**Closed issues:** **Closed issues:**
- Silent error (Validate storage level arg to set_scratch_size) [\#3097](https://github.com/kokkos/kokkos/issues/3097) - Silent error (Validate storage level arg to set_scratch_size) [\#3097](https://github.com/kokkos/kokkos/issues/3097)
- Remove KOKKKOS\_ENABLE\_PROFILING Option [\#3095](https://github.com/kokkos/kokkos/issues/3095) - Remove KOKKOS\_ENABLE\_PROFILING Option [\#3095](https://github.com/kokkos/kokkos/issues/3095)
- Cuda 11 -\> allow C++17 [\#3083](https://github.com/kokkos/kokkos/issues/3083) - Cuda 11 -\> allow C++17 [\#3083](https://github.com/kokkos/kokkos/issues/3083)
- In source build failure not explained [\#3081](https://github.com/kokkos/kokkos/issues/3081) - In source build failure not explained [\#3081](https://github.com/kokkos/kokkos/issues/3081)
- Allow naming of Views for initialization kernel [\#3070](https://github.com/kokkos/kokkos/issues/3070) - Allow naming of Views for initialization kernel [\#3070](https://github.com/kokkos/kokkos/issues/3070)

View File

@ -1,12 +1,11 @@
cmake_minimum_required(VERSION 3.16 FATAL_ERROR) cmake_minimum_required(VERSION 3.16 FATAL_ERROR)
# Disable in-source builds to prevent source tree corruption. # Disable in-source builds to prevent source tree corruption.
if( "${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}" ) if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}")
message( FATAL_ERROR "FATAL: In-source builds are not allowed. You should create a separate directory for build files and delete CMakeCache.txt." ) message(
endif() FATAL_ERROR
"FATAL: In-source builds are not allowed. You should create a separate directory for build files and delete CMakeCache.txt."
if (COMMAND TRIBITS_PACKAGE) )
TRIBITS_PACKAGE(Kokkos)
endif() endif()
# We want to determine if options are given with the wrong case # We want to determine if options are given with the wrong case
@ -15,142 +14,141 @@ endif()
# form a list of all the given variables. If it begins with any # form a list of all the given variables. If it begins with any
# case of KoKkOS, we add it to the list. # case of KoKkOS, we add it to the list.
GET_CMAKE_PROPERTY(_variableNames VARIABLES) get_cmake_property(_variableNames VARIABLES)
SET(KOKKOS_GIVEN_VARIABLES) set(KOKKOS_GIVEN_VARIABLES)
FOREACH (var ${_variableNames}) foreach(var ${_variableNames})
STRING(TOUPPER ${var} UC_VAR) string(TOUPPER ${var} UC_VAR)
STRING(FIND ${UC_VAR} KOKKOS IDX) string(FIND ${UC_VAR} KOKKOS IDX)
IF (${IDX} EQUAL 0) if(${IDX} EQUAL 0)
LIST(APPEND KOKKOS_GIVEN_VARIABLES ${var}) list(APPEND KOKKOS_GIVEN_VARIABLES ${var})
ENDIF() endif()
ENDFOREACH() endforeach()
# Basic initialization (Used in KOKKOS_SETTINGS) # Basic initialization (Used in KOKKOS_SETTINGS)
SET(Kokkos_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) set(Kokkos_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
SET(KOKKOS_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) set(KOKKOS_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
SET(KOKKOS_SRC_PATH ${Kokkos_SOURCE_DIR}) set(KOKKOS_SRC_PATH ${Kokkos_SOURCE_DIR})
SET(KOKKOS_PATH ${Kokkos_SOURCE_DIR}) set(KOKKOS_PATH ${Kokkos_SOURCE_DIR})
SET(KOKKOS_TOP_BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}) set(KOKKOS_TOP_BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR})
set(PACKAGE_NAME Kokkos)
set(PACKAGE_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
# Is this a build as part of Trilinos?
IF(COMMAND TRIBITS_PACKAGE_DECL)
SET(KOKKOS_HAS_TRILINOS ON)
ELSE()
SET(KOKKOS_HAS_TRILINOS OFF)
SET(PACKAGE_NAME Kokkos)
SET(PACKAGE_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
ENDIF()
# Is this build a subdirectory of another project # Is this build a subdirectory of another project
GET_DIRECTORY_PROPERTY(HAS_PARENT PARENT_DIRECTORY) get_directory_property(HAS_PARENT PARENT_DIRECTORY)
include(${KOKKOS_SRC_PATH}/cmake/kokkos_functions.cmake)
include(${KOKKOS_SRC_PATH}/cmake/kokkos_pick_cxx_std.cmake)
INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_functions.cmake) set(KOKKOS_ENABLED_OPTIONS) #exported in config file
INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_pick_cxx_std.cmake) set(KOKKOS_ENABLED_DEVICES) #exported in config file
set(KOKKOS_ENABLED_TPLS) #exported in config file
SET(KOKKOS_ENABLED_OPTIONS) #exported in config file set(KOKKOS_ENABLED_ARCH_LIST) #exported in config file
SET(KOKKOS_ENABLED_DEVICES) #exported in config file
SET(KOKKOS_ENABLED_TPLS) #exported in config file
SET(KOKKOS_ENABLED_ARCH_LIST) #exported in config file
#These are helper flags used for sanity checks during config #These are helper flags used for sanity checks during config
#Certain features should depend on other features being configured first #Certain features should depend on other features being configured first
SET(KOKKOS_CFG_DAG_NONE On) #sentinel to indicate no dependencies set(KOKKOS_CFG_DAG_NONE On) #sentinel to indicate no dependencies
SET(KOKKOS_CFG_DAG_DEVICES_DONE Off) set(KOKKOS_CFG_DAG_DEVICES_DONE Off)
SET(KOKKOS_CFG_DAG_OPTIONS_DONE Off) set(KOKKOS_CFG_DAG_OPTIONS_DONE Off)
SET(KOKKOS_CFG_DAG_ARCH_DONE Off) set(KOKKOS_CFG_DAG_ARCH_DONE Off)
SET(KOKKOS_CFG_DAG_CXX_STD_DONE Off) set(KOKKOS_CFG_DAG_CXX_STD_DONE Off)
SET(KOKKOS_CFG_DAG_COMPILER_ID_DONE Off) set(KOKKOS_CFG_DAG_COMPILER_ID_DONE Off)
FUNCTION(KOKKOS_CFG_DEPENDS SUCCESSOR PRECURSOR) function(KOKKOS_CFG_DEPENDS SUCCESSOR PRECURSOR)
SET(PRE_FLAG KOKKOS_CFG_DAG_${PRECURSOR}) set(PRE_FLAG KOKKOS_CFG_DAG_${PRECURSOR})
SET(POST_FLAG KOKKOS_CFG_DAG_${SUCCESSOR}) set(POST_FLAG KOKKOS_CFG_DAG_${SUCCESSOR})
IF (NOT ${PRE_FLAG}) if(NOT ${PRE_FLAG})
MESSAGE(FATAL_ERROR "Bad CMake refactor: feature ${SUCCESSOR} cannot be configured until ${PRECURSOR} is configured") message(
ENDIF() FATAL_ERROR "Bad CMake refactor: feature ${SUCCESSOR} cannot be configured until ${PRECURSOR} is configured"
GLOBAL_SET(${POST_FLAG} On) )
ENDFUNCTION() endif()
global_set(${POST_FLAG} On)
endfunction()
list(APPEND CMAKE_MODULE_PATH cmake/Modules)
LIST(APPEND CMAKE_MODULE_PATH cmake/Modules) set(CMAKE_DISABLE_SOURCE_CHANGES ON)
set(CMAKE_DISABLE_IN_SOURCE_BUILD ON)
IF(NOT KOKKOS_HAS_TRILINOS) # What language are we compiling Kokkos as
set(CMAKE_DISABLE_SOURCE_CHANGES ON) # downstream dependencies need to match this!
set(CMAKE_DISABLE_IN_SOURCE_BUILD ON) set(KOKKOS_COMPILE_LANGUAGE CXX)
# use lower case here since we didn't parse options yet
if(Kokkos_ENABLE_COMPILE_AS_CMAKE_LANGUAGE AND Kokkos_ENABLE_CUDA)
# What language are we compiling Kokkos as # Without this as a language for the package we would get a C++ compiler enabled.
# downstream dependencies need to match this! # but we still need a C++ compiler even if we build all our cpp files as CUDA only
SET(KOKKOS_COMPILE_LANGUAGE CXX) # because otherwise the C++ features don't work etc.
# use lower case here since we didn't parse options yet # This is just the rather odd way CMake does this, since CUDA doesn't imply C++ even
IF (Kokkos_ENABLE_COMPILE_AS_CMAKE_LANGUAGE AND Kokkos_ENABLE_CUDA) # though it is a C++ extension ... (but I guess it didn't use to be back in CUDA 4 or 5
# days.
set(KOKKOS_INTERNAL_EXTRA_COMPILE_LANGUAGE CXX)
# Without this as a language for the package we would get a C++ compiler enabled. set(KOKKOS_COMPILE_LANGUAGE CUDA)
# but we still need a C++ compiler even if we build all our cpp files as CUDA only endif()
# because otherwise the C++ features don't work etc. # use lower case here since we haven't parsed options yet
# This is just the rather odd way CMake does this, since CUDA doesn't imply C++ even if(Kokkos_ENABLE_COMPILE_AS_CMAKE_LANGUAGE AND Kokkos_ENABLE_HIP)
# though it is a C++ extension ... (but I guess it didn't use to be back in CUDA 4 or 5
# days.
SET(KOKKOS_INTERNAL_EXTRA_COMPILE_LANGUAGE CXX)
SET(KOKKOS_COMPILE_LANGUAGE CUDA) # Without this as a language for the package we would get a C++ compiler enabled.
ENDIF() # but we still need a C++ compiler even if we build all our cpp files as HIP only
# use lower case here since we haven't parsed options yet # because otherwise the C++ features don't work etc.
IF (Kokkos_ENABLE_COMPILE_AS_CMAKE_LANGUAGE AND Kokkos_ENABLE_HIP) set(KOKKOS_INTERNAL_EXTRA_COMPILE_LANGUAGE CXX)
# Without this as a language for the package we would get a C++ compiler enabled. set(KOKKOS_COMPILE_LANGUAGE HIP)
# but we still need a C++ compiler even if we build all our cpp files as HIP only endif()
# because otherwise the C++ features don't work etc.
SET(KOKKOS_INTERNAL_EXTRA_COMPILE_LANGUAGE CXX)
SET(KOKKOS_COMPILE_LANGUAGE HIP) if(Spack_WORKAROUND)
ENDIF() if(Kokkos_ENABLE_COMPILE_AS_CMAKE_LANGUAGE)
# Name the option exactly as it is spelled in the enclosing condition
# (Kokkos_ENABLE_COMPILE_AS_CMAKE_LANGUAGE) so users can find and unset it.
message(FATAL_ERROR "Can't currently use Kokkos_ENABLE_COMPILE_AS_CMAKE_LANGUAGE in a spack installation!")
endif()
IF (Spack_WORKAROUND) #if we are explicitly using Spack for development,
IF (Kokkos_ENABLE_COMPILE_AS_CMAKE_LANGUAGE) #nuke the Spack compiler
MESSAGE(FATAL_ERROR "Can't currently use Kokkos_ENABLE_COMPILER_AS_CMAKE_LANGUAGE in a spack installation!") set(SPACK_CXX $ENV{SPACK_CXX})
ENDIF() if(SPACK_CXX)
set(CMAKE_CXX_COMPILER ${SPACK_CXX} CACHE STRING "the C++ compiler" FORCE)
#if we are explicitly using Spack for development, set(ENV{CXX} ${SPACK_CXX})
#nuke the Spack compiler endif()
SET(SPACK_CXX $ENV{SPACK_CXX}) endif()
IF(SPACK_CXX) # Always call the project command to define Kokkos_ variables
SET(CMAKE_CXX_COMPILER ${SPACK_CXX} CACHE STRING "the C++ compiler" FORCE) # and to make sure that C++ is an enabled language
SET(ENV{CXX} ${SPACK_CXX}) project(Kokkos ${KOKKOS_COMPILE_LANGUAGE} ${KOKKOS_INTERNAL_EXTRA_COMPILE_LANGUAGE})
ENDIF() if(NOT HAS_PARENT)
ENDIF() if(NOT CMAKE_BUILD_TYPE)
# Always call the project command to define Kokkos_ variables set(DEFAULT_BUILD_TYPE "RelWithDebInfo")
# and to make sure that C++ is an enabled language message(STATUS "Setting build type to '${DEFAULT_BUILD_TYPE}' as none was specified.")
PROJECT(Kokkos ${KOKKOS_COMPILE_LANGUAGE} ${KOKKOS_INTERNAL_EXTRA_COMPILE_LANGUAGE}) set(CMAKE_BUILD_TYPE "${DEFAULT_BUILD_TYPE}"
IF(NOT HAS_PARENT) CACHE STRING "Choose the type of build, options are: Debug, Release, RelWithDebInfo and MinSizeRel." FORCE
IF (NOT CMAKE_BUILD_TYPE) )
SET(DEFAULT_BUILD_TYPE "RelWithDebInfo") endif()
MESSAGE(STATUS "Setting build type to '${DEFAULT_BUILD_TYPE}' as none was specified.") endif()
SET(CMAKE_BUILD_TYPE "${DEFAULT_BUILD_TYPE}" CACHE STRING
"Choose the type of build, options are: Debug, Release, RelWithDebInfo and MinSizeRel."
FORCE)
ENDIF()
ENDIF()
ELSE()
SET(KOKKOS_COMPILE_LANGUAGE CXX)
ENDIF()
IF (NOT CMAKE_SIZEOF_VOID_P)
STRING(FIND ${CMAKE_CXX_COMPILER} nvcc_wrapper FIND_IDX)
IF (NOT FIND_IDX STREQUAL -1)
MESSAGE(FATAL_ERROR "Kokkos did not configure correctly and failed to validate compiler. The most likely cause is CUDA linkage using nvcc_wrapper. Please ensure your CUDA environment is correctly configured.")
ELSE()
MESSAGE(FATAL_ERROR "Kokkos did not configure correctly and failed to validate compiler. The most likely cause is linkage errors during CMake compiler validation. Please consult the CMake error log shown below for the exact error during compiler validation")
ENDIF()
ELSEIF (NOT CMAKE_SIZEOF_VOID_P EQUAL 8)
IF(CMAKE_SIZEOF_VOID_P EQUAL 4)
MESSAGE(WARNING "32-bit builds are experimental and not officially supported.")
SET(KOKKOS_IMPL_32BIT ON)
ELSE()
MESSAGE(FATAL_ERROR "Kokkos assumes a 64-bit build, i.e., 8-byte pointers, but found ${CMAKE_SIZEOF_VOID_P}-byte pointers instead;")
ENDIF()
ENDIF()
# Validate the pointer width detected for the configured compiler.
#   - empty/unset CMAKE_SIZEOF_VOID_P: treated as a failed compiler validation (fatal)
#   - 8: supported 64-bit build, nothing to do
#   - 4: experimental 32-bit build, warn and set KOKKOS_IMPL_32BIT
#   - anything else: fatal
if(NOT CMAKE_SIZEOF_VOID_P)
  # Look for nvcc_wrapper in the compiler path so the error can point at the
  # CUDA environment. The path is quoted so that an empty value, or one
  # containing ';', is still passed to string(FIND) as exactly one argument.
  string(FIND "${CMAKE_CXX_COMPILER}" "nvcc_wrapper" FIND_IDX)
  # string(FIND) yields -1 when the substring is absent; compare numerically.
  if(NOT FIND_IDX EQUAL -1)
    message(
      FATAL_ERROR
        "Kokkos did not configure correctly and failed to validate compiler. The most likely cause is CUDA linkage using nvcc_wrapper. Please ensure your CUDA environment is correctly configured."
    )
  else()
    message(
      FATAL_ERROR
        "Kokkos did not configure correctly and failed to validate compiler. The most likely cause is linkage errors during CMake compiler validation. Please consult the CMake error log shown below for the exact error during compiler validation"
    )
  endif()
elseif(NOT CMAKE_SIZEOF_VOID_P EQUAL 8)
  if(CMAKE_SIZEOF_VOID_P EQUAL 4)
    message(WARNING "32-bit builds are experimental and not officially supported.")
    set(KOKKOS_IMPL_32BIT ON)
  else()
    message(
      FATAL_ERROR
        "Kokkos assumes a 64-bit build, i.e., 8-byte pointers, but found ${CMAKE_SIZEOF_VOID_P}-byte pointers instead;"
    )
  endif()
endif()
set(Kokkos_VERSION_MAJOR 4) set(Kokkos_VERSION_MAJOR 4)
set(Kokkos_VERSION_MINOR 4) set(Kokkos_VERSION_MINOR 5)
set(Kokkos_VERSION_PATCH 1) set(Kokkos_VERSION_PATCH 1)
set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}") set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}")
message(STATUS "Kokkos version: ${Kokkos_VERSION}") message(STATUS "Kokkos version: ${Kokkos_VERSION}")
@ -164,58 +162,54 @@ math(EXPR KOKKOS_VERSION_PATCH "${KOKKOS_VERSION} % 100")
# Load either the real TriBITS or a TriBITS wrapper # Load either the real TriBITS or a TriBITS wrapper
# for certain utility functions that are universal (like GLOBAL_SET) # for certain utility functions that are universal (like GLOBAL_SET)
INCLUDE(${KOKKOS_SRC_PATH}/cmake/fake_tribits.cmake) include(${KOKKOS_SRC_PATH}/cmake/fake_tribits.cmake)
IF (Kokkos_ENABLE_CUDA) if(Kokkos_ENABLE_CUDA)
# If we are building CUDA, we have tricked CMake because we declare a CXX project # If we are building CUDA, we have tricked CMake because we declare a CXX project
# If the default C++ standard for a given compiler matches the requested # If the default C++ standard for a given compiler matches the requested
# standard, then CMake just omits the -std flag in later versions of CMake # standard, then CMake just omits the -std flag in later versions of CMake
# This breaks CUDA compilation (CUDA compiler can have a different default # This breaks CUDA compilation (CUDA compiler can have a different default
# -std then the underlying host compiler by itself). Setting this variable # -std then the underlying host compiler by itself). Setting this variable
# forces CMake to always add the -std flag even if it thinks it doesn't need it # forces CMake to always add the -std flag even if it thinks it doesn't need it
GLOBAL_SET(CMAKE_CXX_STANDARD_DEFAULT 98) global_set(CMAKE_CXX_STANDARD_DEFAULT 98)
ENDIF() endif()
# These are the variables we will append to as we go # These are the variables we will append to as we go
# I really wish these were regular variables # I really wish these were regular variables
# but scoping issues can make it difficult # but scoping issues can make it difficult
GLOBAL_SET(KOKKOS_COMPILE_OPTIONS) global_set(KOKKOS_COMPILE_OPTIONS)
GLOBAL_SET(KOKKOS_LINK_OPTIONS) global_set(KOKKOS_LINK_OPTIONS)
GLOBAL_SET(KOKKOS_AMDGPU_OPTIONS) global_set(KOKKOS_AMDGPU_OPTIONS)
GLOBAL_SET(KOKKOS_CUDA_OPTIONS) global_set(KOKKOS_CUDA_OPTIONS)
GLOBAL_SET(KOKKOS_CUDAFE_OPTIONS) global_set(KOKKOS_CUDAFE_OPTIONS)
GLOBAL_SET(KOKKOS_XCOMPILER_OPTIONS) global_set(KOKKOS_XCOMPILER_OPTIONS)
# We need to append text here for making sure TPLs # We need to append text here for making sure TPLs
# we import are available for an installed Kokkos # we import are available for an installed Kokkos
GLOBAL_SET(KOKKOS_TPL_EXPORTS) global_set(KOKKOS_TPL_EXPORTS)
# KOKKOS_DEPENDENCE is used by kokkos_launch_compiler # KOKKOS_DEPENDENCE is used by kokkos_launch_compiler
GLOBAL_SET(KOKKOS_COMPILE_DEFINITIONS KOKKOS_DEPENDENCE) global_set(KOKKOS_COMPILE_DEFINITIONS KOKKOS_DEPENDENCE)
# MSVC never goes through kokkos_launch_compiler # MSVC never goes through kokkos_launch_compiler
IF(NOT MSVC) if(NOT MSVC)
GLOBAL_APPEND(KOKKOS_LINK_OPTIONS -DKOKKOS_DEPENDENCE) global_append(KOKKOS_LINK_OPTIONS -DKOKKOS_DEPENDENCE)
ENDIF() endif()
IF(Kokkos_ENABLE_TESTS AND NOT KOKKOS_HAS_TRILINOS) include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/kokkos_configure_trilinos.cmake)
if(Kokkos_ENABLE_TESTS)
find_package(GTest QUIET) find_package(GTest QUIET)
ENDIF() endif()
# Include a set of Kokkos-specific wrapper functions that # Include a set of Kokkos-specific wrapper functions that
# will either call raw CMake or TriBITS # will either call raw CMake or TriBITS
# These are functions like KOKKOS_INCLUDE_DIRECTORIES # These are functions like KOKKOS_INCLUDE_DIRECTORIES
INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_tribits.cmake) include(${KOKKOS_SRC_PATH}/cmake/kokkos_tribits.cmake)
# Check the environment and set certain variables # Check the environment and set certain variables
# to allow platform-specific checks # to allow platform-specific checks
INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_check_env.cmake) include(${KOKKOS_SRC_PATH}/cmake/kokkos_check_env.cmake)
IF(NOT KOKKOS_HAS_TRILINOS) include(${KOKKOS_SRC_PATH}/cmake/build_env_info.cmake)
# This does not work in Trilinos and we simply don't care check_git_setup()
# to fix it for Trilinos
# Gather information about the runtime environment
INCLUDE(${KOKKOS_SRC_PATH}/cmake/build_env_info.cmake)
check_git_setup()
ENDIF()
# The build environment setup goes in the following steps # The build environment setup goes in the following steps
# 1) Check all the enable options. This includes checking Kokkos_DEVICES # 1) Check all the enable options. This includes checking Kokkos_DEVICES
@ -223,102 +217,54 @@ ENDIF()
# 3) Check the CXX standard and select important CXX flags # 3) Check the CXX standard and select important CXX flags
# 4) Check for any third-party libraries (TPLs) like hwloc # 4) Check for any third-party libraries (TPLs) like hwloc
# 5) Check if optimizing for a particular architecture and add arch-specific flags # 5) Check if optimizing for a particular architecture and add arch-specific flags
KOKKOS_SETUP_BUILD_ENVIRONMENT() kokkos_setup_build_environment()
# Finish off the build # Finish off the build
# 6) Recurse into subdirectories and configure individual libraries # 6) Recurse into subdirectories and configure individual libraries
# 7) Export and install targets # 7) Export and install targets
OPTION(BUILD_SHARED_LIBS "Build shared libraries" OFF) option(BUILD_SHARED_LIBS "Build shared libraries" OFF)
SET(KOKKOS_COMPONENT_LIBRARIES kokkoscore kokkoscontainers kokkosalgorithms kokkossimd) set(KOKKOS_COMPONENT_LIBRARIES kokkoscore kokkoscontainers kokkosalgorithms kokkossimd)
SET_PROPERTY(GLOBAL PROPERTY KOKKOS_INT_LIBRARIES kokkos ${KOKKOS_COMPONENT_LIBRARIES}) set_property(GLOBAL PROPERTY KOKKOS_INT_LIBRARIES kokkos ${KOKKOS_COMPONENT_LIBRARIES})
IF (KOKKOS_HAS_TRILINOS) if(HAS_PARENT)
SET(TRILINOS_INCDIR ${${PROJECT_NAME}_INSTALL_INCLUDE_DIR}) set(KOKKOS_HEADER_DIR "include/kokkos")
SET(KOKKOS_HEADER_DIR ${TRILINOS_INCDIR}) set(KOKKOS_IS_SUBDIRECTORY TRUE)
SET(KOKKOS_IS_SUBDIRECTORY TRUE) else()
ELSEIF(HAS_PARENT) set(KOKKOS_HEADER_DIR "${CMAKE_INSTALL_INCLUDEDIR}")
SET(KOKKOS_HEADER_DIR "include/kokkos") set(KOKKOS_IS_SUBDIRECTORY FALSE)
SET(KOKKOS_IS_SUBDIRECTORY TRUE) endif()
ELSE()
SET(KOKKOS_HEADER_DIR "${CMAKE_INSTALL_INCLUDEDIR}")
SET(KOKKOS_IS_SUBDIRECTORY FALSE)
ENDIF()
#------------------------------------------------------------------------------ #------------------------------------------------------------------------------
# #
# A) Forward declare the package so that certain options are also defined for # A) Forward declare the package so that certain options are also defined for
# subpackages # subpackages
## This restores the old behavior of ProjectCompilerPostConfig.cmake
# We must do this before KOKKOS_PACKAGE_DECL
IF (KOKKOS_HAS_TRILINOS)
# Overwrite the old flags at the top-level
# Because Tribits doesn't use lists, it uses spaces for the list of CXX flags
# we have to match the annoying behavior, also we have to preserve quotes
# which needs another workaround.
SET(KOKKOS_COMPILE_OPTIONS_TMP)
IF (KOKKOS_ENABLE_HIP)
LIST(APPEND KOKKOS_COMPILE_OPTIONS ${KOKKOS_AMDGPU_OPTIONS})
ENDIF()
FOREACH(OPTION ${KOKKOS_COMPILE_OPTIONS})
STRING(FIND "${OPTION}" " " OPTION_HAS_WHITESPACE)
IF(OPTION_HAS_WHITESPACE EQUAL -1)
LIST(APPEND KOKKOS_COMPILE_OPTIONS_TMP "${OPTION}")
ELSE()
LIST(APPEND KOKKOS_COMPILE_OPTIONS_TMP "\"${OPTION}\"")
ENDIF()
ENDFOREACH()
STRING(REPLACE ";" " " KOKKOSCORE_COMPILE_OPTIONS "${KOKKOS_COMPILE_OPTIONS_TMP}")
LIST(APPEND KOKKOS_ALL_COMPILE_OPTIONS ${KOKKOS_COMPILE_OPTIONS})
IF (KOKKOS_ENABLE_CUDA)
LIST(APPEND KOKKOS_ALL_COMPILE_OPTIONS ${KOKKOS_CUDA_OPTIONS})
ENDIF()
FOREACH(XCOMP_FLAG ${KOKKOS_XCOMPILER_OPTIONS})
SET(KOKKOSCORE_XCOMPILER_OPTIONS "${KOKKOSCORE_XCOMPILER_OPTIONS} -Xcompiler ${XCOMP_FLAG}")
LIST(APPEND KOKKOS_ALL_COMPILE_OPTIONS -Xcompiler ${XCOMP_FLAG})
ENDFOREACH()
IF (KOKKOS_ENABLE_CUDA)
STRING(REPLACE ";" " " KOKKOSCORE_CUDA_OPTIONS "${KOKKOS_CUDA_OPTIONS}")
FOREACH(CUDAFE_FLAG ${KOKKOS_CUDAFE_OPTIONS})
SET(KOKKOSCORE_CUDAFE_OPTIONS "${KOKKOSCORE_CUDAFE_OPTIONS} -Xcudafe ${CUDAFE_FLAG}")
LIST(APPEND KOKKOS_ALL_COMPILE_OPTIONS -Xcudafe ${CUDAFE_FLAG})
ENDFOREACH()
ENDIF()
#These flags get set up in KOKKOS_PACKAGE_DECL, which means they
#must be configured before KOKKOS_PACKAGE_DECL
SET(KOKKOS_ALL_COMPILE_OPTIONS
$<$<COMPILE_LANGUAGE:CXX>:${KOKKOS_ALL_COMPILE_OPTIONS}>)
ENDIF()
#------------------------------------------------------------------------------ #------------------------------------------------------------------------------
# #
# D) Process the subpackages (subdirectories) for Kokkos # D) Process the subpackages (subdirectories) for Kokkos
# #
KOKKOS_PROCESS_SUBPACKAGES() kokkos_process_subpackages()
#------------------------------------------------------------------------------ #------------------------------------------------------------------------------
# #
# E) If Kokkos itself is enabled, process the Kokkos package # E) If Kokkos itself is enabled, process the Kokkos package
# #
KOKKOS_PACKAGE_POSTPROCESS() kokkos_configure_core()
KOKKOS_CONFIGURE_CORE()
IF (NOT KOKKOS_HAS_TRILINOS AND NOT Kokkos_INSTALL_TESTING) if(NOT Kokkos_INSTALL_TESTING)
ADD_LIBRARY(kokkos INTERFACE) add_library(kokkos INTERFACE)
#Make sure in-tree projects can reference this as Kokkos:: #Make sure in-tree projects can reference this as Kokkos::
#to match the installed target names #to match the installed target names
ADD_LIBRARY(Kokkos::kokkos ALIAS kokkos) add_library(Kokkos::kokkos ALIAS kokkos)
# all_libs target is required for TriBITS-compliance # all_libs target is required for TriBITS-compliance
ADD_LIBRARY(Kokkos::all_libs ALIAS kokkos) add_library(Kokkos::all_libs ALIAS kokkos)
TARGET_LINK_LIBRARIES(kokkos INTERFACE ${KOKKOS_COMPONENT_LIBRARIES}) target_link_libraries(kokkos INTERFACE ${KOKKOS_COMPONENT_LIBRARIES})
KOKKOS_INTERNAL_ADD_LIBRARY_INSTALL(kokkos) kokkos_internal_add_library_install(kokkos)
ENDIF() endif()
INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_install.cmake) include(${KOKKOS_SRC_PATH}/cmake/kokkos_install.cmake)
# nvcc_wrapper is Kokkos' wrapper for NVIDIA's NVCC CUDA compiler. # nvcc_wrapper is Kokkos' wrapper for NVIDIA's NVCC CUDA compiler.
# Kokkos needs nvcc_wrapper in order to build. Other libraries and # Kokkos needs nvcc_wrapper in order to build. Other libraries and
@ -327,16 +273,15 @@ INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_install.cmake)
# as relative to ${CMAKE_INSTALL_PATH}. # as relative to ${CMAKE_INSTALL_PATH}.
# KOKKOS_INSTALL_ADDITIONAL_FILES will install nvcc wrapper and other generated # KOKKOS_INSTALL_ADDITIONAL_FILES will install nvcc wrapper and other generated
# files # files
KOKKOS_INSTALL_ADDITIONAL_FILES() kokkos_install_additional_files()
# Finally - if we are a subproject - make sure the enabled devices are visible # Finally - if we are a subproject - make sure the enabled devices are visible
IF (HAS_PARENT) if(HAS_PARENT)
FOREACH(DEV Kokkos_ENABLED_DEVICES) foreach(DEV Kokkos_ENABLED_DEVICES)
#I would much rather not make these cache variables or global properties, but I can't #I would much rather not make these cache variables or global properties, but I can't
#make any guarantees on whether PARENT_SCOPE is good enough to make #make any guarantees on whether PARENT_SCOPE is good enough to make
#these variables visible where I need them #these variables visible where I need them
SET(Kokkos_ENABLE_${DEV} ON PARENT_SCOPE) set(Kokkos_ENABLE_${DEV} ON PARENT_SCOPE)
SET_PROPERTY(GLOBAL PROPERTY Kokkos_ENABLE_${DEV} ON) set_property(GLOBAL PROPERTY Kokkos_ENABLE_${DEV} ON)
ENDFOREACH() endforeach()
ENDIF() endif()

View File

@ -7,6 +7,8 @@ We actively welcome pull requests.
3. If you've changed APIs, update the documentation. 3. If you've changed APIs, update the documentation.
4. Ensure the test suite passes. 4. Ensure the test suite passes.
Before sending your patch for review, please try to ensure that it is formatted properly. We use clang-format version 16 for this.
## Issues ## Issues
We use GitHub issues to track public bugs. Please ensure your description is clear and has sufficient instructions to be able to reproduce the issue. We use GitHub issues to track public bugs. Please ensure your description is clear and has sufficient instructions to be able to reproduce the issue.

View File

@ -1,73 +0,0 @@
Developers of Kokkos (those who commit modifications to Kokkos)
must maintain the snapshot of Kokkos in the Trilinos repository.
This file contains instructions for how to
snapshot Kokkos from github.com/kokkos to Trilinos.
------------------------------------------------------------------------
*** EVERYTHING GOES RIGHT WORKFLOW ***
1) Given a 'git clone' of Kokkos and of Trilinos repositories.
1.1) Let ${KOKKOS} be the absolute path to the Kokkos clone.
This path *must* terminate with the directory name 'kokkos';
e.g., ${HOME}/kokkos .
1.2) Let ${TRILINOS} be the absolute path to the Trilinos directory.
2) Given that the Kokkos build & test is clean and
changes are committed to the Kokkos clone.
3) Snapshot the current commit in the Kokkos clone into the Trilinos clone.
This overwrites ${TRILINOS}/packages/kokkos with the content of ${KOKKOS}:
${KOKKOS}/scripts/snapshot.py --verbose ${KOKKOS} ${TRILINOS}/packages
4) Verify the snapshot commit happened as expected
cd ${TRILINOS}/packages/kokkos
git log -1 --name-only
5) Modify, build, and test Trilinos with the Kokkos snapshot.
6) Given that the Trilinos build & test is clean and
changes are committed to the Trilinos clone.
7) Attempt push to the Kokkos repository.
If push fails then you must 'remove the Kokkos snapshot'
from your Trilinos clone.
See below.
8) Attempt to push to the Trilinos repository.
If updating for a failed push requires you to change Kokkos you must
'remove the Kokkos snapshot' from your Trilinos clone.
See below.
------------------------------------------------------------------------
*** WHEN SOMETHING GOES WRONG AND YOU MUST ***
*** REMOVE THE KOKKOS SNAPSHOT FROM YOUR TRILINOS CLONE ***
1) Query the Trilinos clone commit log.
git log --oneline
2) Note the <SHA1> of the commit to the Trilinos clone
immediately BEFORE the Kokkos snapshot commit.
Copy this <SHA1> for use in the next command.
3) IF more than one outstanding commit then you can remove just the
Kokkos snapshot commit with 'git rebase -i'. Edit the rebase file.
Remove or comment out the Kokkos snapshot commit entry.
git rebase -i <SHA1>
4) IF the Kokkos snapshot commit is the one and only
outstanding commit then remove just that commit.
git reset --hard HEAD~1
------------------------------------------------------------------------
*** REGARDING 'snapshot.py' TOOL ***
The 'snapshot.py' tool is developed and maintained by the
Center for Computing Research (CCR)
Software Engineering, Maintenance, and Support (SEMS) team.
Contact Brent Perschbacher <bmpersc@sandia.gov> for questions.
------------------------------------------------------------------------

View File

@ -1,6 +1,6 @@
# Default settings common options. # Default settings common options.
#LAMMPS specific settings: #SPARTA specific settings:
ifndef KOKKOS_PATH ifndef KOKKOS_PATH
KOKKOS_PATH=../../lib/kokkos KOKKOS_PATH=../../lib/kokkos
endif endif
@ -11,7 +11,7 @@ CXXFLAGS += $(SHFLAGS)
endif endif
KOKKOS_VERSION_MAJOR = 4 KOKKOS_VERSION_MAJOR = 4
KOKKOS_VERSION_MINOR = 4 KOKKOS_VERSION_MINOR = 5
KOKKOS_VERSION_PATCH = 1 KOKKOS_VERSION_PATCH = 1
KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc) KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc)
@ -23,7 +23,7 @@ KOKKOS_DEVICES ?= "OpenMP"
# NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75,Ampere80,Ampere86,Ada89,Hopper90 # NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75,Ampere80,Ampere86,Ada89,Hopper90
# ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX,ARMv9-Grace # ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX,ARMv9-Grace
# IBM: Power8,Power9 # IBM: Power8,Power9
# AMD-GPUS: AMD_GFX906,AMD_GFX908,AMD_GFX90A,AMD_GFX940,AMD_GFX942,AMD_GFX1030,AMD_GFX1100,AMD_GFX1103 # AMD-GPUS: AMD_GFX906,AMD_GFX908,AMD_GFX90A,AMD_GFX940,AMD_GFX942,AMD_GFX942_APU,AMD_GFX1030,AMD_GFX1100,AMD_GFX1103
# AMD-CPUS: AMDAVX,Zen,Zen2,Zen3 # AMD-CPUS: AMDAVX,Zen,Zen2,Zen3
# Intel-GPUs: Intel_Gen,Intel_Gen9,Intel_Gen11,Intel_Gen12LP,Intel_DG1,Intel_XeHP,Intel_PVC # Intel-GPUs: Intel_Gen,Intel_Gen9,Intel_Gen11,Intel_Gen12LP,Intel_DG1,Intel_XeHP,Intel_PVC
KOKKOS_ARCH ?= "" KOKKOS_ARCH ?= ""
@ -40,16 +40,19 @@ KOKKOS_TRIBITS ?= "no"
KOKKOS_STANDALONE_CMAKE ?= "no" KOKKOS_STANDALONE_CMAKE ?= "no"
# Default settings specific options. # Default settings specific options.
# Options: force_uvm,use_ldg,rdc,enable_lambda,enable_constexpr,disable_malloc_async # Options: force_uvm,use_ldg,rdc,enable_lambda,enable_constexpr,enable_malloc_async
KOKKOS_CUDA_OPTIONS ?= "disable_malloc_async" KOKKOS_CUDA_OPTIONS ?= ""
# Options: rdc # Options: rdc,enable_malloc_async
KOKKOS_HIP_OPTIONS ?= "" KOKKOS_HIP_OPTIONS ?= ""
# Default settings specific options. # Default settings specific options.
# Options: enable_async_dispatch # Options: enable_async_dispatch
KOKKOS_HPX_OPTIONS ?= "" KOKKOS_HPX_OPTIONS ?= ""
#Options : force_host_as_device
KOKKOS_OPENACC_OPTIONS ?= ""
# Helper functions for conversion to upper case # Helper functions for conversion to upper case
uppercase_TABLE:=a,A b,B c,C d,D e,E f,F g,G h,H i,I j,J k,K l,L m,M n,N o,O p,P q,Q r,R s,S t,T u,U v,V w,W x,X y,Y z,Z uppercase_TABLE:=a,A b,B c,C d,D e,E f,F g,G h,H i,I j,J k,K l,L m,M n,N o,O p,P q,Q r,R s,S t,T u,U v,V w,W x,X y,Y z,Z
uppercase_internal=$(if $1,$$(subst $(firstword $1),$(call uppercase_internal,$(wordlist 2,$(words $1),$1),$2)),$2) uppercase_internal=$(if $1,$$(subst $(firstword $1),$(call uppercase_internal,$(wordlist 2,$(words $1),$1),$2)),$2)
@ -92,7 +95,7 @@ KOKKOS_INTERNAL_CUDA_USE_UVM := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),
KOKKOS_INTERNAL_CUDA_USE_RELOC := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),rdc) KOKKOS_INTERNAL_CUDA_USE_RELOC := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),rdc)
KOKKOS_INTERNAL_CUDA_USE_LAMBDA := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),enable_lambda) KOKKOS_INTERNAL_CUDA_USE_LAMBDA := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),enable_lambda)
KOKKOS_INTERNAL_CUDA_USE_CONSTEXPR := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),enable_constexpr) KOKKOS_INTERNAL_CUDA_USE_CONSTEXPR := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),enable_constexpr)
KOKKOS_INTERNAL_CUDA_DISABLE_MALLOC_ASYNC := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),disable_malloc_async) KOKKOS_INTERNAL_CUDA_ENABLE_MALLOC_ASYNC := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),enable_malloc_async)
KOKKOS_INTERNAL_HPX_ENABLE_ASYNC_DISPATCH := $(call kokkos_has_string,$(KOKKOS_HPX_OPTIONS),enable_async_dispatch) KOKKOS_INTERNAL_HPX_ENABLE_ASYNC_DISPATCH := $(call kokkos_has_string,$(KOKKOS_HPX_OPTIONS),enable_async_dispatch)
# deprecated # deprecated
KOKKOS_INTERNAL_ENABLE_DESUL_ATOMICS := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_desul_atomics) KOKKOS_INTERNAL_ENABLE_DESUL_ATOMICS := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_desul_atomics)
@ -103,6 +106,8 @@ KOKKOS_INTERNAL_DISABLE_DEPRECATED_CODE := $(call kokkos_has_string,$(KOKKOS_OPT
KOKKOS_INTERNAL_ENABLE_DEPRECATION_WARNINGS := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_deprecation_warnings) KOKKOS_INTERNAL_ENABLE_DEPRECATION_WARNINGS := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_deprecation_warnings)
KOKKOS_INTERNAL_HIP_USE_RELOC := $(call kokkos_has_string,$(KOKKOS_HIP_OPTIONS),rdc) KOKKOS_INTERNAL_HIP_USE_RELOC := $(call kokkos_has_string,$(KOKKOS_HIP_OPTIONS),rdc)
KOKKOS_INTERNAL_HIP_ENABLE_MALLOC_ASYNC := $(call kokkos_has_string,$(KOKKOS_HIP_OPTIONS),enable_malloc_async)
KOKKOS_INTERNAL_OPENACC_FORCE_HOST_AS_DEVICE := $(call kokkos_has_string,$(KOKKOS_OPENACC_OPTIONS),force_host_as_device)
# Check for Kokkos Host Execution Spaces one of which must be on. # Check for Kokkos Host Execution Spaces one of which must be on.
KOKKOS_INTERNAL_USE_OPENMP := $(call kokkos_has_string,$(subst OpenMPTarget,,$(KOKKOS_DEVICES)),OpenMP) KOKKOS_INTERNAL_USE_OPENMP := $(call kokkos_has_string,$(subst OpenMPTarget,,$(KOKKOS_DEVICES)),OpenMP)
@ -178,7 +183,7 @@ KOKKOS_INTERNAL_COMPILER_CRAY := $(strip $(shell $(CXX) -craype-verbose 2
KOKKOS_INTERNAL_COMPILER_NVCC := $(strip $(shell echo "$(shell export OMPI_CXX=$(OMPI_CXX); export MPICH_CXX=$(MPICH_CXX); $(CXX) --version 2>&1 | grep -c nvcc)>0" | bc)) KOKKOS_INTERNAL_COMPILER_NVCC := $(strip $(shell echo "$(shell export OMPI_CXX=$(OMPI_CXX); export MPICH_CXX=$(MPICH_CXX); $(CXX) --version 2>&1 | grep -c nvcc)>0" | bc))
KOKKOS_INTERNAL_COMPILER_NVHPC := $(strip $(shell $(CXX) --version 2>&1 | grep -c "nvc++")) KOKKOS_INTERNAL_COMPILER_NVHPC := $(strip $(shell $(CXX) --version 2>&1 | grep -c "nvc++"))
KOKKOS_INTERNAL_COMPILER_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),clang) KOKKOS_INTERNAL_COMPILER_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),clang)
KOKKOS_INTERNAL_COMPILER_CRAY_CLANG := $(strip $(shell $(CXX) -craype-verbose 2>&1 | grep -c "clang++")) KOKKOS_INTERNAL_COMPILER_CRAY_CLANG := $(strip $(shell $(CXX) -craype-verbose 2>&1 | grep -v "error:" | grep -c "clang++"))
KOKKOS_INTERNAL_COMPILER_INTEL_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),oneAPI) KOKKOS_INTERNAL_COMPILER_INTEL_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),oneAPI)
KOKKOS_INTERNAL_COMPILER_APPLE_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),Apple clang) KOKKOS_INTERNAL_COMPILER_APPLE_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),Apple clang)
KOKKOS_INTERNAL_COMPILER_HCC := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),HCC) KOKKOS_INTERNAL_COMPILER_HCC := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),HCC)
@ -292,6 +297,8 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENACC), 1)
# Set OpenACC flags. # Set OpenACC flags.
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVHPC), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_NVHPC), 1)
KOKKOS_INTERNAL_OPENACC_FLAG := -acc KOKKOS_INTERNAL_OPENACC_FLAG := -acc
else ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
KOKKOS_INTERNAL_OPENACC_FLAG := -fopenacc -fopenacc-fake-async-wait -fopenacc-implicit-worker=vector -Wno-openacc-and-cxx -Wno-openmp-mapping -Wno-unknown-cuda-version -Wno-pass-failed
else else
$(error Makefile.kokkos: OpenACC is enabled but the compiler must be NVHPC (got version string $(KOKKOS_CXX_VERSION))) $(error Makefile.kokkos: OpenACC is enabled but the compiler must be NVHPC (got version string $(KOKKOS_CXX_VERSION)))
endif endif
@ -411,8 +418,8 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0)
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 1) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 1)
KOKKOS_INTERNAL_NVCC_PATH := $(shell which nvcc)
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
KOKKOS_INTERNAL_NVCC_PATH := $(shell which nvcc)
CUDA_PATH ?= $(KOKKOS_INTERNAL_NVCC_PATH:/bin/nvcc=) CUDA_PATH ?= $(KOKKOS_INTERNAL_NVCC_PATH:/bin/nvcc=)
ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
KOKKOS_INTERNAL_OPENMPTARGET_FLAG := $(KOKKOS_INTERNAL_OPENMPTARGET_FLAG) --cuda-path=$(CUDA_PATH) KOKKOS_INTERNAL_OPENMPTARGET_FLAG := $(KOKKOS_INTERNAL_OPENMPTARGET_FLAG) --cuda-path=$(CUDA_PATH)
@ -457,6 +464,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A), 0)
endif endif
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX940 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX940) KOKKOS_INTERNAL_USE_ARCH_AMD_GFX940 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX940)
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX942) KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX942)
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942_APU := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX942_APU)
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1030) KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1030)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030), 0) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030), 0)
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030 := $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1030) KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030 := $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1030)
@ -466,6 +474,15 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100), 0)
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100 := $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1100) KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100 := $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1100)
endif endif
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1103 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1103) KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1103 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1103)
KOKKOS_INTERNAL_USE_ARCH_AMD := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX906) \
+ $(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX908) \
+ $(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A) \
+ $(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX940) \
+ $(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942) \
+ $(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942_APU) \
+ $(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030) \
+ $(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100) \
+ $(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1103))
# Any AVX? # Any AVX?
KOKKOS_INTERNAL_USE_ARCH_AVX := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_AMDAVX)) KOKKOS_INTERNAL_USE_ARCH_AVX := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_AMDAVX))
@ -561,6 +578,9 @@ endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENACC), 1) ifeq ($(KOKKOS_INTERNAL_USE_OPENACC), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_OPENACC") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_OPENACC")
ifeq ($(KOKKOS_INTERNAL_OPENACC_FORCE_HOST_AS_DEVICE), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_OPENACC_FORCE_HOST_AS_DEVICE")
endif
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
@ -733,7 +753,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
endif endif
endif endif
ifeq ($(KOKKOS_INTERNAL_CUDA_DISABLE_MALLOC_ASYNC), 0) ifeq ($(KOKKOS_INTERNAL_CUDA_ENABLE_MALLOC_ASYNC), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC")
else else
tmp := $(call kokkos_append_header,"/* $H""undef KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC */") tmp := $(call kokkos_append_header,"/* $H""undef KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC */")
@ -1024,86 +1044,122 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
endif endif
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENACC), 1)
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
KOKKOS_INTERNAL_CUDA_ARCH_FLAG=--offload-arch
endif
endif
# Do not add this flag if its the cray compiler or the nvhpc compiler. # Do not add this flag if its the cray compiler or the nvhpc compiler.
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY_CLANG), 0) ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY_CLANG), 0)
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVHPC), 0) # Lets start with adding architecture defines
# Lets start with adding architecture defines ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER30), 1)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER30), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER30")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER30") ifeq ($(KOKKOS_INTERNAL_COMPILER_NVHPC), 0)
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_30 KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_30
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER32), 1) endif
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER") ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER32), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER32") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER32")
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVHPC), 0)
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_32 KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_32
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER35), 1) endif
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER") ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER35), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER35") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER35")
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVHPC), 0)
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_35 KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_35
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER37), 1) endif
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER") ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER37), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER37") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER37")
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVHPC), 0)
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_37 KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_37
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50), 1) endif
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL") ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL50") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL50")
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVHPC), 0)
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_50 KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_50
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52), 1) endif
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL") ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL52") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL52")
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVHPC), 0)
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_52 KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_52
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53), 1) endif
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL") ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL53") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL53")
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVHPC), 0)
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_53 KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_53
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL60), 1) endif
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL") ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL60), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL60") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL60")
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVHPC), 0)
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_60 KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_60
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL61), 1) endif
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL") ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL61), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL61") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL61")
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVHPC), 0)
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_61 KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_61
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VOLTA70), 1) endif
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA") ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VOLTA70), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA70") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA70")
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVHPC), 0)
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_70 KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_70
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VOLTA72), 1) endif
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA") ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VOLTA72), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA72") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA72")
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVHPC), 0)
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_72 KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_72
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_TURING75), 1) endif
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_TURING75") ifeq ($(KOKKOS_INTERNAL_USE_ARCH_TURING75), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_TURING75")
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVHPC), 0)
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_75 KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_75
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMPERE80), 1) endif
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMPERE") ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMPERE80), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMPERE80") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMPERE")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMPERE80")
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVHPC), 0)
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_80 KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_80
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMPERE86), 1) endif
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMPERE") ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMPERE86), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMPERE86") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMPERE")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMPERE86")
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVHPC), 0)
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_86 KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_86
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ADA89), 1) endif
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ADA89") ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ADA89), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ADA89")
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVHPC), 0)
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_89 KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_89
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_HOPPER90), 1) endif
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_HOPPER") ifeq ($(KOKKOS_INTERNAL_USE_ARCH_HOPPER90), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_HOPPER90") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_HOPPER")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_HOPPER90")
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVHPC), 0)
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_90 KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_90
endif endif
endif endif
@ -1119,6 +1175,9 @@ ifneq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0)
ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG) KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENACC), 1)
KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)
endif
endif endif
endif endif
@ -1126,43 +1185,48 @@ endif
# Figure out the architecture flag for ROCm. # Figure out the architecture flag for ROCm.
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX906), 1) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX906), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX906") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX906")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU \"gfx906\"")
KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx906 KOKKOS_INTERNAL_AMD_ARCH_FLAG := --offload-arch=gfx906
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX908), 1) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX908), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX908") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX908")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU \"gfx908\"")
KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx908 KOKKOS_INTERNAL_AMD_ARCH_FLAG := --offload-arch=gfx908
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A), 1) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX90A") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX90A")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU \"gfx90A\"")
KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx90a KOKKOS_INTERNAL_AMD_ARCH_FLAG := --offload-arch=gfx90a
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX940), 1) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX940), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX940") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX940")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU \"gfx940\"")
KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx940 KOKKOS_INTERNAL_AMD_ARCH_FLAG := --offload-arch=gfx940
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942), 1) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX942") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX942")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU \"gfx942\"")
KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx942 KOKKOS_INTERNAL_AMD_ARCH_FLAG := --offload-arch=gfx942
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942_APU), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX942_APU")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU \"gfx942\"")
KOKKOS_INTERNAL_AMD_ARCH_FLAG := --offload-arch=gfx942
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030), 1) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX1030") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX1030")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU \"gfx1030\"")
KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx1030 KOKKOS_INTERNAL_AMD_ARCH_FLAG := --offload-arch=gfx1030
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100), 1) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX1100") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX1100")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU \"gfx1100\"")
KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx1100 KOKKOS_INTERNAL_AMD_ARCH_FLAG := --offload-arch=gfx1100
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1103), 1) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1103), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX1103") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX1103")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU \"gfx1103\"")
KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx1103 KOKKOS_INTERNAL_AMD_ARCH_FLAG := --offload-arch=gfx1103
endif endif
@ -1171,8 +1235,8 @@ ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
KOKKOS_SRC += $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_HIP.cpp KOKKOS_SRC += $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_HIP.cpp
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/HIP/*.hpp) KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/HIP/*.hpp)
KOKKOS_CXXFLAGS+=$(KOKKOS_INTERNAL_HIP_ARCH_FLAG) KOKKOS_CXXFLAGS+=$(KOKKOS_INTERNAL_AMD_ARCH_FLAG)
KOKKOS_LDFLAGS+=$(KOKKOS_INTERNAL_HIP_ARCH_FLAG) KOKKOS_LDFLAGS+=$(KOKKOS_INTERNAL_AMD_ARCH_FLAG)
ifeq ($(KOKKOS_INTERNAL_HIP_USE_RELOC), 1) ifeq ($(KOKKOS_INTERNAL_HIP_USE_RELOC), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE")
@ -1182,6 +1246,21 @@ ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
KOKKOS_CXXFLAGS+=-fno-gpu-rdc KOKKOS_CXXFLAGS+=-fno-gpu-rdc
KOKKOS_LDFLAGS+=-fno-gpu-rdc KOKKOS_LDFLAGS+=-fno-gpu-rdc
endif endif
ifeq ($(KOKKOS_INTERNAL_HIP_ENABLE_MALLOC_ASYNC), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_IMPL_HIP_MALLOC_ASYNC")
else
tmp := $(call kokkos_append_header,"/* $H""undef KOKKOS_ENABLE_IMPL_HIP_MALLOC_ASYNC */")
endif
endif
ifneq ($(KOKKOS_INTERNAL_USE_ARCH_AMD), 0)
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
ifeq ($(KOKKOS_INTERNAL_USE_OPENACC), 1)
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_AMD_ARCH_FLAG)
KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_AMD_ARCH_FLAG)
endif
endif
endif endif
# Figure out Intel architecture flags. # Figure out Intel architecture flags.
@ -1235,6 +1314,8 @@ ifeq ($(KOKKOS_INTERNAL_USE_SYCL), 1)
KOKKOS_CXXFLAGS+=$(KOKKOS_INTERNAL_INTEL_ARCH_FLAG) KOKKOS_CXXFLAGS+=$(KOKKOS_INTERNAL_INTEL_ARCH_FLAG)
KOKKOS_LDFLAGS+=-fsycl KOKKOS_LDFLAGS+=-fsycl
KOKKOS_LDFLAGS+=$(KOKKOS_INTERNAL_INTEL_ARCH_FLAG) KOKKOS_LDFLAGS+=$(KOKKOS_INTERNAL_INTEL_ARCH_FLAG)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_SYCL_RELOCATABLE_DEVICE_CODE")
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
@ -1322,6 +1403,8 @@ ifneq ($(KOKKOS_INTERNAL_NEW_CONFIG), 0)
endif endif
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/*.hpp) KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/*.hpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/View/*.hpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/View/MDSpan/*.hpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/impl/*.hpp) KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/impl/*.hpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/containers/src/*.hpp) KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/containers/src/*.hpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.hpp) KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.hpp)
@ -1374,6 +1457,48 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENACC), 1)
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_OPENACC_FLAG) KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_OPENACC_FLAG)
KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_OPENACC_FLAG) KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_OPENACC_FLAG)
KOKKOS_LIBS += $(KOKKOS_INTERNAL_OPENACC_LIB) KOKKOS_LIBS += $(KOKKOS_INTERNAL_OPENACC_LIB)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 1)
ifneq ($(CUDA_PATH),)
ifeq ($(call kokkos_path_exists,$(CUDA_PATH)/lib), 1)
CUDA_PATH := $(CUDA_PATH:/compilers=/cuda)
endif
endif
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
ifneq ($(CUDA_PATH),)
KOKKOS_LDFLAGS += -L$(CUDA_PATH)/lib64
endif
KOKKOS_LIBS += -lcudart
endif
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVHPC), 1)
KOKKOS_LIBS += -cuda
endif
ifeq ($(KOKKOS_INTERNAL_OPENACC_FORCE_HOST_AS_DEVICE), 1)
$(error If a GPU architecture is specified, KOKKOS_OPENACC_OPTIONS = force_host_as_device cannot be used. Disable the force_host_as_device option)
endif
else ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD), 1)
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
ifneq ($(ROCM_PATH),)
KOKKOS_CPPFLAGS += -I$(ROCM_PATH)/include
KOKKOS_LDFLAGS += -L$(ROCM_PATH)/lib
endif
KOKKOS_LIBS += -lamdhip64
endif
ifeq ($(KOKKOS_INTERNAL_OPENACC_FORCE_HOST_AS_DEVICE), 1)
$(error If a GPU architecture is specified, KOKKOS_OPENACC_OPTIONS = force_host_as_device cannot be used. Disable the force_host_as_device option)
endif
else ifeq ($(KOKKOS_INTERNAL_OPENACC_FORCE_HOST_AS_DEVICE), 1)
# Compile for kernel execution on the host. In that case,
# memory is shared between the OpenACC space and the host space.
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVHPC), 1)
KOKKOS_CXXFLAGS += -acc=multicore
endif
else
# Automatic fallback mode; try to offload any available GPU, and fall back
# to the host CPU if no available GPU is found.
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVHPC), 1)
KOKKOS_CXXFLAGS += -acc=gpu,multicore
endif
endif
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
@ -1484,7 +1609,11 @@ else
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENACC), 1) ifeq ($(KOKKOS_INTERNAL_USE_OPENACC), 1)
tmp := $(call desul_append_header,"$H""define DESUL_ATOMICS_ENABLE_OPENACC") ifeq ($(KOKKOS_INTERNAL_COMPILER_NVHPC), 1)
tmp := $(call desul_append_header,"$H""define DESUL_ATOMICS_ENABLE_OPENACC")
else
tmp := $(call desul_append_header,"/* $H""undef DESUL_ATOMICS_ENABLE_OPENACC */")
endif
else else
tmp := $(call desul_append_header,"/* $H""undef DESUL_ATOMICS_ENABLE_OPENACC */") tmp := $(call desul_append_header,"/* $H""undef DESUL_ATOMICS_ENABLE_OPENACC */")
endif endif
@ -1512,6 +1641,12 @@ $(DESUL_CONFIG_HEADER):
KOKKOS_CPP_DEPENDS := $(DESUL_CONFIG_HEADER) KokkosCore_config.h $(KOKKOS_HEADERS) KOKKOS_CPP_DEPENDS := $(DESUL_CONFIG_HEADER) KokkosCore_config.h $(KOKKOS_HEADERS)
# Tasking is deprecated
ifeq ($(KOKKOS_INTERNAL_DISABLE_DEPRECATED_CODE), 1)
TMP_KOKKOS_SRC := $(KOKKOS_SRC)
KOKKOS_SRC = $(patsubst %Task.cpp,, $(TMP_KOKKOS_SRC))
endif
KOKKOS_OBJ = $(KOKKOS_SRC:.cpp=.o) KOKKOS_OBJ = $(KOKKOS_SRC:.cpp=.o)
KOKKOS_OBJ_LINK = $(notdir $(KOKKOS_OBJ)) KOKKOS_OBJ_LINK = $(notdir $(KOKKOS_OBJ))

View File

@ -16,8 +16,6 @@ Kokkos_HostSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Ho
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace.cpp
Kokkos_hwloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_hwloc.cpp Kokkos_hwloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_hwloc.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_hwloc.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_hwloc.cpp
Kokkos_TaskQueue.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_TaskQueue.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_TaskQueue.cpp
Kokkos_HostThreadTeam.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostThreadTeam.cpp Kokkos_HostThreadTeam.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostThreadTeam.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostThreadTeam.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostThreadTeam.cpp
Kokkos_HostBarrier.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostBarrier.cpp Kokkos_HostBarrier.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostBarrier.cpp
@ -38,17 +36,21 @@ Kokkos_Abort.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Abort.
ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
Kokkos_Serial.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Serial/Kokkos_Serial.cpp Kokkos_Serial.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Serial/Kokkos_Serial.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Serial/Kokkos_Serial.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Serial/Kokkos_Serial.cpp
ifneq ($(KOKKOS_INTERNAL_DISABLE_DEPRECATED_CODE), 1)
Kokkos_Serial_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Serial/Kokkos_Serial_Task.cpp Kokkos_Serial_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Serial/Kokkos_Serial_Task.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Serial/Kokkos_Serial_Task.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Serial/Kokkos_Serial_Task.cpp
endif endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
Kokkos_Cuda_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Instance.cpp Kokkos_Cuda_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Instance.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Instance.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Instance.cpp
Kokkos_CudaSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_CudaSpace.cpp Kokkos_CudaSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_CudaSpace.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_CudaSpace.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_CudaSpace.cpp
ifneq ($(KOKKOS_INTERNAL_DISABLE_DEPRECATED_CODE), 1)
Kokkos_Cuda_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Task.cpp Kokkos_Cuda_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Task.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Task.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Task.cpp
endif
Lock_Array_CUDA.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_CUDA.cpp Lock_Array_CUDA.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_CUDA.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_CUDA.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_CUDA.cpp
endif endif
@ -73,6 +75,8 @@ Kokkos_HIP_Space.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Space.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Space.cpp
Kokkos_HIP_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Instance.cpp Kokkos_HIP_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Instance.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Instance.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Instance.cpp
Kokkos_HIP_ZeroMemset.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_ZeroMemset.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_ZeroMemset.cpp
Lock_Array_HIP.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_HIP.cpp Lock_Array_HIP.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_HIP.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_HIP.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_HIP.cpp
endif endif
@ -89,26 +93,26 @@ Kokkos_OpenMP.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_Ope
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP.cpp
Kokkos_OpenMP_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Instance.cpp Kokkos_OpenMP_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Instance.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Instance.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Instance.cpp
ifneq ($(KOKKOS_INTERNAL_DISABLE_DEPRECATED_CODE), 1)
Kokkos_OpenMP_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Task.cpp Kokkos_OpenMP_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Task.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Task.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Task.cpp
endif endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1)
Kokkos_HPX.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HPX/Kokkos_HPX.cpp Kokkos_HPX.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HPX/Kokkos_HPX.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HPX/Kokkos_HPX.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HPX/Kokkos_HPX.cpp
ifneq ($(KOKKOS_INTERNAL_DISABLE_DEPRECATED_CODE), 1)
Kokkos_HPX_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HPX/Kokkos_HPX_Task.cpp Kokkos_HPX_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HPX/Kokkos_HPX_Task.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HPX/Kokkos_HPX_Task.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HPX/Kokkos_HPX_Task.cpp
endif endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
Kokkos_OpenMPTarget_Exec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp
Kokkos_OpenMPTarget_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.cpp Kokkos_OpenMPTarget_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.cpp
Kokkos_OpenMPTargetSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp Kokkos_OpenMPTargetSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp
Kokkos_OpenMPTarget_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Task.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Task.cpp
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENACC), 1) ifeq ($(KOKKOS_INTERNAL_USE_OPENACC), 1)

View File

@ -30,12 +30,12 @@ To start learning about Kokkos:
The latest release of Kokkos can be obtained from the [GitHub releases page](https://github.com/kokkos/kokkos/releases/latest). The latest release of Kokkos can be obtained from the [GitHub releases page](https://github.com/kokkos/kokkos/releases/latest).
The current release is [4.3.01](https://github.com/kokkos/kokkos/releases/tag/4.3.01). The current release is [4.5.01](https://github.com/kokkos/kokkos/releases/tag/4.5.01).
```bash ```bash
curl -OJ -L https://github.com/kokkos/kokkos/archive/refs/tags/4.3.01.tar.gz curl -OJ -L https://github.com/kokkos/kokkos/releases/download/4.5.01/kokkos-4.5.01.tar.gz
# Or with wget # Or with wget
wget https://github.com/kokkos/kokkos/archive/refs/tags/4.3.01.tar.gz wget https://github.com/kokkos/kokkos/releases/download/4.5.01/kokkos-4.5.01.tar.gz
``` ```
To clone the latest development version of Kokkos from GitHub: To clone the latest development version of Kokkos from GitHub:

View File

@ -1,7 +1,7 @@
IF (NOT Kokkos_INSTALL_TESTING) if(NOT Kokkos_INSTALL_TESTING)
ADD_SUBDIRECTORY(src) add_subdirectory(src)
ENDIF() endif()
# FIXME_OPENACC: temporarily disabled due to unimplemented features # FIXME_OPENACC: temporarily disabled due to unimplemented features
IF(NOT ((KOKKOS_ENABLE_OPENMPTARGET AND KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC) OR KOKKOS_ENABLE_OPENACC)) if(NOT ((KOKKOS_ENABLE_OPENMPTARGET AND KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC) OR KOKKOS_ENABLE_OPENACC))
KOKKOS_ADD_TEST_DIRECTORIES(unit_tests) kokkos_add_test_directories(unit_tests)
ENDIF() endif()

View File

@ -1,34 +1,29 @@
#I have to leave these here for tribits #I have to leave these here for tribits
KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) kokkos_include_directories(${CMAKE_CURRENT_BINARY_DIR})
KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) kokkos_include_directories(${CMAKE_CURRENT_SOURCE_DIR})
#----------------------------------------------------------------------------- #-----------------------------------------------------------------------------
FILE(GLOB ALGO_HEADERS *.hpp) file(GLOB ALGO_HEADERS *.hpp)
FILE(GLOB ALGO_SOURCES *.cpp) file(GLOB ALGO_SOURCES *.cpp)
APPEND_GLOB(ALGO_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/std_algorithms/*.hpp) append_glob(ALGO_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/std_algorithms/*.hpp)
APPEND_GLOB(ALGO_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/std_algorithms/impl/*.hpp) append_glob(ALGO_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/std_algorithms/impl/*.hpp)
INSTALL ( install(
DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/" DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/"
DESTINATION ${KOKKOS_HEADER_DIR} DESTINATION ${KOKKOS_HEADER_DIR}
FILES_MATCHING PATTERN "*.hpp" FILES_MATCHING
PATTERN "*.hpp"
) )
#----------------------------------------------------------------------------- #-----------------------------------------------------------------------------
# We have to pass the sources in here for Tribits # We have to pass the sources in here for Tribits
# These will get ignored for standalone CMake and a true interface library made # These will get ignored for standalone CMake and a true interface library made
KOKKOS_ADD_INTERFACE_LIBRARY( kokkos_add_interface_library(kokkosalgorithms NOINSTALLHEADERS ${ALGO_HEADERS} SOURCES ${ALGO_SOURCES})
kokkosalgorithms kokkos_lib_include_directories(
NOINSTALLHEADERS ${ALGO_HEADERS} kokkosalgorithms ${KOKKOS_TOP_BUILD_DIR} ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}
SOURCES ${ALGO_SOURCES}
)
KOKKOS_LIB_INCLUDE_DIRECTORIES(kokkosalgorithms
${KOKKOS_TOP_BUILD_DIR}
${CMAKE_CURRENT_BINARY_DIR}
${CMAKE_CURRENT_SOURCE_DIR}
) )
KOKKOS_LINK_TPL(kokkoscontainers PUBLIC ROCTHRUST) kokkos_link_tpl(kokkoscontainers PUBLIC ROCTHRUST)
KOKKOS_LINK_TPL(kokkoscore PUBLIC ONEDPL) kokkos_link_tpl(kokkoscore PUBLIC ONEDPL)

View File

@ -615,7 +615,7 @@ template <class DeviceType>
struct Random_UniqueIndex { struct Random_UniqueIndex {
using locks_view_type = View<int**, DeviceType>; using locks_view_type = View<int**, DeviceType>;
KOKKOS_FUNCTION KOKKOS_FUNCTION
static int get_state_idx(const locks_view_type) { static int get_state_idx(const locks_view_type&) {
KOKKOS_IF_ON_HOST( KOKKOS_IF_ON_HOST(
(return DeviceType::execution_space::impl_hardware_thread_id();)) (return DeviceType::execution_space::impl_hardware_thread_id();))
@ -665,17 +665,16 @@ struct Random_UniqueIndex<
#ifdef KOKKOS_ENABLE_SYCL #ifdef KOKKOS_ENABLE_SYCL
template <class MemorySpace> template <class MemorySpace>
struct Random_UniqueIndex< struct Random_UniqueIndex<Kokkos::Device<Kokkos::SYCL, MemorySpace>> {
Kokkos::Device<Kokkos::Experimental::SYCL, MemorySpace>> {
using locks_view_type = using locks_view_type =
View<int**, Kokkos::Device<Kokkos::Experimental::SYCL, MemorySpace>>; View<int**, Kokkos::Device<Kokkos::SYCL, MemorySpace>>;
KOKKOS_FUNCTION KOKKOS_FUNCTION
static int get_state_idx(const locks_view_type& locks_) { static int get_state_idx(const locks_view_type& locks_) {
auto item = sycl::ext::oneapi::experimental::this_nd_item<3>(); auto item = sycl::ext::oneapi::experimental::this_nd_item<3>();
std::size_t threadIdx[3] = {item.get_local_id(2), item.get_local_id(1), std::size_t threadIdx[3] = {item.get_local_id(2), item.get_local_id(1),
item.get_local_id(0)}; item.get_local_id(0)};
std::size_t blockIdx[3] = {item.get_group(2), item.get_group(1), std::size_t blockIdx[3] = {item.get_group(2), item.get_group(1),
item.get_group(0)}; item.get_group(0)};
std::size_t blockDim[3] = {item.get_local_range(2), item.get_local_range(1), std::size_t blockDim[3] = {item.get_local_range(2), item.get_local_range(1),
item.get_local_range(0)}; item.get_local_range(0)};
std::size_t gridDim[3] = { std::size_t gridDim[3] = {
@ -1121,7 +1120,7 @@ class Random_XorShift1024_Pool {
using execution_space = typename device_type::execution_space; using execution_space = typename device_type::execution_space;
using locks_type = View<int**, device_type>; using locks_type = View<int**, device_type>;
using int_view_type = View<int**, device_type>; using int_view_type = View<int**, device_type>;
using state_data_type = View<uint64_t * [16], device_type>; using state_data_type = View<uint64_t* [16], device_type>;
locks_type locks_ = {}; locks_type locks_ = {};
state_data_type state_ = {}; state_data_type state_ = {};

View File

@ -35,11 +35,11 @@ struct BinOp1D {
#endif #endif
// Construct BinOp with number of bins, minimum value and maximum value // Construct BinOp with number of bins, minimum value and maximum value
BinOp1D(int max_bins__, typename KeyViewType::const_value_type min, BinOp1D(int max_bins, typename KeyViewType::const_value_type min,
typename KeyViewType::const_value_type max) typename KeyViewType::const_value_type max)
: max_bins_(max_bins__ + 1), : max_bins_(max_bins + 1),
// Cast to double to avoid possible overflow when using integer // Cast to double to avoid possible overflow when using integer
mul_(static_cast<double>(max_bins__) / mul_(static_cast<double>(max_bins) /
(static_cast<double>(max) - static_cast<double>(min))), (static_cast<double>(max) - static_cast<double>(min))),
min_(static_cast<double>(min)) { min_(static_cast<double>(min)) {
// For integral types the number of bins may be larger than the range // For integral types the number of bins may be larger than the range
@ -47,7 +47,7 @@ struct BinOp1D {
// and then don't need to sort bins. // and then don't need to sort bins.
if (std::is_integral<typename KeyViewType::const_value_type>::value && if (std::is_integral<typename KeyViewType::const_value_type>::value &&
(static_cast<double>(max) - static_cast<double>(min)) <= (static_cast<double>(max) - static_cast<double>(min)) <=
static_cast<double>(max_bins__)) { static_cast<double>(max_bins)) {
mul_ = 1.; mul_ = 1.;
} }
} }
@ -82,16 +82,16 @@ struct BinOp3D {
BinOp3D() = delete; BinOp3D() = delete;
#endif #endif
BinOp3D(int max_bins__[], typename KeyViewType::const_value_type min[], BinOp3D(int max_bins[], typename KeyViewType::const_value_type min[],
typename KeyViewType::const_value_type max[]) { typename KeyViewType::const_value_type max[]) {
max_bins_[0] = max_bins__[0]; max_bins_[0] = max_bins[0];
max_bins_[1] = max_bins__[1]; max_bins_[1] = max_bins[1];
max_bins_[2] = max_bins__[2]; max_bins_[2] = max_bins[2];
mul_[0] = static_cast<double>(max_bins__[0]) / mul_[0] = static_cast<double>(max_bins[0]) /
(static_cast<double>(max[0]) - static_cast<double>(min[0])); (static_cast<double>(max[0]) - static_cast<double>(min[0]));
mul_[1] = static_cast<double>(max_bins__[1]) / mul_[1] = static_cast<double>(max_bins[1]) /
(static_cast<double>(max[1]) - static_cast<double>(min[1])); (static_cast<double>(max[1]) - static_cast<double>(min[1]));
mul_[2] = static_cast<double>(max_bins__[2]) / mul_[2] = static_cast<double>(max_bins[2]) /
(static_cast<double>(max[2]) - static_cast<double>(min[2])); (static_cast<double>(max[2]) - static_cast<double>(min[2]));
min_[0] = static_cast<double>(min[0]); min_[0] = static_cast<double>(min[0]);
min_[1] = static_cast<double>(min[1]); min_[1] = static_cast<double>(min[1]);

View File

@ -388,7 +388,8 @@ class BinSort {
// reasonable experimentally. // reasonable experimentally.
if (use_std_sort && bin_size > 10) { if (use_std_sort && bin_size > 10) {
KOKKOS_IF_ON_HOST( KOKKOS_IF_ON_HOST(
(std::sort(&sort_order(lower_bound), &sort_order(upper_bound), (std::sort(sort_order.data() + lower_bound,
sort_order.data() + upper_bound,
[this](int p, int q) { return bin_op(keys_rnd, p, q); });)) [this](int p, int q) { return bin_op(keys_rnd, p, q); });))
} else { } else {
for (int k = lower_bound + 1; k < upper_bound; ++k) { for (int k = lower_bound + 1; k < upper_bound; ++k) {

View File

@ -53,9 +53,13 @@ void sort(const ExecutionSpace& exec,
if constexpr (Impl::better_off_calling_std_sort_v<ExecutionSpace>) { if constexpr (Impl::better_off_calling_std_sort_v<ExecutionSpace>) {
exec.fence("Kokkos::sort without comparator use std::sort"); exec.fence("Kokkos::sort without comparator use std::sort");
auto first = ::Kokkos::Experimental::begin(view); if (view.span_is_contiguous()) {
auto last = ::Kokkos::Experimental::end(view); std::sort(view.data(), view.data() + view.size());
std::sort(first, last); } else {
auto first = ::Kokkos::Experimental::begin(view);
auto last = ::Kokkos::Experimental::end(view);
std::sort(first, last);
}
} else { } else {
Impl::sort_device_view_without_comparator(exec, view); Impl::sort_device_view_without_comparator(exec, view);
} }
@ -107,9 +111,13 @@ void sort(const ExecutionSpace& exec,
if constexpr (Impl::better_off_calling_std_sort_v<ExecutionSpace>) { if constexpr (Impl::better_off_calling_std_sort_v<ExecutionSpace>) {
exec.fence("Kokkos::sort with comparator use std::sort"); exec.fence("Kokkos::sort with comparator use std::sort");
auto first = ::Kokkos::Experimental::begin(view); if (view.span_is_contiguous()) {
auto last = ::Kokkos::Experimental::end(view); std::sort(view.data(), view.data() + view.size(), comparator);
std::sort(first, last, comparator); } else {
auto first = ::Kokkos::Experimental::begin(view);
auto last = ::Kokkos::Experimental::end(view);
std::sort(first, last, comparator);
}
} else { } else {
Impl::sort_device_view_with_comparator(exec, view, comparator); Impl::sort_device_view_with_comparator(exec, view, comparator);
} }

View File

@ -30,6 +30,7 @@
#pragma GCC diagnostic push #pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshadow" #pragma GCC diagnostic ignored "-Wshadow"
#pragma GCC diagnostic ignored "-Wsuggest-override"
#if defined(KOKKOS_COMPILER_CLANG) #if defined(KOKKOS_COMPILER_CLANG)
// Some versions of Clang fail to compile Thrust, failing with errors like // Some versions of Clang fail to compile Thrust, failing with errors like
@ -76,13 +77,10 @@ namespace Kokkos::Impl {
template <typename T> template <typename T>
constexpr inline bool is_admissible_to_kokkos_sort_by_key = constexpr inline bool is_admissible_to_kokkos_sort_by_key =
::Kokkos::is_view<T>::value&& T::rank() == 1 && ::Kokkos::is_view<T>::value && T::rank() == 1 &&
(std::is_same<typename T::traits::array_layout, (std::is_same_v<typename T::traits::array_layout, Kokkos::LayoutLeft> ||
Kokkos::LayoutLeft>::value || std::is_same_v<typename T::traits::array_layout, Kokkos::LayoutRight> ||
std::is_same<typename T::traits::array_layout, std::is_same_v<typename T::traits::array_layout, Kokkos::LayoutStride>);
Kokkos::LayoutRight>::value ||
std::is_same<typename T::traits::array_layout,
Kokkos::LayoutStride>::value);
template <class ViewType> template <class ViewType>
KOKKOS_INLINE_FUNCTION constexpr void KOKKOS_INLINE_FUNCTION constexpr void
@ -144,7 +142,7 @@ void sort_by_key_rocthrust(
#if defined(KOKKOS_ENABLE_ONEDPL) #if defined(KOKKOS_ENABLE_ONEDPL)
template <class Layout> template <class Layout>
inline constexpr bool sort_on_device_v<Kokkos::Experimental::SYCL, Layout> = inline constexpr bool sort_on_device_v<Kokkos::SYCL, Layout> =
std::is_same_v<Layout, Kokkos::LayoutLeft> || std::is_same_v<Layout, Kokkos::LayoutLeft> ||
std::is_same_v<Layout, Kokkos::LayoutRight>; std::is_same_v<Layout, Kokkos::LayoutRight>;
@ -152,7 +150,7 @@ inline constexpr bool sort_on_device_v<Kokkos::Experimental::SYCL, Layout> =
template <class KeysDataType, class... KeysProperties, class ValuesDataType, template <class KeysDataType, class... KeysProperties, class ValuesDataType,
class... ValuesProperties, class... MaybeComparator> class... ValuesProperties, class... MaybeComparator>
void sort_by_key_onedpl( void sort_by_key_onedpl(
const Kokkos::Experimental::SYCL& exec, const Kokkos::SYCL& exec,
const Kokkos::View<KeysDataType, KeysProperties...>& keys, const Kokkos::View<KeysDataType, KeysProperties...>& keys,
const Kokkos::View<ValuesDataType, ValuesProperties...>& values, const Kokkos::View<ValuesDataType, ValuesProperties...>& values,
MaybeComparator&&... maybeComparator) { MaybeComparator&&... maybeComparator) {
@ -176,7 +174,7 @@ template <typename ExecutionSpace, typename PermutationView, typename ViewType>
void applyPermutation(const ExecutionSpace& space, void applyPermutation(const ExecutionSpace& space,
const PermutationView& permutation, const PermutationView& permutation,
const ViewType& view) { const ViewType& view) {
static_assert(std::is_integral<typename PermutationView::value_type>::value); static_assert(std::is_integral_v<typename PermutationView::value_type>);
auto view_copy = Kokkos::create_mirror( auto view_copy = Kokkos::create_mirror(
Kokkos::view_alloc(space, typename ExecutionSpace::memory_space{}, Kokkos::view_alloc(space, typename ExecutionSpace::memory_space{},
@ -335,7 +333,7 @@ void sort_by_key_device_view_without_comparator(
template <class KeysDataType, class... KeysProperties, class ValuesDataType, template <class KeysDataType, class... KeysProperties, class ValuesDataType,
class... ValuesProperties> class... ValuesProperties>
void sort_by_key_device_view_without_comparator( void sort_by_key_device_view_without_comparator(
const Kokkos::Experimental::SYCL& exec, const Kokkos::SYCL& exec,
const Kokkos::View<KeysDataType, KeysProperties...>& keys, const Kokkos::View<KeysDataType, KeysProperties...>& keys,
const Kokkos::View<ValuesDataType, ValuesProperties...>& values) { const Kokkos::View<ValuesDataType, ValuesProperties...>& values) {
#ifdef KOKKOS_ONEDPL_HAS_SORT_BY_KEY #ifdef KOKKOS_ONEDPL_HAS_SORT_BY_KEY
@ -392,7 +390,7 @@ void sort_by_key_device_view_with_comparator(
template <class ComparatorType, class KeysDataType, class... KeysProperties, template <class ComparatorType, class KeysDataType, class... KeysProperties,
class ValuesDataType, class... ValuesProperties> class ValuesDataType, class... ValuesProperties>
void sort_by_key_device_view_with_comparator( void sort_by_key_device_view_with_comparator(
const Kokkos::Experimental::SYCL& exec, const Kokkos::SYCL& exec,
const Kokkos::View<KeysDataType, KeysProperties...>& keys, const Kokkos::View<KeysDataType, KeysProperties...>& keys,
const Kokkos::View<ValuesDataType, ValuesProperties...>& values, const Kokkos::View<ValuesDataType, ValuesProperties...>& values,
const ComparatorType& comparator) { const ComparatorType& comparator) {

View File

@ -34,6 +34,7 @@
#pragma GCC diagnostic push #pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshadow" #pragma GCC diagnostic ignored "-Wshadow"
#pragma GCC diagnostic ignored "-Wsuggest-override"
#if defined(KOKKOS_COMPILER_CLANG) #if defined(KOKKOS_COMPILER_CLANG)
// Some versions of Clang fail to compile Thrust, failing with errors like // Some versions of Clang fail to compile Thrust, failing with errors like
@ -146,7 +147,7 @@ void sort_via_binsort(const ExecutionSpace& exec,
bool sort_in_bins = true; bool sort_in_bins = true;
// TODO: figure out better max_bins then this ... // TODO: figure out better max_bins then this ...
int64_t max_bins = view.extent(0) / 2; int64_t max_bins = view.extent(0) / 2;
if (std::is_integral<typename ViewType::non_const_value_type>::value) { if (std::is_integral_v<typename ViewType::non_const_value_type>) {
// Cast to double to avoid possible overflow when using integer // Cast to double to avoid possible overflow when using integer
auto const max_val = static_cast<double>(result.max_val); auto const max_val = static_cast<double>(result.max_val);
auto const min_val = static_cast<double>(result.min_val); auto const min_val = static_cast<double>(result.min_val);
@ -157,7 +158,7 @@ void sort_via_binsort(const ExecutionSpace& exec,
sort_in_bins = false; sort_in_bins = false;
} }
} }
if (std::is_floating_point<typename ViewType::non_const_value_type>::value) { if (std::is_floating_point_v<typename ViewType::non_const_value_type>) {
KOKKOS_ASSERT(std::isfinite(static_cast<double>(result.max_val) - KOKKOS_ASSERT(std::isfinite(static_cast<double>(result.max_val) -
static_cast<double>(result.min_val))); static_cast<double>(result.min_val)));
} }
@ -211,11 +212,11 @@ void sort_rocthrust(const HIP& space,
#if defined(KOKKOS_ENABLE_ONEDPL) #if defined(KOKKOS_ENABLE_ONEDPL)
template <class DataType, class... Properties, class... MaybeComparator> template <class DataType, class... Properties, class... MaybeComparator>
void sort_onedpl(const Kokkos::Experimental::SYCL& space, void sort_onedpl(const Kokkos::SYCL& space,
const Kokkos::View<DataType, Properties...>& view, const Kokkos::View<DataType, Properties...>& view,
MaybeComparator&&... maybeComparator) { MaybeComparator&&... maybeComparator) {
using ViewType = Kokkos::View<DataType, Properties...>; using ViewType = Kokkos::View<DataType, Properties...>;
static_assert(SpaceAccessibility<Kokkos::Experimental::SYCL, static_assert(SpaceAccessibility<Kokkos::SYCL,
typename ViewType::memory_space>::accessible, typename ViewType::memory_space>::accessible,
"SYCL execution space is not able to access the memory space " "SYCL execution space is not able to access the memory space "
"of the View argument!"); "of the View argument!");
@ -268,19 +269,29 @@ void copy_to_host_run_stdsort_copy_back(
KE::copy(exec, view, view_dc); KE::copy(exec, view, view_dc);
// run sort on the mirror of view_dc // run sort on the mirror of view_dc
auto mv_h = create_mirror_view_and_copy(Kokkos::HostSpace(), view_dc); auto mv_h = create_mirror_view_and_copy(Kokkos::HostSpace(), view_dc);
auto first = KE::begin(mv_h); if (view.span_is_contiguous()) {
auto last = KE::end(mv_h); std::sort(mv_h.data(), mv_h.data() + mv_h.size(),
std::sort(first, last, std::forward<MaybeComparator>(maybeComparator)...); std::forward<MaybeComparator>(maybeComparator)...);
} else {
auto first = KE::begin(mv_h);
auto last = KE::end(mv_h);
std::sort(first, last, std::forward<MaybeComparator>(maybeComparator)...);
}
Kokkos::deep_copy(exec, view_dc, mv_h); Kokkos::deep_copy(exec, view_dc, mv_h);
// copy back to argument view // copy back to argument view
KE::copy(exec, KE::cbegin(view_dc), KE::cend(view_dc), KE::begin(view)); KE::copy(exec, KE::cbegin(view_dc), KE::cend(view_dc), KE::begin(view));
} else { } else {
auto view_h = create_mirror_view_and_copy(Kokkos::HostSpace(), view); auto view_h = create_mirror_view_and_copy(Kokkos::HostSpace(), view);
auto first = KE::begin(view_h); if (view.span_is_contiguous()) {
auto last = KE::end(view_h); std::sort(view_h.data(), view_h.data() + view_h.size(),
std::sort(first, last, std::forward<MaybeComparator>(maybeComparator)...); std::forward<MaybeComparator>(maybeComparator)...);
} else {
auto first = KE::begin(view_h);
auto last = KE::end(view_h);
std::sort(first, last, std::forward<MaybeComparator>(maybeComparator)...);
}
Kokkos::deep_copy(exec, view, view_h); Kokkos::deep_copy(exec, view, view_h);
} }
} }
@ -310,7 +321,7 @@ void sort_device_view_without_comparator(
#if defined(KOKKOS_ENABLE_ONEDPL) #if defined(KOKKOS_ENABLE_ONEDPL)
template <class DataType, class... Properties> template <class DataType, class... Properties>
void sort_device_view_without_comparator( void sort_device_view_without_comparator(
const Kokkos::Experimental::SYCL& exec, const Kokkos::SYCL& exec,
const Kokkos::View<DataType, Properties...>& view) { const Kokkos::View<DataType, Properties...>& view) {
using ViewType = Kokkos::View<DataType, Properties...>; using ViewType = Kokkos::View<DataType, Properties...>;
static_assert( static_assert(
@ -365,8 +376,7 @@ void sort_device_view_with_comparator(
#if defined(KOKKOS_ENABLE_ONEDPL) #if defined(KOKKOS_ENABLE_ONEDPL)
template <class ComparatorType, class DataType, class... Properties> template <class ComparatorType, class DataType, class... Properties>
void sort_device_view_with_comparator( void sort_device_view_with_comparator(
const Kokkos::Experimental::SYCL& exec, const Kokkos::SYCL& exec, const Kokkos::View<DataType, Properties...>& view,
const Kokkos::View<DataType, Properties...>& view,
const ComparatorType& comparator) { const ComparatorType& comparator) {
using ViewType = Kokkos::View<DataType, Properties...>; using ViewType = Kokkos::View<DataType, Properties...>;
static_assert( static_assert(
@ -397,12 +407,12 @@ sort_device_view_with_comparator(
// and then copies data back. Potentially, this can later be changed // and then copies data back. Potentially, this can later be changed
// with a better solution like our own quicksort on device or similar. // with a better solution like our own quicksort on device or similar.
using ViewType = Kokkos::View<DataType, Properties...>;
using MemSpace = typename ViewType::memory_space;
// Note with HIP unified memory this code path is still the right thing to do // Note with HIP unified memory this code path is still the right thing to do
// if we end up here when RocThrust is not enabled. // if we end up here when RocThrust is not enabled.
// The create_mirror_view_and_copy will do the right thing (no copy). // The create_mirror_view_and_copy will do the right thing (no copy).
#ifndef KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY #ifndef KOKKOS_IMPL_HIP_UNIFIED_MEMORY
using ViewType = Kokkos::View<DataType, Properties...>;
using MemSpace = typename ViewType::memory_space;
static_assert(!SpaceAccessibility<HostSpace, MemSpace>::accessible, static_assert(!SpaceAccessibility<HostSpace, MemSpace>::accessible,
"Impl::sort_device_view_with_comparator: should not be called " "Impl::sort_device_view_with_comparator: should not be called "
"on a view that is already accessible on the host"); "on a view that is already accessible on the host");

View File

@ -91,7 +91,7 @@ template <typename ExecutionSpace, typename IteratorType, typename ValueType,
int> = 0> int> = 0>
ValueType reduce(const ExecutionSpace& ex, IteratorType first, ValueType reduce(const ExecutionSpace& ex, IteratorType first,
IteratorType last, ValueType init_reduction_value) { IteratorType last, ValueType init_reduction_value) {
static_assert(std::is_move_constructible<ValueType>::value, static_assert(std::is_move_constructible_v<ValueType>,
"ValueType must be move constructible."); "ValueType must be move constructible.");
return Impl::reduce_default_functors_exespace_impl( return Impl::reduce_default_functors_exespace_impl(
@ -105,7 +105,7 @@ template <typename ExecutionSpace, typename IteratorType, typename ValueType,
ValueType reduce(const std::string& label, const ExecutionSpace& ex, ValueType reduce(const std::string& label, const ExecutionSpace& ex,
IteratorType first, IteratorType last, IteratorType first, IteratorType last,
ValueType init_reduction_value) { ValueType init_reduction_value) {
static_assert(std::is_move_constructible<ValueType>::value, static_assert(std::is_move_constructible_v<ValueType>,
"ValueType must be move constructible."); "ValueType must be move constructible.");
return Impl::reduce_default_functors_exespace_impl(label, ex, first, last, return Impl::reduce_default_functors_exespace_impl(label, ex, first, last,
@ -119,7 +119,7 @@ template <typename ExecutionSpace, typename DataType, typename... Properties,
ValueType reduce(const ExecutionSpace& ex, ValueType reduce(const ExecutionSpace& ex,
const ::Kokkos::View<DataType, Properties...>& view, const ::Kokkos::View<DataType, Properties...>& view,
ValueType init_reduction_value) { ValueType init_reduction_value) {
static_assert(std::is_move_constructible<ValueType>::value, static_assert(std::is_move_constructible_v<ValueType>,
"ValueType must be move constructible."); "ValueType must be move constructible.");
namespace KE = ::Kokkos::Experimental; namespace KE = ::Kokkos::Experimental;
@ -137,7 +137,7 @@ template <typename ExecutionSpace, typename DataType, typename... Properties,
ValueType reduce(const std::string& label, const ExecutionSpace& ex, ValueType reduce(const std::string& label, const ExecutionSpace& ex,
const ::Kokkos::View<DataType, Properties...>& view, const ::Kokkos::View<DataType, Properties...>& view,
ValueType init_reduction_value) { ValueType init_reduction_value) {
static_assert(std::is_move_constructible<ValueType>::value, static_assert(std::is_move_constructible_v<ValueType>,
"ValueType must be move constructible."); "ValueType must be move constructible.");
namespace KE = ::Kokkos::Experimental; namespace KE = ::Kokkos::Experimental;
@ -157,7 +157,7 @@ template <typename ExecutionSpace, typename IteratorType, typename ValueType,
ValueType reduce(const ExecutionSpace& ex, IteratorType first, ValueType reduce(const ExecutionSpace& ex, IteratorType first,
IteratorType last, ValueType init_reduction_value, IteratorType last, ValueType init_reduction_value,
BinaryOp joiner) { BinaryOp joiner) {
static_assert(std::is_move_constructible<ValueType>::value, static_assert(std::is_move_constructible_v<ValueType>,
"ValueType must be move constructible."); "ValueType must be move constructible.");
return Impl::reduce_custom_functors_exespace_impl( return Impl::reduce_custom_functors_exespace_impl(
@ -172,7 +172,7 @@ template <typename ExecutionSpace, typename IteratorType, typename ValueType,
ValueType reduce(const std::string& label, const ExecutionSpace& ex, ValueType reduce(const std::string& label, const ExecutionSpace& ex,
IteratorType first, IteratorType last, IteratorType first, IteratorType last,
ValueType init_reduction_value, BinaryOp joiner) { ValueType init_reduction_value, BinaryOp joiner) {
static_assert(std::is_move_constructible<ValueType>::value, static_assert(std::is_move_constructible_v<ValueType>,
"ValueType must be move constructible."); "ValueType must be move constructible.");
return Impl::reduce_custom_functors_exespace_impl( return Impl::reduce_custom_functors_exespace_impl(
@ -186,7 +186,7 @@ template <typename ExecutionSpace, typename DataType, typename... Properties,
ValueType reduce(const ExecutionSpace& ex, ValueType reduce(const ExecutionSpace& ex,
const ::Kokkos::View<DataType, Properties...>& view, const ::Kokkos::View<DataType, Properties...>& view,
ValueType init_reduction_value, BinaryOp joiner) { ValueType init_reduction_value, BinaryOp joiner) {
static_assert(std::is_move_constructible<ValueType>::value, static_assert(std::is_move_constructible_v<ValueType>,
"ValueType must be move constructible."); "ValueType must be move constructible.");
namespace KE = ::Kokkos::Experimental; namespace KE = ::Kokkos::Experimental;
@ -204,7 +204,7 @@ template <typename ExecutionSpace, typename DataType, typename... Properties,
ValueType reduce(const std::string& label, const ExecutionSpace& ex, ValueType reduce(const std::string& label, const ExecutionSpace& ex,
const ::Kokkos::View<DataType, Properties...>& view, const ::Kokkos::View<DataType, Properties...>& view,
ValueType init_reduction_value, BinaryOp joiner) { ValueType init_reduction_value, BinaryOp joiner) {
static_assert(std::is_move_constructible<ValueType>::value, static_assert(std::is_move_constructible_v<ValueType>,
"ValueType must be move constructible."); "ValueType must be move constructible.");
namespace KE = ::Kokkos::Experimental; namespace KE = ::Kokkos::Experimental;
@ -258,7 +258,7 @@ template <
KOKKOS_FUNCTION ValueType reduce(const TeamHandleType& teamHandle, KOKKOS_FUNCTION ValueType reduce(const TeamHandleType& teamHandle,
IteratorType first, IteratorType last, IteratorType first, IteratorType last,
ValueType init_reduction_value) { ValueType init_reduction_value) {
static_assert(std::is_move_constructible<ValueType>::value, static_assert(std::is_move_constructible_v<ValueType>,
"ValueType must be move constructible."); "ValueType must be move constructible.");
return Impl::reduce_default_functors_team_impl(teamHandle, first, last, return Impl::reduce_default_functors_team_impl(teamHandle, first, last,
@ -273,7 +273,7 @@ KOKKOS_FUNCTION ValueType
reduce(const TeamHandleType& teamHandle, reduce(const TeamHandleType& teamHandle,
const ::Kokkos::View<DataType, Properties...>& view, const ::Kokkos::View<DataType, Properties...>& view,
ValueType init_reduction_value) { ValueType init_reduction_value) {
static_assert(std::is_move_constructible<ValueType>::value, static_assert(std::is_move_constructible_v<ValueType>,
"ValueType must be move constructible."); "ValueType must be move constructible.");
namespace KE = ::Kokkos::Experimental; namespace KE = ::Kokkos::Experimental;
@ -294,7 +294,7 @@ KOKKOS_FUNCTION ValueType reduce(const TeamHandleType& teamHandle,
IteratorType first, IteratorType last, IteratorType first, IteratorType last,
ValueType init_reduction_value, ValueType init_reduction_value,
BinaryOp joiner) { BinaryOp joiner) {
static_assert(std::is_move_constructible<ValueType>::value, static_assert(std::is_move_constructible_v<ValueType>,
"ValueType must be move constructible."); "ValueType must be move constructible.");
return Impl::reduce_custom_functors_team_impl(teamHandle, first, last, return Impl::reduce_custom_functors_team_impl(teamHandle, first, last,
@ -309,7 +309,7 @@ KOKKOS_FUNCTION ValueType
reduce(const TeamHandleType& teamHandle, reduce(const TeamHandleType& teamHandle,
const ::Kokkos::View<DataType, Properties...>& view, const ::Kokkos::View<DataType, Properties...>& view,
ValueType init_reduction_value, BinaryOp joiner) { ValueType init_reduction_value, BinaryOp joiner) {
static_assert(std::is_move_constructible<ValueType>::value, static_assert(std::is_move_constructible_v<ValueType>,
"ValueType must be move constructible."); "ValueType must be move constructible.");
namespace KE = ::Kokkos::Experimental; namespace KE = ::Kokkos::Experimental;

View File

@ -117,7 +117,7 @@ ValueType transform_reduce(const ExecutionSpace& ex, IteratorType1 first1,
ValueType init_reduction_value, ValueType init_reduction_value,
BinaryJoinerType joiner, BinaryJoinerType joiner,
BinaryTransform transformer) { BinaryTransform transformer) {
static_assert(std::is_move_constructible<ValueType>::value, static_assert(std::is_move_constructible_v<ValueType>,
"ValueType must be move constructible."); "ValueType must be move constructible.");
return Impl::transform_reduce_custom_functors_exespace_impl( return Impl::transform_reduce_custom_functors_exespace_impl(
@ -136,7 +136,7 @@ ValueType transform_reduce(const std::string& label, const ExecutionSpace& ex,
IteratorType2 first2, ValueType init_reduction_value, IteratorType2 first2, ValueType init_reduction_value,
BinaryJoinerType joiner, BinaryJoinerType joiner,
BinaryTransform transformer) { BinaryTransform transformer) {
static_assert(std::is_move_constructible<ValueType>::value, static_assert(std::is_move_constructible_v<ValueType>,
"ValueType must be move constructible."); "ValueType must be move constructible.");
return Impl::transform_reduce_custom_functors_exespace_impl( return Impl::transform_reduce_custom_functors_exespace_impl(
@ -157,7 +157,7 @@ ValueType transform_reduce(
ValueType init_reduction_value, BinaryJoinerType joiner, ValueType init_reduction_value, BinaryJoinerType joiner,
BinaryTransform transformer) { BinaryTransform transformer) {
namespace KE = ::Kokkos::Experimental; namespace KE = ::Kokkos::Experimental;
static_assert(std::is_move_constructible<ValueType>::value, static_assert(std::is_move_constructible_v<ValueType>,
"ValueType must be move constructible."); "ValueType must be move constructible.");
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(first_view); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(first_view);
@ -182,7 +182,7 @@ ValueType transform_reduce(
ValueType init_reduction_value, BinaryJoinerType joiner, ValueType init_reduction_value, BinaryJoinerType joiner,
BinaryTransform transformer) { BinaryTransform transformer) {
namespace KE = ::Kokkos::Experimental; namespace KE = ::Kokkos::Experimental;
static_assert(std::is_move_constructible<ValueType>::value, static_assert(std::is_move_constructible_v<ValueType>,
"ValueType must be move constructible."); "ValueType must be move constructible.");
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(first_view); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(first_view);
@ -208,7 +208,7 @@ ValueType transform_reduce(const ExecutionSpace& ex, IteratorType first1,
IteratorType last1, ValueType init_reduction_value, IteratorType last1, ValueType init_reduction_value,
BinaryJoinerType joiner, BinaryJoinerType joiner,
UnaryTransform transformer) { UnaryTransform transformer) {
static_assert(std::is_move_constructible<ValueType>::value, static_assert(std::is_move_constructible_v<ValueType>,
"ValueType must be move constructible."); "ValueType must be move constructible.");
return Impl::transform_reduce_custom_functors_exespace_impl( return Impl::transform_reduce_custom_functors_exespace_impl(
@ -228,7 +228,7 @@ ValueType transform_reduce(const std::string& label, const ExecutionSpace& ex,
ValueType init_reduction_value, ValueType init_reduction_value,
BinaryJoinerType joiner, BinaryJoinerType joiner,
UnaryTransform transformer) { UnaryTransform transformer) {
static_assert(std::is_move_constructible<ValueType>::value, static_assert(std::is_move_constructible_v<ValueType>,
"ValueType must be move constructible."); "ValueType must be move constructible.");
return Impl::transform_reduce_custom_functors_exespace_impl( return Impl::transform_reduce_custom_functors_exespace_impl(
@ -248,7 +248,7 @@ ValueType transform_reduce(const ExecutionSpace& ex,
BinaryJoinerType joiner, BinaryJoinerType joiner,
UnaryTransform transformer) { UnaryTransform transformer) {
namespace KE = ::Kokkos::Experimental; namespace KE = ::Kokkos::Experimental;
static_assert(std::is_move_constructible<ValueType>::value, static_assert(std::is_move_constructible_v<ValueType>,
"ValueType must be move constructible."); "ValueType must be move constructible.");
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
@ -270,7 +270,7 @@ ValueType transform_reduce(const std::string& label, const ExecutionSpace& ex,
BinaryJoinerType joiner, BinaryJoinerType joiner,
UnaryTransform transformer) { UnaryTransform transformer) {
namespace KE = ::Kokkos::Experimental; namespace KE = ::Kokkos::Experimental;
static_assert(std::is_move_constructible<ValueType>::value, static_assert(std::is_move_constructible_v<ValueType>,
"ValueType must be move constructible."); "ValueType must be move constructible.");
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
@ -345,7 +345,7 @@ KOKKOS_FUNCTION ValueType transform_reduce(
const TeamHandleType& teamHandle, IteratorType1 first1, IteratorType1 last1, const TeamHandleType& teamHandle, IteratorType1 first1, IteratorType1 last1,
IteratorType2 first2, ValueType init_reduction_value, IteratorType2 first2, ValueType init_reduction_value,
BinaryJoinerType joiner, BinaryTransform transformer) { BinaryJoinerType joiner, BinaryTransform transformer) {
static_assert(std::is_move_constructible<ValueType>::value, static_assert(std::is_move_constructible_v<ValueType>,
"ValueType must be move constructible."); "ValueType must be move constructible.");
return Impl::transform_reduce_custom_functors_team_impl( return Impl::transform_reduce_custom_functors_team_impl(
@ -366,7 +366,7 @@ transform_reduce(const TeamHandleType& teamHandle,
ValueType init_reduction_value, BinaryJoinerType joiner, ValueType init_reduction_value, BinaryJoinerType joiner,
BinaryTransform transformer) { BinaryTransform transformer) {
namespace KE = ::Kokkos::Experimental; namespace KE = ::Kokkos::Experimental;
static_assert(std::is_move_constructible<ValueType>::value, static_assert(std::is_move_constructible_v<ValueType>,
"ValueType must be move constructible."); "ValueType must be move constructible.");
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(first_view); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(first_view);
@ -393,7 +393,7 @@ KOKKOS_FUNCTION ValueType transform_reduce(const TeamHandleType& teamHandle,
ValueType init_reduction_value, ValueType init_reduction_value,
BinaryJoinerType joiner, BinaryJoinerType joiner,
UnaryTransform transformer) { UnaryTransform transformer) {
static_assert(std::is_move_constructible<ValueType>::value, static_assert(std::is_move_constructible_v<ValueType>,
"ValueType must be move constructible."); "ValueType must be move constructible.");
return Impl::transform_reduce_custom_functors_team_impl( return Impl::transform_reduce_custom_functors_team_impl(
@ -412,7 +412,7 @@ transform_reduce(const TeamHandleType& teamHandle,
ValueType init_reduction_value, BinaryJoinerType joiner, ValueType init_reduction_value, BinaryJoinerType joiner,
UnaryTransform transformer) { UnaryTransform transformer) {
namespace KE = ::Kokkos::Experimental; namespace KE = ::Kokkos::Experimental;
static_assert(std::is_move_constructible<ValueType>::value, static_assert(std::is_move_constructible_v<ValueType>,
"ValueType must be move constructible."); "ValueType must be move constructible.");
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);

View File

@ -33,12 +33,12 @@ struct is_admissible_to_kokkos_std_algorithms : std::false_type {};
template <typename T> template <typename T>
struct is_admissible_to_kokkos_std_algorithms< struct is_admissible_to_kokkos_std_algorithms<
T, std::enable_if_t<::Kokkos::is_view<T>::value && T::rank() == 1 && T, std::enable_if_t<::Kokkos::is_view<T>::value && T::rank() == 1 &&
(std::is_same<typename T::traits::array_layout, (std::is_same_v<typename T::traits::array_layout,
Kokkos::LayoutLeft>::value || Kokkos::LayoutLeft> ||
std::is_same<typename T::traits::array_layout, std::is_same_v<typename T::traits::array_layout,
Kokkos::LayoutRight>::value || Kokkos::LayoutRight> ||
std::is_same<typename T::traits::array_layout, std::is_same_v<typename T::traits::array_layout,
Kokkos::LayoutStride>::value)>> Kokkos::LayoutStride>)>>
: std::true_type {}; : std::true_type {};
template <class ViewType> template <class ViewType>
@ -102,8 +102,8 @@ struct are_random_access_iterators;
template <class T> template <class T>
struct are_random_access_iterators<T> { struct are_random_access_iterators<T> {
static constexpr bool value = static constexpr bool value =
is_iterator_v<T> && std::is_base_of<std::random_access_iterator_tag, is_iterator_v<T> && std::is_base_of_v<std::random_access_iterator_tag,
typename T::iterator_category>::value; typename T::iterator_category>;
}; };
template <class Head, class... Tail> template <class Head, class... Tail>
@ -165,9 +165,8 @@ struct iterators_have_matching_difference_type<T> {
template <class T1, class T2> template <class T1, class T2>
struct iterators_have_matching_difference_type<T1, T2> { struct iterators_have_matching_difference_type<T1, T2> {
static constexpr bool value = static constexpr bool value = std::is_same_v<typename T1::difference_type,
std::is_same<typename T1::difference_type, typename T2::difference_type>;
typename T2::difference_type>::value;
}; };
template <class T1, class T2, class... Tail> template <class T1, class T2, class... Tail>

View File

@ -30,7 +30,7 @@ namespace Impl {
template <class IteratorType1, class IteratorType2> template <class IteratorType1, class IteratorType2>
struct StdMoveBackwardFunctor { struct StdMoveBackwardFunctor {
using index_type = typename IteratorType1::difference_type; using index_type = typename IteratorType1::difference_type;
static_assert(std::is_signed<index_type>::value, static_assert(std::is_signed_v<index_type>,
"Kokkos: StdMoveBackwardFunctor requires signed index type"); "Kokkos: StdMoveBackwardFunctor requires signed index type");
IteratorType1 m_last; IteratorType1 m_last;

View File

@ -36,18 +36,18 @@ class RandomAccessIterator< ::Kokkos::View<DataType, Args...> > {
using iterator_type = RandomAccessIterator<view_type>; using iterator_type = RandomAccessIterator<view_type>;
using iterator_category = std::random_access_iterator_tag; using iterator_category = std::random_access_iterator_tag;
using value_type = typename view_type::value_type; using value_type = typename view_type::non_const_value_type;
using difference_type = ptrdiff_t; using difference_type = ptrdiff_t;
using pointer = typename view_type::pointer_type; using pointer = typename view_type::pointer_type;
using reference = typename view_type::reference_type; using reference = typename view_type::reference_type;
static_assert(view_type::rank == 1 && static_assert(view_type::rank == 1 &&
(std::is_same<typename view_type::traits::array_layout, (std::is_same_v<typename view_type::traits::array_layout,
Kokkos::LayoutLeft>::value || Kokkos::LayoutLeft> ||
std::is_same<typename view_type::traits::array_layout, std::is_same_v<typename view_type::traits::array_layout,
Kokkos::LayoutRight>::value || Kokkos::LayoutRight> ||
std::is_same<typename view_type::traits::array_layout, std::is_same_v<typename view_type::traits::array_layout,
Kokkos::LayoutStride>::value), Kokkos::LayoutStride>),
"RandomAccessIterator only supports 1D Views with LayoutLeft, " "RandomAccessIterator only supports 1D Views with LayoutLeft, "
"LayoutRight, LayoutStride."); "LayoutRight, LayoutStride.");
@ -61,9 +61,9 @@ class RandomAccessIterator< ::Kokkos::View<DataType, Args...> > {
#ifndef KOKKOS_ENABLE_CXX17 // C++20 and beyond #ifndef KOKKOS_ENABLE_CXX17 // C++20 and beyond
template <class OtherViewType> template <class OtherViewType>
requires(std::is_constructible_v<view_type, OtherViewType>) KOKKOS_FUNCTION requires(std::is_constructible_v<view_type, OtherViewType>)
explicit(!std::is_convertible_v<OtherViewType, view_type>) KOKKOS_FUNCTION explicit(!std::is_convertible_v<OtherViewType, view_type>)
RandomAccessIterator(const RandomAccessIterator<OtherViewType>& other) RandomAccessIterator(const RandomAccessIterator<OtherViewType>& other)
: m_view(other.m_view), m_current_index(other.m_current_index) {} : m_view(other.m_view), m_current_index(other.m_current_index) {}
#else #else
template < template <

View File

@ -30,7 +30,7 @@ namespace Impl {
template <class InputIterator> template <class InputIterator>
struct StdReverseFunctor { struct StdReverseFunctor {
using index_type = typename InputIterator::difference_type; using index_type = typename InputIterator::difference_type;
static_assert(std::is_signed<index_type>::value, static_assert(std::is_signed_v<index_type>,
"Kokkos: StdReverseFunctor requires signed index type"); "Kokkos: StdReverseFunctor requires signed index type");
InputIterator m_first; InputIterator m_first;

View File

@ -30,7 +30,7 @@ namespace Impl {
template <class InputIterator, class OutputIterator> template <class InputIterator, class OutputIterator>
struct StdReverseCopyFunctor { struct StdReverseCopyFunctor {
using index_type = typename InputIterator::difference_type; using index_type = typename InputIterator::difference_type;
static_assert(std::is_signed<index_type>::value, static_assert(std::is_signed_v<index_type>,
"Kokkos: StdReverseCopyFunctor requires signed index type"); "Kokkos: StdReverseCopyFunctor requires signed index type");
InputIterator m_last; InputIterator m_last;

View File

@ -1,12 +1,10 @@
#Leave these here for now - I don't need transitive deps anyway #Leave these here for now - I don't need transitive deps anyway
KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) kokkos_include_directories(${CMAKE_CURRENT_BINARY_DIR})
KOKKOS_INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR}) kokkos_include_directories(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR})
KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src ) kokkos_include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../src)
KOKKOS_INCLUDE_DIRECTORIES(${KOKKOS_SOURCE_DIR}/core/unit_test/category_files) kokkos_include_directories(${KOKKOS_SOURCE_DIR}/core/unit_test/category_files)
set(ALGORITHM UnitTestMain.cpp)
SET(ALGORITHM UnitTestMain.cpp)
foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;HIP;SYCL;OpenMPTarget) foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;HIP;SYCL;OpenMPTarget)
string(TOUPPER ${Tag} DEVICE) string(TOUPPER ${Tag} DEVICE)
@ -23,21 +21,11 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;HIP;SYCL;OpenMPTarget)
# Generate a .cpp file for each one that runs it on the current backend (Tag), # Generate a .cpp file for each one that runs it on the current backend (Tag),
# and add this .cpp file to the sources for UnitTest_RandomAndSort. # and add this .cpp file to the sources for UnitTest_RandomAndSort.
set(ALGO_SORT_SOURCES) set(ALGO_SORT_SOURCES)
foreach(SOURCE_Input foreach(SOURCE_Input TestSort TestSortByKey TestSortCustomComp TestBinSortA TestBinSortB TestNestedSort)
TestSort
TestSortByKey
TestSortCustomComp
TestBinSortA
TestBinSortB
TestNestedSort
)
set(file ${dir}/${SOURCE_Input}.cpp) set(file ${dir}/${SOURCE_Input}.cpp)
# Write to a temporary intermediate file and call configure_file to avoid # Write to a temporary intermediate file and call configure_file to avoid
# updating timestamps triggering unnecessary rebuilds on subsequent cmake runs. # updating timestamps triggering unnecessary rebuilds on subsequent cmake runs.
file(WRITE ${dir}/dummy.cpp file(WRITE ${dir}/dummy.cpp "#include <Test${Tag}_Category.hpp>\n" "#include <${SOURCE_Input}.hpp>\n")
"#include <Test${Tag}_Category.hpp>\n"
"#include <${SOURCE_Input}.hpp>\n"
)
configure_file(${dir}/dummy.cpp ${file}) configure_file(${dir}/dummy.cpp ${file})
list(APPEND ALGO_SORT_SOURCES ${file}) list(APPEND ALGO_SORT_SOURCES ${file})
endforeach() endforeach()
@ -47,14 +35,9 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;HIP;SYCL;OpenMPTarget)
# ------------------------------------------ # ------------------------------------------
# do as above # do as above
set(ALGO_RANDOM_SOURCES) set(ALGO_RANDOM_SOURCES)
foreach(SOURCE_Input foreach(SOURCE_Input TestRandom)
TestRandom
)
set(file ${dir}/${SOURCE_Input}.cpp) set(file ${dir}/${SOURCE_Input}.cpp)
file(WRITE ${dir}/dummy.cpp file(WRITE ${dir}/dummy.cpp "#include <Test${Tag}_Category.hpp>\n" "#include <${SOURCE_Input}.hpp>\n")
"#include <Test${Tag}_Category.hpp>\n"
"#include <${SOURCE_Input}.hpp>\n"
)
configure_file(${dir}/dummy.cpp ${file}) configure_file(${dir}/dummy.cpp ${file})
list(APPEND ALGO_RANDOM_SOURCES ${file}) list(APPEND ALGO_RANDOM_SOURCES ${file})
endforeach() endforeach()
@ -65,11 +48,7 @@ endforeach()
# std set A # std set A
# ------------------------------------------ # ------------------------------------------
set(STDALGO_SOURCES_A) set(STDALGO_SOURCES_A)
foreach(Name foreach(Name StdReducers StdAlgorithmsConstraints RandomAccessIterator)
StdReducers
StdAlgorithmsConstraints
RandomAccessIterator
)
list(APPEND STDALGO_SOURCES_A Test${Name}.cpp) list(APPEND STDALGO_SOURCES_A Test${Name}.cpp)
endforeach() endforeach()
@ -77,10 +56,7 @@ endforeach()
# std set B # std set B
# ------------------------------------------ # ------------------------------------------
set(STDALGO_SOURCES_B) set(STDALGO_SOURCES_B)
foreach(Name foreach(Name StdAlgorithmsCommon StdAlgorithmsMinMaxElementOps)
StdAlgorithmsCommon
StdAlgorithmsMinMaxElementOps
)
list(APPEND STDALGO_SOURCES_B Test${Name}.cpp) list(APPEND STDALGO_SOURCES_B Test${Name}.cpp)
endforeach() endforeach()
@ -88,22 +64,23 @@ endforeach()
# std set C # std set C
# ------------------------------------------ # ------------------------------------------
set(STDALGO_SOURCES_C) set(STDALGO_SOURCES_C)
foreach(Name foreach(
StdAlgorithmsCommon Name
StdAlgorithmsLexicographicalCompare StdAlgorithmsCommon
StdAlgorithmsForEach StdAlgorithmsLexicographicalCompare
StdAlgorithmsFind StdAlgorithmsForEach
StdAlgorithmsFindFirstOf StdAlgorithmsFind
StdAlgorithmsFindEnd StdAlgorithmsFindFirstOf
StdAlgorithmsCount StdAlgorithmsFindEnd
StdAlgorithmsEqual StdAlgorithmsCount
StdAlgorithmsAllAnyNoneOf StdAlgorithmsEqual
StdAlgorithmsAdjacentFind StdAlgorithmsAllAnyNoneOf
StdAlgorithmsSearch StdAlgorithmsAdjacentFind
StdAlgorithmsSearch_n StdAlgorithmsSearch
StdAlgorithmsMismatch StdAlgorithmsSearch_n
StdAlgorithmsMoveBackward StdAlgorithmsMismatch
) StdAlgorithmsMoveBackward
)
list(APPEND STDALGO_SOURCES_C Test${Name}.cpp) list(APPEND STDALGO_SOURCES_C Test${Name}.cpp)
endforeach() endforeach()
@ -111,27 +88,28 @@ endforeach()
# std set D # std set D
# ------------------------------------------ # ------------------------------------------
set(STDALGO_SOURCES_D) set(STDALGO_SOURCES_D)
foreach(Name foreach(
StdAlgorithmsCommon Name
StdAlgorithmsModOps StdAlgorithmsCommon
StdAlgorithmsModSeqOps StdAlgorithmsModOps
StdAlgorithmsReplace StdAlgorithmsModSeqOps
StdAlgorithmsReplaceIf StdAlgorithmsReplace
StdAlgorithmsReplaceCopy StdAlgorithmsReplaceIf
StdAlgorithmsReplaceCopyIf StdAlgorithmsReplaceCopy
StdAlgorithmsCopyIf StdAlgorithmsReplaceCopyIf
StdAlgorithmsUnique StdAlgorithmsCopyIf
StdAlgorithmsUniqueCopy StdAlgorithmsUnique
StdAlgorithmsRemove StdAlgorithmsUniqueCopy
StdAlgorithmsRemoveIf StdAlgorithmsRemove
StdAlgorithmsRemoveCopy StdAlgorithmsRemoveIf
StdAlgorithmsRemoveCopyIf StdAlgorithmsRemoveCopy
StdAlgorithmsRotate StdAlgorithmsRemoveCopyIf
StdAlgorithmsRotateCopy StdAlgorithmsRotate
StdAlgorithmsReverse StdAlgorithmsRotateCopy
StdAlgorithmsShiftLeft StdAlgorithmsReverse
StdAlgorithmsShiftRight StdAlgorithmsShiftLeft
) StdAlgorithmsShiftRight
)
list(APPEND STDALGO_SOURCES_D Test${Name}.cpp) list(APPEND STDALGO_SOURCES_D Test${Name}.cpp)
endforeach() endforeach()
@ -139,20 +117,21 @@ endforeach()
# std set E # std set E
# ------------------------------------------ # ------------------------------------------
set(STDALGO_SOURCES_E) set(STDALGO_SOURCES_E)
foreach(Name foreach(
StdAlgorithmsCommon Name
StdAlgorithmsIsSorted StdAlgorithmsCommon
StdAlgorithmsIsSortedUntil StdAlgorithmsIsSorted
StdAlgorithmsPartitioningOps StdAlgorithmsIsSortedUntil
StdAlgorithmsPartitionCopy StdAlgorithmsPartitioningOps
StdAlgorithmsNumerics StdAlgorithmsPartitionCopy
StdAlgorithmsAdjacentDifference StdAlgorithmsNumerics
StdAlgorithmsExclusiveScan StdAlgorithmsAdjacentDifference
StdAlgorithmsInclusiveScan StdAlgorithmsExclusiveScan
StdAlgorithmsTransformUnaryOp StdAlgorithmsInclusiveScan
StdAlgorithmsTransformExclusiveScan StdAlgorithmsTransformUnaryOp
StdAlgorithmsTransformInclusiveScan StdAlgorithmsTransformExclusiveScan
) StdAlgorithmsTransformInclusiveScan
)
list(APPEND STDALGO_SOURCES_E Test${Name}.cpp) list(APPEND STDALGO_SOURCES_E Test${Name}.cpp)
endforeach() endforeach()
@ -160,11 +139,7 @@ endforeach()
# std team Q # std team Q
# ------------------------------------------ # ------------------------------------------
set(STDALGO_TEAM_SOURCES_Q) set(STDALGO_TEAM_SOURCES_Q)
foreach(Name foreach(Name StdAlgorithmsCommon StdAlgorithmsTeamInclusiveScan StdAlgorithmsTeamTransformInclusiveScan)
StdAlgorithmsCommon
StdAlgorithmsTeamInclusiveScan
StdAlgorithmsTeamTransformInclusiveScan
)
list(APPEND STDALGO_TEAM_SOURCES_Q Test${Name}.cpp) list(APPEND STDALGO_TEAM_SOURCES_Q Test${Name}.cpp)
endforeach() endforeach()
@ -172,11 +147,7 @@ endforeach()
# std team P # std team P
# ------------------------------------------ # ------------------------------------------
set(STDALGO_TEAM_SOURCES_P) set(STDALGO_TEAM_SOURCES_P)
foreach(Name foreach(Name StdAlgorithmsCommon StdAlgorithmsTeamExclusiveScan StdAlgorithmsTeamTransformExclusiveScan)
StdAlgorithmsCommon
StdAlgorithmsTeamExclusiveScan
StdAlgorithmsTeamTransformExclusiveScan
)
list(APPEND STDALGO_TEAM_SOURCES_P Test${Name}.cpp) list(APPEND STDALGO_TEAM_SOURCES_P Test${Name}.cpp)
endforeach() endforeach()
@ -184,14 +155,9 @@ endforeach()
# std team M # std team M
# ------------------------------------------ # ------------------------------------------
set(STDALGO_TEAM_SOURCES_M) set(STDALGO_TEAM_SOURCES_M)
foreach(Name foreach(Name StdAlgorithmsCommon StdAlgorithmsTeamTransformUnaryOp StdAlgorithmsTeamTransformBinaryOp
StdAlgorithmsCommon StdAlgorithmsTeamGenerate StdAlgorithmsTeamGenerate_n StdAlgorithmsTeamSwapRanges
StdAlgorithmsTeamTransformUnaryOp )
StdAlgorithmsTeamTransformBinaryOp
StdAlgorithmsTeamGenerate
StdAlgorithmsTeamGenerate_n
StdAlgorithmsTeamSwapRanges
)
list(APPEND STDALGO_TEAM_SOURCES_M Test${Name}.cpp) list(APPEND STDALGO_TEAM_SOURCES_M Test${Name}.cpp)
endforeach() endforeach()
@ -199,14 +165,9 @@ endforeach()
# std team L # std team L
# ------------------------------------------ # ------------------------------------------
set(STDALGO_TEAM_SOURCES_L) set(STDALGO_TEAM_SOURCES_L)
foreach(Name foreach(Name StdAlgorithmsCommon StdAlgorithmsTeamIsSorted StdAlgorithmsTeamIsSortedUntil
StdAlgorithmsCommon StdAlgorithmsTeamIsPartitioned StdAlgorithmsTeamPartitionCopy StdAlgorithmsTeamPartitionPoint
StdAlgorithmsTeamIsSorted )
StdAlgorithmsTeamIsSortedUntil
StdAlgorithmsTeamIsPartitioned
StdAlgorithmsTeamPartitionCopy
StdAlgorithmsTeamPartitionPoint
)
list(APPEND STDALGO_TEAM_SOURCES_L Test${Name}.cpp) list(APPEND STDALGO_TEAM_SOURCES_L Test${Name}.cpp)
endforeach() endforeach()
@ -214,13 +175,9 @@ endforeach()
# std team I # std team I
# ------------------------------------------ # ------------------------------------------
set(STDALGO_TEAM_SOURCES_I) set(STDALGO_TEAM_SOURCES_I)
foreach(Name foreach(Name StdAlgorithmsCommon StdAlgorithmsTeamUnique StdAlgorithmsTeamAdjacentDifference StdAlgorithmsTeamReduce
StdAlgorithmsCommon StdAlgorithmsTeamTransformReduce
StdAlgorithmsTeamUnique )
StdAlgorithmsTeamAdjacentDifference
StdAlgorithmsTeamReduce
StdAlgorithmsTeamTransformReduce
)
list(APPEND STDALGO_TEAM_SOURCES_I Test${Name}.cpp) list(APPEND STDALGO_TEAM_SOURCES_I Test${Name}.cpp)
endforeach() endforeach()
@ -228,18 +185,19 @@ endforeach()
# std team H # std team H
# ------------------------------------------ # ------------------------------------------
set(STDALGO_TEAM_SOURCES_H) set(STDALGO_TEAM_SOURCES_H)
foreach(Name foreach(
StdAlgorithmsCommon Name
StdAlgorithmsTeamCopy StdAlgorithmsCommon
StdAlgorithmsTeamCopy_n StdAlgorithmsTeamCopy
StdAlgorithmsTeamCopyBackward StdAlgorithmsTeamCopy_n
StdAlgorithmsTeamCopyIf StdAlgorithmsTeamCopyBackward
StdAlgorithmsTeamUniqueCopy StdAlgorithmsTeamCopyIf
StdAlgorithmsTeamRemove StdAlgorithmsTeamUniqueCopy
StdAlgorithmsTeamRemoveIf StdAlgorithmsTeamRemove
StdAlgorithmsTeamRemoveCopy StdAlgorithmsTeamRemoveIf
StdAlgorithmsTeamRemoveCopyIf StdAlgorithmsTeamRemoveCopy
) StdAlgorithmsTeamRemoveCopyIf
)
list(APPEND STDALGO_TEAM_SOURCES_H Test${Name}.cpp) list(APPEND STDALGO_TEAM_SOURCES_H Test${Name}.cpp)
endforeach() endforeach()
@ -247,13 +205,9 @@ endforeach()
# std team G # std team G
# ------------------------------------------ # ------------------------------------------
set(STDALGO_TEAM_SOURCES_G) set(STDALGO_TEAM_SOURCES_G)
foreach(Name foreach(Name StdAlgorithmsCommon StdAlgorithmsTeamMove StdAlgorithmsTeamMoveBackward StdAlgorithmsTeamShiftLeft
StdAlgorithmsCommon StdAlgorithmsTeamShiftRight
StdAlgorithmsTeamMove )
StdAlgorithmsTeamMoveBackward
StdAlgorithmsTeamShiftLeft
StdAlgorithmsTeamShiftRight
)
list(APPEND STDALGO_TEAM_SOURCES_G Test${Name}.cpp) list(APPEND STDALGO_TEAM_SOURCES_G Test${Name}.cpp)
endforeach() endforeach()
@ -261,13 +215,9 @@ endforeach()
# std team F # std team F
# ------------------------------------------ # ------------------------------------------
set(STDALGO_TEAM_SOURCES_F) set(STDALGO_TEAM_SOURCES_F)
foreach(Name foreach(Name StdAlgorithmsCommon StdAlgorithmsTeamReverse StdAlgorithmsTeamReverseCopy StdAlgorithmsTeamRotate
StdAlgorithmsCommon StdAlgorithmsTeamRotateCopy
StdAlgorithmsTeamReverse )
StdAlgorithmsTeamReverseCopy
StdAlgorithmsTeamRotate
StdAlgorithmsTeamRotateCopy
)
list(APPEND STDALGO_TEAM_SOURCES_F Test${Name}.cpp) list(APPEND STDALGO_TEAM_SOURCES_F Test${Name}.cpp)
endforeach() endforeach()
@ -275,15 +225,16 @@ endforeach()
# std team E # std team E
# ------------------------------------------ # ------------------------------------------
set(STDALGO_TEAM_SOURCES_E) set(STDALGO_TEAM_SOURCES_E)
foreach(Name foreach(
StdAlgorithmsCommon Name
StdAlgorithmsTeamFill StdAlgorithmsCommon
StdAlgorithmsTeamFill_n StdAlgorithmsTeamFill
StdAlgorithmsTeamReplace StdAlgorithmsTeamFill_n
StdAlgorithmsTeamReplaceIf StdAlgorithmsTeamReplace
StdAlgorithmsTeamReplaceCopy StdAlgorithmsTeamReplaceIf
StdAlgorithmsTeamReplaceCopyIf StdAlgorithmsTeamReplaceCopy
) StdAlgorithmsTeamReplaceCopyIf
)
list(APPEND STDALGO_TEAM_SOURCES_E Test${Name}.cpp) list(APPEND STDALGO_TEAM_SOURCES_E Test${Name}.cpp)
endforeach() endforeach()
@ -291,12 +242,7 @@ endforeach()
# std team D # std team D
# ------------------------------------------ # ------------------------------------------
set(STDALGO_TEAM_SOURCES_D) set(STDALGO_TEAM_SOURCES_D)
foreach(Name foreach(Name StdAlgorithmsCommon StdAlgorithmsTeamMinElement StdAlgorithmsTeamMaxElement StdAlgorithmsTeamMinMaxElement)
StdAlgorithmsCommon
StdAlgorithmsTeamMinElement
StdAlgorithmsTeamMaxElement
StdAlgorithmsTeamMinMaxElement
)
list(APPEND STDALGO_TEAM_SOURCES_D Test${Name}.cpp) list(APPEND STDALGO_TEAM_SOURCES_D Test${Name}.cpp)
endforeach() endforeach()
@ -304,16 +250,17 @@ endforeach()
# std team C # std team C
# ------------------------------------------ # ------------------------------------------
set(STDALGO_TEAM_SOURCES_C) set(STDALGO_TEAM_SOURCES_C)
foreach(Name foreach(
StdAlgorithmsCommon Name
StdAlgorithmsTeamFind StdAlgorithmsCommon
StdAlgorithmsTeamFindIf StdAlgorithmsTeamFind
StdAlgorithmsTeamFindIfNot StdAlgorithmsTeamFindIf
StdAlgorithmsTeamAllOf StdAlgorithmsTeamFindIfNot
StdAlgorithmsTeamAnyOf StdAlgorithmsTeamAllOf
StdAlgorithmsTeamNoneOf StdAlgorithmsTeamAnyOf
StdAlgorithmsTeamSearchN StdAlgorithmsTeamNoneOf
) StdAlgorithmsTeamSearchN
)
list(APPEND STDALGO_TEAM_SOURCES_C Test${Name}.cpp) list(APPEND STDALGO_TEAM_SOURCES_C Test${Name}.cpp)
endforeach() endforeach()
@ -321,13 +268,9 @@ endforeach()
# std team B # std team B
# ------------------------------------------ # ------------------------------------------
set(STDALGO_TEAM_SOURCES_B) set(STDALGO_TEAM_SOURCES_B)
foreach(Name foreach(Name StdAlgorithmsCommon StdAlgorithmsTeamEqual StdAlgorithmsTeamSearch StdAlgorithmsTeamFindEnd
StdAlgorithmsCommon StdAlgorithmsTeamFindFirstOf
StdAlgorithmsTeamEqual )
StdAlgorithmsTeamSearch
StdAlgorithmsTeamFindEnd
StdAlgorithmsTeamFindFirstOf
)
list(APPEND STDALGO_TEAM_SOURCES_B Test${Name}.cpp) list(APPEND STDALGO_TEAM_SOURCES_B Test${Name}.cpp)
endforeach() endforeach()
@ -335,34 +278,33 @@ endforeach()
# std team A # std team A
# ------------------------------------------ # ------------------------------------------
set(STDALGO_TEAM_SOURCES_A) set(STDALGO_TEAM_SOURCES_A)
foreach(Name foreach(
StdAlgorithmsCommon Name
StdAlgorithmsTeamAdjacentFind StdAlgorithmsCommon
StdAlgorithmsTeamCount StdAlgorithmsTeamAdjacentFind
StdAlgorithmsTeamCountIf StdAlgorithmsTeamCount
StdAlgorithmsTeamForEach StdAlgorithmsTeamCountIf
StdAlgorithmsTeamForEachN StdAlgorithmsTeamForEach
StdAlgorithmsTeamLexicographicalCompare StdAlgorithmsTeamForEachN
StdAlgorithmsTeamMismatch StdAlgorithmsTeamLexicographicalCompare
) StdAlgorithmsTeamMismatch
)
list(APPEND STDALGO_TEAM_SOURCES_A Test${Name}.cpp) list(APPEND STDALGO_TEAM_SOURCES_A Test${Name}.cpp)
endforeach() endforeach()
# FIXME_OPENMPTARGET - remove sort test as it leads to ICE with clang/16 and above at compile time. # FIXME_OPENMPTARGET - remove sort test as it leads to ICE with clang/16 and above at compile time.
if(KOKKOS_ENABLE_OPENMPTARGET AND KOKKOS_CXX_COMPILER_ID STREQUAL "Clang" AND KOKKOS_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 16.0.0) if(KOKKOS_ENABLE_OPENMPTARGET AND KOKKOS_CXX_COMPILER_ID STREQUAL "Clang" AND KOKKOS_CXX_COMPILER_VERSION
list(REMOVE_ITEM ALGO_SORT_SOURCES VERSION_GREATER_EQUAL 16.0.0
TestSort.cpp )
) list(REMOVE_ITEM ALGO_SORT_SOURCES TestSort.cpp)
endif() endif()
# FIXME_OPENMPTARGET remove tests for OpenMPTarget because in these cases # FIXME_OPENMPTARGET remove tests for OpenMPTarget because in these cases
# the impl needs to use either Kokkos or tailored reducers # the impl needs to use either Kokkos or tailored reducers
# which results in runtime memory errors. # which results in runtime memory errors.
if(KOKKOS_ENABLE_OPENMPTARGET) if(KOKKOS_ENABLE_OPENMPTARGET)
list(REMOVE_ITEM STDALGO_TEAM_SOURCES_L list(REMOVE_ITEM STDALGO_TEAM_SOURCES_L TestStdAlgorithmsTeamIsPartitioned.cpp
TestStdAlgorithmsTeamIsPartitioned.cpp TestStdAlgorithmsTeamPartitionPoint.cpp TestStdAlgorithmsTeamPartitionCopy.cpp
TestStdAlgorithmsTeamPartitionPoint.cpp
TestStdAlgorithmsTeamPartitionCopy.cpp
) )
endif() endif()
@ -370,7 +312,9 @@ endif()
# in these cases the impl needs to use either Kokkos or # in these cases the impl needs to use either Kokkos or
# tailored reducers which results in runtime memory errors. # tailored reducers which results in runtime memory errors.
if(KOKKOS_ENABLE_OPENMPTARGET) if(KOKKOS_ENABLE_OPENMPTARGET)
list(REMOVE_ITEM STDALGO_TEAM_SOURCES_C list(
REMOVE_ITEM
STDALGO_TEAM_SOURCES_C
TestStdAlgorithmsTeamFind.cpp TestStdAlgorithmsTeamFind.cpp
TestStdAlgorithmsTeamFindIf.cpp TestStdAlgorithmsTeamFindIf.cpp
TestStdAlgorithmsTeamFindIfNot.cpp TestStdAlgorithmsTeamFindIfNot.cpp
@ -386,35 +330,20 @@ endif()
# FRIZZI: 04/26/2023: not sure if the compilation error is still applicable # FRIZZI: 04/26/2023: not sure if the compilation error is still applicable
# but we conservatively leave this guard on # but we conservatively leave this guard on
if(NOT (KOKKOS_ENABLE_OPENMPTARGET AND KOKKOS_CXX_COMPILER_ID STREQUAL IntelLLVM)) if(NOT (KOKKOS_ENABLE_OPENMPTARGET AND KOKKOS_CXX_COMPILER_ID STREQUAL IntelLLVM))
KOKKOS_ADD_EXECUTABLE_AND_TEST( kokkos_add_executable_and_test(
UnitTest_Sort UnitTest_Sort SOURCES UnitTestMain.cpp TestStdAlgorithmsCommon.cpp ${ALGO_SORT_SOURCES}
SOURCES
UnitTestMain.cpp
TestStdAlgorithmsCommon.cpp
${ALGO_SORT_SOURCES}
) )
KOKKOS_ADD_EXECUTABLE_AND_TEST( kokkos_add_executable_and_test(UnitTest_Random SOURCES UnitTestMain.cpp ${ALGO_RANDOM_SOURCES})
UnitTest_Random
SOURCES
UnitTestMain.cpp
${ALGO_RANDOM_SOURCES}
)
endif() endif()
# FIXME_OPENMPTARGET: These tests cause internal compiler errors as of 09/01/22 # FIXME_OPENMPTARGET: These tests cause internal compiler errors as of 09/01/22
# when compiling for Intel's Xe-HP GPUs. # when compiling for Intel's Xe-HP GPUs.
if(KOKKOS_ENABLE_OPENMPTARGET AND KOKKOS_CXX_COMPILER_ID STREQUAL IntelLLVM) if(KOKKOS_ENABLE_OPENMPTARGET AND KOKKOS_CXX_COMPILER_ID STREQUAL IntelLLVM)
list(REMOVE_ITEM STDALGO_SOURCES_D list(REMOVE_ITEM STDALGO_SOURCES_D TestStdAlgorithmsCopyIf.cpp TestStdAlgorithmsRemoveCopy.cpp
TestStdAlgorithmsCopyIf.cpp TestStdAlgorithmsUnique.cpp TestStdAlgorithmsUniqueCopy.cpp
TestStdAlgorithmsRemoveCopy.cpp
TestStdAlgorithmsUnique.cpp
TestStdAlgorithmsUniqueCopy.cpp
)
list(REMOVE_ITEM STDALGO_SOURCES_E
TestStdAlgorithmsExclusiveScan.cpp
TestStdAlgorithmsInclusiveScan.cpp
) )
list(REMOVE_ITEM STDALGO_SOURCES_E TestStdAlgorithmsExclusiveScan.cpp TestStdAlgorithmsInclusiveScan.cpp)
endif() endif()
# FIXME_OPENMPTARGET remove tests for OpenMPTarget # FIXME_OPENMPTARGET remove tests for OpenMPTarget
@ -422,48 +351,31 @@ endif()
if(KOKKOS_ENABLE_OPENMPTARGET) if(KOKKOS_ENABLE_OPENMPTARGET)
# the following use either Kokkos or tailored reducers # the following use either Kokkos or tailored reducers
# which results in runtime memory errors. # which results in runtime memory errors.
list(REMOVE_ITEM STDALGO_TEAM_SOURCES_B list(REMOVE_ITEM STDALGO_TEAM_SOURCES_B TestStdAlgorithmsTeamFindEnd.cpp TestStdAlgorithmsTeamFindFirstOf.cpp
TestStdAlgorithmsTeamFindEnd.cpp TestStdAlgorithmsTeamSearch.cpp
TestStdAlgorithmsTeamFindFirstOf.cpp
TestStdAlgorithmsTeamSearch.cpp
) )
list(REMOVE_ITEM STDALGO_TEAM_SOURCES_A list(REMOVE_ITEM STDALGO_TEAM_SOURCES_A TestStdAlgorithmsTeamAdjacentFind.cpp
TestStdAlgorithmsTeamAdjacentFind.cpp TestStdAlgorithmsTeamLexicographicalCompare.cpp TestStdAlgorithmsTeamMismatch.cpp
TestStdAlgorithmsTeamLexicographicalCompare.cpp
TestStdAlgorithmsTeamMismatch.cpp
) )
# this causes an illegal memory access if team_members_have_matching_result # this causes an illegal memory access if team_members_have_matching_result
# is called # is called
list(REMOVE_ITEM STDALGO_TEAM_SOURCES_M list(REMOVE_ITEM STDALGO_TEAM_SOURCES_M TestStdAlgorithmsTeamTransformBinaryOp.cpp)
TestStdAlgorithmsTeamTransformBinaryOp.cpp
)
endif() endif()
foreach(ID A;B;C;D;E) foreach(ID A;B;C;D;E)
KOKKOS_ADD_EXECUTABLE_AND_TEST( kokkos_add_executable_and_test(AlgorithmsUnitTest_StdSet_${ID} SOURCES UnitTestMain.cpp ${STDALGO_SOURCES_${ID}})
AlgorithmsUnitTest_StdSet_${ID}
SOURCES
UnitTestMain.cpp
${STDALGO_SOURCES_${ID}}
)
endforeach() endforeach()
foreach(ID A;B;C;D;E;F;G;H;I;L;M;P;Q) foreach(ID A;B;C;D;E;F;G;H;I;L;M;P;Q)
KOKKOS_ADD_EXECUTABLE_AND_TEST( kokkos_add_executable_and_test(
AlgorithmsUnitTest_StdSet_Team_${ID} AlgorithmsUnitTest_StdSet_Team_${ID} SOURCES UnitTestMain.cpp ${STDALGO_TEAM_SOURCES_${ID}}
SOURCES )
UnitTestMain.cpp
${STDALGO_TEAM_SOURCES_${ID}}
)
endforeach() endforeach()
# FIXME_OPENMPTARGET This test causes internal compiler errors as of 09/01/22 # FIXME_OPENMPTARGET This test causes internal compiler errors as of 09/01/22
# when compiling for Intel's Xe-HP GPUs. # when compiling for Intel's Xe-HP GPUs.
if(NOT (KOKKOS_ENABLE_OPENMPTARGET AND KOKKOS_CXX_COMPILER_ID STREQUAL IntelLLVM)) if(NOT (KOKKOS_ENABLE_OPENMPTARGET AND KOKKOS_CXX_COMPILER_ID STREQUAL IntelLLVM))
KOKKOS_ADD_EXECUTABLE( kokkos_add_executable(AlgorithmsUnitTest_StdAlgoCompileOnly SOURCES TestStdAlgorithmsCompileOnly.cpp)
AlgorithmsUnitTest_StdAlgoCompileOnly
SOURCES TestStdAlgorithmsCompileOnly.cpp
)
endif() endif()

View File

@ -31,13 +31,13 @@ struct bin3d_is_sorted_struct {
using value_type = unsigned int; using value_type = unsigned int;
using execution_space = ExecutionSpace; using execution_space = ExecutionSpace;
Kokkos::View<Scalar * [3], ExecutionSpace> keys; Kokkos::View<Scalar* [3], ExecutionSpace> keys;
int max_bins; int max_bins;
Scalar min; Scalar min;
Scalar max; Scalar max;
bin3d_is_sorted_struct(Kokkos::View<Scalar * [3], ExecutionSpace> keys_, bin3d_is_sorted_struct(Kokkos::View<Scalar* [3], ExecutionSpace> keys_,
int max_bins_, Scalar min_, Scalar max_) int max_bins_, Scalar min_, Scalar max_)
: keys(keys_), max_bins(max_bins_), min(min_), max(max_) {} : keys(keys_), max_bins(max_bins_), min(min_), max(max_) {}
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
@ -65,9 +65,9 @@ struct sum3D {
using value_type = double; using value_type = double;
using execution_space = ExecutionSpace; using execution_space = ExecutionSpace;
Kokkos::View<Scalar * [3], ExecutionSpace> keys; Kokkos::View<Scalar* [3], ExecutionSpace> keys;
sum3D(Kokkos::View<Scalar * [3], ExecutionSpace> keys_) : keys(keys_) {} sum3D(Kokkos::View<Scalar* [3], ExecutionSpace> keys_) : keys(keys_) {}
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void operator()(int i, double& count) const { void operator()(int i, double& count) const {
count += keys(i, 0); count += keys(i, 0);
@ -77,8 +77,8 @@ struct sum3D {
}; };
template <class ExecutionSpace, typename KeyType> template <class ExecutionSpace, typename KeyType>
void test_3D_sort_impl(unsigned int n) { void test_3D_sort_impl(size_t n) {
using KeyViewType = Kokkos::View<KeyType * [3], ExecutionSpace>; using KeyViewType = Kokkos::View<KeyType* [3], ExecutionSpace>;
KeyViewType keys("Keys", n * n * n); KeyViewType keys("Keys", n * n * n);
@ -207,7 +207,7 @@ void test_sort_integer_overflow() {
// array with two extrema in reverse order to expose integer overflow bug in // array with two extrema in reverse order to expose integer overflow bug in
// bin calculation // bin calculation
T a[2] = {Kokkos::Experimental::finite_max<T>::value, T a[2] = {Kokkos::Experimental::finite_max<T>::value,
Kokkos::Experimental::finite_min<T>::value}; Kokkos::Experimental::finite_min<T>::value};
auto vd = Kokkos::create_mirror_view_and_copy( auto vd = Kokkos::create_mirror_view_and_copy(
ExecutionSpace(), Kokkos::View<T[2], Kokkos::HostSpace>(a)); ExecutionSpace(), Kokkos::View<T[2], Kokkos::HostSpace>(a));
Kokkos::sort(vd); Kokkos::sort(vd);
@ -219,6 +219,10 @@ void test_sort_integer_overflow() {
} // namespace BinSortSetA } // namespace BinSortSetA
TEST(TEST_CATEGORY, BinSortGenericTests) { TEST(TEST_CATEGORY, BinSortGenericTests) {
// FIXME_OPENMPTARGET - causes runtime failure with CrayClang compiler
#if defined(KOKKOS_COMPILER_CRAY_LLVM) && defined(KOKKOS_ENABLE_OPENMPTARGET)
GTEST_SKIP() << "known to fail with OpenMPTarget+Cray LLVM";
#endif
using ExecutionSpace = TEST_EXECSPACE; using ExecutionSpace = TEST_EXECSPACE;
using key_type = unsigned; using key_type = unsigned;
constexpr int N = 171; constexpr int N = 171;
@ -246,11 +250,11 @@ TEST(TEST_CATEGORY, BinSortEmptyView) {
// does not matter if we use int or something else // does not matter if we use int or something else
Kokkos::View<int*, ExecutionSpace> v("v", 0); Kokkos::View<int*, ExecutionSpace> v("v", 0);
// test all exposed public sort methods // test all exposed public sort methods are callable and do not throw
ASSERT_NO_THROW(Sorter.sort(ExecutionSpace(), v, 0, 0)); Sorter.sort(ExecutionSpace(), v, 0, 0);
ASSERT_NO_THROW(Sorter.sort(v, 0, 0)); Sorter.sort(v, 0, 0);
ASSERT_NO_THROW(Sorter.sort(ExecutionSpace(), v)); Sorter.sort(ExecutionSpace(), v);
ASSERT_NO_THROW(Sorter.sort(v)); Sorter.sort(v);
} }
TEST(TEST_CATEGORY, BinSortEmptyKeysView) { TEST(TEST_CATEGORY, BinSortEmptyKeysView) {
@ -263,7 +267,26 @@ TEST(TEST_CATEGORY, BinSortEmptyKeysView) {
BinOp_t binOp(5, 0, 10); BinOp_t binOp(5, 0, 10);
Kokkos::BinSort<KeyViewType, BinOp_t> Sorter(ExecutionSpace{}, kv, binOp); Kokkos::BinSort<KeyViewType, BinOp_t> Sorter(ExecutionSpace{}, kv, binOp);
ASSERT_NO_THROW(Sorter.create_permute_vector(ExecutionSpace{})); Sorter.create_permute_vector(ExecutionSpace{}); // does not throw
}
// BinSort may delegate sorting within bins to std::sort when running on host
// and having a sufficiently large number of items within a single bin (10 by
// default). Test that this is done without undefined behavior when accessing
// the boundaries of the bin. Should be used in conjunction with a memory
// sanitizer or bounds check.
TEST(TEST_CATEGORY, BinSort_issue_7221) {
using ExecutionSpace = TEST_EXECSPACE;
using KeyViewType = Kokkos::View<int*, ExecutionSpace>;
KeyViewType kv("kv", 11);
using BinOp_t = Kokkos::BinOp1D<KeyViewType>;
BinOp_t binOp(1, -10, 10);
Kokkos::BinSort<KeyViewType, BinOp_t> Sorter(ExecutionSpace{}, kv, binOp,
/*sort_within_bins*/ true);
Sorter.create_permute_vector(ExecutionSpace{}); // does not throw
} }
} // namespace Test } // namespace Test

View File

@ -185,6 +185,10 @@ void run_for_rank2() {
} // namespace BinSortSetB } // namespace BinSortSetB
TEST(TEST_CATEGORY, BinSortUnsignedKeyLayoutStrideValues) { TEST(TEST_CATEGORY, BinSortUnsignedKeyLayoutStrideValues) {
// FIXME_OPENMPTARGET - causes runtime failure with CrayClang compiler
#if defined(KOKKOS_COMPILER_CRAY_LLVM) && defined(KOKKOS_ENABLE_OPENMPTARGET)
GTEST_SKIP() << "known to fail with OpenMPTarget+Cray LLVM";
#endif
using ExeSpace = TEST_EXECSPACE; using ExeSpace = TEST_EXECSPACE;
using key_type = unsigned; using key_type = unsigned;
BinSortSetB::run_for_rank1<ExeSpace, key_type, int>(); BinSortSetB::run_for_rank1<ExeSpace, key_type, int>();

View File

@ -386,6 +386,11 @@ void test_nested_sort_by_key(unsigned int N, KeyType minKey, KeyType maxKey,
} // namespace NestedSortImpl } // namespace NestedSortImpl
TEST(TEST_CATEGORY, NestedSort) { TEST(TEST_CATEGORY, NestedSort) {
// FIXME_OPENMPTARGET - causes runtime failure with CrayClang compiler
#if defined(KOKKOS_COMPILER_CRAY_LLVM) && defined(KOKKOS_ENABLE_OPENMPTARGET)
GTEST_SKIP() << "known to fail with OpenMPTarget+Cray LLVM";
#endif
using ExecutionSpace = TEST_EXECSPACE; using ExecutionSpace = TEST_EXECSPACE;
NestedSortImpl::test_nested_sort<ExecutionSpace, unsigned>(171, 0U, UINT_MAX); NestedSortImpl::test_nested_sort<ExecutionSpace, unsigned>(171, 0U, UINT_MAX);
NestedSortImpl::test_nested_sort<ExecutionSpace, float>(42, -1e6f, 1e6f); NestedSortImpl::test_nested_sort<ExecutionSpace, float>(42, -1e6f, 1e6f);
@ -394,6 +399,11 @@ TEST(TEST_CATEGORY, NestedSort) {
} }
TEST(TEST_CATEGORY, NestedSortByKey) { TEST(TEST_CATEGORY, NestedSortByKey) {
// FIXME_OPENMPTARGET - causes runtime failure with CrayClang compiler
#if defined(KOKKOS_COMPILER_CRAY_LLVM) && defined(KOKKOS_ENABLE_OPENMPTARGET)
GTEST_SKIP() << "known to fail with OpenMPTarget+Cray LLVM";
#endif
using ExecutionSpace = TEST_EXECSPACE; using ExecutionSpace = TEST_EXECSPACE;
// Second/third template arguments are key and value respectively. // Second/third template arguments are key and value respectively.

View File

@ -542,6 +542,11 @@ void test_duplicate_stream() {
} // namespace AlgoRandomImpl } // namespace AlgoRandomImpl
TEST(TEST_CATEGORY, Random_XorShift64) { TEST(TEST_CATEGORY, Random_XorShift64) {
// FIXME_OPENMPTARGET - causes runtime failure with CrayClang compiler
#if defined(KOKKOS_COMPILER_CRAY_LLVM) && defined(KOKKOS_ENABLE_OPENMPTARGET)
GTEST_SKIP() << "known to fail with OpenMPTarget+Cray LLVM";
#endif
using ExecutionSpace = TEST_EXECSPACE; using ExecutionSpace = TEST_EXECSPACE;
#if defined(KOKKOS_ENABLE_SYCL) || defined(KOKKOS_ENABLE_CUDA) || \ #if defined(KOKKOS_ENABLE_SYCL) || defined(KOKKOS_ENABLE_CUDA) || \
@ -562,6 +567,10 @@ TEST(TEST_CATEGORY, Random_XorShift64) {
TEST(TEST_CATEGORY, Random_XorShift1024_0) { TEST(TEST_CATEGORY, Random_XorShift1024_0) {
using ExecutionSpace = TEST_EXECSPACE; using ExecutionSpace = TEST_EXECSPACE;
// FIXME_OPENMPTARGET - causes runtime failure with CrayClang compiler
#if defined(KOKKOS_COMPILER_CRAY_LLVM) && defined(KOKKOS_ENABLE_OPENMPTARGET)
GTEST_SKIP() << "known to fail with OpenMPTarget+Cray LLVM";
#endif
#if defined(KOKKOS_ENABLE_SYCL) || defined(KOKKOS_ENABLE_CUDA) || \ #if defined(KOKKOS_ENABLE_SYCL) || defined(KOKKOS_ENABLE_CUDA) || \
defined(KOKKOS_ENABLE_HIP) defined(KOKKOS_ENABLE_HIP)
@ -589,7 +598,7 @@ TEST(TEST_CATEGORY, Multi_streams) {
#endif #endif
#if defined(KOKKOS_ENABLE_SYCL) && defined(KOKKOS_IMPL_ARCH_NVIDIA_GPU) #if defined(KOKKOS_ENABLE_SYCL) && defined(KOKKOS_IMPL_ARCH_NVIDIA_GPU)
if constexpr (std::is_same_v<ExecutionSpace, Kokkos::Experimental::SYCL>) { if constexpr (std::is_same_v<ExecutionSpace, Kokkos::SYCL>) {
GTEST_SKIP() << "Failing on NVIDIA GPUs"; // FIXME_SYCL GTEST_SKIP() << "Failing on NVIDIA GPUs"; // FIXME_SYCL
} }
#endif #endif

View File

@ -23,7 +23,7 @@ namespace stdalgos {
struct random_access_iterator_test : std_algorithms_test { struct random_access_iterator_test : std_algorithms_test {
public: public:
virtual void SetUp() { void SetUp() override {
Kokkos::parallel_for(m_static_view.extent(0), Kokkos::parallel_for(m_static_view.extent(0),
AssignIndexFunctor<static_view_t>(m_static_view)); AssignIndexFunctor<static_view_t>(m_static_view));
@ -264,6 +264,37 @@ TEST_F(random_access_iterator_test, traits_helpers) {
static_assert(KE::Impl::are_iterators_v<T1_t, T2_t, T3_t>); static_assert(KE::Impl::are_iterators_v<T1_t, T2_t, T3_t>);
static_assert(KE::Impl::are_random_access_iterators_v<T1_t, T2_t, T3_t>); static_assert(KE::Impl::are_random_access_iterators_v<T1_t, T2_t, T3_t>);
static_assert(!KE::Impl::are_iterators_v<int, T2_t, T3_t>); static_assert(!KE::Impl::are_iterators_v<int, T2_t, T3_t>);
static_assert(std::is_same_v<decltype(KE::begin(m_static_view))::value_type,
value_type>);
static_assert(std::is_same_v<decltype(KE::begin(m_dynamic_view))::value_type,
value_type>);
static_assert(std::is_same_v<decltype(KE::begin(m_strided_view))::value_type,
value_type>);
static_assert(
std::is_same_v<decltype(KE::end(m_static_view))::value_type, value_type>);
static_assert(std::is_same_v<decltype(KE::end(m_dynamic_view))::value_type,
value_type>);
static_assert(std::is_same_v<decltype(KE::end(m_strided_view))::value_type,
value_type>);
static_assert(
std::is_same_v<decltype(KE::begin(m_static_view))::value_type,
decltype(KE::cbegin(m_static_view))::value_type>);
static_assert(
std::is_same_v<decltype(KE::begin(m_dynamic_view))::value_type,
decltype(KE::cbegin(m_dynamic_view))::value_type>);
static_assert(
std::is_same_v<decltype(KE::begin(m_strided_view))::value_type,
decltype(KE::cbegin(m_strided_view))::value_type>);
static_assert(std::is_same_v<decltype(KE::end(m_static_view))::value_type,
decltype(KE::cend(m_static_view))::value_type>);
static_assert(std::is_same_v<decltype(KE::end(m_dynamic_view))::value_type,
decltype(KE::cend(m_dynamic_view))::value_type>);
static_assert(std::is_same_v<decltype(KE::end(m_strided_view))::value_type,
decltype(KE::cend(m_strided_view))::value_type>);
} }
} // namespace stdalgos } // namespace stdalgos

Some files were not shown because too many files have changed in this diff Show More