Merge branch 'lammps:develop' into alphataubio-kokkos-fixes

This commit is contained in:
alphataubio
2024-10-01 02:13:26 -04:00
committed by GitHub
561 changed files with 22901 additions and 17237 deletions

View File

@ -27,9 +27,9 @@ jobs:
- name: Install extra packages - name: Install extra packages
run: | run: |
sudo apt-get update
sudo apt-get install -y ccache \ sudo apt-get install -y ccache \
libeigen3-dev \ libeigen3-dev \
libgsl-dev \
libcurl4-openssl-dev \ libcurl4-openssl-dev \
mold \ mold \
mpi-default-bin \ mpi-default-bin \

View File

@ -1,4 +1,4 @@
# GitHub action to build LAMMPS on Windows with Visual C++ # GitHub action to test LAMMPS on Windows with Visual C++
name: "Windows Unit Tests" name: "Windows Unit Tests"
on: on:
@ -11,6 +11,10 @@ on:
workflow_dispatch: workflow_dispatch:
concurrency:
group: ${{ github.event_name }}-${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: ${{github.event_name == 'pull_request'}}
jobs: jobs:
build: build:
name: Windows Compilation Test name: Windows Compilation Test

109
.github/workflows/full-regression.yml vendored Normal file
View File

@ -0,0 +1,109 @@
# GitHub action to build LAMMPS on Linux and run regression tests
# This is the full regression run: the test inputs are processed in shards
# by the matrix of the "build" job below.
name: "Full Regression Test"
# Triggers: every push to the develop branch, plus manual dispatch.
on:
push:
branches:
- develop
workflow_dispatch:
jobs:
# Build job: compiles LAMMPS with CMake/Ninja and runs one shard of the
# regression tests per matrix entry, then uploads the results as an artifact.
build:
name: Build LAMMPS
# restrict to official LAMMPS repository
if: ${{ github.repository == 'lammps/lammps' }}
runs-on: ubuntu-latest
env:
CCACHE_DIR: ${{ github.workspace }}/.ccache
# Eight matrix entries (idx 0-7), all allowed to run at the same time.
# The idx value selects which input-list-<idx>.txt file this runner processes.
strategy:
max-parallel: 8
matrix:
idx: [ 0, 1, 2, 3, 4, 5, 6, 7 ]
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
fetch-depth: 2
show-progress: false
- name: Install extra packages
run: |
sudo apt-get update
sudo apt-get install -y ccache ninja-build libeigen3-dev \
libcurl4-openssl-dev python3-dev \
mpi-default-bin mpi-default-dev
- name: Create Build Environment
run: mkdir build
# The cache key contains the commit SHA, so an exact hit only happens for a
# re-run of the same commit; otherwise the "linux-full-ccache-" prefix in
# restore-keys is used to restore a previously saved cache.
- name: Set up ccache
uses: actions/cache@v4
with:
path: ${{ env.CCACHE_DIR }}
key: linux-full-ccache-${{ github.sha }}
restore-keys: linux-full-ccache-
# Creates the "linuxenv" Python virtual environment (reused by the test step),
# configures a static build from the gcc and most presets plus additional
# packages, and prints ccache statistics before and after the build.
- name: Building LAMMPS via CMake
shell: bash
run: |
ccache -z
python3 -m venv linuxenv
source linuxenv/bin/activate
python3 -m pip install --upgrade pip
python3 -m pip install numpy pyyaml junit_xml
cmake -S cmake -B build \
-C cmake/presets/gcc.cmake \
-C cmake/presets/most.cmake \
-D CMAKE_CXX_COMPILER_LAUNCHER=ccache \
-D CMAKE_C_COMPILER_LAUNCHER=ccache \
-D BUILD_SHARED_LIBS=off \
-D DOWNLOAD_POTENTIALS=off \
-D PKG_MANIFOLD=on \
-D PKG_ML-PACE=on \
-D PKG_ML-RANN=on \
-D PKG_RHEO=on \
-D PKG_PTM=on \
-D PKG_PYTHON=on \
-D PKG_QTB=on \
-D PKG_SMTBQ=on \
-G Ninja
cmake --build build
ccache -s
# Two invocations of run_tests.py: the first one uses --analyze with
# --num-workers=8 (presumably this writes the input-list-<N>.txt files, one
# per worker -- TODO confirm against run_tests.py), the second one runs the
# list that belongs to this matrix entry. The log, progress, and JUnit XML
# files of this shard are then packed into a single tar file.
- name: Run Full Regression Tests
shell: bash
run: |
source linuxenv/bin/activate
python3 tools/regression-tests/run_tests.py \
--lmp-bin=build/lmp \
--config-file=tools/regression-tests/config_serial.yaml \
--examples-top-level=examples --analyze --num-workers=8
python3 tools/regression-tests/run_tests.py \
--lmp-bin=build/lmp \
--config-file=tools/regression-tests/config_serial.yaml \
--list-input=input-list-${{ matrix.idx }}.txt \
--output-file=output-${{ matrix.idx }}.xml \
--progress-file=progress-${{ matrix.idx }}.yaml \
--log-file=run-${{ matrix.idx }}.log
tar -cvf full-regression-test-${{ matrix.idx }}.tar run-${{ matrix.idx }}.log progress-${{ matrix.idx }}.yaml output-${{ matrix.idx }}.xml
# One artifact per matrix entry; the name prefix must match the pattern
# used by the "merge" job.
- name: Upload artifacts
uses: actions/upload-artifact@v4
with:
name: full-regression-test-artifact-${{ matrix.idx }}
path: full-regression-test-${{ matrix.idx }}.tar
# Merge job: runs after all "build" matrix entries and combines the
# per-shard artifacts matching the pattern into a single artifact.
# NOTE(review): the merged artifact name is spelled "regresssion" (three s);
# confirm that nothing downloads it by name before correcting the spelling.
merge:
runs-on: ubuntu-latest
needs: build
steps:
- name: Merge Artifacts
uses: actions/upload-artifact/merge@v4
with:
name: merged-full-regresssion-artifact
pattern: full-regression-test-artifact-*

118
.github/workflows/quick-regression.yml vendored Normal file
View File

@ -0,0 +1,118 @@
# GitHub action to build LAMMPS on Linux and run selected regression tests
name: "Quick Regression Test"
# Triggers: pull requests targeting the develop branch, plus manual dispatch.
on:
pull_request:
branches:
- develop
workflow_dispatch:
# Runs are grouped by event name, workflow, and git ref; a run that is still
# in progress is cancelled by a newer one only for pull_request events.
concurrency:
group: ${{ github.event_name }}-${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: ${{github.event_name == 'pull_request'}}
jobs:
# Build job: compiles LAMMPS with CMake/Ninja and runs one shard of the
# quick regression tests per matrix entry, then uploads the results.
build:
name: Build LAMMPS
# restrict to official LAMMPS repository
if: ${{ github.repository == 'lammps/lammps' }}
runs-on: ubuntu-latest
env:
CCACHE_DIR: ${{ github.workspace }}/.ccache
# Four matrix entries (idx 0-3), all allowed to run at the same time.
# The idx value selects which input-list-<idx>.txt file this runner processes.
strategy:
max-parallel: 4
matrix:
idx: [ 0, 1, 2, 3 ]
steps:
# fetch-depth 0 requests the complete history (presumably required so that
# the comparison against origin/develop in the test step works -- TODO confirm).
- name: Checkout repository
uses: actions/checkout@v4
with:
fetch-depth: 0
show-progress: false
- name: Install extra packages
run: |
sudo apt-get update
sudo apt-get install -y ccache ninja-build libeigen3-dev \
libcurl4-openssl-dev python3-dev \
mpi-default-bin mpi-default-dev
- name: Create Build Environment
run: mkdir build
# The cache key contains the commit SHA, so an exact hit only happens for a
# re-run of the same commit; otherwise the "linux-quick-ccache-" prefix in
# restore-keys is used to restore a previously saved cache.
- name: Set up ccache
uses: actions/cache@v4
with:
path: ${{ env.CCACHE_DIR }}
key: linux-quick-ccache-${{ github.sha }}
restore-keys: linux-quick-ccache-
# Creates the "linuxenv" Python virtual environment (reused by the test step),
# configures a static build from the gcc and most presets plus additional
# packages, and prints ccache statistics before and after the build.
- name: Building LAMMPS via CMake
shell: bash
run: |
ccache -z
python3 -m venv linuxenv
source linuxenv/bin/activate
python3 -m pip install --upgrade pip
python3 -m pip install numpy pyyaml junit_xml
cmake -S cmake -B build \
-C cmake/presets/gcc.cmake \
-C cmake/presets/most.cmake \
-D CMAKE_CXX_COMPILER_LAUNCHER=ccache \
-D CMAKE_C_COMPILER_LAUNCHER=ccache \
-D BUILD_SHARED_LIBS=off \
-D DOWNLOAD_POTENTIALS=off \
-D PKG_MANIFOLD=on \
-D PKG_ML-PACE=on \
-D PKG_ML-RANN=on \
-D PKG_RHEO=on \
-D PKG_PTM=on \
-D PKG_PYTHON=on \
-D PKG_QTB=on \
-D PKG_SMTBQ=on \
-G Ninja
cmake --build build
ccache -s
# The first run_tests.py call uses --quick with origin/develop as reference
# branch, a limit of 100 inputs, and 4 workers (presumably this writes the
# input-list-<N>.txt files -- TODO confirm against run_tests.py). The second
# call is only made when the list for this matrix entry exists.
# NOTE(review): the tar command is executed even when the second call was
# skipped; verify that the three packed files exist in that case, otherwise
# tar (and with it this step) will fail.
- name: Run Regression Tests for Modified Styles
shell: bash
run: |
source linuxenv/bin/activate
python3 tools/regression-tests/run_tests.py \
--lmp-bin=build/lmp \
--config-file=tools/regression-tests/config_quick.yaml \
--examples-top-level=examples \
--quick-reference=tools/regression-tests/reference.yaml \
--quick --quick-branch=origin/develop --quick-max=100 --num-workers=4
if [ -f input-list-${{ matrix.idx }}.txt ]
then \
python3 tools/regression-tests/run_tests.py \
--lmp-bin=build/lmp \
--config-file=tools/regression-tests/config_quick.yaml \
--list-input=input-list-${{ matrix.idx }}.txt \
--output-file=output-${{ matrix.idx }}.xml \
--progress-file=progress-${{ matrix.idx }}.yaml \
--log-file=run-${{ matrix.idx }}.log
fi
tar -cvf quick-regression-test-${{ matrix.idx }}.tar run-${{ matrix.idx }}.log progress-${{ matrix.idx }}.yaml output-${{ matrix.idx }}.xml
# One artifact per matrix entry; the name prefix must match the pattern
# used by the "merge" job.
- name: Upload artifacts
uses: actions/upload-artifact@v4
with:
name: quick-regression-test-artifact-${{ matrix.idx }}
path: quick-regression-test-${{ matrix.idx }}.tar
# Merge job: runs after all "build" matrix entries and combines the
# per-shard artifacts matching the pattern into a single artifact.
# NOTE(review): the merged artifact name is spelled "regresssion" (three s);
# confirm that nothing downloads it by name before correcting the spelling.
merge:
runs-on: ubuntu-latest
needs: build
steps:
- name: Merge Artifacts
uses: actions/upload-artifact/merge@v4
with:
name: merged-quick-regresssion-artifact
pattern: quick-regression-test-artifact-*

37
.github/workflows/style-check.yml vendored Normal file
View File

@ -0,0 +1,37 @@
# GitHub action to run checks from tools/coding_standard
name: "Check for Programming Style Conformance"
# Triggers: pushes to develop, pull requests targeting develop, and
# manual dispatch.
on:
push:
branches:
- develop
pull_request:
branches:
- develop
workflow_dispatch:
# Runs are grouped by event name, workflow, and git ref; a run that is still
# in progress is cancelled by a newer one only for pull_request events.
concurrency:
group: ${{ github.event_name }}-${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: ${{github.event_name == 'pull_request'}}
jobs:
# Single job that runs the style-check make targets from the src folder.
# No compilation or package installation is done here; only a shallow
# checkout (fetch-depth 1) of the repository is needed.
build:
name: Programming Style Conformance
# only run in the official LAMMPS repository
if: ${{ github.repository == 'lammps/lammps' }}
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
fetch-depth: 1
# The four check targets are run one after the other from within "src".
- name: Run Tests
working-directory: src
shell: bash
run: |
make check-whitespace
make check-permissions
make check-homepage
make check-errordocs

View File

@ -11,6 +11,10 @@ on:
workflow_dispatch: workflow_dispatch:
concurrency:
group: ${{ github.event_name }}-${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: ${{github.event_name == 'pull_request'}}
jobs: jobs:
build: build:
name: Linux Unit Test name: Linux Unit Test
@ -27,9 +31,9 @@ jobs:
- name: Install extra packages - name: Install extra packages
run: | run: |
sudo apt-get update
sudo apt-get install -y ccache \ sudo apt-get install -y ccache \
libeigen3-dev \ libeigen3-dev \
libgsl-dev \
libcurl4-openssl-dev \ libcurl4-openssl-dev \
mold \ mold \
ninja-build \ ninja-build \

View File

@ -11,6 +11,10 @@ on:
workflow_dispatch: workflow_dispatch:
concurrency:
group: ${{ github.event_name }}-${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: ${{github.event_name == 'pull_request'}}
jobs: jobs:
build: build:
name: MacOS Unit Test name: MacOS Unit Test

View File

@ -497,7 +497,7 @@ if((CMAKE_CXX_COMPILER_ID STREQUAL "Intel") AND (CMAKE_CXX_STANDARD GREATER_EQUA
PROPERTIES COMPILE_OPTIONS "-std=c++14") PROPERTIES COMPILE_OPTIONS "-std=c++14")
endif() endif()
if(PKG_ATC OR PKG_AWPMD OR PKG_ML-QUIP OR PKG_ML-POD OR PKG_ELECTRODE OR BUILD_TOOLS) if(PKG_ATC OR PKG_AWPMD OR PKG_ML-QUIP OR PKG_ML-POD OR PKG_ELECTRODE OR PKG_RHEO OR BUILD_TOOLS)
enable_language(C) enable_language(C)
if (NOT USE_INTERNAL_LINALG) if (NOT USE_INTERNAL_LINALG)
find_package(LAPACK) find_package(LAPACK)
@ -572,7 +572,7 @@ else()
endif() endif()
foreach(PKG_WITH_INCL KSPACE PYTHON ML-IAP VORONOI COLVARS ML-HDNNP MDI MOLFILE NETCDF foreach(PKG_WITH_INCL KSPACE PYTHON ML-IAP VORONOI COLVARS ML-HDNNP MDI MOLFILE NETCDF
PLUMED QMMM ML-QUIP SCAFACOS MACHDYN VTK KIM COMPRESS ML-PACE LEPTON RHEO EXTRA-COMMAND) PLUMED QMMM ML-QUIP SCAFACOS MACHDYN VTK KIM COMPRESS ML-PACE LEPTON EXTRA-COMMAND)
if(PKG_${PKG_WITH_INCL}) if(PKG_${PKG_WITH_INCL})
include(Packages/${PKG_WITH_INCL}) include(Packages/${PKG_WITH_INCL})
endif() endif()

View File

@ -4,6 +4,8 @@
option(BUILD_DOC "Build LAMMPS HTML documentation" OFF) option(BUILD_DOC "Build LAMMPS HTML documentation" OFF)
if(BUILD_DOC) if(BUILD_DOC)
option(BUILD_DOC_VENV "Build LAMMPS documentation virtual environment" ON)
mark_as_advanced(BUILD_DOC_VENV)
# Current Sphinx versions require at least Python 3.8 # Current Sphinx versions require at least Python 3.8
# use default (or custom) Python executable, if version is sufficient # use default (or custom) Python executable, if version is sufficient
if(Python_VERSION VERSION_GREATER_EQUAL 3.8) if(Python_VERSION VERSION_GREATER_EQUAL 3.8)
@ -18,14 +20,6 @@ if(BUILD_DOC)
find_package(Doxygen 1.8.10 REQUIRED) find_package(Doxygen 1.8.10 REQUIRED)
file(GLOB DOC_SOURCES CONFIGURE_DEPENDS ${LAMMPS_DOC_DIR}/src/[^.]*.rst) file(GLOB DOC_SOURCES CONFIGURE_DEPENDS ${LAMMPS_DOC_DIR}/src/[^.]*.rst)
add_custom_command(
OUTPUT docenv
COMMAND ${VIRTUALENV} docenv
)
set(DOCENV_BINARY_DIR ${CMAKE_BINARY_DIR}/docenv/bin)
set(DOCENV_REQUIREMENTS_FILE ${LAMMPS_DOC_DIR}/utils/requirements.txt)
set(SPHINX_CONFIG_DIR ${LAMMPS_DOC_DIR}/utils/sphinx-config) set(SPHINX_CONFIG_DIR ${LAMMPS_DOC_DIR}/utils/sphinx-config)
set(SPHINX_CONFIG_FILE_TEMPLATE ${SPHINX_CONFIG_DIR}/conf.py.in) set(SPHINX_CONFIG_FILE_TEMPLATE ${SPHINX_CONFIG_DIR}/conf.py.in)
set(SPHINX_STATIC_DIR ${SPHINX_CONFIG_DIR}/_static) set(SPHINX_STATIC_DIR ${SPHINX_CONFIG_DIR}/_static)
@ -44,6 +38,15 @@ if(BUILD_DOC)
# configure paths in conf.py, since relative paths change when file is copied # configure paths in conf.py, since relative paths change when file is copied
configure_file(${SPHINX_CONFIG_FILE_TEMPLATE} ${DOC_BUILD_CONFIG_FILE}) configure_file(${SPHINX_CONFIG_FILE_TEMPLATE} ${DOC_BUILD_CONFIG_FILE})
if(BUILD_DOC_VENV)
add_custom_command(
OUTPUT docenv
COMMAND ${VIRTUALENV} docenv
)
set(DOCENV_BINARY_DIR ${CMAKE_BINARY_DIR}/docenv/bin)
set(DOCENV_REQUIREMENTS_FILE ${LAMMPS_DOC_DIR}/utils/requirements.txt)
add_custom_command( add_custom_command(
OUTPUT ${DOC_BUILD_DIR}/requirements.txt OUTPUT ${DOC_BUILD_DIR}/requirements.txt
DEPENDS docenv ${DOCENV_REQUIREMENTS_FILE} DEPENDS docenv ${DOCENV_REQUIREMENTS_FILE}
@ -53,6 +56,15 @@ if(BUILD_DOC)
COMMAND ${DOCENV_BINARY_DIR}/pip $ENV{PIP_OPTIONS} install -r ${DOC_BUILD_DIR}/requirements.txt --upgrade COMMAND ${DOCENV_BINARY_DIR}/pip $ENV{PIP_OPTIONS} install -r ${DOC_BUILD_DIR}/requirements.txt --upgrade
) )
set(DOCENV_DEPS docenv ${DOC_BUILD_DIR}/requirements.txt)
if(NOT TARGET Sphinx::sphinx-build)
add_executable(Sphinx::sphinx-build IMPORTED GLOBAL)
set_target_properties(Sphinx::sphinx-build PROPERTIES IMPORTED_LOCATION "${DOCENV_BINARY_DIR}/sphinx-build")
endif()
else()
find_package(Sphinx)
endif()
set(MATHJAX_URL "https://github.com/mathjax/MathJax/archive/3.1.3.tar.gz" CACHE STRING "URL for MathJax tarball") set(MATHJAX_URL "https://github.com/mathjax/MathJax/archive/3.1.3.tar.gz" CACHE STRING "URL for MathJax tarball")
set(MATHJAX_MD5 "b81661c6e6ba06278e6ae37b30b0c492" CACHE STRING "MD5 checksum of MathJax tarball") set(MATHJAX_MD5 "b81661c6e6ba06278e6ae37b30b0c492" CACHE STRING "MD5 checksum of MathJax tarball")
mark_as_advanced(MATHJAX_URL) mark_as_advanced(MATHJAX_URL)
@ -97,8 +109,8 @@ if(BUILD_DOC)
endif() endif()
add_custom_command( add_custom_command(
OUTPUT html OUTPUT html
DEPENDS ${DOC_SOURCES} docenv ${DOC_BUILD_DIR}/requirements.txt ${DOXYGEN_XML_DIR}/index.xml ${BUILD_DOC_CONFIG_FILE} DEPENDS ${DOC_SOURCES} ${DOCENV_DEPS} ${DOXYGEN_XML_DIR}/index.xml ${BUILD_DOC_CONFIG_FILE}
COMMAND ${DOCENV_BINARY_DIR}/sphinx-build ${SPHINX_EXTRA_OPTS} -b html -c ${DOC_BUILD_DIR} -d ${DOC_BUILD_DIR}/doctrees ${LAMMPS_DOC_DIR}/src ${DOC_BUILD_DIR}/html COMMAND Sphinx::sphinx-build ${SPHINX_EXTRA_OPTS} -b html -c ${DOC_BUILD_DIR} -d ${DOC_BUILD_DIR}/doctrees ${LAMMPS_DOC_DIR}/src ${DOC_BUILD_DIR}/html
COMMAND ${CMAKE_COMMAND} -E create_symlink Manual.html ${DOC_BUILD_DIR}/html/index.html COMMAND ${CMAKE_COMMAND} -E create_symlink Manual.html ${DOC_BUILD_DIR}/html/index.html
COMMAND ${CMAKE_COMMAND} -E copy_directory ${LAMMPS_DOC_DIR}/src/PDF ${DOC_BUILD_DIR}/html/PDF COMMAND ${CMAKE_COMMAND} -E copy_directory ${LAMMPS_DOC_DIR}/src/PDF ${DOC_BUILD_DIR}/html/PDF
COMMAND ${CMAKE_COMMAND} -E remove -f ${DOXYGEN_XML_DIR}/run.stamp COMMAND ${CMAKE_COMMAND} -E remove -f ${DOXYGEN_XML_DIR}/run.stamp

View File

@ -0,0 +1,29 @@
# Find sphinx-build
#
# Find-module that locates the "sphinx-build" executable of the Sphinx
# documentation generator.
#
# Cache variables:
#   Sphinx_EXECUTABLE    - full path to sphinx-build (marked as advanced)
# Result variables:
#   Sphinx_FOUND         - set by find_package_handle_standard_args()
#   Sphinx_VERSION       - version parsed from "sphinx-build --version";
#                          left unset when the command returns an error
# Imported targets:
#   Sphinx::sphinx-build - imported executable pointing to Sphinx_EXECUTABLE;
#                          only created when no target of that name exists yet
find_program(Sphinx_EXECUTABLE NAMES sphinx-build
  PATH_SUFFIXES bin
  DOC "Sphinx documentation build executable")
mark_as_advanced(Sphinx_EXECUTABLE)

if(Sphinx_EXECUTABLE)
  # ask the executable for its version; a non-zero result is reported as a
  # warning only, so that the executable itself can still be used
  execute_process(COMMAND ${Sphinx_EXECUTABLE} --version
    OUTPUT_VARIABLE sphinx_version
    OUTPUT_STRIP_TRAILING_WHITESPACE
    RESULT_VARIABLE _sphinx_version_result)
  if(_sphinx_version_result)
    message(WARNING "Unable to determine sphinx-build version: ${_sphinx_version_result}")
  else()
    # expected output has the form "sphinx-build <version>"; keep only the number
    string(REGEX REPLACE "sphinx-build ([0-9.]+).*"
      "\\1"
      Sphinx_VERSION
      "${sphinx_version}")
  endif()

  # the target may already have been defined elsewhere in the build system
  # pointing to a different sphinx-build; do not redefine it in that case
  if(NOT TARGET Sphinx::sphinx-build)
    add_executable(Sphinx::sphinx-build IMPORTED GLOBAL)
    set_target_properties(Sphinx::sphinx-build PROPERTIES IMPORTED_LOCATION "${Sphinx_EXECUTABLE}")
  endif()
endif()

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(Sphinx REQUIRED_VARS Sphinx_EXECUTABLE VERSION_VAR Sphinx_VERSION)

View File

@ -8,8 +8,24 @@ endif()
######################################################################## ########################################################################
# consistency checks and Kokkos options/settings required by LAMMPS # consistency checks and Kokkos options/settings required by LAMMPS
if(Kokkos_ENABLE_CUDA) if(Kokkos_ENABLE_CUDA)
message(STATUS "KOKKOS: Enabling CUDA LAMBDA function support") option(Kokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC "CUDA asynchronous malloc support" OFF)
set(Kokkos_ENABLE_CUDA_LAMBDA ON CACHE BOOL "" FORCE) mark_as_advanced(Kokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC)
if(Kokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC)
message(STATUS "KOKKOS: CUDA malloc async support enabled")
else()
message(STATUS "KOKKOS: CUDA malloc async support disabled")
endif()
endif()
if(Kokkos_ENABLE_HIP)
option(Kokkos_ENABLE_HIP_MULTIPLE_KERNEL_INSTANTIATIONS "Enable multiple kernel instantiations with HIP" ON)
mark_as_advanced(Kokkos_ENABLE_HIP_MULTIPLE_KERNEL_INSTANTIATIONS)
option(Kokkos_ENABLE_ROCTHRUST "Use RoCThrust library" ON)
mark_as_advanced(Kokkos_ENABLE_ROCTHRUST)
if(Kokkos_ARCH_AMD_GFX942 OR Kokkos_ARCH_AMD_GFX940)
option(Kokkos_ENABLE_IMPL_HIP_UNIFIED_MEMORY "Enable unified memory with HIP" ON)
mark_as_advanced(Kokkos_ENABLE_IMPL_HIP_UNIFIED_MEMORY)
endif()
endif() endif()
# Adding OpenMP compiler flags without the checks done for # Adding OpenMP compiler flags without the checks done for
# BUILD_OMP can result in compile failures. Enforce consistency. # BUILD_OMP can result in compile failures. Enforce consistency.
@ -18,6 +34,15 @@ if(Kokkos_ENABLE_OPENMP)
message(FATAL_ERROR "Must enable BUILD_OMP with Kokkos_ENABLE_OPENMP") message(FATAL_ERROR "Must enable BUILD_OMP with Kokkos_ENABLE_OPENMP")
endif() endif()
endif() endif()
if(Kokkos_ENABLE_SERIAL)
if(NOT (Kokkos_ENABLE_OPENMP OR Kokkos_ENABLE_THREADS OR
Kokkos_ENABLE_CUDA OR Kokkos_ENABLE_HIP OR Kokkos_ENABLE_SYCL
OR Kokkos_ENABLE_OPENMPTARGET))
option(Kokkos_ENABLE_ATOMICS_BYPASS "Disable atomics for Kokkos Serial Backend" ON)
mark_as_advanced(Kokkos_ENABLE_ATOMICS_BYPASS)
endif()
endif()
######################################################################## ########################################################################
option(EXTERNAL_KOKKOS "Build against external kokkos library" OFF) option(EXTERNAL_KOKKOS "Build against external kokkos library" OFF)
@ -45,8 +70,8 @@ if(DOWNLOAD_KOKKOS)
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_CXX_EXTENSIONS=${CMAKE_CXX_EXTENSIONS}") list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_CXX_EXTENSIONS=${CMAKE_CXX_EXTENSIONS}")
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}") list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}")
include(ExternalProject) include(ExternalProject)
set(KOKKOS_URL "https://github.com/kokkos/kokkos/archive/4.3.01.tar.gz" CACHE STRING "URL for KOKKOS tarball") set(KOKKOS_URL "https://github.com/kokkos/kokkos/archive/4.4.01.tar.gz" CACHE STRING "URL for KOKKOS tarball")
set(KOKKOS_MD5 "243de871b3dc2cf3990c1c404032df83" CACHE STRING "MD5 checksum of KOKKOS tarball") set(KOKKOS_MD5 "de6ee80d00b6212b02bfb7f1e71a8392" CACHE STRING "MD5 checksum of KOKKOS tarball")
mark_as_advanced(KOKKOS_URL) mark_as_advanced(KOKKOS_URL)
mark_as_advanced(KOKKOS_MD5) mark_as_advanced(KOKKOS_MD5)
GetFallbackURL(KOKKOS_URL KOKKOS_FALLBACK) GetFallbackURL(KOKKOS_URL KOKKOS_FALLBACK)
@ -71,7 +96,7 @@ if(DOWNLOAD_KOKKOS)
add_dependencies(LAMMPS::KOKKOSCORE kokkos_build) add_dependencies(LAMMPS::KOKKOSCORE kokkos_build)
add_dependencies(LAMMPS::KOKKOSCONTAINERS kokkos_build) add_dependencies(LAMMPS::KOKKOSCONTAINERS kokkos_build)
elseif(EXTERNAL_KOKKOS) elseif(EXTERNAL_KOKKOS)
find_package(Kokkos 4.3.01 REQUIRED CONFIG) find_package(Kokkos 4.4.01 REQUIRED CONFIG)
target_link_libraries(lammps PRIVATE Kokkos::kokkos) target_link_libraries(lammps PRIVATE Kokkos::kokkos)
else() else()
set(LAMMPS_LIB_KOKKOS_SRC_DIR ${LAMMPS_LIB_SOURCE_DIR}/kokkos) set(LAMMPS_LIB_KOKKOS_SRC_DIR ${LAMMPS_LIB_SOURCE_DIR}/kokkos)

View File

@ -1,2 +0,0 @@
find_package(GSL 2.6 REQUIRED)
target_link_libraries(lammps PRIVATE GSL::gsl)

View File

@ -67,6 +67,7 @@ set(WIN_PACKAGES
REACTION REACTION
REAXFF REAXFF
REPLICA REPLICA
RHEO
RIGID RIGID
SHOCK SHOCK
SMTBQ SMTBQ

View File

@ -60,6 +60,7 @@ set(ALL_PACKAGES
REACTION REACTION
REAXFF REAXFF
REPLICA REPLICA
RHEO
RIGID RIGID
SHOCK SHOCK
SPH SPH

View File

@ -60,6 +60,7 @@ set(WIN_PACKAGES
REACTION REACTION
REAXFF REAXFF
REPLICA REPLICA
RHEO
RIGID RIGID
SHOCK SHOCK
SMTBQ SMTBQ

View File

@ -138,12 +138,27 @@ during development:
The status of this automated testing can be viewed on `https://ci.lammps.org The status of this automated testing can be viewed on `https://ci.lammps.org
<https://ci.lammps.org>`_. <https://ci.lammps.org>`_.
The scripts and inputs for integration, run, and regression testing The scripts and inputs for integration, run, and legacy regression
are maintained in a testing are maintained in a `separate repository
`separate repository <https://github.com/lammps/lammps-testing>`_ <https://github.com/lammps/lammps-testing>`_ of the LAMMPS project on
of the LAMMPS project on GitHub. A few tests are also run as GitHub GitHub. A few tests are also run as GitHub Actions and their
Actions and their configuration files are in the ``.github/workflows/`` configuration files are in the ``.github/workflows/`` folder of the
folder of the LAMMPS git tree. LAMMPS git tree.
Regression tests can also be performed locally with the :ref:`regression
tester tool <regression>`. The tool checks if a given LAMMPS binary run
with selected input examples produces thermo output that is consistent
with the provided log files. The script can be run in one pass over all
available input files, but it can also first create multiple lists of
inputs or folders that can then be run with multiple workers
concurrently to speed things up. Another mode allows doing a quick
check of inputs that contain commands that have changes in the current
checkout branch relative to a git branch. This works similarly to the
two-pass mode, but will select only shorter runs and no more than 100 inputs
that are chosen randomly. This ensures that this test runs
significantly faster compared to the full test run. These test runs can
also be performed with instrumented LAMMPS binaries (see previous
section).
The unit testing facility is integrated into the CMake build process of The unit testing facility is integrated into the CMake build process of
the LAMMPS source code distribution itself. It can be enabled by the LAMMPS source code distribution itself. It can be enabled by

View File

@ -2251,28 +2251,38 @@ verified to work in February 2020 with Quantum Espresso versions 6.3 to
RHEO package RHEO package
------------ ------------
To build with this package you must have the `GNU Scientific Library This package depends on the BPM package.
(GSL) <https://www.gnu.org/software/gsl/>` installed in locations that
are accessible in your environment. The GSL library should be at least
version 2.7.
.. tabs:: .. tabs::
.. tab:: CMake build .. tab:: CMake build
If CMake cannot find the GSL library or include files, you can set:
.. code-block:: bash .. code-block:: bash
-D GSL_ROOT_DIR=path # path to root of GSL installation -D PKG_RHEO=yes # enable the package itself
-D PKG_BPM=yes # the RHEO package requires BPM
-D USE_INTERNAL_LINALG=value # prefer internal LAPACK if true
Some features in the RHEO package are dependent on code in the BPM
package so the latter one *must* be enabled as well.
The RHEO package also requires LAPACK (and BLAS) and CMake
can identify their locations and pass that info to the RHEO
build script. But on some systems this may cause problems when
linking or the dependency is not desired. By using the setting
``-D USE_INTERNAL_LINALG=yes`` when running the CMake
configuration, you will select compiling and linking the bundled
linear algebra library and work around the limitations.
.. tab:: Traditional make .. tab:: Traditional make
LAMMPS will try to auto-detect the GSL compiler and linker flags The RHEO package requires LAPACK (and BLAS) which can be either
from the corresponding ``pkg-config`` file (``gsl.pc``), otherwise a system provided library or the bundled "linalg" library. This
you can edit the file ``lib/rheo/Makefile.lammps`` is a subset of LAPACK translated to C++. For that, one of the
to specify the paths and library names where indicated by comments. provided ``Makefile.lammps.<config>`` files needs to be copied
This must be done **before** the package is installed. to ``Makefile.lammps`` and edited as needed. The default file
uses the bundled "linalg" library, which can be built by
``make lib-linalg args='-m serial'`` in the ``src`` folder.
---------- ----------

View File

@ -229,8 +229,7 @@ can be used with the Intel or GNU compiler (see the ``FFT_LIB`` setting
above). above).
The NVIDIA Performance Libraries (NVPL) FFT library is optimized for NVIDIA The NVIDIA Performance Libraries (NVPL) FFT library is optimized for NVIDIA
Grace Armv9.0 architecture. You can download it from Grace Armv9.0 architecture. You can download it from https://docs.nvidia.com/nvpl/
`https://docs.nvidia.com/nvpl/`_.
The cuFFT and hipFFT FFT libraries are packaged with NVIDIA's CUDA and The cuFFT and hipFFT FFT libraries are packaged with NVIDIA's CUDA and
AMD's HIP installations, respectively. These FFT libraries require the AMD's HIP installations, respectively. These FFT libraries require the

View File

@ -56,7 +56,7 @@ lammps.org". General questions about LAMMPS should be posted in the
- SNL - SNL
- jmgoff at sandia.gov - jmgoff at sandia.gov
- machine learned potentials, QEq solvers, Python - machine learned potentials, QEq solvers, Python
* - Megan McCarthy * - Meg McCarthy
- SNL - SNL
- megmcca at sandia.gov - megmcca at sandia.gov
- alloys, micro-structure, machine learned potentials - alloys, micro-structure, machine learned potentials
@ -67,7 +67,7 @@ lammps.org". General questions about LAMMPS should be posted in the
* - `Trung Nguyen <tn_>`_ * - `Trung Nguyen <tn_>`_
- U Chicago - U Chicago
- ndactrung at gmail.com - ndactrung at gmail.com
- soft matter, GPU package - soft matter, GPU package, DIELECTRIC package, regression testing
.. _rb: https://rbberger.github.io/ .. _rb: https://rbberger.github.io/
.. _gc: https://enthalpiste.fr/ .. _gc: https://enthalpiste.fr/

View File

@ -3,71 +3,70 @@ Running LAMMPS on Windows
To run a serial (non-MPI) executable, follow these steps: To run a serial (non-MPI) executable, follow these steps:
* Get a command prompt by going to Start->Run... , * Install a LAMMPS installer package from https://packages.lammps.org/windows.html
then typing "cmd". * Open the "Command Prompt" or "Terminal" app.
* Move to the directory where you have your input script, * Change to the directory where you have your input script,
(e.g. by typing: cd "Documents"). (e.g. by typing: cd "Documents").
* At the command prompt, type "lmp -in in.file", where * At the command prompt, type "lmp -in in.file.lmp", where
in.file is the name of your LAMMPS input script. ``in.file.lmp`` is the name of your LAMMPS input script.
Note that the serial executable includes support for multi-threading Note that the serial executable includes support for multi-threading
parallelization from the styles in the OPENMP packages. To run with parallelization from the styles in the OPENMP and KOKKOS packages.
4 threads, you can type this: To run with 4 threads, you can type this:
.. code-block:: bash .. code-block:: bash
lmp -in in.lj -pk omp 4 -sf omp lmp -in in.lj.lmp -pk omp 4 -sf omp
lmp -in in.lj.lmp -k on t 4 -sf kk
Alternately, you can also install a package with LAMMPS-GUI included and
open the LAMMPS-GUI app (the package includes the command line version
of LAMMPS as well) and open the input file in the GUI and run it from
there. For details on LAMMPS-GUI, see :doc:`Howto_lammps_gui`.
---------- ----------
For the MPI executable, which allows you to run LAMMPS under Windows For the MS-MPI executables, which allow you to run LAMMPS under Windows
in parallel, follow these steps. in parallel using MPI rather than multi-threading, follow these steps.
Download and install a compatible MPI library binary package: Download and install the MS-MPI runtime package ``msmpisetup.exe`` from
https://www.microsoft.com/en-us/download/details.aspx?id=105289 (Note
* for 32-bit Windows: `mpich2-1.4.1p1-win-ia32.msi <https://download.lammps.org/thirdparty/mpich2-1.4.1p1-win-ia32.msi>`_ that the ``msmpisdk.msi`` is **only** required for **compilation** of
* for 64-bit Windows: `mpich2-1.4.1p1-win-x86-64.msi <https://download.lammps.org/thirdparty/mpich2-1.4.1p1-win-x86-64.msi>`_ LAMMPS from source on Windows using Microsoft Visual Studio). After
installation of MS-MPI perform a reboot.
The LAMMPS Windows installer packages will automatically adjust your
path for the default location of this MPI package. After the
installation of the MPICH2 software, it needs to be integrated into
the system. For this you need to start a Command Prompt in
*Administrator Mode* (right click on the icon and select it). Change
into the MPICH2 installation directory, then into the subdirectory
**bin** and execute **smpd.exe -install**\ . Exit the command window.
* Get a new, regular command prompt by going to Start->Run... ,
then typing "cmd".
* Move to the directory where you have your input file
(e.g. by typing: cd "Documents").
Then you can run the executable in serial like in the example above Then you can run the executable in serial like in the example above
or in parallel using MPI with one of the following commands: or in parallel using MPI with one of the following commands:
.. code-block:: bash .. code-block:: bash
mpiexec -localonly 4 lmp -in in.file mpiexec -localonly 4 lmp -in in.file.lmp
mpiexec -np 4 lmp -in in.file mpiexec -np 4 lmp -in in.file.lmp
where in.file is the name of your LAMMPS input script. For the latter where ``in.file.lmp`` is the name of your LAMMPS input script. For the
case, you may be prompted to enter the password that you set during latter case, you may be prompted to enter the password that you set
installation of the MPI library software. during installation of the MPI library software.
In this mode, output may not immediately show up on the screen, so if In this mode, output may not immediately show up on the screen, so if
your input script takes a long time to execute, you may need to be your input script takes a long time to execute, you may need to be
patient before the output shows up. patient before the output shows up.
The parallel executable can also run on a single processor by typing Note that the parallel executable also includes OpenMP multi-threading
something like this: through both the OPENMP and the KOKKOS package, which can be combined
with MPI using something like:
.. code-block:: bash .. code-block:: bash
lmp -in in.lj mpiexec -localonly 2 lmp -in in.lj.lmp -pk omp 2 -sf omp
mpiexec -localonly 2 lmp -in in.lj.lmp -kokkos on t 2 -sf kk
Note that the parallel executable also includes OpenMP -------------
multi-threading, which can be combined with MPI using something like:
.. code-block:: bash
mpiexec -localonly 2 lmp -in in.lj -pk omp 2 -sf omp
MPI parallelization will work for *all* functionality in LAMMPS and in
many cases the MPI parallelization is more efficient than
multi-threading since LAMMPS was designed from ground up for MPI
parallelization using domain decomposition. Multi-threading is only
available for selected styles and implemented on top of the MPI
parallelization. Multi-threading is most useful for systems with large
load imbalances when using domain decomposition and a smaller number
of threads (<= 8).

View File

@ -1022,7 +1022,7 @@ regression tests with a given LAMMPS binary. The tool launches the
LAMMPS binary with any given input script under one of the `examples` LAMMPS binary with any given input script under one of the `examples`
subdirectories, and compares the thermo output in the generated log file subdirectories, and compares the thermo output in the generated log file
with those in the provided log file with the same number of processors with those in the provided log file with the same number of processors
ub the same subdirectory. If the differences between the actual and in the same subdirectory. If the differences between the actual and
reference values are within specified tolerances, the test is considered reference values are within specified tolerances, the test is considered
passed. For each test batch, that is, a set of example input scripts, passed. For each test batch, that is, a set of example input scripts,
the mpirun command, the LAMMPS command line arguments, and the the mpirun command, the LAMMPS command line arguments, and the

View File

@ -319,25 +319,34 @@ all types from 1 to :math:`N`. A leading asterisk means all types from
:math:`N` (inclusive). A middle asterisk means all types from m to n :math:`N` (inclusive). A middle asterisk means all types from m to n
(inclusive). (inclusive).
Currently *bond* does not support bond_style hybrid nor bond_style If :doc:`bond_style hybrid <bond_hybrid>` is used, *bstyle* should be a
hybrid/overlay as bond styles. The bond styles that currently work sub-style name. The bond styles that currently work with fix adapt are:
with fix_adapt are
+------------------------------------+------------+------------+ +-----------------------------------------------------+---------------------------+------------+
| :doc:`class2 <bond_class2>` | r0 | type bonds | | :doc:`class2 <bond_class2>` | r0 | type bonds |
+------------------------------------+------------+------------+ +-----------------------------------------------------+---------------------------+------------+
| :doc:`fene <bond_fene>` | k,r0 | type bonds | | :doc:`fene <bond_fene>` | k,r0 | type bonds |
+------------------------------------+------------+------------+ +-----------------------------------------------------+---------------------------+------------+
| :doc:`fene/expand <bond_fene_expand>` | k,r0,epsilon,sigma,shift | type bonds |
+-----------------------------------------------------+---------------------------+------------+
| :doc:`fene/nm <bond_fene>` | k,r0 | type bonds | | :doc:`fene/nm <bond_fene>` | k,r0 | type bonds |
+------------------------------------+------------+------------+ +-----------------------------------------------------+---------------------------+------------+
| :doc:`gromos <bond_gromos>` | k,r0 | type bonds | | :doc:`gromos <bond_gromos>` | k,r0 | type bonds |
+------------------------------------+------------+------------+ +-----------------------------------------------------+---------------------------+------------+
| :doc:`harmonic <bond_harmonic>` | k,r0 | type bonds | | :doc:`harmonic <bond_harmonic>` | k,r0 | type bonds |
+------------------------------------+------------+------------+ +-----------------------------------------------------+---------------------------+------------+
| :doc:`harmonic/restrain <bond_harmonic_restrain>` | k | type bonds |
+-----------------------------------------------------+---------------------------+------------+
| :doc:`harmonic/shift <bond_harmonic_shift>` | k,r0,r1 | type bonds |
+-----------------------------------------------------+---------------------------+------------+
| :doc:`harmonic/shift/cut <bond_harmonic_shift_cut>` | k,r0,r1 | type bonds |
+-----------------------------------------------------+---------------------------+------------+
| :doc:`mm3 <bond_mm3>` | k,r0 | type bonds |
+-----------------------------------------------------+---------------------------+------------+
| :doc:`morse <bond_morse>` | r0 | type bonds | | :doc:`morse <bond_morse>` | r0 | type bonds |
+------------------------------------+------------+------------+ +-----------------------------------------------------+---------------------------+------------+
| :doc:`nonlinear <bond_nonlinear>` | epsilon,r0 | type bonds | | :doc:`nonlinear <bond_nonlinear>` | epsilon,r0 | type bonds |
+------------------------------------+------------+------------+ +-----------------------------------------------------+---------------------------+------------+
---------- ----------
@ -357,15 +366,34 @@ all types from 1 to :math:`N`. A leading asterisk means all types from
:math:`N` (inclusive). A middle asterisk means all types from m to n :math:`N` (inclusive). A middle asterisk means all types from m to n
(inclusive). (inclusive).
Currently *angle* does not support angle_style hybrid nor angle_style If :doc:`angle_style hybrid <angle_hybrid>` is used, *astyle* should be a
hybrid/overlay as angle styles. The angle styles that currently work sub-style name. The angle styles that currently work with fix adapt are:
with fix_adapt are
+------------------------------------+----------+-------------+ +--------------------------------------------------------------------+-----------------+-------------+
| :doc:`harmonic <angle_harmonic>` | k,theta0 | type angles | | :doc:`harmonic <angle_harmonic>` | k,theta0 | type angles |
+------------------------------------+----------+-------------+ +--------------------------------------------------------------------+-----------------+-------------+
| :doc:`charmm <angle_charmm>` | k,theta0 | type angles |
+--------------------------------------------------------------------+-----------------+-------------+
| :doc:`class2 <angle_class2>` | k2,k3,k4,theta0 | type angles |
+--------------------------------------------------------------------+-----------------+-------------+
| :doc:`cosine <angle_cosine>` | k | type angles | | :doc:`cosine <angle_cosine>` | k | type angles |
+------------------------------------+----------+-------------+ +--------------------------------------------------------------------+-----------------+-------------+
| :doc:`cosine/periodic <angle_cosine_periodic>` | k,b,n | type angles |
+--------------------------------------------------------------------+-----------------+-------------+
| :doc:`cosine/squared/restricted <angle_cosine_squared_restricted>` | k,theta0 | type angles |
+--------------------------------------------------------------------+-----------------+-------------+
| :doc:`dipole <angle_dipole>` | k,gamma0 | type angles |
+--------------------------------------------------------------------+-----------------+-------------+
| :doc:`fourier <angle_fourier>` | k,c0,c1,c2 | type angles |
+--------------------------------------------------------------------+-----------------+-------------+
| :doc:`fourier/simple <angle_fourier_simple>` | k,c,n | type angles |
+--------------------------------------------------------------------+-----------------+-------------+
| :doc:`mm3 <angle_mm3>` | k,theta0 | type angles |
+--------------------------------------------------------------------+-----------------+-------------+
| :doc:`quartic <angle_quartic>` | k2,k3,k4,theta0 | type angles |
+--------------------------------------------------------------------+-----------------+-------------+
| :doc:`spica <angle_spica>` | k,theta0 | type angles |
+--------------------------------------------------------------------+-----------------+-------------+
Note that internally, theta0 is stored in radians, so the variable Note that internally, theta0 is stored in radians, so the variable
this fix uses to reset theta0 needs to generate values in radians. this fix uses to reset theta0 needs to generate values in radians.

View File

@ -50,8 +50,8 @@ Syntax
*intra_energy* value = intramolecular energy (energy units) *intra_energy* value = intramolecular energy (energy units)
*tfac_insert* value = scale up/down temperature of inserted atoms (unitless) *tfac_insert* value = scale up/down temperature of inserted atoms (unitless)
*overlap_cutoff* value = maximum pair distance for overlap rejection (distance units) *overlap_cutoff* value = maximum pair distance for overlap rejection (distance units)
*max* value = Maximum number of molecules allowed in the system *max* value = Maximum number of atoms allowed in the fix group (and region)
*min* value = Minimum number of molecules allowed in the system *min* value = Minimum number of atoms allowed in the fix group (and region)
Examples Examples
"""""""" """"""""
@ -380,10 +380,11 @@ an infinite positive energy to all new configurations that place any
pair of atoms closer than the specified overlap cutoff distance. pair of atoms closer than the specified overlap cutoff distance.
The *max* and *min* keywords allow for the restriction of the number of The *max* and *min* keywords allow for the restriction of the number of
atoms in the simulation. They automatically reject all insertion or atoms in the fix group (and region in case the *region* keyword is
deletion moves that would take the system beyond the set boundaries. used). They automatically reject all insertion or deletion moves that
Should the system already be beyond the boundary, only moves that bring would take the system beyond the set boundaries. Should the system
the system closer to the bounds may be accepted. already be beyond the boundary, only moves that bring the system closer
to the bounds may be accepted.
The *group* keyword adds all inserted atoms to the :doc:`group <group>` The *group* keyword adds all inserted atoms to the :doc:`group <group>`
of the group-ID value. The *grouptype* keyword adds all inserted atoms of the group-ID value. The *grouptype* keyword adds all inserted atoms

View File

@ -51,7 +51,7 @@ index file. When specifying group IDs, only those groups will be
written to the index file. In order to follow the Gromacs conventions, written to the index file. In order to follow the Gromacs conventions,
the group *all* will be renamed to *System* in the index file. the group *all* will be renamed to *System* in the index file.
The *ndx2group* command will create of update group definitions from The *ndx2group* command will create or update group definitions from
those stored in an index file. Without specifying any group IDs, all those stored in an index file. Without specifying any group IDs, all
groups except *System* will be read from the index file and the groups except *System* will be read from the index file and the
corresponding groups recreated. If a group of the same name already corresponding groups recreated. If a group of the same name already

View File

@ -115,10 +115,11 @@ to tell LAMMPS how many parallel files exist, via its specified
The format of the dump file is selected through the *format* keyword. The format of the dump file is selected through the *format* keyword.
If specified, it must be the last keyword used, since all remaining If specified, it must be the last keyword used, since all remaining
arguments are passed on to the dump reader. The *native* format is arguments are passed on to the dump reader. The *native* format is for
for native LAMMPS dump files, written with a :doc:`dump atom <dump>` native LAMMPS dump files, written with a :doc:`dump atom <dump>` or
or :doc:`dump custom <dump>` command. The *xyz* format is for generic XYZ :doc:`dump custom <dump>` command. The *xyz* format is for generic XYZ
formatted dump files. These formats take no additional values. formatted dump files (see details below). These formats take no
additional values.
The *molfile* format supports reading data through using the `VMD <vmd_>`_ The *molfile* format supports reading data through using the `VMD <vmd_>`_
molfile plugin interface. This dump reader format is only available, molfile plugin interface. This dump reader format is only available,
@ -230,23 +231,39 @@ will then have a label corresponding to the fix-ID rather than "x" or
"xs". The *label* keyword can also be used to specify new column "xs". The *label* keyword can also be used to specify new column
labels for fields *id* and *type*\ . labels for fields *id* and *type*\ .
For dump files in *xyz* format, only the *x*, *y*, and *z* fields are For dump files in *xyz* format, only the *type*, *x*, *y*, and *z*
supported. The dump file does not store atom IDs, so these are fields are supported. There are many variants of the XYZ file format.
assigned consecutively to the atoms as they appear in the dump file, LAMMPS will read the number of atoms from the first line of each frame,
starting from 1. Thus you should ensure that order of atoms is ignore the second (title) line, and then read one line for each atom in the format:
consistent from snapshot to snapshot in the XYZ dump file. See
the :doc:`dump_modify sort <dump_modify>` command if the XYZ dump file .. parsed-literal::
was written by LAMMPS.
<label> <x coordinate> <y coordinate> <z coordinate>
If the atom label is a numeric integer (like with XYZ files
created with default settings by :doc:`dump style <dump>` *xyz*), that
number will be used as the atom type. If the atom label is a string,
then a type map must be created using the :doc:`labelmap command
<labelmap>`. This map needs to associate each (numeric) atom type with
a string label. The numeric atom type is stored internally.
The xyz format dump file does not store atom IDs, so these are assigned
consecutively to the atoms as they appear in the dump file, starting
from 1. Thus you should ensure that the order of atoms is consistent
from snapshot to snapshot in the XYZ dump file. See the
:doc:`dump_modify sort <dump_modify>` command if the XYZ dump file was
written by LAMMPS.
For dump files in *molfile* format, the *x*, *y*, *z*, *vx*, *vy*, and For dump files in *molfile* format, the *x*, *y*, *z*, *vx*, *vy*, and
*vz* fields can be specified. However, not all molfile formats store *vz* fields can be specified. However, not all molfile formats store
velocities, or their respective plugins may not support reading of velocities, or their respective plugins may not support reading of
velocities. The molfile dump files do not store atom IDs, so these velocities. The molfile dump files do not store atom IDs, so these are
are assigned consecutively to the atoms as they appear in the dump assigned consecutively to the atoms as they appear in the dump file,
file, starting from 1. Thus you should ensure that order of atoms are starting from 1. Thus you should ensure that the order of atoms is
consistent from snapshot to snapshot in the molfile dump file. consistent from snapshot to snapshot in the molfile dump file. See the
See the :doc:`dump_modify sort <dump_modify>` command if the dump file :doc:`dump_modify sort <dump_modify>` command if the dump file was
was written by LAMMPS. written by LAMMPS.
The *adios* format supports all fields that the *native* format supports The *adios* format supports all fields that the *native* format supports
except for the *q* charge field. except for the *q* charge field.

View File

@ -18,7 +18,7 @@ Syntax
*delete* = no args *delete* = no args
*block* args = xlo xhi ylo yhi zlo zhi *block* args = xlo xhi ylo yhi zlo zhi
xlo,xhi,ylo,yhi,zlo,zhi = bounds of block in all dimensions (distance units) xlo,xhi,ylo,yhi,zlo,zhi = bounds of block in all dimensions (distance units)
xlo,xhi,ylo,yhi,zlo,zhi can be a variable xlo,xhi,ylo,yhi,zlo,zhi can be a variable (see below)
*cone* args = dim c1 c2 radlo radhi lo hi *cone* args = dim c1 c2 radlo radhi lo hi
dim = *x* or *y* or *z* = axis of cone dim = *x* or *y* or *z* = axis of cone
c1,c2 = coords of cone axis in other 2 dimensions (distance units) c1,c2 = coords of cone axis in other 2 dimensions (distance units)
@ -38,6 +38,7 @@ Syntax
*plane* args = px py pz nx ny nz *plane* args = px py pz nx ny nz
px,py,pz = point on the plane (distance units) px,py,pz = point on the plane (distance units)
nx,ny,nz = direction normal to plane (distance units) nx,ny,nz = direction normal to plane (distance units)
px,py,pz can be a variable (see below)
*prism* args = xlo xhi ylo yhi zlo zhi xy xz yz *prism* args = xlo xhi ylo yhi zlo zhi xy xz yz
xlo,xhi,ylo,yhi,zlo,zhi = bounds of untilted prism (distance units) xlo,xhi,ylo,yhi,zlo,zhi = bounds of untilted prism (distance units)
xy = distance to tilt y in x direction (distance units) xy = distance to tilt y in x direction (distance units)
@ -206,9 +207,10 @@ parameters a,b,c for style *ellipsoid*, can each be specified as an
equal-style :doc:`variable <variable>`. Likewise, for style *sphere* equal-style :doc:`variable <variable>`. Likewise, for style *sphere*
and *ellipsoid* the x-, y-, and z- coordinates of the center of the and *ellipsoid* the x-, y-, and z- coordinates of the center of the
sphere/ellipsoid can be specified as an equal-style variable. And for sphere/ellipsoid can be specified as an equal-style variable. And for
style *cylinder* the two center positions c1 and c2 for the location style *cylinder* the two center positions c1 and c2 for the location of
of the cylinder axes can be specified as a equal-style variable. For style *cone* the cylinder axes can be specified as an equal-style variable. For style
all properties can be defined via equal-style variables. *cone* all properties can be defined via equal-style variables. For
style *plane* the point can be defined via equal-style variables.
If the value is a variable, it should be specified as v_name, where If the value is a variable, it should be specified as v_name, where
name is the variable name. In this case, the variable will be name is the variable name. In this case, the variable will be

View File

@ -141,6 +141,7 @@ arg
arge arge
args args
argv argv
Armv
arrhenius arrhenius
Arun Arun
arXiv arXiv

View File

@ -13,11 +13,11 @@ neigh_modify delay 0 every 1 check yes
neigh_modify exclude molecule/intra all neigh_modify exclude molecule/intra all
pair_style line/lj 2.5 pair_style line/lj 2.5
pair_coeff * * 1.0 1.0 1.0 0.25 2.5 pair_coeff * * 0.25 0.25 1.0 0.25 2.5
fix 2 all rigid molecule langevin 2.0 2.0 1.0 492983 fix 2 all rigid molecule langevin 2.0 2.0 1.0 492983
fix 3 all deform 1 x scale 0.3 y scale 0.3 fix 3 all deform 5 x scale 0.8 y scale 0.8
fix 4 all enforce2d fix 4 all enforce2d
compute 10 all property/atom end1x end1y end2x end2y compute 10 all property/atom end1x end1y end2x end2y
@ -26,7 +26,7 @@ compute 10 all property/atom end1x end1y end2x end2y
#dump 2 all custom 500 dump1.line id type & #dump 2 all custom 500 dump1.line id type &
# c_10[1] c_10[2] c_10[3] c_10[4] # c_10[1] c_10[2] c_10[3] c_10[4]
timestep 0.004 timestep 0.001
compute 1 all erotate/asphere compute 1 all erotate/asphere
compute 2 all ke compute 2 all ke

View File

@ -24,7 +24,7 @@ set group small mass 0.01
pair_style lj/cut 1.5 pair_style lj/cut 1.5
pair_coeff 1 1 1.0 1.0 pair_coeff 1 1 1.0 1.0
pair_coeff 2 2 0.0 1.0 0.0 pair_coeff 2 2 0.0 1.0 0.0
pair_coeff 1 2 0.0 1.0 pair_coeff 1 2 1.0 1.0
delete_atoms overlap 1.5 small big delete_atoms overlap 1.5 small big
@ -34,7 +34,7 @@ reset_timestep 0
velocity small create 1.44 87287 loop geom velocity small create 1.44 87287 loop geom
neighbor 0.3 multi neighbor 0.8 multi
neigh_modify delay 0 every 1 check yes neigh_modify delay 0 every 1 check yes
neigh_modify exclude molecule/intra big include big neigh_modify exclude molecule/intra big include big
@ -46,19 +46,19 @@ neigh_modify include big
pair_style line/lj 2.5 pair_style line/lj 2.5
pair_coeff 1 1 1.0 1.0 1.0 1.0 2.5 pair_coeff 1 1 1.0 1.0 1.0 1.0 2.5
pair_coeff 2 2 0.0 0.0 0.0 1.0 0.0 pair_coeff 2 2 0.0 0.0 0.0 1.0 0.0
pair_coeff 1 2 1.0 0.0 0.0 1.0 0.0 pair_coeff 1 2 1.0 0.2 1.0 0.2 2.5
# use fix SRD to push small particles out from inside big ones # use fix SRD to push small particles out from inside big ones
# if comment out, big particles won't see SRD particles # if comment out, big particles won't see SRD particles
timestep 0.001 timestep 0.0001
fix 1 big rigid molecule fix 1 big rigid molecule
fix 2 small srd 20 big 1.0 0.25 49894 & fix 2 small srd 20 big 1.0 0.25 49894 &
search 0.2 cubic warn 0.0001 shift yes 49829 & search 0.2 cubic warn 0.0001 shift yes 49829 &
overlap yes collision noslip overlap yes collision noslip
fix 3 all deform 1 x scale 0.35 y scale 0.35 fix 3 all deform 1 x scale 1.25 y scale 1.25
fix 4 all enforce2d fix 4 all enforce2d
# diagnostics # diagnostics
@ -96,12 +96,12 @@ change_box all triclinic
fix 2 small srd 20 big 1.0 0.25 49894 & fix 2 small srd 20 big 1.0 0.25 49894 &
search 0.2 cubic warn 0.0001 shift yes 49829 & search 0.2 cubic warn 0.0001 shift yes 49829 &
overlap yes collision noslip tstat yes overlap yes collision noslip #tstat yes
#dump 1 all custom 500 dump2.atom.srd id type x y z ix iy iz #dump 1 all custom 500 dump2.atom.srd id type x y z ix iy iz
#dump 2 all custom 500 dump2.line.srd id type & #dump 2 all custom 500 dump2.line.srd id type &
# c_10[1] c_10[2] c_10[3] c_10[4] # c_10[1] c_10[2] c_10[3] c_10[4]
fix 3 all deform 1 xy erate 0.05 units box remap v fix 3 all deform 1 xy erate 0.0002 units box remap v
run 40000 run 30000

View File

@ -1,213 +0,0 @@
LAMMPS (1 Feb 2014)
# Aspherical shear demo - 2d line box and triangle mixture, implicit solvent
units lj
atom_style line
dimension 2
read_data data.line
orthogonal box = (-22.5539 -22.5539 -0.5) to (22.5539 22.5539 0.5)
4 by 2 by 1 MPI processor grid
reading atoms ...
350 atoms
350 lines
velocity all create 1.44 320984 loop geom
neighbor 0.3 bin
neigh_modify delay 0 every 1 check yes
neigh_modify exclude molecule all
pair_style line/lj 2.5
pair_coeff * * 1.0 0.25
fix 2 all rigid molecule langevin 2.0 2.0 1.0 492983
100 rigid bodies with 350 atoms
fix 3 all deform 1 x scale 0.3 y scale 0.3
fix 4 all enforce2d
compute 10 all property/atom end1x end1y end2x end2y
#dump 1 all custom 500 dump1.atom id type x y z ix iy iz
#dump 2 all custom 500 dump1.line id type # c_10[1] c_10[2] c_10[3] c_10[4]
timestep 0.004
compute 1 all erotate/asphere
compute 2 all ke
compute 3 all pe
variable toteng equal (c_1+c_2+c_3)/atoms
thermo 1000
thermo_style custom step temp f_2 pe ke c_1 c_2 c_3 v_toteng
run 10000
Memory usage per processor = 2.6072 Mbytes
Step Temp 2 PotEng KinEng 1 2 3 toteng
0 0 1.2780105 0 -0 0.037823677 0.50989511 0 0.54771879
1000 0 1.9896906 -0.13333756 -0 0.12630626 0.72641827 -0.13333756 0.71938697
2000 0 2.0408541 -0.24906647 -0 0.13199238 0.74265938 -0.24906647 0.62558529
3000 0 1.9921323 -0.39759798 -0 0.11671173 0.73705927 -0.39759798 0.45617302
4000 0 2.1392159 -0.36475197 -0 0.11923802 0.7975688 -0.36475197 0.55205485
5000 0 2.139715 -0.52582091 -0 0.15846417 0.75855653 -0.52582091 0.39119979
6000 0 2.1313904 -0.65532027 -0 0.11090422 0.80254883 -0.65532027 0.25813278
7000 0 1.9416614 -0.81322598 -0 0.11722471 0.71491587 -0.81322598 0.018914608
8000 0 1.9388183 -1.0581149 -0 0.10142762 0.72949452 -1.0581149 -0.22719275
9000 0 2.2830265 -1.583347 -0 0.14583927 0.83260066 -1.583347 -0.60490709
10000 0 4.1416666 -2.6034045 -0 0.18839177 1.5866082 -2.6034045 -0.82840455
Loop time of 2.84713 on 8 procs for 10000 steps with 350 atoms
Pair time (%) = 1.26607 (44.4682)
Neigh time (%) = 0.0163046 (0.572669)
Comm time (%) = 0.786148 (27.612)
Outpt time (%) = 0.000368953 (0.0129588)
Other time (%) = 0.778241 (27.3342)
Nlocal: 43.75 ave 51 max 39 min
Histogram: 1 2 0 2 0 1 1 0 0 1
Nghost: 170.25 ave 180 max 160 min
Histogram: 2 1 0 0 0 0 2 0 1 2
Neighs: 963.125 ave 1209 max 767 min
Histogram: 2 0 2 0 1 0 1 0 1 1
Total # of neighbors = 7705
Ave neighs/atom = 22.0143
Neighbor list builds = 987
Dangerous builds = 0
#undump 1
#undump 2
unfix 3
change_box all triclinic
triclinic box = (-6.76616 -6.76616 -0.5) to (6.76616 6.76616 0.5) with tilt (0 0 0)
#dump 1 all custom 500 dump2.atom id type x y z ix iy iz
#dump 2 all custom 500 dump2.line id type # c_10[1] c_10[2] c_10[3] c_10[4]
fix 3 all deform 1 xy erate 0.01 units box
run 100000
Memory usage per processor = 2.75978 Mbytes
Step Temp 2 PotEng KinEng 1 2 3 toteng
10000 0 4.1416666 -2.6024092 -0 0.18839177 1.5866082 -2.6024092 -0.82740923
11000 0 1.8408319 -3.1031477 -0 0.12073234 0.66819561 -3.1031477 -2.3142198
12000 0 2.0793172 -3.0329681 -0 0.086927592 0.80420833 -3.0329681 -2.1418322
13000 0 2.2022136 -2.99776 -0 0.14309291 0.8007129 -2.99776 -2.0539542
14000 0 1.9510757 -3.094649 -0 0.09482969 0.74134559 -3.094649 -2.2584737
15000 0 1.9874689 -3.1431753 -0 0.1083061 0.7434663 -3.1431753 -2.2914029
16000 0 1.8484778 -2.9491537 -0 0.079102883 0.71310191 -2.9491537 -2.1569489
17000 0 2.1978438 -2.9675694 -0 0.11677634 0.82515673 -2.9675694 -2.0256363
18000 0 2.0293397 -2.9860257 -0 0.1287845 0.7409325 -2.9860257 -2.1163087
19000 0 2.0077219 -3.005622 -0 0.12697603 0.7334762 -3.005622 -2.1451698
20000 0 2.1806369 -3.0622132 -0 0.11066657 0.82389212 -3.0622132 -2.1276545
21000 0 1.8156509 -3.1031481 -0 0.10227614 0.67585994 -3.1031481 -2.325012
22000 0 2.1028516 -3.0861182 -0 0.098877162 0.80234493 -3.0861182 -2.1848961
23000 0 1.8994891 -3.0110243 -0 0.10961187 0.70445488 -3.0110243 -2.1969576
24000 0 1.9305389 -3.0057136 -0 0.11735151 0.7100223 -3.0057136 -2.1783398
25000 0 1.9553918 -3.0848948 -0 0.13217467 0.70585039 -3.0848948 -2.2468697
26000 0 1.8903754 -2.9543658 -0 0.099925113 0.71023579 -2.9543658 -2.1442049
27000 0 2.2624684 -3.2416154 -0 0.11398815 0.85564117 -3.2416154 -2.2719861
28000 0 2.0335234 -3.1795174 -0 0.10291986 0.76859015 -3.1795174 -2.3080074
29000 0 1.7056403 -3.1198739 -0 0.076174496 0.65481419 -3.1198739 -2.3888853
30000 0 2.1203465 -3.0863113 -0 0.11355683 0.79516311 -3.0863113 -2.1775914
31000 0 1.8446708 -3.2764357 -0 0.11006455 0.68050865 -3.2764357 -2.4858625
32000 0 1.7947968 -3.081031 -0 0.087540776 0.68165784 -3.081031 -2.3118324
33000 0 2.2576228 -3.1042222 -0 0.15666855 0.81088407 -3.1042222 -2.1366696
34000 0 1.8522306 -3.0037311 -0 0.08276626 0.71104684 -3.0037311 -2.209918
35000 0 2.0611686 -2.9877406 -0 0.10822003 0.77513794 -2.9877406 -2.1043826
36000 0 2.0739798 -2.981184 -0 0.11784198 0.77100651 -2.981184 -2.0923355
37000 0 2.2120215 -2.8961216 -0 0.13172943 0.8162798 -2.8961216 -1.9481124
38000 0 2.1097357 -2.9746777 -0 0.11881736 0.78535507 -2.9746777 -2.0705053
39000 0 1.8928127 -2.9482169 -0 0.10915752 0.70204792 -2.9482169 -2.1370115
40000 0 1.8515483 -3.003524 -0 0.096981256 0.69653943 -3.003524 -2.2100033
41000 0 1.8569065 -3.0834675 -0 0.10562739 0.6901897 -3.0834675 -2.2876504
42000 0 2.2638459 -3.0577224 -0 0.10896442 0.86125524 -3.0577224 -2.0875027
43000 0 2.0992151 -3.1219185 -0 0.13280434 0.76685929 -3.1219185 -2.2222548
44000 0 2.3479986 -3.0702445 -0 0.14438131 0.86190379 -3.0702445 -2.0639594
45000 0 2.0295235 -2.9619688 -0 0.13726365 0.73253213 -2.9619688 -2.092173
46000 0 2.0597998 -3.1282569 -0 0.10139093 0.7813804 -3.1282569 -2.2454856
47000 0 1.8173541 -3.1122647 -0 0.10380346 0.67506259 -3.1122647 -2.3333986
48000 0 1.9650208 -3.22153 -0 0.10115952 0.74099226 -3.22153 -2.3793782
49000 0 1.8836303 -3.0781944 -0 0.10561306 0.70165705 -3.0781944 -2.2709243
50000 0 1.7799289 -3.1090208 -0 0.10522105 0.65760561 -3.1090208 -2.3461941
51000 0 1.7270244 -2.894789 -0 0.079316066 0.66083727 -2.894789 -2.1546357
52000 0 2.1036512 -3.0708266 -0 0.11762021 0.7839446 -3.0708266 -2.1692618
53000 0 2.1749106 -3.043193 -0 0.11908953 0.81301499 -3.043193 -2.1110885
54000 0 1.7245018 -3.1528646 -0 0.11118993 0.62788226 -3.1528646 -2.4137924
55000 0 1.7599209 -3.0543405 -0 0.098996756 0.65525507 -3.0543405 -2.3000886
56000 0 1.9085953 -3.1072383 -0 0.10931083 0.70865859 -3.1072383 -2.2892689
57000 0 1.9682028 -3.131335 -0 0.094973379 0.74854212 -3.131335 -2.2878195
58000 0 1.7813545 -3.167275 -0 0.085523136 0.6779145 -3.167275 -2.4038374
59000 0 2.0324497 -3.1103534 -0 0.13343285 0.73761703 -3.1103534 -2.2393035
60000 0 1.697349 -3.152831 -0 0.086035815 0.64139945 -3.152831 -2.4253957
61000 0 2.061537 -3.0730484 -0 0.12753143 0.75598441 -3.0730484 -2.1895325
62000 0 1.8186587 -3.1590894 -0 0.096701457 0.68272371 -3.1590894 -2.3796643
63000 0 1.9692063 -3.1053354 -0 0.094864849 0.7490807 -3.1053354 -2.2613899
64000 0 2.0113722 -3.0898117 -0 0.12640828 0.73560838 -3.0898117 -2.227795
65000 0 1.9350085 -3.2290712 -0 0.11850297 0.71078638 -3.2290712 -2.3997819
66000 0 2.0569306 -3.0489252 -0 0.12731012 0.75423156 -3.0489252 -2.1673835
67000 0 2.0119071 -3.10894 -0 0.099142151 0.76310375 -3.10894 -2.2466941
68000 0 2.0615321 -3.167552 -0 0.11066625 0.77284751 -3.167552 -2.2840383
69000 0 1.8731077 -3.1864825 -0 0.12982742 0.672933 -3.1864825 -2.3837221
70000 0 2.109088 -3.1787326 -0 0.13107619 0.77281866 -3.1787326 -2.2748377
71000 0 1.9764202 -3.0731116 -0 0.12035585 0.72668137 -3.0731116 -2.2260744
72000 0 1.6460591 -3.0865952 -0 0.09486718 0.61058673 -3.0865952 -2.3811413
73000 0 1.9756864 -3.0431747 -0 0.1041775 0.74254526 -3.0431747 -2.196452
74000 0 2.0553658 -3.1290715 -0 0.12267262 0.75819842 -3.1290715 -2.2482004
75000 0 1.550997 -2.9813403 -0 0.094350599 0.57036238 -2.9813403 -2.3166273
76000 0 1.9824851 -3.0057088 -0 0.11621875 0.73341774 -3.0057088 -2.1560723
77000 0 1.8451505 -3.0063772 -0 0.12602143 0.66475735 -3.0063772 -2.2155984
78000 0 1.934755 -3.0603773 -0 0.11800375 0.71117696 -3.0603773 -2.2311966
79000 0 2.1982093 -3.095224 -0 0.12840585 0.81368383 -3.095224 -2.1531343
80000 0 1.7547828 -3.0620364 -0 0.092552269 0.65949751 -3.0620364 -2.3099866
81000 0 2.1064588 -3.1631358 -0 0.11415036 0.78861768 -3.1631358 -2.2603677
82000 0 2.0039288 -3.1190532 -0 0.13374009 0.72508654 -3.1190532 -2.2602266
83000 0 1.7749465 -3.2172309 -0 0.11017601 0.65051537 -3.2172309 -2.4565396
84000 0 1.7865147 -3.1433093 -0 0.086859958 0.67878922 -3.1433093 -2.3776602
85000 0 1.4541199 -3.0123872 -0 0.096301993 0.52689225 -3.0123872 -2.3891929
86000 0 2.0345567 -2.9577061 -0 0.11756878 0.7543841 -2.9577061 -2.0857532
87000 0 1.9655671 -3.0235755 -0 0.13068174 0.71170417 -3.0235755 -2.1811896
88000 0 2.1238733 -3.0972095 -0 0.15660293 0.75362848 -3.0972095 -2.1869781
89000 0 1.93413 -3.0018414 -0 0.11736651 0.71154634 -3.0018414 -2.1729285
90000 0 2.0106701 -3.0778989 -0 0.1150105 0.74670527 -3.0778989 -2.2161831
91000 0 1.9700224 -3.0072749 -0 0.1200151 0.72428023 -3.0072749 -2.1629796
92000 0 2.3062898 -3.1245149 -0 0.16045862 0.82795128 -3.1245149 -2.136105
93000 0 1.9977984 -3.1072212 -0 0.098381856 0.75781746 -3.1072212 -2.2510219
94000 0 1.9684844 -3.0923519 -0 0.11228362 0.73135255 -3.0923519 -2.2487157
95000 0 1.8913269 -3.1044766 -0 0.088944817 0.72162386 -3.1044766 -2.293908
96000 0 2.139258 -2.9032081 -0 0.12122357 0.7956013 -2.9032081 -1.9863833
97000 0 2.1549839 -3.0073823 -0 0.14177897 0.78178555 -3.0073823 -2.0838178
98000 0 2.001084 -3.0411655 -0 0.16549603 0.69211139 -3.0411655 -2.1835581
99000 0 2.269108 -3.0749812 -0 0.11370514 0.85876972 -3.0749812 -2.1025064
100000 0 2.0270482 -3.0802101 -0 0.098599302 0.77013563 -3.0802101 -2.2114752
101000 0 1.9412796 -3.1543623 -0 0.11953004 0.71244692 -3.1543623 -2.3223853
102000 0 2.0146461 -3.0140006 -0 0.10152606 0.7618937 -3.0140006 -2.1505808
103000 0 1.7377282 -3.1862597 -0 0.082111131 0.66262952 -3.1862597 -2.441519
104000 0 1.7973897 -3.1055088 -0 0.093370304 0.67693958 -3.1055088 -2.3351989
105000 0 2.2615194 -2.9636424 -0 0.12584102 0.84338157 -2.9636424 -1.9944198
106000 0 1.8974182 -2.9505576 -0 0.085928679 0.72725057 -2.9505576 -2.1373783
107000 0 2.0691161 -3.0804349 -0 0.12411017 0.76265389 -3.0804349 -2.1936708
108000 0 2.0457472 -2.981702 -0 0.09308074 0.78366806 -2.981702 -2.1049532
109000 0 1.6610604 -3.1463569 -0 0.066318676 0.64556436 -3.1463569 -2.4344738
110000 0 1.9606721 -3.080013 -0 0.13164067 0.70864736 -3.080013 -2.2397249
Loop time of 62.2251 on 8 procs for 100000 steps with 350 atoms
Pair time (%) = 43.4946 (69.8987)
Neigh time (%) = 0.395421 (0.635469)
Comm time (%) = 10.3551 (16.6414)
Outpt time (%) = 0.00358662 (0.00576394)
Other time (%) = 7.97644 (12.8187)
Nlocal: 43.75 ave 51 max 33 min
Histogram: 1 0 1 0 0 2 0 1 2 1
Nghost: 168.375 ave 180 max 155 min
Histogram: 1 1 0 0 1 0 2 2 0 1
Neighs: 971 ave 1278 max 631 min
Histogram: 1 1 0 1 0 1 2 1 0 1
Total # of neighbors = 7768
Ave neighs/atom = 22.1943
Neighbor list builds = 7621
Dangerous builds = 0

View File

@ -1,244 +0,0 @@
LAMMPS (1 Feb 2014)
# Aspherical shear demo - 2d line boxes, solvated by SRD particles
units lj
atom_style line
atom_modify first big
dimension 2
read_data data.line.srd
orthogonal box = (-28.7968 -28.7968 -0.5) to (28.7968 28.7968 0.5)
4 by 2 by 1 MPI processor grid
reading atoms ...
400 atoms
400 lines
# add small particles as hi density lattice
lattice sq 0.4
Lattice spacing in x,y,z = 1.58114 1.58114 1.58114
region plane block INF INF INF INF -0.001 0.001
lattice sq 10.0
Lattice spacing in x,y,z = 0.316228 0.316228 0.316228
create_atoms 2 region plane
Created 33489 atoms
group big type 1
400 atoms in group big
group small type 2
33489 atoms in group small
set group small mass 0.01
33489 settings made for mass
# delete overlaps
# must set 1-2 cutoff to non-zero value
pair_style lj/cut 1.5
pair_coeff 1 1 1.0 1.0
pair_coeff 2 2 0.0 1.0 0.0
pair_coeff 1 2 0.0 1.0
delete_atoms overlap 1.5 small big
Deleted 13605 atoms, new total = 20284
# SRD run
reset_timestep 0
velocity small create 1.44 87287 loop geom
neighbor 0.3 bin
neigh_modify delay 0 every 1 check yes
neigh_modify exclude molecule big include big
communicate multi group big vel yes
neigh_modify include big
# no pairwise interactions with small particles
pair_style line/lj 2.5
pair_coeff 1 1 1.0 1.0
pair_coeff 2 2 0.0 1.0 0.0
pair_coeff 1 2 0.0 1.0 0.0
# use fix SRD to push small particles out from inside big ones
# if comment out, big particles won't see SRD particles
timestep 0.001
fix 1 big rigid molecule
100 rigid bodies with 400 atoms
fix 2 small srd 20 big 1.0 0.25 49894 search 0.2 cubic warn 0.0001 shift yes 49829 overlap yes collision noslip
fix 3 all deform 1 x scale 0.35 y scale 0.35
fix 4 all enforce2d
# diagnostics
compute tsmall small temp/deform
compute tbig big temp
variable pebig equal pe*atoms/count(big)
variable ebig equal etotal*atoms/count(big)
compute 1 big erotate/asphere
compute 2 all ke
compute 3 all pe
variable toteng equal (c_1+c_2+c_3)/atoms
thermo 1000
thermo_style custom step temp c_tsmall f_2[9] c_1 etotal v_pebig v_ebig press
thermo_modify temp tbig
WARNING: Temperature for thermo pressure is not for group all (../thermo.cpp:439)
compute 10 big property/atom end1x end1y end2x end2y
#dump 1 all custom 500 dump1.atom.srd id type x y z ix iy iz
#dump 2 all custom 500 dump1.line.srd id type # c_10[1] c_10[2] c_10[3] c_10[4]
run 10000
WARNING: Using fix srd with box deformation but no SRD thermostat (../fix_srd.cpp:385)
SRD info:
SRD/big particles = 19884 400
big particle diameter max/min = 1.99882 0.503306
SRD temperature & lamda = 1 0.2
SRD max distance & max velocity = 0.8 40
SRD grid counts: 230 230 1
SRD grid size: request, actual (xyz) = 0.25, 0.250407 0.250407 1
SRD per actual grid cell = 0.444963
SRD viscosity = 4.2356
big/SRD mass density ratio = 14.0918
WARNING: SRD bin size for fix srd differs from user request (../fix_srd.cpp:2853)
WARNING: Fix srd grid size > 1/4 of big particle diameter (../fix_srd.cpp:2875)
# of rescaled SRD velocities = 0
ave/max small velocity = 15.906 29.1054
ave/max big velocity = 0 0
WARNING: Using compute temp/deform with inconsistent fix deform remap option (../compute_temp_deform.cpp:76)
Memory usage per processor = 7.79007 Mbytes
Step Temp tsmall 2[9] 1 TotEng pebig ebig Press
0 0 1.4528554 0 0 0 0 0 0
1000 0 1.1122612 1.1071958 0.00058011072 0 0 0 0.32625408
2000 0 1.0254475 1.0231236 0.00072347646 -2.3144253e-06 -0.00011736451 -0.00011736451 0.44526308
3000 0 1.0089214 0.99966408 0.00061511441 -0.00023298189 -0.011814512 -0.011814512 0.87208025
4000 0 1.0066185 0.99391102 0.00079065587 -0.0021630644 -0.10968899 -0.10968899 1.3901494
5000 0 1.0109003 1.0009124 0.00071588733 -0.0040023493 -0.20295913 -0.20295913 1.2736445
6000 0 1.0119642 1.0105049 0.00080741361 -0.0067908165 -0.34436231 -0.34436231 2.5492857
7000 0 1.0048989 0.98912274 0.00098851463 -0.010933999 -0.55446309 -0.55446309 3.5707613
8000 0 1.0021597 0.99377939 0.0008405671 -0.015690841 -0.79568252 -0.79568252 6.685381
9000 0 1.0197546 1.00114 0.0012645557 -0.021124602 -1.0712286 -1.0712286 18.907993
10000 0 1.0401079 1.0184189 0.0036111452 0.0099395852 0.50403637 0.50403637 96.909257
Loop time of 3.93391 on 8 procs for 10000 steps with 20284 atoms
Pair time (%) = 0.110261 (2.80284)
Neigh time (%) = 0.32853 (8.35124)
Comm time (%) = 0.199613 (5.07416)
Outpt time (%) = 0.00108692 (0.0276295)
Other time (%) = 3.29442 (83.7441)
Nlocal: 2535.5 ave 2850 max 2226 min
Histogram: 1 0 1 1 1 1 1 0 1 1
Nghost: 109.25 ave 117 max 102 min
Histogram: 1 0 2 0 1 1 1 1 0 1
Neighs: 526 ave 665 max 407 min
Histogram: 1 2 0 1 0 1 1 1 0 1
Total # of neighbors = 4208
Ave neighs/atom = 0.207454
Neighbor list builds = 522
Dangerous builds = 0
#undump 1
#undump 2
unfix 3
change_box all triclinic
triclinic box = (-10.0789 -10.0789 -0.5) to (10.0789 10.0789 0.5) with tilt (0 0 0)
fix 2 small srd 20 big 1.0 0.25 49894 search 0.2 cubic warn 0.0001 shift yes 49829 overlap yes collision noslip tstat yes
#dump 1 all custom 500 dump2.atom.srd id type x y z ix iy iz
#dump 2 all custom 500 dump2.line.srd id type # c_10[1] c_10[2] c_10[3] c_10[4]
fix 3 all deform 1 xy erate 0.05 units box remap v
run 40000
SRD info:
SRD/big particles = 19884 400
big particle diameter max/min = 1.99882 0.503306
SRD temperature & lamda = 1 0.2
SRD max distance & max velocity = 0.8 40
SRD grid counts: 81 81 1
SRD grid size: request, actual (xyz) = 0.25, 0.248861 0.248861 1
SRD per actual grid cell = -11.3331
SRD viscosity = -1920.36
big/SRD mass density ratio = -0.546468
WARNING: SRD bin size for fix srd differs from user request (../fix_srd.cpp:2853)
WARNING: Fix srd grid size > 1/4 of big particle diameter (../fix_srd.cpp:2875)
WARNING: Fix srd viscosity < 0.0 due to low SRD density (../fix_srd.cpp:2877)
# of rescaled SRD velocities = 1
ave/max small velocity = 12.724 40
ave/max big velocity = 1.54523 5.36901
Memory usage per processor = 4.23847 Mbytes
Step Temp tsmall 2[9] 1 TotEng pebig ebig Press
10000 0 1.0277413 0 0.0036111452 0.010135973 0.51399517 0.51399517 60.794865
11000 0 1.0024742 1 0.00040482851 -0.031643325 -1.604633 -1.604633 55.531632
12000 0 1.0036177 1 0.00036416993 -0.030844063 -1.5641024 -1.5641024 56.694788
13000 0 1.0043067 1 0.00068862721 -0.030673787 -1.5554677 -1.5554677 56.852537
14000 0 1.0042533 1 0.00050997053 -0.035967153 -1.8238943 -1.8238943 51.254109
15000 0 1.003956 1 0.00048458218 -0.038060147 -1.9300301 -1.9300301 50.346943
16000 0 1.0059929 1 0.0004740426 -0.037433402 -1.8982478 -1.8982478 49.82532
17000 0 1.0042401 1 0.00039855238 -0.034314446 -1.7400855 -1.7400855 48.430648
18000 0 1.0025952 1 0.00046207703 -0.034166102 -1.732563 -1.732563 53.883613
19000 0 1.0025881 1 0.00034818943 -0.037341367 -1.8935807 -1.8935807 48.90662
20000 0 1.0009932 1 0.00031244041 -0.035274728 -1.7887814 -1.7887814 47.665935
21000 0 1.0025603 1 0.0005514826 -0.034350221 -1.7418997 -1.7418997 63.290704
22000 0 1.0038575 1 0.00034453716 -0.03576596 -1.8136918 -1.8136918 49.805726
23000 0 1.0030321 1 0.00049535709 -0.035873083 -1.819124 -1.819124 48.973244
24000 0 1.0048593 1 0.00034225992 -0.03322158 -1.6846663 -1.6846663 52.557383
25000 0 1.0024423 1 0.00059157362 -0.030152319 -1.5290241 -1.5290241 56.733821
26000 0 1.0022703 1 0.00047512976 -0.029563064 -1.499143 -1.499143 56.271943
27000 0 1.0024537 1 0.00054158319 -0.02957039 -1.4995145 -1.4995145 55.246787
28000 0 1.0023325 1 0.00051129428 -0.035115211 -1.7806924 -1.7806924 49.718172
29000 0 1.002865 1 0.00056728135 -0.035484703 -1.7994293 -1.7994293 48.387401
30000 0 1.0022693 1 0.00040284402 -0.033844303 -1.7162446 -1.7162446 50.486676
31000 0 1.0037106 1 0.00056291948 -0.036529699 -1.852421 -1.852421 52.011803
32000 0 1.0022542 1 0.00041877447 -0.036903458 -1.8713743 -1.8713743 52.712289
33000 0 1.0021364 1 0.00040871451 -0.033616728 -1.7047043 -1.7047043 55.199593
34000 0 1.003779 1 0.00040667148 -0.031745412 -1.6098098 -1.6098098 51.055261
35000 0 1.0015547 1 0.00042509365 -0.033603064 -1.7040114 -1.7040114 53.887389
36000 0 1.0024849 1 0.00040589851 -0.032151258 -1.6303903 -1.6303903 55.673615
37000 0 1.0038204 1 0.00040542177 -0.033643242 -1.7060488 -1.7060488 51.834694
38000 0 1.0031777 1 0.00040237126 -0.034711811 -1.7602359 -1.7602359 56.275778
39000 0 1.002777 1 0.00051021239 -0.034375104 -1.7431615 -1.7431615 48.461145
40000 0 1.0053762 1 0.00041566465 -0.036016384 -1.8263908 -1.8263908 48.937456
41000 0 1.0039078 1 0.00049726673 -0.036481888 -1.8499966 -1.8499966 49.290465
42000 0 1.0033408 1 0.00045236191 -0.034254524 -1.7370469 -1.7370469 52.146603
43000 0 1.0039893 1 0.00048362351 -0.032519474 -1.6490625 -1.6490625 53.320599
44000 0 1.0036667 1 0.00046834006 -0.031099875 -1.5770747 -1.5770747 55.907531
45000 0 1.0035765 1 0.0005195031 -0.029559708 -1.4989728 -1.4989728 56.191165
46000 0 1.004514 1 0.00050134348 -0.028852909 -1.463131 -1.463131 57.547696
47000 0 1.003864 1 0.00038963389 -0.031339229 -1.5892123 -1.5892123 55.775764
48000 0 1.003145 1 0.00035436452 -0.032390682 -1.6425315 -1.6425315 55.233354
49000 0 1.0042615 1 0.00053161075 -0.029960468 -1.5192953 -1.5192953 54.611685
50000 0 1.0037096 1 0.00031191389 -0.02942011 -1.4918938 -1.4918938 58.653762
Loop time of 27.1275 on 8 procs for 40000 steps with 20284 atoms
Pair time (%) = 1.54277 (5.68709)
Neigh time (%) = 1.26864 (4.67658)
Comm time (%) = 1.19871 (4.4188)
Outpt time (%) = 0.00421953 (0.0155544)
Other time (%) = 23.1132 (85.202)
Nlocal: 2535.5 ave 2670 max 2406 min
Histogram: 1 1 1 0 0 2 1 1 0 1
Nghost: 114.5 ave 124 max 103 min
Histogram: 1 0 1 0 2 1 0 1 0 2
Neighs: 521.5 ave 692 max 442 min
Histogram: 3 1 1 1 0 0 0 1 0 1
Total # of neighbors = 4172
Ave neighs/atom = 0.205679
Neighbor list builds = 2002
Dangerous builds = 0
Please see the log.cite file for references relevant to this simulation

View File

@ -0,0 +1,189 @@
LAMMPS (29 Aug 2024 - Development - patch_29Aug2024-296-g061c80104a-modified)
using 1 OpenMP thread(s) per MPI task
# Aspherical shear demo - 2d line box and triangle mixture, implicit solvent
units lj
atom_style line
dimension 2
read_data data.line
Reading data file ...
orthogonal box = (-22.553882 -22.553882 -0.5) to (22.553882 22.553882 0.5)
1 by 1 by 1 MPI processor grid
reading atoms ...
350 atoms
350 lines
read_data CPU = 0.003 seconds
velocity all create 1.44 320984 loop geom
neighbor 0.3 bin
neigh_modify delay 0 every 1 check yes
neigh_modify exclude molecule/intra all
pair_style line/lj 2.5
pair_coeff * * 0.25 0.25 1.0 0.25 2.5
fix 2 all rigid molecule langevin 2.0 2.0 1.0 492983
100 rigid bodies with 350 atoms
fix 3 all deform 5 x scale 0.8 y scale 0.8
fix 4 all enforce2d
compute 10 all property/atom end1x end1y end2x end2y
#dump 1 all custom 500 dump1.atom id type x y z ix iy iz
#dump 2 all custom 500 dump1.line id type # c_10[1] c_10[2] c_10[3] c_10[4]
timestep 0.001
compute 1 all erotate/asphere
compute 2 all ke
compute 3 all pe
variable toteng equal (c_1+c_2+c_3)/atoms
compute_modify thermo_temp extra/dof -350
thermo 1000
thermo_style custom step f_2 pe ke c_1 c_2 c_3 v_toteng
run 10000
CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
Your simulation uses code contributions which should be cited:
- Type Label Framework: https://doi.org/10.1021/acs.jpcb.3c08419
@Article{Gissinger24,
author = {Jacob R. Gissinger, Ilia Nikiforov, Yaser Afshar, Brendon Waters, Moon-ki Choi, Daniel S. Karls, Alexander Stukowski, Wonpil Im, Hendrik Heinz, Axel Kohlmeyer, and Ellad B. Tadmor},
title = {Type Label Framework for Bonded Force Fields in LAMMPS},
journal = {J. Phys. Chem. B},
year = 2024,
volume = 128,
number = 13,
pages = {3282-3297}
}
CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
Generated 0 of 0 mixed pair_coeff terms from geometric mixing rule
Neighbor list info ...
update: every = 1 steps, delay = 0 steps, check = yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 2.8
ghost atom cutoff = 2.8
binsize = 1.4, bins = 33 33 1
1 neighbor lists, perpetual/occasional/extra = 1 0 0
(1) pair line/lj, perpetual
attributes: half, newton on
pair build: half/bin/atomonly/newton
stencil: half/bin/2d
bin: standard
Per MPI rank memory allocation (min/avg/max) = 4.742 | 4.742 | 4.742 Mbytes
Step f_2 PotEng KinEng c_1 c_2 c_3 v_toteng
0 1.1872976 0 0.46543528 0.03617212 0.46543528 0 0.5016074
1000 1.9084412 -0.001043719 0.71003395 0.089891202 0.71003395 -0.001043719 0.79888143
2000 2.31668 -0.020711665 0.83611544 0.13062287 0.83611544 -0.020711665 0.94602664
3000 2.3094506 -0.062018072 0.80584123 0.15326943 0.80584123 -0.062018072 0.8970926
4000 2.4383442 -0.053659995 0.86864073 0.14696993 0.86864073 -0.053659995 0.96195067
5000 2.5885917 -0.15612821 0.90351683 0.17156633 0.90351683 -0.15612821 0.91895495
6000 2.1187379 -0.072041135 0.76933527 0.11557939 0.76933527 -0.072041135 0.81287353
7000 2.1407592 -0.057727294 0.75154343 0.13827066 0.75154343 -0.057727294 0.8320868
8000 2.0661844 -0.097529608 0.71093839 0.14547433 0.71093839 -0.097529608 0.75888311
9000 2.1574445 -0.052890857 0.78826145 0.11363136 0.78826145 -0.052890857 0.84900195
10000 2.473097 -0.092376489 0.85756337 0.16861278 0.85756337 -0.092376489 0.93379965
Loop time of 1.16704 on 1 procs for 10000 steps with 350 atoms
Performance: 740334.372 tau/day, 8568.685 timesteps/s, 2.999 Matom-step/s
99.7% CPU use with 1 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 0.80757 | 0.80757 | 0.80757 | 0.0 | 69.20
Neigh | 0.0090046 | 0.0090046 | 0.0090046 | 0.0 | 0.77
Comm | 0.011808 | 0.011808 | 0.011808 | 0.0 | 1.01
Output | 0.000194 | 0.000194 | 0.000194 | 0.0 | 0.02
Modify | 0.3317 | 0.3317 | 0.3317 | 0.0 | 28.42
Other | | 0.006768 | | | 0.58
Nlocal: 350 ave 350 max 350 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 101 ave 101 max 101 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 853 ave 853 max 853 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 853
Ave neighs/atom = 2.4371429
Neighbor list builds = 274
Dangerous builds = 0
#undump 1
#undump 2
unfix 3
change_box all triclinic
Changing box ...
triclinic box = (-18.043106 -18.043106 -0.5) to (18.043106 18.043106 0.5) with tilt (0 0 0)
#dump 1 all custom 500 dump2.atom id type x y z ix iy iz
#dump 2 all custom 500 dump2.line id type # c_10[1] c_10[2] c_10[3] c_10[4]
fix 3 all deform 1 xy erate 0.01 units box
run 10000
Generated 0 of 0 mixed pair_coeff terms from geometric mixing rule
Neighbor list info ...
update: every = 1 steps, delay = 0 steps, check = yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 2.8
ghost atom cutoff = 2.8
binsize = 1.4, bins = 26 26 1
1 neighbor lists, perpetual/occasional/extra = 1 0 0
(1) pair line/lj, perpetual
attributes: half, newton on
pair build: half/bin/atomonly/newton/tri
stencil: half/bin/2d/tri
bin: standard
Per MPI rank memory allocation (min/avg/max) = 4.756 | 4.756 | 4.756 Mbytes
Step f_2 PotEng KinEng c_1 c_2 c_3 v_toteng
10000 2.473097 -0.092380532 0.85753671 0.16863499 0.85753671 -0.092380532 0.93379117
11000 2.5185915 -0.1015737 0.84230876 0.197573 0.84230876 -0.1015737 0.93830806
12000 2.5971728 -0.13087195 0.90608898 0.17248755 0.90608898 -0.13087195 0.94770458
13000 2.5065714 -0.14666584 0.83804307 0.19683483 0.83804307 -0.14666584 0.88821206
14000 2.1672357 -0.10720878 0.79713452 0.10973398 0.79713452 -0.10720878 0.79965972
15000 2.3444438 -0.14780285 0.81642139 0.15695019 0.81642139 -0.14780285 0.82556873
16000 2.2672973 -0.12590007 0.76672668 0.17081014 0.76672668 -0.12590007 0.81163675
17000 2.3999651 -0.15464102 0.85708567 0.14289234 0.85708567 -0.15464102 0.84533699
18000 2.2024356 -0.14077779 0.76727873 0.14718521 0.76727873 -0.14077779 0.77368615
19000 2.5411572 -0.092014576 0.85750101 0.19297197 0.85750101 -0.092014576 0.95845841
20000 2.2113691 -0.11944862 0.77295445 0.14564597 0.77295445 -0.11944862 0.79915181
Loop time of 1.76388 on 1 procs for 10000 steps with 350 atoms
Performance: 489827.896 tau/day, 5669.304 timesteps/s, 1.984 Matom-step/s
99.6% CPU use with 1 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 1.3668 | 1.3668 | 1.3668 | 0.0 | 77.49
Neigh | 0.018849 | 0.018849 | 0.018849 | 0.0 | 1.07
Comm | 0.013338 | 0.013338 | 0.013338 | 0.0 | 0.76
Output | 0.00020434 | 0.00020434 | 0.00020434 | 0.0 | 0.01
Modify | 0.35698 | 0.35698 | 0.35698 | 0.0 | 20.24
Other | | 0.007667 | | | 0.43
Nlocal: 350 ave 350 max 350 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 119 ave 119 max 119 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 997 ave 997 max 997 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 997
Ave neighs/atom = 2.8485714
Neighbor list builds = 283
Dangerous builds = 0
Total wall time: 0:00:02

View File

@ -0,0 +1,189 @@
LAMMPS (29 Aug 2024 - Development - patch_29Aug2024-296-g061c80104a-modified)
using 1 OpenMP thread(s) per MPI task
# Aspherical shear demo - 2d line box and triangle mixture, implicit solvent
units lj
atom_style line
dimension 2
read_data data.line
Reading data file ...
orthogonal box = (-22.553882 -22.553882 -0.5) to (22.553882 22.553882 0.5)
2 by 2 by 1 MPI processor grid
reading atoms ...
350 atoms
350 lines
read_data CPU = 0.003 seconds
velocity all create 1.44 320984 loop geom
neighbor 0.3 bin
neigh_modify delay 0 every 1 check yes
neigh_modify exclude molecule/intra all
pair_style line/lj 2.5
pair_coeff * * 0.25 0.25 1.0 0.25 2.5
fix 2 all rigid molecule langevin 2.0 2.0 1.0 492983
100 rigid bodies with 350 atoms
fix 3 all deform 5 x scale 0.8 y scale 0.8
fix 4 all enforce2d
compute 10 all property/atom end1x end1y end2x end2y
#dump 1 all custom 500 dump1.atom id type x y z ix iy iz
#dump 2 all custom 500 dump1.line id type # c_10[1] c_10[2] c_10[3] c_10[4]
timestep 0.001
compute 1 all erotate/asphere
compute 2 all ke
compute 3 all pe
variable toteng equal (c_1+c_2+c_3)/atoms
compute_modify thermo_temp extra/dof -350
thermo 1000
thermo_style custom step f_2 pe ke c_1 c_2 c_3 v_toteng
run 10000
CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
Your simulation uses code contributions which should be cited:
- Type Label Framework: https://doi.org/10.1021/acs.jpcb.3c08419
@Article{Gissinger24,
author = {Jacob R. Gissinger, Ilia Nikiforov, Yaser Afshar, Brendon Waters, Moon-ki Choi, Daniel S. Karls, Alexander Stukowski, Wonpil Im, Hendrik Heinz, Axel Kohlmeyer, and Ellad B. Tadmor},
title = {Type Label Framework for Bonded Force Fields in LAMMPS},
journal = {J. Phys. Chem. B},
year = 2024,
volume = 128,
number = 13,
pages = {3282-3297}
}
CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
Generated 0 of 0 mixed pair_coeff terms from geometric mixing rule
Neighbor list info ...
update: every = 1 steps, delay = 0 steps, check = yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 2.8
ghost atom cutoff = 2.8
binsize = 1.4, bins = 33 33 1
1 neighbor lists, perpetual/occasional/extra = 1 0 0
(1) pair line/lj, perpetual
attributes: half, newton on
pair build: half/bin/atomonly/newton
stencil: half/bin/2d
bin: standard
Per MPI rank memory allocation (min/avg/max) = 4.736 | 4.736 | 4.736 Mbytes
Step f_2 PotEng KinEng c_1 c_2 c_3 v_toteng
0 1.1872976 0 0.46543528 0.03617212 0.46543528 0 0.5016074
1000 1.9084412 -0.001043719 0.71003395 0.089891202 0.71003395 -0.001043719 0.79888143
2000 2.31668 -0.020711665 0.83611544 0.13062287 0.83611544 -0.020711665 0.94602664
3000 2.3094506 -0.062018072 0.80584123 0.15326943 0.80584123 -0.062018072 0.8970926
4000 2.4383442 -0.053659995 0.86864073 0.14696993 0.86864073 -0.053659995 0.96195067
5000 2.5885917 -0.15612821 0.90351683 0.17156633 0.90351683 -0.15612821 0.91895494
6000 2.118738 -0.072041141 0.76933528 0.1155794 0.76933528 -0.072041141 0.81287354
7000 2.1407609 -0.057727453 0.75154404 0.13827078 0.75154404 -0.057727453 0.83208737
8000 2.066348 -0.097639137 0.71096217 0.14551295 0.71096217 -0.097639137 0.75883597
9000 2.1969096 -0.050133795 0.8028133 0.11559948 0.8028133 -0.050133795 0.86827899
10000 2.3932442 -0.098008752 0.83753779 0.15678193 0.83753779 -0.098008752 0.89631097
Loop time of 0.596624 on 4 procs for 10000 steps with 350 atoms
Performance: 1448149.372 tau/day, 16760.988 timesteps/s, 5.866 Matom-step/s
98.2% CPU use with 4 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 0.17419 | 0.21073 | 0.27006 | 7.8 | 35.32
Neigh | 0.0015899 | 0.0018079 | 0.0020932 | 0.5 | 0.30
Comm | 0.041688 | 0.101 | 0.13805 | 11.3 | 16.93
Output | 0.00011945 | 0.0001262 | 0.000143 | 0.0 | 0.02
Modify | 0.26906 | 0.27183 | 0.27467 | 0.4 | 45.56
Other | | 0.01113 | | | 1.87
Nlocal: 87.5 ave 94 max 80 min
Histogram: 1 1 0 0 0 0 0 0 0 2
Nghost: 54.5 ave 67 max 42 min
Histogram: 1 0 1 0 0 0 0 1 0 1
Neighs: 212.75 ave 286 max 122 min
Histogram: 1 0 0 1 0 0 0 0 1 1
Total # of neighbors = 851
Ave neighs/atom = 2.4314286
Neighbor list builds = 273
Dangerous builds = 0
#undump 1
#undump 2
unfix 3
change_box all triclinic
Changing box ...
triclinic box = (-18.043106 -18.043106 -0.5) to (18.043106 18.043106 0.5) with tilt (0 0 0)
#dump 1 all custom 500 dump2.atom id type x y z ix iy iz
#dump 2 all custom 500 dump2.line id type # c_10[1] c_10[2] c_10[3] c_10[4]
fix 3 all deform 1 xy erate 0.01 units box
run 10000
Generated 0 of 0 mixed pair_coeff terms from geometric mixing rule
Neighbor list info ...
update: every = 1 steps, delay = 0 steps, check = yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 2.8
ghost atom cutoff = 2.8
binsize = 1.4, bins = 26 26 1
1 neighbor lists, perpetual/occasional/extra = 1 0 0
(1) pair line/lj, perpetual
attributes: half, newton on
pair build: half/bin/atomonly/newton/tri
stencil: half/bin/2d/tri
bin: standard
Per MPI rank memory allocation (min/avg/max) = 4.752 | 4.752 | 4.752 Mbytes
Step f_2 PotEng KinEng c_1 c_2 c_3 v_toteng
10000 2.3932442 -0.098029446 0.83751359 0.1568021 0.83751359 -0.098029446 0.89628624
11000 2.4541893 -0.15538223 0.82053681 0.19271549 0.82053681 -0.15538223 0.85787007
12000 2.4124449 -0.14024177 0.84559598 0.15692416 0.84559598 -0.14024177 0.86227837
13000 2.2095814 -0.10822636 0.73930104 0.173052 0.73930104 -0.10822636 0.80412668
14000 2.0946831 -0.087303541 0.77743494 0.10023865 0.77743494 -0.087303541 0.79037005
15000 2.0911016 -0.11524594 0.74044758 0.12978235 0.74044758 -0.11524594 0.75498398
16000 1.9736428 -0.17259563 0.67852978 0.13943094 0.67852978 -0.17259563 0.64536509
17000 2.3284367 -0.17673537 0.77018991 0.1897596 0.77018991 -0.17673537 0.78321414
18000 2.3178564 -0.16634746 0.81488323 0.1487365 0.81488323 -0.16634746 0.79727227
19000 2.7497251 -0.18254513 1.0041125 0.14528424 1.0041125 -0.18254513 0.96685165
20000 2.3897059 -0.12664177 0.8390411 0.15426546 0.8390411 -0.12664177 0.86666479
Loop time of 0.79519 on 4 procs for 10000 steps with 350 atoms
Performance: 1086533.223 tau/day, 12575.616 timesteps/s, 4.401 Matom-step/s
98.2% CPU use with 4 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 0.26601 | 0.35051 | 0.443 | 12.3 | 44.08
Neigh | 0.0029891 | 0.0036725 | 0.0040631 | 0.7 | 0.46
Comm | 0.049744 | 0.14268 | 0.22734 | 19.3 | 17.94
Output | 0.00013783 | 0.00014503 | 0.00016304 | 0.0 | 0.02
Modify | 0.27963 | 0.28446 | 0.29049 | 0.8 | 35.77
Other | | 0.01372 | | | 1.73
Nlocal: 87.5 ave 101 max 75 min
Histogram: 1 0 1 0 0 0 1 0 0 1
Nghost: 61.5 ave 67 max 50 min
Histogram: 1 0 0 0 0 0 0 0 2 1
Neighs: 244.75 ave 268 max 198 min
Histogram: 1 0 0 0 0 0 0 1 0 2
Total # of neighbors = 979
Ave neighs/atom = 2.7971429
Neighbor list builds = 289
Dangerous builds = 0
Total wall time: 0:00:01

View File

@ -0,0 +1,363 @@
LAMMPS (29 Aug 2024 - Development - patch_29Aug2024-296-g061c80104a-modified)
using 1 OpenMP thread(s) per MPI task
# Aspherical shear demo - 2d line boxes, solvated by SRD particles
units lj
atom_style line
atom_modify first big
dimension 2
read_data data.line.srd
Reading data file ...
orthogonal box = (-28.796757 -28.796757 -0.5) to (28.796757 28.796757 0.5)
1 by 1 by 1 MPI processor grid
reading atoms ...
400 atoms
400 lines
read_data CPU = 0.003 seconds
# add small particles as hi density lattice
lattice sq 0.4
Lattice spacing in x,y,z = 1.5811388 1.5811388 1.5811388
region plane block INF INF INF INF -0.001 0.001
lattice sq 10.0
Lattice spacing in x,y,z = 0.31622777 0.31622777 0.31622777
create_atoms 2 region plane
Created 33489 atoms
using lattice units in orthogonal box = (-28.796757 -28.796757 -0.5) to (28.796757 28.796757 0.5)
create_atoms CPU = 0.003 seconds
group big type 1
400 atoms in group big
group small type 2
33489 atoms in group small
set group small mass 0.01
Setting atom values ...
33489 settings made for mass
# delete overlaps
# must set 1-2 cutoff to non-zero value
pair_style lj/cut 1.5
pair_coeff 1 1 1.0 1.0
pair_coeff 2 2 0.0 1.0 0.0
pair_coeff 1 2 1.0 1.0
delete_atoms overlap 1.5 small big
System init for delete_atoms ...
Generated 0 of 1 mixed pair_coeff terms from geometric mixing rule
Neighbor list info ...
update: every = 1 steps, delay = 0 steps, check = yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 1.8
ghost atom cutoff = 1.8
binsize = 0.9, bins = 64 64 2
2 neighbor lists, perpetual/occasional/extra = 1 1 0
(1) command delete_atoms, occasional
attributes: full, newton on
pair build: full/bin/atomonly
stencil: full/bin/2d
bin: standard
(2) pair lj/cut, perpetual
attributes: half, newton on
pair build: half/bin/atomonly/newton
stencil: half/bin/2d
bin: standard
WARNING: Delete_atoms cutoff > minimum neighbor cutoff (src/delete_atoms.cpp:313)
Deleted 13605 atoms, new total = 20284
# SRD run
reset_timestep 0
velocity small create 1.44 87287 loop geom
neighbor 0.8 multi
neigh_modify delay 0 every 1 check yes
neigh_modify exclude molecule/intra big include big
comm_modify mode multi group big vel yes
neigh_modify include big
# no pairwise interactions with small particles
pair_style line/lj 2.5
pair_coeff 1 1 1.0 1.0 1.0 1.0 2.5
pair_coeff 2 2 0.0 0.0 0.0 1.0 0.0
pair_coeff 1 2 1.0 0.2 1.0 0.2 2.5
# use fix SRD to push small particles out from inside big ones
# if comment out, big particles won't see SRD particles
timestep 0.0001
fix 1 big rigid molecule
100 rigid bodies with 400 atoms
fix 2 small srd 20 big 1.0 0.25 49894 search 0.2 cubic warn 0.0001 shift yes 49829 overlap yes collision noslip
fix 3 all deform 1 x scale 1.25 y scale 1.25
fix 4 all enforce2d
# diagnostics
compute tsmall small temp/deform
compute tbig big temp
variable pebig equal pe*atoms/count(big)
variable ebig equal etotal*atoms/count(big)
compute_modify tbig extra/dof -350
compute 1 big erotate/asphere
compute 2 all ke
compute 3 all pe
variable toteng equal (c_1+c_2+c_3)/atoms
thermo 1000
thermo_style custom step c_tsmall f_2[9] c_1 etotal v_pebig v_ebig press
thermo_modify temp tbig
WARNING: Temperature for thermo pressure is not for group all (src/thermo.cpp:533)
compute 10 big property/atom end1x end1y end2x end2y
#dump 1 all custom 500 dump1.atom.srd id type x y z ix iy iz
#dump 2 all custom 500 dump1.line.srd id type # c_10[1] c_10[2] c_10[3] c_10[4]
run 10000
CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
Your simulation uses code contributions which should be cited:
- Type Label Framework: https://doi.org/10.1021/acs.jpcb.3c08419
@Article{Gissinger24,
author = {Jacob R. Gissinger, Ilia Nikiforov, Yaser Afshar, Brendon Waters, Moon-ki Choi, Daniel S. Karls, Alexander Stukowski, Wonpil Im, Hendrik Heinz, Axel Kohlmeyer, and Ellad B. Tadmor},
title = {Type Label Framework for Bonded Force Fields in LAMMPS},
journal = {J. Phys. Chem. B},
year = 2024,
volume = 128,
number = 13,
pages = {3282-3297}
}
- neighbor multi command: doi:10.1016/j.cpc.2008.03.005, doi:10.1007/s40571-020-00361-2
@Article{Intveld08,
author = {in 't Veld, P. J. and S. J.~Plimpton and G. S. Grest},
title = {Accurate and Efficient Methods for Modeling Colloidal
Mixtures in an Explicit Solvent using Molecular Dynamics},
journal = {Comput.\ Phys.\ Commut.},
year = 2008,
volume = 179,
pages = {320--329}
}
@article{Shire2020,
author = {Shire, Tom and Hanley, Kevin J. and Stratford, Kevin},
title = {{DEM} Simulations of Polydisperse Media: Efficient Contact
Detection Applied to Investigate the Quasi-Static Limit},
journal = {Computational Particle Mechanics},
year = {2020}
@article{Monti2022,
author = {Monti, Joseph M. and Clemmer, Joel T. and Srivastava,
Ishan and Silbert, Leonardo E. and Grest, Gary S.
and Lechman, Jeremy B.},
title = {Large-scale frictionless jamming with power-law particle
size distributions},
journal = {Phys. Rev. E},
volume = {106}
issue = {3}
year = {2022}
}
- fix srd command: doi:10.1063/1.3419070
@Article{Petersen10,
author = {M. K. Petersen and J. B. Lechman and S. J. Plimpton and
G. S. Grest and in 't Veld, P. J. and P. R. Schunk},
title = {Mesoscale Hydrodynamics via Stochastic Rotation
Dynamics: Comparison with {L}ennard-{J}ones Fluid},
journal = {J.~Chem.\ Phys.},
year = 2010,
volume = 132,
pages = 174106
}
CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
Generated 0 of 1 mixed pair_coeff terms from geometric mixing rule
WARNING: Using compute temp/deform with inconsistent fix deform remap option (src/compute_temp_deform.cpp:71)
WARNING: Using fix srd with box deformation but no SRD thermostat (src/SRD/fix_srd.cpp:405)
SRD info:
SRD/big particles = 19884 400
big particle diameter max/min = 1.9988171 0.50330564
SRD temperature & lamda = 1 0.02
SRD max distance & max velocity = 0.08 40
SRD grid counts: 230 230 1
SRD grid size: request, actual (xyz) = 0.25, 0.25040659 0.25040659 1
SRD per actual grid cell = 0.45503978
SRD viscosity = 0.92062623
big/SRD mass density ratio = 12.298053
WARNING: SRD bin size for fix srd differs from user request (src/SRD/fix_srd.cpp:2809)
WARNING: Fix srd grid size > 1/4 of big particle diameter (src/SRD/fix_srd.cpp:2830)
# of rescaled SRD velocities = 0
ave/max small velocity = 15.906001 29.105426
ave/max big velocity = 0 0
Neighbor list info ...
update: every = 1 steps, delay = 0 steps, check = yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 3.3
ghost atom cutoff = 3.3
binsize = 1.65, bins = 35 35 1
1 neighbor lists, perpetual/occasional/extra = 1 0 0
(1) pair line/lj, perpetual
attributes: half, newton on
pair build: half/multi/atomonly/newton
stencil: half/multi/2d
bin: multi
Per MPI rank memory allocation (min/avg/max) = 43.18 | 43.18 | 43.18 Mbytes
Step c_tsmall f_2[9] c_1 TotEng v_pebig v_ebig Press
0 1.6084386 0 0 0 0 0 0
1000 1.4566787 1.2042825 0.00015900089 0.0011955837 0 0.060628048 0.75322967
2000 1.3561018 1.2092936 0.00026739313 0.0020783306 0 0.10539215 0.53247025
3000 1.3327447 1.1696777 0.0003215116 0.0024935719 0 0.12644903 0.32024668
4000 1.3031273 1.1385596 0.00037337107 0.0031214473 0 0.15828859 0.64066244
5000 1.28204 1.0983763 0.00047966362 0.0033295434 0 0.16884114 0.32103666
6000 1.2654396 1.0969562 0.00049917373 0.0037382387 0 0.18956609 0.16211614
7000 1.2401503 1.1232046 0.00045137168 0.0037522201 0 0.19027508 0.014071672
8000 1.2349025 1.059008 0.00056648801 0.0039664742 0 0.20113991 0.24483817
9000 1.2272907 1.0371006 0.00056271544 0.0037847074 0 0.19192251 0.032968056
10000 1.2153817 1.0209113 0.00053291164 0.0037197691 0 0.18862949 0.3237698
Loop time of 6.0346 on 1 procs for 10000 steps with 20284 atoms
Performance: 14317.424 tau/day, 1657.109 timesteps/s, 33.613 Matom-step/s
96.2% CPU use with 1 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 0.10636 | 0.10636 | 0.10636 | 0.0 | 1.76
Neigh | 0.01905 | 0.01905 | 0.01905 | 0.0 | 0.32
Comm | 0.041223 | 0.041223 | 0.041223 | 0.0 | 0.68
Output | 0.0017015 | 0.0017015 | 0.0017015 | 0.0 | 0.03
Modify | 5.8165 | 5.8165 | 5.8165 | 0.0 | 96.39
Other | | 0.04976 | | | 0.82
Nlocal: 20284 ave 20284 max 20284 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 84 ave 84 max 84 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 0 ave 0 max 0 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 0
Ave neighs/atom = 0
Neighbor list builds = 500
Dangerous builds = 0
#undump 1
#undump 2
unfix 3
change_box all triclinic
Changing box ...
triclinic box = (-35.995947 -35.995947 -0.5) to (35.995947 35.995947 0.5) with tilt (0 0 0)
fix 2 small srd 20 big 1.0 0.25 49894 search 0.2 cubic warn 0.0001 shift yes 49829 overlap yes collision noslip #tstat yes
#dump 1 all custom 500 dump2.atom.srd id type x y z ix iy iz
#dump 2 all custom 500 dump2.line.srd id type # c_10[1] c_10[2] c_10[3] c_10[4]
fix 3 all deform 1 xy erate 0.0002 units box remap v
run 30000
Generated 0 of 1 mixed pair_coeff terms from geometric mixing rule
WARNING: Using fix srd with box deformation but no SRD thermostat (src/SRD/fix_srd.cpp:405)
SRD info:
SRD/big particles = 19884 400
big particle diameter max/min = 1.9988171 0.50330564
SRD temperature & lamda = 1 0.02
SRD max distance & max velocity = 0.08 40
SRD grid counts: 288 288 1
SRD grid size: request, actual (xyz) = 0.25, 0.24997185 0.24997185 1
SRD per actual grid cell = 0.26976265
SRD viscosity = 1.0312189
big/SRD mass density ratio = 20.672578
WARNING: SRD bin size for fix srd differs from user request (src/SRD/fix_srd.cpp:2809)
WARNING: Fix srd grid size > 1/4 of big particle diameter (src/SRD/fix_srd.cpp:2830)
# of rescaled SRD velocities = 0
ave/max small velocity = 12.805429 40
ave/max big velocity = 0.50400801 2.1697856
Neighbor list info ...
update: every = 1 steps, delay = 0 steps, check = yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 3.3
ghost atom cutoff = 3.3
binsize = 1.65, bins = 44 44 1
1 neighbor lists, perpetual/occasional/extra = 1 0 0
(1) pair line/lj, perpetual
attributes: half, newton on
pair build: half/multi/atomonly/newton/tri
stencil: half/multi/2d/tri
bin: multi
Per MPI rank memory allocation (min/avg/max) = 58.85 | 58.85 | 58.85 Mbytes
Step c_tsmall f_2[9] c_1 TotEng v_pebig v_ebig Press
10000 1.0423767 0 0.00053289774 0.0037197858 0 0.18863034 0.014558088
11000 1.0333923 1.0309908 0.00058283908 0.0042336078 0 0.21468625 0.13036048
12000 1.0268543 1.0290122 0.00055294944 0.0044278861 0 0.2245381 0.53275676
13000 1.0257856 1.0140434 0.00051520803 0.0042888387 0 0.21748701 0.2066835
14000 1.0239497 1.0321558 0.00054742053 0.004259664 0 0.21600756 0.24430097
15000 1.0194475 1.0119806 0.00053619565 0.0041113765 0 0.2084879 0.18420434
16000 1.016421 1.0309991 0.00049553642 0.0041338645 0 0.20962827 0.18298406
17000 1.0088527 0.99723694 0.00049824173 0.0044777581 0 0.22706711 0.65557023
18000 1.0073877 1.0136962 0.00058327835 0.0045355884 0 0.22999969 0.13700737
19000 1.0026916 1.0253129 0.00056760151 0.0045138437 0 0.22889701 0.16443344
20000 1.0024042 0.98988084 0.00057568118 0.0048644983 0 0.24667871 0.060864322
21000 0.99500641 0.99075324 0.00062187543 0.0050480594 0 0.25598709 0.18428384
22000 0.99286446 0.98642266 0.00061024492 0.0050497174 0 0.25607117 0.18499738
23000 0.99204086 0.98869005 0.00064013575 0.0052921112 0 0.26836296 0.046987646
24000 0.98768375 1.0110613 0.00065868636 0.0049285304 0 0.24992578 0.22623751
25000 0.98763308 0.98752678 0.00067070463 0.0049364253 0 0.25032613 0.022819382
26000 0.9844889 1.0059918 0.00075364589 0.0053381778 0 0.270699 0.089735771
27000 0.98574608 0.99094039 0.00080711088 0.0054137233 0 0.27452991 0.14477885
28000 0.98139419 0.97487219 0.00071242405 0.0053315389 0 0.27036234 -0.016357088
29000 0.98458705 0.98600894 0.00068355573 0.0050157315 0 0.25434775 0.015749835
30000 0.98440759 1.0010793 0.00067594666 0.0048121267 0 0.24402295 0.20422918
31000 0.98060632 0.9825933 0.00069529947 0.0050649881 -0.00010700206 0.25684555 0.0014601267
32000 0.97616624 0.94976641 0.00067682494 0.0052111319 -0.00013230279 0.2642565 0.013738873
33000 0.97926122 0.95756794 0.00071774059 0.0053716513 -0.00024804252 0.27239644 0.10643895
34000 0.97227771 0.94365547 0.00080885359 0.0055744144 -0.00063217879 0.28267856 0.12472959
35000 0.97295679 0.98103625 0.0008130852 0.0057598371 -0.00097218501 0.29208134 0.88422099
36000 0.97131919 0.97164636 0.0008345433 0.0062212675 -0.001543055 0.31548048 0.22389401
37000 0.96988732 0.98589058 0.00083271625 0.0062999411 -0.0026064482 0.31947001 0.11797226
38000 0.97135512 0.96486494 0.00076943974 0.0057397763 -0.0041174109 0.29106406 0.072790492
39000 0.97386738 0.95218062 0.00079594844 0.0056308358 -0.0069006312 0.28553968 0.12417837
40000 0.97914045 1.0033941 0.000830798 0.0055952714 -0.0086705514 0.28373621 0.1976663
Loop time of 15.944 on 1 procs for 30000 steps with 20284 atoms
Performance: 16256.931 tau/day, 1881.589 timesteps/s, 38.166 Matom-step/s
99.5% CPU use with 1 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 0.31871 | 0.31871 | 0.31871 | 0.0 | 2.00
Neigh | 0.082974 | 0.082974 | 0.082974 | 0.0 | 0.52
Comm | 0.16092 | 0.16092 | 0.16092 | 0.0 | 1.01
Output | 0.0047257 | 0.0047257 | 0.0047257 | 0.0 | 0.03
Modify | 15.189 | 15.189 | 15.189 | 0.0 | 95.27
Other | | 0.1871 | | | 1.17
Nlocal: 20284 ave 20284 max 20284 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 76 ave 76 max 76 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 38 ave 38 max 38 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 38
Ave neighs/atom = 0.0018733978
Neighbor list builds = 1500
Dangerous builds = 0
Total wall time: 0:00:22

View File

@ -0,0 +1,363 @@
LAMMPS (29 Aug 2024 - Development - patch_29Aug2024-296-g061c80104a-modified)
using 1 OpenMP thread(s) per MPI task
# Aspherical shear demo - 2d line boxes, solvated by SRD particles
units lj
atom_style line
atom_modify first big
dimension 2
read_data data.line.srd
Reading data file ...
orthogonal box = (-28.796757 -28.796757 -0.5) to (28.796757 28.796757 0.5)
2 by 2 by 1 MPI processor grid
reading atoms ...
400 atoms
400 lines
read_data CPU = 0.003 seconds
# add small particles as hi density lattice
lattice sq 0.4
Lattice spacing in x,y,z = 1.5811388 1.5811388 1.5811388
region plane block INF INF INF INF -0.001 0.001
lattice sq 10.0
Lattice spacing in x,y,z = 0.31622777 0.31622777 0.31622777
create_atoms 2 region plane
Created 33489 atoms
using lattice units in orthogonal box = (-28.796757 -28.796757 -0.5) to (28.796757 28.796757 0.5)
create_atoms CPU = 0.001 seconds
group big type 1
400 atoms in group big
group small type 2
33489 atoms in group small
set group small mass 0.01
Setting atom values ...
33489 settings made for mass
# delete overlaps
# must set 1-2 cutoff to non-zero value
pair_style lj/cut 1.5
pair_coeff 1 1 1.0 1.0
pair_coeff 2 2 0.0 1.0 0.0
pair_coeff 1 2 1.0 1.0
delete_atoms overlap 1.5 small big
System init for delete_atoms ...
Generated 0 of 1 mixed pair_coeff terms from geometric mixing rule
Neighbor list info ...
update: every = 1 steps, delay = 0 steps, check = yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 1.8
ghost atom cutoff = 1.8
binsize = 0.9, bins = 64 64 2
2 neighbor lists, perpetual/occasional/extra = 1 1 0
(1) command delete_atoms, occasional
attributes: full, newton on
pair build: full/bin/atomonly
stencil: full/bin/2d
bin: standard
(2) pair lj/cut, perpetual
attributes: half, newton on
pair build: half/bin/atomonly/newton
stencil: half/bin/2d
bin: standard
WARNING: Delete_atoms cutoff > minimum neighbor cutoff (src/delete_atoms.cpp:313)
Deleted 13605 atoms, new total = 20284
# SRD run
reset_timestep 0
velocity small create 1.44 87287 loop geom
neighbor 0.8 multi
neigh_modify delay 0 every 1 check yes
neigh_modify exclude molecule/intra big include big
comm_modify mode multi group big vel yes
neigh_modify include big
# no pairwise interactions with small particles
pair_style line/lj 2.5
pair_coeff 1 1 1.0 1.0 1.0 1.0 2.5
pair_coeff 2 2 0.0 0.0 0.0 1.0 0.0
pair_coeff 1 2 1.0 0.2 1.0 0.2 2.5
# use fix SRD to push small particles out from inside big ones
# if comment out, big particles won't see SRD particles
timestep 0.0001
fix 1 big rigid molecule
100 rigid bodies with 400 atoms
fix 2 small srd 20 big 1.0 0.25 49894 search 0.2 cubic warn 0.0001 shift yes 49829 overlap yes collision noslip
fix 3 all deform 1 x scale 1.25 y scale 1.25
fix 4 all enforce2d
# diagnostics
compute tsmall small temp/deform
compute tbig big temp
variable pebig equal pe*atoms/count(big)
variable ebig equal etotal*atoms/count(big)
compute_modify tbig extra/dof -350
compute 1 big erotate/asphere
compute 2 all ke
compute 3 all pe
variable toteng equal (c_1+c_2+c_3)/atoms
thermo 1000
thermo_style custom step c_tsmall f_2[9] c_1 etotal v_pebig v_ebig press
thermo_modify temp tbig
WARNING: Temperature for thermo pressure is not for group all (src/thermo.cpp:533)
compute 10 big property/atom end1x end1y end2x end2y
#dump 1 all custom 500 dump1.atom.srd id type x y z ix iy iz
#dump 2 all custom 500 dump1.line.srd id type # c_10[1] c_10[2] c_10[3] c_10[4]
run 10000
CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
Your simulation uses code contributions which should be cited:
- Type Label Framework: https://doi.org/10.1021/acs.jpcb.3c08419
@Article{Gissinger24,
author = {Jacob R. Gissinger, Ilia Nikiforov, Yaser Afshar, Brendon Waters, Moon-ki Choi, Daniel S. Karls, Alexander Stukowski, Wonpil Im, Hendrik Heinz, Axel Kohlmeyer, and Ellad B. Tadmor},
title = {Type Label Framework for Bonded Force Fields in LAMMPS},
journal = {J. Phys. Chem. B},
year = 2024,
volume = 128,
number = 13,
pages = {3282-3297}
}
- neighbor multi command: doi:10.1016/j.cpc.2008.03.005, doi:10.1007/s40571-020-00361-2
@Article{Intveld08,
author = {in 't Veld, P. J. and S. J.~Plimpton and G. S. Grest},
title = {Accurate and Efficient Methods for Modeling Colloidal
Mixtures in an Explicit Solvent using Molecular Dynamics},
journal = {Comput.\ Phys.\ Commut.},
year = 2008,
volume = 179,
pages = {320--329}
}
@article{Shire2020,
author = {Shire, Tom and Hanley, Kevin J. and Stratford, Kevin},
title = {{DEM} Simulations of Polydisperse Media: Efficient Contact
Detection Applied to Investigate the Quasi-Static Limit},
journal = {Computational Particle Mechanics},
year = {2020}
@article{Monti2022,
author = {Monti, Joseph M. and Clemmer, Joel T. and Srivastava,
Ishan and Silbert, Leonardo E. and Grest, Gary S.
and Lechman, Jeremy B.},
title = {Large-scale frictionless jamming with power-law particle
size distributions},
journal = {Phys. Rev. E},
volume = {106}
issue = {3}
year = {2022}
}
- fix srd command: doi:10.1063/1.3419070
@Article{Petersen10,
author = {M. K. Petersen and J. B. Lechman and S. J. Plimpton and
G. S. Grest and in 't Veld, P. J. and P. R. Schunk},
title = {Mesoscale Hydrodynamics via Stochastic Rotation
Dynamics: Comparison with {L}ennard-{J}ones Fluid},
journal = {J.~Chem.\ Phys.},
year = 2010,
volume = 132,
pages = 174106
}
CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
Generated 0 of 1 mixed pair_coeff terms from geometric mixing rule
WARNING: Using compute temp/deform with inconsistent fix deform remap option (src/compute_temp_deform.cpp:71)
WARNING: Using fix srd with box deformation but no SRD thermostat (src/SRD/fix_srd.cpp:405)
SRD info:
SRD/big particles = 19884 400
big particle diameter max/min = 1.9988171 0.50330564
SRD temperature & lamda = 1 0.02
SRD max distance & max velocity = 0.08 40
SRD grid counts: 230 230 1
SRD grid size: request, actual (xyz) = 0.25, 0.25040659 0.25040659 1
SRD per actual grid cell = 0.45503978
SRD viscosity = 0.92062623
big/SRD mass density ratio = 12.298053
WARNING: SRD bin size for fix srd differs from user request (src/SRD/fix_srd.cpp:2809)
WARNING: Fix srd grid size > 1/4 of big particle diameter (src/SRD/fix_srd.cpp:2830)
# of rescaled SRD velocities = 0
ave/max small velocity = 15.906001 29.105426
ave/max big velocity = 0 0
Neighbor list info ...
update: every = 1 steps, delay = 0 steps, check = yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 3.3
ghost atom cutoff = 3.3
binsize = 1.65, bins = 35 35 1
1 neighbor lists, perpetual/occasional/extra = 1 0 0
(1) pair line/lj, perpetual
attributes: half, newton on
pair build: half/multi/atomonly/newton
stencil: half/multi/2d
bin: multi
Per MPI rank memory allocation (min/avg/max) = 13.44 | 13.45 | 13.47 Mbytes
Step c_tsmall f_2[9] c_1 TotEng v_pebig v_ebig Press
0 1.6084386 0 0 0 0 0 0
1000 1.4539924 1.2578325 0.0001679809 0.0010497614 0 0.053233399 0.69841607
2000 1.3516746 1.1693535 0.00031932331 0.0021450754 0 0.10877677 0.058141137
3000 1.3294093 1.1539986 0.00032558495 0.0022345521 0 0.11331414 0.085892255
4000 1.3049311 1.1174444 0.00039637116 0.0026520968 0 0.13448783 0.10574422
5000 1.2790124 1.1209176 0.0004519172 0.0032635257 0 0.16549339 0.59225702
6000 1.2631214 1.0868777 0.00052274216 0.003527049 0 0.17885666 0.26093936
7000 1.2520054 1.0780954 0.00051683183 0.0038111141 0 0.1932616 -0.0014733978
8000 1.2412037 1.0587149 0.00056620143 0.0038329297 0 0.19436786 0.31576462
9000 1.2242125 1.0699712 0.00065335672 0.0039948578 0 0.20257924 0.19755012
10000 1.2155758 1.0279682 0.00059730828 0.004142635 0 0.21007302 0.30022953
Loop time of 1.92412 on 4 procs for 10000 steps with 20284 atoms
Performance: 44903.607 tau/day, 5197.177 timesteps/s, 105.420 Matom-step/s
97.6% CPU use with 4 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 0.026855 | 0.027529 | 0.028377 | 0.4 | 1.43
Neigh | 0.0073723 | 0.007433 | 0.0075693 | 0.1 | 0.39
Comm | 0.062837 | 0.065574 | 0.066747 | 0.6 | 3.41
Output | 0.00072894 | 0.00076558 | 0.00083361 | 0.0 | 0.04
Modify | 1.7861 | 1.7926 | 1.8 | 0.4 | 93.16
Other | | 0.03026 | | | 1.57
Nlocal: 5071 ave 5096 max 5051 min
Histogram: 1 0 1 0 0 1 0 0 0 1
Nghost: 44 ave 51 max 40 min
Histogram: 2 0 0 1 0 0 0 0 0 1
Neighs: 0 ave 0 max 0 min
Histogram: 4 0 0 0 0 0 0 0 0 0
Total # of neighbors = 0
Ave neighs/atom = 0
Neighbor list builds = 500
Dangerous builds = 0
#undump 1
#undump 2
unfix 3
change_box all triclinic
Changing box ...
triclinic box = (-35.995947 -35.995947 -0.5) to (35.995947 35.995947 0.5) with tilt (0 0 0)
fix 2 small srd 20 big 1.0 0.25 49894 search 0.2 cubic warn 0.0001 shift yes 49829 overlap yes collision noslip #tstat yes
#dump 1 all custom 500 dump2.atom.srd id type x y z ix iy iz
#dump 2 all custom 500 dump2.line.srd id type # c_10[1] c_10[2] c_10[3] c_10[4]
fix 3 all deform 1 xy erate 0.0002 units box remap v
run 30000
Generated 0 of 1 mixed pair_coeff terms from geometric mixing rule
WARNING: Using fix srd with box deformation but no SRD thermostat (src/SRD/fix_srd.cpp:405)
SRD info:
SRD/big particles = 19884 400
big particle diameter max/min = 1.9988171 0.50330564
SRD temperature & lamda = 1 0.02
SRD max distance & max velocity = 0.08 40
SRD grid counts: 288 288 1
SRD grid size: request, actual (xyz) = 0.25, 0.24997185 0.24997185 1
SRD per actual grid cell = 0.26976265
SRD viscosity = 1.0312189
big/SRD mass density ratio = 20.672578
WARNING: SRD bin size for fix srd differs from user request (src/SRD/fix_srd.cpp:2809)
WARNING: Fix srd grid size > 1/4 of big particle diameter (src/SRD/fix_srd.cpp:2830)
# of rescaled SRD velocities = 0
ave/max small velocity = 12.826666 40
ave/max big velocity = 0.53469722 1.7415554
Neighbor list info ...
update: every = 1 steps, delay = 0 steps, check = yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 3.3
ghost atom cutoff = 3.3
binsize = 1.65, bins = 44 44 1
1 neighbor lists, perpetual/occasional/extra = 1 0 0
(1) pair line/lj, perpetual
attributes: half, newton on
pair build: half/multi/atomonly/newton/tri
stencil: half/multi/2d/tri
bin: multi
Per MPI rank memory allocation (min/avg/max) = 17.6 | 17.6 | 17.61 Mbytes
Step c_tsmall f_2[9] c_1 TotEng v_pebig v_ebig Press
10000 1.0416233 0 0.0005972922 0.0041426543 0 0.210074 0.016213064
11000 1.0366852 1.0236717 0.00066926382 0.004404743 0 0.22336452 0.097686059
12000 1.028695 1.0023004 0.00065323121 0.0043971164 0 0.22297777 0.30007875
13000 1.0223214 1.0296267 0.00060201395 0.0041815724 0 0.21204754 0.17307062
14000 1.0210601 1.0092227 0.00057020066 0.0041936718 0 0.2126611 0.13379405
15000 1.0169223 1.003359 0.00060818329 0.0046120335 0 0.23387622 0.39144341
16000 1.012222 0.99937463 0.00060471571 0.004533278 0 0.22988253 0.16026051
17000 1.0087993 0.98937449 0.00061843646 0.0048888197 0 0.24791205 0.095507016
18000 1.0030288 0.94092316 0.00054252694 0.0046740839 0 0.2370228 -0.070838649
19000 1.0010739 0.99759814 0.00054041829 0.0045481798 0 0.2306382 0.1944996
20000 1.004189 1.0157285 0.00053330893 0.0044999261 0 0.22819125 0.14452619
21000 0.99601133 1.003771 0.00053282188 0.0048954851 0 0.24825005 0.20196263
22000 0.99445408 1.0163465 0.00059954941 0.0051668276 0 0.26200983 0.21332194
23000 0.99663947 0.94920707 0.00057729723 0.0051052499 0 0.25888722 0.19282224
24000 0.99500015 1.0021742 0.00064312956 0.0053430656 0 0.27094686 0.18044164
25000 0.99388189 0.98446723 0.00060150964 0.0054017824 0 0.27392438 0.18844037
26000 0.99632932 0.94670024 0.00052479857 0.004905147 0 0.24874 0.17961314
27000 0.99112962 1.0247118 0.00053159824 0.004752248 0 0.2409865 0.12037737
28000 0.98887153 0.97734068 0.00052255365 0.0050199491 0 0.25456162 0.29110866
29000 0.98938025 0.99467239 0.00053095044 0.0050303107 0 0.25508706 0.59776339
30000 0.99081592 0.98822122 0.00056693985 0.0052452228 0 0.26598525 0.0150843
31000 0.99050903 1.0184912 0.00056742049 0.0056515686 0 0.28659104 0.07877756
32000 0.98929926 0.99257634 0.00058111732 0.005740055 0 0.29107819 0.19146041
33000 0.98698723 0.94872564 0.00056547335 0.0057628447 0 0.29223385 0.076755599
34000 0.98967998 0.98777081 0.00056045905 0.0055645603 0 0.28217885 0.095025284
35000 0.98777734 0.95746323 0.00050104756 0.0055630681 -4.7847642e-05 0.28210318 0.25667997
36000 0.98661597 0.98801246 0.00047074618 0.0054500805 -5.8956193e-05 0.27637358 0.18221607
37000 0.98390111 0.9837894 0.00044581144 0.0054703357 -7.4197741e-05 0.27740072 0.11959303
38000 0.98092646 1.0142057 0.0004945556 0.0056372628 -8.6736668e-05 0.2858656 0.017325263
39000 0.98125957 0.94219822 0.00059691168 0.0060914156 -9.9726556e-05 0.30889569 0.0081217281
40000 0.98362942 0.9649582 0.00062286697 0.0063736358 -0.00010700337 0.32320707 0.293121
Loop time of 5.08621 on 4 procs for 30000 steps with 20284 atoms
Performance: 50961.296 tau/day, 5898.298 timesteps/s, 119.641 Matom-step/s
98.8% CPU use with 4 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 0.084321 | 0.084948 | 0.086233 | 0.3 | 1.67
Neigh | 0.027521 | 0.029247 | 0.030632 | 0.7 | 0.58
Comm | 0.19101 | 0.20262 | 0.2068 | 1.5 | 3.98
Output | 0.0019581 | 0.0020369 | 0.0022544 | 0.3 | 0.04
Modify | 4.681 | 4.6884 | 4.7051 | 0.4 | 92.18
Other | | 0.07891 | | | 1.55
Nlocal: 5071 ave 5197 max 4951 min
Histogram: 1 0 0 1 0 0 1 0 0 1
Nghost: 44 ave 50 max 41 min
Histogram: 2 0 0 1 0 0 0 0 0 1
Neighs: 5.75 ave 12 max 0 min
Histogram: 1 0 0 1 0 1 0 0 0 1
Total # of neighbors = 23
Ave neighs/atom = 0.0011338986
Neighbor list builds = 1500
Dangerous builds = 0
Total wall time: 0:00:07

View File

@ -0,0 +1,323 @@
LAMMPS (29 Aug 2024 - Development - patch_29Aug2024-296-g061c80104a-modified)
using 1 OpenMP thread(s) per MPI task
# Aspherical shear demo - 2d line boxes, solvated by SRD particles
units lj
atom_style line
atom_modify first big
dimension 2
read_data data.line.srd
Reading data file ...
orthogonal box = (-28.796757 -28.796757 -0.5) to (28.796757 28.796757 0.5)
4 by 2 by 1 MPI processor grid
reading atoms ...
400 atoms
400 lines
read_data CPU = 0.002 seconds
# add small particles as hi density lattice
lattice sq 0.4
Lattice spacing in x,y,z = 1.5811388 1.5811388 1.5811388
region plane block INF INF INF INF -0.001 0.001
lattice sq 10.0
Lattice spacing in x,y,z = 0.31622777 0.31622777 0.31622777
create_atoms 2 region plane
Created 33489 atoms
using lattice units in orthogonal box = (-28.796757 -28.796757 -0.5) to (28.796757 28.796757 0.5)
create_atoms CPU = 0.001 seconds
group big type 1
400 atoms in group big
group small type 2
33489 atoms in group small
set group small mass 0.01
Setting atom values ...
33489 settings made for mass
# delete overlaps
# must set 1-2 cutoff to non-zero value
pair_style lj/cut 1.5
pair_coeff 1 1 1.0 1.0
pair_coeff 2 2 0.0 1.0 0.0
pair_coeff 1 2 1.0 1.0
delete_atoms overlap 1.5 small big
System init for delete_atoms ...
Generated 0 of 1 mixed pair_coeff terms from geometric mixing rule
Neighbor list info ...
update: every = 1 steps, delay = 0 steps, check = yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 1.8
ghost atom cutoff = 1.8
binsize = 0.9, bins = 64 64 2
2 neighbor lists, perpetual/occasional/extra = 1 1 0
(1) command delete_atoms, occasional
attributes: full, newton on
pair build: full/bin/atomonly
stencil: full/bin/2d
bin: standard
(2) pair lj/cut, perpetual
attributes: half, newton on
pair build: half/bin/atomonly/newton
stencil: half/bin/2d
bin: standard
WARNING: Delete_atoms cutoff > minimum neighbor cutoff (src/delete_atoms.cpp:313)
Deleted 13605 atoms, new total = 20284
# SRD run
reset_timestep 0
velocity small create 1.44 87287 loop geom
neighbor 0.8 multi
neigh_modify delay 0 every 1 check yes
neigh_modify exclude molecule/intra big include big
comm_modify mode multi group big vel yes
neigh_modify include big
# no pairwise interactions with small particles
pair_style line/lj 2.5
pair_coeff 1 1 1.0 1.0 1.0 1.0 2.5
pair_coeff 2 2 0.0 0.0 0.0 1.0 0.0
pair_coeff 1 2 1.0 0.2 1.0 0.2 2.5
# use fix SRD to push small particles out from inside big ones
# if comment out, big particles won't see SRD particles
timestep 0.0002
fix 1 big rigid molecule
100 rigid bodies with 400 atoms
fix 2 small srd 20 big 1.0 0.25 49894 search 0.2 cubic warn 0.0001 shift yes 49829 overlap yes collision noslip
fix 3 all deform 1 x scale 1.25 y scale 1.25
fix 4 all enforce2d
# diagnostics
compute tsmall small temp/deform
compute tbig big temp
variable pebig equal pe*atoms/count(big)
variable ebig equal etotal*atoms/count(big)
compute_modify tbig extra/dof -350
compute 1 big erotate/asphere
compute 2 all ke
compute 3 all pe
variable toteng equal (c_1+c_2+c_3)/atoms
thermo 1000
thermo_style custom step c_tsmall f_2[9] c_1 etotal v_pebig v_ebig press
thermo_modify temp tbig
WARNING: Temperature for thermo pressure is not for group all (src/thermo.cpp:533)
compute 10 big property/atom end1x end1y end2x end2y
#dump 1 all custom 500 dump1.atom.srd id type x y z ix iy iz
#dump 2 all custom 500 dump1.line.srd id type # c_10[1] c_10[2] c_10[3] c_10[4]
run 10000
CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
Your simulation uses code contributions which should be cited:
- Type Label Framework: https://doi.org/10.1021/acs.jpcb.3c08419
@Article{Gissinger24,
author = {Jacob R. Gissinger, Ilia Nikiforov, Yaser Afshar, Brendon Waters, Moon-ki Choi, Daniel S. Karls, Alexander Stukowski, Wonpil Im, Hendrik Heinz, Axel Kohlmeyer, and Ellad B. Tadmor},
title = {Type Label Framework for Bonded Force Fields in LAMMPS},
journal = {J. Phys. Chem. B},
year = 2024,
volume = 128,
number = 13,
pages = {3282-3297}
}
- neighbor multi command: doi:10.1016/j.cpc.2008.03.005, doi:10.1007/s40571-020-00361-2
@Article{Intveld08,
author = {in 't Veld, P. J. and S. J.~Plimpton and G. S. Grest},
title = {Accurate and Efficient Methods for Modeling Colloidal
Mixtures in an Explicit Solvent using Molecular Dynamics},
journal = {Comput.\ Phys.\ Commut.},
year = 2008,
volume = 179,
pages = {320--329}
}
@article{Shire2020,
author = {Shire, Tom and Hanley, Kevin J. and Stratford, Kevin},
title = {{DEM} Simulations of Polydisperse Media: Efficient Contact
Detection Applied to Investigate the Quasi-Static Limit},
journal = {Computational Particle Mechanics},
year = {2020}
@article{Monti2022,
author = {Monti, Joseph M. and Clemmer, Joel T. and Srivastava,
Ishan and Silbert, Leonardo E. and Grest, Gary S.
and Lechman, Jeremy B.},
title = {Large-scale frictionless jamming with power-law particle
size distributions},
journal = {Phys. Rev. E},
volume = {106}
issue = {3}
year = {2022}
}
- fix srd command: doi:10.1063/1.3419070
@Article{Petersen10,
author = {M. K. Petersen and J. B. Lechman and S. J. Plimpton and
G. S. Grest and in 't Veld, P. J. and P. R. Schunk},
title = {Mesoscale Hydrodynamics via Stochastic Rotation
Dynamics: Comparison with {L}ennard-{J}ones Fluid},
journal = {J.~Chem.\ Phys.},
year = 2010,
volume = 132,
pages = 174106
}
CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
Generated 0 of 1 mixed pair_coeff terms from geometric mixing rule
WARNING: Using compute temp/deform with inconsistent fix deform remap option (src/compute_temp_deform.cpp:71)
WARNING: Using fix srd with box deformation but no SRD thermostat (src/SRD/fix_srd.cpp:405)
SRD info:
SRD/big particles = 19884 400
big particle diameter max/min = 1.9988171 0.50330564
SRD temperature & lamda = 1 0.04
SRD max distance & max velocity = 0.16 40
SRD grid counts: 230 230 1
SRD grid size: request, actual (xyz) = 0.25, 0.25040659 0.25040659 1
SRD per actual grid cell = 0.45503978
SRD viscosity = 1.0732692
big/SRD mass density ratio = 12.298053
WARNING: SRD bin size for fix srd differs from user request (src/SRD/fix_srd.cpp:2809)
WARNING: Fix srd grid size > 1/4 of big particle diameter (src/SRD/fix_srd.cpp:2830)
# of rescaled SRD velocities = 0
ave/max small velocity = 15.906001 29.105426
ave/max big velocity = 0 0
Neighbor list info ...
update: every = 1 steps, delay = 0 steps, check = yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 3.3
ghost atom cutoff = 3.3
binsize = 1.65, bins = 35 35 1
1 neighbor lists, perpetual/occasional/extra = 1 0 0
(1) pair line/lj, perpetual
attributes: half, newton on
pair build: half/multi/atomonly/newton
stencil: half/multi/2d
bin: multi
Per MPI rank memory allocation (min/avg/max) = 9.752 | 9.762 | 9.781 Mbytes
Step c_tsmall f_2[9] c_1 TotEng v_pebig v_ebig Press
0 1.4809886 0 0 0 0 0 0
1000 1.2265081 1.1522909 0.00027866069 0.0022427232 0 0.1137285 0.58262976
2000 1.1757141 1.1251323 0.00040597152 0.003287261 0 0.16669701 0.27109853
3000 1.126304 1.0646585 0.00050542958 0.0040748897 0 0.20663766 0.36959653
4000 1.1140297 1.0761435 0.00060430927 0.0052863645 0 0.26807154 0.35099205
5000 1.0823307 1.0256814 0.00066557681 0.0051213284 0 0.25970256 0.28491631
6000 1.0656188 1.0387091 0.00066454105 0.005213537 0 0.26437846 0.2150581
7000 1.0600108 1.0099931 0.0007238719 0.005470459 0 0.27740698 0.21846568
8000 1.0464374 1.0049819 0.00061068366 0.0053702582 0 0.27232579 0.1326171
9000 1.0381097 1.0126049 0.00057318728 0.0055976321 0 0.28385592 0.16797796
10000 1.0312982 1.0018962 0.00050597007 0.0051153126 0 0.2593975 0.15281043
Loop time of 1.2997 on 8 procs for 10000 steps with 20284 atoms
Performance: 132953.355 tau/day, 7694.060 timesteps/s, 156.066 Matom-step/s
96.7% CPU use with 8 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 0.01406 | 0.014495 | 0.01532 | 0.3 | 1.12
Neigh | 0.0051916 | 0.0053424 | 0.0054386 | 0.1 | 0.41
Comm | 0.062053 | 0.065305 | 0.069014 | 0.9 | 5.02
Output | 0.00062289 | 0.00065254 | 0.00077243 | 0.0 | 0.05
Modify | 1.169 | 1.178 | 1.1905 | 0.6 | 90.64
Other | | 0.03589 | | | 2.76
Nlocal: 2535.5 ave 2637 max 2476 min
Histogram: 1 3 1 1 0 0 0 0 1 1
Nghost: 31.125 ave 42 max 25 min
Histogram: 3 0 1 2 0 0 0 0 1 1
Neighs: 0 ave 0 max 0 min
Histogram: 8 0 0 0 0 0 0 0 0 0
Total # of neighbors = 0
Ave neighs/atom = 0
Neighbor list builds = 500
Dangerous builds = 0
#undump 1
#undump 2
unfix 3
change_box all triclinic
Changing box ...
triclinic box = (-35.995947 -35.995947 -0.5) to (35.995947 35.995947 0.5) with tilt (0 0 0)
fix 2 small srd 20 big 1.0 0.25 49894 search 0.2 cubic warn 0.0001 shift yes 49829 overlap yes collision noslip #tstat yes
#dump 1 all custom 500 dump2.atom.srd id type x y z ix iy iz
#dump 2 all custom 500 dump2.line.srd id type # c_10[1] c_10[2] c_10[3] c_10[4]
fix 3 all deform 1 xy erate 0.0002 units box remap v
run 40000
Generated 0 of 1 mixed pair_coeff terms from geometric mixing rule
WARNING: Using fix srd with box deformation but no SRD thermostat (src/SRD/fix_srd.cpp:405)
SRD info:
SRD/big particles = 19884 400
big particle diameter max/min = 1.9988171 0.50330564
SRD temperature & lamda = 1 0.04
SRD max distance & max velocity = 0.16 40
SRD grid counts: 288 288 1
SRD grid size: request, actual (xyz) = 0.25, 0.24997185 0.24997185 1
SRD per actual grid cell = 0.26976265
SRD viscosity = 1.5799049
big/SRD mass density ratio = 20.672578
WARNING: SRD bin size for fix srd differs from user request (src/SRD/fix_srd.cpp:2809)
WARNING: Fix srd grid size > 1/4 of big particle diameter (src/SRD/fix_srd.cpp:2830)
# of rescaled SRD velocities = 1
ave/max small velocity = 12.481632 40
ave/max big velocity = 0.58815233 1.5489134
Neighbor list info ...
update: every = 1 steps, delay = 0 steps, check = yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 3.3
ghost atom cutoff = 3.3
binsize = 1.65, bins = 44 44 1
1 neighbor lists, perpetual/occasional/extra = 1 0 0
(1) pair line/lj, perpetual
attributes: half, newton on
pair build: half/multi/atomonly/newton/tri
stencil: half/multi/2d/tri
bin: multi
Per MPI rank memory allocation (min/avg/max) = 11.95 | 11.95 | 11.95 Mbytes
Step c_tsmall f_2[9] c_1 TotEng v_pebig v_ebig Press
10000 0.98889306 0 0.00050595647 0.0051153289 0 0.25939833 0.020019811
11000 0.98995417 0.99721412 0.0005166709 0.0054320552 0 0.27545952 0.03376188
12000 0.99401168 0.95502319 0.00053659416 0.0054974426 0 0.27877531 0.056856034
13000 0.98790692 0.96488357 0.00061076364 0.0053196554 0 0.26975972 0.2677228
14000 0.98939233 0.98809694 0.00052317779 0.0050536922 0 0.25627273 0.20381427
15000 0.99046365 0.97048828 0.00056914402 0.0053274908 0 0.27015706 0.082163088
16000 0.99743447 1.0111983 0.00053706165 0.0048068384 0 0.24375478 0.06429569
17000 0.99622359 1.0124982 0.0005905197 0.0055966416 -7.4541903e-05 0.2838057 0.25418037
18000 0.99283929 0.99581721 0.00061730878 0.0059098964 -0.00014754106 0.29969084 0.12701702
19000 0.99175517 1.0115581 0.0005128714 0.0059214809 -0.00046203987 0.30027829 0.2640506
20000 0.98895811 0.97731528 0.00052294314 0.0055670021 -0.0016029702 0.28230268 0.26239209
21000 0.98904093 0.98120258 0.00054169978 0.0052762731 -0.0026834879 0.26755981 0.15432481
22000 0.98773149 0.99217218 0.00056091602 0.0052754729 -0.0044093345 0.26751923 0.2208051
23000 0.98802862 0.96564549 0.00062226338 0.0053263454 -0.008790914 0.27009898 0.18058977
24000 0.98754002 1.024015 0.00057178953 0.0054083382 -0.0094859317 0.27425683 0.01598147
25000 0.98656341 0.94647246 0.00066215791 0.0053555849 -0.014877058 0.27158171 0.28794628
26000 0.98750471 1.0077626 0.00058955769 0.0057081639 -0.013151869 0.28946099 0.20814911
27

View File

@ -81,6 +81,7 @@ liblammpsplugin_t *liblammpsplugin_load(const char *lib)
ADDSYM(python_finalize); ADDSYM(python_finalize);
ADDSYM(error); ADDSYM(error);
ADDSYM(expand);
ADDSYM(file); ADDSYM(file);
ADDSYM(command); ADDSYM(command);

View File

@ -126,6 +126,7 @@ struct _liblammpsplugin {
void (*python_finalize)(); void (*python_finalize)();
void (*error)(void *, int, const char *); void (*error)(void *, int, const char *);
char *(*expand)(void *, const char *);
void (*file)(void *, const char *); void (*file)(void *, const char *);
char *(*command)(void *, const char *); char *(*command)(void *, const char *);

File diff suppressed because it is too large Load Diff

View File

@ -1,4 +1,4 @@
LAMMPS (7 Feb 2024 - Development - patch_7Feb2024_update1-182-g93942f2013-modified) LAMMPS (29 Aug 2024 - Development - patch_29Aug2024-372-g51d104975a)
OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98)
using 1 OpenMP thread(s) per MPI task using 1 OpenMP thread(s) per MPI task
# Test of MEAM potential for HGa # Test of MEAM potential for HGa
@ -67,7 +67,7 @@ Created 1 atoms
variable teng equal "c_eatoms" variable teng equal "c_eatoms"
compute pot_energy all pe/atom compute pot_energy all pe/atom
compute stress all stress/atom NULL compute stress all stress/atom NULL
# dump 1 all custom 1 dump.msmeam id x y z fx fy fz c_pot_energy c_stress[1] c_stress[2] c_stress[3] c_stress[4] c_stress[5] c_stress[6] dump 1 all custom 1 dump.msmeam id x y z fx fy fz c_pot_energy c_stress[1] c_stress[2] c_stress[3] c_stress[4] c_stress[5] c_stress[6]
run 1 run 1
WARNING: No fixes with time integration, atoms won't move (src/verlet.cpp:60) WARNING: No fixes with time integration, atoms won't move (src/verlet.cpp:60)
Neighbor list info ... Neighbor list info ...
@ -89,22 +89,22 @@ Neighbor list info ...
bin: none bin: none
Per MPI rank memory allocation (min/avg/max) = 8.587 | 8.587 | 8.587 Mbytes Per MPI rank memory allocation (min/avg/max) = 8.587 | 8.587 | 8.587 Mbytes
Step Temp TotEng Press Pxx Pyy Pzz Pxy Pxz Pyz Lx Ly Lz Volume c_eatoms Step Temp TotEng Press Pxx Pyy Pzz Pxy Pxz Pyz Lx Ly Lz Volume c_eatoms
0 0 15.433079 491354.7 838670.96 635393.15 0 80195.797 0 0 8 8 8 512 15.433079 0 0 15.438614 491542.52 839006.02 635621.55 0 80225.587 0 0 8 8 8 512 15.438614
1 0 15.433079 491354.7 838670.96 635393.15 0 80195.797 0 0 8 8 8 512 15.433079 1 0 15.438614 491542.52 839006.02 635621.55 0 80225.587 0 0 8 8 8 512 15.438614
Loop time of 4.4446e-05 on 1 procs for 1 steps with 3 atoms Loop time of 0.000144827 on 1 procs for 1 steps with 3 atoms
Performance: 1943.932 ns/day, 0.012 hours/ns, 22499.213 timesteps/s, 67.498 katom-step/s Performance: 596.574 ns/day, 0.040 hours/ns, 6904.790 timesteps/s, 20.714 katom-step/s
31.5% CPU use with 1 MPI tasks x 1 OpenMP threads 21.4% CPU use with 1 MPI tasks x 1 OpenMP threads
MPI task timing breakdown: MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total Section | min time | avg time | max time |%varavg| %total
--------------------------------------------------------------- ---------------------------------------------------------------
Pair | 2.9908e-05 | 2.9908e-05 | 2.9908e-05 | 0.0 | 67.29 Pair | 9.2136e-05 | 9.2136e-05 | 9.2136e-05 | 0.0 | 63.62
Neigh | 0 | 0 | 0 | 0.0 | 0.00 Neigh | 0 | 0 | 0 | 0.0 | 0.00
Comm | 1.033e-06 | 1.033e-06 | 1.033e-06 | 0.0 | 2.32 Comm | 4.389e-06 | 4.389e-06 | 4.389e-06 | 0.0 | 3.03
Output | 9.347e-06 | 9.347e-06 | 9.347e-06 | 0.0 | 21.03 Output | 3.9556e-05 | 3.9556e-05 | 3.9556e-05 | 0.0 | 27.31
Modify | 2.02e-07 | 2.02e-07 | 2.02e-07 | 0.0 | 0.45 Modify | 9.92e-07 | 9.92e-07 | 9.92e-07 | 0.0 | 0.68
Other | | 3.956e-06 | | | 8.90 Other | | 7.754e-06 | | | 5.35
Nlocal: 3 ave 3 max 3 min Nlocal: 3 ave 3 max 3 min
Histogram: 1 0 0 0 0 0 0 0 0 0 Histogram: 1 0 0 0 0 0 0 0 0 0

View File

@ -1,4 +1,4 @@
LAMMPS (7 Feb 2024 - Development - patch_7Feb2024_update1-182-g93942f2013-modified) LAMMPS (29 Aug 2024 - Development - patch_29Aug2024-372-g51d104975a)
OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98)
using 1 OpenMP thread(s) per MPI task using 1 OpenMP thread(s) per MPI task
# Test of MEAM potential for HGa # Test of MEAM potential for HGa
@ -67,7 +67,7 @@ Created 1 atoms
variable teng equal "c_eatoms" variable teng equal "c_eatoms"
compute pot_energy all pe/atom compute pot_energy all pe/atom
compute stress all stress/atom NULL compute stress all stress/atom NULL
# dump 1 all custom 1 dump.msmeam id x y z fx fy fz c_pot_energy c_stress[1] c_stress[2] c_stress[3] c_stress[4] c_stress[5] c_stress[6] dump 1 all custom 1 dump.msmeam id x y z fx fy fz c_pot_energy c_stress[1] c_stress[2] c_stress[3] c_stress[4] c_stress[5] c_stress[6]
run 1 run 1
WARNING: No fixes with time integration, atoms won't move (src/verlet.cpp:60) WARNING: No fixes with time integration, atoms won't move (src/verlet.cpp:60)
Neighbor list info ... Neighbor list info ...
@ -89,22 +89,22 @@ Neighbor list info ...
bin: none bin: none
Per MPI rank memory allocation (min/avg/max) = 7.965 | 8.123 | 8.594 Mbytes Per MPI rank memory allocation (min/avg/max) = 7.965 | 8.123 | 8.594 Mbytes
Step Temp TotEng Press Pxx Pyy Pzz Pxy Pxz Pyz Lx Ly Lz Volume c_eatoms Step Temp TotEng Press Pxx Pyy Pzz Pxy Pxz Pyz Lx Ly Lz Volume c_eatoms
0 0 15.433079 491354.7 838670.96 635393.15 0 80195.797 0 0 8 8 8 512 15.433079 0 0 15.438614 491542.52 839006.02 635621.55 0 80225.587 0 0 8 8 8 512 15.438614
1 0 15.433079 491354.7 838670.96 635393.15 0 80195.797 0 0 8 8 8 512 15.433079 1 0 15.438614 491542.52 839006.02 635621.55 0 80225.587 0 0 8 8 8 512 15.438614
Loop time of 8.70645e-05 on 4 procs for 1 steps with 3 atoms Loop time of 0.000328503 on 4 procs for 1 steps with 3 atoms
Performance: 992.368 ns/day, 0.024 hours/ns, 11485.738 timesteps/s, 34.457 katom-step/s Performance: 263.011 ns/day, 0.091 hours/ns, 3044.110 timesteps/s, 9.132 katom-step/s
29.0% CPU use with 4 MPI tasks x 1 OpenMP threads 75.3% CPU use with 4 MPI tasks x 1 OpenMP threads
MPI task timing breakdown: MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total Section | min time | avg time | max time |%varavg| %total
--------------------------------------------------------------- ---------------------------------------------------------------
Pair | 4.3957e-05 | 4.67e-05 | 5.1056e-05 | 0.0 | 53.64 Pair | 0.0001419 | 0.0001471 | 0.00015891 | 0.0 | 44.78
Neigh | 0 | 0 | 0 | 0.0 | 0.00 Neigh | 0 | 0 | 0 | 0.0 | 0.00
Comm | 1.105e-05 | 1.3822e-05 | 1.7033e-05 | 0.0 | 15.88 Comm | 2.2092e-05 | 2.8424e-05 | 3.667e-05 | 0.0 | 8.65
Output | 1.5765e-05 | 1.9045e-05 | 2.5216e-05 | 0.0 | 21.87 Output | 8.6275e-05 | 0.00010558 | 0.0001422 | 0.0 | 32.14
Modify | 2.58e-07 | 3.465e-07 | 3.81e-07 | 0.0 | 0.40 Modify | 1.093e-06 | 2.4148e-06 | 5.651e-06 | 0.0 | 0.74
Other | | 7.151e-06 | | | 8.21 Other | | 4.498e-05 | | | 13.69
Nlocal: 0.75 ave 3 max 0 min Nlocal: 0.75 ave 3 max 0 min
Histogram: 3 0 0 0 0 0 0 0 0 1 Histogram: 3 0 0 0 0 0 0 0 0 1

View File

@ -15,8 +15,8 @@ velocity all create 1.44 87287 loop geom
region slice block 4 6 INF INF INF INF region slice block 4 6 INF INF INF INF
set region slice type 2 set region slice type 2
pair_style lj/cut 2.5 pair_style lj/cut 4.0
pair_coeff * * 1.0 1.0 1.0 pair_coeff * * 1.0 1.0
neighbor 0.3 bin neighbor 0.3 bin
neigh_modify delay 0 every 1 neigh_modify delay 0 every 1
@ -24,14 +24,14 @@ neigh_modify delay 0 every 1
fix 1 all nvt/sllod temp 1.0 1.0 1.0 tchain 1 fix 1 all nvt/sllod temp 1.0 1.0 1.0 tchain 1
fix 2 all deform 1 xy erate 0.01 remap v fix 2 all deform 1 xy erate 0.01 remap v
#dump 1 all custom 5000 dump.nemd id type x y z #dump 1 all custom 500 dump.nemd id type x y z
#dump 2 all image 1000 image.*.jpg type type adiam 1.2 #dump 2 all image 100 image.*.jpg type type adiam 1.2
#dump_modify 2 pad 5 #dump_modify 2 pad 5
#dump 3 all movie 1000 movie.mpg type type adiam 1.2 #dump 3 all movie 100 movie.mpg type type adiam 1.2
#dump_modify 3 pad 5 #dump_modify 3 pad 5
thermo 1000 thermo 50
run 50000 run 1000

View File

@ -1,137 +0,0 @@
LAMMPS (27 Nov 2018)
using 1 OpenMP thread(s) per MPI task
# 2d NEMD simulation
units lj
atom_style atomic
dimension 2
lattice sq2 0.8442
Lattice spacing in x,y,z = 1.53919 1.53919 1.53919
region box prism 0 10 0 8 -0.5 0.5 0 0 0
create_box 2 box
Created triclinic box = (0 0 -0.769595) to (15.3919 12.3135 0.769595) with tilt (0 0 0)
1 by 1 by 1 MPI processor grid
create_atoms 1 box
Created 160 atoms
Time spent = 0.000332355 secs
mass * 1.0
velocity all create 1.44 87287 loop geom
region slice block 4 6 INF INF INF INF
set region slice type 2
40 settings made for type
pair_style lj/cut 2.5
pair_coeff * * 1.0 1.0 1.0
neighbor 0.3 bin
neigh_modify delay 0 every 1
fix 1 all nvt/sllod temp 1.0 1.0 1.0 tchain 1
fix 2 all deform 1 xy erate 0.01 remap v
#dump 1 all custom 5000 dump.nemd id type x y z
#dump 2 all image 1000 image.*.jpg type type adiam 1.2
#dump_modify 2 pad 5
#dump 3 all movie 1000 movie.mpg type type adiam 1.2
#dump_modify 3 pad 5
thermo 1000
run 50000
Neighbor list info ...
update every 1 steps, delay 0 steps, check yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 1.3
ghost atom cutoff = 1.3
binsize = 0.65, bins = 24 19 3
1 neighbor lists, perpetual/occasional/extra = 1 0 0
(1) pair lj/cut, perpetual
attributes: half, newton on
pair build: half/bin/newton/tri
stencil: half/bin/2d/newton/tri
bin: standard
Per MPI rank memory allocation (min/avg/max) = 3.065 | 3.065 | 3.065 Mbytes
Step Temp E_pair E_mol TotEng Press Volume
0 1.44 0 0 1.431 1.2080502 189.52855
1000 1.1326992 0.25863754 0 1.3842573 6.0588079 189.52855
2000 0.99104643 0.37634349 0 1.3611959 7.8993387 189.52855
3000 1.0749743 0.21908728 0 1.2873429 6.2659517 189.52855
4000 1.0986742 0.27147022 0 1.3632777 5.8778262 189.52855
5000 1.071838 0.23413372 0 1.2992728 5.9120887 189.52855
6000 1.0013194 0.26923671 0 1.2642979 6.2802759 189.52855
7000 0.94110685 0.3224557 0 1.2576806 6.1864166 189.52855
8000 0.97391513 0.28793383 0 1.255762 6.5071893 189.52855
9000 0.95346063 0.31050593 0 1.2580074 6.3321512 189.52855
10000 0.96236447 0.26298203 0 1.2193317 6.4083918 189.52855
11000 0.9511149 0.27571527 0 1.2208857 6.0949768 189.52855
12000 1.0186935 0.18134918 0 1.1936758 5.1269128 189.52855
13000 0.96350682 0.23171507 0 1.1892 5.7367267 189.52855
14000 0.94740402 0.27357945 0 1.2150622 6.0156532 189.52855
15000 0.87951545 0.27745111 0 1.1514696 6.297405 189.52855
16000 0.93216196 0.27020559 0 1.1965415 6.6188833 189.52855
17000 0.94109936 0.24756193 0 1.1827794 5.8993088 189.52855
18000 0.97325239 0.27996398 0 1.2471335 6.1486561 189.52855
19000 1.0494686 0.27132686 0 1.3142363 6.6757065 189.52855
20000 1.0391862 0.25195457 0 1.2846459 6.143235 189.52855
21000 0.96407137 0.27359166 0 1.2316376 5.9577116 189.52855
22000 0.97954534 0.31920255 0 1.2926257 6.5320163 189.52855
23000 0.97585473 0.24154424 0 1.2112999 6.0839179 189.52855
24000 1.0522109 0.1646952 0 1.2103298 5.0388687 189.52855
25000 0.93707172 0.25655806 0 1.1877731 5.819887 189.52855
26000 0.89798775 0.26629627 0 1.1586716 6.0393558 189.52855
27000 0.93259926 0.24542428 0 1.1721948 5.3560986 189.52855
28000 0.8428223 0.20784302 0 1.0453977 4.956911 189.52855
29000 0.81653505 0.21924932 0 1.030681 5.271501 189.52855
30000 0.90157811 0.15070734 0 1.0466506 4.476142 189.52855
31000 0.86580039 0.21115151 0 1.0715407 5.0056915 189.52855
32000 0.89768096 0.28377249 0 1.1758429 5.8449711 189.52855
33000 1.0504011 0.29009694 0 1.333933 6.1319155 189.52855
34000 1.2009765 0.19137934 0 1.3848498 4.9643885 189.52855
35000 1.208705 0.27071222 0 1.4718628 6.2162389 189.52855
36000 1.2211309 0.28389521 0 1.497394 6.5090715 189.52855
37000 1.1384381 0.42795547 0 1.5592783 8.5129272 189.52855
38000 1.2198334 0.34335732 0 1.5555668 7.2940883 189.52855
39000 1.1562045 0.35783089 0 1.5068091 7.340999 189.52855
40000 1.2145924 0.28410558 0 1.4911068 6.234986 189.52855
41000 1.1240878 0.34663237 0 1.4636946 7.1720193 189.52855
42000 1.2491422 0.26815889 0 1.509494 6.1390803 189.52855
43000 1.1387564 0.33755832 0 1.4691975 7.0577597 189.52855
44000 1.0031598 0.4081807 0 1.4050708 8.2732113 189.52855
45000 1.0166213 0.29131017 0 1.3015776 6.1907807 189.52855
46000 0.96251302 0.31483519 0 1.2713325 6.6987235 189.52855
47000 0.89809294 0.30909884 0 1.2015787 6.3997583 189.52855
48000 0.86736217 0.31917648 0 1.1811176 7.1584774 189.52855
49000 0.91979053 0.21099403 0 1.1250359 5.4968259 189.52855
50000 0.87079959 0.24059333 0 1.1059504 5.6039305 189.52855
Loop time of 1.54353 on 1 procs for 50000 steps with 160 atoms
Performance: 13993916.675 tau/day, 32393.326 timesteps/s
99.7% CPU use with 1 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 0.20172 | 0.20172 | 0.20172 | 0.0 | 13.07
Neigh | 0.16634 | 0.16634 | 0.16634 | 0.0 | 10.78
Comm | 0.068928 | 0.068928 | 0.068928 | 0.0 | 4.47
Output | 0.00059891 | 0.00059891 | 0.00059891 | 0.0 | 0.04
Modify | 1.0123 | 1.0123 | 1.0123 | 0.0 | 65.59
Other | | 0.09361 | | | 6.06
Nlocal: 160 ave 160 max 160 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 73 ave 73 max 73 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 353 ave 353 max 353 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 353
Ave neighs/atom = 2.20625
Neighbor list builds = 5273
Dangerous builds = 0
Total wall time: 0:00:01

View File

@ -1,137 +0,0 @@
LAMMPS (27 Nov 2018)
using 1 OpenMP thread(s) per MPI task
# 2d NEMD simulation
units lj
atom_style atomic
dimension 2
lattice sq2 0.8442
Lattice spacing in x,y,z = 1.53919 1.53919 1.53919
region box prism 0 10 0 8 -0.5 0.5 0 0 0
create_box 2 box
Created triclinic box = (0 0 -0.769595) to (15.3919 12.3135 0.769595) with tilt (0 0 0)
2 by 2 by 1 MPI processor grid
create_atoms 1 box
Created 160 atoms
Time spent = 0.000308275 secs
mass * 1.0
velocity all create 1.44 87287 loop geom
region slice block 4 6 INF INF INF INF
set region slice type 2
40 settings made for type
pair_style lj/cut 2.5
pair_coeff * * 1.0 1.0 1.0
neighbor 0.3 bin
neigh_modify delay 0 every 1
fix 1 all nvt/sllod temp 1.0 1.0 1.0 tchain 1
fix 2 all deform 1 xy erate 0.01 remap v
#dump 1 all custom 5000 dump.nemd id type x y z
#dump 2 all image 1000 image.*.jpg type type adiam 1.2
#dump_modify 2 pad 5
#dump 3 all movie 1000 movie.mpg type type adiam 1.2
#dump_modify 3 pad 5
thermo 1000
run 50000
Neighbor list info ...
update every 1 steps, delay 0 steps, check yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 1.3
ghost atom cutoff = 1.3
binsize = 0.65, bins = 24 19 3
1 neighbor lists, perpetual/occasional/extra = 1 0 0
(1) pair lj/cut, perpetual
attributes: half, newton on
pair build: half/bin/newton/tri
stencil: half/bin/2d/newton/tri
bin: standard
Per MPI rank memory allocation (min/avg/max) = 3.062 | 3.062 | 3.062 Mbytes
Step Temp E_pair E_mol TotEng Press Volume
0 1.44 0 0 1.431 1.2080502 189.52855
1000 1.1682693 0.24486562 0 1.4058332 5.8092954 189.52855
2000 1.0928734 0.27609364 0 1.3621366 6.2237017 189.52855
3000 1.09088 0.24816112 0 1.3322231 5.7001547 189.52855
4000 1.0110684 0.29868377 0 1.303433 7.3312319 189.52855
5000 0.91033678 0.28330698 0 1.1879542 6.1840352 189.52855
6000 0.93416074 0.22661127 0 1.1549335 5.3619735 189.52855
7000 0.93305734 0.19203739 0 1.1192631 5.2497547 189.52855
8000 0.88944438 0.19421381 0 1.0780992 4.9733446 189.52855
9000 0.86949257 0.21207681 0 1.0761351 5.4687076 189.52855
10000 0.80088203 0.24071142 0 1.0365879 5.334545 189.52855
11000 0.88899727 0.19972767 0 1.0831687 4.8832207 189.52855
12000 0.93045817 0.17883252 0 1.1034753 4.9081709 189.52855
13000 0.9724196 0.19089684 0 1.1572388 5.3460903 189.52855
14000 0.93902186 0.25513773 0 1.1882907 6.3338337 189.52855
15000 0.91879903 0.31605547 0 1.229112 6.2085671 189.52855
16000 0.9860058 0.26863362 0 1.2484769 6.514688 189.52855
17000 1.0354756 0.23445357 0 1.2634574 6.1519296 189.52855
18000 1.0244774 0.27511827 0 1.2931927 6.2230002 189.52855
19000 1.1581216 0.21558936 0 1.3664727 5.5458237 189.52855
20000 1.0552168 0.29344488 0 1.3420666 6.4880315 189.52855
21000 0.97925435 0.31583414 0 1.2889681 6.7584093 189.52855
22000 1.0112494 0.26246834 0 1.2673974 5.4112008 189.52855
23000 1.0463332 0.26049752 0 1.3002911 6.1359606 189.52855
24000 1.1130319 0.19848564 0 1.3045611 5.7088487 189.52855
25000 1.0355662 0.28048951 0 1.3095834 6.4596476 189.52855
26000 1.0823932 0.21784218 0 1.2934704 5.106334 189.52855
27000 0.99719525 0.32679678 0 1.3177596 6.7399277 189.52855
28000 1.0665868 0.25002709 0 1.3099477 6.2732557 189.52855
29000 1.0312798 0.30650087 0 1.3313351 7.0581024 189.52855
30000 1.0388277 0.29812912 0 1.3304641 6.2533028 189.52855
31000 1.0461658 0.21344416 0 1.2530714 5.3631154 189.52855
32000 1.0233681 0.27545017 0 1.2924222 5.9612896 189.52855
33000 1.1353086 0.20278244 0 1.3309953 5.7619128 189.52855
34000 1.0374791 0.29661216 0 1.327607 6.5124409 189.52855
35000 1.0752783 0.21684443 0 1.2854022 5.4759171 189.52855
36000 1.0383445 0.27068641 0 1.3025412 6.8367218 189.52855
37000 0.97341144 0.24034988 0 1.2076775 6.1335996 189.52855
38000 0.9285918 0.2737544 0 1.1965425 5.8750327 189.52855
39000 0.84869423 0.30079207 0 1.144182 6.8909326 189.52855
40000 0.88237131 0.26049171 0 1.1373482 6.3932981 189.52855
41000 0.90368591 0.21064132 0 1.1086792 5.5627232 189.52855
42000 0.93436749 0.20367569 0 1.1322034 5.1420052 189.52855
43000 0.91378588 0.26155533 0 1.16963 6.366756 189.52855
44000 0.91673608 0.25967314 0 1.1706796 6.0846334 189.52855
45000 1.0233334 0.25463562 0 1.2715732 6.0924255 189.52855
46000 0.96184729 0.35422095 0 1.3100567 7.0249175 189.52855
47000 1.134079 0.26196034 0 1.3889514 6.3476756 189.52855
48000 1.0552136 0.303812 0 1.3524305 6.6968927 189.52855
49000 1.1282184 0.2100955 0 1.3312626 5.8658659 189.52855
50000 1.0493816 0.31540438 0 1.3582274 6.6348173 189.52855
Loop time of 1.09903 on 4 procs for 50000 steps with 160 atoms
Performance: 19653623.953 tau/day, 45494.500 timesteps/s
94.4% CPU use with 4 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 0.057854 | 0.05974 | 0.062726 | 0.7 | 5.44
Neigh | 0.047791 | 0.049863 | 0.054819 | 1.3 | 4.54
Comm | 0.3581 | 0.38553 | 0.39784 | 2.6 | 35.08
Output | 0.001116 | 0.0014414 | 0.0023859 | 1.4 | 0.13
Modify | 0.41102 | 0.42642 | 0.4493 | 2.3 | 38.80
Other | | 0.176 | | | 16.02
Nlocal: 40 ave 42 max 39 min
Histogram: 2 0 0 1 0 0 0 0 0 1
Nghost: 36.5 ave 37 max 36 min
Histogram: 2 0 0 0 0 0 0 0 0 2
Neighs: 87.5 ave 94 max 81 min
Histogram: 1 1 0 0 0 0 0 0 1 1
Total # of neighbors = 350
Ave neighs/atom = 2.1875
Neighbor list builds = 5276
Dangerous builds = 0
Total wall time: 0:00:01

View File

@ -0,0 +1,111 @@
LAMMPS (29 Aug 2024 - Development - patch_29Aug2024-394-g75f86a68a7-modified)
OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98)
using 1 OpenMP thread(s) per MPI task
# 2d NEMD simulation
units lj
atom_style atomic
dimension 2
lattice sq2 0.8442
Lattice spacing in x,y,z = 1.5391903 1.5391903 1.5391903
region box prism 0 10 0 8 -0.5 0.5 0 0 0
create_box 2 box
Created triclinic box = (0 0 -0.76959516) to (15.391903 12.313523 0.76959516) with tilt (0 0 0)
1 by 1 by 1 MPI processor grid
create_atoms 1 box
Created 160 atoms
using lattice units in triclinic box = (0 0 -0.76959516) to (15.391903 12.313523 0.76959516) with tilt (0 0 0)
create_atoms CPU = 0.000 seconds
mass * 1.0
velocity all create 1.44 87287 loop geom
region slice block 4 6 INF INF INF INF
set region slice type 2
Setting atom values ...
40 settings made for type
pair_style lj/cut 4.0
pair_coeff * * 1.0 1.0
neighbor 0.3 bin
neigh_modify delay 0 every 1
fix 1 all nvt/sllod temp 1.0 1.0 1.0 tchain 1
fix 2 all deform 1 xy erate 0.01 remap v
#dump 1 all custom 500 dump.nemd id type x y z
#dump 2 all image 100 image.*.jpg type type adiam 1.2
#dump_modify 2 pad 5
#dump 3 all movie 100 movie.mpg type type adiam 1.2
#dump_modify 3 pad 5
thermo 50
run 1000
Generated 0 of 1 mixed pair_coeff terms from geometric mixing rule
Neighbor list info ...
update: every = 1 steps, delay = 0 steps, check = yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 4.3
ghost atom cutoff = 4.3
binsize = 2.15, bins = 8 6 1
1 neighbor lists, perpetual/occasional/extra = 1 0 0
(1) pair lj/cut, perpetual
attributes: half, newton on
pair build: half/bin/atomonly/newton/tri
stencil: half/bin/2d/tri
bin: standard
Per MPI rank memory allocation (min/avg/max) = 3.065 | 3.065 | 3.065 Mbytes
Step Temp E_pair E_mol TotEng Press Volume
0 1.44 -2.6548731 0 -1.2238731 1.9354912 189.52855
50 0.97904822 -2.1934929 0 -1.2205637 5.0001562 189.52855
100 0.96423603 -2.1711413 0 -1.2129318 5.3040025 189.52855
150 0.96430794 -2.153062 0 -1.194781 5.3988945 189.52855
200 1.056585 -2.2379316 0 -1.1879503 5.0007883 189.52855
250 1.0183256 -2.1921531 0 -1.1801921 5.5370076 189.52855
300 0.96855552 -2.140465 0 -1.177963 5.7188412 189.52855
350 1.0115567 -2.1883272 0 -1.1830927 5.4437104 189.52855
400 0.93743103 -2.1230826 0 -1.1915105 5.7059909 189.52855
450 1.1120368 -2.3041274 0 -1.1990408 4.646396 189.52855
500 0.99611106 -2.2039016 0 -1.2140162 5.1526658 189.52855
550 1.1075519 -2.3349751 0 -1.2343453 4.0671355 189.52855
600 1.0550783 -2.3126484 0 -1.2641644 4.5423735 189.52855
650 0.98516169 -2.2664919 0 -1.2874875 4.8365475 189.52855
700 0.97899201 -2.2815136 0 -1.3086403 4.5415389 189.52855
750 1.0107776 -2.3375258 0 -1.3330656 4.3655082 189.52855
800 0.97711804 -2.3221241 0 -1.3511131 4.2153988 189.52855
850 0.8984454 -2.258341 0 -1.3655108 4.6759265 189.52855
900 0.85409237 -2.2157566 0 -1.3670024 5.0180073 189.52855
950 0.90195434 -2.2500988 0 -1.3537817 4.8189466 189.52855
1000 1.0047283 -2.3359434 0 -1.3374947 4.0788763 189.52855
Loop time of 0.0331477 on 1 procs for 1000 steps with 160 atoms
Performance: 13032596.122 tau/day, 30168.047 timesteps/s, 4.827 Matom-step/s
99.6% CPU use with 1 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 0.017584 | 0.017584 | 0.017584 | 0.0 | 53.05
Neigh | 0.0080996 | 0.0080996 | 0.0080996 | 0.0 | 24.43
Comm | 0.0010864 | 0.0010864 | 0.0010864 | 0.0 | 3.28
Output | 9.9819e-05 | 9.9819e-05 | 9.9819e-05 | 0.0 | 0.30
Modify | 0.0057062 | 0.0057062 | 0.0057062 | 0.0 | 17.21
Other | | 0.0005715 | | | 1.72
Nlocal: 160 ave 160 max 160 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 271 ave 271 max 271 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 3881 ave 3881 max 3881 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 3881
Ave neighs/atom = 24.25625
Neighbor list builds = 106
Dangerous builds = 0
Total wall time: 0:00:00

View File

@ -0,0 +1,111 @@
LAMMPS (29 Aug 2024 - Development - patch_29Aug2024-394-g75f86a68a7-modified)
OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98)
using 1 OpenMP thread(s) per MPI task
# 2d NEMD simulation
units lj
atom_style atomic
dimension 2
lattice sq2 0.8442
Lattice spacing in x,y,z = 1.5391903 1.5391903 1.5391903
region box prism 0 10 0 8 -0.5 0.5 0 0 0
create_box 2 box
Created triclinic box = (0 0 -0.76959516) to (15.391903 12.313523 0.76959516) with tilt (0 0 0)
2 by 2 by 1 MPI processor grid
create_atoms 1 box
Created 160 atoms
using lattice units in triclinic box = (0 0 -0.76959516) to (15.391903 12.313523 0.76959516) with tilt (0 0 0)
create_atoms CPU = 0.001 seconds
mass * 1.0
velocity all create 1.44 87287 loop geom
region slice block 4 6 INF INF INF INF
set region slice type 2
Setting atom values ...
40 settings made for type
pair_style lj/cut 4.0
pair_coeff * * 1.0 1.0
neighbor 0.3 bin
neigh_modify delay 0 every 1
fix 1 all nvt/sllod temp 1.0 1.0 1.0 tchain 1
fix 2 all deform 1 xy erate 0.01 remap v
#dump 1 all custom 500 dump.nemd id type x y z
#dump 2 all image 100 image.*.jpg type type adiam 1.2
#dump_modify 2 pad 5
#dump 3 all movie 100 movie.mpg type type adiam 1.2
#dump_modify 3 pad 5
thermo 50
run 1000
Generated 0 of 1 mixed pair_coeff terms from geometric mixing rule
Neighbor list info ...
update: every = 1 steps, delay = 0 steps, check = yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 4.3
ghost atom cutoff = 4.3
binsize = 2.15, bins = 8 6 1
1 neighbor lists, perpetual/occasional/extra = 1 0 0
(1) pair lj/cut, perpetual
attributes: half, newton on
pair build: half/bin/atomonly/newton/tri
stencil: half/bin/2d/tri
bin: standard
Per MPI rank memory allocation (min/avg/max) = 3.064 | 3.064 | 3.064 Mbytes
Step Temp E_pair E_mol TotEng Press Volume
0 1.44 -2.6548731 0 -1.2238731 1.9354912 189.52855
50 0.97904822 -2.1934929 0 -1.2205637 5.0001562 189.52855
100 0.96423603 -2.1711413 0 -1.2129318 5.3040025 189.52855
150 0.96430794 -2.153062 0 -1.194781 5.3988945 189.52855
200 1.056585 -2.2379316 0 -1.1879503 5.0007883 189.52855
250 1.0183256 -2.1921531 0 -1.1801921 5.5370076 189.52855
300 0.96855552 -2.140465 0 -1.177963 5.7188412 189.52855
350 1.0115567 -2.1883272 0 -1.1830927 5.4437104 189.52855
400 0.93743103 -2.1230826 0 -1.1915105 5.7059909 189.52855
450 1.1120368 -2.3041274 0 -1.1990408 4.646396 189.52855
500 0.99611106 -2.2039016 0 -1.2140162 5.1526658 189.52855
550 1.1075519 -2.3349751 0 -1.2343453 4.0671355 189.52855
600 1.0550783 -2.3126484 0 -1.2641644 4.5423735 189.52855
650 0.98516169 -2.2664919 0 -1.2874875 4.8365475 189.52855
700 0.97899201 -2.2815136 0 -1.3086403 4.5415389 189.52855
750 1.0107776 -2.3375258 0 -1.3330656 4.3655082 189.52855
800 0.97711804 -2.3221241 0 -1.3511131 4.2153988 189.52855
850 0.8984454 -2.258341 0 -1.3655108 4.6759265 189.52855
900 0.85409237 -2.2157566 0 -1.3670024 5.0180073 189.52855
950 0.90195434 -2.2500988 0 -1.3537817 4.8189466 189.52855
1000 1.0047283 -2.3359434 0 -1.3374947 4.0788763 189.52855
Loop time of 0.0158907 on 4 procs for 1000 steps with 160 atoms
Performance: 27185684.597 tau/day, 62929.825 timesteps/s, 10.069 Matom-step/s
98.0% CPU use with 4 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 0.0044606 | 0.004562 | 0.0047619 | 0.2 | 28.71
Neigh | 0.0023154 | 0.0023979 | 0.002494 | 0.1 | 15.09
Comm | 0.0051743 | 0.0054807 | 0.0056638 | 0.3 | 34.49
Output | 7.5535e-05 | 8.9889e-05 | 0.00012988 | 0.0 | 0.57
Modify | 0.002223 | 0.0023624 | 0.0026372 | 0.3 | 14.87
Other | | 0.0009979 | | | 6.28
Nlocal: 40 ave 42 max 38 min
Histogram: 1 0 1 0 0 0 0 1 0 1
Nghost: 163.5 ave 166 max 162 min
Histogram: 2 0 0 0 0 1 0 0 0 1
Neighs: 970.25 ave 1016 max 942 min
Histogram: 1 1 0 0 1 0 0 0 0 1
Total # of neighbors = 3881
Ave neighs/atom = 24.25625
Neighbor list builds = 106
Dangerous builds = 0
Total wall time: 0:00:00

View File

@ -100,6 +100,7 @@ MODULE LIBLAMMPS
CONTAINS CONTAINS
PROCEDURE :: close => lmp_close PROCEDURE :: close => lmp_close
PROCEDURE :: error => lmp_error PROCEDURE :: error => lmp_error
PROCEDURE :: expand => lmp_expand
PROCEDURE :: file => lmp_file PROCEDURE :: file => lmp_file
PROCEDURE :: command => lmp_command PROCEDURE :: command => lmp_command
PROCEDURE :: commands_list => lmp_commands_list PROCEDURE :: commands_list => lmp_commands_list
@ -410,6 +411,14 @@ MODULE LIBLAMMPS
TYPE(c_ptr), VALUE :: error_text TYPE(c_ptr), VALUE :: error_text
END SUBROUTINE lammps_error END SUBROUTINE lammps_error
! C-interoperable declaration of the library function lammps_expand().
! Both arguments are C pointers passed by value; the result is a C pointer.
! NOTE(review): presumably `line` is a NUL-terminated C string and the result
! is a newly allocated C string that the caller must release - confirm against
! the LAMMPS C library interface documentation.
FUNCTION lammps_expand(handle, line) BIND(C)
IMPORT :: c_ptr
IMPLICIT NONE
! handle: pointer to the LAMMPS instance
TYPE(c_ptr), INTENT(IN), VALUE :: handle
! line: pointer to the input text
TYPE(c_ptr), INTENT(IN), VALUE :: line
TYPE(c_ptr) :: lammps_expand
END FUNCTION lammps_expand
SUBROUTINE lammps_file(handle, filename) BIND(C) SUBROUTINE lammps_file(handle, filename) BIND(C)
IMPORT :: c_ptr IMPORT :: c_ptr
IMPLICIT NONE IMPLICIT NONE
@ -1107,10 +1116,24 @@ CONTAINS
CALL lammps_free(str) CALL lammps_free(str)
END SUBROUTINE lmp_error END SUBROUTINE lmp_error
! Fortran counterpart of lammps_expand().
! Passes `line` to the C library as a temporary C string and returns the
! string handed back by the library as an allocatable Fortran string.
! Both C-side buffers are released with lammps_free() before returning.
FUNCTION lmp_expand(self, line) RESULT(expanded)
  CLASS(lammps), INTENT(IN) :: self
  CHARACTER(len=*), INTENT(IN) :: line
  CHARACTER(len=:), ALLOCATABLE :: expanded
  TYPE(c_ptr) :: c_line, c_expanded

  ! hand the Fortran text to the library as a C string
  c_line = f2c_string(line)
  c_expanded = lammps_expand(self%handle, c_line)
  ! the input copy is no longer needed once the call has returned
  CALL lammps_free(c_line)

  ! copy the result into Fortran storage, then release the C buffer
  expanded = c2f_string(c_expanded)
  CALL lammps_free(c_expanded)
END FUNCTION lmp_expand
! equivalent function to lammps_file() ! equivalent function to lammps_file()
SUBROUTINE lmp_file(self, filename) SUBROUTINE lmp_file(self, filename)
CLASS(lammps), INTENT(IN) :: self CLASS(lammps), INTENT(IN) :: self
CHARACTER(len=*) :: filename CHARACTER(len=*), INTENT(IN) :: filename
TYPE(c_ptr) :: str TYPE(c_ptr) :: str
str = f2c_string(filename) str = f2c_string(filename)
@ -1121,7 +1144,7 @@ CONTAINS
! equivalent function to lammps_command() ! equivalent function to lammps_command()
SUBROUTINE lmp_command(self, cmd) SUBROUTINE lmp_command(self, cmd)
CLASS(lammps), INTENT(IN) :: self CLASS(lammps), INTENT(IN) :: self
CHARACTER(len=*) :: cmd CHARACTER(len=*), INTENT(IN) :: cmd
TYPE(c_ptr) :: str TYPE(c_ptr) :: str
str = f2c_string(cmd) str = f2c_string(cmd)
@ -1155,7 +1178,7 @@ CONTAINS
! equivalent function to lammps_commands_string() ! equivalent function to lammps_commands_string()
SUBROUTINE lmp_commands_string(self, str) SUBROUTINE lmp_commands_string(self, str)
CLASS(lammps), INTENT(IN) :: self CLASS(lammps), INTENT(IN) :: self
CHARACTER(len=*) :: str CHARACTER(len=*), INTENT(IN) :: str
TYPE(c_ptr) :: tmp TYPE(c_ptr) :: tmp
tmp = f2c_string(str) tmp = f2c_string(str)
@ -1173,7 +1196,7 @@ CONTAINS
! equivalent function to lammps_get_thermo ! equivalent function to lammps_get_thermo
REAL(c_double) FUNCTION lmp_get_thermo(self,name) REAL(c_double) FUNCTION lmp_get_thermo(self,name)
CLASS(lammps), INTENT(IN) :: self CLASS(lammps), INTENT(IN) :: self
CHARACTER(LEN=*) :: name CHARACTER(LEN=*), INTENT(IN) :: name
TYPE(c_ptr) :: Cname TYPE(c_ptr) :: Cname
Cname = f2c_string(name) Cname = f2c_string(name)
@ -1185,7 +1208,7 @@ CONTAINS
FUNCTION lmp_last_thermo(self,what,index) RESULT(thermo_data) FUNCTION lmp_last_thermo(self,what,index) RESULT(thermo_data)
CLASS(lammps), INTENT(IN), TARGET :: self CLASS(lammps), INTENT(IN), TARGET :: self
CHARACTER(LEN=*), INTENT(IN) :: what CHARACTER(LEN=*), INTENT(IN) :: what
INTEGER :: index INTEGER, INTENT(IN) :: index
INTEGER(c_int) :: idx INTEGER(c_int) :: idx
TYPE(lammps_data) :: thermo_data, type_data TYPE(lammps_data) :: thermo_data, type_data
INTEGER(c_int) :: datatype INTEGER(c_int) :: datatype

View File

@ -586,8 +586,25 @@ void Neighbor::build_nbor_list(double **x, const int inum, const int host_inum,
const int b2y=_block_cell_2d; const int b2y=_block_cell_2d;
const int g2x=static_cast<int>(ceil(static_cast<double>(_maxspecial)/b2x)); const int g2x=static_cast<int>(ceil(static_cast<double>(_maxspecial)/b2x));
const int g2y=static_cast<int>(ceil(static_cast<double>(nt)/b2y)); const int g2y=static_cast<int>(ceil(static_cast<double>(nt)/b2y));
// the maximum number of blocks on the device is typically 65535
// in principle a lower limit (e.g. 32768) could be used to leave more resources per block
const int max_num_blocks = 65535;
int shift = 0;
if (g2y < max_num_blocks) {
_shared->k_transpose.set_size(g2x,g2y,b2x,b2y); _shared->k_transpose.set_size(g2x,g2y,b2x,b2y);
_shared->k_transpose.run(&dev_special,&dev_special_t,&_maxspecial,&nt); _shared->k_transpose.run(&dev_special,&dev_special_t,&_maxspecial,&nt,&shift);
} else {
// using a fixed number of blocks
int g2y_m = max_num_blocks;
_shared->k_transpose.set_size(g2x,g2y_m,b2x,b2y);
// number of chunks needed for the whole transpose
const int num_chunks = ceil(static_cast<double>(g2y) / g2y_m);
for (int i = 0; i < num_chunks; i++) {
_shared->k_transpose.run(&dev_special,&dev_special_t,&_maxspecial,&nt,&shift);
shift += g2y_m*b2y;
}
}
time_transpose.stop(); time_transpose.stop();
} }

View File

@ -147,7 +147,7 @@ __kernel void kernel_calc_cell_counts(const unsigned *restrict cell_id,
__kernel void transpose(__global tagint *restrict out, __kernel void transpose(__global tagint *restrict out,
const __global tagint *restrict in, const __global tagint *restrict in,
int columns_in, int rows_in) int columns_in, int rows_in, int shift)
{ {
__local tagint block[BLOCK_CELL_2D][BLOCK_CELL_2D+1]; __local tagint block[BLOCK_CELL_2D][BLOCK_CELL_2D+1];
@ -158,15 +158,15 @@ __kernel void transpose(__global tagint *restrict out,
unsigned i=bi*BLOCK_CELL_2D+ti; unsigned i=bi*BLOCK_CELL_2D+ti;
unsigned j=bj*BLOCK_CELL_2D+tj; unsigned j=bj*BLOCK_CELL_2D+tj;
if ((i<columns_in) && (j<rows_in)) if ((i<columns_in) && (j+shift<rows_in))
block[tj][ti]=in[j*columns_in+i]; block[tj][ti]=in[(j+shift)*columns_in+i];
__syncthreads(); __syncthreads();
i=bj*BLOCK_CELL_2D+ti; i=bj*BLOCK_CELL_2D+ti;
j=bi*BLOCK_CELL_2D+tj; j=bi*BLOCK_CELL_2D+tj;
if ((i<rows_in) && (j<columns_in)) if ((i+shift<rows_in) && (j<columns_in))
out[j*rows_in+i] = block[ti][tj]; out[j*rows_in+i+shift] = block[ti][tj];
} }
#ifndef LAL_USE_OLD_NEIGHBOR #ifndef LAL_USE_OLD_NEIGHBOR

View File

@ -1,12 +1,103 @@
# CHANGELOG # CHANGELOG
## [4.4.01](https://github.com/kokkos/kokkos/tree/4.4.01)
[Full Changelog](https://github.com/kokkos/kokkos/compare/4.4.00...4.4.01)
### Features:
* Introduce new SequentialHostInit view allocation property [\#7229](https://github.com/kokkos/kokkos/pull/7229)
### Backend and Architecture Enhancements:
#### CUDA:
* Experimental support for unified memory mode (intended for Grace-Hopper etc.) [\#6823](https://github.com/kokkos/kokkos/pull/6823)
### Bug Fixes
* OpenMP: Fix issue related to the visibility of an internal symbol with shared libraries that affected `ScatterView` in particular [\#7284](https://github.com/kokkos/kokkos/pull/7284)
* Fix implicit copy assignment operators in a few AVX2 masks being deleted [\#7296](https://github.com/kokkos/kokkos/pull/7296)
## [4.4.00](https://github.com/kokkos/kokkos/tree/4.4.00)
[Full Changelog](https://github.com/kokkos/kokkos/compare/4.3.01...4.4.00)
### Features:
* Add `Kokkos::View` conversions from and to [`std::mdspan`](https://en.cppreference.com/w/cpp/container/mdspan) [\#6830](https://github.com/kokkos/kokkos/pull/6830) [\#7069](https://github.com/kokkos/kokkos/pull/7069)
### Backend and Architecture Enhancements:
#### CUDA:
* `nvcc_wrapper`: Adding ability to process `--disable-warnings` flag [\#6936](https://github.com/kokkos/kokkos/issues/6936)
* Use recommended/max team size functions in Cuda ParallelFor and Reduce constructors [\#6891](https://github.com/kokkos/kokkos/issues/6891)
* Improve compile-times when building with `Kokkos_ENABLE_DEBUG_BOUNDS_CHECK` in Cuda [\#7013](https://github.com/kokkos/kokkos/pull/7013)
#### HIP:
* Use HIP builtin atomics [\#6882](https://github.com/kokkos/kokkos/pull/6882) [\#7000](https://github.com/kokkos/kokkos/pull/7000)
* Enable user-specified compiler and linker flags for AMD GPUs [\#7127](https://github.com/kokkos/kokkos/pull/7127)
#### SYCL:
* Add support for Graphs [\#6912](https://github.com/kokkos/kokkos/pull/6912)
* Fix multi-GPU support [\#6887](https://github.com/kokkos/kokkos/pull/6887)
* Improve performance of reduction and scan operations [\#6562](https://github.com/kokkos/kokkos/pull/6562), [\#6750](https://github.com/kokkos/kokkos/pull/6750)
* Fix lock for guarding scratch space in `TeamPolicy` `parallel_reduce` [\#6988](https://github.com/kokkos/kokkos/pull/6988)
* Include submission command queue property information into `SYCL::print_configuration()` [\#7004](https://github.com/kokkos/kokkos/pull/7004)
#### OpenACC:
* Make `TeamPolicy` `parallel_for` execute on the correct async queue [\#7012](https://github.com/kokkos/kokkos/pull/7012)
#### OpenMPTarget:
* Honor user requested loop ordering in `MDRange` policy [\#6925](https://github.com/kokkos/kokkos/pull/6925)
* Prevent data races by guarding the scratch space used in `parallel_scan` [\#6998](https://github.com/kokkos/kokkos/pull/6998)
#### HPX:
* Workaround issue with template argument deduction to support compilation with NVCC [\#7015](https://github.com/kokkos/kokkos/pull/7015)
### General Enhancements
* Improve performance of view copies in host parallel regions [\#6730](https://github.com/kokkos/kokkos/pull/6730)
* Harmonize convertibility rules of `Kokkos::RandomAccessIterator` with `View`s [\#6929](https://github.com/kokkos/kokkos/pull/6929)
* Add a precondition check for non-overlapping ranges in the `adjacent_difference` algorithm in debug mode [\#6922](https://github.com/kokkos/kokkos/pull/6922)
* Add deduction guides for `TeamPolicy` [\#7030](https://github.com/kokkos/kokkos/pull/7030)
* SIMD: Allow flexible vector width for 32 bit types [\#6802](https://github.com/kokkos/kokkos/pull/6802)
* Updates for `Kokkos::Array`: add `kokkos_swap(Array<T, N>)` specialization [\#6943](https://github.com/kokkos/kokkos/pull/6943), add `Kokkos::to_array` [\#6375](https://github.com/kokkos/kokkos/pull/6375), make `Kokkos::Array` equality-comparable [\#7148](https://github.com/kokkos/kokkos/pull/7148)
* Structured binding support for `Kokkos::complex` [\#7040](https://github.com/kokkos/kokkos/pull/7040)
### Build System Changes
* Do not require OpenMP support for languages other than CXX [\#6965](https://github.com/kokkos/kokkos/pull/6965)
* Update Intel GPU architectures in Makefile [\#6895](https://github.com/kokkos/kokkos/pull/6895)
* Fix use of OpenMP with Cuda or HIP as compile language [\#6972](https://github.com/kokkos/kokkos/pull/6972)
* Define and enforce new minimum compiler versions for C++20 support [\#7128](https://github.com/kokkos/kokkos/pull/7128), [\#7123](https://github.com/kokkos/kokkos/pull/7123)
* Add nvidia Grace CPU architecture: `Kokkos_ARCH_ARMV9_GRACE` [\#7158](https://github.com/kokkos/kokkos/pull/7158)
* Fix Makefile.kokkos for Threads [\#6896](https://github.com/kokkos/kokkos/pull/6896)
* Remove support for NVHPC as CUDA device compiler [\#6987](https://github.com/kokkos/kokkos/pull/6987)
* Fix using CUDAToolkit for CMake 3.28.4 and higher [\#7062](https://github.com/kokkos/kokkos/pull/7062)
### Incompatibilities (i.e. breaking changes)
* Drop `Kokkos::Array` special treatment in `View`s [\#6906](https://github.com/kokkos/kokkos/pull/6906)
* Drop `Experimental::RawMemoryAllocationFailure` [\#7145](https://github.com/kokkos/kokkos/pull/7145)
### Deprecations
* Remove `Experimental::LayoutTiled` class template and deprecate `is_layouttiled` trait [\#6907](https://github.com/kokkos/kokkos/pull/6907)
* Deprecate `Kokkos::layout_iterate_type_selector` [\#7076](https://github.com/kokkos/kokkos/pull/7076)
* Deprecate specialization of `Kokkos::pair` for a single element [\#6947](https://github.com/kokkos/kokkos/pull/6947)
* Deprecate `deep_copy` of `UnorderedMap` of different size [\#6812](https://github.com/kokkos/kokkos/pull/6812)
* Deprecate trailing `Proxy` template argument of `Kokkos::Array` [\#6934](https://github.com/kokkos/kokkos/pull/6934)
* Deprecate implicit conversions of integers to `ChunkSize` [\#7151](https://github.com/kokkos/kokkos/pull/7151)
* Deprecate implicit conversions to execution spaces [\#7156](https://github.com/kokkos/kokkos/pull/7156)
### Bug Fixes
* Do not return a copy of the input functor in `Experimental::for_each` [\#6910](https://github.com/kokkos/kokkos/pull/6910)
* Fix `realloc` on views of non-default constructible element types [\#6993](https://github.com/kokkos/kokkos/pull/6993)
* Fix undefined behavior in `View` initialization or fill with zeros [\#7014](https://github.com/kokkos/kokkos/pull/7014)
* Fix `sort_by_key` on host execution spaces when building with NVCC [\#7059](https://github.com/kokkos/kokkos/pull/7059)
* Fix using shared libraries and -fvisibility=hidden [\#7065](https://github.com/kokkos/kokkos/pull/7065)
* Fix view reference counting when functor copy constructor throws in parallel dispatch [\#6289](https://github.com/kokkos/kokkos/pull/6289)
* Fix `initialize(InitializationSetting)` for handling `print_configuration` setting [\#7098](https://github.com/kokkos/kokkos/pull/7098)
* Thread safety fixes for the Serial and OpenMP backend [\#7080](https://github.com/kokkos/kokkos/pull/7080), [\#6151](https://github.com/kokkos/kokkos/pull/6151)
## [4.3.01](https://github.com/kokkos/kokkos/tree/4.3.01) ## [4.3.01](https://github.com/kokkos/kokkos/tree/4.3.01)
[Full Changelog](https://github.com/kokkos/kokkos/compare/4.3.00...4.3.01) [Full Changelog](https://github.com/kokkos/kokkos/compare/4.3.00...4.3.01)
### Backend and Architecture Enhancements: ### Backend and Architecture Enhancements:
#### HIP: #### HIP:
* MI300 support unified memory support [\#6877](https://github.com/kokkos/kokkos/pull/6877) * MI300 support unified memory [\#6877](https://github.com/kokkos/kokkos/pull/6877)
### Bug Fixes ### Bug Fixes
* Serial: Use the provided execution space instance in TeamPolicy [\#6951](https://github.com/kokkos/kokkos/pull/6951) * Serial: Use the provided execution space instance in TeamPolicy [\#6951](https://github.com/kokkos/kokkos/pull/6951)

65
lib/kokkos/CITATION.cff Normal file
View File

@ -0,0 +1,65 @@
cff-version: 1.2.0
title: Kokkos
message: >-
If you use this software, please cite the overview paper
type: software
authors:
- name: The Kokkos authors
website: https://kokkos.org/community/team/
identifiers:
- type: url
website: https://kokkos.org/kokkos-core-wiki/citation.html
repository-code: 'https://github.com/kokkos/kokkos'
url: 'https://kokkos.org/'
license: Apache-2.0
preferred-citation:
type: article
authors:
- given-names: Christian R.
family-names: Trott
- given-names: Damien
family-names: Lebrun-Grandié
- given-names: Daniel
family-names: Arndt
- family-names: Ciesko
given-names: Jan
- given-names: Vinh
family-names: Dang
- family-names: Ellingwood
given-names: Nathan
- given-names: Rahulkumar
family-names: Gayatri
- given-names: Evan
family-names: Harvey
- given-names: Daisy S.
family-names: Hollman
- given-names: Dan
family-names: Ibanez
- given-names: Nevin
family-names: Liber
- given-names: Jonathan
family-names: Madsen
- given-names: Jeff
family-names: Miles
- given-names: David
family-names: Poliakoff
- given-names: Amy
family-names: Powell
- given-names: Sivasankaran
family-names: Rajamanickam
- given-names: Mikael
family-names: Simberg
- given-names: Dan
family-names: Sunderland
- given-names: Bruno
family-names: Turcksin
- given-names: Jeremiah
family-names: Wilke
doi: 10.1109/TPDS.2021.3097283
journal: IEEE Transactions on Parallel and Distributed Systems
start: 805
end: 817
title: "Kokkos 3: Programming Model Extensions for the Exascale Era"
volume: 33
issue: 4
year: 2022

View File

@ -150,7 +150,7 @@ ENDIF()
set(Kokkos_VERSION_MAJOR 4) set(Kokkos_VERSION_MAJOR 4)
set(Kokkos_VERSION_MINOR 3) set(Kokkos_VERSION_MINOR 4)
set(Kokkos_VERSION_PATCH 1) set(Kokkos_VERSION_PATCH 1)
set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}") set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}")
message(STATUS "Kokkos version: ${Kokkos_VERSION}") message(STATUS "Kokkos version: ${Kokkos_VERSION}")

View File

@ -11,7 +11,7 @@ CXXFLAGS += $(SHFLAGS)
endif endif
KOKKOS_VERSION_MAJOR = 4 KOKKOS_VERSION_MAJOR = 4
KOKKOS_VERSION_MINOR = 3 KOKKOS_VERSION_MINOR = 4
KOKKOS_VERSION_PATCH = 1 KOKKOS_VERSION_PATCH = 1
KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc) KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc)
@ -21,11 +21,11 @@ KOKKOS_DEVICES ?= "OpenMP"
# Options: # Options:
# Intel: KNC,KNL,SNB,HSW,BDW,SKL,SKX,ICL,ICX,SPR # Intel: KNC,KNL,SNB,HSW,BDW,SKL,SKX,ICL,ICX,SPR
# NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75,Ampere80,Ampere86,Ada89,Hopper90 # NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75,Ampere80,Ampere86,Ada89,Hopper90
# ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX # ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX,ARMv9-Grace
# IBM: Power8,Power9 # IBM: Power8,Power9
# AMD-GPUS: AMD_GFX906,AMD_GFX908,AMD_GFX90A,AMD_GFX940,AMD_GFX942,AMD_GFX1030,AMD_GFX1100,AMD_GFX1103 # AMD-GPUS: AMD_GFX906,AMD_GFX908,AMD_GFX90A,AMD_GFX940,AMD_GFX942,AMD_GFX1030,AMD_GFX1100,AMD_GFX1103
# AMD-CPUS: AMDAVX,Zen,Zen2,Zen3 # AMD-CPUS: AMDAVX,Zen,Zen2,Zen3
# Intel-GPUs: Gen9,Gen11,Gen12LP,DG1,XeHP,PVC # Intel-GPUs: Intel_Gen,Intel_Gen9,Intel_Gen11,Intel_Gen12LP,Intel_DG1,Intel_XeHP,Intel_PVC
KOKKOS_ARCH ?= "" KOKKOS_ARCH ?= ""
# Options: yes,no # Options: yes,no
KOKKOS_DEBUG ?= "no" KOKKOS_DEBUG ?= "no"
@ -41,7 +41,7 @@ KOKKOS_STANDALONE_CMAKE ?= "no"
# Default settings specific options. # Default settings specific options.
# Options: force_uvm,use_ldg,rdc,enable_lambda,enable_constexpr,disable_malloc_async # Options: force_uvm,use_ldg,rdc,enable_lambda,enable_constexpr,disable_malloc_async
KOKKOS_CUDA_OPTIONS ?= "enable_lambda" KOKKOS_CUDA_OPTIONS ?= "disable_malloc_async"
# Options: rdc # Options: rdc
KOKKOS_HIP_OPTIONS ?= "" KOKKOS_HIP_OPTIONS ?= ""
@ -328,12 +328,43 @@ KOKKOS_INTERNAL_USE_ARCH_ICL := $(call kokkos_has_string,$(KOKKOS_ARCH),ICL)
KOKKOS_INTERNAL_USE_ARCH_ICX := $(call kokkos_has_string,$(KOKKOS_ARCH),ICX) KOKKOS_INTERNAL_USE_ARCH_ICX := $(call kokkos_has_string,$(KOKKOS_ARCH),ICX)
KOKKOS_INTERNAL_USE_ARCH_SPR := $(call kokkos_has_string,$(KOKKOS_ARCH),SPR) KOKKOS_INTERNAL_USE_ARCH_SPR := $(call kokkos_has_string,$(KOKKOS_ARCH),SPR)
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen) # Traditionally, we supported, e.g., IntelGen9 instead of Intel_Gen9. The latter
# matches the CMake option but we also accept the former for backward-compatibility.
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9 := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen9) KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9 := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen9)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9), 0)
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9 := $(call kokkos_has_string,$(KOKKOS_ARCH),Intel_Gen9)
endif
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN11 := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen11) KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN11 := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen11)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN11), 0)
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN11 := $(call kokkos_has_string,$(KOKKOS_ARCH),Intel_Gen11)
endif
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN12LP := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen12LP) KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN12LP := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen12LP)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN12LP), 0)
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN12LP := $(call kokkos_has_string,$(KOKKOS_ARCH),Intel_Gen12LP)
endif
# Default the generic Intel GPU flag to "off". It must not be derived from the
# IntelGen9 match: that would enable the generic target together with Gen9 for
# the "IntelGen9" spelling but not for "Intel_Gen9". The generic flag is only
# switched on below, when none of Gen9/Gen11/Gen12LP was requested.
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN := 0
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN_SET := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9) \
+ $(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN11) \
+ $(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN12LP))
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN_SET), 0)
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN), 0)
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN := $(call kokkos_has_string,$(KOKKOS_ARCH),Intel_Gen)
endif
endif
KOKKOS_INTERNAL_USE_ARCH_INTEL_DG1 := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelDG1) KOKKOS_INTERNAL_USE_ARCH_INTEL_DG1 := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelDG1)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_DG1), 0)
KOKKOS_INTERNAL_USE_ARCH_INTEL_DG1 := $(call kokkos_has_string,$(KOKKOS_ARCH),Intel_DG1)
endif
KOKKOS_INTERNAL_USE_ARCH_INTEL_XEHP := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelXeHP) KOKKOS_INTERNAL_USE_ARCH_INTEL_XEHP := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelXeHP)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_XEHP), 0)
KOKKOS_INTERNAL_USE_ARCH_INTEL_XEHP := $(call kokkos_has_string,$(KOKKOS_ARCH),Intel_XeHP)
endif
# Traditionally the architecture was called PVC instead of Intel_PVC. This
# version makes us accept IntelPVC and Intel_PVC as well.
KOKKOS_INTERNAL_USE_ARCH_INTEL_PVC := $(call kokkos_has_string,$(KOKKOS_ARCH),PVC) KOKKOS_INTERNAL_USE_ARCH_INTEL_PVC := $(call kokkos_has_string,$(KOKKOS_ARCH),PVC)
# NVIDIA based. # NVIDIA based.
@ -394,7 +425,8 @@ KOKKOS_INTERNAL_USE_ARCH_ARMV81 := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv8
KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv8-ThunderX) KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv8-ThunderX)
KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2 := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv8-TX2) KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2 := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv8-TX2)
KOKKOS_INTERNAL_USE_ARCH_A64FX := $(call kokkos_has_string,$(KOKKOS_ARCH),A64FX) KOKKOS_INTERNAL_USE_ARCH_A64FX := $(call kokkos_has_string,$(KOKKOS_ARCH),A64FX)
KOKKOS_INTERNAL_USE_ARCH_ARM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_ARMV80)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV81)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2)+$(KOKKOS_INTERNAL_USE_ARCH_A64FX) | bc)) KOKKOS_INTERNAL_USE_ARCH_ARMV9_GRACE := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv9-Grace)
KOKKOS_INTERNAL_USE_ARCH_ARM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_ARMV80)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV81)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2)+$(KOKKOS_INTERNAL_USE_ARCH_A64FX)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV9_GRACE) | bc))
# IBM based. # IBM based.
KOKKOS_INTERNAL_USE_ARCH_POWER8 := $(call kokkos_has_string,$(KOKKOS_ARCH),Power8) KOKKOS_INTERNAL_USE_ARCH_POWER8 := $(call kokkos_has_string,$(KOKKOS_ARCH),Power8)
@ -758,6 +790,14 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_A64FX), 1)
endif endif
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV9_GRACE), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV9_GRACE")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARM_NEON")
KOKKOS_CXXFLAGS += -mcpu=neoverse-v2 -msve-vector-bits=128
KOKKOS_LDFLAGS += -mcpu=neoverse-v2 -msve-vector-bits=128
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN), 1) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_ZEN") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_ZEN")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX2") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX2")
@ -1216,6 +1256,8 @@ ifeq ($(KOKKOS_INTERNAL_DISABLE_BUNDLED_MDSPAN), 0)
endif endif
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_IMPL_MDSPAN") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_IMPL_MDSPAN")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_IMPL_REF_COUNT_BRANCH_UNLIKELY")
KOKKOS_INTERNAL_LS_CONFIG := $(shell ls KokkosCore_config.h 2>&1) KOKKOS_INTERNAL_LS_CONFIG := $(shell ls KokkosCore_config.h 2>&1)
ifeq ($(KOKKOS_INTERNAL_LS_CONFIG), KokkosCore_config.h) ifeq ($(KOKKOS_INTERNAL_LS_CONFIG), KokkosCore_config.h)

View File

@ -81,7 +81,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1)
Kokkos_Threads_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_Instance.cpp Kokkos_Threads_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_Instance.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_Instance.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_Instance.cpp
Kokkos_Threads_Spinwait.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_Spinwait.cpp Kokkos_Threads_Spinwait.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_Spinwait.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_Spinwait.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_Spinwait.cpp
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)

View File

@ -1,4 +1,4 @@
![Kokkos](https://avatars2.githubusercontent.com/u/10199860?s=200&v=4) [![Kokkos](https://avatars2.githubusercontent.com/u/10199860?s=200&v=4)](https://kokkos.org)
# Kokkos: Core Libraries # Kokkos: Core Libraries
@ -10,43 +10,66 @@ hierarchies and multiple types of execution resources. It currently can use
CUDA, HIP, SYCL, HPX, OpenMP and C++ threads as backend programming models with several other CUDA, HIP, SYCL, HPX, OpenMP and C++ threads as backend programming models with several other
backends in development. backends in development.
**Kokkos Core is part of the Kokkos C++ Performance Portability Programming EcoSystem.** **Kokkos Core is part of the [Kokkos C++ Performance Portability Programming Ecosystem](https://kokkos.org/about/abstract/).**
For the complete documentation, click below: Kokkos is a [Linux Foundation](https://linuxfoundation.org) project.
# [kokkos.github.io/kokkos-core-wiki](https://kokkos.github.io/kokkos-core-wiki) ## Learning about Kokkos
# Learning about Kokkos
To start learning about Kokkos: To start learning about Kokkos:
- [Kokkos Lectures](https://kokkos.github.io/kokkos-core-wiki/videolectures.html): they contain a mix of lecture videos and hands-on exercises covering all the important Kokkos Ecosystem capabilities. - [Kokkos Lectures](https://kokkos.org/kokkos-core-wiki/videolectures.html): they contain a mix of lecture videos and hands-on exercises covering all the important capabilities.
- [Programming guide](https://kokkos.github.io/kokkos-core-wiki/programmingguide.html): contains in "narrative" form a technical description of the programming model, machine model, and the main building blocks like the Views and parallel dispatch. - [Programming guide](https://kokkos.org/kokkos-core-wiki/programmingguide.html): contains in "narrative" form a technical description of the programming model, machine model, and the main building blocks like the Views and parallel dispatch.
- [API reference](https://kokkos.github.io/kokkos-core-wiki/): organized by category, i.e., [core](https://kokkos.github.io/kokkos-core-wiki/API/core-index.html), [algorithms](https://kokkos.github.io/kokkos-core-wiki/API/algorithms-index.html) and [containers](https://kokkos.github.io/kokkos-core-wiki/API/containers-index.html) or, if you prefer, in [alphabetical order](https://kokkos.github.io/kokkos-core-wiki/API/alphabetical.html). - [API reference](https://kokkos.org/kokkos-core-wiki/): organized by category, i.e., [core](https://kokkos.org/kokkos-core-wiki/API/core-index.html), [algorithms](https://kokkos.org/kokkos-core-wiki/API/algorithms-index.html) and [containers](https://kokkos.org/kokkos-core-wiki/API/containers-index.html) or, if you prefer, in [alphabetical order](https://kokkos.org/kokkos-core-wiki/API/alphabetical.html).
- [Use cases and Examples](https://kokkos.github.io/kokkos-core-wiki/usecases.html): a series of examples ranging from how to use Kokkos with MPI to Fortran interoperability. - [Use cases and Examples](https://kokkos.org/kokkos-core-wiki/usecases.html): a series of examples ranging from how to use Kokkos with MPI to Fortran interoperability.
## Obtaining Kokkos
The latest release of Kokkos can be obtained from the [GitHub releases page](https://github.com/kokkos/kokkos/releases/latest).
The current release is [4.4.01](https://github.com/kokkos/kokkos/releases/tag/4.4.01).
```bash
curl -OJ -L https://github.com/kokkos/kokkos/archive/refs/tags/4.4.01.tar.gz
# Or with wget
wget https://github.com/kokkos/kokkos/archive/refs/tags/4.4.01.tar.gz
```
To clone the latest development version of Kokkos from GitHub:
```bash
git clone -b develop https://github.com/kokkos/kokkos.git
```
### Building Kokkos
To build Kokkos, you will need to have a C++ compiler that supports C++17 or later.
All requirements including minimum and primary tested compiler versions can be found [here](https://kokkos.org/kokkos-core-wiki/requirements.html).
Building and installation instructions are described [here](https://kokkos.org/kokkos-core-wiki/building.html).
You can also install Kokkos using [Spack](https://spack.io/): `spack install kokkos`. [Available configuration options](https://packages.spack.io/package.html?name=kokkos) can be displayed using `spack info kokkos`.
## For the complete documentation: [kokkos.org/kokkos-core-wiki/](https://kokkos.org/kokkos-core-wiki/)
## Support
For questions find us on Slack: https://kokkosteam.slack.com or open a GitHub issue. For questions find us on Slack: https://kokkosteam.slack.com or open a GitHub issue.
For non-public questions send an email to: *crtrott(at)sandia.gov* For non-public questions send an email to: *crtrott(at)sandia.gov*
# Contributing to Kokkos ## Contributing
Please see [this page](https://kokkos.github.io/kokkos-core-wiki/contributing.html) for details on how to contribute. Please see [this page](https://kokkos.org/kokkos-core-wiki/contributing.html) for details on how to contribute.
# Requirements, Building and Installing ## Citing Kokkos
All requirements including minimum and primary tested compiler versions can be found [here](https://kokkos.github.io/kokkos-core-wiki/requirements.html). Please see the [following page](https://kokkos.org/kokkos-core-wiki/citation.html).
Building and installation instructions are described [here](https://kokkos.github.io/kokkos-core-wiki/building.html). ## License
# Citing Kokkos
Please see the [following page](https://kokkos.github.io/kokkos-core-wiki/citation.html).
# License
[![License](https://img.shields.io/badge/License-Apache--2.0_WITH_LLVM--exception-blue)](https://spdx.org/licenses/LLVM-exception.html) [![License](https://img.shields.io/badge/License-Apache--2.0_WITH_LLVM--exception-blue)](https://spdx.org/licenses/LLVM-exception.html)

View File

@ -189,6 +189,33 @@ void applyPermutation(const ExecutionSpace& space,
KOKKOS_LAMBDA(int i) { view(i) = view_copy(permutation(i)); }); KOKKOS_LAMBDA(int i) { view(i) = view_copy(permutation(i)); });
} }
// FIXME_NVCC: nvcc has trouble compiling lambdas inside a function with
// variadic templates (sort_by_key_via_sort). Switch to using functors instead.
// Functor replacement for the "iota" lambda: calling it with an index writes
// that same index into the corresponding entry of the stored `_permute`
// object, i.e. it initializes an identity permutation.
template <typename Permute>
struct IotaFunctor {
  // Indexable object that receives the identity mapping.
  Permute _permute;

  KOKKOS_FUNCTION void operator()(int idx) const {
    _permute(idx) = idx;
  }
};
// Index comparator: orders two indices by comparing the keys stored at those
// positions with operator<.
template <typename Keys>
struct LessFunctor {
  // Indexable object holding the keys being compared.
  Keys _keys;

  KOKKOS_FUNCTION bool operator()(int lhs, int rhs) const {
    const bool lhs_is_less = _keys(lhs) < _keys(rhs);
    return lhs_is_less;
  }
};
// FIXME_NVCC+MSVC: We can't use a lambda instead of a functor which gave us
// "For this host platform/dialect, an extended lambda cannot be defined inside
// the 'if' or 'else' block of a constexpr if statement"
// Index comparator with a user-supplied ordering: forwards the keys found at
// the two given indices to the stored comparator and returns its verdict.
template <typename Keys, typename Comparator>
struct KeyComparisonFunctor {
  // Indexable object holding the keys being compared.
  Keys m_keys;
  // Binary predicate applied to a pair of keys.
  Comparator m_comparator;

  KOKKOS_FUNCTION bool operator()(int lhs, int rhs) const {
    const bool ordered_before = m_comparator(m_keys(lhs), m_keys(rhs));
    return ordered_before;
  }
};
template <class ExecutionSpace, class KeysDataType, class... KeysProperties, template <class ExecutionSpace, class KeysDataType, class... KeysProperties,
class ValuesDataType, class... ValuesProperties, class ValuesDataType, class... ValuesProperties,
class... MaybeComparator> class... MaybeComparator>
@ -207,10 +234,9 @@ void sort_by_key_via_sort(
n); n);
// iota // iota
Kokkos::parallel_for( Kokkos::parallel_for("Kokkos::sort_by_key_via_sort::iota",
"Kokkos::sort_by_key_via_sort::iota",
Kokkos::RangePolicy<ExecutionSpace>(exec, 0, n), Kokkos::RangePolicy<ExecutionSpace>(exec, 0, n),
KOKKOS_LAMBDA(int i) { permute(i) = i; }); IotaFunctor<decltype(permute)>{permute});
using Layout = using Layout =
typename Kokkos::View<unsigned int*, ExecutionSpace>::array_layout; typename Kokkos::View<unsigned int*, ExecutionSpace>::array_layout;
@ -228,16 +254,15 @@ void sort_by_key_via_sort(
Kokkos::DefaultHostExecutionSpace host_exec; Kokkos::DefaultHostExecutionSpace host_exec;
if constexpr (sizeof...(MaybeComparator) == 0) { if constexpr (sizeof...(MaybeComparator) == 0) {
Kokkos::sort( Kokkos::sort(host_exec, host_permute,
host_exec, host_permute, LessFunctor<decltype(host_keys)>{host_keys});
KOKKOS_LAMBDA(int i, int j) { return host_keys(i) < host_keys(j); });
} else { } else {
auto keys_comparator = auto keys_comparator =
std::get<0>(std::tuple<MaybeComparator...>(maybeComparator...)); std::get<0>(std::tuple<MaybeComparator...>(maybeComparator...));
Kokkos::sort( Kokkos::sort(
host_exec, host_permute, KOKKOS_LAMBDA(int i, int j) { host_exec, host_permute,
return keys_comparator(host_keys(i), host_keys(j)); KeyComparisonFunctor<decltype(host_keys), decltype(keys_comparator)>{
}); host_keys, keys_comparator});
} }
host_exec.fence("Kokkos::Impl::sort_by_key_via_sort: after host sort"); host_exec.fence("Kokkos::Impl::sort_by_key_via_sort: after host sort");
Kokkos::deep_copy(exec, permute, host_permute); Kokkos::deep_copy(exec, permute, host_permute);
@ -262,16 +287,14 @@ void sort_by_key_via_sort(
} }
#else #else
if constexpr (sizeof...(MaybeComparator) == 0) { if constexpr (sizeof...(MaybeComparator) == 0) {
Kokkos::sort( Kokkos::sort(exec, permute, LessFunctor<decltype(keys)>{keys});
exec, permute,
KOKKOS_LAMBDA(int i, int j) { return keys(i) < keys(j); });
} else { } else {
auto keys_comparator = auto keys_comparator =
std::get<0>(std::tuple<MaybeComparator...>(maybeComparator...)); std::get<0>(std::tuple<MaybeComparator...>(maybeComparator...));
Kokkos::sort( Kokkos::sort(
exec, permute, KOKKOS_LAMBDA(int i, int j) { exec, permute,
return keys_comparator(keys(i), keys(j)); KeyComparisonFunctor<decltype(keys), decltype(keys_comparator)>{
}); keys, keys_comparator});
} }
#endif #endif
} }

View File

@ -29,33 +29,31 @@ namespace Experimental {
template < template <
class ExecutionSpace, class IteratorType, class UnaryFunctorType, class ExecutionSpace, class IteratorType, class UnaryFunctorType,
std::enable_if_t<Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0> std::enable_if_t<Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
UnaryFunctorType for_each(const std::string& label, const ExecutionSpace& ex, void for_each(const std::string& label, const ExecutionSpace& ex,
IteratorType first, IteratorType last, IteratorType first, IteratorType last, UnaryFunctorType functor) {
UnaryFunctorType functor) { Impl::for_each_exespace_impl(label, ex, first, last, std::move(functor));
return Impl::for_each_exespace_impl(label, ex, first, last,
std::move(functor));
} }
template < template <
class ExecutionSpace, class IteratorType, class UnaryFunctorType, class ExecutionSpace, class IteratorType, class UnaryFunctorType,
std::enable_if_t<Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0> std::enable_if_t<Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
UnaryFunctorType for_each(const ExecutionSpace& ex, IteratorType first, void for_each(const ExecutionSpace& ex, IteratorType first, IteratorType last,
IteratorType last, UnaryFunctorType functor) { UnaryFunctorType functor) {
return Impl::for_each_exespace_impl("Kokkos::for_each_iterator_api_default", Impl::for_each_exespace_impl("Kokkos::for_each_iterator_api_default", ex,
ex, first, last, std::move(functor)); first, last, std::move(functor));
} }
template < template <
class ExecutionSpace, class DataType, class... Properties, class ExecutionSpace, class DataType, class... Properties,
class UnaryFunctorType, class UnaryFunctorType,
std::enable_if_t<Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0> std::enable_if_t<Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
UnaryFunctorType for_each(const std::string& label, const ExecutionSpace& ex, void for_each(const std::string& label, const ExecutionSpace& ex,
const ::Kokkos::View<DataType, Properties...>& v, const ::Kokkos::View<DataType, Properties...>& v,
UnaryFunctorType functor) { UnaryFunctorType functor) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
namespace KE = ::Kokkos::Experimental; namespace KE = ::Kokkos::Experimental;
return Impl::for_each_exespace_impl(label, ex, KE::begin(v), KE::end(v), Impl::for_each_exespace_impl(label, ex, KE::begin(v), KE::end(v),
std::move(functor)); std::move(functor));
} }
@ -63,15 +61,14 @@ template <
class ExecutionSpace, class DataType, class... Properties, class ExecutionSpace, class DataType, class... Properties,
class UnaryFunctorType, class UnaryFunctorType,
std::enable_if_t<Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0> std::enable_if_t<Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
UnaryFunctorType for_each(const ExecutionSpace& ex, void for_each(const ExecutionSpace& ex,
const ::Kokkos::View<DataType, Properties...>& v, const ::Kokkos::View<DataType, Properties...>& v,
UnaryFunctorType functor) { UnaryFunctorType functor) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
namespace KE = ::Kokkos::Experimental; namespace KE = ::Kokkos::Experimental;
return Impl::for_each_exespace_impl("Kokkos::for_each_view_api_default", ex, Impl::for_each_exespace_impl("Kokkos::for_each_view_api_default", ex,
KE::begin(v), KE::end(v), KE::begin(v), KE::end(v), std::move(functor));
std::move(functor));
} }
// //
@ -82,23 +79,22 @@ UnaryFunctorType for_each(const ExecutionSpace& ex,
template <class TeamHandleType, class IteratorType, class UnaryFunctorType, template <class TeamHandleType, class IteratorType, class UnaryFunctorType,
std::enable_if_t<Kokkos::is_team_handle_v<TeamHandleType>, int> = 0> std::enable_if_t<Kokkos::is_team_handle_v<TeamHandleType>, int> = 0>
KOKKOS_FUNCTION UnaryFunctorType for_each(const TeamHandleType& teamHandle, KOKKOS_FUNCTION void for_each(const TeamHandleType& teamHandle,
IteratorType first, IteratorType last, IteratorType first, IteratorType last,
UnaryFunctorType functor) { UnaryFunctorType functor) {
return Impl::for_each_team_impl(teamHandle, first, last, std::move(functor)); Impl::for_each_team_impl(teamHandle, first, last, std::move(functor));
} }
template <class TeamHandleType, class DataType, class... Properties, template <class TeamHandleType, class DataType, class... Properties,
class UnaryFunctorType, class UnaryFunctorType,
std::enable_if_t<Kokkos::is_team_handle_v<TeamHandleType>, int> = 0> std::enable_if_t<Kokkos::is_team_handle_v<TeamHandleType>, int> = 0>
KOKKOS_FUNCTION UnaryFunctorType KOKKOS_FUNCTION void for_each(const TeamHandleType& teamHandle,
for_each(const TeamHandleType& teamHandle,
const ::Kokkos::View<DataType, Properties...>& v, const ::Kokkos::View<DataType, Properties...>& v,
UnaryFunctorType functor) { UnaryFunctorType functor) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
namespace KE = ::Kokkos::Experimental; namespace KE = ::Kokkos::Experimental;
return Impl::for_each_team_impl(teamHandle, KE::begin(v), KE::end(v), Impl::for_each_team_impl(teamHandle, KE::begin(v), KE::end(v),
std::move(functor)); std::move(functor));
} }

View File

@ -82,6 +82,11 @@ OutputIteratorType adjacent_difference_exespace_impl(
return first_dest; return first_dest;
} }
#ifdef KOKKOS_ENABLE_DEBUG
// check for overlapping iterators
Impl::expect_no_overlap(first_from, last_from, first_dest);
#endif
// run // run
const auto num_elements = const auto num_elements =
Kokkos::Experimental::distance(first_from, last_from); Kokkos::Experimental::distance(first_from, last_from);
@ -114,6 +119,11 @@ KOKKOS_FUNCTION OutputIteratorType adjacent_difference_team_impl(
return first_dest; return first_dest;
} }
#ifdef KOKKOS_ENABLE_DEBUG
// check for overlapping iterators
Impl::expect_no_overlap(first_from, last_from, first_dest);
#endif
// run // run
const auto num_elements = const auto num_elements =
Kokkos::Experimental::distance(first_from, last_from); Kokkos::Experimental::distance(first_from, last_from);

View File

@ -24,6 +24,9 @@ namespace Kokkos {
namespace Experimental { namespace Experimental {
namespace Impl { namespace Impl {
template <class T>
class RandomAccessIterator;
template <typename T, typename enable = void> template <typename T, typename enable = void>
struct is_admissible_to_kokkos_std_algorithms : std::false_type {}; struct is_admissible_to_kokkos_std_algorithms : std::false_type {};
@ -58,6 +61,18 @@ using is_iterator = Kokkos::is_detected<iterator_category_t, T>;
template <class T> template <class T>
inline constexpr bool is_iterator_v = is_iterator<T>::value; inline constexpr bool is_iterator_v = is_iterator<T>::value;
// Trait that is true only for the iterators Kokkos itself hands out, i.e.
// RandomAccessIterator<ViewType> where ViewType is admissible to the Kokkos
// std algorithms. Every other type yields false.
template <typename T>
struct is_kokkos_iterator : std::false_type {};

// Derive from std::bool_constant (instead of declaring a bare `value` member)
// so that the specialization exposes the same members as the std::false_type
// primary template (::value, ::type, value_type, conversion to bool).
template <typename ViewType>
struct is_kokkos_iterator<RandomAccessIterator<ViewType>>
    : std::bool_constant<
          is_admissible_to_kokkos_std_algorithms<ViewType>::value> {};

template <class T>
inline constexpr bool is_kokkos_iterator_v = is_kokkos_iterator<T>::value;
// //
// are_iterators // are_iterators
// //
@ -215,6 +230,38 @@ KOKKOS_INLINE_FUNCTION void expect_valid_range(IteratorType first,
(void)last; (void)last;
} }
//
// Check if kokkos iterators are overlapping
//
// Contract check (via KOKKOS_EXPECTS) that a source range [first, last) and a
// destination range of the same length starting at s_first do not overlap.
//
// - The check is only performed when both iterator types satisfy
//   is_kokkos_iterator_v; for any other iterator type this is a no-op.
// - The comparison uses the data pointers and stride(0) of the views held by
//   the iterators (first.view() / s_first.view()), together with the length
//   (last - first).
// - Two ranges whose base pointers differ by an amount that is not a multiple
//   of the common stride are interleaved and are accepted as non-overlapping.
//
template <typename IteratorType1, typename IteratorType2>
KOKKOS_INLINE_FUNCTION void expect_no_overlap(
    [[maybe_unused]] IteratorType1 first, [[maybe_unused]] IteratorType1 last,
    [[maybe_unused]] IteratorType2 s_first) {
  if constexpr (is_kokkos_iterator_v<IteratorType1> &&
                is_kokkos_iterator_v<IteratorType2>) {
    auto const view1 = first.view();
    auto const view2 = s_first.view();

    // Keep the strides signed: the pointer difference below may be negative,
    // and mixing it with an unsigned stride would convert it to a huge
    // unsigned value before the modulo, giving a wrong remainder for strides
    // that are not a power of two.
    const ptrdiff_t stride1    = static_cast<ptrdiff_t>(view1.stride(0));
    const ptrdiff_t stride2    = static_cast<ptrdiff_t>(view2.stride(0));
    const ptrdiff_t first_diff = view1.data() - view2.data();

    // FIXME If strides are not identical, checks may not be made
    // with the cost of O(1)
    // Currently, checks are made only if strides are identical
    // If first_diff == 0, there is already an overlap
    if (stride1 == stride2 || first_diff == 0) {
      // A zero stride cannot interleave anything (and must not be used as a
      // divisor); in that case rely on the pointer-range test alone.
      [[maybe_unused]] const bool is_no_overlap =
          (stride1 != 0) && (first_diff % stride1 != 0);
      auto* first_pointer1 = view1.data();
      auto* first_pointer2 = view2.data();
      [[maybe_unused]] auto* last_pointer1 = first_pointer1 + (last - first);
      [[maybe_unused]] auto* last_pointer2 = first_pointer2 + (last - first);
      KOKKOS_EXPECTS(first_pointer1 >= last_pointer2 ||
                     last_pointer1 <= first_pointer2 || is_no_overlap);
    }
  }
}
} // namespace Impl } // namespace Impl
} // namespace Experimental } // namespace Experimental
} // namespace Kokkos } // namespace Kokkos

View File

@ -151,7 +151,8 @@ KOKKOS_FUNCTION OutputIterator copy_if_team_impl(
} }
#if defined KOKKOS_COMPILER_INTEL || \ #if defined KOKKOS_COMPILER_INTEL || \
(defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130) (defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
!defined(KOKKOS_COMPILER_MSVC))
__builtin_unreachable(); __builtin_unreachable();
#endif #endif
} }

View File

@ -42,8 +42,7 @@ struct StdForEachFunctor {
}; };
template <class HandleType, class IteratorType, class UnaryFunctorType> template <class HandleType, class IteratorType, class UnaryFunctorType>
UnaryFunctorType for_each_exespace_impl(const std::string& label, void for_each_exespace_impl(const std::string& label, const HandleType& handle,
const HandleType& handle,
IteratorType first, IteratorType last, IteratorType first, IteratorType last,
UnaryFunctorType functor) { UnaryFunctorType functor) {
// checks // checks
@ -56,8 +55,6 @@ UnaryFunctorType for_each_exespace_impl(const std::string& label,
label, RangePolicy<HandleType>(handle, 0, num_elements), label, RangePolicy<HandleType>(handle, 0, num_elements),
StdForEachFunctor<IteratorType, UnaryFunctorType>(first, functor)); StdForEachFunctor<IteratorType, UnaryFunctorType>(first, functor));
handle.fence("Kokkos::for_each: fence after operation"); handle.fence("Kokkos::for_each: fence after operation");
return functor;
} }
template <class ExecutionSpace, class IteratorType, class SizeType, template <class ExecutionSpace, class IteratorType, class SizeType,
@ -75,7 +72,7 @@ IteratorType for_each_n_exespace_impl(const std::string& label,
} }
for_each_exespace_impl(label, ex, first, last, std::move(functor)); for_each_exespace_impl(label, ex, first, last, std::move(functor));
// no neeed to fence since for_each_exespace_impl fences already // no need to fence since for_each_exespace_impl fences already
return last; return last;
} }
@ -84,9 +81,9 @@ IteratorType for_each_n_exespace_impl(const std::string& label,
// team impl // team impl
// //
template <class TeamHandleType, class IteratorType, class UnaryFunctorType> template <class TeamHandleType, class IteratorType, class UnaryFunctorType>
KOKKOS_FUNCTION UnaryFunctorType KOKKOS_FUNCTION void for_each_team_impl(const TeamHandleType& teamHandle,
for_each_team_impl(const TeamHandleType& teamHandle, IteratorType first, IteratorType first, IteratorType last,
IteratorType last, UnaryFunctorType functor) { UnaryFunctorType functor) {
// checks // checks
Impl::static_assert_random_access_and_accessible(teamHandle, first); Impl::static_assert_random_access_and_accessible(teamHandle, first);
Impl::expect_valid_range(first, last); Impl::expect_valid_range(first, last);
@ -96,7 +93,6 @@ for_each_team_impl(const TeamHandleType& teamHandle, IteratorType first,
TeamThreadRange(teamHandle, 0, num_elements), TeamThreadRange(teamHandle, 0, num_elements),
StdForEachFunctor<IteratorType, UnaryFunctorType>(first, functor)); StdForEachFunctor<IteratorType, UnaryFunctorType>(first, functor));
teamHandle.team_barrier(); teamHandle.team_barrier();
return functor;
} }
template <class TeamHandleType, class IteratorType, class SizeType, template <class TeamHandleType, class IteratorType, class SizeType,
@ -113,7 +109,7 @@ for_each_n_team_impl(const TeamHandleType& teamHandle, IteratorType first,
} }
for_each_team_impl(teamHandle, first, last, std::move(functor)); for_each_team_impl(teamHandle, first, last, std::move(functor));
// no neeed to fence since for_each_team_impl fences already // no need to fence since for_each_team_impl fences already
return last; return last;
} }

View File

@ -59,6 +59,30 @@ class RandomAccessIterator< ::Kokkos::View<DataType, Args...> > {
ptrdiff_t current_index) ptrdiff_t current_index)
: m_view(view), m_current_index(current_index) {} : m_view(view), m_current_index(current_index) {}
// Converting constructors: build this iterator from a RandomAccessIterator
// over a different view type by copying the other iterator's view and
// current index. Only available when view_type is constructible from
// OtherViewType; the constructor is explicit unless OtherViewType is also
// implicitly convertible to view_type.
#ifndef KOKKOS_ENABLE_CXX17 // C++20 and beyond
// Single constructor: `requires` restricts it to constructible view types and
// the conditional `explicit(...)` makes it explicit when no implicit
// view conversion exists.
template <class OtherViewType>
requires(std::is_constructible_v<view_type, OtherViewType>) KOKKOS_FUNCTION
explicit(!std::is_convertible_v<OtherViewType, view_type>)
RandomAccessIterator(const RandomAccessIterator<OtherViewType>& other)
: m_view(other.m_view), m_current_index(other.m_current_index) {}
#else
// Same behavior spelled as two SFINAE-selected overloads.
// Overload 1: constructible but not implicitly convertible -> explicit.
template <
class OtherViewType,
std::enable_if_t<std::is_constructible_v<view_type, OtherViewType> &&
!std::is_convertible_v<OtherViewType, view_type>,
int> = 0>
KOKKOS_FUNCTION explicit RandomAccessIterator(
const RandomAccessIterator<OtherViewType>& other)
: m_view(other.m_view), m_current_index(other.m_current_index) {}
// Overload 2: implicitly convertible view types -> implicit constructor.
template <class OtherViewType,
std::enable_if_t<std::is_convertible_v<OtherViewType, view_type>,
int> = 0>
KOKKOS_FUNCTION RandomAccessIterator(
const RandomAccessIterator<OtherViewType>& other)
: m_view(other.m_view), m_current_index(other.m_current_index) {}
#endif
KOKKOS_FUNCTION KOKKOS_FUNCTION
iterator_type& operator++() { iterator_type& operator++() {
++m_current_index; ++m_current_index;
@ -152,9 +176,16 @@ class RandomAccessIterator< ::Kokkos::View<DataType, Args...> > {
KOKKOS_FUNCTION KOKKOS_FUNCTION
reference operator*() const { return m_view(m_current_index); } reference operator*() const { return m_view(m_current_index); }
// Returns (by value) the view stored in this iterator.
KOKKOS_FUNCTION
view_type view() const { return m_view; }
private: private:
view_type m_view; view_type m_view;
ptrdiff_t m_current_index = 0; ptrdiff_t m_current_index = 0;
// Needed for the converting constructor accepting another iterator
template <class>
friend class RandomAccessIterator;
}; };
} // namespace Impl } // namespace Impl

View File

@ -176,7 +176,8 @@ KOKKOS_FUNCTION OutputIterator unique_copy_team_impl(
} }
#if defined KOKKOS_COMPILER_INTEL || \ #if defined KOKKOS_COMPILER_INTEL || \
(defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130) (defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
!defined(KOKKOS_COMPILER_MSVC))
__builtin_unreachable(); __builtin_unreachable();
#endif #endif
} }

View File

@ -46,6 +46,44 @@ TEST_F(random_access_iterator_test, constructor) {
EXPECT_TRUE(true); EXPECT_TRUE(true);
} }
// Compile-time checks of the iterator converting constructors.
// For each of the dynamic, static and strided fixture views:
//  - the type returned by KE::cbegin is constructible from the type returned
//    by KE::begin, but not the other way around;
//  - an object of the cbegin type can actually be constructed from the
//    begin iterator.
// The final group checks that the KE::begin iterator types of the three views
// are mutually constructible from each other.
TEST_F(random_access_iterator_test, constructiblity) {
// dynamic view
auto first_d = KE::begin(m_dynamic_view);
auto cfirst_d = KE::cbegin(m_dynamic_view);
static_assert(std::is_constructible_v<decltype(cfirst_d), decltype(first_d)>);
static_assert(
!std::is_constructible_v<decltype(first_d), decltype(cfirst_d)>);
[[maybe_unused]] decltype(cfirst_d) tmp_cfirst_d(first_d);
// static view
auto first_s = KE::begin(m_static_view);
auto cfirst_s = KE::cbegin(m_static_view);
static_assert(std::is_constructible_v<decltype(cfirst_s), decltype(first_s)>);
static_assert(
!std::is_constructible_v<decltype(first_s), decltype(cfirst_s)>);
[[maybe_unused]] decltype(cfirst_s) tmp_cfirst_s(first_s);
// strided view
auto first_st = KE::begin(m_strided_view);
auto cfirst_st = KE::cbegin(m_strided_view);
static_assert(
std::is_constructible_v<decltype(cfirst_st), decltype(first_st)>);
static_assert(
!std::is_constructible_v<decltype(first_st), decltype(cfirst_st)>);
[[maybe_unused]] decltype(cfirst_st) tmp_cfirst_st(first_st);
// [FIXME] Better to have tests for the explicit specifier with an expression.
// As soon as View converting constructors are re-implemented with a
// conditional explicit, we may add those tests.
// Cross-view constructibility between the three begin-iterator types.
static_assert(std::is_constructible_v<decltype(first_s), decltype(first_d)>);
static_assert(std::is_constructible_v<decltype(first_st), decltype(first_d)>);
static_assert(std::is_constructible_v<decltype(first_d), decltype(first_s)>);
static_assert(std::is_constructible_v<decltype(first_st), decltype(first_s)>);
static_assert(std::is_constructible_v<decltype(first_d), decltype(first_st)>);
static_assert(std::is_constructible_v<decltype(first_s), decltype(first_st)>);
// Everything above is checked at compile time; this keeps the test non-empty.
EXPECT_TRUE(true);
}
template <class IteratorType, class ValueType> template <class IteratorType, class ValueType>
void test_random_access_it_verify(IteratorType it, ValueType gold_value) { void test_random_access_it_verify(IteratorType it, ValueType gold_value) {
using view_t = Kokkos::View<typename IteratorType::value_type>; using view_t = Kokkos::View<typename IteratorType::value_type>;

View File

@ -69,7 +69,7 @@ void iota(ExecutionSpace const &space, ViewType const &v,
typename ViewType::value_type value = 0) { typename ViewType::value_type value = 0) {
using ValueType = typename ViewType::value_type; using ValueType = typename ViewType::value_type;
Kokkos::parallel_for( Kokkos::parallel_for(
"ArborX::Algorithms::iota", "Kokkos::Algorithms::iota",
Kokkos::RangePolicy<ExecutionSpace>(space, 0, v.extent(0)), Kokkos::RangePolicy<ExecutionSpace>(space, 0, v.extent(0)),
KOKKOS_LAMBDA(int i) { v(i) = value + (ValueType)i; }); KOKKOS_LAMBDA(int i) { v(i) = value + (ValueType)i; });
} }
@ -87,6 +87,18 @@ TEST(TEST_CATEGORY, SortByKeyEmptyView) {
Kokkos::Experimental::sort_by_key(ExecutionSpace(), keys, values)); Kokkos::Experimental::sort_by_key(ExecutionSpace(), keys, values));
} }
// Test #7036
// sort_by_key on zero-length key/value views must not throw when run on the
// default host execution space.
TEST(TEST_CATEGORY, SortByKeyEmptyViewHost) {
  using HostExec = Kokkos::DefaultHostExecutionSpace;

  // the element types are arbitrary; only the zero extent matters here
  using KeyViewType   = Kokkos::View<int *, HostExec>;
  using ValueViewType = Kokkos::View<float *, HostExec>;

  KeyViewType keys("keys", 0);
  ValueViewType values("values", 0);

  ASSERT_NO_THROW(Kokkos::Experimental::sort_by_key(HostExec(), keys, values));
}
TEST(TEST_CATEGORY, SortByKey) { TEST(TEST_CATEGORY, SortByKey) {
using ExecutionSpace = TEST_EXECSPACE; using ExecutionSpace = TEST_EXECSPACE;
using MemorySpace = typename ExecutionSpace::memory_space; using MemorySpace = typename ExecutionSpace::memory_space;

View File

@ -81,5 +81,114 @@ TEST(std_algorithms, is_admissible_to_std_algorithms) {
strided_view_3d_t>::value); strided_view_3d_t>::value);
} }
// Exercises KE::Impl::expect_no_overlap on 1d static, dynamic and strided
// views:
//  - identical ranges and overlapping contiguous subviews are expected to
//    trigger a contract violation (death tests, only compiled when
//    KOKKOS_ENABLE_DEBUG is defined);
//  - strided subviews and interleaved rows of 2d views are expected to pass.
TEST(std_algorithms, expect_no_overlap) {
namespace KE = Kokkos::Experimental;
using value_type = double;
static constexpr size_t extent0 = 13;
//-------------
// 1d views
//-------------
using static_view_1d_t = Kokkos::View<value_type[extent0]>;
[[maybe_unused]] static_view_1d_t static_view_1d{
"std-algo-test-1d-contiguous-view-static"};
using dyn_view_1d_t = Kokkos::View<value_type*>;
[[maybe_unused]] dyn_view_1d_t dynamic_view_1d{
"std-algo-test-1d-contiguous-view-dynamic", extent0};
// strided 1d view: extent0 elements with stride 2
using strided_view_1d_t = Kokkos::View<value_type*, Kokkos::LayoutStride>;
Kokkos::LayoutStride layout1d{extent0, 2};
strided_view_1d_t strided_view_1d{"std-algo-test-1d-strided-view", layout1d};
// Overlapping because iterators are identical
#if defined(KOKKOS_ENABLE_DEBUG)
auto first_s = KE::begin(static_view_1d);
auto last_s = first_s + extent0;
EXPECT_DEATH({ KE::Impl::expect_no_overlap(first_s, last_s, first_s); },
"Kokkos contract violation:.*");
auto first_d = KE::begin(dynamic_view_1d);
auto last_d = first_d + extent0;
EXPECT_DEATH({ KE::Impl::expect_no_overlap(first_d, last_d, first_d); },
"Kokkos contract violation:.*");
auto first_st = KE::begin(strided_view_1d);
auto last_st = first_st + extent0;
EXPECT_DEATH({ KE::Impl::expect_no_overlap(first_st, last_st, first_st); },
"Kokkos contract violation:.*");
#endif
// Ranges overlap: two subviews of length sub_extent0, the second one shifted
// by offset0 elements
static constexpr size_t sub_extent0 = 6, offset0 = 3;
std::pair<size_t, size_t> range0(0, sub_extent0),
range1(offset0, offset0 + sub_extent0);
#if defined(KOKKOS_ENABLE_DEBUG)
auto static_view_1d_0 = Kokkos::subview(static_view_1d, range0);
auto static_view_1d_1 = Kokkos::subview(static_view_1d, range1);
auto first_s0 = KE::begin(static_view_1d_0); // [0, 6)
auto last_s0 = first_s0 + static_view_1d_0.extent(0);
auto first_s1 = KE::begin(static_view_1d_1); // [3, 9)
EXPECT_DEATH({ KE::Impl::expect_no_overlap(first_s0, last_s0, first_s1); },
"Kokkos contract violation:.*");
auto dynamic_view_1d_0 = Kokkos::subview(dynamic_view_1d, range0);
auto dynamic_view_1d_1 = Kokkos::subview(dynamic_view_1d, range1);
auto first_d0 = KE::begin(dynamic_view_1d_0); // [0, 6)
auto last_d0 = first_d0 + dynamic_view_1d_0.extent(0);
auto first_d1 = KE::begin(dynamic_view_1d_1); // [3, 9)
EXPECT_DEATH({ KE::Impl::expect_no_overlap(first_d0, last_d0, first_d1); },
"Kokkos contract violation:.*");
#endif
// Same two ranges, but taken from the stride-2 view; this case is expected to
// pass and is therefore checked regardless of KOKKOS_ENABLE_DEBUG.
auto strided_view_1d_0 = Kokkos::subview(strided_view_1d, range0);
auto strided_view_1d_1 = Kokkos::subview(strided_view_1d, range1);
auto first_st0 = KE::begin(strided_view_1d_0); // [0, 12)
auto last_st0 = first_st0 + strided_view_1d_0.extent(0);
auto first_st1 = KE::begin(strided_view_1d_1); // [3, 15)
// NOTE(review): the "[0, 12)" / "[3, 15)" annotations and the rationale below
// look inconsistent with a stride-2 layout -- presumably the second subview
// starts offset0 * stride = 6 value_type slots after the first, which is
// divisible by the stride. Confirm why no contract violation is expected.
// Does not overlap since offset (=3) is not divisible by stride (=2)
EXPECT_NO_THROW(
{ KE::Impl::expect_no_overlap(first_st0, last_st0, first_st1); });
// Iterating over the same range without overlapping
// (rows 0 and 1 of 2 x extent0 views: the two rows are interleaved in memory)
Kokkos::View<value_type[2][extent0], Kokkos::LayoutLeft> static_view_2d{
"std-algo-test-2d-contiguous-view-static"};
auto sub_static_view_1d_0 = Kokkos::subview(static_view_2d, 0, Kokkos::ALL);
auto sub_static_view_1d_1 = Kokkos::subview(static_view_2d, 1, Kokkos::ALL);
auto sub_first_s0 = KE::begin(sub_static_view_1d_0); // 0, 2, 4, ...
auto sub_last_s0 = sub_first_s0 + sub_static_view_1d_0.extent(0);
auto sub_first_s1 = KE::begin(sub_static_view_1d_1); // 1, 3, 5, ...
EXPECT_NO_THROW({
KE::Impl::expect_no_overlap(sub_first_s0, sub_last_s0, sub_first_s1);
});
// same check with a dynamic-extent 2d view
Kokkos::View<value_type**, Kokkos::LayoutLeft> dynamic_view_2d{
"std-algo-test-2d-contiguous-view-dynamic", 2, extent0};
auto sub_dynamic_view_1d_0 = Kokkos::subview(dynamic_view_2d, 0, Kokkos::ALL);
auto sub_dynamic_view_1d_1 = Kokkos::subview(dynamic_view_2d, 1, Kokkos::ALL);
auto sub_first_d0 = KE::begin(sub_dynamic_view_1d_0); // 0, 2, 4, ...
auto sub_last_d0 = sub_first_d0 + sub_dynamic_view_1d_0.extent(0);
auto sub_first_d1 = KE::begin(sub_dynamic_view_1d_1); // 1, 3, 5, ...
EXPECT_NO_THROW({
KE::Impl::expect_no_overlap(sub_first_d0, sub_last_d0, sub_first_d1);
});
// same check with an explicitly strided 2d view (strides 3 and 2 * 3)
Kokkos::LayoutStride layout2d{2, 3, extent0, 2 * 3};
Kokkos::View<value_type**, Kokkos::LayoutStride> strided_view_2d{
"std-algo-test-2d-contiguous-view-strided", layout2d};
auto sub_strided_view_1d_0 = Kokkos::subview(strided_view_2d, 0, Kokkos::ALL);
auto sub_strided_view_1d_1 = Kokkos::subview(strided_view_2d, 1, Kokkos::ALL);
auto sub_first_st0 = KE::begin(sub_strided_view_1d_0); // 0, 6, 12, ...
auto sub_last_st0 = sub_first_st0 + sub_strided_view_1d_0.extent(0);
auto sub_first_st1 = KE::begin(sub_strided_view_1d_1); // 1, 7, 13, ...
EXPECT_NO_THROW({
KE::Impl::expect_no_overlap(sub_first_st0, sub_last_st0, sub_first_st1);
});
}
} // namespace stdalgos } // namespace stdalgos
} // namespace Test } // namespace Test

View File

@ -85,7 +85,7 @@ struct TestFunctorA {
break; break;
} }
#if not defined KOKKOS_ENABLE_OPENMPTARGET #ifndef KOKKOS_ENABLE_OPENMPTARGET
case 2: { case 2: {
auto it = KE::exclusive_scan( auto it = KE::exclusive_scan(
@ -213,7 +213,7 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
break; break;
} }
#if not defined KOKKOS_ENABLE_OPENMPTARGET #ifndef KOKKOS_ENABLE_OPENMPTARGET
case 2: case 2:
case 3: { case 3: {
auto it = exclusive_scan(KE::cbegin(rowFrom), KE::cend(rowFrom), auto it = exclusive_scan(KE::cbegin(rowFrom), KE::cend(rowFrom),
@ -242,7 +242,7 @@ template <class LayoutTag, class ValueType, class InPlaceOrVoid = void>
void run_all_scenarios() { void run_all_scenarios() {
for (int numTeams : teamSizesToTest) { for (int numTeams : teamSizesToTest) {
for (const auto& numCols : {0, 1, 2, 13, 101, 1444, 8153}) { for (const auto& numCols : {0, 1, 2, 13, 101, 1444, 8153}) {
#if not defined KOKKOS_ENABLE_OPENMPTARGET #ifndef KOKKOS_ENABLE_OPENMPTARGET
for (int apiId : {0, 1, 2, 3}) { for (int apiId : {0, 1, 2, 3}) {
#else #else
for (int apiId : {0, 1}) { for (int apiId : {0, 1}) {

View File

@ -52,7 +52,7 @@ struct TestFunctorA {
Kokkos::single(Kokkos::PerTeam(member), Kokkos::single(Kokkos::PerTeam(member),
[=, *this]() { m_returnsView(myRowIndex) = result; }); [=, *this]() { m_returnsView(myRowIndex) = result; });
} }
#if not defined KOKKOS_ENABLE_OPENMPTARGET #ifndef KOKKOS_ENABLE_OPENMPTARGET
else if (m_apiPick == 2) { else if (m_apiPick == 2) {
using value_type = typename ViewType::value_type; using value_type = typename ViewType::value_type;
result = KE::is_sorted(member, KE::cbegin(myRowView), KE::cend(myRowView), result = KE::is_sorted(member, KE::cbegin(myRowView), KE::cend(myRowView),
@ -179,7 +179,7 @@ template <class LayoutTag, class ValueType>
void run_all_scenarios(bool makeDataSortedOnPurpose) { void run_all_scenarios(bool makeDataSortedOnPurpose) {
for (int numTeams : teamSizesToTest) { for (int numTeams : teamSizesToTest) {
for (const auto& numCols : {0, 1, 2, 13, 101, 1444, 5153}) { for (const auto& numCols : {0, 1, 2, 13, 101, 1444, 5153}) {
#if not defined KOKKOS_ENABLE_OPENMPTARGET #ifndef KOKKOS_ENABLE_OPENMPTARGET
for (int apiId : {0, 1, 2, 3}) { for (int apiId : {0, 1, 2, 3}) {
#else #else
for (int apiId : {0, 1}) { for (int apiId : {0, 1}) {

View File

@ -73,7 +73,7 @@ struct TestFunctorA {
m_distancesView(myRowIndex) = resultDist; m_distancesView(myRowIndex) = resultDist;
}); });
} }
#if not defined KOKKOS_ENABLE_OPENMPTARGET #ifndef KOKKOS_ENABLE_OPENMPTARGET
else if (m_apiPick == 2) { else if (m_apiPick == 2) {
using value_type = typename ViewType::value_type; using value_type = typename ViewType::value_type;
auto it = KE::is_sorted_until(member, KE::cbegin(myRowView), auto it = KE::is_sorted_until(member, KE::cbegin(myRowView),
@ -226,7 +226,7 @@ template <class LayoutTag, class ValueType>
void run_all_scenarios(const std::string& name, const std::vector<int>& cols) { void run_all_scenarios(const std::string& name, const std::vector<int>& cols) {
for (int numTeams : teamSizesToTest) { for (int numTeams : teamSizesToTest) {
for (const auto& numCols : cols) { for (const auto& numCols : cols) {
#if not defined KOKKOS_ENABLE_OPENMPTARGET #ifndef KOKKOS_ENABLE_OPENMPTARGET
for (int apiId : {0, 1, 2, 3}) { for (int apiId : {0, 1, 2, 3}) {
#else #else
for (int apiId : {0, 1}) { for (int apiId : {0, 1}) {

View File

@ -59,7 +59,7 @@ struct TestFunctorA {
m_distancesView(myRowIndex) = resultDist; m_distancesView(myRowIndex) = resultDist;
}); });
} }
#if not defined KOKKOS_ENABLE_OPENMPTARGET #ifndef KOKKOS_ENABLE_OPENMPTARGET
else if (m_apiPick == 2) { else if (m_apiPick == 2) {
using value_type = typename ViewType::value_type; using value_type = typename ViewType::value_type;
auto it = auto it =
@ -170,7 +170,7 @@ void run_all_scenarios() {
} }
TEST(std_algorithms_max_element_team_test, test) { TEST(std_algorithms_max_element_team_test, test) {
#if not defined KOKKOS_ENABLE_OPENMPTARGET #ifndef KOKKOS_ENABLE_OPENMPTARGET
run_all_scenarios<DynamicTag, int>(); run_all_scenarios<DynamicTag, int>();
run_all_scenarios<StridedTwoRowsTag, double>(); run_all_scenarios<StridedTwoRowsTag, double>();
run_all_scenarios<StridedThreeRowsTag, int>(); run_all_scenarios<StridedThreeRowsTag, int>();

View File

@ -59,7 +59,7 @@ struct TestFunctorA {
m_distancesView(myRowIndex) = resultDist; m_distancesView(myRowIndex) = resultDist;
}); });
} }
#if not defined KOKKOS_ENABLE_OPENMPTARGET #ifndef KOKKOS_ENABLE_OPENMPTARGET
else if (m_apiPick == 2) { else if (m_apiPick == 2) {
using value_type = typename ViewType::value_type; using value_type = typename ViewType::value_type;
auto it = auto it =
@ -169,7 +169,7 @@ void run_all_scenarios() {
} }
TEST(std_algorithms_min_element_team_test, test) { TEST(std_algorithms_min_element_team_test, test) {
#if not defined KOKKOS_ENABLE_OPENMPTARGET #ifndef KOKKOS_ENABLE_OPENMPTARGET
run_all_scenarios<DynamicTag, int>(); run_all_scenarios<DynamicTag, int>();
run_all_scenarios<StridedTwoRowsTag, double>(); run_all_scenarios<StridedTwoRowsTag, double>();
run_all_scenarios<StridedThreeRowsTag, int>(); run_all_scenarios<StridedThreeRowsTag, int>();

View File

@ -66,7 +66,7 @@ struct TestFunctorA {
m_distancesView(myRowIndex, 1) = resultDist2; m_distancesView(myRowIndex, 1) = resultDist2;
}); });
} }
#if not defined KOKKOS_ENABLE_OPENMPTARGET #ifndef KOKKOS_ENABLE_OPENMPTARGET
else if (m_apiPick == 2) { else if (m_apiPick == 2) {
using value_type = typename ViewType::value_type; using value_type = typename ViewType::value_type;
auto itPair = auto itPair =
@ -188,7 +188,7 @@ void run_all_scenarios() {
} }
TEST(std_algorithms_minmax_element_team_test, test) { TEST(std_algorithms_minmax_element_team_test, test) {
#if not defined KOKKOS_ENABLE_OPENMPTARGET #ifndef KOKKOS_ENABLE_OPENMPTARGET
run_all_scenarios<DynamicTag, int>(); run_all_scenarios<DynamicTag, int>();
run_all_scenarios<StridedTwoRowsTag, double>(); run_all_scenarios<StridedTwoRowsTag, double>();
run_all_scenarios<StridedThreeRowsTag, int>(); run_all_scenarios<StridedThreeRowsTag, int>();

View File

@ -16,7 +16,7 @@
#include <TestStdAlgorithmsCommon.hpp> #include <TestStdAlgorithmsCommon.hpp>
#if not defined KOKKOS_ENABLE_OPENMPTARGET #ifndef KOKKOS_ENABLE_OPENMPTARGET
namespace Test { namespace Test {
namespace stdalgos { namespace stdalgos {

View File

@ -16,7 +16,7 @@
#include <TestStdAlgorithmsCommon.hpp> #include <TestStdAlgorithmsCommon.hpp>
#if not defined KOKKOS_ENABLE_OPENMPTARGET #ifndef KOKKOS_ENABLE_OPENMPTARGET
namespace Test { namespace Test {
namespace stdalgos { namespace stdalgos {

View File

@ -16,7 +16,7 @@
#include <TestStdAlgorithmsCommon.hpp> #include <TestStdAlgorithmsCommon.hpp>
#if not defined KOKKOS_ENABLE_OPENMPTARGET #ifndef KOKKOS_ENABLE_OPENMPTARGET
namespace Test { namespace Test {
namespace stdalgos { namespace stdalgos {

View File

@ -16,7 +16,7 @@
#include <TestStdAlgorithmsCommon.hpp> #include <TestStdAlgorithmsCommon.hpp>
#if not defined KOKKOS_ENABLE_OPENMPTARGET #ifndef KOKKOS_ENABLE_OPENMPTARGET
namespace Test { namespace Test {
namespace stdalgos { namespace stdalgos {

View File

@ -5,6 +5,6 @@ build_script:
- cmd: >- - cmd: >-
mkdir build && mkdir build &&
cd build && cd build &&
cmake c:\projects\source -DKokkos_ENABLE_TESTS=ON -DCMAKE_CXX_FLAGS="/W0 /EHsc" -DKokkos_ENABLE_DEPRECATED_CODE_4=ON -DKokkos_ENABLE_DEPRECATION_WARNINGS=OFF && cmake c:\projects\source -DKokkos_ENABLE_IMPL_MDSPAN=OFF -DKokkos_ENABLE_TESTS=ON -DCMAKE_CXX_FLAGS="/W0 /EHsc" -DKokkos_ENABLE_DEPRECATED_CODE_4=ON -DKokkos_ENABLE_DEPRECATION_WARNINGS=OFF &&
cmake --build . --target install && cmake --build . --target install &&
ctest -C Debug --output-on-failure ctest -C Debug --output-on-failure

View File

@ -4,7 +4,7 @@ KOKKOS_ADD_BENCHMARK_DIRECTORIES(gather)
KOKKOS_ADD_BENCHMARK_DIRECTORIES(gups) KOKKOS_ADD_BENCHMARK_DIRECTORIES(gups)
KOKKOS_ADD_BENCHMARK_DIRECTORIES(launch_latency) KOKKOS_ADD_BENCHMARK_DIRECTORIES(launch_latency)
KOKKOS_ADD_BENCHMARK_DIRECTORIES(stream) KOKKOS_ADD_BENCHMARK_DIRECTORIES(stream)
KOKKOS_ADD_BENCHMARK_DIRECTORIES(view_copy_constructor)
#FIXME_OPENMPTARGET - These two benchmarks cause ICE. Commenting them for now but a deeper analysis on the cause and a possible fix will follow. #FIXME_OPENMPTARGET - These two benchmarks cause ICE. Commenting them for now but a deeper analysis on the cause and a possible fix will follow.
IF(NOT Kokkos_ENABLE_OPENMPTARGET) IF(NOT Kokkos_ENABLE_OPENMPTARGET)
KOKKOS_ADD_BENCHMARK_DIRECTORIES(policy_performance) KOKKOS_ADD_BENCHMARK_DIRECTORIES(policy_performance)

View File

@ -0,0 +1,4 @@
# Register the view_copy_constructor benchmark executable, built from its
# single source file, through the project's KOKKOS_ADD_EXECUTABLE macro.
KOKKOS_ADD_EXECUTABLE(
view_copy_constructor
SOURCES view_copy_constructor.cpp
)

View File

@ -0,0 +1,46 @@
# Stand-alone GNU Makefile for the view_copy_constructor benchmark.
# Builds every .cpp file next to this Makefile into view_copy_constructor.exe
# using the settings pulled in from Makefile.kokkos.
# Kokkos backend and (empty) target architecture
KOKKOS_DEVICES=Serial
KOKKOS_ARCH = ""
# Absolute path of this file with the "Makefile" part removed
MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST))))
# Unless provided by the caller, KOKKOS_PATH is two directories above this one
ifndef KOKKOS_PATH
KOKKOS_PATH = $(MAKEFILE_PATH)../..
endif
# Sources and headers located next to this Makefile
SRC = $(wildcard $(MAKEFILE_PATH)*.cpp)
HEADERS = $(wildcard $(MAKEFILE_PATH)*.hpp)
vpath %.cpp $(sort $(dir $(SRC)))
# Default goal
default: build
echo "Start Build"
# Compiler, output name and flags (CXXFLAGS can be overridden by the caller;
# the include path of this directory is always appended)
CXX = clang++
EXE = view_copy_constructor.exe
CXXFLAGS ?= -Ofast
override CXXFLAGS += -I$(MAKEFILE_PATH)
DEPFLAGS = -M
LINK = ${CXX}
LINKFLAGS = -Ofast
KOKKOS_CXX_STANDARD=c++20
# One object file per source, placed in the current directory
OBJ = $(notdir $(SRC:.cpp=.o))
LIB =
# Kokkos-provided variables and rules (KOKKOS_CPPFLAGS, KOKKOS_LIBS, ...)
include $(KOKKOS_PATH)/Makefile.kokkos
build: $(EXE)
# Link step
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)
# Remove build products (kokkos-clean is run as a prerequisite)
clean: kokkos-clean
rm -f *.o view_copy_constructor.cuda view_copy_constructor.exe
# Compilation rules
%.o:%.cpp $(KOKKOS_CPP_DEPENDS) $(HEADERS)
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@)

View File

@ -0,0 +1,310 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 4.0
// Copyright (2022) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
// See https://kokkos.org/LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//@HEADER
// The function "test_view_collection" exposes the copy constructor
// and destructor overheads in Kokkos View objects
// Please see the lines marked by "NOTE".
#include <limits>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <sys/time.h>
#include <Kokkos_Core.hpp>
#include <iostream>
// NVIEWS is the number of Kokkos View objects in our ViewCollection object
// We have chosen a large value of 40 to make it easier to see performance
// differences when using the likelihood attribute
#define NVIEWS 40
// Aggregate of NVIEWS (40) one-dimensional Kokkos views of doubles, all of
// extent N and all filled with 1. Copying or destroying an object of this
// class copies or destroys every member view; the benchmark functions below
// time exactly that.
class ViewCollection {
public:
// The 40 views are individual named members. Their count must stay equal to
// NVIEWS, because m_expected_sum is computed as N * NVIEWS.
Kokkos::View<double*> v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13,
v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28,
v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40;
// Value returned by sum_views() when the summation kernel is skipped
// (N * NVIEWS, i.e. the sum of all elements since every element is 1).
double m_expected_sum;
// Written by sum_views() so that the object has an observable side effect.
double m_side_effect;
// Extent of every view.
int m_N;
// Allocates the 40 views with extent N (labels "v1" .. "v40"), records the
// expected sum N * NVIEWS, and then sets every element of every view to 1
// from a plain loop.
// NOTE(review): the element writes below assume the views' memory is
// accessible from the host -- TODO confirm for the configured default
// memory space.
ViewCollection(int N)
: v1("v1", N),
v2("v2", N),
v3("v3", N),
v4("v4", N),
v5("v5", N),
v6("v6", N),
v7("v7", N),
v8("v8", N),
v9("v9", N),
v10("v10", N),
v11("v11", N),
v12("v12", N),
v13("v13", N),
v14("v14", N),
v15("v15", N),
v16("v16", N),
v17("v17", N),
v18("v18", N),
v19("v19", N),
v20("v20", N),
v21("v21", N),
v22("v22", N),
v23("v23", N),
v24("v24", N),
v25("v25", N),
v26("v26", N),
v27("v27", N),
v28("v28", N),
v29("v29", N),
v30("v30", N),
v31("v31", N),
v32("v32", N),
v33("v33", N),
v34("v34", N),
v35("v35", N),
v36("v36", N),
v37("v37", N),
v38("v38", N),
v39("v39", N),
v40("v40", N),
m_expected_sum(N * NVIEWS),
m_side_effect(0.0),
m_N(N) {
// Fill every view with ones so that the total over all views is N * NVIEWS.
for (int i = 0; i < N; ++i) {
v1(i) = 1;
v2(i) = 1;
v3(i) = 1;
v4(i) = 1;
v5(i) = 1;
v6(i) = 1;
v7(i) = 1;
v8(i) = 1;
v9(i) = 1;
v10(i) = 1;
v11(i) = 1;
v12(i) = 1;
v13(i) = 1;
v14(i) = 1;
v15(i) = 1;
v16(i) = 1;
v17(i) = 1;
v18(i) = 1;
v19(i) = 1;
v20(i) = 1;
v21(i) = 1;
v22(i) = 1;
v23(i) = 1;
v24(i) = 1;
v25(i) = 1;
v26(i) = 1;
v27(i) = 1;
v28(i) = 1;
v29(i) = 1;
v30(i) = 1;
v31(i) = 1;
v32(i) = 1;
v33(i) = 1;
v34(i) = 1;
v35(i) = 1;
v36(i) = 1;
v37(i) = 1;
v38(i) = 1;
v39(i) = 1;
v40(i) = 1;
}
}
// The ADD_COPY_CONSTRUCTOR macro is helpful to compare time in the copy
// constructor between compilers. We have found that the GNU compiler
// is sometimes able to inline the default copy constructor.
// When the macro is defined, the class gets an explicit member-wise copy
// constructor (every view and every scalar member is copied from `other`)
// that is marked noinline.
#ifdef ADD_COPY_CONSTRUCTOR
__attribute__((noinline)) ViewCollection(const ViewCollection& other)
: v1(other.v1),
v2(other.v2),
v3(other.v3),
v4(other.v4),
v5(other.v5),
v6(other.v6),
v7(other.v7),
v8(other.v8),
v9(other.v9),
v10(other.v10),
v11(other.v11),
v12(other.v12),
v13(other.v13),
v14(other.v14),
v15(other.v15),
v16(other.v16),
v17(other.v17),
v18(other.v18),
v19(other.v19),
v20(other.v20),
v21(other.v21),
v22(other.v22),
v23(other.v23),
v24(other.v24),
v25(other.v25),
v26(other.v26),
v27(other.v27),
v28(other.v28),
v29(other.v29),
v30(other.v30),
v31(other.v31),
v32(other.v32),
v33(other.v33),
v34(other.v34),
v35(other.v35),
v36(other.v36),
v37(other.v37),
v38(other.v38),
v39(other.v39),
v40(other.v40),
m_expected_sum(other.m_expected_sum),
m_side_effect(other.m_side_effect),
m_N(other.m_N) {}
#endif
// Returns the sum of all elements of all 40 views when execute_kernel is
// true, otherwise the precomputed m_expected_sum. In both cases the result
// scaled by (ii + 1) is stored in m_side_effect, so the method is non-const
// and must be called on a mutable object.
//   ii             - iteration index supplied by the caller
//   execute_kernel - whether to actually read the views
KOKKOS_INLINE_FUNCTION
double sum_views(int ii, bool execute_kernel) {
double result = 0.0;
if (execute_kernel) {
// This code is only executed when using the command line option -k
// The computation references all Kokkos views. This may help our
// effort to stop compilers from optimizing away the Kokkos views
for (int i = 0; i < m_N; ++i) {
result += v1(i) + v2(i) + v3(i) + v4(i) + v5(i) + v6(i) + v7(i) +
v8(i) + v9(i) + v10(i) + v11(i) + v12(i) + v13(i) + v14(i) +
v15(i) + v16(i) + v17(i) + v18(i) + v19(i) + v20(i) + v21(i) +
v22(i) + v23(i) + v24(i) + v25(i) + v26(i) + v27(i) + v28(i) +
v29(i) + v30(i) + v31(i) + v32(i) + v33(i) + v34(i) + v35(i) +
v36(i) + v37(i) + v38(i) + v39(i) + v40(i);
}
} else {
result = m_expected_sum;
}
// This statement introduces a side effect that may help our effort to
// stop compilers from optimizing away the temporary ViewCollection object
m_side_effect = result * (ii + 1);
return result;
}
};
void test_view_collection_kk(int N, int num_iter, bool execute_kernel) {
ViewCollection view_collection(N);
Kokkos::Timer view_collection_timer;
double max_value = 0.0;
// Max Reduction boilerplate code taken from slide 53 of
// kokkos-tutorials/LectureSeries/KokkosTutorial_02_ViewsAndSpaces.pdf
Kokkos::parallel_reduce(
"collection-reduction", num_iter,
KOKKOS_LAMBDA(int i, double& valueToUpdate) {
// NOTE: The following lines expose the Kokkos View overheads
ViewCollection tmp_view_collection = view_collection;
double my_value = tmp_view_collection.sum_views(i, execute_kernel);
if (my_value > valueToUpdate) valueToUpdate = my_value;
},
Kokkos::Max<double>(max_value));
double view_collection_time = view_collection_timer.seconds();
bool success = std::fabs(max_value - N * NVIEWS) < 1.E-6;
std::cout << "View Time = " << view_collection_time << " seconds"
<< std::endl;
if (success) {
std::cout << "Kokkos run:" << std::endl;
std::cout << "SUCCESS" << std::endl;
} else {
std::cout << "FAILURE" << std::endl;
}
}
void test_view_collection_serial(int N, int num_iter, bool execute_kernel) {
ViewCollection view_collection(N);
Kokkos::Timer view_collection_timer;
double max_value = 0.0;
// Max Reduction boilerplate code taken from slide 53 of
// kokkos-tutorials/LectureSeries/KokkosTutorial_02_ViewsAndSpaces.pdf
for (int i = 0; i < num_iter; ++i) {
// NOTE: The following lines expose the Kokkos View overheads
ViewCollection tmp_view_collection = view_collection;
double my_value = tmp_view_collection.sum_views(i, execute_kernel);
if (my_value > max_value) max_value = my_value;
}
double view_collection_time = view_collection_timer.seconds();
bool success = std::fabs(max_value - N * NVIEWS) < 1.E-6;
std::cout << "View Time 2 = " << view_collection_time << " seconds"
<< std::endl;
if (success) {
std::cout << "Serial run:" << std::endl;
std::cout << "SUCCESS" << std::endl;
} else {
std::cout << "FAILURE" << std::endl;
}
}
// Entry point of the benchmark: parses the command line, reports the chosen
// configuration, and runs the view-copy test inside and outside a Kokkos
// kernel.
//
// Options:
//   -N <int>  array extent of every view (must be >= 1, default 1)
//   -i <int>  number of iterations (must be >= 1, default 1 << 27)
//   -k        execute the summation kernel
//   -h        print the usage message and exit
//
// Returns 0. An invalid or incomplete option terminates the process through
// exit(1). When no host backend is enabled the body is compiled out and the
// program does nothing.
int main(int argc, char* argv[]) {
// The benchmark is only testing reference counting for views on host.
#if defined(KOKKOS_ENABLE_OPENMP) || defined(KOKKOS_ENABLE_SERIAL) || \
    defined(KOKKOS_ENABLE_THREADS) || defined(KOKKOS_ENABLE_HPX)
  int N               = 1;
  int num_iter        = 1 << 27;
  bool execute_kernel = false;

  for (int i = 0; i < argc; i++) {
    if ((strcmp(argv[i], "-N") == 0)) {
      // argv[argc] is a null pointer, so the value must be checked for
      // presence before it is handed to atoi.
      if (i + 1 >= argc) {
        std::cout << "Option -N requires an integer argument" << std::endl;
        exit(1);
      }
      N = atoi(argv[++i]);
      if (N < 1) {
        std::cout << "Array extent must be >= 1" << std::endl;
        exit(1);
      }
    } else if (strcmp(argv[i], "-i") == 0) {
      // Same guard as for -N: never read past the last real argument.
      if (i + 1 >= argc) {
        std::cout << "Option -i requires an integer argument" << std::endl;
        exit(1);
      }
      num_iter = atoi(argv[++i]);
      if (num_iter < 1) {
        std::cout << "Number of iterations must be >= 1" << std::endl;
        exit(1);
      }
    } else if (strcmp(argv[i], "-k") == 0) {
      execute_kernel = true;
    } else if ((strcmp(argv[i], "-h") == 0)) {
      printf("  Options:\n");
      printf("  -N <int>:      Array extent\n");
      printf("  -i <int>:      Number of iterations\n");
      printf("  -k:            Execute the summation kernel\n");
      printf("  -h:            Print this message\n\n");
      exit(1);
    }
  }

  std::cout << "Array extent = " << N << std::endl;
  std::cout << "Iterations = " << num_iter << std::endl;
  std::cout << "Execute summation kernel = " << std::boolalpha << execute_kernel
            << std::noboolalpha << std::endl;

  // Test inside a Kokkos kernel.
  Kokkos::initialize(argc, argv);
  { test_view_collection_kk(N, num_iter, execute_kernel); }

  // Test outside Kokkos kernel.
  test_view_collection_serial(N, num_iter, execute_kernel);

  Kokkos::finalize();
#endif

  return 0;
}

View File

@ -233,7 +233,7 @@ do
cuda_args="$cuda_args $1" cuda_args="$cuda_args $1"
;; ;;
#Handle more known nvcc args #Handle more known nvcc args
--extended-lambda|--expt-extended-lambda|--expt-relaxed-constexpr|--Wno-deprecated-gpu-targets|-Wno-deprecated-gpu-targets|-allow-unsupported-compiler|--allow-unsupported-compiler) --extended-lambda|--expt-extended-lambda|--expt-relaxed-constexpr|--Wno-deprecated-gpu-targets|-Wno-deprecated-gpu-targets|-allow-unsupported-compiler|--allow-unsupported-compiler|--disable-warnings)
cuda_args="$cuda_args $1" cuda_args="$cuda_args $1"
;; ;;
#Handle known nvcc args that have an argument #Handle known nvcc args that have an argument

View File

@ -1,6 +1,5 @@
TRIBITS_PACKAGE_DEFINE_DEPENDENCIES( TRIBITS_PACKAGE_DEFINE_DEPENDENCIES(
LIB_OPTIONAL_TPLS Pthread CUDA HWLOC DLlib LIB_OPTIONAL_TPLS Pthread CUDA HWLOC DLlib
TEST_OPTIONAL_TPLS CUSPARSE
) )
TRIBITS_TPL_TENTATIVELY_ENABLE(DLlib) TRIBITS_TPL_TENTATIVELY_ENABLE(DLlib)

View File

@ -225,8 +225,13 @@ FUNCTION(kokkos_compilation)
# if built w/o CUDA support, we want to basically make this a no-op # if built w/o CUDA support, we want to basically make this a no-op
SET(_Kokkos_ENABLE_CUDA @Kokkos_ENABLE_CUDA@) SET(_Kokkos_ENABLE_CUDA @Kokkos_ENABLE_CUDA@)
IF(CMAKE_VERSION VERSION_GREATER_EQUAL 3.17)
SET(MAYBE_CURRENT_INSTALLATION_ROOT "${CMAKE_CURRENT_FUNCTION_LIST_DIR}/../../..")
ENDIF()
# search relative first and then absolute # search relative first and then absolute
SET(_HINTS "${CMAKE_CURRENT_LIST_DIR}/../.." "@CMAKE_INSTALL_PREFIX@") SET(_HINTS "${MAYBE_CURRENT_INSTALLATION_ROOT}" "@CMAKE_INSTALL_PREFIX@")
# find kokkos_launch_compiler # find kokkos_launch_compiler
FIND_PROGRAM(Kokkos_COMPILE_LAUNCHER FIND_PROGRAM(Kokkos_COMPILE_LAUNCHER

View File

@ -37,6 +37,7 @@
#cmakedefine KOKKOS_ENABLE_CUDA_LAMBDA // deprecated #cmakedefine KOKKOS_ENABLE_CUDA_LAMBDA // deprecated
#cmakedefine KOKKOS_ENABLE_CUDA_CONSTEXPR #cmakedefine KOKKOS_ENABLE_CUDA_CONSTEXPR
#cmakedefine KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC #cmakedefine KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC
#cmakedefine KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY
#cmakedefine KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE #cmakedefine KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE
#cmakedefine KOKKOS_ENABLE_HIP_MULTIPLE_KERNEL_INSTANTIATIONS #cmakedefine KOKKOS_ENABLE_HIP_MULTIPLE_KERNEL_INSTANTIATIONS
#cmakedefine KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY #cmakedefine KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY
@ -52,6 +53,8 @@
#cmakedefine KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION // deprecated #cmakedefine KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION // deprecated
#cmakedefine KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION #cmakedefine KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION
#cmakedefine KOKKOS_ENABLE_IMPL_MDSPAN #cmakedefine KOKKOS_ENABLE_IMPL_MDSPAN
#cmakedefine KOKKOS_ENABLE_IMPL_REF_COUNT_BRANCH_UNLIKELY
#cmakedefine KOKKOS_ENABLE_IMPL_VIEW_OF_VIEWS_DESTRUCTOR_PRECONDITION_VIOLATION_WORKAROUND
#cmakedefine KOKKOS_ENABLE_ATOMICS_BYPASS #cmakedefine KOKKOS_ENABLE_ATOMICS_BYPASS
/* TPL Settings */ /* TPL Settings */
@ -65,6 +68,7 @@
#cmakedefine KOKKOS_ARCH_ARMV8_THUNDERX #cmakedefine KOKKOS_ARCH_ARMV8_THUNDERX
#cmakedefine KOKKOS_ARCH_ARMV81 #cmakedefine KOKKOS_ARCH_ARMV81
#cmakedefine KOKKOS_ARCH_ARMV8_THUNDERX2 #cmakedefine KOKKOS_ARCH_ARMV8_THUNDERX2
#cmakedefine KOKKOS_ARCH_ARMV9_GRACE
#cmakedefine KOKKOS_ARCH_A64FX #cmakedefine KOKKOS_ARCH_A64FX
#cmakedefine KOKKOS_ARCH_AVX #cmakedefine KOKKOS_ARCH_AVX
#cmakedefine KOKKOS_ARCH_AVX2 #cmakedefine KOKKOS_ARCH_AVX2

View File

@ -7,13 +7,18 @@ IF (NOT CUDAToolkit_ROOT)
ENDIF() ENDIF()
ENDIF() ENDIF()
# FIXME CMake 3.28.4 creates more targets than we export IF(KOKKOS_CXX_HOST_COMPILER_ID STREQUAL NVHPC AND CMAKE_VERSION VERSION_LESS "3.20.1")
IF(CMAKE_VERSION VERSION_GREATER_EQUAL "3.17.0" AND CMAKE_VERSION VERSION_LESS "3.28.4") MESSAGE(FATAL_ERROR "Using NVHPC as host compiler requires at least CMake 3.20.1")
find_package(CUDAToolkit)
ELSE()
include(${CMAKE_CURRENT_LIST_DIR}/CudaToolkit.cmake)
ENDIF() ENDIF()
IF(CMAKE_VERSION VERSION_GREATER_EQUAL "3.17.0")
find_package(CUDAToolkit REQUIRED)
KOKKOS_CREATE_IMPORTED_TPL(CUDA INTERFACE
LINK_LIBRARIES CUDA::cuda_driver CUDA::cudart
)
KOKKOS_EXPORT_CMAKE_TPL(CUDAToolkit REQUIRED)
ELSE()
include(${CMAKE_CURRENT_LIST_DIR}/CudaToolkit.cmake)
IF (TARGET CUDA::cudart) IF (TARGET CUDA::cudart)
SET(FOUND_CUDART TRUE) SET(FOUND_CUDART TRUE)
@ -30,14 +35,10 @@ ELSE()
ENDIF() ENDIF()
include(FindPackageHandleStandardArgs) include(FindPackageHandleStandardArgs)
IF(KOKKOS_CXX_HOST_COMPILER_ID STREQUAL NVHPC) FIND_PACKAGE_HANDLE_STANDARD_ARGS(TPLCUDA ${DEFAULT_MSG} FOUND_CUDART FOUND_CUDA_DRIVER)
SET(KOKKOS_CUDA_ERROR "Using NVHPC as host compiler requires at least CMake 3.20.1")
ELSE()
SET(KOKKOS_CUDA_ERROR DEFAULT_MSG)
ENDIF()
FIND_PACKAGE_HANDLE_STANDARD_ARGS(TPLCUDA ${KOKKOS_CUDA_ERROR} FOUND_CUDART FOUND_CUDA_DRIVER)
IF (FOUND_CUDA_DRIVER AND FOUND_CUDART) IF (FOUND_CUDA_DRIVER AND FOUND_CUDART)
KOKKOS_CREATE_IMPORTED_TPL(CUDA INTERFACE KOKKOS_CREATE_IMPORTED_TPL(CUDA INTERFACE
LINK_LIBRARIES CUDA::cuda_driver CUDA::cudart LINK_LIBRARIES CUDA::cuda_driver CUDA::cudart
) )
ENDIF() ENDIF()
ENDIF()

View File

@ -35,7 +35,6 @@ IF(NOT _CUDA_FAILURE)
GLOBAL_SET(TPL_CUDA_LIBRARY_DIRS) GLOBAL_SET(TPL_CUDA_LIBRARY_DIRS)
GLOBAL_SET(TPL_CUDA_INCLUDE_DIRS ${CUDA_TOOLKIT_INCLUDE}) GLOBAL_SET(TPL_CUDA_INCLUDE_DIRS ${CUDA_TOOLKIT_INCLUDE})
GLOBAL_SET(TPL_CUDA_LIBRARIES ${CUDA_CUDART_LIBRARY} ${CUDA_cublas_LIBRARY} ${CUDA_cufft_LIBRARY}) GLOBAL_SET(TPL_CUDA_LIBRARIES ${CUDA_CUDART_LIBRARY} ${CUDA_cublas_LIBRARY} ${CUDA_cufft_LIBRARY})
KOKKOS_CREATE_IMPORTED_TPL_LIBRARY(CUSPARSE)
ELSE() ELSE()
SET(TPL_ENABLE_CUDA OFF) SET(TPL_ENABLE_CUDA OFF)
ENDIF() ENDIF()

View File

@ -1,26 +0,0 @@
#@HEADER
# ************************************************************************
#
# Kokkos v. 4.0
# Copyright (2022) National Technology & Engineering
# Solutions of Sandia, LLC (NTESS).
#
# Under the terms of Contract DE-NA0003525 with NTESS,
# the U.S. Government retains certain rights in this software.
#
# Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
#
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# ************************************************************************
# @HEADER
#include(${TRIBITS_DEPS_DIR}/CUDA.cmake)
#IF (TPL_ENABLE_CUDA)
# GLOBAL_SET(TPL_CUSPARSE_LIBRARY_DIRS)
# GLOBAL_SET(TPL_CUSPARSE_INCLUDE_DIRS ${TPL_CUDA_INCLUDE_DIRS})
# GLOBAL_SET(TPL_CUSPARSE_LIBRARIES ${CUDA_cusparse_LIBRARY})
# KOKKOS_CREATE_IMPORTED_TPL_LIBRARY(CUSPARSE)
#ENDIF()

View File

@ -118,14 +118,6 @@ FUNCTION(KOKKOS_ADD_TEST)
ENDIF() ENDIF()
ENDFUNCTION() ENDFUNCTION()
FUNCTION(KOKKOS_ADD_ADVANCED_TEST)
if (KOKKOS_HAS_TRILINOS)
TRIBITS_ADD_ADVANCED_TEST(${ARGN})
else()
# TODO Write this
endif()
ENDFUNCTION()
MACRO(KOKKOS_CREATE_IMPORTED_TPL_LIBRARY TPL_NAME) MACRO(KOKKOS_CREATE_IMPORTED_TPL_LIBRARY TPL_NAME)
ADD_INTERFACE_LIBRARY(TPL_LIB_${TPL_NAME}) ADD_INTERFACE_LIBRARY(TPL_LIB_${TPL_NAME})
TARGET_LINK_LIBRARIES(TPL_LIB_${TPL_NAME} LINK_PUBLIC ${TPL_${TPL_NAME}_LIBRARIES}) TARGET_LINK_LIBRARIES(TPL_LIB_${TPL_NAME} LINK_PUBLIC ${TPL_${TPL_NAME}_LIBRARIES})

View File

@ -28,6 +28,7 @@ KOKKOS_CHECK_DEPRECATED_OPTIONS(
#------------------------------------------------------------------------------- #-------------------------------------------------------------------------------
SET(KOKKOS_ARCH_LIST) SET(KOKKOS_ARCH_LIST)
include(CheckCXXCompilerFlag)
KOKKOS_DEPRECATED_LIST(ARCH ARCH) KOKKOS_DEPRECATED_LIST(ARCH ARCH)
@ -49,6 +50,7 @@ DECLARE_AND_CHECK_HOST_ARCH(ARMV81 "ARMv8.1 Compatible CPU")
DECLARE_AND_CHECK_HOST_ARCH(ARMV8_THUNDERX "ARMv8 Cavium ThunderX CPU") DECLARE_AND_CHECK_HOST_ARCH(ARMV8_THUNDERX "ARMv8 Cavium ThunderX CPU")
DECLARE_AND_CHECK_HOST_ARCH(ARMV8_THUNDERX2 "ARMv8 Cavium ThunderX2 CPU") DECLARE_AND_CHECK_HOST_ARCH(ARMV8_THUNDERX2 "ARMv8 Cavium ThunderX2 CPU")
DECLARE_AND_CHECK_HOST_ARCH(A64FX "ARMv8.2 with SVE Support") DECLARE_AND_CHECK_HOST_ARCH(A64FX "ARMv8.2 with SVE Support")
DECLARE_AND_CHECK_HOST_ARCH(ARMV9_GRACE "ARMv9 NVIDIA Grace CPU")
DECLARE_AND_CHECK_HOST_ARCH(SNB "Intel Sandy/Ivy Bridge CPUs") DECLARE_AND_CHECK_HOST_ARCH(SNB "Intel Sandy/Ivy Bridge CPUs")
DECLARE_AND_CHECK_HOST_ARCH(HSW "Intel Haswell CPUs") DECLARE_AND_CHECK_HOST_ARCH(HSW "Intel Haswell CPUs")
DECLARE_AND_CHECK_HOST_ARCH(BDW "Intel Broadwell Xeon E-class CPUs") DECLARE_AND_CHECK_HOST_ARCH(BDW "Intel Broadwell Xeon E-class CPUs")
@ -189,12 +191,6 @@ IF (KOKKOS_CXX_COMPILER_ID STREQUAL Clang)
ELSEIF(CUDAToolkit_BIN_DIR) ELSEIF(CUDAToolkit_BIN_DIR)
GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS --cuda-path=${CUDAToolkit_BIN_DIR}/..) GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS --cuda-path=${CUDAToolkit_BIN_DIR}/..)
ENDIF() ENDIF()
ELSEIF (KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC)
SET(CUDA_ARCH_FLAG "-gpu")
GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS -cuda)
IF (KOKKOS_ENABLE_CUDA) # FIXME ideally unreachable when CUDA not enabled
GLOBAL_APPEND(KOKKOS_LINK_OPTIONS -cuda)
ENDIF()
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA) ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA)
SET(CUDA_ARCH_FLAG "-arch") SET(CUDA_ARCH_FLAG "-arch")
ENDIF() ENDIF()
@ -209,6 +205,11 @@ ENDIF()
#------------------------------- KOKKOS_HIP_OPTIONS --------------------------- #------------------------------- KOKKOS_HIP_OPTIONS ---------------------------
KOKKOS_OPTION(IMPL_AMDGPU_FLAGS "" STRING "Set compiler flags for AMD GPUs")
KOKKOS_OPTION(IMPL_AMDGPU_LINK "" STRING "Set linker flags for AMD GPUs")
MARK_AS_ADVANCED(Kokkos_IMPL_AMDGPU_FLAGS)
MARK_AS_ADVANCED(Kokkos_IMPL_AMDGPU_LINK)
#clear anything that might be in the cache #clear anything that might be in the cache
GLOBAL_SET(KOKKOS_AMDGPU_OPTIONS) GLOBAL_SET(KOKKOS_AMDGPU_OPTIONS)
IF(KOKKOS_ENABLE_HIP) IF(KOKKOS_ENABLE_HIP)
@ -301,6 +302,20 @@ IF (KOKKOS_ARCH_A64FX)
) )
ENDIF() ENDIF()
IF (KOKKOS_ARCH_ARMV9_GRACE)
SET(KOKKOS_ARCH_ARM_NEON ON)
check_cxx_compiler_flag("-mcpu=neoverse-n2" COMPILER_SUPPORTS_NEOVERSE_N2)
check_cxx_compiler_flag("-msve-vector-bits=128" COMPILER_SUPPORTS_SVE_VECTOR_BITS)
IF (COMPILER_SUPPORTS_NEOVERSE_N2 AND COMPILER_SUPPORTS_SVE_VECTOR_BITS)
COMPILER_SPECIFIC_FLAGS(
COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID
DEFAULT -mcpu=neoverse-n2 -msve-vector-bits=128
)
ELSE()
MESSAGE(WARNING "Compiler does not support ARMv9 Grace architecture")
ENDIF()
ENDIF()
IF (KOKKOS_ARCH_ZEN) IF (KOKKOS_ARCH_ZEN)
COMPILER_SPECIFIC_FLAGS( COMPILER_SPECIFIC_FLAGS(
COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID
@ -535,16 +550,16 @@ IF (KOKKOS_CXX_HOST_COMPILER_ID STREQUAL NVHPC)
SET(KOKKOS_ARCH_AVX512XEON OFF) SET(KOKKOS_ARCH_AVX512XEON OFF)
ENDIF() ENDIF()
# FIXME_NVCC nvcc doesn't seem to support Arm Neon.
IF(KOKKOS_ARCH_ARM_NEON AND KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA)
UNSET(KOKKOS_ARCH_ARM_NEON)
ENDIF()
IF (NOT KOKKOS_COMPILE_LANGUAGE STREQUAL CUDA) IF (NOT KOKKOS_COMPILE_LANGUAGE STREQUAL CUDA)
IF (KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE) IF (KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE)
COMPILER_SPECIFIC_FLAGS( COMPILER_SPECIFIC_FLAGS(
Clang -fcuda-rdc Clang -fcuda-rdc
NVIDIA --relocatable-device-code=true NVIDIA --relocatable-device-code=true
NVHPC -gpu=rdc
)
ELSEIF(KOKKOS_ENABLE_CUDA)
COMPILER_SPECIFIC_FLAGS(
NVHPC -gpu=nordc
) )
ENDIF() ENDIF()
ENDIF() ENDIF()
@ -571,7 +586,7 @@ IF (KOKKOS_ENABLE_HIP)
COMPILER_SPECIFIC_FLAGS( COMPILER_SPECIFIC_FLAGS(
DEFAULT -fgpu-rdc DEFAULT -fgpu-rdc
) )
IF (NOT KOKKOS_CXX_COMPILER_ID STREQUAL HIPCC) IF (NOT KOKKOS_CXX_COMPILER_ID STREQUAL HIPCC AND NOT KOKKOS_IMPL_AMDGPU_FLAGS)
COMPILER_SPECIFIC_LINK_OPTIONS( COMPILER_SPECIFIC_LINK_OPTIONS(
DEFAULT --hip-link DEFAULT --hip-link
) )
@ -653,11 +668,6 @@ FUNCTION(CHECK_CUDA_ARCH ARCH FLAG)
SET(KOKKOS_CUDA_ARCH_FLAG ${FLAG} PARENT_SCOPE) SET(KOKKOS_CUDA_ARCH_FLAG ${FLAG} PARENT_SCOPE)
IF(KOKKOS_ENABLE_COMPILE_AS_CMAKE_LANGUAGE) IF(KOKKOS_ENABLE_COMPILE_AS_CMAKE_LANGUAGE)
SET(CMAKE_CUDA_ARCHITECTURES ${KOKKOS_CUDA_ARCHITECTURES} PARENT_SCOPE) SET(CMAKE_CUDA_ARCHITECTURES ${KOKKOS_CUDA_ARCHITECTURES} PARENT_SCOPE)
ELSE()
IF(KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC)
STRING(REPLACE "sm_" "cc" NVHPC_CUDA_ARCH ${FLAG})
GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=${NVHPC_CUDA_ARCH}")
GLOBAL_APPEND(KOKKOS_LINK_OPTIONS "${CUDA_ARCH_FLAG}=${NVHPC_CUDA_ARCH}")
ELSE() ELSE()
GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=${FLAG}") GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=${FLAG}")
IF(KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE OR KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA) IF(KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE OR KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA)
@ -666,7 +676,6 @@ FUNCTION(CHECK_CUDA_ARCH ARCH FLAG)
ENDIF() ENDIF()
ENDIF() ENDIF()
ENDIF() ENDIF()
ENDIF()
LIST(APPEND KOKKOS_CUDA_ARCH_FLAGS ${FLAG}) LIST(APPEND KOKKOS_CUDA_ARCH_FLAGS ${FLAG})
SET(KOKKOS_CUDA_ARCH_FLAGS ${KOKKOS_CUDA_ARCH_FLAGS} PARENT_SCOPE) SET(KOKKOS_CUDA_ARCH_FLAGS ${KOKKOS_CUDA_ARCH_FLAGS} PARENT_SCOPE)
LIST(APPEND KOKKOS_CUDA_ARCH_LIST ${ARCH}) LIST(APPEND KOKKOS_CUDA_ARCH_LIST ${ARCH})
@ -707,8 +716,10 @@ FUNCTION(CHECK_AMDGPU_ARCH ARCH FLAG)
IF(KOKKOS_ENABLE_HIP) IF(KOKKOS_ENABLE_HIP)
SET(KOKKOS_HIP_ARCHITECTURES ${FLAG} PARENT_SCOPE) SET(KOKKOS_HIP_ARCHITECTURES ${FLAG} PARENT_SCOPE)
ENDIF() ENDIF()
IF(NOT KOKKOS_IMPL_AMDGPU_FLAGS)
SET(KOKKOS_AMDGPU_ARCH_FLAG ${FLAG} PARENT_SCOPE) SET(KOKKOS_AMDGPU_ARCH_FLAG ${FLAG} PARENT_SCOPE)
GLOBAL_APPEND(KOKKOS_AMDGPU_OPTIONS "${AMDGPU_ARCH_FLAG}=${FLAG}") GLOBAL_APPEND(KOKKOS_AMDGPU_OPTIONS "${AMDGPU_ARCH_FLAG}=${FLAG}")
ENDIF()
IF(KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE) IF(KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE)
GLOBAL_APPEND(KOKKOS_LINK_OPTIONS "${AMDGPU_ARCH_FLAG}=${FLAG}") GLOBAL_APPEND(KOKKOS_LINK_OPTIONS "${AMDGPU_ARCH_FLAG}=${FLAG}")
ENDIF() ENDIF()
@ -724,6 +735,15 @@ FOREACH(ARCH IN LISTS SUPPORTED_AMD_ARCHS)
CHECK_AMDGPU_ARCH(${ARCH} ${FLAG}) CHECK_AMDGPU_ARCH(${ARCH} ${FLAG})
ENDFOREACH() ENDFOREACH()
IF(KOKKOS_IMPL_AMDGPU_FLAGS)
IF (NOT AMDGPU_ARCH_ALREADY_SPECIFIED)
MESSAGE(FATAL_ERROR "When IMPL_AMDGPU_FLAGS is set the architecture autodectection is disabled. "
"Please explicitly set the GPU architecture.")
ENDIF()
GLOBAL_APPEND(KOKKOS_AMDGPU_OPTIONS "${KOKKOS_IMPL_AMDGPU_FLAGS}")
GLOBAL_APPEND(KOKKOS_LINK_OPTIONS "${KOKKOS_IMPL_AMDGPU_LINK}")
ENDIF()
MACRO(SET_AND_CHECK_AMD_ARCH ARCH FLAG) MACRO(SET_AND_CHECK_AMD_ARCH ARCH FLAG)
KOKKOS_SET_OPTION(ARCH_${ARCH} ON) KOKKOS_SET_OPTION(ARCH_${ARCH} ON)
CHECK_AMDGPU_ARCH(${ARCH} ${FLAG}) CHECK_AMDGPU_ARCH(${ARCH} ${FLAG})
@ -984,7 +1004,7 @@ IF (KOKKOS_ARCH_HOPPER90)
ENDIF() ENDIF()
#HIP detection of gpu arch #HIP detection of gpu arch
IF(KOKKOS_ENABLE_HIP AND NOT AMDGPU_ARCH_ALREADY_SPECIFIED) IF(KOKKOS_ENABLE_HIP AND NOT AMDGPU_ARCH_ALREADY_SPECIFIED AND NOT KOKKOS_IMPL_AMDGPU_FLAGS)
FIND_PROGRAM(ROCM_ENUMERATOR rocm_agent_enumerator) FIND_PROGRAM(ROCM_ENUMERATOR rocm_agent_enumerator)
IF(NOT ROCM_ENUMERATOR) IF(NOT ROCM_ENUMERATOR)
MESSAGE(FATAL_ERROR "Autodetection of AMD GPU architecture not possible as " MESSAGE(FATAL_ERROR "Autodetection of AMD GPU architecture not possible as "

View File

@ -42,12 +42,8 @@ IF(Kokkos_ENABLE_CUDA)
# If launcher was found and nvcc_wrapper was not specified as # If launcher was found and nvcc_wrapper was not specified as
# compiler and `CMAKE_CXX_COMPILIER_LAUNCHER` is not set, set to use launcher. # compiler and `CMAKE_CXX_COMPILIER_LAUNCHER` is not set, set to use launcher.
# Will ensure CMAKE_CXX_COMPILER is replaced by nvcc_wrapper # Will ensure CMAKE_CXX_COMPILER is replaced by nvcc_wrapper
IF(Kokkos_COMPILE_LAUNCHER AND NOT INTERNAL_HAVE_COMPILER_NVCC AND NOT KOKKOS_CXX_COMPILER_ID STREQUAL Clang IF(Kokkos_COMPILE_LAUNCHER AND NOT INTERNAL_HAVE_COMPILER_NVCC AND NOT KOKKOS_CXX_COMPILER_ID STREQUAL Clang)
AND NOT (Kokkos_ENABLE_IMPL_NVHPC_AS_DEVICE_COMPILER AND KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC))
IF(CMAKE_CXX_COMPILER_LAUNCHER) IF(CMAKE_CXX_COMPILER_LAUNCHER)
IF(KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC)
MESSAGE(STATUS "Using nvc++ as device compiler requires Kokkos_ENABLE_IMPL_NVHPC_AS_DEVICE_COMPILER=ON!")
ENDIF()
MESSAGE(FATAL_ERROR "Cannot use CMAKE_CXX_COMPILER_LAUNCHER if the CMAKE_CXX_COMPILER is not able to compile CUDA code, i.e. nvcc_wrapper or clang++!") MESSAGE(FATAL_ERROR "Cannot use CMAKE_CXX_COMPILER_LAUNCHER if the CMAKE_CXX_COMPILER is not able to compile CUDA code, i.e. nvcc_wrapper or clang++!")
ENDIF() ENDIF()
# the first argument to launcher is always the C++ compiler defined by cmake # the first argument to launcher is always the C++ compiler defined by cmake
@ -149,56 +145,85 @@ IF(KOKKOS_CXX_COMPILER_ID STREQUAL Fujitsu)
ENDIF() ENDIF()
# Enforce the minimum compilers supported by Kokkos. # Enforce the minimum compilers supported by Kokkos.
SET(KOKKOS_MESSAGE_TEXT "Compiler not supported by Kokkos. Required compiler versions:") IF(NOT CMAKE_CXX_STANDARD)
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Clang(CPU) 8.0.0 or higher") SET(CMAKE_CXX_STANDARD 17)
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Clang(CUDA) 10.0.0 or higher") ENDIF()
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Clang(OpenMPTarget) 15.0.0 or higher") IF(CMAKE_CXX_STANDARD EQUAL 17)
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n GCC 8.2.0 or higher") SET(KOKKOS_CLANG_CPU_MINIMUM 8.0.0)
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Intel 19.0.5 or higher") SET(KOKKOS_CLANG_CUDA_MINIMUM 10.0.0)
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n IntelLLVM(CPU) 2021.1.1 or higher") SET(KOKKOS_CLANG_OPENMPTARGET_MINIMUM 15.0.0)
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n IntelLLVM(SYCL) 2023.0.0 or higher") SET(KOKKOS_GCC_MINIMUM 8.2.0)
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n NVCC 11.0.0 or higher") SET(KOKKOS_INTEL_MINIMUM 19.0.5)
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n HIPCC 5.2.0 or higher") SET(KOKKOS_INTEL_LLVM_CPU_MINIMUM 2021.1.1)
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n NVHPC/PGI 22.3 or higher") SET(KOKKOS_INTEL_LLVM_SYCL_MINIMUM 2023.0.0)
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n MSVC 19.29 or higher") SET(KOKKOS_NVCC_MINIMUM 11.0.0)
SET(KOKKOS_HIPCC_MINIMUM 5.2.0)
SET(KOKKOS_NVHPC_MINIMUM 22.3)
SET(KOKKOS_MSVC_MINIMUM 19.29)
ELSE()
SET(KOKKOS_CLANG_CPU_MINIMUM 14.0.0)
SET(KOKKOS_CLANG_CUDA_MINIMUM 14.0.0)
SET(KOKKOS_CLANG_OPENMPTARGET_MINIMUM 15.0.0)
SET(KOKKOS_GCC_MINIMUM 10.1.0)
SET(KOKKOS_INTEL_MINIMUM "not supported")
SET(KOKKOS_INTEL_LLVM_CPU_MINIMUM 2022.0.0)
SET(KOKKOS_INTEL_LLVM_SYCL_MINIMUM 2023.0.0)
SET(KOKKOS_NVCC_MINIMUM 12.0.0)
SET(KOKKOS_HIPCC_MINIMUM 5.2.0)
SET(KOKKOS_NVHPC_MINIMUM 22.3)
SET(KOKKOS_MSVC_MINIMUM 19.30)
ENDIF()
SET(KOKKOS_MESSAGE_TEXT "Compiler not supported by Kokkos for C++${CMAKE_CXX_STANDARD}. Required minimum compiler versions:")
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Clang(CPU) ${KOKKOS_CLANG_CPU_MINIMUM}")
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Clang(CUDA) ${KOKKOS_CLANG_CUDA_MINIMUM}")
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Clang(OpenMPTarget) ${KOKKOS_CLANG_OPENMPTARGET_MINIMUM}")
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n GCC ${KOKKOS_GCC_MINIMUM}")
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Intel ${KOKKOS_INTEL_MINIMUM}")
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n IntelLLVM(CPU) ${KOKKOS_INTEL_LLVM_CPU_MINIMUM}")
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n IntelLLVM(SYCL) ${KOKKOS_INTEL_LLVM_SYCL_MINIMUM}")
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n NVCC ${KOKKOS_NVCC_MINIMUM}")
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n HIPCC ${KOKKOS_HIPCC_MINIMUM}")
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n NVHPC/PGI ${KOKKOS_NVHPC_MINIMUM}")
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n MSVC ${KOKKOS_MSVC_MINIMUM}")
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n XL/XLClang not supported") SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n XL/XLClang not supported")
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\nCompiler: ${KOKKOS_CXX_COMPILER_ID} ${KOKKOS_CXX_COMPILER_VERSION}\n") SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\nCompiler: ${KOKKOS_CXX_COMPILER_ID} ${KOKKOS_CXX_COMPILER_VERSION}\n")
IF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang AND NOT Kokkos_ENABLE_CUDA) IF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang AND NOT Kokkos_ENABLE_CUDA)
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 8.0.0) IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS ${KOKKOS_CLANG_CPU_MINIMUM})
MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
ENDIF() ENDIF()
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang AND Kokkos_ENABLE_CUDA) ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang AND Kokkos_ENABLE_CUDA)
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 10.0.0) IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS ${KOKKOS_CLANG_CUDA_MINIMUM})
MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
ENDIF() ENDIF()
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL GNU) ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL GNU)
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 8.2.0) IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS ${KOKKOS_GCC_MINIMUM})
MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
ENDIF() ENDIF()
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL Intel) ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL Intel)
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 19.0.5) IF((NOT CMAKE_CXX_STANDARD EQUAL 17) OR (KOKKOS_CXX_COMPILER_VERSION VERSION_LESS ${KOKKOS_INTEL_MINIMUM}))
MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
ENDIF() ENDIF()
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL IntelLLVM AND NOT Kokkos_ENABLE_SYCL) ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL IntelLLVM AND NOT Kokkos_ENABLE_SYCL)
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 2021.1.1) IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS ${KOKKOS_INTEL_LLVM_CPU_MINIMUM})
MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
ENDIF() ENDIF()
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL IntelLLVM AND Kokkos_ENABLE_SYCL) ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL IntelLLVM AND Kokkos_ENABLE_SYCL)
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 2023.0.0) IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS ${KOKKOS_INTEL_LLVM_SYCL_MINIMUM})
MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
ENDIF() ENDIF()
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA) ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA)
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 11.0.0) IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS ${KOKKOS_NVCC_MINIMUM})
MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
ENDIF() ENDIF()
SET(CMAKE_CXX_EXTENSIONS OFF CACHE BOOL "Kokkos turns off CXX extensions" FORCE) SET(CMAKE_CXX_EXTENSIONS OFF CACHE BOOL "Kokkos turns off CXX extensions" FORCE)
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL HIPCC) ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL HIPCC)
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 5.2.0) IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS ${KOKKOS_HIPCC_MINIMUM})
MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
ENDIF() ENDIF()
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL PGI OR KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC) ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL PGI OR KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC)
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 22.3) IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS ${KOKKOS_NVHPC_MINIMUM})
MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
ENDIF() ENDIF()
# Treat PGI internally as NVHPC to simplify handling both compilers. # Treat PGI internally as NVHPC to simplify handling both compilers.
@ -206,13 +231,13 @@ ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL PGI OR KOKKOS_CXX_COMPILER_ID STREQUAL NV
# backward-compatible to pgc++. # backward-compatible to pgc++.
SET(KOKKOS_CXX_COMPILER_ID NVHPC CACHE STRING INTERNAL FORCE) SET(KOKKOS_CXX_COMPILER_ID NVHPC CACHE STRING INTERNAL FORCE)
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL "MSVC") ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL "MSVC")
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 19.29) IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS ${KOKKOS_MSVC_MINIMUM})
MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
ENDIF() ENDIF()
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL XL OR KOKKOS_CXX_COMPILER_ID STREQUAL XLClang) ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL XL OR KOKKOS_CXX_COMPILER_ID STREQUAL XLClang)
MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang AND Kokkos_ENABLE_OPENMPTARGET) ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang AND Kokkos_ENABLE_OPENMPTARGET)
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 15.0.0) IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS KOKKOS_CLANG_OPENMPTARGET_MINIMUM)
MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
ENDIF() ENDIF()
ENDIF() ENDIF()

View File

@ -48,6 +48,8 @@ KOKKOS_ENABLE_OPTION(CUDA_LAMBDA ${CUDA_LAMBDA_DEFAULT} "Whether to allow lambda
# resolved but we keep the option around a bit longer to be safe. # resolved but we keep the option around a bit longer to be safe.
KOKKOS_ENABLE_OPTION(IMPL_CUDA_MALLOC_ASYNC ON "Whether to enable CudaMallocAsync (requires CUDA Toolkit 11.2)") KOKKOS_ENABLE_OPTION(IMPL_CUDA_MALLOC_ASYNC ON "Whether to enable CudaMallocAsync (requires CUDA Toolkit 11.2)")
KOKKOS_ENABLE_OPTION(IMPL_NVHPC_AS_DEVICE_COMPILER OFF "Whether to allow nvc++ as Cuda device compiler") KOKKOS_ENABLE_OPTION(IMPL_NVHPC_AS_DEVICE_COMPILER OFF "Whether to allow nvc++ as Cuda device compiler")
KOKKOS_ENABLE_OPTION(IMPL_CUDA_UNIFIED_MEMORY OFF "Whether to leverage unified memory architectures for CUDA")
KOKKOS_ENABLE_OPTION(DEPRECATED_CODE_4 ON "Whether code deprecated in major release 4 is available" ) KOKKOS_ENABLE_OPTION(DEPRECATED_CODE_4 ON "Whether code deprecated in major release 4 is available" )
KOKKOS_ENABLE_OPTION(DEPRECATION_WARNINGS ON "Whether to emit deprecation warnings" ) KOKKOS_ENABLE_OPTION(DEPRECATION_WARNINGS ON "Whether to emit deprecation warnings" )
KOKKOS_ENABLE_OPTION(HIP_RELOCATABLE_DEVICE_CODE OFF "Whether to enable relocatable device code (RDC) for HIP") KOKKOS_ENABLE_OPTION(HIP_RELOCATABLE_DEVICE_CODE OFF "Whether to enable relocatable device code (RDC) for HIP")
@ -75,8 +77,12 @@ KOKKOS_ENABLE_OPTION(IMPL_HIP_UNIFIED_MEMORY OFF "Whether to leverage unified me
# This option will go away eventually, but allows fallback to old implementation when needed. # This option will go away eventually, but allows fallback to old implementation when needed.
KOKKOS_ENABLE_OPTION(DESUL_ATOMICS_EXTERNAL OFF "Whether to use an external desul installation") KOKKOS_ENABLE_OPTION(DESUL_ATOMICS_EXTERNAL OFF "Whether to use an external desul installation")
KOKKOS_ENABLE_OPTION(ATOMICS_BYPASS OFF "**NOT RECOMMENDED** Whether to make atomics non-atomic for non-threaded MPI-only use cases") KOKKOS_ENABLE_OPTION(ATOMICS_BYPASS OFF "**NOT RECOMMENDED** Whether to make atomics non-atomic for non-threaded MPI-only use cases")
KOKKOS_ENABLE_OPTION(IMPL_REF_COUNT_BRANCH_UNLIKELY ON "Whether to use the C++20 `[[unlikely]]` attribute in the view reference counting")
mark_as_advanced(Kokkos_ENABLE_IMPL_REF_COUNT_BRANCH_UNLIKELY)
KOKKOS_ENABLE_OPTION(IMPL_VIEW_OF_VIEWS_DESTRUCTOR_PRECONDITION_VIOLATION_WORKAROUND OFF "Whether to enable a workaround for invalid use of View of Views that causes program hang on destruction.")
mark_as_advanced(Kokkos_ENABLE_IMPL_VIEW_OF_VIEWS_DESTRUCTOR_PRECONDITION_VIOLATION_WORKAROUND)
KOKKOS_ENABLE_OPTION(IMPL_MDSPAN OFF "Whether to enable experimental mdspan support") KOKKOS_ENABLE_OPTION(IMPL_MDSPAN ON "Whether to enable experimental mdspan support")
KOKKOS_ENABLE_OPTION(MDSPAN_EXTERNAL OFF BOOL "Whether to use an external version of mdspan") KOKKOS_ENABLE_OPTION(MDSPAN_EXTERNAL OFF BOOL "Whether to use an external version of mdspan")
KOKKOS_ENABLE_OPTION(IMPL_SKIP_COMPILER_MDSPAN ON BOOL "Whether to use an internal version of mdspan even if the compiler supports mdspan") KOKKOS_ENABLE_OPTION(IMPL_SKIP_COMPILER_MDSPAN ON BOOL "Whether to use an internal version of mdspan even if the compiler supports mdspan")
mark_as_advanced(Kokkos_ENABLE_IMPL_MDSPAN) mark_as_advanced(Kokkos_ENABLE_IMPL_MDSPAN)
@ -131,7 +137,7 @@ FUNCTION(check_device_specific_options)
ENDIF() ENDIF()
ENDFUNCTION() ENDFUNCTION()
CHECK_DEVICE_SPECIFIC_OPTIONS(DEVICE CUDA OPTIONS CUDA_UVM CUDA_RELOCATABLE_DEVICE_CODE CUDA_LAMBDA CUDA_CONSTEXPR CUDA_LDG_INTRINSIC) CHECK_DEVICE_SPECIFIC_OPTIONS(DEVICE CUDA OPTIONS CUDA_UVM CUDA_RELOCATABLE_DEVICE_CODE CUDA_LAMBDA CUDA_CONSTEXPR CUDA_LDG_INTRINSIC IMPL_CUDA_UNIFIED_MEMORY)
CHECK_DEVICE_SPECIFIC_OPTIONS(DEVICE HIP OPTIONS HIP_RELOCATABLE_DEVICE_CODE) CHECK_DEVICE_SPECIFIC_OPTIONS(DEVICE HIP OPTIONS HIP_RELOCATABLE_DEVICE_CODE)
CHECK_DEVICE_SPECIFIC_OPTIONS(DEVICE HPX OPTIONS IMPL_HPX_ASYNC_DISPATCH) CHECK_DEVICE_SPECIFIC_OPTIONS(DEVICE HPX OPTIONS IMPL_HPX_ASYNC_DISPATCH)

View File

@ -709,7 +709,12 @@ MACRO(kokkos_find_imported NAME)
ENDIF() ENDIF()
IF (NOT TPL_LIBRARY_SUFFIXES) IF (NOT TPL_LIBRARY_SUFFIXES)
SET(TPL_LIBRARY_SUFFIXES lib lib64) SET(TPL_LIBRARY_SUFFIXES lib)
IF(KOKKOS_IMPL_32BIT)
LIST(APPEND TPL_LIBRARY_SUFFIXES lib32)
ELSE()
LIST(APPEND TPL_LIBRARY_SUFFIXES lib64)
ENDIF()
ENDIF() ENDIF()
SET(${NAME}_INCLUDE_DIRS) SET(${NAME}_INCLUDE_DIRS)

View File

@ -124,12 +124,8 @@ IF(KOKKOS_ENABLE_CUDA)
ELSEIF(CMAKE_CXX_EXTENSIONS) ELSEIF(CMAKE_CXX_EXTENSIONS)
MESSAGE(FATAL_ERROR "Compiling CUDA code with clang doesn't support C++ extensions. Set -DCMAKE_CXX_EXTENSIONS=OFF") MESSAGE(FATAL_ERROR "Compiling CUDA code with clang doesn't support C++ extensions. Set -DCMAKE_CXX_EXTENSIONS=OFF")
ENDIF() ENDIF()
ELSEIF(NOT KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA AND NOT (Kokkos_ENABLE_IMPL_NVHPC_AS_DEVICE_COMPILER AND KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC)) ELSEIF(NOT KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA)
IF(KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC) MESSAGE(FATAL_ERROR "Invalid compiler for CUDA. The compiler must be nvcc_wrapper or Clang or use kokkos_launch_compiler, but compiler ID was ${KOKKOS_CXX_COMPILER_ID}")
MESSAGE(FATAL_ERROR "Invalid compiler for CUDA. To allow nvc++ as Cuda compiler, Kokkos_ENABLE_IMPL_NVHPC_AS_DEVICE_COMPILER=ON must be set!")
ELSE()
MESSAGE(FATAL_ERROR "Invalid compiler for CUDA. The compiler must be nvcc_wrapper or Clang or NVC++ or use kokkos_launch_compiler, but compiler ID was ${KOKKOS_CXX_COMPILER_ID}")
ENDIF()
ENDIF() ENDIF()
ENDIF() ENDIF()

View File

@ -103,13 +103,19 @@ if (Kokkos_ENABLE_IMPL_MDSPAN AND Kokkos_ENABLE_MDSPAN_EXTERNAL)
endif() endif()
IF (Kokkos_ENABLE_OPENMP) IF (Kokkos_ENABLE_OPENMP)
find_package(OpenMP REQUIRED) find_package(OpenMP REQUIRED COMPONENTS CXX)
# FIXME_TRILINOS Trilinos doesn't allow for Kokkos to use find_dependency # FIXME_TRILINOS Trilinos doesn't allow for Kokkos to use find_dependency
# so we just append the flags here instead of linking with the OpenMP target. # so we just append the flags here instead of linking with the OpenMP target.
IF(KOKKOS_HAS_TRILINOS) IF(KOKKOS_HAS_TRILINOS)
COMPILER_SPECIFIC_FLAGS(DEFAULT ${OpenMP_CXX_FLAGS}) COMPILER_SPECIFIC_FLAGS(DEFAULT ${OpenMP_CXX_FLAGS})
ELSE() ELSE()
KOKKOS_EXPORT_CMAKE_TPL(OpenMP REQUIRED) KOKKOS_EXPORT_CMAKE_TPL(OpenMP REQUIRED COMPONENTS CXX)
ENDIF()
IF(Kokkos_ENABLE_HIP AND KOKKOS_COMPILE_LANGUAGE STREQUAL HIP)
GLOBAL_APPEND(KOKKOS_AMDGPU_OPTIONS ${OpenMP_CXX_FLAGS})
ENDIF()
IF(Kokkos_ENABLE_CUDA AND KOKKOS_COMPILE_LANGUAGE STREQUAL CUDA)
GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS -Xcompiler ${OpenMP_CXX_FLAGS})
ENDIF() ENDIF()
ENDIF() ENDIF()

View File

@ -160,6 +160,12 @@ FUNCTION(KOKKOS_ADD_EXECUTABLE_AND_TEST ROOT_NAME)
) )
ENDIF() ENDIF()
ENDIF() ENDIF()
# We noticed problems with -fvisibility=hidden for inline static variables
# if Kokkos was built as shared library.
IF(BUILD_SHARED_LIBS)
SET_PROPERTY(TARGET ${PACKAGE_NAME}_${ROOT_NAME} PROPERTY VISIBILITY_INLINES_HIDDEN ON)
SET_PROPERTY(TARGET ${PACKAGE_NAME}_${ROOT_NAME} PROPERTY CXX_VISIBILITY_PRESET hidden)
ENDIF()
ENDFUNCTION() ENDFUNCTION()
FUNCTION(KOKKOS_SET_EXE_PROPERTY ROOT_NAME) FUNCTION(KOKKOS_SET_EXE_PROPERTY ROOT_NAME)
@ -241,34 +247,6 @@ MACRO(KOKKOS_CONFIGURE_CORE)
KOKKOS_CONFIG_HEADER( KokkosCore_Config_HeaderSet.in KokkosCore_Config_FwdBackend.hpp "KOKKOS_FWD" "fwd/Kokkos_Fwd" "${KOKKOS_ENABLED_DEVICES}") KOKKOS_CONFIG_HEADER( KokkosCore_Config_HeaderSet.in KokkosCore_Config_FwdBackend.hpp "KOKKOS_FWD" "fwd/Kokkos_Fwd" "${KOKKOS_ENABLED_DEVICES}")
KOKKOS_CONFIG_HEADER( KokkosCore_Config_HeaderSet.in KokkosCore_Config_SetupBackend.hpp "KOKKOS_SETUP" "setup/Kokkos_Setup" "${DEVICE_SETUP_LIST}") KOKKOS_CONFIG_HEADER( KokkosCore_Config_HeaderSet.in KokkosCore_Config_SetupBackend.hpp "KOKKOS_SETUP" "setup/Kokkos_Setup" "${DEVICE_SETUP_LIST}")
KOKKOS_CONFIG_HEADER( KokkosCore_Config_HeaderSet.in KokkosCore_Config_DeclareBackend.hpp "KOKKOS_DECLARE" "decl/Kokkos_Declare" "${KOKKOS_ENABLED_DEVICES}") KOKKOS_CONFIG_HEADER( KokkosCore_Config_HeaderSet.in KokkosCore_Config_DeclareBackend.hpp "KOKKOS_DECLARE" "decl/Kokkos_Declare" "${KOKKOS_ENABLED_DEVICES}")
SET(_DEFAULT_HOST_MEMSPACE "::Kokkos::HostSpace")
KOKKOS_OPTION(DEFAULT_DEVICE_MEMORY_SPACE "" STRING "Override default device memory space")
KOKKOS_OPTION(DEFAULT_HOST_MEMORY_SPACE "" STRING "Override default host memory space")
KOKKOS_OPTION(DEFAULT_DEVICE_EXECUTION_SPACE "" STRING "Override default device execution space")
KOKKOS_OPTION(DEFAULT_HOST_PARALLEL_EXECUTION_SPACE "" STRING "Override default host parallel execution space")
IF (NOT Kokkos_DEFAULT_DEVICE_EXECUTION_SPACE STREQUAL "")
SET(_DEVICE_PARALLEL ${Kokkos_DEFAULT_DEVICE_EXECUTION_SPACE})
MESSAGE(STATUS "Override default device execution space: ${_DEVICE_PARALLEL}")
SET(KOKKOS_DEVICE_SPACE_ACTIVE ON)
ELSE()
IF (_DEVICE_PARALLEL STREQUAL "NoTypeDefined")
SET(KOKKOS_DEVICE_SPACE_ACTIVE OFF)
ELSE()
SET(KOKKOS_DEVICE_SPACE_ACTIVE ON)
ENDIF()
ENDIF()
IF (NOT Kokkos_DEFAULT_HOST_PARALLEL_EXECUTION_SPACE STREQUAL "")
SET(_HOST_PARALLEL ${Kokkos_DEFAULT_HOST_PARALLEL_EXECUTION_SPACE})
MESSAGE(STATUS "Override default host parallel execution space: ${_HOST_PARALLEL}")
SET(KOKKOS_HOSTPARALLEL_SPACE_ACTIVE ON)
ELSE()
IF (_HOST_PARALLEL STREQUAL "NoTypeDefined")
SET(KOKKOS_HOSTPARALLEL_SPACE_ACTIVE OFF)
ELSE()
SET(KOKKOS_HOSTPARALLEL_SPACE_ACTIVE ON)
ENDIF()
ENDIF()
#We are ready to configure the header
CONFIGURE_FILE(cmake/KokkosCore_config.h.in KokkosCore_config.h @ONLY) CONFIGURE_FILE(cmake/KokkosCore_config.h.in KokkosCore_config.h @ONLY)
ENDMACRO() ENDMACRO()
@ -484,15 +462,10 @@ ENDFUNCTION()
FUNCTION(KOKKOS_LIB_INCLUDE_DIRECTORIES TARGET) FUNCTION(KOKKOS_LIB_INCLUDE_DIRECTORIES TARGET)
IF(KOKKOS_HAS_TRILINOS)
#ignore the target, tribits doesn't do anything directly with targets
TRIBITS_INCLUDE_DIRECTORIES(${ARGN})
ELSE() #append to a list for later
KOKKOS_LIB_TYPE(${TARGET} INCTYPE) KOKKOS_LIB_TYPE(${TARGET} INCTYPE)
FOREACH(DIR ${ARGN}) FOREACH(DIR ${ARGN})
TARGET_INCLUDE_DIRECTORIES(${TARGET} ${INCTYPE} $<BUILD_INTERFACE:${DIR}>) TARGET_INCLUDE_DIRECTORIES(${TARGET} ${INCTYPE} $<BUILD_INTERFACE:${DIR}>)
ENDFOREACH() ENDFOREACH()
ENDIF()
ENDFUNCTION() ENDFUNCTION()
FUNCTION(KOKKOS_LIB_COMPILE_OPTIONS TARGET) FUNCTION(KOKKOS_LIB_COMPILE_OPTIONS TARGET)

View File

@ -1,26 +0,0 @@
#@HEADER
# ************************************************************************
#
# Kokkos v. 4.0
# Copyright (2022) National Technology & Engineering
# Solutions of Sandia, LLC (NTESS).
#
# Under the terms of Contract DE-NA0003525 with NTESS,
# the U.S. Government retains certain rights in this software.
#
# Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
#
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
#@HEADER
# Check for CUDA support
IF (NOT TPL_ENABLE_CUDA)
MESSAGE(FATAL_ERROR "\nCUSPARSE requires CUDA")
ELSE()
GLOBAL_SET(TPL_CUSPARSE_LIBRARY_DIRS)
GLOBAL_SET(TPL_CUSPARSE_INCLUDE_DIRS ${TPL_CUDA_INCLUDE_DIRS})
GLOBAL_SET(TPL_CUSPARSE_LIBRARIES ${CUDA_cusparse_LIBRARY})
ENDIF()

View File

@ -944,13 +944,13 @@ class DualView : public ViewTraits<DataType, Properties...> {
if (sizeMismatch) { if (sizeMismatch) {
::Kokkos::realloc(arg_prop, d_view, n0, n1, n2, n3, n4, n5, n6, n7); ::Kokkos::realloc(arg_prop, d_view, n0, n1, n2, n3, n4, n5, n6, n7);
if (alloc_prop_input::initialize) { if constexpr (alloc_prop_input::initialize) {
h_view = create_mirror_view(typename t_host::memory_space(), d_view); h_view = create_mirror_view(typename t_host::memory_space(), d_view);
} else { } else {
h_view = create_mirror_view(Kokkos::WithoutInitializing, h_view = create_mirror_view(Kokkos::WithoutInitializing,
typename t_host::memory_space(), d_view); typename t_host::memory_space(), d_view);
} }
} else if (alloc_prop_input::initialize) { } else if constexpr (alloc_prop_input::initialize) {
if constexpr (alloc_prop_input::has_execution_space) { if constexpr (alloc_prop_input::has_execution_space) {
const auto& exec_space = const auto& exec_space =
Impl::get_property<Impl::ExecutionSpaceTag>(arg_prop); Impl::get_property<Impl::ExecutionSpaceTag>(arg_prop);
@ -1038,12 +1038,10 @@ class DualView : public ViewTraits<DataType, Properties...> {
/* Resize on Device */ /* Resize on Device */
if (sizeMismatch) { if (sizeMismatch) {
::Kokkos::resize(properties, d_view, n0, n1, n2, n3, n4, n5, n6, n7); ::Kokkos::resize(properties, d_view, n0, n1, n2, n3, n4, n5, n6, n7);
if (alloc_prop_input::initialize) { // this part of the lambda was relocated in a method as it contains a
h_view = create_mirror_view(typename t_host::memory_space(), d_view); // `if constexpr`. In some cases, both branches were evaluated
} else { // leading to a compile error
h_view = create_mirror_view(Kokkos::WithoutInitializing, resync_host(properties);
typename t_host::memory_space(), d_view);
}
/* Mark Device copy as modified */ /* Mark Device copy as modified */
++modified_flags(1); ++modified_flags(1);
@ -1054,13 +1052,10 @@ class DualView : public ViewTraits<DataType, Properties...> {
/* Resize on Host */ /* Resize on Host */
if (sizeMismatch) { if (sizeMismatch) {
::Kokkos::resize(properties, h_view, n0, n1, n2, n3, n4, n5, n6, n7); ::Kokkos::resize(properties, h_view, n0, n1, n2, n3, n4, n5, n6, n7);
if (alloc_prop_input::initialize) { // this part of the lambda was relocated in a method as it contains a
d_view = create_mirror_view(typename t_dev::memory_space(), h_view); // `if constexpr`. In some cases, both branches were evaluated
// leading to a compile error
} else { resync_device(properties);
d_view = create_mirror_view(Kokkos::WithoutInitializing,
typename t_dev::memory_space(), h_view);
}
/* Mark Host copy as modified */ /* Mark Host copy as modified */
++modified_flags(0); ++modified_flags(0);
@ -1099,6 +1094,39 @@ class DualView : public ViewTraits<DataType, Properties...> {
} }
} }
private:
// resync host mirror from device
// this code was relocated from a lambda as it contains a `if constexpr`.
// In some cases, both branches were evaluated, leading to a compile error
template <class... ViewCtorArgs>
inline void resync_host(Impl::ViewCtorProp<ViewCtorArgs...> const&) {
using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>;
if constexpr (alloc_prop_input::initialize) {
h_view = create_mirror_view(typename t_host::memory_space(), d_view);
} else {
h_view = create_mirror_view(Kokkos::WithoutInitializing,
typename t_host::memory_space(), d_view);
}
}
// resync device mirror from host
// this code was relocated from a lambda as it contains a `if constexpr`
// In some cases, both branches were evaluated leading to a compile error
template <class... ViewCtorArgs>
inline void resync_device(Impl::ViewCtorProp<ViewCtorArgs...> const&) {
using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>;
if constexpr (alloc_prop_input::initialize) {
d_view = create_mirror_view(typename t_dev::memory_space(), h_view);
} else {
d_view = create_mirror_view(Kokkos::WithoutInitializing,
typename t_dev::memory_space(), h_view);
}
}
public:
void resize(const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, void resize(const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
const size_t n2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,

View File

@ -1657,8 +1657,7 @@ KOKKOS_FUNCTION auto as_view_of_rank_n(
if constexpr (std::is_same_v<decltype(layout), Kokkos::LayoutLeft> || if constexpr (std::is_same_v<decltype(layout), Kokkos::LayoutLeft> ||
std::is_same_v<decltype(layout), Kokkos::LayoutRight> || std::is_same_v<decltype(layout), Kokkos::LayoutRight> ||
std::is_same_v<decltype(layout), Kokkos::LayoutStride> || std::is_same_v<decltype(layout), Kokkos::LayoutStride>) {
is_layouttiled<decltype(layout)>::value) {
for (int i = N; i < 7; ++i) for (int i = N; i < 7; ++i)
layout.dimension[i] = KOKKOS_IMPL_CTOR_DEFAULT_ARG; layout.dimension[i] = KOKKOS_IMPL_CTOR_DEFAULT_ARG;
} }
@ -1933,254 +1932,155 @@ struct MirrorDRVType {
} // namespace Impl } // namespace Impl
namespace Impl { namespace Impl {
// create a mirror
// private interface that accepts arbitrary view constructor args passed by a
// view_alloc
template <class T, class... P, class... ViewCtorArgs> template <class T, class... P, class... ViewCtorArgs>
inline typename DynRankView<T, P...>::HostMirror create_mirror( inline auto create_mirror(const DynRankView<T, P...>& src,
const DynRankView<T, P...>& src, const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop) {
const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop, check_view_ctor_args_create_mirror<ViewCtorArgs...>();
std::enable_if_t<!Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space>* =
nullptr) {
using src_type = DynRankView<T, P...>;
using dst_type = typename src_type::HostMirror;
using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>;
static_assert(
!alloc_prop_input::has_label,
"The view constructor arguments passed to Kokkos::create_mirror "
"must not include a label!");
static_assert(
!alloc_prop_input::has_pointer,
"The view constructor arguments passed to Kokkos::create_mirror must "
"not include a pointer!");
static_assert(
!alloc_prop_input::allow_padding,
"The view constructor arguments passed to Kokkos::create_mirror must "
"not explicitly allow padding!");
auto prop_copy = Impl::with_properties_if_unset( auto prop_copy = Impl::with_properties_if_unset(
arg_prop, std::string(src.label()).append("_mirror")); arg_prop, std::string(src.label()).append("_mirror"));
return dst_type(prop_copy, Impl::reconstructLayout(src.layout(), src.rank())); if constexpr (Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space) {
}
template <class T, class... P, class... ViewCtorArgs>
inline auto create_mirror(
const DynRankView<T, P...>& src,
const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop,
std::enable_if_t<Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space>* =
nullptr) {
using dst_type = typename Impl::MirrorDRVType< using dst_type = typename Impl::MirrorDRVType<
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space, T, typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space, T,
P...>::view_type; P...>::view_type;
using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>; return dst_type(prop_copy,
Impl::reconstructLayout(src.layout(), src.rank()));
} else {
using src_type = DynRankView<T, P...>;
using dst_type = typename src_type::HostMirror;
static_assert( return dst_type(prop_copy,
!alloc_prop_input::has_label, Impl::reconstructLayout(src.layout(), src.rank()));
"The view constructor arguments passed to Kokkos::create_mirror " }
"must not include a label!"); #if defined(KOKKOS_COMPILER_INTEL) || \
static_assert( (defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
!alloc_prop_input::has_pointer, !defined(KOKKOS_COMPILER_MSVC))
"The view constructor arguments passed to Kokkos::create_mirror must " __builtin_unreachable();
"not include a pointer!"); #endif
static_assert(
!alloc_prop_input::allow_padding,
"The view constructor arguments passed to Kokkos::create_mirror must "
"not explicitly allow padding!");
auto prop_copy = Impl::with_properties_if_unset(
arg_prop, std::string(src.label()).append("_mirror"));
return dst_type(prop_copy, Impl::reconstructLayout(src.layout(), src.rank()));
} }
} // namespace Impl } // namespace Impl
// Create a mirror in host space // public interface
template <class T, class... P> template <class T, class... P,
inline typename DynRankView<T, P...>::HostMirror create_mirror( class Enable = std::enable_if_t<
const DynRankView<T, P...>& src, std::is_void_v<typename ViewTraits<T, P...>::specialize>>>
std::enable_if_t<std::is_same<typename ViewTraits<T, P...>::specialize, inline auto create_mirror(const DynRankView<T, P...>& src) {
void>::value>* = nullptr) { return Impl::create_mirror(src, Kokkos::view_alloc());
return Impl::create_mirror(src, Kokkos::Impl::ViewCtorProp<>{});
} }
template <class T, class... P> // public interface that accepts a without initializing flag
inline typename DynRankView<T, P...>::HostMirror create_mirror( template <class T, class... P,
Kokkos::Impl::WithoutInitializing_t wi, const DynRankView<T, P...>& src, class Enable = std::enable_if_t<
std::enable_if_t<std::is_same<typename ViewTraits<T, P...>::specialize, std::is_void_v<typename ViewTraits<T, P...>::specialize>>>
void>::value>* = nullptr) { inline auto create_mirror(Kokkos::Impl::WithoutInitializing_t wi,
const DynRankView<T, P...>& src) {
return Impl::create_mirror(src, Kokkos::view_alloc(wi)); return Impl::create_mirror(src, Kokkos::view_alloc(wi));
} }
template <class T, class... P, class... ViewCtorArgs> // public interface that accepts a space
inline typename DynRankView<T, P...>::HostMirror create_mirror(
const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop,
const DynRankView<T, P...>& src,
std::enable_if_t<
std::is_void<typename ViewTraits<T, P...>::specialize>::value &&
!Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space>* = nullptr) {
return Impl::create_mirror(src, arg_prop);
}
// Create a mirror in a new space
template <class Space, class T, class... P, template <class Space, class T, class... P,
typename Enable = std::enable_if_t< class Enable = std::enable_if_t<
Kokkos::is_space<Space>::value && Kokkos::is_space<Space>::value &&
std::is_void<typename ViewTraits<T, P...>::specialize>::value>> std::is_void_v<typename ViewTraits<T, P...>::specialize>>>
typename Impl::MirrorDRVType<Space, T, P...>::view_type create_mirror( auto create_mirror(const Space&, const Kokkos::DynRankView<T, P...>& src) {
const Space&, const Kokkos::DynRankView<T, P...>& src) {
return Impl::create_mirror( return Impl::create_mirror(
src, Kokkos::view_alloc(typename Space::memory_space{})); src, Kokkos::view_alloc(typename Space::memory_space{}));
} }
template <class Space, class T, class... P> // public interface that accepts a space and a without initializing flag
typename Impl::MirrorDRVType<Space, T, P...>::view_type create_mirror( template <class Space, class T, class... P,
Kokkos::Impl::WithoutInitializing_t wi, const Space&, class Enable = std::enable_if_t<
const Kokkos::DynRankView<T, P...>& src, Kokkos::is_space<Space>::value &&
std::enable_if_t<std::is_same<typename ViewTraits<T, P...>::specialize, std::is_void_v<typename ViewTraits<T, P...>::specialize>>>
void>::value>* = nullptr) { auto create_mirror(Kokkos::Impl::WithoutInitializing_t wi, const Space&,
const Kokkos::DynRankView<T, P...>& src) {
return Impl::create_mirror( return Impl::create_mirror(
src, Kokkos::view_alloc(wi, typename Space::memory_space{})); src, Kokkos::view_alloc(wi, typename Space::memory_space{}));
} }
template <class T, class... P, class... ViewCtorArgs> // public interface that accepts arbitrary view constructor args passed by a
inline auto create_mirror( // view_alloc
const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop, template <class T, class... P, class... ViewCtorArgs,
const DynRankView<T, P...>& src, typename Enable = std::enable_if_t<
std::enable_if_t< std::is_void_v<typename ViewTraits<T, P...>::specialize>>>
std::is_void<typename ViewTraits<T, P...>::specialize>::value && inline auto create_mirror(const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop,
Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space>* = nullptr) { const DynRankView<T, P...>& src) {
using ReturnType = typename Impl::MirrorDRVType< return Impl::create_mirror(src, arg_prop);
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space, T,
P...>::view_type;
return ReturnType{Impl::create_mirror(src, arg_prop)};
} }
namespace Impl { namespace Impl {
template <class T, class... P, class... ViewCtorArgs>
inline std::enable_if_t<
!Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space &&
std::is_same<
typename DynRankView<T, P...>::memory_space,
typename DynRankView<T, P...>::HostMirror::memory_space>::value &&
std::is_same<
typename DynRankView<T, P...>::data_type,
typename DynRankView<T, P...>::HostMirror::data_type>::value,
typename DynRankView<T, P...>::HostMirror>
create_mirror_view(const DynRankView<T, P...>& src,
const typename Impl::ViewCtorProp<ViewCtorArgs...>&) {
return src;
}
// create a mirror view
// private interface that accepts arbitrary view constructor args passed by a
// view_alloc
template <class T, class... P, class... ViewCtorArgs> template <class T, class... P, class... ViewCtorArgs>
inline std::enable_if_t< inline auto create_mirror_view(
!Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space &&
!(std::is_same<
typename DynRankView<T, P...>::memory_space,
typename DynRankView<T, P...>::HostMirror::memory_space>::value &&
std::is_same<
typename DynRankView<T, P...>::data_type,
typename DynRankView<T, P...>::HostMirror::data_type>::value),
typename DynRankView<T, P...>::HostMirror>
create_mirror_view(
const DynRankView<T, P...>& src, const DynRankView<T, P...>& src,
const typename Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop) { [[maybe_unused]] const typename Impl::ViewCtorProp<ViewCtorArgs...>&
return Kokkos::Impl::create_mirror(src, arg_prop); arg_prop) {
if constexpr (!Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space) {
if constexpr (std::is_same<typename DynRankView<T, P...>::memory_space,
typename DynRankView<
T, P...>::HostMirror::memory_space>::value &&
std::is_same<typename DynRankView<T, P...>::data_type,
typename DynRankView<
T, P...>::HostMirror::data_type>::value) {
return typename DynRankView<T, P...>::HostMirror(src);
} else {
return Kokkos::Impl::choose_create_mirror(src, arg_prop);
}
} else {
if constexpr (Impl::MirrorDRViewType<typename Impl::ViewCtorProp<
ViewCtorArgs...>::memory_space,
T, P...>::is_same_memspace) {
return typename Impl::MirrorDRViewType<
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space, T,
P...>::view_type(src);
} else {
return Kokkos::Impl::choose_create_mirror(src, arg_prop);
}
}
#if defined(KOKKOS_COMPILER_INTEL) || \
(defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
!defined(KOKKOS_COMPILER_MSVC))
__builtin_unreachable();
#endif
} }
template <class T, class... P, class... ViewCtorArgs,
class = std::enable_if_t<
Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space>>
inline std::enable_if_t<
Kokkos::is_space<
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space>::value &&
Impl::MirrorDRViewType<
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space, T,
P...>::is_same_memspace,
typename Impl::MirrorDRViewType<
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space, T,
P...>::view_type>
create_mirror_view(const Kokkos::DynRankView<T, P...>& src,
const typename Impl::ViewCtorProp<ViewCtorArgs...>&) {
return src;
}
template <class T, class... P, class... ViewCtorArgs,
class = std::enable_if_t<
Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space>>
inline std::enable_if_t<
Kokkos::is_space<
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space>::value &&
!Impl::MirrorDRViewType<
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space, T,
P...>::is_same_memspace,
typename Impl::MirrorDRViewType<
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space, T,
P...>::view_type>
create_mirror_view(
const Kokkos::DynRankView<T, P...>& src,
const typename Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop) {
return Kokkos::Impl::create_mirror(src, arg_prop);
}
} // namespace Impl } // namespace Impl
// Create a mirror view in host space // public interface
template <class T, class... P> template <class T, class... P>
inline std::enable_if_t< inline auto create_mirror_view(const Kokkos::DynRankView<T, P...>& src) {
(std::is_same< return Impl::create_mirror_view(src, Kokkos::view_alloc());
typename DynRankView<T, P...>::memory_space,
typename DynRankView<T, P...>::HostMirror::memory_space>::value &&
std::is_same<typename DynRankView<T, P...>::data_type,
typename DynRankView<T, P...>::HostMirror::data_type>::value),
typename DynRankView<T, P...>::HostMirror>
create_mirror_view(const Kokkos::DynRankView<T, P...>& src) {
return src;
}
template <class T, class... P>
inline std::enable_if_t<
!(std::is_same<
typename DynRankView<T, P...>::memory_space,
typename DynRankView<T, P...>::HostMirror::memory_space>::value &&
std::is_same<
typename DynRankView<T, P...>::data_type,
typename DynRankView<T, P...>::HostMirror::data_type>::value),
typename DynRankView<T, P...>::HostMirror>
create_mirror_view(const Kokkos::DynRankView<T, P...>& src) {
return Kokkos::create_mirror(src);
} }
// public interface that accepts a without initializing flag
template <class T, class... P> template <class T, class... P>
inline auto create_mirror_view(Kokkos::Impl::WithoutInitializing_t wi, inline auto create_mirror_view(Kokkos::Impl::WithoutInitializing_t wi,
const DynRankView<T, P...>& src) { const DynRankView<T, P...>& src) {
return Impl::create_mirror_view(src, Kokkos::view_alloc(wi)); return Impl::create_mirror_view(src, Kokkos::view_alloc(wi));
} }
// Create a mirror view in a new space // public interface that accepts a space
// FIXME_C++17 Improve SFINAE here.
template <class Space, class T, class... P, template <class Space, class T, class... P,
class Enable = std::enable_if_t<Kokkos::is_space<Space>::value>> class Enable = std::enable_if_t<Kokkos::is_space<Space>::value>>
inline typename Impl::MirrorDRViewType<Space, T, P...>::view_type inline auto create_mirror_view(const Space&,
create_mirror_view( const Kokkos::DynRankView<T, P...>& src) {
const Space&, const Kokkos::DynRankView<T, P...>& src, return Impl::create_mirror_view(
std::enable_if_t< src, Kokkos::view_alloc(typename Space::memory_space()));
Impl::MirrorDRViewType<Space, T, P...>::is_same_memspace>* = nullptr) {
return src;
} }
// FIXME_C++17 Improve SFINAE here. // public interface that accepts a space and a without initializing flag
template <class Space, class T, class... P, template <class Space, class T, class... P,
class Enable = std::enable_if_t<Kokkos::is_space<Space>::value>> typename Enable = std::enable_if_t<Kokkos::is_space<Space>::value>>
inline typename Impl::MirrorDRViewType<Space, T, P...>::view_type
create_mirror_view(
const Space& space, const Kokkos::DynRankView<T, P...>& src,
std::enable_if_t<
!Impl::MirrorDRViewType<Space, T, P...>::is_same_memspace>* = nullptr) {
return Kokkos::create_mirror(space, src);
}
template <class Space, class T, class... P>
inline auto create_mirror_view(Kokkos::Impl::WithoutInitializing_t wi, inline auto create_mirror_view(Kokkos::Impl::WithoutInitializing_t wi,
const Space&, const Space&,
const Kokkos::DynRankView<T, P...>& src) { const Kokkos::DynRankView<T, P...>& src) {
@ -2188,6 +2088,8 @@ inline auto create_mirror_view(Kokkos::Impl::WithoutInitializing_t wi,
src, Kokkos::view_alloc(typename Space::memory_space{}, wi)); src, Kokkos::view_alloc(typename Space::memory_space{}, wi));
} }
// public interface that accepts arbitrary view constructor args passed by a
// view_alloc
template <class T, class... P, class... ViewCtorArgs> template <class T, class... P, class... ViewCtorArgs>
inline auto create_mirror_view( inline auto create_mirror_view(
const typename Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop, const typename Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop,
@ -2195,58 +2097,29 @@ inline auto create_mirror_view(
return Impl::create_mirror_view(src, arg_prop); return Impl::create_mirror_view(src, arg_prop);
} }
template <class... ViewCtorArgs, class T, class... P> // create a mirror view and deep copy it
// public interface that accepts arbitrary view constructor args passed by a
// view_alloc
template <class... ViewCtorArgs, class T, class... P,
class Enable = std::enable_if_t<
std::is_void<typename ViewTraits<T, P...>::specialize>::value>>
auto create_mirror_view_and_copy( auto create_mirror_view_and_copy(
const Impl::ViewCtorProp<ViewCtorArgs...>&, [[maybe_unused]] const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop,
const Kokkos::DynRankView<T, P...>& src, const Kokkos::DynRankView<T, P...>& src) {
std::enable_if_t<
std::is_void<typename ViewTraits<T, P...>::specialize>::value &&
Impl::MirrorDRViewType<
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space, T,
P...>::is_same_memspace>* = nullptr) {
using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>; using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>;
static_assert(
alloc_prop_input::has_memory_space,
"The view constructor arguments passed to "
"Kokkos::create_mirror_view_and_copy must include a memory space!");
static_assert(!alloc_prop_input::has_pointer,
"The view constructor arguments passed to "
"Kokkos::create_mirror_view_and_copy must "
"not include a pointer!");
static_assert(!alloc_prop_input::allow_padding,
"The view constructor arguments passed to "
"Kokkos::create_mirror_view_and_copy must "
"not explicitly allow padding!");
Impl::check_view_ctor_args_create_mirror_view_and_copy<ViewCtorArgs...>();
if constexpr (Impl::MirrorDRViewType<
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space,
T, P...>::is_same_memspace) {
// same behavior as deep_copy(src, src) // same behavior as deep_copy(src, src)
if (!alloc_prop_input::has_execution_space) if constexpr (!alloc_prop_input::has_execution_space)
fence( fence(
"Kokkos::create_mirror_view_and_copy: fence before returning src view"); "Kokkos::create_mirror_view_and_copy: fence before returning src "
"view");
return src; return src;
} } else {
template <class... ViewCtorArgs, class T, class... P>
auto create_mirror_view_and_copy(
const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop,
const Kokkos::DynRankView<T, P...>& src,
std::enable_if_t<
std::is_void<typename ViewTraits<T, P...>::specialize>::value &&
!Impl::MirrorDRViewType<
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space, T,
P...>::is_same_memspace>* = nullptr) {
using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>;
static_assert(
alloc_prop_input::has_memory_space,
"The view constructor arguments passed to "
"Kokkos::create_mirror_view_and_copy must include a memory space!");
static_assert(!alloc_prop_input::has_pointer,
"The view constructor arguments passed to "
"Kokkos::create_mirror_view_and_copy must "
"not include a pointer!");
static_assert(!alloc_prop_input::allow_padding,
"The view constructor arguments passed to "
"Kokkos::create_mirror_view_and_copy must "
"not explicitly allow padding!");
using Space = typename alloc_prop_input::memory_space; using Space = typename alloc_prop_input::memory_space;
using Mirror = typename Impl::MirrorDRViewType<Space, T, P...>::view_type; using Mirror = typename Impl::MirrorDRViewType<Space, T, P...>::view_type;
@ -2265,6 +2138,11 @@ auto create_mirror_view_and_copy(
deep_copy(mirror, src); deep_copy(mirror, src);
return mirror; return mirror;
} }
#if defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
!defined(KOKKOS_COMPILER_MSVC)
__builtin_unreachable();
#endif
}
template <class Space, class T, class... P> template <class Space, class T, class... P>
auto create_mirror_view_and_copy(const Space&, auto create_mirror_view_and_copy(const Space&,

View File

@ -590,62 +590,21 @@ struct MirrorDynamicViewType {
} // namespace Impl } // namespace Impl
namespace Impl { namespace Impl {
template <class T, class... P, class... ViewCtorArgs>
inline auto create_mirror(
const Kokkos::Experimental::DynamicView<T, P...>& src,
const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop,
std::enable_if_t<!Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space>* =
nullptr) {
using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>;
static_assert( // create a mirror
!alloc_prop_input::has_label, // private interface that accepts arbitrary view constructor args passed by a
"The view constructor arguments passed to Kokkos::create_mirror " // view_alloc
"must not include a label!"); template <class T, class... P, class... ViewCtorArgs>
static_assert( inline auto create_mirror(const Kokkos::Experimental::DynamicView<T, P...>& src,
!alloc_prop_input::has_pointer, const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop) {
"The view constructor arguments passed to Kokkos::create_mirror must " using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>;
"not include a pointer!"); check_view_ctor_args_create_mirror<ViewCtorArgs...>();
static_assert(
!alloc_prop_input::allow_padding,
"The view constructor arguments passed to Kokkos::create_mirror must "
"not explicitly allow padding!");
auto prop_copy = Impl::with_properties_if_unset( auto prop_copy = Impl::with_properties_if_unset(
arg_prop, std::string(src.label()).append("_mirror")); arg_prop, std::string(src.label()).append("_mirror"));
auto ret = typename Kokkos::Experimental::DynamicView<T, P...>::HostMirror( if constexpr (Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space) {
prop_copy, src.chunk_size(), src.chunk_max() * src.chunk_size());
ret.resize_serial(src.extent(0));
return ret;
}
template <class T, class... P, class... ViewCtorArgs>
inline auto create_mirror(
const Kokkos::Experimental::DynamicView<T, P...>& src,
const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop,
std::enable_if_t<Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space>* =
nullptr) {
using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>;
static_assert(
!alloc_prop_input::has_label,
"The view constructor arguments passed to Kokkos::create_mirror "
"must not include a label!");
static_assert(
!alloc_prop_input::has_pointer,
"The view constructor arguments passed to Kokkos::create_mirror must "
"not include a pointer!");
static_assert(
!alloc_prop_input::allow_padding,
"The view constructor arguments passed to Kokkos::create_mirror must "
"not explicitly allow padding!");
using MemorySpace = typename alloc_prop_input::memory_space; using MemorySpace = typename alloc_prop_input::memory_space;
auto prop_copy = Impl::with_properties_if_unset(
arg_prop, std::string(src.label()).append("_mirror"));
auto ret = typename Kokkos::Impl::MirrorDynamicViewType< auto ret = typename Kokkos::Impl::MirrorDynamicViewType<
MemorySpace, T, P...>::view_type(prop_copy, src.chunk_size(), MemorySpace, T, P...>::view_type(prop_copy, src.chunk_size(),
@ -653,33 +612,59 @@ inline auto create_mirror(
ret.resize_serial(src.extent(0)); ret.resize_serial(src.extent(0));
return ret;
} else {
auto ret = typename Kokkos::Experimental::DynamicView<T, P...>::HostMirror(
prop_copy, src.chunk_size(), src.chunk_max() * src.chunk_size());
ret.resize_serial(src.extent(0));
return ret; return ret;
} }
#if defined(KOKKOS_COMPILER_INTEL) || \
(defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
!defined(KOKKOS_COMPILER_MSVC))
__builtin_unreachable();
#endif
}
} // namespace Impl } // namespace Impl
// Create a mirror in host space // public interface
template <class T, class... P> template <class T, class... P,
typename Enable = std::enable_if_t<
std::is_void_v<typename ViewTraits<T, P...>::specialize>>>
inline auto create_mirror( inline auto create_mirror(
const Kokkos::Experimental::DynamicView<T, P...>& src) { const Kokkos::Experimental::DynamicView<T, P...>& src) {
return Impl::create_mirror(src, Impl::ViewCtorProp<>{}); return Impl::create_mirror(src, Impl::ViewCtorProp<>{});
} }
template <class T, class... P> // public interface that accepts a without initializing flag
template <class T, class... P,
typename Enable = std::enable_if_t<
std::is_void_v<typename ViewTraits<T, P...>::specialize>>>
inline auto create_mirror( inline auto create_mirror(
Kokkos::Impl::WithoutInitializing_t wi, Kokkos::Impl::WithoutInitializing_t wi,
const Kokkos::Experimental::DynamicView<T, P...>& src) { const Kokkos::Experimental::DynamicView<T, P...>& src) {
return Impl::create_mirror(src, Kokkos::view_alloc(wi)); return Impl::create_mirror(src, Kokkos::view_alloc(wi));
} }
// Create a mirror in a new space // public interface that accepts a space
template <class Space, class T, class... P> template <class Space, class T, class... P,
typename Enable = std::enable_if_t<
Kokkos::is_space<Space>::value &&
std::is_void_v<typename ViewTraits<T, P...>::specialize>>>
inline auto create_mirror( inline auto create_mirror(
const Space&, const Kokkos::Experimental::DynamicView<T, P...>& src) { const Space&, const Kokkos::Experimental::DynamicView<T, P...>& src) {
return Impl::create_mirror( return Impl::create_mirror(
src, Kokkos::view_alloc(typename Space::memory_space{})); src, Kokkos::view_alloc(typename Space::memory_space{}));
} }
template <class Space, class T, class... P> // public interface that accepts a space and a without initializing flag
template <class Space, class T, class... P,
typename Enable = std::enable_if_t<
Kokkos::is_space<Space>::value &&
std::is_void_v<typename ViewTraits<T, P...>::specialize>>>
typename Kokkos::Impl::MirrorDynamicViewType<Space, T, P...>::view_type typename Kokkos::Impl::MirrorDynamicViewType<Space, T, P...>::view_type
create_mirror(Kokkos::Impl::WithoutInitializing_t wi, const Space&, create_mirror(Kokkos::Impl::WithoutInitializing_t wi, const Space&,
const Kokkos::Experimental::DynamicView<T, P...>& src) { const Kokkos::Experimental::DynamicView<T, P...>& src) {
@ -687,7 +672,11 @@ create_mirror(Kokkos::Impl::WithoutInitializing_t wi, const Space&,
src, Kokkos::view_alloc(wi, typename Space::memory_space{})); src, Kokkos::view_alloc(wi, typename Space::memory_space{}));
} }
template <class T, class... P, class... ViewCtorArgs> // public interface that accepts arbitrary view constructor args passed by a
// view_alloc
template <class T, class... P, class... ViewCtorArgs,
typename Enable = std::enable_if_t<
std::is_void_v<typename ViewTraits<T, P...>::specialize>>>
inline auto create_mirror( inline auto create_mirror(
const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop, const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop,
const Kokkos::Experimental::DynamicView<T, P...>& src) { const Kokkos::Experimental::DynamicView<T, P...>& src) {
@ -696,76 +685,56 @@ inline auto create_mirror(
namespace Impl { namespace Impl {
// create a mirror view
// private interface that accepts arbitrary view constructor args passed by a
// view_alloc
template <class T, class... P, class... ViewCtorArgs> template <class T, class... P, class... ViewCtorArgs>
inline std::enable_if_t< inline auto create_mirror_view(
!Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space && const Kokkos::Experimental::DynamicView<T, P...>& src,
(std::is_same< [[maybe_unused]] const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop) {
typename Kokkos::Experimental::DynamicView<T, P...>::memory_space, if constexpr (!Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space) {
if constexpr (std::is_same<typename Kokkos::Experimental::DynamicView<
T, P...>::memory_space,
typename Kokkos::Experimental::DynamicView< typename Kokkos::Experimental::DynamicView<
T, P...>::HostMirror::memory_space>::value && T, P...>::HostMirror::memory_space>::value &&
std::is_same< std::is_same<typename Kokkos::Experimental::DynamicView<
typename Kokkos::Experimental::DynamicView<T, P...>::data_type, T, P...>::data_type,
typename Kokkos::Experimental::DynamicView< typename Kokkos::Experimental::DynamicView<
T, P...>::HostMirror::data_type>::value), T, P...>::HostMirror::data_type>::value) {
typename Kokkos::Experimental::DynamicView<T, P...>::HostMirror> return
create_mirror_view(const Kokkos::Experimental::DynamicView<T, P...>& src, typename Kokkos::Experimental::DynamicView<T, P...>::HostMirror(src);
const Impl::ViewCtorProp<ViewCtorArgs...>&) { } else {
return src; return Kokkos::Impl::choose_create_mirror(src, arg_prop);
}
} else {
if constexpr (Impl::MirrorDynamicViewType<
typename Impl::ViewCtorProp<
ViewCtorArgs...>::memory_space,
T, P...>::is_same_memspace) {
return typename Impl::MirrorDynamicViewType<
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space, T,
P...>::view_type(src);
} else {
return Kokkos::Impl::choose_create_mirror(src, arg_prop);
}
}
#if defined(KOKKOS_COMPILER_INTEL) || \
(defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
!defined(KOKKOS_COMPILER_MSVC))
__builtin_unreachable();
#endif
} }
template <class T, class... P, class... ViewCtorArgs>
inline std::enable_if_t<
!Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space &&
!(std::is_same<
typename Kokkos::Experimental::DynamicView<T, P...>::memory_space,
typename Kokkos::Experimental::DynamicView<
T, P...>::HostMirror::memory_space>::value &&
std::is_same<
typename Kokkos::Experimental::DynamicView<T, P...>::data_type,
typename Kokkos::Experimental::DynamicView<
T, P...>::HostMirror::data_type>::value),
typename Kokkos::Experimental::DynamicView<T, P...>::HostMirror>
create_mirror_view(const Kokkos::Experimental::DynamicView<T, P...>& src,
const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop) {
return Kokkos::create_mirror(arg_prop, src);
}
template <class T, class... P, class... ViewCtorArgs,
class = std::enable_if_t<
Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space>>
std::enable_if_t<Impl::MirrorDynamicViewType<
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space,
T, P...>::is_same_memspace,
typename Impl::MirrorDynamicViewType<
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space,
T, P...>::view_type>
create_mirror_view(const Kokkos::Experimental::DynamicView<T, P...>& src,
const Impl::ViewCtorProp<ViewCtorArgs...>&) {
return src;
}
template <class T, class... P, class... ViewCtorArgs,
class = std::enable_if_t<
Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space>>
std::enable_if_t<!Impl::MirrorDynamicViewType<
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space,
T, P...>::is_same_memspace,
typename Impl::MirrorDynamicViewType<
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space,
T, P...>::view_type>
create_mirror_view(const Kokkos::Experimental::DynamicView<T, P...>& src,
const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop) {
return Kokkos::Impl::create_mirror(src, arg_prop);
}
} // namespace Impl } // namespace Impl
// Create a mirror view in host space // public interface
template <class T, class... P> template <class T, class... P>
inline auto create_mirror_view( inline auto create_mirror_view(
const typename Kokkos::Experimental::DynamicView<T, P...>& src) { const typename Kokkos::Experimental::DynamicView<T, P...>& src) {
return Impl::create_mirror_view(src, Impl::ViewCtorProp<>{}); return Impl::create_mirror_view(src, Impl::ViewCtorProp<>{});
} }
// public interface that accepts a without initializing flag
template <class T, class... P> template <class T, class... P>
inline auto create_mirror_view( inline auto create_mirror_view(
Kokkos::Impl::WithoutInitializing_t wi, Kokkos::Impl::WithoutInitializing_t wi,
@ -773,15 +742,18 @@ inline auto create_mirror_view(
return Impl::create_mirror_view(src, Kokkos::view_alloc(wi)); return Impl::create_mirror_view(src, Kokkos::view_alloc(wi));
} }
// Create a mirror in a new space // public interface that accepts a space
template <class Space, class T, class... P> template <class Space, class T, class... P,
class Enable = std::enable_if_t<Kokkos::is_space<Space>::value>>
inline auto create_mirror_view( inline auto create_mirror_view(
const Space&, const Kokkos::Experimental::DynamicView<T, P...>& src) { const Space&, const Kokkos::Experimental::DynamicView<T, P...>& src) {
return Impl::create_mirror_view(src, return Impl::create_mirror_view(src,
view_alloc(typename Space::memory_space{})); view_alloc(typename Space::memory_space{}));
} }
template <class Space, class T, class... P> // public interface that accepts a space and a without initializing flag
template <class Space, class T, class... P,
class Enable = std::enable_if_t<Kokkos::is_space<Space>::value>>
inline auto create_mirror_view( inline auto create_mirror_view(
Kokkos::Impl::WithoutInitializing_t wi, const Space&, Kokkos::Impl::WithoutInitializing_t wi, const Space&,
const Kokkos::Experimental::DynamicView<T, P...>& src) { const Kokkos::Experimental::DynamicView<T, P...>& src) {
@ -789,6 +761,8 @@ inline auto create_mirror_view(
src, Kokkos::view_alloc(wi, typename Space::memory_space{})); src, Kokkos::view_alloc(wi, typename Space::memory_space{}));
} }
// public interface that accepts arbitrary view constructor args passed by a
// view_alloc
template <class T, class... P, class... ViewCtorArgs> template <class T, class... P, class... ViewCtorArgs>
inline auto create_mirror_view( inline auto create_mirror_view(
const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop, const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop,
@ -985,58 +959,29 @@ struct ViewCopy<Kokkos::Experimental::DynamicView<DP...>,
} // namespace Impl } // namespace Impl
template <class... ViewCtorArgs, class T, class... P> // create a mirror view and deep copy it
// public interface that accepts arbitrary view constructor args passed by a
// view_alloc
template <class... ViewCtorArgs, class T, class... P,
class Enable = std::enable_if_t<
std::is_void<typename ViewTraits<T, P...>::specialize>::value>>
auto create_mirror_view_and_copy( auto create_mirror_view_and_copy(
const Impl::ViewCtorProp<ViewCtorArgs...>&, [[maybe_unused]] const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop,
const Kokkos::Experimental::DynamicView<T, P...>& src, const Kokkos::Experimental::DynamicView<T, P...>& src) {
std::enable_if_t<
std::is_void<typename ViewTraits<T, P...>::specialize>::value &&
Impl::MirrorDynamicViewType<
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space, T,
P...>::is_same_memspace>* = nullptr) {
using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>; using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>;
static_assert(
alloc_prop_input::has_memory_space,
"The view constructor arguments passed to "
"Kokkos::create_mirror_view_and_copy must include a memory space!");
static_assert(!alloc_prop_input::has_pointer,
"The view constructor arguments passed to "
"Kokkos::create_mirror_view_and_copy must "
"not include a pointer!");
static_assert(!alloc_prop_input::allow_padding,
"The view constructor arguments passed to "
"Kokkos::create_mirror_view_and_copy must "
"not explicitly allow padding!");
Impl::check_view_ctor_args_create_mirror_view_and_copy<ViewCtorArgs...>();
if constexpr (Impl::MirrorDynamicViewType<
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space,
T, P...>::is_same_memspace) {
// same behavior as deep_copy(src, src) // same behavior as deep_copy(src, src)
if (!alloc_prop_input::has_execution_space) if constexpr (!alloc_prop_input::has_execution_space)
fence( fence(
"Kokkos::create_mirror_view_and_copy: fence before returning src view"); "Kokkos::create_mirror_view_and_copy: fence before returning src "
"view");
return src; return src;
} } else {
template <class... ViewCtorArgs, class T, class... P>
auto create_mirror_view_and_copy(
const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop,
const Kokkos::Experimental::DynamicView<T, P...>& src,
std::enable_if_t<
std::is_void<typename ViewTraits<T, P...>::specialize>::value &&
!Impl::MirrorDynamicViewType<
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space, T,
P...>::is_same_memspace>* = nullptr) {
using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>;
static_assert(
alloc_prop_input::has_memory_space,
"The view constructor arguments passed to "
"Kokkos::create_mirror_view_and_copy must include a memory space!");
static_assert(!alloc_prop_input::has_pointer,
"The view constructor arguments passed to "
"Kokkos::create_mirror_view_and_copy must "
"not include a pointer!");
static_assert(!alloc_prop_input::allow_padding,
"The view constructor arguments passed to "
"Kokkos::create_mirror_view_and_copy must "
"not explicitly allow padding!");
using Space = typename alloc_prop_input::memory_space; using Space = typename alloc_prop_input::memory_space;
using Mirror = using Mirror =
typename Impl::MirrorDynamicViewType<Space, T, P...>::view_type; typename Impl::MirrorDynamicViewType<Space, T, P...>::view_type;
@ -1057,8 +1002,14 @@ auto create_mirror_view_and_copy(
deep_copy(mirror, src); deep_copy(mirror, src);
return mirror; return mirror;
} }
#if defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
!defined(KOKKOS_COMPILER_MSVC)
__builtin_unreachable();
#endif
}
template <class Space, class T, class... P> template <class Space, class T, class... P,
typename Enable = std::enable_if_t<Kokkos::is_space<Space>::value>>
auto create_mirror_view_and_copy( auto create_mirror_view_and_copy(
const Space&, const Kokkos::Experimental::DynamicView<T, P...>& src, const Space&, const Kokkos::Experimental::DynamicView<T, P...>& src,
std::string const& name = "") { std::string const& name = "") {

Some files were not shown because too many files have changed in this diff Show More