Merge branch 'lammps:develop' into alphataubio-kokkos-fixes
This commit is contained in:
2
.github/workflows/check-vla.yml
vendored
2
.github/workflows/check-vla.yml
vendored
@ -27,9 +27,9 @@ jobs:
|
||||
|
||||
- name: Install extra packages
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y ccache \
|
||||
libeigen3-dev \
|
||||
libgsl-dev \
|
||||
libcurl4-openssl-dev \
|
||||
mold \
|
||||
mpi-default-bin \
|
||||
|
||||
6
.github/workflows/compile-msvc.yml
vendored
6
.github/workflows/compile-msvc.yml
vendored
@ -1,4 +1,4 @@
|
||||
# GitHub action to build LAMMPS on Windows with Visual C++
|
||||
# GitHub action to test LAMMPS on Windows with Visual C++
|
||||
name: "Windows Unit Tests"
|
||||
|
||||
on:
|
||||
@ -11,6 +11,10 @@ on:
|
||||
|
||||
workflow_dispatch:
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.event_name }}-${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: ${{github.event_name == 'pull_request'}}
|
||||
|
||||
jobs:
|
||||
build:
|
||||
name: Windows Compilation Test
|
||||
|
||||
109
.github/workflows/full-regression.yml
vendored
Normal file
109
.github/workflows/full-regression.yml
vendored
Normal file
@ -0,0 +1,109 @@
|
||||
# GitHub action to build LAMMPS on Linux and run regression tests
|
||||
name: "Full Regression Test"
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- develop
|
||||
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
build:
|
||||
name: Build LAMMPS
|
||||
# restrict to official LAMMPS repository
|
||||
if: ${{ github.repository == 'lammps/lammps' }}
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
CCACHE_DIR: ${{ github.workspace }}/.ccache
|
||||
strategy:
|
||||
max-parallel: 8
|
||||
matrix:
|
||||
idx: [ 0, 1, 2, 3, 4, 5, 6, 7 ]
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 2
|
||||
show-progress: false
|
||||
|
||||
- name: Install extra packages
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y ccache ninja-build libeigen3-dev \
|
||||
libcurl4-openssl-dev python3-dev \
|
||||
mpi-default-bin mpi-default-dev
|
||||
|
||||
- name: Create Build Environment
|
||||
run: mkdir build
|
||||
|
||||
- name: Set up ccache
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: ${{ env.CCACHE_DIR }}
|
||||
key: linux-full-ccache-${{ github.sha }}
|
||||
restore-keys: linux-full-ccache-
|
||||
|
||||
- name: Building LAMMPS via CMake
|
||||
shell: bash
|
||||
run: |
|
||||
ccache -z
|
||||
python3 -m venv linuxenv
|
||||
source linuxenv/bin/activate
|
||||
python3 -m pip install --upgrade pip
|
||||
python3 -m pip install numpy pyyaml junit_xml
|
||||
cmake -S cmake -B build \
|
||||
-C cmake/presets/gcc.cmake \
|
||||
-C cmake/presets/most.cmake \
|
||||
-D CMAKE_CXX_COMPILER_LAUNCHER=ccache \
|
||||
-D CMAKE_C_COMPILER_LAUNCHER=ccache \
|
||||
-D BUILD_SHARED_LIBS=off \
|
||||
-D DOWNLOAD_POTENTIALS=off \
|
||||
-D PKG_MANIFOLD=on \
|
||||
-D PKG_ML-PACE=on \
|
||||
-D PKG_ML-RANN=on \
|
||||
-D PKG_RHEO=on \
|
||||
-D PKG_PTM=on \
|
||||
-D PKG_PYTHON=on \
|
||||
-D PKG_QTB=on \
|
||||
-D PKG_SMTBQ=on \
|
||||
-G Ninja
|
||||
cmake --build build
|
||||
ccache -s
|
||||
|
||||
- name: Run Full Regression Tests
|
||||
shell: bash
|
||||
run: |
|
||||
source linuxenv/bin/activate
|
||||
python3 tools/regression-tests/run_tests.py \
|
||||
--lmp-bin=build/lmp \
|
||||
--config-file=tools/regression-tests/config_serial.yaml \
|
||||
--examples-top-level=examples --analyze --num-workers=8
|
||||
|
||||
python3 tools/regression-tests/run_tests.py \
|
||||
--lmp-bin=build/lmp \
|
||||
--config-file=tools/regression-tests/config_serial.yaml \
|
||||
--list-input=input-list-${{ matrix.idx }}.txt \
|
||||
--output-file=output-${{ matrix.idx }}.xml \
|
||||
--progress-file=progress-${{ matrix.idx }}.yaml \
|
||||
--log-file=run-${{ matrix.idx }}.log
|
||||
|
||||
tar -cvf full-regression-test-${{ matrix.idx }}.tar run-${{ matrix.idx }}.log progress-${{ matrix.idx }}.yaml output-${{ matrix.idx }}.xml
|
||||
|
||||
- name: Upload artifacts
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: full-regression-test-artifact-${{ matrix.idx }}
|
||||
path: full-regression-test-${{ matrix.idx }}.tar
|
||||
|
||||
merge:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
steps:
|
||||
- name: Merge Artifacts
|
||||
uses: actions/upload-artifact/merge@v4
|
||||
with:
|
||||
name: merged-full-regresssion-artifact
|
||||
pattern: full-regression-test-artifact-*
|
||||
|
||||
118
.github/workflows/quick-regression.yml
vendored
Normal file
118
.github/workflows/quick-regression.yml
vendored
Normal file
@ -0,0 +1,118 @@
|
||||
# GitHub action to build LAMMPS on Linux and run selected regression tests
|
||||
name: "Quick Regression Test"
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches:
|
||||
- develop
|
||||
|
||||
workflow_dispatch:
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.event_name }}-${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: ${{github.event_name == 'pull_request'}}
|
||||
|
||||
jobs:
|
||||
build:
|
||||
name: Build LAMMPS
|
||||
# restrict to official LAMMPS repository
|
||||
if: ${{ github.repository == 'lammps/lammps' }}
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
CCACHE_DIR: ${{ github.workspace }}/.ccache
|
||||
strategy:
|
||||
max-parallel: 4
|
||||
matrix:
|
||||
idx: [ 0, 1, 2, 3 ]
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
show-progress: false
|
||||
|
||||
- name: Install extra packages
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y ccache ninja-build libeigen3-dev \
|
||||
libcurl4-openssl-dev python3-dev \
|
||||
mpi-default-bin mpi-default-dev
|
||||
|
||||
- name: Create Build Environment
|
||||
run: mkdir build
|
||||
|
||||
- name: Set up ccache
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: ${{ env.CCACHE_DIR }}
|
||||
key: linux-quick-ccache-${{ github.sha }}
|
||||
restore-keys: linux-quick-ccache-
|
||||
|
||||
- name: Building LAMMPS via CMake
|
||||
shell: bash
|
||||
run: |
|
||||
ccache -z
|
||||
python3 -m venv linuxenv
|
||||
source linuxenv/bin/activate
|
||||
python3 -m pip install --upgrade pip
|
||||
python3 -m pip install numpy pyyaml junit_xml
|
||||
cmake -S cmake -B build \
|
||||
-C cmake/presets/gcc.cmake \
|
||||
-C cmake/presets/most.cmake \
|
||||
-D CMAKE_CXX_COMPILER_LAUNCHER=ccache \
|
||||
-D CMAKE_C_COMPILER_LAUNCHER=ccache \
|
||||
-D BUILD_SHARED_LIBS=off \
|
||||
-D DOWNLOAD_POTENTIALS=off \
|
||||
-D PKG_MANIFOLD=on \
|
||||
-D PKG_ML-PACE=on \
|
||||
-D PKG_ML-RANN=on \
|
||||
-D PKG_RHEO=on \
|
||||
-D PKG_PTM=on \
|
||||
-D PKG_PYTHON=on \
|
||||
-D PKG_QTB=on \
|
||||
-D PKG_SMTBQ=on \
|
||||
-G Ninja
|
||||
cmake --build build
|
||||
ccache -s
|
||||
|
||||
- name: Run Regression Tests for Modified Styles
|
||||
shell: bash
|
||||
run: |
|
||||
source linuxenv/bin/activate
|
||||
python3 tools/regression-tests/run_tests.py \
|
||||
--lmp-bin=build/lmp \
|
||||
--config-file=tools/regression-tests/config_quick.yaml \
|
||||
--examples-top-level=examples \
|
||||
--quick-reference=tools/regression-tests/reference.yaml \
|
||||
--quick --quick-branch=origin/develop --quick-max=100 --num-workers=4
|
||||
|
||||
if [ -f input-list-${{ matrix.idx }}.txt ]
|
||||
then \
|
||||
python3 tools/regression-tests/run_tests.py \
|
||||
--lmp-bin=build/lmp \
|
||||
--config-file=tools/regression-tests/config_quick.yaml \
|
||||
--list-input=input-list-${{ matrix.idx }}.txt \
|
||||
--output-file=output-${{ matrix.idx }}.xml \
|
||||
--progress-file=progress-${{ matrix.idx }}.yaml \
|
||||
--log-file=run-${{ matrix.idx }}.log
|
||||
fi
|
||||
|
||||
tar -cvf quick-regression-test-${{ matrix.idx }}.tar run-${{ matrix.idx }}.log progress-${{ matrix.idx }}.yaml output-${{ matrix.idx }}.xml
|
||||
|
||||
- name: Upload artifacts
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: quick-regression-test-artifact-${{ matrix.idx }}
|
||||
path: quick-regression-test-${{ matrix.idx }}.tar
|
||||
|
||||
merge:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
steps:
|
||||
- name: Merge Artifacts
|
||||
uses: actions/upload-artifact/merge@v4
|
||||
with:
|
||||
name: merged-quick-regresssion-artifact
|
||||
pattern: quick-regression-test-artifact-*
|
||||
|
||||
37
.github/workflows/style-check.yml
vendored
Normal file
37
.github/workflows/style-check.yml
vendored
Normal file
@ -0,0 +1,37 @@
|
||||
# GitHub action to run checks from tools/coding_standard
|
||||
name: "Check for Programming Style Conformance"
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- develop
|
||||
pull_request:
|
||||
branches:
|
||||
- develop
|
||||
|
||||
workflow_dispatch:
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.event_name }}-${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: ${{github.event_name == 'pull_request'}}
|
||||
|
||||
jobs:
|
||||
build:
|
||||
name: Programming Style Conformance
|
||||
if: ${{ github.repository == 'lammps/lammps' }}
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 1
|
||||
|
||||
- name: Run Tests
|
||||
working-directory: src
|
||||
shell: bash
|
||||
run: |
|
||||
make check-whitespace
|
||||
make check-permissions
|
||||
make check-homepage
|
||||
make check-errordocs
|
||||
6
.github/workflows/unittest-linux.yml
vendored
6
.github/workflows/unittest-linux.yml
vendored
@ -11,6 +11,10 @@ on:
|
||||
|
||||
workflow_dispatch:
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.event_name }}-${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: ${{github.event_name == 'pull_request'}}
|
||||
|
||||
jobs:
|
||||
build:
|
||||
name: Linux Unit Test
|
||||
@ -27,9 +31,9 @@ jobs:
|
||||
|
||||
- name: Install extra packages
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y ccache \
|
||||
libeigen3-dev \
|
||||
libgsl-dev \
|
||||
libcurl4-openssl-dev \
|
||||
mold \
|
||||
ninja-build \
|
||||
|
||||
4
.github/workflows/unittest-macos.yml
vendored
4
.github/workflows/unittest-macos.yml
vendored
@ -11,6 +11,10 @@ on:
|
||||
|
||||
workflow_dispatch:
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.event_name }}-${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: ${{github.event_name == 'pull_request'}}
|
||||
|
||||
jobs:
|
||||
build:
|
||||
name: MacOS Unit Test
|
||||
|
||||
@ -497,7 +497,7 @@ if((CMAKE_CXX_COMPILER_ID STREQUAL "Intel") AND (CMAKE_CXX_STANDARD GREATER_EQUA
|
||||
PROPERTIES COMPILE_OPTIONS "-std=c++14")
|
||||
endif()
|
||||
|
||||
if(PKG_ATC OR PKG_AWPMD OR PKG_ML-QUIP OR PKG_ML-POD OR PKG_ELECTRODE OR BUILD_TOOLS)
|
||||
if(PKG_ATC OR PKG_AWPMD OR PKG_ML-QUIP OR PKG_ML-POD OR PKG_ELECTRODE OR PKG_RHEO OR BUILD_TOOLS)
|
||||
enable_language(C)
|
||||
if (NOT USE_INTERNAL_LINALG)
|
||||
find_package(LAPACK)
|
||||
@ -572,7 +572,7 @@ else()
|
||||
endif()
|
||||
|
||||
foreach(PKG_WITH_INCL KSPACE PYTHON ML-IAP VORONOI COLVARS ML-HDNNP MDI MOLFILE NETCDF
|
||||
PLUMED QMMM ML-QUIP SCAFACOS MACHDYN VTK KIM COMPRESS ML-PACE LEPTON RHEO EXTRA-COMMAND)
|
||||
PLUMED QMMM ML-QUIP SCAFACOS MACHDYN VTK KIM COMPRESS ML-PACE LEPTON EXTRA-COMMAND)
|
||||
if(PKG_${PKG_WITH_INCL})
|
||||
include(Packages/${PKG_WITH_INCL})
|
||||
endif()
|
||||
|
||||
@ -4,6 +4,8 @@
|
||||
option(BUILD_DOC "Build LAMMPS HTML documentation" OFF)
|
||||
|
||||
if(BUILD_DOC)
|
||||
option(BUILD_DOC_VENV "Build LAMMPS documentation virtual environment" ON)
|
||||
mark_as_advanced(BUILD_DOC_VENV)
|
||||
# Current Sphinx versions require at least Python 3.8
|
||||
# use default (or custom) Python executable, if version is sufficient
|
||||
if(Python_VERSION VERSION_GREATER_EQUAL 3.8)
|
||||
@ -18,14 +20,6 @@ if(BUILD_DOC)
|
||||
find_package(Doxygen 1.8.10 REQUIRED)
|
||||
file(GLOB DOC_SOURCES CONFIGURE_DEPENDS ${LAMMPS_DOC_DIR}/src/[^.]*.rst)
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT docenv
|
||||
COMMAND ${VIRTUALENV} docenv
|
||||
)
|
||||
|
||||
set(DOCENV_BINARY_DIR ${CMAKE_BINARY_DIR}/docenv/bin)
|
||||
set(DOCENV_REQUIREMENTS_FILE ${LAMMPS_DOC_DIR}/utils/requirements.txt)
|
||||
|
||||
set(SPHINX_CONFIG_DIR ${LAMMPS_DOC_DIR}/utils/sphinx-config)
|
||||
set(SPHINX_CONFIG_FILE_TEMPLATE ${SPHINX_CONFIG_DIR}/conf.py.in)
|
||||
set(SPHINX_STATIC_DIR ${SPHINX_CONFIG_DIR}/_static)
|
||||
@ -44,6 +38,15 @@ if(BUILD_DOC)
|
||||
# configure paths in conf.py, since relative paths change when file is copied
|
||||
configure_file(${SPHINX_CONFIG_FILE_TEMPLATE} ${DOC_BUILD_CONFIG_FILE})
|
||||
|
||||
if(BUILD_DOC_VENV)
|
||||
add_custom_command(
|
||||
OUTPUT docenv
|
||||
COMMAND ${VIRTUALENV} docenv
|
||||
)
|
||||
|
||||
set(DOCENV_BINARY_DIR ${CMAKE_BINARY_DIR}/docenv/bin)
|
||||
set(DOCENV_REQUIREMENTS_FILE ${LAMMPS_DOC_DIR}/utils/requirements.txt)
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT ${DOC_BUILD_DIR}/requirements.txt
|
||||
DEPENDS docenv ${DOCENV_REQUIREMENTS_FILE}
|
||||
@ -53,6 +56,15 @@ if(BUILD_DOC)
|
||||
COMMAND ${DOCENV_BINARY_DIR}/pip $ENV{PIP_OPTIONS} install -r ${DOC_BUILD_DIR}/requirements.txt --upgrade
|
||||
)
|
||||
|
||||
set(DOCENV_DEPS docenv ${DOC_BUILD_DIR}/requirements.txt)
|
||||
if(NOT TARGET Sphinx::sphinx-build)
|
||||
add_executable(Sphinx::sphinx-build IMPORTED GLOBAL)
|
||||
set_target_properties(Sphinx::sphinx-build PROPERTIES IMPORTED_LOCATION "${DOCENV_BINARY_DIR}/sphinx-build")
|
||||
endif()
|
||||
else()
|
||||
find_package(Sphinx)
|
||||
endif()
|
||||
|
||||
set(MATHJAX_URL "https://github.com/mathjax/MathJax/archive/3.1.3.tar.gz" CACHE STRING "URL for MathJax tarball")
|
||||
set(MATHJAX_MD5 "b81661c6e6ba06278e6ae37b30b0c492" CACHE STRING "MD5 checksum of MathJax tarball")
|
||||
mark_as_advanced(MATHJAX_URL)
|
||||
@ -97,8 +109,8 @@ if(BUILD_DOC)
|
||||
endif()
|
||||
add_custom_command(
|
||||
OUTPUT html
|
||||
DEPENDS ${DOC_SOURCES} docenv ${DOC_BUILD_DIR}/requirements.txt ${DOXYGEN_XML_DIR}/index.xml ${BUILD_DOC_CONFIG_FILE}
|
||||
COMMAND ${DOCENV_BINARY_DIR}/sphinx-build ${SPHINX_EXTRA_OPTS} -b html -c ${DOC_BUILD_DIR} -d ${DOC_BUILD_DIR}/doctrees ${LAMMPS_DOC_DIR}/src ${DOC_BUILD_DIR}/html
|
||||
DEPENDS ${DOC_SOURCES} ${DOCENV_DEPS} ${DOXYGEN_XML_DIR}/index.xml ${BUILD_DOC_CONFIG_FILE}
|
||||
COMMAND Sphinx::sphinx-build ${SPHINX_EXTRA_OPTS} -b html -c ${DOC_BUILD_DIR} -d ${DOC_BUILD_DIR}/doctrees ${LAMMPS_DOC_DIR}/src ${DOC_BUILD_DIR}/html
|
||||
COMMAND ${CMAKE_COMMAND} -E create_symlink Manual.html ${DOC_BUILD_DIR}/html/index.html
|
||||
COMMAND ${CMAKE_COMMAND} -E copy_directory ${LAMMPS_DOC_DIR}/src/PDF ${DOC_BUILD_DIR}/html/PDF
|
||||
COMMAND ${CMAKE_COMMAND} -E remove -f ${DOXYGEN_XML_DIR}/run.stamp
|
||||
|
||||
29
cmake/Modules/FindSphinx.cmake
Normal file
29
cmake/Modules/FindSphinx.cmake
Normal file
@ -0,0 +1,29 @@
|
||||
# Find sphinx-build
|
||||
find_program(Sphinx_EXECUTABLE NAMES sphinx-build
|
||||
PATH_SUFFIXES bin
|
||||
DOC "Sphinx documenation build executable")
|
||||
mark_as_advanced(Sphinx_EXECUTABLE)
|
||||
|
||||
if(Sphinx_EXECUTABLE)
|
||||
execute_process(COMMAND ${Sphinx_EXECUTABLE} --version
|
||||
OUTPUT_VARIABLE sphinx_version
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE
|
||||
RESULT_VARIABLE _sphinx_version_result)
|
||||
|
||||
if(_sphinx_version_result)
|
||||
message(WARNING "Unable to determine sphinx-build verison: ${_sphinx_version_result}")
|
||||
else()
|
||||
string(REGEX REPLACE "sphinx-build ([0-9.]+).*"
|
||||
"\\1"
|
||||
Sphinx_VERSION
|
||||
"${sphinx_version}")
|
||||
endif()
|
||||
|
||||
if(NOT TARGET Sphinx::sphinx-build)
|
||||
add_executable(Sphinx::sphinx-build IMPORTED GLOBAL)
|
||||
set_target_properties(Sphinx::sphinx-build PROPERTIES IMPORTED_LOCATION "${Sphinx_EXECUTABLE}")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
include(FindPackageHandleStandardArgs)
|
||||
find_package_handle_standard_args(Sphinx REQUIRED_VARS Sphinx_EXECUTABLE VERSION_VAR Sphinx_VERSION)
|
||||
@ -8,8 +8,24 @@ endif()
|
||||
########################################################################
|
||||
# consistency checks and Kokkos options/settings required by LAMMPS
|
||||
if(Kokkos_ENABLE_CUDA)
|
||||
message(STATUS "KOKKOS: Enabling CUDA LAMBDA function support")
|
||||
set(Kokkos_ENABLE_CUDA_LAMBDA ON CACHE BOOL "" FORCE)
|
||||
option(Kokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC "CUDA asynchronous malloc support" OFF)
|
||||
mark_as_advanced(Kokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC)
|
||||
if(Kokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC)
|
||||
message(STATUS "KOKKOS: CUDA malloc async support enabled")
|
||||
else()
|
||||
message(STATUS "KOKKOS: CUDA malloc async support disabled")
|
||||
endif()
|
||||
endif()
|
||||
if(Kokkos_ENABLE_HIP)
|
||||
option(Kokkos_ENABLE_HIP_MULTIPLE_KERNEL_INSTANTIATIONS "Enable multiple kernel instantiations with HIP" ON)
|
||||
mark_as_advanced(Kokkos_ENABLE_HIP_MULTIPLE_KERNEL_INSTANTIATIONS)
|
||||
option(Kokkos_ENABLE_ROCTHRUST "Use RoCThrust library" ON)
|
||||
mark_as_advanced(Kokkos_ENABLE_ROCTHRUST)
|
||||
|
||||
if(Kokkos_ARCH_AMD_GFX942 OR Kokkos_ARCH_AMD_GFX940)
|
||||
option(Kokkos_ENABLE_IMPL_HIP_UNIFIED_MEMORY "Enable unified memory with HIP" ON)
|
||||
mark_as_advanced(Kokkos_ENABLE_IMPL_HIP_UNIFIED_MEMORY)
|
||||
endif()
|
||||
endif()
|
||||
# Adding OpenMP compiler flags without the checks done for
|
||||
# BUILD_OMP can result in compile failures. Enforce consistency.
|
||||
@ -18,6 +34,15 @@ if(Kokkos_ENABLE_OPENMP)
|
||||
message(FATAL_ERROR "Must enable BUILD_OMP with Kokkos_ENABLE_OPENMP")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(Kokkos_ENABLE_SERIAL)
|
||||
if(NOT (Kokkos_ENABLE_OPENMP OR Kokkos_ENABLE_THREADS OR
|
||||
Kokkos_ENABLE_CUDA OR Kokkos_ENABLE_HIP OR Kokkos_ENABLE_SYCL
|
||||
OR Kokkos_ENABLE_OPENMPTARGET))
|
||||
option(Kokkos_ENABLE_ATOMICS_BYPASS "Disable atomics for Kokkos Serial Backend" ON)
|
||||
mark_as_advanced(Kokkos_ENABLE_ATOMICS_BYPASS)
|
||||
endif()
|
||||
endif()
|
||||
########################################################################
|
||||
|
||||
option(EXTERNAL_KOKKOS "Build against external kokkos library" OFF)
|
||||
@ -45,8 +70,8 @@ if(DOWNLOAD_KOKKOS)
|
||||
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_CXX_EXTENSIONS=${CMAKE_CXX_EXTENSIONS}")
|
||||
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}")
|
||||
include(ExternalProject)
|
||||
set(KOKKOS_URL "https://github.com/kokkos/kokkos/archive/4.3.01.tar.gz" CACHE STRING "URL for KOKKOS tarball")
|
||||
set(KOKKOS_MD5 "243de871b3dc2cf3990c1c404032df83" CACHE STRING "MD5 checksum of KOKKOS tarball")
|
||||
set(KOKKOS_URL "https://github.com/kokkos/kokkos/archive/4.4.01.tar.gz" CACHE STRING "URL for KOKKOS tarball")
|
||||
set(KOKKOS_MD5 "de6ee80d00b6212b02bfb7f1e71a8392" CACHE STRING "MD5 checksum of KOKKOS tarball")
|
||||
mark_as_advanced(KOKKOS_URL)
|
||||
mark_as_advanced(KOKKOS_MD5)
|
||||
GetFallbackURL(KOKKOS_URL KOKKOS_FALLBACK)
|
||||
@ -71,7 +96,7 @@ if(DOWNLOAD_KOKKOS)
|
||||
add_dependencies(LAMMPS::KOKKOSCORE kokkos_build)
|
||||
add_dependencies(LAMMPS::KOKKOSCONTAINERS kokkos_build)
|
||||
elseif(EXTERNAL_KOKKOS)
|
||||
find_package(Kokkos 4.3.01 REQUIRED CONFIG)
|
||||
find_package(Kokkos 4.4.01 REQUIRED CONFIG)
|
||||
target_link_libraries(lammps PRIVATE Kokkos::kokkos)
|
||||
else()
|
||||
set(LAMMPS_LIB_KOKKOS_SRC_DIR ${LAMMPS_LIB_SOURCE_DIR}/kokkos)
|
||||
|
||||
@ -1,2 +0,0 @@
|
||||
find_package(GSL 2.6 REQUIRED)
|
||||
target_link_libraries(lammps PRIVATE GSL::gsl)
|
||||
@ -67,6 +67,7 @@ set(WIN_PACKAGES
|
||||
REACTION
|
||||
REAXFF
|
||||
REPLICA
|
||||
RHEO
|
||||
RIGID
|
||||
SHOCK
|
||||
SMTBQ
|
||||
|
||||
@ -60,6 +60,7 @@ set(ALL_PACKAGES
|
||||
REACTION
|
||||
REAXFF
|
||||
REPLICA
|
||||
RHEO
|
||||
RIGID
|
||||
SHOCK
|
||||
SPH
|
||||
|
||||
@ -60,6 +60,7 @@ set(WIN_PACKAGES
|
||||
REACTION
|
||||
REAXFF
|
||||
REPLICA
|
||||
RHEO
|
||||
RIGID
|
||||
SHOCK
|
||||
SMTBQ
|
||||
|
||||
@ -138,12 +138,27 @@ during development:
|
||||
The status of this automated testing can be viewed on `https://ci.lammps.org
|
||||
<https://ci.lammps.org>`_.
|
||||
|
||||
The scripts and inputs for integration, run, and regression testing
|
||||
are maintained in a
|
||||
`separate repository <https://github.com/lammps/lammps-testing>`_
|
||||
of the LAMMPS project on GitHub. A few tests are also run as GitHub
|
||||
Actions and their configuration files are in the ``.github/workflows/``
|
||||
folder of the LAMMPS git tree.
|
||||
The scripts and inputs for integration, run, and legacy regression
|
||||
testing are maintained in a `separate repository
|
||||
<https://github.com/lammps/lammps-testing>`_ of the LAMMPS project on
|
||||
GitHub. A few tests are also run as GitHub Actions and their
|
||||
configuration files are in the ``.github/workflows/`` folder of the
|
||||
LAMMPS git tree.
|
||||
|
||||
Regression tests can also be performed locally with the :ref:`regression
|
||||
tester tool <regression>`. The tool checks if a given LAMMPS binary run
|
||||
with selected input examples produces thermo output that is consistent
|
||||
with the provided log files. The script can be run in one pass over all
|
||||
available input files, but it can also first create multiple lists of
|
||||
inputs or folders that can then be run with multiple workers
|
||||
concurrently to speed things up. Another mode allows running a quick
|
||||
check of inputs that contain commands that have changes in the current
|
||||
checkout branch relative to a git branch. This works similarly to the two
|
||||
pass mode, but will select only shorter runs and no more than 100 inputs
|
||||
that are chosen randomly. This ensures that this test runs
|
||||
significantly faster compared to the full test run. These test runs can
|
||||
also be performed with instrumented LAMMPS binaries (see previous
|
||||
section).
|
||||
|
||||
The unit testing facility is integrated into the CMake build process of
|
||||
the LAMMPS source code distribution itself. It can be enabled by
|
||||
|
||||
@ -2251,28 +2251,38 @@ verified to work in February 2020 with Quantum Espresso versions 6.3 to
|
||||
RHEO package
|
||||
------------
|
||||
|
||||
To build with this package you must have the `GNU Scientific Library
|
||||
(GSL) <https://www.gnu.org/software/gsl/>` installed in locations that
|
||||
are accessible in your environment. The GSL library should be at least
|
||||
version 2.7.
|
||||
This package depends on the BPM package.
|
||||
|
||||
.. tabs::
|
||||
|
||||
.. tab:: CMake build
|
||||
|
||||
If CMake cannot find the GSL library or include files, you can set:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
-D GSL_ROOT_DIR=path # path to root of GSL installation
|
||||
-D PKG_RHEO=yes # enable the package itself
|
||||
-D PKG_BPM=yes # the RHEO package requires BPM
|
||||
-D USE_INTERNAL_LINALG=value # prefer internal LAPACK if true
|
||||
|
||||
Some features in the RHEO package are dependent on code in the BPM
|
||||
package so the latter one *must* be enabled as well.
|
||||
|
||||
The RHEO package also requires LAPACK (and BLAS) and CMake
|
||||
can identify their locations and pass that info to the RHEO
|
||||
build script. But on some systems this may cause problems when
|
||||
linking or the dependency is not desired. By using the setting
|
||||
``-D USE_INTERNAL_LINALG=yes`` when running the CMake
|
||||
configuration, you will select compiling and linking the bundled
|
||||
linear algebra library and work around the limitations.
|
||||
|
||||
.. tab:: Traditional make
|
||||
|
||||
LAMMPS will try to auto-detect the GSL compiler and linker flags
|
||||
from the corresponding ``pkg-config`` file (``gsl.pc``), otherwise
|
||||
you can edit the file ``lib/rheo/Makefile.lammps``
|
||||
to specify the paths and library names where indicated by comments.
|
||||
This must be done **before** the package is installed.
|
||||
The RHEO package requires LAPACK (and BLAS) which can be either
|
||||
a system provided library or the bundled "linalg" library. This
|
||||
is a subset of LAPACK translated to C++. For that, one of the
|
||||
provided ``Makefile.lammps.<config>`` files needs to be copied
|
||||
to ``Makefile.lammps`` and edited as needed. The default file
|
||||
uses the bundled "linalg" library, which can be built by
|
||||
``make lib-linalg args='-m serial'`` in the ``src`` folder.
|
||||
|
||||
----------
|
||||
|
||||
|
||||
@ -229,8 +229,7 @@ can be used with the Intel or GNU compiler (see the ``FFT_LIB`` setting
|
||||
above).
|
||||
|
||||
The NVIDIA Performance Libraries (NVPL) FFT library is optimized for NVIDIA
|
||||
Grace Armv9.0 architecture. You can download it from
|
||||
`https://docs.nvidia.com/nvpl/`_.
|
||||
Grace Armv9.0 architecture. You can download it from https://docs.nvidia.com/nvpl/
|
||||
|
||||
The cuFFT and hipFFT FFT libraries are packaged with NVIDIA's CUDA and
|
||||
AMD's HIP installations, respectively. These FFT libraries require the
|
||||
|
||||
@ -56,7 +56,7 @@ lammps.org". General questions about LAMMPS should be posted in the
|
||||
- SNL
|
||||
- jmgoff at sandia.gov
|
||||
- machine learned potentials, QEq solvers, Python
|
||||
* - Megan McCarthy
|
||||
* - Meg McCarthy
|
||||
- SNL
|
||||
- megmcca at sandia.gov
|
||||
- alloys, micro-structure, machine learned potentials
|
||||
@ -67,7 +67,7 @@ lammps.org". General questions about LAMMPS should be posted in the
|
||||
* - `Trung Nguyen <tn_>`_
|
||||
- U Chicago
|
||||
- ndactrung at gmail.com
|
||||
- soft matter, GPU package
|
||||
- soft matter, GPU package, DIELECTRIC package, regression testing
|
||||
|
||||
.. _rb: https://rbberger.github.io/
|
||||
.. _gc: https://enthalpiste.fr/
|
||||
|
||||
@ -3,71 +3,70 @@ Running LAMMPS on Windows
|
||||
|
||||
To run a serial (non-MPI) executable, follow these steps:
|
||||
|
||||
* Get a command prompt by going to Start->Run... ,
|
||||
then typing "cmd".
|
||||
* Move to the directory where you have your input script,
|
||||
* Install a LAMMPS installer package from https://packages.lammps.org/windows.html
|
||||
* Open the "Command Prompt" or "Terminal" app.
|
||||
* Change to the directory where you have your input script,
|
||||
(e.g. by typing: cd "Documents").
|
||||
* At the command prompt, type "lmp -in in.file", where
|
||||
in.file is the name of your LAMMPS input script.
|
||||
* At the command prompt, type "lmp -in in.file.lmp", where
|
||||
``in.file.lmp`` is the name of your LAMMPS input script.
|
||||
|
||||
Note that the serial executable includes support for multi-threading
|
||||
parallelization from the styles in the OPENMP packages. To run with
|
||||
4 threads, you can type this:
|
||||
parallelization from the styles in the OPENMP and KOKKOS packages.
|
||||
To run with 4 threads, you can type this:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
lmp -in in.lj -pk omp 4 -sf omp
|
||||
lmp -in in.lj.lmp -pk omp 4 -sf omp
|
||||
lmp -in in.lj.lmp -k on t 4 -sf kk
|
||||
|
||||
Alternatively, you can also install a package with LAMMPS-GUI included and
|
||||
open the LAMMPS-GUI app (the package includes the command line version
|
||||
of LAMMPS as well) and open the input file in the GUI and run it from
|
||||
there. For details on LAMMPS-GUI, see :doc:`Howto_lammps_gui`.
|
||||
|
||||
----------
|
||||
|
||||
For the MPI executable, which allows you to run LAMMPS under Windows
|
||||
in parallel, follow these steps.
|
||||
For the MS-MPI executables, which allow you to run LAMMPS under Windows
|
||||
in parallel using MPI rather than multi-threading, follow these steps.
|
||||
|
||||
Download and install a compatible MPI library binary package:
|
||||
|
||||
* for 32-bit Windows: `mpich2-1.4.1p1-win-ia32.msi <https://download.lammps.org/thirdparty/mpich2-1.4.1p1-win-ia32.msi>`_
|
||||
* for 64-bit Windows: `mpich2-1.4.1p1-win-x86-64.msi <https://download.lammps.org/thirdparty/mpich2-1.4.1p1-win-x86-64.msi>`_
|
||||
|
||||
The LAMMPS Windows installer packages will automatically adjust your
|
||||
path for the default location of this MPI package. After the
|
||||
installation of the MPICH2 software, it needs to be integrated into
|
||||
the system. For this you need to start a Command Prompt in
|
||||
*Administrator Mode* (right click on the icon and select it). Change
|
||||
into the MPICH2 installation directory, then into the subdirectory
|
||||
**bin** and execute **smpd.exe -install**\ . Exit the command window.
|
||||
|
||||
* Get a new, regular command prompt by going to Start->Run... ,
|
||||
then typing "cmd".
|
||||
* Move to the directory where you have your input file
|
||||
(e.g. by typing: cd "Documents").
|
||||
Download and install the MS-MPI runtime package ``msmpisetup.exe`` from
|
||||
https://www.microsoft.com/en-us/download/details.aspx?id=105289 (Note
|
||||
that the ``msmpisdk.msi`` is **only** required for **compilation** of
|
||||
LAMMPS from source on Windows using Microsoft Visual Studio). After
|
||||
installation of MS-MPI, perform a reboot.
|
||||
|
||||
Then you can run the executable in serial like in the example above
|
||||
or in parallel using MPI with one of the following commands:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
mpiexec -localonly 4 lmp -in in.file
|
||||
mpiexec -np 4 lmp -in in.file
|
||||
mpiexec -localonly 4 lmp -in in.file.lmp
|
||||
mpiexec -np 4 lmp -in in.file.lmp
|
||||
|
||||
where in.file is the name of your LAMMPS input script. For the latter
|
||||
case, you may be prompted to enter the password that you set during
|
||||
installation of the MPI library software.
|
||||
where ``in.file.lmp`` is the name of your LAMMPS input script. For the
|
||||
latter case, you may be prompted to enter the password that you set
|
||||
during installation of the MPI library software.
|
||||
|
||||
In this mode, output may not immediately show up on the screen, so if
|
||||
your input script takes a long time to execute, you may need to be
|
||||
patient before the output shows up.
|
||||
|
||||
The parallel executable can also run on a single processor by typing
|
||||
something like this:
|
||||
Note that the parallel executable also includes OpenMP multi-threading
|
||||
through both the OPENMP and the KOKKOS package, which can be combined
|
||||
with MPI using something like:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
lmp -in in.lj
|
||||
mpiexec -localonly 2 lmp -in in.lj.lmp -pk omp 2 -sf omp
|
||||
mpiexec -localonly 2 lmp -in in.lj.lmp -kokkos on t 2 -sf kk
|
||||
|
||||
Note that the parallel executable also includes OpenMP
|
||||
multi-threading, which can be combined with MPI using something like:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
mpiexec -localonly 2 lmp -in in.lj -pk omp 2 -sf omp
|
||||
-------------
|
||||
|
||||
MPI parallelization will work for *all* functionality in LAMMPS and in
|
||||
many cases the MPI parallelization is more efficient than
|
||||
multi-threading since LAMMPS was designed from the ground up for MPI
|
||||
parallelization using domain decomposition. Multi-threading is only
|
||||
available for selected styles and implemented on top of the MPI
|
||||
parallelization. Multi-threading is most useful for systems with large
|
||||
load imbalances when using domain decomposition and a smaller number
|
||||
of threads (<= 8).
|
||||
|
||||
@ -1022,7 +1022,7 @@ regression tests with a given LAMMPS binary. The tool launches the
|
||||
LAMMPS binary with any given input script under one of the `examples`
|
||||
subdirectories, and compares the thermo output in the generated log file
|
||||
with those in the provided log file with the same number of processors
|
||||
ub the same subdirectory. If the differences between the actual and
|
||||
in the same subdirectory. If the differences between the actual and
|
||||
reference values are within specified tolerances, the test is considered
|
||||
passed. For each test batch, that is, a set of example input scripts,
|
||||
the mpirun command, the LAMMPS command line arguments, and the
|
||||
|
||||
@ -319,25 +319,34 @@ all types from 1 to :math:`N`. A leading asterisk means all types from
|
||||
:math:`N` (inclusive). A middle asterisk means all types from m to n
|
||||
(inclusive).
|
||||
|
||||
Currently *bond* does not support bond_style hybrid nor bond_style
|
||||
hybrid/overlay as bond styles. The bond styles that currently work
|
||||
with fix_adapt are
|
||||
If :doc:`bond_style hybrid <bond_hybrid>` is used, *bstyle* should be a
|
||||
sub-style name. The bond styles that currently work with fix adapt are:
|
||||
|
||||
+------------------------------------+------------+------------+
|
||||
+-----------------------------------------------------+---------------------------+------------+
|
||||
| :doc:`class2 <bond_class2>` | r0 | type bonds |
|
||||
+------------------------------------+------------+------------+
|
||||
+-----------------------------------------------------+---------------------------+------------+
|
||||
| :doc:`fene <bond_fene>` | k,r0 | type bonds |
|
||||
+------------------------------------+------------+------------+
|
||||
+-----------------------------------------------------+---------------------------+------------+
|
||||
| :doc:`fene/expand <bond_fene_expand>` | k,r0,epsilon,sigma,shift | type bonds |
|
||||
+-----------------------------------------------------+---------------------------+------------+
|
||||
| :doc:`fene/nm <bond_fene>` | k,r0 | type bonds |
|
||||
+------------------------------------+------------+------------+
|
||||
+-----------------------------------------------------+---------------------------+------------+
|
||||
| :doc:`gromos <bond_gromos>` | k,r0 | type bonds |
|
||||
+------------------------------------+------------+------------+
|
||||
+-----------------------------------------------------+---------------------------+------------+
|
||||
| :doc:`harmonic <bond_harmonic>` | k,r0 | type bonds |
|
||||
+------------------------------------+------------+------------+
|
||||
+-----------------------------------------------------+---------------------------+------------+
|
||||
| :doc:`harmonic/restrain <bond_harmonic_restrain>` | k | type bonds |
|
||||
+-----------------------------------------------------+---------------------------+------------+
|
||||
| :doc:`harmonic/shift <bond_harmonic_shift>` | k,r0,r1 | type bonds |
|
||||
+-----------------------------------------------------+---------------------------+------------+
|
||||
| :doc:`harmonic/shift/cut <bond_harmonic_shift_cut>` | k,r0,r1 | type bonds |
|
||||
+-----------------------------------------------------+---------------------------+------------+
|
||||
| :doc:`mm3 <bond_mm3>` | k,r0 | type bonds |
|
||||
+-----------------------------------------------------+---------------------------+------------+
|
||||
| :doc:`morse <bond_morse>` | r0 | type bonds |
|
||||
+------------------------------------+------------+------------+
|
||||
+-----------------------------------------------------+---------------------------+------------+
|
||||
| :doc:`nonlinear <bond_nonlinear>` | epsilon,r0 | type bonds |
|
||||
+------------------------------------+------------+------------+
|
||||
+-----------------------------------------------------+---------------------------+------------+
|
||||
|
||||
----------
|
||||
|
||||
@ -357,15 +366,34 @@ all types from 1 to :math:`N`. A leading asterisk means all types from
|
||||
:math:`N` (inclusive). A middle asterisk means all types from m to n
|
||||
(inclusive).
|
||||
|
||||
Currently *angle* does not support angle_style hybrid nor angle_style
|
||||
hybrid/overlay as angle styles. The angle styles that currently work
|
||||
with fix_adapt are
|
||||
If :doc:`angle_style hybrid <angle_hybrid>` is used, *astyle* should be a
|
||||
sub-style name. The angle styles that currently work with fix adapt are:
|
||||
|
||||
+------------------------------------+----------+-------------+
|
||||
+--------------------------------------------------------------------+-----------------+-------------+
|
||||
| :doc:`harmonic <angle_harmonic>` | k,theta0 | type angles |
|
||||
+------------------------------------+----------+-------------+
|
||||
+--------------------------------------------------------------------+-----------------+-------------+
|
||||
| :doc:`charmm <angle_charmm>` | k,theta0 | type angles |
|
||||
+--------------------------------------------------------------------+-----------------+-------------+
|
||||
| :doc:`class2 <angle_class2>` | k2,k3,k4,theta0 | type angles |
|
||||
+--------------------------------------------------------------------+-----------------+-------------+
|
||||
| :doc:`cosine <angle_cosine>` | k | type angles |
|
||||
+------------------------------------+----------+-------------+
|
||||
+--------------------------------------------------------------------+-----------------+-------------+
|
||||
| :doc:`cosine/periodic <angle_cosine_periodic>` | k,b,n | type angles |
|
||||
+--------------------------------------------------------------------+-----------------+-------------+
|
||||
| :doc:`cosine/squared/restricted <angle_cosine_squared_restricted>` | k,theta0 | type angles |
|
||||
+--------------------------------------------------------------------+-----------------+-------------+
|
||||
| :doc:`dipole <angle_dipole>` | k,gamma0 | type angles |
|
||||
+--------------------------------------------------------------------+-----------------+-------------+
|
||||
| :doc:`fourier <angle_fourier>` | k,c0,c1,c2 | type angles |
|
||||
+--------------------------------------------------------------------+-----------------+-------------+
|
||||
| :doc:`fourier/simple <angle_fourier_simple>` | k,c,n | type angles |
|
||||
+--------------------------------------------------------------------+-----------------+-------------+
|
||||
| :doc:`mm3 <angle_mm3>` | k,theta0 | type angles |
|
||||
+--------------------------------------------------------------------+-----------------+-------------+
|
||||
| :doc:`quartic <angle_quartic>` | k2,k3,k4,theta0 | type angles |
|
||||
+--------------------------------------------------------------------+-----------------+-------------+
|
||||
| :doc:`spica <angle_spica>` | k,theta0 | type angles |
|
||||
+--------------------------------------------------------------------+-----------------+-------------+
|
||||
|
||||
Note that internally, theta0 is stored in radians, so the variable
|
||||
this fix uses to reset theta0 needs to generate values in radians.
|
||||
|
||||
@ -50,8 +50,8 @@ Syntax
|
||||
*intra_energy* value = intramolecular energy (energy units)
|
||||
*tfac_insert* value = scale up/down temperature of inserted atoms (unitless)
|
||||
*overlap_cutoff* value = maximum pair distance for overlap rejection (distance units)
|
||||
*max* value = Maximum number of molecules allowed in the system
|
||||
*min* value = Minimum number of molecules allowed in the system
|
||||
*max* value = Maximum number of atoms allowed in the fix group (and region)
|
||||
*min* value = Minimum number of atoms allowed in the fix group (and region)
|
||||
|
||||
Examples
|
||||
""""""""
|
||||
@ -380,10 +380,11 @@ an infinite positive energy to all new configurations that place any
|
||||
pair of atoms closer than the specified overlap cutoff distance.
|
||||
|
||||
The *max* and *min* keywords allow for the restriction of the number of
|
||||
atoms in the simulation. They automatically reject all insertion or
|
||||
deletion moves that would take the system beyond the set boundaries.
|
||||
Should the system already be beyond the boundary, only moves that bring
|
||||
the system closer to the bounds may be accepted.
|
||||
atoms in the fix group (and region in case the *region* keyword is
|
||||
used). They automatically reject all insertion or deletion moves that
|
||||
would take the system beyond the set boundaries. Should the system
|
||||
already be beyond the boundary, only moves that bring the system closer
|
||||
to the bounds may be accepted.
|
||||
|
||||
The *group* keyword adds all inserted atoms to the :doc:`group <group>`
|
||||
of the group-ID value. The *grouptype* keyword adds all inserted atoms
|
||||
|
||||
@ -51,7 +51,7 @@ index file. When specifying group IDs, only those groups will be
|
||||
written to the index file. In order to follow the Gromacs conventions,
|
||||
the group *all* will be renamed to *System* in the index file.
|
||||
|
||||
The *ndx2group* command will create of update group definitions from
|
||||
The *ndx2group* command will create or update group definitions from
|
||||
those stored in an index file. Without specifying any group IDs, all
|
||||
groups except *System* will be read from the index file and the
|
||||
corresponding groups recreated. If a group of the same name already
|
||||
|
||||
@ -115,10 +115,11 @@ to tell LAMMPS how many parallel files exist, via its specified
|
||||
|
||||
The format of the dump file is selected through the *format* keyword.
|
||||
If specified, it must be the last keyword used, since all remaining
|
||||
arguments are passed on to the dump reader. The *native* format is
|
||||
for native LAMMPS dump files, written with a :doc:`dump atom <dump>`
|
||||
or :doc:`dump custom <dump>` command. The *xyz* format is for generic XYZ
|
||||
formatted dump files. These formats take no additional values.
|
||||
arguments are passed on to the dump reader. The *native* format is for
|
||||
native LAMMPS dump files, written with a :doc:`dump atom <dump>` or
|
||||
:doc:`dump custom <dump>` command. The *xyz* format is for generic XYZ
|
||||
formatted dump files (see details below). These formats take no
|
||||
additional values.
|
||||
|
||||
The *molfile* format supports reading data through using the `VMD <vmd_>`_
|
||||
molfile plugin interface. This dump reader format is only available,
|
||||
@ -230,23 +231,39 @@ will then have a label corresponding to the fix-ID rather than "x" or
|
||||
"xs". The *label* keyword can also be used to specify new column
|
||||
labels for fields *id* and *type*\ .
|
||||
|
||||
For dump files in *xyz* format, only the *x*, *y*, and *z* fields are
|
||||
supported. The dump file does not store atom IDs, so these are
|
||||
assigned consecutively to the atoms as they appear in the dump file,
|
||||
starting from 1. Thus you should ensure that order of atoms is
|
||||
consistent from snapshot to snapshot in the XYZ dump file. See
|
||||
the :doc:`dump_modify sort <dump_modify>` command if the XYZ dump file
|
||||
was written by LAMMPS.
|
||||
For dump files in *xyz* format, only the *type*, *x*, *y*, and *z*
|
||||
fields are supported. There are many variants of the XYZ file format.
|
||||
LAMMPS will read the number of atoms from the first line of each frame,
|
||||
ignore the second (title) line, and then read one line for each atom in the format:
|
||||
|
||||
.. parsed-literal::
|
||||
|
||||
<label> <x coordinate> <y coordinate> <z coordinate>
|
||||
|
||||
|
||||
If the atom label is a numeric integer (like with XYZ files
|
||||
created with default settings by :doc:`dump style <dump>` *xyz*), that
|
||||
number will be used as the atom type. If the atom label is a string,
|
||||
then a type map must be created using the :doc:`labelmap command
|
||||
<labelmap>`. This map needs to associate each (numeric) atom type with
|
||||
a string label. The numeric atom type is stored internally.
|
||||
|
||||
The xyz format dump file does not store atom IDs, so these are assigned
|
||||
consecutively to the atoms as they appear in the dump file, starting
|
||||
from 1. Thus you should ensure that the order of atoms is consistent
|
||||
from snapshot to snapshot in the XYZ dump file. See the
|
||||
:doc:`dump_modify sort <dump_modify>` command if the XYZ dump file was
|
||||
written by LAMMPS.
|
||||
|
||||
For dump files in *molfile* format, the *x*, *y*, *z*, *vx*, *vy*, and
|
||||
*vz* fields can be specified. However, not all molfile formats store
|
||||
velocities, or their respective plugins may not support reading of
|
||||
velocities. The molfile dump files do not store atom IDs, so these
|
||||
are assigned consecutively to the atoms as they appear in the dump
|
||||
file, starting from 1. Thus you should ensure that order of atoms are
|
||||
consistent from snapshot to snapshot in the molfile dump file.
|
||||
See the :doc:`dump_modify sort <dump_modify>` command if the dump file
|
||||
was written by LAMMPS.
|
||||
velocities. The molfile dump files do not store atom IDs, so these are
|
||||
assigned consecutively to the atoms as they appear in the dump file,
|
||||
starting from 1. Thus you should ensure that the order of atoms is
|
||||
consistent from snapshot to snapshot in the molfile dump file. See the
|
||||
:doc:`dump_modify sort <dump_modify>` command if the dump file was
|
||||
written by LAMMPS.
|
||||
|
||||
The *adios* format supports all fields that the *native* format supports
|
||||
except for the *q* charge field.
|
||||
|
||||
@ -18,7 +18,7 @@ Syntax
|
||||
*delete* = no args
|
||||
*block* args = xlo xhi ylo yhi zlo zhi
|
||||
xlo,xhi,ylo,yhi,zlo,zhi = bounds of block in all dimensions (distance units)
|
||||
xlo,xhi,ylo,yhi,zlo,zhi can be a variable
|
||||
xlo,xhi,ylo,yhi,zlo,zhi can be a variable (see below)
|
||||
*cone* args = dim c1 c2 radlo radhi lo hi
|
||||
dim = *x* or *y* or *z* = axis of cone
|
||||
c1,c2 = coords of cone axis in other 2 dimensions (distance units)
|
||||
@ -38,6 +38,7 @@ Syntax
|
||||
*plane* args = px py pz nx ny nz
|
||||
px,py,pz = point on the plane (distance units)
|
||||
nx,ny,nz = direction normal to plane (distance units)
|
||||
px,py,pz can be a variable (see below)
|
||||
*prism* args = xlo xhi ylo yhi zlo zhi xy xz yz
|
||||
xlo,xhi,ylo,yhi,zlo,zhi = bounds of untilted prism (distance units)
|
||||
xy = distance to tilt y in x direction (distance units)
|
||||
@ -206,9 +207,10 @@ parameters a,b,c for style *ellipsoid*, can each be specified as an
|
||||
equal-style :doc:`variable <variable>`. Likewise, for style *sphere*
|
||||
and *ellipsoid* the x-, y-, and z- coordinates of the center of the
|
||||
sphere/ellipsoid can be specified as an equal-style variable. And for
|
||||
style *cylinder* the two center positions c1 and c2 for the location
|
||||
of the cylinder axes can be specified as a equal-style variable. For style *cone*
|
||||
all properties can be defined via equal-style variables.
|
||||
style *cylinder* the two center positions c1 and c2 for the location of
|
||||
the cylinder axes can be specified as an equal-style variable. For style
|
||||
*cone* all properties can be defined via equal-style variables. For
|
||||
style *plane* the point can be defined via equal-style variables.
|
||||
|
||||
If the value is a variable, it should be specified as v_name, where
|
||||
name is the variable name. In this case, the variable will be
|
||||
|
||||
@ -141,6 +141,7 @@ arg
|
||||
arge
|
||||
args
|
||||
argv
|
||||
Armv
|
||||
arrhenius
|
||||
Arun
|
||||
arXiv
|
||||
|
||||
@ -13,11 +13,11 @@ neigh_modify delay 0 every 1 check yes
|
||||
neigh_modify exclude molecule/intra all
|
||||
|
||||
pair_style line/lj 2.5
|
||||
pair_coeff * * 1.0 1.0 1.0 0.25 2.5
|
||||
pair_coeff * * 0.25 0.25 1.0 0.25 2.5
|
||||
|
||||
fix 2 all rigid molecule langevin 2.0 2.0 1.0 492983
|
||||
|
||||
fix 3 all deform 1 x scale 0.3 y scale 0.3
|
||||
fix 3 all deform 5 x scale 0.8 y scale 0.8
|
||||
fix 4 all enforce2d
|
||||
|
||||
compute 10 all property/atom end1x end1y end2x end2y
|
||||
@ -26,7 +26,7 @@ compute 10 all property/atom end1x end1y end2x end2y
|
||||
#dump 2 all custom 500 dump1.line id type &
|
||||
# c_10[1] c_10[2] c_10[3] c_10[4]
|
||||
|
||||
timestep 0.004
|
||||
timestep 0.001
|
||||
|
||||
compute 1 all erotate/asphere
|
||||
compute 2 all ke
|
||||
|
||||
@ -24,7 +24,7 @@ set group small mass 0.01
|
||||
pair_style lj/cut 1.5
|
||||
pair_coeff 1 1 1.0 1.0
|
||||
pair_coeff 2 2 0.0 1.0 0.0
|
||||
pair_coeff 1 2 0.0 1.0
|
||||
pair_coeff 1 2 1.0 1.0
|
||||
|
||||
delete_atoms overlap 1.5 small big
|
||||
|
||||
@ -34,7 +34,7 @@ reset_timestep 0
|
||||
|
||||
velocity small create 1.44 87287 loop geom
|
||||
|
||||
neighbor 0.3 multi
|
||||
neighbor 0.8 multi
|
||||
neigh_modify delay 0 every 1 check yes
|
||||
neigh_modify exclude molecule/intra big include big
|
||||
|
||||
@ -46,19 +46,19 @@ neigh_modify include big
|
||||
pair_style line/lj 2.5
|
||||
pair_coeff 1 1 1.0 1.0 1.0 1.0 2.5
|
||||
pair_coeff 2 2 0.0 0.0 0.0 1.0 0.0
|
||||
pair_coeff 1 2 1.0 0.0 0.0 1.0 0.0
|
||||
pair_coeff 1 2 1.0 0.2 1.0 0.2 2.5
|
||||
|
||||
# use fix SRD to push small particles out from inside big ones
|
||||
# if comment out, big particles won't see SRD particles
|
||||
|
||||
timestep 0.001
|
||||
timestep 0.0001
|
||||
|
||||
fix 1 big rigid molecule
|
||||
fix 2 small srd 20 big 1.0 0.25 49894 &
|
||||
search 0.2 cubic warn 0.0001 shift yes 49829 &
|
||||
overlap yes collision noslip
|
||||
|
||||
fix 3 all deform 1 x scale 0.35 y scale 0.35
|
||||
fix 3 all deform 1 x scale 1.25 y scale 1.25
|
||||
fix 4 all enforce2d
|
||||
|
||||
# diagnostics
|
||||
@ -96,12 +96,12 @@ change_box all triclinic
|
||||
|
||||
fix 2 small srd 20 big 1.0 0.25 49894 &
|
||||
search 0.2 cubic warn 0.0001 shift yes 49829 &
|
||||
overlap yes collision noslip tstat yes
|
||||
overlap yes collision noslip #tstat yes
|
||||
|
||||
#dump 1 all custom 500 dump2.atom.srd id type x y z ix iy iz
|
||||
#dump 2 all custom 500 dump2.line.srd id type &
|
||||
# c_10[1] c_10[2] c_10[3] c_10[4]
|
||||
|
||||
fix 3 all deform 1 xy erate 0.05 units box remap v
|
||||
fix 3 all deform 1 xy erate 0.0002 units box remap v
|
||||
|
||||
run 40000
|
||||
run 30000
|
||||
|
||||
@ -1,213 +0,0 @@
|
||||
LAMMPS (1 Feb 2014)
|
||||
# Aspherical shear demo - 2d line box and triangle mixture, implicit solvent
|
||||
|
||||
units lj
|
||||
atom_style line
|
||||
dimension 2
|
||||
|
||||
read_data data.line
|
||||
orthogonal box = (-22.5539 -22.5539 -0.5) to (22.5539 22.5539 0.5)
|
||||
4 by 2 by 1 MPI processor grid
|
||||
reading atoms ...
|
||||
350 atoms
|
||||
350 lines
|
||||
|
||||
velocity all create 1.44 320984 loop geom
|
||||
|
||||
neighbor 0.3 bin
|
||||
neigh_modify delay 0 every 1 check yes
|
||||
neigh_modify exclude molecule all
|
||||
|
||||
pair_style line/lj 2.5
|
||||
pair_coeff * * 1.0 0.25
|
||||
|
||||
fix 2 all rigid molecule langevin 2.0 2.0 1.0 492983
|
||||
100 rigid bodies with 350 atoms
|
||||
|
||||
fix 3 all deform 1 x scale 0.3 y scale 0.3
|
||||
fix 4 all enforce2d
|
||||
|
||||
compute 10 all property/atom end1x end1y end2x end2y
|
||||
|
||||
#dump 1 all custom 500 dump1.atom id type x y z ix iy iz
|
||||
#dump 2 all custom 500 dump1.line id type # c_10[1] c_10[2] c_10[3] c_10[4]
|
||||
|
||||
timestep 0.004
|
||||
|
||||
compute 1 all erotate/asphere
|
||||
compute 2 all ke
|
||||
compute 3 all pe
|
||||
variable toteng equal (c_1+c_2+c_3)/atoms
|
||||
|
||||
thermo 1000
|
||||
thermo_style custom step temp f_2 pe ke c_1 c_2 c_3 v_toteng
|
||||
|
||||
run 10000
|
||||
Memory usage per processor = 2.6072 Mbytes
|
||||
Step Temp 2 PotEng KinEng 1 2 3 toteng
|
||||
0 0 1.2780105 0 -0 0.037823677 0.50989511 0 0.54771879
|
||||
1000 0 1.9896906 -0.13333756 -0 0.12630626 0.72641827 -0.13333756 0.71938697
|
||||
2000 0 2.0408541 -0.24906647 -0 0.13199238 0.74265938 -0.24906647 0.62558529
|
||||
3000 0 1.9921323 -0.39759798 -0 0.11671173 0.73705927 -0.39759798 0.45617302
|
||||
4000 0 2.1392159 -0.36475197 -0 0.11923802 0.7975688 -0.36475197 0.55205485
|
||||
5000 0 2.139715 -0.52582091 -0 0.15846417 0.75855653 -0.52582091 0.39119979
|
||||
6000 0 2.1313904 -0.65532027 -0 0.11090422 0.80254883 -0.65532027 0.25813278
|
||||
7000 0 1.9416614 -0.81322598 -0 0.11722471 0.71491587 -0.81322598 0.018914608
|
||||
8000 0 1.9388183 -1.0581149 -0 0.10142762 0.72949452 -1.0581149 -0.22719275
|
||||
9000 0 2.2830265 -1.583347 -0 0.14583927 0.83260066 -1.583347 -0.60490709
|
||||
10000 0 4.1416666 -2.6034045 -0 0.18839177 1.5866082 -2.6034045 -0.82840455
|
||||
Loop time of 2.84713 on 8 procs for 10000 steps with 350 atoms
|
||||
|
||||
Pair time (%) = 1.26607 (44.4682)
|
||||
Neigh time (%) = 0.0163046 (0.572669)
|
||||
Comm time (%) = 0.786148 (27.612)
|
||||
Outpt time (%) = 0.000368953 (0.0129588)
|
||||
Other time (%) = 0.778241 (27.3342)
|
||||
|
||||
Nlocal: 43.75 ave 51 max 39 min
|
||||
Histogram: 1 2 0 2 0 1 1 0 0 1
|
||||
Nghost: 170.25 ave 180 max 160 min
|
||||
Histogram: 2 1 0 0 0 0 2 0 1 2
|
||||
Neighs: 963.125 ave 1209 max 767 min
|
||||
Histogram: 2 0 2 0 1 0 1 0 1 1
|
||||
|
||||
Total # of neighbors = 7705
|
||||
Ave neighs/atom = 22.0143
|
||||
Neighbor list builds = 987
|
||||
Dangerous builds = 0
|
||||
|
||||
#undump 1
|
||||
#undump 2
|
||||
unfix 3
|
||||
|
||||
change_box all triclinic
|
||||
triclinic box = (-6.76616 -6.76616 -0.5) to (6.76616 6.76616 0.5) with tilt (0 0 0)
|
||||
|
||||
#dump 1 all custom 500 dump2.atom id type x y z ix iy iz
|
||||
#dump 2 all custom 500 dump2.line id type # c_10[1] c_10[2] c_10[3] c_10[4]
|
||||
|
||||
fix 3 all deform 1 xy erate 0.01 units box
|
||||
|
||||
run 100000
|
||||
Memory usage per processor = 2.75978 Mbytes
|
||||
Step Temp 2 PotEng KinEng 1 2 3 toteng
|
||||
10000 0 4.1416666 -2.6024092 -0 0.18839177 1.5866082 -2.6024092 -0.82740923
|
||||
11000 0 1.8408319 -3.1031477 -0 0.12073234 0.66819561 -3.1031477 -2.3142198
|
||||
12000 0 2.0793172 -3.0329681 -0 0.086927592 0.80420833 -3.0329681 -2.1418322
|
||||
13000 0 2.2022136 -2.99776 -0 0.14309291 0.8007129 -2.99776 -2.0539542
|
||||
14000 0 1.9510757 -3.094649 -0 0.09482969 0.74134559 -3.094649 -2.2584737
|
||||
15000 0 1.9874689 -3.1431753 -0 0.1083061 0.7434663 -3.1431753 -2.2914029
|
||||
16000 0 1.8484778 -2.9491537 -0 0.079102883 0.71310191 -2.9491537 -2.1569489
|
||||
17000 0 2.1978438 -2.9675694 -0 0.11677634 0.82515673 -2.9675694 -2.0256363
|
||||
18000 0 2.0293397 -2.9860257 -0 0.1287845 0.7409325 -2.9860257 -2.1163087
|
||||
19000 0 2.0077219 -3.005622 -0 0.12697603 0.7334762 -3.005622 -2.1451698
|
||||
20000 0 2.1806369 -3.0622132 -0 0.11066657 0.82389212 -3.0622132 -2.1276545
|
||||
21000 0 1.8156509 -3.1031481 -0 0.10227614 0.67585994 -3.1031481 -2.325012
|
||||
22000 0 2.1028516 -3.0861182 -0 0.098877162 0.80234493 -3.0861182 -2.1848961
|
||||
23000 0 1.8994891 -3.0110243 -0 0.10961187 0.70445488 -3.0110243 -2.1969576
|
||||
24000 0 1.9305389 -3.0057136 -0 0.11735151 0.7100223 -3.0057136 -2.1783398
|
||||
25000 0 1.9553918 -3.0848948 -0 0.13217467 0.70585039 -3.0848948 -2.2468697
|
||||
26000 0 1.8903754 -2.9543658 -0 0.099925113 0.71023579 -2.9543658 -2.1442049
|
||||
27000 0 2.2624684 -3.2416154 -0 0.11398815 0.85564117 -3.2416154 -2.2719861
|
||||
28000 0 2.0335234 -3.1795174 -0 0.10291986 0.76859015 -3.1795174 -2.3080074
|
||||
29000 0 1.7056403 -3.1198739 -0 0.076174496 0.65481419 -3.1198739 -2.3888853
|
||||
30000 0 2.1203465 -3.0863113 -0 0.11355683 0.79516311 -3.0863113 -2.1775914
|
||||
31000 0 1.8446708 -3.2764357 -0 0.11006455 0.68050865 -3.2764357 -2.4858625
|
||||
32000 0 1.7947968 -3.081031 -0 0.087540776 0.68165784 -3.081031 -2.3118324
|
||||
33000 0 2.2576228 -3.1042222 -0 0.15666855 0.81088407 -3.1042222 -2.1366696
|
||||
34000 0 1.8522306 -3.0037311 -0 0.08276626 0.71104684 -3.0037311 -2.209918
|
||||
35000 0 2.0611686 -2.9877406 -0 0.10822003 0.77513794 -2.9877406 -2.1043826
|
||||
36000 0 2.0739798 -2.981184 -0 0.11784198 0.77100651 -2.981184 -2.0923355
|
||||
37000 0 2.2120215 -2.8961216 -0 0.13172943 0.8162798 -2.8961216 -1.9481124
|
||||
38000 0 2.1097357 -2.9746777 -0 0.11881736 0.78535507 -2.9746777 -2.0705053
|
||||
39000 0 1.8928127 -2.9482169 -0 0.10915752 0.70204792 -2.9482169 -2.1370115
|
||||
40000 0 1.8515483 -3.003524 -0 0.096981256 0.69653943 -3.003524 -2.2100033
|
||||
41000 0 1.8569065 -3.0834675 -0 0.10562739 0.6901897 -3.0834675 -2.2876504
|
||||
42000 0 2.2638459 -3.0577224 -0 0.10896442 0.86125524 -3.0577224 -2.0875027
|
||||
43000 0 2.0992151 -3.1219185 -0 0.13280434 0.76685929 -3.1219185 -2.2222548
|
||||
44000 0 2.3479986 -3.0702445 -0 0.14438131 0.86190379 -3.0702445 -2.0639594
|
||||
45000 0 2.0295235 -2.9619688 -0 0.13726365 0.73253213 -2.9619688 -2.092173
|
||||
46000 0 2.0597998 -3.1282569 -0 0.10139093 0.7813804 -3.1282569 -2.2454856
|
||||
47000 0 1.8173541 -3.1122647 -0 0.10380346 0.67506259 -3.1122647 -2.3333986
|
||||
48000 0 1.9650208 -3.22153 -0 0.10115952 0.74099226 -3.22153 -2.3793782
|
||||
49000 0 1.8836303 -3.0781944 -0 0.10561306 0.70165705 -3.0781944 -2.2709243
|
||||
50000 0 1.7799289 -3.1090208 -0 0.10522105 0.65760561 -3.1090208 -2.3461941
|
||||
51000 0 1.7270244 -2.894789 -0 0.079316066 0.66083727 -2.894789 -2.1546357
|
||||
52000 0 2.1036512 -3.0708266 -0 0.11762021 0.7839446 -3.0708266 -2.1692618
|
||||
53000 0 2.1749106 -3.043193 -0 0.11908953 0.81301499 -3.043193 -2.1110885
|
||||
54000 0 1.7245018 -3.1528646 -0 0.11118993 0.62788226 -3.1528646 -2.4137924
|
||||
55000 0 1.7599209 -3.0543405 -0 0.098996756 0.65525507 -3.0543405 -2.3000886
|
||||
56000 0 1.9085953 -3.1072383 -0 0.10931083 0.70865859 -3.1072383 -2.2892689
|
||||
57000 0 1.9682028 -3.131335 -0 0.094973379 0.74854212 -3.131335 -2.2878195
|
||||
58000 0 1.7813545 -3.167275 -0 0.085523136 0.6779145 -3.167275 -2.4038374
|
||||
59000 0 2.0324497 -3.1103534 -0 0.13343285 0.73761703 -3.1103534 -2.2393035
|
||||
60000 0 1.697349 -3.152831 -0 0.086035815 0.64139945 -3.152831 -2.4253957
|
||||
61000 0 2.061537 -3.0730484 -0 0.12753143 0.75598441 -3.0730484 -2.1895325
|
||||
62000 0 1.8186587 -3.1590894 -0 0.096701457 0.68272371 -3.1590894 -2.3796643
|
||||
63000 0 1.9692063 -3.1053354 -0 0.094864849 0.7490807 -3.1053354 -2.2613899
|
||||
64000 0 2.0113722 -3.0898117 -0 0.12640828 0.73560838 -3.0898117 -2.227795
|
||||
65000 0 1.9350085 -3.2290712 -0 0.11850297 0.71078638 -3.2290712 -2.3997819
|
||||
66000 0 2.0569306 -3.0489252 -0 0.12731012 0.75423156 -3.0489252 -2.1673835
|
||||
67000 0 2.0119071 -3.10894 -0 0.099142151 0.76310375 -3.10894 -2.2466941
|
||||
68000 0 2.0615321 -3.167552 -0 0.11066625 0.77284751 -3.167552 -2.2840383
|
||||
69000 0 1.8731077 -3.1864825 -0 0.12982742 0.672933 -3.1864825 -2.3837221
|
||||
70000 0 2.109088 -3.1787326 -0 0.13107619 0.77281866 -3.1787326 -2.2748377
|
||||
71000 0 1.9764202 -3.0731116 -0 0.12035585 0.72668137 -3.0731116 -2.2260744
|
||||
72000 0 1.6460591 -3.0865952 -0 0.09486718 0.61058673 -3.0865952 -2.3811413
|
||||
73000 0 1.9756864 -3.0431747 -0 0.1041775 0.74254526 -3.0431747 -2.196452
|
||||
74000 0 2.0553658 -3.1290715 -0 0.12267262 0.75819842 -3.1290715 -2.2482004
|
||||
75000 0 1.550997 -2.9813403 -0 0.094350599 0.57036238 -2.9813403 -2.3166273
|
||||
76000 0 1.9824851 -3.0057088 -0 0.11621875 0.73341774 -3.0057088 -2.1560723
|
||||
77000 0 1.8451505 -3.0063772 -0 0.12602143 0.66475735 -3.0063772 -2.2155984
|
||||
78000 0 1.934755 -3.0603773 -0 0.11800375 0.71117696 -3.0603773 -2.2311966
|
||||
79000 0 2.1982093 -3.095224 -0 0.12840585 0.81368383 -3.095224 -2.1531343
|
||||
80000 0 1.7547828 -3.0620364 -0 0.092552269 0.65949751 -3.0620364 -2.3099866
|
||||
81000 0 2.1064588 -3.1631358 -0 0.11415036 0.78861768 -3.1631358 -2.2603677
|
||||
82000 0 2.0039288 -3.1190532 -0 0.13374009 0.72508654 -3.1190532 -2.2602266
|
||||
83000 0 1.7749465 -3.2172309 -0 0.11017601 0.65051537 -3.2172309 -2.4565396
|
||||
84000 0 1.7865147 -3.1433093 -0 0.086859958 0.67878922 -3.1433093 -2.3776602
|
||||
85000 0 1.4541199 -3.0123872 -0 0.096301993 0.52689225 -3.0123872 -2.3891929
|
||||
86000 0 2.0345567 -2.9577061 -0 0.11756878 0.7543841 -2.9577061 -2.0857532
|
||||
87000 0 1.9655671 -3.0235755 -0 0.13068174 0.71170417 -3.0235755 -2.1811896
|
||||
88000 0 2.1238733 -3.0972095 -0 0.15660293 0.75362848 -3.0972095 -2.1869781
|
||||
89000 0 1.93413 -3.0018414 -0 0.11736651 0.71154634 -3.0018414 -2.1729285
|
||||
90000 0 2.0106701 -3.0778989 -0 0.1150105 0.74670527 -3.0778989 -2.2161831
|
||||
91000 0 1.9700224 -3.0072749 -0 0.1200151 0.72428023 -3.0072749 -2.1629796
|
||||
92000 0 2.3062898 -3.1245149 -0 0.16045862 0.82795128 -3.1245149 -2.136105
|
||||
93000 0 1.9977984 -3.1072212 -0 0.098381856 0.75781746 -3.1072212 -2.2510219
|
||||
94000 0 1.9684844 -3.0923519 -0 0.11228362 0.73135255 -3.0923519 -2.2487157
|
||||
95000 0 1.8913269 -3.1044766 -0 0.088944817 0.72162386 -3.1044766 -2.293908
|
||||
96000 0 2.139258 -2.9032081 -0 0.12122357 0.7956013 -2.9032081 -1.9863833
|
||||
97000 0 2.1549839 -3.0073823 -0 0.14177897 0.78178555 -3.0073823 -2.0838178
|
||||
98000 0 2.001084 -3.0411655 -0 0.16549603 0.69211139 -3.0411655 -2.1835581
|
||||
99000 0 2.269108 -3.0749812 -0 0.11370514 0.85876972 -3.0749812 -2.1025064
|
||||
100000 0 2.0270482 -3.0802101 -0 0.098599302 0.77013563 -3.0802101 -2.2114752
|
||||
101000 0 1.9412796 -3.1543623 -0 0.11953004 0.71244692 -3.1543623 -2.3223853
|
||||
102000 0 2.0146461 -3.0140006 -0 0.10152606 0.7618937 -3.0140006 -2.1505808
|
||||
103000 0 1.7377282 -3.1862597 -0 0.082111131 0.66262952 -3.1862597 -2.441519
|
||||
104000 0 1.7973897 -3.1055088 -0 0.093370304 0.67693958 -3.1055088 -2.3351989
|
||||
105000 0 2.2615194 -2.9636424 -0 0.12584102 0.84338157 -2.9636424 -1.9944198
|
||||
106000 0 1.8974182 -2.9505576 -0 0.085928679 0.72725057 -2.9505576 -2.1373783
|
||||
107000 0 2.0691161 -3.0804349 -0 0.12411017 0.76265389 -3.0804349 -2.1936708
|
||||
108000 0 2.0457472 -2.981702 -0 0.09308074 0.78366806 -2.981702 -2.1049532
|
||||
109000 0 1.6610604 -3.1463569 -0 0.066318676 0.64556436 -3.1463569 -2.4344738
|
||||
110000 0 1.9606721 -3.080013 -0 0.13164067 0.70864736 -3.080013 -2.2397249
|
||||
Loop time of 62.2251 on 8 procs for 100000 steps with 350 atoms
|
||||
|
||||
Pair time (%) = 43.4946 (69.8987)
|
||||
Neigh time (%) = 0.395421 (0.635469)
|
||||
Comm time (%) = 10.3551 (16.6414)
|
||||
Outpt time (%) = 0.00358662 (0.00576394)
|
||||
Other time (%) = 7.97644 (12.8187)
|
||||
|
||||
Nlocal: 43.75 ave 51 max 33 min
|
||||
Histogram: 1 0 1 0 0 2 0 1 2 1
|
||||
Nghost: 168.375 ave 180 max 155 min
|
||||
Histogram: 1 1 0 0 1 0 2 2 0 1
|
||||
Neighs: 971 ave 1278 max 631 min
|
||||
Histogram: 1 1 0 1 0 1 2 1 0 1
|
||||
|
||||
Total # of neighbors = 7768
|
||||
Ave neighs/atom = 22.1943
|
||||
Neighbor list builds = 7621
|
||||
Dangerous builds = 0
|
||||
@ -1,244 +0,0 @@
|
||||
LAMMPS (1 Feb 2014)
|
||||
# Aspherical shear demo - 2d line boxes, solvated by SRD particles
|
||||
|
||||
units lj
|
||||
atom_style line
|
||||
atom_modify first big
|
||||
dimension 2
|
||||
|
||||
read_data data.line.srd
|
||||
orthogonal box = (-28.7968 -28.7968 -0.5) to (28.7968 28.7968 0.5)
|
||||
4 by 2 by 1 MPI processor grid
|
||||
reading atoms ...
|
||||
400 atoms
|
||||
400 lines
|
||||
|
||||
# add small particles as hi density lattice
|
||||
|
||||
lattice sq 0.4
|
||||
Lattice spacing in x,y,z = 1.58114 1.58114 1.58114
|
||||
region plane block INF INF INF INF -0.001 0.001
|
||||
lattice sq 10.0
|
||||
Lattice spacing in x,y,z = 0.316228 0.316228 0.316228
|
||||
create_atoms 2 region plane
|
||||
Created 33489 atoms
|
||||
|
||||
group big type 1
|
||||
400 atoms in group big
|
||||
group small type 2
|
||||
33489 atoms in group small
|
||||
set group small mass 0.01
|
||||
33489 settings made for mass
|
||||
|
||||
# delete overlaps
|
||||
# must set 1-2 cutoff to non-zero value
|
||||
|
||||
pair_style lj/cut 1.5
|
||||
pair_coeff 1 1 1.0 1.0
|
||||
pair_coeff 2 2 0.0 1.0 0.0
|
||||
pair_coeff 1 2 0.0 1.0
|
||||
|
||||
delete_atoms overlap 1.5 small big
|
||||
Deleted 13605 atoms, new total = 20284
|
||||
|
||||
# SRD run
|
||||
|
||||
reset_timestep 0
|
||||
|
||||
velocity small create 1.44 87287 loop geom
|
||||
|
||||
neighbor 0.3 bin
|
||||
neigh_modify delay 0 every 1 check yes
|
||||
neigh_modify exclude molecule big include big
|
||||
|
||||
communicate multi group big vel yes
|
||||
neigh_modify include big
|
||||
|
||||
# no pairwise interactions with small particles
|
||||
|
||||
pair_style line/lj 2.5
|
||||
pair_coeff 1 1 1.0 1.0
|
||||
pair_coeff 2 2 0.0 1.0 0.0
|
||||
pair_coeff 1 2 0.0 1.0 0.0
|
||||
|
||||
# use fix SRD to push small particles out from inside big ones
|
||||
# if comment out, big particles won't see SRD particles
|
||||
|
||||
timestep 0.001
|
||||
|
||||
fix 1 big rigid molecule
|
||||
100 rigid bodies with 400 atoms
|
||||
fix 2 small srd 20 big 1.0 0.25 49894 search 0.2 cubic warn 0.0001 shift yes 49829 overlap yes collision noslip
|
||||
|
||||
fix 3 all deform 1 x scale 0.35 y scale 0.35
|
||||
fix 4 all enforce2d
|
||||
|
||||
# diagnostics
|
||||
|
||||
compute tsmall small temp/deform
|
||||
compute tbig big temp
|
||||
variable pebig equal pe*atoms/count(big)
|
||||
variable ebig equal etotal*atoms/count(big)
|
||||
|
||||
compute 1 big erotate/asphere
|
||||
compute 2 all ke
|
||||
compute 3 all pe
|
||||
variable toteng equal (c_1+c_2+c_3)/atoms
|
||||
|
||||
thermo 1000
|
||||
thermo_style custom step temp c_tsmall f_2[9] c_1 etotal v_pebig v_ebig press
|
||||
thermo_modify temp tbig
|
||||
WARNING: Temperature for thermo pressure is not for group all (../thermo.cpp:439)
|
||||
|
||||
compute 10 big property/atom end1x end1y end2x end2y
|
||||
|
||||
#dump 1 all custom 500 dump1.atom.srd id type x y z ix iy iz
|
||||
#dump 2 all custom 500 dump1.line.srd id type # c_10[1] c_10[2] c_10[3] c_10[4]
|
||||
|
||||
run 10000
|
||||
WARNING: Using fix srd with box deformation but no SRD thermostat (../fix_srd.cpp:385)
|
||||
SRD info:
|
||||
SRD/big particles = 19884 400
|
||||
big particle diameter max/min = 1.99882 0.503306
|
||||
SRD temperature & lamda = 1 0.2
|
||||
SRD max distance & max velocity = 0.8 40
|
||||
SRD grid counts: 230 230 1
|
||||
SRD grid size: request, actual (xyz) = 0.25, 0.250407 0.250407 1
|
||||
SRD per actual grid cell = 0.444963
|
||||
SRD viscosity = 4.2356
|
||||
big/SRD mass density ratio = 14.0918
|
||||
WARNING: SRD bin size for fix srd differs from user request (../fix_srd.cpp:2853)
|
||||
WARNING: Fix srd grid size > 1/4 of big particle diameter (../fix_srd.cpp:2875)
|
||||
# of rescaled SRD velocities = 0
|
||||
ave/max small velocity = 15.906 29.1054
|
||||
ave/max big velocity = 0 0
|
||||
WARNING: Using compute temp/deform with inconsistent fix deform remap option (../compute_temp_deform.cpp:76)
|
||||
Memory usage per processor = 7.79007 Mbytes
|
||||
Step Temp tsmall 2[9] 1 TotEng pebig ebig Press
|
||||
0 0 1.4528554 0 0 0 0 0 0
|
||||
1000 0 1.1122612 1.1071958 0.00058011072 0 0 0 0.32625408
|
||||
2000 0 1.0254475 1.0231236 0.00072347646 -2.3144253e-06 -0.00011736451 -0.00011736451 0.44526308
|
||||
3000 0 1.0089214 0.99966408 0.00061511441 -0.00023298189 -0.011814512 -0.011814512 0.87208025
|
||||
4000 0 1.0066185 0.99391102 0.00079065587 -0.0021630644 -0.10968899 -0.10968899 1.3901494
|
||||
5000 0 1.0109003 1.0009124 0.00071588733 -0.0040023493 -0.20295913 -0.20295913 1.2736445
|
||||
6000 0 1.0119642 1.0105049 0.00080741361 -0.0067908165 -0.34436231 -0.34436231 2.5492857
|
||||
7000 0 1.0048989 0.98912274 0.00098851463 -0.010933999 -0.55446309 -0.55446309 3.5707613
|
||||
8000 0 1.0021597 0.99377939 0.0008405671 -0.015690841 -0.79568252 -0.79568252 6.685381
|
||||
9000 0 1.0197546 1.00114 0.0012645557 -0.021124602 -1.0712286 -1.0712286 18.907993
|
||||
10000 0 1.0401079 1.0184189 0.0036111452 0.0099395852 0.50403637 0.50403637 96.909257
|
||||
Loop time of 3.93391 on 8 procs for 10000 steps with 20284 atoms
|
||||
|
||||
Pair time (%) = 0.110261 (2.80284)
|
||||
Neigh time (%) = 0.32853 (8.35124)
|
||||
Comm time (%) = 0.199613 (5.07416)
|
||||
Outpt time (%) = 0.00108692 (0.0276295)
|
||||
Other time (%) = 3.29442 (83.7441)
|
||||
|
||||
Nlocal: 2535.5 ave 2850 max 2226 min
|
||||
Histogram: 1 0 1 1 1 1 1 0 1 1
|
||||
Nghost: 109.25 ave 117 max 102 min
|
||||
Histogram: 1 0 2 0 1 1 1 1 0 1
|
||||
Neighs: 526 ave 665 max 407 min
|
||||
Histogram: 1 2 0 1 0 1 1 1 0 1
|
||||
|
||||
Total # of neighbors = 4208
|
||||
Ave neighs/atom = 0.207454
|
||||
Neighbor list builds = 522
|
||||
Dangerous builds = 0
|
||||
|
||||
#undump 1
|
||||
#undump 2
|
||||
unfix 3
|
||||
|
||||
change_box all triclinic
|
||||
triclinic box = (-10.0789 -10.0789 -0.5) to (10.0789 10.0789 0.5) with tilt (0 0 0)
|
||||
|
||||
fix 2 small srd 20 big 1.0 0.25 49894 search 0.2 cubic warn 0.0001 shift yes 49829 overlap yes collision noslip tstat yes
|
||||
|
||||
#dump 1 all custom 500 dump2.atom.srd id type x y z ix iy iz
|
||||
#dump 2 all custom 500 dump2.line.srd id type # c_10[1] c_10[2] c_10[3] c_10[4]
|
||||
|
||||
fix 3 all deform 1 xy erate 0.05 units box remap v
|
||||
|
||||
run 40000
|
||||
SRD info:
|
||||
SRD/big particles = 19884 400
|
||||
big particle diameter max/min = 1.99882 0.503306
|
||||
SRD temperature & lamda = 1 0.2
|
||||
SRD max distance & max velocity = 0.8 40
|
||||
SRD grid counts: 81 81 1
|
||||
SRD grid size: request, actual (xyz) = 0.25, 0.248861 0.248861 1
|
||||
SRD per actual grid cell = -11.3331
|
||||
SRD viscosity = -1920.36
|
||||
big/SRD mass density ratio = -0.546468
|
||||
WARNING: SRD bin size for fix srd differs from user request (../fix_srd.cpp:2853)
|
||||
WARNING: Fix srd grid size > 1/4 of big particle diameter (../fix_srd.cpp:2875)
|
||||
WARNING: Fix srd viscosity < 0.0 due to low SRD density (../fix_srd.cpp:2877)
|
||||
# of rescaled SRD velocities = 1
|
||||
ave/max small velocity = 12.724 40
|
||||
ave/max big velocity = 1.54523 5.36901
|
||||
Memory usage per processor = 4.23847 Mbytes
|
||||
Step Temp tsmall 2[9] 1 TotEng pebig ebig Press
|
||||
10000 0 1.0277413 0 0.0036111452 0.010135973 0.51399517 0.51399517 60.794865
|
||||
11000 0 1.0024742 1 0.00040482851 -0.031643325 -1.604633 -1.604633 55.531632
|
||||
12000 0 1.0036177 1 0.00036416993 -0.030844063 -1.5641024 -1.5641024 56.694788
|
||||
13000 0 1.0043067 1 0.00068862721 -0.030673787 -1.5554677 -1.5554677 56.852537
|
||||
14000 0 1.0042533 1 0.00050997053 -0.035967153 -1.8238943 -1.8238943 51.254109
|
||||
15000 0 1.003956 1 0.00048458218 -0.038060147 -1.9300301 -1.9300301 50.346943
|
||||
16000 0 1.0059929 1 0.0004740426 -0.037433402 -1.8982478 -1.8982478 49.82532
|
||||
17000 0 1.0042401 1 0.00039855238 -0.034314446 -1.7400855 -1.7400855 48.430648
|
||||
18000 0 1.0025952 1 0.00046207703 -0.034166102 -1.732563 -1.732563 53.883613
|
||||
19000 0 1.0025881 1 0.00034818943 -0.037341367 -1.8935807 -1.8935807 48.90662
|
||||
20000 0 1.0009932 1 0.00031244041 -0.035274728 -1.7887814 -1.7887814 47.665935
|
||||
21000 0 1.0025603 1 0.0005514826 -0.034350221 -1.7418997 -1.7418997 63.290704
|
||||
22000 0 1.0038575 1 0.00034453716 -0.03576596 -1.8136918 -1.8136918 49.805726
|
||||
23000 0 1.0030321 1 0.00049535709 -0.035873083 -1.819124 -1.819124 48.973244
|
||||
24000 0 1.0048593 1 0.00034225992 -0.03322158 -1.6846663 -1.6846663 52.557383
|
||||
25000 0 1.0024423 1 0.00059157362 -0.030152319 -1.5290241 -1.5290241 56.733821
|
||||
26000 0 1.0022703 1 0.00047512976 -0.029563064 -1.499143 -1.499143 56.271943
|
||||
27000 0 1.0024537 1 0.00054158319 -0.02957039 -1.4995145 -1.4995145 55.246787
|
||||
28000 0 1.0023325 1 0.00051129428 -0.035115211 -1.7806924 -1.7806924 49.718172
|
||||
29000 0 1.002865 1 0.00056728135 -0.035484703 -1.7994293 -1.7994293 48.387401
|
||||
30000 0 1.0022693 1 0.00040284402 -0.033844303 -1.7162446 -1.7162446 50.486676
|
||||
31000 0 1.0037106 1 0.00056291948 -0.036529699 -1.852421 -1.852421 52.011803
|
||||
32000 0 1.0022542 1 0.00041877447 -0.036903458 -1.8713743 -1.8713743 52.712289
|
||||
33000 0 1.0021364 1 0.00040871451 -0.033616728 -1.7047043 -1.7047043 55.199593
|
||||
34000 0 1.003779 1 0.00040667148 -0.031745412 -1.6098098 -1.6098098 51.055261
|
||||
35000 0 1.0015547 1 0.00042509365 -0.033603064 -1.7040114 -1.7040114 53.887389
|
||||
36000 0 1.0024849 1 0.00040589851 -0.032151258 -1.6303903 -1.6303903 55.673615
|
||||
37000 0 1.0038204 1 0.00040542177 -0.033643242 -1.7060488 -1.7060488 51.834694
|
||||
38000 0 1.0031777 1 0.00040237126 -0.034711811 -1.7602359 -1.7602359 56.275778
|
||||
39000 0 1.002777 1 0.00051021239 -0.034375104 -1.7431615 -1.7431615 48.461145
|
||||
40000 0 1.0053762 1 0.00041566465 -0.036016384 -1.8263908 -1.8263908 48.937456
|
||||
41000 0 1.0039078 1 0.00049726673 -0.036481888 -1.8499966 -1.8499966 49.290465
|
||||
42000 0 1.0033408 1 0.00045236191 -0.034254524 -1.7370469 -1.7370469 52.146603
|
||||
43000 0 1.0039893 1 0.00048362351 -0.032519474 -1.6490625 -1.6490625 53.320599
|
||||
44000 0 1.0036667 1 0.00046834006 -0.031099875 -1.5770747 -1.5770747 55.907531
|
||||
45000 0 1.0035765 1 0.0005195031 -0.029559708 -1.4989728 -1.4989728 56.191165
|
||||
46000 0 1.004514 1 0.00050134348 -0.028852909 -1.463131 -1.463131 57.547696
|
||||
47000 0 1.003864 1 0.00038963389 -0.031339229 -1.5892123 -1.5892123 55.775764
|
||||
48000 0 1.003145 1 0.00035436452 -0.032390682 -1.6425315 -1.6425315 55.233354
|
||||
49000 0 1.0042615 1 0.00053161075 -0.029960468 -1.5192953 -1.5192953 54.611685
|
||||
50000 0 1.0037096 1 0.00031191389 -0.02942011 -1.4918938 -1.4918938 58.653762
|
||||
Loop time of 27.1275 on 8 procs for 40000 steps with 20284 atoms
|
||||
|
||||
Pair time (%) = 1.54277 (5.68709)
|
||||
Neigh time (%) = 1.26864 (4.67658)
|
||||
Comm time (%) = 1.19871 (4.4188)
|
||||
Outpt time (%) = 0.00421953 (0.0155544)
|
||||
Other time (%) = 23.1132 (85.202)
|
||||
|
||||
Nlocal: 2535.5 ave 2670 max 2406 min
|
||||
Histogram: 1 1 1 0 0 2 1 1 0 1
|
||||
Nghost: 114.5 ave 124 max 103 min
|
||||
Histogram: 1 0 1 0 2 1 0 1 0 2
|
||||
Neighs: 521.5 ave 692 max 442 min
|
||||
Histogram: 3 1 1 1 0 0 0 1 0 1
|
||||
|
||||
Total # of neighbors = 4172
|
||||
Ave neighs/atom = 0.205679
|
||||
Neighbor list builds = 2002
|
||||
Dangerous builds = 0
|
||||
|
||||
Please see the log.cite file for references relevant to this simulation
|
||||
|
||||
189
examples/ASPHERE/line/log.21Sep24.line.g++.1
Normal file
189
examples/ASPHERE/line/log.21Sep24.line.g++.1
Normal file
@ -0,0 +1,189 @@
|
||||
LAMMPS (29 Aug 2024 - Development - patch_29Aug2024-296-g061c80104a-modified)
|
||||
using 1 OpenMP thread(s) per MPI task
|
||||
# Aspherical shear demo - 2d line box and triangle mixture, implicit solvent
|
||||
|
||||
units lj
|
||||
atom_style line
|
||||
dimension 2
|
||||
|
||||
read_data data.line
|
||||
Reading data file ...
|
||||
orthogonal box = (-22.553882 -22.553882 -0.5) to (22.553882 22.553882 0.5)
|
||||
1 by 1 by 1 MPI processor grid
|
||||
reading atoms ...
|
||||
350 atoms
|
||||
350 lines
|
||||
read_data CPU = 0.003 seconds
|
||||
|
||||
velocity all create 1.44 320984 loop geom
|
||||
|
||||
neighbor 0.3 bin
|
||||
neigh_modify delay 0 every 1 check yes
|
||||
neigh_modify exclude molecule/intra all
|
||||
|
||||
pair_style line/lj 2.5
|
||||
pair_coeff * * 0.25 0.25 1.0 0.25 2.5
|
||||
|
||||
fix 2 all rigid molecule langevin 2.0 2.0 1.0 492983
|
||||
100 rigid bodies with 350 atoms
|
||||
|
||||
fix 3 all deform 5 x scale 0.8 y scale 0.8
|
||||
fix 4 all enforce2d
|
||||
|
||||
compute 10 all property/atom end1x end1y end2x end2y
|
||||
|
||||
#dump 1 all custom 500 dump1.atom id type x y z ix iy iz
|
||||
#dump 2 all custom 500 dump1.line id type # c_10[1] c_10[2] c_10[3] c_10[4]
|
||||
|
||||
timestep 0.001
|
||||
|
||||
compute 1 all erotate/asphere
|
||||
compute 2 all ke
|
||||
compute 3 all pe
|
||||
variable toteng equal (c_1+c_2+c_3)/atoms
|
||||
|
||||
compute_modify thermo_temp extra/dof -350
|
||||
|
||||
thermo 1000
|
||||
thermo_style custom step f_2 pe ke c_1 c_2 c_3 v_toteng
|
||||
|
||||
run 10000
|
||||
|
||||
CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
|
||||
|
||||
Your simulation uses code contributions which should be cited:
|
||||
|
||||
- Type Label Framework: https://doi.org/10.1021/acs.jpcb.3c08419
|
||||
|
||||
@Article{Gissinger24,
|
||||
author = {Jacob R. Gissinger, Ilia Nikiforov, Yaser Afshar, Brendon Waters, Moon-ki Choi, Daniel S. Karls, Alexander Stukowski, Wonpil Im, Hendrik Heinz, Axel Kohlmeyer, and Ellad B. Tadmor},
|
||||
title = {Type Label Framework for Bonded Force Fields in LAMMPS},
|
||||
journal = {J. Phys. Chem. B},
|
||||
year = 2024,
|
||||
volume = 128,
|
||||
number = 13,
|
||||
pages = {3282–-3297}
|
||||
}
|
||||
|
||||
CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
|
||||
|
||||
Generated 0 of 0 mixed pair_coeff terms from geometric mixing rule
|
||||
Neighbor list info ...
|
||||
update: every = 1 steps, delay = 0 steps, check = yes
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 2.8
|
||||
ghost atom cutoff = 2.8
|
||||
binsize = 1.4, bins = 33 33 1
|
||||
1 neighbor lists, perpetual/occasional/extra = 1 0 0
|
||||
(1) pair line/lj, perpetual
|
||||
attributes: half, newton on
|
||||
pair build: half/bin/atomonly/newton
|
||||
stencil: half/bin/2d
|
||||
bin: standard
|
||||
Per MPI rank memory allocation (min/avg/max) = 4.742 | 4.742 | 4.742 Mbytes
|
||||
Step f_2 PotEng KinEng c_1 c_2 c_3 v_toteng
|
||||
0 1.1872976 0 0.46543528 0.03617212 0.46543528 0 0.5016074
|
||||
1000 1.9084412 -0.001043719 0.71003395 0.089891202 0.71003395 -0.001043719 0.79888143
|
||||
2000 2.31668 -0.020711665 0.83611544 0.13062287 0.83611544 -0.020711665 0.94602664
|
||||
3000 2.3094506 -0.062018072 0.80584123 0.15326943 0.80584123 -0.062018072 0.8970926
|
||||
4000 2.4383442 -0.053659995 0.86864073 0.14696993 0.86864073 -0.053659995 0.96195067
|
||||
5000 2.5885917 -0.15612821 0.90351683 0.17156633 0.90351683 -0.15612821 0.91895495
|
||||
6000 2.1187379 -0.072041135 0.76933527 0.11557939 0.76933527 -0.072041135 0.81287353
|
||||
7000 2.1407592 -0.057727294 0.75154343 0.13827066 0.75154343 -0.057727294 0.8320868
|
||||
8000 2.0661844 -0.097529608 0.71093839 0.14547433 0.71093839 -0.097529608 0.75888311
|
||||
9000 2.1574445 -0.052890857 0.78826145 0.11363136 0.78826145 -0.052890857 0.84900195
|
||||
10000 2.473097 -0.092376489 0.85756337 0.16861278 0.85756337 -0.092376489 0.93379965
|
||||
Loop time of 1.16704 on 1 procs for 10000 steps with 350 atoms
|
||||
|
||||
Performance: 740334.372 tau/day, 8568.685 timesteps/s, 2.999 Matom-step/s
|
||||
99.7% CPU use with 1 MPI tasks x 1 OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 0.80757 | 0.80757 | 0.80757 | 0.0 | 69.20
|
||||
Neigh | 0.0090046 | 0.0090046 | 0.0090046 | 0.0 | 0.77
|
||||
Comm | 0.011808 | 0.011808 | 0.011808 | 0.0 | 1.01
|
||||
Output | 0.000194 | 0.000194 | 0.000194 | 0.0 | 0.02
|
||||
Modify | 0.3317 | 0.3317 | 0.3317 | 0.0 | 28.42
|
||||
Other | | 0.006768 | | | 0.58
|
||||
|
||||
Nlocal: 350 ave 350 max 350 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Nghost: 101 ave 101 max 101 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Neighs: 853 ave 853 max 853 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
|
||||
Total # of neighbors = 853
|
||||
Ave neighs/atom = 2.4371429
|
||||
Neighbor list builds = 274
|
||||
Dangerous builds = 0
|
||||
|
||||
#undump 1
|
||||
#undump 2
|
||||
unfix 3
|
||||
|
||||
change_box all triclinic
|
||||
Changing box ...
|
||||
triclinic box = (-18.043106 -18.043106 -0.5) to (18.043106 18.043106 0.5) with tilt (0 0 0)
|
||||
|
||||
#dump 1 all custom 500 dump2.atom id type x y z ix iy iz
|
||||
#dump 2 all custom 500 dump2.line id type # c_10[1] c_10[2] c_10[3] c_10[4]
|
||||
|
||||
fix 3 all deform 1 xy erate 0.01 units box
|
||||
|
||||
run 10000
|
||||
Generated 0 of 0 mixed pair_coeff terms from geometric mixing rule
|
||||
Neighbor list info ...
|
||||
update: every = 1 steps, delay = 0 steps, check = yes
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 2.8
|
||||
ghost atom cutoff = 2.8
|
||||
binsize = 1.4, bins = 26 26 1
|
||||
1 neighbor lists, perpetual/occasional/extra = 1 0 0
|
||||
(1) pair line/lj, perpetual
|
||||
attributes: half, newton on
|
||||
pair build: half/bin/atomonly/newton/tri
|
||||
stencil: half/bin/2d/tri
|
||||
bin: standard
|
||||
Per MPI rank memory allocation (min/avg/max) = 4.756 | 4.756 | 4.756 Mbytes
|
||||
Step f_2 PotEng KinEng c_1 c_2 c_3 v_toteng
|
||||
10000 2.473097 -0.092380532 0.85753671 0.16863499 0.85753671 -0.092380532 0.93379117
|
||||
11000 2.5185915 -0.1015737 0.84230876 0.197573 0.84230876 -0.1015737 0.93830806
|
||||
12000 2.5971728 -0.13087195 0.90608898 0.17248755 0.90608898 -0.13087195 0.94770458
|
||||
13000 2.5065714 -0.14666584 0.83804307 0.19683483 0.83804307 -0.14666584 0.88821206
|
||||
14000 2.1672357 -0.10720878 0.79713452 0.10973398 0.79713452 -0.10720878 0.79965972
|
||||
15000 2.3444438 -0.14780285 0.81642139 0.15695019 0.81642139 -0.14780285 0.82556873
|
||||
16000 2.2672973 -0.12590007 0.76672668 0.17081014 0.76672668 -0.12590007 0.81163675
|
||||
17000 2.3999651 -0.15464102 0.85708567 0.14289234 0.85708567 -0.15464102 0.84533699
|
||||
18000 2.2024356 -0.14077779 0.76727873 0.14718521 0.76727873 -0.14077779 0.77368615
|
||||
19000 2.5411572 -0.092014576 0.85750101 0.19297197 0.85750101 -0.092014576 0.95845841
|
||||
20000 2.2113691 -0.11944862 0.77295445 0.14564597 0.77295445 -0.11944862 0.79915181
|
||||
Loop time of 1.76388 on 1 procs for 10000 steps with 350 atoms
|
||||
|
||||
Performance: 489827.896 tau/day, 5669.304 timesteps/s, 1.984 Matom-step/s
|
||||
99.6% CPU use with 1 MPI tasks x 1 OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 1.3668 | 1.3668 | 1.3668 | 0.0 | 77.49
|
||||
Neigh | 0.018849 | 0.018849 | 0.018849 | 0.0 | 1.07
|
||||
Comm | 0.013338 | 0.013338 | 0.013338 | 0.0 | 0.76
|
||||
Output | 0.00020434 | 0.00020434 | 0.00020434 | 0.0 | 0.01
|
||||
Modify | 0.35698 | 0.35698 | 0.35698 | 0.0 | 20.24
|
||||
Other | | 0.007667 | | | 0.43
|
||||
|
||||
Nlocal: 350 ave 350 max 350 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Nghost: 119 ave 119 max 119 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Neighs: 997 ave 997 max 997 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
|
||||
Total # of neighbors = 997
|
||||
Ave neighs/atom = 2.8485714
|
||||
Neighbor list builds = 283
|
||||
Dangerous builds = 0
|
||||
Total wall time: 0:00:02
|
||||
189
examples/ASPHERE/line/log.21Sep24.line.g++.4
Normal file
189
examples/ASPHERE/line/log.21Sep24.line.g++.4
Normal file
@ -0,0 +1,189 @@
|
||||
LAMMPS (29 Aug 2024 - Development - patch_29Aug2024-296-g061c80104a-modified)
|
||||
using 1 OpenMP thread(s) per MPI task
|
||||
# Aspherical shear demo - 2d line box and triangle mixture, implicit solvent
|
||||
|
||||
units lj
|
||||
atom_style line
|
||||
dimension 2
|
||||
|
||||
read_data data.line
|
||||
Reading data file ...
|
||||
orthogonal box = (-22.553882 -22.553882 -0.5) to (22.553882 22.553882 0.5)
|
||||
2 by 2 by 1 MPI processor grid
|
||||
reading atoms ...
|
||||
350 atoms
|
||||
350 lines
|
||||
read_data CPU = 0.003 seconds
|
||||
|
||||
velocity all create 1.44 320984 loop geom
|
||||
|
||||
neighbor 0.3 bin
|
||||
neigh_modify delay 0 every 1 check yes
|
||||
neigh_modify exclude molecule/intra all
|
||||
|
||||
pair_style line/lj 2.5
|
||||
pair_coeff * * 0.25 0.25 1.0 0.25 2.5
|
||||
|
||||
fix 2 all rigid molecule langevin 2.0 2.0 1.0 492983
|
||||
100 rigid bodies with 350 atoms
|
||||
|
||||
fix 3 all deform 5 x scale 0.8 y scale 0.8
|
||||
fix 4 all enforce2d
|
||||
|
||||
compute 10 all property/atom end1x end1y end2x end2y
|
||||
|
||||
#dump 1 all custom 500 dump1.atom id type x y z ix iy iz
|
||||
#dump 2 all custom 500 dump1.line id type # c_10[1] c_10[2] c_10[3] c_10[4]
|
||||
|
||||
timestep 0.001
|
||||
|
||||
compute 1 all erotate/asphere
|
||||
compute 2 all ke
|
||||
compute 3 all pe
|
||||
variable toteng equal (c_1+c_2+c_3)/atoms
|
||||
|
||||
compute_modify thermo_temp extra/dof -350
|
||||
|
||||
thermo 1000
|
||||
thermo_style custom step f_2 pe ke c_1 c_2 c_3 v_toteng
|
||||
|
||||
run 10000
|
||||
|
||||
CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
|
||||
|
||||
Your simulation uses code contributions which should be cited:
|
||||
|
||||
- Type Label Framework: https://doi.org/10.1021/acs.jpcb.3c08419
|
||||
|
||||
@Article{Gissinger24,
|
||||
author = {Jacob R. Gissinger, Ilia Nikiforov, Yaser Afshar, Brendon Waters, Moon-ki Choi, Daniel S. Karls, Alexander Stukowski, Wonpil Im, Hendrik Heinz, Axel Kohlmeyer, and Ellad B. Tadmor},
|
||||
title = {Type Label Framework for Bonded Force Fields in LAMMPS},
|
||||
journal = {J. Phys. Chem. B},
|
||||
year = 2024,
|
||||
volume = 128,
|
||||
number = 13,
|
||||
pages = {3282–-3297}
|
||||
}
|
||||
|
||||
CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
|
||||
|
||||
Generated 0 of 0 mixed pair_coeff terms from geometric mixing rule
|
||||
Neighbor list info ...
|
||||
update: every = 1 steps, delay = 0 steps, check = yes
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 2.8
|
||||
ghost atom cutoff = 2.8
|
||||
binsize = 1.4, bins = 33 33 1
|
||||
1 neighbor lists, perpetual/occasional/extra = 1 0 0
|
||||
(1) pair line/lj, perpetual
|
||||
attributes: half, newton on
|
||||
pair build: half/bin/atomonly/newton
|
||||
stencil: half/bin/2d
|
||||
bin: standard
|
||||
Per MPI rank memory allocation (min/avg/max) = 4.736 | 4.736 | 4.736 Mbytes
|
||||
Step f_2 PotEng KinEng c_1 c_2 c_3 v_toteng
|
||||
0 1.1872976 0 0.46543528 0.03617212 0.46543528 0 0.5016074
|
||||
1000 1.9084412 -0.001043719 0.71003395 0.089891202 0.71003395 -0.001043719 0.79888143
|
||||
2000 2.31668 -0.020711665 0.83611544 0.13062287 0.83611544 -0.020711665 0.94602664
|
||||
3000 2.3094506 -0.062018072 0.80584123 0.15326943 0.80584123 -0.062018072 0.8970926
|
||||
4000 2.4383442 -0.053659995 0.86864073 0.14696993 0.86864073 -0.053659995 0.96195067
|
||||
5000 2.5885917 -0.15612821 0.90351683 0.17156633 0.90351683 -0.15612821 0.91895494
|
||||
6000 2.118738 -0.072041141 0.76933528 0.1155794 0.76933528 -0.072041141 0.81287354
|
||||
7000 2.1407609 -0.057727453 0.75154404 0.13827078 0.75154404 -0.057727453 0.83208737
|
||||
8000 2.066348 -0.097639137 0.71096217 0.14551295 0.71096217 -0.097639137 0.75883597
|
||||
9000 2.1969096 -0.050133795 0.8028133 0.11559948 0.8028133 -0.050133795 0.86827899
|
||||
10000 2.3932442 -0.098008752 0.83753779 0.15678193 0.83753779 -0.098008752 0.89631097
|
||||
Loop time of 0.596624 on 4 procs for 10000 steps with 350 atoms
|
||||
|
||||
Performance: 1448149.372 tau/day, 16760.988 timesteps/s, 5.866 Matom-step/s
|
||||
98.2% CPU use with 4 MPI tasks x 1 OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 0.17419 | 0.21073 | 0.27006 | 7.8 | 35.32
|
||||
Neigh | 0.0015899 | 0.0018079 | 0.0020932 | 0.5 | 0.30
|
||||
Comm | 0.041688 | 0.101 | 0.13805 | 11.3 | 16.93
|
||||
Output | 0.00011945 | 0.0001262 | 0.000143 | 0.0 | 0.02
|
||||
Modify | 0.26906 | 0.27183 | 0.27467 | 0.4 | 45.56
|
||||
Other | | 0.01113 | | | 1.87
|
||||
|
||||
Nlocal: 87.5 ave 94 max 80 min
|
||||
Histogram: 1 1 0 0 0 0 0 0 0 2
|
||||
Nghost: 54.5 ave 67 max 42 min
|
||||
Histogram: 1 0 1 0 0 0 0 1 0 1
|
||||
Neighs: 212.75 ave 286 max 122 min
|
||||
Histogram: 1 0 0 1 0 0 0 0 1 1
|
||||
|
||||
Total # of neighbors = 851
|
||||
Ave neighs/atom = 2.4314286
|
||||
Neighbor list builds = 273
|
||||
Dangerous builds = 0
|
||||
|
||||
#undump 1
|
||||
#undump 2
|
||||
unfix 3
|
||||
|
||||
change_box all triclinic
|
||||
Changing box ...
|
||||
triclinic box = (-18.043106 -18.043106 -0.5) to (18.043106 18.043106 0.5) with tilt (0 0 0)
|
||||
|
||||
#dump 1 all custom 500 dump2.atom id type x y z ix iy iz
|
||||
#dump 2 all custom 500 dump2.line id type # c_10[1] c_10[2] c_10[3] c_10[4]
|
||||
|
||||
fix 3 all deform 1 xy erate 0.01 units box
|
||||
|
||||
run 10000
|
||||
Generated 0 of 0 mixed pair_coeff terms from geometric mixing rule
|
||||
Neighbor list info ...
|
||||
update: every = 1 steps, delay = 0 steps, check = yes
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 2.8
|
||||
ghost atom cutoff = 2.8
|
||||
binsize = 1.4, bins = 26 26 1
|
||||
1 neighbor lists, perpetual/occasional/extra = 1 0 0
|
||||
(1) pair line/lj, perpetual
|
||||
attributes: half, newton on
|
||||
pair build: half/bin/atomonly/newton/tri
|
||||
stencil: half/bin/2d/tri
|
||||
bin: standard
|
||||
Per MPI rank memory allocation (min/avg/max) = 4.752 | 4.752 | 4.752 Mbytes
|
||||
Step f_2 PotEng KinEng c_1 c_2 c_3 v_toteng
|
||||
10000 2.3932442 -0.098029446 0.83751359 0.1568021 0.83751359 -0.098029446 0.89628624
|
||||
11000 2.4541893 -0.15538223 0.82053681 0.19271549 0.82053681 -0.15538223 0.85787007
|
||||
12000 2.4124449 -0.14024177 0.84559598 0.15692416 0.84559598 -0.14024177 0.86227837
|
||||
13000 2.2095814 -0.10822636 0.73930104 0.173052 0.73930104 -0.10822636 0.80412668
|
||||
14000 2.0946831 -0.087303541 0.77743494 0.10023865 0.77743494 -0.087303541 0.79037005
|
||||
15000 2.0911016 -0.11524594 0.74044758 0.12978235 0.74044758 -0.11524594 0.75498398
|
||||
16000 1.9736428 -0.17259563 0.67852978 0.13943094 0.67852978 -0.17259563 0.64536509
|
||||
17000 2.3284367 -0.17673537 0.77018991 0.1897596 0.77018991 -0.17673537 0.78321414
|
||||
18000 2.3178564 -0.16634746 0.81488323 0.1487365 0.81488323 -0.16634746 0.79727227
|
||||
19000 2.7497251 -0.18254513 1.0041125 0.14528424 1.0041125 -0.18254513 0.96685165
|
||||
20000 2.3897059 -0.12664177 0.8390411 0.15426546 0.8390411 -0.12664177 0.86666479
|
||||
Loop time of 0.79519 on 4 procs for 10000 steps with 350 atoms
|
||||
|
||||
Performance: 1086533.223 tau/day, 12575.616 timesteps/s, 4.401 Matom-step/s
|
||||
98.2% CPU use with 4 MPI tasks x 1 OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 0.26601 | 0.35051 | 0.443 | 12.3 | 44.08
|
||||
Neigh | 0.0029891 | 0.0036725 | 0.0040631 | 0.7 | 0.46
|
||||
Comm | 0.049744 | 0.14268 | 0.22734 | 19.3 | 17.94
|
||||
Output | 0.00013783 | 0.00014503 | 0.00016304 | 0.0 | 0.02
|
||||
Modify | 0.27963 | 0.28446 | 0.29049 | 0.8 | 35.77
|
||||
Other | | 0.01372 | | | 1.73
|
||||
|
||||
Nlocal: 87.5 ave 101 max 75 min
|
||||
Histogram: 1 0 1 0 0 0 1 0 0 1
|
||||
Nghost: 61.5 ave 67 max 50 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 2 1
|
||||
Neighs: 244.75 ave 268 max 198 min
|
||||
Histogram: 1 0 0 0 0 0 0 1 0 2
|
||||
|
||||
Total # of neighbors = 979
|
||||
Ave neighs/atom = 2.7971429
|
||||
Neighbor list builds = 289
|
||||
Dangerous builds = 0
|
||||
Total wall time: 0:00:01
|
||||
363
examples/ASPHERE/line/log.21Sep24.line.srd.g++.1
Normal file
363
examples/ASPHERE/line/log.21Sep24.line.srd.g++.1
Normal file
@ -0,0 +1,363 @@
|
||||
LAMMPS (29 Aug 2024 - Development - patch_29Aug2024-296-g061c80104a-modified)
|
||||
using 1 OpenMP thread(s) per MPI task
|
||||
# Aspherical shear demo - 2d line boxes, solvated by SRD particles
|
||||
|
||||
units lj
|
||||
atom_style line
|
||||
atom_modify first big
|
||||
dimension 2
|
||||
|
||||
read_data data.line.srd
|
||||
Reading data file ...
|
||||
orthogonal box = (-28.796757 -28.796757 -0.5) to (28.796757 28.796757 0.5)
|
||||
1 by 1 by 1 MPI processor grid
|
||||
reading atoms ...
|
||||
400 atoms
|
||||
400 lines
|
||||
read_data CPU = 0.003 seconds
|
||||
|
||||
# add small particles as hi density lattice
|
||||
|
||||
lattice sq 0.4
|
||||
Lattice spacing in x,y,z = 1.5811388 1.5811388 1.5811388
|
||||
region plane block INF INF INF INF -0.001 0.001
|
||||
lattice sq 10.0
|
||||
Lattice spacing in x,y,z = 0.31622777 0.31622777 0.31622777
|
||||
create_atoms 2 region plane
|
||||
Created 33489 atoms
|
||||
using lattice units in orthogonal box = (-28.796757 -28.796757 -0.5) to (28.796757 28.796757 0.5)
|
||||
create_atoms CPU = 0.003 seconds
|
||||
|
||||
group big type 1
|
||||
400 atoms in group big
|
||||
group small type 2
|
||||
33489 atoms in group small
|
||||
set group small mass 0.01
|
||||
Setting atom values ...
|
||||
33489 settings made for mass
|
||||
|
||||
# delete overlaps
|
||||
# must set 1-2 cutoff to non-zero value
|
||||
|
||||
pair_style lj/cut 1.5
|
||||
pair_coeff 1 1 1.0 1.0
|
||||
pair_coeff 2 2 0.0 1.0 0.0
|
||||
pair_coeff 1 2 1.0 1.0
|
||||
|
||||
delete_atoms overlap 1.5 small big
|
||||
System init for delete_atoms ...
|
||||
Generated 0 of 1 mixed pair_coeff terms from geometric mixing rule
|
||||
Neighbor list info ...
|
||||
update: every = 1 steps, delay = 0 steps, check = yes
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 1.8
|
||||
ghost atom cutoff = 1.8
|
||||
binsize = 0.9, bins = 64 64 2
|
||||
2 neighbor lists, perpetual/occasional/extra = 1 1 0
|
||||
(1) command delete_atoms, occasional
|
||||
attributes: full, newton on
|
||||
pair build: full/bin/atomonly
|
||||
stencil: full/bin/2d
|
||||
bin: standard
|
||||
(2) pair lj/cut, perpetual
|
||||
attributes: half, newton on
|
||||
pair build: half/bin/atomonly/newton
|
||||
stencil: half/bin/2d
|
||||
bin: standard
|
||||
WARNING: Delete_atoms cutoff > minimum neighbor cutoff (src/delete_atoms.cpp:313)
|
||||
Deleted 13605 atoms, new total = 20284
|
||||
|
||||
# SRD run
|
||||
|
||||
reset_timestep 0
|
||||
|
||||
velocity small create 1.44 87287 loop geom
|
||||
|
||||
neighbor 0.8 multi
|
||||
neigh_modify delay 0 every 1 check yes
|
||||
neigh_modify exclude molecule/intra big include big
|
||||
|
||||
comm_modify mode multi group big vel yes
|
||||
neigh_modify include big
|
||||
|
||||
# no pairwise interactions with small particles
|
||||
|
||||
pair_style line/lj 2.5
|
||||
pair_coeff 1 1 1.0 1.0 1.0 1.0 2.5
|
||||
pair_coeff 2 2 0.0 0.0 0.0 1.0 0.0
|
||||
pair_coeff 1 2 1.0 0.2 1.0 0.2 2.5
|
||||
|
||||
# use fix SRD to push small particles out from inside big ones
|
||||
# if comment out, big particles won't see SRD particles
|
||||
|
||||
timestep 0.0001
|
||||
|
||||
fix 1 big rigid molecule
|
||||
100 rigid bodies with 400 atoms
|
||||
fix 2 small srd 20 big 1.0 0.25 49894 search 0.2 cubic warn 0.0001 shift yes 49829 overlap yes collision noslip
|
||||
|
||||
fix 3 all deform 1 x scale 1.25 y scale 1.25
|
||||
fix 4 all enforce2d
|
||||
|
||||
# diagnostics
|
||||
|
||||
compute tsmall small temp/deform
|
||||
compute tbig big temp
|
||||
variable pebig equal pe*atoms/count(big)
|
||||
variable ebig equal etotal*atoms/count(big)
|
||||
|
||||
compute_modify tbig extra/dof -350
|
||||
|
||||
compute 1 big erotate/asphere
|
||||
compute 2 all ke
|
||||
compute 3 all pe
|
||||
variable toteng equal (c_1+c_2+c_3)/atoms
|
||||
|
||||
thermo 1000
|
||||
thermo_style custom step c_tsmall f_2[9] c_1 etotal v_pebig v_ebig press
|
||||
thermo_modify temp tbig
|
||||
WARNING: Temperature for thermo pressure is not for group all (src/thermo.cpp:533)
|
||||
|
||||
compute 10 big property/atom end1x end1y end2x end2y
|
||||
|
||||
#dump 1 all custom 500 dump1.atom.srd id type x y z ix iy iz
|
||||
#dump 2 all custom 500 dump1.line.srd id type # c_10[1] c_10[2] c_10[3] c_10[4]
|
||||
|
||||
run 10000
|
||||
|
||||
CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
|
||||
|
||||
Your simulation uses code contributions which should be cited:
|
||||
|
||||
- Type Label Framework: https://doi.org/10.1021/acs.jpcb.3c08419
|
||||
|
||||
@Article{Gissinger24,
|
||||
author = {Jacob R. Gissinger, Ilia Nikiforov, Yaser Afshar, Brendon Waters, Moon-ki Choi, Daniel S. Karls, Alexander Stukowski, Wonpil Im, Hendrik Heinz, Axel Kohlmeyer, and Ellad B. Tadmor},
|
||||
title = {Type Label Framework for Bonded Force Fields in LAMMPS},
|
||||
journal = {J. Phys. Chem. B},
|
||||
year = 2024,
|
||||
volume = 128,
|
||||
number = 13,
|
||||
pages = {3282–-3297}
|
||||
}
|
||||
|
||||
- neighbor multi command: doi:10.1016/j.cpc.2008.03.005, doi:10.1007/s40571-020-00361-2
|
||||
|
||||
@Article{Intveld08,
|
||||
author = {in 't Veld, P. J. and S. J.~Plimpton and G. S. Grest},
|
||||
title = {Accurate and Efficient Methods for Modeling Colloidal
|
||||
Mixtures in an Explicit Solvent using Molecular Dynamics},
|
||||
journal = {Comput.\ Phys.\ Commut.},
|
||||
year = 2008,
|
||||
volume = 179,
|
||||
pages = {320--329}
|
||||
}
|
||||
|
||||
@article{Shire2020,
|
||||
author = {Shire, Tom and Hanley, Kevin J. and Stratford, Kevin},
|
||||
title = {{DEM} Simulations of Polydisperse Media: Efficient Contact
|
||||
Detection Applied to Investigate the Quasi-Static Limit},
|
||||
journal = {Computational Particle Mechanics},
|
||||
year = {2020}
|
||||
@article{Monti2022,
|
||||
author = {Monti, Joseph M. and Clemmer, Joel T. and Srivastava,
|
||||
Ishan and Silbert, Leonardo E. and Grest, Gary S.
|
||||
and Lechman, Jeremy B.},
|
||||
title = {Large-scale frictionless jamming with power-law particle
|
||||
size distributions},
|
||||
journal = {Phys. Rev. E},
|
||||
volume = {106}
|
||||
issue = {3}
|
||||
year = {2022}
|
||||
}
|
||||
|
||||
- fix srd command: doi:10.1063/1.3419070
|
||||
|
||||
@Article{Petersen10,
|
||||
author = {M. K. Petersen and J. B. Lechman and S. J. Plimpton and
|
||||
G. S. Grest and in 't Veld, P. J. and P. R. Schunk},
|
||||
title = {Mesoscale Hydrodynamics via Stochastic Rotation
|
||||
Dynamics: Comparison with {L}ennard-{J}ones Fluid},
|
||||
journal = {J.~Chem.\ Phys.},
|
||||
year = 2010,
|
||||
volume = 132,
|
||||
pages = 174106
|
||||
}
|
||||
|
||||
CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
|
||||
|
||||
Generated 0 of 1 mixed pair_coeff terms from geometric mixing rule
|
||||
WARNING: Using compute temp/deform with inconsistent fix deform remap option (src/compute_temp_deform.cpp:71)
|
||||
WARNING: Using fix srd with box deformation but no SRD thermostat (src/SRD/fix_srd.cpp:405)
|
||||
SRD info:
|
||||
SRD/big particles = 19884 400
|
||||
big particle diameter max/min = 1.9988171 0.50330564
|
||||
SRD temperature & lamda = 1 0.02
|
||||
SRD max distance & max velocity = 0.08 40
|
||||
SRD grid counts: 230 230 1
|
||||
SRD grid size: request, actual (xyz) = 0.25, 0.25040659 0.25040659 1
|
||||
SRD per actual grid cell = 0.45503978
|
||||
SRD viscosity = 0.92062623
|
||||
big/SRD mass density ratio = 12.298053
|
||||
WARNING: SRD bin size for fix srd differs from user request (src/SRD/fix_srd.cpp:2809)
|
||||
WARNING: Fix srd grid size > 1/4 of big particle diameter (src/SRD/fix_srd.cpp:2830)
|
||||
# of rescaled SRD velocities = 0
|
||||
ave/max small velocity = 15.906001 29.105426
|
||||
ave/max big velocity = 0 0
|
||||
Neighbor list info ...
|
||||
update: every = 1 steps, delay = 0 steps, check = yes
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 3.3
|
||||
ghost atom cutoff = 3.3
|
||||
binsize = 1.65, bins = 35 35 1
|
||||
1 neighbor lists, perpetual/occasional/extra = 1 0 0
|
||||
(1) pair line/lj, perpetual
|
||||
attributes: half, newton on
|
||||
pair build: half/multi/atomonly/newton
|
||||
stencil: half/multi/2d
|
||||
bin: multi
|
||||
Per MPI rank memory allocation (min/avg/max) = 43.18 | 43.18 | 43.18 Mbytes
|
||||
Step c_tsmall f_2[9] c_1 TotEng v_pebig v_ebig Press
|
||||
0 1.6084386 0 0 0 0 0 0
|
||||
1000 1.4566787 1.2042825 0.00015900089 0.0011955837 0 0.060628048 0.75322967
|
||||
2000 1.3561018 1.2092936 0.00026739313 0.0020783306 0 0.10539215 0.53247025
|
||||
3000 1.3327447 1.1696777 0.0003215116 0.0024935719 0 0.12644903 0.32024668
|
||||
4000 1.3031273 1.1385596 0.00037337107 0.0031214473 0 0.15828859 0.64066244
|
||||
5000 1.28204 1.0983763 0.00047966362 0.0033295434 0 0.16884114 0.32103666
|
||||
6000 1.2654396 1.0969562 0.00049917373 0.0037382387 0 0.18956609 0.16211614
|
||||
7000 1.2401503 1.1232046 0.00045137168 0.0037522201 0 0.19027508 0.014071672
|
||||
8000 1.2349025 1.059008 0.00056648801 0.0039664742 0 0.20113991 0.24483817
|
||||
9000 1.2272907 1.0371006 0.00056271544 0.0037847074 0 0.19192251 0.032968056
|
||||
10000 1.2153817 1.0209113 0.00053291164 0.0037197691 0 0.18862949 0.3237698
|
||||
Loop time of 6.0346 on 1 procs for 10000 steps with 20284 atoms
|
||||
|
||||
Performance: 14317.424 tau/day, 1657.109 timesteps/s, 33.613 Matom-step/s
|
||||
96.2% CPU use with 1 MPI tasks x 1 OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 0.10636 | 0.10636 | 0.10636 | 0.0 | 1.76
|
||||
Neigh | 0.01905 | 0.01905 | 0.01905 | 0.0 | 0.32
|
||||
Comm | 0.041223 | 0.041223 | 0.041223 | 0.0 | 0.68
|
||||
Output | 0.0017015 | 0.0017015 | 0.0017015 | 0.0 | 0.03
|
||||
Modify | 5.8165 | 5.8165 | 5.8165 | 0.0 | 96.39
|
||||
Other | | 0.04976 | | | 0.82
|
||||
|
||||
Nlocal: 20284 ave 20284 max 20284 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Nghost: 84 ave 84 max 84 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Neighs: 0 ave 0 max 0 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
|
||||
Total # of neighbors = 0
|
||||
Ave neighs/atom = 0
|
||||
Neighbor list builds = 500
|
||||
Dangerous builds = 0
|
||||
|
||||
#undump 1
|
||||
#undump 2
|
||||
unfix 3
|
||||
|
||||
change_box all triclinic
|
||||
Changing box ...
|
||||
triclinic box = (-35.995947 -35.995947 -0.5) to (35.995947 35.995947 0.5) with tilt (0 0 0)
|
||||
|
||||
fix 2 small srd 20 big 1.0 0.25 49894 search 0.2 cubic warn 0.0001 shift yes 49829 overlap yes collision noslip #tstat yes
|
||||
|
||||
#dump 1 all custom 500 dump2.atom.srd id type x y z ix iy iz
|
||||
#dump 2 all custom 500 dump2.line.srd id type # c_10[1] c_10[2] c_10[3] c_10[4]
|
||||
|
||||
fix 3 all deform 1 xy erate 0.0002 units box remap v
|
||||
|
||||
run 30000
|
||||
Generated 0 of 1 mixed pair_coeff terms from geometric mixing rule
|
||||
WARNING: Using fix srd with box deformation but no SRD thermostat (src/SRD/fix_srd.cpp:405)
|
||||
SRD info:
|
||||
SRD/big particles = 19884 400
|
||||
big particle diameter max/min = 1.9988171 0.50330564
|
||||
SRD temperature & lamda = 1 0.02
|
||||
SRD max distance & max velocity = 0.08 40
|
||||
SRD grid counts: 288 288 1
|
||||
SRD grid size: request, actual (xyz) = 0.25, 0.24997185 0.24997185 1
|
||||
SRD per actual grid cell = 0.26976265
|
||||
SRD viscosity = 1.0312189
|
||||
big/SRD mass density ratio = 20.672578
|
||||
WARNING: SRD bin size for fix srd differs from user request (src/SRD/fix_srd.cpp:2809)
|
||||
WARNING: Fix srd grid size > 1/4 of big particle diameter (src/SRD/fix_srd.cpp:2830)
|
||||
# of rescaled SRD velocities = 0
|
||||
ave/max small velocity = 12.805429 40
|
||||
ave/max big velocity = 0.50400801 2.1697856
|
||||
Neighbor list info ...
|
||||
update: every = 1 steps, delay = 0 steps, check = yes
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 3.3
|
||||
ghost atom cutoff = 3.3
|
||||
binsize = 1.65, bins = 44 44 1
|
||||
1 neighbor lists, perpetual/occasional/extra = 1 0 0
|
||||
(1) pair line/lj, perpetual
|
||||
attributes: half, newton on
|
||||
pair build: half/multi/atomonly/newton/tri
|
||||
stencil: half/multi/2d/tri
|
||||
bin: multi
|
||||
Per MPI rank memory allocation (min/avg/max) = 58.85 | 58.85 | 58.85 Mbytes
|
||||
Step c_tsmall f_2[9] c_1 TotEng v_pebig v_ebig Press
|
||||
10000 1.0423767 0 0.00053289774 0.0037197858 0 0.18863034 0.014558088
|
||||
11000 1.0333923 1.0309908 0.00058283908 0.0042336078 0 0.21468625 0.13036048
|
||||
12000 1.0268543 1.0290122 0.00055294944 0.0044278861 0 0.2245381 0.53275676
|
||||
13000 1.0257856 1.0140434 0.00051520803 0.0042888387 0 0.21748701 0.2066835
|
||||
14000 1.0239497 1.0321558 0.00054742053 0.004259664 0 0.21600756 0.24430097
|
||||
15000 1.0194475 1.0119806 0.00053619565 0.0041113765 0 0.2084879 0.18420434
|
||||
16000 1.016421 1.0309991 0.00049553642 0.0041338645 0 0.20962827 0.18298406
|
||||
17000 1.0088527 0.99723694 0.00049824173 0.0044777581 0 0.22706711 0.65557023
|
||||
18000 1.0073877 1.0136962 0.00058327835 0.0045355884 0 0.22999969 0.13700737
|
||||
19000 1.0026916 1.0253129 0.00056760151 0.0045138437 0 0.22889701 0.16443344
|
||||
20000 1.0024042 0.98988084 0.00057568118 0.0048644983 0 0.24667871 0.060864322
|
||||
21000 0.99500641 0.99075324 0.00062187543 0.0050480594 0 0.25598709 0.18428384
|
||||
22000 0.99286446 0.98642266 0.00061024492 0.0050497174 0 0.25607117 0.18499738
|
||||
23000 0.99204086 0.98869005 0.00064013575 0.0052921112 0 0.26836296 0.046987646
|
||||
24000 0.98768375 1.0110613 0.00065868636 0.0049285304 0 0.24992578 0.22623751
|
||||
25000 0.98763308 0.98752678 0.00067070463 0.0049364253 0 0.25032613 0.022819382
|
||||
26000 0.9844889 1.0059918 0.00075364589 0.0053381778 0 0.270699 0.089735771
|
||||
27000 0.98574608 0.99094039 0.00080711088 0.0054137233 0 0.27452991 0.14477885
|
||||
28000 0.98139419 0.97487219 0.00071242405 0.0053315389 0 0.27036234 -0.016357088
|
||||
29000 0.98458705 0.98600894 0.00068355573 0.0050157315 0 0.25434775 0.015749835
|
||||
30000 0.98440759 1.0010793 0.00067594666 0.0048121267 0 0.24402295 0.20422918
|
||||
31000 0.98060632 0.9825933 0.00069529947 0.0050649881 -0.00010700206 0.25684555 0.0014601267
|
||||
32000 0.97616624 0.94976641 0.00067682494 0.0052111319 -0.00013230279 0.2642565 0.013738873
|
||||
33000 0.97926122 0.95756794 0.00071774059 0.0053716513 -0.00024804252 0.27239644 0.10643895
|
||||
34000 0.97227771 0.94365547 0.00080885359 0.0055744144 -0.00063217879 0.28267856 0.12472959
|
||||
35000 0.97295679 0.98103625 0.0008130852 0.0057598371 -0.00097218501 0.29208134 0.88422099
|
||||
36000 0.97131919 0.97164636 0.0008345433 0.0062212675 -0.001543055 0.31548048 0.22389401
|
||||
37000 0.96988732 0.98589058 0.00083271625 0.0062999411 -0.0026064482 0.31947001 0.11797226
|
||||
38000 0.97135512 0.96486494 0.00076943974 0.0057397763 -0.0041174109 0.29106406 0.072790492
|
||||
39000 0.97386738 0.95218062 0.00079594844 0.0056308358 -0.0069006312 0.28553968 0.12417837
|
||||
40000 0.97914045 1.0033941 0.000830798 0.0055952714 -0.0086705514 0.28373621 0.1976663
|
||||
Loop time of 15.944 on 1 procs for 30000 steps with 20284 atoms
|
||||
|
||||
Performance: 16256.931 tau/day, 1881.589 timesteps/s, 38.166 Matom-step/s
|
||||
99.5% CPU use with 1 MPI tasks x 1 OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 0.31871 | 0.31871 | 0.31871 | 0.0 | 2.00
|
||||
Neigh | 0.082974 | 0.082974 | 0.082974 | 0.0 | 0.52
|
||||
Comm | 0.16092 | 0.16092 | 0.16092 | 0.0 | 1.01
|
||||
Output | 0.0047257 | 0.0047257 | 0.0047257 | 0.0 | 0.03
|
||||
Modify | 15.189 | 15.189 | 15.189 | 0.0 | 95.27
|
||||
Other | | 0.1871 | | | 1.17
|
||||
|
||||
Nlocal: 20284 ave 20284 max 20284 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Nghost: 76 ave 76 max 76 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Neighs: 38 ave 38 max 38 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
|
||||
Total # of neighbors = 38
|
||||
Ave neighs/atom = 0.0018733978
|
||||
Neighbor list builds = 1500
|
||||
Dangerous builds = 0
|
||||
Total wall time: 0:00:22
|
||||
363
examples/ASPHERE/line/log.21Sep24.line.srd.g++.4
Normal file
363
examples/ASPHERE/line/log.21Sep24.line.srd.g++.4
Normal file
@ -0,0 +1,363 @@
|
||||
LAMMPS (29 Aug 2024 - Development - patch_29Aug2024-296-g061c80104a-modified)
|
||||
using 1 OpenMP thread(s) per MPI task
|
||||
# Aspherical shear demo - 2d line boxes, solvated by SRD particles
|
||||
|
||||
units lj
|
||||
atom_style line
|
||||
atom_modify first big
|
||||
dimension 2
|
||||
|
||||
read_data data.line.srd
|
||||
Reading data file ...
|
||||
orthogonal box = (-28.796757 -28.796757 -0.5) to (28.796757 28.796757 0.5)
|
||||
2 by 2 by 1 MPI processor grid
|
||||
reading atoms ...
|
||||
400 atoms
|
||||
400 lines
|
||||
read_data CPU = 0.003 seconds
|
||||
|
||||
# add small particles as hi density lattice
|
||||
|
||||
lattice sq 0.4
|
||||
Lattice spacing in x,y,z = 1.5811388 1.5811388 1.5811388
|
||||
region plane block INF INF INF INF -0.001 0.001
|
||||
lattice sq 10.0
|
||||
Lattice spacing in x,y,z = 0.31622777 0.31622777 0.31622777
|
||||
create_atoms 2 region plane
|
||||
Created 33489 atoms
|
||||
using lattice units in orthogonal box = (-28.796757 -28.796757 -0.5) to (28.796757 28.796757 0.5)
|
||||
create_atoms CPU = 0.001 seconds
|
||||
|
||||
group big type 1
|
||||
400 atoms in group big
|
||||
group small type 2
|
||||
33489 atoms in group small
|
||||
set group small mass 0.01
|
||||
Setting atom values ...
|
||||
33489 settings made for mass
|
||||
|
||||
# delete overlaps
|
||||
# must set 1-2 cutoff to non-zero value
|
||||
|
||||
pair_style lj/cut 1.5
|
||||
pair_coeff 1 1 1.0 1.0
|
||||
pair_coeff 2 2 0.0 1.0 0.0
|
||||
pair_coeff 1 2 1.0 1.0
|
||||
|
||||
delete_atoms overlap 1.5 small big
|
||||
System init for delete_atoms ...
|
||||
Generated 0 of 1 mixed pair_coeff terms from geometric mixing rule
|
||||
Neighbor list info ...
|
||||
update: every = 1 steps, delay = 0 steps, check = yes
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 1.8
|
||||
ghost atom cutoff = 1.8
|
||||
binsize = 0.9, bins = 64 64 2
|
||||
2 neighbor lists, perpetual/occasional/extra = 1 1 0
|
||||
(1) command delete_atoms, occasional
|
||||
attributes: full, newton on
|
||||
pair build: full/bin/atomonly
|
||||
stencil: full/bin/2d
|
||||
bin: standard
|
||||
(2) pair lj/cut, perpetual
|
||||
attributes: half, newton on
|
||||
pair build: half/bin/atomonly/newton
|
||||
stencil: half/bin/2d
|
||||
bin: standard
|
||||
WARNING: Delete_atoms cutoff > minimum neighbor cutoff (src/delete_atoms.cpp:313)
|
||||
Deleted 13605 atoms, new total = 20284
|
||||
|
||||
# SRD run
|
||||
|
||||
reset_timestep 0
|
||||
|
||||
velocity small create 1.44 87287 loop geom
|
||||
|
||||
neighbor 0.8 multi
|
||||
neigh_modify delay 0 every 1 check yes
|
||||
neigh_modify exclude molecule/intra big include big
|
||||
|
||||
comm_modify mode multi group big vel yes
|
||||
neigh_modify include big
|
||||
|
||||
# no pairwise interactions with small particles
|
||||
|
||||
pair_style line/lj 2.5
|
||||
pair_coeff 1 1 1.0 1.0 1.0 1.0 2.5
|
||||
pair_coeff 2 2 0.0 0.0 0.0 1.0 0.0
|
||||
pair_coeff 1 2 1.0 0.2 1.0 0.2 2.5
|
||||
|
||||
# use fix SRD to push small particles out from inside big ones
|
||||
# if comment out, big particles won't see SRD particles
|
||||
|
||||
timestep 0.0001
|
||||
|
||||
fix 1 big rigid molecule
|
||||
100 rigid bodies with 400 atoms
|
||||
fix 2 small srd 20 big 1.0 0.25 49894 search 0.2 cubic warn 0.0001 shift yes 49829 overlap yes collision noslip
|
||||
|
||||
fix 3 all deform 1 x scale 1.25 y scale 1.25
|
||||
fix 4 all enforce2d
|
||||
|
||||
# diagnostics
|
||||
|
||||
compute tsmall small temp/deform
|
||||
compute tbig big temp
|
||||
variable pebig equal pe*atoms/count(big)
|
||||
variable ebig equal etotal*atoms/count(big)
|
||||
|
||||
compute_modify tbig extra/dof -350
|
||||
|
||||
compute 1 big erotate/asphere
|
||||
compute 2 all ke
|
||||
compute 3 all pe
|
||||
variable toteng equal (c_1+c_2+c_3)/atoms
|
||||
|
||||
thermo 1000
|
||||
thermo_style custom step c_tsmall f_2[9] c_1 etotal v_pebig v_ebig press
|
||||
thermo_modify temp tbig
|
||||
WARNING: Temperature for thermo pressure is not for group all (src/thermo.cpp:533)
|
||||
|
||||
compute 10 big property/atom end1x end1y end2x end2y
|
||||
|
||||
#dump 1 all custom 500 dump1.atom.srd id type x y z ix iy iz
|
||||
#dump 2 all custom 500 dump1.line.srd id type # c_10[1] c_10[2] c_10[3] c_10[4]
|
||||
|
||||
run 10000
|
||||
|
||||
CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
|
||||
|
||||
Your simulation uses code contributions which should be cited:
|
||||
|
||||
- Type Label Framework: https://doi.org/10.1021/acs.jpcb.3c08419
|
||||
|
||||
@Article{Gissinger24,
|
||||
author = {Jacob R. Gissinger, Ilia Nikiforov, Yaser Afshar, Brendon Waters, Moon-ki Choi, Daniel S. Karls, Alexander Stukowski, Wonpil Im, Hendrik Heinz, Axel Kohlmeyer, and Ellad B. Tadmor},
|
||||
title = {Type Label Framework for Bonded Force Fields in LAMMPS},
|
||||
journal = {J. Phys. Chem. B},
|
||||
year = 2024,
|
||||
volume = 128,
|
||||
number = 13,
|
||||
pages = {3282–-3297}
|
||||
}
|
||||
|
||||
- neighbor multi command: doi:10.1016/j.cpc.2008.03.005, doi:10.1007/s40571-020-00361-2
|
||||
|
||||
@Article{Intveld08,
|
||||
author = {in 't Veld, P. J. and S. J.~Plimpton and G. S. Grest},
|
||||
title = {Accurate and Efficient Methods for Modeling Colloidal
|
||||
Mixtures in an Explicit Solvent using Molecular Dynamics},
|
||||
journal = {Comput.\ Phys.\ Commut.},
|
||||
year = 2008,
|
||||
volume = 179,
|
||||
pages = {320--329}
|
||||
}
|
||||
|
||||
@article{Shire2020,
|
||||
author = {Shire, Tom and Hanley, Kevin J. and Stratford, Kevin},
|
||||
title = {{DEM} Simulations of Polydisperse Media: Efficient Contact
|
||||
Detection Applied to Investigate the Quasi-Static Limit},
|
||||
journal = {Computational Particle Mechanics},
|
||||
year = {2020}
|
||||
@article{Monti2022,
|
||||
author = {Monti, Joseph M. and Clemmer, Joel T. and Srivastava,
|
||||
Ishan and Silbert, Leonardo E. and Grest, Gary S.
|
||||
and Lechman, Jeremy B.},
|
||||
title = {Large-scale frictionless jamming with power-law particle
|
||||
size distributions},
|
||||
journal = {Phys. Rev. E},
|
||||
volume = {106}
|
||||
issue = {3}
|
||||
year = {2022}
|
||||
}
|
||||
|
||||
- fix srd command: doi:10.1063/1.3419070
|
||||
|
||||
@Article{Petersen10,
|
||||
author = {M. K. Petersen and J. B. Lechman and S. J. Plimpton and
|
||||
G. S. Grest and in 't Veld, P. J. and P. R. Schunk},
|
||||
title = {Mesoscale Hydrodynamics via Stochastic Rotation
|
||||
Dynamics: Comparison with {L}ennard-{J}ones Fluid},
|
||||
journal = {J.~Chem.\ Phys.},
|
||||
year = 2010,
|
||||
volume = 132,
|
||||
pages = 174106
|
||||
}
|
||||
|
||||
CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
|
||||
|
||||
Generated 0 of 1 mixed pair_coeff terms from geometric mixing rule
|
||||
WARNING: Using compute temp/deform with inconsistent fix deform remap option (src/compute_temp_deform.cpp:71)
|
||||
WARNING: Using fix srd with box deformation but no SRD thermostat (src/SRD/fix_srd.cpp:405)
|
||||
SRD info:
|
||||
SRD/big particles = 19884 400
|
||||
big particle diameter max/min = 1.9988171 0.50330564
|
||||
SRD temperature & lamda = 1 0.02
|
||||
SRD max distance & max velocity = 0.08 40
|
||||
SRD grid counts: 230 230 1
|
||||
SRD grid size: request, actual (xyz) = 0.25, 0.25040659 0.25040659 1
|
||||
SRD per actual grid cell = 0.45503978
|
||||
SRD viscosity = 0.92062623
|
||||
big/SRD mass density ratio = 12.298053
|
||||
WARNING: SRD bin size for fix srd differs from user request (src/SRD/fix_srd.cpp:2809)
|
||||
WARNING: Fix srd grid size > 1/4 of big particle diameter (src/SRD/fix_srd.cpp:2830)
|
||||
# of rescaled SRD velocities = 0
|
||||
ave/max small velocity = 15.906001 29.105426
|
||||
ave/max big velocity = 0 0
|
||||
Neighbor list info ...
|
||||
update: every = 1 steps, delay = 0 steps, check = yes
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 3.3
|
||||
ghost atom cutoff = 3.3
|
||||
binsize = 1.65, bins = 35 35 1
|
||||
1 neighbor lists, perpetual/occasional/extra = 1 0 0
|
||||
(1) pair line/lj, perpetual
|
||||
attributes: half, newton on
|
||||
pair build: half/multi/atomonly/newton
|
||||
stencil: half/multi/2d
|
||||
bin: multi
|
||||
Per MPI rank memory allocation (min/avg/max) = 13.44 | 13.45 | 13.47 Mbytes
|
||||
Step c_tsmall f_2[9] c_1 TotEng v_pebig v_ebig Press
|
||||
0 1.6084386 0 0 0 0 0 0
|
||||
1000 1.4539924 1.2578325 0.0001679809 0.0010497614 0 0.053233399 0.69841607
|
||||
2000 1.3516746 1.1693535 0.00031932331 0.0021450754 0 0.10877677 0.058141137
|
||||
3000 1.3294093 1.1539986 0.00032558495 0.0022345521 0 0.11331414 0.085892255
|
||||
4000 1.3049311 1.1174444 0.00039637116 0.0026520968 0 0.13448783 0.10574422
|
||||
5000 1.2790124 1.1209176 0.0004519172 0.0032635257 0 0.16549339 0.59225702
|
||||
6000 1.2631214 1.0868777 0.00052274216 0.003527049 0 0.17885666 0.26093936
|
||||
7000 1.2520054 1.0780954 0.00051683183 0.0038111141 0 0.1932616 -0.0014733978
|
||||
8000 1.2412037 1.0587149 0.00056620143 0.0038329297 0 0.19436786 0.31576462
|
||||
9000 1.2242125 1.0699712 0.00065335672 0.0039948578 0 0.20257924 0.19755012
|
||||
10000 1.2155758 1.0279682 0.00059730828 0.004142635 0 0.21007302 0.30022953
|
||||
Loop time of 1.92412 on 4 procs for 10000 steps with 20284 atoms
|
||||
|
||||
Performance: 44903.607 tau/day, 5197.177 timesteps/s, 105.420 Matom-step/s
|
||||
97.6% CPU use with 4 MPI tasks x 1 OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 0.026855 | 0.027529 | 0.028377 | 0.4 | 1.43
|
||||
Neigh | 0.0073723 | 0.007433 | 0.0075693 | 0.1 | 0.39
|
||||
Comm | 0.062837 | 0.065574 | 0.066747 | 0.6 | 3.41
|
||||
Output | 0.00072894 | 0.00076558 | 0.00083361 | 0.0 | 0.04
|
||||
Modify | 1.7861 | 1.7926 | 1.8 | 0.4 | 93.16
|
||||
Other | | 0.03026 | | | 1.57
|
||||
|
||||
Nlocal: 5071 ave 5096 max 5051 min
|
||||
Histogram: 1 0 1 0 0 1 0 0 0 1
|
||||
Nghost: 44 ave 51 max 40 min
|
||||
Histogram: 2 0 0 1 0 0 0 0 0 1
|
||||
Neighs: 0 ave 0 max 0 min
|
||||
Histogram: 4 0 0 0 0 0 0 0 0 0
|
||||
|
||||
Total # of neighbors = 0
|
||||
Ave neighs/atom = 0
|
||||
Neighbor list builds = 500
|
||||
Dangerous builds = 0
|
||||
|
||||
#undump 1
|
||||
#undump 2
|
||||
unfix 3
|
||||
|
||||
change_box all triclinic
|
||||
Changing box ...
|
||||
triclinic box = (-35.995947 -35.995947 -0.5) to (35.995947 35.995947 0.5) with tilt (0 0 0)
|
||||
|
||||
fix 2 small srd 20 big 1.0 0.25 49894 search 0.2 cubic warn 0.0001 shift yes 49829 overlap yes collision noslip #tstat yes
|
||||
|
||||
#dump 1 all custom 500 dump2.atom.srd id type x y z ix iy iz
|
||||
#dump 2 all custom 500 dump2.line.srd id type # c_10[1] c_10[2] c_10[3] c_10[4]
|
||||
|
||||
fix 3 all deform 1 xy erate 0.0002 units box remap v
|
||||
|
||||
run 30000
|
||||
Generated 0 of 1 mixed pair_coeff terms from geometric mixing rule
|
||||
WARNING: Using fix srd with box deformation but no SRD thermostat (src/SRD/fix_srd.cpp:405)
|
||||
SRD info:
|
||||
SRD/big particles = 19884 400
|
||||
big particle diameter max/min = 1.9988171 0.50330564
|
||||
SRD temperature & lamda = 1 0.02
|
||||
SRD max distance & max velocity = 0.08 40
|
||||
SRD grid counts: 288 288 1
|
||||
SRD grid size: request, actual (xyz) = 0.25, 0.24997185 0.24997185 1
|
||||
SRD per actual grid cell = 0.26976265
|
||||
SRD viscosity = 1.0312189
|
||||
big/SRD mass density ratio = 20.672578
|
||||
WARNING: SRD bin size for fix srd differs from user request (src/SRD/fix_srd.cpp:2809)
|
||||
WARNING: Fix srd grid size > 1/4 of big particle diameter (src/SRD/fix_srd.cpp:2830)
|
||||
# of rescaled SRD velocities = 0
|
||||
ave/max small velocity = 12.826666 40
|
||||
ave/max big velocity = 0.53469722 1.7415554
|
||||
Neighbor list info ...
|
||||
update: every = 1 steps, delay = 0 steps, check = yes
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 3.3
|
||||
ghost atom cutoff = 3.3
|
||||
binsize = 1.65, bins = 44 44 1
|
||||
1 neighbor lists, perpetual/occasional/extra = 1 0 0
|
||||
(1) pair line/lj, perpetual
|
||||
attributes: half, newton on
|
||||
pair build: half/multi/atomonly/newton/tri
|
||||
stencil: half/multi/2d/tri
|
||||
bin: multi
|
||||
Per MPI rank memory allocation (min/avg/max) = 17.6 | 17.6 | 17.61 Mbytes
|
||||
Step c_tsmall f_2[9] c_1 TotEng v_pebig v_ebig Press
|
||||
10000 1.0416233 0 0.0005972922 0.0041426543 0 0.210074 0.016213064
|
||||
11000 1.0366852 1.0236717 0.00066926382 0.004404743 0 0.22336452 0.097686059
|
||||
12000 1.028695 1.0023004 0.00065323121 0.0043971164 0 0.22297777 0.30007875
|
||||
13000 1.0223214 1.0296267 0.00060201395 0.0041815724 0 0.21204754 0.17307062
|
||||
14000 1.0210601 1.0092227 0.00057020066 0.0041936718 0 0.2126611 0.13379405
|
||||
15000 1.0169223 1.003359 0.00060818329 0.0046120335 0 0.23387622 0.39144341
|
||||
16000 1.012222 0.99937463 0.00060471571 0.004533278 0 0.22988253 0.16026051
|
||||
17000 1.0087993 0.98937449 0.00061843646 0.0048888197 0 0.24791205 0.095507016
|
||||
18000 1.0030288 0.94092316 0.00054252694 0.0046740839 0 0.2370228 -0.070838649
|
||||
19000 1.0010739 0.99759814 0.00054041829 0.0045481798 0 0.2306382 0.1944996
|
||||
20000 1.004189 1.0157285 0.00053330893 0.0044999261 0 0.22819125 0.14452619
|
||||
21000 0.99601133 1.003771 0.00053282188 0.0048954851 0 0.24825005 0.20196263
|
||||
22000 0.99445408 1.0163465 0.00059954941 0.0051668276 0 0.26200983 0.21332194
|
||||
23000 0.99663947 0.94920707 0.00057729723 0.0051052499 0 0.25888722 0.19282224
|
||||
24000 0.99500015 1.0021742 0.00064312956 0.0053430656 0 0.27094686 0.18044164
|
||||
25000 0.99388189 0.98446723 0.00060150964 0.0054017824 0 0.27392438 0.18844037
|
||||
26000 0.99632932 0.94670024 0.00052479857 0.004905147 0 0.24874 0.17961314
|
||||
27000 0.99112962 1.0247118 0.00053159824 0.004752248 0 0.2409865 0.12037737
|
||||
28000 0.98887153 0.97734068 0.00052255365 0.0050199491 0 0.25456162 0.29110866
|
||||
29000 0.98938025 0.99467239 0.00053095044 0.0050303107 0 0.25508706 0.59776339
|
||||
30000 0.99081592 0.98822122 0.00056693985 0.0052452228 0 0.26598525 0.0150843
|
||||
31000 0.99050903 1.0184912 0.00056742049 0.0056515686 0 0.28659104 0.07877756
|
||||
32000 0.98929926 0.99257634 0.00058111732 0.005740055 0 0.29107819 0.19146041
|
||||
33000 0.98698723 0.94872564 0.00056547335 0.0057628447 0 0.29223385 0.076755599
|
||||
34000 0.98967998 0.98777081 0.00056045905 0.0055645603 0 0.28217885 0.095025284
|
||||
35000 0.98777734 0.95746323 0.00050104756 0.0055630681 -4.7847642e-05 0.28210318 0.25667997
|
||||
36000 0.98661597 0.98801246 0.00047074618 0.0054500805 -5.8956193e-05 0.27637358 0.18221607
|
||||
37000 0.98390111 0.9837894 0.00044581144 0.0054703357 -7.4197741e-05 0.27740072 0.11959303
|
||||
38000 0.98092646 1.0142057 0.0004945556 0.0056372628 -8.6736668e-05 0.2858656 0.017325263
|
||||
39000 0.98125957 0.94219822 0.00059691168 0.0060914156 -9.9726556e-05 0.30889569 0.0081217281
|
||||
40000 0.98362942 0.9649582 0.00062286697 0.0063736358 -0.00010700337 0.32320707 0.293121
|
||||
Loop time of 5.08621 on 4 procs for 30000 steps with 20284 atoms
|
||||
|
||||
Performance: 50961.296 tau/day, 5898.298 timesteps/s, 119.641 Matom-step/s
|
||||
98.8% CPU use with 4 MPI tasks x 1 OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 0.084321 | 0.084948 | 0.086233 | 0.3 | 1.67
|
||||
Neigh | 0.027521 | 0.029247 | 0.030632 | 0.7 | 0.58
|
||||
Comm | 0.19101 | 0.20262 | 0.2068 | 1.5 | 3.98
|
||||
Output | 0.0019581 | 0.0020369 | 0.0022544 | 0.3 | 0.04
|
||||
Modify | 4.681 | 4.6884 | 4.7051 | 0.4 | 92.18
|
||||
Other | | 0.07891 | | | 1.55
|
||||
|
||||
Nlocal: 5071 ave 5197 max 4951 min
|
||||
Histogram: 1 0 0 1 0 0 1 0 0 1
|
||||
Nghost: 44 ave 50 max 41 min
|
||||
Histogram: 2 0 0 1 0 0 0 0 0 1
|
||||
Neighs: 5.75 ave 12 max 0 min
|
||||
Histogram: 1 0 0 1 0 1 0 0 0 1
|
||||
|
||||
Total # of neighbors = 23
|
||||
Ave neighs/atom = 0.0011338986
|
||||
Neighbor list builds = 1500
|
||||
Dangerous builds = 0
|
||||
Total wall time: 0:00:07
|
||||
323
examples/ASPHERE/line/log.21Sep24.line.srd.g++.8
Normal file
323
examples/ASPHERE/line/log.21Sep24.line.srd.g++.8
Normal file
@ -0,0 +1,323 @@
|
||||
LAMMPS (29 Aug 2024 - Development - patch_29Aug2024-296-g061c80104a-modified)
|
||||
using 1 OpenMP thread(s) per MPI task
|
||||
# Aspherical shear demo - 2d line boxes, solvated by SRD particles
|
||||
|
||||
units lj
|
||||
atom_style line
|
||||
atom_modify first big
|
||||
dimension 2
|
||||
|
||||
read_data data.line.srd
|
||||
Reading data file ...
|
||||
orthogonal box = (-28.796757 -28.796757 -0.5) to (28.796757 28.796757 0.5)
|
||||
4 by 2 by 1 MPI processor grid
|
||||
reading atoms ...
|
||||
400 atoms
|
||||
400 lines
|
||||
read_data CPU = 0.002 seconds
|
||||
|
||||
# add small particles as hi density lattice
|
||||
|
||||
lattice sq 0.4
|
||||
Lattice spacing in x,y,z = 1.5811388 1.5811388 1.5811388
|
||||
region plane block INF INF INF INF -0.001 0.001
|
||||
lattice sq 10.0
|
||||
Lattice spacing in x,y,z = 0.31622777 0.31622777 0.31622777
|
||||
create_atoms 2 region plane
|
||||
Created 33489 atoms
|
||||
using lattice units in orthogonal box = (-28.796757 -28.796757 -0.5) to (28.796757 28.796757 0.5)
|
||||
create_atoms CPU = 0.001 seconds
|
||||
|
||||
group big type 1
|
||||
400 atoms in group big
|
||||
group small type 2
|
||||
33489 atoms in group small
|
||||
set group small mass 0.01
|
||||
Setting atom values ...
|
||||
33489 settings made for mass
|
||||
|
||||
# delete overlaps
|
||||
# must set 1-2 cutoff to non-zero value
|
||||
|
||||
pair_style lj/cut 1.5
|
||||
pair_coeff 1 1 1.0 1.0
|
||||
pair_coeff 2 2 0.0 1.0 0.0
|
||||
pair_coeff 1 2 1.0 1.0
|
||||
|
||||
delete_atoms overlap 1.5 small big
|
||||
System init for delete_atoms ...
|
||||
Generated 0 of 1 mixed pair_coeff terms from geometric mixing rule
|
||||
Neighbor list info ...
|
||||
update: every = 1 steps, delay = 0 steps, check = yes
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 1.8
|
||||
ghost atom cutoff = 1.8
|
||||
binsize = 0.9, bins = 64 64 2
|
||||
2 neighbor lists, perpetual/occasional/extra = 1 1 0
|
||||
(1) command delete_atoms, occasional
|
||||
attributes: full, newton on
|
||||
pair build: full/bin/atomonly
|
||||
stencil: full/bin/2d
|
||||
bin: standard
|
||||
(2) pair lj/cut, perpetual
|
||||
attributes: half, newton on
|
||||
pair build: half/bin/atomonly/newton
|
||||
stencil: half/bin/2d
|
||||
bin: standard
|
||||
WARNING: Delete_atoms cutoff > minimum neighbor cutoff (src/delete_atoms.cpp:313)
|
||||
Deleted 13605 atoms, new total = 20284
|
||||
|
||||
# SRD run
|
||||
|
||||
reset_timestep 0
|
||||
|
||||
velocity small create 1.44 87287 loop geom
|
||||
|
||||
neighbor 0.8 multi
|
||||
neigh_modify delay 0 every 1 check yes
|
||||
neigh_modify exclude molecule/intra big include big
|
||||
|
||||
comm_modify mode multi group big vel yes
|
||||
neigh_modify include big
|
||||
|
||||
# no pairwise interactions with small particles
|
||||
|
||||
pair_style line/lj 2.5
|
||||
pair_coeff 1 1 1.0 1.0 1.0 1.0 2.5
|
||||
pair_coeff 2 2 0.0 0.0 0.0 1.0 0.0
|
||||
pair_coeff 1 2 1.0 0.2 1.0 0.2 2.5
|
||||
|
||||
# use fix SRD to push small particles out from inside big ones
|
||||
# if comment out, big particles won't see SRD particles
|
||||
|
||||
timestep 0.0002
|
||||
|
||||
fix 1 big rigid molecule
|
||||
100 rigid bodies with 400 atoms
|
||||
fix 2 small srd 20 big 1.0 0.25 49894 search 0.2 cubic warn 0.0001 shift yes 49829 overlap yes collision noslip
|
||||
|
||||
fix 3 all deform 1 x scale 1.25 y scale 1.25
|
||||
fix 4 all enforce2d
|
||||
|
||||
# diagnostics
|
||||
|
||||
compute tsmall small temp/deform
|
||||
compute tbig big temp
|
||||
variable pebig equal pe*atoms/count(big)
|
||||
variable ebig equal etotal*atoms/count(big)
|
||||
|
||||
compute_modify tbig extra/dof -350
|
||||
|
||||
compute 1 big erotate/asphere
|
||||
compute 2 all ke
|
||||
compute 3 all pe
|
||||
variable toteng equal (c_1+c_2+c_3)/atoms
|
||||
|
||||
thermo 1000
|
||||
thermo_style custom step c_tsmall f_2[9] c_1 etotal v_pebig v_ebig press
|
||||
thermo_modify temp tbig
|
||||
WARNING: Temperature for thermo pressure is not for group all (src/thermo.cpp:533)
|
||||
|
||||
compute 10 big property/atom end1x end1y end2x end2y
|
||||
|
||||
#dump 1 all custom 500 dump1.atom.srd id type x y z ix iy iz
|
||||
#dump 2 all custom 500 dump1.line.srd id type # c_10[1] c_10[2] c_10[3] c_10[4]
|
||||
|
||||
run 10000
|
||||
|
||||
CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
|
||||
|
||||
Your simulation uses code contributions which should be cited:
|
||||
|
||||
- Type Label Framework: https://doi.org/10.1021/acs.jpcb.3c08419
|
||||
|
||||
@Article{Gissinger24,
|
||||
author = {Jacob R. Gissinger, Ilia Nikiforov, Yaser Afshar, Brendon Waters, Moon-ki Choi, Daniel S. Karls, Alexander Stukowski, Wonpil Im, Hendrik Heinz, Axel Kohlmeyer, and Ellad B. Tadmor},
|
||||
title = {Type Label Framework for Bonded Force Fields in LAMMPS},
|
||||
journal = {J. Phys. Chem. B},
|
||||
year = 2024,
|
||||
volume = 128,
|
||||
number = 13,
|
||||
pages = {3282–-3297}
|
||||
}
|
||||
|
||||
- neighbor multi command: doi:10.1016/j.cpc.2008.03.005, doi:10.1007/s40571-020-00361-2
|
||||
|
||||
@Article{Intveld08,
|
||||
author = {in 't Veld, P. J. and S. J.~Plimpton and G. S. Grest},
|
||||
title = {Accurate and Efficient Methods for Modeling Colloidal
|
||||
Mixtures in an Explicit Solvent using Molecular Dynamics},
|
||||
journal = {Comput.\ Phys.\ Commut.},
|
||||
year = 2008,
|
||||
volume = 179,
|
||||
pages = {320--329}
|
||||
}
|
||||
|
||||
@article{Shire2020,
|
||||
author = {Shire, Tom and Hanley, Kevin J. and Stratford, Kevin},
|
||||
title = {{DEM} Simulations of Polydisperse Media: Efficient Contact
|
||||
Detection Applied to Investigate the Quasi-Static Limit},
|
||||
journal = {Computational Particle Mechanics},
|
||||
year = {2020}
|
||||
@article{Monti2022,
|
||||
author = {Monti, Joseph M. and Clemmer, Joel T. and Srivastava,
|
||||
Ishan and Silbert, Leonardo E. and Grest, Gary S.
|
||||
and Lechman, Jeremy B.},
|
||||
title = {Large-scale frictionless jamming with power-law particle
|
||||
size distributions},
|
||||
journal = {Phys. Rev. E},
|
||||
volume = {106}
|
||||
issue = {3}
|
||||
year = {2022}
|
||||
}
|
||||
|
||||
- fix srd command: doi:10.1063/1.3419070
|
||||
|
||||
@Article{Petersen10,
|
||||
author = {M. K. Petersen and J. B. Lechman and S. J. Plimpton and
|
||||
G. S. Grest and in 't Veld, P. J. and P. R. Schunk},
|
||||
title = {Mesoscale Hydrodynamics via Stochastic Rotation
|
||||
Dynamics: Comparison with {L}ennard-{J}ones Fluid},
|
||||
journal = {J.~Chem.\ Phys.},
|
||||
year = 2010,
|
||||
volume = 132,
|
||||
pages = 174106
|
||||
}
|
||||
|
||||
CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
|
||||
|
||||
Generated 0 of 1 mixed pair_coeff terms from geometric mixing rule
|
||||
WARNING: Using compute temp/deform with inconsistent fix deform remap option (src/compute_temp_deform.cpp:71)
|
||||
WARNING: Using fix srd with box deformation but no SRD thermostat (src/SRD/fix_srd.cpp:405)
|
||||
SRD info:
|
||||
SRD/big particles = 19884 400
|
||||
big particle diameter max/min = 1.9988171 0.50330564
|
||||
SRD temperature & lamda = 1 0.04
|
||||
SRD max distance & max velocity = 0.16 40
|
||||
SRD grid counts: 230 230 1
|
||||
SRD grid size: request, actual (xyz) = 0.25, 0.25040659 0.25040659 1
|
||||
SRD per actual grid cell = 0.45503978
|
||||
SRD viscosity = 1.0732692
|
||||
big/SRD mass density ratio = 12.298053
|
||||
WARNING: SRD bin size for fix srd differs from user request (src/SRD/fix_srd.cpp:2809)
|
||||
WARNING: Fix srd grid size > 1/4 of big particle diameter (src/SRD/fix_srd.cpp:2830)
|
||||
# of rescaled SRD velocities = 0
|
||||
ave/max small velocity = 15.906001 29.105426
|
||||
ave/max big velocity = 0 0
|
||||
Neighbor list info ...
|
||||
update: every = 1 steps, delay = 0 steps, check = yes
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 3.3
|
||||
ghost atom cutoff = 3.3
|
||||
binsize = 1.65, bins = 35 35 1
|
||||
1 neighbor lists, perpetual/occasional/extra = 1 0 0
|
||||
(1) pair line/lj, perpetual
|
||||
attributes: half, newton on
|
||||
pair build: half/multi/atomonly/newton
|
||||
stencil: half/multi/2d
|
||||
bin: multi
|
||||
Per MPI rank memory allocation (min/avg/max) = 9.752 | 9.762 | 9.781 Mbytes
|
||||
Step c_tsmall f_2[9] c_1 TotEng v_pebig v_ebig Press
|
||||
0 1.4809886 0 0 0 0 0 0
|
||||
1000 1.2265081 1.1522909 0.00027866069 0.0022427232 0 0.1137285 0.58262976
|
||||
2000 1.1757141 1.1251323 0.00040597152 0.003287261 0 0.16669701 0.27109853
|
||||
3000 1.126304 1.0646585 0.00050542958 0.0040748897 0 0.20663766 0.36959653
|
||||
4000 1.1140297 1.0761435 0.00060430927 0.0052863645 0 0.26807154 0.35099205
|
||||
5000 1.0823307 1.0256814 0.00066557681 0.0051213284 0 0.25970256 0.28491631
|
||||
6000 1.0656188 1.0387091 0.00066454105 0.005213537 0 0.26437846 0.2150581
|
||||
7000 1.0600108 1.0099931 0.0007238719 0.005470459 0 0.27740698 0.21846568
|
||||
8000 1.0464374 1.0049819 0.00061068366 0.0053702582 0 0.27232579 0.1326171
|
||||
9000 1.0381097 1.0126049 0.00057318728 0.0055976321 0 0.28385592 0.16797796
|
||||
10000 1.0312982 1.0018962 0.00050597007 0.0051153126 0 0.2593975 0.15281043
|
||||
Loop time of 1.2997 on 8 procs for 10000 steps with 20284 atoms
|
||||
|
||||
Performance: 132953.355 tau/day, 7694.060 timesteps/s, 156.066 Matom-step/s
|
||||
96.7% CPU use with 8 MPI tasks x 1 OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 0.01406 | 0.014495 | 0.01532 | 0.3 | 1.12
|
||||
Neigh | 0.0051916 | 0.0053424 | 0.0054386 | 0.1 | 0.41
|
||||
Comm | 0.062053 | 0.065305 | 0.069014 | 0.9 | 5.02
|
||||
Output | 0.00062289 | 0.00065254 | 0.00077243 | 0.0 | 0.05
|
||||
Modify | 1.169 | 1.178 | 1.1905 | 0.6 | 90.64
|
||||
Other | | 0.03589 | | | 2.76
|
||||
|
||||
Nlocal: 2535.5 ave 2637 max 2476 min
|
||||
Histogram: 1 3 1 1 0 0 0 0 1 1
|
||||
Nghost: 31.125 ave 42 max 25 min
|
||||
Histogram: 3 0 1 2 0 0 0 0 1 1
|
||||
Neighs: 0 ave 0 max 0 min
|
||||
Histogram: 8 0 0 0 0 0 0 0 0 0
|
||||
|
||||
Total # of neighbors = 0
|
||||
Ave neighs/atom = 0
|
||||
Neighbor list builds = 500
|
||||
Dangerous builds = 0
|
||||
|
||||
#undump 1
|
||||
#undump 2
|
||||
unfix 3
|
||||
|
||||
change_box all triclinic
|
||||
Changing box ...
|
||||
triclinic box = (-35.995947 -35.995947 -0.5) to (35.995947 35.995947 0.5) with tilt (0 0 0)
|
||||
|
||||
fix 2 small srd 20 big 1.0 0.25 49894 search 0.2 cubic warn 0.0001 shift yes 49829 overlap yes collision noslip #tstat yes
|
||||
|
||||
#dump 1 all custom 500 dump2.atom.srd id type x y z ix iy iz
|
||||
#dump 2 all custom 500 dump2.line.srd id type # c_10[1] c_10[2] c_10[3] c_10[4]
|
||||
|
||||
fix 3 all deform 1 xy erate 0.0002 units box remap v
|
||||
|
||||
run 40000
|
||||
Generated 0 of 1 mixed pair_coeff terms from geometric mixing rule
|
||||
WARNING: Using fix srd with box deformation but no SRD thermostat (src/SRD/fix_srd.cpp:405)
|
||||
SRD info:
|
||||
SRD/big particles = 19884 400
|
||||
big particle diameter max/min = 1.9988171 0.50330564
|
||||
SRD temperature & lamda = 1 0.04
|
||||
SRD max distance & max velocity = 0.16 40
|
||||
SRD grid counts: 288 288 1
|
||||
SRD grid size: request, actual (xyz) = 0.25, 0.24997185 0.24997185 1
|
||||
SRD per actual grid cell = 0.26976265
|
||||
SRD viscosity = 1.5799049
|
||||
big/SRD mass density ratio = 20.672578
|
||||
WARNING: SRD bin size for fix srd differs from user request (src/SRD/fix_srd.cpp:2809)
|
||||
WARNING: Fix srd grid size > 1/4 of big particle diameter (src/SRD/fix_srd.cpp:2830)
|
||||
# of rescaled SRD velocities = 1
|
||||
ave/max small velocity = 12.481632 40
|
||||
ave/max big velocity = 0.58815233 1.5489134
|
||||
Neighbor list info ...
|
||||
update: every = 1 steps, delay = 0 steps, check = yes
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 3.3
|
||||
ghost atom cutoff = 3.3
|
||||
binsize = 1.65, bins = 44 44 1
|
||||
1 neighbor lists, perpetual/occasional/extra = 1 0 0
|
||||
(1) pair line/lj, perpetual
|
||||
attributes: half, newton on
|
||||
pair build: half/multi/atomonly/newton/tri
|
||||
stencil: half/multi/2d/tri
|
||||
bin: multi
|
||||
Per MPI rank memory allocation (min/avg/max) = 11.95 | 11.95 | 11.95 Mbytes
|
||||
Step c_tsmall f_2[9] c_1 TotEng v_pebig v_ebig Press
|
||||
10000 0.98889306 0 0.00050595647 0.0051153289 0 0.25939833 0.020019811
|
||||
11000 0.98995417 0.99721412 0.0005166709 0.0054320552 0 0.27545952 0.03376188
|
||||
12000 0.99401168 0.95502319 0.00053659416 0.0054974426 0 0.27877531 0.056856034
|
||||
13000 0.98790692 0.96488357 0.00061076364 0.0053196554 0 0.26975972 0.2677228
|
||||
14000 0.98939233 0.98809694 0.00052317779 0.0050536922 0 0.25627273 0.20381427
|
||||
15000 0.99046365 0.97048828 0.00056914402 0.0053274908 0 0.27015706 0.082163088
|
||||
16000 0.99743447 1.0111983 0.00053706165 0.0048068384 0 0.24375478 0.06429569
|
||||
17000 0.99622359 1.0124982 0.0005905197 0.0055966416 -7.4541903e-05 0.2838057 0.25418037
|
||||
18000 0.99283929 0.99581721 0.00061730878 0.0059098964 -0.00014754106 0.29969084 0.12701702
|
||||
19000 0.99175517 1.0115581 0.0005128714 0.0059214809 -0.00046203987 0.30027829 0.2640506
|
||||
20000 0.98895811 0.97731528 0.00052294314 0.0055670021 -0.0016029702 0.28230268 0.26239209
|
||||
21000 0.98904093 0.98120258 0.00054169978 0.0052762731 -0.0026834879 0.26755981 0.15432481
|
||||
22000 0.98773149 0.99217218 0.00056091602 0.0052754729 -0.0044093345 0.26751923 0.2208051
|
||||
23000 0.98802862 0.96564549 0.00062226338 0.0053263454 -0.008790914 0.27009898 0.18058977
|
||||
24000 0.98754002 1.024015 0.00057178953 0.0054083382 -0.0094859317 0.27425683 0.01598147
|
||||
25000 0.98656341 0.94647246 0.00066215791 0.0053555849 -0.014877058 0.27158171 0.28794628
|
||||
26000 0.98750471 1.0077626 0.00058955769 0.0057081639 -0.013151869 0.28946099 0.20814911
|
||||
27
|
||||
@ -81,6 +81,7 @@ liblammpsplugin_t *liblammpsplugin_load(const char *lib)
|
||||
ADDSYM(python_finalize);
|
||||
|
||||
ADDSYM(error);
|
||||
ADDSYM(expand);
|
||||
|
||||
ADDSYM(file);
|
||||
ADDSYM(command);
|
||||
|
||||
@ -126,6 +126,7 @@ struct _liblammpsplugin {
|
||||
void (*python_finalize)();
|
||||
|
||||
void (*error)(void *, int, const char *);
|
||||
char *(*expand)(void *, const char *);
|
||||
|
||||
void (*file)(void *, const char *);
|
||||
char *(*command)(void *, const char *);
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -1,4 +1,4 @@
|
||||
LAMMPS (7 Feb 2024 - Development - patch_7Feb2024_update1-182-g93942f2013-modified)
|
||||
LAMMPS (29 Aug 2024 - Development - patch_29Aug2024-372-g51d104975a)
|
||||
OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98)
|
||||
using 1 OpenMP thread(s) per MPI task
|
||||
# Test of MEAM potential for HGa
|
||||
@ -67,7 +67,7 @@ Created 1 atoms
|
||||
variable teng equal "c_eatoms"
|
||||
compute pot_energy all pe/atom
|
||||
compute stress all stress/atom NULL
|
||||
# dump 1 all custom 1 dump.msmeam id x y z fx fy fz c_pot_energy c_stress[1] c_stress[2] c_stress[3] c_stress[4] c_stress[5] c_stress[6]
|
||||
dump 1 all custom 1 dump.msmeam id x y z fx fy fz c_pot_energy c_stress[1] c_stress[2] c_stress[3] c_stress[4] c_stress[5] c_stress[6]
|
||||
run 1
|
||||
WARNING: No fixes with time integration, atoms won't move (src/verlet.cpp:60)
|
||||
Neighbor list info ...
|
||||
@ -89,22 +89,22 @@ Neighbor list info ...
|
||||
bin: none
|
||||
Per MPI rank memory allocation (min/avg/max) = 8.587 | 8.587 | 8.587 Mbytes
|
||||
Step Temp TotEng Press Pxx Pyy Pzz Pxy Pxz Pyz Lx Ly Lz Volume c_eatoms
|
||||
0 0 15.433079 491354.7 838670.96 635393.15 0 80195.797 0 0 8 8 8 512 15.433079
|
||||
1 0 15.433079 491354.7 838670.96 635393.15 0 80195.797 0 0 8 8 8 512 15.433079
|
||||
Loop time of 4.4446e-05 on 1 procs for 1 steps with 3 atoms
|
||||
0 0 15.438614 491542.52 839006.02 635621.55 0 80225.587 0 0 8 8 8 512 15.438614
|
||||
1 0 15.438614 491542.52 839006.02 635621.55 0 80225.587 0 0 8 8 8 512 15.438614
|
||||
Loop time of 0.000144827 on 1 procs for 1 steps with 3 atoms
|
||||
|
||||
Performance: 1943.932 ns/day, 0.012 hours/ns, 22499.213 timesteps/s, 67.498 katom-step/s
|
||||
31.5% CPU use with 1 MPI tasks x 1 OpenMP threads
|
||||
Performance: 596.574 ns/day, 0.040 hours/ns, 6904.790 timesteps/s, 20.714 katom-step/s
|
||||
21.4% CPU use with 1 MPI tasks x 1 OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 2.9908e-05 | 2.9908e-05 | 2.9908e-05 | 0.0 | 67.29
|
||||
Pair | 9.2136e-05 | 9.2136e-05 | 9.2136e-05 | 0.0 | 63.62
|
||||
Neigh | 0 | 0 | 0 | 0.0 | 0.00
|
||||
Comm | 1.033e-06 | 1.033e-06 | 1.033e-06 | 0.0 | 2.32
|
||||
Output | 9.347e-06 | 9.347e-06 | 9.347e-06 | 0.0 | 21.03
|
||||
Modify | 2.02e-07 | 2.02e-07 | 2.02e-07 | 0.0 | 0.45
|
||||
Other | | 3.956e-06 | | | 8.90
|
||||
Comm | 4.389e-06 | 4.389e-06 | 4.389e-06 | 0.0 | 3.03
|
||||
Output | 3.9556e-05 | 3.9556e-05 | 3.9556e-05 | 0.0 | 27.31
|
||||
Modify | 9.92e-07 | 9.92e-07 | 9.92e-07 | 0.0 | 0.68
|
||||
Other | | 7.754e-06 | | | 5.35
|
||||
|
||||
Nlocal: 3 ave 3 max 3 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
@ -1,4 +1,4 @@
|
||||
LAMMPS (7 Feb 2024 - Development - patch_7Feb2024_update1-182-g93942f2013-modified)
|
||||
LAMMPS (29 Aug 2024 - Development - patch_29Aug2024-372-g51d104975a)
|
||||
OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98)
|
||||
using 1 OpenMP thread(s) per MPI task
|
||||
# Test of MEAM potential for HGa
|
||||
@ -67,7 +67,7 @@ Created 1 atoms
|
||||
variable teng equal "c_eatoms"
|
||||
compute pot_energy all pe/atom
|
||||
compute stress all stress/atom NULL
|
||||
# dump 1 all custom 1 dump.msmeam id x y z fx fy fz c_pot_energy c_stress[1] c_stress[2] c_stress[3] c_stress[4] c_stress[5] c_stress[6]
|
||||
dump 1 all custom 1 dump.msmeam id x y z fx fy fz c_pot_energy c_stress[1] c_stress[2] c_stress[3] c_stress[4] c_stress[5] c_stress[6]
|
||||
run 1
|
||||
WARNING: No fixes with time integration, atoms won't move (src/verlet.cpp:60)
|
||||
Neighbor list info ...
|
||||
@ -89,22 +89,22 @@ Neighbor list info ...
|
||||
bin: none
|
||||
Per MPI rank memory allocation (min/avg/max) = 7.965 | 8.123 | 8.594 Mbytes
|
||||
Step Temp TotEng Press Pxx Pyy Pzz Pxy Pxz Pyz Lx Ly Lz Volume c_eatoms
|
||||
0 0 15.433079 491354.7 838670.96 635393.15 0 80195.797 0 0 8 8 8 512 15.433079
|
||||
1 0 15.433079 491354.7 838670.96 635393.15 0 80195.797 0 0 8 8 8 512 15.433079
|
||||
Loop time of 8.70645e-05 on 4 procs for 1 steps with 3 atoms
|
||||
0 0 15.438614 491542.52 839006.02 635621.55 0 80225.587 0 0 8 8 8 512 15.438614
|
||||
1 0 15.438614 491542.52 839006.02 635621.55 0 80225.587 0 0 8 8 8 512 15.438614
|
||||
Loop time of 0.000328503 on 4 procs for 1 steps with 3 atoms
|
||||
|
||||
Performance: 992.368 ns/day, 0.024 hours/ns, 11485.738 timesteps/s, 34.457 katom-step/s
|
||||
29.0% CPU use with 4 MPI tasks x 1 OpenMP threads
|
||||
Performance: 263.011 ns/day, 0.091 hours/ns, 3044.110 timesteps/s, 9.132 katom-step/s
|
||||
75.3% CPU use with 4 MPI tasks x 1 OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 4.3957e-05 | 4.67e-05 | 5.1056e-05 | 0.0 | 53.64
|
||||
Pair | 0.0001419 | 0.0001471 | 0.00015891 | 0.0 | 44.78
|
||||
Neigh | 0 | 0 | 0 | 0.0 | 0.00
|
||||
Comm | 1.105e-05 | 1.3822e-05 | 1.7033e-05 | 0.0 | 15.88
|
||||
Output | 1.5765e-05 | 1.9045e-05 | 2.5216e-05 | 0.0 | 21.87
|
||||
Modify | 2.58e-07 | 3.465e-07 | 3.81e-07 | 0.0 | 0.40
|
||||
Other | | 7.151e-06 | | | 8.21
|
||||
Comm | 2.2092e-05 | 2.8424e-05 | 3.667e-05 | 0.0 | 8.65
|
||||
Output | 8.6275e-05 | 0.00010558 | 0.0001422 | 0.0 | 32.14
|
||||
Modify | 1.093e-06 | 2.4148e-06 | 5.651e-06 | 0.0 | 0.74
|
||||
Other | | 4.498e-05 | | | 13.69
|
||||
|
||||
Nlocal: 0.75 ave 3 max 0 min
|
||||
Histogram: 3 0 0 0 0 0 0 0 0 1
|
||||
@ -15,8 +15,8 @@ velocity all create 1.44 87287 loop geom
|
||||
region slice block 4 6 INF INF INF INF
|
||||
set region slice type 2
|
||||
|
||||
pair_style lj/cut 2.5
|
||||
pair_coeff * * 1.0 1.0 1.0
|
||||
pair_style lj/cut 4.0
|
||||
pair_coeff * * 1.0 1.0
|
||||
|
||||
neighbor 0.3 bin
|
||||
neigh_modify delay 0 every 1
|
||||
@ -24,14 +24,14 @@ neigh_modify delay 0 every 1
|
||||
fix 1 all nvt/sllod temp 1.0 1.0 1.0 tchain 1
|
||||
fix 2 all deform 1 xy erate 0.01 remap v
|
||||
|
||||
#dump 1 all custom 5000 dump.nemd id type x y z
|
||||
#dump 1 all custom 500 dump.nemd id type x y z
|
||||
|
||||
#dump 2 all image 1000 image.*.jpg type type adiam 1.2
|
||||
#dump 2 all image 100 image.*.jpg type type adiam 1.2
|
||||
#dump_modify 2 pad 5
|
||||
|
||||
#dump 3 all movie 1000 movie.mpg type type adiam 1.2
|
||||
#dump 3 all movie 100 movie.mpg type type adiam 1.2
|
||||
#dump_modify 3 pad 5
|
||||
|
||||
thermo 1000
|
||||
thermo 50
|
||||
|
||||
run 50000
|
||||
run 1000
|
||||
|
||||
@ -1,137 +0,0 @@
|
||||
LAMMPS (27 Nov 2018)
|
||||
using 1 OpenMP thread(s) per MPI task
|
||||
# 2d NEMD simulation
|
||||
|
||||
units lj
|
||||
atom_style atomic
|
||||
dimension 2
|
||||
|
||||
lattice sq2 0.8442
|
||||
Lattice spacing in x,y,z = 1.53919 1.53919 1.53919
|
||||
region box prism 0 10 0 8 -0.5 0.5 0 0 0
|
||||
create_box 2 box
|
||||
Created triclinic box = (0 0 -0.769595) to (15.3919 12.3135 0.769595) with tilt (0 0 0)
|
||||
1 by 1 by 1 MPI processor grid
|
||||
create_atoms 1 box
|
||||
Created 160 atoms
|
||||
Time spent = 0.000332355 secs
|
||||
mass * 1.0
|
||||
|
||||
velocity all create 1.44 87287 loop geom
|
||||
|
||||
region slice block 4 6 INF INF INF INF
|
||||
set region slice type 2
|
||||
40 settings made for type
|
||||
|
||||
pair_style lj/cut 2.5
|
||||
pair_coeff * * 1.0 1.0 1.0
|
||||
|
||||
neighbor 0.3 bin
|
||||
neigh_modify delay 0 every 1
|
||||
|
||||
fix 1 all nvt/sllod temp 1.0 1.0 1.0 tchain 1
|
||||
fix 2 all deform 1 xy erate 0.01 remap v
|
||||
|
||||
#dump 1 all custom 5000 dump.nemd id type x y z
|
||||
|
||||
#dump 2 all image 1000 image.*.jpg type type adiam 1.2
|
||||
#dump_modify 2 pad 5
|
||||
|
||||
#dump 3 all movie 1000 movie.mpg type type adiam 1.2
|
||||
#dump_modify 3 pad 5
|
||||
|
||||
thermo 1000
|
||||
|
||||
run 50000
|
||||
Neighbor list info ...
|
||||
update every 1 steps, delay 0 steps, check yes
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 1.3
|
||||
ghost atom cutoff = 1.3
|
||||
binsize = 0.65, bins = 24 19 3
|
||||
1 neighbor lists, perpetual/occasional/extra = 1 0 0
|
||||
(1) pair lj/cut, perpetual
|
||||
attributes: half, newton on
|
||||
pair build: half/bin/newton/tri
|
||||
stencil: half/bin/2d/newton/tri
|
||||
bin: standard
|
||||
Per MPI rank memory allocation (min/avg/max) = 3.065 | 3.065 | 3.065 Mbytes
|
||||
Step Temp E_pair E_mol TotEng Press Volume
|
||||
0 1.44 0 0 1.431 1.2080502 189.52855
|
||||
1000 1.1326992 0.25863754 0 1.3842573 6.0588079 189.52855
|
||||
2000 0.99104643 0.37634349 0 1.3611959 7.8993387 189.52855
|
||||
3000 1.0749743 0.21908728 0 1.2873429 6.2659517 189.52855
|
||||
4000 1.0986742 0.27147022 0 1.3632777 5.8778262 189.52855
|
||||
5000 1.071838 0.23413372 0 1.2992728 5.9120887 189.52855
|
||||
6000 1.0013194 0.26923671 0 1.2642979 6.2802759 189.52855
|
||||
7000 0.94110685 0.3224557 0 1.2576806 6.1864166 189.52855
|
||||
8000 0.97391513 0.28793383 0 1.255762 6.5071893 189.52855
|
||||
9000 0.95346063 0.31050593 0 1.2580074 6.3321512 189.52855
|
||||
10000 0.96236447 0.26298203 0 1.2193317 6.4083918 189.52855
|
||||
11000 0.9511149 0.27571527 0 1.2208857 6.0949768 189.52855
|
||||
12000 1.0186935 0.18134918 0 1.1936758 5.1269128 189.52855
|
||||
13000 0.96350682 0.23171507 0 1.1892 5.7367267 189.52855
|
||||
14000 0.94740402 0.27357945 0 1.2150622 6.0156532 189.52855
|
||||
15000 0.87951545 0.27745111 0 1.1514696 6.297405 189.52855
|
||||
16000 0.93216196 0.27020559 0 1.1965415 6.6188833 189.52855
|
||||
17000 0.94109936 0.24756193 0 1.1827794 5.8993088 189.52855
|
||||
18000 0.97325239 0.27996398 0 1.2471335 6.1486561 189.52855
|
||||
19000 1.0494686 0.27132686 0 1.3142363 6.6757065 189.52855
|
||||
20000 1.0391862 0.25195457 0 1.2846459 6.143235 189.52855
|
||||
21000 0.96407137 0.27359166 0 1.2316376 5.9577116 189.52855
|
||||
22000 0.97954534 0.31920255 0 1.2926257 6.5320163 189.52855
|
||||
23000 0.97585473 0.24154424 0 1.2112999 6.0839179 189.52855
|
||||
24000 1.0522109 0.1646952 0 1.2103298 5.0388687 189.52855
|
||||
25000 0.93707172 0.25655806 0 1.1877731 5.819887 189.52855
|
||||
26000 0.89798775 0.26629627 0 1.1586716 6.0393558 189.52855
|
||||
27000 0.93259926 0.24542428 0 1.1721948 5.3560986 189.52855
|
||||
28000 0.8428223 0.20784302 0 1.0453977 4.956911 189.52855
|
||||
29000 0.81653505 0.21924932 0 1.030681 5.271501 189.52855
|
||||
30000 0.90157811 0.15070734 0 1.0466506 4.476142 189.52855
|
||||
31000 0.86580039 0.21115151 0 1.0715407 5.0056915 189.52855
|
||||
32000 0.89768096 0.28377249 0 1.1758429 5.8449711 189.52855
|
||||
33000 1.0504011 0.29009694 0 1.333933 6.1319155 189.52855
|
||||
34000 1.2009765 0.19137934 0 1.3848498 4.9643885 189.52855
|
||||
35000 1.208705 0.27071222 0 1.4718628 6.2162389 189.52855
|
||||
36000 1.2211309 0.28389521 0 1.497394 6.5090715 189.52855
|
||||
37000 1.1384381 0.42795547 0 1.5592783 8.5129272 189.52855
|
||||
38000 1.2198334 0.34335732 0 1.5555668 7.2940883 189.52855
|
||||
39000 1.1562045 0.35783089 0 1.5068091 7.340999 189.52855
|
||||
40000 1.2145924 0.28410558 0 1.4911068 6.234986 189.52855
|
||||
41000 1.1240878 0.34663237 0 1.4636946 7.1720193 189.52855
|
||||
42000 1.2491422 0.26815889 0 1.509494 6.1390803 189.52855
|
||||
43000 1.1387564 0.33755832 0 1.4691975 7.0577597 189.52855
|
||||
44000 1.0031598 0.4081807 0 1.4050708 8.2732113 189.52855
|
||||
45000 1.0166213 0.29131017 0 1.3015776 6.1907807 189.52855
|
||||
46000 0.96251302 0.31483519 0 1.2713325 6.6987235 189.52855
|
||||
47000 0.89809294 0.30909884 0 1.2015787 6.3997583 189.52855
|
||||
48000 0.86736217 0.31917648 0 1.1811176 7.1584774 189.52855
|
||||
49000 0.91979053 0.21099403 0 1.1250359 5.4968259 189.52855
|
||||
50000 0.87079959 0.24059333 0 1.1059504 5.6039305 189.52855
|
||||
Loop time of 1.54353 on 1 procs for 50000 steps with 160 atoms
|
||||
|
||||
Performance: 13993916.675 tau/day, 32393.326 timesteps/s
|
||||
99.7% CPU use with 1 MPI tasks x 1 OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 0.20172 | 0.20172 | 0.20172 | 0.0 | 13.07
|
||||
Neigh | 0.16634 | 0.16634 | 0.16634 | 0.0 | 10.78
|
||||
Comm | 0.068928 | 0.068928 | 0.068928 | 0.0 | 4.47
|
||||
Output | 0.00059891 | 0.00059891 | 0.00059891 | 0.0 | 0.04
|
||||
Modify | 1.0123 | 1.0123 | 1.0123 | 0.0 | 65.59
|
||||
Other | | 0.09361 | | | 6.06
|
||||
|
||||
Nlocal: 160 ave 160 max 160 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Nghost: 73 ave 73 max 73 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Neighs: 353 ave 353 max 353 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
|
||||
Total # of neighbors = 353
|
||||
Ave neighs/atom = 2.20625
|
||||
Neighbor list builds = 5273
|
||||
Dangerous builds = 0
|
||||
Total wall time: 0:00:01
|
||||
@ -1,137 +0,0 @@
|
||||
LAMMPS (27 Nov 2018)
|
||||
using 1 OpenMP thread(s) per MPI task
|
||||
# 2d NEMD simulation
|
||||
|
||||
units lj
|
||||
atom_style atomic
|
||||
dimension 2
|
||||
|
||||
lattice sq2 0.8442
|
||||
Lattice spacing in x,y,z = 1.53919 1.53919 1.53919
|
||||
region box prism 0 10 0 8 -0.5 0.5 0 0 0
|
||||
create_box 2 box
|
||||
Created triclinic box = (0 0 -0.769595) to (15.3919 12.3135 0.769595) with tilt (0 0 0)
|
||||
2 by 2 by 1 MPI processor grid
|
||||
create_atoms 1 box
|
||||
Created 160 atoms
|
||||
Time spent = 0.000308275 secs
|
||||
mass * 1.0
|
||||
|
||||
velocity all create 1.44 87287 loop geom
|
||||
|
||||
region slice block 4 6 INF INF INF INF
|
||||
set region slice type 2
|
||||
40 settings made for type
|
||||
|
||||
pair_style lj/cut 2.5
|
||||
pair_coeff * * 1.0 1.0 1.0
|
||||
|
||||
neighbor 0.3 bin
|
||||
neigh_modify delay 0 every 1
|
||||
|
||||
fix 1 all nvt/sllod temp 1.0 1.0 1.0 tchain 1
|
||||
fix 2 all deform 1 xy erate 0.01 remap v
|
||||
|
||||
#dump 1 all custom 5000 dump.nemd id type x y z
|
||||
|
||||
#dump 2 all image 1000 image.*.jpg type type adiam 1.2
|
||||
#dump_modify 2 pad 5
|
||||
|
||||
#dump 3 all movie 1000 movie.mpg type type adiam 1.2
|
||||
#dump_modify 3 pad 5
|
||||
|
||||
thermo 1000
|
||||
|
||||
run 50000
|
||||
Neighbor list info ...
|
||||
update every 1 steps, delay 0 steps, check yes
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 1.3
|
||||
ghost atom cutoff = 1.3
|
||||
binsize = 0.65, bins = 24 19 3
|
||||
1 neighbor lists, perpetual/occasional/extra = 1 0 0
|
||||
(1) pair lj/cut, perpetual
|
||||
attributes: half, newton on
|
||||
pair build: half/bin/newton/tri
|
||||
stencil: half/bin/2d/newton/tri
|
||||
bin: standard
|
||||
Per MPI rank memory allocation (min/avg/max) = 3.062 | 3.062 | 3.062 Mbytes
|
||||
Step Temp E_pair E_mol TotEng Press Volume
|
||||
0 1.44 0 0 1.431 1.2080502 189.52855
|
||||
1000 1.1682693 0.24486562 0 1.4058332 5.8092954 189.52855
|
||||
2000 1.0928734 0.27609364 0 1.3621366 6.2237017 189.52855
|
||||
3000 1.09088 0.24816112 0 1.3322231 5.7001547 189.52855
|
||||
4000 1.0110684 0.29868377 0 1.303433 7.3312319 189.52855
|
||||
5000 0.91033678 0.28330698 0 1.1879542 6.1840352 189.52855
|
||||
6000 0.93416074 0.22661127 0 1.1549335 5.3619735 189.52855
|
||||
7000 0.93305734 0.19203739 0 1.1192631 5.2497547 189.52855
|
||||
8000 0.88944438 0.19421381 0 1.0780992 4.9733446 189.52855
|
||||
9000 0.86949257 0.21207681 0 1.0761351 5.4687076 189.52855
|
||||
10000 0.80088203 0.24071142 0 1.0365879 5.334545 189.52855
|
||||
11000 0.88899727 0.19972767 0 1.0831687 4.8832207 189.52855
|
||||
12000 0.93045817 0.17883252 0 1.1034753 4.9081709 189.52855
|
||||
13000 0.9724196 0.19089684 0 1.1572388 5.3460903 189.52855
|
||||
14000 0.93902186 0.25513773 0 1.1882907 6.3338337 189.52855
|
||||
15000 0.91879903 0.31605547 0 1.229112 6.2085671 189.52855
|
||||
16000 0.9860058 0.26863362 0 1.2484769 6.514688 189.52855
|
||||
17000 1.0354756 0.23445357 0 1.2634574 6.1519296 189.52855
|
||||
18000 1.0244774 0.27511827 0 1.2931927 6.2230002 189.52855
|
||||
19000 1.1581216 0.21558936 0 1.3664727 5.5458237 189.52855
|
||||
20000 1.0552168 0.29344488 0 1.3420666 6.4880315 189.52855
|
||||
21000 0.97925435 0.31583414 0 1.2889681 6.7584093 189.52855
|
||||
22000 1.0112494 0.26246834 0 1.2673974 5.4112008 189.52855
|
||||
23000 1.0463332 0.26049752 0 1.3002911 6.1359606 189.52855
|
||||
24000 1.1130319 0.19848564 0 1.3045611 5.7088487 189.52855
|
||||
25000 1.0355662 0.28048951 0 1.3095834 6.4596476 189.52855
|
||||
26000 1.0823932 0.21784218 0 1.2934704 5.106334 189.52855
|
||||
27000 0.99719525 0.32679678 0 1.3177596 6.7399277 189.52855
|
||||
28000 1.0665868 0.25002709 0 1.3099477 6.2732557 189.52855
|
||||
29000 1.0312798 0.30650087 0 1.3313351 7.0581024 189.52855
|
||||
30000 1.0388277 0.29812912 0 1.3304641 6.2533028 189.52855
|
||||
31000 1.0461658 0.21344416 0 1.2530714 5.3631154 189.52855
|
||||
32000 1.0233681 0.27545017 0 1.2924222 5.9612896 189.52855
|
||||
33000 1.1353086 0.20278244 0 1.3309953 5.7619128 189.52855
|
||||
34000 1.0374791 0.29661216 0 1.327607 6.5124409 189.52855
|
||||
35000 1.0752783 0.21684443 0 1.2854022 5.4759171 189.52855
|
||||
36000 1.0383445 0.27068641 0 1.3025412 6.8367218 189.52855
|
||||
37000 0.97341144 0.24034988 0 1.2076775 6.1335996 189.52855
|
||||
38000 0.9285918 0.2737544 0 1.1965425 5.8750327 189.52855
|
||||
39000 0.84869423 0.30079207 0 1.144182 6.8909326 189.52855
|
||||
40000 0.88237131 0.26049171 0 1.1373482 6.3932981 189.52855
|
||||
41000 0.90368591 0.21064132 0 1.1086792 5.5627232 189.52855
|
||||
42000 0.93436749 0.20367569 0 1.1322034 5.1420052 189.52855
|
||||
43000 0.91378588 0.26155533 0 1.16963 6.366756 189.52855
|
||||
44000 0.91673608 0.25967314 0 1.1706796 6.0846334 189.52855
|
||||
45000 1.0233334 0.25463562 0 1.2715732 6.0924255 189.52855
|
||||
46000 0.96184729 0.35422095 0 1.3100567 7.0249175 189.52855
|
||||
47000 1.134079 0.26196034 0 1.3889514 6.3476756 189.52855
|
||||
48000 1.0552136 0.303812 0 1.3524305 6.6968927 189.52855
|
||||
49000 1.1282184 0.2100955 0 1.3312626 5.8658659 189.52855
|
||||
50000 1.0493816 0.31540438 0 1.3582274 6.6348173 189.52855
|
||||
Loop time of 1.09903 on 4 procs for 50000 steps with 160 atoms
|
||||
|
||||
Performance: 19653623.953 tau/day, 45494.500 timesteps/s
|
||||
94.4% CPU use with 4 MPI tasks x 1 OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 0.057854 | 0.05974 | 0.062726 | 0.7 | 5.44
|
||||
Neigh | 0.047791 | 0.049863 | 0.054819 | 1.3 | 4.54
|
||||
Comm | 0.3581 | 0.38553 | 0.39784 | 2.6 | 35.08
|
||||
Output | 0.001116 | 0.0014414 | 0.0023859 | 1.4 | 0.13
|
||||
Modify | 0.41102 | 0.42642 | 0.4493 | 2.3 | 38.80
|
||||
Other | | 0.176 | | | 16.02
|
||||
|
||||
Nlocal: 40 ave 42 max 39 min
|
||||
Histogram: 2 0 0 1 0 0 0 0 0 1
|
||||
Nghost: 36.5 ave 37 max 36 min
|
||||
Histogram: 2 0 0 0 0 0 0 0 0 2
|
||||
Neighs: 87.5 ave 94 max 81 min
|
||||
Histogram: 1 1 0 0 0 0 0 0 1 1
|
||||
|
||||
Total # of neighbors = 350
|
||||
Ave neighs/atom = 2.1875
|
||||
Neighbor list builds = 5276
|
||||
Dangerous builds = 0
|
||||
Total wall time: 0:00:01
|
||||
111
examples/nemd/log.30Sep24.g++.1
Normal file
111
examples/nemd/log.30Sep24.g++.1
Normal file
@ -0,0 +1,111 @@
|
||||
LAMMPS (29 Aug 2024 - Development - patch_29Aug2024-394-g75f86a68a7-modified)
|
||||
OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98)
|
||||
using 1 OpenMP thread(s) per MPI task
|
||||
# 2d NEMD simulation
|
||||
|
||||
units lj
|
||||
atom_style atomic
|
||||
dimension 2
|
||||
|
||||
lattice sq2 0.8442
|
||||
Lattice spacing in x,y,z = 1.5391903 1.5391903 1.5391903
|
||||
region box prism 0 10 0 8 -0.5 0.5 0 0 0
|
||||
create_box 2 box
|
||||
Created triclinic box = (0 0 -0.76959516) to (15.391903 12.313523 0.76959516) with tilt (0 0 0)
|
||||
1 by 1 by 1 MPI processor grid
|
||||
create_atoms 1 box
|
||||
Created 160 atoms
|
||||
using lattice units in triclinic box = (0 0 -0.76959516) to (15.391903 12.313523 0.76959516) with tilt (0 0 0)
|
||||
create_atoms CPU = 0.000 seconds
|
||||
mass * 1.0
|
||||
|
||||
velocity all create 1.44 87287 loop geom
|
||||
|
||||
region slice block 4 6 INF INF INF INF
|
||||
set region slice type 2
|
||||
Setting atom values ...
|
||||
40 settings made for type
|
||||
|
||||
pair_style lj/cut 4.0
|
||||
pair_coeff * * 1.0 1.0
|
||||
|
||||
neighbor 0.3 bin
|
||||
neigh_modify delay 0 every 1
|
||||
|
||||
fix 1 all nvt/sllod temp 1.0 1.0 1.0 tchain 1
|
||||
fix 2 all deform 1 xy erate 0.01 remap v
|
||||
|
||||
#dump 1 all custom 500 dump.nemd id type x y z
|
||||
|
||||
#dump 2 all image 100 image.*.jpg type type adiam 1.2
|
||||
#dump_modify 2 pad 5
|
||||
|
||||
#dump 3 all movie 100 movie.mpg type type adiam 1.2
|
||||
#dump_modify 3 pad 5
|
||||
|
||||
thermo 50
|
||||
|
||||
run 1000
|
||||
Generated 0 of 1 mixed pair_coeff terms from geometric mixing rule
|
||||
Neighbor list info ...
|
||||
update: every = 1 steps, delay = 0 steps, check = yes
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 4.3
|
||||
ghost atom cutoff = 4.3
|
||||
binsize = 2.15, bins = 8 6 1
|
||||
1 neighbor lists, perpetual/occasional/extra = 1 0 0
|
||||
(1) pair lj/cut, perpetual
|
||||
attributes: half, newton on
|
||||
pair build: half/bin/atomonly/newton/tri
|
||||
stencil: half/bin/2d/tri
|
||||
bin: standard
|
||||
Per MPI rank memory allocation (min/avg/max) = 3.065 | 3.065 | 3.065 Mbytes
|
||||
Step Temp E_pair E_mol TotEng Press Volume
|
||||
0 1.44 -2.6548731 0 -1.2238731 1.9354912 189.52855
|
||||
50 0.97904822 -2.1934929 0 -1.2205637 5.0001562 189.52855
|
||||
100 0.96423603 -2.1711413 0 -1.2129318 5.3040025 189.52855
|
||||
150 0.96430794 -2.153062 0 -1.194781 5.3988945 189.52855
|
||||
200 1.056585 -2.2379316 0 -1.1879503 5.0007883 189.52855
|
||||
250 1.0183256 -2.1921531 0 -1.1801921 5.5370076 189.52855
|
||||
300 0.96855552 -2.140465 0 -1.177963 5.7188412 189.52855
|
||||
350 1.0115567 -2.1883272 0 -1.1830927 5.4437104 189.52855
|
||||
400 0.93743103 -2.1230826 0 -1.1915105 5.7059909 189.52855
|
||||
450 1.1120368 -2.3041274 0 -1.1990408 4.646396 189.52855
|
||||
500 0.99611106 -2.2039016 0 -1.2140162 5.1526658 189.52855
|
||||
550 1.1075519 -2.3349751 0 -1.2343453 4.0671355 189.52855
|
||||
600 1.0550783 -2.3126484 0 -1.2641644 4.5423735 189.52855
|
||||
650 0.98516169 -2.2664919 0 -1.2874875 4.8365475 189.52855
|
||||
700 0.97899201 -2.2815136 0 -1.3086403 4.5415389 189.52855
|
||||
750 1.0107776 -2.3375258 0 -1.3330656 4.3655082 189.52855
|
||||
800 0.97711804 -2.3221241 0 -1.3511131 4.2153988 189.52855
|
||||
850 0.8984454 -2.258341 0 -1.3655108 4.6759265 189.52855
|
||||
900 0.85409237 -2.2157566 0 -1.3670024 5.0180073 189.52855
|
||||
950 0.90195434 -2.2500988 0 -1.3537817 4.8189466 189.52855
|
||||
1000 1.0047283 -2.3359434 0 -1.3374947 4.0788763 189.52855
|
||||
Loop time of 0.0331477 on 1 procs for 1000 steps with 160 atoms
|
||||
|
||||
Performance: 13032596.122 tau/day, 30168.047 timesteps/s, 4.827 Matom-step/s
|
||||
99.6% CPU use with 1 MPI tasks x 1 OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 0.017584 | 0.017584 | 0.017584 | 0.0 | 53.05
|
||||
Neigh | 0.0080996 | 0.0080996 | 0.0080996 | 0.0 | 24.43
|
||||
Comm | 0.0010864 | 0.0010864 | 0.0010864 | 0.0 | 3.28
|
||||
Output | 9.9819e-05 | 9.9819e-05 | 9.9819e-05 | 0.0 | 0.30
|
||||
Modify | 0.0057062 | 0.0057062 | 0.0057062 | 0.0 | 17.21
|
||||
Other | | 0.0005715 | | | 1.72
|
||||
|
||||
Nlocal: 160 ave 160 max 160 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Nghost: 271 ave 271 max 271 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Neighs: 3881 ave 3881 max 3881 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
|
||||
Total # of neighbors = 3881
|
||||
Ave neighs/atom = 24.25625
|
||||
Neighbor list builds = 106
|
||||
Dangerous builds = 0
|
||||
Total wall time: 0:00:00
|
||||
111
examples/nemd/log.30Sep24.g++.4
Normal file
111
examples/nemd/log.30Sep24.g++.4
Normal file
@ -0,0 +1,111 @@
|
||||
LAMMPS (29 Aug 2024 - Development - patch_29Aug2024-394-g75f86a68a7-modified)
|
||||
OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98)
|
||||
using 1 OpenMP thread(s) per MPI task
|
||||
# 2d NEMD simulation
|
||||
|
||||
units lj
|
||||
atom_style atomic
|
||||
dimension 2
|
||||
|
||||
lattice sq2 0.8442
|
||||
Lattice spacing in x,y,z = 1.5391903 1.5391903 1.5391903
|
||||
region box prism 0 10 0 8 -0.5 0.5 0 0 0
|
||||
create_box 2 box
|
||||
Created triclinic box = (0 0 -0.76959516) to (15.391903 12.313523 0.76959516) with tilt (0 0 0)
|
||||
2 by 2 by 1 MPI processor grid
|
||||
create_atoms 1 box
|
||||
Created 160 atoms
|
||||
using lattice units in triclinic box = (0 0 -0.76959516) to (15.391903 12.313523 0.76959516) with tilt (0 0 0)
|
||||
create_atoms CPU = 0.001 seconds
|
||||
mass * 1.0
|
||||
|
||||
velocity all create 1.44 87287 loop geom
|
||||
|
||||
region slice block 4 6 INF INF INF INF
|
||||
set region slice type 2
|
||||
Setting atom values ...
|
||||
40 settings made for type
|
||||
|
||||
pair_style lj/cut 4.0
|
||||
pair_coeff * * 1.0 1.0
|
||||
|
||||
neighbor 0.3 bin
|
||||
neigh_modify delay 0 every 1
|
||||
|
||||
fix 1 all nvt/sllod temp 1.0 1.0 1.0 tchain 1
|
||||
fix 2 all deform 1 xy erate 0.01 remap v
|
||||
|
||||
#dump 1 all custom 500 dump.nemd id type x y z
|
||||
|
||||
#dump 2 all image 100 image.*.jpg type type adiam 1.2
|
||||
#dump_modify 2 pad 5
|
||||
|
||||
#dump 3 all movie 100 movie.mpg type type adiam 1.2
|
||||
#dump_modify 3 pad 5
|
||||
|
||||
thermo 50
|
||||
|
||||
run 1000
|
||||
Generated 0 of 1 mixed pair_coeff terms from geometric mixing rule
|
||||
Neighbor list info ...
|
||||
update: every = 1 steps, delay = 0 steps, check = yes
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 4.3
|
||||
ghost atom cutoff = 4.3
|
||||
binsize = 2.15, bins = 8 6 1
|
||||
1 neighbor lists, perpetual/occasional/extra = 1 0 0
|
||||
(1) pair lj/cut, perpetual
|
||||
attributes: half, newton on
|
||||
pair build: half/bin/atomonly/newton/tri
|
||||
stencil: half/bin/2d/tri
|
||||
bin: standard
|
||||
Per MPI rank memory allocation (min/avg/max) = 3.064 | 3.064 | 3.064 Mbytes
|
||||
Step Temp E_pair E_mol TotEng Press Volume
|
||||
0 1.44 -2.6548731 0 -1.2238731 1.9354912 189.52855
|
||||
50 0.97904822 -2.1934929 0 -1.2205637 5.0001562 189.52855
|
||||
100 0.96423603 -2.1711413 0 -1.2129318 5.3040025 189.52855
|
||||
150 0.96430794 -2.153062 0 -1.194781 5.3988945 189.52855
|
||||
200 1.056585 -2.2379316 0 -1.1879503 5.0007883 189.52855
|
||||
250 1.0183256 -2.1921531 0 -1.1801921 5.5370076 189.52855
|
||||
300 0.96855552 -2.140465 0 -1.177963 5.7188412 189.52855
|
||||
350 1.0115567 -2.1883272 0 -1.1830927 5.4437104 189.52855
|
||||
400 0.93743103 -2.1230826 0 -1.1915105 5.7059909 189.52855
|
||||
450 1.1120368 -2.3041274 0 -1.1990408 4.646396 189.52855
|
||||
500 0.99611106 -2.2039016 0 -1.2140162 5.1526658 189.52855
|
||||
550 1.1075519 -2.3349751 0 -1.2343453 4.0671355 189.52855
|
||||
600 1.0550783 -2.3126484 0 -1.2641644 4.5423735 189.52855
|
||||
650 0.98516169 -2.2664919 0 -1.2874875 4.8365475 189.52855
|
||||
700 0.97899201 -2.2815136 0 -1.3086403 4.5415389 189.52855
|
||||
750 1.0107776 -2.3375258 0 -1.3330656 4.3655082 189.52855
|
||||
800 0.97711804 -2.3221241 0 -1.3511131 4.2153988 189.52855
|
||||
850 0.8984454 -2.258341 0 -1.3655108 4.6759265 189.52855
|
||||
900 0.85409237 -2.2157566 0 -1.3670024 5.0180073 189.52855
|
||||
950 0.90195434 -2.2500988 0 -1.3537817 4.8189466 189.52855
|
||||
1000 1.0047283 -2.3359434 0 -1.3374947 4.0788763 189.52855
|
||||
Loop time of 0.0158907 on 4 procs for 1000 steps with 160 atoms
|
||||
|
||||
Performance: 27185684.597 tau/day, 62929.825 timesteps/s, 10.069 Matom-step/s
|
||||
98.0% CPU use with 4 MPI tasks x 1 OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 0.0044606 | 0.004562 | 0.0047619 | 0.2 | 28.71
|
||||
Neigh | 0.0023154 | 0.0023979 | 0.002494 | 0.1 | 15.09
|
||||
Comm | 0.0051743 | 0.0054807 | 0.0056638 | 0.3 | 34.49
|
||||
Output | 7.5535e-05 | 8.9889e-05 | 0.00012988 | 0.0 | 0.57
|
||||
Modify | 0.002223 | 0.0023624 | 0.0026372 | 0.3 | 14.87
|
||||
Other | | 0.0009979 | | | 6.28
|
||||
|
||||
Nlocal: 40 ave 42 max 38 min
|
||||
Histogram: 1 0 1 0 0 0 0 1 0 1
|
||||
Nghost: 163.5 ave 166 max 162 min
|
||||
Histogram: 2 0 0 0 0 1 0 0 0 1
|
||||
Neighs: 970.25 ave 1016 max 942 min
|
||||
Histogram: 1 1 0 0 1 0 0 0 0 1
|
||||
|
||||
Total # of neighbors = 3881
|
||||
Ave neighs/atom = 24.25625
|
||||
Neighbor list builds = 106
|
||||
Dangerous builds = 0
|
||||
Total wall time: 0:00:00
|
||||
@ -100,6 +100,7 @@ MODULE LIBLAMMPS
|
||||
CONTAINS
|
||||
PROCEDURE :: close => lmp_close
|
||||
PROCEDURE :: error => lmp_error
|
||||
PROCEDURE :: expand => lmp_expand
|
||||
PROCEDURE :: file => lmp_file
|
||||
PROCEDURE :: command => lmp_command
|
||||
PROCEDURE :: commands_list => lmp_commands_list
|
||||
@ -410,6 +411,14 @@ MODULE LIBLAMMPS
|
||||
TYPE(c_ptr), VALUE :: error_text
|
||||
END SUBROUTINE lammps_error
|
||||
|
||||
FUNCTION lammps_expand(handle, line) BIND(C)
|
||||
IMPORT :: c_ptr
|
||||
IMPLICIT NONE
|
||||
TYPE(c_ptr), INTENT(IN), VALUE :: handle
|
||||
TYPE(c_ptr), INTENT(IN), VALUE :: line
|
||||
TYPE(c_ptr) :: lammps_expand
|
||||
END FUNCTION lammps_expand
|
||||
|
||||
SUBROUTINE lammps_file(handle, filename) BIND(C)
|
||||
IMPORT :: c_ptr
|
||||
IMPLICIT NONE
|
||||
@ -1107,10 +1116,24 @@ CONTAINS
|
||||
CALL lammps_free(str)
|
||||
END SUBROUTINE lmp_error
|
||||
|
||||
! equivalent function to lammps_expand()
|
||||
FUNCTION lmp_expand(self, line)
|
||||
CLASS(lammps), INTENT(IN) :: self
|
||||
CHARACTER(len=*), INTENT(IN) :: line
|
||||
TYPE(c_ptr) :: str, res
|
||||
CHARACTER(len=:), ALLOCATABLE :: lmp_expand
|
||||
|
||||
str = f2c_string(line)
|
||||
res = lammps_expand(self%handle, str)
|
||||
CALL lammps_free(str)
|
||||
lmp_expand = c2f_string(res)
|
||||
CALL lammps_free(res)
|
||||
END FUNCTION lmp_expand
|
||||
|
||||
! equivalent function to lammps_file()
|
||||
SUBROUTINE lmp_file(self, filename)
|
||||
CLASS(lammps), INTENT(IN) :: self
|
||||
CHARACTER(len=*) :: filename
|
||||
CHARACTER(len=*), INTENT(IN) :: filename
|
||||
TYPE(c_ptr) :: str
|
||||
|
||||
str = f2c_string(filename)
|
||||
@ -1121,7 +1144,7 @@ CONTAINS
|
||||
! equivalent function to lammps_command()
|
||||
SUBROUTINE lmp_command(self, cmd)
|
||||
CLASS(lammps), INTENT(IN) :: self
|
||||
CHARACTER(len=*) :: cmd
|
||||
CHARACTER(len=*), INTENT(IN) :: cmd
|
||||
TYPE(c_ptr) :: str
|
||||
|
||||
str = f2c_string(cmd)
|
||||
@ -1155,7 +1178,7 @@ CONTAINS
|
||||
! equivalent function to lammps_commands_string()
|
||||
SUBROUTINE lmp_commands_string(self, str)
|
||||
CLASS(lammps), INTENT(IN) :: self
|
||||
CHARACTER(len=*) :: str
|
||||
CHARACTER(len=*), INTENT(IN) :: str
|
||||
TYPE(c_ptr) :: tmp
|
||||
|
||||
tmp = f2c_string(str)
|
||||
@ -1173,7 +1196,7 @@ CONTAINS
|
||||
! equivalent function to lammps_get_thermo
|
||||
REAL(c_double) FUNCTION lmp_get_thermo(self,name)
|
||||
CLASS(lammps), INTENT(IN) :: self
|
||||
CHARACTER(LEN=*) :: name
|
||||
CHARACTER(LEN=*), INTENT(IN) :: name
|
||||
TYPE(c_ptr) :: Cname
|
||||
|
||||
Cname = f2c_string(name)
|
||||
@ -1185,7 +1208,7 @@ CONTAINS
|
||||
FUNCTION lmp_last_thermo(self,what,index) RESULT(thermo_data)
|
||||
CLASS(lammps), INTENT(IN), TARGET :: self
|
||||
CHARACTER(LEN=*), INTENT(IN) :: what
|
||||
INTEGER :: index
|
||||
INTEGER, INTENT(IN) :: index
|
||||
INTEGER(c_int) :: idx
|
||||
TYPE(lammps_data) :: thermo_data, type_data
|
||||
INTEGER(c_int) :: datatype
|
||||
|
||||
@ -586,8 +586,25 @@ void Neighbor::build_nbor_list(double **x, const int inum, const int host_inum,
|
||||
const int b2y=_block_cell_2d;
|
||||
const int g2x=static_cast<int>(ceil(static_cast<double>(_maxspecial)/b2x));
|
||||
const int g2y=static_cast<int>(ceil(static_cast<double>(nt)/b2y));
|
||||
// the maximum number of blocks on the device is typically 65535
|
||||
// in principle we can use a lower number to have more resource per block 32768
|
||||
const int max_num_blocks = 65535;
|
||||
int shift = 0;
|
||||
if (g2y < max_num_blocks) {
|
||||
_shared->k_transpose.set_size(g2x,g2y,b2x,b2y);
|
||||
_shared->k_transpose.run(&dev_special,&dev_special_t,&_maxspecial,&nt);
|
||||
_shared->k_transpose.run(&dev_special,&dev_special_t,&_maxspecial,&nt,&shift);
|
||||
} else {
|
||||
// using a fixed number of blocks
|
||||
int g2y_m = max_num_blocks;
|
||||
_shared->k_transpose.set_size(g2x,g2y_m,b2x,b2y);
|
||||
// number of chunks needed for the whole transpose
|
||||
const int num_chunks = ceil(static_cast<double>(g2y) / g2y_m);
|
||||
for (int i = 0; i < num_chunks; i++) {
|
||||
_shared->k_transpose.run(&dev_special,&dev_special_t,&_maxspecial,&nt,&shift);
|
||||
shift += g2y_m*b2y;
|
||||
}
|
||||
}
|
||||
|
||||
time_transpose.stop();
|
||||
}
|
||||
|
||||
|
||||
@ -147,7 +147,7 @@ __kernel void kernel_calc_cell_counts(const unsigned *restrict cell_id,
|
||||
|
||||
__kernel void transpose(__global tagint *restrict out,
|
||||
const __global tagint *restrict in,
|
||||
int columns_in, int rows_in)
|
||||
int columns_in, int rows_in, int shift)
|
||||
{
|
||||
__local tagint block[BLOCK_CELL_2D][BLOCK_CELL_2D+1];
|
||||
|
||||
@ -158,15 +158,15 @@ __kernel void transpose(__global tagint *restrict out,
|
||||
|
||||
unsigned i=bi*BLOCK_CELL_2D+ti;
|
||||
unsigned j=bj*BLOCK_CELL_2D+tj;
|
||||
if ((i<columns_in) && (j<rows_in))
|
||||
block[tj][ti]=in[j*columns_in+i];
|
||||
if ((i<columns_in) && (j+shift<rows_in))
|
||||
block[tj][ti]=in[(j+shift)*columns_in+i];
|
||||
|
||||
__syncthreads();
|
||||
|
||||
i=bj*BLOCK_CELL_2D+ti;
|
||||
j=bi*BLOCK_CELL_2D+tj;
|
||||
if ((i<rows_in) && (j<columns_in))
|
||||
out[j*rows_in+i] = block[ti][tj];
|
||||
if ((i+shift<rows_in) && (j<columns_in))
|
||||
out[j*rows_in+i+shift] = block[ti][tj];
|
||||
}
|
||||
|
||||
#ifndef LAL_USE_OLD_NEIGHBOR
|
||||
|
||||
@ -1,12 +1,103 @@
|
||||
# CHANGELOG
|
||||
|
||||
## [4.4.01](https://github.com/kokkos/kokkos/tree/4.4.01)
|
||||
[Full Changelog](https://github.com/kokkos/kokkos/compare/4.4.00...4.4.01)
|
||||
|
||||
### Features:
|
||||
* Introduce new SequentialHostInit view allocation property [\#7229](https://github.com/kokkos/kokkos/pull/7229)
|
||||
|
||||
### Backend and Architecture Enhancements:
|
||||
|
||||
#### CUDA:
|
||||
* Experimental support for unified memory mode (intended for Grace-Hopper etc.) [\#6823](https://github.com/kokkos/kokkos/pull/6823)
|
||||
|
||||
### Bug Fixes
|
||||
* OpenMP: Fix issue related to the visibility of an internal symbol with shared libraries that affected `ScatterView` in particular [\#7284](https://github.com/kokkos/kokkos/pull/7284)
|
||||
* Fix implicit copy assignment operators in a few AVX2 masks being deleted [\#7296](https://github.com/kokkos/kokkos/pull/7296)
|
||||
|
||||
## [4.4.00](https://github.com/kokkos/kokkos/tree/4.4.00)
|
||||
[Full Changelog](https://github.com/kokkos/kokkos/compare/4.3.01...4.4.00)
|
||||
|
||||
### Features:
|
||||
* Add `Kokkos::View` conversions from and to [`std::mdspan`](https://en.cppreference.com/w/cpp/container/mdspan) [\#6830](https://github.com/kokkos/kokkos/pull/6830) [\#7069](https://github.com/kokkos/kokkos/pull/7069)
|
||||
|
||||
### Backend and Architecture Enhancements:
|
||||
|
||||
#### CUDA:
|
||||
* `nvcc_wrapper`: Adding ability to process `--disable-warnings` flag [\#6936](https://github.com/kokkos/kokkos/issues/6936)
|
||||
* Use recommended/max team size functions in Cuda ParallelFor and Reduce constructors [\#6891](https://github.com/kokkos/kokkos/issues/6891)
|
||||
* Improve compile-times when building with `Kokkos_ENABLE_DEBUG_BOUNDS_CHECK` in Cuda [\#7013](https://github.com/kokkos/kokkos/pull/7013)
|
||||
|
||||
#### HIP:
|
||||
* Use HIP builtin atomics [\#6882](https://github.com/kokkos/kokkos/pull/6882) [\#7000](https://github.com/kokkos/kokkos/pull/7000)
|
||||
* Enable user-specified compiler and linker flags for AMD GPUs [\#7127](https://github.com/kokkos/kokkos/pull/7127)
|
||||
|
||||
#### SYCL:
|
||||
* Add support for Graphs [\#6912](https://github.com/kokkos/kokkos/pull/6912)
|
||||
* Fix multi-GPU support [\#6887](https://github.com/kokkos/kokkos/pull/6887)
|
||||
* Improve performance of reduction and scan operations [\#6562](https://github.com/kokkos/kokkos/pull/6562), [\#6750](https://github.com/kokkos/kokkos/pull/6750)
|
||||
* Fix lock for guarding scratch space in `TeamPolicy` `parallel_reduce` [\#6988](https://github.com/kokkos/kokkos/pull/6988)
|
||||
* Include submission command queue property information into `SYCL::print_configuration()` [\#7004](https://github.com/kokkos/kokkos/pull/7004)
|
||||
|
||||
#### OpenACC:
|
||||
* Make `TeamPolicy` `parallel_for` execute on the correct async queue [\#7012](https://github.com/kokkos/kokkos/pull/7012)
|
||||
|
||||
#### OpenMPTarget:
|
||||
* Honor user requested loop ordering in `MDRange` policy [\#6925](https://github.com/kokkos/kokkos/pull/6925)
|
||||
* Prevent data races by guarding the scratch space used in `parallel_scan` [\#6998](https://github.com/kokkos/kokkos/pull/6998)
|
||||
|
||||
#### HPX:
|
||||
* Workaround issue with template argument deduction to support compilation with NVCC [\#7015](https://github.com/kokkos/kokkos/pull/7015)
|
||||
|
||||
### General Enhancements
|
||||
* Improve performance of view copies in host parallel regions [\#6730](https://github.com/kokkos/kokkos/pull/6730)
|
||||
* Harmonize convertibility rules of `Kokkos::RandomAccessIterator` with `View`s [\#6929](https://github.com/kokkos/kokkos/pull/6929)
|
||||
* Add a precondition check for non-overlapping ranges in the `adjacent_difference` algorithm in debug mode [\#6922](https://github.com/kokkos/kokkos/pull/6922)
|
||||
* Add deduction guides for `TeamPolicy` [\#7030](https://github.com/kokkos/kokkos/pull/7030)
|
||||
* SIMD: Allow flexible vector width for 32 bit types [\#6802](https://github.com/kokkos/kokkos/pull/6802)
|
||||
* Updates for `Kokkos::Array`: add `kokkos_swap(Array<T, N>)` specialization [\#6943](https://github.com/kokkos/kokkos/pull/6943), add `Kokkos::to_array` [\#6375](https://github.com/kokkos/kokkos/pull/6375), make `Kokkos::Array` equality-comparable [\#7148](https://github.com/kokkos/kokkos/pull/7148)
|
||||
* Structured binding support for `Kokkos::complex` [\#7040](https://github.com/kokkos/kokkos/pull/7040)
|
||||
|
||||
### Build System Changes
|
||||
* Do not require OpenMP support for languages other than CXX [\#6965](https://github.com/kokkos/kokkos/pull/6965)
|
||||
* Update Intel GPU architectures in Makefile [\#6895](https://github.com/kokkos/kokkos/pull/6895)
|
||||
* Fix use of OpenMP with Cuda or HIP as compile language [\#6972](https://github.com/kokkos/kokkos/pull/6972)
|
||||
* Define and enforce new minimum compiler versions for C++20 support [\#7128](https://github.com/kokkos/kokkos/pull/7128), [\#7123](https://github.com/kokkos/kokkos/pull/7123)
|
||||
* Add nvidia Grace CPU architecture: `Kokkos_ARCH_ARMV9_GRACE` [\#7158](https://github.com/kokkos/kokkos/pull/7158)
|
||||
* Fix Makefile.kokkos for Threads [\#6896](https://github.com/kokkos/kokkos/pull/6896)
|
||||
* Remove support for NVHPC as CUDA device compiler [\#6987](https://github.com/kokkos/kokkos/pull/6987)
|
||||
* Fix using CUDAToolkit for CMake 3.28.4 and higher [\#7062](https://github.com/kokkos/kokkos/pull/7062)
|
||||
|
||||
### Incompatibilities (i.e. breaking changes)
|
||||
* Drop `Kokkos::Array` special treatment in `View`s [\#6906](https://github.com/kokkos/kokkos/pull/6906)
|
||||
* Drop `Experimental::RawMemoryAllocationFailure` [\#7145](https://github.com/kokkos/kokkos/pull/7145)
|
||||
|
||||
### Deprecations
|
||||
* Remove `Experimental::LayoutTiled` class template and deprecate `is_layouttiled` trait [\#6907](https://github.com/kokkos/kokkos/pull/6907)
|
||||
* Deprecate `Kokkos::layout_iterate_type_selector` [\#7076](https://github.com/kokkos/kokkos/pull/7076)
|
||||
* Deprecate specialization of `Kokkos::pair` for a single element [\#6947](https://github.com/kokkos/kokkos/pull/6947)
|
||||
* Deprecate `deep_copy` of `UnorderedMap` of different size [\#6812](https://github.com/kokkos/kokkos/pull/6812)
|
||||
* Deprecate trailing `Proxy` template argument of `Kokkos::Array` [\#6934](https://github.com/kokkos/kokkos/pull/6934)
|
||||
* Deprecate implicit conversions of integers to `ChunkSize` [\#7151](https://github.com/kokkos/kokkos/pull/7151)
|
||||
* Deprecate implicit conversions to execution spaces [\#7156](https://github.com/kokkos/kokkos/pull/7156)
|
||||
|
||||
### Bug Fixes
|
||||
* Do not return a copy of the input functor in `Experimental::for_each` [\#6910](https://github.com/kokkos/kokkos/pull/6910)
|
||||
* Fix `realloc` on views of non-default constructible element types [\#6993](https://github.com/kokkos/kokkos/pull/6993)
|
||||
* Fix undefined behavior in `View` initialization or fill with zeros [\#7014](https://github.com/kokkos/kokkos/pull/7014)
|
||||
* Fix `sort_by_key` on host execution spaces when building with NVCC [\#7059](https://github.com/kokkos/kokkos/pull/7059)
|
||||
* Fix using shared libraries and -fvisibility=hidden [\#7065](https://github.com/kokkos/kokkos/pull/7065)
|
||||
* Fix view reference counting when functor copy constructor throws in parallel dispatch [\#6289](https://github.com/kokkos/kokkos/pull/6289)
|
||||
* Fix `initialize(InitializationSetting)` for handling `print_configuration` setting [\#7098](https://github.com/kokkos/kokkos/pull/7098)
|
||||
* Thread safety fixes for the Serial and OpenMP backend [\#7080](https://github.com/kokkos/kokkos/pull/7080), [\#6151](https://github.com/kokkos/kokkos/pull/6151)
|
||||
|
||||
## [4.3.01](https://github.com/kokkos/kokkos/tree/4.3.01)
|
||||
[Full Changelog](https://github.com/kokkos/kokkos/compare/4.3.00...4.3.01)
|
||||
|
||||
### Backend and Architecture Enhancements:
|
||||
|
||||
#### HIP:
|
||||
* MI300 support unified memory support [\#6877](https://github.com/kokkos/kokkos/pull/6877)
|
||||
* MI300 support unified memory [\#6877](https://github.com/kokkos/kokkos/pull/6877)
|
||||
|
||||
### Bug Fixes
|
||||
* Serial: Use the provided execution space instance in TeamPolicy [\#6951](https://github.com/kokkos/kokkos/pull/6951)
|
||||
|
||||
65
lib/kokkos/CITATION.cff
Normal file
65
lib/kokkos/CITATION.cff
Normal file
@ -0,0 +1,65 @@
|
||||
cff-version: 1.2.0
|
||||
title: Kokkos
|
||||
message: >-
|
||||
If you use this software, please cite the overview paper
|
||||
type: software
|
||||
authors:
|
||||
- name: The Kokkos authors
|
||||
website: https://kokkos.org/community/team/
|
||||
identifiers:
|
||||
- type: url
|
||||
website: https://kokkos.org/kokkos-core-wiki/citation.html
|
||||
repository-code: 'https://github.com/kokkos/kokkos'
|
||||
url: 'https://kokkos.org/'
|
||||
license: Apache-2.0
|
||||
preferred-citation:
|
||||
type: article
|
||||
authors:
|
||||
- given-names: Christian R.
|
||||
family-names: Trott
|
||||
- given-names: Damien
|
||||
family-names: Lebrun-Grandié
|
||||
- given-names: Daniel
|
||||
family-names: Arndt
|
||||
- family-names: Ciesko
|
||||
given-names: Jan
|
||||
- given-names: Vinh
|
||||
family-names: Dang
|
||||
- family-names: Ellingwood
|
||||
given-names: Nathan
|
||||
- given-names: Rahulkumar
|
||||
family-names: Gayatri
|
||||
- given-names: Evan
|
||||
family-names: Harvey
|
||||
- given-names: Daisy S.
|
||||
family-names: Hollman
|
||||
- given-names: Dan
|
||||
family-names: Ibanez
|
||||
- given-names: Nevin
|
||||
family-names: Liber
|
||||
- given-names: Jonathan
|
||||
family-names: Madsen
|
||||
- given-names: Jeff
|
||||
family-names: Miles
|
||||
- given-names: David
|
||||
family-names: Poliakoff
|
||||
- given-names: Amy
|
||||
family-names: Powell
|
||||
- given-names: Sivasankaran
|
||||
family-names: Rajamanickam
|
||||
- given-names: Mikael
|
||||
family-names: Simberg
|
||||
- given-names: Dan
|
||||
family-names: Sunderland
|
||||
- given-names: Bruno
|
||||
family-names: Turcksin
|
||||
- given-names: Jeremiah
|
||||
family-names: Wilke
|
||||
doi: 10.1109/TPDS.2021.3097283
|
||||
journal: IEEE Transactions on Parallel and Distributed Systems
|
||||
start: 805
|
||||
end: 817
|
||||
title: "Kokkos 3: Programming Model Extensions for the Exascale Era"
|
||||
volume: 33
|
||||
issue: 4
|
||||
year: 2022
|
||||
@ -150,7 +150,7 @@ ENDIF()
|
||||
|
||||
|
||||
set(Kokkos_VERSION_MAJOR 4)
|
||||
set(Kokkos_VERSION_MINOR 3)
|
||||
set(Kokkos_VERSION_MINOR 4)
|
||||
set(Kokkos_VERSION_PATCH 1)
|
||||
set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}")
|
||||
message(STATUS "Kokkos version: ${Kokkos_VERSION}")
|
||||
|
||||
@ -11,7 +11,7 @@ CXXFLAGS += $(SHFLAGS)
|
||||
endif
|
||||
|
||||
KOKKOS_VERSION_MAJOR = 4
|
||||
KOKKOS_VERSION_MINOR = 3
|
||||
KOKKOS_VERSION_MINOR = 4
|
||||
KOKKOS_VERSION_PATCH = 1
|
||||
KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc)
|
||||
|
||||
@ -21,11 +21,11 @@ KOKKOS_DEVICES ?= "OpenMP"
|
||||
# Options:
|
||||
# Intel: KNC,KNL,SNB,HSW,BDW,SKL,SKX,ICL,ICX,SPR
|
||||
# NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75,Ampere80,Ampere86,Ada89,Hopper90
|
||||
# ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX
|
||||
# ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX,ARMv9-Grace
|
||||
# IBM: Power8,Power9
|
||||
# AMD-GPUS: AMD_GFX906,AMD_GFX908,AMD_GFX90A,AMD_GFX940,AMD_GFX942,AMD_GFX1030,AMD_GFX1100,AMD_GFX1103
|
||||
# AMD-CPUS: AMDAVX,Zen,Zen2,Zen3
|
||||
# Intel-GPUs: Gen9,Gen11,Gen12LP,DG1,XeHP,PVC
|
||||
# Intel-GPUs: Intel_Gen,Intel_Gen9,Intel_Gen11,Intel_Gen12LP,Intel_DG1,Intel_XeHP,Intel_PVC
|
||||
KOKKOS_ARCH ?= ""
|
||||
# Options: yes,no
|
||||
KOKKOS_DEBUG ?= "no"
|
||||
@ -41,7 +41,7 @@ KOKKOS_STANDALONE_CMAKE ?= "no"
|
||||
|
||||
# Default settings specific options.
|
||||
# Options: force_uvm,use_ldg,rdc,enable_lambda,enable_constexpr,disable_malloc_async
|
||||
KOKKOS_CUDA_OPTIONS ?= "enable_lambda"
|
||||
KOKKOS_CUDA_OPTIONS ?= "disable_malloc_async"
|
||||
|
||||
# Options: rdc
|
||||
KOKKOS_HIP_OPTIONS ?= ""
|
||||
@ -328,12 +328,43 @@ KOKKOS_INTERNAL_USE_ARCH_ICL := $(call kokkos_has_string,$(KOKKOS_ARCH),ICL)
|
||||
KOKKOS_INTERNAL_USE_ARCH_ICX := $(call kokkos_has_string,$(KOKKOS_ARCH),ICX)
|
||||
KOKKOS_INTERNAL_USE_ARCH_SPR := $(call kokkos_has_string,$(KOKKOS_ARCH),SPR)
|
||||
|
||||
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen)
|
||||
# Traditionally, we supported, e.g., IntelGen9 instead of Intel_Gen9. The latter
|
||||
# matches the CMake option but we also accept the former for backward-compatibility.
|
||||
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9 := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen9)
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9), 0)
|
||||
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9 := $(call kokkos_has_string,$(KOKKOS_ARCH),Intel_Gen9)
|
||||
endif
|
||||
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN11 := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen11)
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN11), 0)
|
||||
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN11 := $(call kokkos_has_string,$(KOKKOS_ARCH),Intel_Gen11)
|
||||
endif
|
||||
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN12LP := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen12LP)
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN12LP), 0)
|
||||
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN12LP := $(call kokkos_has_string,$(KOKKOS_ARCH),Intel_Gen12LP)
|
||||
endif
|
||||
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen9)
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9), 0)
|
||||
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9 := $(call kokkos_has_string,$(KOKKOS_ARCH),Intel_Gen9)
|
||||
endif
|
||||
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN_SET := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN11) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN12LP))
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN_SET), 0)
|
||||
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen)
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN), 0)
|
||||
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN := $(call kokkos_has_string,$(KOKKOS_ARCH),Intel_Gen)
|
||||
endif
|
||||
endif
|
||||
KOKKOS_INTERNAL_USE_ARCH_INTEL_DG1 := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelDG1)
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_DG1), 0)
|
||||
KOKKOS_INTERNAL_USE_ARCH_INTEL_DG1 := $(call kokkos_has_string,$(KOKKOS_ARCH),Intel_DG1)
|
||||
endif
|
||||
KOKKOS_INTERNAL_USE_ARCH_INTEL_XEHP := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelXeHP)
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_XEHP), 0)
|
||||
KOKKOS_INTERNAL_USE_ARCH_INTEL_XEHP := $(call kokkos_has_string,$(KOKKOS_ARCH),Intel_XeHP)
|
||||
endif
|
||||
# Traditionally the architecture was called PVC instead of Intel_PVC. This
|
||||
# version makes us accept IntelPVC and Intel_PVC as well.
|
||||
KOKKOS_INTERNAL_USE_ARCH_INTEL_PVC := $(call kokkos_has_string,$(KOKKOS_ARCH),PVC)
|
||||
|
||||
# NVIDIA based.
|
||||
@ -394,7 +425,8 @@ KOKKOS_INTERNAL_USE_ARCH_ARMV81 := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv8
|
||||
KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv8-ThunderX)
|
||||
KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2 := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv8-TX2)
|
||||
KOKKOS_INTERNAL_USE_ARCH_A64FX := $(call kokkos_has_string,$(KOKKOS_ARCH),A64FX)
|
||||
KOKKOS_INTERNAL_USE_ARCH_ARM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_ARMV80)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV81)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2)+$(KOKKOS_INTERNAL_USE_ARCH_A64FX) | bc))
|
||||
KOKKOS_INTERNAL_USE_ARCH_ARMV9_GRACE := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv9-Grace)
|
||||
KOKKOS_INTERNAL_USE_ARCH_ARM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_ARMV80)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV81)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2)+$(KOKKOS_INTERNAL_USE_ARCH_A64FX)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV9_GRACE) | bc))
|
||||
|
||||
# IBM based.
|
||||
KOKKOS_INTERNAL_USE_ARCH_POWER8 := $(call kokkos_has_string,$(KOKKOS_ARCH),Power8)
|
||||
@ -758,6 +790,14 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_A64FX), 1)
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV9_GRACE), 1)
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV9_GRACE")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARM_NEON")
|
||||
|
||||
KOKKOS_CXXFLAGS += -mcpu=neoverse-v2 -msve-vector-bits=128
|
||||
KOKKOS_LDFLAGS += -mcpu=neoverse-v2 -msve-vector-bits=128
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN), 1)
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_ZEN")
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX2")
|
||||
@ -1216,6 +1256,8 @@ ifeq ($(KOKKOS_INTERNAL_DISABLE_BUNDLED_MDSPAN), 0)
|
||||
endif
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_IMPL_MDSPAN")
|
||||
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_IMPL_REF_COUNT_BRANCH_UNLIKELY")
|
||||
|
||||
KOKKOS_INTERNAL_LS_CONFIG := $(shell ls KokkosCore_config.h 2>&1)
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_LS_CONFIG), KokkosCore_config.h)
|
||||
|
||||
@ -81,7 +81,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1)
|
||||
Kokkos_Threads_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_Instance.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_Instance.cpp
|
||||
Kokkos_Threads_Spinwait.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_Spinwait.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_Spinwait.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_Spinwait.cpp
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||

|
||||
[](https://kokkos.org)
|
||||
|
||||
# Kokkos: Core Libraries
|
||||
|
||||
@ -10,43 +10,66 @@ hierarchies and multiple types of execution resources. It currently can use
|
||||
CUDA, HIP, SYCL, HPX, OpenMP and C++ threads as backend programming models with several other
|
||||
backends in development.
|
||||
|
||||
**Kokkos Core is part of the Kokkos C++ Performance Portability Programming EcoSystem.**
|
||||
**Kokkos Core is part of the [Kokkos C++ Performance Portability Programming Ecosystem](https://kokkos.org/about/abstract/).**
|
||||
|
||||
For the complete documentation, click below:
|
||||
Kokkos is a [Linux Foundation](https://linuxfoundation.org) project.
|
||||
|
||||
# [kokkos.github.io/kokkos-core-wiki](https://kokkos.github.io/kokkos-core-wiki)
|
||||
|
||||
# Learning about Kokkos
|
||||
## Learning about Kokkos
|
||||
|
||||
To start learning about Kokkos:
|
||||
|
||||
- [Kokkos Lectures](https://kokkos.github.io/kokkos-core-wiki/videolectures.html): they contain a mix of lecture videos and hands-on exercises covering all the important Kokkos Ecosystem capabilities.
|
||||
- [Kokkos Lectures](https://kokkos.org/kokkos-core-wiki/videolectures.html): they contain a mix of lecture videos and hands-on exercises covering all the important capabilities.
|
||||
|
||||
- [Programming guide](https://kokkos.github.io/kokkos-core-wiki/programmingguide.html): contains in "narrative" form a technical description of the programming model, machine model, and the main building blocks like the Views and parallel dispatch.
|
||||
- [Programming guide](https://kokkos.org/kokkos-core-wiki/programmingguide.html): contains in "narrative" form a technical description of the programming model, machine model, and the main building blocks like the Views and parallel dispatch.
|
||||
|
||||
- [API reference](https://kokkos.github.io/kokkos-core-wiki/): organized by category, i.e., [core](https://kokkos.github.io/kokkos-core-wiki/API/core-index.html), [algorithms](https://kokkos.github.io/kokkos-core-wiki/API/algorithms-index.html) and [containers](https://kokkos.github.io/kokkos-core-wiki/API/containers-index.html) or, if you prefer, in [alphabetical order](https://kokkos.github.io/kokkos-core-wiki/API/alphabetical.html).
|
||||
- [API reference](https://kokkos.org/kokkos-core-wiki/): organized by category, i.e., [core](https://kokkos.org/kokkos-core-wiki/API/core-index.html), [algorithms](https://kokkos.org/kokkos-core-wiki/API/algorithms-index.html) and [containers](https://kokkos.org/kokkos-core-wiki/API/containers-index.html) or, if you prefer, in [alphabetical order](https://kokkos.org/kokkos-core-wiki/API/alphabetical.html).
|
||||
|
||||
- [Use cases and Examples](https://kokkos.github.io/kokkos-core-wiki/usecases.html): a series of examples ranging from how to use Kokkos with MPI to Fortran interoperability.
|
||||
- [Use cases and Examples](https://kokkos.org/kokkos-core-wiki/usecases.html): a series of examples ranging from how to use Kokkos with MPI to Fortran interoperability.
|
||||
|
||||
## Obtaining Kokkos
|
||||
|
||||
The latest release of Kokkos can be obtained from the [GitHub releases page](https://github.com/kokkos/kokkos/releases/latest).
|
||||
|
||||
The current release is [4.3.01](https://github.com/kokkos/kokkos/releases/tag/4.3.01).
|
||||
|
||||
```bash
|
||||
curl -OJ -L https://github.com/kokkos/kokkos/archive/refs/tags/4.3.01.tar.gz
|
||||
# Or with wget
|
||||
wget https://github.com/kokkos/kokkos/archive/refs/tags/4.3.01.tar.gz
|
||||
```
|
||||
|
||||
To clone the latest development version of Kokkos from GitHub:
|
||||
|
||||
```bash
|
||||
git clone -b develop https://github.com/kokkos/kokkos.git
|
||||
```
|
||||
|
||||
### Building Kokkos
|
||||
|
||||
To build Kokkos, you will need to have a C++ compiler that supports C++17 or later.
|
||||
All requirements including minimum and primary tested compiler versions can be found [here](https://kokkos.org/kokkos-core-wiki/requirements.html).
|
||||
|
||||
Building and installation instructions are described [here](https://kokkos.org/kokkos-core-wiki/building.html).
|
||||
|
||||
You can also install Kokkos using [Spack](https://spack.io/): `spack install kokkos`. [Available configuration options](https://packages.spack.io/package.html?name=kokkos) can be displayed using `spack info kokkos`.
|
||||
|
||||
## For the complete documentation: [kokkos.org/kokkos-core-wiki/](https://kokkos.org/kokkos-core-wiki/)
|
||||
|
||||
## Support
|
||||
|
||||
For questions find us on Slack: https://kokkosteam.slack.com or open a GitHub issue.
|
||||
|
||||
For non-public questions send an email to: *crtrott(at)sandia.gov*
|
||||
|
||||
# Contributing to Kokkos
|
||||
## Contributing
|
||||
|
||||
Please see [this page](https://kokkos.github.io/kokkos-core-wiki/contributing.html) for details on how to contribute.
|
||||
Please see [this page](https://kokkos.org/kokkos-core-wiki/contributing.html) for details on how to contribute.
|
||||
|
||||
# Requirements, Building and Installing
|
||||
## Citing Kokkos
|
||||
|
||||
All requirements including minimum and primary tested compiler versions can be found [here](https://kokkos.github.io/kokkos-core-wiki/requirements.html).
|
||||
Please see the [following page](https://kokkos.org/kokkos-core-wiki/citation.html).
|
||||
|
||||
Building and installation instructions are described [here](https://kokkos.github.io/kokkos-core-wiki/building.html).
|
||||
|
||||
# Citing Kokkos
|
||||
|
||||
Please see the [following page](https://kokkos.github.io/kokkos-core-wiki/citation.html).
|
||||
|
||||
# License
|
||||
## License
|
||||
|
||||
[](https://spdx.org/licenses/LLVM-exception.html)
|
||||
|
||||
|
||||
@ -189,6 +189,33 @@ void applyPermutation(const ExecutionSpace& space,
|
||||
KOKKOS_LAMBDA(int i) { view(i) = view_copy(permutation(i)); });
|
||||
}
|
||||
|
||||
// FIXME_NVCC: nvcc has trouble compiling lambdas inside a function with
|
||||
// variadic templates (sort_by_key_via_sort). Switch to using functors instead.
|
||||
template <typename Permute>
|
||||
struct IotaFunctor {
|
||||
Permute _permute;
|
||||
KOKKOS_FUNCTION void operator()(int i) const { _permute(i) = i; }
|
||||
};
|
||||
template <typename Keys>
|
||||
struct LessFunctor {
|
||||
Keys _keys;
|
||||
KOKKOS_FUNCTION bool operator()(int i, int j) const {
|
||||
return _keys(i) < _keys(j);
|
||||
}
|
||||
};
|
||||
|
||||
// FIXME_NVCC+MSVC: We can't use a lambda instead of a functor which gave us
|
||||
// "For this host platform/dialect, an extended lambda cannot be defined inside
|
||||
// the 'if' or 'else' block of a constexpr if statement"
|
||||
template <typename Keys, typename Comparator>
|
||||
struct KeyComparisonFunctor {
|
||||
Keys m_keys;
|
||||
Comparator m_comparator;
|
||||
KOKKOS_FUNCTION bool operator()(int i, int j) const {
|
||||
return m_comparator(m_keys(i), m_keys(j));
|
||||
}
|
||||
};
|
||||
|
||||
template <class ExecutionSpace, class KeysDataType, class... KeysProperties,
|
||||
class ValuesDataType, class... ValuesProperties,
|
||||
class... MaybeComparator>
|
||||
@ -207,10 +234,9 @@ void sort_by_key_via_sort(
|
||||
n);
|
||||
|
||||
// iota
|
||||
Kokkos::parallel_for(
|
||||
"Kokkos::sort_by_key_via_sort::iota",
|
||||
Kokkos::parallel_for("Kokkos::sort_by_key_via_sort::iota",
|
||||
Kokkos::RangePolicy<ExecutionSpace>(exec, 0, n),
|
||||
KOKKOS_LAMBDA(int i) { permute(i) = i; });
|
||||
IotaFunctor<decltype(permute)>{permute});
|
||||
|
||||
using Layout =
|
||||
typename Kokkos::View<unsigned int*, ExecutionSpace>::array_layout;
|
||||
@ -228,16 +254,15 @@ void sort_by_key_via_sort(
|
||||
Kokkos::DefaultHostExecutionSpace host_exec;
|
||||
|
||||
if constexpr (sizeof...(MaybeComparator) == 0) {
|
||||
Kokkos::sort(
|
||||
host_exec, host_permute,
|
||||
KOKKOS_LAMBDA(int i, int j) { return host_keys(i) < host_keys(j); });
|
||||
Kokkos::sort(host_exec, host_permute,
|
||||
LessFunctor<decltype(host_keys)>{host_keys});
|
||||
} else {
|
||||
auto keys_comparator =
|
||||
std::get<0>(std::tuple<MaybeComparator...>(maybeComparator...));
|
||||
Kokkos::sort(
|
||||
host_exec, host_permute, KOKKOS_LAMBDA(int i, int j) {
|
||||
return keys_comparator(host_keys(i), host_keys(j));
|
||||
});
|
||||
host_exec, host_permute,
|
||||
KeyComparisonFunctor<decltype(host_keys), decltype(keys_comparator)>{
|
||||
host_keys, keys_comparator});
|
||||
}
|
||||
host_exec.fence("Kokkos::Impl::sort_by_key_via_sort: after host sort");
|
||||
Kokkos::deep_copy(exec, permute, host_permute);
|
||||
@ -262,16 +287,14 @@ void sort_by_key_via_sort(
|
||||
}
|
||||
#else
|
||||
if constexpr (sizeof...(MaybeComparator) == 0) {
|
||||
Kokkos::sort(
|
||||
exec, permute,
|
||||
KOKKOS_LAMBDA(int i, int j) { return keys(i) < keys(j); });
|
||||
Kokkos::sort(exec, permute, LessFunctor<decltype(keys)>{keys});
|
||||
} else {
|
||||
auto keys_comparator =
|
||||
std::get<0>(std::tuple<MaybeComparator...>(maybeComparator...));
|
||||
Kokkos::sort(
|
||||
exec, permute, KOKKOS_LAMBDA(int i, int j) {
|
||||
return keys_comparator(keys(i), keys(j));
|
||||
});
|
||||
exec, permute,
|
||||
KeyComparisonFunctor<decltype(keys), decltype(keys_comparator)>{
|
||||
keys, keys_comparator});
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -29,33 +29,31 @@ namespace Experimental {
|
||||
template <
|
||||
class ExecutionSpace, class IteratorType, class UnaryFunctorType,
|
||||
std::enable_if_t<Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
|
||||
UnaryFunctorType for_each(const std::string& label, const ExecutionSpace& ex,
|
||||
IteratorType first, IteratorType last,
|
||||
UnaryFunctorType functor) {
|
||||
return Impl::for_each_exespace_impl(label, ex, first, last,
|
||||
std::move(functor));
|
||||
void for_each(const std::string& label, const ExecutionSpace& ex,
|
||||
IteratorType first, IteratorType last, UnaryFunctorType functor) {
|
||||
Impl::for_each_exespace_impl(label, ex, first, last, std::move(functor));
|
||||
}
|
||||
|
||||
template <
|
||||
class ExecutionSpace, class IteratorType, class UnaryFunctorType,
|
||||
std::enable_if_t<Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
|
||||
UnaryFunctorType for_each(const ExecutionSpace& ex, IteratorType first,
|
||||
IteratorType last, UnaryFunctorType functor) {
|
||||
return Impl::for_each_exespace_impl("Kokkos::for_each_iterator_api_default",
|
||||
ex, first, last, std::move(functor));
|
||||
void for_each(const ExecutionSpace& ex, IteratorType first, IteratorType last,
|
||||
UnaryFunctorType functor) {
|
||||
Impl::for_each_exespace_impl("Kokkos::for_each_iterator_api_default", ex,
|
||||
first, last, std::move(functor));
|
||||
}
|
||||
|
||||
template <
|
||||
class ExecutionSpace, class DataType, class... Properties,
|
||||
class UnaryFunctorType,
|
||||
std::enable_if_t<Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
|
||||
UnaryFunctorType for_each(const std::string& label, const ExecutionSpace& ex,
|
||||
void for_each(const std::string& label, const ExecutionSpace& ex,
|
||||
const ::Kokkos::View<DataType, Properties...>& v,
|
||||
UnaryFunctorType functor) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
|
||||
|
||||
namespace KE = ::Kokkos::Experimental;
|
||||
return Impl::for_each_exespace_impl(label, ex, KE::begin(v), KE::end(v),
|
||||
Impl::for_each_exespace_impl(label, ex, KE::begin(v), KE::end(v),
|
||||
std::move(functor));
|
||||
}
|
||||
|
||||
@ -63,15 +61,14 @@ template <
|
||||
class ExecutionSpace, class DataType, class... Properties,
|
||||
class UnaryFunctorType,
|
||||
std::enable_if_t<Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
|
||||
UnaryFunctorType for_each(const ExecutionSpace& ex,
|
||||
void for_each(const ExecutionSpace& ex,
|
||||
const ::Kokkos::View<DataType, Properties...>& v,
|
||||
UnaryFunctorType functor) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
|
||||
|
||||
namespace KE = ::Kokkos::Experimental;
|
||||
return Impl::for_each_exespace_impl("Kokkos::for_each_view_api_default", ex,
|
||||
KE::begin(v), KE::end(v),
|
||||
std::move(functor));
|
||||
Impl::for_each_exespace_impl("Kokkos::for_each_view_api_default", ex,
|
||||
KE::begin(v), KE::end(v), std::move(functor));
|
||||
}
|
||||
|
||||
//
|
||||
@ -82,23 +79,22 @@ UnaryFunctorType for_each(const ExecutionSpace& ex,
|
||||
|
||||
template <class TeamHandleType, class IteratorType, class UnaryFunctorType,
|
||||
std::enable_if_t<Kokkos::is_team_handle_v<TeamHandleType>, int> = 0>
|
||||
KOKKOS_FUNCTION UnaryFunctorType for_each(const TeamHandleType& teamHandle,
|
||||
KOKKOS_FUNCTION void for_each(const TeamHandleType& teamHandle,
|
||||
IteratorType first, IteratorType last,
|
||||
UnaryFunctorType functor) {
|
||||
return Impl::for_each_team_impl(teamHandle, first, last, std::move(functor));
|
||||
Impl::for_each_team_impl(teamHandle, first, last, std::move(functor));
|
||||
}
|
||||
|
||||
template <class TeamHandleType, class DataType, class... Properties,
|
||||
class UnaryFunctorType,
|
||||
std::enable_if_t<Kokkos::is_team_handle_v<TeamHandleType>, int> = 0>
|
||||
KOKKOS_FUNCTION UnaryFunctorType
|
||||
for_each(const TeamHandleType& teamHandle,
|
||||
KOKKOS_FUNCTION void for_each(const TeamHandleType& teamHandle,
|
||||
const ::Kokkos::View<DataType, Properties...>& v,
|
||||
UnaryFunctorType functor) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
|
||||
|
||||
namespace KE = ::Kokkos::Experimental;
|
||||
return Impl::for_each_team_impl(teamHandle, KE::begin(v), KE::end(v),
|
||||
Impl::for_each_team_impl(teamHandle, KE::begin(v), KE::end(v),
|
||||
std::move(functor));
|
||||
}
|
||||
|
||||
|
||||
@ -82,6 +82,11 @@ OutputIteratorType adjacent_difference_exespace_impl(
|
||||
return first_dest;
|
||||
}
|
||||
|
||||
#ifdef KOKKOS_ENABLE_DEBUG
|
||||
// check for overlapping iterators
|
||||
Impl::expect_no_overlap(first_from, last_from, first_dest);
|
||||
#endif
|
||||
|
||||
// run
|
||||
const auto num_elements =
|
||||
Kokkos::Experimental::distance(first_from, last_from);
|
||||
@ -114,6 +119,11 @@ KOKKOS_FUNCTION OutputIteratorType adjacent_difference_team_impl(
|
||||
return first_dest;
|
||||
}
|
||||
|
||||
#ifdef KOKKOS_ENABLE_DEBUG
|
||||
// check for overlapping iterators
|
||||
Impl::expect_no_overlap(first_from, last_from, first_dest);
|
||||
#endif
|
||||
|
||||
// run
|
||||
const auto num_elements =
|
||||
Kokkos::Experimental::distance(first_from, last_from);
|
||||
|
||||
@ -24,6 +24,9 @@ namespace Kokkos {
|
||||
namespace Experimental {
|
||||
namespace Impl {
|
||||
|
||||
template <class T>
|
||||
class RandomAccessIterator;
|
||||
|
||||
template <typename T, typename enable = void>
|
||||
struct is_admissible_to_kokkos_std_algorithms : std::false_type {};
|
||||
|
||||
@ -58,6 +61,18 @@ using is_iterator = Kokkos::is_detected<iterator_category_t, T>;
|
||||
template <class T>
|
||||
inline constexpr bool is_iterator_v = is_iterator<T>::value;
|
||||
|
||||
template <typename ViewType>
|
||||
struct is_kokkos_iterator : std::false_type {};
|
||||
|
||||
template <typename ViewType>
|
||||
struct is_kokkos_iterator<RandomAccessIterator<ViewType>> {
|
||||
static constexpr bool value =
|
||||
is_admissible_to_kokkos_std_algorithms<ViewType>::value;
|
||||
};
|
||||
|
||||
template <class T>
|
||||
inline constexpr bool is_kokkos_iterator_v = is_kokkos_iterator<T>::value;
|
||||
|
||||
//
|
||||
// are_iterators
|
||||
//
|
||||
@ -215,6 +230,38 @@ KOKKOS_INLINE_FUNCTION void expect_valid_range(IteratorType first,
|
||||
(void)last;
|
||||
}
|
||||
|
||||
//
|
||||
// Check if kokkos iterators are overlapping
|
||||
//
|
||||
template <typename IteratorType1, typename IteratorType2>
|
||||
KOKKOS_INLINE_FUNCTION void expect_no_overlap(
|
||||
[[maybe_unused]] IteratorType1 first, [[maybe_unused]] IteratorType1 last,
|
||||
[[maybe_unused]] IteratorType2 s_first) {
|
||||
if constexpr (is_kokkos_iterator_v<IteratorType1> &&
|
||||
is_kokkos_iterator_v<IteratorType2>) {
|
||||
auto const view1 = first.view();
|
||||
auto const view2 = s_first.view();
|
||||
|
||||
std::size_t stride1 = view1.stride(0);
|
||||
std::size_t stride2 = view2.stride(0);
|
||||
ptrdiff_t first_diff = view1.data() - view2.data();
|
||||
|
||||
// FIXME If strides are not identical, checks may not be made
|
||||
// with the cost of O(1)
|
||||
// Currently, checks are made only if strides are identical
|
||||
// If first_diff == 0, there is already an overlap
|
||||
if (stride1 == stride2 || first_diff == 0) {
|
||||
[[maybe_unused]] bool is_no_overlap = (first_diff % stride1);
|
||||
auto* first_pointer1 = view1.data();
|
||||
auto* first_pointer2 = view2.data();
|
||||
[[maybe_unused]] auto* last_pointer1 = first_pointer1 + (last - first);
|
||||
[[maybe_unused]] auto* last_pointer2 = first_pointer2 + (last - first);
|
||||
KOKKOS_EXPECTS(first_pointer1 >= last_pointer2 ||
|
||||
last_pointer1 <= first_pointer2 || is_no_overlap);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Impl
|
||||
} // namespace Experimental
|
||||
} // namespace Kokkos
|
||||
|
||||
@ -151,7 +151,8 @@ KOKKOS_FUNCTION OutputIterator copy_if_team_impl(
|
||||
}
|
||||
|
||||
#if defined KOKKOS_COMPILER_INTEL || \
|
||||
(defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130)
|
||||
(defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
|
||||
!defined(KOKKOS_COMPILER_MSVC))
|
||||
__builtin_unreachable();
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -42,8 +42,7 @@ struct StdForEachFunctor {
|
||||
};
|
||||
|
||||
template <class HandleType, class IteratorType, class UnaryFunctorType>
|
||||
UnaryFunctorType for_each_exespace_impl(const std::string& label,
|
||||
const HandleType& handle,
|
||||
void for_each_exespace_impl(const std::string& label, const HandleType& handle,
|
||||
IteratorType first, IteratorType last,
|
||||
UnaryFunctorType functor) {
|
||||
// checks
|
||||
@ -56,8 +55,6 @@ UnaryFunctorType for_each_exespace_impl(const std::string& label,
|
||||
label, RangePolicy<HandleType>(handle, 0, num_elements),
|
||||
StdForEachFunctor<IteratorType, UnaryFunctorType>(first, functor));
|
||||
handle.fence("Kokkos::for_each: fence after operation");
|
||||
|
||||
return functor;
|
||||
}
|
||||
|
||||
template <class ExecutionSpace, class IteratorType, class SizeType,
|
||||
@ -75,7 +72,7 @@ IteratorType for_each_n_exespace_impl(const std::string& label,
|
||||
}
|
||||
|
||||
for_each_exespace_impl(label, ex, first, last, std::move(functor));
|
||||
// no neeed to fence since for_each_exespace_impl fences already
|
||||
// no need to fence since for_each_exespace_impl fences already
|
||||
|
||||
return last;
|
||||
}
|
||||
@ -84,9 +81,9 @@ IteratorType for_each_n_exespace_impl(const std::string& label,
|
||||
// team impl
|
||||
//
|
||||
template <class TeamHandleType, class IteratorType, class UnaryFunctorType>
|
||||
KOKKOS_FUNCTION UnaryFunctorType
|
||||
for_each_team_impl(const TeamHandleType& teamHandle, IteratorType first,
|
||||
IteratorType last, UnaryFunctorType functor) {
|
||||
KOKKOS_FUNCTION void for_each_team_impl(const TeamHandleType& teamHandle,
|
||||
IteratorType first, IteratorType last,
|
||||
UnaryFunctorType functor) {
|
||||
// checks
|
||||
Impl::static_assert_random_access_and_accessible(teamHandle, first);
|
||||
Impl::expect_valid_range(first, last);
|
||||
@ -96,7 +93,6 @@ for_each_team_impl(const TeamHandleType& teamHandle, IteratorType first,
|
||||
TeamThreadRange(teamHandle, 0, num_elements),
|
||||
StdForEachFunctor<IteratorType, UnaryFunctorType>(first, functor));
|
||||
teamHandle.team_barrier();
|
||||
return functor;
|
||||
}
|
||||
|
||||
template <class TeamHandleType, class IteratorType, class SizeType,
|
||||
@ -113,7 +109,7 @@ for_each_n_team_impl(const TeamHandleType& teamHandle, IteratorType first,
|
||||
}
|
||||
|
||||
for_each_team_impl(teamHandle, first, last, std::move(functor));
|
||||
// no neeed to fence since for_each_team_impl fences already
|
||||
// no need to fence since for_each_team_impl fences already
|
||||
|
||||
return last;
|
||||
}
|
||||
|
||||
@ -59,6 +59,30 @@ class RandomAccessIterator< ::Kokkos::View<DataType, Args...> > {
|
||||
ptrdiff_t current_index)
|
||||
: m_view(view), m_current_index(current_index) {}
|
||||
|
||||
// Converting constructors: build this iterator from a RandomAccessIterator
// over another view type by copying the other iterator's view and current
// index. Two implementations are selected by the preprocessor below.
#ifndef KOKKOS_ENABLE_CXX17 // C++20 and beyond
|
||||
// Single constrained constructor: participates only when view_type is
// constructible from OtherViewType, and is explicit unless OtherViewType is
// implicitly convertible to view_type.
template <class OtherViewType>
|
||||
requires(std::is_constructible_v<view_type, OtherViewType>) KOKKOS_FUNCTION
|
||||
explicit(!std::is_convertible_v<OtherViewType, view_type>)
|
||||
RandomAccessIterator(const RandomAccessIterator<OtherViewType>& other)
|
||||
: m_view(other.m_view), m_current_index(other.m_current_index) {}
|
||||
#else
|
||||
// Without a conditional explicit specifier, the same behavior is expressed
// as two SFINAE-constrained overloads.
// Overload 1: view_type is constructible from, but not implicitly
// convertible from, OtherViewType -> explicit constructor.
template <
|
||||
class OtherViewType,
|
||||
std::enable_if_t<std::is_constructible_v<view_type, OtherViewType> &&
|
||||
!std::is_convertible_v<OtherViewType, view_type>,
|
||||
int> = 0>
|
||||
KOKKOS_FUNCTION explicit RandomAccessIterator(
|
||||
const RandomAccessIterator<OtherViewType>& other)
|
||||
: m_view(other.m_view), m_current_index(other.m_current_index) {}
|
||||
|
||||
// Overload 2: OtherViewType is implicitly convertible to view_type ->
// implicit (non-explicit) constructor.
template <class OtherViewType,
|
||||
std::enable_if_t<std::is_convertible_v<OtherViewType, view_type>,
|
||||
int> = 0>
|
||||
KOKKOS_FUNCTION RandomAccessIterator(
|
||||
const RandomAccessIterator<OtherViewType>& other)
|
||||
: m_view(other.m_view), m_current_index(other.m_current_index) {}
|
||||
#endif
|
||||
|
||||
KOKKOS_FUNCTION
|
||||
iterator_type& operator++() {
|
||||
++m_current_index;
|
||||
@ -152,9 +176,16 @@ class RandomAccessIterator< ::Kokkos::View<DataType, Args...> > {
|
||||
KOKKOS_FUNCTION
|
||||
reference operator*() const { return m_view(m_current_index); }
|
||||
|
||||
KOKKOS_FUNCTION
|
||||
view_type view() const { return m_view; }
|
||||
|
||||
private:
|
||||
view_type m_view;
|
||||
ptrdiff_t m_current_index = 0;
|
||||
|
||||
// Needed for the converting constructor accepting another iterator
|
||||
template <class>
|
||||
friend class RandomAccessIterator;
|
||||
};
|
||||
|
||||
} // namespace Impl
|
||||
|
||||
@ -176,7 +176,8 @@ KOKKOS_FUNCTION OutputIterator unique_copy_team_impl(
|
||||
}
|
||||
|
||||
#if defined KOKKOS_COMPILER_INTEL || \
|
||||
(defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130)
|
||||
(defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
|
||||
!defined(KOKKOS_COMPILER_MSVC))
|
||||
__builtin_unreachable();
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -46,6 +46,44 @@ TEST_F(random_access_iterator_test, constructor) {
|
||||
EXPECT_TRUE(true);
|
||||
}
|
||||
|
||||
// Compile-time checks of the iterator converting constructors: a const
// iterator must be constructible from the matching non-const iterator but not
// the other way round, and non-const iterators over the dynamic, static and
// strided fixture views must be constructible from one another.
TEST_F(random_access_iterator_test, constructiblity) {
  // ---- dynamic view: iterator -> const iterator only ----
  auto dyn_it    = KE::begin(m_dynamic_view);
  auto dyn_cit   = KE::cbegin(m_dynamic_view);
  using dyn_it_t  = decltype(dyn_it);
  using dyn_cit_t = decltype(dyn_cit);

  static_assert(std::is_constructible_v<dyn_cit_t, dyn_it_t>);
  static_assert(!std::is_constructible_v<dyn_it_t, dyn_cit_t>);
  [[maybe_unused]] dyn_cit_t dyn_cit_from_it(dyn_it);

  // ---- static view: iterator -> const iterator only ----
  auto sta_it    = KE::begin(m_static_view);
  auto sta_cit   = KE::cbegin(m_static_view);
  using sta_it_t  = decltype(sta_it);
  using sta_cit_t = decltype(sta_cit);

  static_assert(std::is_constructible_v<sta_cit_t, sta_it_t>);
  static_assert(!std::is_constructible_v<sta_it_t, sta_cit_t>);
  [[maybe_unused]] sta_cit_t sta_cit_from_it(sta_it);

  // ---- strided view: iterator -> const iterator only ----
  auto str_it    = KE::begin(m_strided_view);
  auto str_cit   = KE::cbegin(m_strided_view);
  using str_it_t  = decltype(str_it);
  using str_cit_t = decltype(str_cit);

  static_assert(std::is_constructible_v<str_cit_t, str_it_t>);
  static_assert(!std::is_constructible_v<str_it_t, str_cit_t>);
  [[maybe_unused]] str_cit_t str_cit_from_it(str_it);

  // [FIXME] Better to have tests for the explicit specifier with an expression.
  // As soon as View converting constructors are re-implemented with a
  // conditional explicit, we may add those tests.
  // ---- cross-view constructibility between non-const iterators ----
  static_assert(std::is_constructible_v<sta_it_t, dyn_it_t>);
  static_assert(std::is_constructible_v<str_it_t, dyn_it_t>);
  static_assert(std::is_constructible_v<dyn_it_t, sta_it_t>);
  static_assert(std::is_constructible_v<str_it_t, sta_it_t>);
  static_assert(std::is_constructible_v<dyn_it_t, str_it_t>);
  static_assert(std::is_constructible_v<sta_it_t, str_it_t>);
  EXPECT_TRUE(true);
}
|
||||
|
||||
template <class IteratorType, class ValueType>
|
||||
void test_random_access_it_verify(IteratorType it, ValueType gold_value) {
|
||||
using view_t = Kokkos::View<typename IteratorType::value_type>;
|
||||
|
||||
@ -69,7 +69,7 @@ void iota(ExecutionSpace const &space, ViewType const &v,
|
||||
typename ViewType::value_type value = 0) {
|
||||
using ValueType = typename ViewType::value_type;
|
||||
Kokkos::parallel_for(
|
||||
"ArborX::Algorithms::iota",
|
||||
"Kokkos::Algorithms::iota",
|
||||
Kokkos::RangePolicy<ExecutionSpace>(space, 0, v.extent(0)),
|
||||
KOKKOS_LAMBDA(int i) { v(i) = value + (ValueType)i; });
|
||||
}
|
||||
@ -87,6 +87,18 @@ TEST(TEST_CATEGORY, SortByKeyEmptyView) {
|
||||
Kokkos::Experimental::sort_by_key(ExecutionSpace(), keys, values));
|
||||
}
|
||||
|
||||
// Test #7036
|
||||
// sort_by_key on zero-length key/value views in the default host execution
// space must not throw.
TEST(TEST_CATEGORY, SortByKeyEmptyViewHost) {
  using ExecutionSpace = Kokkos::DefaultHostExecutionSpace;
  // the element types are arbitrary; only the zero extent matters here
  using keys_view_t   = Kokkos::View<int *, ExecutionSpace>;
  using values_view_t = Kokkos::View<float *, ExecutionSpace>;

  keys_view_t empty_keys("keys", 0);
  values_view_t empty_values("values", 0);

  ASSERT_NO_THROW(Kokkos::Experimental::sort_by_key(ExecutionSpace(),
                                                    empty_keys, empty_values));
}
|
||||
|
||||
TEST(TEST_CATEGORY, SortByKey) {
|
||||
using ExecutionSpace = TEST_EXECSPACE;
|
||||
using MemorySpace = typename ExecutionSpace::memory_space;
|
||||
|
||||
@ -81,5 +81,114 @@ TEST(std_algorithms, is_admissible_to_std_algorithms) {
|
||||
strided_view_3d_t>::value);
|
||||
}
|
||||
|
||||
// Exercises Impl::expect_no_overlap on rank-1 views and on rank-1 subviews of
// rank-2 views. The cases expected to trip the contract are death tests and
// are compiled only when KOKKOS_ENABLE_DEBUG is defined; the cases expected
// to be accepted are always compiled.
TEST(std_algorithms, expect_no_overlap) {
  namespace KE     = Kokkos::Experimental;
  using value_type = double;

  static constexpr size_t extent0 = 13;

  // ---- rank-1 views: static extent, dynamic extent, LayoutStride ----
  using static_view_1d_t  = Kokkos::View<value_type[extent0]>;
  using dyn_view_1d_t     = Kokkos::View<value_type*>;
  using strided_view_1d_t = Kokkos::View<value_type*, Kokkos::LayoutStride>;

  [[maybe_unused]] static_view_1d_t contig_static{
      "std-algo-test-1d-contiguous-view-static"};
  [[maybe_unused]] dyn_view_1d_t contig_dynamic{
      "std-algo-test-1d-contiguous-view-dynamic", extent0};
  Kokkos::LayoutStride layout_1d{extent0, 2};
  strided_view_1d_t strided{"std-algo-test-1d-strided-view", layout_1d};

  // Case 1: source and destination start at the very same iterator, so the
  // ranges coincide and the contract must fire.
#ifdef KOKKOS_ENABLE_DEBUG
  auto static_begin = KE::begin(contig_static);
  auto static_end   = static_begin + extent0;
  EXPECT_DEATH(
      { KE::Impl::expect_no_overlap(static_begin, static_end, static_begin); },
      "Kokkos contract violation:.*");

  auto dynamic_begin = KE::begin(contig_dynamic);
  auto dynamic_end   = dynamic_begin + extent0;
  EXPECT_DEATH(
      {
        KE::Impl::expect_no_overlap(dynamic_begin, dynamic_end, dynamic_begin);
      },
      "Kokkos contract violation:.*");

  auto strided_begin = KE::begin(strided);
  auto strided_end   = strided_begin + extent0;
  EXPECT_DEATH(
      {
        KE::Impl::expect_no_overlap(strided_begin, strided_end, strided_begin);
      },
      "Kokkos contract violation:.*");
#endif

  // Case 2: two subviews of the same parent taken over the index ranges
  // [0, 6) and [3, 9).
  static constexpr size_t sub_extent0 = 6;
  static constexpr size_t offset0     = 3;
  std::pair<size_t, size_t> range0(0, sub_extent0);
  std::pair<size_t, size_t> range1(offset0, offset0 + sub_extent0);
#ifdef KOKKOS_ENABLE_DEBUG
  auto static_sub_a       = Kokkos::subview(contig_static, range0);
  auto static_sub_b       = Kokkos::subview(contig_static, range1);
  auto static_sub_a_begin = KE::begin(static_sub_a);  // [0, 6)
  auto static_sub_a_end   = static_sub_a_begin + static_sub_a.extent(0);
  auto static_sub_b_begin = KE::begin(static_sub_b);  // [3, 9)
  EXPECT_DEATH(
      {
        KE::Impl::expect_no_overlap(static_sub_a_begin, static_sub_a_end,
                                    static_sub_b_begin);
      },
      "Kokkos contract violation:.*");

  auto dynamic_sub_a       = Kokkos::subview(contig_dynamic, range0);
  auto dynamic_sub_b       = Kokkos::subview(contig_dynamic, range1);
  auto dynamic_sub_a_begin = KE::begin(dynamic_sub_a);  // [0, 6)
  auto dynamic_sub_a_end   = dynamic_sub_a_begin + dynamic_sub_a.extent(0);
  auto dynamic_sub_b_begin = KE::begin(dynamic_sub_b);  // [3, 9)
  EXPECT_DEATH(
      {
        KE::Impl::expect_no_overlap(dynamic_sub_a_begin, dynamic_sub_a_end,
                                    dynamic_sub_b_begin);
      },
      "Kokkos contract violation:.*");
#endif

  // Same two index ranges on the strided (stride 2) parent: this pair is
  // expected to be accepted. The original author's stated rationale is that
  // the offset (=3) is not divisible by the stride (=2).
  // NOTE(review): confirm that rationale against how subview offsets the data
  // pointer of a LayoutStride view.
  auto strided_sub_a       = Kokkos::subview(strided, range0);
  auto strided_sub_b       = Kokkos::subview(strided, range1);
  auto strided_sub_a_begin = KE::begin(strided_sub_a);
  auto strided_sub_a_end   = strided_sub_a_begin + strided_sub_a.extent(0);
  auto strided_sub_b_begin = KE::begin(strided_sub_b);
  EXPECT_NO_THROW({
    KE::Impl::expect_no_overlap(strided_sub_a_begin, strided_sub_a_end,
                                strided_sub_b_begin);
  });

  // Case 3: rows 0 and 1 of rank-2 views, each taken as a rank-1 subview.
  // The two rows span the same index range but are expected to be accepted
  // as non-overlapping.
  Kokkos::View<value_type[2][extent0], Kokkos::LayoutLeft> static_2d{
      "std-algo-test-2d-contiguous-view-static"};
  auto static_row0       = Kokkos::subview(static_2d, 0, Kokkos::ALL);
  auto static_row1       = Kokkos::subview(static_2d, 1, Kokkos::ALL);
  auto static_row0_begin = KE::begin(static_row0);  // 0, 2, 4, ...
  auto static_row0_end   = static_row0_begin + static_row0.extent(0);
  auto static_row1_begin = KE::begin(static_row1);  // 1, 3, 5, ...

  EXPECT_NO_THROW({
    KE::Impl::expect_no_overlap(static_row0_begin, static_row0_end,
                                static_row1_begin);
  });

  Kokkos::View<value_type**, Kokkos::LayoutLeft> dynamic_2d{
      "std-algo-test-2d-contiguous-view-dynamic", 2, extent0};
  auto dynamic_row0       = Kokkos::subview(dynamic_2d, 0, Kokkos::ALL);
  auto dynamic_row1       = Kokkos::subview(dynamic_2d, 1, Kokkos::ALL);
  auto dynamic_row0_begin = KE::begin(dynamic_row0);  // 0, 2, 4, ...
  auto dynamic_row0_end   = dynamic_row0_begin + dynamic_row0.extent(0);
  auto dynamic_row1_begin = KE::begin(dynamic_row1);  // 1, 3, 5, ...

  EXPECT_NO_THROW({
    KE::Impl::expect_no_overlap(dynamic_row0_begin, dynamic_row0_end,
                                dynamic_row1_begin);
  });

  Kokkos::LayoutStride layout_2d{2, 3, extent0, 2 * 3};
  Kokkos::View<value_type**, Kokkos::LayoutStride> strided_2d{
      "std-algo-test-2d-contiguous-view-strided", layout_2d};
  auto strided_row0       = Kokkos::subview(strided_2d, 0, Kokkos::ALL);
  auto strided_row1       = Kokkos::subview(strided_2d, 1, Kokkos::ALL);
  auto strided_row0_begin = KE::begin(strided_row0);
  auto strided_row0_end   = strided_row0_begin + strided_row0.extent(0);
  auto strided_row1_begin = KE::begin(strided_row1);

  EXPECT_NO_THROW({
    KE::Impl::expect_no_overlap(strided_row0_begin, strided_row0_end,
                                strided_row1_begin);
  });
}
|
||||
|
||||
} // namespace stdalgos
|
||||
} // namespace Test
|
||||
|
||||
@ -85,7 +85,7 @@ struct TestFunctorA {
|
||||
break;
|
||||
}
|
||||
|
||||
#if not defined KOKKOS_ENABLE_OPENMPTARGET
|
||||
#ifndef KOKKOS_ENABLE_OPENMPTARGET
|
||||
|
||||
case 2: {
|
||||
auto it = KE::exclusive_scan(
|
||||
@ -213,7 +213,7 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
|
||||
break;
|
||||
}
|
||||
|
||||
#if not defined KOKKOS_ENABLE_OPENMPTARGET
|
||||
#ifndef KOKKOS_ENABLE_OPENMPTARGET
|
||||
case 2:
|
||||
case 3: {
|
||||
auto it = exclusive_scan(KE::cbegin(rowFrom), KE::cend(rowFrom),
|
||||
@ -242,7 +242,7 @@ template <class LayoutTag, class ValueType, class InPlaceOrVoid = void>
|
||||
void run_all_scenarios() {
|
||||
for (int numTeams : teamSizesToTest) {
|
||||
for (const auto& numCols : {0, 1, 2, 13, 101, 1444, 8153}) {
|
||||
#if not defined KOKKOS_ENABLE_OPENMPTARGET
|
||||
#ifndef KOKKOS_ENABLE_OPENMPTARGET
|
||||
for (int apiId : {0, 1, 2, 3}) {
|
||||
#else
|
||||
for (int apiId : {0, 1}) {
|
||||
|
||||
@ -52,7 +52,7 @@ struct TestFunctorA {
|
||||
Kokkos::single(Kokkos::PerTeam(member),
|
||||
[=, *this]() { m_returnsView(myRowIndex) = result; });
|
||||
}
|
||||
#if not defined KOKKOS_ENABLE_OPENMPTARGET
|
||||
#ifndef KOKKOS_ENABLE_OPENMPTARGET
|
||||
else if (m_apiPick == 2) {
|
||||
using value_type = typename ViewType::value_type;
|
||||
result = KE::is_sorted(member, KE::cbegin(myRowView), KE::cend(myRowView),
|
||||
@ -179,7 +179,7 @@ template <class LayoutTag, class ValueType>
|
||||
void run_all_scenarios(bool makeDataSortedOnPurpose) {
|
||||
for (int numTeams : teamSizesToTest) {
|
||||
for (const auto& numCols : {0, 1, 2, 13, 101, 1444, 5153}) {
|
||||
#if not defined KOKKOS_ENABLE_OPENMPTARGET
|
||||
#ifndef KOKKOS_ENABLE_OPENMPTARGET
|
||||
for (int apiId : {0, 1, 2, 3}) {
|
||||
#else
|
||||
for (int apiId : {0, 1}) {
|
||||
|
||||
@ -73,7 +73,7 @@ struct TestFunctorA {
|
||||
m_distancesView(myRowIndex) = resultDist;
|
||||
});
|
||||
}
|
||||
#if not defined KOKKOS_ENABLE_OPENMPTARGET
|
||||
#ifndef KOKKOS_ENABLE_OPENMPTARGET
|
||||
else if (m_apiPick == 2) {
|
||||
using value_type = typename ViewType::value_type;
|
||||
auto it = KE::is_sorted_until(member, KE::cbegin(myRowView),
|
||||
@ -226,7 +226,7 @@ template <class LayoutTag, class ValueType>
|
||||
void run_all_scenarios(const std::string& name, const std::vector<int>& cols) {
|
||||
for (int numTeams : teamSizesToTest) {
|
||||
for (const auto& numCols : cols) {
|
||||
#if not defined KOKKOS_ENABLE_OPENMPTARGET
|
||||
#ifndef KOKKOS_ENABLE_OPENMPTARGET
|
||||
for (int apiId : {0, 1, 2, 3}) {
|
||||
#else
|
||||
for (int apiId : {0, 1}) {
|
||||
|
||||
@ -59,7 +59,7 @@ struct TestFunctorA {
|
||||
m_distancesView(myRowIndex) = resultDist;
|
||||
});
|
||||
}
|
||||
#if not defined KOKKOS_ENABLE_OPENMPTARGET
|
||||
#ifndef KOKKOS_ENABLE_OPENMPTARGET
|
||||
else if (m_apiPick == 2) {
|
||||
using value_type = typename ViewType::value_type;
|
||||
auto it =
|
||||
@ -170,7 +170,7 @@ void run_all_scenarios() {
|
||||
}
|
||||
|
||||
TEST(std_algorithms_max_element_team_test, test) {
|
||||
#if not defined KOKKOS_ENABLE_OPENMPTARGET
|
||||
#ifndef KOKKOS_ENABLE_OPENMPTARGET
|
||||
run_all_scenarios<DynamicTag, int>();
|
||||
run_all_scenarios<StridedTwoRowsTag, double>();
|
||||
run_all_scenarios<StridedThreeRowsTag, int>();
|
||||
|
||||
@ -59,7 +59,7 @@ struct TestFunctorA {
|
||||
m_distancesView(myRowIndex) = resultDist;
|
||||
});
|
||||
}
|
||||
#if not defined KOKKOS_ENABLE_OPENMPTARGET
|
||||
#ifndef KOKKOS_ENABLE_OPENMPTARGET
|
||||
else if (m_apiPick == 2) {
|
||||
using value_type = typename ViewType::value_type;
|
||||
auto it =
|
||||
@ -169,7 +169,7 @@ void run_all_scenarios() {
|
||||
}
|
||||
|
||||
TEST(std_algorithms_min_element_team_test, test) {
|
||||
#if not defined KOKKOS_ENABLE_OPENMPTARGET
|
||||
#ifndef KOKKOS_ENABLE_OPENMPTARGET
|
||||
run_all_scenarios<DynamicTag, int>();
|
||||
run_all_scenarios<StridedTwoRowsTag, double>();
|
||||
run_all_scenarios<StridedThreeRowsTag, int>();
|
||||
|
||||
@ -66,7 +66,7 @@ struct TestFunctorA {
|
||||
m_distancesView(myRowIndex, 1) = resultDist2;
|
||||
});
|
||||
}
|
||||
#if not defined KOKKOS_ENABLE_OPENMPTARGET
|
||||
#ifndef KOKKOS_ENABLE_OPENMPTARGET
|
||||
else if (m_apiPick == 2) {
|
||||
using value_type = typename ViewType::value_type;
|
||||
auto itPair =
|
||||
@ -188,7 +188,7 @@ void run_all_scenarios() {
|
||||
}
|
||||
|
||||
TEST(std_algorithms_minmax_element_team_test, test) {
|
||||
#if not defined KOKKOS_ENABLE_OPENMPTARGET
|
||||
#ifndef KOKKOS_ENABLE_OPENMPTARGET
|
||||
run_all_scenarios<DynamicTag, int>();
|
||||
run_all_scenarios<StridedTwoRowsTag, double>();
|
||||
run_all_scenarios<StridedThreeRowsTag, int>();
|
||||
|
||||
@ -16,7 +16,7 @@
|
||||
|
||||
#include <TestStdAlgorithmsCommon.hpp>
|
||||
|
||||
#if not defined KOKKOS_ENABLE_OPENMPTARGET
|
||||
#ifndef KOKKOS_ENABLE_OPENMPTARGET
|
||||
|
||||
namespace Test {
|
||||
namespace stdalgos {
|
||||
|
||||
@ -16,7 +16,7 @@
|
||||
|
||||
#include <TestStdAlgorithmsCommon.hpp>
|
||||
|
||||
#if not defined KOKKOS_ENABLE_OPENMPTARGET
|
||||
#ifndef KOKKOS_ENABLE_OPENMPTARGET
|
||||
|
||||
namespace Test {
|
||||
namespace stdalgos {
|
||||
|
||||
@ -16,7 +16,7 @@
|
||||
|
||||
#include <TestStdAlgorithmsCommon.hpp>
|
||||
|
||||
#if not defined KOKKOS_ENABLE_OPENMPTARGET
|
||||
#ifndef KOKKOS_ENABLE_OPENMPTARGET
|
||||
|
||||
namespace Test {
|
||||
namespace stdalgos {
|
||||
|
||||
@ -16,7 +16,7 @@
|
||||
|
||||
#include <TestStdAlgorithmsCommon.hpp>
|
||||
|
||||
#if not defined KOKKOS_ENABLE_OPENMPTARGET
|
||||
#ifndef KOKKOS_ENABLE_OPENMPTARGET
|
||||
|
||||
namespace Test {
|
||||
namespace stdalgos {
|
||||
|
||||
@ -5,6 +5,6 @@ build_script:
|
||||
- cmd: >-
|
||||
mkdir build &&
|
||||
cd build &&
|
||||
cmake c:\projects\source -DKokkos_ENABLE_TESTS=ON -DCMAKE_CXX_FLAGS="/W0 /EHsc" -DKokkos_ENABLE_DEPRECATED_CODE_4=ON -DKokkos_ENABLE_DEPRECATION_WARNINGS=OFF &&
|
||||
cmake c:\projects\source -DKokkos_ENABLE_IMPL_MDSPAN=OFF -DKokkos_ENABLE_TESTS=ON -DCMAKE_CXX_FLAGS="/W0 /EHsc" -DKokkos_ENABLE_DEPRECATED_CODE_4=ON -DKokkos_ENABLE_DEPRECATION_WARNINGS=OFF &&
|
||||
cmake --build . --target install &&
|
||||
ctest -C Debug --output-on-failure
|
||||
|
||||
@ -4,7 +4,7 @@ KOKKOS_ADD_BENCHMARK_DIRECTORIES(gather)
|
||||
KOKKOS_ADD_BENCHMARK_DIRECTORIES(gups)
|
||||
KOKKOS_ADD_BENCHMARK_DIRECTORIES(launch_latency)
|
||||
KOKKOS_ADD_BENCHMARK_DIRECTORIES(stream)
|
||||
|
||||
KOKKOS_ADD_BENCHMARK_DIRECTORIES(view_copy_constructor)
|
||||
#FIXME_OPENMPTARGET - These two benchmarks cause ICE. Commenting them for now but a deeper analysis on the cause and a possible fix will follow.
|
||||
IF(NOT Kokkos_ENABLE_OPENMPTARGET)
|
||||
KOKKOS_ADD_BENCHMARK_DIRECTORIES(policy_performance)
|
||||
|
||||
@ -0,0 +1,4 @@
|
||||
# Benchmark executable built from the single source file in this directory.
KOKKOS_ADD_EXECUTABLE(view_copy_constructor SOURCES view_copy_constructor.cpp)
|
||||
46
lib/kokkos/benchmarks/view_copy_constructor/Makefile
Normal file
46
lib/kokkos/benchmarks/view_copy_constructor/Makefile
Normal file
@ -0,0 +1,46 @@
|
||||
# Standalone GNU Makefile for the view_copy_constructor benchmark.
# Compiler, backend and flags below are defaults; the Kokkos build settings
# come from $(KOKKOS_PATH)/Makefile.kokkos, which is included further down.
KOKKOS_DEVICES=Serial
KOKKOS_ARCH = ""


# Directory that contains this Makefile (with a trailing slash).
MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST))))

ifndef KOKKOS_PATH
  KOKKOS_PATH = $(MAKEFILE_PATH)../..
endif

SRC = $(wildcard $(MAKEFILE_PATH)*.cpp)
HEADERS = $(wildcard $(MAKEFILE_PATH)*.hpp)

vpath %.cpp $(sort $(dir $(SRC)))

# These targets do not name files; declaring them phony keeps them working
# even if a file called "default", "build" or "clean" exists.
.PHONY: default build clean

default: build
	echo "Start Build"

CXX = clang++
EXE = view_copy_constructor.exe

CXXFLAGS ?= -Ofast
override CXXFLAGS += -I$(MAKEFILE_PATH)

DEPFLAGS = -M
LINK = ${CXX}
LINKFLAGS = -Ofast
KOKKOS_CXX_STANDARD=c++20

OBJ = $(notdir $(SRC:.cpp=.o))
LIB =

include $(KOKKOS_PATH)/Makefile.kokkos

build: $(EXE)

$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)

clean: kokkos-clean
	rm -f *.o view_copy_constructor.cuda view_copy_constructor.exe

# Compilation rules

%.o:%.cpp $(KOKKOS_CPP_DEPENDS) $(HEADERS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@)
|
||||
@ -0,0 +1,310 @@
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 4.0
|
||||
// Copyright (2022) National Technology & Engineering
|
||||
// Solutions of Sandia, LLC (NTESS).
|
||||
//
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://kokkos.org/LICENSE for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//@HEADER
|
||||
|
||||
// The function "test_view_collection" exposes the copy constructor
|
||||
// and destructor overheads in Kokkos View objects
|
||||
// Please see the lines marked by "NOTE".
|
||||
|
||||
#include <limits>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <sys/time.h>
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <iostream>
|
||||
|
||||
// NVIEWS is the number of Kokkos View objects in our ViewCollection object
|
||||
// We have chosen a large value of 40 to make it easier to see performance
|
||||
// differences when using the likelihood attribute
|
||||
#define NVIEWS 40
|
||||
|
||||
class ViewCollection {
|
||||
public:
|
||||
Kokkos::View<double*> v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13,
|
||||
v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28,
|
||||
v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40;
|
||||
double m_expected_sum;
|
||||
double m_side_effect;
|
||||
int m_N;
|
||||
|
||||
ViewCollection(int N)
|
||||
: v1("v1", N),
|
||||
v2("v2", N),
|
||||
v3("v3", N),
|
||||
v4("v4", N),
|
||||
v5("v5", N),
|
||||
v6("v6", N),
|
||||
v7("v7", N),
|
||||
v8("v8", N),
|
||||
v9("v9", N),
|
||||
v10("v10", N),
|
||||
v11("v11", N),
|
||||
v12("v12", N),
|
||||
v13("v13", N),
|
||||
v14("v14", N),
|
||||
v15("v15", N),
|
||||
v16("v16", N),
|
||||
v17("v17", N),
|
||||
v18("v18", N),
|
||||
v19("v19", N),
|
||||
v20("v20", N),
|
||||
v21("v21", N),
|
||||
v22("v22", N),
|
||||
v23("v23", N),
|
||||
v24("v24", N),
|
||||
v25("v25", N),
|
||||
v26("v26", N),
|
||||
v27("v27", N),
|
||||
v28("v28", N),
|
||||
v29("v29", N),
|
||||
v30("v30", N),
|
||||
v31("v31", N),
|
||||
v32("v32", N),
|
||||
v33("v33", N),
|
||||
v34("v34", N),
|
||||
v35("v35", N),
|
||||
v36("v36", N),
|
||||
v37("v37", N),
|
||||
v38("v38", N),
|
||||
v39("v39", N),
|
||||
v40("v40", N),
|
||||
m_expected_sum(N * NVIEWS),
|
||||
m_side_effect(0.0),
|
||||
m_N(N) {
|
||||
for (int i = 0; i < N; ++i) {
|
||||
v1(i) = 1;
|
||||
v2(i) = 1;
|
||||
v3(i) = 1;
|
||||
v4(i) = 1;
|
||||
v5(i) = 1;
|
||||
v6(i) = 1;
|
||||
v7(i) = 1;
|
||||
v8(i) = 1;
|
||||
v9(i) = 1;
|
||||
v10(i) = 1;
|
||||
v11(i) = 1;
|
||||
v12(i) = 1;
|
||||
v13(i) = 1;
|
||||
v14(i) = 1;
|
||||
v15(i) = 1;
|
||||
v16(i) = 1;
|
||||
v17(i) = 1;
|
||||
v18(i) = 1;
|
||||
v19(i) = 1;
|
||||
v20(i) = 1;
|
||||
v21(i) = 1;
|
||||
v22(i) = 1;
|
||||
v23(i) = 1;
|
||||
v24(i) = 1;
|
||||
v25(i) = 1;
|
||||
v26(i) = 1;
|
||||
v27(i) = 1;
|
||||
v28(i) = 1;
|
||||
v29(i) = 1;
|
||||
v30(i) = 1;
|
||||
v31(i) = 1;
|
||||
v32(i) = 1;
|
||||
v33(i) = 1;
|
||||
v34(i) = 1;
|
||||
v35(i) = 1;
|
||||
v36(i) = 1;
|
||||
v37(i) = 1;
|
||||
v38(i) = 1;
|
||||
v39(i) = 1;
|
||||
v40(i) = 1;
|
||||
}
|
||||
}
|
||||
|
||||
// The ADD_COPY_CONSTRUCTOR macro is helpful to compare time in the copy
|
||||
// constructor between compilers. We have found that the GNU compiler
|
||||
// is sometimes able to inline the default copy constructor.
|
||||
#ifdef ADD_COPY_CONSTRUCTOR
|
||||
__attribute__((noinline)) ViewCollection(const ViewCollection& other)
|
||||
: v1(other.v1),
|
||||
v2(other.v2),
|
||||
v3(other.v3),
|
||||
v4(other.v4),
|
||||
v5(other.v5),
|
||||
v6(other.v6),
|
||||
v7(other.v7),
|
||||
v8(other.v8),
|
||||
v9(other.v9),
|
||||
v10(other.v10),
|
||||
v11(other.v11),
|
||||
v12(other.v12),
|
||||
v13(other.v13),
|
||||
v14(other.v14),
|
||||
v15(other.v15),
|
||||
v16(other.v16),
|
||||
v17(other.v17),
|
||||
v18(other.v18),
|
||||
v19(other.v19),
|
||||
v20(other.v20),
|
||||
v21(other.v21),
|
||||
v22(other.v22),
|
||||
v23(other.v23),
|
||||
v24(other.v24),
|
||||
v25(other.v25),
|
||||
v26(other.v26),
|
||||
v27(other.v27),
|
||||
v28(other.v28),
|
||||
v29(other.v29),
|
||||
v30(other.v30),
|
||||
v31(other.v31),
|
||||
v32(other.v32),
|
||||
v33(other.v33),
|
||||
v34(other.v34),
|
||||
v35(other.v35),
|
||||
v36(other.v36),
|
||||
v37(other.v37),
|
||||
v38(other.v38),
|
||||
v39(other.v39),
|
||||
v40(other.v40),
|
||||
m_expected_sum(other.m_expected_sum),
|
||||
m_side_effect(other.m_side_effect),
|
||||
m_N(other.m_N) {}
|
||||
#endif
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double sum_views(int ii, bool execute_kernel) {
|
||||
double result = 0.0;
|
||||
if (execute_kernel) {
|
||||
// This code is only executed when using the command line option -k
|
||||
// The computation references all Kokkos views. This may help our
|
||||
// effort to stop compilers from optimizing away the Kokkos views
|
||||
for (int i = 0; i < m_N; ++i) {
|
||||
result += v1(i) + v2(i) + v3(i) + v4(i) + v5(i) + v6(i) + v7(i) +
|
||||
v8(i) + v9(i) + v10(i) + v11(i) + v12(i) + v13(i) + v14(i) +
|
||||
v15(i) + v16(i) + v17(i) + v18(i) + v19(i) + v20(i) + v21(i) +
|
||||
v22(i) + v23(i) + v24(i) + v25(i) + v26(i) + v27(i) + v28(i) +
|
||||
v29(i) + v30(i) + v31(i) + v32(i) + v33(i) + v34(i) + v35(i) +
|
||||
v36(i) + v37(i) + v38(i) + v39(i) + v40(i);
|
||||
}
|
||||
} else {
|
||||
result = m_expected_sum;
|
||||
}
|
||||
// This statement introduces a side effect that may help our effort to
|
||||
// stop compilers from optimizing away the temporary ViewCollection object
|
||||
m_side_effect = result * (ii + 1);
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
void test_view_collection_kk(int N, int num_iter, bool execute_kernel) {
|
||||
ViewCollection view_collection(N);
|
||||
|
||||
Kokkos::Timer view_collection_timer;
|
||||
double max_value = 0.0;
|
||||
// Max Reduction boilerplate code taken from slide 53 of
|
||||
// kokkos-tutorials/LectureSeries/KokkosTutorial_02_ViewsAndSpaces.pdf
|
||||
Kokkos::parallel_reduce(
|
||||
"collection-reduction", num_iter,
|
||||
KOKKOS_LAMBDA(int i, double& valueToUpdate) {
|
||||
// NOTE: The following lines expose the Kokkos View overheads
|
||||
ViewCollection tmp_view_collection = view_collection;
|
||||
double my_value = tmp_view_collection.sum_views(i, execute_kernel);
|
||||
if (my_value > valueToUpdate) valueToUpdate = my_value;
|
||||
},
|
||||
Kokkos::Max<double>(max_value));
|
||||
double view_collection_time = view_collection_timer.seconds();
|
||||
|
||||
bool success = std::fabs(max_value - N * NVIEWS) < 1.E-6;
|
||||
std::cout << "View Time = " << view_collection_time << " seconds"
|
||||
<< std::endl;
|
||||
if (success) {
|
||||
std::cout << "Kokkos run:" << std::endl;
|
||||
std::cout << "SUCCESS" << std::endl;
|
||||
} else {
|
||||
std::cout << "FAILURE" << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
void test_view_collection_serial(int N, int num_iter, bool execute_kernel) {
|
||||
ViewCollection view_collection(N);
|
||||
|
||||
Kokkos::Timer view_collection_timer;
|
||||
double max_value = 0.0;
|
||||
// Max Reduction boilerplate code taken from slide 53 of
|
||||
// kokkos-tutorials/LectureSeries/KokkosTutorial_02_ViewsAndSpaces.pdf
|
||||
for (int i = 0; i < num_iter; ++i) {
|
||||
// NOTE: The following lines expose the Kokkos View overheads
|
||||
ViewCollection tmp_view_collection = view_collection;
|
||||
double my_value = tmp_view_collection.sum_views(i, execute_kernel);
|
||||
if (my_value > max_value) max_value = my_value;
|
||||
}
|
||||
double view_collection_time = view_collection_timer.seconds();
|
||||
|
||||
bool success = std::fabs(max_value - N * NVIEWS) < 1.E-6;
|
||||
std::cout << "View Time 2 = " << view_collection_time << " seconds"
|
||||
<< std::endl;
|
||||
if (success) {
|
||||
std::cout << "Serial run:" << std::endl;
|
||||
std::cout << "SUCCESS" << std::endl;
|
||||
} else {
|
||||
std::cout << "FAILURE" << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
// Entry point of the View copy-constructor benchmark.
//
// Command line options:
//   -N <int>  array extent of every View (must be >= 1, default 1)
//   -i <int>  number of iterations (must be >= 1, default 1 << 27)
//   -k        execute the summation kernel
//   -h        print the option summary and exit
//
// The benchmark body is compiled only when at least one of the OpenMP,
// Serial, Threads or HPX backends is enabled; otherwise main() only
// returns 0.
int main(int argc, char* argv[]) {
  // The benchmark is only testing reference counting for views on host.
#if defined(KOKKOS_ENABLE_OPENMP) || defined(KOKKOS_ENABLE_SERIAL) || \
    defined(KOKKOS_ENABLE_THREADS) || defined(KOKKOS_ENABLE_HPX)
  int N               = 1;
  int num_iter        = 1 << 27;
  bool execute_kernel = false;

  // Start at 1: argv[0] is the program name, not an option.
  for (int i = 1; i < argc; i++) {
    if (strcmp(argv[i], "-N") == 0) {
      // argv[argc] is a null pointer and must never reach atoi(), so
      // reject "-N" when it is the last argument.
      if (i + 1 >= argc) {
        std::cout << "Option -N requires an integer argument" << std::endl;
        exit(1);
      }
      N = atoi(argv[++i]);
      if (N < 1) {
        std::cout << "Array extent must be >= 1" << std::endl;
        exit(1);
      }
    } else if (strcmp(argv[i], "-i") == 0) {
      // Same guard as for -N.
      if (i + 1 >= argc) {
        std::cout << "Option -i requires an integer argument" << std::endl;
        exit(1);
      }
      num_iter = atoi(argv[++i]);
      if (num_iter < 1) {
        std::cout << "Number of iterations must be >= 1" << std::endl;
        exit(1);
      }
    } else if (strcmp(argv[i], "-k") == 0) {
      execute_kernel = true;
    } else if (strcmp(argv[i], "-h") == 0) {
      printf("  Options:\n");
      printf("  -N <int>:      Array extent\n");
      printf("  -i <int>:      Number of iterations\n");
      printf("  -k:            Execute the summation kernel\n");
      printf("  -h:            Print this message\n\n");
      exit(1);
    }
  }

  std::cout << "Array extent = " << N << std::endl;
  std::cout << "Iterations = " << num_iter << std::endl;
  std::cout << "Execute summation kernel = " << std::boolalpha << execute_kernel
            << std::noboolalpha << std::endl;

  // Test inside a Kokkos kernel.
  Kokkos::initialize(argc, argv);
  { test_view_collection_kk(N, num_iter, execute_kernel); }

  // Test outside Kokkos kernel.
  test_view_collection_serial(N, num_iter, execute_kernel);

  Kokkos::finalize();
#endif

  return 0;
}
|
||||
@ -233,7 +233,7 @@ do
|
||||
cuda_args="$cuda_args $1"
|
||||
;;
|
||||
#Handle more known nvcc args
|
||||
--extended-lambda|--expt-extended-lambda|--expt-relaxed-constexpr|--Wno-deprecated-gpu-targets|-Wno-deprecated-gpu-targets|-allow-unsupported-compiler|--allow-unsupported-compiler)
|
||||
--extended-lambda|--expt-extended-lambda|--expt-relaxed-constexpr|--Wno-deprecated-gpu-targets|-Wno-deprecated-gpu-targets|-allow-unsupported-compiler|--allow-unsupported-compiler|--disable-warnings)
|
||||
cuda_args="$cuda_args $1"
|
||||
;;
|
||||
#Handle known nvcc args that have an argument
|
||||
|
||||
@ -1,6 +1,5 @@
|
||||
TRIBITS_PACKAGE_DEFINE_DEPENDENCIES(
|
||||
LIB_OPTIONAL_TPLS Pthread CUDA HWLOC DLlib
|
||||
TEST_OPTIONAL_TPLS CUSPARSE
|
||||
)
|
||||
|
||||
TRIBITS_TPL_TENTATIVELY_ENABLE(DLlib)
|
||||
|
||||
@ -225,8 +225,13 @@ FUNCTION(kokkos_compilation)
|
||||
# if built w/o CUDA support, we want to basically make this a no-op
|
||||
SET(_Kokkos_ENABLE_CUDA @Kokkos_ENABLE_CUDA@)
|
||||
|
||||
|
||||
IF(CMAKE_VERSION VERSION_GREATER_EQUAL 3.17)
|
||||
SET(MAYBE_CURRENT_INSTALLATION_ROOT "${CMAKE_CURRENT_FUNCTION_LIST_DIR}/../../..")
|
||||
ENDIF()
|
||||
|
||||
# search relative first and then absolute
|
||||
SET(_HINTS "${CMAKE_CURRENT_LIST_DIR}/../.." "@CMAKE_INSTALL_PREFIX@")
|
||||
SET(_HINTS "${MAYBE_CURRENT_INSTALLATION_ROOT}" "@CMAKE_INSTALL_PREFIX@")
|
||||
|
||||
# find kokkos_launch_compiler
|
||||
FIND_PROGRAM(Kokkos_COMPILE_LAUNCHER
|
||||
|
||||
@ -37,6 +37,7 @@
|
||||
#cmakedefine KOKKOS_ENABLE_CUDA_LAMBDA // deprecated
|
||||
#cmakedefine KOKKOS_ENABLE_CUDA_CONSTEXPR
|
||||
#cmakedefine KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC
|
||||
#cmakedefine KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY
|
||||
#cmakedefine KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE
|
||||
#cmakedefine KOKKOS_ENABLE_HIP_MULTIPLE_KERNEL_INSTANTIATIONS
|
||||
#cmakedefine KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY
|
||||
@ -52,6 +53,8 @@
|
||||
#cmakedefine KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION // deprecated
|
||||
#cmakedefine KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION
|
||||
#cmakedefine KOKKOS_ENABLE_IMPL_MDSPAN
|
||||
#cmakedefine KOKKOS_ENABLE_IMPL_REF_COUNT_BRANCH_UNLIKELY
|
||||
#cmakedefine KOKKOS_ENABLE_IMPL_VIEW_OF_VIEWS_DESTRUCTOR_PRECONDITION_VIOLATION_WORKAROUND
|
||||
#cmakedefine KOKKOS_ENABLE_ATOMICS_BYPASS
|
||||
|
||||
/* TPL Settings */
|
||||
@ -65,6 +68,7 @@
|
||||
#cmakedefine KOKKOS_ARCH_ARMV8_THUNDERX
|
||||
#cmakedefine KOKKOS_ARCH_ARMV81
|
||||
#cmakedefine KOKKOS_ARCH_ARMV8_THUNDERX2
|
||||
#cmakedefine KOKKOS_ARCH_ARMV9_GRACE
|
||||
#cmakedefine KOKKOS_ARCH_A64FX
|
||||
#cmakedefine KOKKOS_ARCH_AVX
|
||||
#cmakedefine KOKKOS_ARCH_AVX2
|
||||
|
||||
@ -7,13 +7,18 @@ IF (NOT CUDAToolkit_ROOT)
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
|
||||
# FIXME CMake 3.28.4 creates more targets than we export
|
||||
IF(CMAKE_VERSION VERSION_GREATER_EQUAL "3.17.0" AND CMAKE_VERSION VERSION_LESS "3.28.4")
|
||||
find_package(CUDAToolkit)
|
||||
ELSE()
|
||||
include(${CMAKE_CURRENT_LIST_DIR}/CudaToolkit.cmake)
|
||||
IF(KOKKOS_CXX_HOST_COMPILER_ID STREQUAL NVHPC AND CMAKE_VERSION VERSION_LESS "3.20.1")
|
||||
MESSAGE(FATAL_ERROR "Using NVHPC as host compiler requires at least CMake 3.20.1")
|
||||
ENDIF()
|
||||
|
||||
IF(CMAKE_VERSION VERSION_GREATER_EQUAL "3.17.0")
|
||||
find_package(CUDAToolkit REQUIRED)
|
||||
KOKKOS_CREATE_IMPORTED_TPL(CUDA INTERFACE
|
||||
LINK_LIBRARIES CUDA::cuda_driver CUDA::cudart
|
||||
)
|
||||
KOKKOS_EXPORT_CMAKE_TPL(CUDAToolkit REQUIRED)
|
||||
ELSE()
|
||||
include(${CMAKE_CURRENT_LIST_DIR}/CudaToolkit.cmake)
|
||||
|
||||
IF (TARGET CUDA::cudart)
|
||||
SET(FOUND_CUDART TRUE)
|
||||
@ -30,14 +35,10 @@ ELSE()
|
||||
ENDIF()
|
||||
|
||||
include(FindPackageHandleStandardArgs)
|
||||
IF(KOKKOS_CXX_HOST_COMPILER_ID STREQUAL NVHPC)
|
||||
SET(KOKKOS_CUDA_ERROR "Using NVHPC as host compiler requires at least CMake 3.20.1")
|
||||
ELSE()
|
||||
SET(KOKKOS_CUDA_ERROR DEFAULT_MSG)
|
||||
ENDIF()
|
||||
FIND_PACKAGE_HANDLE_STANDARD_ARGS(TPLCUDA ${KOKKOS_CUDA_ERROR} FOUND_CUDART FOUND_CUDA_DRIVER)
|
||||
FIND_PACKAGE_HANDLE_STANDARD_ARGS(TPLCUDA ${DEFAULT_MSG} FOUND_CUDART FOUND_CUDA_DRIVER)
|
||||
IF (FOUND_CUDA_DRIVER AND FOUND_CUDART)
|
||||
KOKKOS_CREATE_IMPORTED_TPL(CUDA INTERFACE
|
||||
LINK_LIBRARIES CUDA::cuda_driver CUDA::cudart
|
||||
)
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
|
||||
@ -35,7 +35,6 @@ IF(NOT _CUDA_FAILURE)
|
||||
GLOBAL_SET(TPL_CUDA_LIBRARY_DIRS)
|
||||
GLOBAL_SET(TPL_CUDA_INCLUDE_DIRS ${CUDA_TOOLKIT_INCLUDE})
|
||||
GLOBAL_SET(TPL_CUDA_LIBRARIES ${CUDA_CUDART_LIBRARY} ${CUDA_cublas_LIBRARY} ${CUDA_cufft_LIBRARY})
|
||||
KOKKOS_CREATE_IMPORTED_TPL_LIBRARY(CUSPARSE)
|
||||
ELSE()
|
||||
SET(TPL_ENABLE_CUDA OFF)
|
||||
ENDIF()
|
||||
|
||||
@ -1,26 +0,0 @@
|
||||
#@HEADER
|
||||
# ************************************************************************
|
||||
#
|
||||
# Kokkos v. 4.0
|
||||
# Copyright (2022) National Technology & Engineering
|
||||
# Solutions of Sandia, LLC (NTESS).
|
||||
#
|
||||
# Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
# the U.S. Government retains certain rights in this software.
|
||||
#
|
||||
# Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
#
|
||||
# ************************************************************************
|
||||
# @HEADER
|
||||
|
||||
#include(${TRIBITS_DEPS_DIR}/CUDA.cmake)
|
||||
|
||||
#IF (TPL_ENABLE_CUDA)
|
||||
# GLOBAL_SET(TPL_CUSPARSE_LIBRARY_DIRS)
|
||||
# GLOBAL_SET(TPL_CUSPARSE_INCLUDE_DIRS ${TPL_CUDA_INCLUDE_DIRS})
|
||||
# GLOBAL_SET(TPL_CUSPARSE_LIBRARIES ${CUDA_cusparse_LIBRARY})
|
||||
# KOKKOS_CREATE_IMPORTED_TPL_LIBRARY(CUSPARSE)
|
||||
#ENDIF()
|
||||
|
||||
@ -118,14 +118,6 @@ FUNCTION(KOKKOS_ADD_TEST)
|
||||
ENDIF()
|
||||
ENDFUNCTION()
|
||||
|
||||
FUNCTION(KOKKOS_ADD_ADVANCED_TEST)
|
||||
if (KOKKOS_HAS_TRILINOS)
|
||||
TRIBITS_ADD_ADVANCED_TEST(${ARGN})
|
||||
else()
|
||||
# TODO Write this
|
||||
endif()
|
||||
ENDFUNCTION()
|
||||
|
||||
MACRO(KOKKOS_CREATE_IMPORTED_TPL_LIBRARY TPL_NAME)
|
||||
ADD_INTERFACE_LIBRARY(TPL_LIB_${TPL_NAME})
|
||||
TARGET_LINK_LIBRARIES(TPL_LIB_${TPL_NAME} LINK_PUBLIC ${TPL_${TPL_NAME}_LIBRARIES})
|
||||
|
||||
@ -28,6 +28,7 @@ KOKKOS_CHECK_DEPRECATED_OPTIONS(
|
||||
#-------------------------------------------------------------------------------
|
||||
SET(KOKKOS_ARCH_LIST)
|
||||
|
||||
include(CheckCXXCompilerFlag)
|
||||
|
||||
KOKKOS_DEPRECATED_LIST(ARCH ARCH)
|
||||
|
||||
@ -49,6 +50,7 @@ DECLARE_AND_CHECK_HOST_ARCH(ARMV81 "ARMv8.1 Compatible CPU")
|
||||
DECLARE_AND_CHECK_HOST_ARCH(ARMV8_THUNDERX "ARMv8 Cavium ThunderX CPU")
|
||||
DECLARE_AND_CHECK_HOST_ARCH(ARMV8_THUNDERX2 "ARMv8 Cavium ThunderX2 CPU")
|
||||
DECLARE_AND_CHECK_HOST_ARCH(A64FX "ARMv8.2 with SVE Support")
|
||||
DECLARE_AND_CHECK_HOST_ARCH(ARMV9_GRACE "ARMv9 NVIDIA Grace CPU")
|
||||
DECLARE_AND_CHECK_HOST_ARCH(SNB "Intel Sandy/Ivy Bridge CPUs")
|
||||
DECLARE_AND_CHECK_HOST_ARCH(HSW "Intel Haswell CPUs")
|
||||
DECLARE_AND_CHECK_HOST_ARCH(BDW "Intel Broadwell Xeon E-class CPUs")
|
||||
@ -189,12 +191,6 @@ IF (KOKKOS_CXX_COMPILER_ID STREQUAL Clang)
|
||||
ELSEIF(CUDAToolkit_BIN_DIR)
|
||||
GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS --cuda-path=${CUDAToolkit_BIN_DIR}/..)
|
||||
ENDIF()
|
||||
ELSEIF (KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC)
|
||||
SET(CUDA_ARCH_FLAG "-gpu")
|
||||
GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS -cuda)
|
||||
IF (KOKKOS_ENABLE_CUDA) # FIXME ideally unreachable when CUDA not enabled
|
||||
GLOBAL_APPEND(KOKKOS_LINK_OPTIONS -cuda)
|
||||
ENDIF()
|
||||
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA)
|
||||
SET(CUDA_ARCH_FLAG "-arch")
|
||||
ENDIF()
|
||||
@ -209,6 +205,11 @@ ENDIF()
|
||||
|
||||
|
||||
#------------------------------- KOKKOS_HIP_OPTIONS ---------------------------
|
||||
KOKKOS_OPTION(IMPL_AMDGPU_FLAGS "" STRING "Set compiler flags for AMD GPUs")
|
||||
KOKKOS_OPTION(IMPL_AMDGPU_LINK "" STRING "Set linker flags for AMD GPUs")
|
||||
MARK_AS_ADVANCED(Kokkos_IMPL_AMDGPU_FLAGS)
|
||||
MARK_AS_ADVANCED(Kokkos_IMPL_AMDGPU_LINK)
|
||||
|
||||
#clear anything that might be in the cache
|
||||
GLOBAL_SET(KOKKOS_AMDGPU_OPTIONS)
|
||||
IF(KOKKOS_ENABLE_HIP)
|
||||
@ -301,6 +302,20 @@ IF (KOKKOS_ARCH_A64FX)
|
||||
)
|
||||
ENDIF()
|
||||
|
||||
IF (KOKKOS_ARCH_ARMV9_GRACE)
|
||||
SET(KOKKOS_ARCH_ARM_NEON ON)
|
||||
check_cxx_compiler_flag("-mcpu=neoverse-n2" COMPILER_SUPPORTS_NEOVERSE_N2)
|
||||
check_cxx_compiler_flag("-msve-vector-bits=128" COMPILER_SUPPORTS_SVE_VECTOR_BITS)
|
||||
IF (COMPILER_SUPPORTS_NEOVERSE_N2 AND COMPILER_SUPPORTS_SVE_VECTOR_BITS)
|
||||
COMPILER_SPECIFIC_FLAGS(
|
||||
COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID
|
||||
DEFAULT -mcpu=neoverse-n2 -msve-vector-bits=128
|
||||
)
|
||||
ELSE()
|
||||
MESSAGE(WARNING "Compiler does not support ARMv9 Grace architecture")
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
|
||||
IF (KOKKOS_ARCH_ZEN)
|
||||
COMPILER_SPECIFIC_FLAGS(
|
||||
COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID
|
||||
@ -535,16 +550,16 @@ IF (KOKKOS_CXX_HOST_COMPILER_ID STREQUAL NVHPC)
|
||||
SET(KOKKOS_ARCH_AVX512XEON OFF)
|
||||
ENDIF()
|
||||
|
||||
# FIXME_NVCC nvcc doesn't seem to support Arm Neon.
|
||||
IF(KOKKOS_ARCH_ARM_NEON AND KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA)
|
||||
UNSET(KOKKOS_ARCH_ARM_NEON)
|
||||
ENDIF()
|
||||
|
||||
IF (NOT KOKKOS_COMPILE_LANGUAGE STREQUAL CUDA)
|
||||
IF (KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE)
|
||||
COMPILER_SPECIFIC_FLAGS(
|
||||
Clang -fcuda-rdc
|
||||
NVIDIA --relocatable-device-code=true
|
||||
NVHPC -gpu=rdc
|
||||
)
|
||||
ELSEIF(KOKKOS_ENABLE_CUDA)
|
||||
COMPILER_SPECIFIC_FLAGS(
|
||||
NVHPC -gpu=nordc
|
||||
)
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
@ -571,7 +586,7 @@ IF (KOKKOS_ENABLE_HIP)
|
||||
COMPILER_SPECIFIC_FLAGS(
|
||||
DEFAULT -fgpu-rdc
|
||||
)
|
||||
IF (NOT KOKKOS_CXX_COMPILER_ID STREQUAL HIPCC)
|
||||
IF (NOT KOKKOS_CXX_COMPILER_ID STREQUAL HIPCC AND NOT KOKKOS_IMPL_AMDGPU_FLAGS)
|
||||
COMPILER_SPECIFIC_LINK_OPTIONS(
|
||||
DEFAULT --hip-link
|
||||
)
|
||||
@ -653,11 +668,6 @@ FUNCTION(CHECK_CUDA_ARCH ARCH FLAG)
|
||||
SET(KOKKOS_CUDA_ARCH_FLAG ${FLAG} PARENT_SCOPE)
|
||||
IF(KOKKOS_ENABLE_COMPILE_AS_CMAKE_LANGUAGE)
|
||||
SET(CMAKE_CUDA_ARCHITECTURES ${KOKKOS_CUDA_ARCHITECTURES} PARENT_SCOPE)
|
||||
ELSE()
|
||||
IF(KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC)
|
||||
STRING(REPLACE "sm_" "cc" NVHPC_CUDA_ARCH ${FLAG})
|
||||
GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=${NVHPC_CUDA_ARCH}")
|
||||
GLOBAL_APPEND(KOKKOS_LINK_OPTIONS "${CUDA_ARCH_FLAG}=${NVHPC_CUDA_ARCH}")
|
||||
ELSE()
|
||||
GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=${FLAG}")
|
||||
IF(KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE OR KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA)
|
||||
@ -666,7 +676,6 @@ FUNCTION(CHECK_CUDA_ARCH ARCH FLAG)
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
LIST(APPEND KOKKOS_CUDA_ARCH_FLAGS ${FLAG})
|
||||
SET(KOKKOS_CUDA_ARCH_FLAGS ${KOKKOS_CUDA_ARCH_FLAGS} PARENT_SCOPE)
|
||||
LIST(APPEND KOKKOS_CUDA_ARCH_LIST ${ARCH})
|
||||
@ -707,8 +716,10 @@ FUNCTION(CHECK_AMDGPU_ARCH ARCH FLAG)
|
||||
IF(KOKKOS_ENABLE_HIP)
|
||||
SET(KOKKOS_HIP_ARCHITECTURES ${FLAG} PARENT_SCOPE)
|
||||
ENDIF()
|
||||
IF(NOT KOKKOS_IMPL_AMDGPU_FLAGS)
|
||||
SET(KOKKOS_AMDGPU_ARCH_FLAG ${FLAG} PARENT_SCOPE)
|
||||
GLOBAL_APPEND(KOKKOS_AMDGPU_OPTIONS "${AMDGPU_ARCH_FLAG}=${FLAG}")
|
||||
ENDIF()
|
||||
IF(KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE)
|
||||
GLOBAL_APPEND(KOKKOS_LINK_OPTIONS "${AMDGPU_ARCH_FLAG}=${FLAG}")
|
||||
ENDIF()
|
||||
@ -724,6 +735,15 @@ FOREACH(ARCH IN LISTS SUPPORTED_AMD_ARCHS)
|
||||
CHECK_AMDGPU_ARCH(${ARCH} ${FLAG})
|
||||
ENDFOREACH()
|
||||
|
||||
# User-supplied AMD GPU flags: require that a GPU architecture was selected
# explicitly, then forward the compile flags to KOKKOS_AMDGPU_OPTIONS and
# the link flags to KOKKOS_LINK_OPTIONS.
IF(KOKKOS_IMPL_AMDGPU_FLAGS)
  IF (NOT AMDGPU_ARCH_ALREADY_SPECIFIED)
    MESSAGE(FATAL_ERROR "When IMPL_AMDGPU_FLAGS is set the architecture autodetection is disabled. "
      "Please explicitly set the GPU architecture.")
  ENDIF()
  GLOBAL_APPEND(KOKKOS_AMDGPU_OPTIONS "${KOKKOS_IMPL_AMDGPU_FLAGS}")
  GLOBAL_APPEND(KOKKOS_LINK_OPTIONS "${KOKKOS_IMPL_AMDGPU_LINK}")
ENDIF()
|
||||
|
||||
MACRO(SET_AND_CHECK_AMD_ARCH ARCH FLAG)
|
||||
KOKKOS_SET_OPTION(ARCH_${ARCH} ON)
|
||||
CHECK_AMDGPU_ARCH(${ARCH} ${FLAG})
|
||||
@ -984,7 +1004,7 @@ IF (KOKKOS_ARCH_HOPPER90)
|
||||
ENDIF()
|
||||
|
||||
#HIP detection of gpu arch
|
||||
IF(KOKKOS_ENABLE_HIP AND NOT AMDGPU_ARCH_ALREADY_SPECIFIED)
|
||||
IF(KOKKOS_ENABLE_HIP AND NOT AMDGPU_ARCH_ALREADY_SPECIFIED AND NOT KOKKOS_IMPL_AMDGPU_FLAGS)
|
||||
FIND_PROGRAM(ROCM_ENUMERATOR rocm_agent_enumerator)
|
||||
IF(NOT ROCM_ENUMERATOR)
|
||||
MESSAGE(FATAL_ERROR "Autodetection of AMD GPU architecture not possible as "
|
||||
|
||||
@ -42,12 +42,8 @@ IF(Kokkos_ENABLE_CUDA)
|
||||
# If launcher was found and nvcc_wrapper was not specified as
|
||||
# compiler and `CMAKE_CXX_COMPILER_LAUNCHER` is not set, set to use launcher.
|
||||
# Will ensure CMAKE_CXX_COMPILER is replaced by nvcc_wrapper
|
||||
IF(Kokkos_COMPILE_LAUNCHER AND NOT INTERNAL_HAVE_COMPILER_NVCC AND NOT KOKKOS_CXX_COMPILER_ID STREQUAL Clang
|
||||
AND NOT (Kokkos_ENABLE_IMPL_NVHPC_AS_DEVICE_COMPILER AND KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC))
|
||||
IF(Kokkos_COMPILE_LAUNCHER AND NOT INTERNAL_HAVE_COMPILER_NVCC AND NOT KOKKOS_CXX_COMPILER_ID STREQUAL Clang)
|
||||
IF(CMAKE_CXX_COMPILER_LAUNCHER)
|
||||
IF(KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC)
|
||||
MESSAGE(STATUS "Using nvc++ as device compiler requires Kokkos_ENABLE_IMPL_NVHPC_AS_DEVICE_COMPILER=ON!")
|
||||
ENDIF()
|
||||
MESSAGE(FATAL_ERROR "Cannot use CMAKE_CXX_COMPILER_LAUNCHER if the CMAKE_CXX_COMPILER is not able to compile CUDA code, i.e. nvcc_wrapper or clang++!")
|
||||
ENDIF()
|
||||
# the first argument to launcher is always the C++ compiler defined by cmake
|
||||
@ -149,56 +145,85 @@ IF(KOKKOS_CXX_COMPILER_ID STREQUAL Fujitsu)
|
||||
ENDIF()
|
||||
|
||||
# Enforce the minimum compilers supported by Kokkos.
|
||||
SET(KOKKOS_MESSAGE_TEXT "Compiler not supported by Kokkos. Required compiler versions:")
|
||||
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Clang(CPU) 8.0.0 or higher")
|
||||
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Clang(CUDA) 10.0.0 or higher")
|
||||
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Clang(OpenMPTarget) 15.0.0 or higher")
|
||||
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n GCC 8.2.0 or higher")
|
||||
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Intel 19.0.5 or higher")
|
||||
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n IntelLLVM(CPU) 2021.1.1 or higher")
|
||||
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n IntelLLVM(SYCL) 2023.0.0 or higher")
|
||||
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n NVCC 11.0.0 or higher")
|
||||
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n HIPCC 5.2.0 or higher")
|
||||
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n NVHPC/PGI 22.3 or higher")
|
||||
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n MSVC 19.29 or higher")
|
||||
IF(NOT CMAKE_CXX_STANDARD)
|
||||
SET(CMAKE_CXX_STANDARD 17)
|
||||
ENDIF()
|
||||
IF(CMAKE_CXX_STANDARD EQUAL 17)
|
||||
SET(KOKKOS_CLANG_CPU_MINIMUM 8.0.0)
|
||||
SET(KOKKOS_CLANG_CUDA_MINIMUM 10.0.0)
|
||||
SET(KOKKOS_CLANG_OPENMPTARGET_MINIMUM 15.0.0)
|
||||
SET(KOKKOS_GCC_MINIMUM 8.2.0)
|
||||
SET(KOKKOS_INTEL_MINIMUM 19.0.5)
|
||||
SET(KOKKOS_INTEL_LLVM_CPU_MINIMUM 2021.1.1)
|
||||
SET(KOKKOS_INTEL_LLVM_SYCL_MINIMUM 2023.0.0)
|
||||
SET(KOKKOS_NVCC_MINIMUM 11.0.0)
|
||||
SET(KOKKOS_HIPCC_MINIMUM 5.2.0)
|
||||
SET(KOKKOS_NVHPC_MINIMUM 22.3)
|
||||
SET(KOKKOS_MSVC_MINIMUM 19.29)
|
||||
ELSE()
|
||||
SET(KOKKOS_CLANG_CPU_MINIMUM 14.0.0)
|
||||
SET(KOKKOS_CLANG_CUDA_MINIMUM 14.0.0)
|
||||
SET(KOKKOS_CLANG_OPENMPTARGET_MINIMUM 15.0.0)
|
||||
SET(KOKKOS_GCC_MINIMUM 10.1.0)
|
||||
SET(KOKKOS_INTEL_MINIMUM "not supported")
|
||||
SET(KOKKOS_INTEL_LLVM_CPU_MINIMUM 2022.0.0)
|
||||
SET(KOKKOS_INTEL_LLVM_SYCL_MINIMUM 2023.0.0)
|
||||
SET(KOKKOS_NVCC_MINIMUM 12.0.0)
|
||||
SET(KOKKOS_HIPCC_MINIMUM 5.2.0)
|
||||
SET(KOKKOS_NVHPC_MINIMUM 22.3)
|
||||
SET(KOKKOS_MSVC_MINIMUM 19.30)
|
||||
ENDIF()
|
||||
|
||||
SET(KOKKOS_MESSAGE_TEXT "Compiler not supported by Kokkos for C++${CMAKE_CXX_STANDARD}. Required minimum compiler versions:")
|
||||
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Clang(CPU) ${KOKKOS_CLANG_CPU_MINIMUM}")
|
||||
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Clang(CUDA) ${KOKKOS_CLANG_CUDA_MINIMUM}")
|
||||
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Clang(OpenMPTarget) ${KOKKOS_CLANG_OPENMPTARGET_MINIMUM}")
|
||||
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n GCC ${KOKKOS_GCC_MINIMUM}")
|
||||
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Intel ${KOKKOS_INTEL_MINIMUM}")
|
||||
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n IntelLLVM(CPU) ${KOKKOS_INTEL_LLVM_CPU_MINIMUM}")
|
||||
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n IntelLLVM(SYCL) ${KOKKOS_INTEL_LLVM_SYCL_MINIMUM}")
|
||||
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n NVCC ${KOKKOS_NVCC_MINIMUM}")
|
||||
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n HIPCC ${KOKKOS_HIPCC_MINIMUM}")
|
||||
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n NVHPC/PGI ${KOKKOS_NVHPC_MINIMUM}")
|
||||
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n MSVC ${KOKKOS_MSVC_MINIMUM}")
|
||||
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n XL/XLClang not supported")
|
||||
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\nCompiler: ${KOKKOS_CXX_COMPILER_ID} ${KOKKOS_CXX_COMPILER_VERSION}\n")
|
||||
|
||||
IF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang AND NOT Kokkos_ENABLE_CUDA)
|
||||
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 8.0.0)
|
||||
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS ${KOKKOS_CLANG_CPU_MINIMUM})
|
||||
MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
|
||||
ENDIF()
|
||||
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang AND Kokkos_ENABLE_CUDA)
|
||||
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 10.0.0)
|
||||
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS ${KOKKOS_CLANG_CUDA_MINIMUM})
|
||||
MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
|
||||
ENDIF()
|
||||
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL GNU)
|
||||
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 8.2.0)
|
||||
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS ${KOKKOS_GCC_MINIMUM})
|
||||
MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
|
||||
ENDIF()
|
||||
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL Intel)
|
||||
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 19.0.5)
|
||||
IF((NOT CMAKE_CXX_STANDARD EQUAL 17) OR (KOKKOS_CXX_COMPILER_VERSION VERSION_LESS ${KOKKOS_INTEL_MINIMUM}))
|
||||
MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
|
||||
ENDIF()
|
||||
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL IntelLLVM AND NOT Kokkos_ENABLE_SYCL)
|
||||
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 2021.1.1)
|
||||
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS ${KOKKOS_INTEL_LLVM_CPU_MINIMUM})
|
||||
MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
|
||||
ENDIF()
|
||||
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL IntelLLVM AND Kokkos_ENABLE_SYCL)
|
||||
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 2023.0.0)
|
||||
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS ${KOKKOS_INTEL_LLVM_SYCL_MINIMUM})
|
||||
MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
|
||||
ENDIF()
|
||||
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA)
|
||||
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 11.0.0)
|
||||
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS ${KOKKOS_NVCC_MINIMUM})
|
||||
MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
|
||||
ENDIF()
|
||||
SET(CMAKE_CXX_EXTENSIONS OFF CACHE BOOL "Kokkos turns off CXX extensions" FORCE)
|
||||
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL HIPCC)
|
||||
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 5.2.0)
|
||||
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS ${KOKKOS_HIPCC_MINIMUM})
|
||||
MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
|
||||
ENDIF()
|
||||
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL PGI OR KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC)
|
||||
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 22.3)
|
||||
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS ${KOKKOS_NVHPC_MINIMUM})
|
||||
MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
|
||||
ENDIF()
|
||||
# Treat PGI internally as NVHPC to simplify handling both compilers.
|
||||
@ -206,13 +231,13 @@ ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL PGI OR KOKKOS_CXX_COMPILER_ID STREQUAL NV
|
||||
# backward-compatible to pgc++.
|
||||
SET(KOKKOS_CXX_COMPILER_ID NVHPC CACHE STRING INTERNAL FORCE)
|
||||
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL "MSVC")
|
||||
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 19.29)
|
||||
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS ${KOKKOS_MSVC_MINIMUM})
|
||||
MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
|
||||
ENDIF()
|
||||
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL XL OR KOKKOS_CXX_COMPILER_ID STREQUAL XLClang)
|
||||
MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
|
||||
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang AND Kokkos_ENABLE_OPENMPTARGET)
|
||||
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 15.0.0)
|
||||
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS KOKKOS_CLANG_OPENMPTARGET_MINIMUM)
|
||||
MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
|
||||
@ -48,6 +48,8 @@ KOKKOS_ENABLE_OPTION(CUDA_LAMBDA ${CUDA_LAMBDA_DEFAULT} "Whether to allow lambda
|
||||
# resolved but we keep the option around a bit longer to be safe.
|
||||
KOKKOS_ENABLE_OPTION(IMPL_CUDA_MALLOC_ASYNC ON "Whether to enable CudaMallocAsync (requires CUDA Toolkit 11.2)")
|
||||
KOKKOS_ENABLE_OPTION(IMPL_NVHPC_AS_DEVICE_COMPILER OFF "Whether to allow nvc++ as Cuda device compiler")
|
||||
KOKKOS_ENABLE_OPTION(IMPL_CUDA_UNIFIED_MEMORY OFF "Whether to leverage unified memory architectures for CUDA")
|
||||
|
||||
KOKKOS_ENABLE_OPTION(DEPRECATED_CODE_4 ON "Whether code deprecated in major release 4 is available" )
|
||||
KOKKOS_ENABLE_OPTION(DEPRECATION_WARNINGS ON "Whether to emit deprecation warnings" )
|
||||
KOKKOS_ENABLE_OPTION(HIP_RELOCATABLE_DEVICE_CODE OFF "Whether to enable relocatable device code (RDC) for HIP")
|
||||
@ -75,8 +77,12 @@ KOKKOS_ENABLE_OPTION(IMPL_HIP_UNIFIED_MEMORY OFF "Whether to leverage unified me
|
||||
# This option will go away eventually, but allows fallback to old implementation when needed.
|
||||
KOKKOS_ENABLE_OPTION(DESUL_ATOMICS_EXTERNAL OFF "Whether to use an external desul installation")
|
||||
KOKKOS_ENABLE_OPTION(ATOMICS_BYPASS OFF "**NOT RECOMMENDED** Whether to make atomics non-atomic for non-threaded MPI-only use cases")
|
||||
KOKKOS_ENABLE_OPTION(IMPL_REF_COUNT_BRANCH_UNLIKELY ON "Whether to use the C++20 `[[unlikely]]` attribute in the view reference counting")
|
||||
mark_as_advanced(Kokkos_ENABLE_IMPL_REF_COUNT_BRANCH_UNLIKELY)
|
||||
KOKKOS_ENABLE_OPTION(IMPL_VIEW_OF_VIEWS_DESTRUCTOR_PRECONDITION_VIOLATION_WORKAROUND OFF "Whether to enable a workaround for invalid use of View of Views that causes program hang on destruction.")
|
||||
mark_as_advanced(Kokkos_ENABLE_IMPL_VIEW_OF_VIEWS_DESTRUCTOR_PRECONDITION_VIOLATION_WORKAROUND)
|
||||
|
||||
KOKKOS_ENABLE_OPTION(IMPL_MDSPAN OFF "Whether to enable experimental mdspan support")
|
||||
KOKKOS_ENABLE_OPTION(IMPL_MDSPAN ON "Whether to enable experimental mdspan support")
|
||||
KOKKOS_ENABLE_OPTION(MDSPAN_EXTERNAL OFF BOOL "Whether to use an external version of mdspan")
|
||||
KOKKOS_ENABLE_OPTION(IMPL_SKIP_COMPILER_MDSPAN ON BOOL "Whether to use an internal version of mdspan even if the compiler supports mdspan")
|
||||
mark_as_advanced(Kokkos_ENABLE_IMPL_MDSPAN)
|
||||
@ -131,7 +137,7 @@ FUNCTION(check_device_specific_options)
|
||||
ENDIF()
|
||||
ENDFUNCTION()
|
||||
|
||||
CHECK_DEVICE_SPECIFIC_OPTIONS(DEVICE CUDA OPTIONS CUDA_UVM CUDA_RELOCATABLE_DEVICE_CODE CUDA_LAMBDA CUDA_CONSTEXPR CUDA_LDG_INTRINSIC)
|
||||
CHECK_DEVICE_SPECIFIC_OPTIONS(DEVICE CUDA OPTIONS CUDA_UVM CUDA_RELOCATABLE_DEVICE_CODE CUDA_LAMBDA CUDA_CONSTEXPR CUDA_LDG_INTRINSIC IMPL_CUDA_UNIFIED_MEMORY)
|
||||
CHECK_DEVICE_SPECIFIC_OPTIONS(DEVICE HIP OPTIONS HIP_RELOCATABLE_DEVICE_CODE)
|
||||
CHECK_DEVICE_SPECIFIC_OPTIONS(DEVICE HPX OPTIONS IMPL_HPX_ASYNC_DISPATCH)
|
||||
|
||||
|
||||
@ -709,7 +709,12 @@ MACRO(kokkos_find_imported NAME)
|
||||
ENDIF()
|
||||
|
||||
IF (NOT TPL_LIBRARY_SUFFIXES)
|
||||
SET(TPL_LIBRARY_SUFFIXES lib lib64)
|
||||
SET(TPL_LIBRARY_SUFFIXES lib)
|
||||
IF(KOKKOS_IMPL_32BIT)
|
||||
LIST(APPEND TPL_LIBRARY_SUFFIXES lib32)
|
||||
ELSE()
|
||||
LIST(APPEND TPL_LIBRARY_SUFFIXES lib64)
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
|
||||
SET(${NAME}_INCLUDE_DIRS)
|
||||
|
||||
@ -124,12 +124,8 @@ IF(KOKKOS_ENABLE_CUDA)
|
||||
ELSEIF(CMAKE_CXX_EXTENSIONS)
|
||||
MESSAGE(FATAL_ERROR "Compiling CUDA code with clang doesn't support C++ extensions. Set -DCMAKE_CXX_EXTENSIONS=OFF")
|
||||
ENDIF()
|
||||
ELSEIF(NOT KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA AND NOT (Kokkos_ENABLE_IMPL_NVHPC_AS_DEVICE_COMPILER AND KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC))
|
||||
IF(KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC)
|
||||
MESSAGE(FATAL_ERROR "Invalid compiler for CUDA. To allow nvc++ as Cuda compiler, Kokkos_ENABLE_IMPL_NVHPC_AS_DEVICE_COMPILER=ON must be set!")
|
||||
ELSE()
|
||||
MESSAGE(FATAL_ERROR "Invalid compiler for CUDA. The compiler must be nvcc_wrapper or Clang or NVC++ or use kokkos_launch_compiler, but compiler ID was ${KOKKOS_CXX_COMPILER_ID}")
|
||||
ENDIF()
|
||||
ELSEIF(NOT KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA)
|
||||
MESSAGE(FATAL_ERROR "Invalid compiler for CUDA. The compiler must be nvcc_wrapper or Clang or use kokkos_launch_compiler, but compiler ID was ${KOKKOS_CXX_COMPILER_ID}")
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
|
||||
|
||||
@ -103,13 +103,19 @@ if (Kokkos_ENABLE_IMPL_MDSPAN AND Kokkos_ENABLE_MDSPAN_EXTERNAL)
|
||||
endif()
|
||||
|
||||
IF (Kokkos_ENABLE_OPENMP)
|
||||
find_package(OpenMP REQUIRED)
|
||||
find_package(OpenMP REQUIRED COMPONENTS CXX)
|
||||
# FIXME_TRILINOS Trilinos doesn't allow for Kokkos to use find_dependency
|
||||
# so we just append the flags here instead of linking with the OpenMP target.
|
||||
IF(KOKKOS_HAS_TRILINOS)
|
||||
COMPILER_SPECIFIC_FLAGS(DEFAULT ${OpenMP_CXX_FLAGS})
|
||||
ELSE()
|
||||
KOKKOS_EXPORT_CMAKE_TPL(OpenMP REQUIRED)
|
||||
KOKKOS_EXPORT_CMAKE_TPL(OpenMP REQUIRED COMPONENTS CXX)
|
||||
ENDIF()
|
||||
IF(Kokkos_ENABLE_HIP AND KOKKOS_COMPILE_LANGUAGE STREQUAL HIP)
|
||||
GLOBAL_APPEND(KOKKOS_AMDGPU_OPTIONS ${OpenMP_CXX_FLAGS})
|
||||
ENDIF()
|
||||
IF(Kokkos_ENABLE_CUDA AND KOKKOS_COMPILE_LANGUAGE STREQUAL CUDA)
|
||||
GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS -Xcompiler ${OpenMP_CXX_FLAGS})
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
|
||||
|
||||
@ -160,6 +160,12 @@ FUNCTION(KOKKOS_ADD_EXECUTABLE_AND_TEST ROOT_NAME)
|
||||
)
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
# We noticed problems with -fvisibility=hidden for inline static variables
|
||||
# if Kokkos was built as shared library.
|
||||
IF(BUILD_SHARED_LIBS)
|
||||
SET_PROPERTY(TARGET ${PACKAGE_NAME}_${ROOT_NAME} PROPERTY VISIBILITY_INLINES_HIDDEN ON)
|
||||
SET_PROPERTY(TARGET ${PACKAGE_NAME}_${ROOT_NAME} PROPERTY CXX_VISIBILITY_PRESET hidden)
|
||||
ENDIF()
|
||||
ENDFUNCTION()
|
||||
|
||||
FUNCTION(KOKKOS_SET_EXE_PROPERTY ROOT_NAME)
|
||||
@ -241,34 +247,6 @@ MACRO(KOKKOS_CONFIGURE_CORE)
|
||||
KOKKOS_CONFIG_HEADER( KokkosCore_Config_HeaderSet.in KokkosCore_Config_FwdBackend.hpp "KOKKOS_FWD" "fwd/Kokkos_Fwd" "${KOKKOS_ENABLED_DEVICES}")
|
||||
KOKKOS_CONFIG_HEADER( KokkosCore_Config_HeaderSet.in KokkosCore_Config_SetupBackend.hpp "KOKKOS_SETUP" "setup/Kokkos_Setup" "${DEVICE_SETUP_LIST}")
|
||||
KOKKOS_CONFIG_HEADER( KokkosCore_Config_HeaderSet.in KokkosCore_Config_DeclareBackend.hpp "KOKKOS_DECLARE" "decl/Kokkos_Declare" "${KOKKOS_ENABLED_DEVICES}")
|
||||
SET(_DEFAULT_HOST_MEMSPACE "::Kokkos::HostSpace")
|
||||
KOKKOS_OPTION(DEFAULT_DEVICE_MEMORY_SPACE "" STRING "Override default device memory space")
|
||||
KOKKOS_OPTION(DEFAULT_HOST_MEMORY_SPACE "" STRING "Override default host memory space")
|
||||
KOKKOS_OPTION(DEFAULT_DEVICE_EXECUTION_SPACE "" STRING "Override default device execution space")
|
||||
KOKKOS_OPTION(DEFAULT_HOST_PARALLEL_EXECUTION_SPACE "" STRING "Override default host parallel execution space")
|
||||
IF (NOT Kokkos_DEFAULT_DEVICE_EXECUTION_SPACE STREQUAL "")
|
||||
SET(_DEVICE_PARALLEL ${Kokkos_DEFAULT_DEVICE_EXECUTION_SPACE})
|
||||
MESSAGE(STATUS "Override default device execution space: ${_DEVICE_PARALLEL}")
|
||||
SET(KOKKOS_DEVICE_SPACE_ACTIVE ON)
|
||||
ELSE()
|
||||
IF (_DEVICE_PARALLEL STREQUAL "NoTypeDefined")
|
||||
SET(KOKKOS_DEVICE_SPACE_ACTIVE OFF)
|
||||
ELSE()
|
||||
SET(KOKKOS_DEVICE_SPACE_ACTIVE ON)
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
IF (NOT Kokkos_DEFAULT_HOST_PARALLEL_EXECUTION_SPACE STREQUAL "")
|
||||
SET(_HOST_PARALLEL ${Kokkos_DEFAULT_HOST_PARALLEL_EXECUTION_SPACE})
|
||||
MESSAGE(STATUS "Override default host parallel execution space: ${_HOST_PARALLEL}")
|
||||
SET(KOKKOS_HOSTPARALLEL_SPACE_ACTIVE ON)
|
||||
ELSE()
|
||||
IF (_HOST_PARALLEL STREQUAL "NoTypeDefined")
|
||||
SET(KOKKOS_HOSTPARALLEL_SPACE_ACTIVE OFF)
|
||||
ELSE()
|
||||
SET(KOKKOS_HOSTPARALLEL_SPACE_ACTIVE ON)
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
#We are ready to configure the header
|
||||
CONFIGURE_FILE(cmake/KokkosCore_config.h.in KokkosCore_config.h @ONLY)
|
||||
ENDMACRO()
|
||||
|
||||
@ -484,15 +462,10 @@ ENDFUNCTION()
|
||||
|
||||
|
||||
FUNCTION(KOKKOS_LIB_INCLUDE_DIRECTORIES TARGET)
|
||||
IF(KOKKOS_HAS_TRILINOS)
|
||||
#ignore the target, tribits doesn't do anything directly with targets
|
||||
TRIBITS_INCLUDE_DIRECTORIES(${ARGN})
|
||||
ELSE() #append to a list for later
|
||||
KOKKOS_LIB_TYPE(${TARGET} INCTYPE)
|
||||
FOREACH(DIR ${ARGN})
|
||||
TARGET_INCLUDE_DIRECTORIES(${TARGET} ${INCTYPE} $<BUILD_INTERFACE:${DIR}>)
|
||||
ENDFOREACH()
|
||||
ENDIF()
|
||||
ENDFUNCTION()
|
||||
|
||||
FUNCTION(KOKKOS_LIB_COMPILE_OPTIONS TARGET)
|
||||
|
||||
@ -1,26 +0,0 @@
|
||||
#@HEADER
|
||||
# ************************************************************************
|
||||
#
|
||||
# Kokkos v. 4.0
|
||||
# Copyright (2022) National Technology & Engineering
|
||||
# Solutions of Sandia, LLC (NTESS).
|
||||
#
|
||||
# Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
# the U.S. Government retains certain rights in this software.
|
||||
#
|
||||
# Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
#
|
||||
#@HEADER
|
||||
|
||||
# Check for CUDA support
|
||||
|
||||
IF (NOT TPL_ENABLE_CUDA)
|
||||
MESSAGE(FATAL_ERROR "\nCUSPARSE requires CUDA")
|
||||
ELSE()
|
||||
GLOBAL_SET(TPL_CUSPARSE_LIBRARY_DIRS)
|
||||
GLOBAL_SET(TPL_CUSPARSE_INCLUDE_DIRS ${TPL_CUDA_INCLUDE_DIRS})
|
||||
GLOBAL_SET(TPL_CUSPARSE_LIBRARIES ${CUDA_cusparse_LIBRARY})
|
||||
ENDIF()
|
||||
|
||||
@ -944,13 +944,13 @@ class DualView : public ViewTraits<DataType, Properties...> {
|
||||
|
||||
if (sizeMismatch) {
|
||||
::Kokkos::realloc(arg_prop, d_view, n0, n1, n2, n3, n4, n5, n6, n7);
|
||||
if (alloc_prop_input::initialize) {
|
||||
if constexpr (alloc_prop_input::initialize) {
|
||||
h_view = create_mirror_view(typename t_host::memory_space(), d_view);
|
||||
} else {
|
||||
h_view = create_mirror_view(Kokkos::WithoutInitializing,
|
||||
typename t_host::memory_space(), d_view);
|
||||
}
|
||||
} else if (alloc_prop_input::initialize) {
|
||||
} else if constexpr (alloc_prop_input::initialize) {
|
||||
if constexpr (alloc_prop_input::has_execution_space) {
|
||||
const auto& exec_space =
|
||||
Impl::get_property<Impl::ExecutionSpaceTag>(arg_prop);
|
||||
@ -1038,12 +1038,10 @@ class DualView : public ViewTraits<DataType, Properties...> {
|
||||
/* Resize on Device */
|
||||
if (sizeMismatch) {
|
||||
::Kokkos::resize(properties, d_view, n0, n1, n2, n3, n4, n5, n6, n7);
|
||||
if (alloc_prop_input::initialize) {
|
||||
h_view = create_mirror_view(typename t_host::memory_space(), d_view);
|
||||
} else {
|
||||
h_view = create_mirror_view(Kokkos::WithoutInitializing,
|
||||
typename t_host::memory_space(), d_view);
|
||||
}
|
||||
// this part of the lambda was relocated into a method as it contains an
|
||||
// `if constexpr`. In some cases, both branches were evaluated
|
||||
// leading to a compile error
|
||||
resync_host(properties);
|
||||
|
||||
/* Mark Device copy as modified */
|
||||
++modified_flags(1);
|
||||
@ -1054,13 +1052,10 @@ class DualView : public ViewTraits<DataType, Properties...> {
|
||||
/* Resize on Host */
|
||||
if (sizeMismatch) {
|
||||
::Kokkos::resize(properties, h_view, n0, n1, n2, n3, n4, n5, n6, n7);
|
||||
if (alloc_prop_input::initialize) {
|
||||
d_view = create_mirror_view(typename t_dev::memory_space(), h_view);
|
||||
|
||||
} else {
|
||||
d_view = create_mirror_view(Kokkos::WithoutInitializing,
|
||||
typename t_dev::memory_space(), h_view);
|
||||
}
|
||||
// this part of the lambda was relocated into a method as it contains an
|
||||
// `if constexpr`. In some cases, both branches were evaluated
|
||||
// leading to a compile error
|
||||
resync_device(properties);
|
||||
|
||||
/* Mark Host copy as modified */
|
||||
++modified_flags(0);
|
||||
@ -1099,6 +1094,39 @@ class DualView : public ViewTraits<DataType, Properties...> {
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
// resync host mirror from device
|
||||
// this code was relocated from a lambda as it contains an `if constexpr`.
|
||||
// In some cases, both branches were evaluated, leading to a compile error
|
||||
template <class... ViewCtorArgs>
|
||||
inline void resync_host(Impl::ViewCtorProp<ViewCtorArgs...> const&) {
|
||||
using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>;
|
||||
|
||||
if constexpr (alloc_prop_input::initialize) {
|
||||
h_view = create_mirror_view(typename t_host::memory_space(), d_view);
|
||||
} else {
|
||||
h_view = create_mirror_view(Kokkos::WithoutInitializing,
|
||||
typename t_host::memory_space(), d_view);
|
||||
}
|
||||
}
|
||||
|
||||
// resync device mirror from host
|
||||
// this code was relocated from a lambda as it contains an `if constexpr`.
|
||||
// In some cases, both branches were evaluated leading to a compile error
|
||||
template <class... ViewCtorArgs>
|
||||
inline void resync_device(Impl::ViewCtorProp<ViewCtorArgs...> const&) {
|
||||
using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>;
|
||||
|
||||
if constexpr (alloc_prop_input::initialize) {
|
||||
d_view = create_mirror_view(typename t_dev::memory_space(), h_view);
|
||||
|
||||
} else {
|
||||
d_view = create_mirror_view(Kokkos::WithoutInitializing,
|
||||
typename t_dev::memory_space(), h_view);
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
void resize(const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
|
||||
const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
|
||||
const size_t n2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
|
||||
|
||||
@ -1657,8 +1657,7 @@ KOKKOS_FUNCTION auto as_view_of_rank_n(
|
||||
|
||||
if constexpr (std::is_same_v<decltype(layout), Kokkos::LayoutLeft> ||
|
||||
std::is_same_v<decltype(layout), Kokkos::LayoutRight> ||
|
||||
std::is_same_v<decltype(layout), Kokkos::LayoutStride> ||
|
||||
is_layouttiled<decltype(layout)>::value) {
|
||||
std::is_same_v<decltype(layout), Kokkos::LayoutStride>) {
|
||||
for (int i = N; i < 7; ++i)
|
||||
layout.dimension[i] = KOKKOS_IMPL_CTOR_DEFAULT_ARG;
|
||||
}
|
||||
@ -1933,254 +1932,155 @@ struct MirrorDRVType {
|
||||
} // namespace Impl
|
||||
|
||||
namespace Impl {
|
||||
|
||||
// create a mirror
|
||||
// private interface that accepts arbitrary view constructor args passed by a
|
||||
// view_alloc
|
||||
template <class T, class... P, class... ViewCtorArgs>
|
||||
inline typename DynRankView<T, P...>::HostMirror create_mirror(
|
||||
const DynRankView<T, P...>& src,
|
||||
const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop,
|
||||
std::enable_if_t<!Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space>* =
|
||||
nullptr) {
|
||||
using src_type = DynRankView<T, P...>;
|
||||
using dst_type = typename src_type::HostMirror;
|
||||
|
||||
using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>;
|
||||
|
||||
static_assert(
|
||||
!alloc_prop_input::has_label,
|
||||
"The view constructor arguments passed to Kokkos::create_mirror "
|
||||
"must not include a label!");
|
||||
static_assert(
|
||||
!alloc_prop_input::has_pointer,
|
||||
"The view constructor arguments passed to Kokkos::create_mirror must "
|
||||
"not include a pointer!");
|
||||
static_assert(
|
||||
!alloc_prop_input::allow_padding,
|
||||
"The view constructor arguments passed to Kokkos::create_mirror must "
|
||||
"not explicitly allow padding!");
|
||||
inline auto create_mirror(const DynRankView<T, P...>& src,
|
||||
const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop) {
|
||||
check_view_ctor_args_create_mirror<ViewCtorArgs...>();
|
||||
|
||||
auto prop_copy = Impl::with_properties_if_unset(
|
||||
arg_prop, std::string(src.label()).append("_mirror"));
|
||||
|
||||
return dst_type(prop_copy, Impl::reconstructLayout(src.layout(), src.rank()));
|
||||
}
|
||||
|
||||
template <class T, class... P, class... ViewCtorArgs>
|
||||
inline auto create_mirror(
|
||||
const DynRankView<T, P...>& src,
|
||||
const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop,
|
||||
std::enable_if_t<Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space>* =
|
||||
nullptr) {
|
||||
if constexpr (Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space) {
|
||||
using dst_type = typename Impl::MirrorDRVType<
|
||||
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space, T,
|
||||
P...>::view_type;
|
||||
|
||||
using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>;
|
||||
return dst_type(prop_copy,
|
||||
Impl::reconstructLayout(src.layout(), src.rank()));
|
||||
} else {
|
||||
using src_type = DynRankView<T, P...>;
|
||||
using dst_type = typename src_type::HostMirror;
|
||||
|
||||
static_assert(
|
||||
!alloc_prop_input::has_label,
|
||||
"The view constructor arguments passed to Kokkos::create_mirror "
|
||||
"must not include a label!");
|
||||
static_assert(
|
||||
!alloc_prop_input::has_pointer,
|
||||
"The view constructor arguments passed to Kokkos::create_mirror must "
|
||||
"not include a pointer!");
|
||||
static_assert(
|
||||
!alloc_prop_input::allow_padding,
|
||||
"The view constructor arguments passed to Kokkos::create_mirror must "
|
||||
"not explicitly allow padding!");
|
||||
|
||||
auto prop_copy = Impl::with_properties_if_unset(
|
||||
arg_prop, std::string(src.label()).append("_mirror"));
|
||||
|
||||
return dst_type(prop_copy, Impl::reconstructLayout(src.layout(), src.rank()));
|
||||
return dst_type(prop_copy,
|
||||
Impl::reconstructLayout(src.layout(), src.rank()));
|
||||
}
|
||||
#if defined(KOKKOS_COMPILER_INTEL) || \
|
||||
(defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
|
||||
!defined(KOKKOS_COMPILER_MSVC))
|
||||
__builtin_unreachable();
|
||||
#endif
|
||||
}
|
||||
|
||||
} // namespace Impl
|
||||
|
||||
// Create a mirror in host space
|
||||
template <class T, class... P>
|
||||
inline typename DynRankView<T, P...>::HostMirror create_mirror(
|
||||
const DynRankView<T, P...>& src,
|
||||
std::enable_if_t<std::is_same<typename ViewTraits<T, P...>::specialize,
|
||||
void>::value>* = nullptr) {
|
||||
return Impl::create_mirror(src, Kokkos::Impl::ViewCtorProp<>{});
|
||||
// public interface
|
||||
template <class T, class... P,
|
||||
class Enable = std::enable_if_t<
|
||||
std::is_void_v<typename ViewTraits<T, P...>::specialize>>>
|
||||
inline auto create_mirror(const DynRankView<T, P...>& src) {
|
||||
return Impl::create_mirror(src, Kokkos::view_alloc());
|
||||
}
|
||||
|
||||
template <class T, class... P>
|
||||
inline typename DynRankView<T, P...>::HostMirror create_mirror(
|
||||
Kokkos::Impl::WithoutInitializing_t wi, const DynRankView<T, P...>& src,
|
||||
std::enable_if_t<std::is_same<typename ViewTraits<T, P...>::specialize,
|
||||
void>::value>* = nullptr) {
|
||||
// public interface that accepts a without initializing flag
|
||||
template <class T, class... P,
|
||||
class Enable = std::enable_if_t<
|
||||
std::is_void_v<typename ViewTraits<T, P...>::specialize>>>
|
||||
inline auto create_mirror(Kokkos::Impl::WithoutInitializing_t wi,
|
||||
const DynRankView<T, P...>& src) {
|
||||
return Impl::create_mirror(src, Kokkos::view_alloc(wi));
|
||||
}
|
||||
|
||||
template <class T, class... P, class... ViewCtorArgs>
|
||||
inline typename DynRankView<T, P...>::HostMirror create_mirror(
|
||||
const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop,
|
||||
const DynRankView<T, P...>& src,
|
||||
std::enable_if_t<
|
||||
std::is_void<typename ViewTraits<T, P...>::specialize>::value &&
|
||||
!Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space>* = nullptr) {
|
||||
return Impl::create_mirror(src, arg_prop);
|
||||
}
|
||||
|
||||
// Create a mirror in a new space
|
||||
// public interface that accepts a space
|
||||
template <class Space, class T, class... P,
|
||||
typename Enable = std::enable_if_t<
|
||||
class Enable = std::enable_if_t<
|
||||
Kokkos::is_space<Space>::value &&
|
||||
std::is_void<typename ViewTraits<T, P...>::specialize>::value>>
|
||||
typename Impl::MirrorDRVType<Space, T, P...>::view_type create_mirror(
|
||||
const Space&, const Kokkos::DynRankView<T, P...>& src) {
|
||||
std::is_void_v<typename ViewTraits<T, P...>::specialize>>>
|
||||
auto create_mirror(const Space&, const Kokkos::DynRankView<T, P...>& src) {
|
||||
return Impl::create_mirror(
|
||||
src, Kokkos::view_alloc(typename Space::memory_space{}));
|
||||
}
|
||||
|
||||
template <class Space, class T, class... P>
|
||||
typename Impl::MirrorDRVType<Space, T, P...>::view_type create_mirror(
|
||||
Kokkos::Impl::WithoutInitializing_t wi, const Space&,
|
||||
const Kokkos::DynRankView<T, P...>& src,
|
||||
std::enable_if_t<std::is_same<typename ViewTraits<T, P...>::specialize,
|
||||
void>::value>* = nullptr) {
|
||||
// public interface that accepts a space and a without initializing flag
|
||||
template <class Space, class T, class... P,
|
||||
class Enable = std::enable_if_t<
|
||||
Kokkos::is_space<Space>::value &&
|
||||
std::is_void_v<typename ViewTraits<T, P...>::specialize>>>
|
||||
auto create_mirror(Kokkos::Impl::WithoutInitializing_t wi, const Space&,
|
||||
const Kokkos::DynRankView<T, P...>& src) {
|
||||
return Impl::create_mirror(
|
||||
src, Kokkos::view_alloc(wi, typename Space::memory_space{}));
|
||||
}
|
||||
|
||||
template <class T, class... P, class... ViewCtorArgs>
|
||||
inline auto create_mirror(
|
||||
const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop,
|
||||
const DynRankView<T, P...>& src,
|
||||
std::enable_if_t<
|
||||
std::is_void<typename ViewTraits<T, P...>::specialize>::value &&
|
||||
Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space>* = nullptr) {
|
||||
using ReturnType = typename Impl::MirrorDRVType<
|
||||
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space, T,
|
||||
P...>::view_type;
|
||||
return ReturnType{Impl::create_mirror(src, arg_prop)};
|
||||
// public interface that accepts arbitrary view constructor args passed by a
|
||||
// view_alloc
|
||||
template <class T, class... P, class... ViewCtorArgs,
|
||||
typename Enable = std::enable_if_t<
|
||||
std::is_void_v<typename ViewTraits<T, P...>::specialize>>>
|
||||
inline auto create_mirror(const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop,
|
||||
const DynRankView<T, P...>& src) {
|
||||
return Impl::create_mirror(src, arg_prop);
|
||||
}
|
||||
|
||||
namespace Impl {
|
||||
template <class T, class... P, class... ViewCtorArgs>
|
||||
inline std::enable_if_t<
|
||||
!Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space &&
|
||||
std::is_same<
|
||||
typename DynRankView<T, P...>::memory_space,
|
||||
typename DynRankView<T, P...>::HostMirror::memory_space>::value &&
|
||||
std::is_same<
|
||||
typename DynRankView<T, P...>::data_type,
|
||||
typename DynRankView<T, P...>::HostMirror::data_type>::value,
|
||||
typename DynRankView<T, P...>::HostMirror>
|
||||
create_mirror_view(const DynRankView<T, P...>& src,
|
||||
const typename Impl::ViewCtorProp<ViewCtorArgs...>&) {
|
||||
return src;
|
||||
}
|
||||
|
||||
// create a mirror view
|
||||
// private interface that accepts arbitrary view constructor args passed by a
|
||||
// view_alloc
|
||||
template <class T, class... P, class... ViewCtorArgs>
|
||||
inline std::enable_if_t<
|
||||
!Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space &&
|
||||
!(std::is_same<
|
||||
typename DynRankView<T, P...>::memory_space,
|
||||
typename DynRankView<T, P...>::HostMirror::memory_space>::value &&
|
||||
std::is_same<
|
||||
typename DynRankView<T, P...>::data_type,
|
||||
typename DynRankView<T, P...>::HostMirror::data_type>::value),
|
||||
typename DynRankView<T, P...>::HostMirror>
|
||||
create_mirror_view(
|
||||
inline auto create_mirror_view(
|
||||
const DynRankView<T, P...>& src,
|
||||
const typename Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop) {
|
||||
return Kokkos::Impl::create_mirror(src, arg_prop);
|
||||
[[maybe_unused]] const typename Impl::ViewCtorProp<ViewCtorArgs...>&
|
||||
arg_prop) {
|
||||
if constexpr (!Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space) {
|
||||
if constexpr (std::is_same<typename DynRankView<T, P...>::memory_space,
|
||||
typename DynRankView<
|
||||
T, P...>::HostMirror::memory_space>::value &&
|
||||
std::is_same<typename DynRankView<T, P...>::data_type,
|
||||
typename DynRankView<
|
||||
T, P...>::HostMirror::data_type>::value) {
|
||||
return typename DynRankView<T, P...>::HostMirror(src);
|
||||
} else {
|
||||
return Kokkos::Impl::choose_create_mirror(src, arg_prop);
|
||||
}
|
||||
} else {
|
||||
if constexpr (Impl::MirrorDRViewType<typename Impl::ViewCtorProp<
|
||||
ViewCtorArgs...>::memory_space,
|
||||
T, P...>::is_same_memspace) {
|
||||
return typename Impl::MirrorDRViewType<
|
||||
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space, T,
|
||||
P...>::view_type(src);
|
||||
} else {
|
||||
return Kokkos::Impl::choose_create_mirror(src, arg_prop);
|
||||
}
|
||||
}
|
||||
#if defined(KOKKOS_COMPILER_INTEL) || \
|
||||
(defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
|
||||
!defined(KOKKOS_COMPILER_MSVC))
|
||||
__builtin_unreachable();
|
||||
#endif
|
||||
}
|
||||
|
||||
template <class T, class... P, class... ViewCtorArgs,
|
||||
class = std::enable_if_t<
|
||||
Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space>>
|
||||
inline std::enable_if_t<
|
||||
Kokkos::is_space<
|
||||
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space>::value &&
|
||||
Impl::MirrorDRViewType<
|
||||
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space, T,
|
||||
P...>::is_same_memspace,
|
||||
typename Impl::MirrorDRViewType<
|
||||
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space, T,
|
||||
P...>::view_type>
|
||||
create_mirror_view(const Kokkos::DynRankView<T, P...>& src,
|
||||
const typename Impl::ViewCtorProp<ViewCtorArgs...>&) {
|
||||
return src;
|
||||
}
|
||||
|
||||
template <class T, class... P, class... ViewCtorArgs,
|
||||
class = std::enable_if_t<
|
||||
Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space>>
|
||||
inline std::enable_if_t<
|
||||
Kokkos::is_space<
|
||||
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space>::value &&
|
||||
!Impl::MirrorDRViewType<
|
||||
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space, T,
|
||||
P...>::is_same_memspace,
|
||||
typename Impl::MirrorDRViewType<
|
||||
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space, T,
|
||||
P...>::view_type>
|
||||
create_mirror_view(
|
||||
const Kokkos::DynRankView<T, P...>& src,
|
||||
const typename Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop) {
|
||||
return Kokkos::Impl::create_mirror(src, arg_prop);
|
||||
}
|
||||
} // namespace Impl
|
||||
|
||||
// Create a mirror view in host space
|
||||
// public interface
|
||||
template <class T, class... P>
|
||||
inline std::enable_if_t<
|
||||
(std::is_same<
|
||||
typename DynRankView<T, P...>::memory_space,
|
||||
typename DynRankView<T, P...>::HostMirror::memory_space>::value &&
|
||||
std::is_same<typename DynRankView<T, P...>::data_type,
|
||||
typename DynRankView<T, P...>::HostMirror::data_type>::value),
|
||||
typename DynRankView<T, P...>::HostMirror>
|
||||
create_mirror_view(const Kokkos::DynRankView<T, P...>& src) {
|
||||
return src;
|
||||
}
|
||||
|
||||
template <class T, class... P>
|
||||
inline std::enable_if_t<
|
||||
!(std::is_same<
|
||||
typename DynRankView<T, P...>::memory_space,
|
||||
typename DynRankView<T, P...>::HostMirror::memory_space>::value &&
|
||||
std::is_same<
|
||||
typename DynRankView<T, P...>::data_type,
|
||||
typename DynRankView<T, P...>::HostMirror::data_type>::value),
|
||||
typename DynRankView<T, P...>::HostMirror>
|
||||
create_mirror_view(const Kokkos::DynRankView<T, P...>& src) {
|
||||
return Kokkos::create_mirror(src);
|
||||
inline auto create_mirror_view(const Kokkos::DynRankView<T, P...>& src) {
|
||||
return Impl::create_mirror_view(src, Kokkos::view_alloc());
|
||||
}
|
||||
|
||||
// public interface that accepts a without initializing flag
|
||||
template <class T, class... P>
|
||||
inline auto create_mirror_view(Kokkos::Impl::WithoutInitializing_t wi,
|
||||
const DynRankView<T, P...>& src) {
|
||||
return Impl::create_mirror_view(src, Kokkos::view_alloc(wi));
|
||||
}
|
||||
|
||||
// Create a mirror view in a new space
|
||||
// FIXME_C++17 Improve SFINAE here.
|
||||
// public interface that accepts a space
|
||||
template <class Space, class T, class... P,
|
||||
class Enable = std::enable_if_t<Kokkos::is_space<Space>::value>>
|
||||
inline typename Impl::MirrorDRViewType<Space, T, P...>::view_type
|
||||
create_mirror_view(
|
||||
const Space&, const Kokkos::DynRankView<T, P...>& src,
|
||||
std::enable_if_t<
|
||||
Impl::MirrorDRViewType<Space, T, P...>::is_same_memspace>* = nullptr) {
|
||||
return src;
|
||||
inline auto create_mirror_view(const Space&,
|
||||
const Kokkos::DynRankView<T, P...>& src) {
|
||||
return Impl::create_mirror_view(
|
||||
src, Kokkos::view_alloc(typename Space::memory_space()));
|
||||
}
|
||||
|
||||
// FIXME_C++17 Improve SFINAE here.
|
||||
// public interface that accepts a space and a without initializing flag
|
||||
template <class Space, class T, class... P,
|
||||
class Enable = std::enable_if_t<Kokkos::is_space<Space>::value>>
|
||||
inline typename Impl::MirrorDRViewType<Space, T, P...>::view_type
|
||||
create_mirror_view(
|
||||
const Space& space, const Kokkos::DynRankView<T, P...>& src,
|
||||
std::enable_if_t<
|
||||
!Impl::MirrorDRViewType<Space, T, P...>::is_same_memspace>* = nullptr) {
|
||||
return Kokkos::create_mirror(space, src);
|
||||
}
|
||||
|
||||
template <class Space, class T, class... P>
|
||||
typename Enable = std::enable_if_t<Kokkos::is_space<Space>::value>>
|
||||
inline auto create_mirror_view(Kokkos::Impl::WithoutInitializing_t wi,
|
||||
const Space&,
|
||||
const Kokkos::DynRankView<T, P...>& src) {
|
||||
@ -2188,6 +2088,8 @@ inline auto create_mirror_view(Kokkos::Impl::WithoutInitializing_t wi,
|
||||
src, Kokkos::view_alloc(typename Space::memory_space{}, wi));
|
||||
}
|
||||
|
||||
// public interface that accepts arbitrary view constructor args passed by a
|
||||
// view_alloc
|
||||
template <class T, class... P, class... ViewCtorArgs>
|
||||
inline auto create_mirror_view(
|
||||
const typename Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop,
|
||||
@ -2195,58 +2097,29 @@ inline auto create_mirror_view(
|
||||
return Impl::create_mirror_view(src, arg_prop);
|
||||
}
|
||||
|
||||
template <class... ViewCtorArgs, class T, class... P>
|
||||
// create a mirror view and deep copy it
|
||||
// public interface that accepts arbitrary view constructor args passed by a
|
||||
// view_alloc
|
||||
template <class... ViewCtorArgs, class T, class... P,
|
||||
class Enable = std::enable_if_t<
|
||||
std::is_void<typename ViewTraits<T, P...>::specialize>::value>>
|
||||
auto create_mirror_view_and_copy(
|
||||
const Impl::ViewCtorProp<ViewCtorArgs...>&,
|
||||
const Kokkos::DynRankView<T, P...>& src,
|
||||
std::enable_if_t<
|
||||
std::is_void<typename ViewTraits<T, P...>::specialize>::value &&
|
||||
Impl::MirrorDRViewType<
|
||||
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space, T,
|
||||
P...>::is_same_memspace>* = nullptr) {
|
||||
[[maybe_unused]] const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop,
|
||||
const Kokkos::DynRankView<T, P...>& src) {
|
||||
using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>;
|
||||
static_assert(
|
||||
alloc_prop_input::has_memory_space,
|
||||
"The view constructor arguments passed to "
|
||||
"Kokkos::create_mirror_view_and_copy must include a memory space!");
|
||||
static_assert(!alloc_prop_input::has_pointer,
|
||||
"The view constructor arguments passed to "
|
||||
"Kokkos::create_mirror_view_and_copy must "
|
||||
"not include a pointer!");
|
||||
static_assert(!alloc_prop_input::allow_padding,
|
||||
"The view constructor arguments passed to "
|
||||
"Kokkos::create_mirror_view_and_copy must "
|
||||
"not explicitly allow padding!");
|
||||
|
||||
Impl::check_view_ctor_args_create_mirror_view_and_copy<ViewCtorArgs...>();
|
||||
|
||||
if constexpr (Impl::MirrorDRViewType<
|
||||
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space,
|
||||
T, P...>::is_same_memspace) {
|
||||
// same behavior as deep_copy(src, src)
|
||||
if (!alloc_prop_input::has_execution_space)
|
||||
if constexpr (!alloc_prop_input::has_execution_space)
|
||||
fence(
|
||||
"Kokkos::create_mirror_view_and_copy: fence before returning src view");
|
||||
"Kokkos::create_mirror_view_and_copy: fence before returning src "
|
||||
"view");
|
||||
return src;
|
||||
}
|
||||
|
||||
template <class... ViewCtorArgs, class T, class... P>
|
||||
auto create_mirror_view_and_copy(
|
||||
const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop,
|
||||
const Kokkos::DynRankView<T, P...>& src,
|
||||
std::enable_if_t<
|
||||
std::is_void<typename ViewTraits<T, P...>::specialize>::value &&
|
||||
!Impl::MirrorDRViewType<
|
||||
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space, T,
|
||||
P...>::is_same_memspace>* = nullptr) {
|
||||
using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>;
|
||||
static_assert(
|
||||
alloc_prop_input::has_memory_space,
|
||||
"The view constructor arguments passed to "
|
||||
"Kokkos::create_mirror_view_and_copy must include a memory space!");
|
||||
static_assert(!alloc_prop_input::has_pointer,
|
||||
"The view constructor arguments passed to "
|
||||
"Kokkos::create_mirror_view_and_copy must "
|
||||
"not include a pointer!");
|
||||
static_assert(!alloc_prop_input::allow_padding,
|
||||
"The view constructor arguments passed to "
|
||||
"Kokkos::create_mirror_view_and_copy must "
|
||||
"not explicitly allow padding!");
|
||||
} else {
|
||||
using Space = typename alloc_prop_input::memory_space;
|
||||
using Mirror = typename Impl::MirrorDRViewType<Space, T, P...>::view_type;
|
||||
|
||||
@ -2265,6 +2138,11 @@ auto create_mirror_view_and_copy(
|
||||
deep_copy(mirror, src);
|
||||
return mirror;
|
||||
}
|
||||
#if defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
|
||||
!defined(KOKKOS_COMPILER_MSVC)
|
||||
__builtin_unreachable();
|
||||
#endif
|
||||
}
|
||||
|
||||
template <class Space, class T, class... P>
|
||||
auto create_mirror_view_and_copy(const Space&,
|
||||
|
||||
@ -590,62 +590,21 @@ struct MirrorDynamicViewType {
|
||||
} // namespace Impl
|
||||
|
||||
namespace Impl {
|
||||
template <class T, class... P, class... ViewCtorArgs>
|
||||
inline auto create_mirror(
|
||||
const Kokkos::Experimental::DynamicView<T, P...>& src,
|
||||
const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop,
|
||||
std::enable_if_t<!Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space>* =
|
||||
nullptr) {
|
||||
using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>;
|
||||
|
||||
static_assert(
|
||||
!alloc_prop_input::has_label,
|
||||
"The view constructor arguments passed to Kokkos::create_mirror "
|
||||
"must not include a label!");
|
||||
static_assert(
|
||||
!alloc_prop_input::has_pointer,
|
||||
"The view constructor arguments passed to Kokkos::create_mirror must "
|
||||
"not include a pointer!");
|
||||
static_assert(
|
||||
!alloc_prop_input::allow_padding,
|
||||
"The view constructor arguments passed to Kokkos::create_mirror must "
|
||||
"not explicitly allow padding!");
|
||||
// create a mirror
|
||||
// private interface that accepts arbitrary view constructor args passed by a
|
||||
// view_alloc
|
||||
template <class T, class... P, class... ViewCtorArgs>
|
||||
inline auto create_mirror(const Kokkos::Experimental::DynamicView<T, P...>& src,
|
||||
const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop) {
|
||||
using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>;
|
||||
check_view_ctor_args_create_mirror<ViewCtorArgs...>();
|
||||
|
||||
auto prop_copy = Impl::with_properties_if_unset(
|
||||
arg_prop, std::string(src.label()).append("_mirror"));
|
||||
|
||||
auto ret = typename Kokkos::Experimental::DynamicView<T, P...>::HostMirror(
|
||||
prop_copy, src.chunk_size(), src.chunk_max() * src.chunk_size());
|
||||
|
||||
ret.resize_serial(src.extent(0));
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
template <class T, class... P, class... ViewCtorArgs>
|
||||
inline auto create_mirror(
|
||||
const Kokkos::Experimental::DynamicView<T, P...>& src,
|
||||
const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop,
|
||||
std::enable_if_t<Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space>* =
|
||||
nullptr) {
|
||||
using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>;
|
||||
|
||||
static_assert(
|
||||
!alloc_prop_input::has_label,
|
||||
"The view constructor arguments passed to Kokkos::create_mirror "
|
||||
"must not include a label!");
|
||||
static_assert(
|
||||
!alloc_prop_input::has_pointer,
|
||||
"The view constructor arguments passed to Kokkos::create_mirror must "
|
||||
"not include a pointer!");
|
||||
static_assert(
|
||||
!alloc_prop_input::allow_padding,
|
||||
"The view constructor arguments passed to Kokkos::create_mirror must "
|
||||
"not explicitly allow padding!");
|
||||
|
||||
if constexpr (Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space) {
|
||||
using MemorySpace = typename alloc_prop_input::memory_space;
|
||||
auto prop_copy = Impl::with_properties_if_unset(
|
||||
arg_prop, std::string(src.label()).append("_mirror"));
|
||||
|
||||
auto ret = typename Kokkos::Impl::MirrorDynamicViewType<
|
||||
MemorySpace, T, P...>::view_type(prop_copy, src.chunk_size(),
|
||||
@ -653,33 +612,59 @@ inline auto create_mirror(
|
||||
|
||||
ret.resize_serial(src.extent(0));
|
||||
|
||||
return ret;
|
||||
} else {
|
||||
auto ret = typename Kokkos::Experimental::DynamicView<T, P...>::HostMirror(
|
||||
prop_copy, src.chunk_size(), src.chunk_max() * src.chunk_size());
|
||||
|
||||
ret.resize_serial(src.extent(0));
|
||||
|
||||
return ret;
|
||||
}
|
||||
#if defined(KOKKOS_COMPILER_INTEL) || \
|
||||
(defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
|
||||
!defined(KOKKOS_COMPILER_MSVC))
|
||||
__builtin_unreachable();
|
||||
#endif
|
||||
}
|
||||
|
||||
} // namespace Impl
|
||||
|
||||
// Create a mirror in host space
|
||||
template <class T, class... P>
|
||||
// public interface
|
||||
template <class T, class... P,
|
||||
typename Enable = std::enable_if_t<
|
||||
std::is_void_v<typename ViewTraits<T, P...>::specialize>>>
|
||||
inline auto create_mirror(
|
||||
const Kokkos::Experimental::DynamicView<T, P...>& src) {
|
||||
return Impl::create_mirror(src, Impl::ViewCtorProp<>{});
|
||||
}
|
||||
|
||||
template <class T, class... P>
|
||||
// public interface that accepts a without initializing flag
|
||||
template <class T, class... P,
|
||||
typename Enable = std::enable_if_t<
|
||||
std::is_void_v<typename ViewTraits<T, P...>::specialize>>>
|
||||
inline auto create_mirror(
|
||||
Kokkos::Impl::WithoutInitializing_t wi,
|
||||
const Kokkos::Experimental::DynamicView<T, P...>& src) {
|
||||
return Impl::create_mirror(src, Kokkos::view_alloc(wi));
|
||||
}
|
||||
|
||||
// Create a mirror in a new space
|
||||
template <class Space, class T, class... P>
|
||||
// public interface that accepts a space
|
||||
template <class Space, class T, class... P,
|
||||
typename Enable = std::enable_if_t<
|
||||
Kokkos::is_space<Space>::value &&
|
||||
std::is_void_v<typename ViewTraits<T, P...>::specialize>>>
|
||||
inline auto create_mirror(
|
||||
const Space&, const Kokkos::Experimental::DynamicView<T, P...>& src) {
|
||||
return Impl::create_mirror(
|
||||
src, Kokkos::view_alloc(typename Space::memory_space{}));
|
||||
}
|
||||
|
||||
template <class Space, class T, class... P>
|
||||
// public interface that accepts a space and a without initializing flag
|
||||
template <class Space, class T, class... P,
|
||||
typename Enable = std::enable_if_t<
|
||||
Kokkos::is_space<Space>::value &&
|
||||
std::is_void_v<typename ViewTraits<T, P...>::specialize>>>
|
||||
typename Kokkos::Impl::MirrorDynamicViewType<Space, T, P...>::view_type
|
||||
create_mirror(Kokkos::Impl::WithoutInitializing_t wi, const Space&,
|
||||
const Kokkos::Experimental::DynamicView<T, P...>& src) {
|
||||
@ -687,7 +672,11 @@ create_mirror(Kokkos::Impl::WithoutInitializing_t wi, const Space&,
|
||||
src, Kokkos::view_alloc(wi, typename Space::memory_space{}));
|
||||
}
|
||||
|
||||
template <class T, class... P, class... ViewCtorArgs>
|
||||
// public interface that accepts arbitrary view constructor args passed by a
|
||||
// view_alloc
|
||||
template <class T, class... P, class... ViewCtorArgs,
|
||||
typename Enable = std::enable_if_t<
|
||||
std::is_void_v<typename ViewTraits<T, P...>::specialize>>>
|
||||
inline auto create_mirror(
|
||||
const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop,
|
||||
const Kokkos::Experimental::DynamicView<T, P...>& src) {
|
||||
@ -696,76 +685,56 @@ inline auto create_mirror(
|
||||
|
||||
namespace Impl {
|
||||
|
||||
// create a mirror view
|
||||
// private interface that accepts arbitrary view constructor args passed by a
|
||||
// view_alloc
|
||||
template <class T, class... P, class... ViewCtorArgs>
|
||||
inline std::enable_if_t<
|
||||
!Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space &&
|
||||
(std::is_same<
|
||||
typename Kokkos::Experimental::DynamicView<T, P...>::memory_space,
|
||||
inline auto create_mirror_view(
|
||||
const Kokkos::Experimental::DynamicView<T, P...>& src,
|
||||
[[maybe_unused]] const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop) {
|
||||
if constexpr (!Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space) {
|
||||
if constexpr (std::is_same<typename Kokkos::Experimental::DynamicView<
|
||||
T, P...>::memory_space,
|
||||
typename Kokkos::Experimental::DynamicView<
|
||||
T, P...>::HostMirror::memory_space>::value &&
|
||||
std::is_same<
|
||||
typename Kokkos::Experimental::DynamicView<T, P...>::data_type,
|
||||
std::is_same<typename Kokkos::Experimental::DynamicView<
|
||||
T, P...>::data_type,
|
||||
typename Kokkos::Experimental::DynamicView<
|
||||
T, P...>::HostMirror::data_type>::value),
|
||||
typename Kokkos::Experimental::DynamicView<T, P...>::HostMirror>
|
||||
create_mirror_view(const Kokkos::Experimental::DynamicView<T, P...>& src,
|
||||
const Impl::ViewCtorProp<ViewCtorArgs...>&) {
|
||||
return src;
|
||||
T, P...>::HostMirror::data_type>::value) {
|
||||
return
|
||||
typename Kokkos::Experimental::DynamicView<T, P...>::HostMirror(src);
|
||||
} else {
|
||||
return Kokkos::Impl::choose_create_mirror(src, arg_prop);
|
||||
}
|
||||
} else {
|
||||
if constexpr (Impl::MirrorDynamicViewType<
|
||||
typename Impl::ViewCtorProp<
|
||||
ViewCtorArgs...>::memory_space,
|
||||
T, P...>::is_same_memspace) {
|
||||
return typename Impl::MirrorDynamicViewType<
|
||||
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space, T,
|
||||
P...>::view_type(src);
|
||||
} else {
|
||||
return Kokkos::Impl::choose_create_mirror(src, arg_prop);
|
||||
}
|
||||
}
|
||||
#if defined(KOKKOS_COMPILER_INTEL) || \
|
||||
(defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
|
||||
!defined(KOKKOS_COMPILER_MSVC))
|
||||
__builtin_unreachable();
|
||||
#endif
|
||||
}
|
||||
|
||||
template <class T, class... P, class... ViewCtorArgs>
|
||||
inline std::enable_if_t<
|
||||
!Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space &&
|
||||
!(std::is_same<
|
||||
typename Kokkos::Experimental::DynamicView<T, P...>::memory_space,
|
||||
typename Kokkos::Experimental::DynamicView<
|
||||
T, P...>::HostMirror::memory_space>::value &&
|
||||
std::is_same<
|
||||
typename Kokkos::Experimental::DynamicView<T, P...>::data_type,
|
||||
typename Kokkos::Experimental::DynamicView<
|
||||
T, P...>::HostMirror::data_type>::value),
|
||||
typename Kokkos::Experimental::DynamicView<T, P...>::HostMirror>
|
||||
create_mirror_view(const Kokkos::Experimental::DynamicView<T, P...>& src,
|
||||
const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop) {
|
||||
return Kokkos::create_mirror(arg_prop, src);
|
||||
}
|
||||
|
||||
template <class T, class... P, class... ViewCtorArgs,
|
||||
class = std::enable_if_t<
|
||||
Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space>>
|
||||
std::enable_if_t<Impl::MirrorDynamicViewType<
|
||||
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space,
|
||||
T, P...>::is_same_memspace,
|
||||
typename Impl::MirrorDynamicViewType<
|
||||
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space,
|
||||
T, P...>::view_type>
|
||||
create_mirror_view(const Kokkos::Experimental::DynamicView<T, P...>& src,
|
||||
const Impl::ViewCtorProp<ViewCtorArgs...>&) {
|
||||
return src;
|
||||
}
|
||||
|
||||
template <class T, class... P, class... ViewCtorArgs,
|
||||
class = std::enable_if_t<
|
||||
Impl::ViewCtorProp<ViewCtorArgs...>::has_memory_space>>
|
||||
std::enable_if_t<!Impl::MirrorDynamicViewType<
|
||||
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space,
|
||||
T, P...>::is_same_memspace,
|
||||
typename Impl::MirrorDynamicViewType<
|
||||
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space,
|
||||
T, P...>::view_type>
|
||||
create_mirror_view(const Kokkos::Experimental::DynamicView<T, P...>& src,
|
||||
const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop) {
|
||||
return Kokkos::Impl::create_mirror(src, arg_prop);
|
||||
}
|
||||
} // namespace Impl
|
||||
|
||||
// Create a mirror view in host space
|
||||
// public interface
|
||||
template <class T, class... P>
|
||||
inline auto create_mirror_view(
|
||||
const typename Kokkos::Experimental::DynamicView<T, P...>& src) {
|
||||
return Impl::create_mirror_view(src, Impl::ViewCtorProp<>{});
|
||||
}
|
||||
|
||||
// public interface that accepts a without initializing flag
|
||||
template <class T, class... P>
|
||||
inline auto create_mirror_view(
|
||||
Kokkos::Impl::WithoutInitializing_t wi,
|
||||
@ -773,15 +742,18 @@ inline auto create_mirror_view(
|
||||
return Impl::create_mirror_view(src, Kokkos::view_alloc(wi));
|
||||
}
|
||||
|
||||
// Create a mirror in a new space
|
||||
template <class Space, class T, class... P>
|
||||
// public interface that accepts a space
|
||||
template <class Space, class T, class... P,
|
||||
class Enable = std::enable_if_t<Kokkos::is_space<Space>::value>>
|
||||
inline auto create_mirror_view(
|
||||
const Space&, const Kokkos::Experimental::DynamicView<T, P...>& src) {
|
||||
return Impl::create_mirror_view(src,
|
||||
view_alloc(typename Space::memory_space{}));
|
||||
}
|
||||
|
||||
template <class Space, class T, class... P>
|
||||
// public interface that accepts a space and a without initializing flag
|
||||
template <class Space, class T, class... P,
|
||||
class Enable = std::enable_if_t<Kokkos::is_space<Space>::value>>
|
||||
inline auto create_mirror_view(
|
||||
Kokkos::Impl::WithoutInitializing_t wi, const Space&,
|
||||
const Kokkos::Experimental::DynamicView<T, P...>& src) {
|
||||
@ -789,6 +761,8 @@ inline auto create_mirror_view(
|
||||
src, Kokkos::view_alloc(wi, typename Space::memory_space{}));
|
||||
}
|
||||
|
||||
// public interface that accepts arbitrary view constructor args passed by a
|
||||
// view_alloc
|
||||
template <class T, class... P, class... ViewCtorArgs>
|
||||
inline auto create_mirror_view(
|
||||
const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop,
|
||||
@ -985,58 +959,29 @@ struct ViewCopy<Kokkos::Experimental::DynamicView<DP...>,
|
||||
|
||||
} // namespace Impl
|
||||
|
||||
template <class... ViewCtorArgs, class T, class... P>
|
||||
// create a mirror view and deep copy it
|
||||
// public interface that accepts arbitrary view constructor args passed by a
|
||||
// view_alloc
|
||||
template <class... ViewCtorArgs, class T, class... P,
|
||||
class Enable = std::enable_if_t<
|
||||
std::is_void<typename ViewTraits<T, P...>::specialize>::value>>
|
||||
auto create_mirror_view_and_copy(
|
||||
const Impl::ViewCtorProp<ViewCtorArgs...>&,
|
||||
const Kokkos::Experimental::DynamicView<T, P...>& src,
|
||||
std::enable_if_t<
|
||||
std::is_void<typename ViewTraits<T, P...>::specialize>::value &&
|
||||
Impl::MirrorDynamicViewType<
|
||||
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space, T,
|
||||
P...>::is_same_memspace>* = nullptr) {
|
||||
[[maybe_unused]] const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop,
|
||||
const Kokkos::Experimental::DynamicView<T, P...>& src) {
|
||||
using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>;
|
||||
static_assert(
|
||||
alloc_prop_input::has_memory_space,
|
||||
"The view constructor arguments passed to "
|
||||
"Kokkos::create_mirror_view_and_copy must include a memory space!");
|
||||
static_assert(!alloc_prop_input::has_pointer,
|
||||
"The view constructor arguments passed to "
|
||||
"Kokkos::create_mirror_view_and_copy must "
|
||||
"not include a pointer!");
|
||||
static_assert(!alloc_prop_input::allow_padding,
|
||||
"The view constructor arguments passed to "
|
||||
"Kokkos::create_mirror_view_and_copy must "
|
||||
"not explicitly allow padding!");
|
||||
|
||||
Impl::check_view_ctor_args_create_mirror_view_and_copy<ViewCtorArgs...>();
|
||||
|
||||
if constexpr (Impl::MirrorDynamicViewType<
|
||||
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space,
|
||||
T, P...>::is_same_memspace) {
|
||||
// same behavior as deep_copy(src, src)
|
||||
if (!alloc_prop_input::has_execution_space)
|
||||
if constexpr (!alloc_prop_input::has_execution_space)
|
||||
fence(
|
||||
"Kokkos::create_mirror_view_and_copy: fence before returning src view");
|
||||
"Kokkos::create_mirror_view_and_copy: fence before returning src "
|
||||
"view");
|
||||
return src;
|
||||
}
|
||||
|
||||
template <class... ViewCtorArgs, class T, class... P>
|
||||
auto create_mirror_view_and_copy(
|
||||
const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop,
|
||||
const Kokkos::Experimental::DynamicView<T, P...>& src,
|
||||
std::enable_if_t<
|
||||
std::is_void<typename ViewTraits<T, P...>::specialize>::value &&
|
||||
!Impl::MirrorDynamicViewType<
|
||||
typename Impl::ViewCtorProp<ViewCtorArgs...>::memory_space, T,
|
||||
P...>::is_same_memspace>* = nullptr) {
|
||||
using alloc_prop_input = Impl::ViewCtorProp<ViewCtorArgs...>;
|
||||
static_assert(
|
||||
alloc_prop_input::has_memory_space,
|
||||
"The view constructor arguments passed to "
|
||||
"Kokkos::create_mirror_view_and_copy must include a memory space!");
|
||||
static_assert(!alloc_prop_input::has_pointer,
|
||||
"The view constructor arguments passed to "
|
||||
"Kokkos::create_mirror_view_and_copy must "
|
||||
"not include a pointer!");
|
||||
static_assert(!alloc_prop_input::allow_padding,
|
||||
"The view constructor arguments passed to "
|
||||
"Kokkos::create_mirror_view_and_copy must "
|
||||
"not explicitly allow padding!");
|
||||
} else {
|
||||
using Space = typename alloc_prop_input::memory_space;
|
||||
using Mirror =
|
||||
typename Impl::MirrorDynamicViewType<Space, T, P...>::view_type;
|
||||
@ -1057,8 +1002,14 @@ auto create_mirror_view_and_copy(
|
||||
deep_copy(mirror, src);
|
||||
return mirror;
|
||||
}
|
||||
#if defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
|
||||
!defined(KOKKOS_COMPILER_MSVC)
|
||||
__builtin_unreachable();
|
||||
#endif
|
||||
}
|
||||
|
||||
template <class Space, class T, class... P>
|
||||
template <class Space, class T, class... P,
|
||||
typename Enable = std::enable_if_t<Kokkos::is_space<Space>::value>>
|
||||
auto create_mirror_view_and_copy(
|
||||
const Space&, const Kokkos::Experimental::DynamicView<T, P...>& src,
|
||||
std::string const& name = "") {
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user