diff --git a/.github/workflows/check-vla.yml b/.github/workflows/check-vla.yml new file mode 100644 index 0000000000..ab89018a3d --- /dev/null +++ b/.github/workflows/check-vla.yml @@ -0,0 +1,89 @@ +# GitHub action to build LAMMPS on Linux with gcc and -Werror=vla +name: "Check for Variable Length Arrays" + +on: + push: + branches: + - develop + pull_request: + branches: + - develop + + workflow_dispatch: + +jobs: + build: + name: Build with -Werror=vla + if: ${{ github.repository == 'lammps/lammps' }} + runs-on: ubuntu-latest + env: + CCACHE_DIR: ${{ github.workspace }}/.ccache + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 2 + + - name: Install extra packages + run: | + sudo apt-get update + sudo apt-get install -y ccache \ + libeigen3-dev \ + libcurl4-openssl-dev \ + mold \ + mpi-default-bin \ + mpi-default-dev \ + ninja-build \ + python3-dev + + - name: Create Build Environment + run: mkdir build + + - name: Set up ccache + uses: actions/cache@v4 + with: + path: ${{ env.CCACHE_DIR }} + key: linux-vla-ccache-${{ github.sha }} + restore-keys: linux-vla-ccache- + + - name: Building LAMMPS via CMake + shell: bash + run: | + ccache -z + python3 -m venv linuxenv + source linuxenv/bin/activate + python3 -m pip install numpy + python3 -m pip install pyyaml + cmake -S cmake -B build \ + -C cmake/presets/most.cmake \ + -D CMAKE_CXX_COMPILER=g++ \ + -D CMAKE_C_COMPILER=gcc \ + -D CMAKE_CXX_COMPILER_LAUNCHER=ccache \ + -D CMAKE_C_COMPILER_LAUNCHER=ccache \ + -D CMAKE_BUILD_TYPE=Debug \ + -D CMAKE_CXX_FLAGS_DEBUG="-Og -g -Werror=vla" \ + -D DOWNLOAD_POTENTIALS=off \ + -D BUILD_MPI=on \ + -D BUILD_SHARED_LIBS=off \ + -D BUILD_TOOLS=off \ + -D ENABLE_TESTING=off \ + -D MLIAP_ENABLE_ACE=on \ + -D MLIAP_ENABLE_PYTHON=off \ + -D PKG_AWPMD=on \ + -D PKG_GPU=on \ + -D GPU_API=opencl \ + -D PKG_LATBOLTZ=on \ + -D PKG_MDI=on \ + -D PKG_MANIFOLD=on \ + -D PKG_ML-PACE=on \ + -D PKG_ML-RANN=off \ + -D PKG_MOLFILE=on \ + -D PKG_RHEO=on \ + -D 
PKG_PTM=on \ + -D PKG_PYTHON=on \ + -D PKG_QTB=on \ + -D PKG_SMTBQ=on \ + -G Ninja + cmake --build build + ccache -s diff --git a/.github/workflows/compile-msvc.yml b/.github/workflows/compile-msvc.yml index 1a0f1ea62f..7560bc0549 100644 --- a/.github/workflows/compile-msvc.yml +++ b/.github/workflows/compile-msvc.yml @@ -1,5 +1,5 @@ -# GitHub action to build LAMMPS on Windows with Visual C++ -name: "Native Windows Compilation and Unit Tests" +# GitHub action to test LAMMPS on Windows with Visual C++ +name: "Windows Unit Tests" on: push: @@ -11,11 +11,17 @@ on: workflow_dispatch: +concurrency: + group: ${{ github.event_name }}-${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{github.event_name == 'pull_request'}} + jobs: build: name: Windows Compilation Test if: ${{ github.repository == 'lammps/lammps' }} runs-on: windows-latest + env: + CCACHE_DIR: ${{ github.workspace }}/.ccache steps: - name: Checkout repository @@ -23,36 +29,41 @@ jobs: with: fetch-depth: 2 + - name: Enable MSVC++ + uses: lammps/setup-msvc-dev@v3 + with: + arch: x64 + + - name: Install Ccache + run: | + choco install ccache ninja -y + + - name: Set up ccache + uses: actions/cache@v4 + with: + path: ${{ env.CCACHE_DIR }} + key: win-unit-ccache-${{ github.sha }} + restore-keys: win-unit-ccache- + - name: Select Python version uses: actions/setup-python@v5 with: python-version: '3.11' - name: Building LAMMPS via CMake - shell: bash run: | + ccache -z python3 -m pip install numpy python3 -m pip install pyyaml - nuget install MSMPIsdk - nuget install MSMPIDIST - cmake -C cmake/presets/windows.cmake \ - -D DOWNLOAD_POTENTIALS=off \ - -D PKG_PYTHON=on \ - -D WITH_PNG=off \ - -D WITH_JPEG=off \ - -S cmake -B build \ - -D BUILD_SHARED_LIBS=on \ - -D LAMMPS_EXCEPTIONS=on \ - -D ENABLE_TESTING=on - cmake --build build --config Release --parallel 2 + cmake -C cmake\presets\windows.cmake -D CMAKE_CXX_COMPILER=cl -D CMAKE_CXX_COMPILER_LAUNCHER=ccache -D CMAKE_C_COMPILER=cl -D 
CMAKE_C_COMPILER_LAUNCHER=ccache -D CMAKE_Fortran_COMPILER="" -D DOWNLOAD_POTENTIALS=off -D PKG_PYTHON=on -D WITH_PNG=off -D WITH_JPEG=off -S cmake -B build -D BUILD_SHARED_LIBS=on -D ENABLE_TESTING=on -D CMAKE_BUILD_TYPE=Release -G Ninja + cmake --build build + ccache -s - name: Run LAMMPS executable - shell: bash run: | - ./build/Release/lmp.exe -h - ./build/Release/lmp.exe -in bench/in.lj + build\lmp.exe -h + build\lmp.exe -in bench\in.lj - name: Run Unit Tests working-directory: build - shell: bash - run: ctest -V -C Release -E FixTimestep:python_move_nve + run: ctest -V -E FixTimestep:python_move_nve diff --git a/.github/workflows/full-regression.yml b/.github/workflows/full-regression.yml new file mode 100644 index 0000000000..a6b5353b9b --- /dev/null +++ b/.github/workflows/full-regression.yml @@ -0,0 +1,109 @@ +# GitHub action to build LAMMPS on Linux and run regression tests +name: "Full Regression Test" + +on: + push: + branches: + - develop + + workflow_dispatch: + +jobs: + build: + name: Build LAMMPS + # restrict to official LAMMPS repository + if: ${{ github.repository == 'lammps/lammps' }} + runs-on: ubuntu-latest + env: + CCACHE_DIR: ${{ github.workspace }}/.ccache + strategy: + max-parallel: 8 + matrix: + idx: [ 0, 1, 2, 3, 4, 5, 6, 7 ] + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 2 + show-progress: false + + - name: Install extra packages + run: | + sudo apt-get update + sudo apt-get install -y ccache ninja-build libeigen3-dev \ + libcurl4-openssl-dev python3-dev \ + mpi-default-bin mpi-default-dev + + - name: Create Build Environment + run: mkdir build + + - name: Set up ccache + uses: actions/cache@v4 + with: + path: ${{ env.CCACHE_DIR }} + key: linux-full-ccache-${{ github.sha }} + restore-keys: linux-full-ccache- + + - name: Building LAMMPS via CMake + shell: bash + run: | + ccache -z + python3 -m venv linuxenv + source linuxenv/bin/activate + python3 -m pip install --upgrade pip + python3 -m pip 
install numpy pyyaml junit_xml + cmake -S cmake -B build \ + -C cmake/presets/gcc.cmake \ + -C cmake/presets/most.cmake \ + -D CMAKE_CXX_COMPILER_LAUNCHER=ccache \ + -D CMAKE_C_COMPILER_LAUNCHER=ccache \ + -D BUILD_SHARED_LIBS=off \ + -D DOWNLOAD_POTENTIALS=off \ + -D PKG_MANIFOLD=on \ + -D PKG_ML-PACE=on \ + -D PKG_ML-RANN=on \ + -D PKG_RHEO=on \ + -D PKG_PTM=on \ + -D PKG_PYTHON=on \ + -D PKG_QTB=on \ + -D PKG_SMTBQ=on \ + -G Ninja + cmake --build build + ccache -s + + - name: Run Full Regression Tests + shell: bash + run: | + source linuxenv/bin/activate + python3 tools/regression-tests/run_tests.py \ + --lmp-bin=build/lmp \ + --config-file=tools/regression-tests/config_serial.yaml \ + --examples-top-level=examples --analyze --num-workers=8 + + python3 tools/regression-tests/run_tests.py \ + --lmp-bin=build/lmp \ + --config-file=tools/regression-tests/config_serial.yaml \ + --list-input=input-list-${{ matrix.idx }}.txt \ + --output-file=output-${{ matrix.idx }}.xml \ + --progress-file=progress-${{ matrix.idx }}.yaml \ + --log-file=run-${{ matrix.idx }}.log + + tar -cvf full-regression-test-${{ matrix.idx }}.tar run-${{ matrix.idx }}.log progress-${{ matrix.idx }}.yaml output-${{ matrix.idx }}.xml + + - name: Upload artifacts + uses: actions/upload-artifact@v4 + with: + name: full-regression-test-artifact-${{ matrix.idx }} + path: full-regression-test-${{ matrix.idx }}.tar + + merge: + runs-on: ubuntu-latest + needs: build + steps: + - name: Merge Artifacts + uses: actions/upload-artifact/merge@v4 + with: + name: merged-full-regresssion-artifact + pattern: full-regression-test-artifact-* + diff --git a/.github/workflows/quick-regression.yml b/.github/workflows/quick-regression.yml new file mode 100644 index 0000000000..88794bfa0a --- /dev/null +++ b/.github/workflows/quick-regression.yml @@ -0,0 +1,118 @@ +# GitHub action to build LAMMPS on Linux and run selected regression tests +name: "Quick Regression Test" + +on: + pull_request: + branches: + - develop + + 
workflow_dispatch: + +concurrency: + group: ${{ github.event_name }}-${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{github.event_name == 'pull_request'}} + +jobs: + build: + name: Build LAMMPS + # restrict to official LAMMPS repository + if: ${{ github.repository == 'lammps/lammps' }} + runs-on: ubuntu-latest + env: + CCACHE_DIR: ${{ github.workspace }}/.ccache + strategy: + max-parallel: 4 + matrix: + idx: [ 0, 1, 2, 3 ] + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 + show-progress: false + + - name: Install extra packages + run: | + sudo apt-get update + sudo apt-get install -y ccache ninja-build libeigen3-dev \ + libcurl4-openssl-dev python3-dev \ + mpi-default-bin mpi-default-dev + + - name: Create Build Environment + run: mkdir build + + - name: Set up ccache + uses: actions/cache@v4 + with: + path: ${{ env.CCACHE_DIR }} + key: linux-quick-ccache-${{ github.sha }} + restore-keys: linux-quick-ccache- + + - name: Building LAMMPS via CMake + shell: bash + run: | + ccache -z + python3 -m venv linuxenv + source linuxenv/bin/activate + python3 -m pip install --upgrade pip + python3 -m pip install numpy pyyaml junit_xml + cmake -S cmake -B build \ + -C cmake/presets/gcc.cmake \ + -C cmake/presets/most.cmake \ + -D CMAKE_CXX_COMPILER_LAUNCHER=ccache \ + -D CMAKE_C_COMPILER_LAUNCHER=ccache \ + -D BUILD_SHARED_LIBS=off \ + -D DOWNLOAD_POTENTIALS=off \ + -D PKG_MANIFOLD=on \ + -D PKG_ML-PACE=on \ + -D PKG_ML-RANN=on \ + -D PKG_RHEO=on \ + -D PKG_PTM=on \ + -D PKG_PYTHON=on \ + -D PKG_QTB=on \ + -D PKG_SMTBQ=on \ + -G Ninja + cmake --build build + ccache -s + + - name: Run Regression Tests for Modified Styles + shell: bash + run: | + source linuxenv/bin/activate + python3 tools/regression-tests/run_tests.py \ + --lmp-bin=build/lmp \ + --config-file=tools/regression-tests/config_quick.yaml \ + --examples-top-level=examples \ + --quick-reference=tools/regression-tests/reference.yaml \ + --quick 
--quick-branch=origin/develop --quick-max=100 --num-workers=4 + + if [ -f input-list-${{ matrix.idx }}.txt ] + then \ + python3 tools/regression-tests/run_tests.py \ + --lmp-bin=build/lmp \ + --config-file=tools/regression-tests/config_quick.yaml \ + --list-input=input-list-${{ matrix.idx }}.txt \ + --output-file=output-${{ matrix.idx }}.xml \ + --progress-file=progress-${{ matrix.idx }}.yaml \ + --log-file=run-${{ matrix.idx }}.log + fi + + tar -cvf quick-regression-test-${{ matrix.idx }}.tar run-${{ matrix.idx }}.log progress-${{ matrix.idx }}.yaml output-${{ matrix.idx }}.xml + + - name: Upload artifacts + uses: actions/upload-artifact@v4 + with: + name: quick-regression-test-artifact-${{ matrix.idx }} + path: quick-regression-test-${{ matrix.idx }}.tar + + merge: + runs-on: ubuntu-latest + needs: build + steps: + - name: Merge Artifacts + uses: actions/upload-artifact/merge@v4 + with: + name: merged-quick-regresssion-artifact + pattern: quick-regression-test-artifact-* + diff --git a/.github/workflows/style-check.yml b/.github/workflows/style-check.yml new file mode 100644 index 0000000000..7be2c4fc46 --- /dev/null +++ b/.github/workflows/style-check.yml @@ -0,0 +1,37 @@ +# GitHub action to run checks from tools/coding_standard +name: "Check for Programming Style Conformance" + +on: + push: + branches: + - develop + pull_request: + branches: + - develop + + workflow_dispatch: + +concurrency: + group: ${{ github.event_name }}-${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{github.event_name == 'pull_request'}} + +jobs: + build: + name: Programming Style Conformance + if: ${{ github.repository == 'lammps/lammps' }} + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 1 + + - name: Run Tests + working-directory: src + shell: bash + run: | + make check-whitespace + make check-permissions + make check-homepage + make check-errordocs diff --git a/.github/workflows/unittest-linux.yml 
b/.github/workflows/unittest-linux.yml new file mode 100644 index 0000000000..ce98fcea35 --- /dev/null +++ b/.github/workflows/unittest-linux.yml @@ -0,0 +1,86 @@ +# GitHub action to build LAMMPS on Linux and run standard unit tests +name: "Unittest for Linux /w LAMMPS_BIGBIG" + +on: + push: + branches: + - develop + pull_request: + branches: + - develop + + workflow_dispatch: + +concurrency: + group: ${{ github.event_name }}-${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{github.event_name == 'pull_request'}} + +jobs: + build: + name: Linux Unit Test + if: ${{ github.repository == 'lammps/lammps' }} + runs-on: ubuntu-latest + env: + CCACHE_DIR: ${{ github.workspace }}/.ccache + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 2 + + - name: Install extra packages + run: | + sudo apt-get update + sudo apt-get install -y ccache \ + libeigen3-dev \ + libcurl4-openssl-dev \ + mold \ + ninja-build \ + python3-dev + + - name: Create Build Environment + run: mkdir build + + - name: Set up ccache + uses: actions/cache@v4 + with: + path: ${{ env.CCACHE_DIR }} + key: linux-unit-ccache-${{ github.sha }} + restore-keys: linux-unit-ccache- + + - name: Building LAMMPS via CMake + shell: bash + run: | + ccache -z + python3 -m venv linuxenv + source linuxenv/bin/activate + python3 -m pip install numpy + python3 -m pip install pyyaml + cmake -S cmake -B build \ + -C cmake/presets/gcc.cmake \ + -C cmake/presets/most.cmake \ + -D CMAKE_CXX_COMPILER_LAUNCHER=ccache \ + -D CMAKE_C_COMPILER_LAUNCHER=ccache \ + -D BUILD_SHARED_LIBS=on \ + -D LAMMPS_SIZES=bigbig \ + -D DOWNLOAD_POTENTIALS=off \ + -D ENABLE_TESTING=on \ + -D MLIAP_ENABLE_ACE=on \ + -D MLIAP_ENABLE_PYTHON=off \ + -D PKG_MANIFOLD=on \ + -D PKG_ML-PACE=on \ + -D PKG_ML-RANN=on \ + -D PKG_RHEO=on \ + -D PKG_PTM=on \ + -D PKG_PYTHON=on \ + -D PKG_QTB=on \ + -D PKG_SMTBQ=on \ + -G Ninja + cmake --build build + ccache -s + + - name: Run Tests + working-directory: build + 
shell: bash + run: ctest -V diff --git a/.github/workflows/unittest-macos.yml b/.github/workflows/unittest-macos.yml index b0bc4b2727..0d478a9d6b 100644 --- a/.github/workflows/unittest-macos.yml +++ b/.github/workflows/unittest-macos.yml @@ -11,6 +11,10 @@ on: workflow_dispatch: +concurrency: + group: ${{ github.event_name }}-${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{github.event_name == 'pull_request'}} + jobs: build: name: MacOS Unit Test diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 5b29fb6047..1bd387b5b9 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -12,6 +12,11 @@ endif() if(POLICY CMP0075) cmake_policy(SET CMP0075 NEW) endif() +# set policy to silence warnings about requiring execute permission for find_program +# we use OLD because the python-config script for the Fedora MinGW cross-compiler requires it currently +if(POLICY CMP0109) + cmake_policy(SET CMP0109 OLD) +endif() # set policy to silence warnings about timestamps of downloaded files. 
review occasionally if it may be set to NEW if(POLICY CMP0135) cmake_policy(SET CMP0135 OLD) @@ -469,13 +474,13 @@ if(BUILD_OMP) if(CMAKE_VERSION VERSION_LESS 3.28) get_filename_component(_exe "${CMAKE_CXX_COMPILER}" NAME) if((CMAKE_CXX_COMPILER_ID STREQUAL "Clang") AND (_exe STREQUAL "crayCC")) - set(CMAKE_SHARED_LINKER_FLAGS_${BTYPE} "${CMAKE_SHARED_LINKER_FLAGS_${BTYPE} -fopenmp") - set(CMAKE_STATIC_LINKER_FLAGS_${BTYPE} "${CMAKE_STATIC_LINKER_FLAGS_${BTYPE} -fopenmp") + set(CMAKE_SHARED_LINKER_FLAGS_${BTYPE} "${CMAKE_SHARED_LINKER_FLAGS_${BTYPE}} -fopenmp") + set(CMAKE_STATIC_LINKER_FLAGS_${BTYPE} "${CMAKE_STATIC_LINKER_FLAGS_${BTYPE}} -fopenmp") endif() else() if(CMAKE_CXX_COMPILER_ID STREQUAL "CrayClang") - set(CMAKE_SHARED_LINKER_FLAGS_${BTYPE} "${CMAKE_SHARED_LINKER_FLAGS_${BTYPE} -fopenmp") - set(CMAKE_STATIC_LINKER_FLAGS_${BTYPE} "${CMAKE_STATIC_LINKER_FLAGS_${BTYPE} -fopenmp") + set(CMAKE_SHARED_LINKER_FLAGS_${BTYPE} "${CMAKE_SHARED_LINKER_FLAGS_${BTYPE}} -fopenmp") + set(CMAKE_STATIC_LINKER_FLAGS_${BTYPE} "${CMAKE_STATIC_LINKER_FLAGS_${BTYPE}} -fopenmp") endif() endif() endif() @@ -492,7 +497,7 @@ if((CMAKE_CXX_COMPILER_ID STREQUAL "Intel") AND (CMAKE_CXX_STANDARD GREATER_EQUA PROPERTIES COMPILE_OPTIONS "-std=c++14") endif() -if(PKG_ATC OR PKG_AWPMD OR PKG_ML-QUIP OR PKG_ML-POD OR PKG_ELECTRODE OR BUILD_TOOLS) +if(PKG_ATC OR PKG_AWPMD OR PKG_ML-QUIP OR PKG_ML-POD OR PKG_ELECTRODE OR PKG_RHEO OR BUILD_TOOLS) enable_language(C) if (NOT USE_INTERNAL_LINALG) find_package(LAPACK) @@ -510,14 +515,6 @@ if(PKG_ATC OR PKG_AWPMD OR PKG_ML-QUIP OR PKG_ML-POD OR PKG_ELECTRODE OR BUILD_T endif() endif() -find_package(CURL QUIET COMPONENTS HTTP HTTPS) -option(WITH_CURL "Enable libcurl support" ${CURL_FOUND}) -if(WITH_CURL) - find_package(CURL REQUIRED COMPONENTS HTTP HTTPS) - target_compile_definitions(lammps PRIVATE -DLAMMPS_CURL) - target_link_libraries(lammps PRIVATE CURL::libcurl) -endif() - # tweak jpeg library names to avoid linker errors with MinGW 
cross-compilation set(JPEG_NAMES libjpeg libjpeg-62) find_package(JPEG QUIET) @@ -575,7 +572,7 @@ else() endif() foreach(PKG_WITH_INCL KSPACE PYTHON ML-IAP VORONOI COLVARS ML-HDNNP MDI MOLFILE NETCDF - PLUMED QMMM ML-QUIP SCAFACOS MACHDYN VTK KIM COMPRESS ML-PACE LEPTON RHEO) + PLUMED QMMM ML-QUIP SCAFACOS MACHDYN VTK KIM COMPRESS ML-PACE LEPTON EXTRA-COMMAND) if(PKG_${PKG_WITH_INCL}) include(Packages/${PKG_WITH_INCL}) endif() @@ -968,6 +965,9 @@ message(STATUS "<<< Compilers and Flags: >>> C++ Standard: ${CMAKE_CXX_STANDARD} C++ Flags: ${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${BTYPE}} Defines: ${DEFINES}") +if(CMAKE_CXX_COMPILER_LAUNCHER) + message(STATUS " Launcher: ${CMAKE_CXX_COMPILER_LAUNCHER}") +endif() get_target_property(OPTIONS lammps COMPILE_OPTIONS) if(OPTIONS) message(" Options: ${OPTIONS}") @@ -986,6 +986,9 @@ if(_index GREATER -1) Type: ${CMAKE_C_COMPILER_ID} Version: ${CMAKE_C_COMPILER_VERSION} C Flags: ${CMAKE_C_FLAGS} ${CMAKE_C_FLAGS_${BTYPE}}") + if(CMAKE_C_COMPILER_LAUNCHER) + message(STATUS " Launcher: ${CMAKE_C_COMPILER_LAUNCHER}") + endif() endif() message(STATUS "<<< Linker flags: >>>") message(STATUS "Executable name: ${LAMMPS_BINARY}") diff --git a/cmake/Modules/Documentation.cmake b/cmake/Modules/Documentation.cmake index 400109067f..7b8f4a5ba0 100644 --- a/cmake/Modules/Documentation.cmake +++ b/cmake/Modules/Documentation.cmake @@ -4,6 +4,8 @@ option(BUILD_DOC "Build LAMMPS HTML documentation" OFF) if(BUILD_DOC) + option(BUILD_DOC_VENV "Build LAMMPS documentation virtual environment" ON) + mark_as_advanced(BUILD_DOC_VENV) # Current Sphinx versions require at least Python 3.8 # use default (or custom) Python executable, if version is sufficient if(Python_VERSION VERSION_GREATER_EQUAL 3.8) @@ -18,14 +20,6 @@ if(BUILD_DOC) find_package(Doxygen 1.8.10 REQUIRED) file(GLOB DOC_SOURCES CONFIGURE_DEPENDS ${LAMMPS_DOC_DIR}/src/[^.]*.rst) - add_custom_command( - OUTPUT docenv - COMMAND ${VIRTUALENV} docenv - ) - - set(DOCENV_BINARY_DIR 
${CMAKE_BINARY_DIR}/docenv/bin) - set(DOCENV_REQUIREMENTS_FILE ${LAMMPS_DOC_DIR}/utils/requirements.txt) - set(SPHINX_CONFIG_DIR ${LAMMPS_DOC_DIR}/utils/sphinx-config) set(SPHINX_CONFIG_FILE_TEMPLATE ${SPHINX_CONFIG_DIR}/conf.py.in) set(SPHINX_STATIC_DIR ${SPHINX_CONFIG_DIR}/_static) @@ -44,14 +38,32 @@ if(BUILD_DOC) # configure paths in conf.py, since relative paths change when file is copied configure_file(${SPHINX_CONFIG_FILE_TEMPLATE} ${DOC_BUILD_CONFIG_FILE}) - add_custom_command( - OUTPUT ${DOC_BUILD_DIR}/requirements.txt - DEPENDS docenv ${DOCENV_REQUIREMENTS_FILE} - COMMAND ${CMAKE_COMMAND} -E copy ${DOCENV_REQUIREMENTS_FILE} ${DOC_BUILD_DIR}/requirements.txt - COMMAND ${DOCENV_BINARY_DIR}/pip $ENV{PIP_OPTIONS} install --upgrade pip - COMMAND ${DOCENV_BINARY_DIR}/pip $ENV{PIP_OPTIONS} install --upgrade ${LAMMPS_DOC_DIR}/utils/converters - COMMAND ${DOCENV_BINARY_DIR}/pip $ENV{PIP_OPTIONS} install -r ${DOC_BUILD_DIR}/requirements.txt --upgrade - ) + if(BUILD_DOC_VENV) + add_custom_command( + OUTPUT docenv + COMMAND ${VIRTUALENV} docenv + ) + + set(DOCENV_BINARY_DIR ${CMAKE_BINARY_DIR}/docenv/bin) + set(DOCENV_REQUIREMENTS_FILE ${LAMMPS_DOC_DIR}/utils/requirements.txt) + + add_custom_command( + OUTPUT ${DOC_BUILD_DIR}/requirements.txt + DEPENDS docenv ${DOCENV_REQUIREMENTS_FILE} + COMMAND ${CMAKE_COMMAND} -E copy ${DOCENV_REQUIREMENTS_FILE} ${DOC_BUILD_DIR}/requirements.txt + COMMAND ${DOCENV_BINARY_DIR}/pip $ENV{PIP_OPTIONS} install --upgrade pip + COMMAND ${DOCENV_BINARY_DIR}/pip $ENV{PIP_OPTIONS} install --upgrade ${LAMMPS_DOC_DIR}/utils/converters + COMMAND ${DOCENV_BINARY_DIR}/pip $ENV{PIP_OPTIONS} install -r ${DOC_BUILD_DIR}/requirements.txt --upgrade + ) + + set(DOCENV_DEPS docenv ${DOC_BUILD_DIR}/requirements.txt) + if(NOT TARGET Sphinx::sphinx-build) + add_executable(Sphinx::sphinx-build IMPORTED GLOBAL) + set_target_properties(Sphinx::sphinx-build PROPERTIES IMPORTED_LOCATION "${DOCENV_BINARY_DIR}/sphinx-build") + endif() + else() + 
find_package(Sphinx) + endif() set(MATHJAX_URL "https://github.com/mathjax/MathJax/archive/3.1.3.tar.gz" CACHE STRING "URL for MathJax tarball") set(MATHJAX_MD5 "b81661c6e6ba06278e6ae37b30b0c492" CACHE STRING "MD5 checksum of MathJax tarball") @@ -97,8 +109,8 @@ if(BUILD_DOC) endif() add_custom_command( OUTPUT html - DEPENDS ${DOC_SOURCES} docenv ${DOC_BUILD_DIR}/requirements.txt ${DOXYGEN_XML_DIR}/index.xml ${BUILD_DOC_CONFIG_FILE} - COMMAND ${DOCENV_BINARY_DIR}/sphinx-build ${SPHINX_EXTRA_OPTS} -b html -c ${DOC_BUILD_DIR} -d ${DOC_BUILD_DIR}/doctrees ${LAMMPS_DOC_DIR}/src ${DOC_BUILD_DIR}/html + DEPENDS ${DOC_SOURCES} ${DOCENV_DEPS} ${DOXYGEN_XML_DIR}/index.xml ${BUILD_DOC_CONFIG_FILE} + COMMAND Sphinx::sphinx-build ${SPHINX_EXTRA_OPTS} -b html -c ${DOC_BUILD_DIR} -d ${DOC_BUILD_DIR}/doctrees ${LAMMPS_DOC_DIR}/src ${DOC_BUILD_DIR}/html COMMAND ${CMAKE_COMMAND} -E create_symlink Manual.html ${DOC_BUILD_DIR}/html/index.html COMMAND ${CMAKE_COMMAND} -E copy_directory ${LAMMPS_DOC_DIR}/src/PDF ${DOC_BUILD_DIR}/html/PDF COMMAND ${CMAKE_COMMAND} -E remove -f ${DOXYGEN_XML_DIR}/run.stamp diff --git a/cmake/Modules/FindSphinx.cmake b/cmake/Modules/FindSphinx.cmake new file mode 100644 index 0000000000..3718ecc543 --- /dev/null +++ b/cmake/Modules/FindSphinx.cmake @@ -0,0 +1,29 @@ +# Find sphinx-build +find_program(Sphinx_EXECUTABLE NAMES sphinx-build + PATH_SUFFIXES bin + DOC "Sphinx documenation build executable") +mark_as_advanced(Sphinx_EXECUTABLE) + +if(Sphinx_EXECUTABLE) + execute_process(COMMAND ${Sphinx_EXECUTABLE} --version + OUTPUT_VARIABLE sphinx_version + OUTPUT_STRIP_TRAILING_WHITESPACE + RESULT_VARIABLE _sphinx_version_result) + + if(_sphinx_version_result) + message(WARNING "Unable to determine sphinx-build verison: ${_sphinx_version_result}") + else() + string(REGEX REPLACE "sphinx-build ([0-9.]+).*" + "\\1" + Sphinx_VERSION + "${sphinx_version}") + endif() + + if(NOT TARGET Sphinx::sphinx-build) + add_executable(Sphinx::sphinx-build IMPORTED GLOBAL) + 
set_target_properties(Sphinx::sphinx-build PROPERTIES IMPORTED_LOCATION "${Sphinx_EXECUTABLE}") + endif() +endif() + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(Sphinx REQUIRED_VARS Sphinx_EXECUTABLE VERSION_VAR Sphinx_VERSION) diff --git a/cmake/Modules/Packages/EXTRA-COMMAND.cmake b/cmake/Modules/Packages/EXTRA-COMMAND.cmake new file mode 100644 index 0000000000..13c98bafd3 --- /dev/null +++ b/cmake/Modules/Packages/EXTRA-COMMAND.cmake @@ -0,0 +1,10 @@ +# the geturl command needs libcurl + +find_package(CURL QUIET COMPONENTS HTTP HTTPS) +option(WITH_CURL "Enable libcurl support" ${CURL_FOUND}) +if(WITH_CURL) + find_package(CURL REQUIRED COMPONENTS HTTP HTTPS) + target_compile_definitions(lammps PRIVATE -DLAMMPS_CURL) + target_link_libraries(lammps PRIVATE CURL::libcurl) +endif() + diff --git a/cmake/Modules/Packages/KOKKOS.cmake b/cmake/Modules/Packages/KOKKOS.cmake index 3776d18a3e..adb3abab6b 100644 --- a/cmake/Modules/Packages/KOKKOS.cmake +++ b/cmake/Modules/Packages/KOKKOS.cmake @@ -8,8 +8,24 @@ endif() ######################################################################## # consistency checks and Kokkos options/settings required by LAMMPS if(Kokkos_ENABLE_CUDA) - message(STATUS "KOKKOS: Enabling CUDA LAMBDA function support") - set(Kokkos_ENABLE_CUDA_LAMBDA ON CACHE BOOL "" FORCE) + option(Kokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC "CUDA asynchronous malloc support" OFF) + mark_as_advanced(Kokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC) + if(Kokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC) + message(STATUS "KOKKOS: CUDA malloc async support enabled") + else() + message(STATUS "KOKKOS: CUDA malloc async support disabled") + endif() +endif() +if(Kokkos_ENABLE_HIP) + option(Kokkos_ENABLE_HIP_MULTIPLE_KERNEL_INSTANTIATIONS "Enable multiple kernel instantiations with HIP" ON) + mark_as_advanced(Kokkos_ENABLE_HIP_MULTIPLE_KERNEL_INSTANTIATIONS) + option(Kokkos_ENABLE_ROCTHRUST "Use RoCThrust library" ON) + mark_as_advanced(Kokkos_ENABLE_ROCTHRUST) + + 
if(Kokkos_ARCH_AMD_GFX942 OR Kokkos_ARCH_AMD_GFX940) + option(Kokkos_ENABLE_IMPL_HIP_UNIFIED_MEMORY "Enable unified memory with HIP" ON) + mark_as_advanced(Kokkos_ENABLE_IMPL_HIP_UNIFIED_MEMORY) + endif() endif() # Adding OpenMP compiler flags without the checks done for # BUILD_OMP can result in compile failures. Enforce consistency. @@ -18,6 +34,15 @@ if(Kokkos_ENABLE_OPENMP) message(FATAL_ERROR "Must enable BUILD_OMP with Kokkos_ENABLE_OPENMP") endif() endif() + +if(Kokkos_ENABLE_SERIAL) + if(NOT (Kokkos_ENABLE_OPENMP OR Kokkos_ENABLE_THREADS OR + Kokkos_ENABLE_CUDA OR Kokkos_ENABLE_HIP OR Kokkos_ENABLE_SYCL + OR Kokkos_ENABLE_OPENMPTARGET)) + option(Kokkos_ENABLE_ATOMICS_BYPASS "Disable atomics for Kokkos Serial Backend" ON) + mark_as_advanced(Kokkos_ENABLE_ATOMICS_BYPASS) + endif() +endif() ######################################################################## option(EXTERNAL_KOKKOS "Build against external kokkos library" OFF) @@ -45,8 +70,8 @@ if(DOWNLOAD_KOKKOS) list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_CXX_EXTENSIONS=${CMAKE_CXX_EXTENSIONS}") list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}") include(ExternalProject) - set(KOKKOS_URL "https://github.com/kokkos/kokkos/archive/4.3.01.tar.gz" CACHE STRING "URL for KOKKOS tarball") - set(KOKKOS_MD5 "243de871b3dc2cf3990c1c404032df83" CACHE STRING "MD5 checksum of KOKKOS tarball") + set(KOKKOS_URL "https://github.com/kokkos/kokkos/archive/4.4.01.tar.gz" CACHE STRING "URL for KOKKOS tarball") + set(KOKKOS_MD5 "de6ee80d00b6212b02bfb7f1e71a8392" CACHE STRING "MD5 checksum of KOKKOS tarball") mark_as_advanced(KOKKOS_URL) mark_as_advanced(KOKKOS_MD5) GetFallbackURL(KOKKOS_URL KOKKOS_FALLBACK) @@ -71,7 +96,7 @@ if(DOWNLOAD_KOKKOS) add_dependencies(LAMMPS::KOKKOSCORE kokkos_build) add_dependencies(LAMMPS::KOKKOSCONTAINERS kokkos_build) elseif(EXTERNAL_KOKKOS) - find_package(Kokkos 4.3.01 REQUIRED CONFIG) + find_package(Kokkos 4.4.01 REQUIRED CONFIG) target_link_libraries(lammps 
PRIVATE Kokkos::kokkos) else() set(LAMMPS_LIB_KOKKOS_SRC_DIR ${LAMMPS_LIB_SOURCE_DIR}/kokkos) @@ -127,7 +152,7 @@ if(PKG_KSPACE) ${KOKKOS_PKG_SOURCES_DIR}/grid3d_kokkos.cpp ${KOKKOS_PKG_SOURCES_DIR}/remap_kokkos.cpp) set(FFT_KOKKOS "KISS" CACHE STRING "FFT library for Kokkos-enabled KSPACE package") - set(FFT_KOKKOS_VALUES KISS FFTW3 MKL HIPFFT CUFFT) + set(FFT_KOKKOS_VALUES KISS FFTW3 MKL NVPL HIPFFT CUFFT MKL_GPU) set_property(CACHE FFT_KOKKOS PROPERTY STRINGS ${FFT_KOKKOS_VALUES}) validate_option(FFT_KOKKOS FFT_KOKKOS_VALUES) string(TOUPPER ${FFT_KOKKOS} FFT_KOKKOS) @@ -137,10 +162,8 @@ if(PKG_KSPACE) message(FATAL_ERROR "The CUDA backend of Kokkos requires either KISS FFT or CUFFT.") elseif(FFT_KOKKOS STREQUAL "KISS") message(WARNING "Using KISS FFT with the CUDA backend of Kokkos may be sub-optimal.") - target_compile_definitions(lammps PRIVATE -DFFT_KOKKOS_KISS) elseif(FFT_KOKKOS STREQUAL "CUFFT") find_package(CUDAToolkit REQUIRED) - target_compile_definitions(lammps PRIVATE -DFFT_KOKKOS_CUFFT) target_link_libraries(lammps PRIVATE CUDA::cufft) endif() elseif(Kokkos_ENABLE_HIP) @@ -152,10 +175,21 @@ if(PKG_KSPACE) elseif(FFT_KOKKOS STREQUAL "HIPFFT") include(DetectHIPInstallation) find_package(hipfft REQUIRED) - target_compile_definitions(lammps PRIVATE -DFFT_KOKKOS_HIPFFT) target_link_libraries(lammps PRIVATE hip::hipfft) endif() + elseif(FFT_KOKKOS STREQUAL "MKL_GPU") + if(NOT Kokkos_ENABLE_SYCL) + message(FATAL_ERROR "Using MKL_GPU FFT currently requires the SYCL backend of Kokkos.") + endif() + find_package(MKL REQUIRED) + target_link_libraries(lammps PRIVATE mkl_sycl_dft mkl_intel_ilp64 mkl_tbb_thread mkl_core tbb) + elseif(FFT_KOKKOS STREQUAL "MKL") + find_package(MKL REQUIRED) + elseif(FFT_KOKKOS STREQUAL "NVPL") + find_package(nvpl_fft REQUIRED) + target_link_libraries(lammps PRIVATE nvpl::fftw) endif() + target_compile_definitions(lammps PRIVATE -DFFT_KOKKOS_${FFT_KOKKOS}) endif() if(PKG_ML-IAP) diff --git a/cmake/Modules/Packages/KSPACE.cmake 
b/cmake/Modules/Packages/KSPACE.cmake index 1fdd898144..3801140fe0 100644 --- a/cmake/Modules/Packages/KSPACE.cmake +++ b/cmake/Modules/Packages/KSPACE.cmake @@ -10,7 +10,7 @@ if(${FFTW}_FOUND) else() set(FFT "KISS" CACHE STRING "FFT library for KSPACE package") endif() -set(FFT_VALUES KISS FFTW3 MKL) +set(FFT_VALUES KISS FFTW3 MKL NVPL) set_property(CACHE FFT PROPERTY STRINGS ${FFT_VALUES}) validate_option(FFT FFT_VALUES) string(TOUPPER ${FFT} FFT) @@ -41,6 +41,10 @@ elseif(FFT STREQUAL "MKL") target_compile_definitions(lammps PRIVATE -DFFT_MKL_THREADS) endif() target_link_libraries(lammps PRIVATE MKL::MKL) +elseif(FFT STREQUAL "NVPL") + find_package(nvpl_fft REQUIRED) + target_compile_definitions(lammps PRIVATE -DFFT_NVPL) + target_link_libraries(lammps PRIVATE nvpl::fftw) else() # last option is KISSFFT target_compile_definitions(lammps PRIVATE -DFFT_KISS) diff --git a/cmake/Modules/Packages/ML-PACE.cmake b/cmake/Modules/Packages/ML-PACE.cmake index 248b8eea76..8660898138 100644 --- a/cmake/Modules/Packages/ML-PACE.cmake +++ b/cmake/Modules/Packages/ML-PACE.cmake @@ -1,5 +1,11 @@ -set(PACELIB_URL "https://github.com/ICAMS/lammps-user-pace/archive/refs/tags/v.2023.11.25.fix.tar.gz" CACHE STRING "URL for PACE evaluator library sources") +# PACE library support for ML-PACE package +# set policy to silence warnings about timestamps of downloaded files. 
review occasionally if it may be set to NEW +if(POLICY CMP0135) + cmake_policy(SET CMP0135 OLD) +endif() + +set(PACELIB_URL "https://github.com/ICAMS/lammps-user-pace/archive/refs/tags/v.2023.11.25.fix.tar.gz" CACHE STRING "URL for PACE evaluator library sources") set(PACELIB_MD5 "b45de9a633f42ed65422567e3ce56f9f" CACHE STRING "MD5 checksum of PACE evaluator library tarball") mark_as_advanced(PACELIB_URL) mark_as_advanced(PACELIB_MD5) diff --git a/cmake/Modules/Packages/PLUMED.cmake b/cmake/Modules/Packages/PLUMED.cmake index 8312589478..8dab157a24 100644 --- a/cmake/Modules/Packages/PLUMED.cmake +++ b/cmake/Modules/Packages/PLUMED.cmake @@ -1,5 +1,10 @@ # Plumed2 support for PLUMED package +# set policy to silence warnings about timestamps of downloaded files. review occasionally if it may be set to NEW +if(POLICY CMP0135) + cmake_policy(SET CMP0135 OLD) +endif() + # for supporting multiple concurrent plumed2 installations for debugging and testing set(PLUMED_SUFFIX "" CACHE STRING "Suffix for Plumed2 library") mark_as_advanced(PLUMED_SUFFIX) @@ -27,9 +32,9 @@ endif() # Note: must also adjust check for supported API versions in # fix_plumed.cpp when version changes from v2.n.x to v2.n+1.y -set(PLUMED_URL "https://github.com/plumed/plumed2/releases/download/v2.9.1/plumed-src-2.9.1.tgz" +set(PLUMED_URL "https://github.com/plumed/plumed2/releases/download/v2.9.2/plumed-src-2.9.2.tgz" CACHE STRING "URL for PLUMED tarball") -set(PLUMED_MD5 "c3b2d31479c1e9ce211719d40e9efbd7" CACHE STRING "MD5 checksum of PLUMED tarball") +set(PLUMED_MD5 "04862602a372c1013bdfee2d6d03bace" CACHE STRING "MD5 checksum of PLUMED tarball") mark_as_advanced(PLUMED_URL) mark_as_advanced(PLUMED_MD5) @@ -81,6 +86,9 @@ if((CMAKE_SYSTEM_NAME STREQUAL "Windows") AND (CMAKE_CROSSCOMPILING)) DEPENDS plumed_build COMMENT "Copying Plumed files" ) + if(CMAKE_PROJECT_NAME STREQUAL "lammps") + target_link_libraries(lammps INTERFACE LAMMPS::PLUMED) + endif() else() @@ -155,6 +163,9 @@ else() endif() 
set_target_properties(LAMMPS::PLUMED PROPERTIES INTERFACE_INCLUDE_DIRECTORIES ${INSTALL_DIR}/include) file(MAKE_DIRECTORY ${INSTALL_DIR}/include) + if(CMAKE_PROJECT_NAME STREQUAL "lammps") + target_link_libraries(lammps PRIVATE LAMMPS::PLUMED) + endif() else() find_package(PkgConfig REQUIRED) pkg_check_modules(PLUMED REQUIRED plumed${PLUMED_SUFFIX}) @@ -169,7 +180,9 @@ else() endif() set_target_properties(LAMMPS::PLUMED PROPERTIES INTERFACE_LINK_LIBRARIES "${PLUMED_LOAD}") set_target_properties(LAMMPS::PLUMED PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${PLUMED_INCLUDE_DIRS}") + if(CMAKE_PROJECT_NAME STREQUAL "lammps") + target_link_libraries(lammps PUBLIC LAMMPS::PLUMED) + endif() endif() endif() -target_link_libraries(lammps PRIVATE LAMMPS::PLUMED) diff --git a/cmake/Modules/Packages/RHEO.cmake b/cmake/Modules/Packages/RHEO.cmake deleted file mode 100644 index 7639acd8bc..0000000000 --- a/cmake/Modules/Packages/RHEO.cmake +++ /dev/null @@ -1,2 +0,0 @@ -find_package(GSL 2.6 REQUIRED) -target_link_libraries(lammps PRIVATE GSL::gsl) diff --git a/cmake/packaging/LAMMPS_DMG_Background.xcf b/cmake/packaging/LAMMPS_DMG_Background.xcf new file mode 100644 index 0000000000..cff3222717 Binary files /dev/null and b/cmake/packaging/LAMMPS_DMG_Background.xcf differ diff --git a/cmake/presets/kokkos-sycl-intel.cmake b/cmake/presets/kokkos-sycl-intel.cmake new file mode 100644 index 0000000000..3fc75e4b2d --- /dev/null +++ b/cmake/presets/kokkos-sycl-intel.cmake @@ -0,0 +1,18 @@ +# preset that enables KOKKOS and selects SYCL compilation with OpenMP +# enabled as well. Also sets some performance related compiler flags. 
+set(PKG_KOKKOS ON CACHE BOOL "" FORCE) +set(Kokkos_ENABLE_SERIAL ON CACHE BOOL "" FORCE) +set(Kokkos_ENABLE_OPENMP ON CACHE BOOL "" FORCE) +set(Kokkos_ENABLE_CUDA OFF CACHE BOOL "" FORCE) +set(Kokkos_ENABLE_SYCL ON CACHE BOOL "" FORCE) + +# hide deprecation warnings temporarily for stable release +set(Kokkos_ENABLE_DEPRECATION_WARNINGS OFF CACHE BOOL "" FORCE) + +set(CMAKE_CXX_COMPILER icpx CACHE STRING "" FORCE) +set(MPI_CXX_COMPILER "mpicxx" CACHE STRING "" FORCE) +set(CMAKE_CXX_STANDARD 17 CACHE STRING "" FORCE) +# Silence everything +set(CMAKE_CXX_FLAGS "-w" CACHE STRING "" FORCE) +set(CMAKE_EXE_LINKER_FLAGS "-fsycl -flink-huge-device-code -fsycl-max-parallel-link-jobs=32 -fsycl-targets=spir64_gen -Xsycl-target-backend \"-device 12.60.7\" " CACHE STRING "" FORCE) +set(CMAKE_TUNE_FLAGS "-O3 -fsycl -fsycl-device-code-split=per_kernel -fsycl-targets=spir64_gen" CACHE STRING "" FORCE) diff --git a/cmake/presets/kokkos-sycl.cmake b/cmake/presets/kokkos-sycl-nvidia.cmake similarity index 100% rename from cmake/presets/kokkos-sycl.cmake rename to cmake/presets/kokkos-sycl-nvidia.cmake diff --git a/cmake/presets/mingw-cross.cmake b/cmake/presets/mingw-cross.cmake index 100ce13632..413744b078 100644 --- a/cmake/presets/mingw-cross.cmake +++ b/cmake/presets/mingw-cross.cmake @@ -67,6 +67,7 @@ set(WIN_PACKAGES REACTION REAXFF REPLICA + RHEO RIGID SHOCK SMTBQ diff --git a/cmake/presets/most.cmake b/cmake/presets/most.cmake index d01642f94d..05282eebdd 100644 --- a/cmake/presets/most.cmake +++ b/cmake/presets/most.cmake @@ -60,6 +60,7 @@ set(ALL_PACKAGES REACTION REAXFF REPLICA + RHEO RIGID SHOCK SPH diff --git a/cmake/presets/windows.cmake b/cmake/presets/windows.cmake index 403d40efa4..71241c559c 100644 --- a/cmake/presets/windows.cmake +++ b/cmake/presets/windows.cmake @@ -60,6 +60,7 @@ set(WIN_PACKAGES REACTION REAXFF REPLICA + RHEO RIGID SHOCK SMTBQ diff --git a/doc/lammps.1 b/doc/lammps.1 index 00d8a4f61f..75581bd008 100644 --- a/doc/lammps.1 +++ b/doc/lammps.1 @@ 
-1,7 +1,7 @@ -.TH LAMMPS "1" "27 June 2024" "2024-06-27" +.TH LAMMPS "1" "29 August 2024" "2024-08-29" .SH NAME .B LAMMPS -\- Molecular Dynamics Simulator. Version 27 June 2024 +\- Molecular Dynamics Simulator. Version 29 August 2024 .SH SYNOPSIS .B lmp diff --git a/doc/msi2lmp.1 b/doc/msi2lmp.1 index 5cb0754e4f..075e25e3b9 100644 --- a/doc/msi2lmp.1 +++ b/doc/msi2lmp.1 @@ -1,4 +1,4 @@ -.TH MSI2LMP "1" "v3.9.10" "2023-03-10" +.TH MSI2LMP "1" "v3.9.11" "2024-09-06" .SH NAME .B MSI2LMP \- Converter for Materials Studio files to LAMMPS @@ -101,7 +101,7 @@ msi2lmp decane -c 0 -f oplsaa .SH COPYRIGHT -© 2003--2022 Sandia Corporation +© 2003--2024 Sandia Corporation This package is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License version 2 as diff --git a/doc/src/Build_development.rst b/doc/src/Build_development.rst index 9cd938280b..3adec76abb 100644 --- a/doc/src/Build_development.rst +++ b/doc/src/Build_development.rst @@ -138,12 +138,27 @@ during development: The status of this automated testing can be viewed on `https://ci.lammps.org `_. -The scripts and inputs for integration, run, and regression testing -are maintained in a -`separate repository `_ -of the LAMMPS project on GitHub. A few tests are also run as GitHub -Actions and their configuration files are in the ``.github/workflows/`` -folder of the LAMMPS git tree. +The scripts and inputs for integration, run, and legacy regression +testing are maintained in a `separate repository +`_ of the LAMMPS project on +GitHub. A few tests are also run as GitHub Actions and their +configuration files are in the ``.github/workflows/`` folder of the +LAMMPS git tree. + +Regression tests can also be performed locally with the :ref:`regression +tester tool `. The tool checks if a given LAMMPS binary run +with selected input examples produces thermo output that is consistent +with the provided log files. 
The script can be run in one pass over all +available input files, but it can also first create multiple lists of +inputs or folders that can then be run with multiple workers +concurrently to speed things up. Another mode allows doing a quick +check of inputs that contain commands that have changes in the current +checkout branch relative to a git branch. This works similarly to the two +pass mode, but will select only shorter runs and no more than 100 inputs +that are chosen randomly. This ensures that this test runs +significantly faster compared to the full test run. These test runs can +also be performed with instrumented LAMMPS binaries (see previous +section). The unit testing facility is integrated into the CMake build process of the LAMMPS source code distribution itself. It can be enabled by @@ -630,11 +645,35 @@ The following target are available for both, GNU make and CMake: GitHub command line interface ----------------------------- -GitHub is developing a `tool for the command line -`_ that interacts with the GitHub website via a -command called ``gh``. This can be extremely convenient when working -with a Git repository hosted on GitHub (like LAMMPS). It is thus highly -recommended to install it when doing LAMMPS development. +GitHub has developed a `command line tool `_ +to interact with the GitHub website via a command called ``gh``. +This is extremely convenient when working with a Git repository hosted +on GitHub (like LAMMPS). It is thus highly recommended to install it +when doing LAMMPS development. To use ``gh`` you must be within a git +checkout of a repository and you must obtain an authentication token +to connect your checkout with a GitHub user. This is done with the +command: ``gh auth login`` where you then have to follow the prompts. +Here are some examples: -The capabilities of the ``gh`` command is continually expanding, so -please see the documentation at https://cli.github.com/manual/ +..
list-table:: + :header-rows: 1 + :widths: 34 66 + + * - Command + - Description + * - ``gh pr list`` + - List currently open pull requests + * - ``gh pr checks 404`` + - Shows the status of all checks for pull request #404 + * - ``gh pr view 404`` + - Shows the description and recent comments for pull request #404 + * - ``gh co 404`` + - Check out the branch from pull request #404; set up for pushing changes + * - ``gh issue list`` + - List currently open issues + * - ``gh issue view 430 --comments`` + - Shows the description and all comments for issue #430 + +The capabilities of the ``gh`` command are continually expanding, so +for more details please see the documentation at https://cli.github.com/manual/ +or use ``gh --help`` or ``gh <command> --help`` for embedded help. diff --git a/doc/src/Build_extras.rst b/doc/src/Build_extras.rst index aea039f0bc..8465bea829 100644 --- a/doc/src/Build_extras.rst +++ b/doc/src/Build_extras.rst @@ -7,6 +7,8 @@ in addition to .. list-table:: :align: center :header-rows: 1 + :widths: 50 50 + :width: 80% * - CMake build - Traditional make @@ -115,7 +117,7 @@ GPU package To build with this package, you must choose options for precision and which GPU hardware to build for. The GPU package currently supports -three different types of backends: OpenCL, CUDA and HIP. +three different types of back ends: OpenCL, CUDA and HIP. CMake build ^^^^^^^^^^^ @@ -205,7 +207,7 @@ necessary for ``hipcc`` and the linker to work correctly. .. versionadded:: 3Aug2022 Using the CHIP-SPV implementation of HIP is supported. It allows one to -run HIP code on Intel GPUs via the OpenCL or Level Zero backends. To use +run HIP code on Intel GPUs via the OpenCL or Level Zero back ends. To use CHIP-SPV, you must set ``-DHIP_USE_DEVICE_SORT=OFF`` in your CMake command line as CHIP-SPV does not yet support hipCUB. As of Summer 2022, the use of HIP for Intel GPUs is experimental. You should only use this
You should only use this @@ -751,14 +753,27 @@ This list was last updated for version 4.3.0 of the Kokkos library. platform-appropriate vendor library: rocFFT on AMD GPUs or cuFFT on NVIDIA GPUs. - To simplify compilation, five preset files are included in the + For Intel GPUs using SYCL, set these variables: + + .. code-block:: bash + + -D Kokkos_ARCH_HOSTARCH=yes # HOSTARCH = HOST from list above + -D Kokkos_ARCH_GPUARCH=yes # GPUARCH = GPU from list above + -D Kokkos_ENABLE_SYCL=yes + -D Kokkos_ENABLE_OPENMP=yes + -D FFT_KOKKOS=MKL_GPU + + This will enable FFTs on the GPU using the oneMKL library. + + To simplify compilation, six preset files are included in the ``cmake/presets`` folder, ``kokkos-serial.cmake``, ``kokkos-openmp.cmake``, ``kokkos-cuda.cmake``, - ``kokkos-hip.cmake``, and ``kokkos-sycl.cmake``. They will enable - the KOKKOS package and enable some hardware choices. For GPU - support those preset files must be customized to match the - hardware used. So to compile with CUDA device parallelization with - some common packages enabled, you can do the following: + ``kokkos-hip.cmake``, ``kokkos-sycl-nvidia.cmake``, and + ``kokkos-sycl-intel.cmake``. They will enable the KOKKOS + package and enable some hardware choices. For GPU support those + preset files must be customized to match the hardware used. So + to compile with CUDA device parallelization with some common + packages enabled, you can do the following: .. code-block:: bash @@ -830,6 +845,18 @@ This list was last updated for version 4.3.0 of the Kokkos library. FFT_INC = -DFFT_HIPFFT # enable use of hipFFT (optional) FFT_LIB = -lhipfft # link to hipFFT library + For Intel GPUs using SYCL: + + .. 
code-block:: make + + KOKKOS_DEVICES = SYCL + KOKKOS_ARCH = HOSTARCH,GPUARCH # HOSTARCH = HOST from list above that is + # hosting the GPU + # GPUARCH = GPU from list above + FFT_INC = -DFFT_KOKKOS_MKL_GPU # enable use of oneMKL for Intel GPUs (optional) + # link to oneMKL FFT library + FFT_LIB = -lmkl_sycl_dft -lmkl_intel_ilp64 -lmkl_tbb_thread -lmkl_core -ltbb + Advanced KOKKOS compilation settings ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -1517,6 +1544,11 @@ in lib/pace or somewhere else, which must be done before building LAMMPS with this package. The code for the library can be found at: `https://github.com/ICAMS/lammps-user-pace/ `_ +Instead of including the ML-PACE package directly into LAMMPS, it +is also possible to skip this step and build the ML-PACE package as +a plugin using the CMake script files in the ``examples/PACKAGE/pace/plugin`` +folder and then load this plugin at runtime with the :doc:`plugin command <plugin>`. + .. tabs:: .. tab:: CMake build @@ -1701,6 +1733,11 @@ try a different one, switch to a different build system, consider a global PLUMED installation or consider downloading PLUMED during the LAMMPS build. +Instead of including the PLUMED package directly into LAMMPS, it +is also possible to skip this step and build the PLUMED package as +a plugin using the CMake script files in the ``examples/PACKAGE/plumed/plugin`` +folder and then load this plugin at runtime with the :doc:`plugin command <plugin>`. + .. tabs:: .. tab:: CMake build @@ -2214,28 +2251,38 @@ verified to work in February 2020 with Quantum Espresso versions 6.3 to RHEO package ------------ -To build with this package you must have the `GNU Scientific Library -(GSL) ` installed in locations that -are accessible in your environment. The GSL library should be at least -version 2.7. +This package depends on the BPM package. .. tabs:: .. tab:: CMake build - If CMake cannot find the GSL library or include files, you can set: - ..
code-block:: bash - -D GSL_ROOT_DIR=path # path to root of GSL installation + -D PKG_RHEO=yes # enable the package itself + -D PKG_BPM=yes # the RHEO package requires BPM + -D USE_INTERNAL_LINALG=value # prefer internal LAPACK if true + + Some features in the RHEO package are dependent on code in the BPM + package so the latter one *must* be enabled as well. + + The RHEO package also requires LAPACK (and BLAS) and CMake + can identify their locations and pass that info to the RHEO + build script. But on some systems this may cause problems when + linking or the dependency is not desired. By using the setting + ``-D USE_INTERNAL_LINALG=yes`` when running the CMake + configuration, you will select compiling and linking the bundled + linear algebra library and work around the limitations. .. tab:: Traditional make - LAMMPS will try to auto-detect the GSL compiler and linker flags - from the corresponding ``pkg-config`` file (``gsl.pc``), otherwise - you can edit the file ``lib/rheo/Makefile.lammps`` - to specify the paths and library names where indicated by comments. - This must be done **before** the package is installed. + The RHEO package requires LAPACK (and BLAS) which can be either + a system provided library or the bundled "linalg" library. This + is a subset of LAPACK translated to C++. For that, one of the + provided ``Makefile.lammps.<type>`` files needs to be copied + to ``Makefile.lammps`` and edited as needed. The default file + uses the bundled "linalg" library, which can be built by + ``make lib-linalg args='-m serial'`` in the ``src`` folder. ---------- diff --git a/doc/src/Build_settings.rst b/doc/src/Build_settings.rst index dc79cc3ed9..e4a53ddee7 100644 --- a/doc/src/Build_settings.rst +++ b/doc/src/Build_settings.rst @@ -67,10 +67,10 @@ libraries and better pipelining for packing and communication. ..
code-block:: bash - -D FFT=value # FFTW3 or MKL or KISS, default is FFTW3 if found, - # else KISS - -D FFT_KOKKOS=value # FFTW3 or MKL or KISS or CUFFT or HIPFFT, - # default is KISS + -D FFT=value # FFTW3 or MKL or NVPL or KISS, + # default is FFTW3 if found, else KISS + -D FFT_KOKKOS=value # FFTW3 or MKL or NVPL or KISS or CUFFT + # or HIPFFT or MKL_GPU, default is KISS -D FFT_SINGLE=value # yes or no (default), no = double precision -D FFT_PACK=value # array (default) or pointer or memcpy -D FFT_USE_HEFFTE=value # yes or no (default), yes links to heFFTe @@ -103,6 +103,8 @@ libraries and better pipelining for packing and communication. -D FFT_HEFFTE_BACKEND=value # FFTW or MKL or empty/undefined for the stock # heFFTe back end -D Heffte_ROOT=path # path to an existing heFFTe installation + -D nvpl_fft_INCLUDE_DIR=path # path to NVPL FFT include files + -D nvpl_fft_LIBRARY_DIR=path # path to NVPL FFT libraries .. note:: @@ -121,9 +123,10 @@ libraries and better pipelining for packing and communication. .. code-block:: make FFT_INC = -DFFT_ # where is KISS (default), FFTW3, - # FFTW (same as FFTW3), or MKL + # FFTW (same as FFTW3), NVPL, or MKL FFT_INC = -DFFT_KOKKOS_ # where is KISS (default), FFTW3, - # FFTW (same as FFTW3), MKL, CUFFT, or HIPFFT + # FFTW (same as FFTW3), NVPL, MKL, CUFFT, + # HIPFFT, or MKL_GPU FFT_INC = -DFFT_SINGLE # do not specify for double precision FFT_INC = -DFFT_FFTW_THREADS # enable using threaded FFTW3 libraries FFT_INC = -DFFT_MKL_THREADS # enable using threaded FFTs with MKL libraries @@ -141,6 +144,9 @@ libraries and better pipelining for packing and communication. # cuFFT either precision FFT_LIB = -lcufft + # MKL_GPU either precision + FFT_LIB = -lmkl_sycl_dft -lmkl_intel_ilp64 -lmkl_tbb_thread -lmkl_core -ltbb + # FFTW3 double precision FFT_LIB = -lfftw3 @@ -165,6 +171,10 @@ libraries and better pipelining for packing and communication. 
# MKL with automatic runtime selection of interface libs FFT_LIB = -lmkl_rt + # threaded NVPL FFT + FFT_LIB = -lnvpl_fftw + + As with CMake, you do not need to set paths in ``FFT_INC`` or ``FFT_PATH``, if the compiler can find the FFT header and library files in its default search path. You must specify ``FFT_LIB`` @@ -218,10 +228,15 @@ The Intel MKL math library is part of the Intel compiler suite. It can be used with the Intel or GNU compiler (see the ``FFT_LIB`` setting above). +The NVIDIA Performance Libraries (NVPL) FFT library is optimized for NVIDIA +Grace Armv9.0 architecture. You can download it from https://docs.nvidia.com/nvpl/ + The cuFFT and hipFFT FFT libraries are packaged with NVIDIA's CUDA and AMD's HIP installations, respectively. These FFT libraries require the Kokkos acceleration package to be enabled and the Kokkos back end to be -GPU-resident (i.e., HIP or CUDA). +GPU-resident (i.e., HIP or CUDA). Similarly, GPU offload of FFTs on +Intel GPUs with oneMKL currently requires the Kokkos acceleration +package to be enabled with the SYCL back end. Performing 3d FFTs in parallel can be time-consuming due to data access and required communication. This cost can be reduced by performing @@ -492,7 +507,7 @@ during a run. Support for downloading files ----------------------------- -.. versionadded:: TBD +.. versionadded:: 29Aug2024 The :doc:`geturl command ` command uses the `the libcurl library `_ to download files. This requires that diff --git a/doc/src/Commands_removed.rst b/doc/src/Commands_removed.rst index f902a61515..ea8b3d4b03 100644 --- a/doc/src/Commands_removed.rst +++ b/doc/src/Commands_removed.rst @@ -171,7 +171,7 @@ instructions to install i-PI from PyPI via pip are provided. LAMMPS shell ------------ -.. versionchanged:: TBD +.. versionchanged:: 29Aug2024 The LAMMPS shell has been removed from the LAMMPS distribution. Users are encouraged to use the :ref:`LAMMPS-GUI ` tool instead. 
diff --git a/doc/src/Developer_plugins.rst b/doc/src/Developer_plugins.rst index 4cfdc92504..354350dde7 100644 --- a/doc/src/Developer_plugins.rst +++ b/doc/src/Developer_plugins.rst @@ -283,7 +283,7 @@ in the ``examples/kim/plugin`` folder. No changes to the sources of the KIM package themselves are needed; only the plugin interface and loader code needs to be added. This example only supports building with CMake, but is probably a more typical example. To compile you need to run CMake -with -DLAMMPS_SOURCE_DIR=. Other +with ``-DLAMMPS_SOURCE_DIR=``. Other configuration setting are identical to those for compiling LAMMPS. A second example for a plugin from a package is in the diff --git a/doc/src/Developer_write.rst b/doc/src/Developer_write.rst index ef4d06a5f6..54b1b6eb81 100644 --- a/doc/src/Developer_write.rst +++ b/doc/src/Developer_write.rst @@ -12,3 +12,4 @@ details are provided for writing code for LAMMPS. Developer_write_pair Developer_write_fix + Developer_write_command diff --git a/doc/src/Developer_write_command.rst b/doc/src/Developer_write_command.rst new file mode 100644 index 0000000000..16ac2092f6 --- /dev/null +++ b/doc/src/Developer_write_command.rst @@ -0,0 +1,348 @@ +Writing a new command style +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Command styles allow to do system manipulations or interfaces to the +operating system. + +In the text below, we will discuss the implementation of one example. As +shown on the page for :doc:`writing or extending command styles +`, in order to implement a new command style, a new class +must be written that is either directly or indirectly derived from the +``Command`` class. There is just one method that must be implemented: +``Command::command()``. In addition, a custom constructor is needed to get +access to the members of the ``LAMMPS`` class like the ``Error`` class to +print out error messages. The ``Command::command()`` method processes the +arguments passed to the command in the input and executes it. 
Any other +methods would be for the convenience of implementation of the new command. + +In general, new command styles should be added to the :ref:`EXTRA-COMMAND +package `. If you feel that your contribution should be +added to a different package, please consult with the :doc:`LAMMPS +developers ` first. The contributed code needs to support +the :doc:`traditional GNU make build process ` **and** the +:doc:`CMake build process `. + +---- + +Case 1: Implementing the geturl command +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In this section, we will describe the procedure of adding a simple command +style to LAMMPS: the :doc:`geturl command ` that allows to download +files directly without having to rely on an external program like "wget" or +"curl". The complete implementation can be found in the files +``src/EXTRA-COMMAND/geturl.cpp`` and ``src/EXTRA-COMMAND/geturl.h`` of the +LAMMPS source code. + +Interfacing the *libcurl* library +""""""""""""""""""""""""""""""""" + +Rather than implementing the various protocols for downloading files, we +rely on an external library: `libcurl library `_. +This requires that the library and its headers are installed. For the +traditional GNU make build system, this simply requires edits to the machine +makefile to add compilation flags like for other libraries. For the CMake +based build system, we need to add some lines to the file +``cmake/Modules/Packages/EXTRA-COMMAND.cmake``: + +.. code-block:: cmake + + find_package(CURL QUIET COMPONENTS HTTP HTTPS) + option(WITH_CURL "Enable libcurl support" ${CURL_FOUND}) + if(WITH_CURL) + find_package(CURL REQUIRED COMPONENTS HTTP HTTPS) + target_compile_definitions(lammps PRIVATE -DLAMMPS_CURL) + target_link_libraries(lammps PRIVATE CURL::libcurl) + endif() + +The first ``find_package()`` command uses a built-in CMake module to find +an existing *libcurl* installation with development headers and support for +using the HTTP and HTTPS protocols. 
The "QUIET" flag ensures that there is +no screen output and no error if the search fails. The status of the search +is recorded in the "${CURL_FOUND}" variable. That variable sets the default +of the WITH_CURL option, which toggles whether support for *libcurl* is included +or not. + +The second ``find_package()`` uses the "REQUIRED" flag to produce an error +if the WITH_CURL option was set to ``True``, but no suitable *libcurl* +implementation with development support was found. This construct is used +so that the CMake script code inside the ``if(WITH_CURL)`` and ``endif()`` +block can be expanded later to download and compile *libcurl* as part of the +LAMMPS build process, if it is not found locally. The +``target_compile_definitions()`` function adds the define ``-DLAMMPS_CURL`` +to the compilation flags when compiling objects for the LAMMPS library. +This makes it possible to always compile the :doc:`geturl command <geturl>`, but use +pre-processing to compile in the interface to *libcurl* only when it is +present and usable and otherwise stop with an error message about the +unavailability of *libcurl* to execute the functionality of the command. + +Header file +""""""""""" + +The first segment of any LAMMPS source should be the copyright and +license statement. Note the marker in the first line to indicate to +editors like emacs that this file is a C++ source, even though the .h +extension suggests a C source (this is a convention inherited from the +very beginning of the C++ version of LAMMPS). + +.. code-block:: c++ + + /* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software.
This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. + ------------------------------------------------------------------------- */ + +Every command style must be registered in LAMMPS by including the following +lines of code in the second part of the header after the copyright +message and before the include guards for the class definition: + +.. code-block:: c++ + + #ifdef COMMAND_CLASS + // clang-format off + CommandStyle(geturl,GetURL); + // clang-format on + #else + +This block between ``#ifdef COMMAND_CLASS`` and ``#else`` will be +included by the ``Input`` class in ``input.cpp`` to build a map of +"factory functions" that will create an instance of a Command class +and call its ``command()`` method. The map connects the name of the +command ``geturl`` with the name of the class ``GetURL``. During +compilation, LAMMPS constructs a file ``style_command.h`` that contains +``#include`` statements for all "installed" command styles. Before +including ``style_command.h`` into ``input.cpp``, the ``COMMAND_CLASS`` +define is set and the ``CommandStyle(name,class)`` macro defined. The +code of the macro adds the installed command styles to the "factory map" +which enables the ``Input`` to execute the command. + +The list of header files to include in ``style_command.h`` is automatically +updated by the build system if there are new files, so the presence of the +new header file in the ``src/EXTRA-COMMAND`` folder and the enabling of the +EXTRA-COMMAND package will trigger LAMMPS to include the new command style +when it is (re-)compiled. The "// clang-format" format comments are needed +so that running :ref:`clang-format ` on the file will not +insert unwanted blanks which would break the ``CommandStyle`` macro. + +The third part of the header file is the actual class definition of the +``GetURL`` class. 
This has the custom constructor and the ``command()`` +method implemented by this command style. For the constructor there is +nothing to do but to pass the ``lmp`` pointer to the base class. Since the +``command()`` method is labeled "virtual" in the base class, it must be +given the "override" property. + +.. code-block:: c++ + + #ifndef LMP_GETURL_H + #define LMP_GETURL_H + + #include "command.h" + + namespace LAMMPS_NS { + + class GetURL : public Command { + public: + GetURL(class LAMMPS *lmp) : Command(lmp) {}; + void command(int, char **) override; + }; + } // namespace LAMMPS_NS + #endif + #endif + +The "override" property helps to detect unexpected mismatches because +compilation will stop with an error in case the signature of a function +is changed in the base class without also changing it in all derived +classes. + +Implementation file +""""""""""""""""""" + +We move on to the implementation of the ``GetURL`` class in the +``geturl.cpp`` file. This file also starts with a LAMMPS copyright and +license header. Below that notice is typically the space where comments may +be added with additional information about this specific file, the +author(s), affiliation(s), and email address(es). This way the contributing +author(s) can be easily contacted, when there are questions about the +implementation later. Since the file(s) may be around for a long time, it +is beneficial to use some kind of "permanent" email address, if possible. + +.. code-block:: c++ + + /* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. 
+ + See the README file in the top-level LAMMPS directory. + ------------------------------------------------------------------------- */ + + /* ---------------------------------------------------------------------- + Contributing authors: Axel Kohlmeyer (Temple U), + ------------------------------------------------------------------------- */ + + #include "geturl.h" + + #include "comm.h" + #include "error.h" + + #if defined(LAMMPS_CURL) + #include <curl/curl.h> + #endif + + using namespace LAMMPS_NS; + +The second section of the implementation file has various include +statements. The include file for the class header has to come first, then a +couple of LAMMPS classes (sorted alphabetically) followed by the header for +the *libcurl* interface. This is wrapped into an ``#ifdef`` block so that +LAMMPS will compile this file without error when the *libcurl* header is not +available and thus the define not set. The final statement of this segment +imports the ``LAMMPS_NS::`` namespace globally for this file. This way, all +LAMMPS specific functions and classes do not have to be prefixed with +``LAMMPS_NS::``. + +The command() function (required) +""""""""""""""""""""""""""""""""" + +Since the required custom constructor is trivial and implemented in the +header, there is only one function that must be implemented for a command +style and that is the ``command()`` function. + +.. code-block:: c++ + + void GetURL::command(int narg, char **arg) + { + #if !defined(LAMMPS_CURL) + error->all(FLERR, "LAMMPS has not been compiled with libcurl support"); + #else + if (narg < 1) utils::missing_cmd_args(FLERR, "geturl", error); + int verify = 1; + int overwrite = 1; + int verbose = 0; + +This first part also has the ``#ifdef`` block depending on the LAMMPS_CURL +define. This way the command will simply print an error, if *libcurl* is +not available but will not fail to compile. Furthermore, it sets the +defaults for the following optional arguments. + +..
code-block:: c++ + + // process arguments + + std::string url = arg[0]; + + // sanity check + + if ((url.find(':') == std::string::npos) || (url.find('/') == std::string::npos)) + error->all(FLERR, "URL '{}' is not a supported URL", url); + + std::string output = url.substr(url.find_last_of('/') + 1); + if (output.empty()) error->all(FLERR, "URL '{}' must end in a file string", url); + +This block stores the positional, i.e. non-optional argument of the URL to +be downloaded and adds a couple of sanity checks on the string to make sure it is +a valid URL. Also it derives the default name of the output file from the URL. + +.. code-block:: c++ + + int iarg = 1; + while (iarg < narg) { + if (strcmp(arg[iarg], "output") == 0) { + if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, "geturl output", error); + output = arg[iarg + 1]; + ++iarg; + } else if (strcmp(arg[iarg], "overwrite") == 0) { + if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, "geturl overwrite", error); + overwrite = utils::logical(FLERR, arg[iarg + 1], false, lmp); + ++iarg; + } else if (strcmp(arg[iarg], "verify") == 0) { + if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, "geturl verify", error); + verify = utils::logical(FLERR, arg[iarg + 1], false, lmp); + ++iarg; + } else if (strcmp(arg[iarg], "verbose") == 0) { + if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, "geturl verbose", error); + verbose = utils::logical(FLERR, arg[iarg + 1], false, lmp); + ++iarg; + } else { + error->all(FLERR, "Unknown geturl keyword: {}", arg[iarg]); + } + ++iarg; + } + +This block parses the optional arguments following the URL and stops with an +error if there are arguments missing or an unknown argument is encountered. + +.. 
code-block:: c++ + + // only download files from rank 0 + + if (comm->me != 0) return; + + if (!overwrite && platform::file_is_readable(output)) return; + + // open output file for writing + + FILE *out = fopen(output.c_str(), "wb"); + if (!out) + error->all(FLERR, "Cannot open output file {} for writing: {}", output, utils::getsyserror()); + +Here all MPI ranks other than 0 will return, so that the URL download will +only happen from a single MPI rank. For that rank the output file is opened +for writing using the C library function ``fopen()``. + +.. code-block:: c++ + + // initialize curl and perform download + + CURL *curl; + curl_global_init(CURL_GLOBAL_DEFAULT); + curl = curl_easy_init(); + if (curl) { + (void) curl_easy_setopt(curl, CURLOPT_URL, url.c_str()); + (void) curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *) out); + (void) curl_easy_setopt(curl, CURLOPT_FILETIME, 1L); + (void) curl_easy_setopt(curl, CURLOPT_FAILONERROR, 1L); + if (verbose && screen) { + (void) curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L); + (void) curl_easy_setopt(curl, CURLOPT_STDERR, (void *) screen); + } + if (!verify) { + (void) curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L); + (void) curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0L); + } + auto res = curl_easy_perform(curl); + if (res != CURLE_OK) { + long response = 0L; + curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &response); + error->one(FLERR, "Download of {} failed with: {} {}", output, curl_easy_strerror(res), + response); + } + curl_easy_cleanup(curl); + +This block now implements the actual URL download with the selected options +via the "easy" interface of *libcurl*. For the details of what these +function calls do, please have a look at the `*libcurl documentation +`_. + + .. code-block:: c++ + + } + curl_global_cleanup(); + fclose(out); + #endif + } + +Finally, the previously opened file is closed and the command is complete. 
diff --git a/doc/src/Developer_write_pair.rst b/doc/src/Developer_write_pair.rst index 1433effc54..5d5e081042 100644 --- a/doc/src/Developer_write_pair.rst +++ b/doc/src/Developer_write_pair.rst @@ -160,7 +160,7 @@ message and before the include guards for the class definition: #endif -This block of between ``#ifdef PAIR_CLASS`` and ``#else`` will be +This block between ``#ifdef PAIR_CLASS`` and ``#else`` will be included by the ``Force`` class in ``force.cpp`` to build a map of "factory functions" that will create an instance of these classes and return a pointer to it. The map connects the name of the pair style, diff --git a/doc/src/Howto_cmake.rst b/doc/src/Howto_cmake.rst index 55e5b171a6..43aa519293 100644 --- a/doc/src/Howto_cmake.rst +++ b/doc/src/Howto_cmake.rst @@ -348,7 +348,7 @@ Some common LAMMPS specific variables * - ``FFT`` - select which FFT library to use: ``FFTW3``, ``MKL``, ``KISS`` (default, unless FFTW3 is found) * - ``FFT_KOKKOS`` - - select which FFT library to use in Kokkos-enabled styles: ``FFTW3``, ``MKL``, ``HIPFFT``, ``CUFFT``, ``KISS`` (default) + - select which FFT library to use in Kokkos-enabled styles: ``FFTW3``, ``MKL``, ``HIPFFT``, ``CUFFT``, ``MKL_GPU``, ``KISS`` (default) * - ``FFT_SINGLE`` - select whether to use single precision FFTs (default: ``off``) * - ``WITH_JPEG`` diff --git a/doc/src/Howto_lammps_gui.rst b/doc/src/Howto_lammps_gui.rst index 4d283cbcc8..21e6a31ccc 100644 --- a/doc/src/Howto_lammps_gui.rst +++ b/doc/src/Howto_lammps_gui.rst @@ -19,9 +19,9 @@ to the online LAMMPS documentation for known LAMMPS commands and styles. Pre-compiled, ready-to-use LAMMPS-GUI executables for Linux x86\_64 (Ubuntu 20.04LTS or later and compatible), macOS (version 11 aka Big Sur or later), and Windows (version 10 or later) :ref:`are available - ` for download. None-MPI LAMMPS executables for - running LAMMPS from the command line and :doc:`some LAMMPS tools ` - are also included. + ` for download. 
Non-MPI LAMMPS executables (as + ``lmp``) for running LAMMPS from the command line and compiled executables of :doc:`some + LAMMPS tools ` are also included. The source code for LAMMPS-GUI is included in the LAMMPS source code distribution and can be found in the ``tools/lammps-gui`` folder. It @@ -29,40 +29,50 @@ to the online LAMMPS documentation for known LAMMPS commands and styles. `. LAMMPS-GUI tries to provide an experience similar to what people -traditionally would have running LAMMPS using a command line window -and the console LAMMPS executable but just rolled into a single executable: +traditionally would have running LAMMPS using a command line window and +the console LAMMPS executable but just rolled into a single executable: - writing & editing LAMMPS input files with a text editor - run LAMMPS on those input file with selected command line flags -- use or extract data from the created files and visualize it with - either a molecular visualization program or a plotting program +- extract data from the created files and visualize it with + external software That procedure is quite effective for people proficient in using the command line, as that allows them to use tools for the individual steps -that they are most comfortable with. It is often *required* to adopt -this workflow when running LAMMPS simulations on high-performance +that they are most comfortable with. In fact, it is often *required* to +adopt this workflow when running LAMMPS simulations on high-performance computing facilities. The main benefit of using LAMMPS-GUI is that many basic tasks can be -done directly from the GUI without switching to a text console window or -using external programs, let alone writing scripts to extract data from -the generated output. It also integrates well with graphical desktop -environments where the `.lmp` filename extension can be registered with -LAMMPS-GUI as the executable to launch when double clicking on such -files. 
Also, LAMMPS-GUI has support for drag-n-drop, i.e. an input -file can be selected and then moved and dropped on the LAMMPS-GUI -executable, and LAMMPS-GUI will launch and read the file into its -buffer. +done directly from the GUI **without** switching to a text console +window or using external programs, let alone writing scripts to extract +data from the generated output. It also integrates well with graphical +desktop environments where the `.lmp` filename extension can be +registered with LAMMPS-GUI as the executable to launch when double +clicking on such files. Also, LAMMPS-GUI has support for drag-n-drop, +i.e. an input file can be selected and then moved and dropped on the +LAMMPS-GUI executable, and LAMMPS-GUI will launch and read the file into +its buffer. In many cases LAMMPS-GUI will be integrated into the +graphical desktop environment and can be launched like other +applications. LAMMPS-GUI thus makes it easier for beginners to get started running simple LAMMPS simulations. It is very suitable for tutorials on LAMMPS since you only need to learn how to use a single program for most tasks and thus time can be saved and people can focus on learning LAMMPS. -The tutorials at https://lammpstutorials.github.io/ were specifically +The tutorials at https://lammpstutorials.github.io/ are specifically updated for use with LAMMPS-GUI. Another design goal is to keep the barrier low when replacing part of -the functionality of LAMMPS-GUI with external tools. +the functionality of LAMMPS-GUI with external tools. 
That said, LAMMPS-GUI +has some unique functionality that is not found elsewhere: + +- auto-adapting to features available in the integrated LAMMPS library +- interactive visualization using the :doc:`dump image ` + command with the option to copy-paste the resulting settings +- automatic slide show generation from dump image output at runtime +- automatic plotting of thermodynamics data at runtime +- inspection of binary restart files The following text provides a detailed tour of the features and functionality of LAMMPS-GUI. Suggestions for new features and @@ -134,9 +144,13 @@ When LAMMPS-GUI starts, it shows the main window, labeled *Editor*, with either an empty buffer or the contents of the file used as argument. In the latter case it may look like the following: -.. image:: JPG/lammps-gui-main.png - :align: center - :scale: 50% +.. |gui-main1| image:: JPG/lammps-gui-main.png + :width: 48% + +.. |gui-main2| image:: JPG/lammps-gui-dark.png + :width: 48% + +|gui-main1| |gui-main2| There is the typical menu bar at the top, then the main editor buffer, and a status bar at the bottom. The input file contents are shown @@ -276,8 +290,6 @@ right mouse button into the *Output* window text area. :align: center :scale: 50% -.. versionadded:: 1.6 - Should the *Output* window contain embedded YAML format text (see above for a demonstration), for example from using :doc:`thermo_style yaml ` or :doc:`thermo_modify line yaml `, the @@ -289,10 +301,6 @@ text area. Charts Window ------------- -.. versionadded:: 1.6 - - Plot smoothing support - By default, when starting a run, a *Charts* window opens that displays a plot of thermodynamic output of the LAMMPS calculation as shown below. @@ -327,10 +335,6 @@ corresponds to. Same as for the *Output* window, the chart window is replaced on each new run, but the behavior can be changed in the *Preferences* dialog. -.. 
versionadded:: 1.6 - - Support for YAML export added - From the *File* menu on the top left, it is possible to save an image of the currently displayed plot or export the data in either plain text columns (for use by plotting tools like `gnuplot @@ -371,8 +375,6 @@ zoom in or zoom out of the displayed images. The button on the very left triggers an export of the slide show animation to a movie file, provided the `FFmpeg program `_ is installed. -.. versionadded:: 1.6 - When clicking on the "garbage can" icon, all image files of the slide show will be deleted. Since their number can be large for long simulations, this option enables to safely and quickly clean up the @@ -391,7 +393,7 @@ below. .. image:: JPG/lammps-gui-variable-info.png :align: center - :scale: 75% + :scale: 50% Like for the *Output* and *Charts* windows, its content is continuously updated during a run. It will show "(none)" if there are no variables @@ -435,20 +437,21 @@ instance when using reduced (= 'lj') :doc:`units `, then LAMMPS-GUI will check the current pair style and if it is a Lennard-Jones type potential, it will extract the *sigma* parameter for each atom type and assign atom diameters from those numbers. +For cases where atom diameters are not auto-detected, the *Atom size* field +can be edited and a suitable value set manually. The default value +is inferred from the x-direction lattice spacing. -Otherwise the default sequence of colors of the :doc:`dump image -` command is assigned to the different atom types and the -diameters are all the same. +If elements cannot be detected the default sequence of colors of the +:doc:`dump image ` command is assigned to the different atom +types. -.. figure:: JPG/lammps-gui-image.png - :align: center - :scale: 50% +.. |gui-image1| image:: JPG/lammps-gui-image.png + :width: 48% - Visualization of LAMMPS "peptide" example +.. |gui-image2| image:: JPG/lammps-gui-funnel.png + :width: 48% -.. 
versionchanged:: 1.6 - - Buttons for toggling shininess and re-centering were added. +|gui-image1| |gui-image2| The default image size, some default image quality settings, the view style and some colors can be changed in the *Preferences* dialog @@ -468,8 +471,6 @@ current image can be saved to a file (keyboard shortcut `Ctrl-S`) or copied to the clipboard (keyboard shortcut `Ctrl-C`) for pasting the image into another application. -.. versionadded:: 1.6 - From the *File* menu it is also possible to copy the current :doc:`dump image ` and :doc:`dump_modify ` commands to the clipboard so they can be pasted into a LAMMPS input file @@ -488,8 +489,6 @@ Paste (`Ctrl-V`), Undo (`Ctrl-Z`), Redo (`Ctrl-Shift-Z`), Select All dialog will pop up asking whether to cancel the exit operation, or to save or not save the buffer contents to a file. -.. versionadded:: 1.6 - The editor has an auto-save mode that can be enabled or disabled in the *Preferences* dialog. In auto-save mode, the editor buffer is automatically saved before running LAMMPS or before exiting LAMMPS-GUI. @@ -553,8 +552,6 @@ context menu that open the corresponding documentation page in the online LAMMPS documentation in a web browser window. When using the keyboard, the first of those entries is chosen. -.. versionadded:: 1.6 - If the word under the cursor is a file, then additionally the context menu has an entry to open the file in a read-only text viewer window. If the file is a LAMMPS restart file, instead the menu entry offers to @@ -572,8 +569,6 @@ will contain a corresponding message. Inspecting a Restart file ^^^^^^^^^^^^^^^^^^^^^^^^^ -.. versionadded:: 1.6 - When LAMMPS-GUI is asked to "Inspect a Restart", it will read the restart file into a LAMMPS instance and then open three different windows. The first window is a text viewer with the output of an @@ -629,9 +624,10 @@ Edit ^^^^ The *Edit* menu offers the usual editor functions like *Undo*, *Redo*, -*Cut*, *Copy*, *Paste*. 
It can also open a *Preferences* dialog -(keyboard shortcut `Ctrl-P`) and allows deleting all stored preferences -and settings, so they are reset to their default values. +*Cut*, *Copy*, *Paste*, and a *Find and Replace* dialog (keyboard +shortcut `Ctrl-F`). It can also open a *Preferences* dialog (keyboard +shortcut `Ctrl-P`) and allows deleting all stored preferences and +settings, so they are reset to their default values. Run ^^^ @@ -667,7 +663,7 @@ set *before* a run is started. .. image:: JPG/lammps-gui-variables.png :align: center - :scale: 75% + :scale: 50% The *Set Variables* dialog will be pre-populated with entries that are set as index variables in the input and any variables that are @@ -716,6 +712,43 @@ https://lammpstutorials.github.io/ in a web browser window. ----- +Find and Replace +---------------- + +.. image:: JPG/lammps-gui-find.png + :align: center + :scale: 33% + +The *Find and Replace* dialog allows searching for and replacing +text in the *Editor* window. + +The dialog can be opened either from the *Edit* menu or with the +keyboard shortcut `Ctrl-F`. You can enter the text to search for. +Through three check-boxes the search behavior can be adjusted: + +- If checked, "Match case" does a case sensitive search; otherwise + the search is case insensitive. + +- If checked, "Wrap around" starts searching from the start of the + document, if there is no match found from the current cursor position + until the end of the document; otherwise the search will stop. + +- If checked, the "Whole word" setting only finds full word matches + (white space and special characters are word boundaries). + +Clicking on the *Next* button will search for the next occurrence of the +search text and select / highlight it. Clicking on the *Replace* button +will replace an already highlighted search text and find the next one. 
+If no text is selected, or the selected text does not match the +search string, then the first click on the *Replace* button will +only search and highlight the next occurrence of the search string. +Clicking on the *Replace All* button will replace all occurrences from +the cursor position to the end of the file; if the *Wrap around* box is +checked, then it will replace **all** occurrences in the **entire** +document. Clicking on the *Done* button will dismiss the dialog. + +------ + Preferences ----------- @@ -848,7 +881,7 @@ available (On macOS use the Command key instead of Ctrl/Control). .. list-table:: :header-rows: 1 - :widths: auto + :widths: 16 19 13 16 13 22 * - Shortcut - Function @@ -890,32 +923,32 @@ available (On macOS use the Command key instead of Ctrl/Control). - Quit Application - Ctrl+A - Select All - - Ctrl+P - - Preferences + - Ctrl+F + - Find and Replace * - Ctrl+W - Close Window - - Ctrl+Shift+H - - Quick Help - - Ctrl+Shift+G - - LAMMPS-GUI Howto - * - Ctrl+Shift+A - - About LAMMPS - - Ctrl+? - - Context Help - - Ctrl+Shift+W - - Show Variables - * - Ctrl+Shift+M - - LAMMPS Manual - TAB - Reformat line - Shift+TAB - Show Completions - * - Ctrl+Shift+T - - LAMMPS Tutorial - - Ctrl+Shift+Enter + * - Ctrl+Shift+Enter - Run File - - - - + - Ctrl+Shift+W + - Show Variables + - Ctrl+P + - Preferences + * - Ctrl+Shift+A + - About LAMMPS + - Ctrl+Shift+H + - Quick Help + - Ctrl+Shift+G + - LAMMPS-GUI Howto + * - Ctrl+Shift+M + - LAMMPS Manual + - Ctrl+? + - Context Help + - Ctrl+Shift+T + - LAMMPS Tutorial Further editing keybindings `are documented with the Qt documentation `_. 
In diff --git a/doc/src/Howto_pylammps.rst b/doc/src/Howto_pylammps.rst index 5ef3248e1d..645434bbab 100644 --- a/doc/src/Howto_pylammps.rst +++ b/doc/src/Howto_pylammps.rst @@ -39,16 +39,18 @@ lammps.lammps * interface modeled after the LAMMPS :ref:`C language library interface API ` * requires knowledge of how LAMMPS internally works (C pointers, etc) * full support for running Python with MPI using `mpi4py `_ +* no overhead from creating a more Python-like interface lammps.PyLammps """"""""""""""" -* higher-level abstraction built on *top* of original :py:class:`ctypes based interface ` +* higher-level abstraction built on *top* of the original :py:class:`ctypes based interface ` * manipulation of Python objects * communication with LAMMPS is hidden from API user * shorter, more concise Python * better IPython integration, designed for quick prototyping * designed for serial execution +* additional overhead from capturing and parsing the LAMMPS screen output Quick Start ----------- diff --git a/doc/src/Intro_authors.rst b/doc/src/Intro_authors.rst index 78c8506421..84470ba3a0 100644 --- a/doc/src/Intro_authors.rst +++ b/doc/src/Intro_authors.rst @@ -56,7 +56,7 @@ lammps.org". General questions about LAMMPS should be posted in the - SNL - jmgoff at sandia.gov - machine learned potentials, QEq solvers, Python - * - Megan McCarthy + * - Meg McCarthy - SNL - megmcca at sandia.gov - alloys, micro-structure, machine learned potentials @@ -67,7 +67,7 @@ lammps.org". General questions about LAMMPS should be posted in the * - `Trung Nguyen `_ - U Chicago - ndactrung at gmail.com - - soft matter, GPU package + - soft matter, GPU package, DIELECTRIC package, regression testing .. _rb: https://rbberger.github.io/ .. 
_gc: https://enthalpiste.fr/ diff --git a/doc/src/JPG/lammps-gui-dark.png b/doc/src/JPG/lammps-gui-dark.png new file mode 100644 index 0000000000..31921bf537 Binary files /dev/null and b/doc/src/JPG/lammps-gui-dark.png differ diff --git a/doc/src/JPG/lammps-gui-find.png b/doc/src/JPG/lammps-gui-find.png new file mode 100644 index 0000000000..452c5818eb Binary files /dev/null and b/doc/src/JPG/lammps-gui-find.png differ diff --git a/doc/src/JPG/lammps-gui-funnel.png b/doc/src/JPG/lammps-gui-funnel.png new file mode 100644 index 0000000000..6f24f8bbd9 Binary files /dev/null and b/doc/src/JPG/lammps-gui-funnel.png differ diff --git a/doc/src/JPG/lammps-gui-image.png b/doc/src/JPG/lammps-gui-image.png index 969aadce0b..5f71cb2be5 100644 Binary files a/doc/src/JPG/lammps-gui-image.png and b/doc/src/JPG/lammps-gui-image.png differ diff --git a/doc/src/JPG/lammps-gui-main.png b/doc/src/JPG/lammps-gui-main.png index 69efe3db68..c7a1e5a98c 100644 Binary files a/doc/src/JPG/lammps-gui-main.png and b/doc/src/JPG/lammps-gui-main.png differ diff --git a/doc/src/Library_atoms.rst b/doc/src/Library_atoms.rst index cebd8f0c2e..5ebfe04b37 100644 --- a/doc/src/Library_atoms.rst +++ b/doc/src/Library_atoms.rst @@ -4,6 +4,7 @@ Per-atom properties This section documents the following functions: - :cpp:func:`lammps_extract_atom_datatype` +- :cpp:func:`lammps_extract_atom_size` - :cpp:func:`lammps_extract_atom` ----------------------- @@ -13,6 +14,11 @@ This section documents the following functions: ----------------------- +.. doxygenfunction:: lammps_extract_atom_size + :project: progguide + +----------------------- + .. doxygenfunction:: lammps_extract_atom :project: progguide diff --git a/doc/src/Packages_details.rst b/doc/src/Packages_details.rst index 4483601a88..43429feb03 100644 --- a/doc/src/Packages_details.rst +++ b/doc/src/Packages_details.rst @@ -1823,7 +1823,8 @@ Aidan Thompson^3, Gabor Csanyi^2, Christoph Ortner^4, Ralf Drautz^1. 
**Install:** This package has :ref:`specific installation instructions ` on the -:doc:`Build extras ` page. +:doc:`Build extras ` page. This package may also be compiled +as a plugin to avoid licensing conflicts when distributing binaries. **Supporting info:** @@ -2344,7 +2345,9 @@ and Gareth Tribello. **Install:** -This package has :ref:`specific installation instructions ` on the :doc:`Build extras ` page. +This package has :ref:`specific installation instructions ` on the +:doc:`Build extras ` page. This package may also be compiled +as a plugin to avoid licensing conflicts when distributing binaries. **Supporting info:** @@ -2642,7 +2645,7 @@ This package has :ref:`specific installation instructions ` on the :doc:`B **Authors:** Joel T. Clemmer (Sandia National Labs), Thomas C. O'Connor (Carnegie Mellon University) -.. versionadded:: TBD +.. versionadded:: 29Aug2024 **Supporting info:** diff --git a/doc/src/Run_options.rst b/doc/src/Run_options.rst index 86e8e47626..4f7021cd53 100644 --- a/doc/src/Run_options.rst +++ b/doc/src/Run_options.rst @@ -508,7 +508,7 @@ e.g. the *nfile* and *fileper* keywords. See the **-restart2info restartfile keyword ...** -.. versionadded:: TBD +.. versionadded:: 29Aug2024 Write out some info about the restart file and and immediately exit. This is the same operation as if the following 2-line input script were diff --git a/doc/src/Tools.rst b/doc/src/Tools.rst index 6c12baf967..ba7cb2035a 100644 --- a/doc/src/Tools.rst +++ b/doc/src/Tools.rst @@ -590,20 +590,31 @@ and the LAMMPS library, via ``-D LAMMPS_SOURCE_DIR=/path/to/lammps/src``. CMake will try to guess a build folder with the LAMMPS library from that path, but it can also be set with ``-D LAMMPS_LIB_DIR=/path/to/lammps/lib``. 
+Plugin version +"""""""""""""" + Rather than linking to the LAMMPS library during compilation, it is also -possible to compile the GUI with a plugin loader that will load -the LAMMPS library dynamically at runtime during the start of the GUI -from a shared library; e.g. ``liblammps.so`` or ``liblammps.dylib`` or +possible to compile the GUI with a plugin loader that will load the +LAMMPS library dynamically at runtime during the start of the GUI from a +shared library; e.g. ``liblammps.so`` or ``liblammps.dylib`` or ``liblammps.dll`` (depending on the operating system). This has the advantage that the LAMMPS library can be built from updated or modified LAMMPS source without having to recompile the GUI. The ABI of the LAMMPS C-library interface is very stable and generally backward -compatible. This feature is enabled by setting -``-D LAMMPS_GUI_USE_PLUGIN=on`` and then ``-D +compatible. This feature is enabled by setting ``-D +LAMMPS_GUI_USE_PLUGIN=on`` and then ``-D LAMMPS_PLUGINLIB_DIR=/path/to/lammps/plugin/loader``. Typically, this would be the ``examples/COUPLE/plugin`` folder of the LAMMPS distribution. +When compiling LAMMPS-GUI with plugin support, there is an additional +command line flag (``-p <path>`` or ``--pluginpath <path>``) which +allows overriding the path to the LAMMPS shared library used by the GUI. +This is usually auto-detected on the first run and can be changed in the +LAMMPS-GUI *Preferences* dialog. The command line flag allows resetting +this path to a valid value in case the original setting has become +invalid. An empty path ("") as argument restores the default setting. + Platform notes ^^^^^^^^^^^^^^ @@ -671,6 +682,15 @@ folder> --target tgz`` or ``make tgz`` to build a ``LAMMPS-Linux-amd64.tar.gz`` file with the executables and their support libraries. +It is also possible to build a `flatpak bundle +`_ which is +a way to distribute applications in a way that is compatible with most +Linux distributions. 
Use the "flatpak" target to trigger a compile +(``cmake --build <build folder> --target flatpak`` or ``make flatpak``). +Please note that this will not build from the local sources but from the +repository and branch listed in the ``org.lammps.lammps-gui.yml`` +file in the LAMMPS-GUI source folder. + ---------- .. _arc: @@ -1002,7 +1022,7 @@ regression tests with a given LAMMPS binary. The tool launches the LAMMPS binary with any given input script under one of the `examples` subdirectories, and compares the thermo output in the generated log file with those in the provided log file with the same number of processors -ub the same subdirectory. If the differences between the actual and +in the same subdirectory. If the differences between the actual and reference values are within specified tolerances, the test is considered passed. For each test batch, that is, a set of example input scripts, the mpirun command, the LAMMPS command line arguments, and the diff --git a/doc/src/bond_rheo_shell.rst b/doc/src/bond_rheo_shell.rst index 992917c104..090f5ab7aa 100644 --- a/doc/src/bond_rheo_shell.rst +++ b/doc/src/bond_rheo_shell.rst @@ -38,7 +38,7 @@ Examples Description """"""""""" -.. versionadded:: TBD +.. versionadded:: 29Aug2024 The *rheo/shell* bond style is designed to work with :doc:`fix rheo/oxidation ` which creates candidate diff --git a/doc/src/compute_rheo_property_atom.rst b/doc/src/compute_rheo_property_atom.rst index 5840cd8acc..8686a0dec2 100644 --- a/doc/src/compute_rheo_property_atom.rst +++ b/doc/src/compute_rheo_property_atom.rst @@ -55,7 +55,7 @@ Examples Description """"""""""" -.. versionadded:: TBD +.. versionadded:: 29Aug2024 Define a computation that stores atom attributes specific to the RHEO package for each atom in the group. 
This is useful so that the values diff --git a/doc/src/compute_temp.rst b/doc/src/compute_temp.rst index 4c7a46b822..2638a1ae11 100644 --- a/doc/src/compute_temp.rst +++ b/doc/src/compute_temp.rst @@ -59,7 +59,7 @@ factor, these tensor components are twice those of the traditional kinetic energy tensor. The six components of the vector are ordered :math:`xx`, :math:`yy`, :math:`zz`, :math:`xy`, :math:`xz`, :math:`yz`. - + The number of atoms contributing to the temperature is assumed to be constant for the duration of the run; use the *dynamic* option of the :doc:`compute_modify ` command if this is not the case. diff --git a/doc/src/fix_adapt.rst b/doc/src/fix_adapt.rst index 03aef12a6c..a44ce8e780 100644 --- a/doc/src/fix_adapt.rst +++ b/doc/src/fix_adapt.rst @@ -319,25 +319,32 @@ all types from 1 to :math:`N`. A leading asterisk means all types from :math:`N` (inclusive). A middle asterisk means all types from m to n (inclusive). -Currently *bond* does not support bond_style hybrid nor bond_style -hybrid/overlay as bond styles. The bond styles that currently work -with fix_adapt are +If :doc:`bond_style hybrid ` is used, *bstyle* should be a +sub-style name. 
The bond styles that currently work with fix adapt are: -+------------------------------------+------------+------------+ -| :doc:`class2 ` | r0 | type bonds | -+------------------------------------+------------+------------+ -| :doc:`fene ` | k,r0 | type bonds | -+------------------------------------+------------+------------+ -| :doc:`fene/nm ` | k,r0 | type bonds | -+------------------------------------+------------+------------+ -| :doc:`gromos ` | k,r0 | type bonds | -+------------------------------------+------------+------------+ -| :doc:`harmonic ` | k,r0 | type bonds | -+------------------------------------+------------+------------+ -| :doc:`morse ` | r0 | type bonds | -+------------------------------------+------------+------------+ -| :doc:`nonlinear ` | epsilon,r0 | type bonds | -+------------------------------------+------------+------------+ ++---------------------------------------------------+---------------------------+------------+ +| :doc:`class2 ` | r0 | type bonds | ++---------------------------------------------------+---------------------------+------------+ +| :doc:`fene ` | k,r0 | type bonds | ++---------------------------------------------------+---------------------------+------------+ +| :doc:`fene/expand ` | k,r0,epsilon,sigma,shift | type bonds | ++---------------------------------------------------+---------------------------+------------+ +| :doc:`fene/nm ` | k,r0 | type bonds | ++---------------------------------------------------+---------------------------+------------+ +| :doc:`gromos ` | k,r0 | type bonds | ++---------------------------------------------------+---------------------------+------------+ +| :doc:`harmonic ` | k,r0 | type bonds | ++---------------------------------------------------+---------------------------+------------+ +| :doc:`harmonic/shift ` | k,r0,r1 | type bonds | ++---------------------------------------------------+---------------------------+------------+ +| :doc:`harmonic/restrain ` | k | type bonds | 
++---------------------------------------------------+---------------------------+------------+ +| :doc:`mm3 ` | k,r0 | type bonds | ++---------------------------------------------------+---------------------------+------------+ +| :doc:`morse ` | r0 | type bonds | ++---------------------------------------------------+---------------------------+------------+ +| :doc:`nonlinear ` | epsilon,r0 | type bonds | ++---------------------------------------------------+---------------------------+------------+ ---------- @@ -357,15 +364,34 @@ all types from 1 to :math:`N`. A leading asterisk means all types from :math:`N` (inclusive). A middle asterisk means all types from m to n (inclusive). -Currently *angle* does not support angle_style hybrid nor angle_style -hybrid/overlay as angle styles. The angle styles that currently work -with fix_adapt are +If :doc:`angle_style hybrid ` is used, *astyle* should be a +sub-style name. The angle styles that currently work with fix adapt are: -+------------------------------------+----------+-------------+ -| :doc:`harmonic ` | k,theta0 | type angles | -+------------------------------------+----------+-------------+ -| :doc:`cosine ` | k | type angles | -+------------------------------------+----------+-------------+ ++--------------------------------------------------------------------+-----------------+-------------+ +| :doc:`harmonic ` | k,theta0 | type angles | ++--------------------------------------------------------------------+-----------------+-------------+ +| :doc:`charmm ` | k,theta0 | type angles | ++--------------------------------------------------------------------+-----------------+-------------+ +| :doc:`class2 ` | k2,k3,k4,theta0 | type angles | ++--------------------------------------------------------------------+-----------------+-------------+ +| :doc:`cosine ` | k | type angles | ++--------------------------------------------------------------------+-----------------+-------------+ +| :doc:`cosine/periodic ` | 
k,b,n | type angles | ++--------------------------------------------------------------------+-----------------+-------------+ +| :doc:`cosine/squared/restricted ` | k,theta0 | type angles | ++--------------------------------------------------------------------+-----------------+-------------+ +| :doc:`dipole ` | k,gamma0 | type angles | ++--------------------------------------------------------------------+-----------------+-------------+ +| :doc:`fourier ` | k,c0,c1,c2 | type angles | ++--------------------------------------------------------------------+-----------------+-------------+ +| :doc:`fourier/simple ` | k,c,n | type angles | ++--------------------------------------------------------------------+-----------------+-------------+ +| :doc:`mm3 ` | k,theta0 | type angles | ++--------------------------------------------------------------------+-----------------+-------------+ +| :doc:`quartic ` | k2,k3,k4,theta0 | type angles | ++--------------------------------------------------------------------+-----------------+-------------+ +| :doc:`spica ` | k,theta0 | type angles | ++--------------------------------------------------------------------+-----------------+-------------+ Note that internally, theta0 is stored in radians, so the variable this fix uses to reset theta0 needs to generate values in radians. diff --git a/doc/src/fix_meso_move.rst b/doc/src/fix_meso_move.rst index 64b451b7f1..d5e1bba446 100644 --- a/doc/src/fix_meso_move.rst +++ b/doc/src/fix_meso_move.rst @@ -247,7 +247,7 @@ defined by the :doc:`atom_style sph ` command. All particles in the group must be mesoscopic SPH/SDPD particles. -.. versionchanged:: TBD +.. versionchanged:: 29Aug2024 This fix is incompatible with deformation controls that remap velocity, for instance the *remap v* option of :doc:`fix deform `. 
diff --git a/doc/src/fix_mvv_dpd.rst b/doc/src/fix_mvv_dpd.rst index e64a162bf4..44883e92ad 100644 --- a/doc/src/fix_mvv_dpd.rst +++ b/doc/src/fix_mvv_dpd.rst @@ -97,7 +97,7 @@ These fixes are part of the DPD-MESO package. They are only enabled if LAMMPS was built with that package. See the :doc:`Build package ` page for more info. -.. versionchanged:: TBD +.. versionchanged:: 29Aug2024 This fix is incompatible with deformation controls that remap velocity, for instance the *remap v* option of :doc:`fix deform `. diff --git a/doc/src/fix_rheo.rst b/doc/src/fix_rheo.rst index cf135ae0af..eb88ef0536 100644 --- a/doc/src/fix_rheo.rst +++ b/doc/src/fix_rheo.rst @@ -43,7 +43,7 @@ Examples Description """"""""""" -.. versionadded:: TBD +.. versionadded:: 29Aug2024 Perform time integration for RHEO particles, updating positions, velocities, and densities. For an overview of other features available in the RHEO package, diff --git a/doc/src/fix_rheo_oxidation.rst b/doc/src/fix_rheo_oxidation.rst index ba3ead3f1f..dc22ed304a 100644 --- a/doc/src/fix_rheo_oxidation.rst +++ b/doc/src/fix_rheo_oxidation.rst @@ -27,7 +27,7 @@ Examples Description """"""""""" -.. versionadded:: TBD +.. versionadded:: 29Aug2024 This fix dynamically creates bonds on the surface of fluids to represent physical processes such as oxidation. It is intended diff --git a/doc/src/fix_rheo_pressure.rst b/doc/src/fix_rheo_pressure.rst index 40d623ae07..2a714b298b 100644 --- a/doc/src/fix_rheo_pressure.rst +++ b/doc/src/fix_rheo_pressure.rst @@ -33,7 +33,7 @@ Examples Description """"""""""" -.. versionadded:: TBD +.. versionadded:: 29Aug2024 This fix defines a pressure equation of state for RHEO particles. One can define different equations of state for different atom types. 
An equation diff --git a/doc/src/fix_rheo_thermal.rst b/doc/src/fix_rheo_thermal.rst index cf245cbdca..214bc1db86 100644 --- a/doc/src/fix_rheo_thermal.rst +++ b/doc/src/fix_rheo_thermal.rst @@ -48,7 +48,7 @@ Examples Description """"""""""" -.. versionadded:: TBD +.. versionadded:: 29Aug2024 This fix performs time integration of temperature for atom style rheo/thermal. In addition, it defines multiple thermal properties of particles and handles diff --git a/doc/src/fix_rheo_viscosity.rst b/doc/src/fix_rheo_viscosity.rst index 5bc1b2a210..804059e6f8 100644 --- a/doc/src/fix_rheo_viscosity.rst +++ b/doc/src/fix_rheo_viscosity.rst @@ -38,7 +38,7 @@ Examples Description """"""""""" -.. versionadded:: TBD +.. versionadded:: 29Aug2024 This fix defines a viscosity for RHEO particles. One can define different viscosities for different atom types, but a viscosity must be specified for diff --git a/doc/src/fix_rigid_meso.rst b/doc/src/fix_rigid_meso.rst index 3f734e3fef..6a9a85a865 100644 --- a/doc/src/fix_rigid_meso.rst +++ b/doc/src/fix_rigid_meso.rst @@ -353,7 +353,7 @@ defined by the :doc:`atom_style sph ` command. All particles in the group must be mesoscopic SPH/SDPD particles. -.. versionchanged:: TBD +.. versionchanged:: 29Aug2024 This fix is incompatible with deformation controls that remap velocity, for instance the *remap v* option of :doc:`fix deform `. diff --git a/doc/src/fix_shake.rst b/doc/src/fix_shake.rst index 339d0fd68c..434415eecf 100644 --- a/doc/src/fix_shake.rst +++ b/doc/src/fix_shake.rst @@ -137,7 +137,7 @@ constrained (within a fudge factor of MASSDELTA specified in both bonds in the angle are constrained then the angle will also be constrained if its type is in the list. -.. versionchanged:: TBD +.. 
versionchanged:: 29Aug2024 The types may be given as type labels *only* if there is no atom, bond, or angle type label named *b*, *a*, *t*, or *m* defined in the diff --git a/doc/src/fix_smd_integrate_tlsph.rst b/doc/src/fix_smd_integrate_tlsph.rst index 44d4bab3a5..fce4c057c3 100644 --- a/doc/src/fix_smd_integrate_tlsph.rst +++ b/doc/src/fix_smd_integrate_tlsph.rst @@ -53,7 +53,7 @@ Restrictions This fix is part of the MACHDYN package. It is only enabled if LAMMPS was built with that package. See the :doc:`Build package ` page for more info. -.. versionchanged:: TBD +.. versionchanged:: 29Aug2024 This fix is incompatible with deformation controls that remap velocity, for instance the *remap v* option of :doc:`fix deform `. diff --git a/doc/src/fix_smd_integrate_ulsph.rst b/doc/src/fix_smd_integrate_ulsph.rst index 6b1e070763..60dfb06a0e 100644 --- a/doc/src/fix_smd_integrate_ulsph.rst +++ b/doc/src/fix_smd_integrate_ulsph.rst @@ -61,7 +61,7 @@ Restrictions This fix is part of the MACHDYN package. It is only enabled if LAMMPS was built with that package. See the :doc:`Build package ` page for more info. -.. versionchanged:: TBD +.. versionchanged:: 29Aug2024 This fix is incompatible with deformation controls that remap velocity, for instance the *remap v* option of :doc:`fix deform `. diff --git a/doc/src/geturl.rst b/doc/src/geturl.rst index 0ca0ce0cd3..acf1e21a3e 100644 --- a/doc/src/geturl.rst +++ b/doc/src/geturl.rst @@ -32,7 +32,7 @@ Examples Description """"""""""" -.. versionadded:: TBD +.. versionadded:: 29Aug2024 Download a file from an URL to the local disk. This is implemented with the `libcurl library `_ which supports a diff --git a/doc/src/group2ndx.rst b/doc/src/group2ndx.rst index 19c472e109..e1c4fd23f5 100644 --- a/doc/src/group2ndx.rst +++ b/doc/src/group2ndx.rst @@ -51,7 +51,7 @@ index file. When specifying group IDs, only those groups will be written to the index file. 
In order to follow the Gromacs conventions, the group *all* will be renamed to *System* in the index file. -The *ndx2group* command will create of update group definitions from +The *ndx2group* command will create or update group definitions from those stored in an index file. Without specifying any group IDs, all groups except *System* will be read from the index file and the corresponding groups recreated. If a group of the same name already diff --git a/doc/src/neigh_modify.rst b/doc/src/neigh_modify.rst index 753990c93f..bf87c8452c 100644 --- a/doc/src/neigh_modify.rst +++ b/doc/src/neigh_modify.rst @@ -159,7 +159,7 @@ sample scenarios where this is useful: * When one or more rigid bodies are specified, interactions within each body can be turned off to save needless computation. See the :doc:`fix rigid ` command for more details. -.. versionchanged:: TBD +.. versionchanged:: 29Aug2024 Support for type labels was added. diff --git a/doc/src/pair_hbond_dreiding.rst b/doc/src/pair_hbond_dreiding.rst index 7e73f23b08..ce19ff9e38 100644 --- a/doc/src/pair_hbond_dreiding.rst +++ b/doc/src/pair_hbond_dreiding.rst @@ -18,28 +18,27 @@ Syntax .. code-block:: LAMMPS - pair_style style N inner_distance_cutoff outer_distance_cutoff angle_cutof + pair_style style N inner_distance_cutoff outer_distance_cutoff angle_cutoff * style = *hbond/dreiding/lj* or *hbond/dreiding/morse* -* n = cosine angle periodicity +* N = power of cosine of angle theta (integer) * inner_distance_cutoff = global inner cutoff for Donor-Acceptor interactions (distance units) * outer_distance_cutoff = global cutoff for Donor-Acceptor interactions (distance units) -* angle_cutoff = global angle cutoff for Acceptor-Hydrogen-Donor -* interactions (degrees) +* angle_cutoff = global angle cutoff for Acceptor-Hydrogen-Donor interactions (degrees) Examples """""""" .. 
code-block:: LAMMPS - pair_style hybrid/overlay lj/cut 10.0 hbond/dreiding/lj 4 9.0 11.0 90 + pair_style hybrid/overlay lj/cut 10.0 hbond/dreiding/lj 4 9.0 11.0 90.0 pair_coeff 1 2 hbond/dreiding/lj 3 i 9.5 2.75 4 9.0 11.0 90.0 - pair_style hybrid/overlay lj/cut 10.0 hbond/dreiding/morse 2 9.0 11.0 90 - pair_coeff 1 2 hbond/dreiding/morse 3 i 3.88 1.7241379 2.9 2 9 11 90 + pair_style hybrid/overlay lj/cut 10.0 hbond/dreiding/morse 2 9.0 11.0 90.0 + pair_coeff 1 2 hbond/dreiding/morse 3 i 3.88 1.7241379 2.9 2 9.0 11.0 90.0 labelmap atom 1 C 2 O 3 H - pair_coeff C O hbond/dreiding/morse H i 3.88 1.7241379 2.9 2 9 11 90 + pair_coeff C O hbond/dreiding/morse H i 3.88 1.7241379 2.9 2 9.0 11.0 90.0 Description """"""""""" @@ -65,7 +64,8 @@ force field, given by: where :math:`r_{\rm in}` is the inner spline distance cutoff, :math:`r_{\rm out}` is the outer distance cutoff, :math:`\theta_c` is -the angle cutoff, and *n* is the cosine periodicity. +the angle cutoff, and :math:`n` is the power of the cosine of the angle +:math:`\theta`. Here, *r* is the radial distance between the donor (D) and acceptor (A) atoms and :math:`\theta` is the bond angle between the acceptor, the @@ -217,7 +217,8 @@ These pair styles do not support the :doc:`pair_modify ` tail option for adding long-range tail corrections to energy and pressure. -These pair styles do not write their information to :doc:`binary restart files `, so pair_style and pair_coeff commands need to be +These pair styles do not write their information to :doc:`binary restart +files `, so pair_style and pair_coeff commands need to be re-specified in an input script that reads a restart file. These pair styles can only be used via the *pair* keyword of the diff --git a/doc/src/pair_rheo.rst b/doc/src/pair_rheo.rst index 993ec3cee3..927d2f0266 100644 --- a/doc/src/pair_rheo.rst +++ b/doc/src/pair_rheo.rst @@ -31,7 +31,7 @@ Examples Description """"""""""" -.. versionadded:: TBD +.. 
versionadded:: 29Aug2024 Pair style *rheo* computes pressure and viscous forces between particles in the :doc:`rheo package `. If thermal evolution is turned diff --git a/doc/src/pair_rheo_solid.rst b/doc/src/pair_rheo_solid.rst index 0b1ed47fb8..f71cba7bdb 100644 --- a/doc/src/pair_rheo_solid.rst +++ b/doc/src/pair_rheo_solid.rst @@ -21,7 +21,7 @@ Examples Description """"""""""" -.. versionadded:: TBD +.. versionadded:: 29Aug2024 Style *rheo/solid* is effectively a copy of pair style :doc:`bpm/spring ` except it only applies forces diff --git a/doc/src/region.rst b/doc/src/region.rst index 9d2af01de1..3a27c4b5ff 100644 --- a/doc/src/region.rst +++ b/doc/src/region.rst @@ -18,13 +18,13 @@ Syntax *delete* = no args *block* args = xlo xhi ylo yhi zlo zhi xlo,xhi,ylo,yhi,zlo,zhi = bounds of block in all dimensions (distance units) - xlo,xhi,ylo,yhi,zlo,zhi can be a variable + xlo,xhi,ylo,yhi,zlo,zhi can be a variable (see below) *cone* args = dim c1 c2 radlo radhi lo hi dim = *x* or *y* or *z* = axis of cone c1,c2 = coords of cone axis in other 2 dimensions (distance units) radlo,radhi = cone radii at lo and hi end (distance units) lo,hi = bounds of cone in dim (distance units) - c1,c2,radlo,radhi,lo,hi can be a variable (see below) + c1,c2,radlo,radhi,lo,hi can be a variable (see below) *cylinder* args = dim c1 c2 radius lo hi dim = *x* or *y* or *z* = axis of cylinder c1,c2 = coords of cylinder axis in other 2 dimensions (distance units) @@ -38,6 +38,7 @@ Syntax *plane* args = px py pz nx ny nz px,py,pz = point on the plane (distance units) nx,ny,nz = direction normal to plane (distance units) + px,py,pz can be a variable (see below) *prism* args = xlo xhi ylo yhi zlo zhi xy xz yz xlo,xhi,ylo,yhi,zlo,zhi = bounds of untilted prism (distance units) xy = distance to tilt y in x direction (distance units) @@ -166,7 +167,7 @@ extending in the y-direction from -5.0 to the upper box boundary. .. 
versionadded:: 4May2022 -For style *ellipsoid*, an axis-aligned ellipsoid is defined. The +For style *ellipsoid*, an axis-aligned ellipsoid is defined. The ellipsoid has its center at (x,y,z) and is defined by 3 axis-aligned vectors given by A = (a,0,0); B = (0,b,0); C = (0,0,c). Note that although the ellipsoid is specified as axis-aligned it can be rotated @@ -206,9 +207,10 @@ parameters a,b,c for style *ellipsoid*, can each be specified as an equal-style :doc:`variable `. Likewise, for style *sphere* and *ellipsoid* the x-, y-, and z- coordinates of the center of the sphere/ellipsoid can be specified as an equal-style variable. And for -style *cylinder* the two center positions c1 and c2 for the location -of the cylinder axes can be specified as a equal-style variable. For style *cone* -all properties can be defined via equal-style variables. +style *cylinder* the two center positions c1 and c2 for the location of +the cylinder axes can be specified as an equal-style variable. For style +*cone* all properties can be defined via equal-style variables. For +style *plane* the point can be defined via equal-style variables. If the value is a variable, it should be specified as v_name, where name is the variable name. In this case, the variable will be diff --git a/doc/src/variable.rst b/doc/src/variable.rst index 330e44139e..1867532efa 100644 --- a/doc/src/variable.rst +++ b/doc/src/variable.rst @@ -1042,7 +1042,7 @@ label2type(), but returns 1 if the type label has been assigned, otherwise it returns 0. This function can be used to check if a particular type label already exists in the simulation. -.. versionadded:: TBD +.. versionadded:: 29Aug2024 The is_timeout() function returns 1 when the :doc:`timer timeout ` has expired otherwise it returns 0. 
This function can be used diff --git a/doc/utils/sphinx-config/false_positives.txt b/doc/utils/sphinx-config/false_positives.txt index ae4b52efee..70d6b4e323 100644 --- a/doc/utils/sphinx-config/false_positives.txt +++ b/doc/utils/sphinx-config/false_positives.txt @@ -141,6 +141,7 @@ arg arge args argv +Armv arrhenius Arun arXiv @@ -215,7 +216,6 @@ ba Babadi Babaei backcolor -backends Baczewski Bagchi Bagi @@ -1539,6 +1539,7 @@ idx ie ielement ieni +ifdef ifdefs iff ifort @@ -2725,6 +2726,7 @@ OMP oneAPI onebody onelevel +oneMKL oneway onlysalt ons @@ -4130,6 +4132,7 @@ Xiaowang Xie xk xlat +xlattice xlo xmax Xmax @@ -4181,6 +4184,7 @@ yflag yhi yi ylat +ylattice ylo ylz ymax @@ -4229,6 +4233,7 @@ Ziegenhain zincblende zj Zj +zlattice zlim zlo Zm diff --git a/examples/COUPLE/plugin/liblammpsplugin.c b/examples/COUPLE/plugin/liblammpsplugin.c index 5d27a0a64b..c8a38c21e4 100644 --- a/examples/COUPLE/plugin/liblammpsplugin.c +++ b/examples/COUPLE/plugin/liblammpsplugin.c @@ -41,7 +41,6 @@ #include - liblammpsplugin_t *liblammpsplugin_load(const char *lib) { liblammpsplugin_t *lmp; @@ -106,6 +105,7 @@ liblammpsplugin_t *liblammpsplugin_load(const char *lib) ADDSYM(map_atom); ADDSYM(extract_atom_datatype); + ADDSYM(extract_atom_size); ADDSYM(extract_atom); ADDSYM(extract_compute); @@ -191,6 +191,9 @@ liblammpsplugin_t *liblammpsplugin_load(const char *lib) ADDSYM(is_running); ADDSYM(force_timeout); + // symbol not present + if (!lmp->config_has_exceptions) return NULL; + lmp->has_exceptions = lmp->config_has_exceptions(); if (lmp->has_exceptions) { ADDSYM(has_error); diff --git a/examples/COUPLE/plugin/liblammpsplugin.h b/examples/COUPLE/plugin/liblammpsplugin.h index 556718816c..b444f75215 100644 --- a/examples/COUPLE/plugin/liblammpsplugin.h +++ b/examples/COUPLE/plugin/liblammpsplugin.h @@ -151,6 +151,7 @@ struct _liblammpsplugin { int (*map_atom)(void *, const void *); int (*extract_atom_datatype)(void *, const char *); + int (*extract_atom_size)(void *, const char 
*, int); void *(*extract_atom)(void *, const char *); void *(*extract_compute)(void *, const char *, int, int); diff --git a/examples/PACKAGES/pace/plugin/CMakeLists.txt b/examples/PACKAGES/pace/plugin/CMakeLists.txt index 0701a754c4..ede63e3d38 100644 --- a/examples/PACKAGES/pace/plugin/CMakeLists.txt +++ b/examples/PACKAGES/pace/plugin/CMakeLists.txt @@ -5,7 +5,7 @@ cmake_minimum_required(VERSION 3.16) -project(paceplugin VERSION 1.0 LANGUAGES CXX) +project(paceplugin VERSION 1.1 LANGUAGES CXX) set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}) include(CheckIncludeFileCXX) @@ -15,7 +15,11 @@ include(ML-PACE) ########################## # building the plugins -add_library(paceplugin MODULE paceplugin.cpp ${LAMMPS_SOURCE_DIR}/ML-PACE/pair_pace.cpp) +add_library(paceplugin MODULE paceplugin.cpp + ${LAMMPS_SOURCE_DIR}/ML-PACE/pair_pace.cpp + ${LAMMPS_SOURCE_DIR}/ML-PACE/pair_pace_extrapolation.cpp + ${LAMMPS_SOURCE_DIR}/ML-PACE/compute_pace.cpp) + target_link_libraries(paceplugin PRIVATE pace) target_link_libraries(paceplugin PRIVATE lammps) target_include_directories(paceplugin PRIVATE ${LAMMPS_SOURCE_DIR}/ML-PACE) diff --git a/examples/PACKAGES/pace/plugin/paceplugin.cpp b/examples/PACKAGES/pace/plugin/paceplugin.cpp index adf1c168f9..f231318d23 100644 --- a/examples/PACKAGES/pace/plugin/paceplugin.cpp +++ b/examples/PACKAGES/pace/plugin/paceplugin.cpp @@ -3,6 +3,8 @@ #include "version.h" #include "pair_pace.h" +#include "pair_pace_extrapolation.h" +#include "compute_pace.h" using namespace LAMMPS_NS; @@ -11,6 +13,16 @@ static Pair *pair_pace_creator(LAMMPS *lmp) return new PairPACE(lmp); } +static Pair *pair_pace_extrapolation_creator(LAMMPS *lmp) +{ + return new PairPACEExtrapolation(lmp); +} + +static Compute *compute_pace_creator(LAMMPS *lmp, int argc, char **argv) +{ + return new ComputePACE(lmp, argc, argv); +} + extern "C" void lammpsplugin_init(void *lmp, void *handle, void *regfunc) { lammpsplugin_t plugin; @@ -25,4 +37,24 @@ extern "C" void 
lammpsplugin_init(void *lmp, void *handle, void *regfunc) plugin.creator.v1 = (lammpsplugin_factory1 *) &pair_pace_creator; plugin.handle = handle; (*register_plugin)(&plugin, lmp); + + // register pace/extrapolation pair style + plugin.version = LAMMPS_VERSION; + plugin.style = "pair"; + plugin.name = "pace/extrapolation"; + plugin.info = "PACE plugin extrapolation pair style v1.0"; + plugin.author = "Axel Kohlmeyer (akohlmey@gmail.com)"; + plugin.creator.v1 = (lammpsplugin_factory1 *) &pair_pace_extrapolation_creator; + plugin.handle = handle; + (*register_plugin)(&plugin, lmp); + + // register pace compute style + plugin.version = LAMMPS_VERSION; + plugin.style = "compute"; + plugin.name = "pace"; + plugin.info = "PACE plugin compute style v1.0"; + plugin.author = "Axel Kohlmeyer (akohlmey@gmail.com)"; + plugin.creator.v2 = (lammpsplugin_factory2 *) &compute_pace_creator; + plugin.handle = handle; + (*register_plugin)(&plugin, lmp); } diff --git a/examples/PACKAGES/pace/plugin/paceplugin.nsis b/examples/PACKAGES/pace/plugin/paceplugin.nsis index de8d1d8478..0ca96b95a6 100644 --- a/examples/PACKAGES/pace/plugin/paceplugin.nsis +++ b/examples/PACKAGES/pace/plugin/paceplugin.nsis @@ -39,7 +39,7 @@ InstallDir "$LOCALAPPDATA\${PACEPLUGIN}" ShowInstDetails show ShowUninstDetails show -SetCompressor lzma +SetCompressor zlib !define MUI_ABORTWARNING diff --git a/examples/PACKAGES/plumed/plugin/CMakeLists.txt b/examples/PACKAGES/plumed/plugin/CMakeLists.txt new file mode 100644 index 0000000000..33ed34123d --- /dev/null +++ b/examples/PACKAGES/plumed/plugin/CMakeLists.txt @@ -0,0 +1,59 @@ +# -*- CMake -*- build system for plugin examples. +# This is meant to be used as a template for plugins that are +# distributed independently of the LAMMPS package. 
+########################################## + +cmake_minimum_required(VERSION 3.16) + +project(plumedplugin VERSION 1.0 LANGUAGES CXX) + +set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}) +include(CheckIncludeFileCXX) +include(LAMMPSInterfacePlugin) +include(PLUMED) + +########################## +# building the plugins + +add_library(plumedplugin MODULE plumedplugin.cpp ${LAMMPS_SOURCE_DIR}/PLUMED/fix_plumed.cpp) +target_link_libraries(plumedplugin PRIVATE LAMMPS::PLUMED) +target_link_libraries(plumedplugin PRIVATE lammps) +target_include_directories(plumedplugin PRIVATE ${LAMMPS_SOURCE_DIR}/PLUMED) +set_target_properties(plumedplugin PROPERTIES PREFIX "" SUFFIX ".so") + +# MacOS seems to need this +if(CMAKE_SYSTEM_NAME STREQUAL Darwin) + set_target_properties(plumedplugin PROPERTIES LINK_FLAGS "-Wl,-undefined,dynamic_lookup") +elseif(CMAKE_SYSTEM_NAME STREQUAL "Windows") +# tell CMake to export all symbols to a .dll on Windows with special case for MinGW cross-compilers + set_target_properties(plumedplugin PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS TRUE) + if(CMAKE_CROSSCOMPILING) + set_target_properties(plumedplugin PROPERTIES LINK_FLAGS "-Wl,--export-all-symbols") + endif() + + get_lammps_version(${LAMMPS_SOURCE_DIR}/version.h LAMMPS_VERSION) + find_program(MAKENSIS_PATH makensis) + if(MAKENSIS_PATH) + execute_process(COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_SOURCE_DIR}/lammps.ico + ${CMAKE_SOURCE_DIR}/lammps-text-logo-wide.bmp ${CMAKE_SOURCE_DIR}/plumedplugin.nsis + ${CMAKE_BINARY_DIR}) + if(BUILD_MPI) + if(USE_MSMPI) + add_custom_target(package ${MAKENSIS_PATH} -V1 -DVERSION=${LAMMPS_VERSION}-MSMPI plumedplugin.nsis + DEPENDS plumedplugin plumed_copy lammps.ico lammps-text-logo-wide.bmp plumedplugin.nsis + BYPRODUCTS LAMMPS-PLUMED-plugin-${LAMMPS_VERSION}-MSMPI.exe) + else() + add_custom_target(package ${MAKENSIS_PATH} -V1 -DVERSION=${LAMMPS_VERSION}-MPI plumedplugin.nsis + DEPENDS plumedplugin plumed_copy lammps.ico lammps-text-logo-wide.bmp 
plumedplugin.nsis + BYPRODUCTS LAMMPS-PLUMED-plugin-${LAMMPS_VERSION}-MPI.exe) + endif() + else() + add_custom_target(package ${MAKENSIS_PATH} -V1 -DVERSION=${LAMMPS_VERSION} plumedplugin.nsis + COMMAND ${CMAKE_COMMAND} -E echo ${PWD} + DEPENDS plumedplugin plumed_copy lammps.ico lammps-text-logo-wide.bmp plumedplugin.nsis + BYPRODUCTS LAMMPS-PLUMED-plugin-${LAMMPS_VERSION}.exe) + endif() + endif() +else() + set_target_properties(plumedplugin PROPERTIES LINK_FLAGS "-rdynamic") +endif() diff --git a/examples/PACKAGES/plumed/plugin/LAMMPSInterfacePlugin.cmake b/examples/PACKAGES/plumed/plugin/LAMMPSInterfacePlugin.cmake new file mode 120000 index 0000000000..2ac6d20a54 --- /dev/null +++ b/examples/PACKAGES/plumed/plugin/LAMMPSInterfacePlugin.cmake @@ -0,0 +1 @@ +../../../../cmake/Modules/LAMMPSInterfacePlugin.cmake \ No newline at end of file diff --git a/examples/PACKAGES/plumed/plugin/PLUMED.cmake b/examples/PACKAGES/plumed/plugin/PLUMED.cmake new file mode 120000 index 0000000000..b69e8b04f5 --- /dev/null +++ b/examples/PACKAGES/plumed/plugin/PLUMED.cmake @@ -0,0 +1 @@ +../../../../cmake/Modules/Packages/PLUMED.cmake \ No newline at end of file diff --git a/examples/PACKAGES/plumed/plugin/README.txt b/examples/PACKAGES/plumed/plugin/README.txt new file mode 100644 index 0000000000..2b1971ddbc --- /dev/null +++ b/examples/PACKAGES/plumed/plugin/README.txt @@ -0,0 +1,2 @@ +This folder contains a loader and support files to build the PLUMED package as plugin. 
+For more information please see: https://docs.lammps.org/Developer_plugins.html diff --git a/examples/PACKAGES/plumed/plugin/lammps-text-logo-wide.bmp b/examples/PACKAGES/plumed/plugin/lammps-text-logo-wide.bmp new file mode 100644 index 0000000000..b9ec4c35f2 Binary files /dev/null and b/examples/PACKAGES/plumed/plugin/lammps-text-logo-wide.bmp differ diff --git a/examples/PACKAGES/plumed/plugin/lammps.ico b/examples/PACKAGES/plumed/plugin/lammps.ico new file mode 100644 index 0000000000..cce156bf79 Binary files /dev/null and b/examples/PACKAGES/plumed/plugin/lammps.ico differ diff --git a/examples/PACKAGES/plumed/plugin/plumedplugin.cpp b/examples/PACKAGES/plumed/plugin/plumedplugin.cpp new file mode 100644 index 0000000000..48edc48e6e --- /dev/null +++ b/examples/PACKAGES/plumed/plugin/plumedplugin.cpp @@ -0,0 +1,28 @@ + +#include "lammpsplugin.h" +#include "version.h" + +#include "fix_plumed.h" + +using namespace LAMMPS_NS; + +static Fix *fix_plumed_creator(LAMMPS *lmp, int argc, char **argv) +{ + return new FixPlumed(lmp, argc, argv); +} + +extern "C" void lammpsplugin_init(void *lmp, void *handle, void *regfunc) +{ + lammpsplugin_t plugin; + lammpsplugin_regfunc register_plugin = (lammpsplugin_regfunc) regfunc; + + // register plumed fix style; the creator takes (lmp, argc, argv), so it is stored via creator.v2 / lammpsplugin_factory2 + plugin.version = LAMMPS_VERSION; + plugin.style = "fix"; + plugin.name = "plumed"; + plugin.info = "Plumed2 plugin fix style v1.0"; + plugin.author = "Axel Kohlmeyer (akohlmey@gmail.com)"; + plugin.creator.v2 = (lammpsplugin_factory2 *) &fix_plumed_creator; + plugin.handle = handle; + (*register_plugin)(&plugin, lmp); +} diff --git a/examples/PACKAGES/plumed/plugin/plumedplugin.nsis b/examples/PACKAGES/plumed/plugin/plumedplugin.nsis new file mode 100644 index 0000000000..1de9800d26 --- /dev/null +++ b/examples/PACKAGES/plumed/plugin/plumedplugin.nsis @@ -0,0 +1,172 @@ +#!Nsis Installer Command Script +# +# The following external defines are recognized: +# ${VERSION} = YYYYMMDD + +!include "MUI2.nsh" +!include 
"FileFunc.nsh" + +!define MUI_ICON "lammps.ico" +!define MUI_UNICON "lammps.ico" +!define MUI_HEADERIMAGE +!define MUI_HEADERIMAGE_BITMAP "lammps-text-logo-wide.bmp" +!define MUI_HEADERIMAGE_RIGHT + +Unicode true +XPStyle on + +!include "LogicLib.nsh" +!addplugindir "envvar/Plugins/x86-unicode" +!include "x64.nsh" + +RequestExecutionLevel user + +!macro VerifyUserIsAdmin +UserInfo::GetAccountType +pop $0 +${If} $0 != "admin" + messageBox mb_iconstop "Administrator rights required!" + setErrorLevel 740 ;ERROR_ELEVATION_REQUIRED + quit +${EndIf} +!macroend + +!define PLUMEDPLUGIN "LAMMPS PLUMED Plugin ${VERSION}" +OutFile "LAMMPS-PLUMED-plugin-${VERSION}.exe" + +Name "${PLUMEDPLUGIN}" +InstallDir "$LOCALAPPDATA\${PLUMEDPLUGIN}" + +ShowInstDetails show +ShowUninstDetails show +SetCompressor zlib + +!define MUI_ABORTWARNING + +!insertmacro MUI_PAGE_DIRECTORY +!insertmacro MUI_PAGE_INSTFILES + +!insertmacro MUI_UNPAGE_CONFIRM +!insertmacro MUI_UNPAGE_INSTFILES + +!insertmacro MUI_LANGUAGE "English" + +function .onInit + # Determine if LAMMPS was already installed and check whether it was in 32-bit + # or 64-bit. Then look up path to uninstaller and offer to uninstall or quit + SetRegView 32 + ReadRegDWORD $0 HKCU "Software\LAMMPS-PLUMED" "Bits" + SetRegView LastUsed + ${If} $0 == "32" + SetRegView 32 + ${ElseIf} $0 == "64" + SetRegView 64 + ${Else} + SetRegView 64 + ${EndIf} + ClearErrors + ReadRegStr $R0 HKCU "Software\Microsoft\Windows\CurrentVersion\Uninstall\LAMMPS-PLUMED" "UninstallString" + SetRegView LastUsed + ${If} ${Errors} + DetailPrint "LAMMPS PLUMED plugin not (yet) installed" + ${Else} + MessageBox MB_YESNO "LAMMPS PLUMED plugin ($0 bit) is already installed. Uninstall existing version?" /SD IDYES IDNO Quit + Pop $R1 + StrCmp $R1 2 Quit +1 + Exec $R0 + Quit: + Quit + ${EndIf} + setShellVarContext all +functionEnd + +Section "${PLUMEDPLUGIN}" SecPlumedplugin + SectionIn RO + # Write LAMMPS installation bitness marker. 
Always use 32-bit registry view + SetRegView 32 + IntFmt $0 "0x%08X" 64 + WriteRegDWORD HKCU "Software\LAMMPS-PLUMED" "Bits" $0 + + # Switch to "native" registry view + SetRegView 64 + SetShellVarContext current + + SetOutPath "$INSTDIR" + CreateDirectory "$INSTDIR\patches" + CreateDirectory "$INSTDIR\bin" + File lammps.ico + File plumedplugin.so + + SetOutPath "$INSTDIR\bin" + File plumed.exe + + # Register Application and its uninstaller + WriteRegStr HKCU "Software\Microsoft\Windows\CurrentVersion\Uninstall\LAMMPS-PLUMED" \ + "DisplayName" "${PLUMEDPLUGIN}" + WriteRegStr HKCU "Software\Microsoft\Windows\CurrentVersion\Uninstall\LAMMPS-PLUMED" \ + "Publisher" "The LAMMPS and PLUMED Developers" + WriteRegStr HKCU "Software\Microsoft\Windows\CurrentVersion\Uninstall\LAMMPS-PLUMED" \ + "URLInfoAbout" "lammps.org" + WriteRegStr HKCU "Software\Microsoft\Windows\CurrentVersion\Uninstall\LAMMPS-PLUMED" \ + "DisplayIcon" "$INSTDIR\lammps.ico" + WriteRegStr HKCU "Software\Microsoft\Windows\CurrentVersion\Uninstall\LAMMPS-PLUMED" \ + "DisplayVersion" "${VERSION}" + WriteRegStr HKCU "Software\Microsoft\Windows\CurrentVersion\Uninstall\LAMMPS-PLUMED" \ + "InstallLocation" "$INSTDIR" + WriteRegStr HKCU "Software\Microsoft\Windows\CurrentVersion\Uninstall\LAMMPS-PLUMED" \ + "UninstallString" "$\"$INSTDIR\uninstall.exe$\"" + WriteRegStr HKCU "Software\Microsoft\Windows\CurrentVersion\Uninstall\LAMMPS-PLUMED" \ + "QuietUninstallString" "$\"$INSTDIR\uninstall.exe$\" /S" + + ${GetSize} "$INSTDIR" "/S=0K" $0 $1 $2 + IntFmt $0 "0x%08X" $0 + WriteRegDWORD HKCU "Software\Microsoft\Windows\CurrentVersion\Uninstall\LAMMPS-PLUMED" \ + "EstimatedSize" "$0" + + # update path variables + EnVar::SetHKCU + # add plumed executable path + EnVar::AddValue "PATH" "$INSTDIR\bin" + # add to LAMMPS plugin search path + EnVar::AddValue "LAMMPS_PLUGIN_PATH" "$INSTDIR" + # add plumed2 patch files + EnVar::AddValue "PLUMED_ROOT" "$INSTDIR" + + WriteUninstaller "$INSTDIR\Uninstall.exe" +SectionEnd + 
+function un.onInit + SetShellVarContext current +functionEnd + +Section "Uninstall" + # remove LAMMPS bitness/installation indicator always in 32-bit registry view + SetRegView 32 + DeleteRegKey HKCU "Software\LAMMPS-PLUMED" + + # unregister extension, and uninstall info + SetRegView 64 + SetShellVarContext current + # unregister installation + DeleteRegKey HKCU "Software\Microsoft\Windows\CurrentVersion\Uninstall\LAMMPS-PLUMED" + + # update path variables + EnVar::SetHKCU + # remove plumed executable path + EnVar::DeleteValue "PATH" "$INSTDIR\bin" + # remove entry from LAMMPS plugin search path + EnVar::DeleteValue "LAMMPS_PLUGIN_PATH" "$INSTDIR" + # remove plumed patch environment + EnVar::Delete "PLUMED_ROOT" + + RMDir /r /REBOOTOK "$INSTDIR\patches" + RMDir /r /REBOOTOK "$INSTDIR\bin" + Delete /REBOOTOK "$INSTDIR\plumedplugin.so" + Delete /REBOOTOK "$INSTDIR\Uninstall.exe" + Delete /REBOOTOK "$INSTDIR\lammps.ico" + RMDir /REBOOTOK "$INSTDIR" +SectionEnd + +# Local Variables: +# mode: sh +# End: diff --git a/examples/bpm/impact/brokenDump b/examples/bpm/impact/brokenDump deleted file mode 100644 index 0a2316cd5e..0000000000 --- a/examples/bpm/impact/brokenDump +++ /dev/null @@ -1,3914 +0,0 @@ -599 1817 5468 -600 1808 1815 -554 1938 5471 -548 5471 5591 -554 1938 5471 -571 5471 5590 -572 5471 5589 -599 1817 5468 -548 5471 5591 -571 5471 5590 -572 5471 5589 -608 1814 1815 -616 1815 1926 -622 5231 5350 -622 1818 1935 -635 1818 1929 -638 5468 5469 -648 1806 1923 -648 1818 1937 -649 1811 1812 -650 1822 5468 -650 1822 1935 -652 5469 5472 -655 1822 5589 -660 1812 1813 -662 1816 1929 -666 1810 1923 -672 1935 1937 -676 1810 1812 -676 1822 1938 -677 1806 1917 -677 1822 1937 -679 1812 1816 -683 1819 1937 -683 1816 1931 -684 1816 1937 -686 1810 1931 -695 1583 1584 -699 1821 1822 -700 1812 1929 -616 1815 1926 -635 1818 1929 -648 1806 1923 -650 1822 1935 -660 1923 1930 -662 1816 1929 -666 1810 1923 -671 1935 1936 -672 1935 1937 -676 1822 1938 -677 1806 1917 -683 1816 1931 
-686 1810 1931 -687 2052 5709 -692 1923 2040 -692 2169 5709 -694 2052 5588 -696 1936 1940 -700 1812 1929 -605 5477 5596 -610 5467 5468 -611 5237 5350 -622 5231 5350 -628 5467 5474 -629 5473 5474 -635 5467 5472 -637 5473 5480 -638 5468 5469 -640 5474 5475 -646 5354 5467 -647 5467 5475 -650 1822 5468 -652 5347 5354 -652 5469 5472 -653 5479 5480 -658 5473 5478 -661 5472 5475 -661 5474 5478 -663 5480 5481 -665 5358 5478 -668 5227 5234 -669 5480 5593 -670 5347 5355 -671 5234 5347 -672 5479 5486 -674 5352 5354 -674 5475 5478 -677 5471 5472 -678 5478 5593 -679 5478 5481 -681 5352 5355 -682 5352 5475 -682 5468 5471 -687 5352 5357 -687 5486 5599 -689 5232 5234 -689 5481 5601 -690 5476 5478 -691 5480 5484 -693 5484 5599 -694 5471 5477 -700 5479 5484 -655 1822 5589 -669 5480 5593 -678 5478 5593 -687 5486 5599 -687 2052 5709 -692 2169 5709 -693 5484 5599 -693 5594 5600 -694 2052 5588 -695 5600 5714 -699 5828 5829 -700 5951 5958 -704 1806 1925 -706 1810 1925 -711 1816 1932 -711 1937 1940 -712 1800 1917 -716 1926 1931 -720 1457 1464 -721 1804 1917 -722 5469 5470 -722 1803 1920 -728 1820 5471 -729 1802 1809 -730 1925 1928 -734 1353 1469 -735 1923 1925 -737 5230 5231 -738 1804 1925 -739 1810 1926 -739 1816 1821 -740 1911 1918 -743 1822 5471 -745 1821 1937 -746 1458 1574 -755 1586 5231 -756 1469 4994 -756 1463 1470 -756 1467 1577 -756 1815 1931 -757 1804 1919 -758 1683 1793 -758 1800 1919 -760 1809 1810 -762 1692 1808 -763 5469 5471 -763 1932 1937 -765 1808 1810 -766 1914 1915 -769 1804 1806 -769 1821 1938 -771 1806 1809 -774 1926 1933 -776 1459 1574 -776 1568 1575 -776 1573 1574 -779 4994 5110 -779 5111 5229 -779 5229 5230 -781 1804 1807 -783 1797 1908 -783 1800 1806 -784 5112 5229 -787 1799 1804 -788 1799 1805 -789 1464 1580 -789 1802 1807 -795 1807 1810 -800 1674 1790 -800 1790 1797 -703 2057 5709 -704 5952 6071 -711 1816 1932 -711 1937 1940 -711 2052 5710 -712 1800 1917 -716 1926 1931 -717 2169 5829 -719 2277 2278 -721 1804 1917 -722 1803 1920 -724 2037 2038 -727 1940 2052 -730 
1925 1928 -730 1940 2054 -731 1928 1930 -734 2169 5830 -735 1923 1925 -736 1928 1931 -736 1924 2040 -736 2044 2154 -736 2057 5710 -740 1911 1918 -745 1928 1934 -746 1923 1928 -746 1923 1931 -748 1921 2031 -749 1939 1940 -750 2170 2286 -751 1928 2048 -754 1928 2040 -755 1931 1933 -756 1917 1919 -756 1815 1931 -757 1804 1919 -758 1800 1919 -758 1933 2043 -759 1930 2040 -761 2057 5830 -762 1921 2038 -762 2283 2284 -763 1932 1937 -764 2287 2402 -766 1914 1915 -766 2040 2047 -769 1821 1938 -771 2161 2271 -774 1926 1933 -774 2406 2513 -778 2040 2042 -782 1930 1931 -784 2174 5829 -785 1912 2028 -788 2043 2048 -788 2041 2157 -790 1923 1924 -793 1940 2055 -795 2038 2039 -796 2174 5830 -799 1931 1934 -799 1937 1938 -799 2286 2287 -702 5114 5233 -702 5484 5601 -703 5470 5472 -704 5116 5117 -704 5255 5368 -707 5352 5470 -707 5495 5608 -710 5232 5237 -711 5232 5355 -713 5483 5602 -714 5478 5483 -716 5476 5477 -717 5232 5235 -718 5470 5471 -720 5227 5235 -721 5230 5232 -721 5599 5607 -722 5469 5470 -722 5482 5483 -725 5364 5484 -726 5123 5236 -728 1820 5471 -729 5366 5485 -729 5483 5601 -734 5350 5355 -737 5230 5231 -737 5350 5357 -739 5234 5235 -741 5374 5375 -742 5488 5495 -743 1822 5471 -745 5481 5484 -746 5114 5227 -749 5500 5501 -749 5606 5607 -751 5486 5605 -751 5604 5607 -751 5614 5615 -753 5479 5487 -760 5601 5602 -762 5113 5120 -763 5000 5110 -763 5469 5471 -763 5492 5605 -765 5108 5114 -767 5112 5227 -771 5472 5477 -773 5476 5596 -775 5381 5494 -778 5486 5487 -779 4994 5110 -779 5111 5229 -779 5229 5230 -779 5375 5488 -779 5470 5477 -781 5107 5108 -783 5116 5118 -784 5112 5229 -784 5242 5243 -784 5602 5604 -787 5368 5369 -787 5475 5477 -789 5233 5235 -791 5129 5242 -791 5602 5603 -792 5248 5249 -794 5485 5486 -795 5477 5478 -702 5484 5601 -703 2057 5709 -704 5952 6071 -708 5957 5964 -709 5593 5600 -711 2052 5710 -717 2169 5829 -718 5709 5829 -720 5736 5855 -721 5593 5601 -721 5599 5607 -722 5720 5721 -729 5483 5601 -730 5598 5600 -730 5828 5830 -733 5735 5742 -733 5709 
5830 -733 5958 6077 -734 2169 5830 -736 2057 5710 -738 5714 5722 -740 5598 5601 -747 5605 5606 -749 5606 5607 -751 5486 5605 -751 5604 5607 -751 5614 5615 -751 5849 5856 -757 5841 5954 -759 5621 5741 -760 5601 5602 -761 5721 5722 -761 2057 5830 -761 5828 5833 -763 5492 5605 -769 5835 5948 -772 5721 5725 -776 5615 5735 -784 5602 5604 -784 2174 5829 -785 5833 5835 -786 5969 5976 -790 5726 5727 -791 5602 5603 -795 5831 5951 -796 2174 5830 -798 5605 5726 -799 5960 5961 -801 1568 1685 -801 1586 5229 -802 1463 1465 -802 1799 1800 -803 4991 5107 -803 1673 1680 -803 1685 1692 -803 5108 5112 -804 1799 1806 -804 1806 1807 -804 1911 1916 -805 1799 1802 -807 1807 1809 -808 1573 1575 -808 1586 1588 -808 5350 5351 -808 5351 5352 -810 1354 1469 -813 4994 5109 -813 1353 1468 -816 1580 1587 -816 1819 1821 -818 5351 5470 -819 1685 1690 -820 1689 1799 -821 1463 1468 -821 1911 1912 -823 1464 1574 -823 1575 1685 -823 1689 1806 -824 1471 5229 -824 1685 1687 -824 1799 1801 -825 1570 1575 -827 1583 1586 -827 1676 1677 -831 1463 1464 -833 1683 1801 -835 1805 1806 -836 5108 5109 -841 1586 1587 -841 1687 1807 -842 1686 1687 -843 1686 1802 -845 1681 1793 -847 1676 1681 -848 1471 5108 -849 1570 1690 -849 1687 1689 -849 1692 1802 -850 1684 1690 -851 1808 1809 -852 1676 1678 -853 1348 1465 -853 1464 1465 -854 1462 1464 -854 1464 1579 -854 1471 1583 -856 1348 1468 -857 1684 1687 -857 1687 1801 -857 1687 1799 -858 1570 1576 -859 1806 1810 -860 1585 5229 -860 1585 1588 -864 1462 1465 -865 1354 5109 -868 1681 1683 -870 1459 1576 -871 1573 1576 -872 1354 1468 -872 1683 1799 -873 1686 1801 -876 1585 1587 -877 1459 1579 -879 1687 1802 -882 1683 1686 -885 1584 1585 -885 1681 1801 -886 1466 5108 -886 1583 1585 -893 4995 5109 -896 1459 1464 -900 1354 1466 -900 1682 1684 -801 2045 2047 -802 2041 2158 -802 2396 2403 -803 1929 1931 -803 2039 2154 -804 1911 1916 -805 2045 2048 -805 2154 2159 -808 2040 2048 -812 1918 1919 -812 2045 2051 -812 2045 2157 -814 2040 2045 -814 2154 2161 -817 1928 1933 -818 1932 1938 
-819 1932 1934 -819 2047 2050 -821 1911 1912 -821 2045 2165 -821 2156 2161 -822 2048 2050 -823 2286 5829 -825 2055 5710 -825 2047 2157 -828 2036 2038 -829 2156 2159 -829 2172 5830 -831 2161 2276 -831 2274 2281 -838 2167 2277 -839 1931 1932 -839 2022 2029 -841 1938 1939 -842 2159 2161 -842 2164 2280 -844 2276 2277 -848 2039 2153 -849 6191 6307 -852 2049 2050 -855 2050 2051 -856 2047 2165 -859 2161 2162 -861 5710 5712 -861 2157 2164 -863 5830 5832 -868 2162 2276 -869 2050 2165 -873 2631 6191 -874 5831 5832 -875 2276 2282 -876 2031 2036 -876 2290 2399 -879 2044 2161 -879 5712 5830 -881 2163 2165 -881 2161 2277 -884 2051 2165 -884 2507 2508 -885 2055 5591 -887 2051 2166 -889 2277 2279 -890 2160 2161 -890 2519 2520 -892 1933 1934 -896 2163 2164 -897 2393 2394 -897 2394 2501 -898 2282 2284 -898 2405 5952 -803 4991 5107 -803 5108 5112 -803 5118 5123 -803 5366 5487 -806 5477 5590 -808 5350 5351 -808 5351 5352 -812 5507 5620 -818 5351 5470 -822 5115 5116 -822 5122 5123 -822 5483 5596 -822 5484 5602 -824 5247 5248 -825 5365 5372 -825 5364 5487 -826 5129 5247 -829 5255 5373 -829 5364 5370 -832 5477 5595 -832 5483 5484 -832 5484 5486 -832 5611 5612 -834 5123 5241 -837 5124 5129 -840 5373 5374 -841 5000 5115 -842 5250 5255 -843 5130 5247 -843 5349 5350 -844 5122 5124 -846 5366 5370 -848 5248 5250 -848 5363 5483 -850 5249 5250 -853 5499 5500 -856 5365 5370 -859 5118 5121 -859 5235 5238 -859 5481 5482 -862 5124 5127 -865 5230 5237 -865 5256 5373 -865 5367 5370 -869 4995 5110 -869 5476 5483 -874 5350 5352 -875 5250 5256 -876 5374 5376 -877 5364 5482 -877 5376 5381 -879 5130 5250 -884 5250 5253 -884 5365 5373 -888 5001 5115 -891 5364 5369 -891 5381 5499 -891 5481 5483 -893 4995 5109 -894 4990 4997 -897 4997 5113 -898 5245 5250 -899 4995 5115 -900 5245 5247 -900 5371 5378 -803 5604 5609 -803 5970 6089 -805 5620 5627 -806 5477 5590 -810 5606 5610 -811 6196 6203 -812 5507 5620 -813 5612 5726 -815 6197 6313 -816 5856 5975 -816 6083 6090 -823 2286 5829 -825 2055 5710 -829 2172 5830 -832 
5477 5595 -832 5611 5612 -832 5854 5855 -833 6084 6202 -833 6197 6307 -836 5971 5976 -837 5835 5955 -840 5971 6089 -840 6084 6090 -841 5732 5733 -843 5969 5974 -847 5737 5742 -847 5958 6071 -848 6075 6187 -849 6201 6202 -849 6085 6202 -849 6191 6307 -850 5727 5840 -852 5954 5955 -853 5855 5862 -855 5609 5723 -856 5610 5726 -858 6201 6203 -859 5740 5741 -861 5710 5712 -862 5604 5728 -862 5955 6074 -862 5955 5959 -863 5830 5832 -863 5954 5956 -864 5737 5855 -865 5955 5956 -865 5953 6071 -865 6088 6089 -866 5970 6088 -866 6083 6085 -866 6197 6312 -867 5835 5956 -867 5974 5976 -867 6083 6088 -868 5961 5967 -869 6085 6090 -871 5610 5728 -871 5954 5959 -872 5606 5728 -872 5740 5742 -872 5954 6074 -873 5857 5862 -873 6088 6090 -874 5831 5832 -877 5857 5975 -877 5948 5956 -877 6068 6075 -878 6074 6076 -879 5712 5830 -880 5974 5975 -881 5833 5836 -882 6192 6307 -883 5955 6076 -885 2055 5591 -888 5954 5957 -888 6201 6204 -891 6198 6203 -893 5961 6074 -894 5609 5728 -895 6198 6313 -897 5740 5743 -898 5830 5833 -898 5959 5961 -900 6068 6076 -901 1683 1684 -904 1570 1682 -906 1815 1816 -907 1677 1793 -913 1570 1572 -916 1571 1573 -918 1572 1573 -925 1467 1583 -928 1459 1571 -931 1582 1585 -937 1459 1461 -938 1810 1815 -942 1580 1582 -943 1566 1683 -944 1465 1582 -945 1560 1670 -949 1821 1932 -953 1461 1462 -958 1821 1939 -962 1577 1585 -965 1676 1683 -967 1566 1682 -968 1460 1465 -978 1460 1462 -979 1813 1815 -984 1348 1460 -986 1350 1468 -989 1565 1572 -994 1455 1571 -902 2165 2171 -904 2284 2285 -905 2277 2284 -907 2279 2282 -908 6191 6306 -910 2164 2165 -910 2285 2399 -912 1933 2048 -912 2400 2513 -914 2282 2398 -915 2163 2168 -916 2162 2164 -918 2033 2036 -920 2274 2276 -921 2401 2513 -923 2166 2171 -923 2164 2274 -925 1915 1916 -925 2274 2279 -926 2289 2290 -928 2056 2166 -929 1916 2031 -929 5712 5713 -929 2156 2158 -929 2158 2276 -931 2151 2156 -931 2279 2281 -932 2151 2158 -933 2030 2036 -935 2285 2398 -936 2162 2168 -936 2168 2170 -938 2165 2168 -940 2513 2518 -941 2399 
2404 -949 1821 1932 -949 2274 2275 -949 2518 2520 -954 2056 2173 -957 2166 2168 -958 1821 1939 -958 5711 5712 -958 2274 2282 -959 2520 6191 -960 2399 2405 -961 2281 2398 -964 1932 1939 -964 1933 1939 -965 2170 2280 -967 2151 2153 -969 2168 2171 -969 2171 2173 -969 2512 2513 -970 2396 2398 -970 2521 2631 -971 1933 2049 -974 5712 5832 -974 2151 2152 -974 5832 5833 -977 2158 2268 -977 6192 6306 -985 2519 2521 -987 2289 5832 -990 2035 2153 -991 2281 2390 -995 5829 5833 -996 5712 5831 -997 2512 2514 -999 2172 2173 -904 5123 5242 -904 5252 5253 -904 5250 5252 -905 5245 5248 -906 5376 5379 -911 5245 5253 -912 5501 5502 -913 5245 5252 -913 5371 5372 -916 5121 5124 -917 5113 5121 -919 5118 5124 -920 5252 5256 -921 5130 5245 -922 5246 5250 -925 5367 5369 -926 5120 5121 -926 5245 5246 -926 5371 5374 -929 5123 5124 -935 5367 5368 -938 5363 5482 -939 5119 5124 -943 5132 5245 -944 5126 5130 -944 5256 5371 -945 5382 5499 -946 5363 5369 -948 5126 5247 -948 5502 5620 -953 5119 5239 -956 5117 5230 -956 5362 5368 -956 5499 5502 -957 5232 5350 -960 4995 5107 -961 5502 5619 -963 5001 5113 -963 5119 5120 -965 5118 5233 -965 5378 5381 -968 5371 5379 -971 5126 5127 -972 5116 5123 -974 5378 5379 -992 5125 5126 -902 5737 5860 -904 5728 5729 -905 5726 5734 -906 5971 6094 -907 5723 5730 -908 5732 5852 -911 5831 5833 -912 5728 5734 -912 5852 5855 -915 5829 5830 -916 5974 5977 -921 5742 5855 -923 5833 5838 -925 5833 5956 -926 5953 5956 -926 6198 6318 -927 5620 5625 -927 5723 5843 -927 5953 5955 -929 5712 5713 -929 5857 5980 -930 5728 5730 -930 5974 5980 -930 6088 6094 -932 6085 6207 -933 5733 5734 -933 5961 6082 -934 6088 6207 -934 6085 6091 -935 5953 6076 -936 5965 6088 -937 5728 5731 -939 5833 5951 -939 6073 6075 -939 6198 6312 -940 5956 6076 -944 6088 6091 -946 5726 5731 -946 5733 5737 -948 5502 5620 -949 5621 5622 -949 5831 5838 -950 5857 5860 -951 5724 5843 -951 6192 6312 -952 5727 5731 -952 5959 5962 -954 5852 5860 -955 6080 6081 -957 5718 5838 -957 5730 5736 -958 5711 5712 -959 5729 5730 
-961 5502 5619 -964 5836 5838 -966 5739 5852 -966 5951 5953 -967 5855 5860 -967 6192 6309 -968 5620 5622 -970 5731 5846 -971 5958 6076 -974 5739 5860 -974 5832 5833 -974 5953 5958 -975 5724 5844 -975 5837 5838 -977 5725 5730 -977 6192 6306 -979 5737 5852 -980 5731 5733 -985 5730 5849 -986 5725 5843 -987 6199 6207 -987 2289 5832 -988 5837 5844 -989 5619 5625 -993 5843 5849 -993 6193 6194 -995 5730 5731 -995 5837 5839 -995 5829 5833 -996 5712 5831 -999 5619 5622 -1000 6199 6204 -1004 1460 1461 -1006 1465 1577 -1020 1580 1581 -1026 1574 1581 -1027 1454 1461 -1028 1344 1460 -1033 1579 1580 -1043 1574 1579 -1047 1461 1465 -1047 1465 1579 -1049 1350 1460 -1056 1350 1466 -1057 1681 1684 -1062 1799 1807 -1085 1343 1460 -1086 1574 1576 -1093 1466 4991 -1002 2401 2404 -1004 2170 2288 -1005 2507 2514 -1006 2404 2406 -1007 2172 5832 -1008 2173 2174 -1010 2033 2035 -1010 2405 2406 -1014 5950 5951 -1019 2521 6189 -1021 2521 6191 -1024 2035 2145 -1024 2510 2517 -1028 2396 2401 -1032 2028 2145 -1032 2515 2518 -1034 2162 2282 -1034 2512 2515 -1034 2518 2521 -1039 5829 5950 -1039 2510 2515 -1042 2028 2033 -1047 2167 2168 -1048 2401 2403 -1051 2507 2509 -1057 2509 2512 -1058 2028 2029 -1059 1916 2030 -1060 2401 2407 -1061 2396 2397 -1061 2506 2507 -1062 2174 2289 -1062 2406 2407 -1071 2504 2507 -1073 2517 2518 -1074 5832 5950 -1077 2507 2512 -1080 2274 2277 -1081 2028 2030 -1085 2511 2512 -1086 2403 2510 -1088 2394 2506 -1091 2395 2507 -1092 2162 2277 -1092 2516 2517 -1092 6190 6191 -1093 2166 2173 -1100 2516 2518 -1003 5003 5113 -1006 5252 5365 -1008 5118 5120 -1010 5119 5126 -1014 5251 5252 -1017 5378 5382 -1025 5258 5371 -1037 5126 5245 -1040 5377 5378 -1046 5362 5369 -1047 5498 5502 -1049 5118 5238 -1064 5235 5237 -1073 5382 5497 -1075 5118 5241 -1080 4997 5001 -1088 5497 5502 -1091 4995 4997 -1094 5497 5505 -1095 5236 5237 -1001 5730 5850 -1001 5848 5849 -1004 5730 5848 -1005 6087 6207 -1006 5729 5731 -1007 5739 5858 -1007 5835 5838 -1007 5972 5975 -1008 5622 5740 -1009 5731 
5848 -1009 5739 5743 -1010 5727 5730 -1010 5731 5849 -1013 5733 5854 -1014 5950 5951 -1017 5725 5848 -1017 5846 5849 -1019 5731 5736 -1019 5848 5854 -1019 5973 6094 -1019 2521 6189 -1020 5842 5843 -1020 5973 5977 -1020 6087 6091 -1021 5731 5851 -1021 6086 6091 -1023 6086 6094 -1024 5852 5858 -1025 5725 5845 -1026 5849 5854 -1026 5849 5850 -1028 5852 5859 -1029 5736 5854 -1031 5731 5854 -1031 5842 5844 -1034 5849 5851 -1037 5730 5843 -1038 6200 6204 -1039 6198 6310 -1048 5836 5839 -1049 5731 5734 -1049 5842 5845 -1049 5733 5852 -1049 5972 5977 -1053 6068 6073 -1056 5736 5737 -1059 5736 5849 -1059 5737 5854 -1060 5839 5842 -1064 5610 5731 -1067 5727 5848 -1068 5729 5849 -1069 5857 5859 -1071 5841 5842 -1072 5972 5980 -1073 5834 5839 -1074 6200 6318 -1076 5859 5972 -1077 6199 6205 -1078 6194 6312 -1079 5738 5743 -1083 5835 5839 -1084 5972 5978 -1086 6075 6195 -1089 5972 5979 -1089 6193 6310 -1092 5618 5622 -1092 5857 5972 -1092 6190 6191 -1115 1349 1466 -1131 4991 4995 -1133 1574 1575 -1143 1354 4991 -1150 1350 4991 -1154 4990 4991 -1157 4991 4992 -1158 1908 1913 -1167 1905 1906 -1171 1349 4991 -1179 1349 4874 -1180 1905 1910 -1184 4874 4991 -1184 5107 5109 -1186 1349 4992 -1194 1905 1907 -1194 1905 1908 -1197 1574 1580 -1199 4991 5109 -1102 2277 2282 -1105 2167 2282 -1107 1912 2030 -1107 2174 2288 -1107 2400 2507 -1108 5951 5952 -1108 2394 2507 -1110 2510 2511 -1115 2403 2518 -1119 1913 2030 -1120 2174 5950 -1120 2405 2519 -1121 2516 2521 -1122 2041 2159 -1126 5829 5948 -1129 1913 1915 -1133 2407 2518 -1137 1912 2022 -1137 2151 2159 -1138 2166 2283 -1138 2521 6188 -1146 6069 6073 -1152 1910 1912 -1158 1908 1913 -1165 6188 6306 -1167 1905 1906 -1168 1910 1913 -1180 1905 1910 -1181 2407 2519 -1181 6069 6187 -1186 2045 2159 -1187 2174 2286 -1188 2504 2511 -1192 2628 6188 -1194 1905 1907 -1198 2519 6072 -1200 1899 1906 -1200 2039 2159 -1113 5230 5235 -1114 5502 5617 -1117 4996 4997 -1130 5236 5238 -1131 4991 4995 -1136 5249 5368 -1138 5247 5250 -1154 4990 4991 -1155 5384 
5497 -1156 5617 5622 -1166 4997 4998 -1171 5250 5367 -1184 5107 5109 -1184 5236 5243 -1185 5617 5625 -1199 5118 5236 -1103 6192 6304 -1108 5951 5952 -1109 6194 6310 -1114 6070 6071 -1114 6070 6076 -1115 5834 5841 -1115 6087 6205 -1119 5972 6092 -1122 5840 5842 -1124 5972 5974 -1126 5829 5948 -1126 5973 6092 -1128 5745 5858 -1130 5725 5840 -1135 5622 5738 -1145 5855 5857 -1146 6069 6073 -1153 5739 5745 -1153 5853 5972 -1156 5617 5622 -1157 6200 6310 -1159 5973 6093 -1160 6086 6093 -1165 5855 5856 -1167 5727 5846 -1174 5738 5745 -1181 6069 6187 -1185 5617 5625 -1186 6070 6073 -1188 5738 5746 -1198 2519 6072 -1200 5721 5840 -1200 5835 5954 -1205 1349 1354 -1214 1809 1920 -1217 1240 1349 -1229 1792 1907 -1229 1792 1908 -1235 1686 1796 -1236 1804 1809 -1244 5109 5112 -1245 1788 1907 -1246 5109 5110 -1246 1809 1925 -1251 1809 1926 -1253 1788 1899 -1254 1350 1354 -1254 1925 1926 -1256 1791 1792 -1278 1348 1350 -1280 1680 1797 -1280 1790 1792 -1288 1680 1796 -1290 1788 1791 -1300 1349 1350 -1205 6188 6304 -1208 2174 5832 -1211 2519 6189 -1214 1809 1920 -1215 2041 2045 -1229 1792 1907 -1243 1926 1927 -1245 1788 1907 -1251 1809 1926 -1253 1788 1899 -1254 1925 1926 -1258 5832 5838 -1259 2042 2045 -1262 2167 2283 -1263 2039 2044 -1267 2040 2041 -1274 1927 1928 -1278 2282 2283 -1282 2166 2167 -1215 5617 5624 -1228 5504 5617 -1231 5236 5241 -1243 5250 5365 -1244 5109 5112 -1246 5109 5110 -1268 5497 5504 -1205 6188 6304 -1211 2519 6189 -1212 6194 6304 -1214 5624 5738 -1215 5617 5624 -1224 6189 6191 -1228 6194 6311 -1258 5832 5838 -1259 6191 6192 -1261 5958 5959 -1269 5726 5728 -1276 6199 6206 -1301 1681 1796 -1305 1790 1795 -1315 1680 1795 -1319 1343 1350 -1323 1681 1795 -1329 1782 1899 -1333 1350 1351 -1354 1789 1795 -1354 1789 1792 -1354 5110 5111 -1355 1675 1795 -1359 1786 1788 -1367 1675 1792 -1372 1787 1790 -1378 1786 1899 -1380 1786 1907 -1390 1782 1893 -1393 1788 1789 -1306 1924 1928 -1309 2519 6191 -1310 1928 2043 -1312 5832 5951 -1313 2165 2167 -1320 2162 2167 -1321 1928 
2042 -1329 1782 1899 -1378 1786 1899 -1380 1786 1907 -1316 5111 5112 -1354 5110 5111 -1382 5501 5619 -1384 5501 5620 -1309 2519 6191 -1312 5832 5951 -1317 6200 6316 -1338 5733 5846 -1364 5606 5726 -1368 5619 5620 -1372 5620 5621 -1378 5614 5620 -1382 5501 5619 -1384 5501 5620 -1396 5619 5621 -1403 1782 1901 -1405 1677 1681 -1410 1804 1920 -1415 1787 1789 -1427 1677 1678 -1445 1675 1677 -1447 1787 1788 -1450 5111 5230 -1451 1675 1787 -1472 1671 1787 -1477 1670 1677 -1495 1462 1463 -1499 1469 5110 -1410 1804 1920 -1416 2279 2284 -1425 2044 2159 -1445 2284 2393 -1458 2516 6187 -1472 2031 2038 -1484 2284 2398 -1486 2032 2038 -1488 2042 2043 -1495 1927 2043 -1404 5368 5370 -1430 5006 5116 -1446 5116 5121 -1450 5111 5230 -1479 4999 5116 -1489 5129 5248 -1499 1469 5110 -1416 5621 5740 -1420 5614 5621 -1435 5616 5621 -1439 5733 5853 -1458 2516 6187 -1516 1347 1463 -1520 1781 1788 -1557 1338 1448 -1558 1348 1463 -1580 1560 1678 -1593 1454 1455 -1594 1782 1788 -1598 1564 1678 -1504 2398 2399 -1510 2038 2148 -1512 2033 2038 -1512 6069 6189 -1516 2393 2399 -1521 2284 2399 -1543 2033 2153 -1556 2037 2044 -1567 2038 2154 -1568 2038 2153 -1598 2042 2044 -1598 2504 2509 -1508 5000 5116 -1516 5129 5130 -1530 5127 5130 -1538 5001 5116 -1550 4880 4996 -1567 5128 5129 -1568 5128 5135 -1574 5128 5130 -1589 5009 5125 -1512 6069 6189 -1574 6072 6073 -1606 1459 1462 -1610 1353 1463 -1612 1558 1560 -1618 1553 1670 -1626 1679 1684 -1630 1448 1456 -1631 1684 1686 -1635 1553 1554 -1637 1455 1456 -1642 1553 1558 -1644 1554 1664 -1645 1558 1561 -1654 1557 1673 -1661 1439 1446 -1661 1440 1446 -1664 1440 1556 -1666 1455 1459 -1669 1547 1548 -1671 1553 1555 -1675 1556 1557 -1676 1680 1790 -1678 1673 1674 -1680 1554 1555 -1684 1437 1547 -1685 1564 1679 -1690 1331 1332 -1690 1555 1558 -1690 1679 1685 -1694 1453 1456 -1699 1563 1564 -1620 2275 2390 -1630 2390 2391 -1630 2518 2519 -1630 2506 2509 -1631 2275 2279 -1632 2511 2616 -1637 2390 2392 -1637 2399 2400 -1638 2275 2391 -1640 2397 2504 -1647 2262 
2269 -1648 2384 2391 -1649 2505 2509 -1662 2276 2279 -1662 2504 2512 -1664 2029 2030 -1675 2279 2392 -1677 2395 2506 -1677 2505 2616 -1678 2406 2519 -1681 2392 2395 -1683 2263 2378 -1686 2278 2279 -1693 2391 2395 -1698 2506 2508 -1699 2504 2616 -1700 2510 2518 -1603 5121 5123 -1604 5125 5127 -1620 5013 5125 -1628 5009 5013 -1633 5251 5258 -1650 5008 5009 -1655 5010 5013 -1660 5127 5128 -1660 5256 5374 -1669 5122 5128 -1670 5256 5379 -1672 5013 5127 -1673 5008 5010 -1676 5255 5374 -1678 5256 5261 -1679 5012 5128 -1680 4892 5008 -1682 5138 5257 -1682 5255 5261 -1684 5258 5379 -1691 5377 5382 -1693 5254 5255 -1694 5012 5127 -1697 5258 5377 -1700 5254 5256 -1607 6071 6072 -1702 1561 1563 -1704 1569 1679 -1709 1554 1672 -1710 1336 1338 -1715 1437 1555 -1717 1563 1569 -1719 1672 1673 -1720 1556 1561 -1720 1557 1558 -1722 1440 1441 -1723 1441 1555 -1723 1547 1554 -1723 1552 1664 -1724 1329 1445 -1726 1439 1441 -1734 1563 1679 -1741 1439 1444 -1742 1674 1675 -1744 1675 1790 -1745 1441 1556 -1747 1681 1686 -1748 1435 1441 -1749 1435 1437 -1751 1569 1686 -1755 1217 1218 -1756 1329 1444 -1763 1679 1686 -1766 1329 1335 -1770 1438 1441 -1771 1224 1331 -1773 1562 1563 -1776 1670 1671 -1781 1453 1459 -1783 1331 1338 -1791 1437 1438 -1792 1430 1547 -1793 1555 1556 -1794 1436 1438 -1800 1459 1573 -1800 1563 1678 -1701 2394 2395 -1701 2385 2498 -1701 2492 2499 -1703 2030 2033 -1703 2610 2616 -1705 2278 2394 -1710 2027 2029 -1711 2023 2139 -1711 2512 2518 -1714 2506 2618 -1714 2505 2618 -1715 2391 2504 -1715 2407 6189 -1719 2389 2391 -1719 2501 2508 -1722 2250 2257 -1730 2610 2617 -1736 2385 2500 -1737 2504 2506 -1739 2503 2508 -1740 2399 2406 -1742 2389 2506 -1743 2389 2501 -1744 2401 2518 -1745 2401 2406 -1746 2031 2033 -1748 2610 2618 -1752 2503 2618 -1753 2389 2498 -1756 2400 2406 -1757 2391 2392 -1757 2493 2604 -1758 2612 2618 -1759 2389 2500 -1761 2386 2500 -1762 2492 2500 -1762 2504 2505 -1762 2503 2613 -1763 2610 2615 -1764 2615 2618 -1765 2140 2256 -1766 2388 2389 -1767 2611 
2721 -1771 2503 2615 -1772 2383 2500 -1772 2502 2508 -1779 2399 2401 -1781 2032 2033 -1784 2492 2497 -1786 2611 2615 -1787 2027 2147 -1787 2387 2388 -1787 2383 2389 -1789 2493 2497 -1790 2502 2613 -1791 2612 2615 -1792 2027 2139 -1792 2386 2388 -1793 2392 2393 -1797 2612 2613 -1800 2391 2506 -1703 5258 5262 -1704 5251 5259 -1710 4896 5008 -1714 5138 5259 -1714 5253 5254 -1715 5377 5384 -1718 5256 5259 -1719 5012 5129 -1720 4891 4898 -1722 5258 5259 -1727 5258 5264 -1730 5010 5011 -1731 5377 5385 -1732 5012 5013 -1733 4896 4898 -1736 5264 5385 -1738 5264 5383 -1739 5011 5012 -1743 5136 5259 -1744 5257 5264 -1747 4896 5016 -1752 5257 5258 -1757 5136 5253 -1758 5136 5254 -1761 5263 5270 -1762 5135 5248 -1765 5135 5253 -1769 5135 5254 -1774 5138 5142 -1774 5251 5256 -1776 5262 5385 -1777 5136 5141 -1780 5000 5001 -1788 5137 5257 -1790 5136 5142 -1795 4898 4899 -1715 2407 6189 -1736 6072 6189 -1756 5957 5958 -1761 6072 6190 -1801 1336 1456 -1811 1430 1431 -1814 1670 1672 -1816 1430 1437 -1816 1554 1558 -1818 1458 1459 -1822 1670 1675 -1823 1430 1435 -1829 1110 1217 -1829 1324 1444 -1829 1457 1458 -1834 1672 1675 -1842 1558 1672 -1846 1456 1459 -1849 1552 1554 -1851 1324 1436 -1854 1552 1558 -1861 1435 1438 -1863 1324 1326 -1866 1675 1789 -1874 1454 1459 -1877 880 887 -1877 1320 1436 -1877 1446 1563 -1878 1671 1675 -1881 1457 1459 -1887 1554 1670 -1888 862 869 -1888 874 881 -1888 1329 1330 -1892 1671 1789 -1893 1304 1311 -1894 1314 1424 -1894 1671 1672 -1897 868 875 -1899 1671 1674 -1802 2721 2723 -1807 2721 2722 -1808 2715 2722 -1817 2383 2386 -1818 2615 2723 -1819 2492 2495 -1821 2032 2147 -1822 2133 2140 -1822 2383 2385 -1825 2406 2518 -1826 2383 2388 -1826 2385 2492 -1827 2381 2386 -1829 2728 2829 -1837 2032 2148 -1837 2272 2381 -1841 2378 2383 -1842 2614 2615 -1844 2721 2726 -1851 2267 2383 -1852 2267 2381 -1853 2244 2251 -1858 2391 2498 -1859 2278 2393 -1862 2726 2728 -1871 2378 2379 -1871 2395 2504 -1875 2134 2250 -1884 2835 2836 -1885 2378 2380 -1886 2267 2380 
-1891 2726 2729 -1894 2263 2380 -1898 2612 2614 -1900 2266 2267 -1803 4896 5010 -1806 5130 5135 -1809 5256 5258 -1835 5137 5144 -1836 5269 5270 -1837 5134 5141 -1842 5134 5136 -1844 5137 5142 -1845 5139 5141 -1847 5139 5142 -1848 5027 5144 -1851 5130 5133 -1852 4777 4784 -1853 5250 5373 -1860 4896 5011 -1860 5027 5143 -1863 5137 5140 -1864 5139 5140 -1866 5137 5145 -1877 5027 5145 -1880 5135 5136 -1883 5377 5379 -1886 5026 5033 -1888 4784 4891 -1895 5025 5145 -1898 4891 4899 -1901 1327 1444 -1902 1197 1310 -1902 1670 1678 -1904 1446 1556 -1906 1675 1678 -1908 780 886 -1910 1319 1320 -1913 1673 1675 -1916 1331 1339 -1917 863 971 -1919 683 785 -1920 868 869 -1921 1319 1324 -1921 1678 1680 -1921 1669 1671 -1925 1675 1680 -1928 880 881 -1928 1324 1327 -1935 1671 1781 -1936 1082 1089 -1938 1664 1671 -1941 779 786 -1941 1196 1203 -1942 867 869 -1942 868 870 -1944 1327 1329 -1946 880 882 -1946 1558 1678 -1951 965 972 -1951 1305 1421 -1951 1680 1681 -1955 879 881 -1956 768 874 -1957 864 869 -1957 1321 1327 -1961 1083 1196 -1964 689 785 -1965 1322 1327 -1966 874 876 -1966 1212 1319 -1966 1319 1321 -1969 874 875 -1971 880 885 -1974 780 885 -1975 873 875 -1978 864 971 -1979 689 791 -1983 978 1088 -1983 1205 1206 -1983 1210 1324 -1985 763 870 -1985 1328 1329 -1985 1673 1678 -1986 1415 1422 -1989 1103 1104 -1989 1679 1680 -1995 1210 1212 -1997 780 781 -1997 1304 1309 -1998 970 971 -1999 779 781 -2000 873 876 -1919 2265 2266 -1923 2516 6189 -1927 2931 2932 -1930 2611 2614 -1931 2808 2809 -1932 2263 2372 -1934 2264 2266 -1934 2815 2913 -1937 2261 2263 -1937 2829 2837 -1939 2251 2366 -1944 2259 2265 -1945 2472 2577 -1949 2465 2466 -1949 2502 2607 -1952 2261 2264 -1952 2925 2926 -1954 2352 2459 -1956 2583 2584 -1957 2821 2919 -1958 2257 2372 -1962 2345 2346 -1962 2695 2701 -1963 2257 2373 -1964 2229 2230 -1965 2701 2802 -1965 2829 2836 -1969 2257 2261 -1970 2810 2913 -1972 2256 2261 -1973 2366 2372 -1975 1995 1996 -1975 2144 2261 -1976 2919 2920 -1977 2694 2695 -1977 2726 2837 
-1979 2809 2810 -1979 3034 3125 -1982 2257 2374 -1985 2113 2229 -1987 2924 2926 -1989 2257 2366 -1991 2149 2265 -1992 1879 1995 -1992 2230 2345 -1992 2467 2472 -1992 2807 2809 -1993 2913 2918 -1993 3131 3132 -1994 2112 2113 -1994 2577 2582 -1999 2465 2467 -1904 4782 4784 -1907 5031 5143 -1914 5025 5140 -1917 4896 4899 -1917 4896 4901 -1920 5024 5134 -1922 5027 5031 -1923 4784 4785 -1927 4782 4899 -1929 4465 4564 -1931 5024 5139 -1933 4805 4912 -1944 5033 5143 -1948 4452 4453 -1948 5031 5145 -1950 4916 5032 -1951 5024 5141 -1953 4459 4558 -1953 5019 5024 -1957 5033 5034 -1961 5025 5030 -1962 4895 5011 -1964 5024 5140 -1965 4704 4705 -1971 4471 4570 -1974 4894 4900 -1981 4440 4441 -1982 4894 4901 -1989 4680 4686 -1995 4894 4899 -1995 5032 5033 -1998 5024 5030 -2000 4337 4434 -1913 5956 5959 -1923 2516 6189 -1926 5957 5959 -1995 6080 6082 -1999 5965 6082 -2004 684 785 -2004 867 870 -2005 1197 1309 -2011 695 791 -2011 4330 4331 -2012 1643 1650 -2013 1199 1205 -2014 1210 1321 -2015 1532 1539 -2016 1650 1760 -2017 1197 1198 -2017 1206 1319 -2018 970 972 -2018 1533 1649 -2018 1878 1879 -2019 879 882 -2022 689 790 -2022 966 1076 -2023 688 695 -2024 694 4331 -2024 683 784 -2024 876 879 -2026 775 885 -2026 867 976 -2029 1760 1767 -2031 1091 1092 -2034 1210 1327 -2035 870 873 -2035 1761 1878 -2037 690 791 -2037 1206 1210 -2037 1415 1420 -2041 779 784 -2041 1210 1216 -2042 864 973 -2043 778 885 -2043 1070 1077 -2043 1210 1322 -2046 694 695 -2046 1304 1306 -2047 967 972 -2048 865 870 -2050 1215 1322 -2051 878 879 -2051 1760 1765 -2052 970 973 -2052 1305 1306 -2052 1645 1650 -2053 1235 1236 -2054 864 976 -2055 694 696 -2055 1422 1532 -2055 1533 1534 -2059 1532 1537 -2059 1877 1878 -2060 778 781 -2060 1209 1210 -2060 1305 1420 -2062 693 695 -2062 1122 1235 -2063 1196 1197 -2064 1760 1878 -2065 678 784 -2066 1235 1237 -2066 1417 1422 -2067 684 787 -2067 967 1076 -2068 1532 1649 -2069 689 695 -2071 1236 1240 -2071 1532 1534 -2072 684 790 -2072 1417 1537 -2073 1098 1205 -2075 877 
879 -2075 1192 1198 -2076 684 784 -2080 4329 4331 -2081 1235 1240 -2082 690 790 -2084 866 870 -2084 1648 1649 -2085 1529 1532 -2085 1648 1650 -2085 1761 1762 -2085 1762 1767 -2086 680 784 -2088 684 782 -2088 690 695 -2089 1531 1534 -2090 1192 1309 -2096 1417 1420 -2097 690 796 -2099 1300 1420 -2099 1534 1648 -2002 2464 2465 -2002 2611 2723 -2004 2259 2264 -2004 2802 2807 -2005 2694 2696 -2006 2695 2696 -2007 2467 2470 -2009 2918 2920 -2010 2693 2694 -2011 2258 2261 -2011 2347 2352 -2011 2696 2802 -2012 2344 2345 -2013 2230 2231 -2013 2352 2464 -2014 2696 2701 -2014 2926 2927 -2015 2584 2688 -2016 3138 3225 -2017 1996 2112 -2017 2113 2114 -2017 2228 2229 -2018 1878 1879 -2018 2582 2584 -2019 2255 2366 -2020 2584 2694 -2020 2584 2585 -2022 2353 2464 -2027 2255 2257 -2028 2810 2912 -2029 1996 1997 -2030 2607 2612 -2031 2579 2582 -2032 2919 2926 -2036 1994 1995 -2036 2345 2347 -2038 2579 2585 -2039 2256 2259 -2039 2467 2582 -2041 2347 2353 -2042 2111 2112 -2046 2347 2350 -2046 2693 2695 -2050 2516 6188 -2051 2231 2344 -2052 2149 2259 -2054 2920 2921 -2055 2585 2693 -2057 1879 1880 -2059 1877 1878 -2061 2696 2804 -2062 2804 2807 -2064 2467 2469 -2065 2137 2247 -2065 2143 2149 -2065 2144 2259 -2066 1997 2111 -2068 2228 2231 -2069 2142 2149 -2070 2609 2612 -2070 2696 2801 -2073 2114 2228 -2074 2462 2467 -2079 2921 3021 -2081 1994 1997 -2082 2111 2114 -2083 1880 1994 -2085 2258 2260 -2086 1877 1880 -2092 2462 2464 -2094 2574 2582 -2096 2609 2611 -2098 2258 2259 -2001 4787 4894 -2005 5023 5025 -2011 4330 4331 -2014 4577 4680 -2015 4464 4465 -2019 5254 5374 -2020 5031 5033 -2023 5031 5151 -2025 4780 4781 -2029 4349 4446 -2034 5023 5024 -2035 4782 4787 -2039 5254 5261 -2040 4783 4785 -2041 4440 4447 -2041 5025 5028 -2043 4672 4783 -2051 4361 4458 -2055 4785 4788 -2060 4686 4687 -2061 4459 4564 -2063 5027 5028 -2066 4687 4693 -2066 5022 5023 -2073 5020 5028 -2089 4459 4563 -2091 5261 5374 -2092 5034 5037 -2094 4676 4783 -2094 5254 5259 -2096 4915 4922 -2099 4907 5017 -2100 
4440 4442 -2012 5962 5964 -2042 5964 5965 -2049 5963 5964 -2050 2516 6188 -2102 1760 1762 -2102 1762 1877 -2104 1121 1122 -2104 1207 1210 -2105 693 696 -2105 1075 1076 -2105 1126 1235 -2106 1208 1215 -2107 1240 4874 -2108 1205 1207 -2109 967 1081 -2109 1645 1765 -2112 775 877 -2112 1648 1651 -2114 1195 1196 -2115 1300 1306 -2117 1195 1198 -2118 1759 1762 -2119 1192 1306 -2120 872 873 -2120 968 971 -2120 1096 1207 -2121 1417 1419 -2123 696 4329 -2125 866 976 -2125 1092 1207 -2125 1195 1201 -2125 1645 1651 -2128 690 788 -2129 1092 1199 -2129 1205 1208 -2129 1208 1209 -2130 1092 1205 -2130 1303 1306 -2131 1208 1210 -2132 686 790 -2133 686 782 -2133 871 876 -2133 1207 1208 -2134 1092 1096 -2144 1085 1199 -2147 968 973 -2149 1096 1205 -2152 1101 1214 -2152 1417 1529 -2153 1092 1093 -2154 1240 4875 -2155 1085 1086 -2156 4329 4332 -2159 1646 1648 -2162 975 1086 -2162 1090 1092 -2163 1208 1213 -2164 968 976 -2164 1095 1208 -2166 690 692 -2168 776 784 -2168 1085 1090 -2169 1095 1096 -2170 1530 1534 -2171 975 1079 -2171 981 1085 -2172 1086 1087 -2172 1529 1531 -2174 1122 1126 -2175 1205 1210 -2181 1090 1093 -2181 1100 1107 -2181 1647 1648 -2189 776 781 -2190 1354 4992 -2194 1758 1762 -2195 1010 4648 -2195 1645 1647 -2199 1085 1087 -2102 1762 1877 -2105 2227 2231 -2107 2142 2147 -2108 2250 2255 -2108 2360 2367 -2108 2693 2696 -2111 2142 2259 -2113 2231 2342 -2113 2366 2368 -2114 2349 2464 -2116 2915 2918 -2118 2692 2696 -2120 2226 2231 -2120 2347 2349 -2121 2110 2114 -2123 2342 2350 -2128 2255 2258 -2128 2367 2368 -2131 2226 2227 -2132 2349 2456 -2132 2921 3020 -2133 2227 2342 -2133 2342 2347 -2134 1875 1877 -2134 2342 2349 -2134 2806 2807 -2135 2144 2149 -2135 2806 2810 -2136 2349 2350 -2136 2604 2609 -2137 2691 2693 -2138 2932 3027 -2139 2579 2581 -2142 2349 2353 -2143 1992 1994 -2143 2497 2607 -2145 2114 2226 -2146 1993 1997 -2146 2144 2264 -2148 2581 2693 -2152 2927 3027 -2154 2226 2234 -2157 2462 2463 -2159 2361 2474 -2159 2927 3026 -2160 2696 2799 -2161 2144 2146 -2161 
2799 2807 -2162 2109 2114 -2162 2144 2147 -2162 3027 3032 -2165 2109 2226 -2165 2143 2144 -2166 2611 2715 -2169 2116 2226 -2169 2349 2462 -2169 2709 2716 -2170 2348 2349 -2172 2469 2574 -2174 2144 2256 -2175 2253 2258 -2175 2932 3034 -2176 1876 1880 -2178 2226 2233 -2179 3026 3027 -2180 2921 2926 -2182 2256 2258 -2182 3027 3034 -2183 2574 2581 -2185 2365 2367 -2185 2926 3027 -2187 2109 2110 -2189 2139 2144 -2189 2250 2251 -2190 3027 3029 -2192 2139 2142 -2192 2146 2256 -2195 2109 2111 -2195 2605 2715 -2197 2257 2258 -2104 4452 4454 -2106 4669 4774 -2107 4330 4332 -2108 4343 4446 -2108 4907 5022 -2110 5026 5027 -2117 4808 4921 -2118 5133 5136 -2123 5132 5133 -2127 4902 4907 -2128 4785 4787 -2131 4908 5022 -2140 4344 4446 -2140 4780 4787 -2141 4678 4789 -2147 4908 5020 -2149 5374 5379 -2150 4662 4663 -2154 4337 4439 -2154 4900 4901 -2156 4329 4332 -2156 4451 4453 -2156 4678 4684 -2157 5019 5136 -2162 4439 4440 -2163 4332 4337 -2165 4904 4908 -2168 4338 4439 -2169 4335 4439 -2169 4446 4451 -2169 4910 5026 -2170 4337 4440 -2173 4905 5022 -2174 4332 4335 -2175 4454 4459 -2176 4337 4338 -2178 4786 4787 -2182 4900 4902 -2182 4910 5028 -2185 4780 4782 -2190 4902 4905 -2191 4335 4337 -2192 4343 4445 -2193 4437 4440 -2194 4440 4445 -2198 5136 5139 -2109 5965 6083 -2110 5963 6083 -2129 5970 6083 -2140 5979 6092 -2143 5751 5864 -2147 6205 6206 -2150 5741 5742 -2159 6098 6099 -2160 5873 5880 -2169 5744 5745 -2174 5977 6092 -2174 6120 6238 -2177 6202 6203 -2180 6098 6217 -2187 6202 6209 -2188 5870 5871 -2191 5622 5743 -2192 6089 6090 -2192 6092 6100 -2192 6452 6459 -2195 5977 6097 -2198 5745 5746 -2198 6099 6100 -2198 6090 6202 -2200 5743 5746 -2200 6105 6217 -2201 1085 1092 -2201 1240 4992 -2202 1412 1420 -2203 1080 1187 -2208 777 885 -2209 975 1087 -2210 1083 1084 -2211 968 969 -2212 691 696 -2223 1456 1458 -2225 1757 1765 -2226 777 781 -2234 1302 1420 -2239 1084 1195 -2240 777 883 -2252 777 877 -2256 1302 1306 -2256 1757 1762 -2257 1530 1646 -2259 1082 1087 -2260 969 973 
-2261 865 872 -2262 686 788 -2264 979 1087 -2264 1529 1530 -2270 969 970 -2271 1758 1875 -2272 692 693 -2273 1640 1646 -2280 1082 1084 -2289 978 1082 -2294 776 783 -2295 871 872 -2202 2605 2609 -2203 2810 2910 -2207 2226 2228 -2208 2257 2263 -2212 2256 2264 -2215 2606 2609 -2219 2143 2258 -2220 2365 2474 -2224 1993 2109 -2225 2910 2918 -2225 3027 3028 -2228 2142 2144 -2233 2251 2252 -2236 2252 2255 -2236 2605 2717 -2238 2031 2032 -2240 2139 2140 -2248 2917 2918 -2249 2139 2141 -2251 2917 2921 -2254 2709 2717 -2258 2025 2031 -2258 2581 2685 -2261 1915 2031 -2266 1875 1876 -2271 1758 1875 -2274 2251 2255 -2275 2805 2806 -2276 2474 2476 -2276 2691 2692 -2280 2921 3018 -2283 2368 2371 -2287 2030 2031 -2288 2921 3026 -2289 1992 1993 -2294 2468 2475 -2294 2698 2799 -2295 1876 1992 -2202 4336 4337 -2206 4338 4445 -2206 4904 4905 -2210 4678 4791 -2211 4676 4791 -2212 4678 4783 -2213 4344 4445 -2216 4899 4901 -2224 5374 5381 -2228 4897 4905 -2229 4678 4682 -2230 4343 4440 -2233 4338 4440 -2235 4344 4448 -2237 4779 4780 -2237 4788 4902 -2241 4330 4337 -2247 4899 4902 -2249 4454 4457 -2253 4336 4440 -2253 4677 4684 -2258 4338 4437 -2260 4448 4451 -2266 4327 4332 -2266 4451 4454 -2266 4669 4779 -2274 4450 4451 -2275 4903 5020 -2281 4443 4451 -2282 4785 4786 -2284 4677 4682 -2291 4910 5020 -2296 5635 5636 -2202 5977 6094 -2202 6440 6447 -2203 5976 6089 -2204 5757 5870 -2205 6446 6453 -2209 6202 6207 -2212 5975 5976 -2212 6452 6453 -2214 6097 6100 -2220 6548 6555 -2221 6099 6103 -2224 6451 6453 -2225 5741 5743 -2226 5744 5749 -2232 6316 6317 -2234 5746 5749 -2235 6103 6217 -2235 6202 6204 -2240 5744 5751 -2240 5749 5752 -2241 6452 6454 -2244 5746 5748 -2247 6232 6239 -2248 6316 6318 -2250 5747 5749 -2255 5741 5748 -2258 5751 5752 -2258 6448 6453 -2260 6344 6458 -2261 6103 6219 -2265 6217 6225 -2266 5766 5885 -2267 6219 6225 -2268 5750 5751 -2268 6005 6012 -2271 5627 5748 -2275 6343 6458 -2277 6094 6095 -2281 5747 5748 -2282 5630 5750 -2283 6095 6097 -2283 6080 6085 -2289 6223 
6224 -2290 6224 6225 -2290 6343 6350 -2293 5892 6005 -2294 5885 5886 -2296 5635 5636 -2296 6119 6126 -2296 6343 6349 -2298 6653 6660 -2300 5750 5755 -2301 4762 4763 -2302 1081 1084 -2302 1640 1647 -2305 1757 1764 -2307 1647 1757 -2312 973 975 -2318 866 974 -2318 1126 1237 -2328 1193 1195 -2330 680 782 -2333 685 692 -2334 1301 1306 -2338 1301 1309 -2345 4328 4329 -2346 771 877 -2347 973 976 -2349 973 1087 -2351 1007 1014 -2352 691 692 -2352 1080 1084 -2360 1124 4651 -2363 1007 1008 -2363 1096 1213 -2364 973 978 -2365 1194 1195 -2367 1192 1194 -2374 971 972 -2380 1101 1213 -2383 1007 1009 -2386 971 973 -2391 1013 1014 -2394 1011 1121 -2398 691 4328 -2400 686 692 -2305 2812 2910 -2309 2365 2482 -2310 2495 2502 -2311 2143 2259 -2312 2255 2368 -2331 2586 2593 -2353 2254 2255 -2358 2368 2369 -2361 2513 2520 -2363 2916 2917 -2370 2253 2255 -2374 2253 2260 -2374 2917 2923 -2393 2110 2226 -2398 2513 2625 -2301 4762 4763 -2303 4903 4904 -2306 4344 4443 -2310 4340 4445 -2310 5629 5636 -2313 4332 4334 -2324 4460 4564 -2326 4334 4439 -2326 5141 5254 -2329 4683 4690 -2334 4768 4769 -2340 4899 4900 -2345 4328 4329 -2347 4568 4677 -2347 4657 4762 -2348 4897 4902 -2357 4775 4882 -2368 4460 4566 -2369 5638 5645 -2371 4460 4563 -2375 4897 4898 -2376 4679 4682 -2376 4676 4786 -2378 4779 4781 -2386 4465 4570 -2386 4664 4774 -2394 4327 4328 -2397 4465 4569 -2398 691 4328 -2399 4449 4454 -2302 5752 5755 -2303 6097 6102 -2306 5752 5754 -2306 6090 6207 -2309 5747 5754 -2310 5629 5636 -2310 6222 6225 -2310 6233 6349 -2313 6345 6458 -2315 6006 6125 -2315 6090 6208 -2316 6345 6350 -2319 6224 6228 -2323 5645 5759 -2323 5757 5878 -2324 6094 6096 -2325 6653 6654 -2328 6090 6091 -2330 6102 6219 -2341 5976 6095 -2343 6334 6335 -2348 5752 5753 -2350 5765 5766 -2352 6219 6220 -2353 6348 6350 -2355 6348 6349 -2359 5755 5758 -2362 6457 6458 -2365 5755 5757 -2365 6234 6349 -2366 6340 6341 -2367 6457 6459 -2369 5638 5645 -2370 6224 6340 -2376 6234 6239 -2379 5884 5885 -2386 6096 6097 -2391 6340 6342 
-2396 6237 6239 -2397 5640 5645 -2409 1080 1193 -2426 1079 1084 -2433 1121 1123 -2434 1079 1087 -2437 4650 4651 -2451 1413 1529 -2461 1192 1301 -2469 1079 1086 -2469 1641 1647 -2476 1412 1419 -2494 1354 4994 -2496 968 975 -2422 2917 3018 -2425 2474 2475 -2426 2254 2369 -2459 2253 2369 -2467 2368 2370 -2467 2513 2514 -2473 2233 2342 -2486 2365 2370 -2404 4774 4779 -2412 4782 4785 -2413 4460 4569 -2421 4897 4904 -2425 4346 4443 -2426 4466 4570 -2427 5525 5638 -2429 4776 4779 -2434 4779 4782 -2437 4650 4651 -2437 4663 4774 -2440 4327 4431 -2443 4340 4443 -2447 5635 5642 -2448 4768 4770 -2449 4683 4684 -2452 4684 4685 -2455 5635 5640 -2459 4466 4572 -2461 4460 4561 -2464 4773 4779 -2465 4683 4688 -2471 4768 4775 -2477 4664 4773 -2478 4778 4779 -2481 4466 4569 -2485 4663 4773 -2489 4658 4664 -2491 4334 4431 -2492 4663 4664 -2493 4340 4437 -2493 4544 4653 -2494 4462 4569 -2497 4682 4685 -2500 4658 4773 -2405 6218 6328 -2406 6228 6342 -2408 6237 6238 -2408 6345 6463 -2409 6220 6222 -2423 6097 6214 -2423 6121 6238 -2424 6121 6126 -2426 6348 6463 -2438 5766 5767 -2438 6340 6345 -2441 5876 5883 -2442 5976 5977 -2444 6457 6463 -2447 5635 5642 -2455 5635 5640 -2462 6089 6094 -2469 5636 5637 -2472 6455 6456 -2477 5633 5753 -2480 6234 6354 -2485 6228 6345 -2485 6340 6347 -2487 6005 6010 -2490 6124 6126 -2520 1123 1126 -2553 1187 1194 -2561 1010 4538 -2569 1188 1194 -2579 1010 4649 -2588 1301 1303 -2589 1015 4648 -2589 1188 1301 -2591 1302 1418 -2600 978 1087 -2502 2476 2479 -2507 2475 2476 -2519 2476 2588 -2530 2473 2475 -2534 2025 2032 -2537 2365 2477 -2543 2469 2580 -2549 2030 2032 -2557 2473 2580 -2558 2476 2477 -2572 2027 2032 -2575 3018 3020 -2578 2473 2588 -2579 2607 2613 -2584 2032 2142 -2590 2580 2582 -2593 2476 2478 -2503 4567 4569 -2507 4456 4563 -2508 4334 4437 -2512 4462 4567 -2512 4661 4663 -2515 4570 4575 -2516 4771 4779 -2517 4567 4570 -2519 4663 4768 -2519 4664 4771 -2520 4657 4767 -2522 4462 4568 -2524 4561 4569 -2524 4685 4688 -2524 4777 4782 -2531 4767 4769 
-2531 4777 4778 -2532 4572 4575 -2537 4660 4664 -2544 4575 4577 -2547 4658 4768 -2552 4657 4768 -2554 4567 4572 -2556 4454 4456 -2556 4658 4661 -2561 1010 4538 -2564 4685 4687 -2569 4660 4773 -2571 4676 4681 -2585 4666 4778 -2589 4666 4771 -2596 4658 4767 -2598 4686 4688 -2599 4780 4786 -2501 6005 6007 -2505 5876 5881 -2507 5882 5883 -2517 5886 5887 -2518 6124 6125 -2519 6006 6007 -2520 6348 6354 -2524 5761 5878 -2524 5876 5878 -2541 5976 6094 -2546 6348 6351 -2554 5742 5861 -2555 6237 6240 -2557 5883 5884 -2568 5742 5743 -2570 5753 5755 -2573 5767 5885 -2575 6347 6348 -2578 6004 6007 -2581 5887 5892 -2583 5881 5884 -2583 6446 6447 -2599 6001 6007 -2605 1096 1208 -2613 1015 1121 -2615 1101 1208 -2628 1015 4761 -2628 1093 1095 -2646 1908 1915 -2654 1240 4877 -2659 1301 1302 -2667 1301 1418 -2688 1295 1301 -2696 1085 1088 -2607 2254 2370 -2617 2469 2582 -2619 2613 2614 -2621 2581 2582 -2646 1908 1915 -2655 2473 2478 -2655 2615 2724 -2662 1909 1915 -2665 2473 2583 -2676 2582 2585 -2685 2364 2477 -2690 1915 2025 -2700 2363 2370 -2611 4655 4767 -2616 4572 4577 -2616 4679 4685 -2620 4449 4450 -2624 4683 4691 -2626 5522 5641 -2628 1015 4761 -2636 4576 4577 -2644 4455 4561 -2644 4570 4577 -2649 4680 4685 -2662 4462 4561 -2670 4573 4580 -2675 4652 4657 -2677 5521 5528 -2679 4658 4765 -2683 4659 4660 -2692 4686 4693 -2694 4675 4786 -2601 6124 6243 -2602 5885 5887 -2604 6124 6127 -2606 6121 6243 -2618 6457 6460 -2619 5887 6010 -2619 5889 6002 -2619 6085 6088 -2626 5522 5641 -2626 5755 5760 -2637 6345 6347 -2641 5753 5873 -2641 5760 5878 -2645 5883 5887 -2648 5753 5754 -2648 5887 6002 -2654 5767 5772 -2656 5887 5890 -2660 5887 5893 -2661 5767 5890 -2663 5765 5772 -2667 6102 6214 -2669 5887 6004 -2687 5651 5771 -2688 5761 5879 -2689 5760 5873 -2717 871 878 -2730 1352 4992 -2738 1015 4649 -2744 901 4538 -2752 906 4538 -2762 1012 4649 -2763 906 4649 -2768 1087 1088 -2769 1015 4651 -2797 1013 4649 -2797 1125 1126 -2704 2025 2030 -2705 1910 2030 -2710 1910 1915 -2745 2027 2030 
-2750 1915 2030 -2751 2022 2030 -2752 2473 2582 -2762 2604 2605 -2718 4660 4661 -2720 4572 4574 -2720 4692 4693 -2722 4572 4685 -2723 4456 4555 -2727 4679 4681 -2728 4677 4685 -2736 4456 4561 -2739 4691 4693 -2744 901 4538 -2749 4567 4568 -2749 4688 4691 -2750 4674 4681 -2752 906 4538 -2753 4679 4680 -2756 4555 4561 -2784 4692 4694 -2785 4690 4691 -2791 4691 4694 -2705 6095 6096 -2712 6220 6221 -2715 5889 6010 -2719 5879 5884 -2723 6089 6096 -2723 6096 6102 -2724 5884 5886 -2725 5766 5879 -2737 6214 6220 -2740 6007 6124 -2745 5760 5879 -2752 5889 6008 -2756 6003 6007 -2763 5895 6008 -2763 6122 6124 -2765 6002 6007 -2780 6002 6010 -2799 5893 6008 -2817 901 4432 -2818 1013 1015 -2819 1013 4651 -2820 1238 4875 -2827 1124 4761 -2832 1124 1126 -2832 1126 1238 -2833 4541 4649 -2837 901 4539 -2839 1088 1090 -2847 1012 1013 -2849 906 4539 -2852 1238 4877 -2858 1352 4877 -2872 1238 4763 -2872 1352 4994 -2808 3012 3019 -2812 2471 2478 -2848 3020 3023 -2849 2472 2582 -2854 3019 3020 -2880 3017 3019 -2810 4692 4699 -2814 4677 4683 -2817 901 4432 -2823 4698 4699 -2827 1124 4761 -2832 4694 4697 -2833 4541 4649 -2835 4697 4699 -2848 4574 4677 -2865 4689 4694 -2868 4658 4660 -2873 4705 4810 -2885 4689 4690 -2898 4699 4700 -2811 5893 6010 -2813 5893 6013 -2818 6014 6015 -2842 6002 6009 -2847 5766 5886 -2868 5887 6005 -2870 6003 6122 -2881 6008 6016 -2908 794 4432 -2911 971 976 -2938 1124 4763 -2940 971 978 -2946 869 971 -2958 799 4432 -2969 869 978 -2982 869 976 -2988 794 4431 -2990 799 4539 -2901 2620 2724 -2918 2724 2729 -2934 2472 2583 -2903 4680 4682 -2904 4700 4810 -2906 4689 4696 -2908 794 4432 -2921 4571 4680 -2942 4700 4809 -2958 799 4432 -2966 4680 4687 -2980 4810 4815 -2984 4653 4660 -2988 794 4431 -2990 4572 4680 -2902 5892 6012 -2904 5904 6023 -2907 6015 6016 -2913 5892 6010 -2918 6235 6238 -2922 6123 6127 -2928 6013 6016 -2928 6304 6305 -2929 6017 6024 -2937 6122 6123 -2949 5893 6011 -2950 6002 6122 -2957 6123 6243 -2960 6021 6134 -2973 5892 6011 -2974 6235 6243 -2980 
6014 6019 -2983 5891 6011 -2984 5892 5898 -2995 5892 5893 -2995 5898 6011 -2999 6455 6463 -3000 6347 6455 -3014 794 4328 -3039 794 4433 -3073 696 4328 -3090 1353 4994 -3092 906 4541 -3021 2724 2731 -3040 2692 2799 -3014 794 4328 -3021 4654 4765 -3039 794 4433 -3039 4816 4817 -3054 4817 4823 -3073 696 4328 -3075 4650 4652 -3079 4654 4658 -3080 4696 4697 -3092 906 4541 -3002 5891 5898 -3004 6235 6240 -3005 5893 6016 -3006 5891 5892 -3010 5893 5898 -3021 6011 6016 -3025 6016 6018 -3027 6016 6019 -3028 5778 5891 -3028 5885 5891 -3029 6236 6240 -3030 5890 5892 -3030 6454 6459 -3040 6236 6354 -3045 5885 5890 -3052 5896 5898 -3052 6235 6241 -3058 6346 6354 -3068 5772 5885 -3071 6235 6242 -3075 6123 6241 -3075 6346 6351 -3083 6346 6347 -3093 5896 5897 -3096 6455 6460 -3107 904 4539 -3112 799 4433 -3116 906 1013 -3158 1090 1095 -3161 866 968 -3171 1088 1095 -3191 1088 1089 -3141 2729 2731 -3107 4652 4655 -3112 799 4433 -3164 4653 4654 -3188 4654 4655 -3102 5778 5897 -3106 5895 6016 -3118 6346 6352 -3120 6454 6566 -3132 6236 6352 -3135 6089 6091 -3141 6453 6454 -3150 5896 5899 -3155 6445 6447 -3157 6454 6571 -3170 6451 6565 -3184 6448 6560 -3187 5777 5784 -3198 6019 6021 -3213 1013 4650 -3225 1095 1207 -3233 1301 1308 -3213 1013 4650 -3202 6448 6565 -3206 6456 6460 -3209 6454 6565 -3213 6447 6560 -3227 6456 6462 -3233 6456 6571 -3243 6304 6306 -3245 5895 5899 -3246 6561 6671 -3250 6554 6560 -3251 6450 6565 -3267 6559 6560 -3271 6455 6462 -3273 5895 6014 -3278 6557 6560 -3284 6560 6561 -3290 6559 6562 -3294 6557 6565 -3297 6561 6562 -3371 4649 4651 -3383 4649 4652 -3394 1095 1202 -3325 2934 2941 -3337 2935 3036 -3340 2836 2940 -3345 2513 2515 -3327 4680 4681 -3338 4695 4702 -3363 4697 4700 -3369 4695 4696 -3371 5381 5500 -3383 4649 4652 -3305 6557 6562 -3311 6564 6565 -3313 6561 6670 -3316 6450 6557 -3320 6563 6564 -3326 6557 6564 -3327 6345 6348 -3329 6563 6565 -3333 6563 6571 -3334 5894 5899 -3334 6558 6562 -3336 6561 6665 -3353 6346 6461 -3355 6563 6568 -3357 6346 6353 
-3363 6555 6665 -3367 6456 6569 -3368 6558 6564 -3379 5894 5901 -3382 6556 6665 -3464 696 4433 -3489 1302 1412 -3414 3029 3032 -3424 2728 2837 -3425 3029 3034 -3432 3125 3130 -3435 3130 3132 -3408 5381 5387 -3421 4680 4792 -3427 5379 5382 -3431 4695 4700 -3460 5381 5382 -3464 696 4433 -3482 5380 5381 -3494 5379 5380 -3417 6555 6666 -3418 6556 6670 -3466 6222 6227 -3476 6563 6570 -3477 6664 6665 -3489 6343 6345 -3500 6552 6662 -3508 1295 1412 -3509 1300 1302 -3529 1085 1093 -3531 799 4435 -3542 797 4433 -3546 975 1085 -3569 979 1085 -3512 3131 3133 -3527 3127 3130 -3574 2836 2840 -3583 2940 2942 -3590 2834 2837 -3524 4698 4700 -3528 5262 5379 -3531 799 4435 -3537 5261 5380 -3542 797 4433 -3544 5261 5381 -3549 4702 4703 -3552 5261 5379 -3560 4700 4703 -3563 4592 4701 -3581 4701 4703 -3595 4592 4598 -3598 4703 4706 -3505 6342 6343 -3515 5781 5900 -3518 6227 6342 -3521 6651 6752 -3526 6343 6344 -3528 5894 5902 -3528 6659 6666 -3540 5781 5902 -3540 6664 6666 -3555 6664 6667 -3560 6659 6660 -3561 5779 5902 -3561 6661 6666 -3563 6660 6666 -3577 6658 6660 -3585 5780 5787 -3588 6758 6759 -3598 6558 6670 -3601 1302 1303 -3608 3030 3037 -3623 2836 2837 -3625 2939 2941 -3625 2941 3036 -3642 2941 2942 -3653 2840 2942 -3663 2941 3044 -3676 3037 3134 -3680 3049 3140 -3685 3042 3043 -3687 2942 2945 -3611 4591 4598 -3618 4698 4705 -3630 4596 4701 -3639 4596 4598 -3646 4596 4706 -3674 4703 4705 -3676 4596 4709 -3694 4492 4591 -3602 5779 5897 -3606 5781 5785 -3614 6662 6664 -3615 6655 6660 -3617 6556 6662 -3623 5780 5785 -3632 6659 6661 -3642 5780 5786 -3647 5666 5786 -3647 6558 6668 -3655 5782 5785 -3665 5672 5786 -3673 5779 5784 -3691 5780 5788 -3694 5666 5788 -3696 5672 5792 -3702 1013 4541 -3739 3146 3147 -3748 3042 3044 -3755 3147 3240 -3771 3036 3038 -3775 3047 3049 -3781 2739 6420 -3787 3031 3128 -3796 2942 2943 -3702 1013 4541 -3711 4704 4706 -3713 4591 4599 -3721 4485 4492 -3737 4490 4492 -3748 4704 4709 -3771 4490 4599 -3774 4596 4599 -3776 4388 4492 -3782 4594 4599 -3785 
4388 4491 -3800 4704 4711 -3701 5670 5786 -3718 5664 5788 -3722 6305 6419 -3726 5782 5784 -3726 6227 6337 -3737 5672 5794 -3748 6661 6667 -3750 5670 5788 -3769 6305 6309 -3775 5783 5785 -3781 2739 6420 -3797 5672 5676 -3810 2832 2837 -3815 3140 3148 -3817 2740 2847 -3822 3147 3151 -3824 2945 3044 -3834 3044 3050 -3836 3042 3047 -3839 3235 3322 -3844 3047 3050 -3845 3234 3241 -3849 3047 3148 -3850 3044 3047 -3852 3042 3050 -3852 3147 3148 -3853 3042 3049 -3853 3047 3052 -3855 3045 3050 -3856 3043 3047 -3857 2944 2945 -3857 3045 3047 -3858 2945 3042 -3858 3050 3052 -3863 2945 3045 -3865 3145 3148 -3866 3143 3148 -3874 2941 2947 -3875 2943 2944 -3879 2944 2950 -3880 2947 3042 -3880 3051 3052 -3887 2941 2945 -3889 2940 2941 -3892 3052 3053 -3895 2950 3045 -3895 3234 3242 -3898 3147 3242 -3801 4596 4601 -3803 4492 4493 -3803 4495 4599 -3820 4491 4493 -3822 4594 4601 -3827 5671 5676 -3829 4493 4496 -3850 5673 5794 -3856 4392 4491 -3858 4601 4704 -3863 4495 4496 -3865 4704 4710 -3871 4387 4394 -3886 4392 4394 -3890 5677 5684 -3897 4493 4499 -3812 5678 5792 -3813 5669 5788 -3816 6306 6309 -3818 5783 5788 -3824 5663 5784 -3827 5671 5676 -3839 5678 5798 -3843 5670 5794 -3843 5783 5784 -3850 5673 5794 -3864 5676 5792 -3864 5678 5800 -3878 5676 5678 -3880 5678 5682 -3881 5670 5789 -3890 5677 5684 -3891 5676 5794 -3933 1295 1302 -3987 859 860 -3904 2838 2839 -3909 3145 3242 -3912 2839 2840 -3914 3145 3150 -3914 3239 3241 -3919 3239 3242 -3922 2837 2839 -3922 3143 3149 -3923 3237 3242 -3937 3150 3237 -3944 3052 3143 -3944 3143 3150 -3951 3322 3324 -3952 2839 2943 -3952 3322 3323 -3956 3145 3237 -3957 3239 3322 -3965 3239 3330 -3973 3242 3244 -3982 3323 3404 -3987 2726 2731 -3988 3046 3052 -3991 3239 3244 -3992 3322 3327 -3993 2731 2832 -3994 3052 3149 -3996 3045 3052 -3996 3322 3325 -3999 3052 3148 -3901 4392 4499 -3903 5677 5682 -3909 4495 4600 -3925 4494 4496 -3931 5683 5684 -3953 4295 4393 -3961 4494 4499 -3964 5684 5685 -3973 5679 5682 -3973 5562 5682 -3981 4394 4395 -3991 
4494 4501 -3991 5682 5685 -3992 5683 5690 -3996 4393 4395 -3903 5677 5682 -3905 5676 5797 -3908 5783 5790 -3914 5676 5800 -3921 5675 5794 -3930 5676 5795 -3931 5683 5684 -3939 6558 6669 -3940 6558 6662 -3941 5783 5789 -3948 6661 6664 -3949 6647 6654 -3954 5676 5681 -3964 5684 5685 -3971 6663 6664 -3973 5562 5682 -3973 5679 5682 -3973 6658 6661 -3977 5669 5789 -3988 6468 6575 -3989 6656 6663 -3991 5682 5685 -3992 5683 5690 -4007 2923 3026 -4010 3327 3329 -4011 3327 3330 -4012 2923 3025 -4014 3404 3405 -4020 2923 3018 -4025 3237 3244 -4033 3325 3330 -4040 3238 3244 -4044 3404 3406 -4048 3013 3110 -4053 3244 3325 -4063 3323 3327 -4012 4395 4398 -4016 4394 4397 -4019 5683 5688 -4023 4397 4499 -4034 5570 5689 -4044 4294 4301 -4053 4397 4398 -4062 5568 5688 -4065 5685 5688 -4082 4299 4393 -4094 4397 4500 -4019 5683 5688 -4028 6569 6570 -4029 6661 6663 -4045 6569 6680 -4053 6019 6022 -4055 6581 6582 -4055 6663 6764 -4062 5568 5688 -4062 6569 6571 -4062 6576 6680 -4065 5685 5688 -4068 5675 5789 -4082 6021 6142 -4084 6570 6574 -4085 6569 6574 -4096 5680 5682 -4097 6680 6688 -4099 6574 6680 -4128 696 4331 -4141 797 4331 -4150 797 4435 -4102 3327 3404 -4105 3024 3026 -4112 3327 3412 -4117 3405 3480 -4119 3405 3409 -4121 3406 3412 -4133 3404 3409 -4154 3024 3029 -4158 3474 3481 -4161 3406 3409 -4168 2508 2613 -4170 3327 3332 -4198 3029 3031 -4102 4299 4301 -4124 4299 4401 -4147 4207 4300 -4147 5576 5689 -4148 5683 5691 -4149 5679 5681 -4150 797 4435 -4151 5574 5689 -4153 4396 4398 -4154 5570 5691 -4158 4331 4433 -4163 5685 5687 -4164 4294 4302 -4166 4331 4435 -4167 4331 4332 -4174 5568 5691 -4182 4331 4434 -4197 5574 5691 -4106 5675 5795 -4108 6574 6682 -4112 6680 6683 -4115 6017 6019 -4117 6022 6142 -4117 6021 6140 -4117 6680 6685 -4119 6570 6680 -4120 6680 6682 -4123 6027 6140 -4123 6570 6571 -4130 6764 6771 -4132 6460 6571 -4135 6674 6680 -4136 6568 6571 -4137 6681 6788 -4138 6681 6685 -4140 6025 6140 -4142 6687 6788 -4147 5576 5689 -4148 5683 5691 -4149 5679 5681 -4151 
5574 5689 -4151 6680 6681 -4154 5570 5691 -4163 5685 5687 -4165 6025 6142 -4166 6685 6687 -4168 6685 6688 -4174 5568 5691 -4174 6146 6147 -4176 6080 6199 -4176 6661 6772 -4180 6764 6772 -4182 6140 6148 -4183 6573 6574 -4187 6794 6795 -4189 6687 6795 -4190 6019 6024 -4195 6147 6148 -4197 5574 5691 -4201 962 969 -4210 1440 1555 -4204 3029 3130 -4209 3325 3332 -4227 3133 3138 -4239 3130 3133 -4253 3024 3025 -4271 3405 3482 -4282 3327 3407 -4290 3474 3482 -4238 5576 5695 -4246 5674 5681 -4259 5582 5695 -4275 5576 5697 -4294 5574 5694 -4299 5574 5697 -4211 6788 6796 -4214 6687 6796 -4219 6146 6265 -4220 6571 6573 -4226 6566 6571 -4232 6024 6142 -4233 6682 6683 -4237 6795 6799 -4238 5576 5695 -4241 6794 6899 -4242 6794 6796 -4245 6147 6151 -4246 5674 5681 -4249 6795 6899 -4254 6017 6018 -4258 6145 6148 -4259 5582 5695 -4266 6691 6796 -4275 5576 5697 -4278 6153 6265 -4286 6683 6684 -4287 6899 6900 -4291 6899 6901 -4294 5574 5694 -4296 6142 6143 -4296 6151 6265 -4299 5574 5697 -4300 6683 6685 -4301 3138 3230 -4302 3129 3130 -4306 3122 3130 -4317 3133 3136 -4345 3406 3407 -4352 3406 3482 -4364 3403 3482 -4370 3231 3232 -4302 4206 4213 -4308 4299 4302 -4309 5582 5701 -4339 4396 4401 -4340 4301 4302 -4349 5580 5695 -4384 4300 4302 -4388 4301 4304 -4390 5582 5703 -4305 6151 6267 -4306 6796 6799 -4306 6799 6901 -4326 6271 6272 -4328 6024 6137 -4337 6899 6904 -4337 6906 7000 -4341 6265 6273 -4349 5580 5695 -4360 6573 6683 -4360 6685 6690 -4363 6272 6273 -4365 6143 6145 -4368 6796 6797 -4368 6904 6906 -4369 5680 5681 -4375 5680 5687 -4397 6690 6796 -4405 3128 3133 -4408 3479 3481 -4411 3475 3543 -4455 3326 3332 -4457 3479 3482 -4458 3138 3231 -4476 3481 3543 -4497 3408 3409 -4408 5685 5686 -4431 5573 5691 -4433 5568 5686 -4447 5580 5697 -4472 4302 4305 -4474 5586 5701 -4477 5574 5692 -4479 4302 4308 -4480 5580 5700 -4408 5685 5686 -4416 6661 6767 -4417 7006 7007 -4425 6278 6388 -4431 5573 5691 -4433 5568 5686 -4434 6145 6150 -4434 6904 6907 -4439 6904 6910 -4442 6150 6267 -4443 
6453 6459 -4445 6272 6276 -4447 5580 5697 -4451 5686 5688 -4452 5686 5691 -4456 6798 6799 -4464 6143 6144 -4465 6276 6388 -4465 7000 7008 -4467 6459 6566 -4470 6572 6573 -4470 6904 7008 -4474 6901 6902 -4475 7000 7007 -4477 7007 7097 -4480 5580 5700 -4480 6270 6273 -4485 6904 6909 -4487 6265 6268 -4489 5686 5687 -4492 6902 6904 -4500 7005 7007 -4502 3326 3407 -4508 3144 3237 -4518 3408 3483 -4524 3128 3129 -4533 3031 3122 -4534 3144 3231 -4550 3479 3551 -4559 3139 3231 -4566 3139 3230 -4567 3407 3408 -4567 3408 3482 -4568 3144 3236 -4573 3477 3482 -4596 3479 3543 -4598 3142 3236 -4507 4211 4300 -4530 4211 4213 -4535 5580 5703 -4555 5586 5703 -4568 4304 4401 -4585 5586 5707 -4586 4211 4308 -4511 7005 7008 -4512 6267 6268 -4519 6276 6390 -4521 7003 7008 -4526 7097 7105 -4536 7005 7105 -4545 6690 6791 -4547 7097 7104 -4553 7103 7104 -4554 7102 7105 -4556 6660 6661 -4563 6268 6270 -4564 6797 6798 -4572 6798 6902 -4572 7104 7188 -4574 7102 7104 -4580 6150 6262 -4587 5686 5693 -4595 6902 6903 -4598 7194 7195 -4627 904 4541 -4660 904 4435 -4667 1413 1523 -4672 962 963 -4691 854 963 -4699 962 967 -4609 3145 3236 -4610 3543 3545 -4618 3139 3236 -4618 3537 3544 -4641 3133 3135 -4645 3139 3228 -4663 3479 3484 -4667 3141 3236 -4667 3544 3545 -4676 3135 3139 -4686 3477 3484 -4606 4327 4334 -4627 904 4541 -4636 4235 4327 -4645 5572 5692 -4655 4327 4335 -4660 904 4435 -4667 4213 4214 -4692 5573 5692 -4616 6394 6395 -4635 7188 7196 -4647 7005 7010 -4653 7102 7196 -4657 7100 7105 -4669 6388 6396 -4674 6903 6909 -4674 7188 7195 -4688 6909 7003 -4699 7003 7010 -4711 967 970 -4718 1406 1407 -4735 854 956 -4742 963 964 -4746 848 854 -4754 956 964 -4762 852 956 -4765 961 964 -4770 964 967 -4788 964 966 -4790 965 967 -4710 3542 3544 -4720 3545 3548 -4730 3135 3136 -4733 3544 3597 -4743 3141 3228 -4746 3234 3236 -4766 3134 3135 -4766 3546 3551 -4789 3545 3605 -4794 3484 3546 -4706 4141 4228 -4708 5579 5697 -4711 5579 5692 -4721 4233 4235 -4723 4228 4235 -4739 4233 4335 -4744 4228 4236 
-4763 4134 4141 -4776 4233 4236 -4776 5585 5703 -4779 4139 4141 -4789 4330 4335 -4708 5579 5697 -4710 6395 6396 -4710 7102 7107 -4755 7193 7196 -4767 7189 7273 -4768 7191 7196 -4780 7195 7196 -4782 7201 7279 -4792 6270 6275 -4801 959 964 -4806 848 950 -4807 965 966 -4813 848 956 -4825 848 958 -4832 852 854 -4833 852 964 -4840 852 958 -4858 846 848 -4888 841 846 -4891 841 842 -4895 852 959 -4898 959 965 -4802 3234 3235 -4804 3597 3604 -4807 3546 3548 -4808 3546 3553 -4828 3598 3604 -4831 3128 3135 -4831 3597 3605 -4836 3542 3605 -4841 3234 3239 -4851 3547 3548 -4857 3542 3602 -4859 3547 3605 -4864 3602 3604 -4872 3600 3605 -4873 3597 3602 -4879 3604 3645 -4883 3602 3605 -4900 3645 3652 -4807 4053 4134 -4808 4139 4236 -4828 5698 5703 -4839 4134 4142 -4840 4233 4238 -4884 5585 5704 -4892 4231 4236 -4896 4238 4330 -4802 5697 5698 -4808 6275 6390 -4809 6393 6396 -4809 7193 7199 -4809 7195 7199 -4811 7196 7198 -4828 5698 5703 -4829 7199 7281 -4830 7010 7100 -4833 7285 7286 -4836 6269 6275 -4844 6268 6269 -4845 6150 6269 -4857 7107 7191 -4867 7199 7279 -4873 7279 7287 -4876 5698 5700 -4883 7198 7199 -4884 5585 5704 -4896 6395 6399 -4899 7286 7287 -4904 857 959 -4906 851 852 -4908 850 959 -4910 846 849 -4914 851 959 -4917 1758 1869 -4922 849 851 -4938 850 851 -4943 1758 1877 -4945 841 843 -4952 735 842 -4963 843 849 -4971 735 835 -4979 844 851 -4990 735 843 -4925 3645 3653 -4943 1758 1877 -4949 3602 3653 -4958 3605 3607 -4972 3646 3678 -4979 3645 3650 -4985 3602 3650 -4985 3646 3650 -4995 3647 3650 -4998 3645 3648 -4939 4231 4238 -4947 4139 4142 -4910 7100 7107 -4926 7284 7287 -4935 7286 7364 -4937 7199 7282 -4940 7286 7290 -4963 5698 5705 -4967 7364 7365 -4981 7364 7366 -4984 6275 6385 -4991 7282 7287 -5002 739 843 -5011 736 843 -5018 1752 1869 -5022 1756 1869 -5027 844 846 -5031 1752 1863 -5036 733 735 -5036 1752 1871 -5044 844 845 -5065 1756 1877 -5072 1746 1863 -5073 1756 1871 -5076 843 844 -5084 733 835 -5015 3646 3679 -5017 3602 3607 -5030 3547 3606 -5034 3600 3607 
-5036 1752 1871 -5048 3675 3679 -5065 1756 1877 -5065 2336 2343 -5073 1756 1871 -5084 3602 3648 -5036 5586 5704 -5032 7371 7436 -5036 5586 5704 -5049 7290 7366 -5088 7364 7369 -5102 733 843 -5118 729 835 -5121 1750 1863 -5101 3648 3653 -5104 3648 3650 -5119 3644 3679 -5148 3649 3650 -5154 3648 3655 -5161 3649 3679 -5187 3607 3648 -5188 2227 2344 -5127 4396 4403 -5185 4299 4304 -5119 7287 7289 -5127 7369 7371 -5130 7191 7197 -5131 5698 5699 -5154 7436 7443 -5168 6655 6761 -5171 6390 6391 -5193 7369 7372 -5243 3677 3679 -5291 3649 3680 -5224 4304 4396 -5263 4125 4206 -5217 7197 7198 -5243 7289 7290 -5252 7204 7282 -5258 7369 7443 -5299 7289 7295 -5310 4206 4214 -5372 4118 4125 -5311 7366 7367 -5314 7288 7289 -5346 6766 6772 -5360 7367 7369 -5365 7369 7374 -5373 7289 7367 -5418 728 735 -5478 632 728 -5481 733 736 -5489 728 736 -5406 4123 4125 -5414 4304 4305 -5439 4118 4124 -5459 4304 4403 -5463 4211 4214 -5478 4208 4214 -5482 4042 4124 -5496 4123 4214 -5415 7441 7443 -5529 1761 1877 -5542 738 843 -5529 1761 1877 -5509 4123 4129 -5523 4118 4126 -5526 4123 4126 -5546 4302 4303 -5564 4304 4402 -5568 4042 4118 -5583 4211 4216 -5506 7439 7443 -5521 7367 7368 -5557 7374 7439 -5602 4211 4303 -5612 4209 4214 -5616 4041 4126 -5617 4297 4303 -5621 4042 4045 -5627 4123 4128 -5628 4042 4126 -5634 4210 4303 -5659 4043 4045 -5662 4303 4304 -5696 4043 4044 -5789 630 728 -5789 630 733 -5795 738 838 -5715 4121 4126 -5849 733 738 -5856 732 738 -5833 4209 4216 -5888 4044 4126 -5909 731 733 -5941 731 732 -5944 630 730 -5946 625 626 -5951 626 630 -5956 730 731 -5959 627 730 -5973 626 627 -5983 629 730 -5988 619 626 -5917 4121 4128 -6013 629 725 -6037 619 624 -6039 624 627 -6050 619 620 -6063 624 629 -6083 519 620 -6009 4044 4045 -6013 4044 4127 -6052 4128 4209 -6228 620 621 -6242 621 624 -6250 4046 4047 -6259 622 624 -6292 519 613 -6250 4046 4047 -6337 618 621 -6354 613 621 -6364 621 623 -6378 622 623 -6445 616 621 -6514 517 613 -6539 6275 6391 -6656 517 615 -6724 513 517 -6760 517 616 
-6765 512 513 -6784 506 512 -6792 513 514 -6801 522 616 -6831 516 517 -6853 506 514 -6854 515 616 -6877 511 514 -6885 417 506 -6908 514 516 -6921 415 506 -6942 515 516 -6947 411 506 -6959 509 515 -6966 509 514 -7022 411 500 -7023 420 509 -7043 411 508 -7047 415 508 -7068 412 508 -7093 415 509 -7099 409 500 -7102 413 509 -7135 409 502 -7160 414 508 -7161 409 508 -7174 409 503 -7175 409 411 -7177 409 415 -7180 414 509 -7185 409 414 -7194 409 412 -7173 6274 6281 -7231 414 503 -7204 6280 6281 diff --git a/fortran/lammps.f90 b/fortran/lammps.f90 index 1617891b92..a2f28073e5 100644 --- a/fortran/lammps.f90 +++ b/fortran/lammps.f90 @@ -542,6 +542,14 @@ MODULE LIBLAMMPS INTEGER(c_int) :: lammps_extract_atom_datatype END FUNCTION lammps_extract_atom_datatype + FUNCTION lammps_extract_atom_size(handle, name, dtype) BIND(C) + IMPORT :: c_ptr, c_int + IMPLICIT NONE + TYPE(c_ptr), INTENT(IN), VALUE :: handle, name + INTEGER(c_int), INTENT(IN), VALUE :: dtype + INTEGER(c_int) :: lammps_extract_atom_size + END FUNCTION lammps_extract_atom_size + FUNCTION lammps_extract_atom(handle, name) BIND(C) IMPORT :: c_ptr IMPLICIT NONE @@ -1435,7 +1443,7 @@ CONTAINS IF (SIZE_TAGINT == 8) THEN Cptr = C_LOC(id) ELSE - id32 = id + id32 = INT(id, c_int) Cptr = C_LOC(id32) END IF lmp_map_atom_big = lammps_map_atom(self%handle, Cptr) + 1 @@ -1461,43 +1469,35 @@ CONTAINS ntypes = lmp_extract_setting(self, 'ntypes') Cname = f2c_string(name) datatype = lammps_extract_atom_datatype(self%handle, Cname) + nrows = lammps_extract_atom_size(self%handle, Cname, LMP_SIZE_ROWS) + ncols = lammps_extract_atom_size(self%handle, Cname, LMP_SIZE_COLS) Cptr = lammps_extract_atom(self%handle, Cname) CALL lammps_free(Cname) - SELECT CASE (name) - CASE ('mass') - ncols = ntypes + 1 - nrows = 1 - CASE ('x','v','f','mu','omega','torque','angmom') - ncols = nmax - nrows = 3 - CASE DEFAULT - ncols = nmax - nrows = 1 - END SELECT - peratom_data%lammps_instance => self SELECT CASE (datatype) CASE (LAMMPS_INT) 
peratom_data%datatype = DATA_INT_1D - CALL C_F_POINTER(Cptr, peratom_data%i32_vec, [ncols]) + CALL C_F_POINTER(Cptr, peratom_data%i32_vec, [nrows]) CASE (LAMMPS_INT64) peratom_data%datatype = DATA_INT64_1D - CALL C_F_POINTER(Cptr, peratom_data%i64_vec, [ncols]) + CALL C_F_POINTER(Cptr, peratom_data%i64_vec, [nrows]) CASE (LAMMPS_DOUBLE) peratom_data%datatype = DATA_DOUBLE_1D + ! The mass array is allocated from 0, but only used from 1. We also want to use it from 1. IF (name == 'mass') THEN - CALL C_F_POINTER(Cptr, dummy, [ncols]) + CALL C_F_POINTER(Cptr, dummy, [nrows]) peratom_data%r64_vec(0:) => dummy ELSE - CALL C_F_POINTER(Cptr, peratom_data%r64_vec, [ncols]) + CALL C_F_POINTER(Cptr, peratom_data%r64_vec, [nrows]) END IF CASE (LAMMPS_DOUBLE_2D) peratom_data%datatype = DATA_DOUBLE_2D ! First, we dereference the void** pointer to point to the void* - CALL C_F_POINTER(Cptr, Catomptr, [ncols]) + CALL C_F_POINTER(Cptr, Catomptr, [nrows]) ! Catomptr(1) now points to the first element of the array - CALL C_F_POINTER(Catomptr(1), peratom_data%r64_mat, [nrows,ncols]) + ! rows and columns are swapped in Fortran + CALL C_F_POINTER(Catomptr(1), peratom_data%r64_mat, [ncols,nrows]) CASE (-1) CALL lmp_error(self, LMP_ERROR_ALL + LMP_ERROR_WORLD, & 'per-atom property ' // name // ' not found in extract_setting') @@ -2604,6 +2604,8 @@ CONTAINS TYPE(c_ptr) :: Cid, Ctype, Cx, Cv, Cimage INTEGER(c_int) :: tagint_size, atoms_created + Ctype = c_null_ptr + Cx = c_null_ptr ! type is actually NOT optional, but we can't make id optional without it, ! so we check at run-time IF (.NOT. 
PRESENT(type)) THEN diff --git a/lib/gpu/Makefile.aurora b/lib/gpu/Makefile.aurora new file mode 100644 index 0000000000..c343e061ee --- /dev/null +++ b/lib/gpu/Makefile.aurora @@ -0,0 +1,31 @@ +# /* ---------------------------------------------------------------------- +# Generic Linux Makefile for OpenCL +# ------------------------------------------------------------------------- */ + +# which file will be copied to Makefile.lammps + +EXTRAMAKE = Makefile.lammps.opencl + +# OCL_TUNE = -DFERMI_OCL # -- Uncomment for NVIDIA Fermi +# OCL_TUNE = -DKEPLER_OCL # -- Uncomment for NVIDIA Kepler +# OCL_TUNE = -DCYPRESS_OCL # -- Uncomment for AMD Cypress +OCL_TUNE = -DGENERIC_OCL # -- Uncomment for generic device + +# this setting should match LAMMPS Makefile +# one of LAMMPS_SMALLBIG (default), LAMMPS_BIGBIG and LAMMPS_SMALLSMALL + +LMP_INC = -DLAMMPS_SMALLBIG + +OCL_INC = -I/opt/intel/oneapi/compiler/latest/linux/include/sycl/ # Path to CL directory +OCL_CPP = mpicxx -cxx=icpx -DCUDA_PROXY $(DEFAULT_DEVICE) -xHost -O2 -ffp-model=fast -qoverride-limits -DMPI_GERYON -DGERYON_NUMA_FISSION -DUCL_NO_EXIT -DMPICH_IGNORE_CXX_SEEK $(LMP_INC) $(OCL_INC) -DGERYON_NO_PROF +OCL_LINK = -L/opt/intel/oneapi/compiler/latest/linux/lib/ -lOpenCL +OCL_PREC = -D_SINGLE_DOUBLE + +BIN_DIR = ./ +OBJ_DIR = ./ +LIB_DIR = ./ +AR = ar +BSH = /bin/sh + +include Opencl.makefile + diff --git a/lib/gpu/lal_neighbor.cpp b/lib/gpu/lal_neighbor.cpp index 10816e2fa6..288415e0e7 100644 --- a/lib/gpu/lal_neighbor.cpp +++ b/lib/gpu/lal_neighbor.cpp @@ -586,8 +586,25 @@ void Neighbor::build_nbor_list(double **x, const int inum, const int host_inum, const int b2y=_block_cell_2d; const int g2x=static_cast(ceil(static_cast(_maxspecial)/b2x)); const int g2y=static_cast(ceil(static_cast(nt)/b2y)); - _shared->k_transpose.set_size(g2x,g2y,b2x,b2y); - _shared->k_transpose.run(&dev_special,&dev_special_t,&_maxspecial,&nt); + // the maximum number of blocks on the device is typically 65535 + // in principle we can use 
a lower number to have more resource per block 32768 + const int max_num_blocks = 65535; + int shift = 0; + if (g2y < max_num_blocks) { + _shared->k_transpose.set_size(g2x,g2y,b2x,b2y); + _shared->k_transpose.run(&dev_special,&dev_special_t,&_maxspecial,&nt,&shift); + } else { + // using a fixed number of blocks + int g2y_m = max_num_blocks; + _shared->k_transpose.set_size(g2x,g2y_m,b2x,b2y); + // number of chunks needed for the whole transpose + const int num_chunks = ceil(static_cast(g2y) / g2y_m); + for (int i = 0; i < num_chunks; i++) { + _shared->k_transpose.run(&dev_special,&dev_special_t,&_maxspecial,&nt,&shift); + shift += g2y_m*b2y; + } + } + time_transpose.stop(); } diff --git a/lib/gpu/lal_neighbor_gpu.cu b/lib/gpu/lal_neighbor_gpu.cu index a7506fc5c3..7d0941ccd5 100644 --- a/lib/gpu/lal_neighbor_gpu.cu +++ b/lib/gpu/lal_neighbor_gpu.cu @@ -147,7 +147,7 @@ __kernel void kernel_calc_cell_counts(const unsigned *restrict cell_id, __kernel void transpose(__global tagint *restrict out, const __global tagint *restrict in, - int columns_in, int rows_in) + int columns_in, int rows_in, int shift) { __local tagint block[BLOCK_CELL_2D][BLOCK_CELL_2D+1]; @@ -158,15 +158,15 @@ __kernel void transpose(__global tagint *restrict out, unsigned i=bi*BLOCK_CELL_2D+ti; unsigned j=bj*BLOCK_CELL_2D+tj; - if ((i)` specialization [\#6943](https://github.com/kokkos/kokkos/pull/6943), add `Kokkos::to_array` [\#6375](https://github.com/kokkos/kokkos/pull/6375), make `Kokkos::Array` equality-comparable [\#7148](https://github.com/kokkos/kokkos/pull/7148) +* Structured binding support for `Kokkos::complex` [\#7040](https://github.com/kokkos/kokkos/pull/7040) + +### Build System Changes +* Do not require OpenMP support for languages other than CXX [\#6965](https://github.com/kokkos/kokkos/pull/6965) +* Update Intel GPU architectures in Makefile [\#6895](https://github.com/kokkos/kokkos/pull/6895) +* Fix use of OpenMP with Cuda or HIP as compile language 
[\#6972](https://github.com/kokkos/kokkos/pull/6972) +* Define and enforce new minimum compiler versions for C++20 support [\#7128](https://github.com/kokkos/kokkos/pull/7128), [\#7123](https://github.com/kokkos/kokkos/pull/7123) +* Add nvidia Grace CPU architecture: `Kokkos_ARCH_ARMV9_GRACE` [\#7158](https://github.com/kokkos/kokkos/pull/7158) +* Fix Makefile.kokkos for Threads [\#6896](https://github.com/kokkos/kokkos/pull/6896) +* Remove support for NVHPC as CUDA device compiler [\#6987](https://github.com/kokkos/kokkos/pull/6987) +* Fix using CUDAToolkit for CMake 3.28.4 and higher [\#7062](https://github.com/kokkos/kokkos/pull/7062) + +### Incompatibilities (i.e. breaking changes) +* Drop `Kokkos::Array` special treatment in `View`s [\#6906](https://github.com/kokkos/kokkos/pull/6906) +* Drop `Experimental::RawMemoryAllocationFailure` [\#7145](https://github.com/kokkos/kokkos/pull/7145) + +### Deprecations +* Remove `Experimental::LayoutTiled` class template and deprecate `is_layouttiled` trait [\#6907](https://github.com/kokkos/kokkos/pull/6907) +* Deprecate `Kokkos::layout_iterate_type_selector` [\#7076](https://github.com/kokkos/kokkos/pull/7076) +* Deprecate specialization of `Kokkos::pair` for a single element [\#6947](https://github.com/kokkos/kokkos/pull/6947) +* Deprecate `deep_copy` of `UnorderedMap` of different size [\#6812](https://github.com/kokkos/kokkos/pull/6812) +* Deprecate trailing `Proxy` template argument of `Kokkos::Array` [\#6934](https://github.com/kokkos/kokkos/pull/6934) +* Deprecate implicit conversions of integers to `ChunkSize` [\#7151](https://github.com/kokkos/kokkos/pull/7151) +* Deprecate implicit conversions to execution spaces [\#7156](https://github.com/kokkos/kokkos/pull/7156) + +### Bug Fixes +* Do not return a copy of the input functor in `Experimental::for_each` [\#6910](https://github.com/kokkos/kokkos/pull/6910) +* Fix `realloc` on views of non-default constructible element types 
[\#6993](https://github.com/kokkos/kokkos/pull/6993) +* Fix undefined behavior in `View` initialization or fill with zeros [\#7014](https://github.com/kokkos/kokkos/pull/7014) +* Fix `sort_by_key` on host execution spaces when building with NVCC [\#7059](https://github.com/kokkos/kokkos/pull/7059) +* Fix using shared libraries and -fvisibility=hidden [\#7065](https://github.com/kokkos/kokkos/pull/7065) +* Fix view reference counting when functor copy constructor throws in parallel dispatch [\#6289](https://github.com/kokkos/kokkos/pull/6289) +* Fix `initialize(InitializationSetting)` for handling `print_configuration` setting [\#7098](https://github.com/kokkos/kokkos/pull/7098) +* Thread safety fixes for the Serial and OpenMP backend [\#7080](https://github.com/kokkos/kokkos/pull/7080), [\#6151](https://github.com/kokkos/kokkos/pull/6151) + ## [4.3.01](https://github.com/kokkos/kokkos/tree/4.3.01) [Full Changelog](https://github.com/kokkos/kokkos/compare/4.3.00...4.3.01) ### Backend and Architecture Enhancements: #### HIP: -* MI300 support unified memory support [\#6877](https://github.com/kokkos/kokkos/pull/6877) +* MI300 support unified memory [\#6877](https://github.com/kokkos/kokkos/pull/6877) ### Bug Fixes * Serial: Use the provided execution space instance in TeamPolicy [\#6951](https://github.com/kokkos/kokkos/pull/6951) diff --git a/lib/kokkos/CITATION.cff b/lib/kokkos/CITATION.cff new file mode 100644 index 0000000000..28c674c451 --- /dev/null +++ b/lib/kokkos/CITATION.cff @@ -0,0 +1,65 @@ +cff-version: 1.2.0 +title: Kokkos +message: >- + If you use this software, please cite the overview paper +type: software +authors: + - name: The Kokkos authors + website: https://kokkos.org/community/team/ +identifiers: + - type: url + website: https://kokkos.org/kokkos-core-wiki/citation.html +repository-code: 'https://github.com/kokkos/kokkos' +url: 'https://kokkos.org/' +license: Apache-2.0 +preferred-citation: + type: article + authors: + - given-names: Christian 
R. + family-names: Trott + - given-names: Damien + family-names: Lebrun-Grandié + - given-names: Daniel + family-names: Arndt + - family-names: Ciesko + given-names: Jan + - given-names: Vinh + family-names: Dang + - family-names: Ellingwood + given-names: Nathan + - given-names: Rahulkumar + family-names: Gayatri + - given-names: Evan + family-names: Harvey + - given-names: Daisy S. + family-names: Hollman + - given-names: Dan + family-names: Ibanez + - given-names: Nevin + family-names: Liber + - given-names: Jonathan + family-names: Madsen + - given-names: Jeff + family-names: Miles + - given-names: David + family-names: Poliakoff + - given-names: Amy + family-names: Powell + - given-names: Sivasankaran + family-names: Rajamanickam + - given-names: Mikael + family-names: Simberg + - given-names: Dan + family-names: Sunderland + - given-names: Bruno + family-names: Turcksin + - given-names: Jeremiah + family-names: Wilke + doi: 10.1109/TPDS.2021.3097283 + journal: IEEE Transactions on Parallel and Distributed Systems + start: 805 + end: 817 + title: "Kokkos 3: Programming Model Extensions for the Exascale Era" + volume: 33 + issue: 4 + year: 2022 diff --git a/lib/kokkos/CMakeLists.txt b/lib/kokkos/CMakeLists.txt index 76f2183db8..736cbac218 100644 --- a/lib/kokkos/CMakeLists.txt +++ b/lib/kokkos/CMakeLists.txt @@ -150,7 +150,7 @@ ENDIF() set(Kokkos_VERSION_MAJOR 4) -set(Kokkos_VERSION_MINOR 3) +set(Kokkos_VERSION_MINOR 4) set(Kokkos_VERSION_PATCH 1) set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}") message(STATUS "Kokkos version: ${Kokkos_VERSION}") diff --git a/lib/kokkos/Makefile.kokkos b/lib/kokkos/Makefile.kokkos index 6fdddd9a53..eb95c5448d 100644 --- a/lib/kokkos/Makefile.kokkos +++ b/lib/kokkos/Makefile.kokkos @@ -11,7 +11,7 @@ CXXFLAGS += $(SHFLAGS) endif KOKKOS_VERSION_MAJOR = 4 -KOKKOS_VERSION_MINOR = 3 +KOKKOS_VERSION_MINOR = 4 KOKKOS_VERSION_PATCH = 1 KOKKOS_VERSION = $(shell echo 
$(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc) @@ -21,11 +21,11 @@ KOKKOS_DEVICES ?= "OpenMP" # Options: # Intel: KNC,KNL,SNB,HSW,BDW,SKL,SKX,ICL,ICX,SPR # NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75,Ampere80,Ampere86,Ada89,Hopper90 -# ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX +# ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX,ARMv9-Grace # IBM: Power8,Power9 -# AMD-GPUS: AMD_GFX906,AMD_GFX908,AMD_GFX90A,AMD_GFX940,AMD_GFX942,AMD_GFX1030,AMD_GFX1100,AMD_GFX1103 +# AMD-GPUS: AMD_GFX906,AMD_GFX908,AMD_GFX90A,AMD_GFX940,AMD_GFX942,AMD_GFX1030,AMD_GFX1100 # AMD-CPUS: AMDAVX,Zen,Zen2,Zen3 -# Intel-GPUs: Gen9,Gen11,Gen12LP,DG1,XeHP,PVC +# Intel-GPUs: Intel_Gen,Intel_Gen9,Intel_Gen11,Intel_Gen12LP,Intel_DG1,Intel_XeHP,Intel_PVC KOKKOS_ARCH ?= "" # Options: yes,no KOKKOS_DEBUG ?= "no" @@ -41,7 +41,7 @@ KOKKOS_STANDALONE_CMAKE ?= "no" # Default settings specific options. # Options: force_uvm,use_ldg,rdc,enable_lambda,enable_constexpr,disable_malloc_async -KOKKOS_CUDA_OPTIONS ?= "enable_lambda" +KOKKOS_CUDA_OPTIONS ?= "disable_malloc_async" # Options: rdc KOKKOS_HIP_OPTIONS ?= "" @@ -328,12 +328,43 @@ KOKKOS_INTERNAL_USE_ARCH_ICL := $(call kokkos_has_string,$(KOKKOS_ARCH),ICL) KOKKOS_INTERNAL_USE_ARCH_ICX := $(call kokkos_has_string,$(KOKKOS_ARCH),ICX) KOKKOS_INTERNAL_USE_ARCH_SPR := $(call kokkos_has_string,$(KOKKOS_ARCH),SPR) -KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen) +# Traditionally, we supported, e.g., IntelGen9 instead of Intel_Gen9. The latter +# matches the CMake option but we also accept the former for backward-compatibility. 
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9 := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen9) +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9), 0) + KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9 := $(call kokkos_has_string,$(KOKKOS_ARCH),Intel_Gen9) +endif KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN11 := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen11) +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN11), 0) + KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN11 := $(call kokkos_has_string,$(KOKKOS_ARCH),Intel_Gen11) +endif KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN12LP := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen12LP) +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN12LP), 0) + KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN12LP := $(call kokkos_has_string,$(KOKKOS_ARCH),Intel_Gen12LP) +endif +KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen9) +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9), 0) + KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9 := $(call kokkos_has_string,$(KOKKOS_ARCH),Intel_Gen9) +endif +KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN_SET := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9) \ + + $(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN11) \ + + $(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN12LP)) +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN_SET), 0) + KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen) + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN), 0) + KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN := $(call kokkos_has_string,$(KOKKOS_ARCH),Intel_Gen) + endif +endif KOKKOS_INTERNAL_USE_ARCH_INTEL_DG1 := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelDG1) +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_DG1), 0) + KOKKOS_INTERNAL_USE_ARCH_INTEL_DG1 := $(call kokkos_has_string,$(KOKKOS_ARCH),Intel_DG1) +endif KOKKOS_INTERNAL_USE_ARCH_INTEL_XEHP := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelXeHP) +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_XEHP), 0) + KOKKOS_INTERNAL_USE_ARCH_INTEL_XEHP := $(call kokkos_has_string,$(KOKKOS_ARCH),Intel_XeHP) +endif +# Traditionally the architecture was called PVC 
instead of Intel_PVC. This +# version makes us accept IntelPVC and Intel_PVC as well. KOKKOS_INTERNAL_USE_ARCH_INTEL_PVC := $(call kokkos_has_string,$(KOKKOS_ARCH),PVC) # NVIDIA based. @@ -394,7 +425,8 @@ KOKKOS_INTERNAL_USE_ARCH_ARMV81 := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv8 KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv8-ThunderX) KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2 := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv8-TX2) KOKKOS_INTERNAL_USE_ARCH_A64FX := $(call kokkos_has_string,$(KOKKOS_ARCH),A64FX) -KOKKOS_INTERNAL_USE_ARCH_ARM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_ARMV80)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV81)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2)+$(KOKKOS_INTERNAL_USE_ARCH_A64FX) | bc)) +KOKKOS_INTERNAL_USE_ARCH_ARMV9_GRACE := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv9-Grace) +KOKKOS_INTERNAL_USE_ARCH_ARM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_ARMV80)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV81)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2)+$(KOKKOS_INTERNAL_USE_ARCH_A64FX)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV9_GRACE) | bc)) # IBM based. KOKKOS_INTERNAL_USE_ARCH_POWER8 := $(call kokkos_has_string,$(KOKKOS_ARCH),Power8) @@ -433,7 +465,6 @@ KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100 := $(call kokkos_has_string,$(KOKKOS_ARCH), ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100), 0) KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100 := $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1100) endif -KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1103 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1103) # Any AVX? 
KOKKOS_INTERNAL_USE_ARCH_AVX := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_AMDAVX)) @@ -758,6 +789,14 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_A64FX), 1) endif endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV9_GRACE), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV9_GRACE") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARM_NEON") + + KOKKOS_CXXFLAGS += -mcpu=neoverse-v2 -msve-vector-bits=128 + KOKKOS_LDFLAGS += -mcpu=neoverse-v2 -msve-vector-bits=128 +endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_ZEN") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX2") @@ -1119,11 +1158,6 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU") KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx1100 endif -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1103), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX1103") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU") - KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx1103 -endif ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1) @@ -1216,6 +1250,8 @@ ifeq ($(KOKKOS_INTERNAL_DISABLE_BUNDLED_MDSPAN), 0) endif tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_IMPL_MDSPAN") +tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_IMPL_REF_COUNT_BRANCH_UNLIKELY") + KOKKOS_INTERNAL_LS_CONFIG := $(shell ls KokkosCore_config.h 2>&1) ifeq ($(KOKKOS_INTERNAL_LS_CONFIG), KokkosCore_config.h) diff --git a/lib/kokkos/Makefile.targets b/lib/kokkos/Makefile.targets index e6900a822a..e8e429e027 100644 --- a/lib/kokkos/Makefile.targets +++ b/lib/kokkos/Makefile.targets @@ -81,7 +81,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1) Kokkos_Threads_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_Instance.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c 
$(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_Instance.cpp Kokkos_Threads_Spinwait.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_Spinwait.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_Spinwait.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_Spinwait.cpp endif ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) diff --git a/lib/kokkos/README.md b/lib/kokkos/README.md index 19793bb82d..c8c6f8f7cf 100644 --- a/lib/kokkos/README.md +++ b/lib/kokkos/README.md @@ -1,4 +1,4 @@ -![Kokkos](https://avatars2.githubusercontent.com/u/10199860?s=200&v=4) +[![Kokkos](https://avatars2.githubusercontent.com/u/10199860?s=200&v=4)](https://kokkos.org) # Kokkos: Core Libraries @@ -10,43 +10,66 @@ hierarchies and multiple types of execution resources. It currently can use CUDA, HIP, SYCL, HPX, OpenMP and C++ threads as backend programming models with several other backends in development. -**Kokkos Core is part of the Kokkos C++ Performance Portability Programming EcoSystem.** +**Kokkos Core is part of the [Kokkos C++ Performance Portability Programming Ecosystem](https://kokkos.org/about/abstract/).** -For the complete documentation, click below: +Kokkos is a [Linux Foundation](https://linuxfoundation.org) project. -# [kokkos.github.io/kokkos-core-wiki](https://kokkos.github.io/kokkos-core-wiki) - -# Learning about Kokkos +## Learning about Kokkos To start learning about Kokkos: -- [Kokkos Lectures](https://kokkos.github.io/kokkos-core-wiki/videolectures.html): they contain a mix of lecture videos and hands-on exercises covering all the important Kokkos Ecosystem capabilities. +- [Kokkos Lectures](https://kokkos.org/kokkos-core-wiki/videolectures.html): they contain a mix of lecture videos and hands-on exercises covering all the important capabilities. 
-- [Programming guide](https://kokkos.github.io/kokkos-core-wiki/programmingguide.html): contains in "narrative" form a technical description of the programming model, machine model, and the main building blocks like the Views and parallel dispatch. +- [Programming guide](https://kokkos.org/kokkos-core-wiki/programmingguide.html): contains in "narrative" form a technical description of the programming model, machine model, and the main building blocks like the Views and parallel dispatch. -- [API reference](https://kokkos.github.io/kokkos-core-wiki/): organized by category, i.e., [core](https://kokkos.github.io/kokkos-core-wiki/API/core-index.html), [algorithms](https://kokkos.github.io/kokkos-core-wiki/API/algorithms-index.html) and [containers](https://kokkos.github.io/kokkos-core-wiki/API/containers-index.html) or, if you prefer, in [alphabetical order](https://kokkos.github.io/kokkos-core-wiki/API/alphabetical.html). +- [API reference](https://kokkos.org/kokkos-core-wiki/): organized by category, i.e., [core](https://kokkos.org/kokkos-core-wiki/API/core-index.html), [algorithms](https://kokkos.org/kokkos-core-wiki/API/algorithms-index.html) and [containers](https://kokkos.org/kokkos-core-wiki/API/containers-index.html) or, if you prefer, in [alphabetical order](https://kokkos.org/kokkos-core-wiki/API/alphabetical.html). -- [Use cases and Examples](https://kokkos.github.io/kokkos-core-wiki/usecases.html): a series of examples ranging from how to use Kokkos with MPI to Fortran interoperability. +- [Use cases and Examples](https://kokkos.org/kokkos-core-wiki/usecases.html): a series of examples ranging from how to use Kokkos with MPI to Fortran interoperability. + +## Obtaining Kokkos + +The latest release of Kokkos can be obtained from the [GitHub releases page](https://github.com/kokkos/kokkos/releases/latest). + +The current release is [4.3.01](https://github.com/kokkos/kokkos/releases/tag/4.3.01). 
+ +```bash +curl -OJ -L https://github.com/kokkos/kokkos/archive/refs/tags/4.3.01.tar.gz +# Or with wget +wget https://github.com/kokkos/kokkos/archive/refs/tags/4.3.01.tar.gz +``` + +To clone the latest development version of Kokkos from GitHub: + +```bash +git clone -b develop https://github.com/kokkos/kokkos.git +``` + +### Building Kokkos + +To build Kokkos, you will need to have a C++ compiler that supports C++17 or later. +All requirements including minimum and primary tested compiler versions can be found [here](https://kokkos.org/kokkos-core-wiki/requirements.html). + +Building and installation instructions are described [here](https://kokkos.org/kokkos-core-wiki/building.html). + +You can also install Kokkos using [Spack](https://spack.io/): `spack install kokkos`. [Available configuration options](https://packages.spack.io/package.html?name=kokkos) can be displayed using `spack info kokkos`. + +## For the complete documentation: [kokkos.org/kokkos-core-wiki/](https://kokkos.org/kokkos-core-wiki/) + +## Support For questions find us on Slack: https://kokkosteam.slack.com or open a GitHub issue. For non-public questions send an email to: *crtrott(at)sandia.gov* -# Contributing to Kokkos +## Contributing -Please see [this page](https://kokkos.github.io/kokkos-core-wiki/contributing.html) for details on how to contribute. +Please see [this page](https://kokkos.org/kokkos-core-wiki/contributing.html) for details on how to contribute. -# Requirements, Building and Installing +## Citing Kokkos -All requirements including minimum and primary tested compiler versions can be found [here](https://kokkos.github.io/kokkos-core-wiki/requirements.html). +Please see the [following page](https://kokkos.org/kokkos-core-wiki/citation.html). -Building and installation instructions are described [here](https://kokkos.github.io/kokkos-core-wiki/building.html). - -# Citing Kokkos - -Please see the [following page](https://kokkos.github.io/kokkos-core-wiki/citation.html). 
- -# License +## License [![License](https://img.shields.io/badge/License-Apache--2.0_WITH_LLVM--exception-blue)](https://spdx.org/licenses/LLVM-exception.html) diff --git a/lib/kokkos/algorithms/src/sorting/impl/Kokkos_SortByKeyImpl.hpp b/lib/kokkos/algorithms/src/sorting/impl/Kokkos_SortByKeyImpl.hpp index 36deccdfb1..f11f807048 100644 --- a/lib/kokkos/algorithms/src/sorting/impl/Kokkos_SortByKeyImpl.hpp +++ b/lib/kokkos/algorithms/src/sorting/impl/Kokkos_SortByKeyImpl.hpp @@ -189,6 +189,33 @@ void applyPermutation(const ExecutionSpace& space, KOKKOS_LAMBDA(int i) { view(i) = view_copy(permutation(i)); }); } +// FIXME_NVCC: nvcc has trouble compiling lambdas inside a function with +// variadic templates (sort_by_key_via_sort). Switch to using functors instead. +template +struct IotaFunctor { + Permute _permute; + KOKKOS_FUNCTION void operator()(int i) const { _permute(i) = i; } +}; +template +struct LessFunctor { + Keys _keys; + KOKKOS_FUNCTION bool operator()(int i, int j) const { + return _keys(i) < _keys(j); + } +}; + +// FIXME_NVCC+MSVC: We can't use a lambda instead of a functor which gave us +// "For this host platform/dialect, an extended lambda cannot be defined inside +// the 'if' or 'else' block of a constexpr if statement" +template +struct KeyComparisonFunctor { + Keys m_keys; + Comparator m_comparator; + KOKKOS_FUNCTION bool operator()(int i, int j) const { + return m_comparator(m_keys(i), m_keys(j)); + } +}; + template @@ -207,10 +234,9 @@ void sort_by_key_via_sort( n); // iota - Kokkos::parallel_for( - "Kokkos::sort_by_key_via_sort::iota", - Kokkos::RangePolicy(exec, 0, n), - KOKKOS_LAMBDA(int i) { permute(i) = i; }); + Kokkos::parallel_for("Kokkos::sort_by_key_via_sort::iota", + Kokkos::RangePolicy(exec, 0, n), + IotaFunctor{permute}); using Layout = typename Kokkos::View::array_layout; @@ -228,16 +254,15 @@ void sort_by_key_via_sort( Kokkos::DefaultHostExecutionSpace host_exec; if constexpr (sizeof...(MaybeComparator) == 0) { - Kokkos::sort( - 
host_exec, host_permute, - KOKKOS_LAMBDA(int i, int j) { return host_keys(i) < host_keys(j); }); + Kokkos::sort(host_exec, host_permute, + LessFunctor{host_keys}); } else { auto keys_comparator = std::get<0>(std::tuple(maybeComparator...)); Kokkos::sort( - host_exec, host_permute, KOKKOS_LAMBDA(int i, int j) { - return keys_comparator(host_keys(i), host_keys(j)); - }); + host_exec, host_permute, + KeyComparisonFunctor{ + host_keys, keys_comparator}); } host_exec.fence("Kokkos::Impl::sort_by_key_via_sort: after host sort"); Kokkos::deep_copy(exec, permute, host_permute); @@ -262,16 +287,14 @@ void sort_by_key_via_sort( } #else if constexpr (sizeof...(MaybeComparator) == 0) { - Kokkos::sort( - exec, permute, - KOKKOS_LAMBDA(int i, int j) { return keys(i) < keys(j); }); + Kokkos::sort(exec, permute, LessFunctor{keys}); } else { auto keys_comparator = std::get<0>(std::tuple(maybeComparator...)); Kokkos::sort( - exec, permute, KOKKOS_LAMBDA(int i, int j) { - return keys_comparator(keys(i), keys(j)); - }); + exec, permute, + KeyComparisonFunctor{ + keys, keys_comparator}); } #endif } diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ForEach.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ForEach.hpp index 6215b325af..05969be463 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ForEach.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ForEach.hpp @@ -29,49 +29,46 @@ namespace Experimental { template < class ExecutionSpace, class IteratorType, class UnaryFunctorType, std::enable_if_t, int> = 0> -UnaryFunctorType for_each(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, - UnaryFunctorType functor) { - return Impl::for_each_exespace_impl(label, ex, first, last, - std::move(functor)); +void for_each(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, UnaryFunctorType functor) { + Impl::for_each_exespace_impl(label, ex, first, last, std::move(functor)); } 
template < class ExecutionSpace, class IteratorType, class UnaryFunctorType, std::enable_if_t, int> = 0> -UnaryFunctorType for_each(const ExecutionSpace& ex, IteratorType first, - IteratorType last, UnaryFunctorType functor) { - return Impl::for_each_exespace_impl("Kokkos::for_each_iterator_api_default", - ex, first, last, std::move(functor)); +void for_each(const ExecutionSpace& ex, IteratorType first, IteratorType last, + UnaryFunctorType functor) { + Impl::for_each_exespace_impl("Kokkos::for_each_iterator_api_default", ex, + first, last, std::move(functor)); } template < class ExecutionSpace, class DataType, class... Properties, class UnaryFunctorType, std::enable_if_t, int> = 0> -UnaryFunctorType for_each(const std::string& label, const ExecutionSpace& ex, - const ::Kokkos::View& v, - UnaryFunctorType functor) { +void for_each(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View& v, + UnaryFunctorType functor) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::for_each_exespace_impl(label, ex, KE::begin(v), KE::end(v), - std::move(functor)); + Impl::for_each_exespace_impl(label, ex, KE::begin(v), KE::end(v), + std::move(functor)); } template < class ExecutionSpace, class DataType, class... 
Properties, class UnaryFunctorType, std::enable_if_t, int> = 0> -UnaryFunctorType for_each(const ExecutionSpace& ex, - const ::Kokkos::View& v, - UnaryFunctorType functor) { +void for_each(const ExecutionSpace& ex, + const ::Kokkos::View& v, + UnaryFunctorType functor) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::for_each_exespace_impl("Kokkos::for_each_view_api_default", ex, - KE::begin(v), KE::end(v), - std::move(functor)); + Impl::for_each_exespace_impl("Kokkos::for_each_view_api_default", ex, + KE::begin(v), KE::end(v), std::move(functor)); } // @@ -82,24 +79,23 @@ UnaryFunctorType for_each(const ExecutionSpace& ex, template , int> = 0> -KOKKOS_FUNCTION UnaryFunctorType for_each(const TeamHandleType& teamHandle, - IteratorType first, IteratorType last, - UnaryFunctorType functor) { - return Impl::for_each_team_impl(teamHandle, first, last, std::move(functor)); +KOKKOS_FUNCTION void for_each(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + UnaryFunctorType functor) { + Impl::for_each_team_impl(teamHandle, first, last, std::move(functor)); } template , int> = 0> -KOKKOS_FUNCTION UnaryFunctorType -for_each(const TeamHandleType& teamHandle, - const ::Kokkos::View& v, - UnaryFunctorType functor) { +KOKKOS_FUNCTION void for_each(const TeamHandleType& teamHandle, + const ::Kokkos::View& v, + UnaryFunctorType functor) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::for_each_team_impl(teamHandle, KE::begin(v), KE::end(v), - std::move(functor)); + Impl::for_each_team_impl(teamHandle, KE::begin(v), KE::end(v), + std::move(functor)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AdjacentDifference.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AdjacentDifference.hpp index a8171fa068..9f7fcf94fe 100644 --- 
a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AdjacentDifference.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AdjacentDifference.hpp @@ -82,6 +82,11 @@ OutputIteratorType adjacent_difference_exespace_impl( return first_dest; } +#ifdef KOKKOS_ENABLE_DEBUG + // check for overlapping iterators + Impl::expect_no_overlap(first_from, last_from, first_dest); +#endif + // run const auto num_elements = Kokkos::Experimental::distance(first_from, last_from); @@ -114,6 +119,11 @@ KOKKOS_FUNCTION OutputIteratorType adjacent_difference_team_impl( return first_dest; } +#ifdef KOKKOS_ENABLE_DEBUG + // check for overlapping iterators + Impl::expect_no_overlap(first_from, last_from, first_dest); +#endif + // run const auto num_elements = Kokkos::Experimental::distance(first_from, last_from); diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Constraints.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Constraints.hpp index 27ce5a6fad..54bb13e25b 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Constraints.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Constraints.hpp @@ -24,18 +24,21 @@ namespace Kokkos { namespace Experimental { namespace Impl { +template +class RandomAccessIterator; + template struct is_admissible_to_kokkos_std_algorithms : std::false_type {}; template struct is_admissible_to_kokkos_std_algorithms< - T, std::enable_if_t< ::Kokkos::is_view::value && T::rank() == 1 && - (std::is_same::value || - std::is_same::value || - std::is_same::value)> > + T, std::enable_if_t<::Kokkos::is_view::value && T::rank() == 1 && + (std::is_same::value || + std::is_same::value || + std::is_same::value)>> : std::true_type {}; template @@ -58,6 +61,18 @@ using is_iterator = Kokkos::is_detected; template inline constexpr bool is_iterator_v = is_iterator::value; +template +struct is_kokkos_iterator : std::false_type {}; + +template +struct is_kokkos_iterator> { + static constexpr bool value = + 
is_admissible_to_kokkos_std_algorithms::value; +}; + +template +inline constexpr bool is_kokkos_iterator_v = is_kokkos_iterator::value; + // // are_iterators // @@ -215,6 +230,38 @@ KOKKOS_INLINE_FUNCTION void expect_valid_range(IteratorType first, (void)last; } +// +// Check if kokkos iterators are overlapping +// +template +KOKKOS_INLINE_FUNCTION void expect_no_overlap( + [[maybe_unused]] IteratorType1 first, [[maybe_unused]] IteratorType1 last, + [[maybe_unused]] IteratorType2 s_first) { + if constexpr (is_kokkos_iterator_v && + is_kokkos_iterator_v) { + auto const view1 = first.view(); + auto const view2 = s_first.view(); + + std::size_t stride1 = view1.stride(0); + std::size_t stride2 = view2.stride(0); + ptrdiff_t first_diff = view1.data() - view2.data(); + + // FIXME If strides are not identical, checks may not be made + // with the cost of O(1) + // Currently, checks are made only if strides are identical + // If first_diff == 0, there is already an overlap + if (stride1 == stride2 || first_diff == 0) { + [[maybe_unused]] bool is_no_overlap = (first_diff % stride1); + auto* first_pointer1 = view1.data(); + auto* first_pointer2 = view2.data(); + [[maybe_unused]] auto* last_pointer1 = first_pointer1 + (last - first); + [[maybe_unused]] auto* last_pointer2 = first_pointer2 + (last - first); + KOKKOS_EXPECTS(first_pointer1 >= last_pointer2 || + last_pointer1 <= first_pointer2 || is_no_overlap); + } + } +} + } // namespace Impl } // namespace Experimental } // namespace Kokkos diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyIf.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyIf.hpp index 3c1e2474bc..ad7b8bb8ca 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyIf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyIf.hpp @@ -150,8 +150,9 @@ KOKKOS_FUNCTION OutputIterator copy_if_team_impl( return d_first + count; } -#if defined KOKKOS_COMPILER_INTEL || \ - (defined(KOKKOS_COMPILER_NVCC) && 
KOKKOS_COMPILER_NVCC >= 1130) +#if defined KOKKOS_COMPILER_INTEL || \ + (defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \ + !defined(KOKKOS_COMPILER_MSVC)) __builtin_unreachable(); #endif } diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ForEachForEachN.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ForEachForEachN.hpp index d3be3b7f66..99cc4a1cf3 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ForEachForEachN.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ForEachForEachN.hpp @@ -42,10 +42,9 @@ struct StdForEachFunctor { }; template -UnaryFunctorType for_each_exespace_impl(const std::string& label, - const HandleType& handle, - IteratorType first, IteratorType last, - UnaryFunctorType functor) { +void for_each_exespace_impl(const std::string& label, const HandleType& handle, + IteratorType first, IteratorType last, + UnaryFunctorType functor) { // checks Impl::static_assert_random_access_and_accessible(handle, first); Impl::expect_valid_range(first, last); @@ -56,8 +55,6 @@ UnaryFunctorType for_each_exespace_impl(const std::string& label, label, RangePolicy(handle, 0, num_elements), StdForEachFunctor(first, functor)); handle.fence("Kokkos::for_each: fence after operation"); - - return functor; } template -KOKKOS_FUNCTION UnaryFunctorType -for_each_team_impl(const TeamHandleType& teamHandle, IteratorType first, - IteratorType last, UnaryFunctorType functor) { +KOKKOS_FUNCTION void for_each_team_impl(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + UnaryFunctorType functor) { // checks Impl::static_assert_random_access_and_accessible(teamHandle, first); Impl::expect_valid_range(first, last); @@ -96,7 +93,6 @@ for_each_team_impl(const TeamHandleType& teamHandle, IteratorType first, TeamThreadRange(teamHandle, 0, num_elements), StdForEachFunctor(first, functor)); teamHandle.team_barrier(); - return functor; } template > { ptrdiff_t current_index) : 
m_view(view), m_current_index(current_index) {} +#ifndef KOKKOS_ENABLE_CXX17 // C++20 and beyond + template + requires(std::is_constructible_v) KOKKOS_FUNCTION + explicit(!std::is_convertible_v) + RandomAccessIterator(const RandomAccessIterator& other) + : m_view(other.m_view), m_current_index(other.m_current_index) {} +#else + template < + class OtherViewType, + std::enable_if_t && + !std::is_convertible_v, + int> = 0> + KOKKOS_FUNCTION explicit RandomAccessIterator( + const RandomAccessIterator& other) + : m_view(other.m_view), m_current_index(other.m_current_index) {} + + template , + int> = 0> + KOKKOS_FUNCTION RandomAccessIterator( + const RandomAccessIterator& other) + : m_view(other.m_view), m_current_index(other.m_current_index) {} +#endif + KOKKOS_FUNCTION iterator_type& operator++() { ++m_current_index; @@ -152,9 +176,16 @@ class RandomAccessIterator< ::Kokkos::View > { KOKKOS_FUNCTION reference operator*() const { return m_view(m_current_index); } + KOKKOS_FUNCTION + view_type view() const { return m_view; } + private: view_type m_view; ptrdiff_t m_current_index = 0; + + // Needed for the converting constructor accepting another iterator + template + friend class RandomAccessIterator; }; } // namespace Impl diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_UniqueCopy.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_UniqueCopy.hpp index c7c2930278..710d04805d 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_UniqueCopy.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_UniqueCopy.hpp @@ -175,8 +175,9 @@ KOKKOS_FUNCTION OutputIterator unique_copy_team_impl( d_first + count); } -#if defined KOKKOS_COMPILER_INTEL || \ - (defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130) +#if defined KOKKOS_COMPILER_INTEL || \ + (defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \ + !defined(KOKKOS_COMPILER_MSVC)) __builtin_unreachable(); #endif } diff --git 
a/lib/kokkos/algorithms/unit_tests/TestRandomAccessIterator.cpp b/lib/kokkos/algorithms/unit_tests/TestRandomAccessIterator.cpp index 282d85548c..7d484136b6 100644 --- a/lib/kokkos/algorithms/unit_tests/TestRandomAccessIterator.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestRandomAccessIterator.cpp @@ -46,6 +46,44 @@ TEST_F(random_access_iterator_test, constructor) { EXPECT_TRUE(true); } +TEST_F(random_access_iterator_test, constructiblity) { + auto first_d = KE::begin(m_dynamic_view); + auto cfirst_d = KE::cbegin(m_dynamic_view); + + static_assert(std::is_constructible_v); + static_assert( + !std::is_constructible_v); + [[maybe_unused]] decltype(cfirst_d) tmp_cfirst_d(first_d); + + auto first_s = KE::begin(m_static_view); + auto cfirst_s = KE::cbegin(m_static_view); + + static_assert(std::is_constructible_v); + static_assert( + !std::is_constructible_v); + [[maybe_unused]] decltype(cfirst_s) tmp_cfirst_s(first_s); + + auto first_st = KE::begin(m_strided_view); + auto cfirst_st = KE::cbegin(m_strided_view); + + static_assert( + std::is_constructible_v); + static_assert( + !std::is_constructible_v); + [[maybe_unused]] decltype(cfirst_st) tmp_cfirst_st(first_st); + + // [FIXME] Better to have tests for the explicit specifier with an expression. + // As soon as View converting constructors are re-implemented with a + // conditional explicit, we may add those tests. 
+ static_assert(std::is_constructible_v); + static_assert(std::is_constructible_v); + static_assert(std::is_constructible_v); + static_assert(std::is_constructible_v); + static_assert(std::is_constructible_v); + static_assert(std::is_constructible_v); + EXPECT_TRUE(true); +} + template void test_random_access_it_verify(IteratorType it, ValueType gold_value) { using view_t = Kokkos::View; diff --git a/lib/kokkos/algorithms/unit_tests/TestSortByKey.hpp b/lib/kokkos/algorithms/unit_tests/TestSortByKey.hpp index 16f68eaaf2..9e5bd4a574 100644 --- a/lib/kokkos/algorithms/unit_tests/TestSortByKey.hpp +++ b/lib/kokkos/algorithms/unit_tests/TestSortByKey.hpp @@ -69,7 +69,7 @@ void iota(ExecutionSpace const &space, ViewType const &v, typename ViewType::value_type value = 0) { using ValueType = typename ViewType::value_type; Kokkos::parallel_for( - "ArborX::Algorithms::iota", + "Kokkos::Algorithms::iota", Kokkos::RangePolicy(space, 0, v.extent(0)), KOKKOS_LAMBDA(int i) { v(i) = value + (ValueType)i; }); } @@ -87,6 +87,18 @@ TEST(TEST_CATEGORY, SortByKeyEmptyView) { Kokkos::Experimental::sort_by_key(ExecutionSpace(), keys, values)); } +// Test #7036 +TEST(TEST_CATEGORY, SortByKeyEmptyViewHost) { + using ExecutionSpace = Kokkos::DefaultHostExecutionSpace; + + // does not matter if we use int or something else + Kokkos::View keys("keys", 0); + Kokkos::View values("values", 0); + + ASSERT_NO_THROW( + Kokkos::Experimental::sort_by_key(ExecutionSpace(), keys, values)); +} + TEST(TEST_CATEGORY, SortByKey) { using ExecutionSpace = TEST_EXECSPACE; using MemorySpace = typename ExecutionSpace::memory_space; diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsConstraints.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsConstraints.cpp index 386d533f7a..2a4525a8c3 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsConstraints.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsConstraints.cpp @@ -81,5 +81,114 @@ TEST(std_algorithms, 
is_admissible_to_std_algorithms) { strided_view_3d_t>::value); } +TEST(std_algorithms, expect_no_overlap) { + namespace KE = Kokkos::Experimental; + using value_type = double; + + static constexpr size_t extent0 = 13; + + //------------- + // 1d views + //------------- + using static_view_1d_t = Kokkos::View; + [[maybe_unused]] static_view_1d_t static_view_1d{ + "std-algo-test-1d-contiguous-view-static"}; + + using dyn_view_1d_t = Kokkos::View; + [[maybe_unused]] dyn_view_1d_t dynamic_view_1d{ + "std-algo-test-1d-contiguous-view-dynamic", extent0}; + + using strided_view_1d_t = Kokkos::View; + Kokkos::LayoutStride layout1d{extent0, 2}; + strided_view_1d_t strided_view_1d{"std-algo-test-1d-strided-view", layout1d}; + +// Overlapping because iterators are identical +#if defined(KOKKOS_ENABLE_DEBUG) + auto first_s = KE::begin(static_view_1d); + auto last_s = first_s + extent0; + EXPECT_DEATH({ KE::Impl::expect_no_overlap(first_s, last_s, first_s); }, + "Kokkos contract violation:.*"); + + auto first_d = KE::begin(dynamic_view_1d); + auto last_d = first_d + extent0; + EXPECT_DEATH({ KE::Impl::expect_no_overlap(first_d, last_d, first_d); }, + "Kokkos contract violation:.*"); + + auto first_st = KE::begin(strided_view_1d); + auto last_st = first_st + extent0; + EXPECT_DEATH({ KE::Impl::expect_no_overlap(first_st, last_st, first_st); }, + "Kokkos contract violation:.*"); +#endif + + // Ranges are overlapped + static constexpr size_t sub_extent0 = 6, offset0 = 3; + std::pair range0(0, sub_extent0), + range1(offset0, offset0 + sub_extent0); +#if defined(KOKKOS_ENABLE_DEBUG) + auto static_view_1d_0 = Kokkos::subview(static_view_1d, range0); + auto static_view_1d_1 = Kokkos::subview(static_view_1d, range1); + auto first_s0 = KE::begin(static_view_1d_0); // [0, 6) + auto last_s0 = first_s0 + static_view_1d_0.extent(0); + auto first_s1 = KE::begin(static_view_1d_1); // [3, 9) + EXPECT_DEATH({ KE::Impl::expect_no_overlap(first_s0, last_s0, first_s1); }, + "Kokkos contract 
violation:.*"); + + auto dynamic_view_1d_0 = Kokkos::subview(dynamic_view_1d, range0); + auto dynamic_view_1d_1 = Kokkos::subview(dynamic_view_1d, range1); + auto first_d0 = KE::begin(dynamic_view_1d_0); // [0, 6) + auto last_d0 = first_d0 + dynamic_view_1d_0.extent(0); + auto first_d1 = KE::begin(dynamic_view_1d_1); // [3, 9) + EXPECT_DEATH({ KE::Impl::expect_no_overlap(first_d0, last_d0, first_d1); }, + "Kokkos contract violation:.*"); +#endif + + auto strided_view_1d_0 = Kokkos::subview(strided_view_1d, range0); + auto strided_view_1d_1 = Kokkos::subview(strided_view_1d, range1); + auto first_st0 = KE::begin(strided_view_1d_0); // [0, 12) + auto last_st0 = first_st0 + strided_view_1d_0.extent(0); + auto first_st1 = KE::begin(strided_view_1d_1); // [3, 15) + // Does not overlap since offset (=3) is not divisible by stride (=2) + EXPECT_NO_THROW( + { KE::Impl::expect_no_overlap(first_st0, last_st0, first_st1); }); + + // Iterating over the same range without overlapping + Kokkos::View static_view_2d{ + "std-algo-test-2d-contiguous-view-static"}; + auto sub_static_view_1d_0 = Kokkos::subview(static_view_2d, 0, Kokkos::ALL); + auto sub_static_view_1d_1 = Kokkos::subview(static_view_2d, 1, Kokkos::ALL); + auto sub_first_s0 = KE::begin(sub_static_view_1d_0); // 0, 2, 4, ... + auto sub_last_s0 = sub_first_s0 + sub_static_view_1d_0.extent(0); + auto sub_first_s1 = KE::begin(sub_static_view_1d_1); // 1, 3, 5, ... + + EXPECT_NO_THROW({ + KE::Impl::expect_no_overlap(sub_first_s0, sub_last_s0, sub_first_s1); + }); + + Kokkos::View dynamic_view_2d{ + "std-algo-test-2d-contiguous-view-dynamic", 2, extent0}; + auto sub_dynamic_view_1d_0 = Kokkos::subview(dynamic_view_2d, 0, Kokkos::ALL); + auto sub_dynamic_view_1d_1 = Kokkos::subview(dynamic_view_2d, 1, Kokkos::ALL); + auto sub_first_d0 = KE::begin(sub_dynamic_view_1d_0); // 0, 2, 4, ... + auto sub_last_d0 = sub_first_d0 + sub_dynamic_view_1d_0.extent(0); + auto sub_first_d1 = KE::begin(sub_dynamic_view_1d_1); // 1, 3, 5, ... 
+ + EXPECT_NO_THROW({ + KE::Impl::expect_no_overlap(sub_first_d0, sub_last_d0, sub_first_d1); + }); + + Kokkos::LayoutStride layout2d{2, 3, extent0, 2 * 3}; + Kokkos::View strided_view_2d{ + "std-algo-test-2d-contiguous-view-strided", layout2d}; + auto sub_strided_view_1d_0 = Kokkos::subview(strided_view_2d, 0, Kokkos::ALL); + auto sub_strided_view_1d_1 = Kokkos::subview(strided_view_2d, 1, Kokkos::ALL); + auto sub_first_st0 = KE::begin(sub_strided_view_1d_0); // 0, 6, 12, ... + auto sub_last_st0 = sub_first_st0 + sub_strided_view_1d_0.extent(0); + auto sub_first_st1 = KE::begin(sub_strided_view_1d_1); // 1, 7, 13, ... + + EXPECT_NO_THROW({ + KE::Impl::expect_no_overlap(sub_first_st0, sub_last_st0, sub_first_st1); + }); +} + } // namespace stdalgos } // namespace Test diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamExclusiveScan.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamExclusiveScan.cpp index 2c8fee02f4..7cb9851087 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamExclusiveScan.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamExclusiveScan.cpp @@ -85,7 +85,7 @@ struct TestFunctorA { break; } -#if not defined KOKKOS_ENABLE_OPENMPTARGET +#ifndef KOKKOS_ENABLE_OPENMPTARGET case 2: { auto it = KE::exclusive_scan( @@ -213,7 +213,7 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) { break; } -#if not defined KOKKOS_ENABLE_OPENMPTARGET +#ifndef KOKKOS_ENABLE_OPENMPTARGET case 2: case 3: { auto it = exclusive_scan(KE::cbegin(rowFrom), KE::cend(rowFrom), @@ -242,7 +242,7 @@ template void run_all_scenarios() { for (int numTeams : teamSizesToTest) { for (const auto& numCols : {0, 1, 2, 13, 101, 1444, 8153}) { -#if not defined KOKKOS_ENABLE_OPENMPTARGET +#ifndef KOKKOS_ENABLE_OPENMPTARGET for (int apiId : {0, 1, 2, 3}) { #else for (int apiId : {0, 1}) { diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamIsSorted.cpp 
b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamIsSorted.cpp index f9adeb0654..850e80dde1 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamIsSorted.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamIsSorted.cpp @@ -52,7 +52,7 @@ struct TestFunctorA { Kokkos::single(Kokkos::PerTeam(member), [=, *this]() { m_returnsView(myRowIndex) = result; }); } -#if not defined KOKKOS_ENABLE_OPENMPTARGET +#ifndef KOKKOS_ENABLE_OPENMPTARGET else if (m_apiPick == 2) { using value_type = typename ViewType::value_type; result = KE::is_sorted(member, KE::cbegin(myRowView), KE::cend(myRowView), @@ -179,7 +179,7 @@ template void run_all_scenarios(bool makeDataSortedOnPurpose) { for (int numTeams : teamSizesToTest) { for (const auto& numCols : {0, 1, 2, 13, 101, 1444, 5153}) { -#if not defined KOKKOS_ENABLE_OPENMPTARGET +#ifndef KOKKOS_ENABLE_OPENMPTARGET for (int apiId : {0, 1, 2, 3}) { #else for (int apiId : {0, 1}) { diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamIsSortedUntil.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamIsSortedUntil.cpp index 33af5f99de..e3b95527c7 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamIsSortedUntil.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamIsSortedUntil.cpp @@ -73,7 +73,7 @@ struct TestFunctorA { m_distancesView(myRowIndex) = resultDist; }); } -#if not defined KOKKOS_ENABLE_OPENMPTARGET +#ifndef KOKKOS_ENABLE_OPENMPTARGET else if (m_apiPick == 2) { using value_type = typename ViewType::value_type; auto it = KE::is_sorted_until(member, KE::cbegin(myRowView), @@ -226,7 +226,7 @@ template void run_all_scenarios(const std::string& name, const std::vector& cols) { for (int numTeams : teamSizesToTest) { for (const auto& numCols : cols) { -#if not defined KOKKOS_ENABLE_OPENMPTARGET +#ifndef KOKKOS_ENABLE_OPENMPTARGET for (int apiId : {0, 1, 2, 3}) { #else for (int apiId : {0, 1}) { diff --git 
a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMaxElement.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMaxElement.cpp index fb891a8780..283525dbd1 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMaxElement.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMaxElement.cpp @@ -59,7 +59,7 @@ struct TestFunctorA { m_distancesView(myRowIndex) = resultDist; }); } -#if not defined KOKKOS_ENABLE_OPENMPTARGET +#ifndef KOKKOS_ENABLE_OPENMPTARGET else if (m_apiPick == 2) { using value_type = typename ViewType::value_type; auto it = @@ -170,7 +170,7 @@ void run_all_scenarios() { } TEST(std_algorithms_max_element_team_test, test) { -#if not defined KOKKOS_ENABLE_OPENMPTARGET +#ifndef KOKKOS_ENABLE_OPENMPTARGET run_all_scenarios(); run_all_scenarios(); run_all_scenarios(); diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMinElement.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMinElement.cpp index 4ba1b6f968..8579b48315 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMinElement.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMinElement.cpp @@ -59,7 +59,7 @@ struct TestFunctorA { m_distancesView(myRowIndex) = resultDist; }); } -#if not defined KOKKOS_ENABLE_OPENMPTARGET +#ifndef KOKKOS_ENABLE_OPENMPTARGET else if (m_apiPick == 2) { using value_type = typename ViewType::value_type; auto it = @@ -169,7 +169,7 @@ void run_all_scenarios() { } TEST(std_algorithms_min_element_team_test, test) { -#if not defined KOKKOS_ENABLE_OPENMPTARGET +#ifndef KOKKOS_ENABLE_OPENMPTARGET run_all_scenarios(); run_all_scenarios(); run_all_scenarios(); diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMinMaxElement.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMinMaxElement.cpp index 17562a5572..51010fdff5 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMinMaxElement.cpp +++ 
b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMinMaxElement.cpp @@ -66,7 +66,7 @@ struct TestFunctorA { m_distancesView(myRowIndex, 1) = resultDist2; }); } -#if not defined KOKKOS_ENABLE_OPENMPTARGET +#ifndef KOKKOS_ENABLE_OPENMPTARGET else if (m_apiPick == 2) { using value_type = typename ViewType::value_type; auto itPair = @@ -188,7 +188,7 @@ void run_all_scenarios() { } TEST(std_algorithms_minmax_element_team_test, test) { -#if not defined KOKKOS_ENABLE_OPENMPTARGET +#ifndef KOKKOS_ENABLE_OPENMPTARGET run_all_scenarios(); run_all_scenarios(); run_all_scenarios(); diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamReduce.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamReduce.cpp index 94c2a8f1f9..eb00d9e083 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamReduce.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamReduce.cpp @@ -16,7 +16,7 @@ #include -#if not defined KOKKOS_ENABLE_OPENMPTARGET +#ifndef KOKKOS_ENABLE_OPENMPTARGET namespace Test { namespace stdalgos { diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformExclusiveScan.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformExclusiveScan.cpp index 60fa369af1..1c43854381 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformExclusiveScan.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformExclusiveScan.cpp @@ -16,7 +16,7 @@ #include -#if not defined KOKKOS_ENABLE_OPENMPTARGET +#ifndef KOKKOS_ENABLE_OPENMPTARGET namespace Test { namespace stdalgos { diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformInclusiveScan.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformInclusiveScan.cpp index 10454d6551..0b0d798fd8 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformInclusiveScan.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformInclusiveScan.cpp @@ -16,7 +16,7 @@ #include 
-#if not defined KOKKOS_ENABLE_OPENMPTARGET +#ifndef KOKKOS_ENABLE_OPENMPTARGET namespace Test { namespace stdalgos { diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformReduce.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformReduce.cpp index b0a3241ec4..17ded226aa 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformReduce.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformReduce.cpp @@ -16,7 +16,7 @@ #include -#if not defined KOKKOS_ENABLE_OPENMPTARGET +#ifndef KOKKOS_ENABLE_OPENMPTARGET namespace Test { namespace stdalgos { diff --git a/lib/kokkos/appveyor.yml b/lib/kokkos/appveyor.yml index c0b6e9cab9..d0a5645ef7 100644 --- a/lib/kokkos/appveyor.yml +++ b/lib/kokkos/appveyor.yml @@ -5,6 +5,6 @@ build_script: - cmd: >- mkdir build && cd build && - cmake c:\projects\source -DKokkos_ENABLE_TESTS=ON -DCMAKE_CXX_FLAGS="/W0 /EHsc" -DKokkos_ENABLE_DEPRECATED_CODE_4=ON -DKokkos_ENABLE_DEPRECATION_WARNINGS=OFF && + cmake c:\projects\source -DKokkos_ENABLE_IMPL_MDSPAN=OFF -DKokkos_ENABLE_TESTS=ON -DCMAKE_CXX_FLAGS="/W0 /EHsc" -DKokkos_ENABLE_DEPRECATED_CODE_4=ON -DKokkos_ENABLE_DEPRECATION_WARNINGS=OFF && cmake --build . --target install && ctest -C Debug --output-on-failure diff --git a/lib/kokkos/benchmarks/CMakeLists.txt b/lib/kokkos/benchmarks/CMakeLists.txt index abf5028359..529ef393d9 100644 --- a/lib/kokkos/benchmarks/CMakeLists.txt +++ b/lib/kokkos/benchmarks/CMakeLists.txt @@ -4,7 +4,7 @@ KOKKOS_ADD_BENCHMARK_DIRECTORIES(gather) KOKKOS_ADD_BENCHMARK_DIRECTORIES(gups) KOKKOS_ADD_BENCHMARK_DIRECTORIES(launch_latency) KOKKOS_ADD_BENCHMARK_DIRECTORIES(stream) - +KOKKOS_ADD_BENCHMARK_DIRECTORIES(view_copy_constructor) #FIXME_OPENMPTARGET - These two benchmarks cause ICE. Commenting them for now but a deeper analysis on the cause and a possible fix will follow. 
IF(NOT Kokkos_ENABLE_OPENMPTARGET) KOKKOS_ADD_BENCHMARK_DIRECTORIES(policy_performance) diff --git a/lib/kokkos/benchmarks/view_copy_constructor/CMakeLists.txt b/lib/kokkos/benchmarks/view_copy_constructor/CMakeLists.txt new file mode 100644 index 0000000000..50a331b2b3 --- /dev/null +++ b/lib/kokkos/benchmarks/view_copy_constructor/CMakeLists.txt @@ -0,0 +1,4 @@ +KOKKOS_ADD_EXECUTABLE( + view_copy_constructor + SOURCES view_copy_constructor.cpp +) diff --git a/lib/kokkos/benchmarks/view_copy_constructor/Makefile b/lib/kokkos/benchmarks/view_copy_constructor/Makefile new file mode 100644 index 0000000000..70c6d517e0 --- /dev/null +++ b/lib/kokkos/benchmarks/view_copy_constructor/Makefile @@ -0,0 +1,46 @@ +KOKKOS_DEVICES=Serial +KOKKOS_ARCH = "" + + +MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST)))) + +ifndef KOKKOS_PATH + KOKKOS_PATH = $(MAKEFILE_PATH)../.. +endif + +SRC = $(wildcard $(MAKEFILE_PATH)*.cpp) +HEADERS = $(wildcard $(MAKEFILE_PATH)*.hpp) + +vpath %.cpp $(sort $(dir $(SRC))) + +default: build + echo "Start Build" + +CXX = clang++ +EXE = view_copy_constructor.exe + +CXXFLAGS ?= -Ofast +override CXXFLAGS += -I$(MAKEFILE_PATH) + +DEPFLAGS = -M +LINK = ${CXX} +LINKFLAGS = -Ofast +KOKKOS_CXX_STANDARD=c++20 + +OBJ = $(notdir $(SRC:.cpp=.o)) +LIB = + +include $(KOKKOS_PATH)/Makefile.kokkos + +build: $(EXE) + +$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) + +clean: kokkos-clean + rm -f *.o view_copy_constructor.cuda view_copy_constructor.exe + +# Compilation rules + +%.o:%.cpp $(KOKKOS_CPP_DEPENDS) $(HEADERS) + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@) diff --git a/lib/kokkos/benchmarks/view_copy_constructor/view_copy_constructor.cpp b/lib/kokkos/benchmarks/view_copy_constructor/view_copy_constructor.cpp new file mode 100644 index 0000000000..63c49f09c0 --- /dev/null +++ 
b/lib/kokkos/benchmarks/view_copy_constructor/view_copy_constructor.cpp @@ -0,0 +1,310 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +// The function "test_view_collection" exposes the copy constructor +// and destructor overheads in Kokkos View objects +// Please see the lines marked by "NOTE". + +#include +#include +#include +#include +#include +#include +#include + +// NVIEWS is the number of Kokkos View objects in our ViewCollection object +// We have chosen a large value of 40 to make it easier to see performance +// differences when using the likelihood attribute +#define NVIEWS 40 + +class ViewCollection { + public: + Kokkos::View v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, + v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, + v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40; + double m_expected_sum; + double m_side_effect; + int m_N; + + ViewCollection(int N) + : v1("v1", N), + v2("v2", N), + v3("v3", N), + v4("v4", N), + v5("v5", N), + v6("v6", N), + v7("v7", N), + v8("v8", N), + v9("v9", N), + v10("v10", N), + v11("v11", N), + v12("v12", N), + v13("v13", N), + v14("v14", N), + v15("v15", N), + v16("v16", N), + v17("v17", N), + v18("v18", N), + v19("v19", N), + v20("v20", N), + v21("v21", N), + v22("v22", N), + v23("v23", N), + v24("v24", N), + v25("v25", N), + v26("v26", N), + v27("v27", N), + v28("v28", N), + v29("v29", N), + v30("v30", N), + v31("v31", N), + v32("v32", N), + v33("v33", N), + v34("v34", N), + 
v35("v35", N), + v36("v36", N), + v37("v37", N), + v38("v38", N), + v39("v39", N), + v40("v40", N), + m_expected_sum(N * NVIEWS), + m_side_effect(0.0), + m_N(N) { + for (int i = 0; i < N; ++i) { + v1(i) = 1; + v2(i) = 1; + v3(i) = 1; + v4(i) = 1; + v5(i) = 1; + v6(i) = 1; + v7(i) = 1; + v8(i) = 1; + v9(i) = 1; + v10(i) = 1; + v11(i) = 1; + v12(i) = 1; + v13(i) = 1; + v14(i) = 1; + v15(i) = 1; + v16(i) = 1; + v17(i) = 1; + v18(i) = 1; + v19(i) = 1; + v20(i) = 1; + v21(i) = 1; + v22(i) = 1; + v23(i) = 1; + v24(i) = 1; + v25(i) = 1; + v26(i) = 1; + v27(i) = 1; + v28(i) = 1; + v29(i) = 1; + v30(i) = 1; + v31(i) = 1; + v32(i) = 1; + v33(i) = 1; + v34(i) = 1; + v35(i) = 1; + v36(i) = 1; + v37(i) = 1; + v38(i) = 1; + v39(i) = 1; + v40(i) = 1; + } + } + +// The ADD_COPY_CONSTRUCTOR macro is helpful to compare time in the copy +// constructor between compilers. We have found that the GNU compiler +// is sometimes able to inline the default copy constructor. +#ifdef ADD_COPY_CONSTRUCTOR + __attribute__((noinline)) ViewCollection(const ViewCollection& other) + : v1(other.v1), + v2(other.v2), + v3(other.v3), + v4(other.v4), + v5(other.v5), + v6(other.v6), + v7(other.v7), + v8(other.v8), + v9(other.v9), + v10(other.v10), + v11(other.v11), + v12(other.v12), + v13(other.v13), + v14(other.v14), + v15(other.v15), + v16(other.v16), + v17(other.v17), + v18(other.v18), + v19(other.v19), + v20(other.v20), + v21(other.v21), + v22(other.v22), + v23(other.v23), + v24(other.v24), + v25(other.v25), + v26(other.v26), + v27(other.v27), + v28(other.v28), + v29(other.v29), + v30(other.v30), + v31(other.v31), + v32(other.v32), + v33(other.v33), + v34(other.v34), + v35(other.v35), + v36(other.v36), + v37(other.v37), + v38(other.v38), + v39(other.v39), + v40(other.v40), + m_expected_sum(other.m_expected_sum), + m_side_effect(other.m_side_effect), + m_N(other.m_N) {} +#endif + + KOKKOS_INLINE_FUNCTION + double sum_views(int ii, bool execute_kernel) { + double result = 0.0; + if (execute_kernel) { + 
// This code is only executed when using the command line option -k + // The computation references all Kokkos views. This may help our + // effort to stop compilers from optimizing away the Kokkos views + for (int i = 0; i < m_N; ++i) { + result += v1(i) + v2(i) + v3(i) + v4(i) + v5(i) + v6(i) + v7(i) + + v8(i) + v9(i) + v10(i) + v11(i) + v12(i) + v13(i) + v14(i) + + v15(i) + v16(i) + v17(i) + v18(i) + v19(i) + v20(i) + v21(i) + + v22(i) + v23(i) + v24(i) + v25(i) + v26(i) + v27(i) + v28(i) + + v29(i) + v30(i) + v31(i) + v32(i) + v33(i) + v34(i) + v35(i) + + v36(i) + v37(i) + v38(i) + v39(i) + v40(i); + } + } else { + result = m_expected_sum; + } + // This statement introduces a side effect that may help our effort to + // stop compilers from optimizing away the temporary ViewCollection object + m_side_effect = result * (ii + 1); + return result; + } +}; + +void test_view_collection_kk(int N, int num_iter, bool execute_kernel) { + ViewCollection view_collection(N); + + Kokkos::Timer view_collection_timer; + double max_value = 0.0; + // Max Reduction boilerplate code taken from slide 53 of + // kokkos-tutorials/LectureSeries/KokkosTutorial_02_ViewsAndSpaces.pdf + Kokkos::parallel_reduce( + "collection-reduction", num_iter, + KOKKOS_LAMBDA(int i, double& valueToUpdate) { + // NOTE: The following lines expose the Kokkos View overheads + ViewCollection tmp_view_collection = view_collection; + double my_value = tmp_view_collection.sum_views(i, execute_kernel); + if (my_value > valueToUpdate) valueToUpdate = my_value; + }, + Kokkos::Max(max_value)); + double view_collection_time = view_collection_timer.seconds(); + + bool success = std::fabs(max_value - N * NVIEWS) < 1.E-6; + std::cout << "View Time = " << view_collection_time << " seconds" + << std::endl; + if (success) { + std::cout << "Kokkos run:" << std::endl; + std::cout << "SUCCESS" << std::endl; + } else { + std::cout << "FAILURE" << std::endl; + } +} + +void test_view_collection_serial(int N, int num_iter, bool 
execute_kernel) { + ViewCollection view_collection(N); + + Kokkos::Timer view_collection_timer; + double max_value = 0.0; + // Max Reduction boilerplate code taken from slide 53 of + // kokkos-tutorials/LectureSeries/KokkosTutorial_02_ViewsAndSpaces.pdf + for (int i = 0; i < num_iter; ++i) { + // NOTE: The following lines expose the Kokkos View overheads + ViewCollection tmp_view_collection = view_collection; + double my_value = tmp_view_collection.sum_views(i, execute_kernel); + if (my_value > max_value) max_value = my_value; + } + double view_collection_time = view_collection_timer.seconds(); + + bool success = std::fabs(max_value - N * NVIEWS) < 1.E-6; + std::cout << "View Time 2 = " << view_collection_time << " seconds" + << std::endl; + if (success) { + std::cout << "Serial run:" << std::endl; + std::cout << "SUCCESS" << std::endl; + } else { + std::cout << "FAILURE" << std::endl; + } +} + +int main(int argc, char* argv[]) { + // The benchmark is only testing reference counting for views on host. 
+#if defined(KOKKOS_ENABLE_OPENMP) || defined(KOKKOS_ENABLE_SERIAL) || \ + defined(KOKKOS_ENABLE_THREADS) || defined(KOKKOS_ENABLE_HPX) + int N = 1; + int num_iter = 1 << 27; + bool execute_kernel = false; + + for (int i = 0; i < argc; i++) { + if ((strcmp(argv[i], "-N") == 0)) { + N = atoi(argv[++i]); + if (N < 1) { + std::cout << "Array extent must be >= 1" << std::endl; + exit(1); + } + } else if (strcmp(argv[i], "-i") == 0) { + num_iter = atoi(argv[++i]); + if (num_iter < 1) { + std::cout << "Number of iterations must be >= 1" << std::endl; + exit(1); + } + } else if (strcmp(argv[i], "-k") == 0) { + execute_kernel = true; + } else if ((strcmp(argv[i], "-h") == 0)) { + printf(" Options:\n"); + printf(" -N : Array extent\n"); + printf(" -i : Number of iterations\n"); + printf(" -k: Execute the summation kernel\n"); + printf(" -h: Print this message\n\n"); + exit(1); + } + } + + std::cout << "Array extent = " << N << std::endl; + std::cout << "Iterations = " << num_iter << std::endl; + std::cout << "Execute summation kernel = " << std::boolalpha << execute_kernel + << std::noboolalpha << std::endl; + + // Test inside a Kokkos kernel. + Kokkos::initialize(argc, argv); + { test_view_collection_kk(N, num_iter, execute_kernel); } + + // Test outside Kokkos kernel. 
+ test_view_collection_serial(N, num_iter, execute_kernel); + + Kokkos::finalize(); +#endif + + return 0; +} diff --git a/lib/kokkos/bin/nvcc_wrapper b/lib/kokkos/bin/nvcc_wrapper index dbfef2267f..d58645f98a 100755 --- a/lib/kokkos/bin/nvcc_wrapper +++ b/lib/kokkos/bin/nvcc_wrapper @@ -233,7 +233,7 @@ do cuda_args="$cuda_args $1" ;; #Handle more known nvcc args - --extended-lambda|--expt-extended-lambda|--expt-relaxed-constexpr|--Wno-deprecated-gpu-targets|-Wno-deprecated-gpu-targets|-allow-unsupported-compiler|--allow-unsupported-compiler) + --extended-lambda|--expt-extended-lambda|--expt-relaxed-constexpr|--Wno-deprecated-gpu-targets|-Wno-deprecated-gpu-targets|-allow-unsupported-compiler|--allow-unsupported-compiler|--disable-warnings) cuda_args="$cuda_args $1" ;; #Handle known nvcc args that have an argument diff --git a/lib/kokkos/cmake/Dependencies.cmake b/lib/kokkos/cmake/Dependencies.cmake index 611c089b2e..fb1e73b579 100644 --- a/lib/kokkos/cmake/Dependencies.cmake +++ b/lib/kokkos/cmake/Dependencies.cmake @@ -1,6 +1,5 @@ TRIBITS_PACKAGE_DEFINE_DEPENDENCIES( LIB_OPTIONAL_TPLS Pthread CUDA HWLOC DLlib - TEST_OPTIONAL_TPLS CUSPARSE ) TRIBITS_TPL_TENTATIVELY_ENABLE(DLlib) diff --git a/lib/kokkos/cmake/KokkosConfigCommon.cmake.in b/lib/kokkos/cmake/KokkosConfigCommon.cmake.in index 8d5ef0de42..d3ac39ffa3 100644 --- a/lib/kokkos/cmake/KokkosConfigCommon.cmake.in +++ b/lib/kokkos/cmake/KokkosConfigCommon.cmake.in @@ -225,8 +225,13 @@ FUNCTION(kokkos_compilation) # if built w/o CUDA support, we want to basically make this a no-op SET(_Kokkos_ENABLE_CUDA @Kokkos_ENABLE_CUDA@) + + IF(CMAKE_VERSION VERSION_GREATER_EQUAL 3.17) + SET(MAYBE_CURRENT_INSTALLATION_ROOT "${CMAKE_CURRENT_FUNCTION_LIST_DIR}/../../..") + ENDIF() + # search relative first and then absolute - SET(_HINTS "${CMAKE_CURRENT_LIST_DIR}/../.." 
"@CMAKE_INSTALL_PREFIX@") + SET(_HINTS "${MAYBE_CURRENT_INSTALLATION_ROOT}" "@CMAKE_INSTALL_PREFIX@") # find kokkos_launch_compiler FIND_PROGRAM(Kokkos_COMPILE_LAUNCHER diff --git a/lib/kokkos/cmake/KokkosCore_config.h.in b/lib/kokkos/cmake/KokkosCore_config.h.in index 94f8fc4214..a93007ff83 100644 --- a/lib/kokkos/cmake/KokkosCore_config.h.in +++ b/lib/kokkos/cmake/KokkosCore_config.h.in @@ -37,6 +37,7 @@ #cmakedefine KOKKOS_ENABLE_CUDA_LAMBDA // deprecated #cmakedefine KOKKOS_ENABLE_CUDA_CONSTEXPR #cmakedefine KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC +#cmakedefine KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY #cmakedefine KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE #cmakedefine KOKKOS_ENABLE_HIP_MULTIPLE_KERNEL_INSTANTIATIONS #cmakedefine KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY @@ -52,6 +53,8 @@ #cmakedefine KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION // deprecated #cmakedefine KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION #cmakedefine KOKKOS_ENABLE_IMPL_MDSPAN +#cmakedefine KOKKOS_ENABLE_IMPL_REF_COUNT_BRANCH_UNLIKELY +#cmakedefine KOKKOS_ENABLE_IMPL_VIEW_OF_VIEWS_DESTRUCTOR_PRECONDITION_VIOLATION_WORKAROUND #cmakedefine KOKKOS_ENABLE_ATOMICS_BYPASS /* TPL Settings */ @@ -65,6 +68,7 @@ #cmakedefine KOKKOS_ARCH_ARMV8_THUNDERX #cmakedefine KOKKOS_ARCH_ARMV81 #cmakedefine KOKKOS_ARCH_ARMV8_THUNDERX2 +#cmakedefine KOKKOS_ARCH_ARMV9_GRACE #cmakedefine KOKKOS_ARCH_A64FX #cmakedefine KOKKOS_ARCH_AVX #cmakedefine KOKKOS_ARCH_AVX2 @@ -116,7 +120,6 @@ #cmakedefine KOKKOS_ARCH_AMD_GFX942 #cmakedefine KOKKOS_ARCH_AMD_GFX1030 #cmakedefine KOKKOS_ARCH_AMD_GFX1100 -#cmakedefine KOKKOS_ARCH_AMD_GFX1103 #cmakedefine KOKKOS_ARCH_AMD_GPU #cmakedefine KOKKOS_ARCH_VEGA // deprecated #cmakedefine KOKKOS_ARCH_VEGA906 // deprecated diff --git a/lib/kokkos/cmake/Modules/FindTPLCUDA.cmake b/lib/kokkos/cmake/Modules/FindTPLCUDA.cmake index 5a62c530fc..445f4e93a5 100644 --- a/lib/kokkos/cmake/Modules/FindTPLCUDA.cmake +++ b/lib/kokkos/cmake/Modules/FindTPLCUDA.cmake @@ -7,37 +7,38 @@ IF (NOT CUDAToolkit_ROOT) 
ENDIF() ENDIF() -# FIXME CMake 3.28.4 creates more targets than we export -IF(CMAKE_VERSION VERSION_GREATER_EQUAL "3.17.0" AND CMAKE_VERSION VERSION_LESS "3.28.4") - find_package(CUDAToolkit) -ELSE() - include(${CMAKE_CURRENT_LIST_DIR}/CudaToolkit.cmake) +IF(KOKKOS_CXX_HOST_COMPILER_ID STREQUAL NVHPC AND CMAKE_VERSION VERSION_LESS "3.20.1") + MESSAGE(FATAL_ERROR "Using NVHPC as host compiler requires at least CMake 3.20.1") ENDIF() - -IF (TARGET CUDA::cudart) - SET(FOUND_CUDART TRUE) - KOKKOS_EXPORT_IMPORTED_TPL(CUDA::cudart) -ELSE() - SET(FOUND_CUDART FALSE) -ENDIF() - -IF (TARGET CUDA::cuda_driver) - SET(FOUND_CUDA_DRIVER TRUE) - KOKKOS_EXPORT_IMPORTED_TPL(CUDA::cuda_driver) -ELSE() - SET(FOUND_CUDA_DRIVER FALSE) -ENDIF() - -include(FindPackageHandleStandardArgs) -IF(KOKKOS_CXX_HOST_COMPILER_ID STREQUAL NVHPC) - SET(KOKKOS_CUDA_ERROR "Using NVHPC as host compiler requires at least CMake 3.20.1") -ELSE() - SET(KOKKOS_CUDA_ERROR DEFAULT_MSG) -ENDIF() -FIND_PACKAGE_HANDLE_STANDARD_ARGS(TPLCUDA ${KOKKOS_CUDA_ERROR} FOUND_CUDART FOUND_CUDA_DRIVER) -IF (FOUND_CUDA_DRIVER AND FOUND_CUDART) +IF(CMAKE_VERSION VERSION_GREATER_EQUAL "3.17.0") + find_package(CUDAToolkit REQUIRED) KOKKOS_CREATE_IMPORTED_TPL(CUDA INTERFACE LINK_LIBRARIES CUDA::cuda_driver CUDA::cudart ) + KOKKOS_EXPORT_CMAKE_TPL(CUDAToolkit REQUIRED) +ELSE() + include(${CMAKE_CURRENT_LIST_DIR}/CudaToolkit.cmake) + + IF (TARGET CUDA::cudart) + SET(FOUND_CUDART TRUE) + KOKKOS_EXPORT_IMPORTED_TPL(CUDA::cudart) + ELSE() + SET(FOUND_CUDART FALSE) + ENDIF() + + IF (TARGET CUDA::cuda_driver) + SET(FOUND_CUDA_DRIVER TRUE) + KOKKOS_EXPORT_IMPORTED_TPL(CUDA::cuda_driver) + ELSE() + SET(FOUND_CUDA_DRIVER FALSE) + ENDIF() + + include(FindPackageHandleStandardArgs) + FIND_PACKAGE_HANDLE_STANDARD_ARGS(TPLCUDA ${DEFAULT_MSG} FOUND_CUDART FOUND_CUDA_DRIVER) + IF (FOUND_CUDA_DRIVER AND FOUND_CUDART) + KOKKOS_CREATE_IMPORTED_TPL(CUDA INTERFACE + LINK_LIBRARIES CUDA::cuda_driver CUDA::cudart + ) + ENDIF() ENDIF() diff --git 
a/lib/kokkos/cmake/deps/CUDA.cmake b/lib/kokkos/cmake/deps/CUDA.cmake index 68bf5b3d57..5b6afd6151 100644 --- a/lib/kokkos/cmake/deps/CUDA.cmake +++ b/lib/kokkos/cmake/deps/CUDA.cmake @@ -35,7 +35,6 @@ IF(NOT _CUDA_FAILURE) GLOBAL_SET(TPL_CUDA_LIBRARY_DIRS) GLOBAL_SET(TPL_CUDA_INCLUDE_DIRS ${CUDA_TOOLKIT_INCLUDE}) GLOBAL_SET(TPL_CUDA_LIBRARIES ${CUDA_CUDART_LIBRARY} ${CUDA_cublas_LIBRARY} ${CUDA_cufft_LIBRARY}) - KOKKOS_CREATE_IMPORTED_TPL_LIBRARY(CUSPARSE) ELSE() SET(TPL_ENABLE_CUDA OFF) ENDIF() diff --git a/lib/kokkos/cmake/deps/CUSPARSE.cmake b/lib/kokkos/cmake/deps/CUSPARSE.cmake deleted file mode 100644 index b016971ab9..0000000000 --- a/lib/kokkos/cmake/deps/CUSPARSE.cmake +++ /dev/null @@ -1,26 +0,0 @@ -#@HEADER -# ************************************************************************ -# -# Kokkos v. 4.0 -# Copyright (2022) National Technology & Engineering -# Solutions of Sandia, LLC (NTESS). -# -# Under the terms of Contract DE-NA0003525 with NTESS, -# the U.S. Government retains certain rights in this software. -# -# Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. 
-# -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# -# ************************************************************************ -# @HEADER - -#include(${TRIBITS_DEPS_DIR}/CUDA.cmake) - -#IF (TPL_ENABLE_CUDA) -# GLOBAL_SET(TPL_CUSPARSE_LIBRARY_DIRS) -# GLOBAL_SET(TPL_CUSPARSE_INCLUDE_DIRS ${TPL_CUDA_INCLUDE_DIRS}) -# GLOBAL_SET(TPL_CUSPARSE_LIBRARIES ${CUDA_cusparse_LIBRARY}) -# KOKKOS_CREATE_IMPORTED_TPL_LIBRARY(CUSPARSE) -#ENDIF() - diff --git a/lib/kokkos/cmake/fake_tribits.cmake b/lib/kokkos/cmake/fake_tribits.cmake index 4c5331ec79..a18d2ac518 100644 --- a/lib/kokkos/cmake/fake_tribits.cmake +++ b/lib/kokkos/cmake/fake_tribits.cmake @@ -118,14 +118,6 @@ FUNCTION(KOKKOS_ADD_TEST) ENDIF() ENDFUNCTION() -FUNCTION(KOKKOS_ADD_ADVANCED_TEST) - if (KOKKOS_HAS_TRILINOS) - TRIBITS_ADD_ADVANCED_TEST(${ARGN}) - else() - # TODO Write this - endif() -ENDFUNCTION() - MACRO(KOKKOS_CREATE_IMPORTED_TPL_LIBRARY TPL_NAME) ADD_INTERFACE_LIBRARY(TPL_LIB_${TPL_NAME}) TARGET_LINK_LIBRARIES(TPL_LIB_${TPL_NAME} LINK_PUBLIC ${TPL_${TPL_NAME}_LIBRARIES}) diff --git a/lib/kokkos/cmake/kokkos_arch.cmake b/lib/kokkos/cmake/kokkos_arch.cmake index df11c76cc3..a581d9f945 100644 --- a/lib/kokkos/cmake/kokkos_arch.cmake +++ b/lib/kokkos/cmake/kokkos_arch.cmake @@ -28,6 +28,7 @@ KOKKOS_CHECK_DEPRECATED_OPTIONS( #------------------------------------------------------------------------------- SET(KOKKOS_ARCH_LIST) +include(CheckCXXCompilerFlag) KOKKOS_DEPRECATED_LIST(ARCH ARCH) @@ -49,6 +50,7 @@ DECLARE_AND_CHECK_HOST_ARCH(ARMV81 "ARMv8.1 Compatible CPU") DECLARE_AND_CHECK_HOST_ARCH(ARMV8_THUNDERX "ARMv8 Cavium ThunderX CPU") DECLARE_AND_CHECK_HOST_ARCH(ARMV8_THUNDERX2 "ARMv8 Cavium ThunderX2 CPU") DECLARE_AND_CHECK_HOST_ARCH(A64FX "ARMv8.2 with SVE Support") +DECLARE_AND_CHECK_HOST_ARCH(ARMV9_GRACE "ARMv9 NVIDIA Grace CPU") DECLARE_AND_CHECK_HOST_ARCH(SNB "Intel Sandy/Ivy Bridge CPUs") DECLARE_AND_CHECK_HOST_ARCH(HSW "Intel Haswell CPUs") DECLARE_AND_CHECK_HOST_ARCH(BDW "Intel 
Broadwell Xeon E-class CPUs") @@ -101,9 +103,9 @@ LIST(APPEND CORRESPONDING_AMD_FLAGS gfx90a gfx90a gfx908 gfx908) LIST(APPEND SUPPORTED_AMD_GPUS MI50/60 MI50/60) LIST(APPEND SUPPORTED_AMD_ARCHS VEGA906 AMD_GFX906) LIST(APPEND CORRESPONDING_AMD_FLAGS gfx906 gfx906) -LIST(APPEND SUPPORTED_AMD_GPUS PHOENIX RX7900XTX V620/W6800 V620/W6800) -LIST(APPEND SUPPORTED_AMD_ARCHS AMD_GFX1103 AMD_GFX1100 NAVI1030 AMD_GFX1030) -LIST(APPEND CORRESPONDING_AMD_FLAGS gfx1103 gfx1100 gfx1030 gfx1030) +LIST(APPEND SUPPORTED_AMD_GPUS RX7900XTX RX7900XTX V620/W6800 V620/W6800) +LIST(APPEND SUPPORTED_AMD_ARCHS NAVI1100 AMD_GFX1100 NAVI1030 AMD_GFX1030) +LIST(APPEND CORRESPONDING_AMD_FLAGS gfx1100 gfx1100 gfx1030 gfx1030) #FIXME CAN BE REPLACED WITH LIST_ZIP IN CMAKE 3.17 FOREACH(ARCH IN LISTS SUPPORTED_AMD_ARCHS) @@ -189,12 +191,6 @@ IF (KOKKOS_CXX_COMPILER_ID STREQUAL Clang) ELSEIF(CUDAToolkit_BIN_DIR) GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS --cuda-path=${CUDAToolkit_BIN_DIR}/..) ENDIF() -ELSEIF (KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC) - SET(CUDA_ARCH_FLAG "-gpu") - GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS -cuda) - IF (KOKKOS_ENABLE_CUDA) # FIXME ideally unreachable when CUDA not enabled - GLOBAL_APPEND(KOKKOS_LINK_OPTIONS -cuda) - ENDIF() ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA) SET(CUDA_ARCH_FLAG "-arch") ENDIF() @@ -209,6 +205,11 @@ ENDIF() #------------------------------- KOKKOS_HIP_OPTIONS --------------------------- +KOKKOS_OPTION(IMPL_AMDGPU_FLAGS "" STRING "Set compiler flags for AMD GPUs") +KOKKOS_OPTION(IMPL_AMDGPU_LINK "" STRING "Set linker flags for AMD GPUs") +MARK_AS_ADVANCED(Kokkos_IMPL_AMDGPU_FLAGS) +MARK_AS_ADVANCED(Kokkos_IMPL_AMDGPU_LINK) + #clear anything that might be in the cache GLOBAL_SET(KOKKOS_AMDGPU_OPTIONS) IF(KOKKOS_ENABLE_HIP) @@ -301,6 +302,20 @@ IF (KOKKOS_ARCH_A64FX) ) ENDIF() +IF (KOKKOS_ARCH_ARMV9_GRACE) + SET(KOKKOS_ARCH_ARM_NEON ON) + check_cxx_compiler_flag("-mcpu=neoverse-n2" COMPILER_SUPPORTS_NEOVERSE_N2) + check_cxx_compiler_flag("-msve-vector-bits=128" 
COMPILER_SUPPORTS_SVE_VECTOR_BITS) + IF (COMPILER_SUPPORTS_NEOVERSE_N2 AND COMPILER_SUPPORTS_SVE_VECTOR_BITS) + COMPILER_SPECIFIC_FLAGS( + COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID + DEFAULT -mcpu=neoverse-n2 -msve-vector-bits=128 + ) + ELSE() + MESSAGE(WARNING "Compiler does not support ARMv9 Grace architecture") + ENDIF() +ENDIF() + IF (KOKKOS_ARCH_ZEN) COMPILER_SPECIFIC_FLAGS( COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID @@ -535,17 +550,17 @@ IF (KOKKOS_CXX_HOST_COMPILER_ID STREQUAL NVHPC) SET(KOKKOS_ARCH_AVX512XEON OFF) ENDIF() +# FIXME_NVCC nvcc doesn't seem to support Arm Neon. +IF(KOKKOS_ARCH_ARM_NEON AND KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA) + UNSET(KOKKOS_ARCH_ARM_NEON) +ENDIF() + IF (NOT KOKKOS_COMPILE_LANGUAGE STREQUAL CUDA) IF (KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE) COMPILER_SPECIFIC_FLAGS( Clang -fcuda-rdc NVIDIA --relocatable-device-code=true - NVHPC -gpu=rdc ) - ELSEIF(KOKKOS_ENABLE_CUDA) - COMPILER_SPECIFIC_FLAGS( - NVHPC -gpu=nordc - ) ENDIF() ENDIF() @@ -571,7 +586,7 @@ IF (KOKKOS_ENABLE_HIP) COMPILER_SPECIFIC_FLAGS( DEFAULT -fgpu-rdc ) - IF (NOT KOKKOS_CXX_COMPILER_ID STREQUAL HIPCC) + IF (NOT KOKKOS_CXX_COMPILER_ID STREQUAL HIPCC AND NOT KOKKOS_IMPL_AMDGPU_FLAGS) COMPILER_SPECIFIC_LINK_OPTIONS( DEFAULT --hip-link ) @@ -654,15 +669,9 @@ FUNCTION(CHECK_CUDA_ARCH ARCH FLAG) IF(KOKKOS_ENABLE_COMPILE_AS_CMAKE_LANGUAGE) SET(CMAKE_CUDA_ARCHITECTURES ${KOKKOS_CUDA_ARCHITECTURES} PARENT_SCOPE) ELSE() - IF(KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC) - STRING(REPLACE "sm_" "cc" NVHPC_CUDA_ARCH ${FLAG}) - GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=${NVHPC_CUDA_ARCH}") - GLOBAL_APPEND(KOKKOS_LINK_OPTIONS "${CUDA_ARCH_FLAG}=${NVHPC_CUDA_ARCH}") - ELSE() - GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=${FLAG}") - IF(KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE OR KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA) - GLOBAL_APPEND(KOKKOS_LINK_OPTIONS "${CUDA_ARCH_FLAG}=${FLAG}") - ENDIF() + GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=${FLAG}") + 
IF(KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE OR KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA) + GLOBAL_APPEND(KOKKOS_LINK_OPTIONS "${CUDA_ARCH_FLAG}=${FLAG}") ENDIF() ENDIF() ENDIF() @@ -704,14 +713,16 @@ FUNCTION(CHECK_AMDGPU_ARCH ARCH FLAG) MESSAGE(WARNING "Given AMD GPU architecture ${ARCH}, but Kokkos_ENABLE_HIP, Kokkos_ENABLE_SYCL, Kokkos_ENABLE_OPENACC, and Kokkos_ENABLE_OPENMPTARGET are OFF. Option will be ignored.") UNSET(KOKKOS_ARCH_${ARCH} PARENT_SCOPE) ELSE() - IF(KOKKOS_ENABLE_HIP) - SET(KOKKOS_HIP_ARCHITECTURES ${FLAG} PARENT_SCOPE) - ENDIF() - SET(KOKKOS_AMDGPU_ARCH_FLAG ${FLAG} PARENT_SCOPE) - GLOBAL_APPEND(KOKKOS_AMDGPU_OPTIONS "${AMDGPU_ARCH_FLAG}=${FLAG}") - IF(KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE) - GLOBAL_APPEND(KOKKOS_LINK_OPTIONS "${AMDGPU_ARCH_FLAG}=${FLAG}") - ENDIF() + IF(KOKKOS_ENABLE_HIP) + SET(KOKKOS_HIP_ARCHITECTURES ${FLAG} PARENT_SCOPE) + ENDIF() + IF(NOT KOKKOS_IMPL_AMDGPU_FLAGS) + SET(KOKKOS_AMDGPU_ARCH_FLAG ${FLAG} PARENT_SCOPE) + GLOBAL_APPEND(KOKKOS_AMDGPU_OPTIONS "${AMDGPU_ARCH_FLAG}=${FLAG}") + ENDIF() + IF(KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE) + GLOBAL_APPEND(KOKKOS_LINK_OPTIONS "${AMDGPU_ARCH_FLAG}=${FLAG}") + ENDIF() ENDIF() ENDIF() ENDFUNCTION() @@ -724,6 +735,15 @@ FOREACH(ARCH IN LISTS SUPPORTED_AMD_ARCHS) CHECK_AMDGPU_ARCH(${ARCH} ${FLAG}) ENDFOREACH() +IF(KOKKOS_IMPL_AMDGPU_FLAGS) + IF (NOT AMDGPU_ARCH_ALREADY_SPECIFIED) + MESSAGE(FATAL_ERROR "When IMPL_AMDGPU_FLAGS is set the architecture autodectection is disabled. 
" + "Please explicitly set the GPU architecture.") + ENDIF() + GLOBAL_APPEND(KOKKOS_AMDGPU_OPTIONS "${KOKKOS_IMPL_AMDGPU_FLAGS}") + GLOBAL_APPEND(KOKKOS_LINK_OPTIONS "${KOKKOS_IMPL_AMDGPU_LINK}") +ENDIF() + MACRO(SET_AND_CHECK_AMD_ARCH ARCH FLAG) KOKKOS_SET_OPTION(ARCH_${ARCH} ON) CHECK_AMDGPU_ARCH(${ARCH} ${FLAG}) @@ -984,7 +1004,7 @@ IF (KOKKOS_ARCH_HOPPER90) ENDIF() #HIP detection of gpu arch -IF(KOKKOS_ENABLE_HIP AND NOT AMDGPU_ARCH_ALREADY_SPECIFIED) +IF(KOKKOS_ENABLE_HIP AND NOT AMDGPU_ARCH_ALREADY_SPECIFIED AND NOT KOKKOS_IMPL_AMDGPU_FLAGS) FIND_PROGRAM(ROCM_ENUMERATOR rocm_agent_enumerator) IF(NOT ROCM_ENUMERATOR) MESSAGE(FATAL_ERROR "Autodetection of AMD GPU architecture not possible as " diff --git a/lib/kokkos/cmake/kokkos_compiler_id.cmake b/lib/kokkos/cmake/kokkos_compiler_id.cmake index 9135ca2b41..e8bfadb64e 100644 --- a/lib/kokkos/cmake/kokkos_compiler_id.cmake +++ b/lib/kokkos/cmake/kokkos_compiler_id.cmake @@ -42,12 +42,8 @@ IF(Kokkos_ENABLE_CUDA) # If launcher was found and nvcc_wrapper was not specified as # compiler and `CMAKE_CXX_COMPILIER_LAUNCHER` is not set, set to use launcher. # Will ensure CMAKE_CXX_COMPILER is replaced by nvcc_wrapper - IF(Kokkos_COMPILE_LAUNCHER AND NOT INTERNAL_HAVE_COMPILER_NVCC AND NOT KOKKOS_CXX_COMPILER_ID STREQUAL Clang - AND NOT (Kokkos_ENABLE_IMPL_NVHPC_AS_DEVICE_COMPILER AND KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC)) + IF(Kokkos_COMPILE_LAUNCHER AND NOT INTERNAL_HAVE_COMPILER_NVCC AND NOT KOKKOS_CXX_COMPILER_ID STREQUAL Clang) IF(CMAKE_CXX_COMPILER_LAUNCHER) - IF(KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC) - MESSAGE(STATUS "Using nvc++ as device compiler requires Kokkos_ENABLE_IMPL_NVHPC_AS_DEVICE_COMPILER=ON!") - ENDIF() MESSAGE(FATAL_ERROR "Cannot use CMAKE_CXX_COMPILER_LAUNCHER if the CMAKE_CXX_COMPILER is not able to compile CUDA code, i.e. 
nvcc_wrapper or clang++!") ENDIF() # the first argument to launcher is always the C++ compiler defined by cmake @@ -149,56 +145,85 @@ IF(KOKKOS_CXX_COMPILER_ID STREQUAL Fujitsu) ENDIF() # Enforce the minimum compilers supported by Kokkos. -SET(KOKKOS_MESSAGE_TEXT "Compiler not supported by Kokkos. Required compiler versions:") -SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Clang(CPU) 8.0.0 or higher") -SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Clang(CUDA) 10.0.0 or higher") -SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Clang(OpenMPTarget) 15.0.0 or higher") -SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n GCC 8.2.0 or higher") -SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Intel 19.0.5 or higher") -SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n IntelLLVM(CPU) 2021.1.1 or higher") -SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n IntelLLVM(SYCL) 2023.0.0 or higher") -SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n NVCC 11.0.0 or higher") -SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n HIPCC 5.2.0 or higher") -SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n NVHPC/PGI 22.3 or higher") -SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n MSVC 19.29 or higher") -SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n XL/XLClang not supported") +IF(NOT CMAKE_CXX_STANDARD) + SET(CMAKE_CXX_STANDARD 17) +ENDIF() +IF(CMAKE_CXX_STANDARD EQUAL 17) + SET(KOKKOS_CLANG_CPU_MINIMUM 8.0.0) + SET(KOKKOS_CLANG_CUDA_MINIMUM 10.0.0) + SET(KOKKOS_CLANG_OPENMPTARGET_MINIMUM 15.0.0) + SET(KOKKOS_GCC_MINIMUM 8.2.0) + SET(KOKKOS_INTEL_MINIMUM 19.0.5) + SET(KOKKOS_INTEL_LLVM_CPU_MINIMUM 2021.1.1) + SET(KOKKOS_INTEL_LLVM_SYCL_MINIMUM 2023.0.0) + SET(KOKKOS_NVCC_MINIMUM 11.0.0) + SET(KOKKOS_HIPCC_MINIMUM 5.2.0) + SET(KOKKOS_NVHPC_MINIMUM 22.3) + SET(KOKKOS_MSVC_MINIMUM 19.29) +ELSE() + SET(KOKKOS_CLANG_CPU_MINIMUM 14.0.0) + SET(KOKKOS_CLANG_CUDA_MINIMUM 14.0.0) + SET(KOKKOS_CLANG_OPENMPTARGET_MINIMUM 15.0.0) + SET(KOKKOS_GCC_MINIMUM 10.1.0) + SET(KOKKOS_INTEL_MINIMUM 
"not supported") + SET(KOKKOS_INTEL_LLVM_CPU_MINIMUM 2022.0.0) + SET(KOKKOS_INTEL_LLVM_SYCL_MINIMUM 2023.0.0) + SET(KOKKOS_NVCC_MINIMUM 12.0.0) + SET(KOKKOS_HIPCC_MINIMUM 5.2.0) + SET(KOKKOS_NVHPC_MINIMUM 22.3) + SET(KOKKOS_MSVC_MINIMUM 19.30) +ENDIF() + +SET(KOKKOS_MESSAGE_TEXT "Compiler not supported by Kokkos for C++${CMAKE_CXX_STANDARD}. Required minimum compiler versions:") +SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Clang(CPU) ${KOKKOS_CLANG_CPU_MINIMUM}") +SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Clang(CUDA) ${KOKKOS_CLANG_CUDA_MINIMUM}") +SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Clang(OpenMPTarget) ${KOKKOS_CLANG_OPENMPTARGET_MINIMUM}") +SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n GCC ${KOKKOS_GCC_MINIMUM}") +SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Intel ${KOKKOS_INTEL_MINIMUM}") +SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n IntelLLVM(CPU) ${KOKKOS_INTEL_LLVM_CPU_MINIMUM}") +SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n IntelLLVM(SYCL) ${KOKKOS_INTEL_LLVM_SYCL_MINIMUM}") +SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n NVCC ${KOKKOS_NVCC_MINIMUM}") +SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n HIPCC ${KOKKOS_HIPCC_MINIMUM}") +SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n NVHPC/PGI ${KOKKOS_NVHPC_MINIMUM}") +SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n MSVC ${KOKKOS_MSVC_MINIMUM}") +SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n XL/XLClang not supported") SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\nCompiler: ${KOKKOS_CXX_COMPILER_ID} ${KOKKOS_CXX_COMPILER_VERSION}\n") IF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang AND NOT Kokkos_ENABLE_CUDA) - IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 8.0.0) + IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS ${KOKKOS_CLANG_CPU_MINIMUM}) MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") ENDIF() ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang AND Kokkos_ENABLE_CUDA) - IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 10.0.0) + IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 
${KOKKOS_CLANG_CUDA_MINIMUM}) MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") ENDIF() ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL GNU) - IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 8.2.0) + IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS ${KOKKOS_GCC_MINIMUM}) MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") ENDIF() ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL Intel) - IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 19.0.5) + IF((NOT CMAKE_CXX_STANDARD EQUAL 17) OR (KOKKOS_CXX_COMPILER_VERSION VERSION_LESS ${KOKKOS_INTEL_MINIMUM})) MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") ENDIF() ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL IntelLLVM AND NOT Kokkos_ENABLE_SYCL) - IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 2021.1.1) + IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS ${KOKKOS_INTEL_LLVM_CPU_MINIMUM}) MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") ENDIF() ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL IntelLLVM AND Kokkos_ENABLE_SYCL) - IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 2023.0.0) + IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS ${KOKKOS_INTEL_LLVM_SYCL_MINIMUM}) MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") ENDIF() ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA) - IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 11.0.0) + IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS ${KOKKOS_NVCC_MINIMUM}) MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") ENDIF() SET(CMAKE_CXX_EXTENSIONS OFF CACHE BOOL "Kokkos turns off CXX extensions" FORCE) ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL HIPCC) - IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 5.2.0) + IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS ${KOKKOS_HIPCC_MINIMUM}) MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") ENDIF() ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL PGI OR KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC) - IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 22.3) + IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS ${KOKKOS_NVHPC_MINIMUM}) MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") ENDIF() # Treat PGI internally as NVHPC to simplify handling both compilers. 
@@ -206,13 +231,13 @@ ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL PGI OR KOKKOS_CXX_COMPILER_ID STREQUAL NV # backward-compatible to pgc++. SET(KOKKOS_CXX_COMPILER_ID NVHPC CACHE STRING INTERNAL FORCE) ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL "MSVC") - IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 19.29) + IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS ${KOKKOS_MSVC_MINIMUM}) MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") ENDIF() ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL XL OR KOKKOS_CXX_COMPILER_ID STREQUAL XLClang) MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang AND Kokkos_ENABLE_OPENMPTARGET) - IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 15.0.0) + IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS KOKKOS_CLANG_OPENMPTARGET_MINIMUM) MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") ENDIF() ENDIF() diff --git a/lib/kokkos/cmake/kokkos_enable_options.cmake b/lib/kokkos/cmake/kokkos_enable_options.cmake index 32788e7aa0..53764b0c68 100644 --- a/lib/kokkos/cmake/kokkos_enable_options.cmake +++ b/lib/kokkos/cmake/kokkos_enable_options.cmake @@ -48,6 +48,8 @@ KOKKOS_ENABLE_OPTION(CUDA_LAMBDA ${CUDA_LAMBDA_DEFAULT} "Whether to allow lambda # resolved but we keep the option around a bit longer to be safe. 
KOKKOS_ENABLE_OPTION(IMPL_CUDA_MALLOC_ASYNC ON "Whether to enable CudaMallocAsync (requires CUDA Toolkit 11.2)") KOKKOS_ENABLE_OPTION(IMPL_NVHPC_AS_DEVICE_COMPILER OFF "Whether to allow nvc++ as Cuda device compiler") +KOKKOS_ENABLE_OPTION(IMPL_CUDA_UNIFIED_MEMORY OFF "Whether to leverage unified memory architectures for CUDA") + KOKKOS_ENABLE_OPTION(DEPRECATED_CODE_4 ON "Whether code deprecated in major release 4 is available" ) KOKKOS_ENABLE_OPTION(DEPRECATION_WARNINGS ON "Whether to emit deprecation warnings" ) KOKKOS_ENABLE_OPTION(HIP_RELOCATABLE_DEVICE_CODE OFF "Whether to enable relocatable device code (RDC) for HIP") @@ -75,8 +77,12 @@ KOKKOS_ENABLE_OPTION(IMPL_HIP_UNIFIED_MEMORY OFF "Whether to leverage unified me # This option will go away eventually, but allows fallback to old implementation when needed. KOKKOS_ENABLE_OPTION(DESUL_ATOMICS_EXTERNAL OFF "Whether to use an external desul installation") KOKKOS_ENABLE_OPTION(ATOMICS_BYPASS OFF "**NOT RECOMMENDED** Whether to make atomics non-atomic for non-threaded MPI-only use cases") +KOKKOS_ENABLE_OPTION(IMPL_REF_COUNT_BRANCH_UNLIKELY ON "Whether to use the C++20 `[[unlikely]]` attribute in the view reference counting") +mark_as_advanced(Kokkos_ENABLE_IMPL_REF_COUNT_BRANCH_UNLIKELY) +KOKKOS_ENABLE_OPTION(IMPL_VIEW_OF_VIEWS_DESTRUCTOR_PRECONDITION_VIOLATION_WORKAROUND OFF "Whether to enable a workaround for invalid use of View of Views that causes program hang on destruction.") +mark_as_advanced(Kokkos_ENABLE_IMPL_VIEW_OF_VIEWS_DESTRUCTOR_PRECONDITION_VIOLATION_WORKAROUND) -KOKKOS_ENABLE_OPTION(IMPL_MDSPAN OFF "Whether to enable experimental mdspan support") +KOKKOS_ENABLE_OPTION(IMPL_MDSPAN ON "Whether to enable experimental mdspan support") KOKKOS_ENABLE_OPTION(MDSPAN_EXTERNAL OFF BOOL "Whether to use an external version of mdspan") KOKKOS_ENABLE_OPTION(IMPL_SKIP_COMPILER_MDSPAN ON BOOL "Whether to use an internal version of mdspan even if the compiler supports mdspan") 
mark_as_advanced(Kokkos_ENABLE_IMPL_MDSPAN) @@ -131,7 +137,7 @@ FUNCTION(check_device_specific_options) ENDIF() ENDFUNCTION() -CHECK_DEVICE_SPECIFIC_OPTIONS(DEVICE CUDA OPTIONS CUDA_UVM CUDA_RELOCATABLE_DEVICE_CODE CUDA_LAMBDA CUDA_CONSTEXPR CUDA_LDG_INTRINSIC) +CHECK_DEVICE_SPECIFIC_OPTIONS(DEVICE CUDA OPTIONS CUDA_UVM CUDA_RELOCATABLE_DEVICE_CODE CUDA_LAMBDA CUDA_CONSTEXPR CUDA_LDG_INTRINSIC IMPL_CUDA_UNIFIED_MEMORY) CHECK_DEVICE_SPECIFIC_OPTIONS(DEVICE HIP OPTIONS HIP_RELOCATABLE_DEVICE_CODE) CHECK_DEVICE_SPECIFIC_OPTIONS(DEVICE HPX OPTIONS IMPL_HPX_ASYNC_DISPATCH) diff --git a/lib/kokkos/cmake/kokkos_functions.cmake b/lib/kokkos/cmake/kokkos_functions.cmake index 9dab1ca00e..d1f1e0d7a7 100644 --- a/lib/kokkos/cmake/kokkos_functions.cmake +++ b/lib/kokkos/cmake/kokkos_functions.cmake @@ -709,7 +709,12 @@ MACRO(kokkos_find_imported NAME) ENDIF() IF (NOT TPL_LIBRARY_SUFFIXES) - SET(TPL_LIBRARY_SUFFIXES lib lib64) + SET(TPL_LIBRARY_SUFFIXES lib) + IF(KOKKOS_IMPL_32BIT) + LIST(APPEND TPL_LIBRARY_SUFFIXES lib32) + ELSE() + LIST(APPEND TPL_LIBRARY_SUFFIXES lib64) + ENDIF() ENDIF() SET(${NAME}_INCLUDE_DIRS) diff --git a/lib/kokkos/cmake/kokkos_test_cxx_std.cmake b/lib/kokkos/cmake/kokkos_test_cxx_std.cmake index b075a3e36b..5b45674e05 100644 --- a/lib/kokkos/cmake/kokkos_test_cxx_std.cmake +++ b/lib/kokkos/cmake/kokkos_test_cxx_std.cmake @@ -124,12 +124,8 @@ IF(KOKKOS_ENABLE_CUDA) ELSEIF(CMAKE_CXX_EXTENSIONS) MESSAGE(FATAL_ERROR "Compiling CUDA code with clang doesn't support C++ extensions. Set -DCMAKE_CXX_EXTENSIONS=OFF") ENDIF() - ELSEIF(NOT KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA AND NOT (Kokkos_ENABLE_IMPL_NVHPC_AS_DEVICE_COMPILER AND KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC)) - IF(KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC) - MESSAGE(FATAL_ERROR "Invalid compiler for CUDA. To allow nvc++ as Cuda compiler, Kokkos_ENABLE_IMPL_NVHPC_AS_DEVICE_COMPILER=ON must be set!") - ELSE() - MESSAGE(FATAL_ERROR "Invalid compiler for CUDA. 
The compiler must be nvcc_wrapper or Clang or NVC++ or use kokkos_launch_compiler, but compiler ID was ${KOKKOS_CXX_COMPILER_ID}") - ENDIF() + ELSEIF(NOT KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA) + MESSAGE(FATAL_ERROR "Invalid compiler for CUDA. The compiler must be nvcc_wrapper or Clang or use kokkos_launch_compiler, but compiler ID was ${KOKKOS_CXX_COMPILER_ID}") ENDIF() ENDIF() diff --git a/lib/kokkos/cmake/kokkos_tpls.cmake b/lib/kokkos/cmake/kokkos_tpls.cmake index 6ef3b79bde..cda9e0d600 100644 --- a/lib/kokkos/cmake/kokkos_tpls.cmake +++ b/lib/kokkos/cmake/kokkos_tpls.cmake @@ -103,13 +103,19 @@ if (Kokkos_ENABLE_IMPL_MDSPAN AND Kokkos_ENABLE_MDSPAN_EXTERNAL) endif() IF (Kokkos_ENABLE_OPENMP) - find_package(OpenMP REQUIRED) + find_package(OpenMP REQUIRED COMPONENTS CXX) # FIXME_TRILINOS Trilinos doesn't allow for Kokkos to use find_dependency # so we just append the flags here instead of linking with the OpenMP target. IF(KOKKOS_HAS_TRILINOS) COMPILER_SPECIFIC_FLAGS(DEFAULT ${OpenMP_CXX_FLAGS}) ELSE() - KOKKOS_EXPORT_CMAKE_TPL(OpenMP REQUIRED) + KOKKOS_EXPORT_CMAKE_TPL(OpenMP REQUIRED COMPONENTS CXX) + ENDIF() + IF(Kokkos_ENABLE_HIP AND KOKKOS_COMPILE_LANGUAGE STREQUAL HIP) + GLOBAL_APPEND(KOKKOS_AMDGPU_OPTIONS ${OpenMP_CXX_FLAGS}) + ENDIF() + IF(Kokkos_ENABLE_CUDA AND KOKKOS_COMPILE_LANGUAGE STREQUAL CUDA) + GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS -Xcompiler ${OpenMP_CXX_FLAGS}) ENDIF() ENDIF() diff --git a/lib/kokkos/cmake/kokkos_tribits.cmake b/lib/kokkos/cmake/kokkos_tribits.cmake index 060a7a8472..6da543a2c8 100644 --- a/lib/kokkos/cmake/kokkos_tribits.cmake +++ b/lib/kokkos/cmake/kokkos_tribits.cmake @@ -160,6 +160,12 @@ FUNCTION(KOKKOS_ADD_EXECUTABLE_AND_TEST ROOT_NAME) ) ENDIF() ENDIF() + # We noticed problems with -fvisibility=hidden for inline static variables + # if Kokkos was built as shared library. 
+ IF(BUILD_SHARED_LIBS) + SET_PROPERTY(TARGET ${PACKAGE_NAME}_${ROOT_NAME} PROPERTY VISIBILITY_INLINES_HIDDEN ON) + SET_PROPERTY(TARGET ${PACKAGE_NAME}_${ROOT_NAME} PROPERTY CXX_VISIBILITY_PRESET hidden) + ENDIF() ENDFUNCTION() FUNCTION(KOKKOS_SET_EXE_PROPERTY ROOT_NAME) @@ -241,34 +247,6 @@ MACRO(KOKKOS_CONFIGURE_CORE) KOKKOS_CONFIG_HEADER( KokkosCore_Config_HeaderSet.in KokkosCore_Config_FwdBackend.hpp "KOKKOS_FWD" "fwd/Kokkos_Fwd" "${KOKKOS_ENABLED_DEVICES}") KOKKOS_CONFIG_HEADER( KokkosCore_Config_HeaderSet.in KokkosCore_Config_SetupBackend.hpp "KOKKOS_SETUP" "setup/Kokkos_Setup" "${DEVICE_SETUP_LIST}") KOKKOS_CONFIG_HEADER( KokkosCore_Config_HeaderSet.in KokkosCore_Config_DeclareBackend.hpp "KOKKOS_DECLARE" "decl/Kokkos_Declare" "${KOKKOS_ENABLED_DEVICES}") - SET(_DEFAULT_HOST_MEMSPACE "::Kokkos::HostSpace") - KOKKOS_OPTION(DEFAULT_DEVICE_MEMORY_SPACE "" STRING "Override default device memory space") - KOKKOS_OPTION(DEFAULT_HOST_MEMORY_SPACE "" STRING "Override default host memory space") - KOKKOS_OPTION(DEFAULT_DEVICE_EXECUTION_SPACE "" STRING "Override default device execution space") - KOKKOS_OPTION(DEFAULT_HOST_PARALLEL_EXECUTION_SPACE "" STRING "Override default host parallel execution space") - IF (NOT Kokkos_DEFAULT_DEVICE_EXECUTION_SPACE STREQUAL "") - SET(_DEVICE_PARALLEL ${Kokkos_DEFAULT_DEVICE_EXECUTION_SPACE}) - MESSAGE(STATUS "Override default device execution space: ${_DEVICE_PARALLEL}") - SET(KOKKOS_DEVICE_SPACE_ACTIVE ON) - ELSE() - IF (_DEVICE_PARALLEL STREQUAL "NoTypeDefined") - SET(KOKKOS_DEVICE_SPACE_ACTIVE OFF) - ELSE() - SET(KOKKOS_DEVICE_SPACE_ACTIVE ON) - ENDIF() - ENDIF() - IF (NOT Kokkos_DEFAULT_HOST_PARALLEL_EXECUTION_SPACE STREQUAL "") - SET(_HOST_PARALLEL ${Kokkos_DEFAULT_HOST_PARALLEL_EXECUTION_SPACE}) - MESSAGE(STATUS "Override default host parallel execution space: ${_HOST_PARALLEL}") - SET(KOKKOS_HOSTPARALLEL_SPACE_ACTIVE ON) - ELSE() - IF (_HOST_PARALLEL STREQUAL "NoTypeDefined") - SET(KOKKOS_HOSTPARALLEL_SPACE_ACTIVE OFF) - 
ELSE() - SET(KOKKOS_HOSTPARALLEL_SPACE_ACTIVE ON) - ENDIF() - ENDIF() - #We are ready to configure the header CONFIGURE_FILE(cmake/KokkosCore_config.h.in KokkosCore_config.h @ONLY) ENDMACRO() @@ -484,15 +462,10 @@ ENDFUNCTION() FUNCTION(KOKKOS_LIB_INCLUDE_DIRECTORIES TARGET) - IF(KOKKOS_HAS_TRILINOS) - #ignore the target, tribits doesn't do anything directly with targets - TRIBITS_INCLUDE_DIRECTORIES(${ARGN}) - ELSE() #append to a list for later - KOKKOS_LIB_TYPE(${TARGET} INCTYPE) - FOREACH(DIR ${ARGN}) - TARGET_INCLUDE_DIRECTORIES(${TARGET} ${INCTYPE} $) - ENDFOREACH() - ENDIF() + KOKKOS_LIB_TYPE(${TARGET} INCTYPE) + FOREACH(DIR ${ARGN}) + TARGET_INCLUDE_DIRECTORIES(${TARGET} ${INCTYPE} $) + ENDFOREACH() ENDFUNCTION() FUNCTION(KOKKOS_LIB_COMPILE_OPTIONS TARGET) diff --git a/lib/kokkos/cmake/tpls/FindTPLCUSPARSE.cmake b/lib/kokkos/cmake/tpls/FindTPLCUSPARSE.cmake deleted file mode 100644 index 4709f8002b..0000000000 --- a/lib/kokkos/cmake/tpls/FindTPLCUSPARSE.cmake +++ /dev/null @@ -1,26 +0,0 @@ -#@HEADER -# ************************************************************************ -# -# Kokkos v. 4.0 -# Copyright (2022) National Technology & Engineering -# Solutions of Sandia, LLC (NTESS). -# -# Under the terms of Contract DE-NA0003525 with NTESS, -# the U.S. Government retains certain rights in this software. -# -# Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. 
-# -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# -#@HEADER - -# Check for CUDA support - -IF (NOT TPL_ENABLE_CUDA) - MESSAGE(FATAL_ERROR "\nCUSPARSE requires CUDA") -ELSE() - GLOBAL_SET(TPL_CUSPARSE_LIBRARY_DIRS) - GLOBAL_SET(TPL_CUSPARSE_INCLUDE_DIRS ${TPL_CUDA_INCLUDE_DIRS}) - GLOBAL_SET(TPL_CUSPARSE_LIBRARIES ${CUDA_cusparse_LIBRARY}) -ENDIF() - diff --git a/lib/kokkos/containers/src/Kokkos_DualView.hpp b/lib/kokkos/containers/src/Kokkos_DualView.hpp index e821570a8d..a37a2bdceb 100644 --- a/lib/kokkos/containers/src/Kokkos_DualView.hpp +++ b/lib/kokkos/containers/src/Kokkos_DualView.hpp @@ -944,13 +944,13 @@ class DualView : public ViewTraits { if (sizeMismatch) { ::Kokkos::realloc(arg_prop, d_view, n0, n1, n2, n3, n4, n5, n6, n7); - if (alloc_prop_input::initialize) { + if constexpr (alloc_prop_input::initialize) { h_view = create_mirror_view(typename t_host::memory_space(), d_view); } else { h_view = create_mirror_view(Kokkos::WithoutInitializing, typename t_host::memory_space(), d_view); } - } else if (alloc_prop_input::initialize) { + } else if constexpr (alloc_prop_input::initialize) { if constexpr (alloc_prop_input::has_execution_space) { const auto& exec_space = Impl::get_property(arg_prop); @@ -1038,12 +1038,10 @@ class DualView : public ViewTraits { /* Resize on Device */ if (sizeMismatch) { ::Kokkos::resize(properties, d_view, n0, n1, n2, n3, n4, n5, n6, n7); - if (alloc_prop_input::initialize) { - h_view = create_mirror_view(typename t_host::memory_space(), d_view); - } else { - h_view = create_mirror_view(Kokkos::WithoutInitializing, - typename t_host::memory_space(), d_view); - } + // this part of the lambda was relocated in a method as it contains a + // `if constexpr`. 
In some cases, both branches were evaluated + // leading to a compile error + resync_host(properties); /* Mark Device copy as modified */ ++modified_flags(1); @@ -1054,13 +1052,10 @@ class DualView : public ViewTraits { /* Resize on Host */ if (sizeMismatch) { ::Kokkos::resize(properties, h_view, n0, n1, n2, n3, n4, n5, n6, n7); - if (alloc_prop_input::initialize) { - d_view = create_mirror_view(typename t_dev::memory_space(), h_view); - - } else { - d_view = create_mirror_view(Kokkos::WithoutInitializing, - typename t_dev::memory_space(), h_view); - } + // this part of the lambda was relocated in a method as it contains a + // `if constexpr`. In some cases, both branches were evaluated + // leading to a compile error + resync_device(properties); /* Mark Host copy as modified */ ++modified_flags(0); @@ -1099,6 +1094,39 @@ class DualView : public ViewTraits { } } + private: + // resync host mirror from device + // this code was relocated from a lambda as it contains a `if constexpr`. + // In some cases, both branches were evaluated, leading to a compile error + template + inline void resync_host(Impl::ViewCtorProp const&) { + using alloc_prop_input = Impl::ViewCtorProp; + + if constexpr (alloc_prop_input::initialize) { + h_view = create_mirror_view(typename t_host::memory_space(), d_view); + } else { + h_view = create_mirror_view(Kokkos::WithoutInitializing, + typename t_host::memory_space(), d_view); + } + } + + // resync device mirror from host + // this code was relocated from a lambda as it contains a `if constexpr` + // In some cases, both branches were evaluated leading to a compile error + template + inline void resync_device(Impl::ViewCtorProp const&) { + using alloc_prop_input = Impl::ViewCtorProp; + + if constexpr (alloc_prop_input::initialize) { + d_view = create_mirror_view(typename t_dev::memory_space(), h_view); + + } else { + d_view = create_mirror_view(Kokkos::WithoutInitializing, + typename t_dev::memory_space(), h_view); + } + } + + public: void 
resize(const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, diff --git a/lib/kokkos/containers/src/Kokkos_DynRankView.hpp b/lib/kokkos/containers/src/Kokkos_DynRankView.hpp index 5fa59f1b7c..5f7fcaf69e 100644 --- a/lib/kokkos/containers/src/Kokkos_DynRankView.hpp +++ b/lib/kokkos/containers/src/Kokkos_DynRankView.hpp @@ -1657,8 +1657,7 @@ KOKKOS_FUNCTION auto as_view_of_rank_n( if constexpr (std::is_same_v || std::is_same_v || - std::is_same_v || - is_layouttiled::value) { + std::is_same_v) { for (int i = N; i < 7; ++i) layout.dimension[i] = KOKKOS_IMPL_CTOR_DEFAULT_ARG; } @@ -1933,254 +1932,155 @@ struct MirrorDRVType { } // namespace Impl namespace Impl { + +// create a mirror +// private interface that accepts arbitrary view constructor args passed by a +// view_alloc template -inline typename DynRankView::HostMirror create_mirror( - const DynRankView& src, - const Impl::ViewCtorProp& arg_prop, - std::enable_if_t::has_memory_space>* = - nullptr) { - using src_type = DynRankView; - using dst_type = typename src_type::HostMirror; - - using alloc_prop_input = Impl::ViewCtorProp; - - static_assert( - !alloc_prop_input::has_label, - "The view constructor arguments passed to Kokkos::create_mirror " - "must not include a label!"); - static_assert( - !alloc_prop_input::has_pointer, - "The view constructor arguments passed to Kokkos::create_mirror must " - "not include a pointer!"); - static_assert( - !alloc_prop_input::allow_padding, - "The view constructor arguments passed to Kokkos::create_mirror must " - "not explicitly allow padding!"); +inline auto create_mirror(const DynRankView& src, + const Impl::ViewCtorProp& arg_prop) { + check_view_ctor_args_create_mirror(); auto prop_copy = Impl::with_properties_if_unset( arg_prop, std::string(src.label()).append("_mirror")); - return dst_type(prop_copy, Impl::reconstructLayout(src.layout(), src.rank())); -} + if constexpr 
(Impl::ViewCtorProp::has_memory_space) { + using dst_type = typename Impl::MirrorDRVType< + typename Impl::ViewCtorProp::memory_space, T, + P...>::view_type; -template -inline auto create_mirror( - const DynRankView& src, - const Impl::ViewCtorProp& arg_prop, - std::enable_if_t::has_memory_space>* = - nullptr) { - using dst_type = typename Impl::MirrorDRVType< - typename Impl::ViewCtorProp::memory_space, T, - P...>::view_type; + return dst_type(prop_copy, + Impl::reconstructLayout(src.layout(), src.rank())); + } else { + using src_type = DynRankView; + using dst_type = typename src_type::HostMirror; - using alloc_prop_input = Impl::ViewCtorProp; - - static_assert( - !alloc_prop_input::has_label, - "The view constructor arguments passed to Kokkos::create_mirror " - "must not include a label!"); - static_assert( - !alloc_prop_input::has_pointer, - "The view constructor arguments passed to Kokkos::create_mirror must " - "not include a pointer!"); - static_assert( - !alloc_prop_input::allow_padding, - "The view constructor arguments passed to Kokkos::create_mirror must " - "not explicitly allow padding!"); - - auto prop_copy = Impl::with_properties_if_unset( - arg_prop, std::string(src.label()).append("_mirror")); - - return dst_type(prop_copy, Impl::reconstructLayout(src.layout(), src.rank())); + return dst_type(prop_copy, + Impl::reconstructLayout(src.layout(), src.rank())); + } +#if defined(KOKKOS_COMPILER_INTEL) || \ + (defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \ + !defined(KOKKOS_COMPILER_MSVC)) + __builtin_unreachable(); +#endif } } // namespace Impl -// Create a mirror in host space -template -inline typename DynRankView::HostMirror create_mirror( - const DynRankView& src, - std::enable_if_t::specialize, - void>::value>* = nullptr) { - return Impl::create_mirror(src, Kokkos::Impl::ViewCtorProp<>{}); +// public interface +template ::specialize>>> +inline auto create_mirror(const DynRankView& src) { + return Impl::create_mirror(src, 
Kokkos::view_alloc()); } -template -inline typename DynRankView::HostMirror create_mirror( - Kokkos::Impl::WithoutInitializing_t wi, const DynRankView& src, - std::enable_if_t::specialize, - void>::value>* = nullptr) { +// public interface that accepts a without initializing flag +template ::specialize>>> +inline auto create_mirror(Kokkos::Impl::WithoutInitializing_t wi, + const DynRankView& src) { return Impl::create_mirror(src, Kokkos::view_alloc(wi)); } -template -inline typename DynRankView::HostMirror create_mirror( - const Impl::ViewCtorProp& arg_prop, - const DynRankView& src, - std::enable_if_t< - std::is_void::specialize>::value && - !Impl::ViewCtorProp::has_memory_space>* = nullptr) { - return Impl::create_mirror(src, arg_prop); -} - -// Create a mirror in a new space +// public interface that accepts a space template ::value && - std::is_void::specialize>::value>> -typename Impl::MirrorDRVType::view_type create_mirror( - const Space&, const Kokkos::DynRankView& src) { + std::is_void_v::specialize>>> +auto create_mirror(const Space&, const Kokkos::DynRankView& src) { return Impl::create_mirror( src, Kokkos::view_alloc(typename Space::memory_space{})); } -template -typename Impl::MirrorDRVType::view_type create_mirror( - Kokkos::Impl::WithoutInitializing_t wi, const Space&, - const Kokkos::DynRankView& src, - std::enable_if_t::specialize, - void>::value>* = nullptr) { +// public interface that accepts a space and a without initializing flag +template ::value && + std::is_void_v::specialize>>> +auto create_mirror(Kokkos::Impl::WithoutInitializing_t wi, const Space&, + const Kokkos::DynRankView& src) { return Impl::create_mirror( src, Kokkos::view_alloc(wi, typename Space::memory_space{})); } -template -inline auto create_mirror( - const Impl::ViewCtorProp& arg_prop, - const DynRankView& src, - std::enable_if_t< - std::is_void::specialize>::value && - Impl::ViewCtorProp::has_memory_space>* = nullptr) { - using ReturnType = typename Impl::MirrorDRVType< - 
typename Impl::ViewCtorProp::memory_space, T, - P...>::view_type; - return ReturnType{Impl::create_mirror(src, arg_prop)}; +// public interface that accepts arbitrary view constructor args passed by a +// view_alloc +template ::specialize>>> +inline auto create_mirror(const Impl::ViewCtorProp& arg_prop, + const DynRankView& src) { + return Impl::create_mirror(src, arg_prop); } namespace Impl { -template -inline std::enable_if_t< - !Impl::ViewCtorProp::has_memory_space && - std::is_same< - typename DynRankView::memory_space, - typename DynRankView::HostMirror::memory_space>::value && - std::is_same< - typename DynRankView::data_type, - typename DynRankView::HostMirror::data_type>::value, - typename DynRankView::HostMirror> -create_mirror_view(const DynRankView& src, - const typename Impl::ViewCtorProp&) { - return src; -} +// create a mirror view +// private interface that accepts arbitrary view constructor args passed by a +// view_alloc template -inline std::enable_if_t< - !Impl::ViewCtorProp::has_memory_space && - !(std::is_same< - typename DynRankView::memory_space, - typename DynRankView::HostMirror::memory_space>::value && - std::is_same< - typename DynRankView::data_type, - typename DynRankView::HostMirror::data_type>::value), - typename DynRankView::HostMirror> -create_mirror_view( +inline auto create_mirror_view( const DynRankView& src, - const typename Impl::ViewCtorProp& arg_prop) { - return Kokkos::Impl::create_mirror(src, arg_prop); + [[maybe_unused]] const typename Impl::ViewCtorProp& + arg_prop) { + if constexpr (!Impl::ViewCtorProp::has_memory_space) { + if constexpr (std::is_same::memory_space, + typename DynRankView< + T, P...>::HostMirror::memory_space>::value && + std::is_same::data_type, + typename DynRankView< + T, P...>::HostMirror::data_type>::value) { + return typename DynRankView::HostMirror(src); + } else { + return Kokkos::Impl::choose_create_mirror(src, arg_prop); + } + } else { + if constexpr (Impl::MirrorDRViewType::memory_space, + T, 
P...>::is_same_memspace) { + return typename Impl::MirrorDRViewType< + typename Impl::ViewCtorProp::memory_space, T, + P...>::view_type(src); + } else { + return Kokkos::Impl::choose_create_mirror(src, arg_prop); + } + } +#if defined(KOKKOS_COMPILER_INTEL) || \ + (defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \ + !defined(KOKKOS_COMPILER_MSVC)) + __builtin_unreachable(); +#endif } -template ::has_memory_space>> -inline std::enable_if_t< - Kokkos::is_space< - typename Impl::ViewCtorProp::memory_space>::value && - Impl::MirrorDRViewType< - typename Impl::ViewCtorProp::memory_space, T, - P...>::is_same_memspace, - typename Impl::MirrorDRViewType< - typename Impl::ViewCtorProp::memory_space, T, - P...>::view_type> -create_mirror_view(const Kokkos::DynRankView& src, - const typename Impl::ViewCtorProp&) { - return src; -} - -template ::has_memory_space>> -inline std::enable_if_t< - Kokkos::is_space< - typename Impl::ViewCtorProp::memory_space>::value && - !Impl::MirrorDRViewType< - typename Impl::ViewCtorProp::memory_space, T, - P...>::is_same_memspace, - typename Impl::MirrorDRViewType< - typename Impl::ViewCtorProp::memory_space, T, - P...>::view_type> -create_mirror_view( - const Kokkos::DynRankView& src, - const typename Impl::ViewCtorProp& arg_prop) { - return Kokkos::Impl::create_mirror(src, arg_prop); -} } // namespace Impl -// Create a mirror view in host space +// public interface template -inline std::enable_if_t< - (std::is_same< - typename DynRankView::memory_space, - typename DynRankView::HostMirror::memory_space>::value && - std::is_same::data_type, - typename DynRankView::HostMirror::data_type>::value), - typename DynRankView::HostMirror> -create_mirror_view(const Kokkos::DynRankView& src) { - return src; -} - -template -inline std::enable_if_t< - !(std::is_same< - typename DynRankView::memory_space, - typename DynRankView::HostMirror::memory_space>::value && - std::is_same< - typename DynRankView::data_type, - typename 
DynRankView::HostMirror::data_type>::value), - typename DynRankView::HostMirror> -create_mirror_view(const Kokkos::DynRankView& src) { - return Kokkos::create_mirror(src); +inline auto create_mirror_view(const Kokkos::DynRankView& src) { + return Impl::create_mirror_view(src, Kokkos::view_alloc()); } +// public interface that accepts a without initializing flag template inline auto create_mirror_view(Kokkos::Impl::WithoutInitializing_t wi, const DynRankView& src) { return Impl::create_mirror_view(src, Kokkos::view_alloc(wi)); } -// Create a mirror view in a new space -// FIXME_C++17 Improve SFINAE here. +// public interface that accepts a space template ::value>> -inline typename Impl::MirrorDRViewType::view_type -create_mirror_view( - const Space&, const Kokkos::DynRankView& src, - std::enable_if_t< - Impl::MirrorDRViewType::is_same_memspace>* = nullptr) { - return src; +inline auto create_mirror_view(const Space&, + const Kokkos::DynRankView& src) { + return Impl::create_mirror_view( + src, Kokkos::view_alloc(typename Space::memory_space())); } -// FIXME_C++17 Improve SFINAE here. 
+// public interface that accepts a space and a without initializing flag template ::value>> -inline typename Impl::MirrorDRViewType::view_type -create_mirror_view( - const Space& space, const Kokkos::DynRankView& src, - std::enable_if_t< - !Impl::MirrorDRViewType::is_same_memspace>* = nullptr) { - return Kokkos::create_mirror(space, src); -} - -template + typename Enable = std::enable_if_t::value>> inline auto create_mirror_view(Kokkos::Impl::WithoutInitializing_t wi, const Space&, const Kokkos::DynRankView& src) { @@ -2188,6 +2088,8 @@ inline auto create_mirror_view(Kokkos::Impl::WithoutInitializing_t wi, src, Kokkos::view_alloc(typename Space::memory_space{}, wi)); } +// public interface that accepts arbitrary view constructor args passed by a +// view_alloc template inline auto create_mirror_view( const typename Impl::ViewCtorProp& arg_prop, @@ -2195,75 +2097,51 @@ inline auto create_mirror_view( return Impl::create_mirror_view(src, arg_prop); } -template +// create a mirror view and deep copy it +// public interface that accepts arbitrary view constructor args passed by a +// view_alloc +template ::specialize>::value>> auto create_mirror_view_and_copy( - const Impl::ViewCtorProp&, - const Kokkos::DynRankView& src, - std::enable_if_t< - std::is_void::specialize>::value && - Impl::MirrorDRViewType< - typename Impl::ViewCtorProp::memory_space, T, - P...>::is_same_memspace>* = nullptr) { + [[maybe_unused]] const Impl::ViewCtorProp& arg_prop, + const Kokkos::DynRankView& src) { using alloc_prop_input = Impl::ViewCtorProp; - static_assert( - alloc_prop_input::has_memory_space, - "The view constructor arguments passed to " - "Kokkos::create_mirror_view_and_copy must include a memory space!"); - static_assert(!alloc_prop_input::has_pointer, - "The view constructor arguments passed to " - "Kokkos::create_mirror_view_and_copy must " - "not include a pointer!"); - static_assert(!alloc_prop_input::allow_padding, - "The view constructor arguments passed to " - 
"Kokkos::create_mirror_view_and_copy must " - "not explicitly allow padding!"); - // same behavior as deep_copy(src, src) - if (!alloc_prop_input::has_execution_space) - fence( - "Kokkos::create_mirror_view_and_copy: fence before returning src view"); - return src; -} + Impl::check_view_ctor_args_create_mirror_view_and_copy(); -template -auto create_mirror_view_and_copy( - const Impl::ViewCtorProp& arg_prop, - const Kokkos::DynRankView& src, - std::enable_if_t< - std::is_void::specialize>::value && - !Impl::MirrorDRViewType< - typename Impl::ViewCtorProp::memory_space, T, - P...>::is_same_memspace>* = nullptr) { - using alloc_prop_input = Impl::ViewCtorProp; - static_assert( - alloc_prop_input::has_memory_space, - "The view constructor arguments passed to " - "Kokkos::create_mirror_view_and_copy must include a memory space!"); - static_assert(!alloc_prop_input::has_pointer, - "The view constructor arguments passed to " - "Kokkos::create_mirror_view_and_copy must " - "not include a pointer!"); - static_assert(!alloc_prop_input::allow_padding, - "The view constructor arguments passed to " - "Kokkos::create_mirror_view_and_copy must " - "not explicitly allow padding!"); - using Space = typename alloc_prop_input::memory_space; - using Mirror = typename Impl::MirrorDRViewType::view_type; + if constexpr (Impl::MirrorDRViewType< + typename Impl::ViewCtorProp::memory_space, + T, P...>::is_same_memspace) { + // same behavior as deep_copy(src, src) + if constexpr (!alloc_prop_input::has_execution_space) + fence( + "Kokkos::create_mirror_view_and_copy: fence before returning src " + "view"); + return src; + } else { + using Space = typename alloc_prop_input::memory_space; + using Mirror = typename Impl::MirrorDRViewType::view_type; - auto arg_prop_copy = Impl::with_properties_if_unset( - arg_prop, std::string{}, WithoutInitializing, - typename Space::execution_space{}); + auto arg_prop_copy = Impl::with_properties_if_unset( + arg_prop, std::string{}, WithoutInitializing, + 
typename Space::execution_space{}); - std::string& label = Impl::get_property(arg_prop_copy); - if (label.empty()) label = src.label(); - auto mirror = typename Mirror::non_const_type{ - arg_prop_copy, Impl::reconstructLayout(src.layout(), src.rank())}; - if constexpr (alloc_prop_input::has_execution_space) { - deep_copy(Impl::get_property(arg_prop_copy), - mirror, src); - } else - deep_copy(mirror, src); - return mirror; + std::string& label = Impl::get_property(arg_prop_copy); + if (label.empty()) label = src.label(); + auto mirror = typename Mirror::non_const_type{ + arg_prop_copy, Impl::reconstructLayout(src.layout(), src.rank())}; + if constexpr (alloc_prop_input::has_execution_space) { + deep_copy(Impl::get_property(arg_prop_copy), + mirror, src); + } else + deep_copy(mirror, src); + return mirror; + } +#if defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \ + !defined(KOKKOS_COMPILER_MSVC) + __builtin_unreachable(); +#endif } template diff --git a/lib/kokkos/containers/src/Kokkos_DynamicView.hpp b/lib/kokkos/containers/src/Kokkos_DynamicView.hpp index 12885edbae..a4b74e246e 100644 --- a/lib/kokkos/containers/src/Kokkos_DynamicView.hpp +++ b/lib/kokkos/containers/src/Kokkos_DynamicView.hpp @@ -590,96 +590,81 @@ struct MirrorDynamicViewType { } // namespace Impl namespace Impl { -template -inline auto create_mirror( - const Kokkos::Experimental::DynamicView& src, - const Impl::ViewCtorProp& arg_prop, - std::enable_if_t::has_memory_space>* = - nullptr) { - using alloc_prop_input = Impl::ViewCtorProp; - static_assert( - !alloc_prop_input::has_label, - "The view constructor arguments passed to Kokkos::create_mirror " - "must not include a label!"); - static_assert( - !alloc_prop_input::has_pointer, - "The view constructor arguments passed to Kokkos::create_mirror must " - "not include a pointer!"); - static_assert( - !alloc_prop_input::allow_padding, - "The view constructor arguments passed to Kokkos::create_mirror must " - "not explicitly allow 
padding!"); +// create a mirror +// private interface that accepts arbitrary view constructor args passed by a +// view_alloc +template +inline auto create_mirror(const Kokkos::Experimental::DynamicView& src, + const Impl::ViewCtorProp& arg_prop) { + using alloc_prop_input = Impl::ViewCtorProp; + check_view_ctor_args_create_mirror(); auto prop_copy = Impl::with_properties_if_unset( arg_prop, std::string(src.label()).append("_mirror")); - auto ret = typename Kokkos::Experimental::DynamicView::HostMirror( - prop_copy, src.chunk_size(), src.chunk_max() * src.chunk_size()); + if constexpr (Impl::ViewCtorProp::has_memory_space) { + using MemorySpace = typename alloc_prop_input::memory_space; - ret.resize_serial(src.extent(0)); + auto ret = typename Kokkos::Impl::MirrorDynamicViewType< + MemorySpace, T, P...>::view_type(prop_copy, src.chunk_size(), + src.chunk_max() * src.chunk_size()); - return ret; + ret.resize_serial(src.extent(0)); + + return ret; + } else { + auto ret = typename Kokkos::Experimental::DynamicView::HostMirror( + prop_copy, src.chunk_size(), src.chunk_max() * src.chunk_size()); + + ret.resize_serial(src.extent(0)); + + return ret; + } +#if defined(KOKKOS_COMPILER_INTEL) || \ + (defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \ + !defined(KOKKOS_COMPILER_MSVC)) + __builtin_unreachable(); +#endif } -template -inline auto create_mirror( - const Kokkos::Experimental::DynamicView& src, - const Impl::ViewCtorProp& arg_prop, - std::enable_if_t::has_memory_space>* = - nullptr) { - using alloc_prop_input = Impl::ViewCtorProp; - - static_assert( - !alloc_prop_input::has_label, - "The view constructor arguments passed to Kokkos::create_mirror " - "must not include a label!"); - static_assert( - !alloc_prop_input::has_pointer, - "The view constructor arguments passed to Kokkos::create_mirror must " - "not include a pointer!"); - static_assert( - !alloc_prop_input::allow_padding, - "The view constructor arguments passed to Kokkos::create_mirror 
must " - "not explicitly allow padding!"); - - using MemorySpace = typename alloc_prop_input::memory_space; - auto prop_copy = Impl::with_properties_if_unset( - arg_prop, std::string(src.label()).append("_mirror")); - - auto ret = typename Kokkos::Impl::MirrorDynamicViewType< - MemorySpace, T, P...>::view_type(prop_copy, src.chunk_size(), - src.chunk_max() * src.chunk_size()); - - ret.resize_serial(src.extent(0)); - - return ret; -} } // namespace Impl -// Create a mirror in host space -template +// public interface +template ::specialize>>> inline auto create_mirror( const Kokkos::Experimental::DynamicView& src) { return Impl::create_mirror(src, Impl::ViewCtorProp<>{}); } -template +// public interface that accepts a without initializing flag +template ::specialize>>> inline auto create_mirror( Kokkos::Impl::WithoutInitializing_t wi, const Kokkos::Experimental::DynamicView& src) { return Impl::create_mirror(src, Kokkos::view_alloc(wi)); } -// Create a mirror in a new space -template +// public interface that accepts a space +template ::value && + std::is_void_v::specialize>>> inline auto create_mirror( const Space&, const Kokkos::Experimental::DynamicView& src) { return Impl::create_mirror( src, Kokkos::view_alloc(typename Space::memory_space{})); } -template +// public interface that accepts a space and a without initializing flag +template ::value && + std::is_void_v::specialize>>> typename Kokkos::Impl::MirrorDynamicViewType::view_type create_mirror(Kokkos::Impl::WithoutInitializing_t wi, const Space&, const Kokkos::Experimental::DynamicView& src) { @@ -687,7 +672,11 @@ create_mirror(Kokkos::Impl::WithoutInitializing_t wi, const Space&, src, Kokkos::view_alloc(wi, typename Space::memory_space{})); } -template +// public interface that accepts arbitrary view constructor args passed by a +// view_alloc +template ::specialize>>> inline auto create_mirror( const Impl::ViewCtorProp& arg_prop, const Kokkos::Experimental::DynamicView& src) { @@ -696,76 +685,56 @@ 
inline auto create_mirror( namespace Impl { +// create a mirror view +// private interface that accepts arbitrary view constructor args passed by a +// view_alloc template -inline std::enable_if_t< - !Impl::ViewCtorProp::has_memory_space && - (std::is_same< - typename Kokkos::Experimental::DynamicView::memory_space, - typename Kokkos::Experimental::DynamicView< - T, P...>::HostMirror::memory_space>::value && - std::is_same< - typename Kokkos::Experimental::DynamicView::data_type, - typename Kokkos::Experimental::DynamicView< - T, P...>::HostMirror::data_type>::value), - typename Kokkos::Experimental::DynamicView::HostMirror> -create_mirror_view(const Kokkos::Experimental::DynamicView& src, - const Impl::ViewCtorProp&) { - return src; +inline auto create_mirror_view( + const Kokkos::Experimental::DynamicView& src, + [[maybe_unused]] const Impl::ViewCtorProp& arg_prop) { + if constexpr (!Impl::ViewCtorProp::has_memory_space) { + if constexpr (std::is_same::memory_space, + typename Kokkos::Experimental::DynamicView< + T, P...>::HostMirror::memory_space>::value && + std::is_same::data_type, + typename Kokkos::Experimental::DynamicView< + T, P...>::HostMirror::data_type>::value) { + return + typename Kokkos::Experimental::DynamicView::HostMirror(src); + } else { + return Kokkos::Impl::choose_create_mirror(src, arg_prop); + } + } else { + if constexpr (Impl::MirrorDynamicViewType< + typename Impl::ViewCtorProp< + ViewCtorArgs...>::memory_space, + T, P...>::is_same_memspace) { + return typename Impl::MirrorDynamicViewType< + typename Impl::ViewCtorProp::memory_space, T, + P...>::view_type(src); + } else { + return Kokkos::Impl::choose_create_mirror(src, arg_prop); + } + } +#if defined(KOKKOS_COMPILER_INTEL) || \ + (defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \ + !defined(KOKKOS_COMPILER_MSVC)) + __builtin_unreachable(); +#endif } -template -inline std::enable_if_t< - !Impl::ViewCtorProp::has_memory_space && - !(std::is_same< - typename 
Kokkos::Experimental::DynamicView::memory_space, - typename Kokkos::Experimental::DynamicView< - T, P...>::HostMirror::memory_space>::value && - std::is_same< - typename Kokkos::Experimental::DynamicView::data_type, - typename Kokkos::Experimental::DynamicView< - T, P...>::HostMirror::data_type>::value), - typename Kokkos::Experimental::DynamicView::HostMirror> -create_mirror_view(const Kokkos::Experimental::DynamicView& src, - const Impl::ViewCtorProp& arg_prop) { - return Kokkos::create_mirror(arg_prop, src); -} - -template ::has_memory_space>> -std::enable_if_t::memory_space, - T, P...>::is_same_memspace, - typename Impl::MirrorDynamicViewType< - typename Impl::ViewCtorProp::memory_space, - T, P...>::view_type> -create_mirror_view(const Kokkos::Experimental::DynamicView& src, - const Impl::ViewCtorProp&) { - return src; -} - -template ::has_memory_space>> -std::enable_if_t::memory_space, - T, P...>::is_same_memspace, - typename Impl::MirrorDynamicViewType< - typename Impl::ViewCtorProp::memory_space, - T, P...>::view_type> -create_mirror_view(const Kokkos::Experimental::DynamicView& src, - const Impl::ViewCtorProp& arg_prop) { - return Kokkos::Impl::create_mirror(src, arg_prop); -} } // namespace Impl -// Create a mirror view in host space +// public interface template inline auto create_mirror_view( const typename Kokkos::Experimental::DynamicView& src) { return Impl::create_mirror_view(src, Impl::ViewCtorProp<>{}); } +// public interface that accepts a without initializing flag template inline auto create_mirror_view( Kokkos::Impl::WithoutInitializing_t wi, @@ -773,15 +742,18 @@ inline auto create_mirror_view( return Impl::create_mirror_view(src, Kokkos::view_alloc(wi)); } -// Create a mirror in a new space -template +// public interface that accepts a space +template ::value>> inline auto create_mirror_view( const Space&, const Kokkos::Experimental::DynamicView& src) { return Impl::create_mirror_view(src, view_alloc(typename Space::memory_space{})); } 
-template +// public interface that accepts a space and a without initializing flag +template ::value>> inline auto create_mirror_view( Kokkos::Impl::WithoutInitializing_t wi, const Space&, const Kokkos::Experimental::DynamicView& src) { @@ -789,6 +761,8 @@ inline auto create_mirror_view( src, Kokkos::view_alloc(wi, typename Space::memory_space{})); } +// public interface that accepts arbitrary view constructor args passed by a +// view_alloc template inline auto create_mirror_view( const Impl::ViewCtorProp& arg_prop, @@ -985,80 +959,57 @@ struct ViewCopy, } // namespace Impl -template +// create a mirror view and deep copy it +// public interface that accepts arbitrary view constructor args passed by a +// view_alloc +template ::specialize>::value>> auto create_mirror_view_and_copy( - const Impl::ViewCtorProp&, - const Kokkos::Experimental::DynamicView& src, - std::enable_if_t< - std::is_void::specialize>::value && - Impl::MirrorDynamicViewType< - typename Impl::ViewCtorProp::memory_space, T, - P...>::is_same_memspace>* = nullptr) { + [[maybe_unused]] const Impl::ViewCtorProp& arg_prop, + const Kokkos::Experimental::DynamicView& src) { using alloc_prop_input = Impl::ViewCtorProp; - static_assert( - alloc_prop_input::has_memory_space, - "The view constructor arguments passed to " - "Kokkos::create_mirror_view_and_copy must include a memory space!"); - static_assert(!alloc_prop_input::has_pointer, - "The view constructor arguments passed to " - "Kokkos::create_mirror_view_and_copy must " - "not include a pointer!"); - static_assert(!alloc_prop_input::allow_padding, - "The view constructor arguments passed to " - "Kokkos::create_mirror_view_and_copy must " - "not explicitly allow padding!"); - // same behavior as deep_copy(src, src) - if (!alloc_prop_input::has_execution_space) - fence( - "Kokkos::create_mirror_view_and_copy: fence before returning src view"); - return src; + Impl::check_view_ctor_args_create_mirror_view_and_copy(); + + if constexpr 
(Impl::MirrorDynamicViewType< + typename Impl::ViewCtorProp::memory_space, + T, P...>::is_same_memspace) { + // same behavior as deep_copy(src, src) + if constexpr (!alloc_prop_input::has_execution_space) + fence( + "Kokkos::create_mirror_view_and_copy: fence before returning src " + "view"); + return src; + } else { + using Space = typename alloc_prop_input::memory_space; + using Mirror = + typename Impl::MirrorDynamicViewType::view_type; + + auto arg_prop_copy = Impl::with_properties_if_unset( + arg_prop, std::string{}, WithoutInitializing, + typename Space::execution_space{}); + + std::string& label = Impl::get_property(arg_prop_copy); + if (label.empty()) label = src.label(); + auto mirror = typename Mirror::non_const_type( + arg_prop_copy, src.chunk_size(), src.chunk_max() * src.chunk_size()); + mirror.resize_serial(src.extent(0)); + if constexpr (alloc_prop_input::has_execution_space) { + deep_copy(Impl::get_property(arg_prop_copy), + mirror, src); + } else + deep_copy(mirror, src); + return mirror; + } +#if defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \ + !defined(KOKKOS_COMPILER_MSVC) + __builtin_unreachable(); +#endif } -template -auto create_mirror_view_and_copy( - const Impl::ViewCtorProp& arg_prop, - const Kokkos::Experimental::DynamicView& src, - std::enable_if_t< - std::is_void::specialize>::value && - !Impl::MirrorDynamicViewType< - typename Impl::ViewCtorProp::memory_space, T, - P...>::is_same_memspace>* = nullptr) { - using alloc_prop_input = Impl::ViewCtorProp; - static_assert( - alloc_prop_input::has_memory_space, - "The view constructor arguments passed to " - "Kokkos::create_mirror_view_and_copy must include a memory space!"); - static_assert(!alloc_prop_input::has_pointer, - "The view constructor arguments passed to " - "Kokkos::create_mirror_view_and_copy must " - "not include a pointer!"); - static_assert(!alloc_prop_input::allow_padding, - "The view constructor arguments passed to " - "Kokkos::create_mirror_view_and_copy 
must " - "not explicitly allow padding!"); - using Space = typename alloc_prop_input::memory_space; - using Mirror = - typename Impl::MirrorDynamicViewType::view_type; - - auto arg_prop_copy = Impl::with_properties_if_unset( - arg_prop, std::string{}, WithoutInitializing, - typename Space::execution_space{}); - - std::string& label = Impl::get_property(arg_prop_copy); - if (label.empty()) label = src.label(); - auto mirror = typename Mirror::non_const_type( - arg_prop_copy, src.chunk_size(), src.chunk_max() * src.chunk_size()); - mirror.resize_serial(src.extent(0)); - if constexpr (alloc_prop_input::has_execution_space) { - deep_copy(Impl::get_property(arg_prop_copy), - mirror, src); - } else - deep_copy(mirror, src); - return mirror; -} - -template +template ::value>> auto create_mirror_view_and_copy( const Space&, const Kokkos::Experimental::DynamicView& src, std::string const& name = "") { diff --git a/lib/kokkos/containers/src/Kokkos_OffsetView.hpp b/lib/kokkos/containers/src/Kokkos_OffsetView.hpp index 91a7e4a927..3adc70b190 100644 --- a/lib/kokkos/containers/src/Kokkos_OffsetView.hpp +++ b/lib/kokkos/containers/src/Kokkos_OffsetView.hpp @@ -471,62 +471,31 @@ class OffsetView : public ViewTraits { template KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< (Kokkos::Impl::are_integral::value && (2 == Rank) && - is_default_map && is_layout_left && (traits::rank_dynamic == 0)), + is_default_map && + (is_layout_left || is_layout_right || is_layout_stride)), reference_type> operator()(const I0& i0, const I1& i1) const { KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY((m_track, m_map, m_begins, i0, i1)) const size_t j0 = i0 - m_begins[0]; const size_t j1 = i1 - m_begins[1]; - return m_map.m_impl_handle[j0 + m_map.m_impl_offset.m_dim.N0 * j1]; - } - - template - KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< - (Kokkos::Impl::are_integral::value && (2 == Rank) && - is_default_map && is_layout_left && (traits::rank_dynamic != 0)), - reference_type> - operator()(const I0& i0, const 
I1& i1) const { - KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY((m_track, m_map, m_begins, i0, i1)) - const size_t j0 = i0 - m_begins[0]; - const size_t j1 = i1 - m_begins[1]; - return m_map.m_impl_handle[j0 + m_map.m_impl_offset.m_stride * j1]; - } - - template - KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< - (Kokkos::Impl::are_integral::value && (2 == Rank) && - is_default_map && is_layout_right && (traits::rank_dynamic == 0)), - reference_type> - operator()(const I0& i0, const I1& i1) const { - KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY((m_track, m_map, m_begins, i0, i1)) - const size_t j0 = i0 - m_begins[0]; - const size_t j1 = i1 - m_begins[1]; - return m_map.m_impl_handle[j1 + m_map.m_impl_offset.m_dim.N1 * j0]; - } - - template - KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< - (Kokkos::Impl::are_integral::value && (2 == Rank) && - is_default_map && is_layout_right && (traits::rank_dynamic != 0)), - reference_type> - operator()(const I0& i0, const I1& i1) const { - KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY((m_track, m_map, m_begins, i0, i1)) - const size_t j0 = i0 - m_begins[0]; - const size_t j1 = i1 - m_begins[1]; - return m_map.m_impl_handle[j1 + m_map.m_impl_offset.m_stride * j0]; - } - - template - KOKKOS_FORCEINLINE_FUNCTION - std::enable_if_t<(Kokkos::Impl::are_integral::value && - (2 == Rank) && is_default_map && is_layout_stride), - reference_type> - operator()(const I0& i0, const I1& i1) const { - KOKKOS_IMPL_OFFSETVIEW_OPERATOR_VERIFY((m_track, m_map, m_begins, i0, i1)) - const size_t j0 = i0 - m_begins[0]; - const size_t j1 = i1 - m_begins[1]; - return m_map.m_impl_handle[j0 * m_map.m_impl_offset.m_stride.S0 + - j1 * m_map.m_impl_offset.m_stride.S1]; + if constexpr (is_layout_left) { + if constexpr (traits::rank_dynamic == 0) + return m_map.m_impl_handle[j0 + m_map.m_impl_offset.m_dim.N0 * j1]; + else + return m_map.m_impl_handle[j0 + m_map.m_impl_offset.m_stride * j1]; + } else if constexpr (is_layout_right) { + if constexpr (traits::rank_dynamic == 0) + 
return m_map.m_impl_handle[j1 + m_map.m_impl_offset.m_dim.N1 * j0]; + else + return m_map.m_impl_handle[j1 + m_map.m_impl_offset.m_stride * j0]; + } else { + static_assert(is_layout_stride); + return m_map.m_impl_handle[j0 * m_map.m_impl_offset.m_stride.S0 + + j1 * m_map.m_impl_offset.m_stride.S1]; + } +#if defined(KOKKOS_COMPILER_INTEL) + __builtin_unreachable(); +#endif } //------------------------------ @@ -1841,71 +1810,73 @@ struct MirrorOffsetType { } // namespace Impl namespace Impl { -template -inline std::enable_if_t< - !Impl::ViewCtorProp::has_memory_space, - typename Kokkos::Experimental::OffsetView::HostMirror> -create_mirror(const Kokkos::Experimental::OffsetView& src, - const Impl::ViewCtorProp& arg_prop) { - return typename Kokkos::Experimental::OffsetView::HostMirror( - Kokkos::create_mirror(arg_prop, src.view()), src.begins()); -} -template ::has_memory_space>> +// create a mirror +// private interface that accepts arbitrary view constructor args passed by a +// view_alloc +template inline auto create_mirror(const Kokkos::Experimental::OffsetView& src, const Impl::ViewCtorProp& arg_prop) { - using alloc_prop_input = Impl::ViewCtorProp; - using Space = typename Impl::ViewCtorProp::memory_space; + check_view_ctor_args_create_mirror(); - static_assert( - !alloc_prop_input::has_label, - "The view constructor arguments passed to Kokkos::create_mirror " - "must not include a label!"); - static_assert( - !alloc_prop_input::has_pointer, - "The view constructor arguments passed to Kokkos::create_mirror must " - "not include a pointer!"); - static_assert( - !alloc_prop_input::allow_padding, - "The view constructor arguments passed to Kokkos::create_mirror must " - "not explicitly allow padding!"); + if constexpr (Impl::ViewCtorProp::has_memory_space) { + using Space = typename Impl::ViewCtorProp::memory_space; - auto prop_copy = Impl::with_properties_if_unset( - arg_prop, std::string(src.label()).append("_mirror")); + auto prop_copy = 
Impl::with_properties_if_unset( + arg_prop, std::string(src.label()).append("_mirror")); - return typename Kokkos::Impl::MirrorOffsetType::view_type( - prop_copy, src.layout(), - {src.begin(0), src.begin(1), src.begin(2), src.begin(3), src.begin(4), - src.begin(5), src.begin(6), src.begin(7)}); + return typename Kokkos::Impl::MirrorOffsetType::view_type( + prop_copy, src.layout(), + {src.begin(0), src.begin(1), src.begin(2), src.begin(3), src.begin(4), + src.begin(5), src.begin(6), src.begin(7)}); + } else { + return typename Kokkos::Experimental::OffsetView::HostMirror( + Kokkos::create_mirror(arg_prop, src.view()), src.begins()); + } +#if defined(KOKKOS_COMPILER_INTEL) || \ + (defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \ + !defined(KOKKOS_COMPILER_MSVC)) + __builtin_unreachable(); +#endif } + } // namespace Impl -// Create a mirror in host space -template +// public interface +template ::specialize>>> inline auto create_mirror( const Kokkos::Experimental::OffsetView& src) { return Impl::create_mirror(src, Impl::ViewCtorProp<>{}); } -template +// public interface that accepts a without initializing flag +template ::specialize>>> inline auto create_mirror( Kokkos::Impl::WithoutInitializing_t wi, const Kokkos::Experimental::OffsetView& src) { return Impl::create_mirror(src, Kokkos::view_alloc(wi)); } -// Create a mirror in a new space +// public interface that accepts a space template ::value>> + typename Enable = std::enable_if_t< + Kokkos::is_space::value && + std::is_void_v::specialize>>> inline auto create_mirror( const Space&, const Kokkos::Experimental::OffsetView& src) { return Impl::create_mirror( src, Kokkos::view_alloc(typename Space::memory_space{})); } -template +// public interface that accepts a space and a without initializing flag +template ::value && + std::is_void_v::specialize>>> typename Kokkos::Impl::MirrorOffsetType::view_type create_mirror(Kokkos::Impl::WithoutInitializing_t wi, const Space&, const 
Kokkos::Experimental::OffsetView& src) { @@ -1913,7 +1884,11 @@ create_mirror(Kokkos::Impl::WithoutInitializing_t wi, const Space&, src, Kokkos::view_alloc(typename Space::memory_space{}, wi)); } -template +// public interface that accepts arbitrary view constructor args passed by a +// view_alloc +template ::specialize>>> inline auto create_mirror( const Impl::ViewCtorProp& arg_prop, const Kokkos::Experimental::OffsetView& src) { @@ -1921,76 +1896,56 @@ inline auto create_mirror( } namespace Impl { + +// create a mirror view +// private interface that accepts arbitrary view constructor args passed by a +// view_alloc template -inline std::enable_if_t< - !Impl::ViewCtorProp::has_memory_space && - (std::is_same< - typename Kokkos::Experimental::OffsetView::memory_space, - typename Kokkos::Experimental::OffsetView< - T, P...>::HostMirror::memory_space>::value && - std::is_same< - typename Kokkos::Experimental::OffsetView::data_type, - typename Kokkos::Experimental::OffsetView< - T, P...>::HostMirror::data_type>::value), - typename Kokkos::Experimental::OffsetView::HostMirror> -create_mirror_view(const Kokkos::Experimental::OffsetView& src, - const Impl::ViewCtorProp&) { - return src; +inline auto create_mirror_view( + const Kokkos::Experimental::OffsetView& src, + [[maybe_unused]] const Impl::ViewCtorProp& arg_prop) { + if constexpr (!Impl::ViewCtorProp::has_memory_space) { + if constexpr (std::is_same::memory_space, + typename Kokkos::Experimental::OffsetView< + T, P...>::HostMirror::memory_space>::value && + std::is_same::data_type, + typename Kokkos::Experimental::OffsetView< + T, P...>::HostMirror::data_type>::value) { + return + typename Kokkos::Experimental::OffsetView::HostMirror(src); + } else { + return Kokkos::Impl::choose_create_mirror(src, arg_prop); + } + } else { + if constexpr (Impl::MirrorOffsetViewType::memory_space, + T, P...>::is_same_memspace) { + return typename Impl::MirrorOffsetViewType< + typename Impl::ViewCtorProp::memory_space, T, + 
P...>::view_type(src); + } else { + return Kokkos::Impl::choose_create_mirror(src, arg_prop); + } + } +#if defined(KOKKOS_COMPILER_INTEL) || \ + (defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \ + !defined(KOKKOS_COMPILER_MSVC)) + __builtin_unreachable(); +#endif } -template -inline std::enable_if_t< - !Impl::ViewCtorProp::has_memory_space && - !(std::is_same< - typename Kokkos::Experimental::OffsetView::memory_space, - typename Kokkos::Experimental::OffsetView< - T, P...>::HostMirror::memory_space>::value && - std::is_same< - typename Kokkos::Experimental::OffsetView::data_type, - typename Kokkos::Experimental::OffsetView< - T, P...>::HostMirror::data_type>::value), - typename Kokkos::Experimental::OffsetView::HostMirror> -create_mirror_view(const Kokkos::Experimental::OffsetView& src, - const Impl::ViewCtorProp& arg_prop) { - return Kokkos::create_mirror(arg_prop, src); -} - -template ::has_memory_space>> -std::enable_if_t::memory_space, - T, P...>::is_same_memspace, - typename Impl::MirrorOffsetViewType< - typename Impl::ViewCtorProp::memory_space, - T, P...>::view_type> -create_mirror_view(const Kokkos::Experimental::OffsetView& src, - const Impl::ViewCtorProp&) { - return src; -} - -template ::has_memory_space>> -std::enable_if_t::memory_space, - T, P...>::is_same_memspace, - typename Impl::MirrorOffsetViewType< - typename Impl::ViewCtorProp::memory_space, - T, P...>::view_type> -create_mirror_view(const Kokkos::Experimental::OffsetView& src, - const Impl::ViewCtorProp& arg_prop) { - return Kokkos::Impl::create_mirror(src, arg_prop); -} } // namespace Impl -// Create a mirror view in host space +// public interface template inline auto create_mirror_view( const typename Kokkos::Experimental::OffsetView& src) { return Impl::create_mirror_view(src, Impl::ViewCtorProp<>{}); } +// public interface that accepts a without initializing flag template inline auto create_mirror_view( Kokkos::Impl::WithoutInitializing_t wi, @@ -1998,7 +1953,7 @@ inline 
auto create_mirror_view( return Impl::create_mirror_view(src, Kokkos::view_alloc(wi)); } -// Create a mirror view in a new space +// public interface that accepts a space template ::value>> inline auto create_mirror_view( @@ -2007,7 +1962,9 @@ inline auto create_mirror_view( src, Kokkos::view_alloc(typename Space::memory_space{})); } -template +// public interface that accepts a space and a without initializing flag +template ::value>> inline auto create_mirror_view( Kokkos::Impl::WithoutInitializing_t wi, const Space&, const Kokkos::Experimental::OffsetView& src) { @@ -2015,6 +1972,8 @@ inline auto create_mirror_view( src, Kokkos::view_alloc(typename Space::memory_space{}, wi)); } +// public interface that accepts arbitrary view constructor args passed by a +// view_alloc template inline auto create_mirror_view( const Impl::ViewCtorProp& arg_prop, @@ -2022,7 +1981,9 @@ inline auto create_mirror_view( return Impl::create_mirror_view(src, arg_prop); } -// Create a mirror view and deep_copy in a new space +// create a mirror view and deep copy it +// public interface that accepts arbitrary view constructor args passed by a +// view_alloc template typename Kokkos::Impl::MirrorOffsetViewType< typename Impl::ViewCtorProp::memory_space, T, diff --git a/lib/kokkos/containers/src/Kokkos_UnorderedMap.hpp b/lib/kokkos/containers/src/Kokkos_UnorderedMap.hpp index 78a6a238ec..c3a8b67df8 100644 --- a/lib/kokkos/containers/src/Kokkos_UnorderedMap.hpp +++ b/lib/kokkos/containers/src/Kokkos_UnorderedMap.hpp @@ -805,56 +805,94 @@ class UnorderedMap { return *this; } + // Re-allocate the views of the calling UnorderedMap according to src + // capacity, and deep copy the src data. 
template std::enable_if_t, key_type>::value && std::is_same, value_type>::value> create_copy_view( UnorderedMap const &src) { if (m_hash_lists.data() != src.m_hash_lists.data()) { - insertable_map_type tmp; + allocate_view(src); + deep_copy_view(src); + } + } - tmp.m_bounded_insert = src.m_bounded_insert; - tmp.m_hasher = src.m_hasher; - tmp.m_equal_to = src.m_equal_to; - tmp.m_size() = src.m_size(); - tmp.m_available_indexes = bitset_type(src.capacity()); - tmp.m_hash_lists = size_type_view( - view_alloc(WithoutInitializing, "UnorderedMap hash list"), - src.m_hash_lists.extent(0)); - tmp.m_next_index = size_type_view( - view_alloc(WithoutInitializing, "UnorderedMap next index"), - src.m_next_index.extent(0)); - tmp.m_keys = - key_type_view(view_alloc(WithoutInitializing, "UnorderedMap keys"), - src.m_keys.extent(0)); - tmp.m_values = value_type_view( - view_alloc(WithoutInitializing, "UnorderedMap values"), - src.m_values.extent(0)); - tmp.m_scalars = scalars_view("UnorderedMap scalars"); + // Allocate views of the calling UnorderedMap with the same capacity as the + // src. 
+ template + std::enable_if_t, key_type>::value && + std::is_same, value_type>::value> + allocate_view( + UnorderedMap const &src) { + insertable_map_type tmp; - Kokkos::deep_copy(tmp.m_available_indexes, src.m_available_indexes); + tmp.m_bounded_insert = src.m_bounded_insert; + tmp.m_hasher = src.m_hasher; + tmp.m_equal_to = src.m_equal_to; + tmp.m_size() = src.m_size(); + tmp.m_available_indexes = bitset_type(src.capacity()); + tmp.m_hash_lists = size_type_view( + view_alloc(WithoutInitializing, "UnorderedMap hash list"), + src.m_hash_lists.extent(0)); + tmp.m_next_index = size_type_view( + view_alloc(WithoutInitializing, "UnorderedMap next index"), + src.m_next_index.extent(0)); + tmp.m_keys = + key_type_view(view_alloc(WithoutInitializing, "UnorderedMap keys"), + src.m_keys.extent(0)); + tmp.m_values = + value_type_view(view_alloc(WithoutInitializing, "UnorderedMap values"), + src.m_values.extent(0)); + tmp.m_scalars = scalars_view("UnorderedMap scalars"); + + *this = tmp; + } + + // Deep copy view data from src. This requires that the src capacity is + // identical to the capacity of the calling UnorderedMap. + template + std::enable_if_t, key_type>::value && + std::is_same, value_type>::value> + deep_copy_view( + UnorderedMap const &src) { +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE_4 + // To deep copy UnorderedMap, capacity must be identical + KOKKOS_EXPECTS(capacity() == src.capacity()); +#else + if (capacity() != src.capacity()) { + allocate_view(src); +#ifdef KOKKOS_ENABLE_DEPRECATION_WARNINGS + Kokkos::Impl::log_warning( + "Warning: deep_copy_view() allocating views is deprecated. 
Must call " + "with UnorderedMaps of identical capacity, or use " + "create_copy_view().\n"); +#endif + } +#endif + + if (m_hash_lists.data() != src.m_hash_lists.data()) { + Kokkos::deep_copy(m_available_indexes, src.m_available_indexes); using raw_deep_copy = Kokkos::Impl::DeepCopy; - raw_deep_copy(tmp.m_hash_lists.data(), src.m_hash_lists.data(), + raw_deep_copy(m_hash_lists.data(), src.m_hash_lists.data(), sizeof(size_type) * src.m_hash_lists.extent(0)); - raw_deep_copy(tmp.m_next_index.data(), src.m_next_index.data(), + raw_deep_copy(m_next_index.data(), src.m_next_index.data(), sizeof(size_type) * src.m_next_index.extent(0)); - raw_deep_copy(tmp.m_keys.data(), src.m_keys.data(), + raw_deep_copy(m_keys.data(), src.m_keys.data(), sizeof(key_type) * src.m_keys.extent(0)); if (!is_set) { - raw_deep_copy(tmp.m_values.data(), src.m_values.data(), + raw_deep_copy(m_values.data(), src.m_values.data(), sizeof(impl_value_type) * src.m_values.extent(0)); } - raw_deep_copy(tmp.m_scalars.data(), src.m_scalars.data(), + raw_deep_copy(m_scalars.data(), src.m_scalars.data(), sizeof(int) * num_scalars); Kokkos::fence( - "Kokkos::UnorderedMap::create_copy_view: fence after copy to tmp"); - - *this = tmp; + "Kokkos::UnorderedMap::deep_copy_view: fence after copy to dst."); } } @@ -932,13 +970,25 @@ class UnorderedMap { friend struct Impl::UnorderedMapPrint; }; -// Specialization of deep_copy for two UnorderedMap objects. +// Specialization of deep_copy() for two UnorderedMap objects. template inline void deep_copy( UnorderedMap &dst, const UnorderedMap &src) { - dst.create_copy_view(src); + dst.deep_copy_view(src); +} + +// Specialization of create_mirror() for an UnorderedMap object. 
+template +typename UnorderedMap::HostMirror +create_mirror( + const UnorderedMap &src) { + typename UnorderedMap::HostMirror + dst; + dst.allocate_view(src); + return dst; } } // namespace Kokkos diff --git a/lib/kokkos/containers/unit_tests/TestDualView.hpp b/lib/kokkos/containers/unit_tests/TestDualView.hpp index a15e5fa299..2512cb5c49 100644 --- a/lib/kokkos/containers/unit_tests/TestDualView.hpp +++ b/lib/kokkos/containers/unit_tests/TestDualView.hpp @@ -55,8 +55,8 @@ struct test_dualview_alloc { bool result = false; test_dualview_alloc(unsigned int size) { - result = run_me >( - size, 3); + result = + run_me>(size, 3); } }; @@ -154,7 +154,7 @@ struct test_dualview_combinations { } test_dualview_combinations(unsigned int size, bool with_init) { - result = run_me >( + result = run_me>( size, 3, with_init); } }; @@ -253,21 +253,18 @@ struct test_dual_view_deep_copy { } // end run_me test_dual_view_deep_copy() { - run_me >(10, 5, - true); - run_me >(10, 5, - false); + run_me>(10, 5, true); + run_me>(10, 5, + false); // Test zero length but allocated (a.d_view.data!=nullptr but // a.d_view.span()==0) - run_me >(0, 5, true); - run_me >(0, 5, - false); + run_me>(0, 5, true); + run_me>(0, 5, false); // Test default constructed view - run_me >(-1, 5, - true); - run_me >(-1, 5, - false); + run_me>(-1, 5, true); + run_me>(-1, 5, + false); } }; @@ -282,15 +279,20 @@ struct test_dualview_resize { const unsigned int m = 5; const unsigned int factor = 2; - ViewType a("A", n, m); + ViewType a; + if constexpr (Initialize) + a = ViewType("A", n, m); + else + a = ViewType(Kokkos::view_alloc(Kokkos::WithoutInitializing, "A"), n, m); + Kokkos::deep_copy(a.d_view, 1); /* Covers case "Resize on Device" */ a.modify_device(); - if (Initialize) - Kokkos::resize(Kokkos::WithoutInitializing, a, factor * n, factor * m); - else + if constexpr (Initialize) Kokkos::resize(a, factor * n, factor * m); + else + Kokkos::resize(Kokkos::WithoutInitializing, a, factor * n, factor * m); 
ASSERT_EQ(a.extent(0), n * factor); ASSERT_EQ(a.extent(1), m * factor); @@ -298,33 +300,38 @@ struct test_dualview_resize { a.sync_host(); // Check device view is initialized as expected - scalar_type a_d_sum = 0; // Execute on the execution_space associated with t_dev's memory space using t_dev_exec_space = typename ViewType::t_dev::memory_space::execution_space; - Kokkos::parallel_reduce( - Kokkos::RangePolicy(0, a.d_view.extent(0)), - SumViewEntriesFunctor(a.d_view), - a_d_sum); + Kokkos::View errors_d( + "errors"); + Kokkos::parallel_for( + Kokkos::MDRangePolicy>( + {0, 0}, {a.d_view.extent(0), a.d_view.extent(1)}), + KOKKOS_LAMBDA(int i, int j) { + if (a.d_view(i, j) != 1) Kokkos::atomic_inc(errors_d.data()); + }); + int errors_d_scalar; + Kokkos::deep_copy(errors_d_scalar, errors_d); // Check host view is synced as expected - scalar_type a_h_sum = 0; + int errors_h_scalar = 0; for (size_t i = 0; i < a.h_view.extent(0); ++i) for (size_t j = 0; j < a.h_view.extent(1); ++j) { - a_h_sum += a.h_view(i, j); + if (a.h_view(i, j) != 1) ++errors_h_scalar; } // Check - ASSERT_EQ(a_h_sum, a_d_sum); - ASSERT_EQ(a_h_sum, scalar_type(a.extent(0) * a.extent(1))); + ASSERT_EQ(errors_d_scalar, 0); + ASSERT_EQ(errors_h_scalar, 0); /* Covers case "Resize on Host" */ a.modify_host(); - if (Initialize) - Kokkos::resize(Kokkos::WithoutInitializing, a, n / factor, m / factor); - else + if constexpr (Initialize) Kokkos::resize(a, n / factor, m / factor); + else + Kokkos::resize(Kokkos::WithoutInitializing, a, n / factor, m / factor); ASSERT_EQ(a.extent(0), n / factor); ASSERT_EQ(a.extent(1), m / factor); @@ -332,30 +339,33 @@ struct test_dualview_resize { a.sync_device(Kokkos::DefaultExecutionSpace{}); // Check device view is initialized as expected - a_d_sum = 0; + Kokkos::deep_copy(errors_d, 0); // Execute on the execution_space associated with t_dev's memory space using t_dev_exec_space = typename ViewType::t_dev::memory_space::execution_space; - Kokkos::parallel_reduce( - 
Kokkos::RangePolicy(0, a.d_view.extent(0)), - SumViewEntriesFunctor(a.d_view), - a_d_sum); + Kokkos::parallel_for( + Kokkos::MDRangePolicy>( + {0, 0}, {a.d_view.extent(0), a.d_view.extent(1)}), + KOKKOS_LAMBDA(int i, int j) { + if (a.d_view(i, j) != 1) Kokkos::atomic_inc(errors_d.data()); + }); + Kokkos::deep_copy(errors_d_scalar, errors_d); // Check host view is synced as expected - a_h_sum = 0; + errors_h_scalar = 0; for (size_t i = 0; i < a.h_view.extent(0); ++i) for (size_t j = 0; j < a.h_view.extent(1); ++j) { - a_h_sum += a.h_view(i, j); + if (a.h_view(i, j) != 1) ++errors_h_scalar; } // Check - ASSERT_EQ(a_h_sum, scalar_type(a.extent(0) * a.extent(1))); - ASSERT_EQ(a_h_sum, a_d_sum); + ASSERT_EQ(errors_d_scalar, 0); + ASSERT_EQ(errors_h_scalar, 0); } // end run_me test_dualview_resize() { - run_me >(); + run_me>(); } }; @@ -369,40 +379,51 @@ struct test_dualview_realloc { const unsigned int n = 10; const unsigned int m = 5; - ViewType a("A", n, m); - if (Initialize) - Kokkos::realloc(Kokkos::WithoutInitializing, a, n, m); - else + ViewType a; + if constexpr (Initialize) { + a = ViewType("A", n, m); Kokkos::realloc(a, n, m); + } else { + a = ViewType(Kokkos::view_alloc(Kokkos::WithoutInitializing, "A"), n, m); + Kokkos::realloc(Kokkos::WithoutInitializing, a, n, m); + } + ASSERT_EQ(a.extent(0), n); + ASSERT_EQ(a.extent(1), m); Kokkos::deep_copy(a.d_view, 1); + a.modify_device(); a.sync_host(); // Check device view is initialized as expected - scalar_type a_d_sum = 0; // Execute on the execution_space associated with t_dev's memory space using t_dev_exec_space = typename ViewType::t_dev::memory_space::execution_space; - Kokkos::parallel_reduce( - Kokkos::RangePolicy(0, a.d_view.extent(0)), - SumViewEntriesFunctor(a.d_view), - a_d_sum); + Kokkos::View errors_d( + "errors"); + Kokkos::parallel_for( + Kokkos::MDRangePolicy>( + {0, 0}, {a.d_view.extent(0), a.d_view.extent(1)}), + KOKKOS_LAMBDA(int i, int j) { + if (a.d_view(i, j) != 1) 
Kokkos::atomic_inc(errors_d.data()); + }); + int errors_d_scalar; + Kokkos::deep_copy(errors_d_scalar, errors_d); // Check host view is synced as expected - scalar_type a_h_sum = 0; + int errors_h_scalar = 0; for (size_t i = 0; i < a.h_view.extent(0); ++i) for (size_t j = 0; j < a.h_view.extent(1); ++j) { - a_h_sum += a.h_view(i, j); + if (a.h_view(i, j) != 1) ++errors_h_scalar; } // Check - ASSERT_EQ(a_h_sum, scalar_type(a.extent(0) * a.extent(1))); - ASSERT_EQ(a_h_sum, a_d_sum); + ASSERT_EQ(errors_d_scalar, 0); + ASSERT_EQ(errors_h_scalar, 0); } // end run_me test_dualview_realloc() { - run_me >(); + run_me>(); } }; @@ -463,12 +484,23 @@ TEST(TEST_CATEGORY, dualview_deep_copy) { test_dualview_deep_copy(); } +struct NoDefaultConstructor { + NoDefaultConstructor(int i_) : i(i_) {} + KOKKOS_FUNCTION operator int() const { return i; } + + int i; +}; + TEST(TEST_CATEGORY, dualview_realloc) { test_dualview_realloc(); + Impl::test_dualview_realloc(); } TEST(TEST_CATEGORY, dualview_resize) { test_dualview_resize(); + Impl::test_dualview_resize(); } namespace { diff --git a/lib/kokkos/containers/unit_tests/TestUnorderedMap.hpp b/lib/kokkos/containers/unit_tests/TestUnorderedMap.hpp index f63f1c6afe..4a7e826ecb 100644 --- a/lib/kokkos/containers/unit_tests/TestUnorderedMap.hpp +++ b/lib/kokkos/containers/unit_tests/TestUnorderedMap.hpp @@ -68,7 +68,7 @@ struct TestInsert { } while (rehash_on_fail && failed_count > 0u); // Trigger the m_size mutable bug. 
- typename map_type::HostMirror map_h; + auto map_h = create_mirror(map); execution_space().fence(); Kokkos::deep_copy(map_h, map); execution_space().fence(); @@ -367,7 +367,7 @@ void test_deep_copy(uint32_t num_nodes) { } } - host_map_type hmap; + auto hmap = create_mirror(map); Kokkos::deep_copy(hmap, map); ASSERT_EQ(map.size(), hmap.size()); @@ -380,6 +380,7 @@ void test_deep_copy(uint32_t num_nodes) { } map_type mmap; + mmap.allocate_view(hmap); Kokkos::deep_copy(mmap, hmap); const_map_type cmap = mmap; @@ -424,7 +425,7 @@ TEST(TEST_CATEGORY, UnorderedMap_valid_empty) { Map n{}; n = Map{m.capacity()}; n.rehash(m.capacity()); - Kokkos::deep_copy(n, m); + n.create_copy_view(m); ASSERT_TRUE(m.is_allocated()); ASSERT_TRUE(n.is_allocated()); } diff --git a/lib/kokkos/containers/unit_tests/TestVector.hpp b/lib/kokkos/containers/unit_tests/TestVector.hpp index a7d341b789..abed2676d7 100644 --- a/lib/kokkos/containers/unit_tests/TestVector.hpp +++ b/lib/kokkos/containers/unit_tests/TestVector.hpp @@ -21,6 +21,8 @@ #include #include #include +#include +KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_PUSH() #include namespace Test { @@ -231,7 +233,7 @@ void test_vector_allocate(unsigned int size) { TEST(TEST_CATEGORY, vector_combination) { test_vector_allocate(10); test_vector_combinations(10); - test_vector_combinations(3057); + test_vector_combinations(3057); } TEST(TEST_CATEGORY, vector_insert) { diff --git a/lib/kokkos/containers/unit_tests/TestWithoutInitializing.hpp b/lib/kokkos/containers/unit_tests/TestWithoutInitializing.hpp index 7201cd402a..e8558628dc 100644 --- a/lib/kokkos/containers/unit_tests/TestWithoutInitializing.hpp +++ b/lib/kokkos/containers/unit_tests/TestWithoutInitializing.hpp @@ -37,6 +37,17 @@ #endif ///@} +/// Some tests are skipped for unified memory space +#if defined(KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY) +#define GTEST_SKIP_IF_UNIFIED_MEMORY_SPACE \ + if constexpr (std::is_same_v) \ + GTEST_SKIP() << "skipping since unified memory requires 
additional " \ + "fences"; +#else +#define GTEST_SKIP_IF_UNIFIED_MEMORY_SPACE +#endif + TEST(TEST_CATEGORY, resize_realloc_no_init_dualview) { using namespace Kokkos::Test::Tools; listen_tool_events(Config::DisableAll(), Config::EnableKernels()); @@ -657,6 +668,7 @@ TEST(TEST_CATEGORY, create_mirror_no_init_dynamicview) { TEST(TEST_CATEGORY, create_mirror_view_and_copy_dynamicview) { GTEST_SKIP_IF_CUDAUVM_MEMORY_SPACE + GTEST_SKIP_IF_UNIFIED_MEMORY_SPACE using namespace Kokkos::Test::Tools; listen_tool_events(Config::DisableAll(), Config::EnableKernels(), diff --git a/lib/kokkos/core/perf_test/test_atomic.cpp b/lib/kokkos/core/perf_test/test_atomic.cpp index ce3059f47d..af74723e7e 100644 --- a/lib/kokkos/core/perf_test/test_atomic.cpp +++ b/lib/kokkos/core/perf_test/test_atomic.cpp @@ -390,7 +390,7 @@ static void Test_Atomic(benchmark::State& state) { static constexpr int LOOP = 100'000; -BENCHMARK(Test_Atomic)->Arg(LOOP)->Iterations(10); +BENCHMARK(Test_Atomic)->Arg(30'000)->Iterations(10); BENCHMARK(Test_Atomic)->Arg(LOOP)->Iterations(10); BENCHMARK(Test_Atomic)->Arg(LOOP)->Iterations(10); BENCHMARK(Test_Atomic)->Arg(LOOP)->Iterations(10); @@ -398,4 +398,3 @@ BENCHMARK(Test_Atomic)->Arg(LOOP)->Iterations(10); BENCHMARK(Test_Atomic)->Arg(LOOP)->Iterations(10); BENCHMARK(Test_Atomic)->Arg(LOOP)->Iterations(10); BENCHMARK(Test_Atomic)->Arg(LOOP)->Iterations(10); -BENCHMARK(Test_Atomic)->Arg(LOOP)->Iterations(10); diff --git a/lib/kokkos/core/perf_test/test_atomic_minmax_simple.cpp b/lib/kokkos/core/perf_test/test_atomic_minmax_simple.cpp index b838c8eccf..bc35d1c776 100644 --- a/lib/kokkos/core/perf_test/test_atomic_minmax_simple.cpp +++ b/lib/kokkos/core/perf_test/test_atomic_minmax_simple.cpp @@ -183,7 +183,8 @@ double atomic_contentious_max_replacement(benchmark::State& state, Kokkos::parallel_reduce( con_length, KOKKOS_LAMBDA(const int i, T& inner) { - inner = Kokkos::atomic_max_fetch(&(input(0)), inner + 1); + inner = Kokkos::atomic_max_fetch(&(input(0)), + 
Kokkos::min(inner, max - 1) + 1); if (i == con_length - 1) { Kokkos::atomic_max_fetch(&(input(0)), max); inner = max; @@ -223,7 +224,8 @@ double atomic_contentious_min_replacement(benchmark::State& state, Kokkos::parallel_reduce( con_length, KOKKOS_LAMBDA(const int i, T& inner) { - inner = Kokkos::atomic_min_fetch(&(input(0)), inner - 1); + inner = Kokkos::atomic_min_fetch(&(input(0)), + Kokkos::max(inner, min + 1) - 1); if (i == con_length - 1) { Kokkos::atomic_min_fetch(&(input(0)), min); inner = min; @@ -246,7 +248,7 @@ static void Atomic_ContentiousMinReplacements(benchmark::State& state) { auto inp = prepare_input(1, std::numeric_limits::max()); for (auto _ : state) { - const auto time = atomic_contentious_max_replacement(state, inp, length); + const auto time = atomic_contentious_min_replacement(state, inp, length); state.SetIterationTime(time); } diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda.hpp index 276d03da26..fd86976d3b 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda.hpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda.hpp @@ -166,8 +166,17 @@ class Cuda { Cuda(); - Cuda(cudaStream_t stream, - Impl::ManageStream manage_stream = Impl::ManageStream::no); + explicit Cuda(cudaStream_t stream) : Cuda(stream, Impl::ManageStream::no) {} + +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 + template + KOKKOS_DEPRECATED_WITH_COMMENT( + "Cuda execution space should be constructed explicitly.") + Cuda(cudaStream_t stream) + : Cuda(stream) {} +#endif + + Cuda(cudaStream_t stream, Impl::ManageStream manage_stream); KOKKOS_DEPRECATED Cuda(cudaStream_t stream, bool manage_stream); @@ -186,7 +195,7 @@ class Cuda { /// /// This matches the __CUDA_ARCH__ specification. 
KOKKOS_DEPRECATED static size_type device_arch() { - const cudaDeviceProp& cudaProp = Cuda().cuda_device_prop(); + const cudaDeviceProp cudaProp = Cuda().cuda_device_prop(); return cudaProp.major * 100 + cudaProp.minor; } diff --git a/lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp b/lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp index 0944937e1b..6ae24022c8 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp @@ -31,7 +31,6 @@ #include #include -//#include #include #include @@ -59,12 +58,6 @@ const std::unique_ptr &Kokkos::Impl::cuda_get_deep_copy_space( namespace Kokkos { namespace Impl { -namespace { - -static std::atomic num_uvm_allocations(0); - -} // namespace - void DeepCopyCuda(void *dst, const void *src, size_t n) { KOKKOS_IMPL_CUDA_SAFE_CALL((CudaInternal::singleton().cuda_memcpy_wrapper( dst, src, n, cudaMemcpyDefault))); @@ -184,6 +177,29 @@ void *impl_allocate_common(const int device_id, cudaError_t error_code = cudaSuccess; #ifndef CUDART_VERSION #error CUDART_VERSION undefined! +#elif defined(KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY) + // This is intended for Grace-Hopper (and future unified memory architectures) + // The idea is to use host allocator and then advise to keep it in HBM on the + // device, but that requires CUDA 12.2 + static_assert(CUDART_VERSION >= 12020, + "CUDA runtime version >=12.2 required when " + "Kokkos_ENABLE_IMPL_CUDA_UNIFIED_MEMORY is set. " + "Please update your CUDA runtime version or " + "reconfigure with " + "-D Kokkos_ENABLE_IMPL_CUDA_UNIFIED_MEMORY=OFF"); + if (arg_alloc_size) { // cudaMemAdvise_v2 does not work with nullptr + error_code = cudaMallocManaged(&ptr, arg_alloc_size, cudaMemAttachGlobal); + if (error_code == cudaSuccess) { + // One would think cudaMemLocation{device_id, + // cudaMemLocationTypeDevice} would work but it doesn't. I.e. the order of + // members doesn't seem to be defined. 
+ cudaMemLocation loc; + loc.id = device_id; + loc.type = cudaMemLocationTypeDevice; + KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMemAdvise_v2( + ptr, arg_alloc_size, cudaMemAdviseSetPreferredLocation, loc)); + } + } #elif (defined(KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC) && CUDART_VERSION >= 11020) if (arg_alloc_size >= memory_threshold_g) { error_code = cudaMallocAsync(&ptr, arg_alloc_size, stream); @@ -196,18 +212,19 @@ void *impl_allocate_common(const int device_id, "Kokkos::Cuda: backend fence after async malloc"); } } - } else + } else { + error_code = cudaMalloc(&ptr, arg_alloc_size); + } +#else + error_code = cudaMalloc(&ptr, arg_alloc_size); #endif - { error_code = cudaMalloc(&ptr, arg_alloc_size); } + if (error_code != cudaSuccess) { // TODO tag as unlikely branch // This is the only way to clear the last error, which // we should do here since we're turning it into an // exception here cudaGetLastError(); - throw Experimental::CudaRawMemoryAllocationFailure( - arg_alloc_size, error_code, - Experimental::RawMemoryAllocationFailure::AllocationMechanism:: - CudaMalloc); + Kokkos::Impl::throw_bad_alloc(arg_handle.name, arg_alloc_size, arg_label); } if (Kokkos::Profiling::profileLibraryLoaded()) { @@ -252,8 +269,6 @@ void *CudaUVMSpace::impl_allocate( Cuda::impl_static_fence( "Kokkos::CudaUVMSpace::impl_allocate: Pre UVM Allocation"); if (arg_alloc_size > 0) { - Kokkos::Impl::num_uvm_allocations++; - KOKKOS_IMPL_CUDA_SAFE_CALL(cudaSetDevice(m_device)); cudaError_t error_code = cudaMallocManaged(&ptr, arg_alloc_size, cudaMemAttachGlobal); @@ -263,10 +278,7 @@ void *CudaUVMSpace::impl_allocate( // we should do here since we're turning it into an // exception here cudaGetLastError(); - throw Experimental::CudaRawMemoryAllocationFailure( - arg_alloc_size, error_code, - Experimental::RawMemoryAllocationFailure::AllocationMechanism:: - CudaMallocManaged); + Kokkos::Impl::throw_bad_alloc(name(), arg_alloc_size, arg_label); } #ifdef KOKKOS_IMPL_DEBUG_CUDA_PIN_UVM_TO_HOST @@ -307,10 
+319,7 @@ void *CudaHostPinnedSpace::impl_allocate( // we should do here since we're turning it into an // exception here cudaGetLastError(); - throw Experimental::CudaRawMemoryAllocationFailure( - arg_alloc_size, error_code, - Experimental::RawMemoryAllocationFailure::AllocationMechanism:: - CudaHostAlloc); + Kokkos::Impl::throw_bad_alloc(name(), arg_alloc_size, arg_label); } if (Kokkos::Profiling::profileLibraryLoaded()) { const size_t reported_size = @@ -341,27 +350,27 @@ void CudaSpace::impl_deallocate( Kokkos::Profiling::deallocateData(arg_handle, arg_label, arg_alloc_ptr, reported_size); } - try { #ifndef CUDART_VERSION #error CUDART_VERSION undefined! +#elif defined(KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY) + KOKKOS_IMPL_CUDA_SAFE_CALL(cudaSetDevice(m_device)); + KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFree(arg_alloc_ptr)); #elif (defined(KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC) && CUDART_VERSION >= 11020) - if (arg_alloc_size >= memory_threshold_g) { - Impl::cuda_device_synchronize( - "Kokkos::Cuda: backend fence before async free"); - KOKKOS_IMPL_CUDA_SAFE_CALL(cudaSetDevice(m_device)); - KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFreeAsync(arg_alloc_ptr, m_stream)); - Impl::cuda_device_synchronize( - "Kokkos::Cuda: backend fence after async free"); - } else { - KOKKOS_IMPL_CUDA_SAFE_CALL(cudaSetDevice(m_device)); - KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFree(arg_alloc_ptr)); - } -#else + if (arg_alloc_size >= memory_threshold_g) { + Impl::cuda_device_synchronize( + "Kokkos::Cuda: backend fence before async free"); + KOKKOS_IMPL_CUDA_SAFE_CALL(cudaSetDevice(m_device)); + KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFreeAsync(arg_alloc_ptr, m_stream)); + Impl::cuda_device_synchronize( + "Kokkos::Cuda: backend fence after async free"); + } else { KOKKOS_IMPL_CUDA_SAFE_CALL(cudaSetDevice(m_device)); KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFree(arg_alloc_ptr)); -#endif - } catch (...) 
{ } +#else + KOKKOS_IMPL_CUDA_SAFE_CALL(cudaSetDevice(m_device)); + KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFree(arg_alloc_ptr)); +#endif } void CudaUVMSpace::deallocate(void *const arg_alloc_ptr, const size_t arg_alloc_size) const { @@ -387,13 +396,9 @@ void CudaUVMSpace::impl_deallocate( Kokkos::Profiling::deallocateData(arg_handle, arg_label, arg_alloc_ptr, reported_size); } - try { - if (arg_alloc_ptr != nullptr) { - Kokkos::Impl::num_uvm_allocations--; - KOKKOS_IMPL_CUDA_SAFE_CALL(cudaSetDevice(m_device)); - KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFree(arg_alloc_ptr)); - } - } catch (...) { + if (arg_alloc_ptr != nullptr) { + KOKKOS_IMPL_CUDA_SAFE_CALL(cudaSetDevice(m_device)); + KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFree(arg_alloc_ptr)); } Cuda::impl_static_fence( "Kokkos::CudaUVMSpace::impl_deallocate: Post UVM Deallocation"); @@ -420,11 +425,8 @@ void CudaHostPinnedSpace::impl_deallocate( Kokkos::Profiling::deallocateData(arg_handle, arg_label, arg_alloc_ptr, reported_size); } - try { - KOKKOS_IMPL_CUDA_SAFE_CALL(cudaSetDevice(m_device)); - KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFreeHost(arg_alloc_ptr)); - } catch (...) 
{ - } + KOKKOS_IMPL_CUDA_SAFE_CALL(cudaSetDevice(m_device)); + KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFreeHost(arg_alloc_ptr)); } } // namespace Kokkos @@ -463,8 +465,12 @@ void cuda_prefetch_pointer(const Cuda &space, const void *ptr, size_t bytes, #include +#if !defined(KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY) KOKKOS_IMPL_HOST_INACCESSIBLE_SHARED_ALLOCATION_RECORD_EXPLICIT_INSTANTIATION( Kokkos::CudaSpace); +#else +KOKKOS_IMPL_SHARED_ALLOCATION_RECORD_EXPLICIT_INSTANTIATION(Kokkos::CudaSpace); +#endif KOKKOS_IMPL_SHARED_ALLOCATION_RECORD_EXPLICIT_INSTANTIATION( Kokkos::CudaUVMSpace); KOKKOS_IMPL_SHARED_ALLOCATION_RECORD_EXPLICIT_INSTANTIATION( diff --git a/lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.hpp b/lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.hpp index 0e20193e8b..e1d062d72d 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.hpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.hpp @@ -88,6 +88,19 @@ class CudaSpace { void* allocate(const char* arg_label, const size_t arg_alloc_size, const size_t arg_logical_size = 0) const; +#if defined(KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY) + template + void* allocate(const ExecutionSpace&, const size_t arg_alloc_size) const { + return allocate(arg_alloc_size); + } + template + void* allocate(const ExecutionSpace&, const char* arg_label, + const size_t arg_alloc_size, + const size_t arg_logical_size = 0) const { + return allocate(arg_label, arg_alloc_size, arg_logical_size); + } +#endif + /**\brief Deallocate untracked memory in the cuda space */ void deallocate(void* const arg_alloc_ptr, const size_t arg_alloc_size) const; void deallocate(const char* arg_label, void* const arg_alloc_ptr, @@ -337,7 +350,11 @@ static_assert( template <> struct MemorySpaceAccess { enum : bool { assignable = false }; - enum : bool { accessible = false }; +#if !defined(KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY) + enum : bool{accessible = false}; +#else + enum : bool { accessible = true }; +#endif enum : bool { deepcopy = true }; }; @@ -558,8 +575,12 @@ 
struct DeepCopy #include -#include namespace Kokkos { namespace Impl { @@ -69,52 +68,6 @@ inline void cuda_internal_safe_call(cudaError e, const char* name, Kokkos::Impl::cuda_internal_safe_call(call, #call, __FILE__, __LINE__) } // namespace Impl - -namespace Experimental { - -class CudaRawMemoryAllocationFailure : public RawMemoryAllocationFailure { - private: - using base_t = RawMemoryAllocationFailure; - - cudaError_t m_error_code = cudaSuccess; - - static FailureMode get_failure_mode(cudaError_t error_code) { - switch (error_code) { - case cudaErrorMemoryAllocation: return FailureMode::OutOfMemoryError; - case cudaErrorInvalidValue: return FailureMode::InvalidAllocationSize; - // TODO handle cudaErrorNotSupported for cudaMallocManaged - default: return FailureMode::Unknown; - } - } - - public: - // using base_t::base_t; - // would trigger - // - // error: cannot determine the exception specification of the default - // constructor due to a circular dependency - // - // using NVCC 9.1 and gcc 7.4 - CudaRawMemoryAllocationFailure( - size_t arg_attempted_size, size_t arg_attempted_alignment, - FailureMode arg_failure_mode = FailureMode::OutOfMemoryError, - AllocationMechanism arg_mechanism = - AllocationMechanism::StdMalloc) noexcept - : base_t(arg_attempted_size, arg_attempted_alignment, arg_failure_mode, - arg_mechanism) {} - - CudaRawMemoryAllocationFailure(size_t arg_attempted_size, - cudaError_t arg_error_code, - AllocationMechanism arg_mechanism) noexcept - : base_t(arg_attempted_size, /* CudaSpace doesn't handle alignment? 
*/ 1, - get_failure_mode(arg_error_code), arg_mechanism), - m_error_code(arg_error_code) {} - - void append_additional_error_information(std::ostream& o) const override; -}; - -} // end namespace Experimental - } // namespace Kokkos #endif // KOKKOS_ENABLE_CUDA diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Graph_Impl.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Graph_Impl.hpp index fcc3ff04ff..625d8c317a 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Graph_Impl.hpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Graph_Impl.hpp @@ -72,7 +72,7 @@ struct GraphImpl { GraphNodeImpl; - // Not moveable or copyable; it spends its whole life as a shared_ptr in the + // Not movable or copyable; it spends its whole life as a shared_ptr in the // Graph object GraphImpl() = delete; GraphImpl(GraphImpl const&) = delete; @@ -115,12 +115,9 @@ struct GraphImpl { template // requires NodeImplPtr is a shared_ptr to specialization of GraphNodeImpl - // Also requires that the kernel has the graph node tag in it's policy + // Also requires that the kernel has the graph node tag in its policy void add_node(std::shared_ptr const& arg_node_ptr) { - static_assert( - NodeImpl::kernel_type::Policy::is_graph_kernel::value, - "Something has gone horribly wrong, but it's too complicated to " - "explain here. 
Buy Daisy a coffee and she'll explain it to you."); + static_assert(NodeImpl::kernel_type::Policy::is_graph_kernel::value); KOKKOS_EXPECTS(bool(arg_node_ptr)); // The Kernel launch from the execute() method has been shimmed to insert // the node into the graph diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.cpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.cpp index 849e8b3b30..158c8acdda 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.cpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.cpp @@ -607,6 +607,22 @@ Kokkos::Cuda::initialize WARNING: Cuda is allocating into UVMSpace by default //---------------------------------- +#ifdef KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY + // Check if unified memory is available + int cuda_result; + cudaDeviceGetAttribute(&cuda_result, cudaDevAttrConcurrentManagedAccess, + cuda_device_id); + if (cuda_result == 0) { + Kokkos::abort( + "Kokkos::Cuda::initialize ERROR: Unified memory is not available on " + "this device\n" + "Please recompile Kokkos with " + "-DKokkos_ENABLE_IMPL_CUDA_UNIFIED_MEMORY=OFF\n"); + } +#endif + + //---------------------------------- + cudaStream_t singleton_stream; KOKKOS_IMPL_CUDA_SAFE_CALL(cudaSetDevice(cuda_device_id)); KOKKOS_IMPL_CUDA_SAFE_CALL(cudaStreamCreate(&singleton_stream)); @@ -705,6 +721,10 @@ void Cuda::print_configuration(std::ostream &os, bool /*verbose*/) const { #else os << "no\n"; #endif +#ifdef KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY + os << " KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY: "; + os << "yes\n"; +#endif os << "\nCuda Runtime Configuration:\n"; @@ -737,6 +757,14 @@ namespace Impl { int g_cuda_space_factory_initialized = initialize_space_factory("150_Cuda"); +int CudaInternal::m_cudaArch = -1; +cudaDeviceProp CudaInternal::m_deviceProp; +std::set CudaInternal::cuda_devices = {}; +std::map CudaInternal::constantMemHostStagingPerDevice = + {}; +std::map CudaInternal::constantMemReusablePerDevice = {}; +std::map CudaInternal::constantMemMutexPerDevice = {}; + 
} // namespace Impl } // namespace Kokkos diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.hpp index 24f4af3101..ffaa0f5474 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.hpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.hpp @@ -91,10 +91,10 @@ class CudaInternal { int m_cudaDev = -1; // Device Properties - inline static int m_cudaArch = -1; + static int m_cudaArch; static int concurrency(); - inline static cudaDeviceProp m_deviceProp; + static cudaDeviceProp m_deviceProp; // Scratch Spaces for Reductions mutable std::size_t m_scratchSpaceCount; @@ -120,11 +120,10 @@ class CudaInternal { bool was_initialized = false; bool was_finalized = false; - inline static std::set cuda_devices = {}; - inline static std::map constantMemHostStagingPerDevice = - {}; - inline static std::map constantMemReusablePerDevice = {}; - inline static std::map constantMemMutexPerDevice = {}; + static std::set cuda_devices; + static std::map constantMemHostStagingPerDevice; + static std::map constantMemReusablePerDevice; + static std::map constantMemMutexPerDevice; static CudaInternal& singleton(); @@ -421,23 +420,6 @@ class CudaInternal { return cudaStreamSynchronize(stream); } - // The following are only available for cuda 11.2 and greater -#if (defined(KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC) && CUDART_VERSION >= 11020) - template - cudaError_t cuda_malloc_async_wrapper(void** devPtr, size_t size, - cudaStream_t hStream = nullptr) const { - if constexpr (setCudaDevice) set_cuda_device(); - return cudaMallocAsync(devPtr, size, get_input_stream(hStream)); - } - - template - cudaError_t cuda_free_async_wrapper(void* devPtr, - cudaStream_t hStream = nullptr) const { - if constexpr (setCudaDevice) set_cuda_device(); - return cudaFreeAsync(devPtr, get_input_stream(hStream)); - } -#endif - // C++ API routines template cudaError_t cuda_func_get_attributes_wrapper(cudaFuncAttributes* attr, diff --git 
a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel_Team.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel_Team.hpp index 9f7be45c83..71e7751821 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel_Team.hpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel_Team.hpp @@ -539,17 +539,9 @@ class ParallelFor, m_vector_size(arg_policy.impl_vector_length()) { auto internal_space_instance = m_policy.space().impl_internal_space_instance(); - cudaFuncAttributes attr = - CudaParallelLaunch::get_cuda_func_attributes( - internal_space_instance->m_cudaDev); - m_team_size = - m_team_size >= 0 - ? m_team_size - : Kokkos::Impl::cuda_get_opt_block_size( - internal_space_instance, attr, m_functor, m_vector_size, - m_policy.team_scratch_size(0), - m_policy.thread_scratch_size(0)) / - m_vector_size; + m_team_size = m_team_size >= 0 ? m_team_size + : arg_policy.team_size_recommended( + arg_functor, ParallelForTag()); m_shmem_begin = (sizeof(double) * (m_team_size + 2)); m_shmem_size = @@ -585,13 +577,7 @@ class ParallelFor, "Kokkos::Impl::ParallelFor< Cuda > insufficient shared memory")); } - if (int(m_team_size) > - int(Kokkos::Impl::cuda_get_max_block_size( - internal_space_instance, attr, arg_functor, - arg_policy.impl_vector_length(), - arg_policy.team_scratch_size(0), - arg_policy.thread_scratch_size(0)) / - arg_policy.impl_vector_length())) { + if (m_team_size > arg_policy.team_size_max(arg_functor, ParallelForTag())) { Kokkos::Impl::throw_runtime_exception(std::string( "Kokkos::Impl::ParallelFor< Cuda > requested too large team size.")); } @@ -909,17 +895,11 @@ class ParallelReduce:: - get_cuda_func_attributes(internal_space_instance->m_cudaDev); - m_team_size = - m_team_size >= 0 - ? m_team_size - : Kokkos::Impl::cuda_get_opt_block_size( - internal_space_instance, attr, - m_functor_reducer.get_functor(), m_vector_size, - m_policy.team_scratch_size(0), - m_policy.thread_scratch_size(0)) / - m_vector_size; + m_team_size = m_team_size >= 0 ? 
m_team_size + : arg_policy.team_size_recommended( + arg_functor_reducer.get_functor(), + arg_functor_reducer.get_reducer(), + ParallelReduceTag()); m_team_begin = UseShflReduction diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_abort.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_abort.hpp index c8d6641d1e..18aca15065 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_abort.hpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_abort.hpp @@ -28,35 +28,20 @@ extern "C" { /* Cuda runtime function, declared in * Requires capability 2.x or better. */ -extern __device__ void __assertfail(const void *message, const void *file, - unsigned int line, const void *function, - size_t charsize); +[[noreturn]] __device__ void __assertfail(const void *message, const void *file, + unsigned int line, + const void *function, + size_t charsize); } namespace Kokkos { namespace Impl { -// required to workaround failures in random number generator unit tests with -// pre-volta architectures -#if defined(KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK) -__device__ inline void cuda_abort(const char *const message) { -#else -[[noreturn]] __device__ inline void cuda_abort(const char *const message) { -#endif +[[noreturn]] __device__ static void cuda_abort(const char *const message) { const char empty[] = ""; __assertfail((const void *)message, (const void *)empty, (unsigned int)0, (const void *)empty, sizeof(char)); - - // This loop is never executed. It's intended to suppress warnings that the - // function returns, even though it does not. This is necessary because - // __assertfail is not marked as [[noreturn]], even though it does not return. 
- // Disable with KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK to workaround failures - // in random number generator unit tests with pre-volta architectures -#if !defined(KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK) - while (true) - ; -#endif } } // namespace Impl diff --git a/lib/kokkos/core/src/HIP/Kokkos_HIP.hpp b/lib/kokkos/core/src/HIP/Kokkos_HIP.hpp index 3a88e97ee3..439075fc6c 100644 --- a/lib/kokkos/core/src/HIP/Kokkos_HIP.hpp +++ b/lib/kokkos/core/src/HIP/Kokkos_HIP.hpp @@ -48,8 +48,19 @@ class HIP { using scratch_memory_space = ScratchMemorySpace; HIP(); - HIP(hipStream_t stream, - Impl::ManageStream manage_stream = Impl::ManageStream::no); + + explicit HIP(hipStream_t stream) : HIP(stream, Impl::ManageStream::no) {} + +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 + template + KOKKOS_DEPRECATED_WITH_COMMENT( + "HIP execution space should be constructed explicitly.") + HIP(hipStream_t stream) + : HIP(stream) {} +#endif + + HIP(hipStream_t stream, Impl::ManageStream manage_stream); + KOKKOS_DEPRECATED HIP(hipStream_t stream, bool manage_stream); //@} diff --git a/lib/kokkos/core/src/HIP/Kokkos_HIP_Error.hpp b/lib/kokkos/core/src/HIP/Kokkos_HIP_Error.hpp index 43d63c090b..fa45dcfec3 100644 --- a/lib/kokkos/core/src/HIP/Kokkos_HIP_Error.hpp +++ b/lib/kokkos/core/src/HIP/Kokkos_HIP_Error.hpp @@ -22,8 +22,6 @@ #include -#include - namespace Kokkos { namespace Impl { @@ -44,39 +42,4 @@ inline void hip_internal_safe_call(hipError_t e, const char* name, #define KOKKOS_IMPL_HIP_SAFE_CALL(call) \ Kokkos::Impl::hip_internal_safe_call(call, #call, __FILE__, __LINE__) -namespace Kokkos { -namespace Experimental { - -class HIPRawMemoryAllocationFailure : public RawMemoryAllocationFailure { - private: - hipError_t m_error_code = hipSuccess; - - static FailureMode get_failure_mode(hipError_t error_code) { - switch (error_code) { - case hipErrorMemoryAllocation: return FailureMode::OutOfMemoryError; - case hipErrorInvalidValue: return FailureMode::InvalidAllocationSize; - default: return 
FailureMode::Unknown; - } - } - - public: - HIPRawMemoryAllocationFailure(size_t arg_attempted_size, - hipError_t arg_error_code, - AllocationMechanism arg_mechanism) noexcept - : RawMemoryAllocationFailure( - arg_attempted_size, /* HIPSpace doesn't handle alignment? */ 1, - get_failure_mode(arg_error_code), arg_mechanism), - m_error_code(arg_error_code) {} - - void append_additional_error_information(std::ostream& o) const override { - if (m_error_code != hipSuccess) { - o << " The HIP allocation returned the error code \"" - << hipGetErrorName(m_error_code) << "\"."; - } - } -}; - -} // namespace Experimental -} // namespace Kokkos - #endif diff --git a/lib/kokkos/core/src/HIP/Kokkos_HIP_Graph_Impl.hpp b/lib/kokkos/core/src/HIP/Kokkos_HIP_Graph_Impl.hpp index 7cc06d02fb..a0989fe671 100644 --- a/lib/kokkos/core/src/HIP/Kokkos_HIP_Graph_Impl.hpp +++ b/lib/kokkos/core/src/HIP/Kokkos_HIP_Graph_Impl.hpp @@ -40,7 +40,7 @@ class GraphImpl { GraphNodeImpl; - // Not moveable or copyable; it spends its whole life as a shared_ptr in the + // Not movable or copyable; it spends its whole life as a shared_ptr in the // Graph object. 
GraphImpl() = delete; GraphImpl(GraphImpl const&) = delete; @@ -108,7 +108,7 @@ inline void GraphImpl::add_node( } // Requires NodeImplPtr is a shared_ptr to specialization of GraphNodeImpl -// Also requires that the kernel has the graph node tag in it's policy +// Also requires that the kernel has the graph node tag in its policy template inline void GraphImpl::add_node( std::shared_ptr const& arg_node_ptr) { diff --git a/lib/kokkos/core/src/HIP/Kokkos_HIP_Instance.cpp b/lib/kokkos/core/src/HIP/Kokkos_HIP_Instance.cpp index 22c0db047f..e0b25c6939 100644 --- a/lib/kokkos/core/src/HIP/Kokkos_HIP_Instance.cpp +++ b/lib/kokkos/core/src/HIP/Kokkos_HIP_Instance.cpp @@ -353,6 +353,22 @@ void HIPInternal::finalize() { m_num_scratch_locks = 0; } +int HIPInternal::m_hipDev = -1; +unsigned HIPInternal::m_multiProcCount = 0; +unsigned HIPInternal::m_maxWarpCount = 0; +std::array HIPInternal::m_maxBlock = {0, 0, 0}; +unsigned HIPInternal::m_maxWavesPerCU = 0; +int HIPInternal::m_shmemPerSM = 0; +int HIPInternal::m_maxShmemPerBlock = 0; +int HIPInternal::m_maxThreadsPerSM = 0; + +hipDeviceProp_t HIPInternal::m_deviceProp; + +std::mutex HIPInternal::scratchFunctorMutex; +unsigned long *HIPInternal::constantMemHostStaging = nullptr; +hipEvent_t HIPInternal::constantMemReusable = nullptr; +std::mutex HIPInternal::constantMemMutex; + //---------------------------------------------------------------------------- Kokkos::HIP::size_type hip_internal_multiprocessor_count() { diff --git a/lib/kokkos/core/src/HIP/Kokkos_HIP_Instance.hpp b/lib/kokkos/core/src/HIP/Kokkos_HIP_Instance.hpp index 7b55f519c2..19349e90bb 100644 --- a/lib/kokkos/core/src/HIP/Kokkos_HIP_Instance.hpp +++ b/lib/kokkos/core/src/HIP/Kokkos_HIP_Instance.hpp @@ -35,8 +35,7 @@ struct HIPTraits { static constexpr int WarpSize = 64; static constexpr int WarpIndexMask = 0x003f; /* hexadecimal for 63 */ static constexpr int WarpIndexShift = 6; /* WarpSize == 1 << WarpShift*/ -#elif defined(KOKKOS_ARCH_AMD_GFX1030) || 
defined(KOKKOS_ARCH_AMD_GFX1100) || \ - defined(KOKKOS_ARCH_AMD_GFX1103) +#elif defined(KOKKOS_ARCH_AMD_GFX1030) || defined(KOKKOS_ARCH_AMD_GFX1100) static constexpr int WarpSize = 32; static constexpr int WarpIndexMask = 0x001f; /* hexadecimal for 31 */ static constexpr int WarpIndexShift = 5; /* WarpSize == 1 << WarpShift*/ @@ -71,16 +70,16 @@ class HIPInternal { public: using size_type = ::Kokkos::HIP::size_type; - inline static int m_hipDev = -1; - inline static unsigned m_multiProcCount = 0; - inline static unsigned m_maxWarpCount = 0; - inline static std::array m_maxBlock = {0, 0, 0}; - inline static unsigned m_maxWavesPerCU = 0; - inline static int m_shmemPerSM = 0; - inline static int m_maxShmemPerBlock = 0; - inline static int m_maxThreadsPerSM = 0; + static int m_hipDev; + static unsigned m_multiProcCount; + static unsigned m_maxWarpCount; + static std::array m_maxBlock; + static unsigned m_maxWavesPerCU; + static int m_shmemPerSM; + static int m_maxShmemPerBlock; + static int m_maxThreadsPerSM; - inline static hipDeviceProp_t m_deviceProp; + static hipDeviceProp_t m_deviceProp; static int concurrency(); @@ -93,7 +92,7 @@ class HIPInternal { size_type *m_scratchFlags = nullptr; mutable size_type *m_scratchFunctor = nullptr; mutable size_type *m_scratchFunctorHost = nullptr; - inline static std::mutex scratchFunctorMutex; + static std::mutex scratchFunctorMutex; hipStream_t m_stream = nullptr; uint32_t m_instance_id = @@ -112,9 +111,9 @@ class HIPInternal { // FIXME_HIP: these want to be per-device, not per-stream... 
use of 'static' // here will break once there are multiple devices though - inline static unsigned long *constantMemHostStaging = nullptr; - inline static hipEvent_t constantMemReusable = nullptr; - inline static std::mutex constantMemMutex; + static unsigned long *constantMemHostStaging; + static hipEvent_t constantMemReusable; + static std::mutex constantMemMutex; static HIPInternal &singleton(); diff --git a/lib/kokkos/core/src/HIP/Kokkos_HIP_ParallelReduce_MDRange.hpp b/lib/kokkos/core/src/HIP/Kokkos_HIP_ParallelReduce_MDRange.hpp index 55b6218d1c..1629511646 100644 --- a/lib/kokkos/core/src/HIP/Kokkos_HIP_ParallelReduce_MDRange.hpp +++ b/lib/kokkos/core/src/HIP/Kokkos_HIP_ParallelReduce_MDRange.hpp @@ -50,6 +50,7 @@ class ParallelReduce class ParallelReduce, HIP> { public: - using Policy = TeamPolicyInternal; + using Policy = TeamPolicy; using FunctorType = typename CombinedFunctorReducerType::functor_type; using ReducerType = typename CombinedFunctorReducerType::reducer_type; @@ -46,6 +46,7 @@ class ParallelReduce is_first_hip_managed_allocation(true); @@ -66,7 +67,6 @@ void* HIPSpace::allocate( return impl_allocate(arg_label, arg_alloc_size, arg_logical_size); } void* HIPSpace::impl_allocate( - const char* arg_label, const size_t arg_alloc_size, const size_t arg_logical_size, const Kokkos::Tools::SpaceHandle arg_handle) const { @@ -77,10 +77,7 @@ void* HIPSpace::impl_allocate( // This is the only way to clear the last error, which we should do here // since we're turning it into an exception here (void)hipGetLastError(); - throw Experimental::HIPRawMemoryAllocationFailure( - arg_alloc_size, error_code, - Experimental::RawMemoryAllocationFailure::AllocationMechanism:: - HIPMalloc); + Kokkos::Impl::throw_bad_alloc(name(), arg_alloc_size, arg_label); } if (Kokkos::Profiling::profileLibraryLoaded()) { const size_t reported_size = @@ -111,10 +108,7 @@ void* HIPHostPinnedSpace::impl_allocate( // This is the only way to clear the last error, which we should do here 
// since we're turning it into an exception here (void)hipGetLastError(); - throw Experimental::HIPRawMemoryAllocationFailure( - arg_alloc_size, error_code, - Experimental::RawMemoryAllocationFailure::AllocationMechanism:: - HIPHostMalloc); + Kokkos::Impl::throw_bad_alloc(name(), arg_alloc_size, arg_label); } if (Kokkos::Profiling::profileLibraryLoaded()) { const size_t reported_size = @@ -178,10 +172,7 @@ Kokkos::HIP::runtime WARNING: Kokkos did not find an environment variable 'HSA_X // This is the only way to clear the last error, which we should do here // since we're turning it into an exception here (void)hipGetLastError(); - throw Experimental::HIPRawMemoryAllocationFailure( - arg_alloc_size, error_code, - Experimental::RawMemoryAllocationFailure::AllocationMechanism:: - HIPMallocManaged); + Kokkos::Impl::throw_bad_alloc(name(), arg_alloc_size, arg_label); } KOKKOS_IMPL_HIP_SAFE_CALL(hipMemAdvise( ptr, arg_alloc_size, hipMemAdviseSetCoarseGrain, m_device)); diff --git a/lib/kokkos/core/src/HPX/Kokkos_HPX.cpp b/lib/kokkos/core/src/HPX/Kokkos_HPX.cpp index 6d541a6414..1f3d078344 100644 --- a/lib/kokkos/core/src/HPX/Kokkos_HPX.cpp +++ b/lib/kokkos/core/src/HPX/Kokkos_HPX.cpp @@ -153,7 +153,7 @@ void HPX::impl_instance_fence_locked(const std::string &name) const { auto &s = impl_get_sender(); hpx::this_thread::experimental::sync_wait(std::move(s)); - s = hpx::execution::experimental::unique_any_sender( + s = hpx::execution::experimental::unique_any_sender<>( hpx::execution::experimental::just()); }); } @@ -184,7 +184,7 @@ void HPX::impl_static_fence(const std::string &name) { } hpx::this_thread::experimental::sync_wait(std::move(s)); - s = hpx::execution::experimental::unique_any_sender( + s = hpx::execution::experimental::unique_any_sender<>( hpx::execution::experimental::just()); }); } diff --git a/lib/kokkos/core/src/HPX/Kokkos_HPX.hpp b/lib/kokkos/core/src/HPX/Kokkos_HPX.hpp index 26181a7c05..245dc128ca 100644 --- a/lib/kokkos/core/src/HPX/Kokkos_HPX.hpp +++ 
b/lib/kokkos/core/src/HPX/Kokkos_HPX.hpp @@ -168,17 +168,31 @@ class HPX { : m_instance_data(Kokkos::Impl::HostSharedPtr( &m_default_instance_data, &default_instance_deleter)) {} ~HPX() = default; - HPX(instance_mode mode) + explicit HPX(instance_mode mode) : m_instance_data( mode == instance_mode::independent ? (Kokkos::Impl::HostSharedPtr( new instance_data(m_next_instance_id++))) : Kokkos::Impl::HostSharedPtr( &m_default_instance_data, &default_instance_deleter)) {} - HPX(hpx::execution::experimental::unique_any_sender<> &&sender) + explicit HPX(hpx::execution::experimental::unique_any_sender<> &&sender) : m_instance_data(Kokkos::Impl::HostSharedPtr( new instance_data(m_next_instance_id++, std::move(sender)))) {} +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 + template + KOKKOS_DEPRECATED_WITH_COMMENT( + "HPX execution space should be constructed explicitly.") + HPX(instance_mode mode) + : HPX(mode) {} + + template + KOKKOS_DEPRECATED_WITH_COMMENT( + "HPX execution space should be constructed explicitly.") + HPX(hpx::execution::experimental::unique_any_sender<> &&sender) + : HPX(std::move(sender)) {} +#endif + HPX(HPX &&other) = default; HPX(const HPX &other) = default; diff --git a/lib/kokkos/core/src/Kokkos_Array.hpp b/lib/kokkos/core/src/Kokkos_Array.hpp index ba1626bb72..4d905fbc55 100644 --- a/lib/kokkos/core/src/Kokkos_Array.hpp +++ b/lib/kokkos/core/src/Kokkos_Array.hpp @@ -29,7 +29,6 @@ #include #include #include -#include #include namespace Kokkos { @@ -80,7 +79,11 @@ struct ArrayBoundsCheck { /**\brief Derived from the C++17 'std::array'. * Dropping the iterator interface. 
*/ +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 template +#else +template +#endif struct Array { public: /** @@ -129,10 +132,38 @@ struct Array { KOKKOS_INLINE_FUNCTION constexpr const_pointer data() const { return &m_internal_implementation_private_member_data[0]; } + + friend KOKKOS_FUNCTION constexpr bool operator==(Array const& lhs, + Array const& rhs) noexcept { + for (size_t i = 0; i != N; ++i) + if (lhs[i] != rhs[i]) return false; + return true; + } + + friend KOKKOS_FUNCTION constexpr bool operator!=(Array const& lhs, + Array const& rhs) noexcept { + return !(lhs == rhs); + } + + private: + template + friend KOKKOS_INLINE_FUNCTION constexpr std::enable_if_t< + Impl::is_swappable::value> + kokkos_swap(Array& a, + Array& b) noexcept(Impl::is_nothrow_swappable_v) { + for (std::size_t i = 0; i < N; ++i) { + kokkos_swap(a[i], b[i]); + } + } }; +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 template struct Array { +#else +template +struct Array { +#endif public: using reference = T&; using const_reference = std::add_const_t&; @@ -167,25 +198,35 @@ struct Array { KOKKOS_INLINE_FUNCTION pointer data() { return nullptr; } KOKKOS_INLINE_FUNCTION const_pointer data() const { return nullptr; } - KOKKOS_DEFAULTED_FUNCTION ~Array() = default; - KOKKOS_DEFAULTED_FUNCTION Array() = default; - KOKKOS_DEFAULTED_FUNCTION Array(const Array&) = default; - KOKKOS_DEFAULTED_FUNCTION Array& operator=(const Array&) = default; + friend KOKKOS_FUNCTION constexpr bool operator==(Array const&, + Array const&) noexcept { + return true; + } + friend KOKKOS_FUNCTION constexpr bool operator!=(Array const&, + Array const&) noexcept { + return false; + } - // Some supported compilers are not sufficiently C++11 compliant - // for default move constructor and move assignment operator. 
- // Array( Array && ) = default ; - // Array & operator = ( Array && ) = default ; + private: + friend KOKKOS_INLINE_FUNCTION constexpr void kokkos_swap( + Array&, Array&) noexcept {} }; +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 +namespace Impl { +struct KokkosArrayContiguous {}; +struct KokkosArrayStrided {}; +} // namespace Impl + template <> -struct Array { - struct contiguous {}; - struct strided {}; +struct KOKKOS_DEPRECATED Array { + using contiguous = Impl::KokkosArrayContiguous; + using strided = Impl::KokkosArrayStrided; }; template -struct Array::contiguous> { +struct KOKKOS_DEPRECATED + Array { private: T* m_elem; size_t m_size; @@ -253,7 +294,8 @@ struct Array::contiguous> { }; template -struct Array::strided> { +struct KOKKOS_DEPRECATED + Array { private: T* m_elem; size_t m_size; @@ -320,10 +362,37 @@ struct Array::strided> { size_type arg_stride) : m_elem(arg_ptr), m_size(arg_size), m_stride(arg_stride) {} }; +#endif template Array(T, Us...)->Array; +namespace Impl { + +template +KOKKOS_FUNCTION constexpr Array, N> to_array_impl( + T (&a)[N], std::index_sequence) { + return {{a[I]...}}; +} + +template +KOKKOS_FUNCTION constexpr Array, N> to_array_impl( + T(&&a)[N], std::index_sequence) { + return {{std::move(a[I])...}}; +} + +} // namespace Impl + +template +KOKKOS_FUNCTION constexpr auto to_array(T (&a)[N]) { + return Impl::to_array_impl(a, std::make_index_sequence{}); +} + +template +KOKKOS_FUNCTION constexpr auto to_array(T(&&a)[N]) { + return Impl::to_array_impl(std::move(a), std::make_index_sequence{}); +} + } // namespace Kokkos // @@ -333,6 +402,7 @@ struct std::tuple_size> template struct std::tuple_element> { + static_assert(I < N); using type = T; }; @@ -340,21 +410,25 @@ namespace Kokkos { template KOKKOS_FUNCTION constexpr T& get(Array& a) noexcept { + static_assert(I < N); return a[I]; } template KOKKOS_FUNCTION constexpr T const& get(Array const& a) noexcept { + static_assert(I < N); return a[I]; } template KOKKOS_FUNCTION constexpr T&& 
get(Array&& a) noexcept { + static_assert(I < N); return std::move(a[I]); } template KOKKOS_FUNCTION constexpr T const&& get(Array const&& a) noexcept { + static_assert(I < N); return std::move(a[I]); } diff --git a/lib/kokkos/core/src/Kokkos_Atomics_Desul_Volatile_Wrapper.hpp b/lib/kokkos/core/src/Kokkos_Atomics_Desul_Volatile_Wrapper.hpp index 9acacef901..bf57dcae65 100644 --- a/lib/kokkos/core/src/Kokkos_Atomics_Desul_Volatile_Wrapper.hpp +++ b/lib/kokkos/core/src/Kokkos_Atomics_Desul_Volatile_Wrapper.hpp @@ -22,7 +22,6 @@ static_assert(false, #ifndef KOKKOS_DESUL_ATOMICS_VOLATILE_WRAPPER_HPP_ #define KOKKOS_DESUL_ATOMICS_VOLATILE_WRAPPER_HPP_ #include -#include #include #ifdef KOKKOS_ENABLE_ATOMICS_BYPASS diff --git a/lib/kokkos/core/src/Kokkos_Atomics_Desul_Wrapper.hpp b/lib/kokkos/core/src/Kokkos_Atomics_Desul_Wrapper.hpp index eebdd20f15..26db69ac1f 100644 --- a/lib/kokkos/core/src/Kokkos_Atomics_Desul_Wrapper.hpp +++ b/lib/kokkos/core/src/Kokkos_Atomics_Desul_Wrapper.hpp @@ -22,8 +22,6 @@ static_assert(false, #ifndef KOKKOS_DESUL_ATOMICS_WRAPPER_HPP_ #define KOKKOS_DESUL_ATOMICS_WRAPPER_HPP_ #include - -#include #include #include diff --git a/lib/kokkos/core/src/Kokkos_Complex.hpp b/lib/kokkos/core/src/Kokkos_Complex.hpp index 4d405116cc..7dd2a9ddbb 100644 --- a/lib/kokkos/core/src/Kokkos_Complex.hpp +++ b/lib/kokkos/core/src/Kokkos_Complex.hpp @@ -28,6 +28,7 @@ #include #include #include +#include namespace Kokkos { @@ -256,6 +257,12 @@ class return *this; } + template + friend constexpr const RT& get(const complex&) noexcept; + + template + friend constexpr const RT&& get(const complex&&) noexcept; + #ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 //! Copy constructor from volatile. 
template < @@ -423,6 +430,75 @@ class #endif // KOKKOS_ENABLE_DEPRECATED_CODE_4 }; +} // namespace Kokkos + +// Tuple protocol for complex based on https://wg21.link/P2819R2 (voted into +// the C++26 working draft on 2023-11) + +template +struct std::tuple_size> + : std::integral_constant {}; + +template +struct std::tuple_element> { + static_assert(I < 2); + using type = RealType; +}; + +namespace Kokkos { + +// get<...>(...) defined here so as not to be hidden friends, as per P2819R2 + +template +KOKKOS_FUNCTION constexpr RealType& get(complex& z) noexcept { + static_assert(I < 2); + if constexpr (I == 0) + return z.real(); + else + return z.imag(); +#ifdef KOKKOS_COMPILER_INTEL + __builtin_unreachable(); +#endif +} + +template +KOKKOS_FUNCTION constexpr RealType&& get(complex&& z) noexcept { + static_assert(I < 2); + if constexpr (I == 0) + return std::move(z.real()); + else + return std::move(z.imag()); +#ifdef KOKKOS_COMPILER_INTEL + __builtin_unreachable(); +#endif +} + +template +KOKKOS_FUNCTION constexpr const RealType& get( + const complex& z) noexcept { + static_assert(I < 2); + if constexpr (I == 0) + return z.re_; + else + return z.im_; +#ifdef KOKKOS_COMPILER_INTEL + __builtin_unreachable(); +#endif +} + +template +KOKKOS_FUNCTION constexpr const RealType&& get( + const complex&& z) noexcept { + static_assert(I < 2); + if constexpr (I == 0) + return std::move(z.re_); + else + return std::move(z.im_); +#ifdef KOKKOS_COMPILER_INTEL + __builtin_unreachable(); +#endif +} + //============================================================================== // {{{1 diff --git a/lib/kokkos/core/src/Kokkos_CopyViews.hpp b/lib/kokkos/core/src/Kokkos_CopyViews.hpp index 08f6ba8d69..e856b19247 100644 --- a/lib/kokkos/core/src/Kokkos_CopyViews.hpp +++ b/lib/kokkos/core/src/Kokkos_CopyViews.hpp @@ -221,10 +221,12 @@ struct ViewFill { ViewFill(const ViewType& a_, typename ViewType::const_value_type& val_, const ExecSpace& space) : a(a_), val(val_) { + // MDRangePolicy 
is not supported for 7D views + // Iterate separately over extent(2) Kokkos::parallel_for("Kokkos::ViewFill-7D", policy_type(space, {0, 0, 0, 0, 0, 0}, - {a.extent(0), a.extent(1), a.extent(2), - a.extent(3), a.extent(5), a.extent(6)}), + {a.extent(0), a.extent(1), a.extent(3), + a.extent(4), a.extent(5), a.extent(6)}), *this); } @@ -249,6 +251,8 @@ struct ViewFill { ViewFill(const ViewType& a_, typename ViewType::const_value_type& val_, const ExecSpace& space) : a(a_), val(val_) { + // MDRangePolicy is not supported for 8D views + // Iterate separately over extent(2) and extent(4) Kokkos::parallel_for("Kokkos::ViewFill-8D", policy_type(space, {0, 0, 0, 0, 0, 0}, {a.extent(0), a.extent(1), a.extent(3), @@ -293,9 +297,11 @@ struct ViewCopy { ViewTypeA a; ViewTypeB b; static const Kokkos::Iterate outer_iteration_pattern = - Kokkos::layout_iterate_type_selector::outer_iteration_pattern; + Kokkos::Impl::layout_iterate_type_selector< + Layout>::outer_iteration_pattern; static const Kokkos::Iterate inner_iteration_pattern = - Kokkos::layout_iterate_type_selector::inner_iteration_pattern; + Kokkos::Impl::layout_iterate_type_selector< + Layout>::inner_iteration_pattern; using iterate_type = Kokkos::Rank<2, outer_iteration_pattern, inner_iteration_pattern>; using policy_type = @@ -323,9 +329,11 @@ struct ViewCopy { ViewTypeB b; static const Kokkos::Iterate outer_iteration_pattern = - Kokkos::layout_iterate_type_selector::outer_iteration_pattern; + Kokkos::Impl::layout_iterate_type_selector< + Layout>::outer_iteration_pattern; static const Kokkos::Iterate inner_iteration_pattern = - Kokkos::layout_iterate_type_selector::inner_iteration_pattern; + Kokkos::Impl::layout_iterate_type_selector< + Layout>::inner_iteration_pattern; using iterate_type = Kokkos::Rank<3, outer_iteration_pattern, inner_iteration_pattern>; using policy_type = @@ -354,9 +362,11 @@ struct ViewCopy { ViewTypeB b; static const Kokkos::Iterate outer_iteration_pattern = - 
Kokkos::layout_iterate_type_selector::outer_iteration_pattern; + Kokkos::Impl::layout_iterate_type_selector< + Layout>::outer_iteration_pattern; static const Kokkos::Iterate inner_iteration_pattern = - Kokkos::layout_iterate_type_selector::inner_iteration_pattern; + Kokkos::Impl::layout_iterate_type_selector< + Layout>::inner_iteration_pattern; using iterate_type = Kokkos::Rank<4, outer_iteration_pattern, inner_iteration_pattern>; using policy_type = @@ -386,9 +396,11 @@ struct ViewCopy { ViewTypeB b; static const Kokkos::Iterate outer_iteration_pattern = - Kokkos::layout_iterate_type_selector::outer_iteration_pattern; + Kokkos::Impl::layout_iterate_type_selector< + Layout>::outer_iteration_pattern; static const Kokkos::Iterate inner_iteration_pattern = - Kokkos::layout_iterate_type_selector::inner_iteration_pattern; + Kokkos::Impl::layout_iterate_type_selector< + Layout>::inner_iteration_pattern; using iterate_type = Kokkos::Rank<5, outer_iteration_pattern, inner_iteration_pattern>; using policy_type = @@ -418,9 +430,11 @@ struct ViewCopy { ViewTypeB b; static const Kokkos::Iterate outer_iteration_pattern = - Kokkos::layout_iterate_type_selector::outer_iteration_pattern; + Kokkos::Impl::layout_iterate_type_selector< + Layout>::outer_iteration_pattern; static const Kokkos::Iterate inner_iteration_pattern = - Kokkos::layout_iterate_type_selector::inner_iteration_pattern; + Kokkos::Impl::layout_iterate_type_selector< + Layout>::inner_iteration_pattern; using iterate_type = Kokkos::Rank<6, outer_iteration_pattern, inner_iteration_pattern>; using policy_type = @@ -450,9 +464,11 @@ struct ViewCopy { ViewTypeB b; static const Kokkos::Iterate outer_iteration_pattern = - Kokkos::layout_iterate_type_selector::outer_iteration_pattern; + Kokkos::Impl::layout_iterate_type_selector< + Layout>::outer_iteration_pattern; static const Kokkos::Iterate inner_iteration_pattern = - Kokkos::layout_iterate_type_selector::inner_iteration_pattern; + 
Kokkos::Impl::layout_iterate_type_selector< + Layout>::inner_iteration_pattern; using iterate_type = Kokkos::Rank<6, outer_iteration_pattern, inner_iteration_pattern>; using policy_type = @@ -461,6 +477,8 @@ struct ViewCopy { ViewCopy(const ViewTypeA& a_, const ViewTypeB& b_, const ExecSpace space = ExecSpace()) : a(a_), b(b_) { + // MDRangePolicy is not supported for 7D views + // Iterate separately over extent(2) Kokkos::parallel_for("Kokkos::ViewCopy-7D", policy_type(space, {0, 0, 0, 0, 0, 0}, {a.extent(0), a.extent(1), a.extent(3), @@ -483,9 +501,11 @@ struct ViewCopy { ViewTypeB b; static const Kokkos::Iterate outer_iteration_pattern = - Kokkos::layout_iterate_type_selector::outer_iteration_pattern; + Kokkos::Impl::layout_iterate_type_selector< + Layout>::outer_iteration_pattern; static const Kokkos::Iterate inner_iteration_pattern = - Kokkos::layout_iterate_type_selector::inner_iteration_pattern; + Kokkos::Impl::layout_iterate_type_selector< + Layout>::inner_iteration_pattern; using iterate_type = Kokkos::Rank<6, outer_iteration_pattern, inner_iteration_pattern>; using policy_type = @@ -494,6 +514,8 @@ struct ViewCopy { ViewCopy(const ViewTypeA& a_, const ViewTypeB& b_, const ExecSpace space = ExecSpace()) : a(a_), b(b_) { + // MDRangePolicy is not supported for 8D views + // Iterate separately over extent(2) and extent(4) Kokkos::parallel_for("Kokkos::ViewCopy-8D", policy_type(space, {0, 0, 0, 0, 0, 0}, {a.extent(0), a.extent(1), a.extent(3), @@ -539,11 +561,8 @@ void view_copy(const ExecutionSpace& space, const DstType& dst, int64_t strides[DstType::rank + 1]; dst.stride(strides); Kokkos::Iterate iterate; - if (Kokkos::is_layouttiled::value) { - iterate = Kokkos::layout_iterate_type_selector< - typename DstType::array_layout>::outer_iteration_pattern; - } else if (std::is_same::value) { + if (std::is_same::value) { iterate = Kokkos::Iterate::Right; } else if (std::is_same::value) { @@ -630,11 +649,8 @@ void view_copy(const DstType& dst, const SrcType& src) 
{ int64_t strides[DstType::rank + 1]; dst.stride(strides); Kokkos::Iterate iterate; - if (Kokkos::is_layouttiled::value) { - iterate = Kokkos::layout_iterate_type_selector< - typename DstType::array_layout>::outer_iteration_pattern; - } else if (std::is_same::value) { + if (std::is_same::value) { iterate = Kokkos::Iterate::Right; } else if (std::is_same::value) { @@ -3092,8 +3108,7 @@ inline std::enable_if_t< std::is_same::array_layout, Kokkos::LayoutRight>::value || std::is_same::array_layout, - Kokkos::LayoutStride>::value || - is_layouttiled::array_layout>::value> + Kokkos::LayoutStride>::value> impl_resize(const Impl::ViewCtorProp& arg_prop, Kokkos::View& v, const typename Kokkos::View::array_layout& layout) { @@ -3139,8 +3154,7 @@ inline std::enable_if_t< std::is_same::array_layout, Kokkos::LayoutRight>::value || std::is_same::array_layout, - Kokkos::LayoutStride>::value || - is_layouttiled::array_layout>::value)> + Kokkos::LayoutStride>::value)> impl_resize(const Impl::ViewCtorProp& arg_prop, Kokkos::View& v, const typename Kokkos::View::array_layout& layout) { @@ -3235,7 +3249,10 @@ impl_realloc(Kokkos::View& v, const size_t n0, const size_t n1, v = view_type(); // Best effort to deallocate in case no other view refers // to the shared allocation v = view_type(arg_prop_copy, n0, n1, n2, n3, n4, n5, n6, n7); - } else if (alloc_prop_input::initialize) { + return; + } + + if constexpr (alloc_prop_input::initialize) { if constexpr (alloc_prop_input::has_execution_space) { const auto& exec_space = Impl::get_property(arg_prop); @@ -3308,8 +3325,7 @@ inline std::enable_if_t< std::is_same::array_layout, Kokkos::LayoutRight>::value || std::is_same::array_layout, - Kokkos::LayoutStride>::value || - is_layouttiled::array_layout>::value> + Kokkos::LayoutStride>::value> impl_realloc(Kokkos::View& v, const typename Kokkos::View::array_layout& layout, const Impl::ViewCtorProp& arg_prop) { @@ -3331,7 +3347,10 @@ impl_realloc(Kokkos::View& v, if (v.layout() != layout) { v = 
view_type(); // Deallocate first, if the only view to allocation v = view_type(arg_prop, layout); - } else if (alloc_prop_input::initialize) { + return; + } + + if constexpr (alloc_prop_input::initialize) { if constexpr (alloc_prop_input::has_execution_space) { const auto& exec_space = Impl::get_property(arg_prop); @@ -3351,8 +3370,7 @@ inline std::enable_if_t< std::is_same::array_layout, Kokkos::LayoutRight>::value || std::is_same::array_layout, - Kokkos::LayoutStride>::value || - is_layouttiled::array_layout>::value)> + Kokkos::LayoutStride>::value)> impl_realloc(Kokkos::View& v, const typename Kokkos::View::array_layout& layout, const Impl::ViewCtorProp& arg_prop) { @@ -3452,6 +3470,7 @@ struct MirrorType { using view_type = Kokkos::View; }; +// collection of static asserts for create_mirror and create_mirror_view template void check_view_ctor_args_create_mirror() { using alloc_prop_input = Impl::ViewCtorProp; @@ -3470,232 +3489,231 @@ void check_view_ctor_args_create_mirror() { "not explicitly allow padding!"); } +// create a mirror +// private interface that accepts arbitrary view constructor args passed by a +// view_alloc template -inline std::enable_if_t::has_memory_space, - typename Kokkos::View::HostMirror> -create_mirror(const Kokkos::View& src, - const Impl::ViewCtorProp& arg_prop) { - using src_type = View; - using dst_type = typename src_type::HostMirror; - +inline auto create_mirror(const Kokkos::View& src, + const Impl::ViewCtorProp& arg_prop) { check_view_ctor_args_create_mirror(); auto prop_copy = Impl::with_properties_if_unset( arg_prop, std::string(src.label()).append("_mirror")); - return dst_type(prop_copy, src.layout()); -} - -// Create a mirror in a new space (specialization for different space) -template ::has_memory_space>> -auto create_mirror(const Kokkos::View& src, - const Impl::ViewCtorProp& arg_prop) { - check_view_ctor_args_create_mirror(); - - auto prop_copy = Impl::with_properties_if_unset( - arg_prop, 
std::string(src.label()).append("_mirror")); - using alloc_prop = decltype(prop_copy); - - return typename Impl::MirrorType::view_type(prop_copy, src.layout()); + if constexpr (Impl::ViewCtorProp::has_memory_space) { + using memory_space = typename decltype(prop_copy)::memory_space; + using dst_type = + typename Impl::MirrorType::view_type; + return dst_type(prop_copy, src.layout()); + } else { + using dst_type = typename View::HostMirror; + return dst_type(prop_copy, src.layout()); + } +#if defined(KOKKOS_COMPILER_INTEL) || \ + (defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \ + !defined(KOKKOS_COMPILER_MSVC)) + __builtin_unreachable(); +#endif } } // namespace Impl -template -std::enable_if_t::specialize>::value, - typename Kokkos::View::HostMirror> -create_mirror(Kokkos::View const& v) { - return Impl::create_mirror(v, Impl::ViewCtorProp<>{}); +// public interface +template ::specialize>>> +auto create_mirror(Kokkos::View const& src) { + return Impl::create_mirror(src, Impl::ViewCtorProp<>{}); } -template -std::enable_if_t::specialize>::value, - typename Kokkos::View::HostMirror> -create_mirror(Kokkos::Impl::WithoutInitializing_t wi, - Kokkos::View const& v) { - return Impl::create_mirror(v, view_alloc(wi)); +// public interface that accepts a without initializing flag +template ::specialize>>> +auto create_mirror(Kokkos::Impl::WithoutInitializing_t wi, + Kokkos::View const& src) { + return Impl::create_mirror(src, view_alloc(wi)); } +// public interface that accepts a space template ::value>> -std::enable_if_t::specialize>::value, - typename Impl::MirrorType::view_type> -create_mirror(Space const&, Kokkos::View const& v) { - return Impl::create_mirror(v, view_alloc(typename Space::memory_space{})); -} - -template ::specialize>::value && - Impl::ViewCtorProp::has_memory_space>> + Kokkos::is_space::value && + std::is_void_v::specialize>>> +auto create_mirror(Space const&, Kokkos::View const& src) { + return Impl::create_mirror(src, 
view_alloc(typename Space::memory_space{})); +} + +// public interface that accepts arbitrary view constructor args passed by a +// view_alloc +template ::specialize>>> auto create_mirror(Impl::ViewCtorProp const& arg_prop, - Kokkos::View const& v) { - return Impl::create_mirror(v, arg_prop); -} - -template -std::enable_if_t< - std::is_void::specialize>::value && - !Impl::ViewCtorProp::has_memory_space, - typename Kokkos::View::HostMirror> -create_mirror(Impl::ViewCtorProp const& arg_prop, - Kokkos::View const& v) { - return Impl::create_mirror(v, arg_prop); + Kokkos::View const& src) { + return Impl::create_mirror(src, arg_prop); } +// public interface that accepts a space and a without initializing flag template ::value>> -std::enable_if_t::specialize>::value, - typename Impl::MirrorType::view_type> -create_mirror(Kokkos::Impl::WithoutInitializing_t wi, Space const&, - Kokkos::View const& v) { - return Impl::create_mirror(v, view_alloc(typename Space::memory_space{}, wi)); + typename Enable = std::enable_if_t< + Kokkos::is_space::value && + std::is_void_v::specialize>>> +auto create_mirror(Kokkos::Impl::WithoutInitializing_t wi, Space const&, + Kokkos::View const& src) { + return Impl::create_mirror(src, + view_alloc(typename Space::memory_space{}, wi)); } namespace Impl { +// choose a `Kokkos::create_mirror` adapted for the provided view and the +// provided arguments +template +inline auto choose_create_mirror( + const View& src, const Impl::ViewCtorProp& arg_prop) { + // Due to the fact that users can overload `Kokkos::create_mirror`, but also + // that they may not have implemented all of its different possible + // variations, this function chooses the correct private or public version of + // it to call. + // This helper should be used by any overload of + // `Kokkos::Impl::create_mirror_view`. 
+ + if constexpr (std::is_void_v) { + // if the view is not specialized, just call the Impl function + + // using ADL to find the later defined overload of the function + using namespace Kokkos::Impl; + + return create_mirror(src, arg_prop); + } else { + // otherwise, recreate the public call + using ViewProp = Impl::ViewCtorProp; + + // using ADL to find the later defined overload of the function + using namespace Kokkos; + + if constexpr (sizeof...(ViewCtorArgs) == 0) { + // if there are no view constructor args, call the specific public + // function + return create_mirror(src); + } else if constexpr (sizeof...(ViewCtorArgs) == 1 && + ViewProp::has_memory_space) { + // if there is one view constructor arg and it has a memory space, call + // the specific public function + return create_mirror(typename ViewProp::memory_space{}, src); + } else if constexpr (sizeof...(ViewCtorArgs) == 1 && + !ViewProp::initialize) { + // if there is one view constructor arg and it has a without initializing + // mark, call the specific public function + return create_mirror(typename Kokkos::Impl::WithoutInitializing_t{}, src); + } else if constexpr (sizeof...(ViewCtorArgs) == 2 && + ViewProp::has_memory_space && !ViewProp::initialize) { + // if there is two view constructor args and they have a memory space and + // a without initializing mark, call the specific public function + return create_mirror(typename Kokkos::Impl::WithoutInitializing_t{}, + typename ViewProp::memory_space{}, src); + } else { + // if there are other constructor args, call the generic public function + + // Beware, there are some libraries using Kokkos that don't implement + // this overload (hence the reason for this present function to exist). 
+ return create_mirror(arg_prop, src); + } + } + +#if defined(KOKKOS_COMPILER_INTEL) || \ + (defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \ + !defined(KOKKOS_COMPILER_MSVC)) + __builtin_unreachable(); +#endif +} + +// create a mirror view +// private interface that accepts arbitrary view constructor args passed by a +// view_alloc template -inline std::enable_if_t< - !Impl::ViewCtorProp::has_memory_space && - (std::is_same< - typename Kokkos::View::memory_space, - typename Kokkos::View::HostMirror::memory_space>::value && - std::is_same< - typename Kokkos::View::data_type, - typename Kokkos::View::HostMirror::data_type>::value), - typename Kokkos::View::HostMirror> -create_mirror_view(const Kokkos::View& src, - const Impl::ViewCtorProp&) { - check_view_ctor_args_create_mirror(); - return src; -} - -template -inline std::enable_if_t< - !Impl::ViewCtorProp::has_memory_space && - !(std::is_same::memory_space, - typename Kokkos::View< - T, P...>::HostMirror::memory_space>::value && - std::is_same< - typename Kokkos::View::data_type, - typename Kokkos::View::HostMirror::data_type>::value), - typename Kokkos::View::HostMirror> -create_mirror_view(const Kokkos::View& src, - const Impl::ViewCtorProp& arg_prop) { - return Kokkos::Impl::create_mirror(src, arg_prop); -} - -// Create a mirror view in a new space (specialization for same space) -template ::has_memory_space>> -std::enable_if_t::memory_space, - T, P...>::is_same_memspace, - typename Impl::MirrorViewType< - typename Impl::ViewCtorProp::memory_space, - T, P...>::view_type> -create_mirror_view(const Kokkos::View& src, - const Impl::ViewCtorProp&) { - check_view_ctor_args_create_mirror(); - return src; -} - -// Create a mirror view in a new space (specialization for different space) -template ::has_memory_space>> -std::enable_if_t::memory_space, - T, P...>::is_same_memspace, - typename Impl::MirrorViewType< - typename Impl::ViewCtorProp::memory_space, - T, P...>::view_type> 
-create_mirror_view(const Kokkos::View& src, - const Impl::ViewCtorProp& arg_prop) { - return Kokkos::Impl::create_mirror(src, arg_prop); +inline auto create_mirror_view( + const Kokkos::View& src, + [[maybe_unused]] const Impl::ViewCtorProp& arg_prop) { + if constexpr (!Impl::ViewCtorProp::has_memory_space) { + if constexpr (std::is_same::memory_space, + typename Kokkos::View< + T, P...>::HostMirror::memory_space>::value && + std::is_same::data_type, + typename Kokkos::View< + T, P...>::HostMirror::data_type>::value) { + check_view_ctor_args_create_mirror(); + return typename Kokkos::View::HostMirror(src); + } else { + return Kokkos::Impl::choose_create_mirror(src, arg_prop); + } + } else { + if constexpr (Impl::MirrorViewType::memory_space, + T, P...>::is_same_memspace) { + check_view_ctor_args_create_mirror(); + return typename Impl::MirrorViewType< + typename Impl::ViewCtorProp::memory_space, T, + P...>::view_type(src); + } else { + return Kokkos::Impl::choose_create_mirror(src, arg_prop); + } + } +#if defined(KOKKOS_COMPILER_INTEL) || \ + (defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \ + !defined(KOKKOS_COMPILER_MSVC)) + __builtin_unreachable(); +#endif } } // namespace Impl +// public interface template -std::enable_if_t< - std::is_same< - typename Kokkos::View::memory_space, - typename Kokkos::View::HostMirror::memory_space>::value && - std::is_same< - typename Kokkos::View::data_type, - typename Kokkos::View::HostMirror::data_type>::value, - typename Kokkos::View::HostMirror> -create_mirror_view(const Kokkos::View& src) { - return src; +auto create_mirror_view(const Kokkos::View& src) { + return Impl::create_mirror_view(src, view_alloc()); } +// public interface that accepts a without initializing flag template -std::enable_if_t< - !(std::is_same< - typename Kokkos::View::memory_space, - typename Kokkos::View::HostMirror::memory_space>::value && - std::is_same< - typename Kokkos::View::data_type, - typename 
Kokkos::View::HostMirror::data_type>::value), - typename Kokkos::View::HostMirror> -create_mirror_view(const Kokkos::View& src) { - return Kokkos::create_mirror(src); +auto create_mirror_view(Kokkos::Impl::WithoutInitializing_t wi, + Kokkos::View const& src) { + return Impl::create_mirror_view(src, view_alloc(wi)); } -template -typename Kokkos::View::HostMirror create_mirror_view( - Kokkos::Impl::WithoutInitializing_t wi, Kokkos::View const& v) { - return Impl::create_mirror_view(v, view_alloc(wi)); -} - -// FIXME_C++17 Improve SFINAE here. +// public interface that accepts a space template ::value>> -typename Impl::MirrorViewType::view_type create_mirror_view( - const Space&, const Kokkos::View& src, - std::enable_if_t::is_same_memspace>* = - nullptr) { - return src; -} - -// FIXME_C++17 Improve SFINAE here. -template ::value>> -typename Impl::MirrorViewType::view_type create_mirror_view( - const Space& space, const Kokkos::View& src, - std::enable_if_t::is_same_memspace>* = - nullptr) { - return Kokkos::create_mirror(space, src); +auto create_mirror_view(const Space&, const Kokkos::View& src) { + return Impl::create_mirror_view(src, + view_alloc(typename Space::memory_space())); } +// public interface that accepts a space and a without initializing flag template ::value>> -typename Impl::MirrorViewType::view_type create_mirror_view( - Kokkos::Impl::WithoutInitializing_t wi, Space const&, - Kokkos::View const& v) { +auto create_mirror_view(Kokkos::Impl::WithoutInitializing_t wi, Space const&, + Kokkos::View const& src) { return Impl::create_mirror_view( - v, view_alloc(typename Space::memory_space{}, wi)); + src, view_alloc(typename Space::memory_space{}, wi)); } -template +// public interface that accepts arbitrary view constructor args passed by a +// view_alloc +template ::specialize>>> auto create_mirror_view(const Impl::ViewCtorProp& arg_prop, - const Kokkos::View& v) { - return Impl::create_mirror_view(v, arg_prop); + const Kokkos::View& src) { + return 
Impl::create_mirror_view(src, arg_prop); } -template -auto create_mirror_view_and_copy( - const Impl::ViewCtorProp&, - const Kokkos::View& src, - std::enable_if_t< - std::is_void::specialize>::value && - Impl::MirrorViewType< - typename Impl::ViewCtorProp::memory_space, T, - P...>::is_same_memspace>* = nullptr) { +namespace Impl { + +// collection of static asserts for create_mirror_view_and_copy +template +void check_view_ctor_args_create_mirror_view_and_copy() { using alloc_prop_input = Impl::ViewCtorProp; + static_assert( alloc_prop_input::has_memory_space, "The view constructor arguments passed to " @@ -3708,52 +3726,53 @@ auto create_mirror_view_and_copy( "The view constructor arguments passed to " "Kokkos::create_mirror_view_and_copy must " "not explicitly allow padding!"); - - // same behavior as deep_copy(src, src) - if (!alloc_prop_input::has_execution_space) - fence( - "Kokkos::create_mirror_view_and_copy: fence before returning src view"); - return src; } -template +} // namespace Impl + +// create a mirror view and deep copy it +// public interface that accepts arbitrary view constructor args passed by a +// view_alloc +template ::specialize>>> auto create_mirror_view_and_copy( - const Impl::ViewCtorProp& arg_prop, - const Kokkos::View& src, - std::enable_if_t< - std::is_void::specialize>::value && - !Impl::MirrorViewType< - typename Impl::ViewCtorProp::memory_space, T, - P...>::is_same_memspace>* = nullptr) { + [[maybe_unused]] const Impl::ViewCtorProp& arg_prop, + const Kokkos::View& src) { using alloc_prop_input = Impl::ViewCtorProp; - static_assert( - alloc_prop_input::has_memory_space, - "The view constructor arguments passed to " - "Kokkos::create_mirror_view_and_copy must include a memory space!"); - static_assert(!alloc_prop_input::has_pointer, - "The view constructor arguments passed to " - "Kokkos::create_mirror_view_and_copy must " - "not include a pointer!"); - static_assert(!alloc_prop_input::allow_padding, - "The view constructor arguments 
passed to " - "Kokkos::create_mirror_view_and_copy must " - "not explicitly allow padding!"); - using Space = typename alloc_prop_input::memory_space; - using Mirror = typename Impl::MirrorViewType::view_type; - auto arg_prop_copy = Impl::with_properties_if_unset( - arg_prop, std::string{}, WithoutInitializing, - typename Space::execution_space{}); + Impl::check_view_ctor_args_create_mirror_view_and_copy(); - std::string& label = Impl::get_property(arg_prop_copy); - if (label.empty()) label = src.label(); - auto mirror = typename Mirror::non_const_type{arg_prop_copy, src.layout()}; - if constexpr (alloc_prop_input::has_execution_space) { - deep_copy(Impl::get_property(arg_prop_copy), - mirror, src); - } else - deep_copy(mirror, src); - return mirror; + if constexpr (Impl::MirrorViewType::is_same_memspace) { + // same behavior as deep_copy(src, src) + if constexpr (!alloc_prop_input::has_execution_space) + fence( + "Kokkos::create_mirror_view_and_copy: fence before returning src " + "view"); + return src; + } else { + using Space = typename alloc_prop_input::memory_space; + using Mirror = typename Impl::MirrorViewType::view_type; + + auto arg_prop_copy = Impl::with_properties_if_unset( + arg_prop, std::string{}, WithoutInitializing, + typename Space::execution_space{}); + + std::string& label = Impl::get_property(arg_prop_copy); + if (label.empty()) label = src.label(); + auto mirror = typename Mirror::non_const_type{arg_prop_copy, src.layout()}; + if constexpr (alloc_prop_input::has_execution_space) { + deep_copy(Impl::get_property(arg_prop_copy), + mirror, src); + } else + deep_copy(mirror, src); + return mirror; + } +#if defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \ + !defined(KOKKOS_COMPILER_MSVC) + __builtin_unreachable(); +#endif } // Previously when using auto here, the intel compiler 19.3 would diff --git a/lib/kokkos/core/src/Kokkos_ExecPolicy.hpp b/lib/kokkos/core/src/Kokkos_ExecPolicy.hpp index 5f251eeb26..b8d7f77deb 100644 --- 
a/lib/kokkos/core/src/Kokkos_ExecPolicy.hpp +++ b/lib/kokkos/core/src/Kokkos_ExecPolicy.hpp @@ -40,7 +40,12 @@ struct ParallelReduceTag {}; struct ChunkSize { int value; + explicit ChunkSize(int value_) : value(value_) {} +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 + template + KOKKOS_DEPRECATED_WITH_COMMENT("ChunkSize should be constructed explicitly.") ChunkSize(int value_) : value(value_) {} +#endif }; /** \brief Execution policy for work over a range of an integral type. @@ -714,6 +719,58 @@ class TeamPolicy } }; +// Execution space not provided deduces to TeamPolicy<> + +TeamPolicy()->TeamPolicy<>; + +TeamPolicy(int, int)->TeamPolicy<>; +TeamPolicy(int, int, int)->TeamPolicy<>; +TeamPolicy(int, Kokkos::AUTO_t const&)->TeamPolicy<>; +TeamPolicy(int, Kokkos::AUTO_t const&, int)->TeamPolicy<>; +TeamPolicy(int, Kokkos::AUTO_t const&, Kokkos::AUTO_t const&)->TeamPolicy<>; +TeamPolicy(int, int, Kokkos::AUTO_t const&)->TeamPolicy<>; + +// DefaultExecutionSpace deduces to TeamPolicy<> + +TeamPolicy(DefaultExecutionSpace const&, int, int)->TeamPolicy<>; +TeamPolicy(DefaultExecutionSpace const&, int, int, int)->TeamPolicy<>; +TeamPolicy(DefaultExecutionSpace const&, int, Kokkos::AUTO_t const&) + ->TeamPolicy<>; +TeamPolicy(DefaultExecutionSpace const&, int, Kokkos::AUTO_t const&, int) + ->TeamPolicy<>; +TeamPolicy(DefaultExecutionSpace const&, int, Kokkos::AUTO_t const&, + Kokkos::AUTO_t const&) + ->TeamPolicy<>; +TeamPolicy(DefaultExecutionSpace const&, int, int, Kokkos::AUTO_t const&) + ->TeamPolicy<>; + +// ES != DefaultExecutionSpace deduces to TeamPolicy + +template >> +TeamPolicy(ES const&, int, int)->TeamPolicy; + +template >> +TeamPolicy(ES const&, int, int, int)->TeamPolicy; + +template >> +TeamPolicy(ES const&, int, Kokkos::AUTO_t const&)->TeamPolicy; + +template >> +TeamPolicy(ES const&, int, Kokkos::AUTO_t const&, int)->TeamPolicy; + +template >> +TeamPolicy(ES const&, int, Kokkos::AUTO_t const&, Kokkos::AUTO_t const&) + ->TeamPolicy; + +template >> 
+TeamPolicy(ES const&, int, int, Kokkos::AUTO_t const&)->TeamPolicy; + namespace Impl { template @@ -968,9 +1025,9 @@ struct TeamThreadMDRange, TeamHandle> { static constexpr auto par_vector = Impl::TeamMDRangeParVector::NotParVector; static constexpr Iterate direction = - OuterDir == Iterate::Default - ? layout_iterate_type_selector::outer_iteration_pattern - : iter; + OuterDir == Iterate::Default ? Impl::layout_iterate_type_selector< + ArrayLayout>::outer_iteration_pattern + : iter; template KOKKOS_FUNCTION TeamThreadMDRange(TeamHandleType const& team_, Args&&... args) @@ -983,7 +1040,7 @@ struct TeamThreadMDRange, TeamHandle> { }; template -TeamThreadMDRange(TeamHandle const&, Args&&...) +KOKKOS_DEDUCTION_GUIDE TeamThreadMDRange(TeamHandle const&, Args&&...) ->TeamThreadMDRange, TeamHandle>; template @@ -1004,9 +1061,9 @@ struct ThreadVectorMDRange, TeamHandle> { static constexpr auto par_vector = Impl::TeamMDRangeParVector::ParVector; static constexpr Iterate direction = - OuterDir == Iterate::Default - ? layout_iterate_type_selector::outer_iteration_pattern - : iter; + OuterDir == Iterate::Default ? Impl::layout_iterate_type_selector< + ArrayLayout>::outer_iteration_pattern + : iter; template KOKKOS_INLINE_FUNCTION ThreadVectorMDRange(TeamHandleType const& team_, @@ -1020,7 +1077,7 @@ struct ThreadVectorMDRange, TeamHandle> { }; template -ThreadVectorMDRange(TeamHandle const&, Args&&...) +KOKKOS_DEDUCTION_GUIDE ThreadVectorMDRange(TeamHandle const&, Args&&...) ->ThreadVectorMDRange, TeamHandle>; template @@ -1041,9 +1098,9 @@ struct TeamVectorMDRange, TeamHandle> { static constexpr auto par_vector = Impl::TeamMDRangeParVector::ParVector; static constexpr Iterate direction = - iter == Iterate::Default - ? layout_iterate_type_selector::outer_iteration_pattern - : iter; + iter == Iterate::Default ? 
Impl::layout_iterate_type_selector< + ArrayLayout>::outer_iteration_pattern + : iter; template KOKKOS_INLINE_FUNCTION TeamVectorMDRange(TeamHandleType const& team_, @@ -1057,7 +1114,7 @@ struct TeamVectorMDRange, TeamHandle> { }; template -TeamVectorMDRange(TeamHandle const&, Args&&...) +KOKKOS_DEDUCTION_GUIDE TeamVectorMDRange(TeamHandle const&, Args&&...) ->TeamVectorMDRange, TeamHandle>; template #include #include +#ifdef KOKKOS_ENABLE_IMPL_MDSPAN +#include +#else +#include +#endif namespace Kokkos { + +#ifndef KOKKOS_ENABLE_IMPL_MDSPAN +constexpr size_t dynamic_extent = std::numeric_limits::max(); +#endif + namespace Experimental { -constexpr ptrdiff_t dynamic_extent = -1; - -template +template struct Extents { /* TODO @enhancement flesh this out more */ }; -template +template struct PrependExtent; -template +template struct PrependExtent, NewExtent> { using type = Extents; }; -template +template struct AppendExtent; -template +template struct AppendExtent, NewExtent> { using type = Extents; }; - } // end namespace Experimental namespace Impl { @@ -75,33 +82,32 @@ struct _parse_impl { // We have to treat the case of int**[x] specially, since it *doesn't* go // backwards -template +template struct _parse_impl, std::enable_if_t<_all_remaining_extents_dynamic::value>> - : _parse_impl> { -}; + : _parse_impl> {}; // int*(*[x])[y] should still work also (meaning int[][x][][y]) -template +template struct _parse_impl< T*, Kokkos::Experimental::Extents, std::enable_if_t::value>> { using _next = Kokkos::Experimental::AppendExtent< typename _parse_impl, void>::type, - Kokkos::Experimental::dynamic_extent>; + Kokkos::dynamic_extent>; using type = typename _next::type; }; -template +template struct _parse_impl, void> - : _parse_impl< - T, Kokkos::Experimental::Extents // TODO @pedantic this - // could be a - // narrowing cast - > {}; + : _parse_impl // TODO @pedantic + // this could be a + // narrowing cast + > {}; } // end namespace _parse_view_extents_impl @@ -111,38 
+117,34 @@ struct ParseViewExtents { DataType, Kokkos::Experimental::Extents<>>::type; }; -template +template struct ApplyExtent { using type = ValueType[Ext]; }; template -struct ApplyExtent { +struct ApplyExtent { using type = ValueType*; }; -template +template struct ApplyExtent { using type = typename ApplyExtent::type[N]; }; -template +template struct ApplyExtent { using type = ValueType * [Ext]; }; template -struct ApplyExtent { - using type = - typename ApplyExtent::type*; +struct ApplyExtent { + using type = typename ApplyExtent::type*; }; template -struct ApplyExtent { - using type = - typename ApplyExtent::type[N]; +struct ApplyExtent { + using type = typename ApplyExtent::type[N]; }; } // end namespace Impl diff --git a/lib/kokkos/core/src/Kokkos_Graph.hpp b/lib/kokkos/core/src/Kokkos_Graph.hpp index 643bdcc02c..9cc6650e26 100644 --- a/lib/kokkos/core/src/Kokkos_Graph.hpp +++ b/lib/kokkos/core/src/Kokkos_Graph.hpp @@ -167,6 +167,9 @@ Graph create_graph(Closure&& arg_closure) { #include #endif #endif +#ifdef SYCL_EXT_ONEAPI_GRAPH +#include +#endif #ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_GRAPH #undef KOKKOS_IMPL_PUBLIC_INCLUDE #undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_GRAPH diff --git a/lib/kokkos/core/src/Kokkos_HostSpace.hpp b/lib/kokkos/core/src/Kokkos_HostSpace.hpp index a1fb0f5a67..8b5f29f95b 100644 --- a/lib/kokkos/core/src/Kokkos_HostSpace.hpp +++ b/lib/kokkos/core/src/Kokkos_HostSpace.hpp @@ -113,7 +113,6 @@ class HostSpace { const size_t arg_alloc_size, const size_t arg_logical_size = 0) const; - private: void* impl_allocate(const char* arg_label, const size_t arg_alloc_size, const size_t arg_logical_size = 0, const Kokkos::Tools::SpaceHandle = @@ -124,7 +123,6 @@ class HostSpace { const Kokkos::Tools::SpaceHandle = Kokkos::Tools::make_space_handle(name())) const; - public: /**\brief Return Name of the MemorySpace */ static constexpr const char* name() { return m_name; } diff --git a/lib/kokkos/core/src/Kokkos_Layout.hpp 
b/lib/kokkos/core/src/Kokkos_Layout.hpp index ca4d956784..37b80e54a8 100644 --- a/lib/kokkos/core/src/Kokkos_Layout.hpp +++ b/lib/kokkos/core/src/Kokkos_Layout.hpp @@ -217,81 +217,12 @@ enum class Iterate { Right // Right indices stride fastest }; -// To check for LayoutTiled -// This is to hide extra compile-time 'identifier' info within the LayoutTiled -// class by not relying on template specialization to include the ArgN*'s -template -struct is_layouttiled : std::false_type {}; - -template -struct is_layouttiled> - : std::true_type {}; - -namespace Experimental { - -/// LayoutTiled -// Must have Rank >= 2 -template < - Kokkos::Iterate OuterP, Kokkos::Iterate InnerP, unsigned ArgN0, - unsigned ArgN1, unsigned ArgN2 = 0, unsigned ArgN3 = 0, unsigned ArgN4 = 0, - unsigned ArgN5 = 0, unsigned ArgN6 = 0, unsigned ArgN7 = 0, - bool IsPowerOfTwo = - (Kokkos::Impl::is_integral_power_of_two(ArgN0) && - Kokkos::Impl::is_integral_power_of_two(ArgN1) && - (Kokkos::Impl::is_integral_power_of_two(ArgN2) || (ArgN2 == 0)) && - (Kokkos::Impl::is_integral_power_of_two(ArgN3) || (ArgN3 == 0)) && - (Kokkos::Impl::is_integral_power_of_two(ArgN4) || (ArgN4 == 0)) && - (Kokkos::Impl::is_integral_power_of_two(ArgN5) || (ArgN5 == 0)) && - (Kokkos::Impl::is_integral_power_of_two(ArgN6) || (ArgN6 == 0)) && - (Kokkos::Impl::is_integral_power_of_two(ArgN7) || (ArgN7 == 0)))> -struct LayoutTiled { - static_assert(IsPowerOfTwo, - "LayoutTiled must be given power-of-two tile dimensions"); - - using array_layout = LayoutTiled; - static constexpr Iterate outer_pattern = OuterP; - static constexpr Iterate inner_pattern = InnerP; - - enum { N0 = ArgN0 }; - enum { N1 = ArgN1 }; - enum { N2 = ArgN2 }; - enum { N3 = ArgN3 }; - enum { N4 = ArgN4 }; - enum { N5 = ArgN5 }; - enum { N6 = ArgN6 }; - enum { N7 = ArgN7 }; - - size_t dimension[ARRAY_LAYOUT_MAX_RANK]; - - enum : bool { is_extent_constructible = true }; - - LayoutTiled(LayoutTiled const&) = default; - LayoutTiled(LayoutTiled&&) = default; - 
LayoutTiled& operator=(LayoutTiled const&) = default; - LayoutTiled& operator=(LayoutTiled&&) = default; - - KOKKOS_INLINE_FUNCTION - explicit constexpr LayoutTiled(size_t argN0 = 0, size_t argN1 = 0, - size_t argN2 = 0, size_t argN3 = 0, - size_t argN4 = 0, size_t argN5 = 0, - size_t argN6 = 0, size_t argN7 = 0) - : dimension{argN0, argN1, argN2, argN3, argN4, argN5, argN6, argN7} {} - - friend bool operator==(const LayoutTiled& left, const LayoutTiled& right) { - for (unsigned int rank = 0; rank < ARRAY_LAYOUT_MAX_RANK; ++rank) - if (left.dimension[rank] != right.dimension[rank]) return false; - return true; - } - - friend bool operator!=(const LayoutTiled& left, const LayoutTiled& right) { - return !(left == right); - } -}; - -} // namespace Experimental +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 +template +struct KOKKOS_DEPRECATED is_layouttiled : std::false_type {}; +#endif +namespace Impl { // For use with view_copy template struct layout_iterate_type_selector { @@ -320,42 +251,13 @@ struct layout_iterate_type_selector { static const Kokkos::Iterate inner_iteration_pattern = Kokkos::Iterate::Default; }; +} // namespace Impl -template -struct layout_iterate_type_selector> { - static const Kokkos::Iterate outer_iteration_pattern = Kokkos::Iterate::Left; - static const Kokkos::Iterate inner_iteration_pattern = Kokkos::Iterate::Left; -}; - -template -struct layout_iterate_type_selector> { - static const Kokkos::Iterate outer_iteration_pattern = Kokkos::Iterate::Right; - static const Kokkos::Iterate inner_iteration_pattern = Kokkos::Iterate::Left; -}; - -template -struct layout_iterate_type_selector> { - static const Kokkos::Iterate outer_iteration_pattern = Kokkos::Iterate::Left; - static const Kokkos::Iterate inner_iteration_pattern = Kokkos::Iterate::Right; -}; - -template -struct layout_iterate_type_selector> { - static const Kokkos::Iterate outer_iteration_pattern = Kokkos::Iterate::Right; - static const Kokkos::Iterate inner_iteration_pattern = 
Kokkos::Iterate::Right; -}; +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 +template +using layout_iterate_type_selector KOKKOS_DEPRECATED = + Impl::layout_iterate_type_selector; +#endif } // namespace Kokkos diff --git a/lib/kokkos/core/src/Kokkos_Macros.hpp b/lib/kokkos/core/src/Kokkos_Macros.hpp index b255d2a519..0a0acd303f 100644 --- a/lib/kokkos/core/src/Kokkos_Macros.hpp +++ b/lib/kokkos/core/src/Kokkos_Macros.hpp @@ -55,9 +55,22 @@ #ifndef KOKKOS_DONT_INCLUDE_CORE_CONFIG_H #include +#include #include #endif +#if !defined(KOKKOS_ENABLE_CXX17) +#if __has_include() +#include +#else +#include +#endif +#if defined(_GLIBCXX_RELEASE) && _GLIBCXX_RELEASE < 10 +#error \ + "Compiling with support for C++20 or later requires a libstdc++ version later than 9" +#endif +#endif + //---------------------------------------------------------------------------- /** Pick up compiler specific #define macros: * @@ -332,6 +345,10 @@ #define KOKKOS_DEFAULTED_FUNCTION #endif +#if !defined(KOKKOS_DEDUCTION_GUIDE) +#define KOKKOS_DEDUCTION_GUIDE +#endif + #if !defined(KOKKOS_IMPL_HOST_FUNCTION) #define KOKKOS_IMPL_HOST_FUNCTION #endif @@ -562,8 +579,44 @@ static constexpr bool kokkos_omp_on_host() { return false; } #define KOKKOS_IMPL_WARNING(desc) KOKKOS_IMPL_DO_PRAGMA(message(#desc)) #endif +// clang-format off +#if defined(__NVCOMPILER) + #define KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_PUSH() \ + _Pragma("diag_suppress 1216") + #define KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_POP() \ + _Pragma("diag_default 1216") +#elif defined(__EDG__) + #define KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_PUSH() \ + _Pragma("warning push") \ + _Pragma("warning disable 1478") + #define KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_POP() \ + _Pragma("warning pop") +#elif defined(__GNUC__) || defined(__clang__) + #define KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_PUSH() \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") + #define 
KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_POP() \ + _Pragma("GCC diagnostic pop") +#elif defined(_MSC_VER) + #define KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_PUSH() \ + _Pragma("warning(push)") \ + _Pragma("warning(disable: 4996)") + #define KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_POP() \ + _Pragma("warning(pop)") +#else + #define KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_PUSH() + #define KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_POP() +#endif +// clang-format on + #define KOKKOS_ATTRIBUTE_NODISCARD [[nodiscard]] +#ifndef KOKKOS_ENABLE_CXX17 +#define KOKKOS_IMPL_ATTRIBUTE_UNLIKELY [[unlikely]] +#else +#define KOKKOS_IMPL_ATTRIBUTE_UNLIKELY +#endif + #if (defined(KOKKOS_COMPILER_GNU) || defined(KOKKOS_COMPILER_CLANG) || \ defined(KOKKOS_COMPILER_INTEL) || defined(KOKKOS_COMPILER_INTEL_LLVM) || \ defined(KOKKOS_COMPILER_NVHPC)) && \ diff --git a/lib/kokkos/core/src/Kokkos_MathematicalFunctions.hpp b/lib/kokkos/core/src/Kokkos_MathematicalFunctions.hpp index 3fead8dd29..19967782e5 100644 --- a/lib/kokkos/core/src/Kokkos_MathematicalFunctions.hpp +++ b/lib/kokkos/core/src/Kokkos_MathematicalFunctions.hpp @@ -277,12 +277,20 @@ KOKKOS_INLINE_FUNCTION long long abs(long long n) { #endif } KOKKOS_INLINE_FUNCTION float abs(float x) { +#ifdef KOKKOS_ENABLE_SYCL + return sycl::fabs(x); // sycl::abs is only provided for integral types +#else using KOKKOS_IMPL_MATH_FUNCTIONS_NAMESPACE::abs; return abs(x); +#endif } KOKKOS_INLINE_FUNCTION double abs(double x) { +#ifdef KOKKOS_ENABLE_SYCL + return sycl::fabs(x); // sycl::abs is only provided for integral types +#else using KOKKOS_IMPL_MATH_FUNCTIONS_NAMESPACE::abs; return abs(x); +#endif } inline long double abs(long double x) { using std::abs; diff --git a/lib/kokkos/core/src/Kokkos_Pair.hpp b/lib/kokkos/core/src/Kokkos_Pair.hpp index 9be8d8d7aa..e569fefc14 100644 --- a/lib/kokkos/core/src/Kokkos_Pair.hpp +++ b/lib/kokkos/core/src/Kokkos_Pair.hpp @@ -413,12 +413,13 @@ KOKKOS_FORCEINLINE_FUNCTION pair tie(T1& x, T2& y) { return (pair(x, 
y)); } +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 // // Specialization of Kokkos::pair for a \c void second argument. This // is not actually a "pair"; it only contains one element, the first. // template -struct pair { +struct KOKKOS_DEPRECATED pair { using first_type = T1; using second_type = void; @@ -448,41 +449,48 @@ struct pair { // Specialization of relational operators for Kokkos::pair. // +#if defined(KOKKOS_COMPILER_GNU) && (KOKKOS_COMPILER_GNU < 1110) +KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_PUSH() +#endif template -KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator==( +KOKKOS_DEPRECATED KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator==( const pair& lhs, const pair& rhs) { return lhs.first == rhs.first; } template -KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator!=( +KOKKOS_DEPRECATED KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator!=( const pair& lhs, const pair& rhs) { return !(lhs == rhs); } template -KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator<( +KOKKOS_DEPRECATED KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator<( const pair& lhs, const pair& rhs) { return lhs.first < rhs.first; } template -KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator<=( +KOKKOS_DEPRECATED KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator<=( const pair& lhs, const pair& rhs) { return !(rhs < lhs); } template -KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator>( +KOKKOS_DEPRECATED KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator>( const pair& lhs, const pair& rhs) { return rhs < lhs; } template -KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator>=( +KOKKOS_DEPRECATED KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator>=( const pair& lhs, const pair& rhs) { return !(lhs < rhs); } +#if defined(KOKKOS_COMPILER_GNU) && (KOKKOS_COMPILER_GNU < 1110) +KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_POP() +#endif +#endif namespace Impl { template diff --git a/lib/kokkos/core/src/Kokkos_Parallel.hpp b/lib/kokkos/core/src/Kokkos_Parallel.hpp index 484f6c0d5f..122239df79 
100644 --- a/lib/kokkos/core/src/Kokkos_Parallel.hpp +++ b/lib/kokkos/core/src/Kokkos_Parallel.hpp @@ -137,9 +137,9 @@ inline void parallel_for(const std::string& str, const ExecPolicy& policy, ExecPolicy inner_policy = policy; Kokkos::Tools::Impl::begin_parallel_for(inner_policy, functor, str, kpID); - Kokkos::Impl::shared_allocation_tracking_disable(); - Impl::ParallelFor closure(functor, inner_policy); - Kokkos::Impl::shared_allocation_tracking_enable(); + auto closure = + Kokkos::Impl::construct_with_shared_allocation_tracking_disabled< + Impl::ParallelFor>(functor, inner_policy); closure.execute(); @@ -352,10 +352,10 @@ inline void parallel_scan(const std::string& str, const ExecutionPolicy& policy, ExecutionPolicy inner_policy = policy; Kokkos::Tools::Impl::begin_parallel_scan(inner_policy, functor, str, kpID); - Kokkos::Impl::shared_allocation_tracking_disable(); - Impl::ParallelScan closure(functor, - inner_policy); - Kokkos::Impl::shared_allocation_tracking_enable(); + auto closure = + Kokkos::Impl::construct_with_shared_allocation_tracking_disabled< + Impl::ParallelScan>(functor, + inner_policy); closure.execute(); @@ -398,18 +398,19 @@ inline void parallel_scan(const std::string& str, const ExecutionPolicy& policy, Kokkos::Tools::Impl::begin_parallel_scan(inner_policy, functor, str, kpID); if constexpr (Kokkos::is_view::value) { - Kokkos::Impl::shared_allocation_tracking_disable(); - Impl::ParallelScanWithTotal - closure(functor, inner_policy, return_value); - Kokkos::Impl::shared_allocation_tracking_enable(); + auto closure = + Kokkos::Impl::construct_with_shared_allocation_tracking_disabled< + Impl::ParallelScanWithTotal>( + functor, inner_policy, return_value); closure.execute(); } else { - Kokkos::Impl::shared_allocation_tracking_disable(); Kokkos::View view(&return_value); - Impl::ParallelScanWithTotal - closure(functor, inner_policy, view); - Kokkos::Impl::shared_allocation_tracking_enable(); + auto closure = + 
Kokkos::Impl::construct_with_shared_allocation_tracking_disabled< + Impl::ParallelScanWithTotal>(functor, inner_policy, + view); closure.execute(); } diff --git a/lib/kokkos/core/src/Kokkos_Parallel_Reduce.hpp b/lib/kokkos/core/src/Kokkos_Parallel_Reduce.hpp index d499eba6dc..53913266f1 100644 --- a/lib/kokkos/core/src/Kokkos_Parallel_Reduce.hpp +++ b/lib/kokkos/core/src/Kokkos_Parallel_Reduce.hpp @@ -72,7 +72,7 @@ struct Sum { }; template -Sum(View const&) +KOKKOS_DEDUCTION_GUIDE Sum(View const&) ->Sum::memory_space>; template @@ -117,7 +117,7 @@ struct Prod { }; template -Prod(View const&) +KOKKOS_DEDUCTION_GUIDE Prod(View const&) ->Prod::memory_space>; template @@ -164,7 +164,7 @@ struct Min { }; template -Min(View const&) +KOKKOS_DEDUCTION_GUIDE Min(View const&) ->Min::memory_space>; template @@ -212,7 +212,7 @@ struct Max { }; template -Max(View const&) +KOKKOS_DEDUCTION_GUIDE Max(View const&) ->Max::memory_space>; template @@ -258,7 +258,7 @@ struct LAnd { }; template -LAnd(View const&) +KOKKOS_DEDUCTION_GUIDE LAnd(View const&) ->LAnd::memory_space>; template @@ -305,7 +305,7 @@ struct LOr { }; template -LOr(View const&) +KOKKOS_DEDUCTION_GUIDE LOr(View const&) ->LOr::memory_space>; template @@ -352,7 +352,7 @@ struct BAnd { }; template -BAnd(View const&) +KOKKOS_DEDUCTION_GUIDE BAnd(View const&) ->BAnd::memory_space>; template @@ -399,7 +399,7 @@ struct BOr { }; template -BOr(View const&) +KOKKOS_DEDUCTION_GUIDE BOr(View const&) ->BOr::memory_space>; template @@ -458,7 +458,8 @@ struct MinLoc { }; template -MinLoc(View, Properties...> const&) +KOKKOS_DEDUCTION_GUIDE MinLoc( + View, Properties...> const&) ->MinLoc, Properties...>::memory_space>; @@ -513,7 +514,8 @@ struct MaxLoc { }; template -MaxLoc(View, Properties...> const&) +KOKKOS_DEDUCTION_GUIDE MaxLoc( + View, Properties...> const&) ->MaxLoc, Properties...>::memory_space>; @@ -577,7 +579,7 @@ struct MinMax { }; template -MinMax(View, Properties...> const&) +KOKKOS_DEDUCTION_GUIDE MinMax(View, 
Properties...> const&) ->MinMax, Properties...>::memory_space>; @@ -646,7 +648,8 @@ struct MinMaxLoc { }; template -MinMaxLoc(View, Properties...> const&) +KOKKOS_DEDUCTION_GUIDE MinMaxLoc( + View, Properties...> const&) ->MinMaxLoc, Properties...>::memory_space>; @@ -713,7 +716,8 @@ struct MaxFirstLoc { }; template -MaxFirstLoc(View, Properties...> const&) +KOKKOS_DEDUCTION_GUIDE MaxFirstLoc( + View, Properties...> const&) ->MaxFirstLoc, Properties...>::memory_space>; @@ -782,7 +786,7 @@ struct MaxFirstLocCustomComparator { template -MaxFirstLocCustomComparator( +KOKKOS_DEDUCTION_GUIDE MaxFirstLocCustomComparator( View, Properties...> const&, ComparatorType) ->MaxFirstLocCustomComparator, @@ -846,7 +850,8 @@ struct MinFirstLoc { }; template -MinFirstLoc(View, Properties...> const&) +KOKKOS_DEDUCTION_GUIDE MinFirstLoc( + View, Properties...> const&) ->MinFirstLoc, Properties...>::memory_space>; @@ -915,7 +920,7 @@ struct MinFirstLocCustomComparator { template -MinFirstLocCustomComparator( +KOKKOS_DEDUCTION_GUIDE MinFirstLocCustomComparator( View, Properties...> const&, ComparatorType) ->MinFirstLocCustomComparator, @@ -990,7 +995,8 @@ struct MinMaxFirstLastLoc { }; template -MinMaxFirstLastLoc(View, Properties...> const&) +KOKKOS_DEDUCTION_GUIDE MinMaxFirstLastLoc( + View, Properties...> const&) ->MinMaxFirstLastLoc, Properties...>::memory_space>; @@ -1069,7 +1075,7 @@ struct MinMaxFirstLastLocCustomComparator { template -MinMaxFirstLastLocCustomComparator( +KOKKOS_DEDUCTION_GUIDE MinMaxFirstLastLocCustomComparator( View, Properties...> const&, ComparatorType) ->MinMaxFirstLastLocCustomComparator< Scalar, Index, ComparatorType, @@ -1133,7 +1139,8 @@ struct FirstLoc { }; template -FirstLoc(View, Properties...> const&) +KOKKOS_DEDUCTION_GUIDE FirstLoc( + View, Properties...> const&) ->FirstLoc, Properties...>::memory_space>; @@ -1194,7 +1201,7 @@ struct LastLoc { }; template -LastLoc(View, Properties...> const&) +KOKKOS_DEDUCTION_GUIDE LastLoc(View, Properties...> 
const&) ->LastLoc, Properties...>::memory_space>; @@ -1261,7 +1268,8 @@ struct StdIsPartitioned { }; template -StdIsPartitioned(View, Properties...> const&) +KOKKOS_DEDUCTION_GUIDE StdIsPartitioned( + View, Properties...> const&) ->StdIsPartitioned, Properties...>::memory_space>; @@ -1323,7 +1331,8 @@ struct StdPartitionPoint { }; template -StdPartitionPoint(View, Properties...> const&) +KOKKOS_DEDUCTION_GUIDE StdPartitionPoint( + View, Properties...> const&) ->StdPartitionPoint, Properties...>::memory_space>; @@ -1502,18 +1511,18 @@ struct ParallelReduceAdaptor { using Analysis = FunctorAnalysis; - Kokkos::Impl::shared_allocation_tracking_disable(); - CombinedFunctorReducer functor_reducer( - functor, typename Analysis::Reducer( - ReducerSelector::select(functor, return_value))); - // FIXME Remove "Wrapper" once all backends implement the new interface - Impl::ParallelReduce::execution_space> - closure(functor_reducer, inner_policy, - return_value_adapter::return_value(return_value, functor)); - Kokkos::Impl::shared_allocation_tracking_enable(); + using CombinedFunctorReducerType = + CombinedFunctorReducer; + auto closure = construct_with_shared_allocation_tracking_disabled< + Impl::ParallelReduce::execution_space>>( + CombinedFunctorReducerType( + functor, typename Analysis::Reducer( + ReducerSelector::select(functor, return_value))), + inner_policy, + return_value_adapter::return_value(return_value, functor)); closure.execute(); Kokkos::Tools::Impl::end_parallel_reduce( diff --git a/lib/kokkos/core/src/Kokkos_View.hpp b/lib/kokkos/core/src/Kokkos_View.hpp index 484a0e6f62..04d1fcf151 100644 --- a/lib/kokkos/core/src/Kokkos_View.hpp +++ b/lib/kokkos/core/src/Kokkos_View.hpp @@ -38,6 +38,8 @@ static_assert(false, #ifdef KOKKOS_ENABLE_IMPL_MDSPAN #include +#include +#include #endif #include @@ -372,6 +374,35 @@ struct ViewTraits { //------------------------------------ }; +#ifdef KOKKOS_ENABLE_IMPL_MDSPAN +namespace Impl { +struct UnsupportedKokkosArrayLayout; + 
+template +struct MDSpanViewTraits { + using mdspan_type = UnsupportedKokkosArrayLayout; +}; + +// "Natural" mdspan for a view if the View's ArrayLayout is supported. +template +struct MDSpanViewTraits::type>> { + using index_type = std::size_t; + using extents_type = + typename Impl::ExtentsFromDataType::type; + using mdspan_layout_type = + typename Impl::LayoutFromArrayLayout::type; + using accessor_type = Impl::SpaceAwareAccessor< + typename Traits::memory_space, + Kokkos::default_accessor>; + using mdspan_type = mdspan; +}; +} // namespace Impl +#endif // KOKKOS_ENABLE_IMPL_MDSPAN + /** \class View * \brief View to an array of data. * @@ -522,7 +553,6 @@ constexpr bool is_assignable(const Kokkos::View& dst, //---------------------------------------------------------------------------- #include -#include //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- @@ -541,6 +571,8 @@ inline constexpr Kokkos::ALL_t ALL{}; #pragma omp end declare target #endif +inline constexpr Kokkos::Impl::SequentialHostInit_t SequentialHostInit{}; + inline constexpr Kokkos::Impl::WithoutInitializing_t WithoutInitializing{}; inline constexpr Kokkos::Impl::AllowPadding_t AllowPadding{}; @@ -923,57 +955,30 @@ class View : public ViewTraits { template KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< (Kokkos::Impl::always_true::value && // - (2 == rank) && is_default_map && is_layout_left && (rank_dynamic == 0)), + (2 == rank) && is_default_map && + (is_layout_left || is_layout_right || is_layout_stride)), reference_type> operator()(I0 i0, I1 i1) const { check_operator_parens_valid_args(i0, i1); KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0, i1) - return m_map.m_impl_handle[i0 + m_map.m_impl_offset.m_dim.N0 * i1]; - } - - template - KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< - (Kokkos::Impl::always_true::value && // - (2 == rank) && is_default_map && is_layout_left && (rank_dynamic 
!= 0)), - reference_type> - operator()(I0 i0, I1 i1) const { - check_operator_parens_valid_args(i0, i1); - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0, i1) - return m_map.m_impl_handle[i0 + m_map.m_impl_offset.m_stride * i1]; - } - - template - KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< - (Kokkos::Impl::always_true::value && // - (2 == rank) && is_default_map && is_layout_right && (rank_dynamic == 0)), - reference_type> - operator()(I0 i0, I1 i1) const { - check_operator_parens_valid_args(i0, i1); - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0, i1) - return m_map.m_impl_handle[i1 + m_map.m_impl_offset.m_dim.N1 * i0]; - } - - template - KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< - (Kokkos::Impl::always_true::value && // - (2 == rank) && is_default_map && is_layout_right && (rank_dynamic != 0)), - reference_type> - operator()(I0 i0, I1 i1) const { - check_operator_parens_valid_args(i0, i1); - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0, i1) - return m_map.m_impl_handle[i1 + m_map.m_impl_offset.m_stride * i0]; - } - - template - KOKKOS_FORCEINLINE_FUNCTION - std::enable_if_t<(Kokkos::Impl::always_true::value && // - (2 == rank) && is_default_map && is_layout_stride), - reference_type> - operator()(I0 i0, I1 i1) const { - check_operator_parens_valid_args(i0, i1); - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0, i1) - return m_map.m_impl_handle[i0 * m_map.m_impl_offset.m_stride.S0 + - i1 * m_map.m_impl_offset.m_stride.S1]; + if constexpr (is_layout_left) { + if constexpr (rank_dynamic == 0) + return m_map.m_impl_handle[i0 + m_map.m_impl_offset.m_dim.N0 * i1]; + else + return m_map.m_impl_handle[i0 + m_map.m_impl_offset.m_stride * i1]; + } else if constexpr (is_layout_right) { + if constexpr (rank_dynamic == 0) + return m_map.m_impl_handle[i1 + m_map.m_impl_offset.m_dim.N1 * i0]; + else + return m_map.m_impl_handle[i1 + m_map.m_impl_offset.m_stride * i0]; + } else { + static_assert(is_layout_stride); + return m_map.m_impl_handle[i0 * 
m_map.m_impl_offset.m_stride.S0 + + i1 * m_map.m_impl_offset.m_stride.S1]; + } +#if defined KOKKOS_COMPILER_INTEL + __builtin_unreachable(); +#endif } // Rank 0 -> 8 operator() except for rank-1 and rank-2 with default map which @@ -1066,57 +1071,30 @@ class View : public ViewTraits { template KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< (Kokkos::Impl::always_true::value && (2 == rank) && - is_default_map && is_layout_left && (rank_dynamic == 0)), + is_default_map && + (is_layout_left || is_layout_right || is_layout_stride)), reference_type> access(I0 i0, I1 i1, Is... extra) const { check_access_member_function_valid_args(i0, i1, extra...); KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0, i1, extra...) - return m_map.m_impl_handle[i0 + m_map.m_impl_offset.m_dim.N0 * i1]; - } - - template - KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< - (Kokkos::Impl::always_true::value && (2 == rank) && - is_default_map && is_layout_left && (rank_dynamic != 0)), - reference_type> - access(I0 i0, I1 i1, Is... extra) const { - check_access_member_function_valid_args(i0, i1, extra...); - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0, i1, extra...) - return m_map.m_impl_handle[i0 + m_map.m_impl_offset.m_stride * i1]; - } - - template - KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< - (Kokkos::Impl::always_true::value && (2 == rank) && - is_default_map && is_layout_right && (rank_dynamic == 0)), - reference_type> - access(I0 i0, I1 i1, Is... extra) const { - check_access_member_function_valid_args(i0, i1, extra...); - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0, i1, extra...) - return m_map.m_impl_handle[i1 + m_map.m_impl_offset.m_dim.N1 * i0]; - } - - template - KOKKOS_FORCEINLINE_FUNCTION std::enable_if_t< - (Kokkos::Impl::always_true::value && (2 == rank) && - is_default_map && is_layout_right && (rank_dynamic != 0)), - reference_type> - access(I0 i0, I1 i1, Is... 
extra) const { - check_access_member_function_valid_args(i0, i1, extra...); - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0, i1, extra...) - return m_map.m_impl_handle[i1 + m_map.m_impl_offset.m_stride * i0]; - } - - template - KOKKOS_FORCEINLINE_FUNCTION - std::enable_if_t<(Kokkos::Impl::always_true::value && - (2 == rank) && is_default_map && is_layout_stride), - reference_type> - access(I0 i0, I1 i1, Is... extra) const { - check_access_member_function_valid_args(i0, i1, extra...); - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(m_track, m_map, i0, i1, extra...) - return m_map.m_impl_handle[i0 * m_map.m_impl_offset.m_stride.S0 + - i1 * m_map.m_impl_offset.m_stride.S1]; + if constexpr (is_layout_left) { + if constexpr (rank_dynamic == 0) + return m_map.m_impl_handle[i0 + m_map.m_impl_offset.m_dim.N0 * i1]; + else + return m_map.m_impl_handle[i0 + m_map.m_impl_offset.m_stride * i1]; + } else if constexpr (is_layout_right) { + if constexpr (rank_dynamic == 0) + return m_map.m_impl_handle[i1 + m_map.m_impl_offset.m_dim.N1 * i0]; + else + return m_map.m_impl_handle[i1 + m_map.m_impl_offset.m_stride * i0]; + } else { + static_assert(is_layout_stride); + return m_map.m_impl_handle[i0 * m_map.m_impl_offset.m_stride.S0 + + i1 * m_map.m_impl_offset.m_stride.S1]; + } +#if defined KOKKOS_COMPILER_INTEL + __builtin_unreachable(); +#endif } //------------------------------ @@ -1442,8 +1420,7 @@ class View : public ViewTraits { std::is_same_v || std::is_same_v || - is_layouttiled::value) { + Kokkos::LayoutStride>) { size_t i0 = arg_layout.dimension[0]; size_t i1 = arg_layout.dimension[1]; size_t i2 = arg_layout.dimension[2]; @@ -1495,8 +1472,7 @@ class View : public ViewTraits { std::is_same_v || std::is_same_v || - is_layouttiled::value) { + Kokkos::LayoutStride>) { size_t i0 = arg_layout.dimension[0]; size_t i1 = arg_layout.dimension[1]; size_t i2 = arg_layout.dimension[2]; @@ -1725,6 +1701,79 @@ class View : public ViewTraits { "Layout is not constructible from extent arguments. 
Use " "overload taking a layout object instead."); } + + //---------------------------------------- + // MDSpan converting constructors +#ifdef KOKKOS_ENABLE_IMPL_MDSPAN + template ::mdspan_type> + KOKKOS_INLINE_FUNCTION +#ifndef KOKKOS_ENABLE_CXX17 + explicit(traits::is_managed) +#endif + View(const typename Impl::MDSpanViewTraits::mdspan_type& mds, + std::enable_if_t< + !std::is_same_v>* = + nullptr) + : View(mds.data_handle(), + Impl::array_layout_from_mapping< + typename traits::array_layout, + typename Impl::MDSpanViewTraits::mdspan_type>( + mds.mapping())) { + } + + template + KOKKOS_INLINE_FUNCTION +#ifndef KOKKOS_ENABLE_CXX17 + explicit(!std::is_convertible_v< + Kokkos::mdspan, + typename Impl::MDSpanViewTraits::mdspan_type>) +#endif + View(const Kokkos::mdspan& mds) + : View(typename Impl::MDSpanViewTraits::mdspan_type(mds)) { + } + + //---------------------------------------- + // Conversion to MDSpan + template ::mdspan_type, + typename = std::enable_if_t, + std::false_type, + std::is_assignable, + ImplNaturalMDSpanType>>::value>> + KOKKOS_INLINE_FUNCTION constexpr operator mdspan< + OtherElementType, OtherExtents, OtherLayoutPolicy, OtherAccessor>() { + using mdspan_type = typename Impl::MDSpanViewTraits::mdspan_type; + return mdspan_type{data(), + Impl::mapping_from_view_mapping(m_map)}; + } + + template >, + typename = std::enable_if_t>> + KOKKOS_INLINE_FUNCTION constexpr auto to_mdspan( + const OtherAccessorType& other_accessor = + typename Impl::MDSpanViewTraits::accessor_type()) { + using mdspan_type = typename Impl::MDSpanViewTraits::mdspan_type; + using ret_mdspan_type = + mdspan; + return ret_mdspan_type{data(), + Impl::mapping_from_view_mapping(m_map), + other_accessor}; + } +#endif // KOKKOS_ENABLE_IMPL_MDSPAN }; template @@ -1878,23 +1927,6 @@ KOKKOS_INLINE_FUNCTION bool operator!=(const View& lhs, namespace Kokkos { namespace Impl { -inline void shared_allocation_tracking_disable() { - 
Kokkos::Impl::SharedAllocationRecord::tracking_disable(); -} - -inline void shared_allocation_tracking_enable() { - Kokkos::Impl::SharedAllocationRecord::tracking_enable(); -} - -} /* namespace Impl */ -} /* namespace Kokkos */ - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - template struct CommonViewValueType; diff --git a/lib/kokkos/core/src/OpenACC/Kokkos_OpenACCSpace.cpp b/lib/kokkos/core/src/OpenACC/Kokkos_OpenACCSpace.cpp index acc0dcd3c6..c8a5d28ba8 100644 --- a/lib/kokkos/core/src/OpenACC/Kokkos_OpenACCSpace.cpp +++ b/lib/kokkos/core/src/OpenACC/Kokkos_OpenACCSpace.cpp @@ -67,16 +67,7 @@ void *Kokkos::Experimental::OpenACCSpace::impl_allocate( ptr = acc_malloc(arg_alloc_size); if (!ptr) { - size_t alignment = 1; // OpenACC does not handle alignment - using Kokkos::Experimental::RawMemoryAllocationFailure; - auto failure_mode = - arg_alloc_size > 0 - ? 
RawMemoryAllocationFailure::FailureMode::OutOfMemoryError - : RawMemoryAllocationFailure::FailureMode::InvalidAllocationSize; - auto alloc_mechanism = - RawMemoryAllocationFailure::AllocationMechanism::OpenACCMalloc; - throw RawMemoryAllocationFailure(arg_alloc_size, alignment, failure_mode, - alloc_mechanism); + Kokkos::Impl::throw_bad_alloc(name(), arg_alloc_size, arg_label); } if (Kokkos::Profiling::profileLibraryLoaded()) { diff --git a/lib/kokkos/core/src/OpenACC/Kokkos_OpenACC_ParallelFor_Team.hpp b/lib/kokkos/core/src/OpenACC/Kokkos_OpenACC_ParallelFor_Team.hpp index 4fce680aef..2b98018e3b 100644 --- a/lib/kokkos/core/src/OpenACC/Kokkos_OpenACC_ParallelFor_Team.hpp +++ b/lib/kokkos/core/src/OpenACC/Kokkos_OpenACC_ParallelFor_Team.hpp @@ -44,10 +44,12 @@ class Kokkos::Impl::ParallelFor, auto team_size = m_policy.team_size(); auto vector_length = m_policy.impl_vector_length(); + int const async_arg = m_policy.space().acc_async_queue(); + auto const a_functor(m_functor); #pragma acc parallel loop gang vector num_gangs(league_size) \ - vector_length(team_size* vector_length) copyin(a_functor) + vector_length(team_size* vector_length) copyin(a_functor) async(async_arg) for (int i = 0; i < league_size * team_size * vector_length; i++) { int league_id = i / (team_size * vector_length); typename Policy::member_type team(league_id, league_size, team_size, @@ -145,10 +147,12 @@ class Kokkos::Impl::ParallelFor, auto team_size = m_policy.team_size(); auto vector_length = m_policy.impl_vector_length(); + int const async_arg = m_policy.space().acc_async_queue(); + auto const a_functor(m_functor); #pragma acc parallel loop gang num_gangs(league_size) num_workers(team_size) \ - vector_length(vector_length) copyin(a_functor) + vector_length(vector_length) copyin(a_functor) async(async_arg) for (int i = 0; i < league_size; i++) { int league_id = i; typename Policy::member_type team(league_id, league_size, team_size, diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP.cpp 
b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP.cpp index 81f2c5c305..0f65ba43a0 100644 --- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP.cpp +++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP.cpp @@ -72,9 +72,28 @@ int OpenMP::concurrency(OpenMP const &instance) { int OpenMP::concurrency() const { return impl_thread_pool_size(); } #endif +void OpenMP::impl_static_fence(std::string const &name) { + Kokkos::Tools::Experimental::Impl::profile_fence_event( + name, + Kokkos::Tools::Experimental::SpecialSynchronizationCases:: + GlobalDeviceSynchronization, + []() { + std::lock_guard lock_all_instances( + Impl::OpenMPInternal::all_instances_mutex); + for (auto *instance_ptr : Impl::OpenMPInternal::all_instances) { + std::lock_guard lock_instance( + instance_ptr->m_instance_mutex); + } + }); +} + void OpenMP::fence(const std::string &name) const { Kokkos::Tools::Experimental::Impl::profile_fence_event( - name, Kokkos::Tools::Experimental::Impl::DirectFenceIDHandle{1}, []() {}); + name, Kokkos::Tools::Experimental::Impl::DirectFenceIDHandle{1}, + [this]() { + auto *internal_instance = this->impl_internal_space_instance(); + std::lock_guard lock(internal_instance->m_instance_mutex); + }); } bool OpenMP::impl_is_initialized() noexcept { @@ -94,7 +113,7 @@ int OpenMP::impl_thread_pool_size() const noexcept { } int OpenMP::impl_max_hardware_threads() noexcept { - return Impl::g_openmp_hardware_max_threads; + return Impl::OpenMPInternal::max_hardware_threads(); } namespace Impl { diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP.hpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP.hpp index 11292af84a..a403909f67 100644 --- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP.hpp +++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP.hpp @@ -67,7 +67,15 @@ class OpenMP { OpenMP(); - OpenMP(int pool_size); + explicit OpenMP(int pool_size); + +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 + template + KOKKOS_DEPRECATED_WITH_COMMENT( + "OpenMP execution space should be constructed explicitly.") + OpenMP(int pool_size) 
+ : OpenMP(pool_size) {} +#endif /// \brief Print configuration information to the given output stream. void print_configuration(std::ostream& os, bool verbose = false) const; @@ -146,14 +154,6 @@ inline int OpenMP::impl_thread_pool_rank() noexcept { KOKKOS_IF_ON_DEVICE((return -1;)) } -inline void OpenMP::impl_static_fence(std::string const& name) { - Kokkos::Tools::Experimental::Impl::profile_fence_event( - name, - Kokkos::Tools::Experimental::SpecialSynchronizationCases:: - GlobalDeviceSynchronization, - []() {}); -} - inline bool OpenMP::is_asynchronous(OpenMP const& /*instance*/) noexcept { return false; } diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Instance.cpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Instance.cpp index 32172fbc6c..473a322eec 100644 --- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Instance.cpp +++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Instance.cpp @@ -31,20 +31,18 @@ #include #include +namespace { +int g_openmp_hardware_max_threads = 1; +} + namespace Kokkos { namespace Impl { -void OpenMPInternal::acquire_lock() { - while (1 == desul::atomic_compare_exchange(&m_pool_mutex, 0, 1, - desul::MemoryOrderAcquire(), - desul::MemoryScopeDevice())) { - // do nothing - } -} +std::vector OpenMPInternal::all_instances; +std::mutex OpenMPInternal::all_instances_mutex; -void OpenMPInternal::release_lock() { - desul::atomic_store(&m_pool_mutex, 0, desul::MemoryOrderRelease(), - desul::MemoryScopeDevice()); +int OpenMPInternal::max_hardware_threads() noexcept { + return g_openmp_hardware_max_threads; } void OpenMPInternal::clear_thread_data() { @@ -123,17 +121,11 @@ void OpenMPInternal::resize_thread_data(size_t pool_reduce_bytes, if (nullptr != m_pool[rank]) { m_pool[rank]->disband_pool(); - space.deallocate(m_pool[rank], old_alloc_bytes); + // impl_deallocate to not fence here + space.impl_deallocate("[unlabeled]", m_pool[rank], old_alloc_bytes); } - void *ptr = nullptr; - try { - ptr = space.allocate(alloc_bytes); - } catch ( - 
Kokkos::Experimental::RawMemoryAllocationFailure const &failure) { - // For now, just rethrow the error message the existing way - Kokkos::Impl::throw_runtime_exception(failure.get_error_message()); - } + void *ptr = space.allocate("Kokkos::OpenMP::scratch_mem", alloc_bytes); m_pool[rank] = new (ptr) HostThreadTeamData(); @@ -204,9 +196,9 @@ void OpenMPInternal::initialize(int thread_count) { // Before any other call to OMP query the maximum number of threads // and save the value for re-initialization unit testing. - Impl::g_openmp_hardware_max_threads = get_current_max_threads(); + g_openmp_hardware_max_threads = get_current_max_threads(); - int process_num_threads = Impl::g_openmp_hardware_max_threads; + int process_num_threads = g_openmp_hardware_max_threads; if (Kokkos::hwloc::available()) { process_num_threads = Kokkos::hwloc::get_available_numa_count() * @@ -219,11 +211,11 @@ void OpenMPInternal::initialize(int thread_count) { // process_num_threads if thread_count > 0, set // g_openmp_hardware_max_threads to thread_count if (thread_count < 0) { - thread_count = Impl::g_openmp_hardware_max_threads; + thread_count = g_openmp_hardware_max_threads; } else if (thread_count == 0) { - if (Impl::g_openmp_hardware_max_threads != process_num_threads) { - Impl::g_openmp_hardware_max_threads = process_num_threads; - omp_set_num_threads(Impl::g_openmp_hardware_max_threads); + if (g_openmp_hardware_max_threads != process_num_threads) { + g_openmp_hardware_max_threads = process_num_threads; + omp_set_num_threads(g_openmp_hardware_max_threads); } } else { if (Kokkos::show_warnings() && thread_count > process_num_threads) { @@ -234,16 +226,16 @@ void OpenMPInternal::initialize(int thread_count) { << ", requested thread : " << std::setw(3) << thread_count << std::endl; } - Impl::g_openmp_hardware_max_threads = thread_count; - omp_set_num_threads(Impl::g_openmp_hardware_max_threads); + g_openmp_hardware_max_threads = thread_count; + 
omp_set_num_threads(g_openmp_hardware_max_threads); } // setup thread local -#pragma omp parallel num_threads(Impl::g_openmp_hardware_max_threads) +#pragma omp parallel num_threads(g_openmp_hardware_max_threads) { Impl::SharedAllocationRecord::tracking_enable(); } auto &instance = OpenMPInternal::singleton(); - instance.m_pool_size = Impl::g_openmp_hardware_max_threads; + instance.m_pool_size = g_openmp_hardware_max_threads; // New, unified host thread team data: { @@ -288,10 +280,9 @@ void OpenMPInternal::finalize() { if (this == &singleton()) { auto const &instance = singleton(); // Silence Cuda Warning - const int nthreads = - instance.m_pool_size <= Impl::g_openmp_hardware_max_threads - ? Impl::g_openmp_hardware_max_threads - : instance.m_pool_size; + const int nthreads = instance.m_pool_size <= g_openmp_hardware_max_threads + ? g_openmp_hardware_max_threads + : instance.m_pool_size; (void)nthreads; #pragma omp parallel num_threads(nthreads) @@ -300,10 +291,22 @@ void OpenMPInternal::finalize() { // allow main thread to track Impl::SharedAllocationRecord::tracking_enable(); - Impl::g_openmp_hardware_max_threads = 1; + g_openmp_hardware_max_threads = 1; } m_initialized = false; + + // guard erasing from all_instances + { + std::scoped_lock lock(all_instances_mutex); + + auto it = std::find(all_instances.begin(), all_instances.end(), this); + if (it == all_instances.end()) + Kokkos::abort( + "Execution space instance to be removed couldn't be found!"); + *it = all_instances.back(); + all_instances.pop_back(); + } } void OpenMPInternal::print_configuration(std::ostream &s) const { @@ -311,7 +314,7 @@ void OpenMPInternal::print_configuration(std::ostream &s) const { if (m_initialized) { const int numa_count = 1; - const int core_per_numa = Impl::g_openmp_hardware_max_threads; + const int core_per_numa = g_openmp_hardware_max_threads; const int thread_per_core = 1; s << " thread_pool_topology[ " << numa_count << " x " << core_per_numa diff --git 
a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Instance.hpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Instance.hpp index 35b9aa93ba..2aed723b18 100644 --- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Instance.hpp +++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Instance.hpp @@ -47,8 +47,6 @@ namespace Impl { class OpenMPInternal; -inline int g_openmp_hardware_max_threads = 1; - struct OpenMPTraits { static constexpr int MAX_THREAD_COUNT = 512; }; @@ -56,7 +54,13 @@ struct OpenMPTraits { class OpenMPInternal { private: OpenMPInternal(int arg_pool_size) - : m_pool_size{arg_pool_size}, m_level{omp_get_level()}, m_pool() {} + : m_pool_size{arg_pool_size}, m_level{omp_get_level()}, m_pool() { + // guard pushing to all_instances + { + std::scoped_lock lock(all_instances_mutex); + all_instances.push_back(this); + } + } ~OpenMPInternal() { clear_thread_data(); } @@ -66,7 +70,6 @@ class OpenMPInternal { int m_pool_size; int m_level; - int m_pool_mutex = 0; HostThreadTeamData* m_pool[OpenMPTraits::MAX_THREAD_COUNT]; @@ -81,14 +84,10 @@ class OpenMPInternal { void clear_thread_data(); + static int max_hardware_threads() noexcept; + int thread_pool_size() const { return m_pool_size; } - // Acquire lock used to protect access to m_pool - void acquire_lock(); - - // Release lock used to protect access to m_pool - void release_lock(); - void resize_thread_data(size_t pool_reduce_bytes, size_t team_reduce_bytes, size_t team_shared_bytes, size_t thread_local_bytes); @@ -107,6 +106,11 @@ class OpenMPInternal { bool verify_is_initialized(const char* const label) const; void print_configuration(std::ostream& s) const; + + std::mutex m_instance_mutex; + + static std::vector all_instances; + static std::mutex all_instances_mutex; }; inline bool execute_in_serial(OpenMP const& space = OpenMP()) { @@ -157,7 +161,7 @@ inline std::vector create_OpenMP_instances( "Kokkos::abort: Partition not enough resources left to create the last " "instance."); } - instances[weights.size() - 1] = 
resources_left; + instances[weights.size() - 1] = OpenMP(resources_left); return instances; } diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel_For.hpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel_For.hpp index 823a7e668e..79d7d295c0 100644 --- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel_For.hpp +++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel_For.hpp @@ -108,6 +108,8 @@ class ParallelFor, Kokkos::OpenMP> { public: inline void execute() const { + // Serialize kernels on the same execution space instance + std::lock_guard lock(m_instance->m_instance_mutex); if (execute_in_serial(m_policy.space())) { exec_range(m_functor, m_policy.begin(), m_policy.end()); return; @@ -202,6 +204,9 @@ class ParallelFor, public: inline void execute() const { + // Serialize kernels on the same execution space instance + std::lock_guard lock(m_instance->m_instance_mutex); + #ifndef KOKKOS_COMPILER_INTEL if (execute_in_serial(m_iter.m_rp.space())) { exec_range(0, m_iter.m_rp.m_num_tiles); @@ -333,7 +338,8 @@ class ParallelFor, const size_t team_shared_size = m_shmem_size; const size_t thread_local_size = 0; // Never shrinks - m_instance->acquire_lock(); + // Serialize kernels on the same execution space instance + std::lock_guard lock(m_instance->m_instance_mutex); m_instance->resize_thread_data(pool_reduce_size, team_reduce_size, team_shared_size, thread_local_size); @@ -343,8 +349,6 @@ class ParallelFor, m_functor, *(m_instance->get_thread_data()), 0, m_policy.league_size(), m_policy.league_size()); - m_instance->release_lock(); - return; } @@ -383,8 +387,6 @@ class ParallelFor, data.disband_team(); } - - m_instance->release_lock(); } inline ParallelFor(const FunctorType& arg_functor, const Policy& arg_policy) diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel_Reduce.hpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel_Reduce.hpp index 05fd1c9dce..d22e1e7eda 100644 --- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel_Reduce.hpp +++ 
b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel_Reduce.hpp @@ -83,7 +83,8 @@ class ParallelReduce, const size_t pool_reduce_bytes = reducer.value_size(); - m_instance->acquire_lock(); + // Serialize kernels on the same execution space instance + std::lock_guard lock(m_instance->m_instance_mutex); m_instance->resize_thread_data(pool_reduce_bytes, 0 // team_reduce_bytes , @@ -106,6 +107,7 @@ class ParallelReduce, update); reducer.final(ptr); + return; } const int pool_size = m_instance->thread_pool_size(); @@ -157,8 +159,6 @@ class ParallelReduce, m_result_ptr[j] = ptr[j]; } } - - m_instance->release_lock(); } //---------------------------------------- @@ -218,7 +218,8 @@ class ParallelReduceacquire_lock(); + // Serialize kernels on the same execution space instance + std::lock_guard lock(m_instance->m_instance_mutex); m_instance->resize_thread_data(pool_reduce_bytes, 0 // team_reduce_bytes , @@ -241,8 +242,6 @@ class ParallelReducerelease_lock(); - return; } #endif @@ -299,8 +298,6 @@ class ParallelReducerelease_lock(); } //---------------------------------------- @@ -415,7 +412,8 @@ class ParallelReduceacquire_lock(); + // Serialize kernels on the same execution space instance + std::lock_guard lock(m_instance->m_instance_mutex); m_instance->resize_thread_data(pool_reduce_size, team_reduce_size, team_shared_size, thread_local_size); @@ -433,8 +431,6 @@ class ParallelReducerelease_lock(); - return; } @@ -510,8 +506,6 @@ class ParallelReducerelease_lock(); } //---------------------------------------- diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel_Scan.hpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel_Scan.hpp index f843aef3a8..b9ce25d3ee 100644 --- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel_Scan.hpp +++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel_Scan.hpp @@ -70,6 +70,9 @@ class ParallelScan, const int value_count = Analysis::value_count(m_functor); const size_t pool_reduce_bytes = 2 * Analysis::value_size(m_functor); + // 
Serialize kernels on the same execution space instance + std::lock_guard lock(m_instance->m_instance_mutex); + m_instance->resize_thread_data(pool_reduce_bytes, 0 // team_reduce_bytes , 0 // team_shared_bytes @@ -193,7 +196,8 @@ class ParallelScanWithTotal, const int value_count = Analysis::value_count(m_functor); const size_t pool_reduce_bytes = 2 * Analysis::value_size(m_functor); - m_instance->acquire_lock(); + // Serialize kernels on the same execution space instance + std::lock_guard lock(m_instance->m_instance_mutex); m_instance->resize_thread_data(pool_reduce_bytes, 0 // team_reduce_bytes , @@ -213,8 +217,6 @@ class ParallelScanWithTotal, *m_result_ptr = update; - m_instance->release_lock(); - return; } @@ -266,8 +268,6 @@ class ParallelScanWithTotal, *m_result_ptr = update_base; } } - - m_instance->release_lock(); } //---------------------------------------- diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.cpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.cpp index 3e67d8d625..54c1574d71 100644 --- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.cpp +++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.cpp @@ -52,18 +52,7 @@ HostThreadTeamDataSingleton::HostThreadTeamDataSingleton() num_pool_reduce_bytes, num_team_reduce_bytes, num_team_shared_bytes, num_thread_local_bytes); - void* ptr = nullptr; - try { - ptr = space.allocate(alloc_bytes); - } catch (Kokkos::Experimental::RawMemoryAllocationFailure const& f) { - // For now, just rethrow the error message with a note - // Note that this could, in turn, trigger an out of memory exception, - // but it's pretty unlikely, so we won't worry about it for now. 
- // TODO reasonable error message when `std::string` causes OOM error - Kokkos::Impl::throw_runtime_exception( - std::string("Failure to allocate scratch memory: ") + - f.get_error_message()); - } + void* ptr = space.allocate("Kokkos::Impl::HostThreadTeamData", alloc_bytes); HostThreadTeamData::scratch_assign( ptr, alloc_bytes, num_pool_reduce_bytes, num_team_reduce_bytes, diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.hpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.hpp index 01b6694865..2877d940fa 100644 --- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.hpp +++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.hpp @@ -73,7 +73,8 @@ class TaskQueueSpecialization> { execution_space().impl_internal_space_instance(); const int pool_size = get_max_team_count(scheduler.get_execution_space()); - instance->acquire_lock(); + // Serialize kernels on the same execution space instance + std::lock_guard lock(instance->m_instance_mutex); // TODO @tasking @new_feature DSH allow team sizes other than 1 const int team_size = 1; // Threads per core @@ -152,8 +153,6 @@ class TaskQueueSpecialization> { } self.disband_team(); } // end pragma omp parallel - - instance->release_lock(); } static uint32_t get_max_team_count(execution_space const& espace) { @@ -238,7 +237,8 @@ class TaskQueueSpecializationConstrained< execution_space().impl_internal_space_instance(); const int pool_size = instance->thread_pool_size(); - instance->acquire_lock(); + // Serialize kernels on the same execution space instance + std::lock_guard lock(instance->m_instance_mutex); const int team_size = 1; // Threads per core instance->resize_thread_data(0 /* global reduce buffer */ @@ -250,6 +250,7 @@ class TaskQueueSpecializationConstrained< 0 /* thread local buffer */ ); assert(pool_size % team_size == 0); + auto& queue = scheduler.queue(); queue.initialize_team_queues(pool_size / team_size); @@ -343,8 +344,6 @@ class TaskQueueSpecializationConstrained< } self.disband_team(); } // end pragma omp 
parallel - - instance->release_lock(); } template diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_UniqueToken.hpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_UniqueToken.hpp index a37e1758a2..5937c093ba 100644 --- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_UniqueToken.hpp +++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_UniqueToken.hpp @@ -105,7 +105,8 @@ class UniqueToken { /// \brief upper bound for acquired values, i.e. 0 <= value < size() KOKKOS_INLINE_FUNCTION int size() const noexcept { - KOKKOS_IF_ON_HOST((return Kokkos::Impl::g_openmp_hardware_max_threads;)) + KOKKOS_IF_ON_HOST( + (return Kokkos::Impl::OpenMPInternal::max_hardware_threads();)) KOKKOS_IF_ON_DEVICE((return 0;)) } diff --git a/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget.hpp b/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget.hpp index ea4e7f6bab..84c7b85f11 100644 --- a/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget.hpp +++ b/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget.hpp @@ -146,7 +146,8 @@ struct DeviceTypeTraits<::Kokkos::Experimental::OpenMPTarget> { /*--------------------------------------------------------------------------*/ #include -#include +#include +#include #include /*--------------------------------------------------------------------------*/ diff --git a/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp b/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp index a414b34d7c..635b0e0504 100644 --- a/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp +++ b/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp @@ -54,9 +54,11 @@ void* OpenMPTargetSpace::impl_allocate( static_assert(sizeof(void*) == sizeof(uintptr_t), "Error sizeof(void*) != sizeof(uintptr_t)"); - void* ptr; + void* ptr = omp_target_alloc(arg_alloc_size, omp_get_default_device()); - ptr = omp_target_alloc(arg_alloc_size, omp_get_default_device()); + if (!ptr) { + Kokkos::Impl::throw_bad_alloc(name(), arg_alloc_size, arg_label); + } if 
(Kokkos::Profiling::profileLibraryLoaded()) { const size_t reported_size = diff --git a/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp b/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp index b39f5aca35..6c5eb048e3 100644 --- a/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp +++ b/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp @@ -71,8 +71,6 @@ void OpenMPTargetExec::verify_initialized(const char* const label) { void* OpenMPTargetExec::m_scratch_ptr = nullptr; int64_t OpenMPTargetExec::m_scratch_size = 0; -int* OpenMPTargetExec::m_lock_array = nullptr; -uint64_t OpenMPTargetExec::m_lock_size = 0; uint32_t* OpenMPTargetExec::m_uniquetoken_ptr = nullptr; int OpenMPTargetExec::MAX_ACTIVE_THREADS = 0; std::mutex OpenMPTargetExec::m_mutex_scratch_ptr; @@ -84,15 +82,6 @@ void OpenMPTargetExec::clear_scratch() { m_scratch_size = 0; } -void OpenMPTargetExec::clear_lock_array() { - if (m_lock_array != nullptr) { - Kokkos::Experimental::OpenMPTargetSpace space; - space.deallocate(m_lock_array, m_lock_size); - m_lock_array = nullptr; - m_lock_size = 0; - } -} - void* OpenMPTargetExec::get_scratch_ptr() { return m_scratch_ptr; } void OpenMPTargetExec::resize_scratch(int64_t team_size, int64_t shmem_size_L0, @@ -135,35 +124,6 @@ void OpenMPTargetExec::resize_scratch(int64_t team_size, int64_t shmem_size_L0, } } -int* OpenMPTargetExec::get_lock_array(int num_teams) { - Kokkos::Experimental::OpenMPTargetSpace space; - int max_active_league_size = MAX_ACTIVE_THREADS / 32; - int lock_array_elem = - (num_teams > max_active_league_size) ? num_teams : max_active_league_size; - if (m_lock_size < (lock_array_elem * sizeof(int))) { - space.deallocate(m_lock_array, m_lock_size); - m_lock_size = lock_array_elem * sizeof(int); - m_lock_array = static_cast(space.allocate(m_lock_size)); - - // FIXME_OPENMPTARGET - Creating a target region here to initialize the - // lock_array with 0's fails. 
Hence creating an equivalent host array to - // achieve the same. Value of host array are then copied to the lock_array. - int* h_lock_array = static_cast( - omp_target_alloc(m_lock_size, omp_get_initial_device())); - - for (int i = 0; i < lock_array_elem; ++i) h_lock_array[i] = 0; - - if (0 < m_lock_size) - KOKKOS_IMPL_OMPT_SAFE_CALL(omp_target_memcpy( - m_lock_array, h_lock_array, m_lock_size, 0, 0, - omp_get_default_device(), omp_get_initial_device())); - - omp_target_free(h_lock_array, omp_get_initial_device()); - } - - return m_lock_array; -} - } // namespace Impl } // namespace Kokkos diff --git a/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.cpp b/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.cpp index 3387108da3..44e9119ea8 100644 --- a/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.cpp +++ b/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.cpp @@ -106,7 +106,6 @@ void OpenMPTargetInternal::print_configuration(std::ostream& os, void OpenMPTargetInternal::impl_finalize() { m_is_initialized = false; Kokkos::Impl::OpenMPTargetExec space; - if (space.m_lock_array != nullptr) space.clear_lock_array(); if (space.m_uniquetoken_ptr != nullptr) Kokkos::kokkos_free( diff --git a/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_MDRangePolicy.hpp b/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_MDRangePolicy.hpp index d718f56d38..e353676b61 100644 --- a/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_MDRangePolicy.hpp +++ b/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_MDRangePolicy.hpp @@ -22,6 +22,10 @@ namespace Kokkos { namespace Impl { +using OpenMPTargetIterateLeft = std::integral_constant; +using OpenMPTargetIterateRight = + std::integral_constant; + template struct ThreadAndVectorNestLevel +#include +#include "Kokkos_OpenMPTarget_MDRangePolicy.hpp" + +//---------------------------------------------------------------------------- 
+//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template +class ParallelFor, + Kokkos::Experimental::OpenMPTarget> { + private: + using Policy = Kokkos::MDRangePolicy; + using WorkTag = typename Policy::work_tag; + using Member = typename Policy::member_type; + using Index = typename Policy::index_type; + + const FunctorType m_functor; + const Policy m_policy; + + public: + inline void execute() const { + OpenMPTargetExec::verify_is_process( + "Kokkos::Experimental::OpenMPTarget parallel_for"); + OpenMPTargetExec::verify_initialized( + "Kokkos::Experimental::OpenMPTarget parallel_for"); + FunctorType functor(m_functor); + Policy policy = m_policy; + + typename Policy::point_type unused; + static_assert(1 < Policy::rank && Policy::rank < 7); + static_assert(Policy::inner_direction == Iterate::Left || + Policy::inner_direction == Iterate::Right); + + execute_tile( + unused, functor, policy, + std::integral_constant()); + } + + template + inline std::enable_if_t execute_tile( + typename Policy::point_type offset, const FunctorType& functor, + const Policy& policy, OpenMPTargetIterateRight) const { + (void)offset; + const Index begin_0 = policy.m_lower[0]; + const Index begin_1 = policy.m_lower[1]; + + const Index end_0 = policy.m_upper[0]; + const Index end_1 = policy.m_upper[1]; + +#pragma omp target teams distribute parallel for collapse(2) map(to : functor) + for (auto i0 = begin_0; i0 < end_0; ++i0) + for (auto i1 = begin_1; i1 < end_1; ++i1) { + if constexpr (std::is_void::value) + functor(i0, i1); + else + functor(typename Policy::work_tag(), i0, i1); + } + } + + template + inline std::enable_if_t execute_tile( + typename Policy::point_type offset, const FunctorType& functor, + const Policy& policy, OpenMPTargetIterateRight) const { + (void)offset; + const Index begin_0 = policy.m_lower[0]; + const Index begin_1 = policy.m_lower[1]; + const Index begin_2 = policy.m_lower[2]; + + const 
Index end_0 = policy.m_upper[0]; + const Index end_1 = policy.m_upper[1]; + const Index end_2 = policy.m_upper[2]; + +#pragma omp target teams distribute parallel for collapse(3) map(to : functor) + for (auto i0 = begin_0; i0 < end_0; ++i0) { + for (auto i1 = begin_1; i1 < end_1; ++i1) { + for (auto i2 = begin_2; i2 < end_2; ++i2) { + if constexpr (std::is_void::value) + functor(i0, i1, i2); + else + functor(typename Policy::work_tag(), i0, i1, i2); + } + } + } + } + + template + inline std::enable_if_t execute_tile( + typename Policy::point_type offset, const FunctorType& functor, + const Policy& policy, OpenMPTargetIterateRight) const { + (void)offset; + const Index begin_0 = policy.m_lower[0]; + const Index begin_1 = policy.m_lower[1]; + const Index begin_2 = policy.m_lower[2]; + const Index begin_3 = policy.m_lower[3]; + + const Index end_0 = policy.m_upper[0]; + const Index end_1 = policy.m_upper[1]; + const Index end_2 = policy.m_upper[2]; + const Index end_3 = policy.m_upper[3]; + +#pragma omp target teams distribute parallel for collapse(4) map(to : functor) + for (auto i0 = begin_0; i0 < end_0; ++i0) { + for (auto i1 = begin_1; i1 < end_1; ++i1) { + for (auto i2 = begin_2; i2 < end_2; ++i2) { + for (auto i3 = begin_3; i3 < end_3; ++i3) { + if constexpr (std::is_void::value) + functor(i0, i1, i2, i3); + else + functor(typename Policy::work_tag(), i0, i1, i2, i3); + } + } + } + } + } + + template + inline std::enable_if_t execute_tile( + typename Policy::point_type offset, const FunctorType& functor, + const Policy& policy, OpenMPTargetIterateRight) const { + (void)offset; + const Index begin_0 = policy.m_lower[0]; + const Index begin_1 = policy.m_lower[1]; + const Index begin_2 = policy.m_lower[2]; + const Index begin_3 = policy.m_lower[3]; + const Index begin_4 = policy.m_lower[4]; + + const Index end_0 = policy.m_upper[0]; + const Index end_1 = policy.m_upper[1]; + const Index end_2 = policy.m_upper[2]; + const Index end_3 = policy.m_upper[3]; + const 
Index end_4 = policy.m_upper[4]; + +#pragma omp target teams distribute parallel for collapse(5) map(to : functor) + for (auto i0 = begin_0; i0 < end_0; ++i0) { + for (auto i1 = begin_1; i1 < end_1; ++i1) { + for (auto i2 = begin_2; i2 < end_2; ++i2) { + for (auto i3 = begin_3; i3 < end_3; ++i3) { + for (auto i4 = begin_4; i4 < end_4; ++i4) { + if constexpr (std::is_same::value) + functor(i0, i1, i2, i3, i4); + else + functor(typename Policy::work_tag(), i0, i1, i2, i3, i4); + } + } + } + } + } + } + + template + inline std::enable_if_t execute_tile( + typename Policy::point_type offset, const FunctorType& functor, + const Policy& policy, OpenMPTargetIterateRight) const { + (void)offset; + const Index begin_0 = policy.m_lower[0]; + const Index begin_1 = policy.m_lower[1]; + const Index begin_2 = policy.m_lower[2]; + const Index begin_3 = policy.m_lower[3]; + const Index begin_4 = policy.m_lower[4]; + const Index begin_5 = policy.m_lower[5]; + + const Index end_0 = policy.m_upper[0]; + const Index end_1 = policy.m_upper[1]; + const Index end_2 = policy.m_upper[2]; + const Index end_3 = policy.m_upper[3]; + const Index end_4 = policy.m_upper[4]; + const Index end_5 = policy.m_upper[5]; + +#pragma omp target teams distribute parallel for collapse(6) map(to : functor) + for (auto i0 = begin_0; i0 < end_0; ++i0) { + for (auto i1 = begin_1; i1 < end_1; ++i1) { + for (auto i2 = begin_2; i2 < end_2; ++i2) { + for (auto i3 = begin_3; i3 < end_3; ++i3) { + for (auto i4 = begin_4; i4 < end_4; ++i4) { + for (auto i5 = begin_5; i5 < end_5; ++i5) { + { + if constexpr (std::is_same::value) + functor(i0, i1, i2, i3, i4, i5); + else + functor(typename Policy::work_tag(), i0, i1, i2, i3, i4, + i5); + } + } + } + } + } + } + } + } + + template + inline std::enable_if_t execute_tile( + typename Policy::point_type offset, const FunctorType& functor, + const Policy& policy, OpenMPTargetIterateLeft) const { + (void)offset; + const Index begin_0 = policy.m_lower[0]; + const Index begin_1 
= policy.m_lower[1]; + + const Index end_0 = policy.m_upper[0]; + const Index end_1 = policy.m_upper[1]; + +#pragma omp target teams distribute parallel for collapse(2) map(to : functor) + for (auto i1 = begin_1; i1 < end_1; ++i1) + for (auto i0 = begin_0; i0 < end_0; ++i0) { + if constexpr (std::is_void::value) + functor(i0, i1); + else + functor(typename Policy::work_tag(), i0, i1); + } + } + + template + inline std::enable_if_t execute_tile( + typename Policy::point_type offset, const FunctorType& functor, + const Policy& policy, OpenMPTargetIterateLeft) const { + (void)offset; + const Index begin_0 = policy.m_lower[0]; + const Index begin_1 = policy.m_lower[1]; + const Index begin_2 = policy.m_lower[2]; + + const Index end_0 = policy.m_upper[0]; + const Index end_1 = policy.m_upper[1]; + const Index end_2 = policy.m_upper[2]; + +#pragma omp target teams distribute parallel for collapse(3) map(to : functor) + for (auto i2 = begin_2; i2 < end_2; ++i2) { + for (auto i1 = begin_1; i1 < end_1; ++i1) { + for (auto i0 = begin_0; i0 < end_0; ++i0) { + if constexpr (std::is_void::value) + functor(i0, i1, i2); + else + functor(typename Policy::work_tag(), i0, i1, i2); + } + } + } + } + + template + inline std::enable_if_t execute_tile( + typename Policy::point_type offset, const FunctorType& functor, + const Policy& policy, OpenMPTargetIterateLeft) const { + (void)offset; + const Index begin_0 = policy.m_lower[0]; + const Index begin_1 = policy.m_lower[1]; + const Index begin_2 = policy.m_lower[2]; + const Index begin_3 = policy.m_lower[3]; + + const Index end_0 = policy.m_upper[0]; + const Index end_1 = policy.m_upper[1]; + const Index end_2 = policy.m_upper[2]; + const Index end_3 = policy.m_upper[3]; + +#pragma omp target teams distribute parallel for collapse(4) map(to : functor) + for (auto i3 = begin_3; i3 < end_3; ++i3) { + for (auto i2 = begin_2; i2 < end_2; ++i2) { + for (auto i1 = begin_1; i1 < end_1; ++i1) { + for (auto i0 = begin_0; i0 < end_0; ++i0) { + if 
constexpr (std::is_void::value) + functor(i0, i1, i2, i3); + else + functor(typename Policy::work_tag(), i0, i1, i2, i3); + } + } + } + } + } + + template + inline std::enable_if_t execute_tile( + typename Policy::point_type offset, const FunctorType& functor, + const Policy& policy, OpenMPTargetIterateLeft) const { + (void)offset; + const Index begin_0 = policy.m_lower[0]; + const Index begin_1 = policy.m_lower[1]; + const Index begin_2 = policy.m_lower[2]; + const Index begin_3 = policy.m_lower[3]; + const Index begin_4 = policy.m_lower[4]; + + const Index end_0 = policy.m_upper[0]; + const Index end_1 = policy.m_upper[1]; + const Index end_2 = policy.m_upper[2]; + const Index end_3 = policy.m_upper[3]; + const Index end_4 = policy.m_upper[4]; + +#pragma omp target teams distribute parallel for collapse(5) map(to : functor) + for (auto i4 = begin_4; i4 < end_4; ++i4) { + for (auto i3 = begin_3; i3 < end_3; ++i3) { + for (auto i2 = begin_2; i2 < end_2; ++i2) { + for (auto i1 = begin_1; i1 < end_1; ++i1) { + for (auto i0 = begin_0; i0 < end_0; ++i0) { + if constexpr (std::is_same::value) + functor(i0, i1, i2, i3, i4); + else + functor(typename Policy::work_tag(), i0, i1, i2, i3, i4); + } + } + } + } + } + } + + template + inline std::enable_if_t execute_tile( + typename Policy::point_type offset, const FunctorType& functor, + const Policy& policy, OpenMPTargetIterateLeft) const { + (void)offset; + const Index begin_0 = policy.m_lower[0]; + const Index begin_1 = policy.m_lower[1]; + const Index begin_2 = policy.m_lower[2]; + const Index begin_3 = policy.m_lower[3]; + const Index begin_4 = policy.m_lower[4]; + const Index begin_5 = policy.m_lower[5]; + + const Index end_0 = policy.m_upper[0]; + const Index end_1 = policy.m_upper[1]; + const Index end_2 = policy.m_upper[2]; + const Index end_3 = policy.m_upper[3]; + const Index end_4 = policy.m_upper[4]; + const Index end_5 = policy.m_upper[5]; + +#pragma omp target teams distribute parallel for collapse(6) map(to : 
functor) + for (auto i5 = begin_5; i5 < end_5; ++i5) { + for (auto i4 = begin_4; i4 < end_4; ++i4) { + for (auto i3 = begin_3; i3 < end_3; ++i3) { + for (auto i2 = begin_2; i2 < end_2; ++i2) { + for (auto i1 = begin_1; i1 < end_1; ++i1) { + for (auto i0 = begin_0; i0 < end_0; ++i0) { + { + if constexpr (std::is_same::value) + functor(i0, i1, i2, i3, i4, i5); + else + functor(typename Policy::work_tag(), i0, i1, i2, i3, i4, + i5); + } + } + } + } + } + } + } + } + + inline ParallelFor(const FunctorType& arg_functor, Policy arg_policy) + : m_functor(arg_functor), m_policy(arg_policy) {} + // TODO DZP: based on a conversation with Christian, we're using 256 as a + // heuristic here. We need something better once we can query these kinds of + // properties + template + static int max_tile_size_product(const Policy&, const Functor&) { + return 256; + } +}; + +} // namespace Impl +} // namespace Kokkos + +#endif /* KOKKOS_OPENMPTARGET_PARALLELFOR_MDRANGE_HPP */ diff --git a/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Parallel_MDRange.hpp b/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_ParallelReduce_MDRange.hpp similarity index 61% rename from lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Parallel_MDRange.hpp rename to lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_ParallelReduce_MDRange.hpp index 6878531730..e86a121974 100644 --- a/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Parallel_MDRange.hpp +++ b/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_ParallelReduce_MDRange.hpp @@ -14,397 +14,14 @@ // //@HEADER -#ifndef KOKKOS_OPENMPTARGET_PARALLEL_MDRANGE_HPP -#define KOKKOS_OPENMPTARGET_PARALLEL_MDRANGE_HPP +#ifndef KOKKOS_OPENMPTARGET_PARALLELREDUCE_MDRANGE_HPP +#define KOKKOS_OPENMPTARGET_PARALLELREDUCE_MDRANGE_HPP #include #include -#include +#include "Kokkos_OpenMPTarget_MDRangePolicy.hpp" #include -// WORKAROUND OPENMPTARGET: sometimes tile sizes don't make it correctly, -// this was tracked down to a bug in clang with 
regards of mapping structs -// with arrays of long in it. Arrays of int might be fine though ... -#define KOKKOS_IMPL_MDRANGE_USE_NO_TILES // undef EOF - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -template -class ParallelFor, - Kokkos::Experimental::OpenMPTarget> { - private: - using Policy = Kokkos::MDRangePolicy; - using WorkTag = typename Policy::work_tag; - using Member = typename Policy::member_type; - using Index = typename Policy::index_type; - - const FunctorType m_functor; - const Policy m_policy; - - public: - inline void execute() const { - OpenMPTargetExec::verify_is_process( - "Kokkos::Experimental::OpenMPTarget parallel_for"); - OpenMPTargetExec::verify_initialized( - "Kokkos::Experimental::OpenMPTarget parallel_for"); - FunctorType functor(m_functor); - Policy policy = m_policy; - -#ifdef KOKKOS_IMPL_MDRANGE_USE_NO_TILES - typename Policy::point_type unused; - - execute_tile(unused, functor, policy); -#else - const int64_t begin = 0; - const int64_t end = m_policy.m_num_tiles; - -#pragma omp target teams distribute map(to : functor) num_teams(end - begin) - { - for (ptrdiff_t tile_idx = begin; tile_idx < end; ++tile_idx) { - -#pragma omp parallel - { - typename Policy::point_type offset; - if (Policy::outer_direction == Policy::Left) { - for (int i = 0; i < Policy::rank; ++i) { - offset[i] = (tile_idx % policy.m_tile_end[i]) * policy.m_tile[i] + - policy.m_lower[i]; - tile_idx /= policy.m_tile_end[i]; - } - } else { - for (int i = Policy::rank - 1; i >= 0; --i) { - offset[i] = (tile_idx % policy.m_tile_end[i]) * policy.m_tile[i] + - policy.m_lower[i]; - tile_idx /= policy.m_tile_end[i]; - } - } - execute_tile(offset, functor, policy); - } - } - } -#endif - } - - template - inline std::enable_if_t execute_tile( - typename Policy::point_type offset, const FunctorType& functor, - const Policy& 
policy) const { -#ifdef KOKKOS_IMPL_MDRANGE_USE_NO_TILES - (void)offset; - const Index begin_0 = policy.m_lower[0]; - const Index begin_1 = policy.m_lower[1]; - - const Index end_0 = policy.m_upper[0]; - const Index end_1 = policy.m_upper[1]; - -#pragma omp target teams distribute parallel for collapse(2) map(to : functor) - for (auto i0 = begin_0; i0 < end_0; ++i0) { - for (auto i1 = begin_1; i1 < end_1; ++i1) { - if constexpr (std::is_void::value) - functor(i0, i1); - else - functor(typename Policy::work_tag(), i0, i1); - } - } -#else - const ptrdiff_t begin_0 = offset[0]; - ptrdiff_t end_0 = begin_0 + policy.m_tile[0]; - end_0 = end_0 < policy.m_upper[0] ? end_0 : policy.m_upper[0]; - - const ptrdiff_t begin_1 = offset[1]; - ptrdiff_t end_1 = begin_1 + policy.m_tile[1]; - end_1 = end_1 < policy.m_upper[1] ? end_1 : policy.m_upper[1]; - -#pragma omp for collapse(2) - for (ptrdiff_t i0 = begin_0; i0 < end_0; ++i0) - for (ptrdiff_t i1 = begin_1; i1 < end_1; ++i1) { - if constexpr (std::is_void::value) - functor(i0, i1); - else - functor(typename Policy::work_tag(), i0, i1); - } -#endif - } - - template - inline std::enable_if_t execute_tile( - typename Policy::point_type offset, const FunctorType& functor, - const Policy& policy) const { -#ifdef KOKKOS_IMPL_MDRANGE_USE_NO_TILES - (void)offset; - const Index begin_0 = policy.m_lower[0]; - const Index begin_1 = policy.m_lower[1]; - const Index begin_2 = policy.m_lower[2]; - - const Index end_0 = policy.m_upper[0]; - const Index end_1 = policy.m_upper[1]; - const Index end_2 = policy.m_upper[2]; - -#pragma omp target teams distribute parallel for collapse(3) map(to : functor) - for (auto i0 = begin_0; i0 < end_0; ++i0) { - for (auto i1 = begin_1; i1 < end_1; ++i1) { - for (auto i2 = begin_2; i2 < end_2; ++i2) { - if constexpr (std::is_void::value) - functor(i0, i1, i2); - else - functor(typename Policy::work_tag(), i0, i1, i2); - } - } - } -#else - const ptrdiff_t begin_0 = offset[0]; - ptrdiff_t end_0 = begin_0 + 
policy.m_tile[0]; - end_0 = end_0 < policy.m_upper[0] ? end_0 : policy.m_upper[0]; - - const ptrdiff_t begin_1 = offset[1]; - ptrdiff_t end_1 = begin_1 + policy.m_tile[1]; - end_1 = end_1 < policy.m_upper[1] ? end_1 : policy.m_upper[1]; - - const ptrdiff_t begin_2 = offset[2]; - ptrdiff_t end_2 = begin_2 + policy.m_tile[2]; - end_2 = end_2 < policy.m_upper[2] ? end_2 : policy.m_upper[2]; - -#pragma omp for collapse(3) - for (ptrdiff_t i0 = begin_0; i0 < end_0; ++i0) - for (ptrdiff_t i1 = begin_1; i1 < end_1; ++i1) - for (ptrdiff_t i2 = begin_2; i2 < end_2; ++i2) { - if constexpr (std::is_void::value) - functor(i0, i1, i2); - else - functor(typename Policy::work_tag(), i0, i1, i2); - } -#endif - } - - template - inline std::enable_if_t execute_tile( - typename Policy::point_type offset, const FunctorType& functor, - const Policy& policy) const { -#ifdef KOKKOS_IMPL_MDRANGE_USE_NO_TILES - (void)offset; - const Index begin_0 = policy.m_lower[0]; - const Index begin_1 = policy.m_lower[1]; - const Index begin_2 = policy.m_lower[2]; - const Index begin_3 = policy.m_lower[3]; - - const Index end_0 = policy.m_upper[0]; - const Index end_1 = policy.m_upper[1]; - const Index end_2 = policy.m_upper[2]; - const Index end_3 = policy.m_upper[3]; - -#pragma omp target teams distribute parallel for collapse(4) map(to : functor) - for (auto i0 = begin_0; i0 < end_0; ++i0) { - for (auto i1 = begin_1; i1 < end_1; ++i1) { - for (auto i2 = begin_2; i2 < end_2; ++i2) { - for (auto i3 = begin_3; i3 < end_3; ++i3) { - if constexpr (std::is_void::value) - functor(i0, i1, i2, i3); - else - functor(typename Policy::work_tag(), i0, i1, i2, i3); - } - } - } - } -#else - const ptrdiff_t begin_0 = offset[0]; - ptrdiff_t end_0 = begin_0 + policy.m_tile[0]; - end_0 = end_0 < policy.m_upper[0] ? end_0 : policy.m_upper[0]; - - const ptrdiff_t begin_1 = offset[1]; - ptrdiff_t end_1 = begin_1 + policy.m_tile[1]; - end_1 = end_1 < policy.m_upper[1] ? 
end_1 : policy.m_upper[1]; - - const ptrdiff_t begin_2 = offset[2]; - ptrdiff_t end_2 = begin_2 + policy.m_tile[2]; - end_2 = end_2 < policy.m_upper[2] ? end_2 : policy.m_upper[2]; - - const ptrdiff_t begin_3 = offset[3]; - ptrdiff_t end_3 = begin_3 + policy.m_tile[3]; - end_3 = end_3 < policy.m_upper[3] ? end_3 : policy.m_upper[3]; - -#pragma omp for collapse(4) - for (ptrdiff_t i0 = begin_0; i0 < end_0; ++i0) - for (ptrdiff_t i1 = begin_1; i1 < end_1; ++i1) - for (ptrdiff_t i2 = begin_2; i2 < end_2; ++i2) - for (ptrdiff_t i3 = begin_3; i3 < end_3; ++i3) { - if constexpr (std::is_void::value) - functor(i0, i1, i2, i3); - else - functor(typename Policy::work_tag(), i0, i1, i2, i3); - } -#endif - } - - template - inline std::enable_if_t execute_tile( - typename Policy::point_type offset, const FunctorType& functor, - const Policy& policy) const { -#ifdef KOKKOS_IMPL_MDRANGE_USE_NO_TILES - (void)offset; - const Index begin_0 = policy.m_lower[0]; - const Index begin_1 = policy.m_lower[1]; - const Index begin_2 = policy.m_lower[2]; - const Index begin_3 = policy.m_lower[3]; - const Index begin_4 = policy.m_lower[4]; - - const Index end_0 = policy.m_upper[0]; - const Index end_1 = policy.m_upper[1]; - const Index end_2 = policy.m_upper[2]; - const Index end_3 = policy.m_upper[3]; - const Index end_4 = policy.m_upper[4]; - -#pragma omp target teams distribute parallel for collapse(5) map(to : functor) - for (auto i0 = begin_0; i0 < end_0; ++i0) { - for (auto i1 = begin_1; i1 < end_1; ++i1) { - for (auto i2 = begin_2; i2 < end_2; ++i2) { - for (auto i3 = begin_3; i3 < end_3; ++i3) { - for (auto i4 = begin_4; i4 < end_4; ++i4) { - if constexpr (std::is_same::value) - functor(i0, i1, i2, i3, i4); - else - functor(typename Policy::work_tag(), i0, i1, i2, i3, i4); - } - } - } - } - } -#else - const ptrdiff_t begin_0 = offset[0]; - ptrdiff_t end_0 = begin_0 + policy.m_tile[0]; - end_0 = end_0 < policy.m_upper[0] ? 
end_0 : policy.m_upper[0]; - - const ptrdiff_t begin_1 = offset[1]; - ptrdiff_t end_1 = begin_1 + policy.m_tile[1]; - end_1 = end_1 < policy.m_upper[1] ? end_1 : policy.m_upper[1]; - - const ptrdiff_t begin_2 = offset[2]; - ptrdiff_t end_2 = begin_2 + policy.m_tile[2]; - end_2 = end_2 < policy.m_upper[2] ? end_2 : policy.m_upper[2]; - - const ptrdiff_t begin_3 = offset[3]; - ptrdiff_t end_3 = begin_3 + policy.m_tile[3]; - end_3 = end_3 < policy.m_upper[3] ? end_3 : policy.m_upper[3]; - - const ptrdiff_t begin_4 = offset[4]; - ptrdiff_t end_4 = begin_4 + policy.m_tile[4]; - end_4 = end_4 < policy.m_upper[4] ? end_4 : policy.m_upper[4]; - -#pragma omp for collapse(5) - for (ptrdiff_t i0 = begin_0; i0 < end_0; ++i0) - for (ptrdiff_t i1 = begin_1; i1 < end_1; ++i1) - for (ptrdiff_t i2 = begin_2; i2 < end_2; ++i2) - for (ptrdiff_t i3 = begin_3; i3 < end_3; ++i3) - for (ptrdiff_t i4 = begin_4; i4 < end_4; ++i4) { - if constexpr (std::is_same::value) - functor(i0, i1, i2, i3, i4); - else - functor(typename Policy::work_tag(), i0, i1, i2, i3, i4); - } -#endif - } - - template - inline std::enable_if_t execute_tile( - typename Policy::point_type offset, const FunctorType& functor, - const Policy& policy) const { -#ifdef KOKKOS_IMPL_MDRANGE_USE_NO_TILES - (void)offset; - const Index begin_0 = policy.m_lower[0]; - const Index begin_1 = policy.m_lower[1]; - const Index begin_2 = policy.m_lower[2]; - const Index begin_3 = policy.m_lower[3]; - const Index begin_4 = policy.m_lower[4]; - const Index begin_5 = policy.m_lower[5]; - - const Index end_0 = policy.m_upper[0]; - const Index end_1 = policy.m_upper[1]; - const Index end_2 = policy.m_upper[2]; - const Index end_3 = policy.m_upper[3]; - const Index end_4 = policy.m_upper[4]; - const Index end_5 = policy.m_upper[5]; - -#pragma omp target teams distribute parallel for collapse(6) map(to : functor) - for (auto i0 = begin_0; i0 < end_0; ++i0) { - for (auto i1 = begin_1; i1 < end_1; ++i1) { - for (auto i2 = begin_2; i2 < end_2; 
++i2) { - for (auto i3 = begin_3; i3 < end_3; ++i3) { - for (auto i4 = begin_4; i4 < end_4; ++i4) { - for (auto i5 = begin_5; i5 < end_5; ++i5) { - { - if constexpr (std::is_same::value) - functor(i0, i1, i2, i3, i4, i5); - else - functor(typename Policy::work_tag(), i0, i1, i2, i3, i4, - i5); - } - } - } - } - } - } - } -#else - const ptrdiff_t begin_0 = offset[0]; - ptrdiff_t end_0 = begin_0 + policy.m_tile[0]; - end_0 = end_0 < policy.m_upper[0] ? end_0 : policy.m_upper[0]; - - const ptrdiff_t begin_1 = offset[1]; - ptrdiff_t end_1 = begin_1 + policy.m_tile[1]; - end_1 = end_1 < policy.m_upper[1] ? end_1 : policy.m_upper[1]; - - const ptrdiff_t begin_2 = offset[2]; - ptrdiff_t end_2 = begin_2 + policy.m_tile[2]; - end_2 = end_2 < policy.m_upper[2] ? end_2 : policy.m_upper[2]; - - const ptrdiff_t begin_3 = offset[3]; - ptrdiff_t end_3 = begin_3 + policy.m_tile[3]; - end_3 = end_3 < policy.m_upper[3] ? end_3 : policy.m_upper[3]; - - const ptrdiff_t begin_4 = offset[4]; - ptrdiff_t end_4 = begin_4 + policy.m_tile[4]; - end_4 = end_4 < policy.m_upper[4] ? end_4 : policy.m_upper[4]; - - const ptrdiff_t begin_5 = offset[5]; - ptrdiff_t end_5 = begin_5 + policy.m_tile[5]; - end_5 = end_5 < policy.m_upper[5] ? end_5 : policy.m_upper[5]; - -#pragma omp for collapse(6) - for (ptrdiff_t i0 = begin_0; i0 < end_0; ++i0) - for (ptrdiff_t i1 = begin_1; i1 < end_1; ++i1) - for (ptrdiff_t i2 = begin_2; i2 < end_2; ++i2) - for (ptrdiff_t i3 = begin_3; i3 < end_3; ++i3) - for (ptrdiff_t i4 = begin_4; i4 < end_4; ++i4) - for (ptrdiff_t i5 = begin_5; i5 < end_5; ++i5) { - if constexpr (std::is_same::value) - functor(i0, i1, i2, i3, i4, i5); - else - functor(typename Policy::work_tag(), i0, i1, i2, i3, i4, i5); - } -#endif - } - - inline ParallelFor(const FunctorType& arg_functor, Policy arg_policy) - : m_functor(arg_functor), m_policy(arg_policy) {} - // TODO DZP: based on a conversation with Christian, we're using 256 as a - // heuristic here. 
We need something better once we can query these kinds of - // properties - template - static int max_tile_size_product(const Policy&, const Functor&) { - return 256; - } -}; - -} // namespace Impl -} // namespace Kokkos - //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- @@ -438,14 +55,14 @@ class ParallelReduce m_scratch_memory_lock; - public: inline void execute() const { + // Only let one ParallelReduce instance at a time use the scratch memory. + std::scoped_lock scratch_memory_lock( + OpenMPTargetExec::m_mutex_scratch_ptr); execute_tile( - m_functor_reducer.get_functor(), m_policy, m_result_ptr); + m_functor_reducer.get_functor(), m_policy, m_result_ptr, + std::integral_constant()); } template @@ -456,13 +73,330 @@ class ParallelReduce::accessible), - m_scratch_memory_lock(OpenMPTargetExec::m_mutex_scratch_ptr) {} + typename ViewType::memory_space>::accessible) {} template - inline std::enable_if_t execute_tile(const FunctorType& functor, - const Policy& policy, - pointer_type ptr) const { + inline std::enable_if_t execute_tile( + const FunctorType& functor, const Policy& policy, pointer_type ptr, + OpenMPTargetIterateLeft) const { + const Index begin_0 = policy.m_lower[0]; + const Index begin_1 = policy.m_lower[1]; + + const Index end_0 = policy.m_upper[0]; + const Index end_1 = policy.m_upper[1]; + + ValueType result = ValueType(); + + // FIXME_OPENMPTARGET: Unable to separate directives and their companion + // loops which leads to code duplication for different reduction types. 
+ if constexpr (UseReducer) { +#pragma omp declare reduction( \ + custom:ValueType \ + : OpenMPTargetReducerWrapper ::join(omp_out, omp_in)) \ + initializer(OpenMPTargetReducerWrapper ::init(omp_priv)) + +#pragma omp target teams distribute parallel for collapse(2) map(to \ + : functor) \ + reduction(custom \ + : result) + for (auto i1 = begin_1; i1 < end_1; ++i1) { + for (auto i0 = begin_0; i0 < end_0; ++i0) { + if constexpr (std::is_void::value) + functor(i0, i1, result); + else + functor(typename Policy::work_tag(), i0, i1, result); + } + } + } else { +#pragma omp target teams distribute parallel for collapse(2) map(to : functor) \ +reduction(+:result) + for (auto i1 = begin_1; i1 < end_1; ++i1) { + for (auto i0 = begin_0; i0 < end_0; ++i0) { + if constexpr (std::is_void::value) + functor(i0, i1, result); + else + functor(typename Policy::work_tag(), i0, i1, result); + } + } + } + + ParReduceCopy::memcpy_result(ptr, &result, sizeof(ValueType), + m_result_ptr_on_device); + } + + template + inline std::enable_if_t execute_tile( + const FunctorType& functor, const Policy& policy, pointer_type ptr, + OpenMPTargetIterateLeft) const { + const Index begin_0 = policy.m_lower[0]; + const Index begin_1 = policy.m_lower[1]; + const Index begin_2 = policy.m_lower[2]; + + const Index end_0 = policy.m_upper[0]; + const Index end_1 = policy.m_upper[1]; + const Index end_2 = policy.m_upper[2]; + + ValueType result = ValueType(); + + // FIXME_OPENMPTARGET: Unable to separate directives and their companion + // loops which leads to code duplication for different reduction types. 
+ if constexpr (UseReducer) { +#pragma omp declare reduction( \ + custom:ValueType \ + : OpenMPTargetReducerWrapper ::join( \ + omp_out, omp_in)) \ + initializer( \ + OpenMPTargetReducerWrapper ::init( \ + omp_priv)) + +#pragma omp target teams distribute parallel for collapse(3) map(to \ + : functor) \ + reduction(custom \ + : result) + for (auto i2 = begin_2; i2 < end_2; ++i2) { + for (auto i1 = begin_1; i1 < end_1; ++i1) { + for (auto i0 = begin_0; i0 < end_0; ++i0) { + if constexpr (std::is_void::value) + functor(i0, i1, i2, result); + else + functor(typename Policy::work_tag(), i0, i1, i2, result); + } + } + } + } else { +#pragma omp target teams distribute parallel for collapse(3) map(to : functor) \ +reduction(+:result) + for (auto i2 = begin_2; i2 < end_2; ++i2) { + for (auto i1 = begin_1; i1 < end_1; ++i1) { + for (auto i0 = begin_0; i0 < end_0; ++i0) { + if constexpr (std::is_void::value) + functor(i0, i1, i2, result); + else + functor(typename Policy::work_tag(), i0, i1, i2, result); + } + } + } + } + + ParReduceCopy::memcpy_result(ptr, &result, sizeof(ValueType), + m_result_ptr_on_device); + } + + template + inline std::enable_if_t execute_tile( + const FunctorType& functor, const Policy& policy, pointer_type ptr, + OpenMPTargetIterateLeft) const { + const Index begin_0 = policy.m_lower[0]; + const Index begin_1 = policy.m_lower[1]; + const Index begin_2 = policy.m_lower[2]; // lower bound of the i2 loop + const Index begin_3 = policy.m_lower[3]; // lower bound of the i3 loop + + const Index end_0 = policy.m_upper[0]; + const Index end_1 = policy.m_upper[1]; + const Index end_2 = policy.m_upper[2]; + const Index end_3 = policy.m_upper[3]; + + ValueType result = ValueType(); + + // FIXME_OPENMPTARGET: Unable to separate directives and their companion + // loops which leads to code duplication for different reduction types. 
+ if constexpr (UseReducer) { +#pragma omp declare reduction( \ + custom:ValueType \ + : OpenMPTargetReducerWrapper ::join(omp_out, omp_in)) \ + initializer(OpenMPTargetReducerWrapper ::init(omp_priv)) + +#pragma omp target teams distribute parallel for collapse(4) map(to \ + : functor) \ + reduction(custom \ + : result) + for (auto i3 = begin_3; i3 < end_3; ++i3) { + for (auto i2 = begin_2; i2 < end_2; ++i2) { + for (auto i1 = begin_1; i1 < end_1; ++i1) { + for (auto i0 = begin_0; i0 < end_0; ++i0) { + if constexpr (std::is_same::value) + functor(i0, i1, i2, i3, result); + else + functor(typename Policy::work_tag(), i0, i1, i2, i3, result); + } + } + } + } + } else { +#pragma omp target teams distribute parallel for collapse(4) map(to : functor) \ +reduction(+:result) + for (auto i3 = begin_3; i3 < end_3; ++i3) { + for (auto i2 = begin_2; i2 < end_2; ++i2) { + for (auto i1 = begin_1; i1 < end_1; ++i1) { + for (auto i0 = begin_0; i0 < end_0; ++i0) { + if constexpr (std::is_same::value) + functor(i0, i1, i2, i3, result); + else + functor(typename Policy::work_tag(), i0, i1, i2, i3, result); + } + } + } + } + } + + ParReduceCopy::memcpy_result(ptr, &result, sizeof(ValueType), + m_result_ptr_on_device); + } + + template + inline std::enable_if_t execute_tile( + const FunctorType& functor, const Policy& policy, pointer_type ptr, + OpenMPTargetIterateLeft) const { + const Index begin_0 = policy.m_lower[0]; + const Index begin_1 = policy.m_lower[1]; + const Index begin_2 = policy.m_lower[2]; + const Index begin_3 = policy.m_lower[3]; + const Index begin_4 = policy.m_lower[4]; + + const Index end_0 = policy.m_upper[0]; + const Index end_1 = policy.m_upper[1]; + const Index end_2 = policy.m_upper[2]; + const Index end_3 = policy.m_upper[3]; + const Index end_4 = policy.m_upper[4]; + + ValueType result = ValueType(); + + // FIXME_OPENMPTARGET: Unable to separate directives and their companion + // loops which leads to code duplication for different reduction types. 
+ if constexpr (UseReducer) { +#pragma omp declare reduction( \ + custom:ValueType \ + : OpenMPTargetReducerWrapper ::join(omp_out, omp_in)) \ + initializer(OpenMPTargetReducerWrapper ::init(omp_priv)) + +#pragma omp target teams distribute parallel for collapse(5) map(to \ + : functor) \ + reduction(custom \ + : result) + for (auto i4 = begin_4; i4 < end_4; ++i4) { + for (auto i3 = begin_3; i3 < end_3; ++i3) { + for (auto i2 = begin_2; i2 < end_2; ++i2) { + for (auto i1 = begin_1; i1 < end_1; ++i1) { + for (auto i0 = begin_0; i0 < end_0; ++i0) { + if constexpr (std::is_same::value) + functor(i0, i1, i2, i3, i4, result); + else + functor(typename Policy::work_tag(), i0, i1, i2, i3, i4, + result); + } + } + } + } + } + } else { +#pragma omp target teams distribute parallel for collapse(5) map(to : functor) \ +reduction(+:result) + for (auto i4 = begin_4; i4 < end_4; ++i4) { + for (auto i3 = begin_3; i3 < end_3; ++i3) { + for (auto i2 = begin_2; i2 < end_2; ++i2) { + for (auto i1 = begin_1; i1 < end_1; ++i1) { + for (auto i0 = begin_0; i0 < end_0; ++i0) { + if constexpr (std::is_same::value) + functor(i0, i1, i2, i3, i4, result); + else + functor(typename Policy::work_tag(), i0, i1, i2, i3, i4, + result); + } + } + } + } + } + } + + ParReduceCopy::memcpy_result(ptr, &result, sizeof(ValueType), + m_result_ptr_on_device); + } + + template + inline std::enable_if_t execute_tile( + const FunctorType& functor, const Policy& policy, pointer_type ptr, + OpenMPTargetIterateLeft) const { + const Index begin_0 = policy.m_lower[0]; + const Index begin_1 = policy.m_lower[1]; + const Index begin_2 = policy.m_lower[2]; + const Index begin_3 = policy.m_lower[3]; + const Index begin_4 = policy.m_lower[4]; + const Index begin_5 = policy.m_lower[5]; + + const Index end_0 = policy.m_upper[0]; + const Index end_1 = policy.m_upper[1]; + const Index end_2 = policy.m_upper[2]; + const Index end_3 = policy.m_upper[3]; + const Index end_4 = policy.m_upper[4]; + const Index end_5 = 
policy.m_upper[5]; + + ValueType result = ValueType(); + + // FIXME_OPENMPTARGET: Unable to separate directives and their companion + // loops which leads to code duplication for different reduction types. + if constexpr (UseReducer) { +#pragma omp declare reduction( \ + custom:ValueType \ + : OpenMPTargetReducerWrapper ::join(omp_out, omp_in)) \ + initializer(OpenMPTargetReducerWrapper ::init(omp_priv)) + +#pragma omp target teams distribute parallel for collapse(6) map(to \ + : functor) \ + reduction(custom \ + : result) + for (auto i5 = begin_5; i5 < end_5; ++i5) { + for (auto i4 = begin_4; i4 < end_4; ++i4) { + for (auto i3 = begin_3; i3 < end_3; ++i3) { + for (auto i2 = begin_2; i2 < end_2; ++i2) { + for (auto i1 = begin_1; i1 < end_1; ++i1) { + for (auto i0 = begin_0; i0 < end_0; ++i0) { + if constexpr (std::is_same::value) + functor(i0, i1, i2, i3, i4, i5, result); + else + functor(typename Policy::work_tag(), i0, i1, i2, i3, i4, i5, + result); + } + } + } + } + } + } + } else { +#pragma omp target teams distribute parallel for collapse(6) map(to : functor) \ +reduction(+:result) + for (auto i5 = begin_5; i5 < end_5; ++i5) { + for (auto i4 = begin_4; i4 < end_4; ++i4) { + for (auto i3 = begin_3; i3 < end_3; ++i3) { + for (auto i2 = begin_2; i2 < end_2; ++i2) { + for (auto i1 = begin_1; i1 < end_1; ++i1) { + for (auto i0 = begin_0; i0 < end_0; ++i0) { + if constexpr (std::is_same::value) + functor(i0, i1, i2, i3, i4, i5, result); + else + functor(typename Policy::work_tag(), i0, i1, i2, i3, i4, i5, + result); + } + } + } + } + } + } + } + + ParReduceCopy::memcpy_result(ptr, &result, sizeof(ValueType), + m_result_ptr_on_device); + } + + template + inline std::enable_if_t execute_tile( + const FunctorType& functor, const Policy& policy, pointer_type ptr, + OpenMPTargetIterateRight) const { const Index begin_0 = policy.m_lower[0]; const Index begin_1 = policy.m_lower[1]; @@ -509,9 +443,9 @@ reduction(+:result) } template - inline std::enable_if_t 
execute_tile(const FunctorType& functor, - const Policy& policy, - pointer_type ptr) const { + inline std::enable_if_t execute_tile( + const FunctorType& functor, const Policy& policy, pointer_type ptr, + OpenMPTargetIterateRight) const { const Index begin_0 = policy.m_lower[0]; const Index begin_1 = policy.m_lower[1]; const Index begin_2 = policy.m_lower[2]; @@ -567,9 +501,9 @@ reduction(+:result) } template - inline std::enable_if_t execute_tile(const FunctorType& functor, - const Policy& policy, - pointer_type ptr) const { + inline std::enable_if_t execute_tile( + const FunctorType& functor, const Policy& policy, pointer_type ptr, + OpenMPTargetIterateRight) const { const Index begin_0 = policy.m_lower[0]; const Index begin_1 = policy.m_lower[1]; const Index begin_2 = policy.m_lower[3]; @@ -630,9 +564,9 @@ reduction(+:result) } template - inline std::enable_if_t execute_tile(const FunctorType& functor, - const Policy& policy, - pointer_type ptr) const { + inline std::enable_if_t execute_tile( + const FunctorType& functor, const Policy& policy, pointer_type ptr, + OpenMPTargetIterateRight) const { const Index begin_0 = policy.m_lower[0]; const Index begin_1 = policy.m_lower[1]; const Index begin_2 = policy.m_lower[2]; @@ -701,9 +635,9 @@ reduction(+:result) } template - inline std::enable_if_t execute_tile(const FunctorType& functor, - const Policy& policy, - pointer_type ptr) const { + inline std::enable_if_t execute_tile( + const FunctorType& functor, const Policy& policy, pointer_type ptr, + OpenMPTargetIterateRight) const { const Index begin_0 = policy.m_lower[0]; const Index begin_1 = policy.m_lower[1]; const Index begin_2 = policy.m_lower[2]; @@ -788,5 +722,4 @@ reduction(+:result) //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- -#undef KOKKOS_IMPL_MDRANGE_USE_NO_TILES -#endif /* KOKKOS_OPENMPTARGET_PARALLEL_HPP */ +#endif /* 
KOKKOS_OPENMPTARGET_PARALLELREDUCE_MDRANGE_HPP */ diff --git a/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_ParallelReduce_Range.hpp b/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_ParallelReduce_Range.hpp index caa568a892..4a112ed11d 100644 --- a/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_ParallelReduce_Range.hpp +++ b/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_ParallelReduce_Range.hpp @@ -55,13 +55,13 @@ class ParallelReduce, const pointer_type m_result_ptr; bool m_result_ptr_on_device; const int m_result_ptr_num_elems; - // Only let one ParallelReduce instance at a time use the scratch memory. - // The constructor acquires the mutex which is released in the destructor. - std::scoped_lock m_scratch_memory_lock; using TagType = typename Policy::work_tag; public: void execute() const { + // Only let one ParallelReduce instance at a time use the scratch memory. + std::scoped_lock scratch_memory_lock( + OpenMPTargetExec::m_mutex_scratch_ptr); const FunctorType& functor = m_functor_reducer.get_functor(); if constexpr (FunctorHasJoin) { // Enter this loop if the Functor has a init-join. @@ -108,8 +108,7 @@ class ParallelReduce, m_result_ptr_on_device( MemorySpaceAccess::accessible), - m_result_ptr_num_elems(arg_result_view.size()), - m_scratch_memory_lock(OpenMPTargetExec::m_mutex_scratch_ptr) {} + m_result_ptr_num_elems(arg_result_view.size()) {} }; } // namespace Impl diff --git a/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_ParallelReduce_Team.hpp b/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_ParallelReduce_Team.hpp index 8abffa47a4..16c0eedb81 100644 --- a/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_ParallelReduce_Team.hpp +++ b/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_ParallelReduce_Team.hpp @@ -470,12 +470,11 @@ class ParallelReduce m_scratch_memory_lock; - public: void execute() const { + // Only let one ParallelReduce instance at a time use the scratch memory. 
+ std::scoped_lock scratch_memory_lock( + OpenMPTargetExec::m_mutex_scratch_ptr); const FunctorType& functor = m_functor_reducer.get_functor(); if constexpr (FunctorHasJoin) { ParReduceSpecialize::execute_init_join(functor, m_policy, m_result_ptr, @@ -521,8 +520,7 @@ class ParallelReduce::value( - arg_functor_reducer.get_functor(), arg_policy.team_size())), - m_scratch_memory_lock(OpenMPTargetExec::m_mutex_scratch_ptr) {} + arg_functor_reducer.get_functor(), arg_policy.team_size())) {} }; } // namespace Impl diff --git a/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_ParallelScan_Range.hpp b/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_ParallelScan_Range.hpp index 30195d96e0..b0d6932802 100644 --- a/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_ParallelScan_Range.hpp +++ b/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_ParallelScan_Range.hpp @@ -143,7 +143,7 @@ class ParallelScan, local_offset_value = element_values(team_id, i - 1); // FIXME_OPENMPTARGET We seem to access memory illegaly on AMD GPUs #if defined(KOKKOS_ARCH_AMD_GPU) && !defined(KOKKOS_ARCH_AMD_GFX1030) && \ - !defined(KOKKOS_ARCH_AMD_GFX1100) && !defined(KOKKOS_ARCH_AMD_GFX1103) + !defined(KOKKOS_ARCH_AMD_GFX1100) if constexpr (Analysis::Reducer::has_join_member_function()) { if constexpr (std::is_void_v) a_functor_reducer.get_functor().join(local_offset_value, @@ -177,6 +177,10 @@ class ParallelScan, const idx_type chunk_size = 128; const idx_type n_chunks = (N + chunk_size - 1) / chunk_size; + // Only let one ParallelReduce instance at a time use the scratch memory. + std::scoped_lock scratch_memory_lock( + OpenMPTargetExec::m_mutex_scratch_ptr); + // This could be scratch memory per team Kokkos::View @@ -225,6 +229,10 @@ class ParallelScanWithTotal, const int64_t n_chunks = (N + chunk_size - 1) / chunk_size; if (N > 0) { + // Only let one ParallelReduce instance at a time use the scratch memory. 
+ std::scoped_lock scratch_memory_lock( + OpenMPTargetExec::m_mutex_scratch_ptr); + // This could be scratch memory per team Kokkos::View diff --git a/lib/kokkos/core/src/SYCL/Kokkos_SYCL.cpp b/lib/kokkos/core/src/SYCL/Kokkos_SYCL.cpp index 9a246f7642..4de6931918 100644 --- a/lib/kokkos/core/src/SYCL/Kokkos_SYCL.cpp +++ b/lib/kokkos/core/src/SYCL/Kokkos_SYCL.cpp @@ -110,6 +110,31 @@ void SYCL::print_configuration(std::ostream& os, bool verbose) const { #else os << "macro KOKKOS_IMPL_SYCL_USE_IN_ORDER_QUEUES : undefined\n"; #endif +#ifdef SYCL_EXT_ONEAPI_GRAPH + os << "macro SYCL_EXT_ONEAPI_GRAPH : defined\n"; +#else + os << "macro SYCL_EXT_ONEAPI_GRAPH : undefined\n"; +#endif +#ifdef SYCL_EXT_INTEL_QUEUE_IMMEDIATE_COMMAND_LIST + if (sycl_queue() + .has_property< + sycl::ext::intel::property::queue::immediate_command_list>()) + os << "Immediate command lists enforced\n"; + else if (sycl_queue() + .has_property()) + os << "Standard command queue enforced\n"; + else +#endif + { + os << "Immediate command lists and standard command queue allowed.\n"; + if (const char* environment_setting = + std::getenv("SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS")) + os << "SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=" + << environment_setting << " takes precedence.\n"; + else + os << "SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS not defined.\n"; + } int counter = 0; int active_device = Kokkos::device_id(); diff --git a/lib/kokkos/core/src/SYCL/Kokkos_SYCL_GraphNodeKernel.hpp b/lib/kokkos/core/src/SYCL/Kokkos_SYCL_GraphNodeKernel.hpp new file mode 100644 index 0000000000..9c39df9415 --- /dev/null +++ b/lib/kokkos/core/src/SYCL/Kokkos_SYCL_GraphNodeKernel.hpp @@ -0,0 +1,157 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. 
Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOS_SYCL_GRAPHNODEKERNEL_HPP +#define KOKKOS_SYCL_GRAPHNODEKERNEL_HPP + +#include + +#include + +#include +#include +#include + +#include + +namespace Kokkos { +namespace Impl { + +template +class GraphNodeKernelImpl + : public PatternImplSpecializationFromTag< + PatternTag, Functor, PolicyType, Args..., + Kokkos::Experimental::SYCL>::type { + public: + using Policy = PolicyType; + using graph_kernel = GraphNodeKernelImpl; + using base_t = typename PatternImplSpecializationFromTag< + PatternTag, Functor, Policy, Args..., Kokkos::Experimental::SYCL>::type; + + // TODO use the name and executionspace + template + GraphNodeKernelImpl(std::string, Kokkos::Experimental::SYCL const&, + Functor arg_functor, PolicyDeduced&& arg_policy, + ArgsDeduced&&... args) + : base_t(std::move(arg_functor), (PolicyDeduced &&) arg_policy, + (ArgsDeduced &&) args...) 
{} + + template + GraphNodeKernelImpl(Kokkos::Experimental::SYCL const& exec_space, + Functor arg_functor, PolicyDeduced&& arg_policy) + : GraphNodeKernelImpl("", exec_space, std::move(arg_functor), + (PolicyDeduced &&) arg_policy) {} + + void set_sycl_graph_ptr( + sycl::ext::oneapi::experimental::command_graph< + sycl::ext::oneapi::experimental::graph_state::modifiable>* + arg_graph) { + m_graph_ptr = arg_graph; + } + + void set_sycl_graph_node_ptr( + std::optional* arg_node) { + m_graph_node_ptr = arg_node; + } + + std::optional& get_sycl_graph_node() + const { + return *m_graph_node_ptr; + } + + sycl::ext::oneapi::experimental::command_graph< + sycl::ext::oneapi::experimental::graph_state::modifiable>& + get_sycl_graph() const { + return *m_graph_ptr; + } + + private: + Kokkos::ObservingRawPtr> + m_graph_ptr = nullptr; + Kokkos::ObservingRawPtr> + m_graph_node_ptr = nullptr; +}; + +struct SYCLGraphNodeAggregateKernel { + using graph_kernel = SYCLGraphNodeAggregateKernel; + + // Aggregates don't need a policy, but for the purposes of checking the static + // assertions about graph kernels, + struct Policy { + using is_graph_kernel = std::true_type; + }; +}; + +template ::type> +struct get_graph_node_kernel_type + : type_identity> {}; + +template +struct get_graph_node_kernel_type + : type_identity, + Kokkos::ParallelReduceTag>> {}; + +template +auto& get_sycl_graph_from_kernel(KernelType const& kernel) { + using graph_node_kernel_t = + typename get_graph_node_kernel_type::type; + auto const& kernel_as_graph_kernel = + static_cast(kernel); + auto& graph = kernel_as_graph_kernel.get_sycl_graph(); + + return graph; +} + +template +auto& get_sycl_graph_node_from_kernel(KernelType const& kernel) { + using graph_node_kernel_t = + typename get_graph_node_kernel_type::type; + auto const& kernel_as_graph_kernel = + static_cast(kernel); + auto& graph_node = kernel_as_graph_kernel.get_sycl_graph_node(); + + return graph_node; +} + +template +void 
sycl_attach_kernel_to_node(Kernel& kernel, const Lambda& lambda) { + sycl::ext::oneapi::experimental::command_graph< + sycl::ext::oneapi::experimental::graph_state::modifiable>& graph = + Impl::get_sycl_graph_from_kernel(kernel); + std::optional& graph_node = + Impl::get_sycl_graph_node_from_kernel(kernel); + KOKKOS_ENSURES(!graph_node); + graph_node = graph.add(lambda); + KOKKOS_ENSURES(graph_node); + // FIXME_SYCL_GRAPH not yet implemented in the compiler + // KOKKOS_ENSURES(graph_node.get_type() == + // sycl::ext::oneapi::experimental::node_type::kernel) +} + +} // namespace Impl +} // namespace Kokkos + +#endif diff --git a/lib/kokkos/core/src/SYCL/Kokkos_SYCL_GraphNode_Impl.hpp b/lib/kokkos/core/src/SYCL/Kokkos_SYCL_GraphNode_Impl.hpp new file mode 100644 index 0000000000..6bbe6711a2 --- /dev/null +++ b/lib/kokkos/core/src/SYCL/Kokkos_SYCL_GraphNode_Impl.hpp @@ -0,0 +1,56 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOS_SYCL_GRAPHNODE_IMPL_HPP +#define KOKKOS_SYCL_GRAPHNODE_IMPL_HPP + +#include + +#include + +#include + +#include + +namespace Kokkos { +namespace Impl { +template <> +struct GraphNodeBackendSpecificDetails { + std::optional node; + + explicit GraphNodeBackendSpecificDetails() = default; + + explicit GraphNodeBackendSpecificDetails( + _graph_node_is_root_ctor_tag) noexcept {} +}; + +template +struct GraphNodeBackendDetailsBeforeTypeErasure { + protected: + GraphNodeBackendDetailsBeforeTypeErasure( + Kokkos::Experimental::SYCL const &, Kernel &, PredecessorRef const &, + GraphNodeBackendSpecificDetails &) noexcept {} + + GraphNodeBackendDetailsBeforeTypeErasure( + Kokkos::Experimental::SYCL const &, _graph_node_is_root_ctor_tag, + GraphNodeBackendSpecificDetails &) noexcept {} +}; + +} // namespace Impl +} // namespace Kokkos + +#endif diff --git a/lib/kokkos/core/src/SYCL/Kokkos_SYCL_Graph_Impl.hpp b/lib/kokkos/core/src/SYCL/Kokkos_SYCL_Graph_Impl.hpp new file mode 100644 index 0000000000..1dc4a9c997 --- /dev/null +++ b/lib/kokkos/core/src/SYCL/Kokkos_SYCL_Graph_Impl.hpp @@ -0,0 +1,174 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOS_SYCL_GRAPH_IMPL_HPP +#define KOKKOS_SYCL_GRAPH_IMPL_HPP + +#include + +#include + +#include +#include + +#include + +#include + +namespace Kokkos { +namespace Impl { +template <> +class GraphImpl { + public: + using node_details_t = + GraphNodeBackendSpecificDetails; + using root_node_impl_t = GraphNodeImpl; + using aggregate_kernel_impl_t = SYCLGraphNodeAggregateKernel; + using aggregate_node_impl_t = + GraphNodeImpl; + + // Not movable or copyable; it spends its whole life as a shared_ptr in the + // Graph object. + GraphImpl() = delete; + GraphImpl(GraphImpl const&) = delete; + GraphImpl(GraphImpl&&) = delete; + GraphImpl& operator=(GraphImpl const&) = delete; + GraphImpl& operator=(GraphImpl&&) = delete; + + ~GraphImpl(); + + explicit GraphImpl(Kokkos::Experimental::SYCL instance); + + void add_node(std::shared_ptr const& arg_node_ptr); + + template + void add_node(std::shared_ptr const& arg_node_ptr); + + template + void add_predecessor(NodeImplPtr arg_node_ptr, PredecessorRef arg_pred_ref); + + void submit(); + + Kokkos::Experimental::SYCL const& get_execution_space() const noexcept; + + auto create_root_node_ptr(); + + template + auto create_aggregate_ptr(PredecessorRefs&&...); + + private: + void instantiate_graph() { m_graph_exec = m_graph.finalize(); } + + Kokkos::Experimental::SYCL m_execution_space; + sycl::ext::oneapi::experimental::command_graph< + sycl::ext::oneapi::experimental::graph_state::modifiable> + m_graph; + std::optional> + m_graph_exec; +}; + +inline GraphImpl::~GraphImpl() { + m_execution_space.fence("Kokkos::GraphImpl::~GraphImpl: Graph Destruction"); +} + +inline GraphImpl::GraphImpl( + Kokkos::Experimental::SYCL instance) + : m_execution_space(std::move(instance)), + m_graph(m_execution_space.sycl_queue().get_context(), + m_execution_space.sycl_queue().get_device()) {} + +inline void GraphImpl::add_node( + std::shared_ptr const& arg_node_ptr) { 
+ // add an empty node that needs to be set up before finalizing the graph + arg_node_ptr->node_details_t::node = m_graph.add(); +} + +// Requires NodeImplPtr is a shared_ptr to specialization of GraphNodeImpl +// Also requires that the kernel has the graph node tag in its policy +template +inline void GraphImpl::add_node( + std::shared_ptr const& arg_node_ptr) { + static_assert(NodeImpl::kernel_type::Policy::is_graph_kernel::value); + KOKKOS_EXPECTS(arg_node_ptr); + // The Kernel launch from the execute() method has been shimmed to insert + // the node into the graph + auto& kernel = arg_node_ptr->get_kernel(); + auto& node = static_cast(arg_node_ptr.get())->node; + KOKKOS_EXPECTS(!node); + kernel.set_sycl_graph_ptr(&m_graph); + kernel.set_sycl_graph_node_ptr(&node); + kernel.execute(); + KOKKOS_ENSURES(node); +} + +// Requires PredecessorRef is a specialization of GraphNodeRef that has +// already been added to this graph and NodeImpl is a specialization of +// GraphNodeImpl that has already been added to this graph. 
+template +inline void GraphImpl::add_predecessor( + NodeImplPtr arg_node_ptr, PredecessorRef arg_pred_ref) { + KOKKOS_EXPECTS(arg_node_ptr); + auto pred_ptr = GraphAccess::get_node_ptr(arg_pred_ref); + KOKKOS_EXPECTS(pred_ptr); + + auto& pred_node = pred_ptr->node_details_t::node; + KOKKOS_EXPECTS(pred_node); + + auto& node = arg_node_ptr->node_details_t::node; + KOKKOS_EXPECTS(node); + + m_graph.make_edge(*pred_node, *node); +} + +inline void GraphImpl::submit() { + if (!m_graph_exec) { + instantiate_graph(); + } + m_execution_space.sycl_queue().ext_oneapi_graph(*m_graph_exec); +} + +inline Kokkos::Experimental::SYCL const& +GraphImpl::get_execution_space() const noexcept { + return m_execution_space; +} + +inline auto GraphImpl::create_root_node_ptr() { + KOKKOS_EXPECTS(!m_graph_exec); + auto rv = std::make_shared(get_execution_space(), + _graph_node_is_root_ctor_tag{}); + rv->node_details_t::node = m_graph.add(); + return rv; +} + +template +inline auto GraphImpl::create_aggregate_ptr( + PredecessorRefs&&...) { + // The attachment to predecessors, which is all we really need, happens + // in the generic layer, which calls through to add_predecessor for + // each predecessor ref, so all we need to do here is create the (trivial) + // aggregate node. 
+ return std::make_shared(m_execution_space, + _graph_node_kernel_ctor_tag{}, + aggregate_kernel_impl_t{}); +} +} // namespace Impl +} // namespace Kokkos + +#endif diff --git a/lib/kokkos/core/src/SYCL/Kokkos_SYCL_Instance.cpp b/lib/kokkos/core/src/SYCL/Kokkos_SYCL_Instance.cpp index 0e67adb578..5843dca812 100644 --- a/lib/kokkos/core/src/SYCL/Kokkos_SYCL_Instance.cpp +++ b/lib/kokkos/core/src/SYCL/Kokkos_SYCL_Instance.cpp @@ -166,26 +166,27 @@ int SYCLInternal::acquire_team_scratch_space() { return current_team_scratch; } -sycl::device_ptr SYCLInternal::resize_team_scratch_space( +Kokkos::Impl::sycl_device_ptr SYCLInternal::resize_team_scratch_space( int scratch_pool_id, std::int64_t bytes, bool force_shrink) { // Multiple ParallelFor/Reduce Teams can call this function at the same time // and invalidate the m_team_scratch_ptr. We use a pool to avoid any race // condition. - if (m_team_scratch_current_size[scratch_pool_id] == 0) { + auto mem_space = Kokkos::Experimental::SYCLDeviceUSMSpace(*m_queue); + if (m_team_scratch_current_size[scratch_pool_id] == 0 && bytes > 0) { m_team_scratch_current_size[scratch_pool_id] = bytes; - m_team_scratch_ptr[scratch_pool_id] = - Kokkos::kokkos_malloc( - "Kokkos::Experimental::SYCLDeviceUSMSpace::TeamScratchMemory", - m_team_scratch_current_size[scratch_pool_id]); + m_team_scratch_ptr[scratch_pool_id] = mem_space.allocate( + "Kokkos::Experimental::SYCL::InternalTeamScratchMemory", + m_team_scratch_current_size[scratch_pool_id]); } if ((bytes > m_team_scratch_current_size[scratch_pool_id]) || ((bytes < m_team_scratch_current_size[scratch_pool_id]) && (force_shrink))) { + mem_space.deallocate(m_team_scratch_ptr[scratch_pool_id], + m_team_scratch_current_size[scratch_pool_id]); m_team_scratch_current_size[scratch_pool_id] = bytes; - m_team_scratch_ptr[scratch_pool_id] = - Kokkos::kokkos_realloc( - m_team_scratch_ptr[scratch_pool_id], - m_team_scratch_current_size[scratch_pool_id]); + m_team_scratch_ptr[scratch_pool_id] = 
mem_space.allocate( + "Kokkos::Experimental::SYCL::InternalTeamScratchMemory", + m_team_scratch_current_size[scratch_pool_id]); } return m_team_scratch_ptr[scratch_pool_id]; } @@ -234,8 +235,8 @@ void SYCLInternal::finalize() { for (int i = 0; i < m_n_team_scratch; ++i) { if (m_team_scratch_current_size[i] > 0) { - Kokkos::kokkos_free( - m_team_scratch_ptr[i]); + device_mem_space.deallocate(m_team_scratch_ptr[i], + m_team_scratch_current_size[i]); m_team_scratch_current_size[i] = 0; m_team_scratch_ptr[i] = nullptr; } @@ -250,7 +251,8 @@ void SYCLInternal::finalize() { m_queue.reset(); } -sycl::device_ptr SYCLInternal::scratch_space(const std::size_t size) { +Kokkos::Impl::sycl_device_ptr SYCLInternal::scratch_space( + const std::size_t size) { if (verify_is_initialized("scratch_space") && m_scratchSpaceCount < scratch_count(size)) { auto mem_space = Kokkos::Experimental::SYCLDeviceUSMSpace(*m_queue); @@ -270,7 +272,8 @@ sycl::device_ptr SYCLInternal::scratch_space(const std::size_t size) { return m_scratchSpace; } -sycl::host_ptr SYCLInternal::scratch_host(const std::size_t size) { +Kokkos::Impl::sycl_host_ptr SYCLInternal::scratch_host( + const std::size_t size) { if (verify_is_initialized("scratch_unified") && m_scratchHostCount < scratch_count(size)) { auto mem_space = Kokkos::Experimental::SYCLHostUSMSpace(*m_queue); @@ -290,7 +293,8 @@ sycl::host_ptr SYCLInternal::scratch_host(const std::size_t size) { return m_scratchHost; } -sycl::device_ptr SYCLInternal::scratch_flags(const std::size_t size) { +Kokkos::Impl::sycl_device_ptr SYCLInternal::scratch_flags( + const std::size_t size) { if (verify_is_initialized("scratch_flags") && m_scratchFlagsCount < scratch_count(size)) { auto mem_space = Kokkos::Experimental::SYCLDeviceUSMSpace(*m_queue); diff --git a/lib/kokkos/core/src/SYCL/Kokkos_SYCL_Instance.hpp b/lib/kokkos/core/src/SYCL/Kokkos_SYCL_Instance.hpp index ab7e8ce71e..2d784ef8a5 100644 --- a/lib/kokkos/core/src/SYCL/Kokkos_SYCL_Instance.hpp +++ 
b/lib/kokkos/core/src/SYCL/Kokkos_SYCL_Instance.hpp @@ -43,13 +43,12 @@ class SYCLInternal { SYCLInternal& operator=(SYCLInternal&&) = delete; SYCLInternal(SYCLInternal&&) = delete; - sycl::device_ptr scratch_space(const std::size_t size); - sycl::device_ptr scratch_flags(const std::size_t size); - sycl::host_ptr scratch_host(const std::size_t size); + Kokkos::Impl::sycl_device_ptr scratch_space(const std::size_t size); + Kokkos::Impl::sycl_device_ptr scratch_flags(const std::size_t size); + Kokkos::Impl::sycl_host_ptr scratch_host(const std::size_t size); int acquire_team_scratch_space(); - sycl::device_ptr resize_team_scratch_space(int scratch_pool_id, - std::int64_t bytes, - bool force_shrink = false); + Kokkos::Impl::sycl_device_ptr resize_team_scratch_space( + int scratch_pool_id, std::int64_t bytes, bool force_shrink = false); void register_team_scratch_event(int scratch_pool_id, sycl::event event); uint32_t impl_get_instance_id() const; @@ -59,21 +58,22 @@ class SYCLInternal { uint32_t m_maxConcurrency = 0; uint64_t m_maxShmemPerBlock = 0; - std::size_t m_scratchSpaceCount = 0; - sycl::device_ptr m_scratchSpace = nullptr; - std::size_t m_scratchHostCount = 0; - sycl::host_ptr m_scratchHost = nullptr; - std::size_t m_scratchFlagsCount = 0; - sycl::device_ptr m_scratchFlags = nullptr; + std::size_t m_scratchSpaceCount = 0; + Kokkos::Impl::sycl_device_ptr m_scratchSpace = nullptr; + std::size_t m_scratchHostCount = 0; + Kokkos::Impl::sycl_host_ptr m_scratchHost = nullptr; + std::size_t m_scratchFlagsCount = 0; + Kokkos::Impl::sycl_device_ptr m_scratchFlags = nullptr; // mutex to access shared memory mutable std::mutex m_mutexScratchSpace; // Team Scratch Level 1 Space - static constexpr int m_n_team_scratch = 10; - mutable int64_t m_team_scratch_current_size[m_n_team_scratch] = {}; - mutable sycl::device_ptr m_team_scratch_ptr[m_n_team_scratch] = {}; - mutable int m_current_team_scratch = 0; - mutable sycl::event m_team_scratch_event[m_n_team_scratch] = {}; + 
static constexpr int m_n_team_scratch = 10; + mutable int64_t m_team_scratch_current_size[m_n_team_scratch] = {}; + mutable Kokkos::Impl::sycl_device_ptr + m_team_scratch_ptr[m_n_team_scratch] = {}; + mutable int m_current_team_scratch = 0; + mutable sycl::event m_team_scratch_event[m_n_team_scratch] = {}; mutable std::mutex m_team_scratch_mutex; uint32_t m_instance_id = Kokkos::Tools::Experimental::Impl::idForInstance< diff --git a/lib/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelFor_MDRange.hpp b/lib/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelFor_MDRange.hpp index 7fbf5420f8..cb7b1048da 100644 --- a/lib/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelFor_MDRange.hpp +++ b/lib/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelFor_MDRange.hpp @@ -120,7 +120,7 @@ class Kokkos::Impl::ParallelFor, desul::ensure_sycl_lock_arrays_on_device(q); - auto parallel_for_event = q.submit([&](sycl::handler& cgh) { + auto cgh_lambda = [&](sycl::handler& cgh) { const auto range = compute_ranges(); const sycl::range<3> global_range = range.get_global_range(); const sycl::range<3> local_range = range.get_local_range(); @@ -153,12 +153,22 @@ class Kokkos::Impl::ParallelFor, {global_x, global_y, global_z}, {local_x, local_y, local_z}) .exec_range(); }); - }); -#ifndef KOKKOS_IMPL_SYCL_USE_IN_ORDER_QUEUES - q.ext_oneapi_submit_barrier(std::vector{parallel_for_event}); -#endif + }; - return parallel_for_event; +#ifdef SYCL_EXT_ONEAPI_GRAPH + if constexpr (Policy::is_graph_kernel::value) { + sycl_attach_kernel_to_node(*this, cgh_lambda); + return {}; + } else +#endif + { + auto parallel_for_event = q.submit(cgh_lambda); + +#ifndef KOKKOS_IMPL_SYCL_USE_IN_ORDER_QUEUES + q.ext_oneapi_submit_barrier(std::vector{parallel_for_event}); +#endif + return parallel_for_event; + } } public: @@ -181,12 +191,6 @@ class Kokkos::Impl::ParallelFor, functor_wrapper.register_event(event); } - ParallelFor(const ParallelFor&) = delete; - ParallelFor(ParallelFor&&) = delete; - ParallelFor& operator=(const ParallelFor&) = delete; 
- ParallelFor& operator=(ParallelFor&&) = delete; - ~ParallelFor() = default; - ParallelFor(const FunctorType& arg_functor, const Policy& arg_policy) : m_functor(arg_functor), m_policy(arg_policy), diff --git a/lib/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelFor_Range.hpp b/lib/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelFor_Range.hpp index b4de7eb89f..8ef43d392c 100644 --- a/lib/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelFor_Range.hpp +++ b/lib/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelFor_Range.hpp @@ -17,11 +17,15 @@ #ifndef KOKKOS_SYCL_PARALLEL_FOR_RANGE_HPP_ #define KOKKOS_SYCL_PARALLEL_FOR_RANGE_HPP_ +#ifdef SYCL_EXT_ONEAPI_AUTO_LOCAL_RANGE +#include +#endif #ifndef KOKKOS_IMPL_SYCL_USE_IN_ORDER_QUEUES #include #endif namespace Kokkos::Impl { +#ifndef SYCL_EXT_ONEAPI_AUTO_LOCAL_RANGE template struct FunctorWrapperRangePolicyParallelFor { using WorkTag = typename Policy::work_tag; @@ -37,14 +41,15 @@ struct FunctorWrapperRangePolicyParallelFor { typename Policy::index_type m_begin; FunctorWrapper m_functor_wrapper; }; +#endif // Same as above but for a user-provided workgroup size template struct FunctorWrapperRangePolicyParallelForCustom { using WorkTag = typename Policy::work_tag; - void operator()(sycl::item<1> item) const { - const typename Policy::index_type id = item.get_linear_id(); + void operator()(sycl::nd_item<1> item) const { + const typename Policy::index_type id = item.get_global_linear_id(); if (id < m_work_size) { const auto shifted_id = id + m_begin; if constexpr (std::is_void_v) @@ -74,27 +79,47 @@ class Kokkos::Impl::ParallelFor, const Policy m_policy; template - static sycl::event sycl_direct_launch(const Policy& policy, - const Functor& functor, - const sycl::event& memcpy_event) { + sycl::event sycl_direct_launch(const Policy& policy, const Functor& functor, + const sycl::event& memcpy_event) const { // Convenience references const Kokkos::Experimental::SYCL& space = policy.space(); sycl::queue& q = space.sycl_queue(); 
desul::ensure_sycl_lock_arrays_on_device(q); - auto parallel_for_event = q.submit([&](sycl::handler& cgh) { + auto cgh_lambda = [&](sycl::handler& cgh) { #ifndef KOKKOS_IMPL_SYCL_USE_IN_ORDER_QUEUES cgh.depends_on(memcpy_event); #else (void)memcpy_event; #endif + if (policy.chunk_size() <= 1) { +#ifdef SYCL_EXT_ONEAPI_AUTO_LOCAL_RANGE + const auto actual_range = policy.end() - policy.begin(); + FunctorWrapperRangePolicyParallelForCustom f{ + policy.begin(), functor, actual_range}; + // Round the actual range up to the closest power of two not exceeding + // the maximum workgroup size + const auto max_wgroup_size = + q.get_device().get_info(); + const auto wgroup_size_multiple = Kokkos::bit_floor( + std::min(max_wgroup_size, actual_range)); + + const auto launch_range = (actual_range + wgroup_size_multiple - 1) / + wgroup_size_multiple * wgroup_size_multiple; + sycl::nd_range<1> range( + launch_range, sycl::ext::oneapi::experimental::auto_range<1>()); + cgh.parallel_for< + FunctorWrapperRangePolicyParallelForCustom>(range, + f); +#else FunctorWrapperRangePolicyParallelFor f{policy.begin(), functor}; sycl::range<1> range(policy.end() - policy.begin()); cgh.parallel_for>( range, f); +#endif } else { // Use the chunk size as workgroup size. 
We need to make sure that the // range the kernel is launched with is a multiple of the workgroup @@ -111,12 +136,22 @@ class Kokkos::Impl::ParallelFor, FunctorWrapperRangePolicyParallelForCustom>(range, f); } - }); -#ifndef KOKKOS_IMPL_SYCL_USE_IN_ORDER_QUEUES - q.ext_oneapi_submit_barrier(std::vector{parallel_for_event}); -#endif + }; - return parallel_for_event; +#ifdef SYCL_EXT_ONEAPI_GRAPH + if constexpr (Policy::is_graph_kernel::value) { + sycl_attach_kernel_to_node(*this, cgh_lambda); + return {}; + } else +#endif + { + auto parallel_for_event = q.submit(cgh_lambda); + +#ifndef KOKKOS_IMPL_SYCL_USE_IN_ORDER_QUEUES + q.ext_oneapi_submit_barrier(std::vector{parallel_for_event}); +#endif + return parallel_for_event; + } } public: @@ -137,12 +172,6 @@ class Kokkos::Impl::ParallelFor, functor_wrapper.register_event(event); } - ParallelFor(const ParallelFor&) = delete; - ParallelFor(ParallelFor&&) = delete; - ParallelFor& operator=(const ParallelFor&) = delete; - ParallelFor& operator=(ParallelFor&&) = delete; - ~ParallelFor() = default; - ParallelFor(const FunctorType& arg_functor, const Policy& arg_policy) : m_functor(arg_functor), m_policy(arg_policy) {} }; diff --git a/lib/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelFor_Team.hpp b/lib/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelFor_Team.hpp index ecb4a863da..cf7f582bc7 100644 --- a/lib/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelFor_Team.hpp +++ b/lib/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelFor_Team.hpp @@ -22,13 +22,14 @@ #include #include +#include #include template class Kokkos::Impl::ParallelFor, Kokkos::Experimental::SYCL> { public: - using Policy = TeamPolicyInternal; + using Policy = TeamPolicy; using functor_type = FunctorType; using size_type = ::Kokkos::Experimental::SYCL::size_type; @@ -44,24 +45,19 @@ class Kokkos::Impl::ParallelFor, size_type const m_vector_size; int m_shmem_begin; int m_shmem_size; - sycl::device_ptr m_global_scratch_ptr; size_t m_scratch_size[2]; - // Only let one ParallelFor instance 
at a time use the team scratch memory. - // The constructor acquires the mutex which is released in the destructor. - std::scoped_lock m_scratch_buffers_lock; - int m_scratch_pool_id = -1; template - sycl::event sycl_direct_launch(const Policy& policy, + sycl::event sycl_direct_launch(const sycl_device_ptr global_scratch_ptr, const FunctorWrapper& functor_wrapper, const sycl::event& memcpy_event) const { // Convenience references - const Kokkos::Experimental::SYCL& space = policy.space(); + const Kokkos::Experimental::SYCL& space = m_policy.space(); sycl::queue& q = space.sycl_queue(); desul::ensure_sycl_lock_arrays_on_device(q); - auto parallel_for_event = q.submit([&](sycl::handler& cgh) { + auto cgh_lambda = [&](sycl::handler& cgh) { // FIXME_SYCL accessors seem to need a size greater than zero at least for // host queues sycl::local_accessor team_scratch_memory_L0( @@ -72,7 +68,6 @@ class Kokkos::Impl::ParallelFor, // Avoid capturing *this since it might not be trivially copyable const auto shmem_begin = m_shmem_begin; const size_t scratch_size[2] = {m_scratch_size[0], m_scratch_size[1]}; - sycl::device_ptr const global_scratch_ptr = m_global_scratch_ptr; auto lambda = [=](sycl::nd_item<2> item) { const member_type team_member( @@ -114,28 +109,53 @@ class Kokkos::Impl::ParallelFor, sycl::range<2>(m_team_size, m_league_size * final_vector_size), sycl::range<2>(m_team_size, final_vector_size)), lambda); - }); -#ifndef KOKKOS_IMPL_SYCL_USE_IN_ORDER_QUEUES - q.ext_oneapi_submit_barrier(std::vector{parallel_for_event}); + }; + +#ifdef SYCL_EXT_ONEAPI_GRAPH + if constexpr (Policy::is_graph_kernel::value) { + sycl_attach_kernel_to_node(*this, cgh_lambda); + return {}; + } else #endif - return parallel_for_event; + { + auto parallel_for_event = q.submit(cgh_lambda); + +#ifndef KOKKOS_IMPL_SYCL_USE_IN_ORDER_QUEUES + q.ext_oneapi_submit_barrier(std::vector{parallel_for_event}); +#endif + return parallel_for_event; + } } public: inline void execute() const { if 
(m_league_size == 0) return; - auto& space = *m_policy.space().impl_internal_space_instance(); + auto& instance = *m_policy.space().impl_internal_space_instance(); + + // Only let one instance at a time resize the instance's scratch memory + // allocations. + std::scoped_lock team_scratch_lock( + instance.m_team_scratch_mutex); + + // Functor's reduce memory, team scan memory, and team shared memory depend + // upon team size. + int scratch_pool_id = instance.acquire_team_scratch_space(); + const sycl_device_ptr global_scratch_ptr = + static_cast>(instance.resize_team_scratch_space( + scratch_pool_id, + static_cast(m_scratch_size[1]) * m_league_size)); + Kokkos::Experimental::Impl::SYCLInternal::IndirectKernelMem& - indirectKernelMem = space.get_indirect_kernel_mem(); + indirectKernelMem = instance.get_indirect_kernel_mem(); auto functor_wrapper = Experimental::Impl::make_sycl_function_wrapper( m_functor, indirectKernelMem); - sycl::event event = sycl_direct_launch(m_policy, functor_wrapper, + sycl::event event = sycl_direct_launch(global_scratch_ptr, functor_wrapper, functor_wrapper.get_copy_event()); functor_wrapper.register_event(event); - space.register_team_scratch_event(m_scratch_pool_id, event); + instance.register_team_scratch_event(scratch_pool_id, event); } ParallelFor(FunctorType const& arg_functor, Policy const& arg_policy) @@ -143,10 +163,7 @@ class Kokkos::Impl::ParallelFor, m_policy(arg_policy), m_league_size(arg_policy.league_size()), m_team_size(arg_policy.team_size()), - m_vector_size(arg_policy.impl_vector_length()), - m_scratch_buffers_lock(arg_policy.space() - .impl_internal_space_instance() - ->m_team_scratch_mutex) { + m_vector_size(arg_policy.impl_vector_length()) { // FIXME_SYCL optimize if (m_team_size < 0) m_team_size = @@ -159,22 +176,14 @@ class Kokkos::Impl::ParallelFor, m_scratch_size[0] = m_shmem_size; m_scratch_size[1] = m_policy.scratch_size(1, m_team_size); - // Functor's reduce memory, team scan memory, and team shared memory 
depend - // upon team size. - auto& space = *m_policy.space().impl_internal_space_instance(); - m_scratch_pool_id = space.acquire_team_scratch_space(); - m_global_scratch_ptr = - static_cast>(space.resize_team_scratch_space( - m_scratch_pool_id, - static_cast(m_scratch_size[1]) * m_league_size)); - - if (static_cast(space.m_maxShmemPerBlock) < + const auto& instance = *m_policy.space().impl_internal_space_instance(); + if (static_cast(instance.m_maxShmemPerBlock) < m_shmem_size - m_shmem_begin) { std::stringstream out; out << "Kokkos::Impl::ParallelFor insufficient shared memory! " "Requested " << m_shmem_size - m_shmem_begin << " bytes but maximum is " - << space.m_maxShmemPerBlock << '\n'; + << instance.m_maxShmemPerBlock << '\n'; Kokkos::Impl::throw_runtime_exception(out.str()); } diff --git a/lib/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelReduce_MDRange.hpp b/lib/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelReduce_MDRange.hpp index f55280e22e..0774b24bca 100644 --- a/lib/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelReduce_MDRange.hpp +++ b/lib/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelReduce_MDRange.hpp @@ -77,9 +77,7 @@ class Kokkos::Impl::ParallelReduce::accessible), - m_scratch_buffers_lock( - m_space.impl_internal_space_instance()->m_mutexScratchSpace) {} + typename View::memory_space>::accessible) {} private: template @@ -94,10 +92,10 @@ class Kokkos::Impl::ParallelReduce results_ptr; + sycl_device_ptr results_ptr; auto host_result_ptr = (m_result_ptr && !m_result_ptr_device_accessible) - ? static_cast>( + ? 
static_cast>( instance.scratch_host(sizeof(value_type) * value_count)) : nullptr; @@ -108,13 +106,13 @@ class Kokkos::Impl::ParallelReduce>( + results_ptr = static_cast>( instance.scratch_space(sizeof(value_type) * value_count)); auto device_accessible_result_ptr = m_result_ptr_device_accessible @@ -129,12 +127,20 @@ class Kokkos::Impl::ParallelReduce{parallel_reduce_event}); + }; + +#ifdef SYCL_EXT_ONEAPI_GRAPH + if constexpr (Policy::is_graph_kernel::value) { + sycl_attach_kernel_to_node(*this, cgh_lambda); + } else #endif - last_reduction_event = parallel_reduce_event; + { + last_reduction_event = q.submit(cgh_lambda); +#ifndef KOKKOS_IMPL_SYCL_USE_IN_ORDER_QUEUES + q.ext_oneapi_submit_barrier( + std::vector{last_reduction_event}); +#endif + } } else { // Otherwise (when n_tiles is not zero), we perform a reduction on the // values in all workgroups separately, write the workgroup results back @@ -155,16 +161,16 @@ class Kokkos::Impl::ParallelReduce>( + results_ptr = static_cast>( instance.scratch_space(sizeof(value_type) * value_count * n_wgroups)); auto device_accessible_result_ptr = m_result_ptr_device_accessible ? 
static_cast>(m_result_ptr) : static_cast>(host_result_ptr); - auto scratch_flags = static_cast>( + auto scratch_flags = static_cast>( instance.scratch_flags(sizeof(unsigned int))); - auto parallel_reduce_event = q.submit([&](sycl::handler& cgh) { + auto cgh_lambda = [&](sycl::handler& cgh) { sycl::local_accessor local_mem( sycl::range<1>(wgroup_size) * value_count, cgh); sycl::local_accessor num_teams_done(1, cgh); @@ -298,12 +304,19 @@ class Kokkos::Impl::ParallelReduce{parallel_reduce_event}); + }; +#ifdef SYCL_EXT_ONEAPI_GRAPH + if constexpr (Policy::is_graph_kernel::value) { + sycl_attach_kernel_to_node(*this, cgh_lambda); + } else #endif - last_reduction_event = parallel_reduce_event; + { + last_reduction_event = q.submit(cgh_lambda); +#ifndef KOKKOS_IMPL_SYCL_USE_IN_ORDER_QUEUES + q.ext_oneapi_submit_barrier( + std::vector{last_reduction_event}); +#endif + } } // At this point, the reduced value is written to the entry in results_ptr @@ -311,6 +324,11 @@ class Kokkos::Impl::ParallelReduce::execute: result " "not device-accessible"); @@ -330,6 +348,12 @@ class Kokkos::Impl::ParallelReduce scratch_buffers_lock( + instance.m_mutexScratchSpace); + using IndirectKernelMem = Kokkos::Experimental::Impl::SYCLInternal::IndirectKernelMem; IndirectKernelMem& indirectKernelMem = instance.get_indirect_kernel_mem(); @@ -349,10 +373,6 @@ class Kokkos::Impl::ParallelReduce m_scratch_buffers_lock; }; #endif /* KOKKOS_SYCL_PARALLEL_REDUCE_MDRANGE_HPP */ diff --git a/lib/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelReduce_Range.hpp b/lib/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelReduce_Range.hpp index 5333e3c8a8..2d46ffc77d 100644 --- a/lib/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelReduce_Range.hpp +++ b/lib/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelReduce_Range.hpp @@ -50,9 +50,7 @@ class Kokkos::Impl::ParallelReduce::accessible), - m_scratch_buffers_lock( - p.space().impl_internal_space_instance()->m_mutexScratchSpace) {} + typename View::memory_space>::accessible) {} private: 
template @@ -69,10 +67,10 @@ class Kokkos::Impl::ParallelReduce results_ptr = nullptr; + sycl_device_ptr results_ptr = nullptr; auto host_result_ptr = (m_result_ptr && !m_result_ptr_device_accessible) - ? static_cast>( + ? static_cast>( instance.scratch_host(sizeof(value_type) * value_count)) : nullptr; auto device_accessible_result_ptr = @@ -88,10 +86,10 @@ class Kokkos::Impl::ParallelReduce>( + results_ptr = static_cast>( instance.scratch_space(sizeof(value_type) * value_count)); - auto parallel_reduce_event = q.submit([&](sycl::handler& cgh) { + auto cgh_lambda = [&](sycl::handler& cgh) { const auto begin = policy.begin(); #ifndef KOKKOS_IMPL_SYCL_USE_IN_ORDER_QUEUES cgh.depends_on(memcpy_event); @@ -114,24 +112,32 @@ class Kokkos::Impl::ParallelReduce{parallel_reduce_event}); + }; + +#ifdef SYCL_EXT_ONEAPI_GRAPH + if constexpr (Policy::is_graph_kernel::value) { + sycl_attach_kernel_to_node(*this, cgh_lambda); + } else #endif - last_reduction_event = parallel_reduce_event; + { + last_reduction_event = q.submit(cgh_lambda); +#ifndef KOKKOS_IMPL_SYCL_USE_IN_ORDER_QUEUES + q.ext_oneapi_submit_barrier( + std::vector{last_reduction_event}); +#endif + } } else { // Otherwise (when size > 1), we perform a reduction on the values in all // workgroups separately, write the workgroup results back to global // memory and recurse until only one workgroup does the reduction and thus // gets the final value. 
- auto scratch_flags = static_cast>( + auto scratch_flags = static_cast>( instance.scratch_flags(sizeof(unsigned int))); auto reduction_lambda_factory = [&](sycl::local_accessor local_mem, sycl::local_accessor num_teams_done, - sycl::device_ptr results_ptr, int values_per_thread) { + sycl_device_ptr results_ptr, int values_per_thread) { const auto begin = policy.begin(); auto lambda = [=](sycl::nd_item<1> item) { @@ -241,7 +247,7 @@ class Kokkos::Impl::ParallelReduce num_teams_done(1, cgh); auto dummy_reduction_lambda = @@ -302,7 +308,7 @@ class Kokkos::Impl::ParallelReduce>(instance.scratch_space( + static_cast>(instance.scratch_space( sizeof(value_type) * value_count * n_wgroups)); sycl::local_accessor local_mem( @@ -320,12 +326,20 @@ class Kokkos::Impl::ParallelReduce(n_wgroups * wgroup_size, wgroup_size), reduction_lambda); - }); -#ifndef KOKKOS_IMPL_SYCL_USE_IN_ORDER_QUEUES - q.ext_oneapi_submit_barrier( - std::vector{parallel_reduce_event}); + }; + +#ifdef SYCL_EXT_ONEAPI_GRAPH + if constexpr (Policy::is_graph_kernel::value) { + sycl_attach_kernel_to_node(*this, cgh_lambda); + } else #endif - last_reduction_event = parallel_reduce_event; + { + last_reduction_event = q.submit(cgh_lambda); +#ifndef KOKKOS_IMPL_SYCL_USE_IN_ORDER_QUEUES + q.ext_oneapi_submit_barrier( + std::vector{last_reduction_event}); +#endif + } } // At this point, the reduced value is written to the entry in results_ptr @@ -333,6 +347,11 @@ class Kokkos::Impl::ParallelReduce::execute: result " "not device-accessible"); @@ -347,6 +366,12 @@ class Kokkos::Impl::ParallelReduce scratch_buffers_lock( + instance.m_mutexScratchSpace); + using IndirectKernelMem = Kokkos::Experimental::Impl::SYCLInternal::IndirectKernelMem; IndirectKernelMem& indirectKernelMem = instance.get_indirect_kernel_mem(); @@ -366,10 +391,6 @@ class Kokkos::Impl::ParallelReduce m_scratch_buffers_lock; }; #endif /* KOKKOS_SYCL_PARALLEL_REDUCE_RANGE_HPP */ diff --git 
a/lib/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelReduce_Team.hpp b/lib/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelReduce_Team.hpp index 27165c59e3..b443bcbf90 100644 --- a/lib/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelReduce_Team.hpp +++ b/lib/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelReduce_Team.hpp @@ -23,6 +23,7 @@ #include #include +#include #include template @@ -30,7 +31,7 @@ class Kokkos::Impl::ParallelReduce, Kokkos::Experimental::SYCL> { public: - using Policy = TeamPolicyInternal; + using Policy = TeamPolicy; using FunctorType = typename CombinedFunctorReducerType::functor_type; using ReducerType = typename CombinedFunctorReducerType::reducer_type; @@ -54,24 +55,18 @@ class Kokkos::Impl::ParallelReduce m_global_scratch_ptr; size_t m_scratch_size[2]; const size_type m_league_size; int m_team_size; const size_type m_vector_size; - // Only let one ParallelReduce instance at a time use the team scratch memory - // and the host scratch memory. The constructor acquires the mutex which is - // released in the destructor. - std::scoped_lock m_scratch_buffers_lock; - int m_scratch_pool_id = -1; - template + template sycl::event sycl_direct_launch( - const PolicyType& policy, + const sycl_device_ptr global_scratch_ptr, const CombinedFunctorReducerWrapper& functor_reducer_wrapper, const sycl::event& memcpy_event) const { // Convenience references - const Kokkos::Experimental::SYCL& space = policy.space(); + const Kokkos::Experimental::SYCL& space = m_policy.space(); Kokkos::Experimental::Impl::SYCLInternal& instance = *space.impl_internal_space_instance(); sycl::queue& q = space.sycl_queue(); @@ -82,7 +77,7 @@ class Kokkos::Impl::ParallelReduce>( + ? static_cast>( instance.scratch_host(sizeof(value_type) * value_count)) : nullptr; @@ -95,14 +90,14 @@ class Kokkos::Impl::ParallelReduce>(instance.scratch_space( + static_cast>(instance.scratch_space( sizeof(value_type) * std::max(value_count, 1u))); auto device_accessible_result_ptr = m_result_ptr_device_accessible ? 
static_cast>(m_result_ptr) : static_cast>(host_result_ptr); - auto parallel_reduce_event = q.submit([&](sycl::handler& cgh) { + auto cgh_lambda = [&](sycl::handler& cgh) { // FIXME_SYCL accessors seem to need a size greater than zero at least // for host queues sycl::local_accessor team_scratch_memory_L0( @@ -113,7 +108,6 @@ class Kokkos::Impl::ParallelReduce const global_scratch_ptr = m_global_scratch_ptr; #ifndef KOKKOS_IMPL_SYCL_USE_IN_ORDER_QUEUES cgh.depends_on(memcpy_event); @@ -144,19 +138,26 @@ class Kokkos::Impl::ParallelReduce{parallel_reduce_event}); + }; +#ifdef SYCL_EXT_ONEAPI_GRAPH + if constexpr (Policy::is_graph_kernel::value) { + sycl_attach_kernel_to_node(*this, cgh_lambda); + } else #endif - last_reduction_event = parallel_reduce_event; + { + last_reduction_event = q.submit(cgh_lambda); +#ifndef KOKKOS_IMPL_SYCL_USE_IN_ORDER_QUEUES + q.ext_oneapi_submit_barrier( + std::vector{last_reduction_event}); +#endif + } } else { // Otherwise, (if the total range has more than one element) we perform a // reduction on the values in all workgroups separately, write the // workgroup results back to global memory and recurse until only one // workgroup does the reduction and thus gets the final value. - auto parallel_reduce_event = q.submit([&](sycl::handler& cgh) { - auto scratch_flags = static_cast>( + auto cgh_lambda = [&](sycl::handler& cgh) { + auto scratch_flags = static_cast>( instance.scratch_flags(sizeof(unsigned int))); // FIXME_SYCL accessors seem to need a size greater than zero at least @@ -170,12 +171,11 @@ class Kokkos::Impl::ParallelReduce const global_scratch_ptr = m_global_scratch_ptr; sycl::local_accessor num_teams_done(1, cgh); auto team_reduction_factory = [&](sycl::local_accessor local_mem, - sycl::device_ptr results_ptr) { + sycl_device_ptr results_ptr) { auto device_accessible_result_ptr = m_result_ptr_device_accessible ? 
static_cast>(m_result_ptr) @@ -331,7 +331,7 @@ class Kokkos::Impl::ParallelReduce((size + wgroup_size - 1) / wgroup_size, 1); results_ptr = - static_cast>(instance.scratch_space( + static_cast>(instance.scratch_space( sizeof(value_type) * std::max(value_count, 1u) * init_size)); size_t max_work_groups = @@ -359,12 +359,19 @@ class Kokkos::Impl::ParallelReduce(m_team_size, n_wgroups * m_vector_size), sycl::range<2>(m_team_size, m_vector_size)), reduction_lambda); - }); -#ifndef KOKKOS_IMPL_SYCL_USE_IN_ORDER_QUEUES - q.ext_oneapi_submit_barrier( - std::vector{parallel_reduce_event}); + }; +#ifdef SYCL_EXT_ONEAPI_GRAPH + if constexpr (Policy::is_graph_kernel::value) { + sycl_attach_kernel_to_node(*this, cgh_lambda); + } else #endif - last_reduction_event = parallel_reduce_event; + { + last_reduction_event = q.submit(cgh_lambda); +#ifndef KOKKOS_IMPL_SYCL_USE_IN_ORDER_QUEUES + q.ext_oneapi_submit_barrier( + std::vector{last_reduction_event}); +#endif + } } // At this point, the reduced value is written to the entry in results_ptr @@ -372,6 +379,11 @@ class Kokkos::Impl::ParallelReduce::execute: result not " "device-accessible"); @@ -386,6 +398,22 @@ class Kokkos::Impl::ParallelReduce scratch_buffers_lock( + instance.m_mutexScratchSpace); + std::scoped_lock team_scratch_lock( + instance.m_team_scratch_mutex); + + // Functor's reduce memory, team scan memory, and team shared memory depend + // upon team size. 
+ int scratch_pool_id = instance.acquire_team_scratch_space(); + const sycl_device_ptr global_scratch_ptr = + static_cast>(instance.resize_team_scratch_space( + scratch_pool_id, + static_cast(m_scratch_size[1]) * m_league_size)); + using IndirectKernelMem = Kokkos::Experimental::Impl::SYCLInternal::IndirectKernelMem; IndirectKernelMem& indirectKernelMem = instance.get_indirect_kernel_mem(); @@ -395,14 +423,24 @@ class Kokkos::Impl::ParallelReduce + ParallelReduce(CombinedFunctorReducerType const& arg_functor_reducer, + Policy const& arg_policy, ViewType const& arg_result) + : m_functor_reducer(arg_functor_reducer), + m_policy(arg_policy), + m_result_ptr(arg_result.data()), + m_result_ptr_device_accessible( + MemorySpaceAccess::accessible), + m_league_size(arg_policy.league_size()), + m_team_size(arg_policy.team_size()), + m_vector_size(arg_policy.impl_vector_length()) { // FIXME_SYCL optimize if (m_team_size < 0) m_team_size = m_policy.team_size_recommended( @@ -423,22 +461,15 @@ class Kokkos::Impl::ParallelReduce>(space.resize_team_scratch_space( - m_scratch_pool_id, - static_cast(m_scratch_size[1]) * m_league_size)); - - if (static_cast(space.m_maxShmemPerBlock) < + const Kokkos::Experimental::Impl::SYCLInternal& instance = + *m_policy.space().impl_internal_space_instance(); + if (static_cast(instance.m_maxShmemPerBlock) < m_shmem_size - m_shmem_begin) { std::stringstream out; out << "Kokkos::Impl::ParallelFor insufficient shared memory! 
" "Requested " << m_shmem_size - m_shmem_begin << " bytes but maximum is " - << space.m_maxShmemPerBlock << '\n'; + << instance.m_maxShmemPerBlock << '\n'; Kokkos::Impl::throw_runtime_exception(out.str()); } @@ -448,25 +479,6 @@ class Kokkos::Impl::ParallelReduce requested too large team size."); } - - public: - template - ParallelReduce(CombinedFunctorReducerType const& arg_functor_reducer, - Policy const& arg_policy, ViewType const& arg_result) - : m_functor_reducer(arg_functor_reducer), - m_policy(arg_policy), - m_result_ptr(arg_result.data()), - m_result_ptr_device_accessible( - MemorySpaceAccess::accessible), - m_league_size(arg_policy.league_size()), - m_team_size(arg_policy.team_size()), - m_vector_size(arg_policy.impl_vector_length()), - m_scratch_buffers_lock(arg_policy.space() - .impl_internal_space_instance() - ->m_team_scratch_mutex) { - initialize(); - } }; #endif diff --git a/lib/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelScan_Range.hpp b/lib/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelScan_Range.hpp index 977b69bc9e..bdb5b88377 100644 --- a/lib/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelScan_Range.hpp +++ b/lib/kokkos/core/src/SYCL/Kokkos_SYCL_ParallelScan_Range.hpp @@ -18,6 +18,7 @@ #define KOKKOS_SYCL_PARALLEL_SCAN_RANGE_HPP #include +#include #include #include @@ -35,20 +36,38 @@ void workgroup_scan(sycl::nd_item item, const FunctorType& final_reducer, auto sg = item.get_sub_group(); const int sg_group_id = sg.get_group_id()[0]; const int id_in_sg = sg.get_local_id()[0]; + const int local_range = std::min(sg.get_local_range()[0], global_range); - for (int stride = 1; stride < global_range; stride <<= 1) { - auto tmp = sg.shuffle_up(local_value, stride); +#if defined(KOKKOS_ARCH_INTEL_GPU) || defined(KOKKOS_IMPL_ARCH_NVIDIA_GPU) + auto shuffle_combine = [&](int stride) { + if (stride < local_range) { + auto tmp = Kokkos::Impl::SYCLReduction::shift_group_right(sg, local_value, + stride); + if (id_in_sg >= stride) final_reducer.join(&local_value, &tmp); + } 
+ }; + shuffle_combine(1); + shuffle_combine(2); + shuffle_combine(4); + shuffle_combine(8); + shuffle_combine(16); + KOKKOS_ASSERT(local_range <= 32); +#else + for (int stride = 1; stride < local_range; stride <<= 1) { + auto tmp = + Kokkos::Impl::SYCLReduction::shift_group_right(sg, local_value, stride); if (id_in_sg >= stride) final_reducer.join(&local_value, &tmp); } +#endif const int max_subgroup_size = sg.get_max_local_range()[0]; const int n_active_subgroups = (global_range + max_subgroup_size - 1) / max_subgroup_size; - const int local_range = sg.get_local_range()[0]; if (id_in_sg == local_range - 1 && sg_group_id < n_active_subgroups) local_mem[sg_group_id] = local_value; - local_value = sg.shuffle_up(local_value, 1); + local_value = + Kokkos::Impl::SYCLReduction::shift_group_right(sg, local_value, 1); if (id_in_sg == 0) final_reducer.init(&local_value); sycl::group_barrier(item.get_group()); @@ -61,8 +80,29 @@ void workgroup_scan(sycl::nd_item item, const FunctorType& final_reducer, const auto upper_bound = std::min(local_range, n_active_subgroups - round * local_range); auto local_sg_value = local_mem[idx < n_active_subgroups ? 
idx : 0]; +#if defined(KOKKOS_ARCH_INTEL_GPU) || defined(KOKKOS_IMPL_ARCH_NVIDIA_GPU) + auto shuffle_combine_sg = [&](int stride) { + if (stride < upper_bound) { + auto tmp = Kokkos::Impl::SYCLReduction::shift_group_right( + sg, local_sg_value, stride); + if (id_in_sg >= stride) { + if (idx < n_active_subgroups) + final_reducer.join(&local_sg_value, &tmp); + else + local_sg_value = tmp; + } + } + }; + shuffle_combine_sg(1); + shuffle_combine_sg(2); + shuffle_combine_sg(4); + shuffle_combine_sg(8); + shuffle_combine_sg(16); + KOKKOS_ASSERT(upper_bound <= 32); +#else for (int stride = 1; stride < upper_bound; stride <<= 1) { - auto tmp = sg.shuffle_up(local_sg_value, stride); + auto tmp = Kokkos::Impl::SYCLReduction::shift_group_right( + sg, local_sg_value, stride); if (id_in_sg >= stride) { if (idx < n_active_subgroups) final_reducer.join(&local_sg_value, &tmp); @@ -70,6 +110,7 @@ void workgroup_scan(sycl::nd_item item, const FunctorType& final_reducer, local_sg_value = tmp; } } +#endif if (idx < n_active_subgroups) { local_mem[idx] = local_sg_value; if (round > 0) @@ -111,14 +152,10 @@ class ParallelScanSYCLBase { const CombinedFunctorReducer m_functor_reducer; const Policy m_policy; - sycl::host_ptr m_scratch_host = nullptr; + sycl_host_ptr m_scratch_host = nullptr; pointer_type m_result_ptr; const bool m_result_ptr_device_accessible; - // Only let one ParallelScan instance at a time use the host scratch memory. - // The constructor acquires the mutex which is released in the destructor. 
- std::scoped_lock m_scratch_buffers_lock; - private: template sycl::event sycl_direct_launch(const FunctorWrapper& functor_wrapper, @@ -131,95 +168,93 @@ class ParallelScanSYCLBase { const auto size = m_policy.end() - m_policy.begin(); - auto scratch_flags = static_cast>( + auto scratch_flags = static_cast>( instance.scratch_flags(sizeof(unsigned int))); const auto begin = m_policy.begin(); // Initialize global memory - auto scan_lambda_factory = - [&](sycl::local_accessor local_mem, - sycl::local_accessor num_teams_done, - sycl::device_ptr global_mem_, - sycl::device_ptr group_results_) { - auto lambda = [=](sycl::nd_item<1> item) { - auto global_mem = global_mem_; - auto group_results = group_results_; + auto scan_lambda_factory = [&](sycl::local_accessor local_mem, + sycl::local_accessor + num_teams_done, + sycl_device_ptr global_mem_, + sycl_device_ptr group_results_) { + auto lambda = [=](sycl::nd_item<1> item) { + auto global_mem = global_mem_; + auto group_results = group_results_; - const CombinedFunctorReducer< - FunctorType, typename Analysis::Reducer>& functor_reducer = - functor_wrapper.get_functor(); - const FunctorType& functor = functor_reducer.get_functor(); - const typename Analysis::Reducer& reducer = - functor_reducer.get_reducer(); + const CombinedFunctorReducer& + functor_reducer = functor_wrapper.get_functor(); + const FunctorType& functor = functor_reducer.get_functor(); + const typename Analysis::Reducer& reducer = + functor_reducer.get_reducer(); - const auto n_wgroups = item.get_group_range()[0]; - const int wgroup_size = item.get_local_range()[0]; + const auto n_wgroups = item.get_group_range()[0]; + const int wgroup_size = item.get_local_range()[0]; - const int local_id = item.get_local_linear_id(); - const index_type global_id = item.get_global_linear_id(); + const int local_id = item.get_local_linear_id(); + const index_type global_id = item.get_global_linear_id(); - // Initialize local memory - value_type local_value; - 
reducer.init(&local_value); - if (global_id < size) { - if constexpr (std::is_void::value) - functor(global_id + begin, local_value, false); - else - functor(WorkTag(), global_id + begin, local_value, false); + // Initialize local memory + value_type local_value; + reducer.init(&local_value); + if (global_id < size) { + if constexpr (std::is_void::value) + functor(global_id + begin, local_value, false); + else + functor(WorkTag(), global_id + begin, local_value, false); + } + + workgroup_scan<>(item, reducer, local_mem, local_value, wgroup_size); + + // Write results to global memory + if (global_id < size) global_mem[global_id] = local_value; + + if (local_id == wgroup_size - 1) { + group_results[item.get_group_linear_id()] = + local_mem[item.get_sub_group().get_group_range()[0] - 1]; + + sycl::atomic_ref + scratch_flags_ref(*scratch_flags); + num_teams_done[0] = ++scratch_flags_ref; + } + item.barrier(sycl::access::fence_space::global_space); + if (num_teams_done[0] == n_wgroups) { + if (local_id == 0) *scratch_flags = 0; + value_type total; + reducer.init(&total); + + for (unsigned int offset = 0; offset < n_wgroups; + offset += wgroup_size) { + index_type id = local_id + offset; + if (id < static_cast(n_wgroups)) + local_value = group_results[id]; + else + reducer.init(&local_value); + workgroup_scan<>( + item, reducer, local_mem, local_value, + std::min(n_wgroups - offset, wgroup_size)); + if (id < static_cast(n_wgroups)) { + reducer.join(&local_value, &total); + group_results[id] = local_value; } - - workgroup_scan<>(item, reducer, local_mem, local_value, - wgroup_size); - - // Write results to global memory - if (global_id < size) global_mem[global_id] = local_value; - - if (local_id == wgroup_size - 1) { - group_results[item.get_group_linear_id()] = - local_mem[item.get_sub_group().get_group_range()[0] - 1]; - - sycl::atomic_ref - scratch_flags_ref(*scratch_flags); - num_teams_done[0] = ++scratch_flags_ref; - } - 
item.barrier(sycl::access::fence_space::global_space); - if (num_teams_done[0] == n_wgroups) { - if (local_id == 0) *scratch_flags = 0; - value_type total; - reducer.init(&total); - - for (unsigned int offset = 0; offset < n_wgroups; - offset += wgroup_size) { - index_type id = local_id + offset; - if (id < static_cast(n_wgroups)) - local_value = group_results[id]; - else - reducer.init(&local_value); - workgroup_scan<>( - item, reducer, local_mem, local_value, - std::min(n_wgroups - offset, wgroup_size)); - if (id < static_cast(n_wgroups)) { - reducer.join(&local_value, &total); - group_results[id] = local_value; - } - reducer.join( - &total, - &local_mem[item.get_sub_group().get_group_range()[0] - 1]); - if (offset + wgroup_size < n_wgroups) - item.barrier(sycl::access::fence_space::global_space); - } - } - }; - return lambda; - }; + reducer.join( + &total, + &local_mem[item.get_sub_group().get_group_range()[0] - 1]); + if (offset + wgroup_size < n_wgroups) + item.barrier(sycl::access::fence_space::global_space); + } + } + }; + return lambda; + }; size_t wgroup_size; size_t n_wgroups; - sycl::device_ptr global_mem; - sycl::device_ptr group_results; + sycl_device_ptr global_mem; + sycl_device_ptr group_results; desul::ensure_sycl_lock_arrays_on_device(q); @@ -254,9 +289,9 @@ class ParallelScanSYCLBase { // FIXME_SYCL consider only storing one value per block and recreate // initial results in the end before doing the final pass global_mem = - static_cast>(instance.scratch_space( + static_cast>(instance.scratch_space( n_wgroups * (wgroup_size + 1) * sizeof(value_type))); - m_scratch_host = static_cast>( + m_scratch_host = static_cast>( instance.scratch_host(sizeof(value_type))); group_results = global_mem + n_wgroups * wgroup_size; @@ -334,6 +369,11 @@ class ParallelScanSYCLBase { auto& instance = *m_policy.space().impl_internal_space_instance(); + // Only let one instance at a time resize the instance's scratch memory + // allocations. 
+ std::scoped_lock scratch_buffers_lock( + instance.m_mutexScratchSpace); + Kokkos::Experimental::Impl::SYCLInternal::IndirectKernelMem& indirectKernelMem = instance.get_indirect_kernel_mem(); @@ -352,10 +392,7 @@ class ParallelScanSYCLBase { : m_functor_reducer(arg_functor, typename Analysis::Reducer{arg_functor}), m_policy(arg_policy), m_result_ptr(arg_result_ptr), - m_result_ptr_device_accessible(arg_result_ptr_device_accessible), - m_scratch_buffers_lock(m_policy.space() - .impl_internal_space_instance() - ->m_mutexScratchSpace) {} + m_result_ptr_device_accessible(arg_result_ptr_device_accessible) {} }; } // namespace Kokkos::Impl diff --git a/lib/kokkos/core/src/SYCL/Kokkos_SYCL_Space.cpp b/lib/kokkos/core/src/SYCL/Kokkos_SYCL_Space.cpp index 9cc8008cdf..19fad29150 100644 --- a/lib/kokkos/core/src/SYCL/Kokkos_SYCL_Space.cpp +++ b/lib/kokkos/core/src/SYCL/Kokkos_SYCL_Space.cpp @@ -56,6 +56,23 @@ void DeepCopyAsyncSYCL(void* dst, const void* src, size_t n) { /*--------------------------------------------------------------------------*/ /*--------------------------------------------------------------------------*/ +namespace { + +std::string_view get_memory_space_name(sycl::usm::alloc allocation_kind) { + switch (allocation_kind) { + case sycl::usm::alloc::host: + return Kokkos::Experimental::SYCLHostUSMSpace::name(); + case sycl::usm::alloc::device: + return Kokkos::Experimental::SYCLDeviceUSMSpace::name(); + case sycl::usm::alloc::shared: + return Kokkos::Experimental::SYCLSharedUSMSpace::name(); + default: + Kokkos::abort("bug: unknown sycl allocation type"); + return "unreachable"; + } +} + +} // namespace namespace Kokkos { namespace Experimental { @@ -75,17 +92,17 @@ SYCLHostUSMSpace::SYCLHostUSMSpace() SYCLHostUSMSpace::SYCLHostUSMSpace(sycl::queue queue) : m_queue(std::move(queue)) {} -void* allocate_sycl( - const char* arg_label, const size_t arg_alloc_size, - const size_t arg_logical_size, const Kokkos::Tools::SpaceHandle arg_handle, - const 
RawMemoryAllocationFailure::AllocationMechanism failure_tag, - const sycl::usm::alloc allocation_kind, const sycl::queue& queue) { +void* allocate_sycl(const char* arg_label, const size_t arg_alloc_size, + const size_t arg_logical_size, + const Kokkos::Tools::SpaceHandle arg_handle, + const sycl::usm::alloc allocation_kind, + const sycl::queue& queue) { void* const hostPtr = sycl::malloc(arg_alloc_size, queue, allocation_kind); - if (hostPtr == nullptr) - throw RawMemoryAllocationFailure( - arg_alloc_size, 1, RawMemoryAllocationFailure::FailureMode::Unknown, - failure_tag); + if (hostPtr == nullptr) { + Kokkos::Impl::throw_bad_alloc(get_memory_space_name(allocation_kind), + arg_alloc_size, arg_label); + } if (Kokkos::Profiling::profileLibraryLoaded()) { const size_t reported_size = @@ -106,12 +123,10 @@ void* SYCLDeviceUSMSpace::allocate(const Kokkos::Experimental::SYCL& exec_space, const char* arg_label, const size_t arg_alloc_size, const size_t arg_logical_size) const { - return allocate_sycl( - arg_label, arg_alloc_size, arg_logical_size, - Kokkos::Tools::make_space_handle(name()), - RawMemoryAllocationFailure::AllocationMechanism::SYCLMallocDevice, - sycl::usm::alloc::device, - *exec_space.impl_internal_space_instance()->m_queue); + return allocate_sycl(arg_label, arg_alloc_size, arg_logical_size, + Kokkos::Tools::make_space_handle(name()), + sycl::usm::alloc::device, + *exec_space.impl_internal_space_instance()->m_queue); } void* SYCLDeviceUSMSpace::allocate(const size_t arg_alloc_size) const { @@ -121,11 +136,9 @@ void* SYCLDeviceUSMSpace::allocate(const size_t arg_alloc_size) const { void* SYCLDeviceUSMSpace::allocate(const char* arg_label, const size_t arg_alloc_size, const size_t arg_logical_size) const { - return allocate_sycl( - arg_label, arg_alloc_size, arg_logical_size, - Kokkos::Tools::make_space_handle(name()), - RawMemoryAllocationFailure::AllocationMechanism::SYCLMallocDevice, - sycl::usm::alloc::device, m_queue); + return allocate_sycl(arg_label, 
arg_alloc_size, arg_logical_size, + Kokkos::Tools::make_space_handle(name()), + sycl::usm::alloc::device, m_queue); } void* SYCLSharedUSMSpace::allocate(const SYCL& exec_space, @@ -136,12 +149,10 @@ void* SYCLSharedUSMSpace::allocate(const SYCL& exec_space, const char* arg_label, const size_t arg_alloc_size, const size_t arg_logical_size) const { - return allocate_sycl( - arg_label, arg_alloc_size, arg_logical_size, - Kokkos::Tools::make_space_handle(name()), - RawMemoryAllocationFailure::AllocationMechanism::SYCLMallocShared, - sycl::usm::alloc::shared, - *exec_space.impl_internal_space_instance()->m_queue); + return allocate_sycl(arg_label, arg_alloc_size, arg_logical_size, + Kokkos::Tools::make_space_handle(name()), + sycl::usm::alloc::shared, + *exec_space.impl_internal_space_instance()->m_queue); } void* SYCLSharedUSMSpace::allocate(const size_t arg_alloc_size) const { @@ -150,11 +161,9 @@ void* SYCLSharedUSMSpace::allocate(const size_t arg_alloc_size) const { void* SYCLSharedUSMSpace::allocate(const char* arg_label, const size_t arg_alloc_size, const size_t arg_logical_size) const { - return allocate_sycl( - arg_label, arg_alloc_size, arg_logical_size, - Kokkos::Tools::make_space_handle(name()), - RawMemoryAllocationFailure::AllocationMechanism::SYCLMallocShared, - sycl::usm::alloc::shared, m_queue); + return allocate_sycl(arg_label, arg_alloc_size, arg_logical_size, + Kokkos::Tools::make_space_handle(name()), + sycl::usm::alloc::shared, m_queue); } void* SYCLHostUSMSpace::allocate(const SYCL& exec_space, @@ -164,12 +173,10 @@ void* SYCLHostUSMSpace::allocate(const SYCL& exec_space, void* SYCLHostUSMSpace::allocate(const SYCL& exec_space, const char* arg_label, const size_t arg_alloc_size, const size_t arg_logical_size) const { - return allocate_sycl( - arg_label, arg_alloc_size, arg_logical_size, - Kokkos::Tools::make_space_handle(name()), - RawMemoryAllocationFailure::AllocationMechanism::SYCLMallocHost, - sycl::usm::alloc::host, - 
*exec_space.impl_internal_space_instance()->m_queue); + return allocate_sycl(arg_label, arg_alloc_size, arg_logical_size, + Kokkos::Tools::make_space_handle(name()), + sycl::usm::alloc::host, + *exec_space.impl_internal_space_instance()->m_queue); } void* SYCLHostUSMSpace::allocate(const size_t arg_alloc_size) const { @@ -178,11 +185,9 @@ void* SYCLHostUSMSpace::allocate(const size_t arg_alloc_size) const { void* SYCLHostUSMSpace::allocate(const char* arg_label, const size_t arg_alloc_size, const size_t arg_logical_size) const { - return allocate_sycl( - arg_label, arg_alloc_size, arg_logical_size, - Kokkos::Tools::make_space_handle(name()), - RawMemoryAllocationFailure::AllocationMechanism::SYCLMallocHost, - sycl::usm::alloc::host, m_queue); + return allocate_sycl(arg_label, arg_alloc_size, arg_logical_size, + Kokkos::Tools::make_space_handle(name()), + sycl::usm::alloc::host, m_queue); } void sycl_deallocate(const char* arg_label, void* const arg_alloc_ptr, diff --git a/lib/kokkos/core/src/SYCL/Kokkos_SYCL_Team.hpp b/lib/kokkos/core/src/SYCL/Kokkos_SYCL_Team.hpp index dbba382758..1e42faa5a8 100644 --- a/lib/kokkos/core/src/SYCL/Kokkos_SYCL_Team.hpp +++ b/lib/kokkos/core/src/SYCL/Kokkos_SYCL_Team.hpp @@ -22,6 +22,7 @@ #ifdef KOKKOS_ENABLE_SYCL #include +#include //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- @@ -133,72 +134,71 @@ class SYCLTeamMember { const unsigned int team_rank_ = team_rank(); // First combine the values in the same subgroup +#if defined(KOKKOS_ARCH_INTEL_GPU) || defined(KOKKOS_IMPL_ARCH_NVIDIA_GPU) + auto shuffle_combine = [&](int shift) { + if (vector_range * shift < sub_group_range) { + const value_type tmp = Kokkos::Impl::SYCLReduction::shift_group_left( + sg, value, vector_range * shift); + if (team_rank_ + shift < team_size_) reducer.join(value, tmp); + } + }; + shuffle_combine(1); + shuffle_combine(2); + shuffle_combine(4); + 
shuffle_combine(8); + shuffle_combine(16); + KOKKOS_ASSERT(sub_group_range <= 32); +#else for (unsigned int shift = 1; vector_range * shift < sub_group_range; shift <<= 1) { - const value_type tmp = sg.shuffle_down(value, vector_range * shift); + auto tmp = Kokkos::Impl::SYCLReduction::shift_group_left( + sg, value, vector_range * shift); if (team_rank_ + shift < team_size_) reducer.join(value, tmp); } - value = sg.shuffle(value, 0); +#endif + value = Kokkos::Impl::SYCLReduction::select_from_group(sg, value, 0); - const auto n_subgroups = sg.get_group_range()[0]; + const int n_subgroups = sg.get_group_range()[0]; if (n_subgroups == 1) { reducer.reference() = value; return; } - // We need to chunk up the whole reduction because we might not have - // allocated enough memory. - const unsigned int maximum_work_range = - std::min(m_team_reduce_size / sizeof(value_type), n_subgroups); + // It was found experimentally that 16 is a good value for Intel PVC. + // Since there is a maximum number of 1024 threads with subgroup size 16, + // we have a maximum of 64 subgroups per workgroup which means 64/16=4 + // rounds for loading values into the reduction_array, and 16 redundant + // reduction steps executed by every thread. + constexpr int step_width = 16; + auto tmp_alloc = sycl::ext::oneapi::group_local_memory_for_overwrite< + value_type[step_width]>(m_item.get_group()); + auto& reduction_array = *tmp_alloc; const auto id_in_sg = sg.get_local_id()[0]; - auto reduction_array = - static_cast>(m_team_reduce); - // Load values into the first maximum_work_range values of the reduction + // Load values into the first step_width values of the reduction // array in chunks. This means that only sub groups with an id in the // corresponding chunk load values. 
- const auto group_id = sg.get_group_id()[0]; - if (id_in_sg == 0 && group_id < maximum_work_range) + const int group_id = sg.get_group_id()[0]; + if (id_in_sg == 0 && group_id < step_width) reduction_array[group_id] = value; sycl::group_barrier(m_item.get_group()); - for (unsigned int start = maximum_work_range; start < n_subgroups; - start += maximum_work_range) { + for (int start = step_width; start < n_subgroups; start += step_width) { if (id_in_sg == 0 && group_id >= start && - group_id < - std::min(start + maximum_work_range, n_subgroups)) + group_id < std::min(start + step_width, n_subgroups)) reducer.join(reduction_array[group_id - start], value); sycl::group_barrier(m_item.get_group()); } - // Let the first subgroup do the final reduction - if (group_id == 0) { - const auto local_range = sg.get_local_range()[0]; - auto result = - reduction_array[id_in_sg < maximum_work_range ? id_in_sg : 0]; - // In case the maximum_work_range is larger than the range of the first - // subgroup, we first combine the items with a higher index. - for (unsigned int offset = local_range; offset < maximum_work_range; - offset += local_range) - if (id_in_sg + offset < maximum_work_range) - reducer.join(result, reduction_array[id_in_sg + offset]); - sycl::group_barrier(sg); + // Do the final reduction for all threads redundantly + value = reduction_array[0]; + for (int i = 1; i < std::min(step_width, n_subgroups); ++i) + reducer.join(value, reduction_array[i]); - // Now do the actual subgroup reduction. - const auto min_range = - std::min(maximum_work_range, local_range); - for (unsigned int stride = 1; stride < min_range; stride <<= 1) { - const auto tmp = sg.shuffle_down(result, stride); - if (id_in_sg + stride < min_range) reducer.join(result, tmp); - } - if (id_in_sg == 0) reduction_array[0] = result; - } + reducer.reference() = value; + // Make sure that every thread is done using the reduction array. 
sycl::group_barrier(m_item.get_group()); - - reducer.reference() = reduction_array[0]; - // Make sure that the reduction array hasn't been modified in the meantime. - m_item.barrier(sycl::access::fence_space::local_space); } //-------------------------------------------------------------------------- @@ -223,7 +223,8 @@ class SYCLTeamMember { // First combine the values in the same subgroup for (unsigned int stride = 1; vector_range * stride < sub_group_range; stride <<= 1) { - auto tmp = sg.shuffle_up(value, vector_range * stride); + auto tmp = Kokkos::Impl::SYCLReduction::shift_group_right( + sg, value, vector_range * stride); if (id_in_sg >= vector_range * stride) value += tmp; } @@ -249,7 +250,8 @@ class SYCLTeamMember { sub_group_range, n_active_subgroups - round * sub_group_range); auto local_value = base_data[idx]; for (unsigned int stride = 1; stride < upper_bound; stride <<= 1) { - auto tmp = sg.shuffle_up(local_value, stride); + auto tmp = Kokkos::Impl::SYCLReduction::shift_group_right( + sg, local_value, stride); if (id_in_sg >= stride) { if (idx < n_active_subgroups) local_value += tmp; @@ -267,7 +269,8 @@ class SYCLTeamMember { } auto total = base_data[n_active_subgroups - 1]; - const auto update = sg.shuffle_up(value, vector_range); + const auto update = + Kokkos::Impl::SYCLReduction::shift_group_right(sg, value, vector_range); Type intermediate = (group_id > 0 ? base_data[group_id - 1] : 0) + (id_in_sg >= vector_range ? update : 0); @@ -320,7 +323,7 @@ class SYCLTeamMember { typename ReducerType::value_type tmp2 = tmp; for (int i = grange1; (i >>= 1);) { - tmp2 = sg.shuffle_down(tmp, i); + tmp2 = Kokkos::Impl::SYCLReduction::shift_group_left(sg, tmp, i); if (static_cast(tidx1) < i) { reducer.join(tmp, tmp2); } @@ -331,8 +334,9 @@ class SYCLTeamMember { // because floating point summation is not associative // and thus different threads could have different results. 
- tmp2 = sg.shuffle(tmp, (sg.get_local_id() / grange1) * grange1); - value = tmp2; + tmp2 = Kokkos::Impl::SYCLReduction::select_from_group( + sg, tmp, (sg.get_local_id() / grange1) * grange1); + value = tmp2; reducer.reference() = tmp2; } @@ -342,7 +346,7 @@ class SYCLTeamMember { KOKKOS_INLINE_FUNCTION SYCLTeamMember(sycl::local_ptr shared, const std::size_t shared_begin, const std::size_t shared_size, - sycl::device_ptr scratch_level_1_ptr, + sycl_device_ptr scratch_level_1_ptr, const std::size_t scratch_level_1_size, const sycl::nd_item<2> item, const int arg_league_rank, const int arg_league_size) @@ -839,7 +843,8 @@ parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct< // [t] += [t-4] if t >= 4 // ... for (int j = 1; j < static_cast(grange1); j <<= 1) { - value_type tmp = sg.shuffle_up(val, j); + value_type tmp = + Kokkos::Impl::SYCLReduction::shift_group_right(sg, val, j); if (j <= static_cast(tidx1)) { reducer.join(val, tmp); } @@ -850,7 +855,8 @@ parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct< // Update i's contribution into the val and add it to accum for next round if (i < loop_boundaries.end) closure(i, val, true); - accum = sg.shuffle(val, mask + vector_offset); + accum = Kokkos::Impl::SYCLReduction::select_from_group( + sg, val, mask + vector_offset); } reducer.reference() = accum; } @@ -927,7 +933,8 @@ KOKKOS_INLINE_FUNCTION void single( const auto grange1 = item.get_local_range(1); const auto sg = item.get_sub_group(); if (item.get_local_id(1) == 0) lambda(val); - val = sg.shuffle(val, (sg.get_local_id() / grange1) * grange1); + val = Kokkos::Impl::SYCLReduction::select_from_group( + sg, val, (sg.get_local_id() / grange1) * grange1); } template diff --git a/lib/kokkos/core/src/SYCL/Kokkos_SYCL_WorkgroupReduction.hpp b/lib/kokkos/core/src/SYCL/Kokkos_SYCL_WorkgroupReduction.hpp index c308384af0..abf0bd8f53 100644 --- a/lib/kokkos/core/src/SYCL/Kokkos_SYCL_WorkgroupReduction.hpp +++ 
b/lib/kokkos/core/src/SYCL/Kokkos_SYCL_WorkgroupReduction.hpp @@ -21,8 +21,53 @@ namespace Kokkos::Impl::SYCLReduction { -// FIXME_SYCL It appears that using shuffles is slower than going through local -// memory. +template +struct TrivialWrapper { + std::byte array[N]; +}; + +// shuffle down +template +T shift_group_left(sycl::sub_group sg, T x, + sycl::sub_group::linear_id_type delta) { + if constexpr (std::is_trivially_copyable_v) + return sycl::shift_group_left(sg, x, delta); + else { + auto tmp = sycl::shift_group_left( + sg, reinterpret_cast&>(x), delta); + return reinterpret_cast(tmp); + } +} + +// shuffle up +template +T shift_group_right(sycl::sub_group sg, T x, + sycl::sub_group::linear_id_type delta) { + if constexpr (std::is_trivially_copyable_v) + return sycl::shift_group_right(sg, x, delta); + else { + auto tmp = sycl::shift_group_right( + sg, reinterpret_cast&>(x), delta); + return reinterpret_cast(tmp); + } +} + +// shuffle +template +T select_from_group(sycl::sub_group sg, T x, + sycl::sub_group::id_type remote_local_id) { + if constexpr (std::is_trivially_copyable_v) + return sycl::select_from_group(sg, x, remote_local_id); + else { + auto tmp = sycl::select_from_group( + sg, reinterpret_cast&>(x), remote_local_id); + return reinterpret_cast(tmp); + } +} + +// FIXME_SYCL For some types, shuffle reductions are competitive with local +// memory reductions but they are significantly slower for the value type used +// in combined reductions with multiple double arguments. 
template inline constexpr bool use_shuffle_based_algorithm = false; // std::is_reference_v; @@ -30,7 +75,7 @@ inline constexpr bool use_shuffle_based_algorithm = false; template std::enable_if_t> workgroup_reduction( sycl::nd_item& item, sycl::local_accessor local_mem, - sycl::device_ptr results_ptr, + sycl_device_ptr results_ptr, sycl::global_ptr device_accessible_result_ptr, const unsigned int value_count_, const ReducerType& final_reducer, bool final, unsigned int max_size) { @@ -102,24 +147,40 @@ std::enable_if_t> workgroup_reduction( template std::enable_if_t> workgroup_reduction( sycl::nd_item& item, sycl::local_accessor local_mem, - ValueType local_value, sycl::device_ptr results_ptr, + ValueType local_value, sycl_device_ptr results_ptr, sycl::global_ptr device_accessible_result_ptr, const ReducerType& final_reducer, bool final, unsigned int max_size) { const auto local_id = item.get_local_linear_id(); // Perform the actual workgroup reduction in each subgroup // separately. - auto sg = item.get_sub_group(); - const int id_in_sg = sg.get_local_id()[0]; - const auto local_range = - std::min(sg.get_local_range()[0], max_size); + auto sg = item.get_sub_group(); + const int id_in_sg = sg.get_local_id()[0]; + const int local_range = std::min(sg.get_local_range()[0], max_size); const auto upper_stride_bound = - std::min(local_range - id_in_sg, max_size - local_id); + std::min(local_range - id_in_sg, max_size - local_id); +#if defined(KOKKOS_ARCH_INTEL_GPU) || defined(KOKKOS_IMPL_ARCH_NVIDIA_GPU) + auto shuffle_combine = [&](int stride) { + if (stride < local_range) { + auto tmp = Kokkos::Impl::SYCLReduction::shift_group_left(sg, local_value, + stride); + if (stride < upper_stride_bound) final_reducer.join(&local_value, &tmp); + } + }; + shuffle_combine(1); + shuffle_combine(2); + shuffle_combine(4); + shuffle_combine(8); + shuffle_combine(16); + KOKKOS_ASSERT(local_range <= 32); +#else for (unsigned int stride = 1; stride < local_range; stride <<= 1) { - auto tmp 
= sg.shuffle_down(local_value, stride); + auto tmp = + Kokkos::Impl::SYCLReduction::shift_group_left(sg, local_value, stride); if (stride < upper_stride_bound) final_reducer.join(&local_value, &tmp); } +#endif // Copy the subgroup results into the first positions of the // reduction array. @@ -140,7 +201,7 @@ std::enable_if_t> workgroup_reduction( // the first subgroup, we first combine the items with a higher // index. if (n_active_subgroups > local_range) { - for (unsigned int offset = local_range; offset < n_active_subgroups; + for (int offset = local_range; offset < n_active_subgroups; offset += local_range) if (id_in_sg + offset < n_active_subgroups) { final_reducer.join(&sg_value, &local_mem[(id_in_sg + offset)]); @@ -149,11 +210,29 @@ std::enable_if_t> workgroup_reduction( } // Then, we proceed as before. +#if defined(KOKKOS_ARCH_INTEL_GPU) || defined(KOKKOS_IMPL_ARCH_NVIDIA_GPU) + auto shuffle_combine_sg = [&](int stride) { + if (stride < local_range) { + auto tmp = + Kokkos::Impl::SYCLReduction::shift_group_left(sg, sg_value, stride); + if (id_in_sg + stride < n_active_subgroups) + final_reducer.join(&sg_value, &tmp); + } + }; + shuffle_combine_sg(1); + shuffle_combine_sg(2); + shuffle_combine_sg(4); + shuffle_combine_sg(8); + shuffle_combine_sg(16); + KOKKOS_ASSERT(local_range <= 32); +#else for (unsigned int stride = 1; stride < local_range; stride <<= 1) { - auto tmp = sg.shuffle_down(sg_value, stride); + auto tmp = + Kokkos::Impl::SYCLReduction::shift_group_left(sg, sg_value, stride); if (id_in_sg + stride < n_active_subgroups) final_reducer.join(&sg_value, &tmp); } +#endif // Finally, we copy the workgroup results back to global memory // to be used in the next iteration. 
If this is the last diff --git a/lib/kokkos/core/src/Serial/Kokkos_Serial.cpp b/lib/kokkos/core/src/Serial/Kokkos_Serial.cpp index 39b201976b..44d797f1cc 100644 --- a/lib/kokkos/core/src/Serial/Kokkos_Serial.cpp +++ b/lib/kokkos/core/src/Serial/Kokkos_Serial.cpp @@ -35,6 +35,9 @@ namespace Kokkos { namespace Impl { +std::vector SerialInternal::all_instances; +std::mutex SerialInternal::all_instances_mutex; + bool SerialInternal::is_initialized() { return m_is_initialized; } void SerialInternal::initialize() { @@ -43,6 +46,12 @@ void SerialInternal::initialize() { Impl::SharedAllocationRecord::tracking_enable(); m_is_initialized = true; + + // guard pushing to all_instances + { + std::scoped_lock lock(all_instances_mutex); + all_instances.push_back(this); + } } void SerialInternal::finalize() { @@ -59,6 +68,17 @@ void SerialInternal::finalize() { } m_is_initialized = false; + + // guard erasing from all_instances + { + std::scoped_lock lock(all_instances_mutex); + auto it = std::find(all_instances.begin(), all_instances.end(), this); + if (it == all_instances.end()) + Kokkos::abort( + "Execution space instance to be removed couldn't be found!"); + std::swap(*it, all_instances.back()); + all_instances.pop_back(); + } } SerialInternal& SerialInternal::singleton() { @@ -97,9 +117,12 @@ void SerialInternal::resize_thread_team_data(size_t pool_reduce_bytes, m_thread_team_data.disband_team(); m_thread_team_data.disband_pool(); - space.deallocate("Kokkos::Serial::scratch_mem", - m_thread_team_data.scratch_buffer(), - m_thread_team_data.scratch_bytes()); + // impl_deallocate doesn't fence which we try to avoid here since that + // interferes with the using the m_instance_mutex for ensuring proper + // kernel enqueuing + space.impl_deallocate("Kokkos::Serial::scratch_mem", + m_thread_team_data.scratch_buffer(), + m_thread_team_data.scratch_bytes()); } if (pool_reduce_bytes < old_pool_reduce) { @@ -119,13 +142,7 @@ void SerialInternal::resize_thread_team_data(size_t 
pool_reduce_bytes, HostThreadTeamData::scratch_size(pool_reduce_bytes, team_reduce_bytes, team_shared_bytes, thread_local_bytes); - void* ptr = nullptr; - try { - ptr = space.allocate("Kokkos::Serial::scratch_mem", alloc_bytes); - } catch (Kokkos::Experimental::RawMemoryAllocationFailure const& failure) { - // For now, just rethrow the error message the existing way - Kokkos::Impl::throw_runtime_exception(failure.get_error_message()); - } + void* ptr = space.allocate("Kokkos::Serial::scratch_mem", alloc_bytes); m_thread_team_data.scratch_assign(static_cast(ptr), alloc_bytes, pool_reduce_bytes, team_reduce_bytes, @@ -147,7 +164,9 @@ Serial::Serial(NewInstance) : m_space_instance(new Impl::SerialInternal, [](Impl::SerialInternal* ptr) { ptr->finalize(); delete ptr; - }) {} + }) { + m_space_instance->initialize(); +} void Serial::print_configuration(std::ostream& os, bool /*verbose*/) const { os << "Host Serial Execution Space:\n"; diff --git a/lib/kokkos/core/src/Serial/Kokkos_Serial.hpp b/lib/kokkos/core/src/Serial/Kokkos_Serial.hpp index 43eb4992ed..81d43b31b3 100644 --- a/lib/kokkos/core/src/Serial/Kokkos_Serial.hpp +++ b/lib/kokkos/core/src/Serial/Kokkos_Serial.hpp @@ -60,7 +60,10 @@ class SerialInternal { static SerialInternal& singleton(); - std::mutex m_thread_team_data_mutex; + std::mutex m_instance_mutex; + + static std::vector all_instances; + static std::mutex all_instances_mutex; // Resize thread team data scratch memory void resize_thread_team_data(size_t pool_reduce_bytes, @@ -113,7 +116,15 @@ class Serial { Serial(); - Serial(NewInstance); + explicit Serial(NewInstance); + +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 + template + KOKKOS_DEPRECATED_WITH_COMMENT( + "Serial execution space should be constructed explicitly.") + Serial(NewInstance) + : Serial(NewInstance{}) {} +#endif /// \brief True if and only if this method is being called in a /// thread-parallel function. 
@@ -137,7 +148,14 @@ class Serial { name, Kokkos::Tools::Experimental::SpecialSynchronizationCases:: GlobalDeviceSynchronization, - []() {}); // TODO: correct device ID + []() { + std::lock_guard lock_all_instances( + Impl::SerialInternal::all_instances_mutex); + for (auto* instance_ptr : Impl::SerialInternal::all_instances) { + std::lock_guard lock_instance( + instance_ptr->m_instance_mutex); + } + }); // TODO: correct device ID Kokkos::memory_fence(); } @@ -145,7 +163,10 @@ class Serial { "Kokkos::Serial::fence: Unnamed Instance Fence") const { Kokkos::Tools::Experimental::Impl::profile_fence_event( name, Kokkos::Tools::Experimental::Impl::DirectFenceIDHandle{1}, - []() {}); // TODO: correct device ID + [this]() { + auto* internal_instance = this->impl_internal_space_instance(); + std::lock_guard lock(internal_instance->m_instance_mutex); + }); // TODO: correct device ID Kokkos::memory_fence(); } diff --git a/lib/kokkos/core/src/Serial/Kokkos_Serial_Parallel_MDRange.hpp b/lib/kokkos/core/src/Serial/Kokkos_Serial_Parallel_MDRange.hpp index 67978aa3e9..34e115eca9 100644 --- a/lib/kokkos/core/src/Serial/Kokkos_Serial_Parallel_MDRange.hpp +++ b/lib/kokkos/core/src/Serial/Kokkos_Serial_Parallel_MDRange.hpp @@ -43,7 +43,14 @@ class ParallelFor, } public: - inline void execute() const { this->exec(); } + inline void execute() const { + // Make sure kernels are running sequentially even when using multiple + // threads + auto* internal_instance = + m_iter.m_rp.space().impl_internal_space_instance(); + std::lock_guard lock(internal_instance->m_instance_mutex); + this->exec(); + } template static int max_tile_size_product(const Policy&, const Functor&) { /** @@ -104,9 +111,11 @@ class ParallelReduce lock( - internal_instance->m_thread_team_data_mutex); + + // Make sure kernels are running sequentially even when using multiple + // threads, lock resize_thread_team_data + std::lock_guard instance_lock( + internal_instance->m_instance_mutex); 
internal_instance->resize_thread_team_data( pool_reduce_size, team_reduce_size, team_shared_size, thread_local_size); diff --git a/lib/kokkos/core/src/Serial/Kokkos_Serial_Parallel_Range.hpp b/lib/kokkos/core/src/Serial/Kokkos_Serial_Parallel_Range.hpp index 91b4c56711..80faec9041 100644 --- a/lib/kokkos/core/src/Serial/Kokkos_Serial_Parallel_Range.hpp +++ b/lib/kokkos/core/src/Serial/Kokkos_Serial_Parallel_Range.hpp @@ -49,6 +49,10 @@ class ParallelFor, Kokkos::Serial> { public: inline void execute() const { + // Make sure kernels are running sequentially even when using multiple + // threads + auto* internal_instance = m_policy.space().impl_internal_space_instance(); + std::lock_guard lock(internal_instance->m_instance_mutex); this->template exec(); } @@ -103,9 +107,11 @@ class ParallelReduce, const size_t thread_local_size = 0; // Never shrinks auto* internal_instance = m_policy.space().impl_internal_space_instance(); - // Need to lock resize_thread_team_data - std::lock_guard lock( - internal_instance->m_thread_team_data_mutex); + + // Make sure kernels are running sequentially even when using multiple + // threads, lock resize_thread_team_data + std::lock_guard instance_lock( + internal_instance->m_instance_mutex); internal_instance->resize_thread_team_data( pool_reduce_size, team_reduce_size, team_shared_size, thread_local_size); @@ -187,10 +193,12 @@ class ParallelScan, const size_t team_shared_size = 0; // Never shrinks const size_t thread_local_size = 0; // Never shrinks - // Need to lock resize_thread_team_data auto* internal_instance = m_policy.space().impl_internal_space_instance(); - std::lock_guard lock( - internal_instance->m_thread_team_data_mutex); + // Make sure kernels are running sequentially even when using multiple + // threads, lock resize_thread_team_data + std::lock_guard instance_lock( + internal_instance->m_instance_mutex); + internal_instance->resize_thread_team_data( pool_reduce_size, team_reduce_size, team_shared_size, 
thread_local_size); @@ -253,10 +261,12 @@ class ParallelScanWithTotal, const size_t team_shared_size = 0; // Never shrinks const size_t thread_local_size = 0; // Never shrinks - // Need to lock resize_thread_team_data auto* internal_instance = m_policy.space().impl_internal_space_instance(); - std::lock_guard lock( - internal_instance->m_thread_team_data_mutex); + // Make sure kernels are running sequentially even when using multiple + // threads, lock resize_thread_team_data + std::lock_guard instance_lock( + internal_instance->m_instance_mutex); + internal_instance->resize_thread_team_data( pool_reduce_size, team_reduce_size, team_shared_size, thread_local_size); diff --git a/lib/kokkos/core/src/Serial/Kokkos_Serial_Parallel_Team.hpp b/lib/kokkos/core/src/Serial/Kokkos_Serial_Parallel_Team.hpp index a25b51496e..a523cc86c9 100644 --- a/lib/kokkos/core/src/Serial/Kokkos_Serial_Parallel_Team.hpp +++ b/lib/kokkos/core/src/Serial/Kokkos_Serial_Parallel_Team.hpp @@ -247,9 +247,11 @@ class ParallelFor, const size_t thread_local_size = 0; // Never shrinks auto* internal_instance = m_policy.space().impl_internal_space_instance(); - // Need to lock resize_thread_team_data - std::lock_guard lock( - internal_instance->m_thread_team_data_mutex); + // Make sure kernels are running sequentially even when using multiple + // threads, lock resize_thread_team_data + std::lock_guard instance_lock( + internal_instance->m_instance_mutex); + internal_instance->resize_thread_team_data( pool_reduce_size, team_reduce_size, team_shared_size, thread_local_size); @@ -319,9 +321,11 @@ class ParallelReduce lock( - internal_instance->m_thread_team_data_mutex); + // Make sure kernels are running sequentially even when using multiple + // threads, lock resize_thread_team_data + std::lock_guard instance_lock( + internal_instance->m_instance_mutex); + internal_instance->resize_thread_team_data( pool_reduce_size, team_reduce_size, team_shared_size, thread_local_size); diff --git 
a/lib/kokkos/core/src/Threads/Kokkos_Threads_Team.hpp b/lib/kokkos/core/src/Threads/Kokkos_Threads_Team.hpp index fd0f221365..a3501a437d 100644 --- a/lib/kokkos/core/src/Threads/Kokkos_Threads_Team.hpp +++ b/lib/kokkos/core/src/Threads/Kokkos_Threads_Team.hpp @@ -188,8 +188,6 @@ class ThreadsExecTeamMember { using type = typename if_c::type; - if (m_instance == nullptr) return value; - if (team_rank() != team_size() - 1) * ((volatile type*)m_instance->scratch_memory()) = value; @@ -229,8 +227,6 @@ class ThreadsExecTeamMember { using type = typename if_c::type; - if (m_instance == nullptr) return; - type* const local_value = ((type*)m_instance->scratch_memory()); // Set this thread's contribution @@ -285,8 +281,6 @@ class ThreadsExecTeamMember { using type = typename if_c::type; - if (m_instance == nullptr) return type(0); - volatile type* const work_value = ((type*)m_instance->scratch_memory()); *work_value = value; @@ -358,6 +352,7 @@ class ThreadsExecTeamMember { m_chunk_size(team.chunk_size()), m_league_chunk_end(0), m_team_alloc(team.team_alloc()) { + KOKKOS_ASSERT(m_instance != nullptr); if (team.league_size()) { // Execution is using device-team interface: diff --git a/lib/kokkos/core/src/View/Kokkos_ViewAlloc.hpp b/lib/kokkos/core/src/View/Kokkos_ViewAlloc.hpp new file mode 100644 index 0000000000..1ade75692f --- /dev/null +++ b/lib/kokkos/core/src/View/Kokkos_ViewAlloc.hpp @@ -0,0 +1,363 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#include +static_assert(false, + "Including non-public Kokkos header files is not allowed."); +#endif + +#ifndef KOKKOS_VIEW_ALLOC_HPP +#define KOKKOS_VIEW_ALLOC_HPP + +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace Kokkos::Impl { + +template +bool is_zero_byte(const T& x) { + constexpr std::byte all_zeroes[sizeof(T)] = {}; + return std::memcmp(&x, all_zeroes, sizeof(T)) == 0; +} + +//---------------------------------------------------------------------------- + +/* + * The construction, assignment to default, and destruction + * are merged into a single functor. + * Primarily to work around an unresolved CUDA back-end bug + * that would lose the destruction cuda device function when + * called from the shared memory tracking destruction. + * Secondarily to have two fewer partial specializations. + */ +template ::value> +struct ViewValueFunctor; + +template +struct ViewValueFunctor { + using ExecSpace = typename DeviceType::execution_space; + + struct DestroyTag {}; + struct ConstructTag {}; + + ExecSpace space; + ValueType* ptr; + size_t n; + std::string name; + bool default_exec_space; + + template + KOKKOS_INLINE_FUNCTION + std::enable_if_t::value> + operator()(ConstructTag const&, const size_t i) const { + new (ptr + i) ValueType(); + } + + KOKKOS_INLINE_FUNCTION void operator()(DestroyTag const&, + const size_t i) const { + (ptr + i)->~ValueType(); + } + + ViewValueFunctor() = default; + ViewValueFunctor(const ViewValueFunctor&) = default; + ViewValueFunctor& operator=(const ViewValueFunctor&) = default; + + ViewValueFunctor(ExecSpace const& arg_space, ValueType* const arg_ptr, + size_t const arg_n, std::string arg_name) + : space(arg_space), + ptr(arg_ptr), + n(arg_n), + name(std::move(arg_name)), + default_exec_space(false) { + functor_instantiate_workaround(); + } + + ViewValueFunctor(ValueType* 
const arg_ptr, size_t const arg_n, + std::string arg_name) + : space(ExecSpace{}), + ptr(arg_ptr), + n(arg_n), + name(std::move(arg_name)), + default_exec_space(true) { + functor_instantiate_workaround(); + } + + template + std::enable_if_t::value && + std::is_trivially_copy_assignable::value> + construct_dispatch() { + ValueType value{}; +// On A64FX memset seems to do the wrong thing with regards to first touch +// leading to the significant performance issues +#ifndef KOKKOS_ARCH_A64FX + if (Impl::is_zero_byte(value)) { + uint64_t kpID = 0; + if (Kokkos::Profiling::profileLibraryLoaded()) { + // We are not really using parallel_for here but using beginParallelFor + // instead of begin_parallel_for (and adding "via memset") is the best + // we can do to indicate that this is not supposed to be tunable (and + // doesn't really execute a parallel_for). + Kokkos::Profiling::beginParallelFor( + "Kokkos::View::initialization [" + name + "] via memset", + Kokkos::Profiling::Experimental::device_id(space), &kpID); + } + (void)ZeroMemset( + space, Kokkos::View>(ptr, n)); + + if (Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Profiling::endParallelFor(kpID); + } + if (default_exec_space) + space.fence("Kokkos::Impl::ViewValueFunctor: View init/destroy fence"); + } else { +#endif + parallel_for_implementation(); +#ifndef KOKKOS_ARCH_A64FX + } +#endif + } + + template + std::enable_if_t::value && + std::is_trivially_copy_assignable::value)> + construct_dispatch() { + parallel_for_implementation(); + } + + template + void parallel_for_implementation() { + using PolicyType = + Kokkos::RangePolicy, Tag>; + PolicyType policy(space, 0, n); + uint64_t kpID = 0; + if (Kokkos::Profiling::profileLibraryLoaded()) { + const std::string functor_name = + (std::is_same_v + ? 
"Kokkos::View::destruction [" + name + "]" + : "Kokkos::View::initialization [" + name + "]"); + Kokkos::Profiling::beginParallelFor( + functor_name, Kokkos::Profiling::Experimental::device_id(space), + &kpID); + } + +#ifdef KOKKOS_ENABLE_CUDA + if (std::is_same::value) { + Kokkos::Impl::cuda_prefetch_pointer(space, ptr, sizeof(ValueType) * n, + true); + } +#endif + const Kokkos::Impl::ParallelFor closure( + *this, policy); + closure.execute(); + if (default_exec_space || std::is_same_v) + space.fence("Kokkos::Impl::ViewValueFunctor: View init/destroy fence"); + if (Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Profiling::endParallelFor(kpID); + } + } + + void construct_shared_allocation() { construct_dispatch(); } + + void destroy_shared_allocation() { +#ifdef KOKKOS_ENABLE_IMPL_VIEW_OF_VIEWS_DESTRUCTOR_PRECONDITION_VIOLATION_WORKAROUND + if constexpr (std::is_same_v) + for (size_t i = 0; i < n; ++i) (ptr + i)->~ValueType(); + else +#endif + { + parallel_for_implementation(); + } + } + + // This function is to ensure that the functor with DestroyTag is instantiated + // This is a workaround to avoid "cudaErrorInvalidDeviceFunction" error later + // when the function is queried with cudaFuncGetAttributes + void functor_instantiate_workaround() { +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) || \ + defined(KOKKOS_ENABLE_SYCL) || defined(KOKKOS_ENABLE_OPENMPTARGET) + if (false) { + parallel_for_implementation(); + } +#endif + } +}; + +template +struct ViewValueFunctor { + using ExecSpace = typename DeviceType::execution_space; + using PolicyType = Kokkos::RangePolicy>; + + ExecSpace space; + ValueType* ptr; + size_t n; + std::string name; + bool default_exec_space; + + KOKKOS_INLINE_FUNCTION + void operator()(const size_t i) const { ptr[i] = ValueType(); } + + ViewValueFunctor() = default; + ViewValueFunctor(const ViewValueFunctor&) = default; + ViewValueFunctor& operator=(const ViewValueFunctor&) = default; + + ViewValueFunctor(ExecSpace 
const& arg_space, ValueType* const arg_ptr, + size_t const arg_n, std::string arg_name) + : space(arg_space), + ptr(arg_ptr), + n(arg_n), + name(std::move(arg_name)), + default_exec_space(false) {} + + ViewValueFunctor(ValueType* const arg_ptr, size_t const arg_n, + std::string arg_name) + : space(ExecSpace{}), + ptr(arg_ptr), + n(arg_n), + name(std::move(arg_name)), + default_exec_space(true) {} + + template + std::enable_if_t::value && + std::is_trivially_copy_assignable::value> + construct_shared_allocation() { + // Shortcut for zero initialization +// On A64FX memset seems to do the wrong thing with regards to first touch +// leading to the significant performance issues +#ifndef KOKKOS_ARCH_A64FX + ValueType value{}; + if (Impl::is_zero_byte(value)) { + uint64_t kpID = 0; + if (Kokkos::Profiling::profileLibraryLoaded()) { + // We are not really using parallel_for here but using beginParallelFor + // instead of begin_parallel_for (and adding "via memset") is the best + // we can do to indicate that this is not supposed to be tunable (and + // doesn't really execute a parallel_for). 
+ Kokkos::Profiling::beginParallelFor( + "Kokkos::View::initialization [" + name + "] via memset", + Kokkos::Profiling::Experimental::device_id(space), &kpID); + } + + (void)ZeroMemset( + space, Kokkos::View>(ptr, n)); + + if (Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Profiling::endParallelFor(kpID); + } + if (default_exec_space) + space.fence("Kokkos::Impl::ViewValueFunctor: View init/destroy fence"); + } else { +#endif + parallel_for_implementation(); +#ifndef KOKKOS_ARCH_A64FX + } +#endif + } + + template + std::enable_if_t::value && + std::is_trivially_copy_assignable::value)> + construct_shared_allocation() { + parallel_for_implementation(); + } + + void parallel_for_implementation() { + PolicyType policy(space, 0, n); + uint64_t kpID = 0; + if (Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Profiling::beginParallelFor( + "Kokkos::View::initialization [" + name + "]", + Kokkos::Profiling::Experimental::device_id(space), &kpID); + } +#ifdef KOKKOS_ENABLE_CUDA + if (std::is_same::value) { + Kokkos::Impl::cuda_prefetch_pointer(space, ptr, sizeof(ValueType) * n, + true); + } +#endif + const Kokkos::Impl::ParallelFor closure( + *this, policy); + closure.execute(); + if (default_exec_space) + space.fence( + "Kokkos::Impl::ViewValueFunctor: Fence after setting values in " + "view"); + if (Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Profiling::endParallelFor(kpID); + } + } + + void destroy_shared_allocation() {} +}; + +template +struct ViewValueFunctorSequentialHostInit { + using ExecSpace = typename DeviceType::execution_space; + using MemSpace = typename DeviceType::memory_space; + static_assert(SpaceAccessibility::accessible); + + ValueType* ptr; + size_t n; + + ViewValueFunctorSequentialHostInit() = default; + + ViewValueFunctorSequentialHostInit(ExecSpace const& /*arg_space*/, + ValueType* const arg_ptr, + size_t const arg_n, + std::string /*arg_name*/) + : ptr(arg_ptr), n(arg_n) {} + + ViewValueFunctorSequentialHostInit(ValueType* 
const arg_ptr, + size_t const arg_n, + std::string /*arg_name*/) + : ptr(arg_ptr), n(arg_n) {} + + void construct_shared_allocation() { + if constexpr (std::is_trivial_v) { + // value-initialization is equivalent to filling with zeros + std::memset(static_cast(ptr), 0, n * sizeof(ValueType)); + } else { + for (size_t i = 0; i < n; ++i) { + new (ptr + i) ValueType(); + } + } + } + + void destroy_shared_allocation() { + if constexpr (std::is_trivially_destructible_v) { + // do nothing, don't bother calling the destructor + } else { + for (size_t i = 0; i < n; ++i) { + (ptr + i)->~ValueType(); + } + } + } +}; + +} // namespace Kokkos::Impl + +#endif // KOKKOS_VIEW_ALLOC_HPP diff --git a/lib/kokkos/core/src/View/MDSpan/Kokkos_MDSpan_Accessor.hpp b/lib/kokkos/core/src/View/MDSpan/Kokkos_MDSpan_Accessor.hpp new file mode 100644 index 0000000000..8814cc015e --- /dev/null +++ b/lib/kokkos/core/src/View/MDSpan/Kokkos_MDSpan_Accessor.hpp @@ -0,0 +1,220 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +static_assert(false, + "Including non-public Kokkos header files is not allowed."); +#endif + +#ifndef KOKKOS_MDSPAN_ACCESSOR_HPP +#define KOKKOS_MDSPAN_ACCESSOR_HPP + +#include +#include +#include +#include + +namespace Kokkos { + +// For now use the accessors in Impl namespace, as an +// implementation detail for rebasing View on mdspan +namespace Impl { + +template +struct SpaceAwareAccessor { + // Part of Accessor Requirements + using element_type = typename NestedAccessor::element_type; + using reference = typename NestedAccessor::reference; + using data_handle_type = typename NestedAccessor::data_handle_type; + using offset_policy = + SpaceAwareAccessor; + + // Specific to SpaceAwareAccessor + using memory_space = MemorySpace; + using nested_accessor_type = NestedAccessor; + + static_assert(is_memory_space_v); + + KOKKOS_DEFAULTED_FUNCTION + constexpr SpaceAwareAccessor() = default; + + template < + class OtherMemorySpace, class OtherNestedAccessorType, + std::enable_if_t< + MemorySpaceAccess::assignable && + std::is_constructible_v, + int> = 0> + KOKKOS_FUNCTION constexpr SpaceAwareAccessor( + const SpaceAwareAccessor& + other) noexcept + : nested_acc(other.nested_acc) {} + + KOKKOS_FUNCTION + SpaceAwareAccessor(const NestedAccessor& acc) : nested_acc(acc) {} + + KOKKOS_FUNCTION + explicit operator NestedAccessor() const { return nested_acc; } + + KOKKOS_FUNCTION + constexpr reference access(data_handle_type p, size_t i) const noexcept { + Kokkos::Impl::runtime_check_memory_access_violation( + "Kokkos::SpaceAwareAccessor ERROR: attempt to access inaccessible " + "memory space"); + return nested_acc.access(p, i); + } + + KOKKOS_FUNCTION + constexpr typename offset_policy::data_handle_type offset(data_handle_type p, + size_t i) const + noexcept { + return nested_acc.offset(p, i); + } + + // Canonical way for accessing nested accessor see ISO C++ + 
// [linalg.scaled.scaledaccessor] + KOKKOS_FUNCTION + constexpr const NestedAccessor& nested_accessor() const noexcept { + return nested_acc; + } + + private: +// We either compile with our custom mdspan impl +// in which case we discover inside it whether no_unique_address +// works, or we use C++23 in which case it better be available +#ifdef _MDSPAN_NO_UNIQUE_ADDRESS + _MDSPAN_NO_UNIQUE_ADDRESS +#else + [[no_unique_address]] +#endif + NestedAccessor nested_acc; + template + friend struct SpaceAwareAccessor; +}; + +template +struct SpaceAwareAccessor { + // Part of Accessor Requirements + using element_type = typename NestedAccessor::element_type; + using reference = typename NestedAccessor::reference; + using data_handle_type = typename NestedAccessor::data_handle_type; + + using offset_policy = + SpaceAwareAccessor; + + // Specific to SpaceAwareAccessor + using memory_space = AnonymousSpace; + using nested_accessor_type = NestedAccessor; + + KOKKOS_DEFAULTED_FUNCTION + constexpr SpaceAwareAccessor() = default; + + template , + int> = 0> + KOKKOS_FUNCTION constexpr SpaceAwareAccessor( + const SpaceAwareAccessor& + other) noexcept + : nested_acc(other.nested_acc) {} + + KOKKOS_FUNCTION + SpaceAwareAccessor(const NestedAccessor& acc) : nested_acc(acc) {} + + KOKKOS_FUNCTION + explicit operator NestedAccessor() const { return nested_acc; } + + KOKKOS_FUNCTION + constexpr reference access(data_handle_type p, size_t i) const noexcept { + return nested_acc.access(p, i); + } + + KOKKOS_FUNCTION + constexpr typename offset_policy::data_handle_type offset(data_handle_type p, + size_t i) const + noexcept { + return nested_acc.offset(p, i); + } + + // Canonical way for accessing nested accessor see ISO C++ + // [linalg.scaled.scaledaccessor] + KOKKOS_FUNCTION + constexpr const NestedAccessor& nested_accessor() const noexcept { + return nested_acc; + } + + private: +// We either compile with our custom mdspan impl +// in which case we discover inside it whether 
no_unique_address +// works, or we use C++23 in which case it better be available +#ifdef _MDSPAN_NO_UNIQUE_ADDRESS + _MDSPAN_NO_UNIQUE_ADDRESS +#else + [[no_unique_address]] +#endif + NestedAccessor nested_acc; + template + friend struct SpaceAwareAccessor; +}; + +// Like atomic_accessor_relaxed proposed for ISO C++26 but with +// defaulted memory scope - similar to how desul's AtomicRef has a memory scope +template +struct AtomicAccessorRelaxed { + using element_type = ElementType; + using reference = + desul::AtomicRef; + using data_handle_type = ElementType*; + using offset_policy = AtomicAccessorRelaxed; + + KOKKOS_DEFAULTED_FUNCTION + AtomicAccessorRelaxed() = default; + + // Conversions from non-const to const element type + template >* = nullptr> + KOKKOS_FUNCTION constexpr AtomicAccessorRelaxed( + Kokkos::default_accessor) noexcept {} + + template >* = nullptr> + KOKKOS_FUNCTION constexpr AtomicAccessorRelaxed( + AtomicAccessorRelaxed) noexcept {} + + template >* = nullptr> + KOKKOS_FUNCTION explicit operator default_accessor() const { + return default_accessor{}; + } + + KOKKOS_FUNCTION + reference access(data_handle_type p, size_t i) const noexcept { + return reference(p[i]); + } + + KOKKOS_FUNCTION + data_handle_type offset(data_handle_type p, size_t i) const noexcept { + return p + i; + } +}; + +} // namespace Impl +} // namespace Kokkos + +#endif diff --git a/lib/kokkos/core/src/View/MDSpan/Kokkos_MDSpan_Extents.hpp b/lib/kokkos/core/src/View/MDSpan/Kokkos_MDSpan_Extents.hpp index 3846b52d23..29d1e00adf 100644 --- a/lib/kokkos/core/src/View/MDSpan/Kokkos_MDSpan_Extents.hpp +++ b/lib/kokkos/core/src/View/MDSpan/Kokkos_MDSpan_Extents.hpp @@ -37,9 +37,6 @@ struct ViewDimension; template struct ViewDataType; -} // namespace Kokkos::Impl - -namespace Kokkos::Experimental::Impl { // A few things to note -- // - mdspan allows for 0-rank extents similarly to View, so we don't need @@ -106,6 +103,20 @@ struct DataTypeFromExtents { // Will cause a compile error 
if it is malformed (i.e. dynamic after static) using type = typename ::Kokkos::Impl::ViewDataType::type; }; -} // namespace Kokkos::Experimental::Impl + +template +constexpr KOKKOS_INLINE_FUNCTION auto extents_from_view_mapping_impl( + const VM &view_mapping, std::index_sequence) { + return Extents{view_mapping.extent(Indices)...}; +} + +template +constexpr KOKKOS_INLINE_FUNCTION auto extents_from_view_mapping( + const VM &view_mapping) { + static_assert(Extents::rank() == VM::Rank); + return extents_from_view_mapping_impl( + view_mapping, std::make_index_sequence{}); +} +} // namespace Kokkos::Impl #endif // KOKKOS_EXPERIMENTAL_MDSPAN_EXTENTS_HPP diff --git a/lib/kokkos/core/src/View/MDSpan/Kokkos_MDSpan_Layout.hpp b/lib/kokkos/core/src/View/MDSpan/Kokkos_MDSpan_Layout.hpp new file mode 100644 index 0000000000..089628137d --- /dev/null +++ b/lib/kokkos/core/src/View/MDSpan/Kokkos_MDSpan_Layout.hpp @@ -0,0 +1,156 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +static_assert(false, + "Including non-public Kokkos header files is not allowed."); +#endif + +#ifndef KOKKOS_EXPERIMENTAL_MDSPAN_LAYOUT_HPP +#define KOKKOS_EXPERIMENTAL_MDSPAN_LAYOUT_HPP + +#include "Kokkos_MDSpan_Extents.hpp" +#include + +namespace Kokkos::Impl { + +template +struct LayoutFromArrayLayout; + +template <> +struct LayoutFromArrayLayout { + using type = Kokkos::Experimental::layout_left_padded; +}; + +template <> +struct LayoutFromArrayLayout { + using type = Kokkos::Experimental::layout_right_padded; +}; + +template <> +struct LayoutFromArrayLayout { + using type = layout_stride; +}; + +template +KOKKOS_INLINE_FUNCTION auto array_layout_from_mapping( + const typename MDSpanType::mapping_type &mapping) { + using mapping_type = typename MDSpanType::mapping_type; + using extents_type = typename mapping_type::extents_type; + + constexpr auto rank = extents_type::rank(); + const auto &ext = mapping.extents(); + + static_assert(rank <= ARRAY_LAYOUT_MAX_RANK, + "Unsupported rank for mdspan (must be <= 8)"); + + if constexpr (std::is_same_v) { + return Kokkos::LayoutStride{ + rank > 0 ? ext.extent(0) : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + rank > 0 ? mapping.stride(0) : 0, + rank > 1 ? ext.extent(1) : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + rank > 1 ? mapping.stride(1) : 0, + rank > 2 ? ext.extent(2) : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + rank > 2 ? mapping.stride(2) : 0, + rank > 3 ? ext.extent(3) : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + rank > 3 ? mapping.stride(3) : 0, + rank > 4 ? ext.extent(4) : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + rank > 4 ? mapping.stride(4) : 0, + rank > 5 ? ext.extent(5) : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + rank > 5 ? mapping.stride(5) : 0, + rank > 6 ? ext.extent(6) : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + rank > 6 ? mapping.stride(6) : 0, + rank > 7 ? ext.extent(7) : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + rank > 7 ? 
mapping.stride(7) : 0, + }; + } else { + // FIXME: Kokkos Layouts don't store stride (it's in the mapping) + // We could conceivably fix this by adding an extra ViewCtorProp for + // an abritrary padding. For now we will check for this. + if constexpr (rank > 1 && + (std::is_same_v> || + std::is_same_v>)) { + [[maybe_unused]] constexpr size_t strided_index = + std::is_same_v< + typename mapping_type::layout_type, + Kokkos::Experimental::layout_left_padded> + ? 1 + : rank - 2; + [[maybe_unused]] constexpr size_t extent_index = + std::is_same_v< + typename mapping_type::layout_type, + Kokkos::Experimental::layout_left_padded> + ? 0 + : rank - 1; + KOKKOS_ASSERT(mapping.stride(strided_index) == ext.extent(extent_index)); + } + + return ArrayLayout{rank > 0 ? ext.extent(0) : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + rank > 1 ? ext.extent(1) : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + rank > 2 ? ext.extent(2) : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + rank > 3 ? ext.extent(3) : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + rank > 4 ? ext.extent(4) : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + rank > 5 ? ext.extent(5) : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + rank > 6 ? ext.extent(6) : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + rank > 7 ? ext.extent(7) : KOKKOS_IMPL_CTOR_DEFAULT_ARG}; + } +#ifdef KOKKOS_COMPILER_INTEL + __builtin_unreachable(); +#endif +} + +template +KOKKOS_INLINE_FUNCTION auto mapping_from_view_mapping(const VM &view_mapping) { + using mapping_type = typename MDSpanType::mapping_type; + using extents_type = typename mapping_type::extents_type; + + // std::span is not available in C++17 (our current requirements), + // so we need to use the std::array constructor for layout mappings. 
+ // FIXME When C++20 is available, we can use std::span here instead + std::size_t strides[VM::Rank]; + view_mapping.stride_fill(&strides[0]); + if constexpr (std::is_same_v) { + return mapping_type(Kokkos::mdspan_non_standard, + extents_from_view_mapping(view_mapping), + strides); + } else if constexpr (VM::Rank > 1 && + std::is_same_v>) { + return mapping_type(extents_from_view_mapping(view_mapping), + strides[1]); + } else if constexpr (VM::Rank > 1 && + std::is_same_v>) { + return mapping_type(extents_from_view_mapping(view_mapping), + strides[VM::Rank - 2]); + } else { + return mapping_type(extents_from_view_mapping(view_mapping)); + } +#ifdef KOKKOS_COMPILER_INTEL + __builtin_unreachable(); +#endif +} + +} // namespace Kokkos::Impl + +#endif // KOKKOS_EXPERIMENTAL_MDSPAN_LAYOUT_HPP diff --git a/lib/kokkos/core/src/decl/Kokkos_Declare_SYCL.hpp b/lib/kokkos/core/src/decl/Kokkos_Declare_SYCL.hpp index bd12c5c6a9..d13c90825c 100644 --- a/lib/kokkos/core/src/decl/Kokkos_Declare_SYCL.hpp +++ b/lib/kokkos/core/src/decl/Kokkos_Declare_SYCL.hpp @@ -19,6 +19,9 @@ #if defined(KOKKOS_ENABLE_SYCL) #include +#ifdef SYCL_EXT_ONEAPI_GRAPH +#include +#endif #include #include #include diff --git a/lib/kokkos/core/src/impl/Kokkos_Core.cpp b/lib/kokkos/core/src/impl/Kokkos_Core.cpp index c7addbe337..6f862718bc 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Core.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_Core.cpp @@ -91,6 +91,7 @@ void combine(Kokkos::InitializationSettings& out, KOKKOS_IMPL_COMBINE_SETTING(map_device_id_by); KOKKOS_IMPL_COMBINE_SETTING(device_id); KOKKOS_IMPL_COMBINE_SETTING(disable_warnings); + KOKKOS_IMPL_COMBINE_SETTING(print_configuration); KOKKOS_IMPL_COMBINE_SETTING(tune_internals); KOKKOS_IMPL_COMBINE_SETTING(tools_help); KOKKOS_IMPL_COMBINE_SETTING(tools_libs); @@ -610,6 +611,7 @@ void pre_initialize_internal(const Kokkos::InitializationSettings& settings) { #else declare_configuration_metadata("options", "KOKKOS_ENABLE_LIBDL", "no"); #endif + 
declare_configuration_metadata("architecture", "Default Device", typeid(Kokkos::DefaultExecutionSpace).name()); @@ -750,9 +752,6 @@ void pre_initialize_internal(const Kokkos::InitializationSettings& settings) { #elif defined(KOKKOS_ARCH_AMD_GFX1100) declare_configuration_metadata("architecture", "GPU architecture", "AMD_GFX1100"); -#elif defined(KOKKOS_ARCH_AMD_GFX1103) - declare_configuration_metadata("architecture", "GPU architecture", - "AMD_GFX1103"); #else declare_configuration_metadata("architecture", "GPU architecture", "none"); @@ -788,34 +787,18 @@ void initialize_internal(const Kokkos::InitializationSettings& settings) { post_initialize_internal(settings); } -void pre_finalize_internal() { - typename decltype(finalize_hooks)::size_type numSuccessfulCalls = 0; +// declared noexcept such that std::terminate is called if any of the registered +// function throws +void call_registered_finalize_hook_functions() noexcept { while (!finalize_hooks.empty()) { - auto f = finalize_hooks.top(); - try { - f(); - } catch (...) { - std::cerr << "Kokkos::finalize: A finalize hook (set via " - "Kokkos::push_finalize_hook) threw an exception that it did " - "not catch." - " Per std::atexit rules, this results in std::terminate. " - "This is " - "finalize hook number " - << numSuccessfulCalls - << " (1-based indexing) " - "out of " - << finalize_hooks.size() - << " to call. Remember that " - "Kokkos::finalize calls finalize hooks in reverse order " - "from how they " - "were pushed." 
- << std::endl; - std::terminate(); - } + auto const& func = finalize_hooks.top(); + func(); finalize_hooks.pop(); - ++numSuccessfulCalls; } +} +void pre_finalize_internal() { + call_registered_finalize_hook_functions(); Kokkos::Profiling::finalize(); } diff --git a/lib/kokkos/core/src/impl/Kokkos_Default_Graph_Impl.hpp b/lib/kokkos/core/src/impl/Kokkos_Default_Graph_Impl.hpp index 3693dff3d4..05d4854919 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Default_Graph_Impl.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Default_Graph_Impl.hpp @@ -56,7 +56,7 @@ struct GraphImpl : private ExecutionSpaceInstanceStorage { //---------------------------------------------------------------------------- // {{{2 - // Not moveable or copyable; it spends its whole live as a shared_ptr in the + // Not movable or copyable; it spends its whole live as a shared_ptr in the // Graph object GraphImpl() = default; GraphImpl(GraphImpl const&) = delete; @@ -82,10 +82,7 @@ struct GraphImpl : private ExecutionSpaceInstanceStorage { template // requires NodeImplPtr is a shared_ptr to specialization of GraphNodeImpl void add_node(std::shared_ptr const& arg_node_ptr) { - static_assert( - NodeImpl::kernel_type::Policy::is_graph_kernel::value, - "Something has gone horribly wrong, but it's too complicated to " - "explain here. Buy Daisy a coffee and she'll explain it to you."); + static_assert(NodeImpl::kernel_type::Policy::is_graph_kernel::value); // Since this is always called before any calls to add_predecessor involving // it, we can treat this node as a sink until we discover otherwise. 
arg_node_ptr->node_details_t::set_kernel(arg_node_ptr->get_kernel()); diff --git a/lib/kokkos/core/src/Kokkos_Atomics_Desul_Config.hpp b/lib/kokkos/core/src/impl/Kokkos_DesulAtomicsConfig.hpp similarity index 72% rename from lib/kokkos/core/src/Kokkos_Atomics_Desul_Config.hpp rename to lib/kokkos/core/src/impl/Kokkos_DesulAtomicsConfig.hpp index 4cf170f5f1..02ab127d5c 100644 --- a/lib/kokkos/core/src/Kokkos_Atomics_Desul_Config.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_DesulAtomicsConfig.hpp @@ -13,15 +13,9 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //@HEADER -#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE -#include -static_assert(false, - "Including non-public Kokkos header files is not allowed."); -#endif -#ifndef KOKKOS_ATOMICS_DESUL_CONFIG_HPP -#define KOKKOS_ATOMICS_DESUL_CONFIG_HPP -#include +#ifndef KOKKOS_DESUL_ATOMICS_CONFIG_HPP +#define KOKKOS_DESUL_ATOMICS_CONFIG_HPP #if defined(KOKKOS_ARCH_KEPLER) || defined(KOKKOS_ARCH_MAXWELL) #define DESUL_CUDA_ARCH_IS_PRE_PASCAL @@ -32,4 +26,4 @@ static_assert(false, #define DESUL_CUDA_ARCH_IS_PRE_VOLTA #endif -#endif // KOKKOS_ATOMICS_DESUL_CONFIG_HPP +#endif diff --git a/lib/kokkos/core/src/impl/Kokkos_Error.cpp b/lib/kokkos/core/src/impl/Kokkos_Error.cpp index de6e83ed1f..0dcd5d523d 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Error.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_Error.cpp @@ -18,133 +18,54 @@ #define KOKKOS_IMPL_PUBLIC_INCLUDE #endif -#include -#include - #include -#include #include +#include #include #include // show_warnings #include -#include -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -void throw_runtime_exception(const std::string &msg) { +void Kokkos::Impl::throw_runtime_exception(const std::string &msg) { throw std::runtime_error(msg); } -void log_warning(const std::string &msg) { +void 
Kokkos::Impl::throw_bad_alloc(std::string_view memory_space_name, + std::size_t size, std::string_view label) { + std::stringstream ss; + ss << "Kokkos ERROR: " << memory_space_name + << " memory space failed to allocate " << human_memory_size(size) + << " (label=\"" << label << "\")."; + throw std::runtime_error(ss.str()); +} + +void Kokkos::Impl::log_warning(const std::string &msg) { if (show_warnings()) { std::cerr << msg << std::flush; } } -std::string human_memory_size(size_t arg_bytes) { +std::string Kokkos::Impl::human_memory_size(size_t arg_bytes) { double bytes = arg_bytes; const double K = 1024; const double M = K * 1024; const double G = M * 1024; + const double T = G * 1024; std::ostringstream out; if (bytes < K) { out << std::setprecision(4) << bytes << " B"; } else if (bytes < M) { bytes /= K; - out << std::setprecision(4) << bytes << " K"; + out << std::setprecision(4) << bytes << " KiB"; } else if (bytes < G) { bytes /= M; - out << std::setprecision(4) << bytes << " M"; - } else { + out << std::setprecision(4) << bytes << " MiB"; + } else if (bytes < T) { bytes /= G; - out << std::setprecision(4) << bytes << " G"; + out << std::setprecision(4) << bytes << " GiB"; + } else { + bytes /= T; + out << std::setprecision(4) << bytes << " TiB"; } return out.str(); } - -} // namespace Impl - -void Experimental::RawMemoryAllocationFailure::print_error_message( - std::ostream &o) const { - o << "Allocation of size " - << ::Kokkos::Impl::human_memory_size(m_attempted_size); - o << " failed"; - switch (m_failure_mode) { - case FailureMode::OutOfMemoryError: - o << ", likely due to insufficient memory."; - break; - case FailureMode::AllocationNotAligned: - o << " because the allocation was improperly aligned."; - break; - case FailureMode::InvalidAllocationSize: - o << " because the requested allocation size is not a valid size for the" - " requested allocation mechanism (it's probably too large)."; - break; - // TODO move this to the subclass for Cuda-related 
things - case FailureMode::MaximumCudaUVMAllocationsExceeded: - o << " because the maximum Cuda UVM allocations was exceeded."; - break; - case FailureMode::Unknown: o << " because of an unknown error."; break; - } - o << " (The allocation mechanism was "; - switch (m_mechanism) { - case AllocationMechanism::StdMalloc: o << "standard malloc()."; break; - case AllocationMechanism::CudaMalloc: o << "cudaMalloc()."; break; - case AllocationMechanism::CudaMallocManaged: - o << "cudaMallocManaged()."; - break; - case AllocationMechanism::CudaHostAlloc: o << "cudaHostAlloc()."; break; - case AllocationMechanism::HIPMalloc: o << "hipMalloc()."; break; - case AllocationMechanism::HIPHostMalloc: o << "hipHostMalloc()."; break; - case AllocationMechanism::HIPMallocManaged: - o << "hipMallocManaged()."; - break; - case AllocationMechanism::SYCLMallocDevice: - o << "sycl::malloc_device()."; - break; - case AllocationMechanism::SYCLMallocShared: - o << "sycl::malloc_shared()."; - break; - case AllocationMechanism::SYCLMallocHost: - o << "sycl::malloc_host()."; - break; - default: o << "unsupported."; - } - append_additional_error_information(o); - o << ")" << std::endl; -} - -std::string Experimental::RawMemoryAllocationFailure::get_error_message() - const { - std::ostringstream out; - print_error_message(out); - return out.str(); -} - -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { - -#ifdef KOKKOS_ENABLE_CUDA -namespace Experimental { - -void CudaRawMemoryAllocationFailure::append_additional_error_information( - std::ostream &o) const { - if (m_error_code != cudaSuccess) { - o << " The Cuda allocation returned the error code \"" - << cudaGetErrorName(m_error_code) << "\"."; - } -} - -} // end namespace Experimental -#endif - -} // namespace Kokkos diff --git a/lib/kokkos/core/src/impl/Kokkos_Error.hpp 
b/lib/kokkos/core/src/impl/Kokkos_Error.hpp index 1058fd98db..9a80c7b31b 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Error.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Error.hpp @@ -18,116 +18,19 @@ #define KOKKOS_IMPL_ERROR_HPP #include -#include #include #include #include -namespace Kokkos { -namespace Impl { +namespace Kokkos::Impl { [[noreturn]] void throw_runtime_exception(const std::string &msg); - +[[noreturn]] void throw_bad_alloc(std::string_view memory_space_name, + std::size_t size, std::string_view label); void log_warning(const std::string &msg); -std::string human_memory_size(size_t arg_bytes); +std::string human_memory_size(size_t bytes); -} // namespace Impl +} // namespace Kokkos::Impl -namespace Experimental { - -class RawMemoryAllocationFailure : public std::bad_alloc { - public: - enum class FailureMode { - OutOfMemoryError, - AllocationNotAligned, - InvalidAllocationSize, - MaximumCudaUVMAllocationsExceeded, - Unknown - }; - enum class AllocationMechanism { - StdMalloc, -#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 - PosixMemAlign KOKKOS_DEPRECATED, - PosixMMap KOKKOS_DEPRECATED, - IntelMMAlloc KOKKOS_DEPRECATED, #endif - CudaMalloc, - CudaMallocManaged, - CudaHostAlloc, - HIPMalloc, - HIPHostMalloc, - HIPMallocManaged, - SYCLMallocDevice, - SYCLMallocShared, - SYCLMallocHost, - OpenACCMalloc, - }; - - private: - size_t m_attempted_size; - size_t m_attempted_alignment; - FailureMode m_failure_mode; - AllocationMechanism m_mechanism; - - public: - RawMemoryAllocationFailure( - size_t arg_attempted_size, size_t arg_attempted_alignment, - FailureMode arg_failure_mode = FailureMode::OutOfMemoryError, - AllocationMechanism arg_mechanism = - AllocationMechanism::StdMalloc) noexcept - : m_attempted_size(arg_attempted_size), - m_attempted_alignment(arg_attempted_alignment), - m_failure_mode(arg_failure_mode), - m_mechanism(arg_mechanism) {} - - RawMemoryAllocationFailure() noexcept = delete; - - RawMemoryAllocationFailure(RawMemoryAllocationFailure const &) 
noexcept = - default; - RawMemoryAllocationFailure(RawMemoryAllocationFailure &&) noexcept = default; - - RawMemoryAllocationFailure &operator =( - RawMemoryAllocationFailure const &) noexcept = default; - RawMemoryAllocationFailure &operator =( - RawMemoryAllocationFailure &&) noexcept = default; - - ~RawMemoryAllocationFailure() noexcept override = default; - - [[nodiscard]] const char *what() const noexcept override { - if (m_failure_mode == FailureMode::OutOfMemoryError) { - return "Memory allocation error: out of memory"; - } else if (m_failure_mode == FailureMode::AllocationNotAligned) { - return "Memory allocation error: allocation result was under-aligned"; - } - - return nullptr; // unreachable - } - - [[nodiscard]] size_t attempted_size() const noexcept { - return m_attempted_size; - } - - [[nodiscard]] size_t attempted_alignment() const noexcept { - return m_attempted_alignment; - } - - [[nodiscard]] AllocationMechanism allocation_mechanism() const noexcept { - return m_mechanism; - } - - [[nodiscard]] FailureMode failure_mode() const noexcept { - return m_failure_mode; - } - - void print_error_message(std::ostream &o) const; - [[nodiscard]] std::string get_error_message() const; - - virtual void append_additional_error_information(std::ostream &) const {} -}; - -} // end namespace Experimental - -} // namespace Kokkos - -#endif /* #ifndef KOKKOS_IMPL_ERROR_HPP */ diff --git a/lib/kokkos/core/src/impl/Kokkos_HostSpace.cpp b/lib/kokkos/core/src/impl/Kokkos_HostSpace.cpp index 1047b773d7..1c1fb67ff0 100644 --- a/lib/kokkos/core/src/impl/Kokkos_HostSpace.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_HostSpace.cpp @@ -79,22 +79,9 @@ void *HostSpace::impl_allocate( ptr = operator new (arg_alloc_size, std::align_val_t(alignment), std::nothrow_t{}); - if ((ptr == nullptr) || (reinterpret_cast(ptr) == ~uintptr_t(0)) || + if (!ptr || (reinterpret_cast(ptr) == ~uintptr_t(0)) || (reinterpret_cast(ptr) & alignment_mask)) { - 
Experimental::RawMemoryAllocationFailure::FailureMode failure_mode = - Experimental::RawMemoryAllocationFailure::FailureMode:: - AllocationNotAligned; - if (ptr == nullptr) { - failure_mode = Experimental::RawMemoryAllocationFailure::FailureMode:: - OutOfMemoryError; - } - - Experimental::RawMemoryAllocationFailure::AllocationMechanism alloc_mec = - Experimental::RawMemoryAllocationFailure::AllocationMechanism:: - StdMalloc; - - throw Kokkos::Experimental::RawMemoryAllocationFailure( - arg_alloc_size, alignment, failure_mode, alloc_mec); + Impl::throw_bad_alloc(name(), arg_alloc_size, arg_label); } if (Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::allocateData(arg_handle, arg_label, ptr, reported_size); @@ -109,9 +96,8 @@ void HostSpace::deallocate(void *const arg_alloc_ptr, void HostSpace::deallocate(const char *arg_label, void *const arg_alloc_ptr, const size_t arg_alloc_size, - const size_t - - arg_logical_size) const { + const size_t arg_logical_size) const { + if (arg_alloc_ptr) Kokkos::fence("HostSpace::impl_deallocate before free"); impl_deallocate(arg_label, arg_alloc_ptr, arg_alloc_size, arg_logical_size); } void HostSpace::impl_deallocate( @@ -119,7 +105,6 @@ void HostSpace::impl_deallocate( const size_t arg_alloc_size, const size_t arg_logical_size, const Kokkos::Tools::SpaceHandle arg_handle) const { if (arg_alloc_ptr) { - Kokkos::fence("HostSpace::impl_deallocate before free"); size_t reported_size = (arg_logical_size > 0) ? 
arg_logical_size : arg_alloc_size; if (Kokkos::Profiling::profileLibraryLoaded()) { diff --git a/lib/kokkos/core/src/impl/Kokkos_HostThreadTeam.hpp b/lib/kokkos/core/src/impl/Kokkos_HostThreadTeam.hpp index 25f09b8286..3dc68a187b 100644 --- a/lib/kokkos/core/src/impl/Kokkos_HostThreadTeam.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_HostThreadTeam.hpp @@ -106,7 +106,11 @@ class HostThreadTeamData { public: inline bool team_rendezvous() const noexcept { - int* ptr = reinterpret_cast(m_team_scratch + m_team_rendezvous); + // FIXME_OPENMP The tasking framework creates an instance with + // m_team_scratch == nullptr and m_team_rendezvous != 0: + int* ptr = m_team_scratch == nullptr + ? nullptr + : reinterpret_cast(m_team_scratch + m_team_rendezvous); HostBarrier::split_arrive(ptr, m_team_size, m_team_rendezvous_step); if (m_team_rank != 0) { HostBarrier::wait(ptr, m_team_size, m_team_rendezvous_step); @@ -130,9 +134,13 @@ class HostThreadTeamData { } inline void team_rendezvous_release() const noexcept { + // FIXME_OPENMP The tasking framework creates an instance with + // m_team_scratch == nullptr and m_team_rendezvous != 0: HostBarrier::split_release( - reinterpret_cast(m_team_scratch + m_team_rendezvous), m_team_size, - m_team_rendezvous_step); + (m_team_scratch == nullptr) + ? 
nullptr + : reinterpret_cast(m_team_scratch + m_team_rendezvous), + m_team_size, m_team_rendezvous_step); } inline int pool_rendezvous() const noexcept { @@ -271,6 +279,9 @@ class HostThreadTeamData { } int64_t* team_shared() const noexcept { + // FIXME_OPENMP The tasking framework creates an instance with + // m_team_scratch == nullptr and m_team_shared != 0 + if (m_team_scratch == nullptr) return nullptr; return m_team_scratch + m_team_shared; } @@ -400,8 +411,12 @@ class HostThreadTeamMember { int const m_league_size; public: + // FIXME_OPENMP The tasking framework creates an instance with + // m_team_scratch == nullptr and m_team_shared != 0: constexpr HostThreadTeamMember(HostThreadTeamData& arg_data) noexcept - : m_scratch(arg_data.team_shared(), arg_data.team_shared_bytes()), + : m_scratch(arg_data.team_shared(), (arg_data.team_shared() == nullptr) + ? 0 + : arg_data.team_shared_bytes()), m_data(arg_data), m_league_rank(arg_data.m_league_rank), m_league_size(arg_data.m_league_size) {} diff --git a/lib/kokkos/core/src/impl/Kokkos_Profiling.cpp b/lib/kokkos/core/src/impl/Kokkos_Profiling.cpp index bc6197753c..0b34653017 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Profiling.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_Profiling.cpp @@ -971,84 +971,6 @@ void set_callbacks(Kokkos::Tools::Experimental::EventSet new_events) { } // namespace Experimental } // namespace Tools -namespace Profiling { -bool profileLibraryLoaded() { return Kokkos::Tools::profileLibraryLoaded(); } - -void beginParallelFor(const std::string& kernelPrefix, const uint32_t devID, - uint64_t* kernelID) { - Kokkos::Tools::beginParallelFor(kernelPrefix, devID, kernelID); -} -void beginParallelReduce(const std::string& kernelPrefix, const uint32_t devID, - uint64_t* kernelID) { - Kokkos::Tools::beginParallelReduce(kernelPrefix, devID, kernelID); -} -void beginParallelScan(const std::string& kernelPrefix, const uint32_t devID, - uint64_t* kernelID) { - Kokkos::Tools::beginParallelScan(kernelPrefix, 
devID, kernelID); -} -void endParallelFor(const uint64_t kernelID) { - Kokkos::Tools::endParallelFor(kernelID); -} -void endParallelReduce(const uint64_t kernelID) { - Kokkos::Tools::endParallelReduce(kernelID); -} -void endParallelScan(const uint64_t kernelID) { - Kokkos::Tools::endParallelScan(kernelID); -} - -void pushRegion(const std::string& kName) { Kokkos::Tools::pushRegion(kName); } -void popRegion() { Kokkos::Tools::popRegion(); } - -void createProfileSection(const std::string& sectionName, uint32_t* secID) { - Kokkos::Tools::createProfileSection(sectionName, secID); -} -void destroyProfileSection(const uint32_t secID) { - Kokkos::Tools::destroyProfileSection(secID); -} - -void startSection(const uint32_t secID) { Kokkos::Tools::startSection(secID); } - -void stopSection(const uint32_t secID) { Kokkos::Tools::stopSection(secID); } - -void markEvent(const std::string& eventName) { - Kokkos::Tools::markEvent(eventName); -} -void allocateData(const SpaceHandle handle, const std::string name, - const void* data, const uint64_t size) { - Kokkos::Tools::allocateData(handle, name, data, size); -} -void deallocateData(const SpaceHandle space, const std::string label, - const void* ptr, const uint64_t size) { - Kokkos::Tools::deallocateData(space, label, ptr, size); -} - -void beginDeepCopy(const SpaceHandle dst_space, const std::string dst_label, - const void* dst_ptr, const SpaceHandle src_space, - const std::string src_label, const void* src_ptr, - const uint64_t size) { - Kokkos::Tools::beginDeepCopy(dst_space, dst_label, dst_ptr, src_space, - src_label, src_ptr, size); -} -void endDeepCopy() { Kokkos::Tools::endDeepCopy(); } - -void finalize() { Kokkos::Tools::finalize(); } -void initialize(const std::string& profileLibrary) { - Kokkos::Tools::initialize(profileLibrary); -} - -bool printHelp(const std::string& args) { - return Kokkos::Tools::printHelp(args); -} -void parseArgs(const std::string& args) { Kokkos::Tools::parseArgs(args); } -void parseArgs(int 
_argc, char** _argv) { - Kokkos::Tools::parseArgs(_argc, _argv); -} - -SpaceHandle make_space_handle(const char* space_name) { - return Kokkos::Tools::make_space_handle(space_name); -} -} // namespace Profiling - // Tuning namespace Tools { diff --git a/lib/kokkos/core/src/impl/Kokkos_Profiling.hpp b/lib/kokkos/core/src/impl/Kokkos_Profiling.hpp index 025d8d3d18..01a41d0c3f 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Profiling.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Profiling.hpp @@ -263,40 +263,41 @@ size_t get_current_context_id(); } // namespace Tools namespace Profiling { -bool profileLibraryLoaded(); +// don't let ClangFormat reorder the using-declarations below +// clang-format off +using Kokkos::Tools::profileLibraryLoaded; -void beginParallelFor(const std::string& kernelPrefix, const uint32_t devID, - uint64_t* kernelID); -void beginParallelReduce(const std::string& kernelPrefix, const uint32_t devID, - uint64_t* kernelID); -void beginParallelScan(const std::string& kernelPrefix, const uint32_t devID, - uint64_t* kernelID); -void endParallelFor(const uint64_t kernelID); -void endParallelReduce(const uint64_t kernelID); -void endParallelScan(const uint64_t kernelID); -void pushRegion(const std::string& kName); -void popRegion(); +using Kokkos::Tools::printHelp; +using Kokkos::Tools::parseArgs; -void createProfileSection(const std::string& sectionName, uint32_t* secID); -void destroyProfileSection(const uint32_t secID); -void startSection(const uint32_t secID); +using Kokkos::Tools::initialize; +using Kokkos::Tools::finalize; -void stopSection(const uint32_t secID); +using Kokkos::Tools::beginParallelFor; +using Kokkos::Tools::beginParallelReduce; +using Kokkos::Tools::beginParallelScan; +using Kokkos::Tools::endParallelFor; +using Kokkos::Tools::endParallelReduce; +using Kokkos::Tools::endParallelScan; -void markEvent(const std::string& eventName); -void allocateData(const SpaceHandle handle, const std::string name, - const void* data, const uint64_t 
size); -void deallocateData(const SpaceHandle space, const std::string label, - const void* ptr, const uint64_t size); -void beginDeepCopy(const SpaceHandle dst_space, const std::string dst_label, - const void* dst_ptr, const SpaceHandle src_space, - const std::string src_label, const void* src_ptr, - const uint64_t size); -void endDeepCopy(); -void finalize(); -void initialize(const std::string& = {}); +using Kokkos::Tools::allocateData; +using Kokkos::Tools::deallocateData; -SpaceHandle make_space_handle(const char* space_name); +using Kokkos::Tools::beginDeepCopy; +using Kokkos::Tools::endDeepCopy; + +using Kokkos::Tools::pushRegion; +using Kokkos::Tools::popRegion; + +using Kokkos::Tools::createProfileSection; +using Kokkos::Tools::destroyProfileSection; +using Kokkos::Tools::startSection; +using Kokkos::Tools::stopSection; + +using Kokkos::Tools::markEvent; + +using Kokkos::Tools::make_space_handle; +// clang-format on namespace Experimental { using Kokkos::Tools::Experimental::set_allocate_data_callback; diff --git a/lib/kokkos/core/src/impl/Kokkos_Profiling_C_Interface.h b/lib/kokkos/core/src/impl/Kokkos_Profiling_C_Interface.h index 15c466b27e..8c3194e43b 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Profiling_C_Interface.h +++ b/lib/kokkos/core/src/impl/Kokkos_Profiling_C_Interface.h @@ -32,6 +32,10 @@ // Profiling +#ifdef __cplusplus +extern "C" { +#endif + struct Kokkos_Profiling_KokkosPDeviceInfo { size_t deviceID; }; @@ -267,4 +271,8 @@ struct Kokkos_Profiling_EventSet { // changing struct layout }; +#ifdef __cplusplus +} +#endif + #endif // KOKKOS_PROFILING_C_INTERFACE_HPP diff --git a/lib/kokkos/core/src/impl/Kokkos_SharedAlloc.cpp b/lib/kokkos/core/src/impl/Kokkos_SharedAlloc.cpp index 0bc3814b3a..ccf3c47a1e 100644 --- a/lib/kokkos/core/src/impl/Kokkos_SharedAlloc.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_SharedAlloc.cpp @@ -323,41 +323,6 @@ void SharedAllocationRecord::print_host_accessible_records( } #endif -void 
safe_throw_allocation_with_header_failure( - std::string const& space_name, std::string const& label, - Kokkos::Experimental::RawMemoryAllocationFailure const& failure) { - auto generate_failure_message = [&](std::ostream& o) { - o << "Kokkos failed to allocate memory for label \"" << label - << "\". Allocation using MemorySpace named \"" << space_name - << "\" failed with the following error: "; - failure.print_error_message(o); - if (failure.failure_mode() == - Kokkos::Experimental::RawMemoryAllocationFailure::FailureMode:: - AllocationNotAligned) { - // TODO: delete the misaligned memory? - o << "Warning: Allocation failed due to misalignment; memory may " - "be leaked.\n"; - } - o.flush(); - }; - try { - std::ostringstream sstr; - generate_failure_message(sstr); - Kokkos::Impl::throw_runtime_exception(sstr.str()); - } catch (std::bad_alloc const&) { - // Probably failed to allocate the string because we're so close to out - // of memory. Try printing to std::cerr instead - try { - generate_failure_message(std::cerr); - } catch (std::bad_alloc const&) { - // oh well, we tried... 
- } - Kokkos::Impl::throw_runtime_exception( - "Kokkos encountered an allocation failure, then another allocation " - "failure while trying to create the error message."); - } -} - void fill_host_accessible_header_info( SharedAllocationRecord* arg_record, SharedAllocationHeader& arg_header, std::string const& arg_label) { diff --git a/lib/kokkos/core/src/impl/Kokkos_SharedAlloc.hpp b/lib/kokkos/core/src/impl/Kokkos_SharedAlloc.hpp index 99ab660213..da03cc4983 100644 --- a/lib/kokkos/core/src/impl/Kokkos_SharedAlloc.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_SharedAlloc.hpp @@ -196,36 +196,21 @@ class SharedAllocationRecord { const SharedAllocationRecord* const root, const bool detail); }; -void safe_throw_allocation_with_header_failure( - std::string const& space_name, std::string const& label, - Kokkos::Experimental::RawMemoryAllocationFailure const& failure); - template SharedAllocationHeader* checked_allocation_with_header(MemorySpace const& space, std::string const& label, size_t alloc_size) { - try { - return reinterpret_cast(space.allocate( - label.c_str(), alloc_size + sizeof(SharedAllocationHeader), - alloc_size)); - } catch (Kokkos::Experimental::RawMemoryAllocationFailure const& failure) { - safe_throw_allocation_with_header_failure(space.name(), label, failure); - } - return nullptr; // unreachable + return reinterpret_cast(space.allocate( + label.c_str(), alloc_size + sizeof(SharedAllocationHeader), alloc_size)); } template SharedAllocationHeader* checked_allocation_with_header( ExecutionSpace const& exec_space, MemorySpace const& space, std::string const& label, size_t alloc_size) { - try { - return reinterpret_cast(space.allocate( - exec_space, label.c_str(), alloc_size + sizeof(SharedAllocationHeader), - alloc_size)); - } catch (Kokkos::Experimental::RawMemoryAllocationFailure const& failure) { - safe_throw_allocation_with_header_failure(space.name(), label, failure); - } - return nullptr; // unreachable + return reinterpret_cast( + 
space.allocate(exec_space, label.c_str(), + alloc_size + sizeof(SharedAllocationHeader), alloc_size)); } void fill_host_accessible_header_info(SharedAllocationHeader& arg_header, @@ -385,11 +370,9 @@ SharedAllocationRecord template class Kokkos::Impl::HostInaccessibleSharedAllocationRecordCommon< \ MEMORY_SPACE> -namespace { - /* Taking the address of this function so make sure it is unique */ template -void deallocate(SharedAllocationRecord* record_ptr) { +inline void deallocate(SharedAllocationRecord* record_ptr) { using base_type = SharedAllocationRecord; using this_type = SharedAllocationRecord; @@ -401,8 +384,6 @@ void deallocate(SharedAllocationRecord* record_ptr) { delete ptr; } -} // namespace - /* * Memory space specialization of SharedAllocationRecord< Space , void > * requires : @@ -487,15 +468,21 @@ union SharedAllocationTracker { // pressure on compiler optimization by reducing // number of symbols and inline functions. -#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT \ - KOKKOS_IF_ON_HOST((if (!(m_record_bits & DO_NOT_DEREF_FLAG)) { \ - Record::increment(m_record); \ - })) +#ifdef KOKKOS_ENABLE_IMPL_REF_COUNT_BRANCH_UNLIKELY +#define KOKKOS_IMPL_BRANCH_PROB KOKKOS_IMPL_ATTRIBUTE_UNLIKELY +#else +#define KOKKOS_IMPL_BRANCH_PROB +#endif -#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT \ - KOKKOS_IF_ON_HOST((if (!(m_record_bits & DO_NOT_DEREF_FLAG)) { \ - Record::decrement(m_record); \ - })) +#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT \ + KOKKOS_IF_ON_HOST( \ + (if (!(m_record_bits & DO_NOT_DEREF_FLAG)) \ + KOKKOS_IMPL_BRANCH_PROB { Record::increment(m_record); })) + +#define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT \ + KOKKOS_IF_ON_HOST( \ + (if (!(m_record_bits & DO_NOT_DEREF_FLAG)) \ + KOKKOS_IMPL_BRANCH_PROB { Record::decrement(m_record); })) #define KOKKOS_IMPL_SHARED_ALLOCATION_CARRY_RECORD_BITS(rhs, \ override_tracking) \ @@ -642,8 +629,41 @@ union SharedAllocationTracker { #undef 
KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT #undef KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT +#undef KOKKOS_IMPL_BRANCH_PROB }; +struct SharedAllocationDisableTrackingGuard { + SharedAllocationDisableTrackingGuard() { + KOKKOS_ASSERT( + (Kokkos::Impl::SharedAllocationRecord::tracking_enabled())); + Kokkos::Impl::SharedAllocationRecord::tracking_disable(); + } + + SharedAllocationDisableTrackingGuard( + const SharedAllocationDisableTrackingGuard&) = delete; + SharedAllocationDisableTrackingGuard(SharedAllocationDisableTrackingGuard&&) = + delete; + + ~SharedAllocationDisableTrackingGuard() { + KOKKOS_ASSERT(( + !Kokkos::Impl::SharedAllocationRecord::tracking_enabled())); + Kokkos::Impl::SharedAllocationRecord::tracking_enable(); + } + // clang-format off + // The old version of clang format we use is particularly egregious here + SharedAllocationDisableTrackingGuard& operator=( + const SharedAllocationDisableTrackingGuard&) = delete; + SharedAllocationDisableTrackingGuard& operator=( + SharedAllocationDisableTrackingGuard&&) = delete; + // clang-format on +}; + +template +inline FunctorType construct_with_shared_allocation_tracking_disabled( + Args&&... args) { + [[maybe_unused]] auto guard = SharedAllocationDisableTrackingGuard{}; + return {std::forward(args)...}; +} } /* namespace Impl */ } /* namespace Kokkos */ #endif diff --git a/lib/kokkos/core/src/impl/Kokkos_ViewArray.hpp b/lib/kokkos/core/src/impl/Kokkos_ViewArray.hpp deleted file mode 100644 index fe43b63018..0000000000 --- a/lib/kokkos/core/src/impl/Kokkos_ViewArray.hpp +++ /dev/null @@ -1,622 +0,0 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. 
-// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER - -#ifndef KOKKOS_EXPERIMENTAL_VIEW_ARRAY_MAPPING_HPP -#define KOKKOS_EXPERIMENTAL_VIEW_ARRAY_MAPPING_HPP - -#include - -namespace Kokkos { -namespace Impl { - -template -struct ViewDataAnalysis> { - private: - using array_analysis = ViewArrayAnalysis; - - static_assert(std::is_void

::value); - static_assert(std::is_same>::value); - static_assert(std::is_scalar::value, - "View of Array type must be of a scalar type"); - - public: - using specialize = Kokkos::Array<>; - - using dimension = typename array_analysis::dimension; - - private: - enum { - is_const = std::is_same::value - }; - - using array_scalar_dimension = typename dimension::template append::type; - - using scalar_type = std::conditional_t; - using non_const_scalar_type = V; - using const_scalar_type = const V; - - public: - using value_type = typename array_analysis::value_type; - using const_value_type = typename array_analysis::const_value_type; - using non_const_value_type = typename array_analysis::non_const_value_type; - - using type = typename ViewDataType::type; - using const_type = typename ViewDataType::type; - using non_const_type = - typename ViewDataType::type; - - using scalar_array_type = - typename ViewDataType::type; - using const_scalar_array_type = - typename ViewDataType::type; - using non_const_scalar_array_type = - typename ViewDataType::type; -}; - -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -/** \brief View mapping for non-specialized data type and standard layout */ -template -class ViewMapping> { - private: - template - friend class ViewMapping; - template - friend class Kokkos::View; - - using offset_type = ViewOffset; - - using handle_type = typename Traits::value_type::pointer; - - handle_type m_impl_handle; - offset_type m_impl_offset; - size_t m_stride = 0; - - using scalar_type = typename Traits::value_type::value_type; - - using contiguous_reference = Kokkos::Array::contiguous>; - using strided_reference = - Kokkos::Array::strided>; - - enum { - is_contiguous_reference = - (Traits::rank == 0) || (std::is_same::value) - }; - - enum { Array_N = 
Traits::value_type::size() }; - enum { Array_S = is_contiguous_reference ? Array_N : 1 }; - - KOKKOS_INLINE_FUNCTION - ViewMapping(const handle_type &arg_handle, const offset_type &arg_offset) - : m_impl_handle(arg_handle), - m_impl_offset(arg_offset), - m_stride(is_contiguous_reference ? 0 : arg_offset.span()) {} - - public: - //---------------------------------------- - // Domain dimensions - - static constexpr unsigned Rank = Traits::dimension::rank; - - template - KOKKOS_INLINE_FUNCTION constexpr size_t extent(const iType &r) const { - return m_impl_offset.m_dim.extent(r); - } - - static KOKKOS_INLINE_FUNCTION constexpr size_t static_extent( - const unsigned r) noexcept { - using dim_type = typename offset_type::dimension_type; - return dim_type::static_extent(r); - } - - KOKKOS_INLINE_FUNCTION constexpr typename Traits::array_layout layout() - const { - return m_impl_offset.layout(); - } - - KOKKOS_INLINE_FUNCTION constexpr size_t dimension_0() const { - return m_impl_offset.dimension_0(); - } - KOKKOS_INLINE_FUNCTION constexpr size_t dimension_1() const { - return m_impl_offset.dimension_1(); - } - KOKKOS_INLINE_FUNCTION constexpr size_t dimension_2() const { - return m_impl_offset.dimension_2(); - } - KOKKOS_INLINE_FUNCTION constexpr size_t dimension_3() const { - return m_impl_offset.dimension_3(); - } - KOKKOS_INLINE_FUNCTION constexpr size_t dimension_4() const { - return m_impl_offset.dimension_4(); - } - KOKKOS_INLINE_FUNCTION constexpr size_t dimension_5() const { - return m_impl_offset.dimension_5(); - } - KOKKOS_INLINE_FUNCTION constexpr size_t dimension_6() const { - return m_impl_offset.dimension_6(); - } - KOKKOS_INLINE_FUNCTION constexpr size_t dimension_7() const { - return m_impl_offset.dimension_7(); - } - - // Is a regular layout with uniform striding for each index. 
- using is_regular = typename offset_type::is_regular; - - KOKKOS_INLINE_FUNCTION constexpr size_t stride_0() const { - return m_impl_offset.stride_0(); - } - KOKKOS_INLINE_FUNCTION constexpr size_t stride_1() const { - return m_impl_offset.stride_1(); - } - KOKKOS_INLINE_FUNCTION constexpr size_t stride_2() const { - return m_impl_offset.stride_2(); - } - KOKKOS_INLINE_FUNCTION constexpr size_t stride_3() const { - return m_impl_offset.stride_3(); - } - KOKKOS_INLINE_FUNCTION constexpr size_t stride_4() const { - return m_impl_offset.stride_4(); - } - KOKKOS_INLINE_FUNCTION constexpr size_t stride_5() const { - return m_impl_offset.stride_5(); - } - KOKKOS_INLINE_FUNCTION constexpr size_t stride_6() const { - return m_impl_offset.stride_6(); - } - KOKKOS_INLINE_FUNCTION constexpr size_t stride_7() const { - return m_impl_offset.stride_7(); - } - - //---------------------------------------- - // Range span - - /** \brief Span of the mapped range */ - KOKKOS_INLINE_FUNCTION constexpr size_t span() const { - return m_impl_offset.span() * Array_N; - } - - /** \brief Is the mapped range span contiguous */ - KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const { - return m_impl_offset.span_is_contiguous(); - } - - using reference_type = - std::conditional_t; - - using pointer_type = handle_type; - - /** \brief If data references are lvalue_reference than can query pointer to - * memory */ - KOKKOS_INLINE_FUNCTION constexpr pointer_type data() const { - return m_impl_handle; - } - - //---------------------------------------- - // The View class performs all rank and bounds checking before - // calling these element reference methods. 
- - KOKKOS_FORCEINLINE_FUNCTION - reference_type reference() const { - return reference_type(m_impl_handle + 0, Array_N, 0); - } - - template - KOKKOS_FORCEINLINE_FUNCTION reference_type reference(const I0 &i0) const { - return reference_type(m_impl_handle + m_impl_offset(i0) * Array_S, Array_N, - m_stride); - } - - template - KOKKOS_FORCEINLINE_FUNCTION reference_type reference(const I0 &i0, - const I1 &i1) const { - return reference_type(m_impl_handle + m_impl_offset(i0, i1) * Array_S, - Array_N, m_stride); - } - - template - KOKKOS_FORCEINLINE_FUNCTION reference_type reference(const I0 &i0, - const I1 &i1, - const I2 &i2) const { - return reference_type(m_impl_handle + m_impl_offset(i0, i1, i2) * Array_S, - Array_N, m_stride); - } - - template - KOKKOS_FORCEINLINE_FUNCTION reference_type - reference(const I0 &i0, const I1 &i1, const I2 &i2, const I3 &i3) const { - return reference_type( - m_impl_handle + m_impl_offset(i0, i1, i2, i3) * Array_S, Array_N, - m_stride); - } - - template - KOKKOS_FORCEINLINE_FUNCTION reference_type reference(const I0 &i0, - const I1 &i1, - const I2 &i2, - const I3 &i3, - const I4 &i4) const { - return reference_type( - m_impl_handle + m_impl_offset(i0, i1, i2, i3, i4) * Array_S, Array_N, - m_stride); - } - - template - KOKKOS_FORCEINLINE_FUNCTION reference_type - reference(const I0 &i0, const I1 &i1, const I2 &i2, const I3 &i3, - const I4 &i4, const I5 &i5) const { - return reference_type( - m_impl_handle + m_impl_offset(i0, i1, i2, i3, i4, i5) * Array_S, - Array_N, m_stride); - } - - template - KOKKOS_FORCEINLINE_FUNCTION reference_type - reference(const I0 &i0, const I1 &i1, const I2 &i2, const I3 &i3, - const I4 &i4, const I5 &i5, const I6 &i6) const { - return reference_type( - m_impl_handle + m_impl_offset(i0, i1, i2, i3, i4, i5, i6) * Array_S, - Array_N, m_stride); - } - - template - KOKKOS_FORCEINLINE_FUNCTION reference_type - reference(const I0 &i0, const I1 &i1, const I2 &i2, const I3 &i3, - const I4 &i4, const I5 &i5, const 
I6 &i6, const I7 &i7) const { - return reference_type( - m_impl_handle + m_impl_offset(i0, i1, i2, i3, i4, i5, i6, i7) * Array_S, - Array_N, m_stride); - } - - //---------------------------------------- - - private: - enum { MemorySpanMask = 8 - 1 /* Force alignment on 8 byte boundary */ }; - enum { MemorySpanSize = sizeof(scalar_type) }; - - public: - /** \brief Span, in bytes, of the referenced memory */ - KOKKOS_INLINE_FUNCTION constexpr size_t memory_span() const { - return (m_impl_offset.span() * Array_N * MemorySpanSize + MemorySpanMask) & - ~size_t(MemorySpanMask); - } - - //---------------------------------------- - - KOKKOS_DEFAULTED_FUNCTION ViewMapping() = default; - - //---------------------------------------- - - template - KOKKOS_INLINE_FUNCTION ViewMapping(pointer_type ptr, Args... args) - : m_impl_handle(ptr), - m_impl_offset(std::integral_constant(), args...), - m_stride(m_impl_offset.span()) {} - - //---------------------------------------- - - template - Kokkos::Impl::SharedAllocationRecord<> *allocate_shared( - Kokkos::Impl::ViewCtorProp const &arg_prop, - typename Traits::array_layout const &arg_layout, - bool execution_space_specified) { - using alloc_prop = Kokkos::Impl::ViewCtorProp; - - using execution_space = typename alloc_prop::execution_space; - using memory_space = typename Traits::memory_space; - static_assert( - SpaceAccessibility::accessible); - using functor_type = - ViewValueFunctor; - using record_type = - Kokkos::Impl::SharedAllocationRecord; - - // Query the mapping for byte-size of allocation. - using padding = std::integral_constant< - unsigned int, alloc_prop::allow_padding ? 
sizeof(scalar_type) : 0>; - - m_impl_offset = offset_type(padding(), arg_layout); - - const size_t alloc_size = - (m_impl_offset.span() * Array_N * MemorySpanSize + MemorySpanMask) & - ~size_t(MemorySpanMask); - const auto &alloc_name = Impl::get_property(arg_prop); - const execution_space &exec_space = - Impl::get_property(arg_prop); - const memory_space &mem_space = - Impl::get_property(arg_prop); - - // Allocate memory from the memory space and create tracking record. - record_type *const record = - execution_space_specified - ? record_type::allocate(exec_space, mem_space, alloc_name, - alloc_size) - : record_type::allocate(mem_space, alloc_name, alloc_size); - - m_impl_handle = handle_type(reinterpret_cast(record->data())); - - functor_type functor = - execution_space_specified - ? functor_type(exec_space, (pointer_type)m_impl_handle, - m_impl_offset.span() * Array_N, alloc_name) - : functor_type((pointer_type)m_impl_handle, - m_impl_offset.span() * Array_N, alloc_name); - -#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) || \ - defined(KOKKOS_ENABLE_SYCL) || defined(KOKKOS_ENABLE_OPENMPTARGET) - if (false) { - // Make sure the destroy functor gets instantiated. - // This avoids "cudaErrorInvalidDeviceFunction"-type errors. - functor.destroy_shared_allocation(); - } -#endif - - // Only initialize if the allocation is non-zero. - // May be zero if one of the dimensions is zero. - if constexpr (alloc_prop::initialize) - if (alloc_size) { - // Assume destruction is only required when construction is requested. - // The ViewValueFunctor has both value construction and destruction - // operators. 
- record->m_destroy = std::move(functor); - - // Construct values - record->m_destroy.construct_shared_allocation(); - } - - return record; - } -}; - -/** \brief Assign Array to non-Array */ - -template -class ViewMapping< - DstTraits, SrcTraits, - std::enable_if_t<( - std::is_same::value && - std::is_void::value && - (std::is_same::value || - std::is_same::value || - std::is_same::value) && - std::is_same>::value && - (std::is_same::value || - std::is_same::value || - std::is_same::value))>> { - public: - // Can only convert to View::array_type - - enum { - is_assignable_data_type = - std::is_same::value && - (DstTraits::rank == SrcTraits::rank + 1) - }; - enum { - is_assignable = - std::is_same::value && - std::is_same::value - }; - - using TrackType = Kokkos::Impl::SharedAllocationTracker; - using DstType = ViewMapping; - using SrcType = ViewMapping>; - - KOKKOS_INLINE_FUNCTION - static void assign(DstType &dst, const SrcType &src, - const TrackType & /*src_track*/) { - static_assert(is_assignable, "Can only convert to array_type"); - - using dst_offset_type = typename DstType::offset_type; - - // Array dimension becomes the last dimension. - // Arguments beyond the destination rank are ignored. - if (src.span_is_contiguous()) { // not padded - dst.m_impl_offset = dst_offset_type( - std::integral_constant(), - typename DstTraits::array_layout( - (0 < SrcType::Rank ? src.dimension_0() - : SrcTraits::value_type::size()), - (1 < SrcType::Rank ? src.dimension_1() - : SrcTraits::value_type::size()), - (2 < SrcType::Rank ? src.dimension_2() - : SrcTraits::value_type::size()), - (3 < SrcType::Rank ? src.dimension_3() - : SrcTraits::value_type::size()), - (4 < SrcType::Rank ? src.dimension_4() - : SrcTraits::value_type::size()), - (5 < SrcType::Rank ? src.dimension_5() - : SrcTraits::value_type::size()), - (6 < SrcType::Rank ? src.dimension_6() - : SrcTraits::value_type::size()), - (7 < SrcType::Rank ? 
src.dimension_7() - : SrcTraits::value_type::size()))); - } else { // is padded - using padded = std::integral_constant< - unsigned int, sizeof(typename SrcTraits::value_type::value_type)>; - - dst.m_impl_offset = dst_offset_type( - padded(), typename DstTraits::array_layout( - (0 < SrcType::Rank ? src.dimension_0() - : SrcTraits::value_type::size()), - (1 < SrcType::Rank ? src.dimension_1() - : SrcTraits::value_type::size()), - (2 < SrcType::Rank ? src.dimension_2() - : SrcTraits::value_type::size()), - (3 < SrcType::Rank ? src.dimension_3() - : SrcTraits::value_type::size()), - (4 < SrcType::Rank ? src.dimension_4() - : SrcTraits::value_type::size()), - (5 < SrcType::Rank ? src.dimension_5() - : SrcTraits::value_type::size()), - (6 < SrcType::Rank ? src.dimension_6() - : SrcTraits::value_type::size()), - (7 < SrcType::Rank ? src.dimension_7() - : SrcTraits::value_type::size()))); - } - - dst.m_impl_handle = src.m_impl_handle; - } -}; - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -template -class ViewMapping< - std::enable_if_t<( - std::is_same>::value && - (std::is_same::value || - std::is_same::value || - std::is_same::value))>, - SrcTraits, Args...> { - private: - static_assert(SrcTraits::rank == sizeof...(Args)); - - enum : bool { - R0 = is_integral_extent<0, Args...>::value, - R1 = is_integral_extent<1, Args...>::value, - R2 = is_integral_extent<2, Args...>::value, - R3 = is_integral_extent<3, Args...>::value, - R4 = is_integral_extent<4, Args...>::value, - R5 = is_integral_extent<5, Args...>::value, - R6 = is_integral_extent<6, Args...>::value, - R7 = is_integral_extent<7, Args...>::value - }; - - enum { - rank = unsigned(R0) + unsigned(R1) + unsigned(R2) + unsigned(R3) + - unsigned(R4) + unsigned(R5) + unsigned(R6) + unsigned(R7) - }; - - // Whether right-most rank is a range. - enum { - R0_rev = - 0 == SrcTraits::rank - ? 
false - : (1 == SrcTraits::rank - ? R0 - : (2 == SrcTraits::rank - ? R1 - : (3 == SrcTraits::rank - ? R2 - : (4 == SrcTraits::rank - ? R3 - : (5 == SrcTraits::rank - ? R4 - : (6 == SrcTraits::rank - ? R5 - : (7 == SrcTraits::rank - ? R6 - : R7))))))) - }; - - // Subview's layout - using array_layout = - std::conditional_t<((rank == 0) || - (rank <= 2 && R0 && - std::is_same::value) || - (rank <= 2 && R0_rev && - std::is_same::value)), - typename SrcTraits::array_layout, - Kokkos::LayoutStride>; - - using value_type = typename SrcTraits::value_type; - - using data_type = std::conditional_t< - rank == 0, value_type, - std::conditional_t< - rank == 1, value_type *, - std::conditional_t< - rank == 2, value_type **, - std::conditional_t< - rank == 3, value_type ***, - std::conditional_t< - rank == 4, value_type ****, - std::conditional_t< - rank == 5, value_type *****, - std::conditional_t< - rank == 6, value_type ******, - std::conditional_t>>>>>>>; - - public: - using traits_type = Kokkos::ViewTraits; - - using type = - Kokkos::View; - - KOKKOS_INLINE_FUNCTION - static void assign(ViewMapping &dst, - ViewMapping const &src, Args... 
args) { - using DstType = ViewMapping; - - using dst_offset_type = typename DstType::offset_type; - using dst_handle_type = typename DstType::handle_type; - - const SubviewExtents extents(src.m_impl_offset.m_dim, - args...); - - dst.m_impl_offset = dst_offset_type(src.m_impl_offset, extents); - dst.m_impl_handle = dst_handle_type( - src.m_impl_handle + - src.m_impl_offset(extents.domain_offset(0), extents.domain_offset(1), - extents.domain_offset(2), extents.domain_offset(3), - extents.domain_offset(4), extents.domain_offset(5), - extents.domain_offset(6), extents.domain_offset(7))); - } -}; - -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#endif /* #ifndef KOKKOS_EXPERIMENTAL_VIEW_ARRAY_MAPPING_HPP */ diff --git a/lib/kokkos/core/src/impl/Kokkos_ViewCtor.hpp b/lib/kokkos/core/src/impl/Kokkos_ViewCtor.hpp index e1b8ba86a5..379180ae64 100644 --- a/lib/kokkos/core/src/impl/Kokkos_ViewCtor.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_ViewCtor.hpp @@ -23,12 +23,16 @@ namespace Kokkos { namespace Impl { +struct SequentialHostInit_t {}; struct WithoutInitializing_t {}; struct AllowPadding_t {}; template struct is_view_ctor_property : public std::false_type {}; +template <> +struct is_view_ctor_property : public std::true_type {}; + template <> struct is_view_ctor_property : public std::true_type {}; @@ -84,10 +88,10 @@ struct ViewCtorProp> { /* Property flags have constexpr value */ template -struct ViewCtorProp< - std::enable_if_t::value || - std::is_same::value>, - P> { +struct ViewCtorProp || + std::is_same_v || + std::is_same_v>, + P> { ViewCtorProp() = default; ViewCtorProp(const ViewCtorProp &) = default; ViewCtorProp &operator=(const ViewCtorProp &) = default; @@ -199,6 +203,11 @@ struct ViewCtorProp : public ViewCtorProp... 
{ Kokkos::Impl::has_type::value; static constexpr bool initialize = !Kokkos::Impl::has_type::value; + static constexpr bool sequential_host_init = + Kokkos::Impl::has_type::value; + static_assert(initialize || !sequential_host_init, + "Incompatible WithoutInitializing and SequentialHostInit view " + "alloc properties"); using memory_space = typename var_memory_space::type; using execution_space = typename var_execution_space::type; @@ -251,7 +260,9 @@ auto with_properties_if_unset(const ViewCtorProp &view_ctor_prop, (is_view_label::value && !ViewCtorProp::has_label) || (std::is_same_v && - ViewCtorProp::initialize)) { + ViewCtorProp::initialize) || + (std::is_same_v && + !ViewCtorProp::sequential_host_init)) { using NewViewCtorProp = ViewCtorProp; NewViewCtorProp new_view_ctor_prop(view_ctor_prop); static_cast &>(new_view_ctor_prop).value = @@ -299,7 +310,9 @@ struct WithPropertiesIfUnset, Property, Properties...> { (is_view_label::value && !ViewCtorProp::has_label) || (std::is_same_v && - ViewCtorProp::initialize)) { + ViewCtorProp::initialize) || + (std::is_same_v && + !ViewCtorProp::sequential_host_init)) { using NewViewCtorProp = ViewCtorProp; NewViewCtorProp new_view_ctor_prop(view_ctor_prop); static_cast &>(new_view_ctor_prop).value = diff --git a/lib/kokkos/core/src/impl/Kokkos_ViewLayoutTiled.hpp b/lib/kokkos/core/src/impl/Kokkos_ViewLayoutTiled.hpp deleted file mode 100644 index 957717f973..0000000000 --- a/lib/kokkos/core/src/impl/Kokkos_ViewLayoutTiled.hpp +++ /dev/null @@ -1,1425 +0,0 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER - -#ifndef KOKKOS_EXPERIMENTAL_VIEWLAYOUTTILE_HPP -#define KOKKOS_EXPERIMENTAL_VIEWLAYOUTTILE_HPP - -#include -#include - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { - -// View offset and mapping for tiled view's - -template -struct is_array_layout> - : public std::true_type {}; - -template -struct is_array_layout> - : public std::true_type {}; - -template -struct is_array_layout> - : public std::true_type {}; - -template -struct is_array_layout> - : public std::true_type {}; - -template -struct is_array_layout> - : public std::true_type {}; - -template -struct is_array_layout> - : public std::true_type {}; - -template -struct is_array_layout< - Kokkos::Experimental::LayoutTiled> - : public std::true_type {}; - -template -struct is_array_layout_tiled : public std::false_type {}; - -template -struct is_array_layout_tiled> : public std::true_type { -}; // Last template parameter "true" meaning this currently only supports - // powers-of-two - -namespace Impl { - -template -struct ViewOffset< - Dimension, Layout, - std::enable_if_t<((Dimension::rank <= 8) && (Dimension::rank >= 2) && - is_array_layout::value && - is_array_layout_tiled::value)>> { - public: - static constexpr Kokkos::Iterate outer_pattern = Layout::outer_pattern; - static constexpr Kokkos::Iterate inner_pattern = Layout::inner_pattern; - - static constexpr int VORank = Dimension::rank; - - static constexpr unsigned SHIFT_0 = - Kokkos::Impl::integral_power_of_two(Layout::N0); - static constexpr unsigned SHIFT_1 = - Kokkos::Impl::integral_power_of_two(Layout::N1); - static constexpr unsigned SHIFT_2 = - Kokkos::Impl::integral_power_of_two(Layout::N2); - static constexpr unsigned SHIFT_3 = - Kokkos::Impl::integral_power_of_two(Layout::N3); - static constexpr unsigned SHIFT_4 = - 
Kokkos::Impl::integral_power_of_two(Layout::N4); - static constexpr unsigned SHIFT_5 = - Kokkos::Impl::integral_power_of_two(Layout::N5); - static constexpr unsigned SHIFT_6 = - Kokkos::Impl::integral_power_of_two(Layout::N6); - static constexpr unsigned SHIFT_7 = - Kokkos::Impl::integral_power_of_two(Layout::N7); - static constexpr int MASK_0 = Layout::N0 - 1; - static constexpr int MASK_1 = Layout::N1 - 1; - static constexpr int MASK_2 = Layout::N2 - 1; - static constexpr int MASK_3 = Layout::N3 - 1; - static constexpr int MASK_4 = Layout::N4 - 1; - static constexpr int MASK_5 = Layout::N5 - 1; - static constexpr int MASK_6 = Layout::N6 - 1; - static constexpr int MASK_7 = Layout::N7 - 1; - - static constexpr unsigned SHIFT_2T = SHIFT_0 + SHIFT_1; - static constexpr unsigned SHIFT_3T = SHIFT_0 + SHIFT_1 + SHIFT_2; - static constexpr unsigned SHIFT_4T = SHIFT_0 + SHIFT_1 + SHIFT_2 + SHIFT_3; - static constexpr unsigned SHIFT_5T = - SHIFT_0 + SHIFT_1 + SHIFT_2 + SHIFT_3 + SHIFT_4; - static constexpr unsigned SHIFT_6T = - SHIFT_0 + SHIFT_1 + SHIFT_2 + SHIFT_3 + SHIFT_4 + SHIFT_5; - static constexpr unsigned SHIFT_7T = - SHIFT_0 + SHIFT_1 + SHIFT_2 + SHIFT_3 + SHIFT_4 + SHIFT_5 + SHIFT_6; - static constexpr unsigned SHIFT_8T = SHIFT_0 + SHIFT_1 + SHIFT_2 + SHIFT_3 + - SHIFT_4 + SHIFT_5 + SHIFT_6 + SHIFT_7; - - // Is an irregular layout that does not have uniform striding for each index. 
- using is_mapping_plugin = std::true_type; - using is_regular = std::false_type; - - using size_type = size_t; - using dimension_type = Dimension; - using array_layout = Layout; - - dimension_type m_dim; - size_type m_tile_N0; // Num tiles dim 0 - size_type m_tile_N1; - size_type m_tile_N2; - size_type m_tile_N3; - size_type m_tile_N4; - size_type m_tile_N5; - size_type m_tile_N6; - size_type m_tile_N7; - - //---------------------------------------- - -#define KOKKOS_IMPL_DEBUG_OUTPUT_CHECK 0 - - // Rank 2 - template - KOKKOS_INLINE_FUNCTION size_type operator()(I0 const& i0, - I1 const& i1) const { - auto tile_offset = - (outer_pattern == (Kokkos::Iterate::Left)) - ? (((i0 >> SHIFT_0) + m_tile_N0 * ((i1 >> SHIFT_1))) << SHIFT_2T) - : (((m_tile_N1 * (i0 >> SHIFT_0) + (i1 >> SHIFT_1))) << SHIFT_2T); - // ( num_tiles[1] * ti0 + ti1 ) * FTD - - auto local_offset = (inner_pattern == (Kokkos::Iterate::Left)) - ? ((i0 & MASK_0) + ((i1 & MASK_1) << SHIFT_0)) - : (((i0 & MASK_0) << SHIFT_1) + (i1 & MASK_1)); - // ( tile_dim[1] * li0 + li1 ) - -#if KOKKOS_IMPL_DEBUG_OUTPUT_CHECK - std::cout << "Am I Outer Left? " - << (outer_pattern == (Kokkos::Iterate::Left)) << std::endl; - std::cout << "Am I Inner Left? " - << (inner_pattern == (Kokkos::Iterate::Left)) << std::endl; - std::cout << "i0 = " << i0 << " i1 = " << i1 - << "\ntilei0 = " << (i0 >> SHIFT_0) - << " tilei1 = " << (i1 >> SHIFT_1) - << "locali0 = " << (i0 & MASK_0) - << "\nlocali1 = " << (i1 & MASK_1) << std::endl; -#endif - - return tile_offset + local_offset; - } - - // Rank 3 - template - KOKKOS_INLINE_FUNCTION size_type operator()(I0 const& i0, I1 const& i1, - I2 const& i2) const { - auto tile_offset = - (outer_pattern == Kokkos::Iterate::Left) - ? 
(((i0 >> SHIFT_0) + - m_tile_N0 * ((i1 >> SHIFT_1) + m_tile_N1 * (i2 >> SHIFT_2))) - << SHIFT_3T) - : ((m_tile_N2 * (m_tile_N1 * (i0 >> SHIFT_0) + (i1 >> SHIFT_1)) + - (i2 >> SHIFT_2)) - << SHIFT_3T); - - auto local_offset = (inner_pattern == Kokkos::Iterate::Left) - ? ((i0 & MASK_0) + ((i1 & MASK_1) << SHIFT_0) + - ((i2 & MASK_2) << (SHIFT_0 + SHIFT_1))) - : (((i0 & MASK_0) << (SHIFT_2 + SHIFT_1)) + - ((i1 & MASK_1) << (SHIFT_2)) + (i2 & MASK_2)); - -#if KOKKOS_IMPL_DEBUG_OUTPUT_CHECK - std::cout << "Am I Outer Left? " - << (outer_pattern == (Kokkos::Iterate::Left)) << std::endl; - std::cout << "Am I Inner Left? " - << (inner_pattern == (Kokkos::Iterate::Left)) << std::endl; - std::cout << "i0 = " << i0 << " i1 = " << i1 << " i2 = " << i2 - << "\ntilei0 = " << (i0 >> SHIFT_0) - << " tilei1 = " << (i1 >> SHIFT_1) - << " tilei2 = " << (i2 >> SHIFT_2) - << "\nlocali0 = " << (i0 & MASK_0) - << "locali1 = " << (i1 & MASK_1) << "locali2 = " << (i2 & MASK_2) - << std::endl; -#endif - - return tile_offset + local_offset; - } - - // Rank 4 - template - KOKKOS_INLINE_FUNCTION size_type operator()(I0 const& i0, I1 const& i1, - I2 const& i2, - I3 const& i3) const { - auto tile_offset = - (outer_pattern == Kokkos::Iterate::Left) - ? (((i0 >> SHIFT_0) + - m_tile_N0 * ((i1 >> SHIFT_1) + - m_tile_N1 * ((i2 >> SHIFT_2) + - m_tile_N2 * (i3 >> SHIFT_3)))) - << SHIFT_4T) - : ((m_tile_N3 * (m_tile_N2 * (m_tile_N1 * (i0 >> SHIFT_0) + - (i1 >> SHIFT_1)) + - (i2 >> SHIFT_2)) + - (i3 >> SHIFT_3)) - << SHIFT_4T); - - auto local_offset = - (inner_pattern == Kokkos::Iterate::Left) - ? 
((i0 & MASK_0) + ((i1 & MASK_1) << SHIFT_0) + - ((i2 & MASK_2) << (SHIFT_0 + SHIFT_1)) + - ((i3 & MASK_3) << (SHIFT_0 + SHIFT_1 + SHIFT_2))) - : (((i0 & MASK_0) << (SHIFT_3 + SHIFT_2 + SHIFT_1)) + - ((i1 & MASK_1) << (SHIFT_3 + SHIFT_2)) + - ((i2 & MASK_2) << (SHIFT_3)) + (i3 & MASK_3)); - - return tile_offset + local_offset; - } - - // Rank 5 - template - KOKKOS_INLINE_FUNCTION size_type operator()(I0 const& i0, I1 const& i1, - I2 const& i2, I3 const& i3, - I4 const& i4) const { - auto tile_offset = - (outer_pattern == Kokkos::Iterate::Left) - ? (((i0 >> SHIFT_0) + - m_tile_N0 * - ((i1 >> SHIFT_1) + - m_tile_N1 * ((i2 >> SHIFT_2) + - m_tile_N2 * ((i3 >> SHIFT_3) + - m_tile_N3 * (i4 >> SHIFT_4))))) - << SHIFT_5T) - : ((m_tile_N4 * - (m_tile_N3 * (m_tile_N2 * (m_tile_N1 * (i0 >> SHIFT_0) + - (i1 >> SHIFT_1)) + - (i2 >> SHIFT_2)) + - (i3 >> SHIFT_3)) + - (i4 >> SHIFT_4)) - << SHIFT_5T); - - auto local_offset = - (inner_pattern == Kokkos::Iterate::Left) - ? ((i0 & MASK_0) + ((i1 & MASK_1) << SHIFT_0) + - ((i2 & MASK_2) << (SHIFT_0 + SHIFT_1)) + - ((i3 & MASK_3) << (SHIFT_0 + SHIFT_1 + SHIFT_2)) + - ((i4 & MASK_4) << (SHIFT_0 + SHIFT_1 + SHIFT_2 + SHIFT_3))) - : (((i0 & MASK_0) << (SHIFT_4 + SHIFT_3 + SHIFT_2 + SHIFT_1)) + - ((i1 & MASK_1) << (SHIFT_4 + SHIFT_3 + SHIFT_2)) + - ((i2 & MASK_2) << (SHIFT_4 + SHIFT_3)) + - ((i3 & MASK_3) << (SHIFT_4)) + (i4 & MASK_4)); - - return tile_offset + local_offset; - } - - // Rank 6 - template - KOKKOS_INLINE_FUNCTION size_type operator()(I0 const& i0, I1 const& i1, - I2 const& i2, I3 const& i3, - I4 const& i4, - I5 const& i5) const { - auto tile_offset = - (outer_pattern == Kokkos::Iterate::Left) - ? 
(((i0 >> SHIFT_0) + - m_tile_N0 * - ((i1 >> SHIFT_1) + - m_tile_N1 * - ((i2 >> SHIFT_2) + - m_tile_N2 * - ((i3 >> SHIFT_3) + - m_tile_N3 * ((i4 >> SHIFT_4) + - m_tile_N4 * (i5 >> SHIFT_5)))))) - << SHIFT_6T) - : ((m_tile_N5 * - (m_tile_N4 * - (m_tile_N3 * - (m_tile_N2 * (m_tile_N1 * (i0 >> SHIFT_0) + - (i1 >> SHIFT_1)) + - (i2 >> SHIFT_2)) + - (i3 >> SHIFT_3)) + - (i4 >> SHIFT_4)) + - (i5 >> SHIFT_5)) - << SHIFT_6T); - - auto local_offset = - (inner_pattern == Kokkos::Iterate::Left) - ? ((i0 & MASK_0) + ((i1 & MASK_1) << SHIFT_0) + - ((i2 & MASK_2) << (SHIFT_0 + SHIFT_1)) + - ((i3 & MASK_3) << (SHIFT_0 + SHIFT_1 + SHIFT_2)) + - ((i4 & MASK_4) << (SHIFT_0 + SHIFT_1 + SHIFT_2 + SHIFT_3)) + - ((i5 & MASK_5) - << (SHIFT_0 + SHIFT_1 + SHIFT_2 + SHIFT_3 + SHIFT_4))) - : (((i0 & MASK_0) - << (SHIFT_5 + SHIFT_4 + SHIFT_3 + SHIFT_2 + SHIFT_1)) + - ((i1 & MASK_1) << (SHIFT_5 + SHIFT_4 + SHIFT_3 + SHIFT_2)) + - ((i2 & MASK_2) << (SHIFT_5 + SHIFT_4 + SHIFT_3)) + - ((i3 & MASK_3) << (SHIFT_5 + SHIFT_4)) + - ((i4 & MASK_4) << (SHIFT_5)) + (i5 & MASK_5)); - - return tile_offset + local_offset; - } - - // Rank 7 - template - KOKKOS_INLINE_FUNCTION size_type operator()(I0 const& i0, I1 const& i1, - I2 const& i2, I3 const& i3, - I4 const& i4, I5 const& i5, - I6 const& i6) const { - auto tile_offset = - (outer_pattern == Kokkos::Iterate::Left) - ? (((i0 >> SHIFT_0) + - m_tile_N0 * - ((i1 >> SHIFT_1) + - m_tile_N1 * - ((i2 >> SHIFT_2) + - m_tile_N2 * - ((i3 >> SHIFT_3) + - m_tile_N3 * - ((i4 >> SHIFT_4) + - m_tile_N4 * - ((i5 >> SHIFT_5) + - m_tile_N5 * (i6 >> SHIFT_6))))))) - << SHIFT_7T) - : ((m_tile_N6 * - (m_tile_N5 * - (m_tile_N4 * - (m_tile_N3 * - (m_tile_N2 * (m_tile_N1 * (i0 >> SHIFT_0) + - (i1 >> SHIFT_1)) + - (i2 >> SHIFT_2)) + - (i3 >> SHIFT_3)) + - (i4 >> SHIFT_4)) + - (i5 >> SHIFT_5)) + - (i6 >> SHIFT_6)) - << SHIFT_7T); - - auto local_offset = - (inner_pattern == Kokkos::Iterate::Left) - ? 
((i0 & MASK_0) + ((i1 & MASK_1) << SHIFT_0) + - ((i2 & MASK_2) << (SHIFT_0 + SHIFT_1)) + - ((i3 & MASK_3) << (SHIFT_0 + SHIFT_1 + SHIFT_2)) + - ((i4 & MASK_4) << (SHIFT_0 + SHIFT_1 + SHIFT_2 + SHIFT_3)) + - ((i5 & MASK_5) - << (SHIFT_0 + SHIFT_1 + SHIFT_2 + SHIFT_3 + SHIFT_4)) + - ((i6 & MASK_6) - << (SHIFT_0 + SHIFT_1 + SHIFT_2 + SHIFT_3 + SHIFT_4 + SHIFT_5))) - : (((i0 & MASK_0) << (SHIFT_6 + SHIFT_5 + SHIFT_4 + SHIFT_3 + - SHIFT_2 + SHIFT_1)) + - ((i1 & MASK_1) - << (SHIFT_6 + SHIFT_5 + SHIFT_4 + SHIFT_3 + SHIFT_2)) + - ((i2 & MASK_2) << (SHIFT_6 + SHIFT_5 + SHIFT_4 + SHIFT_3)) + - ((i3 & MASK_3) << (SHIFT_6 + SHIFT_5 + SHIFT_4)) + - ((i4 & MASK_4) << (SHIFT_6 + SHIFT_5)) + - ((i5 & MASK_5) << (SHIFT_6)) + (i6 & MASK_6)); - - return tile_offset + local_offset; - } - - // Rank 8 - template - KOKKOS_INLINE_FUNCTION size_type operator()(I0 const& i0, I1 const& i1, - I2 const& i2, I3 const& i3, - I4 const& i4, I5 const& i5, - I6 const& i6, - I7 const& i7) const { - auto tile_offset = - (outer_pattern == Kokkos::Iterate::Left) - ? (((i0 >> SHIFT_0) + - m_tile_N0 * - ((i1 >> SHIFT_1) + - m_tile_N1 * - ((i2 >> SHIFT_2) + - m_tile_N2 * - ((i3 >> SHIFT_3) + - m_tile_N3 * - ((i4 >> SHIFT_4) + - m_tile_N4 * - ((i5 >> SHIFT_5) + - m_tile_N5 * - ((i6 >> SHIFT_6) + - m_tile_N6 * (i7 >> SHIFT_7)))))))) - << SHIFT_8T) - : ((m_tile_N7 * - (m_tile_N6 * - (m_tile_N5 * - (m_tile_N4 * - (m_tile_N3 * - (m_tile_N2 * - (m_tile_N1 * (i0 >> SHIFT_0) + - (i1 >> SHIFT_1)) + - (i2 >> SHIFT_2)) + - (i3 >> SHIFT_3)) + - (i4 >> SHIFT_4)) + - (i5 >> SHIFT_5)) + - (i6 >> SHIFT_6)) + - (i7 >> SHIFT_7)) - << SHIFT_8T); - - auto local_offset = - (inner_pattern == Kokkos::Iterate::Left) - ? 
((i0 & MASK_0) + ((i1 & MASK_1) << SHIFT_0) + - ((i2 & MASK_2) << (SHIFT_0 + SHIFT_1)) + - ((i3 & MASK_3) << (SHIFT_0 + SHIFT_1 + SHIFT_2)) + - ((i4 & MASK_4) << (SHIFT_0 + SHIFT_1 + SHIFT_2 + SHIFT_3)) + - ((i5 & MASK_5) - << (SHIFT_0 + SHIFT_1 + SHIFT_2 + SHIFT_3 + SHIFT_4)) + - ((i6 & MASK_6) << (SHIFT_0 + SHIFT_1 + SHIFT_2 + SHIFT_3 + - SHIFT_4 + SHIFT_5)) + - ((i7 & MASK_7) << (SHIFT_0 + SHIFT_1 + SHIFT_2 + SHIFT_3 + - SHIFT_4 + SHIFT_5 + SHIFT_6))) - : (((i0 & MASK_0) << (SHIFT_7 + SHIFT_6 + SHIFT_5 + SHIFT_4 + - SHIFT_3 + SHIFT_2 + SHIFT_1)) + - ((i1 & MASK_1) << (SHIFT_7 + SHIFT_6 + SHIFT_5 + SHIFT_4 + - SHIFT_3 + SHIFT_2)) + - ((i2 & MASK_2) - << (SHIFT_7 + SHIFT_6 + SHIFT_5 + SHIFT_4 + SHIFT_3)) + - ((i3 & MASK_3) << (SHIFT_7 + SHIFT_6 + SHIFT_5 + SHIFT_4)) + - ((i4 & MASK_4) << (SHIFT_7 + SHIFT_6 + SHIFT_5)) + - ((i5 & MASK_5) << (SHIFT_7 + SHIFT_6)) + - ((i6 & MASK_6) << (SHIFT_7)) + (i7 & MASK_7)); - - return tile_offset + local_offset; - } - - //---------------------------------------- - - KOKKOS_INLINE_FUNCTION constexpr array_layout layout() const { - return array_layout((VORank > 0 ? m_dim.N0 : KOKKOS_INVALID_INDEX), - (VORank > 1 ? m_dim.N1 : KOKKOS_INVALID_INDEX), - (VORank > 2 ? m_dim.N2 : KOKKOS_INVALID_INDEX), - (VORank > 3 ? m_dim.N3 : KOKKOS_INVALID_INDEX), - (VORank > 4 ? m_dim.N4 : KOKKOS_INVALID_INDEX), - (VORank > 5 ? m_dim.N5 : KOKKOS_INVALID_INDEX), - (VORank > 6 ? m_dim.N6 : KOKKOS_INVALID_INDEX), - (VORank > 7 ? 
m_dim.N7 : KOKKOS_INVALID_INDEX)); - } - - KOKKOS_INLINE_FUNCTION constexpr size_type dimension_0() const { - return m_dim.N0; - } - KOKKOS_INLINE_FUNCTION constexpr size_type dimension_1() const { - return m_dim.N1; - } - KOKKOS_INLINE_FUNCTION constexpr size_type dimension_2() const { - return m_dim.N2; - } - KOKKOS_INLINE_FUNCTION constexpr size_type dimension_3() const { - return m_dim.N3; - } - KOKKOS_INLINE_FUNCTION constexpr size_type dimension_4() const { - return m_dim.N4; - } - KOKKOS_INLINE_FUNCTION constexpr size_type dimension_5() const { - return m_dim.N5; - } - KOKKOS_INLINE_FUNCTION constexpr size_type dimension_6() const { - return m_dim.N6; - } - KOKKOS_INLINE_FUNCTION constexpr size_type dimension_7() const { - return m_dim.N7; - } - - KOKKOS_INLINE_FUNCTION constexpr size_type size() const { - return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * - m_dim.N6 * m_dim.N7; - } - - // Strides are meaningless due to irregularity - KOKKOS_INLINE_FUNCTION constexpr size_type stride_0() const { return 0; } - KOKKOS_INLINE_FUNCTION constexpr size_type stride_1() const { return 0; } - KOKKOS_INLINE_FUNCTION constexpr size_type stride_2() const { return 0; } - KOKKOS_INLINE_FUNCTION constexpr size_type stride_3() const { return 0; } - KOKKOS_INLINE_FUNCTION constexpr size_type stride_4() const { return 0; } - KOKKOS_INLINE_FUNCTION constexpr size_type stride_5() const { return 0; } - KOKKOS_INLINE_FUNCTION constexpr size_type stride_6() const { return 0; } - KOKKOS_INLINE_FUNCTION constexpr size_type stride_7() const { return 0; } - - // Stride with [ rank ] value is the total length - template - KOKKOS_INLINE_FUNCTION void stride(iType* const s) const { - s[0] = 0; - if (0 < dimension_type::rank) { - s[1] = 0; - } - if (1 < dimension_type::rank) { - s[2] = 0; - } - if (2 < dimension_type::rank) { - s[3] = 0; - } - if (3 < dimension_type::rank) { - s[4] = 0; - } - if (4 < dimension_type::rank) { - s[5] = 0; - } - if (5 < 
dimension_type::rank) { - s[6] = 0; - } - if (6 < dimension_type::rank) { - s[7] = 0; - } - if (7 < dimension_type::rank) { - s[8] = 0; - } - } - - KOKKOS_INLINE_FUNCTION constexpr size_type span() const { - // Rank2: ( NumTile0 * ( NumTile1 ) ) * TileSize, etc - return (VORank == 2) - ? (m_tile_N0 * m_tile_N1) << SHIFT_2T - : (VORank == 3) - ? (m_tile_N0 * m_tile_N1 * m_tile_N2) << SHIFT_3T - : (VORank == 4) - ? (m_tile_N0 * m_tile_N1 * m_tile_N2 * m_tile_N3) - << SHIFT_4T - : (VORank == 5) - ? (m_tile_N0 * m_tile_N1 * m_tile_N2 * - m_tile_N3 * m_tile_N4) - << SHIFT_5T - : (VORank == 6) - ? (m_tile_N0 * m_tile_N1 * m_tile_N2 * - m_tile_N3 * m_tile_N4 * m_tile_N5) - << SHIFT_6T - : (VORank == 7) - ? (m_tile_N0 * m_tile_N1 * - m_tile_N2 * m_tile_N3 * - m_tile_N4 * m_tile_N5 * - m_tile_N6) - << SHIFT_7T - : (m_tile_N0 * m_tile_N1 * - m_tile_N2 * m_tile_N3 * - m_tile_N4 * m_tile_N5 * - m_tile_N6 * m_tile_N7) - << SHIFT_8T; - } - - KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const { - return true; - } - - //---------------------------------------- -#ifdef KOKKOS_IMPL_WINDOWS_CUDA - KOKKOS_FUNCTION ViewOffset() {} - KOKKOS_FUNCTION ViewOffset(const ViewOffset& src) { - m_dim = src.m_dim; - m_tile_N0 = src.m_tile_N0; - m_tile_N1 = src.m_tile_N1; - m_tile_N2 = src.m_tile_N2; - m_tile_N3 = src.m_tile_N3; - m_tile_N4 = src.m_tile_N4; - m_tile_N5 = src.m_tile_N5; - m_tile_N6 = src.m_tile_N6; - m_tile_N7 = src.m_tile_N7; - } - KOKKOS_FUNCTION ViewOffset& operator=(const ViewOffset& src) { - m_dim = src.m_dim; - m_tile_N0 = src.m_tile_N0; - m_tile_N1 = src.m_tile_N1; - m_tile_N2 = src.m_tile_N2; - m_tile_N3 = src.m_tile_N3; - m_tile_N4 = src.m_tile_N4; - m_tile_N5 = src.m_tile_N5; - m_tile_N6 = src.m_tile_N6; - m_tile_N7 = src.m_tile_N7; - return *this; - } -#else - KOKKOS_DEFAULTED_FUNCTION ~ViewOffset() = default; - KOKKOS_DEFAULTED_FUNCTION ViewOffset() = default; - KOKKOS_DEFAULTED_FUNCTION ViewOffset(const ViewOffset&) = default; - KOKKOS_DEFAULTED_FUNCTION 
ViewOffset& operator=(const ViewOffset&) = default; -#endif - - template - KOKKOS_INLINE_FUNCTION constexpr ViewOffset( - std::integral_constant const&, - array_layout const arg_layout) - : m_dim(arg_layout.dimension[0], arg_layout.dimension[1], - arg_layout.dimension[2], arg_layout.dimension[3], - arg_layout.dimension[4], arg_layout.dimension[5], - arg_layout.dimension[6], arg_layout.dimension[7]), - m_tile_N0((arg_layout.dimension[0] + MASK_0) >> - SHIFT_0 /* number of tiles in first dimension */), - m_tile_N1((arg_layout.dimension[1] + MASK_1) >> SHIFT_1), - m_tile_N2((VORank > 2) ? (arg_layout.dimension[2] + MASK_2) >> SHIFT_2 - : 0), - m_tile_N3((VORank > 3) ? (arg_layout.dimension[3] + MASK_3) >> SHIFT_3 - : 0), - m_tile_N4((VORank > 4) ? (arg_layout.dimension[4] + MASK_4) >> SHIFT_4 - : 0), - m_tile_N5((VORank > 5) ? (arg_layout.dimension[5] + MASK_5) >> SHIFT_5 - : 0), - m_tile_N6((VORank > 6) ? (arg_layout.dimension[6] + MASK_6) >> SHIFT_6 - : 0), - m_tile_N7((VORank > 7) ? (arg_layout.dimension[7] + MASK_7) >> SHIFT_7 - : 0) {} -}; - -// FIXME Remove the out-of-class definitions when we require C++17 -#define KOKKOS_ITERATE_VIEW_OFFSET_ENABLE \ - std::enable_if_t<((Dimension::rank <= 8) && (Dimension::rank >= 2) && \ - is_array_layout::value && \ - is_array_layout_tiled::value)> -template -constexpr Kokkos::Iterate ViewOffset< - Dimension, Layout, KOKKOS_ITERATE_VIEW_OFFSET_ENABLE>::outer_pattern; -template -constexpr Kokkos::Iterate ViewOffset< - Dimension, Layout, KOKKOS_ITERATE_VIEW_OFFSET_ENABLE>::inner_pattern; -template -constexpr int - ViewOffset::VORank; -template -constexpr unsigned - ViewOffset::SHIFT_0; -template -constexpr unsigned - ViewOffset::SHIFT_1; -template -constexpr unsigned - ViewOffset::SHIFT_2; -template -constexpr unsigned - ViewOffset::SHIFT_3; -template -constexpr unsigned - ViewOffset::SHIFT_4; -template -constexpr unsigned - ViewOffset::SHIFT_5; -template -constexpr unsigned - ViewOffset::SHIFT_6; -template -constexpr unsigned 
- ViewOffset::SHIFT_7; -template -constexpr int - ViewOffset::MASK_0; -template -constexpr int - ViewOffset::MASK_1; -template -constexpr int - ViewOffset::MASK_2; -template -constexpr int - ViewOffset::MASK_3; -template -constexpr int - ViewOffset::MASK_4; -template -constexpr int - ViewOffset::MASK_5; -template -constexpr int - ViewOffset::MASK_6; -template -constexpr int - ViewOffset::MASK_7; -template -constexpr unsigned - ViewOffset::SHIFT_2T; -template -constexpr unsigned - ViewOffset::SHIFT_3T; -template -constexpr unsigned - ViewOffset::SHIFT_4T; -template -constexpr unsigned - ViewOffset::SHIFT_5T; -template -constexpr unsigned - ViewOffset::SHIFT_6T; -template -constexpr unsigned - ViewOffset::SHIFT_7T; -template -constexpr unsigned - ViewOffset::SHIFT_8T; -#undef KOKKOS_ITERATE_VIEW_OFFSET_ENABLE - -//---------------------------------------- - -// ViewMapping assign method needed in order to return a 'subview' tile as a -// proper View The outer iteration pattern determines the mapping of the pointer -// offset to the beginning of requested tile The inner iteration pattern is -// needed for the layout of the tile's View to be returned Rank 2 -template -class ViewMapping // void - , - Kokkos::ViewTraits< - T**, - Kokkos::Experimental::LayoutTiled< - OuterP, InnerP, N0, N1, N2, N3, N4, N5, N6, N7, true>, - P...>, - Kokkos::Experimental::LayoutTiled, - iType0, iType1> { - public: - using src_layout = - Kokkos::Experimental::LayoutTiled; - using src_traits = Kokkos::ViewTraits; - - static constexpr bool is_outer_left = (OuterP == Kokkos::Iterate::Left); - static constexpr bool is_inner_left = (InnerP == Kokkos::Iterate::Left); - using array_layout = std::conditional_t; - using traits = Kokkos::ViewTraits; - using type = Kokkos::View; - - KOKKOS_INLINE_FUNCTION static void assign( - ViewMapping& dst, const ViewMapping& src, - const src_layout&, const iType0 i_tile0, const iType1 i_tile1) { - using dst_map_type = ViewMapping; - using src_map_type = 
ViewMapping; - using dst_handle_type = typename dst_map_type::handle_type; - using dst_offset_type = typename dst_map_type::offset_type; - using src_offset_type = typename src_map_type::offset_type; - - dst = dst_map_type( - dst_handle_type( - src.m_impl_handle + - (is_outer_left ? ((i_tile0 + src.m_impl_offset.m_tile_N0 * i_tile1) - << src_offset_type::SHIFT_2T) - : ((src.m_impl_offset.m_tile_N1 * i_tile0 + i_tile1) - << src_offset_type::SHIFT_2T)) // offset to start - // of the tile - ), - dst_offset_type()); - } -}; - -// Rank 3 -template -class ViewMapping // void - , - Kokkos::ViewTraits< - T***, - Kokkos::Experimental::LayoutTiled< - OuterP, InnerP, N0, N1, N2, N3, N4, N5, N6, N7, true>, - P...>, - Kokkos::Experimental::LayoutTiled, - iType0, iType1, iType2> { - public: - using src_layout = - Kokkos::Experimental::LayoutTiled; - using src_traits = Kokkos::ViewTraits; - - static constexpr bool is_outer_left = (OuterP == Kokkos::Iterate::Left); - static constexpr bool is_inner_left = (InnerP == Kokkos::Iterate::Left); - using array_layout = std::conditional_t; - using traits = Kokkos::ViewTraits; - using type = Kokkos::View; - - KOKKOS_INLINE_FUNCTION static void assign( - ViewMapping& dst, const ViewMapping& src, - const src_layout&, const iType0 i_tile0, const iType1 i_tile1, - const iType2 i_tile2) { - using dst_map_type = ViewMapping; - using src_map_type = ViewMapping; - using dst_handle_type = typename dst_map_type::handle_type; - using dst_offset_type = typename dst_map_type::offset_type; - using src_offset_type = typename src_map_type::offset_type; - - dst = dst_map_type( - dst_handle_type( - src.m_impl_handle + - (is_outer_left - ? 
((i_tile0 + - src.m_impl_offset.m_tile_N0 * - (i_tile1 + src.m_impl_offset.m_tile_N1 * i_tile2)) - << src_offset_type::SHIFT_3T) - : ((src.m_impl_offset.m_tile_N2 * - (src.m_impl_offset.m_tile_N1 * i_tile0 + i_tile1) + - i_tile2) - << src_offset_type::SHIFT_3T))) // offset to start of the - // tile - , - dst_offset_type()); - } -}; - -// Rank 4 -template -class ViewMapping< - std::enable_if_t<(N4 == 0 && N5 == 0 && N6 == 0 && N7 == 0)> // void - , - Kokkos::ViewTraits< - T****, - Kokkos::Experimental::LayoutTiled, - P...>, - Kokkos::Experimental::LayoutTiled, - iType0, iType1, iType2, iType3> { - public: - using src_layout = - Kokkos::Experimental::LayoutTiled; - using src_traits = Kokkos::ViewTraits; - - static constexpr bool is_outer_left = (OuterP == Kokkos::Iterate::Left); - static constexpr bool is_inner_left = (InnerP == Kokkos::Iterate::Left); - using array_layout = std::conditional_t; - using traits = Kokkos::ViewTraits; - using type = Kokkos::View; - - KOKKOS_INLINE_FUNCTION static void assign( - ViewMapping& dst, const ViewMapping& src, - const src_layout&, const iType0 i_tile0, const iType1 i_tile1, - const iType2 i_tile2, const iType3 i_tile3) { - using dst_map_type = ViewMapping; - using src_map_type = ViewMapping; - using dst_handle_type = typename dst_map_type::handle_type; - using dst_offset_type = typename dst_map_type::offset_type; - using src_offset_type = typename src_map_type::offset_type; - - dst = dst_map_type( - dst_handle_type( - src.m_impl_handle + - (is_outer_left - ? 
((i_tile0 + - src.m_impl_offset.m_tile_N0 * - (i_tile1 + src.m_impl_offset.m_tile_N1 * - (i_tile2 + src.m_impl_offset.m_tile_N2 * - i_tile3))) - << src_offset_type::SHIFT_4T) - : ((src.m_impl_offset.m_tile_N3 * - (src.m_impl_offset.m_tile_N2 * - (src.m_impl_offset.m_tile_N1 * i_tile0 + - i_tile1) + - i_tile2) + - i_tile3) - << src_offset_type::SHIFT_4T))) // offset to start of the - // tile - , - dst_offset_type()); - } -}; - -// Rank 5 -template -class ViewMapping // void - , - Kokkos::ViewTraits< - T*****, - Kokkos::Experimental::LayoutTiled< - OuterP, InnerP, N0, N1, N2, N3, N4, N5, N6, N7, true>, - P...>, - Kokkos::Experimental::LayoutTiled, - iType0, iType1, iType2, iType3, iType4> { - public: - using src_layout = - Kokkos::Experimental::LayoutTiled; - using src_traits = Kokkos::ViewTraits; - - static constexpr bool is_outer_left = (OuterP == Kokkos::Iterate::Left); - static constexpr bool is_inner_left = (InnerP == Kokkos::Iterate::Left); - using array_layout = std::conditional_t; - using traits = Kokkos::ViewTraits; - using type = Kokkos::View; - - KOKKOS_INLINE_FUNCTION static void assign( - ViewMapping& dst, const ViewMapping& src, - const src_layout&, const iType0 i_tile0, const iType1 i_tile1, - const iType2 i_tile2, const iType3 i_tile3, const iType4 i_tile4) { - using dst_map_type = ViewMapping; - using src_map_type = ViewMapping; - using dst_handle_type = typename dst_map_type::handle_type; - using dst_offset_type = typename dst_map_type::offset_type; - using src_offset_type = typename src_map_type::offset_type; - - dst = dst_map_type( - dst_handle_type( - src.m_impl_handle + - (is_outer_left - ? 
((i_tile0 + - src.m_impl_offset.m_tile_N0 * - (i_tile1 + - src.m_impl_offset.m_tile_N1 * - (i_tile2 + - src.m_impl_offset.m_tile_N2 * - (i_tile3 + - src.m_impl_offset.m_tile_N3 * i_tile4)))) - << src_offset_type::SHIFT_5T) - : ((src.m_impl_offset.m_tile_N4 * - (src.m_impl_offset.m_tile_N3 * - (src.m_impl_offset.m_tile_N2 * - (src.m_impl_offset.m_tile_N1 * i_tile0 + - i_tile1) + - i_tile2) + - i_tile3) + - i_tile4) - << src_offset_type::SHIFT_5T))) // offset to start of the - // tile - , - dst_offset_type()); - } -}; - -// Rank 6 -template -class ViewMapping // void - , - Kokkos::ViewTraits< - T******, - Kokkos::Experimental::LayoutTiled< - OuterP, InnerP, N0, N1, N2, N3, N4, N5, N6, N7, true>, - P...>, - Kokkos::Experimental::LayoutTiled, - iType0, iType1, iType2, iType3, iType4, iType5> { - public: - using src_layout = - Kokkos::Experimental::LayoutTiled; - using src_traits = Kokkos::ViewTraits; - - static constexpr bool is_outer_left = (OuterP == Kokkos::Iterate::Left); - static constexpr bool is_inner_left = (InnerP == Kokkos::Iterate::Left); - using array_layout = std::conditional_t; - using traits = - Kokkos::ViewTraits; - using type = Kokkos::View; - - KOKKOS_INLINE_FUNCTION static void assign( - ViewMapping& dst, const ViewMapping& src, - const src_layout&, const iType0 i_tile0, const iType1 i_tile1, - const iType2 i_tile2, const iType3 i_tile3, const iType4 i_tile4, - const iType5 i_tile5) { - using dst_map_type = ViewMapping; - using src_map_type = ViewMapping; - using dst_handle_type = typename dst_map_type::handle_type; - using dst_offset_type = typename dst_map_type::offset_type; - using src_offset_type = typename src_map_type::offset_type; - - dst = dst_map_type( - dst_handle_type( - src.m_impl_handle + - (is_outer_left - ? 
((i_tile0 + - src.m_impl_offset.m_tile_N0 * - (i_tile1 + - src.m_impl_offset.m_tile_N1 * - (i_tile2 + - src.m_impl_offset.m_tile_N2 * - (i_tile3 + - src.m_impl_offset.m_tile_N3 * - (i_tile4 + src.m_impl_offset.m_tile_N4 * - i_tile5))))) - << src_offset_type::SHIFT_6T) - : ((src.m_impl_offset.m_tile_N5 * - (src.m_impl_offset.m_tile_N4 * - (src.m_impl_offset.m_tile_N3 * - (src.m_impl_offset.m_tile_N2 * - (src.m_impl_offset.m_tile_N1 * i_tile0 + - i_tile1) + - i_tile2) + - i_tile3) + - i_tile4) + - i_tile5) - << src_offset_type::SHIFT_6T))) // offset to start of the - // tile - , - dst_offset_type()); - } -}; - -// Rank 7 -template -class ViewMapping // void - , - Kokkos::ViewTraits< - T*******, - Kokkos::Experimental::LayoutTiled< - OuterP, InnerP, N0, N1, N2, N3, N4, N5, N6, N7, true>, - P...>, - Kokkos::Experimental::LayoutTiled, - iType0, iType1, iType2, iType3, iType4, iType5, iType6> { - public: - using src_layout = - Kokkos::Experimental::LayoutTiled; - using src_traits = Kokkos::ViewTraits; - - static constexpr bool is_outer_left = (OuterP == Kokkos::Iterate::Left); - static constexpr bool is_inner_left = (InnerP == Kokkos::Iterate::Left); - using array_layout = std::conditional_t; - using traits = - Kokkos::ViewTraits; - using type = Kokkos::View; - - KOKKOS_INLINE_FUNCTION static void assign( - ViewMapping& dst, const ViewMapping& src, - const src_layout&, const iType0 i_tile0, const iType1 i_tile1, - const iType2 i_tile2, const iType3 i_tile3, const iType4 i_tile4, - const iType5 i_tile5, const iType6 i_tile6) { - using dst_map_type = ViewMapping; - using src_map_type = ViewMapping; - using dst_handle_type = typename dst_map_type::handle_type; - using dst_offset_type = typename dst_map_type::offset_type; - using src_offset_type = typename src_map_type::offset_type; - - dst = dst_map_type( - dst_handle_type( - src.m_impl_handle + - (is_outer_left - ? 
((i_tile0 + - src.m_impl_offset.m_tile_N0 * - (i_tile1 + - src.m_impl_offset.m_tile_N1 * - (i_tile2 + - src.m_impl_offset.m_tile_N2 * - (i_tile3 + - src.m_impl_offset.m_tile_N3 * - (i_tile4 + - src.m_impl_offset.m_tile_N4 * - (i_tile5 + - src.m_impl_offset.m_tile_N5 * - i_tile6)))))) - << src_offset_type::SHIFT_7T) - : ((src.m_impl_offset.m_tile_N6 * - (src.m_impl_offset.m_tile_N5 * - (src.m_impl_offset.m_tile_N4 * - (src.m_impl_offset.m_tile_N3 * - (src.m_impl_offset.m_tile_N2 * - (src.m_impl_offset.m_tile_N1 * - i_tile0 + - i_tile1) + - i_tile2) + - i_tile3) + - i_tile4) + - i_tile5) + - i_tile6) - << src_offset_type::SHIFT_7T))) // offset to start of the - // tile - , - dst_offset_type()); - } -}; - -// Rank 8 -template -class ViewMapping< - std::enable_if_t<(N0 != 0 && N1 != 0 && N2 != 0 && N3 != 0 && N4 != 0 && - N5 != 0 && N6 != 0 && N7 != 0)> // void - , - Kokkos::ViewTraits< - T********, - Kokkos::Experimental::LayoutTiled, - P...>, - Kokkos::Experimental::LayoutTiled, - iType0, iType1, iType2, iType3, iType4, iType5, iType6, iType7> { - public: - using src_layout = - Kokkos::Experimental::LayoutTiled; - using src_traits = Kokkos::ViewTraits; - - static constexpr bool is_outer_left = (OuterP == Kokkos::Iterate::Left); - static constexpr bool is_inner_left = (InnerP == Kokkos::Iterate::Left); - using array_layout = std::conditional_t; - using traits = - Kokkos::ViewTraits; - using type = - Kokkos::View; - - KOKKOS_INLINE_FUNCTION static void assign( - ViewMapping& dst, const ViewMapping& src, - const src_layout&, const iType0 i_tile0, const iType1 i_tile1, - const iType2 i_tile2, const iType3 i_tile3, const iType4 i_tile4, - const iType5 i_tile5, const iType6 i_tile6, const iType7 i_tile7) { - using dst_map_type = ViewMapping; - using src_map_type = ViewMapping; - using dst_handle_type = typename dst_map_type::handle_type; - using dst_offset_type = typename dst_map_type::offset_type; - using src_offset_type = typename src_map_type::offset_type; - - dst = 
dst_map_type( - dst_handle_type( - src.m_impl_handle + - (is_outer_left - ? ((i_tile0 + - src.m_impl_offset.m_tile_N0 * - (i_tile1 + - src.m_impl_offset.m_tile_N1 * - (i_tile2 + - src.m_impl_offset.m_tile_N2 * - (i_tile3 + - src.m_impl_offset.m_tile_N3 * - (i_tile4 + - src.m_impl_offset.m_tile_N4 * - (i_tile5 + - src.m_impl_offset.m_tile_N5 * - (i_tile6 + - src.m_impl_offset.m_tile_N6 * - i_tile7))))))) - << src_offset_type::SHIFT_8T) - : ((src.m_impl_offset.m_tile_N7 * - (src.m_impl_offset.m_tile_N6 * - (src.m_impl_offset.m_tile_N5 * - (src.m_impl_offset.m_tile_N4 * - (src.m_impl_offset.m_tile_N3 * - (src.m_impl_offset.m_tile_N2 * - (src.m_impl_offset.m_tile_N1 * - i_tile0 + - i_tile1) + - i_tile2) + - i_tile3) + - i_tile4) + - i_tile5) + - i_tile6) + - i_tile7) - << src_offset_type::SHIFT_8T))) // offset to start of the - // tile - , - dst_offset_type()); - } -}; - -} /* namespace Impl */ -} /* namespace Kokkos */ - -//---------------------------------------- - -namespace Kokkos { - -// Rank 2 -template -KOKKOS_INLINE_FUNCTION - Kokkos::View, - P...> - tile_subview(const Kokkos::View< - T**, - Kokkos::Experimental::LayoutTiled< - OuterP, InnerP, N0, N1, N2, N3, N4, N5, N6, N7, true>, - P...>& src, - const size_t i_tile0, const size_t i_tile1) { - // Force the specialized ViewMapping for extracting a tile - // by using the first subview argument as the layout. 
- using array_layout = - std::conditional_t<(InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, - Kokkos::LayoutRight>; - using SrcLayout = - Kokkos::Experimental::LayoutTiled; - - return Kokkos::View(src, SrcLayout(), i_tile0, - i_tile1); -} - -// Rank 3 -template -KOKKOS_INLINE_FUNCTION - Kokkos::View, - P...> - tile_subview(const Kokkos::View< - T***, - Kokkos::Experimental::LayoutTiled< - OuterP, InnerP, N0, N1, N2, N3, N4, N5, N6, N7, true>, - P...>& src, - const size_t i_tile0, const size_t i_tile1, - const size_t i_tile2) { - // Force the specialized ViewMapping for extracting a tile - // by using the first subview argument as the layout. - using array_layout = - std::conditional_t<(InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, - Kokkos::LayoutRight>; - using SrcLayout = - Kokkos::Experimental::LayoutTiled; - - return Kokkos::View( - src, SrcLayout(), i_tile0, i_tile1, i_tile2); -} - -// Rank 4 -template -KOKKOS_INLINE_FUNCTION - Kokkos::View, - P...> - tile_subview(const Kokkos::View< - T****, - Kokkos::Experimental::LayoutTiled< - OuterP, InnerP, N0, N1, N2, N3, N4, N5, N6, N7, true>, - P...>& src, - const size_t i_tile0, const size_t i_tile1, - const size_t i_tile2, const size_t i_tile3) { - // Force the specialized ViewMapping for extracting a tile - // by using the first subview argument as the layout. 
- using array_layout = - std::conditional_t<(InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, - Kokkos::LayoutRight>; - using SrcLayout = - Kokkos::Experimental::LayoutTiled; - - return Kokkos::View( - src, SrcLayout(), i_tile0, i_tile1, i_tile2, i_tile3); -} - -// Rank 5 -template -KOKKOS_INLINE_FUNCTION - Kokkos::View, - P...> - tile_subview(const Kokkos::View< - T*****, - Kokkos::Experimental::LayoutTiled< - OuterP, InnerP, N0, N1, N2, N3, N4, N5, N6, N7, true>, - P...>& src, - const size_t i_tile0, const size_t i_tile1, - const size_t i_tile2, const size_t i_tile3, - const size_t i_tile4) { - // Force the specialized ViewMapping for extracting a tile - // by using the first subview argument as the layout. - using array_layout = - std::conditional_t<(InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, - Kokkos::LayoutRight>; - using SrcLayout = - Kokkos::Experimental::LayoutTiled; - - return Kokkos::View( - src, SrcLayout(), i_tile0, i_tile1, i_tile2, i_tile3, i_tile4); -} - -// Rank 6 -template -KOKKOS_INLINE_FUNCTION - Kokkos::View, - P...> - tile_subview(const Kokkos::View< - T******, - Kokkos::Experimental::LayoutTiled< - OuterP, InnerP, N0, N1, N2, N3, N4, N5, N6, N7, true>, - P...>& src, - const size_t i_tile0, const size_t i_tile1, - const size_t i_tile2, const size_t i_tile3, - const size_t i_tile4, const size_t i_tile5) { - // Force the specialized ViewMapping for extracting a tile - // by using the first subview argument as the layout. 
- using array_layout = - std::conditional_t<(InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, - Kokkos::LayoutRight>; - using SrcLayout = - Kokkos::Experimental::LayoutTiled; - - return Kokkos::View( - src, SrcLayout(), i_tile0, i_tile1, i_tile2, i_tile3, i_tile4, i_tile5); -} - -// Rank 7 -template -KOKKOS_INLINE_FUNCTION - Kokkos::View, - P...> - tile_subview(const Kokkos::View< - T*******, - Kokkos::Experimental::LayoutTiled< - OuterP, InnerP, N0, N1, N2, N3, N4, N5, N6, N7, true>, - P...>& src, - const size_t i_tile0, const size_t i_tile1, - const size_t i_tile2, const size_t i_tile3, - const size_t i_tile4, const size_t i_tile5, - const size_t i_tile6) { - // Force the specialized ViewMapping for extracting a tile - // by using the first subview argument as the layout. - using array_layout = - std::conditional_t<(InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, - Kokkos::LayoutRight>; - using SrcLayout = - Kokkos::Experimental::LayoutTiled; - - return Kokkos::View( - src, SrcLayout(), i_tile0, i_tile1, i_tile2, i_tile3, i_tile4, i_tile5, - i_tile6); -} - -// Rank 8 -template -KOKKOS_INLINE_FUNCTION - Kokkos::View, - P...> - tile_subview(const Kokkos::View< - T********, - Kokkos::Experimental::LayoutTiled< - OuterP, InnerP, N0, N1, N2, N3, N4, N5, N6, N7, true>, - P...>& src, - const size_t i_tile0, const size_t i_tile1, - const size_t i_tile2, const size_t i_tile3, - const size_t i_tile4, const size_t i_tile5, - const size_t i_tile6, const size_t i_tile7) { - // Force the specialized ViewMapping for extracting a tile - // by using the first subview argument as the layout. 
- using array_layout = - std::conditional_t<(InnerP == Kokkos::Iterate::Left), Kokkos::LayoutLeft, - Kokkos::LayoutRight>; - using SrcLayout = - Kokkos::Experimental::LayoutTiled; - - return Kokkos::View( - src, SrcLayout(), i_tile0, i_tile1, i_tile2, i_tile3, i_tile4, i_tile5, - i_tile6, i_tile7); -} - -} /* namespace Kokkos */ -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#endif /* #ifndef KOKKOS_EXPERIENTAL_VIEWLAYOUTTILE_HPP */ diff --git a/lib/kokkos/core/src/impl/Kokkos_ViewMapping.hpp b/lib/kokkos/core/src/impl/Kokkos_ViewMapping.hpp index 3217c76e38..10aaa63b7c 100644 --- a/lib/kokkos/core/src/impl/Kokkos_ViewMapping.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_ViewMapping.hpp @@ -17,6 +17,7 @@ #ifndef KOKKOS_EXPERIMENTAL_VIEW_MAPPING_HPP #define KOKKOS_EXPERIMENTAL_VIEW_MAPPING_HPP +#include #include #include @@ -34,6 +35,7 @@ #include #include #include +#include //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- @@ -647,34 +649,60 @@ struct ViewOffset< m_dim.N5 * m_dim.N6; } - // Stride with [ rank ] value is the total length + // Fill the target unbounded array s with the stride. + // This method differs from stride() in that it does not write the total + // length to the last index of the array. 
Preconditions: s must be an array of + // dimension_type::rank elements + // FIXME: The version of clang-format in CI fails from maybe_unused + // clang-format off + template + KOKKOS_INLINE_FUNCTION iType + stride_fill([[maybe_unused]] iType* const s) const { + iType n = 1; + if constexpr (0 < dimension_type::rank) { + s[0] = n; + n *= m_dim.N0; + } + if constexpr (1 < dimension_type::rank) { + s[1] = n; + n *= m_dim.N1; + } + if constexpr (2 < dimension_type::rank) { + s[2] = n; + n *= m_dim.N2; + } + if constexpr (3 < dimension_type::rank) { + s[3] = n; + n *= m_dim.N3; + } + if constexpr (4 < dimension_type::rank) { + s[4] = n; + n *= m_dim.N4; + } + if constexpr (5 < dimension_type::rank) { + s[5] = n; + n *= m_dim.N5; + } + if constexpr (6 < dimension_type::rank) { + s[6] = n; + n *= m_dim.N6; + } + if constexpr (7 < dimension_type::rank) { + s[7] = n; + n *= m_dim.N7; + } + return n; + } + // clang-format on + + // Fill the target unbounded array s with the stride and the total spanned + // size. This method differs from stride_fill() in that it writes the total + // spanned size to the last index of the array. 
Preconditions: s must be an + // array of dimension_type::rank + 1 elements Stride with [ rank ] value is + // the total length template KOKKOS_INLINE_FUNCTION void stride(iType* const s) const { - s[0] = 1; - if (0 < dimension_type::rank) { - s[1] = m_dim.N0; - } - if (1 < dimension_type::rank) { - s[2] = s[1] * m_dim.N1; - } - if (2 < dimension_type::rank) { - s[3] = s[2] * m_dim.N2; - } - if (3 < dimension_type::rank) { - s[4] = s[3] * m_dim.N3; - } - if (4 < dimension_type::rank) { - s[5] = s[4] * m_dim.N4; - } - if (5 < dimension_type::rank) { - s[6] = s[5] * m_dim.N5; - } - if (6 < dimension_type::rank) { - s[7] = s[6] * m_dim.N6; - } - if (7 < dimension_type::rank) { - s[8] = s[7] * m_dim.N7; - } + s[dimension_type::rank] = stride_fill(s); } //---------------------------------------- @@ -935,34 +963,59 @@ struct ViewOffset< m_dim.N6; } - // Stride with [ rank ] value is the total length + // Fill the target unbounded array s with the stride. + // This method differs from stride() in that it does not write the total + // length to the last index of the array. 
Preconditions: s must be an array of + // dimension_type::rank elements + // The version of clang-format in CI fails from maybe_unused + // clang-format off + template + KOKKOS_INLINE_FUNCTION iType + stride_fill([[maybe_unused]] iType* const s) const { + iType n = 1; + if constexpr (0 < dimension_type::rank) { + s[0] = n; + n *= m_stride; + } + if constexpr (1 < dimension_type::rank) { + s[1] = n; + n *= m_dim.N1; + } + if constexpr (2 < dimension_type::rank) { + s[2] = n; + n *= m_dim.N2; + } + if constexpr (3 < dimension_type::rank) { + s[3] = n; + n *= m_dim.N3; + } + if constexpr (4 < dimension_type::rank) { + s[4] = n; + n *= m_dim.N4; + } + if constexpr (5 < dimension_type::rank) { + s[5] = n; + n *= m_dim.N5; + } + if constexpr (6 < dimension_type::rank) { + s[6] = n; + n *= m_dim.N6; + } + if constexpr (7 < dimension_type::rank) { + s[7] = n; + n *= m_dim.N7; + } + return n; + } + // clang-format on + + // Fill the target unbounded array s with the stride and the total spanned + // size. This method differs from stride_fill() in that it writes the total + // spanned size to the last index of the array. 
Preconditions: s must be an + // array of dimension_type::rank + 1 elements template KOKKOS_INLINE_FUNCTION void stride(iType* const s) const { - s[0] = 1; - if (0 < dimension_type::rank) { - s[1] = m_stride; - } - if (1 < dimension_type::rank) { - s[2] = s[1] * m_dim.N1; - } - if (2 < dimension_type::rank) { - s[3] = s[2] * m_dim.N2; - } - if (3 < dimension_type::rank) { - s[4] = s[3] * m_dim.N3; - } - if (4 < dimension_type::rank) { - s[5] = s[4] * m_dim.N4; - } - if (5 < dimension_type::rank) { - s[6] = s[5] * m_dim.N5; - } - if (6 < dimension_type::rank) { - s[7] = s[6] * m_dim.N6; - } - if (7 < dimension_type::rank) { - s[8] = s[7] * m_dim.N7; - } + s[dimension_type::rank] = stride_fill(s); } //---------------------------------------- @@ -1286,42 +1339,58 @@ struct ViewOffset< m_dim.N1; } - // Stride with [ rank ] value is the total length + // Fill the target unbounded array s with the stride. + // This method differs from stride() in that it does not write the total + // length to the last index of the array. 
Preconditions: s must be an array of + // dimension_type::rank elements + // The version of clang-format in CI fails from maybe_unused + // clang-format off template - KOKKOS_INLINE_FUNCTION void stride(iType* const s) const { + KOKKOS_INLINE_FUNCTION iType + stride_fill([[maybe_unused]] iType* const s) const { size_type n = 1; - if (7 < dimension_type::rank) { + if constexpr (7 < dimension_type::rank) { s[7] = n; n *= m_dim.N7; } - if (6 < dimension_type::rank) { + if constexpr (6 < dimension_type::rank) { s[6] = n; n *= m_dim.N6; } - if (5 < dimension_type::rank) { + if constexpr (5 < dimension_type::rank) { s[5] = n; n *= m_dim.N5; } - if (4 < dimension_type::rank) { + if constexpr (4 < dimension_type::rank) { s[4] = n; n *= m_dim.N4; } - if (3 < dimension_type::rank) { + if constexpr (3 < dimension_type::rank) { s[3] = n; n *= m_dim.N3; } - if (2 < dimension_type::rank) { + if constexpr (2 < dimension_type::rank) { s[2] = n; n *= m_dim.N2; } - if (1 < dimension_type::rank) { + if constexpr (1 < dimension_type::rank) { s[1] = n; n *= m_dim.N1; } - if (0 < dimension_type::rank) { + if constexpr (0 < dimension_type::rank) { s[0] = n; } - s[dimension_type::rank] = n * m_dim.N0; + return n * m_dim.N0; + } + // clang-format on + + // Fill the target unbounded array s with the stride and the total spanned + // size. This method differs from stride_fill() in that it writes the total + // spanned size to the last index of the array. Preconditions: s must be an + // array of dimension_type::rank + 1 elements + template + KOKKOS_INLINE_FUNCTION void stride(iType* const s) const { + s[dimension_type::rank] = stride_fill(s); } //---------------------------------------- @@ -1573,41 +1642,57 @@ struct ViewOffset< return m_stride; } - // Stride with [ rank ] value is the total length + // Fill the target unbounded array s with the stride. + // This method differs from stride() in that it does not write the total + // length to the last index of the array. 
Preconditions: s must be an array of + // dimension_type::rank elements + // The version of clang-format in CI fails from maybe_unused + // clang-format off template - KOKKOS_INLINE_FUNCTION void stride(iType* const s) const { + KOKKOS_INLINE_FUNCTION iType + stride_fill([[maybe_unused]] iType* const s) const { size_type n = 1; - if (7 < dimension_type::rank) { + if constexpr (7 < dimension_type::rank) { s[7] = n; n *= m_dim.N7; } - if (6 < dimension_type::rank) { + if constexpr (6 < dimension_type::rank) { s[6] = n; n *= m_dim.N6; } - if (5 < dimension_type::rank) { + if constexpr (5 < dimension_type::rank) { s[5] = n; n *= m_dim.N5; } - if (4 < dimension_type::rank) { + if constexpr (4 < dimension_type::rank) { s[4] = n; n *= m_dim.N4; } - if (3 < dimension_type::rank) { + if constexpr (3 < dimension_type::rank) { s[3] = n; n *= m_dim.N3; } - if (2 < dimension_type::rank) { + if constexpr (2 < dimension_type::rank) { s[2] = n; n *= m_dim.N2; } - if (1 < dimension_type::rank) { + if constexpr (1 < dimension_type::rank) { s[1] = n; } - if (0 < dimension_type::rank) { + if constexpr (0 < dimension_type::rank) { s[0] = m_stride; } - s[dimension_type::rank] = m_stride * m_dim.N0; + return m_stride * m_dim.N0; + } + // clang-format on + + // Fill the target unbounded array s with the stride and the total spanned + // size. This method differs from stride_fill() in that it writes the total + // spanned size to the last index of the array. Preconditions: s must be an + // array of dimension_type::rank + 1 elements + template + KOKKOS_INLINE_FUNCTION void stride(iType* const s) const { + s[dimension_type::rank] = stride_fill(s); } //---------------------------------------- @@ -2133,34 +2218,50 @@ struct ViewOffset { return m_stride.S7; } - // Stride with [ rank ] value is the total length + // Fill the target unbounded array s with the stride. + // This method differs from stride() in that it does not write the total + // length to the last index of the array. 
Preconditions: s must be an array of + // dimension_type::rank elements + // The version of clang-format in CI fails from maybe_unused + // clang-format off template - KOKKOS_INLINE_FUNCTION void stride(iType* const s) const { - if (0 < dimension_type::rank) { + KOKKOS_INLINE_FUNCTION iType + stride_fill([[maybe_unused]] iType* const s) const { + if constexpr (0 < dimension_type::rank) { s[0] = m_stride.S0; } - if (1 < dimension_type::rank) { + if constexpr (1 < dimension_type::rank) { s[1] = m_stride.S1; } - if (2 < dimension_type::rank) { + if constexpr (2 < dimension_type::rank) { s[2] = m_stride.S2; } - if (3 < dimension_type::rank) { + if constexpr (3 < dimension_type::rank) { s[3] = m_stride.S3; } - if (4 < dimension_type::rank) { + if constexpr (4 < dimension_type::rank) { s[4] = m_stride.S4; } - if (5 < dimension_type::rank) { + if constexpr (5 < dimension_type::rank) { s[5] = m_stride.S5; } - if (6 < dimension_type::rank) { + if constexpr (6 < dimension_type::rank) { s[6] = m_stride.S6; } - if (7 < dimension_type::rank) { + if constexpr (7 < dimension_type::rank) { s[7] = m_stride.S7; } - s[dimension_type::rank] = span(); + return span(); + } + // clang-format on + + // Fill the target unbounded array s with the stride and the total spanned + // size. This method differs from stride_fill() in that it writes the total + // spanned size to the last index of the array. 
Preconditions: s must be an + // array of dimension_type::rank + 1 elements + template + KOKKOS_INLINE_FUNCTION void stride(iType* const s) const { + s[dimension_type::rank] = stride_fill(s); } //---------------------------------------- @@ -2428,288 +2529,6 @@ struct ViewDataHandle< namespace Kokkos { namespace Impl { - -template -inline bool is_zero_byte(const T& t) { - using comparison_type = std::conditional_t< - sizeof(T) % sizeof(long long int) == 0, long long int, - std::conditional_t< - sizeof(T) % sizeof(long int) == 0, long int, - std::conditional_t< - sizeof(T) % sizeof(int) == 0, int, - std::conditional_t>>>; - const auto* const ptr = reinterpret_cast(&t); - for (std::size_t i = 0; i < sizeof(T) / sizeof(comparison_type); ++i) - if (ptr[i] != 0) return false; - return true; -} - -//---------------------------------------------------------------------------- - -/* - * The construction, assignment to default, and destruction - * are merged into a single functor. - * Primarily to work around an unresolved CUDA back-end bug - * that would lose the destruction cuda device function when - * called from the shared memory tracking destruction. - * Secondarily to have two fewer partial specializations. 
- */ -template ::value> -struct ViewValueFunctor; - -template -struct ViewValueFunctor { - using ExecSpace = typename DeviceType::execution_space; - - struct DestroyTag {}; - struct ConstructTag {}; - - ExecSpace space; - ValueType* ptr; - size_t n; - std::string name; - bool default_exec_space; - - template - KOKKOS_INLINE_FUNCTION - std::enable_if_t::value> - operator()(ConstructTag const&, const size_t i) const { - new (ptr + i) ValueType(); - } - - KOKKOS_INLINE_FUNCTION void operator()(DestroyTag const&, - const size_t i) const { - (ptr + i)->~ValueType(); - } - - ViewValueFunctor() = default; - ViewValueFunctor(const ViewValueFunctor&) = default; - ViewValueFunctor& operator=(const ViewValueFunctor&) = default; - - ViewValueFunctor(ExecSpace const& arg_space, ValueType* const arg_ptr, - size_t const arg_n, std::string arg_name) - : space(arg_space), - ptr(arg_ptr), - n(arg_n), - name(std::move(arg_name)), - default_exec_space(false) { - functor_instantiate_workaround(); - } - - ViewValueFunctor(ValueType* const arg_ptr, size_t const arg_n, - std::string arg_name) - : space(ExecSpace{}), - ptr(arg_ptr), - n(arg_n), - name(std::move(arg_name)), - default_exec_space(true) { - functor_instantiate_workaround(); - } - - template - std::enable_if_t::value && - std::is_trivially_copy_assignable::value> - construct_dispatch() { - ValueType value{}; -// On A64FX memset seems to do the wrong thing with regards to first touch -// leading to the significant performance issues -#ifndef KOKKOS_ARCH_A64FX - if (Impl::is_zero_byte(value)) { - uint64_t kpID = 0; - if (Kokkos::Profiling::profileLibraryLoaded()) { - // We are not really using parallel_for here but using beginParallelFor - // instead of begin_parallel_for (and adding "via memset") is the best - // we can do to indicate that this is not supposed to be tunable (and - // doesn't really execute a parallel_for). 
- Kokkos::Profiling::beginParallelFor( - "Kokkos::View::initialization [" + name + "] via memset", - Kokkos::Profiling::Experimental::device_id(space), &kpID); - } - (void)ZeroMemset( - space, Kokkos::View>(ptr, n)); - - if (Kokkos::Profiling::profileLibraryLoaded()) { - Kokkos::Profiling::endParallelFor(kpID); - } - if (default_exec_space) - space.fence("Kokkos::Impl::ViewValueFunctor: View init/destroy fence"); - } else { -#endif - parallel_for_implementation(); -#ifndef KOKKOS_ARCH_A64FX - } -#endif - } - - template - std::enable_if_t::value && - std::is_trivially_copy_assignable::value)> - construct_dispatch() { - parallel_for_implementation(); - } - - template - void parallel_for_implementation() { - using PolicyType = - Kokkos::RangePolicy, Tag>; - PolicyType policy(space, 0, n); - uint64_t kpID = 0; - if (Kokkos::Profiling::profileLibraryLoaded()) { - const std::string functor_name = - (std::is_same_v - ? "Kokkos::View::destruction [" + name + "]" - : "Kokkos::View::initialization [" + name + "]"); - Kokkos::Profiling::beginParallelFor( - functor_name, Kokkos::Profiling::Experimental::device_id(space), - &kpID); - } - -#ifdef KOKKOS_ENABLE_CUDA - if (std::is_same::value) { - Kokkos::Impl::cuda_prefetch_pointer(space, ptr, sizeof(ValueType) * n, - true); - } -#endif - const Kokkos::Impl::ParallelFor closure( - *this, policy); - closure.execute(); - if (default_exec_space || std::is_same_v) - space.fence("Kokkos::Impl::ViewValueFunctor: View init/destroy fence"); - if (Kokkos::Profiling::profileLibraryLoaded()) { - Kokkos::Profiling::endParallelFor(kpID); - } - } - - void construct_shared_allocation() { construct_dispatch(); } - - void destroy_shared_allocation() { - parallel_for_implementation(); - } - - // This function is to ensure that the functor with DestroyTag is instantiated - // This is a workaround to avoid "cudaErrorInvalidDeviceFunction" error later - // when the function is queried with cudaFuncGetAttributes - void functor_instantiate_workaround() 
{ -#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) || \ - defined(KOKKOS_ENABLE_SYCL) || defined(KOKKOS_ENABLE_OPENMPTARGET) - if (false) { - parallel_for_implementation(); - } -#endif - } -}; - -template -struct ViewValueFunctor { - using ExecSpace = typename DeviceType::execution_space; - using PolicyType = Kokkos::RangePolicy>; - - ExecSpace space; - ValueType* ptr; - size_t n; - std::string name; - bool default_exec_space; - - KOKKOS_INLINE_FUNCTION - void operator()(const size_t i) const { ptr[i] = ValueType(); } - - ViewValueFunctor() = default; - ViewValueFunctor(const ViewValueFunctor&) = default; - ViewValueFunctor& operator=(const ViewValueFunctor&) = default; - - ViewValueFunctor(ExecSpace const& arg_space, ValueType* const arg_ptr, - size_t const arg_n, std::string arg_name) - : space(arg_space), - ptr(arg_ptr), - n(arg_n), - name(std::move(arg_name)), - default_exec_space(false) {} - - ViewValueFunctor(ValueType* const arg_ptr, size_t const arg_n, - std::string arg_name) - : space(ExecSpace{}), - ptr(arg_ptr), - n(arg_n), - name(std::move(arg_name)), - default_exec_space(true) {} - - template - std::enable_if_t::value && - std::is_trivially_copy_assignable::value> - construct_shared_allocation() { - // Shortcut for zero initialization -// On A64FX memset seems to do the wrong thing with regards to first touch -// leading to the significant performance issues -#ifndef KOKKOS_ARCH_A64FX - ValueType value{}; - if (Impl::is_zero_byte(value)) { - uint64_t kpID = 0; - if (Kokkos::Profiling::profileLibraryLoaded()) { - // We are not really using parallel_for here but using beginParallelFor - // instead of begin_parallel_for (and adding "via memset") is the best - // we can do to indicate that this is not supposed to be tunable (and - // doesn't really execute a parallel_for). 
- Kokkos::Profiling::beginParallelFor( - "Kokkos::View::initialization [" + name + "] via memset", - Kokkos::Profiling::Experimental::device_id(space), &kpID); - } - - (void)ZeroMemset( - space, Kokkos::View>(ptr, n)); - - if (Kokkos::Profiling::profileLibraryLoaded()) { - Kokkos::Profiling::endParallelFor(kpID); - } - if (default_exec_space) - space.fence("Kokkos::Impl::ViewValueFunctor: View init/destroy fence"); - } else { -#endif - parallel_for_implementation(); -#ifndef KOKKOS_ARCH_A64FX - } -#endif - } - - template - std::enable_if_t::value && - std::is_trivially_copy_assignable::value)> - construct_shared_allocation() { - parallel_for_implementation(); - } - - void parallel_for_implementation() { - PolicyType policy(0, n); - uint64_t kpID = 0; - if (Kokkos::Profiling::profileLibraryLoaded()) { - Kokkos::Profiling::beginParallelFor( - "Kokkos::View::initialization [" + name + "]", - Kokkos::Profiling::Experimental::device_id(space), &kpID); - } -#ifdef KOKKOS_ENABLE_CUDA - if (std::is_same::value) { - Kokkos::Impl::cuda_prefetch_pointer(space, ptr, sizeof(ValueType) * n, - true); - } -#endif - const Kokkos::Impl::ParallelFor closure( - *this, PolicyType(0, n)); - closure.execute(); - if (default_exec_space) - space.fence( - "Kokkos::Impl::ViewValueFunctor: Fence after setting values in " - "view"); - if (Kokkos::Profiling::profileLibraryLoaded()) { - Kokkos::Profiling::endParallelFor(kpID); - } - } - - void destroy_shared_allocation() {} -}; - //---------------------------------------------------------------------------- /** \brief View mapping for non-specialized data type and standard layout */ template @@ -2814,11 +2633,24 @@ class ViewMapping< return m_impl_offset.stride_7(); } + // Fill the target unbounded array s with the stride and the total spanned + // size. This method differs from stride_fill() in that it writes the total + // spanned size to the last index of the array. 
Preconditions: s must be an + // array of dimension_type::rank + 1 elements template KOKKOS_INLINE_FUNCTION void stride(iType* const s) const { m_impl_offset.stride(s); } + // Fill the target unbounded array s with the stride. + // This method differs from stride() in that it does not write the total + // length to the last index of the array. Preconditions: s must be an array of + // dimension_type::rank elements + template + KOKKOS_INLINE_FUNCTION iType stride_fill(iType* const s) const { + return m_impl_offset.stride_fill(s); + } + //---------------------------------------- // Range span @@ -2993,10 +2825,12 @@ class ViewMapping< using memory_space = typename Traits::memory_space; static_assert( SpaceAccessibility::accessible); - using value_type = typename Traits::value_type; - using functor_type = - ViewValueFunctor, - value_type>; + using device_type = Kokkos::Device; + using value_type = typename Traits::value_type; + using functor_type = std::conditional_t< + alloc_prop::sequential_host_init, + ViewValueFunctorSequentialHostInit, + ViewValueFunctor>; using record_type = Kokkos::Impl::SharedAllocationRecord; @@ -3360,7 +3194,7 @@ struct SubViewDataTypeImpl> { }; /* for integral args, subview doesn't have that dimension */ -template struct SubViewDataTypeImpl< std::enable_if_t>::value>, @@ -3369,7 +3203,7 @@ struct SubViewDataTypeImpl< Kokkos::Experimental::Extents, Args...> {}; /* for ALL slice, subview has the same dimension */ -template +template struct SubViewDataTypeImpl, Kokkos::ALL_t, Args...> @@ -3380,7 +3214,7 @@ struct SubViewDataTypeImpl struct SubViewDataTypeImpl< std::enable_if_t::value>, ValueType, diff --git a/lib/kokkos/core/src/setup/Kokkos_Setup_Cuda.hpp b/lib/kokkos/core/src/setup/Kokkos_Setup_Cuda.hpp index 1130485e84..b2faccc527 100644 --- a/lib/kokkos/core/src/setup/Kokkos_Setup_Cuda.hpp +++ b/lib/kokkos/core/src/setup/Kokkos_Setup_Cuda.hpp @@ -56,6 +56,8 @@ #define KOKKOS_LAMBDA [=] __host__ __device__ #define KOKKOS_CLASS_LAMBDA [ =, 
*this ] __host__ __device__ +#define KOKKOS_DEDUCTION_GUIDE __host__ __device__ + #define KOKKOS_IMPL_FORCEINLINE_FUNCTION __device__ __host__ __forceinline__ #define KOKKOS_IMPL_FORCEINLINE __forceinline__ #define KOKKOS_IMPL_INLINE_FUNCTION __device__ __host__ inline diff --git a/lib/kokkos/core/src/setup/Kokkos_Setup_HIP.hpp b/lib/kokkos/core/src/setup/Kokkos_Setup_HIP.hpp index 7b01866107..a3c5000b33 100644 --- a/lib/kokkos/core/src/setup/Kokkos_Setup_HIP.hpp +++ b/lib/kokkos/core/src/setup/Kokkos_Setup_HIP.hpp @@ -27,6 +27,8 @@ #define KOKKOS_LAMBDA [=] __host__ __device__ #define KOKKOS_CLASS_LAMBDA [ =, *this ] __host__ __device__ +#define KOKKOS_DEDUCTION_GUIDE __host__ __device__ + #define KOKKOS_IMPL_FORCEINLINE_FUNCTION __device__ __host__ __forceinline__ #define KOKKOS_IMPL_INLINE_FUNCTION __device__ __host__ inline #define KOKKOS_IMPL_FUNCTION __device__ __host__ diff --git a/lib/kokkos/core/src/setup/Kokkos_Setup_SYCL.hpp b/lib/kokkos/core/src/setup/Kokkos_Setup_SYCL.hpp index 30f6fa2ad2..b117d75acb 100644 --- a/lib/kokkos/core/src/setup/Kokkos_Setup_SYCL.hpp +++ b/lib/kokkos/core/src/setup/Kokkos_Setup_SYCL.hpp @@ -45,4 +45,21 @@ #define KOKKOS_IMPL_SYCL_GET_MULTI_PTR(accessor) accessor.get_pointer() #endif +// FIXME_SYCL Use type directly once it has stabilized in SYCL. +namespace Kokkos::Impl { +#ifndef SYCL_EXT_INTEL_USM_ADDRESS_SPACES +#error SYCL_EXT_INTEL_USM_ADDRESS_SPACES undefined! 
+#elif SYCL_EXT_INTEL_USM_ADDRESS_SPACES >= 2 +template +using sycl_device_ptr = sycl::ext::intel::device_ptr; +template +using sycl_host_ptr = sycl::ext::intel::host_ptr; +#else +template +using sycl_device_ptr = sycl::device_ptr; +template +using sycl_host_ptr = sycl::host_ptr; +#endif +} // namespace Kokkos::Impl + #endif diff --git a/lib/kokkos/core/unit_test/CMakeLists.txt b/lib/kokkos/core/unit_test/CMakeLists.txt index 6dfb7505c5..f821581872 100644 --- a/lib/kokkos/core/unit_test/CMakeLists.txt +++ b/lib/kokkos/core/unit_test/CMakeLists.txt @@ -93,6 +93,9 @@ SET(COMPILE_ONLY_SOURCES TestViewTypeTraits.cpp TestTypeList.cpp TestMDRangePolicyCTAD.cpp + TestTeamPolicyCTAD.cpp + TestTeamMDRangePolicyCTAD.cpp + TestNestedReducerCTAD.cpp view/TestExtentsDatatypeConversion.cpp ) @@ -105,6 +108,9 @@ endif() IF(KOKKOS_HAS_TRILINOS) LIST(REMOVE_ITEM COMPILE_ONLY_SOURCES TestInterOp.cpp) ENDIF() +if(Kokkos_ENABLE_OPENMPTARGET) + list(REMOVE_ITEM COMPILE_ONLY_SOURCES TestNestedReducerCTAD.cpp) +endif() KOKKOS_ADD_EXECUTABLE( CoreTestCompileOnly SOURCES @@ -148,8 +154,10 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;OpenMPTarget;OpenACC;HIP;SYCL) Crs DeepCopyAlignment ExecSpacePartitioning + ExecSpaceThreadSafety ExecutionSpace FunctorAnalysis + Graph HostSharedPtr HostSharedPtrAccessOnDevice Init @@ -173,7 +181,7 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;OpenMPTarget;OpenACC;HIP;SYCL) endforeach() set(${Tag}_SOURCES1B) - foreach(Name + set(${Tag}_TESTNAMES1B MDRange_a MDRange_b MDRange_c @@ -184,6 +192,8 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;OpenMPTarget;OpenACC;HIP;SYCL) MDRangePolicyConstructors MDRangeReduce MDSpan + MDSpanAtomicAccessor + MDSpanConversion MinMaxClamp NumericTraits OccupancyControlTrait @@ -203,8 +213,19 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;OpenMPTarget;OpenACC;HIP;SYCL) Reductions Reductions_DeviceView SharedAlloc + SpaceAwareAccessorAccessViolation + SpaceAwareAccessor Swap ) + IF (NOT Kokkos_ENABLE_IMPL_MDSPAN) + 
LIST(REMOVE_ITEM ${Tag}_TESTNAMES1B + MDSpanAtomicAccessor + MDSpanConversion + SpaceAwareAccessorAccessViolation + SpaceAwareAccessor + ) + ENDIF() + foreach(Name IN LISTS ${Tag}_TESTNAMES1B) set(file ${dir}/Test${Tag}_${Name}.cpp) # Write to a temporary intermediate file and call configure_file to avoid # updating timestamps triggering unnecessary rebuilds on subsequent cmake runs. @@ -217,7 +238,7 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;OpenMPTarget;OpenACC;HIP;SYCL) endforeach() SET(${Tag}_SOURCES2A) - foreach(Name + SET(${Tag}_TESTNAMES2A TeamBasic TeamCombinedReducers TeamMDRange @@ -234,8 +255,10 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;OpenMPTarget;OpenACC;HIP;SYCL) ViewAPI_c ViewAPI_d ViewAPI_e + ViewBadAlloc ViewCopy_a ViewCopy_b + ViewCopy_c ViewCtorDimMatch ViewEmptyRuntimeUnmanaged ViewHooks @@ -245,11 +268,21 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;OpenMPTarget;OpenACC;HIP;SYCL) ViewMapping_subview ViewMemoryAccessViolation ViewOfClass + ViewOfViews ViewOutOfBoundsAccess ViewResize WorkGraph WithoutInitializing ) + # Workaround to internal compiler error with intel classic compilers + # when using -no-ip flag in ViewCopy_c + # See issue: https://github.com/kokkos/kokkos/issues/7084 + IF(KOKKOS_CXX_COMPILER_ID STREQUAL Intel) + LIST(REMOVE_ITEM ${Tag}_TESTNAMES2A + ViewCopy_c + ) + endif() + foreach(Name IN LISTS ${Tag}_TESTNAMES2A) set(file ${dir}/Test${Tag}_${Name}.cpp) # Write to a temporary intermediate file and call configure_file to avoid # updating timestamps triggering unnecessary rebuilds on subsequent cmake runs. 
@@ -353,6 +386,7 @@ foreach(PairDeviceSpace HIP-HostPinned;HIP-Managed;Cuda-HostPinned;Cuda-UVM;SYCL ViewAPI_e ViewCopy_a ViewCopy_b + ViewCopy_c ViewMapping_a ViewMapping_b ViewMapping_subview @@ -648,12 +682,6 @@ if(Kokkos_ENABLE_SERIAL) UnitTestMainInit.cpp ${Serial_SOURCES2} ) - KOKKOS_ADD_EXECUTABLE_AND_TEST( - CoreUnitTest_SerialGraph - SOURCES - UnitTestMainInit.cpp - serial/TestSerial_Graph.cpp - ) endif() if(Kokkos_ENABLE_THREADS) @@ -681,12 +709,6 @@ if (Kokkos_ENABLE_OPENMP) UnitTestMain.cpp openmp/TestOpenMP_InterOp.cpp ) - KOKKOS_ADD_EXECUTABLE_AND_TEST( - CoreUnitTest_OpenMPGraph - SOURCES - UnitTestMainInit.cpp - openmp/TestOpenMP_Graph.cpp - ) endif() if(Kokkos_ENABLE_HPX) @@ -794,12 +816,6 @@ if(Kokkos_ENABLE_CUDA) UnitTestMainInit.cpp cuda/TestCuda_InterOp_StreamsMultiGPU.cpp ) - KOKKOS_ADD_EXECUTABLE_AND_TEST( - CoreUnitTest_CudaGraph - SOURCES - UnitTestMainInit.cpp - cuda/TestCuda_Graph.cpp - ) endif() if(Kokkos_ENABLE_HIP) @@ -827,12 +843,6 @@ if(Kokkos_ENABLE_HIP) UnitTestMain.cpp hip/TestHIP_InterOp_Streams.cpp ) - KOKKOS_ADD_EXECUTABLE_AND_TEST( - UnitTest_HIPGraph - SOURCES - UnitTestMainInit.cpp - hip/TestHIP_Graph.cpp - ) endif() if(Kokkos_ENABLE_SYCL) @@ -902,15 +912,21 @@ if(Kokkos_ENABLE_SYCL) KOKKOS_ADD_EXECUTABLE_AND_TEST( CoreUnitTest_SYCLInterOpInit_Context SOURCES - UnitTestMainInit.cpp + UnitTestMainInit.cpp sycl/TestSYCL_InterOp_Init_Context.cpp ) KOKKOS_ADD_EXECUTABLE_AND_TEST( CoreUnitTest_SYCLInterOpStreams SOURCES - UnitTestMain.cpp + UnitTestMain.cpp sycl/TestSYCL_InterOp_Streams.cpp ) + KOKKOS_ADD_EXECUTABLE_AND_TEST( + CoreUnitTest_SYCLInterOpStreamsMultiGPU + SOURCES + UnitTestMainInit.cpp + sycl/TestSYCL_InterOp_StreamsMultiGPU.cpp + ) endif() SET(DEFAULT_DEVICE_SOURCES @@ -993,6 +1009,13 @@ KOKKOS_ADD_EXECUTABLE_AND_TEST( UnitTest_PushFinalizeHook.cpp ) +KOKKOS_ADD_EXECUTABLE_AND_TEST( + CoreUnitTest_ScopeGuard + SOURCES + UnitTestMain.cpp + UnitTest_ScopeGuard.cpp +) + # This test is intended for development and 
debugging by putting code # into TestDefaultDeviceDevelop.cpp. By default its empty. KOKKOS_ADD_EXECUTABLE_AND_TEST( @@ -1002,23 +1025,35 @@ KOKKOS_ADD_EXECUTABLE_AND_TEST( default/TestDefaultDeviceDevelop.cpp ) -# This test is special, because it passes exactly when it prints the -# message "PASSED: I am the custom std::terminate handler.", AND calls -# std::terminate. This means that we can't use -# KOKKOS_ADD_EXECUTABLE_AND_TEST. See GitHub issue #2147. +# With MSVC, the terminate handler is called and prints the message but the +# program does not seem to exit and we get a timeout with ctest. +if (NOT WIN32) + # This test is special, because it passes exactly when it prints the + # message "PASSED: I am the custom std::terminate handler.", AND calls + # std::terminate. This means that we can't use + # KOKKOS_ADD_EXECUTABLE_AND_TEST. See GitHub issue #2147. + KOKKOS_ADD_TEST_EXECUTABLE( + CoreUnitTest_PushFinalizeHookTerminate + SOURCES UnitTest_PushFinalizeHook_terminate.cpp + ) + add_test( + NAME Kokkos_CoreUnitTest_PushFinalizeHookTerminateRegex + COMMAND ${CMAKE_COMMAND} -E env $ + ) + set_property( + TEST Kokkos_CoreUnitTest_PushFinalizeHookTerminateRegex + PROPERTY PASS_REGULAR_EXPRESSION "PASSED: I am the custom std::terminate handler." + ) + add_test( + NAME Kokkos_CoreUnitTest_PushFinalizeHookTerminateFails + COMMAND ${CMAKE_COMMAND} -E env $ + ) + set_property( + TEST Kokkos_CoreUnitTest_PushFinalizeHookTerminateFails + PROPERTY WILL_FAIL TRUE + ) +endif() -KOKKOS_ADD_TEST_EXECUTABLE( push_finalize_hook_terminate - SOURCES UnitTest_PushFinalizeHook_terminate.cpp -) - -KOKKOS_ADD_ADVANCED_TEST( CoreUnitTest_PushFinalizeHook_terminate - TEST_0 - EXEC push_finalize_hook_terminate - NUM_MPI_PROCS 1 - PASS_REGULAR_EXPRESSION - "PASSED: I am the custom std::terminate handler." 
- ALWAYS_FAIL_ON_ZERO_RETURN -) if(KOKKOS_ENABLE_TUNING) KOKKOS_ADD_EXECUTABLE_AND_TEST( CoreUnitTest_TuningBuiltins @@ -1243,7 +1278,7 @@ if (NOT KOKKOS_HAS_TRILINOS) ) add_test( NAME Kokkos_CoreUnitTest_DeviceAndThreads - COMMAND ${Python3_EXECUTABLE} -m unittest -v $/TestDeviceAndThreads.py + COMMAND ${Python3_EXECUTABLE} $/TestDeviceAndThreads.py -v ) endif() endif() diff --git a/lib/kokkos/core/unit_test/Makefile b/lib/kokkos/core/unit_test/Makefile index 202809d3fc..a4d65687e5 100644 --- a/lib/kokkos/core/unit_test/Makefile +++ b/lib/kokkos/core/unit_test/Makefile @@ -62,7 +62,7 @@ else STACK_TRACE_TERMINATE_FILTER := endif -TESTS = AtomicOperations_int AtomicOperations_unsignedint AtomicOperations_longint AtomicOperations_unsignedlongint AtomicOperations_longlongint AtomicOperations_double AtomicOperations_float AtomicOperations_complexdouble AtomicOperations_complexfloat AtomicViews Atomics BlockSizeDeduction Concepts Complex Crs DeepCopyAlignment FunctorAnalysis Init LocalDeepCopy MDRange_a MDRange_b MDRange_c MDRange_d MDRange_e MDRange_f Other ParallelScanRangePolicy RangePolicy RangePolicyRequire Reductions Reducers_a Reducers_b Reducers_c Reducers_d Reducers_e Reductions_DeviceView SharedAlloc TeamBasic TeamReductionScan TeamScratch TeamTeamSize TeamVectorRange UniqueToken ViewAPI_a ViewAPI_b ViewAPI_c ViewAPI_d ViewAPI_e ViewCopy_a ViewCopy_b ViewLayoutStrideAssignment ViewMapping_a ViewMapping_b ViewMapping_subview ViewOfClass WorkGraph View_64bit ViewResize +TESTS = AtomicOperations_int AtomicOperations_unsignedint AtomicOperations_longint AtomicOperations_unsignedlongint AtomicOperations_longlongint AtomicOperations_double AtomicOperations_float AtomicOperations_complexdouble AtomicOperations_complexfloat AtomicViews Atomics BlockSizeDeduction Concepts Complex Crs DeepCopyAlignment FunctorAnalysis Init LocalDeepCopy MDRange_a MDRange_b MDRange_c MDRange_d MDRange_e MDRange_f Other ParallelScanRangePolicy RangePolicy RangePolicyRequire Reductions 
Reducers_a Reducers_b Reducers_c Reducers_d Reducers_e Reductions_DeviceView SharedAlloc TeamBasic TeamReductionScan TeamScratch TeamTeamSize TeamVectorRange UniqueToken ViewAPI_a ViewAPI_b ViewAPI_c ViewAPI_d ViewAPI_e ViewCopy_a ViewCopy_b ViewCopy_c ViewLayoutStrideAssignment ViewMapping_a ViewMapping_b ViewMapping_subview ViewOfClass WorkGraph View_64bit ViewResize tmp := $(foreach device, $(KOKKOS_DEVICELIST), \ tmp2 := $(foreach test, $(TESTS), \ @@ -73,7 +73,7 @@ tmp := $(foreach device, $(KOKKOS_DEVICELIST), \ ) \ ) -GPU_SPACE_TESTS = SharedAlloc ViewAPI_a ViewAPI_b ViewAPI_c ViewAPI_d ViewAPI_e ViewCopy_a ViewCopy_b ViewMapping_a ViewMapping_b ViewMapping_subview +GPU_SPACE_TESTS = SharedAlloc ViewAPI_a ViewAPI_b ViewAPI_c ViewAPI_d ViewAPI_e ViewCopy_a ViewCopy_b ViewCopy_c ViewMapping_a ViewMapping_b ViewMapping_subview SUBVIEW_TESTS = SubView_a SubView_b SubView_c01 SubView_c02 SubView_c03 SubView_c04 SubView_c05 SubView_c06 SubView_c07 SubView_c08 SubView_c09 SubView_c10 SubView_c11 SubView_c12 SubView_c13 @@ -110,14 +110,14 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) OBJ_CUDA += TestCuda_Init.o OBJ_CUDA += TestCuda_SharedAlloc.o TestCudaUVM_SharedAlloc.o TestCudaHostPinned_SharedAlloc.o OBJ_CUDA += TestCuda_RangePolicy.o TestCuda_RangePolicyRequire.o - OBJ_CUDA += TestCuda_ViewAPI_a.o TestCuda_ViewAPI_b.o TestCuda_ViewAPI_c.o TestCuda_ViewAPI_d.o TestCuda_ViewAPI_e.o TestCuda_ViewCopy_a.o TestCuda_ViewCopy_b.o + OBJ_CUDA += TestCuda_ViewAPI_a.o TestCuda_ViewAPI_b.o TestCuda_ViewAPI_c.o TestCuda_ViewAPI_d.o TestCuda_ViewAPI_e.o TestCuda_ViewCopy_a.o TestCuda_ViewCopy_b.o TestCuda_ViewCopy_c.o OBJ_CUDA += TestCuda_DeepCopyAlignment.o OBJ_CUDA += TestCuda_ViewMapping_a.o TestCuda_ViewMapping_b.o TestCuda_ViewMapping_subview.o TestCuda_ViewResize.o TestCuda_ViewLayoutStrideAssignment.o OBJ_CUDA += TestCudaUVM_ViewAPI_a.o TestCudaUVM_ViewAPI_b.o TestCudaUVM_ViewAPI_c.o TestCudaUVM_ViewAPI_d.o TestCudaUVM_ViewAPI_e.o - OBJ_CUDA += TestCudaUVM_ViewCopy_a.o 
TestCudaUVM_ViewCopy_b.o + OBJ_CUDA += TestCudaUVM_ViewCopy_a.o TestCudaUVM_ViewCopy_b.o TestCudaUVM_ViewCopy_c.o OBJ_CUDA += TestCudaUVM_ViewMapping_a.o TestCudaUVM_ViewMapping_b.o TestCudaUVM_ViewMapping_subview.o OBJ_CUDA += TestCudaHostPinned_ViewAPI_a.o TestCudaHostPinned_ViewAPI_b.o TestCudaHostPinned_ViewAPI_c.o TestCudaHostPinned_ViewAPI_d.o TestCudaHostPinned_ViewAPI_e.o - OBJ_CUDA += TestCudaHostPinned_ViewCopy_a.o TestCudaHostPinned_ViewCopy_b.o + OBJ_CUDA += TestCudaHostPinned_ViewCopy_a.o TestCudaHostPinned_ViewCopy_b.o TestCudaHostPinned_ViewCopy_c.o OBJ_CUDA += TestCudaHostPinned_ViewMapping_a.o TestCudaHostPinned_ViewMapping_b.o TestCudaHostPinned_ViewMapping_subview.o OBJ_CUDA += TestCuda_View_64bit.o OBJ_CUDA += TestCuda_ViewOfClass.o @@ -162,7 +162,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1) OBJ_THREADS += TestThreads_RangePolicy.o TestThreads_RangePolicyRequire.o OBJ_THREADS += TestThreads_View_64bit.o OBJ_THREADS += TestThreads_ViewAPI_a.o TestThreads_ViewAPI_b.o TestThreads_ViewAPI_c.o TestThreads_ViewAPI_d.o TestThreads_ViewAPI_e.o - OBJ_THREADS += TestThreads_ViewCopy_a.o TestThreads_ViewCopy_b.o + OBJ_THREADS += TestThreads_ViewCopy_a.o TestThreads_ViewCopy_b.o TestThreads_ViewCopy_c.o OBJ_THREADS += TestThreads_DeepCopyAlignment.o OBJ_THREADS += TestThreads_ViewMapping_a.o TestThreads_ViewMapping_b.o TestThreads_ViewMapping_subview.o TestThreads_ViewResize.o TestThreads_ViewLayoutStrideAssignment.o OBJ_THREADS += TestThreads_ViewOfClass.o @@ -198,7 +198,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) OBJ_OPENMP += TestOpenMP_RangePolicy.o TestOpenMP_RangePolicyRequire.o OBJ_OPENMP += TestOpenMP_View_64bit.o OBJ_OPENMP += TestOpenMP_ViewAPI_a.o TestOpenMP_ViewAPI_b.o TestOpenMP_ViewAPI_c.o TestOpenMP_ViewAPI_d.o TestOpenMP_ViewAPI_e.o - OBJ_OPENMP += TestOpenMP_DeepCopyAlignment.o TestOpenMP_ViewCopy_a.o TestOpenMP_ViewCopy_b.o + OBJ_OPENMP += TestOpenMP_DeepCopyAlignment.o TestOpenMP_ViewCopy_a.o TestOpenMP_ViewCopy_b.o 
TestOpenMP_ViewCopy_c.o OBJ_OPENMP += TestOpenMP_ViewMapping_a.o TestOpenMP_ViewMapping_b.o TestOpenMP_ViewMapping_subview.o TestOpenMP_ViewResize.o TestOpenMP_ViewLayoutStrideAssignment.o OBJ_OPENMP += TestOpenMP_ViewOfClass.o OBJ_OPENMP += TestOpenMP_SubView_a.o TestOpenMP_SubView_b.o @@ -237,7 +237,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) #OBJ_OPENMPTARGET += TestOpenMPTarget_SharedAlloc.o OBJ_OPENMPTARGET += TestOpenMPTarget_RangePolicy.o OBJ_OPENMPTARGET += TestOpenMPTarget_ViewAPI_a.o TestOpenMPTarget_ViewAPI_b.o TestOpenMPTarget_ViewAPI_c.o TestOpenMPTarget_ViewAPI_d.o #Some commented out code - #OBJ_OPENMPTARGET += TestOpenMPTarget_ViewAPI_e.o TestOpenMPTarget_ViewCopy_a.o TestOpenMPTarget_ViewCopy_b.o + #OBJ_OPENMPTARGET += TestOpenMPTarget_ViewAPI_e.o TestOpenMPTarget_ViewCopy_a.o TestOpenMPTarget_ViewCopy_b.o TestOpenMPTarget_ViewCopy_c.o OBJ_OPENMPTARGET += TestOpenMPTarget_DeepCopyAlignment.o OBJ_OPENMPTARGET += TestOpenMPTarget_ViewMapping_a.o OBJ_OPENMPTARGET += TestOpenMPTarget_ViewMapping_b.o @@ -292,7 +292,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1) OBJ_HIP += TestHIP_Memory_Requirements.o OBJ_HIP += TestHIP_ParallelScanRangePolicy.o OBJ_HIP += TestHIPHostPinned_ViewAPI_a.o TestHIPHostPinned_ViewAPI_b.o TestHIPHostPinned_ViewAPI_c.o TestHIPHostPinned_ViewAPI_d.o TestHIPHostPinned_ViewAPI_e.o - OBJ_HIP += TestHIPHostPinned_ViewCopy_a.o TestHIPHostPinned_ViewCopy_b.o + OBJ_HIP += TestHIPHostPinned_ViewCopy_a.o TestHIPHostPinned_ViewCopy_b.o TestHIPHostPinned_ViewCopy_c.o OBJ_HIP += TestHIPHostPinned_ViewMapping_a.o TestHIPHostPinned_ViewMapping_b.o TestHIPHostPinned_ViewMapping_subview.o TARGETS += KokkosCore_UnitTest_HIP @@ -307,7 +307,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) OBJ_HPX += TestHPX_RangePolicy.o TestHPX_RangePolicyRequire.o OBJ_HPX += TestHPX_View_64bit.o OBJ_HPX += TestHPX_ViewAPI_a.o TestHPX_ViewAPI_b.o TestHPX_ViewAPI_c.o TestHPX_ViewAPI_d.o TestHPX_ViewAPI_e.o - OBJ_HPX += TestHPX_ViewCopy_a.o TestHPX_ViewCopy_b.o + 
OBJ_HPX += TestHPX_ViewCopy_a.o TestHPX_ViewCopy_b.o TestHPX_ViewCopy_c.o OBJ_HPX += TestHPX_ViewMapping_a.o TestHPX_ViewMapping_b.o TestHPX_ViewMapping_subview.o TestHPX_ViewResize.o OBJ_HPX += TestHPX_ViewOfClass.o OBJ_HPX += TestHPX_SubView_a.o TestHPX_SubView_b.o @@ -347,7 +347,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) OBJ_SERIAL += TestSerial_RangePolicy.o TestSerial_RangePolicyRequire.o OBJ_SERIAL += TestSerial_View_64bit.o OBJ_SERIAL += TestSerial_ViewAPI_a.o TestSerial_ViewAPI_b.o TestSerial_ViewAPI_c.o TestSerial_ViewAPI_d.o TestSerial_ViewAPI_e.o - OBJ_SERIAL += TestSerial_DeepCopyAlignment.o TestSerial_ViewCopy_a.o TestSerial_ViewCopy_b.o + OBJ_SERIAL += TestSerial_DeepCopyAlignment.o TestSerial_ViewCopy_a.o TestSerial_ViewCopy_b.o TestSerial_ViewCopy_c.o OBJ_SERIAL += TestSerial_ViewMapping_a.o TestSerial_ViewMapping_b.o TestSerial_ViewMapping_subview.o TestSerial_ViewResize.o TestSerial_ViewLayoutStrideAssignment.o OBJ_SERIAL += TestSerial_ViewOfClass.o OBJ_SERIAL += TestSerial_SubView_a.o TestSerial_SubView_b.o diff --git a/lib/kokkos/core/unit_test/TestAggregate.hpp b/lib/kokkos/core/unit_test/TestAggregate.hpp deleted file mode 100644 index f1316a7426..0000000000 --- a/lib/kokkos/core/unit_test/TestAggregate.hpp +++ /dev/null @@ -1,108 +0,0 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER - -#ifndef TEST_AGGREGATE_HPP -#define TEST_AGGREGATE_HPP - -#include - -namespace Test { - -template -void TestViewAggregate() { - using value_type = Kokkos::Array; - using analysis_1d = - Kokkos::Impl::ViewDataAnalysis; - - static_assert( - std::is_same >::value); - - using a32_traits = Kokkos::ViewTraits; - using flat_traits = - Kokkos::ViewTraits; - - static_assert( - std::is_same >::value); - static_assert( - std::is_same::value); - static_assert(a32_traits::rank == 2); - static_assert(a32_traits::rank_dynamic == 2); - - static_assert(std::is_void::value); - static_assert(flat_traits::rank == 3); - static_assert(flat_traits::rank_dynamic == 2); - static_assert(flat_traits::dimension::N2 == 32); - - using a32_type = Kokkos::View **, DeviceType>; - using a32_flat_type = typename a32_type::array_type; - - static_assert(std::is_same::value); - static_assert(std::is_same::value); - static_assert(a32_type::rank == 2); - static_assert(a32_flat_type::rank == 3); - - a32_type x("test", 4, 5); - a32_flat_type y(x); - - ASSERT_EQ(x.extent(0), 4u); - ASSERT_EQ(x.extent(1), 5u); - ASSERT_EQ(y.extent(0), 4u); - ASSERT_EQ(y.extent(1), 5u); - ASSERT_EQ(y.extent(2), 32u); - - // Initialize arrays from brace-init-list as for std::array. - // - // Comment: Clang will issue the following warning if we don't use double - // braces here (one for initializing the Kokkos::Array and one for - // initializing the sub-aggreagate C-array data member), - // - // warning: suggest braces around initialization of subobject - // - // but single brace syntax would be valid as well. 
- Kokkos::Array aggregate_initialization_syntax_1 = {{1.41, 3.14}}; - ASSERT_FLOAT_EQ(aggregate_initialization_syntax_1[0], 1.41); - ASSERT_FLOAT_EQ(aggregate_initialization_syntax_1[1], 3.14); - - Kokkos::Array aggregate_initialization_syntax_2{ - {0, 1, 2}}; // since C++11 - for (int i = 0; i < 3; ++i) { - ASSERT_EQ(aggregate_initialization_syntax_2[i], i); - } - - // Note that this is a valid initialization. - Kokkos::Array initialized_with_one_argument_missing = {{255, 255}}; - for (int i = 0; i < 2; ++i) { - ASSERT_DOUBLE_EQ(initialized_with_one_argument_missing[i], 255); - } - // But the following line would not compile - // Kokkos::Array< double, 3 > initialized_with_too_many{ { 1, 2, 3, 4 } }; - - // The code below must compile for zero-sized arrays. - using T = float; - - constexpr int N = 0; - Kokkos::Array a; - for (int i = 0; i < N; ++i) { - a[i] = T(); - } -} - -TEST(TEST_CATEGORY, view_aggregate) { TestViewAggregate(); } - -} // namespace Test - -#endif /* #ifndef TEST_AGGREGATE_HPP */ diff --git a/lib/kokkos/core/unit_test/TestArray.cpp b/lib/kokkos/core/unit_test/TestArray.cpp index 673d0036b7..cb713a1782 100644 --- a/lib/kokkos/core/unit_test/TestArray.cpp +++ b/lib/kokkos/core/unit_test/TestArray.cpp @@ -15,9 +15,19 @@ //@HEADER #include +#include namespace { +// nvcc errors on variables only used in static_asserts +// Passing those variables to this function should eliminate the warning +template +KOKKOS_FUNCTION constexpr void maybe_unused(Ts&&...) 
{} + +template +using equality_comparable = + decltype(std::declval() == std::declval()); + KOKKOS_FUNCTION constexpr bool test_array() { constexpr Kokkos::Array a{{1, 2}}; @@ -49,17 +59,6 @@ KOKKOS_FUNCTION constexpr bool test_array_structured_binding_support() { static_assert(test_array_structured_binding_support()); -template -KOKKOS_FUNCTION constexpr bool is_equal(L const& l, R const& r) { - if (std::size(l) != std::size(r)) return false; - - for (size_t i = 0; i != std::size(l); ++i) { - if (l[i] != r[i]) return false; - } - - return true; -} - // Disable ctad test for intel versions < 2021, see issue #6702 #if !defined(KOKKOS_COMPILER_INTEL) || KOKKOS_COMPILER_INTEL >= 2021 KOKKOS_FUNCTION constexpr bool test_array_ctad() { @@ -67,10 +66,180 @@ KOKKOS_FUNCTION constexpr bool test_array_ctad() { constexpr Kokkos::Array a{1, 2, 3, 5, x}; constexpr Kokkos::Array b{1, 2, 3, 5, x}; - return std::is_same_v && is_equal(a, b); + return std::is_same_v && a == b; } static_assert(test_array_ctad()); #endif +KOKKOS_FUNCTION constexpr bool test_array_aggregate_initialization() { + // Initialize arrays from brace-init-list as for std::array. + + Kokkos::Array aggregate_initialization_syntax_1 = {1.41f, 3.14f}; + if ((aggregate_initialization_syntax_1[0] != 1.41f) || + (aggregate_initialization_syntax_1[1] != 3.14f)) + return false; + + Kokkos::Array aggregate_initialization_syntax_2{ + {0, 1, 2}}; // since C++11 + if ((aggregate_initialization_syntax_2[0] != 0) || + (aggregate_initialization_syntax_2[1] != 1) || + (aggregate_initialization_syntax_2[2] != 2)) + return false; + + // Note that this is a valid initialization. 
+ Kokkos::Array initialized_with_one_argument_missing = {{255, 255}}; + if ((initialized_with_one_argument_missing[0] != 255) || + (initialized_with_one_argument_missing[1] != 255) || + (initialized_with_one_argument_missing[2] != 0)) + return false; + + // But the following line would not compile + // Kokkos::Array< double, 3 > initialized_with_too_many{ { 1, 2, 3, 4 } }; + + return true; +} + +static_assert(test_array_aggregate_initialization()); + +// A few compilers, such as GCC 8.4, were erroring out when the function below +// appeared in a constant expression because +// Kokkos::Array::operator[] is non-constexpr. The issue +// disappears with GCC 9.1 (https://godbolt.org/z/TG4TEef1b). As a workaround, +// the static_assert was dropped and the [[maybe_unused]] is used as an attempt +// to silence warnings that the function is never used. +[[maybe_unused]] KOKKOS_FUNCTION void test_array_zero_sized() { + using T = float; + + // The code below must compile for zero-sized arrays. + constexpr int N = 0; + Kokkos::Array a; + for (int i = 0; i < N; ++i) { + a[i] = T(); + } +} + +constexpr bool test_array_const_qualified_element_type() { + Kokkos::Array a{255}; + return a[0] == 255; +} + +static_assert(test_array_const_qualified_element_type()); + +// User-defined type providing a specialization of kokkos_swap +struct MyInt { + int i; + + private: + friend constexpr KOKKOS_FUNCTION void kokkos_swap(MyInt& lhs, + MyInt& rhs) noexcept { + lhs.i = 255; + rhs.i = 127; + } +}; + +constexpr bool test_array_specialization_kokkos_swap() { + Kokkos::Array a{MyInt{1}, MyInt{2}}; + Kokkos::Array b{MyInt{11}, MyInt{22}}; + + // sanity check + if (a[0].i != 1 || a[1].i != 2 || b[0].i != 11 || b[1].i != 22) { + return false; + } + + using Kokkos::kokkos_swap; + kokkos_swap(a, b); + + // check that the user-defined kokkos_swap(MyInt) overload was called + if (a[0].i != 255 || a[1].i != 255 || b[0].i != 127 || b[1].i != 127) { + return false; + } + + return true; +} + 
+static_assert(test_array_specialization_kokkos_swap()); + +constexpr bool test_to_array() { + // copies a string literal + [[maybe_unused]] auto a1 = Kokkos::to_array("foo"); + static_assert(a1.size() == 4); + maybe_unused(a1); + + // deduces both element type and length + [[maybe_unused]] auto a2 = Kokkos::to_array({0, 2, 1, 3}); + static_assert(std::is_same_v>); + maybe_unused(a2); + +// gcc8, icc, and nvcc 11.3 do not support the implicit conversion +#if !(defined(KOKKOS_COMPILER_GNU) && (KOKKOS_COMPILER_GNU < 910)) && \ + !(defined(KOKKOS_COMPILER_INTEL) && (KOKKOS_COMPILER_INTEL < 2021)) && \ + !(defined(KOKKOS_COMPILER_NVCC) && (KOKKOS_COMPILER_NVCC < 1140)) + // deduces length with element type specified + // implicit conversion happens + [[maybe_unused]] auto a3 = Kokkos::to_array({0, 1, 3}); + static_assert(std::is_same_v>); + maybe_unused(a3); +#endif + + return true; +} + +static_assert(test_to_array()); + +constexpr bool test_array_equality_comparable() { + using C0 = Kokkos::Array; + using C2 = Kokkos::Array; + using C3 = Kokkos::Array; + using I0 = Kokkos::Array; + using I2 = Kokkos::Array; + using I3 = Kokkos::Array; + + static_assert(Kokkos::is_detected_v); + static_assert(!Kokkos::is_detected_v); + static_assert(!Kokkos::is_detected_v); + static_assert(!Kokkos::is_detected_v); + static_assert(!Kokkos::is_detected_v); + static_assert(!Kokkos::is_detected_v); + + static_assert(!Kokkos::is_detected_v); + static_assert(Kokkos::is_detected_v); + static_assert(!Kokkos::is_detected_v); + static_assert(!Kokkos::is_detected_v); + static_assert(!Kokkos::is_detected_v); + static_assert(!Kokkos::is_detected_v); + + static_assert(!Kokkos::is_detected_v); + static_assert(!Kokkos::is_detected_v); + static_assert(Kokkos::is_detected_v); + static_assert(!Kokkos::is_detected_v); + static_assert(!Kokkos::is_detected_v); + static_assert(!Kokkos::is_detected_v); + + static_assert(!Kokkos::is_detected_v); + static_assert(!Kokkos::is_detected_v); + 
static_assert(!Kokkos::is_detected_v); + static_assert(Kokkos::is_detected_v); + static_assert(!Kokkos::is_detected_v); + static_assert(!Kokkos::is_detected_v); + + static_assert(!Kokkos::is_detected_v); + static_assert(!Kokkos::is_detected_v); + static_assert(!Kokkos::is_detected_v); + static_assert(!Kokkos::is_detected_v); + static_assert(Kokkos::is_detected_v); + static_assert(!Kokkos::is_detected_v); + + static_assert(!Kokkos::is_detected_v); + static_assert(!Kokkos::is_detected_v); + static_assert(!Kokkos::is_detected_v); + static_assert(!Kokkos::is_detected_v); + static_assert(!Kokkos::is_detected_v); + static_assert(Kokkos::is_detected_v); + + return true; +} + +static_assert(test_array_equality_comparable()); + } // namespace diff --git a/lib/kokkos/core/unit_test/TestArrayOps.hpp b/lib/kokkos/core/unit_test/TestArrayOps.hpp index 0652857271..29a452b660 100644 --- a/lib/kokkos/core/unit_test/TestArrayOps.hpp +++ b/lib/kokkos/core/unit_test/TestArrayOps.hpp @@ -92,6 +92,31 @@ TEST(TEST_CATEGORY, array_element_access) { ASSERT_EQ(ca.data()[index], a[index]); } +TEST(TEST_CATEGORY, array_operator_equal) { + using A = Kokkos::Array; + constexpr A a{{3, 5}}; + constexpr A b{{3, 5}}; + constexpr A c{{5, 3}}; + + static_assert(a == b); + static_assert(!(a == c)); + static_assert(a != c); + + ASSERT_TRUE(a == b); + ASSERT_FALSE(a == c); + ASSERT_TRUE(a != c); + + using E = Kokkos::Array; + constexpr E e; + constexpr E f; + + static_assert(e == f); + static_assert(!(e != f)); + + ASSERT_TRUE(e == f); + ASSERT_FALSE(e != f); +} + TEST(TEST_CATEGORY, array_zero_capacity) { using A = Kokkos::Array; A e; @@ -111,6 +136,8 @@ TEST(TEST_CATEGORY, array_zero_data_nullptr) { ASSERT_EQ(ce.data(), nullptr); } +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 +KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_PUSH() TEST(TEST_CATEGORY, array_contiguous_capacity) { using A = Kokkos::Array::contiguous>; @@ -389,5 +416,7 @@ TEST(TEST_CATEGORY, array_strided_assignment) { ASSERT_EQ(e.max_size(), 
std::size(ee) / eStride); ASSERT_EQ(e[0], ee[0]); } +KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_POP() +#endif } // namespace diff --git a/lib/kokkos/core/unit_test/TestAtomicOperations.hpp b/lib/kokkos/core/unit_test/TestAtomicOperations.hpp index cd7ba47aa1..957ba9a7aa 100644 --- a/lib/kokkos/core/unit_test/TestAtomicOperations.hpp +++ b/lib/kokkos/core/unit_test/TestAtomicOperations.hpp @@ -459,9 +459,11 @@ bool AtomicOperationsTestIntegralType(int old_val_in, int update_in, int test) { case 12: return true; #else case 11: - return update_in >= 0 ? atomic_op_test( - old_val, update) - : true; + return (std::make_signed_t(update_in) >= 0 && + std::make_signed_t(old_val) >= 0) + ? atomic_op_test(old_val, + update) + : true; case 12: return update_in >= 0 ? atomic_op_test( old_val, update) diff --git a/lib/kokkos/core/unit_test/TestBitManipulationBuiltins.hpp b/lib/kokkos/core/unit_test/TestBitManipulationBuiltins.hpp index 2f3bcfe817..fe015404f1 100644 --- a/lib/kokkos/core/unit_test/TestBitManipulationBuiltins.hpp +++ b/lib/kokkos/core/unit_test/TestBitManipulationBuiltins.hpp @@ -827,12 +827,6 @@ struct TestBitCastFunction { } } -#if defined(KOKKOS_ENABLE_CUDA) && \ - defined(KOKKOS_COMPILER_NVHPC) // FIXME_NVHPC 23.7 - if constexpr (std::is_same_v) { - return; - } -#endif struct S { int i; diff --git a/lib/kokkos/core/unit_test/TestComplex.hpp b/lib/kokkos/core/unit_test/TestComplex.hpp index 5501a35b7f..ef6a21cd37 100644 --- a/lib/kokkos/core/unit_test/TestComplex.hpp +++ b/lib/kokkos/core/unit_test/TestComplex.hpp @@ -15,9 +15,26 @@ //@HEADER #include -#include #include +// Suppress "'long double' is treated as 'double' in device code" +#ifdef KOKKOS_COMPILER_NVCC +#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ +#pragma nv_diagnostic push +#pragma nv_diag_suppress 20208 +#else +#ifdef __CUDA_ARCH__ +#pragma diagnostic push +#pragma diag_suppress 20208 +#endif +#endif +#endif + +namespace { +template +KOKKOS_FUNCTION constexpr void maybe_unused(Ts &&...) 
noexcept {} +} // namespace + namespace Test { // Test construction and assignment @@ -532,4 +549,151 @@ TEST(TEST_CATEGORY, complex_operations_arithmetic_types_overloads) { Kokkos::complex>::value)); } +template +struct TestComplexStructuredBindings { + using exec_space = ExecSpace; + using value_type = double; + using complex_type = Kokkos::complex; + using device_view_type = Kokkos::View; + using host_view_type = typename device_view_type::HostMirror; + + device_view_type d_results; + host_view_type h_results; + + // tuple_size + static_assert(std::is_same_v::type, + std::integral_constant>); + + // tuple_element + static_assert( + std::is_same_v, value_type>); + static_assert( + std::is_same_v, value_type>); + + static void testgetreturnreferencetypes() { + complex_type m; + const complex_type c; + + // get lvalue + complex_type &ml = m; + static_assert(std::is_same_v(ml)), value_type &>); + static_assert(std::is_same_v(ml)), value_type &>); + + // get rvalue + complex_type &&mr = std::move(m); + static_assert( + std::is_same_v(std::move(mr))), value_type &&>); + static_assert( + std::is_same_v(std::move(mr))), value_type &&>); + + // get const lvalue + const complex_type &cl = c; + static_assert( + std::is_same_v(cl)), value_type const &>); + static_assert( + std::is_same_v(cl)), value_type const &>); + + // get const rvalue + complex_type const &&cr = std::move(c); + static_assert(std::is_same_v(std::move(cr))), + value_type const &&>); + static_assert(std::is_same_v(std::move(cr))), + value_type const &&>); + + maybe_unused(m, c, ml, mr, cl, cr); + } + + void testit() { + testgetreturnreferencetypes(); + + d_results = device_view_type("TestComplexStructuredBindings", 6); + h_results = Kokkos::create_mirror_view(d_results); + + Kokkos::parallel_for(Kokkos::RangePolicy(0, 1), *this); + Kokkos::fence(); + Kokkos::deep_copy(h_results, d_results); + + // get lvalue + ASSERT_FLOAT_EQ(h_results[0].real(), 2.); + ASSERT_FLOAT_EQ(h_results[0].imag(), 3.); + + // get 
rvalue + ASSERT_FLOAT_EQ(h_results[1].real(), 2.); + ASSERT_FLOAT_EQ(h_results[1].imag(), 3.); + + // get const lvalue + ASSERT_FLOAT_EQ(h_results[2].real(), 5.); + ASSERT_FLOAT_EQ(h_results[2].imag(), 7.); + + // get const rvalue + ASSERT_FLOAT_EQ(h_results[3].real(), 5.); + ASSERT_FLOAT_EQ(h_results[3].imag(), 7.); + + // swap real and imaginary + ASSERT_FLOAT_EQ(h_results[4].real(), 11.); + ASSERT_FLOAT_EQ(h_results[4].imag(), 13.); + ASSERT_FLOAT_EQ(h_results[5].real(), 13.); + ASSERT_FLOAT_EQ(h_results[5].imag(), 11.); + } + + KOKKOS_FUNCTION + void operator()(int) const { + complex_type m(2., 3.); + const complex_type c(5., 7.); + + // get lvalue + { + complex_type &ml = m; + auto &[mlr, mli] = ml; + d_results[0] = complex_type(mlr, mli); + } + + // get rvalue + { + complex_type &&mr = std::move(m); + auto &&[mrr, mri] = std::move(mr); + d_results[1] = complex_type(mrr, mri); + } + + // get const lvalue + { + const complex_type &cl = c; + auto &[clr, cli] = cl; + d_results[2] = complex_type(clr, cli); + } + + // get const rvalue + { + complex_type const &&cr = std::move(c); + auto &&[crr, cri] = std::move(cr); + d_results[3] = complex_type(crr, cri); + } + + // swap real and imaginary + { + complex_type z(11., 13.); + d_results[4] = z; + + auto &[zr, zi] = z; + Kokkos::kokkos_swap(zr, zi); + d_results[5] = z; + } + } +}; + +TEST(TEST_CATEGORY, complex_structured_bindings) { + TestComplexStructuredBindings test; + test.testit(); +} + } // namespace Test + +#ifdef KOKKOS_COMPILER_NVCC +#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ +#pragma nv_diagnostic pop +#else +#ifdef __CUDA_ARCH__ +#pragma diagnostic pop +#endif +#endif +#endif diff --git a/lib/kokkos/core/unit_test/TestExecSpaceThreadSafety.hpp b/lib/kokkos/core/unit_test/TestExecSpaceThreadSafety.hpp new file mode 100644 index 0000000000..a83355c51f --- /dev/null +++ b/lib/kokkos/core/unit_test/TestExecSpaceThreadSafety.hpp @@ -0,0 +1,327 @@ +//@HEADER +// 
************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#include +#include + +namespace { + +#ifdef KOKKOS_ENABLE_OPENMP +template +void run_threaded_test(const Lambda1 l1, const Lambda2 l2) { +#pragma omp parallel num_threads(2) + { + if (omp_get_thread_num() == 0) l1(); + if (omp_get_thread_num() == 1) l2(); + } +} +// We cannot run the multithreaded test when threads or HPX is enabled because +// we cannot launch a thread from inside another thread +#elif !defined(KOKKOS_ENABLE_THREADS) && !defined(KOKKOS_ENABLE_HPX) +template +void run_threaded_test(const Lambda1 l1, const Lambda2 l2) { + std::thread t1(l1); + std::thread t2(l2); + t1.join(); + t2.join(); +} +#else +template +void run_threaded_test(const Lambda1 l1, const Lambda2 l2) { + l1(); + l2(); +} +#endif + +// The idea for all of these tests is to access a View from kernels submitted by +// two different threads to the same execution space instance. If the kernels +// are executed concurrently, we expect to count too many increments. 
+void run_exec_space_thread_safety_range() { + constexpr int N = 10000000; + constexpr int M = 10; + + Kokkos::View view("view"); + Kokkos::View error("error"); + + auto lambda = [=]() { + TEST_EXECSPACE exec; + for (int j = 0; j < M; ++j) { + Kokkos::parallel_for( + Kokkos::RangePolicy(exec, 0, 1), KOKKOS_LAMBDA(int) { + Kokkos::atomic_store(view.data(), 0); + for (int i = 0; i < N; ++i) Kokkos::atomic_inc(view.data()); + if (Kokkos::atomic_load(view.data()) != N) + Kokkos::atomic_store(error.data(), 1); + }); + } + }; + + run_threaded_test(lambda, lambda); + + auto host_error = + Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, error); + ASSERT_EQ(host_error(), 0); +} + +TEST(TEST_CATEGORY, exec_space_thread_safety_range) { +#ifdef KOKKOS_ENABLE_OPENACC // FIXME_OPENACC + if (std::is_same_v) + GTEST_SKIP() << "skipping since test is known to fail with OpenACC"; +#endif +#ifdef KOKKOS_ENABLE_OPENMPTARGET + if (std::is_same_v) + GTEST_SKIP() << "skipping since test is known to fail for OpenMPTarget"; +#endif + run_exec_space_thread_safety_range(); +} + +void run_exec_space_thread_safety_mdrange() { + constexpr int N = 1000000; + constexpr int M = 10; + + Kokkos::View view("view"); + Kokkos::View error("error"); + + auto lambda = [=]() { + TEST_EXECSPACE exec; + for (int j = 0; j < M; ++j) { + Kokkos::parallel_for( + Kokkos::MDRangePolicy>(exec, {0, 0}, + {1, 1}), + KOKKOS_LAMBDA(int, int) { + Kokkos::atomic_store(view.data(), 0); + for (int i = 0; i < N; ++i) Kokkos::atomic_inc(view.data()); + if (Kokkos::atomic_load(view.data()) != N) + Kokkos::atomic_store(error.data(), 1); + }); + } + }; + + run_threaded_test(lambda, lambda); + + auto host_error = + Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, error); + ASSERT_EQ(host_error(), 0); +} + +TEST(TEST_CATEGORY, exec_space_thread_safety_mdrange) { +#ifdef KOKKOS_ENABLE_OPENMPTARGET + if (std::is_same_v) + GTEST_SKIP() << "skipping since test is known to fail for OpenMPTarget"; +#endif + 
run_exec_space_thread_safety_mdrange(); +} + +void run_exec_space_thread_safety_team_policy() { + constexpr int N = 1000000; + constexpr int M = 10; + + Kokkos::View view("view"); + Kokkos::View error("error"); + + auto lambda = [=]() { + TEST_EXECSPACE exec; + for (int j = 0; j < M; ++j) { + Kokkos::parallel_for( + Kokkos::TeamPolicy(exec, 1, 1, 1), + KOKKOS_LAMBDA(const Kokkos::TeamPolicy::member_type + &team_member) { + Kokkos::single(Kokkos::PerTeam(team_member), [=]() { + Kokkos::atomic_store(view.data(), 0); + for (int i = 0; i < N; ++i) Kokkos::atomic_inc(view.data()); + if (Kokkos::atomic_load(view.data()) != N) + Kokkos::atomic_store(error.data(), 1); + }); + }); + } + }; + + run_threaded_test(lambda, lambda); + + auto host_error = + Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, error); + ASSERT_EQ(host_error(), 0); +} + +TEST(TEST_CATEGORY, exec_space_thread_safety_team_policy) { +// FIXME_OPENMPTARGET +#ifdef KOKKOS_ENABLE_OPENMPTARGET + if (std::is_same_v) + GTEST_SKIP() << "skipping for OpenMPTarget since the test is designed to " + "run with vector_length=1"; +#endif + run_exec_space_thread_safety_team_policy(); +} + +void run_exec_space_thread_safety_range_reduce() { + constexpr int N = 1000000; + constexpr int M = 10; + + Kokkos::View view("view"); + Kokkos::View error("error"); + + auto lambda = [=]() { + TEST_EXECSPACE exec; + for (int j = 0; j < M; ++j) { + Kokkos::parallel_reduce( + Kokkos::RangePolicy(exec, 0, 1), + KOKKOS_LAMBDA(int, int &update) { + Kokkos::atomic_store(view.data(), 0); + for (int i = 0; i < N; ++i) Kokkos::atomic_inc(view.data()); + if (Kokkos::atomic_load(view.data()) != N) ++update; + }, + error); + } + exec.fence(); + }; + + run_threaded_test(lambda, lambda); + + auto host_error = + Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, error); + ASSERT_EQ(host_error(), 0); +} + +TEST(TEST_CATEGORY, exec_space_thread_safety_range_reduce) { + run_exec_space_thread_safety_range_reduce(); +} + +void 
run_exec_space_thread_safety_mdrange_reduce() { + constexpr int N = 1000000; + constexpr int M = 10; + + Kokkos::View view("view"); + Kokkos::View error("error"); + + auto lambda = [=]() { + TEST_EXECSPACE exec; + for (int j = 0; j < M; ++j) { + Kokkos::parallel_reduce( + Kokkos::MDRangePolicy>(exec, {0, 0}, + {1, 1}), + KOKKOS_LAMBDA(int, int, int &update) { + Kokkos::atomic_store(view.data(), 0); + for (int i = 0; i < N; ++i) Kokkos::atomic_inc(view.data()); + if (Kokkos::atomic_load(view.data()) != N) ++update; + }, + error); + } + exec.fence(); + }; + + run_threaded_test(lambda, lambda); + + auto host_error = + Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, error); + ASSERT_EQ(host_error(), 0); +} + +TEST(TEST_CATEGORY, exec_space_thread_safety_mdrange_reduce) { +// FIXME_INTEL +#if defined(KOKKOS_COMPILER_INTEL) && defined(KOKKOS_ENABLE_OPENMP) + if (std::is_same_v) + GTEST_SKIP() << "skipping since test is known to fail for OpenMP using the " + "legacy Intel compiler"; +#endif + run_exec_space_thread_safety_mdrange_reduce(); +} + +void run_exec_space_thread_safety_team_policy_reduce() { + constexpr int N = 1000000; + constexpr int M = 10; + + Kokkos::View view("view"); + Kokkos::View error("error"); + + auto lambda = [=]() { + TEST_EXECSPACE exec; + for (int j = 0; j < M; ++j) { + Kokkos::parallel_reduce( + Kokkos::TeamPolicy(exec, 1, 1, 1), + KOKKOS_LAMBDA(const Kokkos::TeamPolicy::member_type + &team_member, + int &update) { + Kokkos::single(Kokkos::PerTeam(team_member), [=, &update]() { + Kokkos::atomic_store(view.data(), 0); + for (int i = 0; i < N; ++i) Kokkos::atomic_inc(view.data()); + if (Kokkos::atomic_load(view.data()) != N) ++update; + }); + }, + error); + } + }; + run_threaded_test(lambda, lambda); + + auto host_error = + Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, error); + ASSERT_EQ(host_error(), 0); +} + +TEST(TEST_CATEGORY, exec_space_thread_safety_team_policy_reduce) { +// FIXME_OPENMPTARGET +#ifdef 
KOKKOS_ENABLE_OPENMPTARGET + if (std::is_same_v) + GTEST_SKIP() << "skipping for OpenMPTarget since the test is designed to " + "run with vector_length=1"; +#endif + // FIXME_SYCL +#if defined(KOKKOS_ENABLE_SYCL) && defined(KOKKOS_IMPL_ARCH_NVIDIA_GPU) + if (std::is_same_v) + GTEST_SKIP() << "skipping since test is know to fail with SYCL+Cuda"; +#endif + run_exec_space_thread_safety_team_policy_reduce(); +} + +void run_exec_space_thread_safety_range_scan() { + constexpr int N = 1000000; + constexpr int M = 10; + + Kokkos::View view("view"); + Kokkos::View error("error"); + + auto lambda = [=]() { + TEST_EXECSPACE exec; + for (int j = 0; j < M; ++j) { + Kokkos::parallel_scan( + Kokkos::RangePolicy(exec, 0, 1), + KOKKOS_LAMBDA(int, int &, const bool final) { + if (final) { + Kokkos::atomic_store(view.data(), 0); + for (int i = 0; i < N; ++i) Kokkos::atomic_inc(view.data()); + if (Kokkos::atomic_load(view.data()) != N) + Kokkos::atomic_store(error.data(), 1); + } + }); + } + exec.fence(); + }; + + run_threaded_test(lambda, lambda); + + auto host_error = + Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, error); + ASSERT_EQ(host_error(), 0); +} + +TEST(TEST_CATEGORY, exec_space_thread_safety_range_scan) { +#ifdef KOKKOS_ENABLE_OPENACC // FIXME_OPENACC + if (std::is_same_v) + GTEST_SKIP() << "skipping since test is known to fail with OpenACC"; +#endif + run_exec_space_thread_safety_range_scan(); +} + +} // namespace diff --git a/lib/kokkos/core/unit_test/TestExecutionSpace.hpp b/lib/kokkos/core/unit_test/TestExecutionSpace.hpp index 983a5975af..d4142dee18 100644 --- a/lib/kokkos/core/unit_test/TestExecutionSpace.hpp +++ b/lib/kokkos/core/unit_test/TestExecutionSpace.hpp @@ -44,4 +44,60 @@ TEST(TEST_CATEGORY, execution_space_as_class_data_member) { } #endif +constexpr bool test_execspace_explicit_construction() { +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 +#ifdef KOKKOS_ENABLE_SERIAL + static_assert(std::is_convertible_v); +#endif +#ifdef KOKKOS_ENABLE_OPENMP + 
static_assert(std::is_convertible_v); +#endif +#ifdef KOKKOS_ENABLE_CUDA + static_assert(std::is_convertible_v); +#endif +#ifdef KOKKOS_ENABLE_HIP + static_assert(std::is_convertible_v); +#endif +#ifdef KOKKOS_ENABLE_HPX + static_assert(std::is_convertible_v); + static_assert( + std::is_convertible_v&&, + Kokkos::Experimental::HPX>); +#endif +#else +#ifdef KOKKOS_ENABLE_SERIAL + static_assert(!std::is_convertible_v); +#endif +#ifdef KOKKOS_ENABLE_OPENMP + static_assert(!std::is_convertible_v); +#endif +#ifdef KOKKOS_ENABLE_CUDA + static_assert(!std::is_convertible_v); +#endif +#ifdef KOKKOS_ENABLE_HIP + static_assert(!std::is_convertible_v); +#endif +#ifdef KOKKOS_ENABLE_HPX + static_assert(!std::is_convertible_v); + static_assert(!std::is_convertible_v< + hpx::execution::experimental::unique_any_sender<>&&, + Kokkos::Experimental::HPX>); +#endif +#endif + +#ifdef KOKKOS_ENABLE_OPENACC + static_assert(!std::is_convertible_v); +#endif +#ifdef KOKKOS_ENABLE_SYCL + static_assert( + !std::is_convertible_v); +#endif + + return true; +} + +static_assert(test_execspace_explicit_construction()); + } // namespace diff --git a/lib/kokkos/core/unit_test/TestGraph.hpp b/lib/kokkos/core/unit_test/TestGraph.hpp index 9a36d08f44..f9dc63d30c 100644 --- a/lib/kokkos/core/unit_test/TestGraph.hpp +++ b/lib/kokkos/core/unit_test/TestGraph.hpp @@ -21,6 +21,21 @@ namespace Test { +template +struct NoOpReduceFunctor { + KOKKOS_FUNCTION void operator()(int, ValueType&) const { + Kokkos::abort("Should never be called!"); + } + KOKKOS_FUNCTION void operator()(int, int, ValueType&) const { + Kokkos::abort("Should never be called!"); + } + KOKKOS_FUNCTION void operator()( + const typename Kokkos::TeamPolicy::member_type&, + ValueType&) const { + Kokkos::abort("Should never be called!"); + } +}; + template struct CountTestFunctor { using value_type = int; @@ -66,7 +81,7 @@ struct SetResultToViewFunctor { } }; -struct TEST_CATEGORY_FIXTURE(count_bugs) : public ::testing::Test { +struct 
TEST_CATEGORY_FIXTURE(graph) : public ::testing::Test { public: using count_functor = CountTestFunctor; using set_functor = SetViewToValueFunctor; @@ -88,7 +103,7 @@ struct TEST_CATEGORY_FIXTURE(count_bugs) : public ::testing::Test { } }; -TEST_F(TEST_CATEGORY_FIXTURE(count_bugs), launch_one) { +TEST_F(TEST_CATEGORY_FIXTURE(graph), launch_one) { auto graph = Kokkos::Experimental::create_graph([&](auto root) { root.then_parallel_for(1, count_functor{count, bugs, 0, 0}); @@ -101,7 +116,7 @@ TEST_F(TEST_CATEGORY_FIXTURE(count_bugs), launch_one) { ASSERT_EQ(0, bugs_host()); } -TEST_F(TEST_CATEGORY_FIXTURE(count_bugs), launch_one_rvalue) { +TEST_F(TEST_CATEGORY_FIXTURE(graph), launch_one_rvalue) { Kokkos::Experimental::create_graph(ex, [&](auto root) { root.then_parallel_for(1, count_functor{count, bugs, 0, 0}); }).submit(); @@ -112,7 +127,17 @@ TEST_F(TEST_CATEGORY_FIXTURE(count_bugs), launch_one_rvalue) { ASSERT_EQ(0, bugs_host()); } -TEST_F(TEST_CATEGORY_FIXTURE(count_bugs), launch_six) { +TEST_F(TEST_CATEGORY_FIXTURE(graph), launch_six) { +#ifdef KOKKOS_ENABLE_OPENMPTARGET // FIXME_OPENMPTARGET team_size incompatible + if (std::is_same_v) + GTEST_SKIP() << "skipping since OpenMPTarget can't use team_size 1"; +#endif +#if defined(KOKKOS_ENABLE_SYCL) && \ + !defined(SYCL_EXT_ONEAPI_GRAPH) // FIXME_SYCL + if (std::is_same_v) + GTEST_SKIP() << "skipping since test case is known to fail with SYCL"; +#endif + auto graph = Kokkos::Experimental::create_graph(ex, [&](auto root) { auto f_setup_count = root.then_parallel_for(1, set_functor{count, 0}); auto f_setup_bugs = root.then_parallel_for(1, set_functor{bugs, 0}); @@ -145,7 +170,7 @@ TEST_F(TEST_CATEGORY_FIXTURE(count_bugs), launch_six) { ASSERT_EQ(0, bugs_host()); } -TEST_F(TEST_CATEGORY_FIXTURE(count_bugs), when_all_cycle) { +TEST_F(TEST_CATEGORY_FIXTURE(graph), when_all_cycle) { view_type reduction_out{"reduction_out"}; view_host reduction_host{"reduction_host"}; Kokkos::Experimental::create_graph(ex, [&](auto root) { 
@@ -172,7 +197,7 @@ TEST_F(TEST_CATEGORY_FIXTURE(count_bugs), when_all_cycle) { // This test is disabled because we don't currently support copying to host, // even asynchronously. We _may_ want to do that eventually? -TEST_F(TEST_CATEGORY_FIXTURE(count_bugs), DISABLED_repeat_chain) { +TEST_F(TEST_CATEGORY_FIXTURE(graph), DISABLED_repeat_chain) { auto graph = Kokkos::Experimental::create_graph( ex, [&, count_host = count_host](auto root) { //---------------------------------------- @@ -198,10 +223,27 @@ TEST_F(TEST_CATEGORY_FIXTURE(count_bugs), DISABLED_repeat_chain) { //---------------------------------------- } -TEST_F(TEST_CATEGORY_FIXTURE(count_bugs), zero_work_reduce) { - auto graph = Kokkos::Experimental::create_graph(ex, [&](auto root) { - root.then_parallel_reduce(0, set_result_functor{bugs}, count); - }); +TEST_F(TEST_CATEGORY_FIXTURE(graph), zero_work_reduce) { + auto graph = Kokkos::Experimental::create_graph( + ex, [&](Kokkos::Experimental::GraphNodeRef root) { + NoOpReduceFunctor no_op_functor; + root.then_parallel_reduce(Kokkos::RangePolicy(0, 0), + no_op_functor, count) +#if !defined(KOKKOS_ENABLE_SYCL) || \ + defined(SYCL_EXT_ONEAPI_GRAPH) // FIXME_SYCL +#if !defined(KOKKOS_ENABLE_CUDA) && \ + !defined(KOKKOS_ENABLE_HIP) // FIXME_CUDA FIXME_HIP + .then_parallel_reduce( + Kokkos::MDRangePolicy>{{0, 0}, + {0, 0}}, + no_op_functor, count) +#endif + .then_parallel_reduce( + Kokkos::TeamPolicy{0, Kokkos::AUTO}, + no_op_functor, count) +#endif + ; + }); // These fences are only necessary because of the weirdness of how CUDA // UVM works on pre pascal cards. #if defined(KOKKOS_ENABLE_CUDA) && defined(KOKKOS_ENABLE_CUDA_UVM) && \ @@ -214,12 +256,15 @@ TEST_F(TEST_CATEGORY_FIXTURE(count_bugs), zero_work_reduce) { // UVM works on pre pascal cards. 
#if defined(KOKKOS_ENABLE_CUDA) && defined(KOKKOS_ENABLE_CUDA_UVM) && \ (defined(KOKKOS_ARCH_KEPLER) || defined(KOKKOS_ARCH_MAXWELL)) - Kokkos::fence(); + if constexpr (std::is_same_v) Kokkos::fence(); #endif - graph.submit(); // should reset to 0, but doesn't +#ifdef KOKKOS_ENABLE_HPX // FIXME_HPX graph.submit() isn't properly enqueued + if constexpr (std::is_same_v) + Kokkos::fence(); +#endif + graph.submit(); Kokkos::deep_copy(ex, count_host, count); ex.fence(); ASSERT_EQ(count_host(), 0); } - } // end namespace Test diff --git a/lib/kokkos/core/unit_test/TestLocalDeepCopy.hpp b/lib/kokkos/core/unit_test/TestLocalDeepCopy.hpp index 1ee23a47c4..c6ee687cf9 100644 --- a/lib/kokkos/core/unit_test/TestLocalDeepCopy.hpp +++ b/lib/kokkos/core/unit_test/TestLocalDeepCopy.hpp @@ -907,13 +907,7 @@ void impl_test_local_deepcopy_rangepolicy_rank_7(const int N) { #if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) TEST(TEST_CATEGORY, local_deepcopy_teampolicy_layoutleft) { using ExecSpace = TEST_EXECSPACE; -#if defined(KOKKOS_ENABLE_CUDA) && \ - defined(KOKKOS_COMPILER_NVHPC) // FIXME_NVHPC 23.7 - if (std::is_same_v) - GTEST_SKIP() - << "FIXME_NVHPC : Compiler bug affecting subviews of high rank Views"; -#endif - using ViewType = Kokkos::View; + using ViewType = Kokkos::View; { // Rank-1 impl_test_local_deepcopy_teampolicy_rank_1(8); @@ -940,13 +934,7 @@ TEST(TEST_CATEGORY, local_deepcopy_teampolicy_layoutleft) { //------------------------------------------------------------------------------------------------------------- TEST(TEST_CATEGORY, local_deepcopy_rangepolicy_layoutleft) { using ExecSpace = TEST_EXECSPACE; -#if defined(KOKKOS_ENABLE_CUDA) && \ - defined(KOKKOS_COMPILER_NVHPC) // FIXME_NVHPC 23.7 - if (std::is_same_v) - GTEST_SKIP() - << "FIXME_NVHPC : Compiler bug affecting subviews of high rank Views"; -#endif - using ViewType = Kokkos::View; + using ViewType = Kokkos::View; { // Rank-1 impl_test_local_deepcopy_rangepolicy_rank_1(8); @@ -973,12 +961,6 @@ 
TEST(TEST_CATEGORY, local_deepcopy_rangepolicy_layoutleft) { //------------------------------------------------------------------------------------------------------------- TEST(TEST_CATEGORY, local_deepcopy_teampolicy_layoutright) { using ExecSpace = TEST_EXECSPACE; -#if defined(KOKKOS_ENABLE_CUDA) && \ - defined(KOKKOS_COMPILER_NVHPC) // FIXME_NVHPC 23.7 - if (std::is_same_v) - GTEST_SKIP() - << "FIXME_NVHPC : Compiler bug affecting subviews of high rank Views"; -#endif using ViewType = Kokkos::View; { // Rank-1 @@ -1006,12 +988,6 @@ TEST(TEST_CATEGORY, local_deepcopy_teampolicy_layoutright) { //------------------------------------------------------------------------------------------------------------- TEST(TEST_CATEGORY, local_deepcopy_rangepolicy_layoutright) { using ExecSpace = TEST_EXECSPACE; -#if defined(KOKKOS_ENABLE_CUDA) && \ - defined(KOKKOS_COMPILER_NVHPC) // FIXME_NVHPC 23.7 - if (std::is_same_v) - GTEST_SKIP() - << "FIXME_NVHPC : Compiler bug affecting subviews of high rank Views"; -#endif using ViewType = Kokkos::View; diff --git a/lib/kokkos/core/unit_test/TestMDSpan.hpp b/lib/kokkos/core/unit_test/TestMDSpan.hpp index ef0bea1394..fa88b547a5 100644 --- a/lib/kokkos/core/unit_test/TestMDSpan.hpp +++ b/lib/kokkos/core/unit_test/TestMDSpan.hpp @@ -35,13 +35,19 @@ void test_mdspan_minimal_functional() { Kokkos::parallel_reduce( "CheckMinimalMDSpan", Kokkos::RangePolicy(0, N), KOKKOS_LAMBDA(int i, int& err) { +#if !defined(KOKKOS_ENABLE_OPENACC) Kokkos::mdspan> b_mds(a.data(), N); -#ifdef KOKKOS_ENABLE_CXX23 +#endif +#if !defined(KOKKOS_ENABLE_CXX17) && !defined(KOKKOS_ENABLE_CXX20) if (a_mds[i] != i) err++; +#if !defined(KOKKOS_ENABLE_OPENACC) if (b_mds[i] != i) err++; +#endif #else if (a_mds(i) != i) err++; +#if !defined(KOKKOS_ENABLE_OPENACC) if (b_mds(i) != i) err++; +#endif #endif }, errors); diff --git a/lib/kokkos/core/unit_test/TestMDSpanAtomicAccessor.hpp b/lib/kokkos/core/unit_test/TestMDSpanAtomicAccessor.hpp new file mode 100644 index 
0000000000..04460e6419 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestMDSpanAtomicAccessor.hpp @@ -0,0 +1,112 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#include +#include + +#include +#ifndef KOKKOS_ENABLE_CXX17 +#include +#endif + +template +void test_atomic_accessor() { + using value_type = std::remove_const_t; + Kokkos::View v("V", 100); + + Kokkos::parallel_for( + Kokkos::RangePolicy(0, v.extent(0)), + KOKKOS_LAMBDA(int i) { v(i) = i; }); + + int errors; + using acc_t = Kokkos::Impl::AtomicAccessorRelaxed; + acc_t acc{}; + typename acc_t::data_handle_type ptr = v.data(); + + Kokkos::parallel_reduce( + Kokkos::RangePolicy(0, v.extent(0)), + KOKKOS_LAMBDA(int i, int& error) { + if (acc.access(ptr, i) != ptr[i]) error++; + if (acc.offset(ptr, i) != ptr + i) error++; + static_assert(std::is_same_v); + static_assert( + std::is_same_v>); + static_assert(std::is_same_v); + static_assert(std::is_same_v); + static_assert(std::is_same_v); + static_assert(std::is_same_v); + static_assert(std::is_nothrow_move_constructible_v); + static_assert(std::is_nothrow_move_assignable_v); + static_assert(std::is_nothrow_swappable_v); + static_assert(std::is_trivially_copyable_v); + static_assert(std::is_trivially_default_constructible_v); + static_assert(std::is_trivially_constructible_v); + static_assert(std::is_trivially_move_constructible_v); + static_assert(std::is_trivially_assignable_v); + static_assert(std::is_trivially_move_assignable_v); +#ifndef KOKKOS_ENABLE_CXX17 + 
static_assert(std::copyable); + static_assert(std::is_empty_v); +#endif + }, + errors); + ASSERT_EQ(errors, 0); +} + +void test_atomic_accessor_conversion() { + using ExecutionSpace = TEST_EXECSPACE; + using T = float; + using acc_t = Kokkos::Impl::AtomicAccessorRelaxed; + using const_acc_t = Kokkos::Impl::AtomicAccessorRelaxed; + using int_acc_t = Kokkos::Impl::AtomicAccessorRelaxed; + using defacc_t = Kokkos::default_accessor; + using const_defacc_t = Kokkos::default_accessor; + using int_defacc_t = Kokkos::default_accessor; + + Kokkos::parallel_for( + Kokkos::RangePolicy(0, 1), KOKKOS_LAMBDA(int) { + static_assert(std::is_constructible_v); + static_assert(std::is_convertible_v); + static_assert(!std::is_constructible_v); + static_assert(!std::is_constructible_v); + static_assert(std::is_constructible_v); + static_assert(std::is_constructible_v); + static_assert(!std::is_constructible_v); + static_assert(!std::is_constructible_v); + static_assert(std::is_constructible_v); + static_assert(std::is_constructible_v); + static_assert(std::is_constructible_v); + static_assert(std::is_constructible_v); + static_assert(!std::is_constructible_v); + static_assert(!std::is_constructible_v); + static_assert(std::is_convertible_v); + static_assert(std::is_convertible_v); + static_assert(std::is_convertible_v); + static_assert(std::is_convertible_v); + static_assert(!std::is_convertible_v); + static_assert(!std::is_convertible_v); + static_assert(!std::is_convertible_v); + }); +} + +TEST(TEST_CATEGORY, mdspan_atomic_accessor) { + using ExecutionSpace = TEST_EXECSPACE; + test_atomic_accessor(); + test_atomic_accessor(); +} diff --git a/lib/kokkos/core/unit_test/TestMDSpanConversion.hpp b/lib/kokkos/core/unit_test/TestMDSpanConversion.hpp new file mode 100644 index 0000000000..10123901c4 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestMDSpanConversion.hpp @@ -0,0 +1,507 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#include +#include + +#include +#include "experimental/__p0009_bits/layout_stride.hpp" + +namespace { + +template +struct TestViewMDSpanConversion { + using value_type = T; + + template + using layout_left_padded = Kokkos::Experimental::layout_left_padded; + + template + using layout_right_padded = + Kokkos::Experimental::layout_right_padded; + + struct TestAccessor { + using offset_policy = TestAccessor; + using element_type = value_type; + using reference = element_type &; + using data_handle_type = element_type *; + + constexpr TestAccessor() noexcept = default; + constexpr reference access(data_handle_type p, std::size_t i) noexcept { + return p[i]; + } + constexpr data_handle_type offset(data_handle_type p, + std::size_t i) noexcept { + return p + i; + } + }; + + template + static void test_conversion_from_mdspan( + Kokkos::View ref, + const MDSpanLayoutMapping &mapping) { + using unmanaged_view_type = + Kokkos::View>; + using natural_mdspan_type = typename Kokkos::Impl::MDSpanViewTraits< + typename unmanaged_view_type::traits>::mdspan_type; + using mapping_type = MDSpanLayoutMapping; + using mdspan_layout_type = typename MDSpanLayoutMapping::layout_type; + using extents_type = typename mapping_type::extents_type; + using mdspan_type = + Kokkos::mdspan; + + static_assert(std::is_constructible_v); + static_assert(std::is_convertible_v == + std::is_convertible_v); + // Manually create an mdspan from ref so we have a valid pointer to play + // with + const auto &exts = mapping.extents(); + auto mds = mdspan_type{ref.data(), mapping}; 
+ + auto test_view = unmanaged_view_type(mds); + + ASSERT_EQ(test_view.data(), ref.data()); + ASSERT_EQ(test_view.data(), mds.data_handle()); + ASSERT_EQ(test_view.layout(), ref.layout()); + for (std::size_t r = 0; r < mdspan_type::rank(); ++r) { + ASSERT_EQ(test_view.extent(r), ref.extent(r)); + ASSERT_EQ(test_view.extent(r), exts.extent(r)); + } + } + + template + static void test_conversion_to_mdspan( + const MDSpanLayoutMapping &ref_layout_mapping, ViewType v) { + using view_type = ViewType; + using natural_mdspan_type = typename Kokkos::Impl::MDSpanViewTraits< + typename view_type::traits>::mdspan_type; + + static_assert(natural_mdspan_type::rank() == view_type::rank); + static_assert(std::is_same_v); + constexpr bool is_strided_layout = + std::is_same_v; + if constexpr (!is_strided_layout) { + static_assert(natural_mdspan_type::mapping_type::padding_value == + Kokkos::dynamic_extent); + } + // test conversion operator to natural mdspan + { + natural_mdspan_type cvt = v; + ASSERT_EQ(cvt.data_handle(), v.data()); + ASSERT_EQ(cvt.mapping(), ref_layout_mapping); + + if constexpr (!is_strided_layout && natural_mdspan_type::rank() > 1) { + ASSERT_EQ(cvt.mapping().stride(1), ref_layout_mapping.stride(1)); + } + } + // test to_mdspan() returning natural mdspan + { + auto cvt = v.to_mdspan(); + static_assert(std::is_same_v); + ASSERT_EQ(cvt.data_handle(), v.data()); + ASSERT_EQ(cvt.mapping(), ref_layout_mapping); + } + // test conversion operator to different mdspan type + { + using element_type = const typename natural_mdspan_type::element_type; + using const_acc_type = Kokkos::Impl::SpaceAwareAccessor< + typename ViewType::memory_space, + Kokkos::default_accessor>; + using mdspan_type = Kokkos::mdspan< + element_type, + Kokkos::dextents, + typename natural_mdspan_type::layout_type, const_acc_type>; + mdspan_type cvt = v; + ASSERT_EQ(cvt.data_handle(), v.data()); + ASSERT_EQ(cvt.mapping(), ref_layout_mapping); + } + } + + template + static void 
test_conversion_to_mdspan_with_accessor( + const MDSpanLayoutMapping &ref_layout_mapping, ViewType v, + const AccessorType &a) { + auto cvt = v.to_mdspan(a); + static_assert(decltype(cvt)::rank() == ViewType::rank); + static_assert(std::is_same_v); + ASSERT_EQ(cvt.data_handle(), v.data()); + ASSERT_EQ(cvt.mapping(), ref_layout_mapping); + } + + template + using natural_mdspan_type_for_view = typename Kokkos::Impl::MDSpanViewTraits< + typename ViewType::traits>::mdspan_type; + + static void run_test() { + // Verify we can only convert to compatible mdspans + static_assert(std::is_convertible_v< + Kokkos::View, + natural_mdspan_type_for_view>>); + static_assert( + std::is_convertible_v< + Kokkos::View, + natural_mdspan_type_for_view>>); + + // Do not cast const away + static_assert(!std::is_convertible_v< + Kokkos::View, + natural_mdspan_type_for_view>>); + + // Mismatched dim + static_assert(!std::is_convertible_v< + Kokkos::View, + natural_mdspan_type_for_view>>); + + // Mismatched layouts + static_assert( + !std::is_convertible_v, + natural_mdspan_type_for_view>>); + static_assert( + !std::is_convertible_v, + natural_mdspan_type_for_view>>); + // nvcc doesn't do CTAD properly here, making this way more verbose.. 
+ // LayoutLeft + test_conversion_from_mdspan( + Kokkos::View("ref", + 7), + typename layout_left_padded<7>::template mapping< + Kokkos::dextents>{ + Kokkos::dextents(7)}); + + test_conversion_from_mdspan( + Kokkos::View("ref"), + typename layout_left_padded<7>::template mapping< + Kokkos::extents>{ + Kokkos::extents()}); + test_conversion_from_mdspan( + Kokkos::View("ref"), + typename layout_left_padded<7>::template mapping< + Kokkos::dextents>{ + Kokkos::dextents(7)}); + test_conversion_from_mdspan( + Kokkos::View("ref", + 7), + typename layout_left_padded<7>::template mapping< + Kokkos::extents>{ + Kokkos::extents()}); + + test_conversion_from_mdspan( + Kokkos::View("ref", + 7, 3), + typename layout_left_padded<7>::template mapping< + Kokkos::dextents>{ + Kokkos::dextents(7, 3)}); + test_conversion_from_mdspan( + Kokkos::View( + "ref"), + typename layout_left_padded<7>::template mapping< + Kokkos::extents>{ + Kokkos::extents()}); + test_conversion_from_mdspan( + Kokkos::View( + "ref"), + typename layout_left_padded<7>::template mapping< + Kokkos::dextents>{ + Kokkos::dextents(7, 3)}); + test_conversion_from_mdspan( + Kokkos::View("ref", + 7, 3), + typename layout_left_padded<7>::template mapping< + Kokkos::extents>{ + Kokkos::extents()}); + + // LayoutRight + test_conversion_from_mdspan( + Kokkos::View("ref", + 7), + typename layout_right_padded<7>::template mapping< + Kokkos::dextents>{ + Kokkos::dextents(7)}); + test_conversion_from_mdspan( + Kokkos::View("ref"), + typename layout_right_padded<7>::template mapping< + Kokkos::extents>{ + Kokkos::extents()}); + test_conversion_from_mdspan( + Kokkos::View("ref"), + typename layout_right_padded<7>::template mapping< + Kokkos::dextents>{ + Kokkos::dextents(7)}); + test_conversion_from_mdspan( + Kokkos::View("ref", + 7), + typename layout_right_padded<7>::template mapping< + Kokkos::extents>{ + Kokkos::extents()}); + + test_conversion_from_mdspan( + Kokkos::View("ref", + 3, 7), + typename 
layout_right_padded<7>::template mapping< + Kokkos::dextents>{ + Kokkos::dextents(3, 7)}); + test_conversion_from_mdspan( + Kokkos::View( + "ref"), + typename layout_right_padded<7>::template mapping< + Kokkos::extents>{ + Kokkos::extents()}); + test_conversion_from_mdspan( + Kokkos::View( + "ref"), + typename layout_right_padded<7>::template mapping< + Kokkos::dextents>{ + Kokkos::dextents(3, 7)}); + test_conversion_from_mdspan( + Kokkos::View("ref", + 3, 7), + typename layout_right_padded<7>::template mapping< + Kokkos::extents>{ + Kokkos::extents()}); + + // LayoutStride + { + const size_t strides[] = {2}; + test_conversion_from_mdspan( + Kokkos::View( + "ref", Kokkos::LayoutStride{7, 2}), + Kokkos::layout_stride::mapping>{ + Kokkos::mdspan_non_standard, Kokkos::dextents{7}, + strides}); + } + { + const size_t strides[] = {2}; + test_conversion_from_mdspan( + Kokkos::View( + "ref", Kokkos::LayoutStride{7, 2}), + Kokkos::layout_stride::mapping>{ + Kokkos::mdspan_non_standard, {}, strides}); + } + { + const size_t strides[] = {2}; + test_conversion_from_mdspan( + Kokkos::View( + "ref", Kokkos::LayoutStride{7, 2}), + Kokkos::layout_stride::mapping>{ + Kokkos::mdspan_non_standard, Kokkos::dextents{7}, + strides}); + } + { + const size_t strides[] = {2}; + test_conversion_from_mdspan( + Kokkos::View( + "ref", Kokkos::LayoutStride{7, 2}), + Kokkos::layout_stride::mapping>{ + Kokkos::mdspan_non_standard, Kokkos::extents(), + strides}); + } + + { + const size_t strides[] = {2, 4}; + test_conversion_from_mdspan( + Kokkos::View( + "ref", Kokkos::LayoutStride{7, 2, 3, 4}), + Kokkos::layout_stride::mapping>{ + Kokkos::mdspan_non_standard, + Kokkos::dextents(7, 3), strides}); + } + { + const size_t strides[] = {2, 4}; + test_conversion_from_mdspan( + Kokkos::View( + "ref", Kokkos::LayoutStride{7, 2, 3, 4}), + Kokkos::layout_stride::mapping>{ + Kokkos::mdspan_non_standard, Kokkos::extents(), + strides}); + } + { + const size_t strides[] = {2, 4}; + 
test_conversion_from_mdspan( + Kokkos::View( + "ref", Kokkos::LayoutStride{7, 2, 3, 4}), + Kokkos::layout_stride::mapping>{ + Kokkos::mdspan_non_standard, + Kokkos::dextents(7, 3), strides}); + } + { + const size_t strides[] = {2, 4}; + test_conversion_from_mdspan( + Kokkos::View( + "ref", Kokkos::LayoutStride{7, 2, 3, 4}), + Kokkos::layout_stride::mapping>{ + Kokkos::mdspan_non_standard, Kokkos::extents(), + strides}); + } + + // Conversion to mdspan + test_conversion_to_mdspan( + layout_left_padded::mapping< + Kokkos::extents>({}, 4), + Kokkos::View("v", 4)); + test_conversion_to_mdspan( + layout_left_padded::mapping< + Kokkos::extents>({}, 4), + Kokkos::View("v", 4, + 7)); + + test_conversion_to_mdspan( + layout_right_padded::mapping< + Kokkos::extents>({}, 4), + Kokkos::View("v", + 4)); + test_conversion_to_mdspan( + layout_right_padded::mapping< + Kokkos::extents>({}, 7), + Kokkos::View("v", 4, + 7)); + + { + const size_t strides[] = {5}; + test_conversion_to_mdspan( + Kokkos::layout_stride::mapping>( + Kokkos::mdspan_non_standard, {}, strides), + Kokkos::View( + "v", Kokkos::LayoutStride{4, 5})); + } + { + const size_t strides[] = {5, 9}; + test_conversion_to_mdspan( + Kokkos::layout_stride::mapping>( + Kokkos::mdspan_non_standard, {}, strides), + Kokkos::View( + "v", Kokkos::LayoutStride{4, 5, 7, 9})); + } + + // Aligned types (for padded layouts) + test_conversion_to_mdspan( + layout_left_padded::mapping< + Kokkos::extents>({}, 128), + Kokkos::View( + Kokkos::view_alloc("v", Kokkos::AllowPadding), 127, 7)); + + test_conversion_to_mdspan( + layout_right_padded::mapping< + Kokkos::extents>({}, 128), + Kokkos::View( + Kokkos::view_alloc("v", Kokkos::AllowPadding), 7, 127)); + + // Conversion with standard default_accessor + + test_conversion_to_mdspan_with_accessor( + layout_left_padded::mapping< + Kokkos::extents>({}, 4), + Kokkos::View("v", 4), + Kokkos::default_accessor{}); + test_conversion_to_mdspan_with_accessor( + layout_left_padded::mapping< + 
Kokkos::extents>({}, 4), + Kokkos::View("v", 4, + 7), + Kokkos::default_accessor{}); + + test_conversion_to_mdspan_with_accessor( + layout_right_padded::mapping< + Kokkos::extents>({}, 4), + Kokkos::View("v", 4), + Kokkos::default_accessor{}); + test_conversion_to_mdspan_with_accessor( + layout_right_padded::mapping< + Kokkos::extents>({}, 7), + Kokkos::View("v", 4, + 7), + Kokkos::default_accessor{}); + + { + const size_t strides[] = {5}; + test_conversion_to_mdspan_with_accessor( + Kokkos::layout_stride::mapping>( + Kokkos::mdspan_non_standard, {}, strides), + Kokkos::View( + "v", Kokkos::LayoutStride{4, 5}), + Kokkos::default_accessor{}); + } + { + const size_t strides[] = {5, 9}; + test_conversion_to_mdspan_with_accessor( + Kokkos::layout_stride::mapping>( + Kokkos::mdspan_non_standard, {}, strides), + Kokkos::View( + "v", Kokkos::LayoutStride{4, 5, 7, 9}), + Kokkos::default_accessor{}); + } + + // Conversion with a test accessor + + test_conversion_to_mdspan_with_accessor( + layout_left_padded::mapping< + Kokkos::extents>({}, 4), + Kokkos::View("v", 4), + TestAccessor{}); + test_conversion_to_mdspan_with_accessor( + layout_left_padded::mapping< + Kokkos::extents>({}, 4), + Kokkos::View("v", 4, + 7), + TestAccessor{}); + + test_conversion_to_mdspan_with_accessor( + layout_right_padded::mapping< + Kokkos::extents>({}, 4), + Kokkos::View("v", 4), + TestAccessor{}); + test_conversion_to_mdspan_with_accessor( + layout_right_padded::mapping< + Kokkos::extents>({}, 7), + Kokkos::View("v", 4, + 7), + TestAccessor{}); + + { + const size_t strides[] = {5}; + test_conversion_to_mdspan_with_accessor( + Kokkos::layout_stride::mapping>( + Kokkos::mdspan_non_standard, {}, strides), + Kokkos::View( + "v", Kokkos::LayoutStride{4, 5}), + TestAccessor{}); + } + { + const size_t strides[] = {5, 9}; + test_conversion_to_mdspan_with_accessor( + Kokkos::layout_stride::mapping>( + Kokkos::mdspan_non_standard, {}, strides), + Kokkos::View( + "v", Kokkos::LayoutStride{4, 5, 7, 9}), + 
TestAccessor{}); + } + } +}; + +TEST(TEST_CATEGORY, view_mdspan_conversion) { + TestViewMDSpanConversion::run_test(); + TestViewMDSpanConversion::run_test(); + TestViewMDSpanConversion::run_test(); +} + +} // namespace diff --git a/lib/kokkos/core/unit_test/TestMathematicalConstants.hpp b/lib/kokkos/core/unit_test/TestMathematicalConstants.hpp index e446d81321..f52bfeaff7 100644 --- a/lib/kokkos/core/unit_test/TestMathematicalConstants.hpp +++ b/lib/kokkos/core/unit_test/TestMathematicalConstants.hpp @@ -63,8 +63,7 @@ struct TestMathematicalConstants { KOKKOS_FUNCTION void use_on_device() const { #if defined(KOKKOS_COMPILER_NVCC) || defined(KOKKOS_ENABLE_OPENMPTARGET) || \ - defined(KOKKOS_ENABLE_OPENACC) || \ - defined(KOKKOS_COMPILER_NVHPC) // FIXME_NVHPC 23.7 + defined(KOKKOS_ENABLE_OPENACC) take_by_value(Trait::value); #else (void)take_address_of(Trait::value); diff --git a/lib/kokkos/core/unit_test/TestMathematicalFunctions.hpp b/lib/kokkos/core/unit_test/TestMathematicalFunctions.hpp index ad035d4e4b..f996c61a52 100644 --- a/lib/kokkos/core/unit_test/TestMathematicalFunctions.hpp +++ b/lib/kokkos/core/unit_test/TestMathematicalFunctions.hpp @@ -1585,34 +1585,24 @@ struct TestIsFinite { Kokkos::printf("failed isfinite(float)\n"); } #if !(defined(KOKKOS_ENABLE_CUDA) && defined(KOKKOS_COMPILER_MSVC)) - if (!isfinite(static_cast(2.f)) -#ifndef KOKKOS_COMPILER_NVHPC // FIXME_NVHPC 23.7 - || isfinite(quiet_NaN::value) || + if (!isfinite(static_cast(2.f)) || + isfinite(quiet_NaN::value) || isfinite(signaling_NaN::value) || - isfinite(infinity::value) -#endif - ) { + isfinite(infinity::value)) { ++e; Kokkos::printf("failed isfinite(KE::half_t)\n"); } - if (!isfinite(static_cast(2.f)) -#ifndef KOKKOS_COMPILER_NVHPC // FIXME_NVHPC 23.7 - || isfinite(quiet_NaN::value) || + if (!isfinite(static_cast(2.f)) || + isfinite(quiet_NaN::value) || isfinite(signaling_NaN::value) || - isfinite(infinity::value) -#endif - ) { + isfinite(infinity::value)) { ++e; 
Kokkos::printf("failed isfinite(KE::bhalf_t)\n"); } #endif - if (!isfinite(3.) -#ifndef KOKKOS_COMPILER_NVHPC // FIXME_NVHPC 23.7 - || isfinite(quiet_NaN::value) || + if (!isfinite(3.) || isfinite(quiet_NaN::value) || isfinite(signaling_NaN::value) || - isfinite(infinity::value) -#endif - ) { + isfinite(infinity::value)) { ++e; Kokkos::printf("failed isfinite(double)\n"); } @@ -1666,33 +1656,24 @@ struct TestIsInf { Kokkos::printf("failed isinf(float)\n"); } #if !(defined(KOKKOS_ENABLE_CUDA) && defined(KOKKOS_COMPILER_MSVC)) - if (isinf(static_cast(2.f)) -#ifndef KOKKOS_COMPILER_NVHPC // FIXME_NVHPC 23.7 - || isinf(quiet_NaN::value) || + if (isinf(static_cast(2.f)) || + isinf(quiet_NaN::value) || isinf(signaling_NaN::value) || - !isinf(infinity::value) -#endif - ) { + !isinf(infinity::value)) { ++e; Kokkos::printf("failed isinf(KE::half_t)\n"); } - if (isinf(static_cast(2.f)) -#ifndef KOKKOS_COMPILER_NVHPC // FIXME_NVHPC 23.7 - || isinf(quiet_NaN::value) || + if (isinf(static_cast(2.f)) || + isinf(quiet_NaN::value) || isinf(signaling_NaN::value) || - !isinf(infinity::value) -#endif - ) { + !isinf(infinity::value)) { ++e; Kokkos::printf("failed isinf(KE::bhalf_t)\n"); } #endif - if (isinf(3.) -#ifndef KOKKOS_COMPILER_NVHPC // FIXME_NVHPC 23.7 - || isinf(quiet_NaN::value) || - isinf(signaling_NaN::value) || !isinf(infinity::value) -#endif - ) { + if (isinf(3.) 
|| isinf(quiet_NaN::value) || + isinf(signaling_NaN::value) || + !isinf(infinity::value)) { ++e; Kokkos::printf("failed isinf(double)\n"); } @@ -1746,32 +1727,23 @@ struct TestIsNaN { Kokkos::printf("failed isnan(float)\n"); } #if !(defined(KOKKOS_ENABLE_CUDA) && defined(KOKKOS_COMPILER_MSVC)) - if (isnan(static_cast(2.f)) -#ifndef KOKKOS_COMPILER_NVHPC // FIXME_NVHPC 23.7 - || !isnan(quiet_NaN::value) || + if (isnan(static_cast(2.f)) || + !isnan(quiet_NaN::value) || !isnan(signaling_NaN::value) || - isnan(infinity::value) -#endif - ) { + isnan(infinity::value)) { ++e; Kokkos::printf("failed isnan(KE::half_t)\n"); } - if (isnan(static_cast(2.f)) -#ifndef KOKKOS_COMPILER_NVHPC // FIXME_NVHPC 23.7 - || !isnan(quiet_NaN::value) || + if (isnan(static_cast(2.f)) || + !isnan(quiet_NaN::value) || !isnan(signaling_NaN::value) || - isnan(infinity::value) -#endif - ) { + isnan(infinity::value)) { ++e; Kokkos::printf("failed isnan(KE::bhalf_t)\n"); } - if (isnan(3.) -#ifndef KOKKOS_COMPILER_NVHPC // FIXME_NVHPC 23.7 - || !isnan(quiet_NaN::value) || - !isnan(signaling_NaN::value) || isnan(infinity::value) -#endif - ) { + if (isnan(3.) || !isnan(quiet_NaN::value) || + !isnan(signaling_NaN::value) || + isnan(infinity::value)) { ++e; Kokkos::printf("failed isnan(double)\n"); } diff --git a/lib/kokkos/core/unit_test/TestMultiGPU.hpp b/lib/kokkos/core/unit_test/TestMultiGPU.hpp new file mode 100644 index 0000000000..aad2fa45f4 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestMultiGPU.hpp @@ -0,0 +1,184 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#include + +namespace { + +void test_policies(TEST_EXECSPACE exec0, Kokkos::View v0, + TEST_EXECSPACE exec, Kokkos::View v) { + using MemorySpace = typename TEST_EXECSPACE::memory_space; + + exec.fence(); + exec0.fence(); + + Kokkos::deep_copy(exec, v, 5); + Kokkos::deep_copy(exec0, v0, 5); + + Kokkos::deep_copy(v, v0); + + int sum; + int sum0; + + Kokkos::parallel_for("Test::Range_0", + Kokkos::RangePolicy(exec0, 0, 100), + Test::FunctorRange(v0)); + Kokkos::parallel_for("Test::Range", + Kokkos::RangePolicy(exec, 0, 100), + Test::FunctorRange(v)); + exec.fence(); + exec0.fence(); + Kokkos::parallel_reduce( + "Test::RangeReduce_0", + Kokkos::RangePolicy>(exec0, + 0, 100), + Test::FunctorRangeReduce(v0), sum0); + Kokkos::parallel_reduce( + "Test::RangeReduce", + Kokkos::RangePolicy>(exec, 0, + 100), + Test::FunctorRangeReduce(v), sum); + ASSERT_EQ(600, sum0); + ASSERT_EQ(600, sum); + + Kokkos::parallel_for("Test::MDRange_0", + Kokkos::MDRangePolicy>( + exec0, {0, 0}, {10, 10}), + Test::FunctorMDRange(v0)); + Kokkos::parallel_for("Test::MDRange", + Kokkos::MDRangePolicy>( + exec, {0, 0}, {10, 10}), + Test::FunctorMDRange(v)); + Kokkos::parallel_reduce("Test::MDRangeReduce_0", + Kokkos::MDRangePolicy, + Kokkos::LaunchBounds<128, 2>>( + exec0, {0, 0}, {10, 10}), + Test::FunctorMDRangeReduce(v0), sum0); + Kokkos::parallel_reduce("Test::MDRangeReduce", + Kokkos::MDRangePolicy, + Kokkos::LaunchBounds<128, 2>>( + exec, {0, 0}, {10, 10}), + Test::FunctorMDRangeReduce(v), sum); + ASSERT_EQ(700, sum0); + ASSERT_EQ(700, sum); + + Kokkos::parallel_for("Test::Team_0", + Kokkos::TeamPolicy(exec0, 10, 10), + Test::FunctorTeam(v0)); + Kokkos::parallel_for("Test::Team", + Kokkos::TeamPolicy(exec, 10, 10), + Test::FunctorTeam(v)); + Kokkos::parallel_reduce( + "Test::Team_0", + Kokkos::TeamPolicy>(exec0, + 10, 10), + Test::FunctorTeamReduce(v0), sum0); + Kokkos::parallel_reduce( + "Test::Team", + 
Kokkos::TeamPolicy>(exec, 10, + 10), + Test::FunctorTeamReduce(v), sum); + ASSERT_EQ(800, sum0); + ASSERT_EQ(800, sum); +} + +struct ScratchFunctor { + int scratch_size; + int R; + + ScratchFunctor(int scratch_size_, int R_) + : scratch_size(scratch_size_), R(R_) {} + + KOKKOS_FUNCTION + void operator()(const Kokkos::TeamPolicy::member_type &team, + int &error_accum) const { + Kokkos::View scratch_mem( + team.team_scratch(1), scratch_size); + + // Initialize scratch memory + Kokkos::parallel_for(Kokkos::TeamVectorRange(team, 0, scratch_size), + [&](int i) { scratch_mem(i) = 0; }); + team.team_barrier(); + + // Increment each entry in scratch memory R times + for (int r = 0; r < R; ++r) { + Kokkos::parallel_for(Kokkos::TeamVectorRange(team, 0, scratch_size), + [&](int i) { scratch_mem(i) += 1; }); + } + team.team_barrier(); + + // Check that each scratch entry has been incremented exactly R times + int team_error_accum; + auto R_loc = R; // avoid implicit capture of this + Kokkos::parallel_reduce( + Kokkos::TeamVectorRange(team, 0, scratch_size), + [&](int i, int &tsum) { + if (scratch_mem(i) != R_loc) { + tsum += 1; + } + }, + team_error_accum); + Kokkos::single(Kokkos::PerTeam(team), + [&]() { error_accum += team_error_accum; }); + } +}; + +void test_scratch(TEST_EXECSPACE exec0, TEST_EXECSPACE exec1) { + constexpr int N = 10; + constexpr int R = 1000; + constexpr int scratch_size = 100; + using ScratchType = Kokkos::View; + + // Test allocating and using scratch space + ScratchFunctor f(scratch_size, R); + + auto policy0 = + Kokkos::TeamPolicy(exec0, N, 10) + .set_scratch_size( + 1, Kokkos::PerTeam(ScratchType::shmem_size(scratch_size))); + auto policy1 = + Kokkos::TeamPolicy(exec1, N, 10) + .set_scratch_size( + 1, Kokkos::PerTeam(ScratchType::shmem_size(scratch_size))); + + int error0, error1; + + Kokkos::parallel_reduce("test_scratch_device_0", policy0, f, error0); + Kokkos::parallel_reduce("test_scratch_device_1", policy1, f, error1); + ASSERT_EQ(error0, 0); + 
ASSERT_EQ(error1, 0); + + // Request larger scratch size to trigger a realloc and test + const auto new_scratch_size = scratch_size + 10; + ScratchFunctor f_more_scratch(new_scratch_size, R); + + auto policy0_more_scratch = + Kokkos::TeamPolicy(exec0, N, 10) + .set_scratch_size( + 1, Kokkos::PerTeam(ScratchType::shmem_size(new_scratch_size))); + auto policy1_more_scratch = + Kokkos::TeamPolicy(exec1, N, 10) + .set_scratch_size( + 1, Kokkos::PerTeam(ScratchType::shmem_size(new_scratch_size))); + + Kokkos::parallel_reduce("test_realloc_scratch_device_0", policy0_more_scratch, + f_more_scratch, error0); + Kokkos::parallel_reduce("test_realloc_scratch_device_1", policy1_more_scratch, + f_more_scratch, error1); + ASSERT_EQ(error0, 0); + ASSERT_EQ(error1, 0); +} +} // namespace diff --git a/lib/kokkos/core/unit_test/TestNestedReducerCTAD.cpp b/lib/kokkos/core/unit_test/TestNestedReducerCTAD.cpp new file mode 100644 index 0000000000..95493a5874 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestNestedReducerCTAD.cpp @@ -0,0 +1,246 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#include + +namespace { + +struct TestNestedReducerCTAD { + using MemorySpace = Kokkos::DefaultExecutionSpace::memory_space; + using ScalarType = int; + using IndexType = int; + using TeamPolicy = Kokkos::TeamPolicy; + using TeamHandle = TeamPolicy::member_type; + + struct FakeComparator { + template + KOKKOS_FUNCTION bool operator()(T const&, T const&) const { + return true; + } + }; + + template + struct FakeFunctor { + KOKKOS_FUNCTION void operator()(int, ValueType&) const {} + }; + + template + KOKKOS_FUNCTION static void check_types([ + [maybe_unused]] ReducerTypeToCheck const& reducer) { + static_assert(std::is_same_v); + } + + KOKKOS_FUNCTION void operator()([ + [maybe_unused]] TeamHandle const& team_handle) const { + { + using ReducerTypeExpected = Kokkos::Sum; + using ValueType = ReducerTypeExpected::value_type; + Kokkos::View view; + Kokkos::Sum reducer(view); + check_types(reducer); + } + + { + using ReducerTypeExpected = Kokkos::Prod; + using ValueType = ReducerTypeExpected::value_type; + Kokkos::View view; + Kokkos::Prod reducer(view); + check_types(reducer); + } + + { + using ReducerTypeExpected = Kokkos::Min; + using ValueType = ReducerTypeExpected::value_type; + Kokkos::View view; + Kokkos::Min reducer(view); + check_types(reducer); + } + + { + using ReducerTypeExpected = Kokkos::Max; + using ValueType = ReducerTypeExpected::value_type; + Kokkos::View view; + Kokkos::Max reducer(view); + check_types(reducer); + } + + { + using ReducerTypeExpected = Kokkos::LAnd; + using ValueType = ReducerTypeExpected::value_type; + Kokkos::View view; + Kokkos::LAnd reducer(view); + check_types(reducer); + } + + { + using ReducerTypeExpected = Kokkos::LOr; + using ValueType = ReducerTypeExpected::value_type; + Kokkos::View view; + Kokkos::LOr reducer(view); + check_types(reducer); + } + + { + using ReducerTypeExpected = Kokkos::BAnd; + using ValueType = ReducerTypeExpected::value_type; + 
Kokkos::View view; + Kokkos::BAnd reducer(view); + check_types(reducer); + } + + { + using ReducerTypeExpected = Kokkos::BOr; + using ValueType = ReducerTypeExpected::value_type; + Kokkos::View view; + Kokkos::BOr reducer(view); + check_types(reducer); + } + + { + using ReducerTypeExpected = + Kokkos::MinLoc; + using ValueType = ReducerTypeExpected::value_type; + Kokkos::View view; + Kokkos::MinLoc reducer(view); + check_types(reducer); + } + + { + using ReducerTypeExpected = + Kokkos::MaxLoc; + using ValueType = ReducerTypeExpected::value_type; + Kokkos::View view; + Kokkos::MaxLoc reducer(view); + check_types(reducer); + } + + { + using ReducerTypeExpected = Kokkos::MinMax; + using ValueType = ReducerTypeExpected::value_type; + Kokkos::View view; + Kokkos::MinMax reducer(view); + check_types(reducer); + } + + { + using ReducerTypeExpected = + Kokkos::MinMaxLoc; + using ValueType = ReducerTypeExpected::value_type; + Kokkos::View view; + Kokkos::MinMaxLoc reducer(view); + check_types(reducer); + } + + { + using ReducerTypeExpected = + Kokkos::MaxFirstLoc; + using ValueType = ReducerTypeExpected::value_type; + Kokkos::View view; + Kokkos::MaxFirstLoc reducer(view); + check_types(reducer); + } + + { + using ReducerTypeExpected = + Kokkos::MaxFirstLocCustomComparator; + using ValueType = ReducerTypeExpected::value_type; + Kokkos::View view; + FakeComparator comparator; + Kokkos::MaxFirstLocCustomComparator reducer(view, comparator); + check_types(reducer); + } + + { + using ReducerTypeExpected = + Kokkos::MinFirstLoc; + using ValueType = ReducerTypeExpected::value_type; + Kokkos::View view; + Kokkos::MinFirstLoc reducer(view); + check_types(reducer); + } + + { + using ReducerTypeExpected = + Kokkos::MinFirstLocCustomComparator; + using ValueType = ReducerTypeExpected::value_type; + Kokkos::View view; + FakeComparator comparator; + Kokkos::MinFirstLocCustomComparator reducer(view, comparator); + check_types(reducer); + } + + { + using ReducerTypeExpected = + 
Kokkos::MinMaxFirstLastLoc; + using ValueType = ReducerTypeExpected::value_type; + Kokkos::View view; + Kokkos::MinMaxFirstLastLoc reducer(view); + check_types(reducer); + } + + { + using ReducerTypeExpected = Kokkos::MinMaxFirstLastLocCustomComparator< + ScalarType, IndexType, FakeComparator, MemorySpace>; + using ValueType = ReducerTypeExpected::value_type; + Kokkos::View view; + FakeComparator comparator; + Kokkos::MinMaxFirstLastLocCustomComparator reducer(view, comparator); + check_types(reducer); + } + + { + using ReducerTypeExpected = Kokkos::FirstLoc; + using ValueType = ReducerTypeExpected::value_type; + Kokkos::View view; + Kokkos::FirstLoc reducer(view); + check_types(reducer); + } + + { + using ReducerTypeExpected = Kokkos::LastLoc; + using ValueType = ReducerTypeExpected::value_type; + Kokkos::View view; + Kokkos::LastLoc reducer(view); + check_types(reducer); + } + + { + using ReducerTypeExpected = + Kokkos::StdIsPartitioned; + using ValueType = ReducerTypeExpected::value_type; + Kokkos::View view; + Kokkos::StdIsPartitioned reducer(view); + check_types(reducer); + } + + { + using ReducerTypeExpected = + Kokkos::StdPartitionPoint; + using ValueType = ReducerTypeExpected::value_type; + Kokkos::View view; + Kokkos::StdPartitionPoint reducer(view); + check_types(reducer); + } + } + + TestNestedReducerCTAD() { + Kokkos::parallel_for(TeamPolicy(0, Kokkos::AUTO), *this); + } +}; + +} // namespace diff --git a/lib/kokkos/core/unit_test/TestNumericTraits.hpp b/lib/kokkos/core/unit_test/TestNumericTraits.hpp index 81a9d0a5e0..0c80335488 100644 --- a/lib/kokkos/core/unit_test/TestNumericTraits.hpp +++ b/lib/kokkos/core/unit_test/TestNumericTraits.hpp @@ -21,6 +21,19 @@ #include #include "Kokkos_NumericTraits.hpp" +// Suppress "'long double' is treated as 'double' in device code" +#ifdef KOKKOS_COMPILER_NVCC +#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ +#pragma nv_diagnostic push +#pragma nv_diag_suppress 20208 +#else +#ifdef __CUDA_ARCH__ +#pragma diagnostic push 
+#pragma diag_suppress 20208 +#endif +#endif +#endif + struct extrema { #define DEFINE_EXTREMA(T, m, M) \ KOKKOS_FUNCTION static T min(T) { return m; } \ @@ -145,33 +158,25 @@ struct TestNumericTraits { KOKKOS_FUNCTION void operator()(MaxExponent10, int, int&) const { use_on_device(); } // clang-format on KOKKOS_FUNCTION void operator()(QuietNaN, int, int& e) const { -#ifndef KOKKOS_COMPILER_NVHPC // FIXME_NVHPC 23.7 nan using Kokkos::Experimental::quiet_NaN; constexpr auto nan = quiet_NaN::value; auto const zero = T(0); e += (int)!(nan != nan); e += (int)!(nan != zero); -#else - (void)e; -#endif use_on_device(); } KOKKOS_FUNCTION void operator()(SignalingNaN, int, int& e) const { -#ifndef KOKKOS_COMPILER_NVHPC // FIXME_NVHPC 23.7 nan using Kokkos::Experimental::signaling_NaN; constexpr auto nan = signaling_NaN::value; auto const zero = T(0); e += (int)!(nan != nan); e += (int)!(nan != zero); -#else - (void)e; -#endif use_on_device(); } KOKKOS_FUNCTION void use_on_device() const { -#if defined(KOKKOS_COMPILER_NVCC) || defined(KOKKOS_COMPILER_NVHPC) || \ - defined(KOKKOS_ENABLE_OPENMPTARGET) || defined(KOKKOS_ENABLE_OPENACC) +#if defined(KOKKOS_COMPILER_NVCC) || defined(KOKKOS_ENABLE_OPENMPTARGET) || \ + defined(KOKKOS_ENABLE_OPENACC) take_by_value(trait::value); #else (void)take_address_of(trait::value); @@ -204,59 +209,46 @@ struct TestNumericTraits< #endif TEST(TEST_CATEGORY, numeric_traits_infinity) { -#ifndef KOKKOS_COMPILER_NVHPC // FIXME_NVHPC 23.7 TestNumericTraits(); TestNumericTraits(); -#endif TestNumericTraits(); TestNumericTraits(); - // FIXME_NVHPC 23.7 long double // FIXME_OPENMPTARGET long double on Intel GPUs -#if (!defined(KOKKOS_ENABLE_CUDA) || !defined(KOKKOS_COMPILER_NVHPC)) && \ - (!defined(KOKKOS_ENABLE_OPENMPTARGET) || !defined(KOKKOS_ARCH_INTEL_GPU)) +#if (!defined(KOKKOS_ENABLE_OPENMPTARGET) || !defined(KOKKOS_ARCH_INTEL_GPU)) TestNumericTraits(); #endif } TEST(TEST_CATEGORY, numeric_traits_epsilon) { -#ifndef KOKKOS_COMPILER_NVHPC // 
FIXME_NVHPC 23.7 bit_comparison_type TestNumericTraits(); TestNumericTraits(); -#endif TestNumericTraits(); TestNumericTraits(); - // FIXME_NVHPC 23.7 long double -#if (!defined(KOKKOS_ENABLE_CUDA) || !defined(KOKKOS_COMPILER_NVHPC)) && \ - (!defined(KOKKOS_ENABLE_OPENMPTARGET) || !defined(KOKKOS_ARCH_INTEL_GPU)) + // FIXME_OPENMPTARGET long double on Intel GPUs +#if (!defined(KOKKOS_ENABLE_OPENMPTARGET) || !defined(KOKKOS_ARCH_INTEL_GPU)) TestNumericTraits(); #endif } TEST(TEST_CATEGORY, numeric_traits_round_error) { -#ifndef KOKKOS_COMPILER_NVHPC // FIXME_NVHPC 23.7 bit_comparison_type TestNumericTraits(); TestNumericTraits(); -#endif TestNumericTraits(); TestNumericTraits(); - // FIXME_NVHPC 23.7 long double -#if (!defined(KOKKOS_ENABLE_CUDA) || !defined(KOKKOS_COMPILER_NVHPC)) && \ - (!defined(KOKKOS_ENABLE_OPENMPTARGET) || !defined(KOKKOS_ARCH_INTEL_GPU)) + // FIXME_OPENMPTARGET long double on Intel GPUs +#if (!defined(KOKKOS_ENABLE_OPENMPTARGET) || !defined(KOKKOS_ARCH_INTEL_GPU)) TestNumericTraits(); #endif } TEST(TEST_CATEGORY, numeric_traits_norm_min) { -#ifndef KOKKOS_COMPILER_NVHPC // FIXME_NVHPC 23.7 bit_comparison_type TestNumericTraits(); TestNumericTraits(); -#endif TestNumericTraits(); TestNumericTraits(); - // FIXME_NVHPC 23.7 long double -#if (!defined(KOKKOS_ENABLE_CUDA) || !defined(KOKKOS_COMPILER_NVHPC)) && \ - (!defined(KOKKOS_ENABLE_OPENMPTARGET) || !defined(KOKKOS_ARCH_INTEL_GPU)) + // FIXME_OPENMPTARGET long double on Intel GPUs +#if (!defined(KOKKOS_ENABLE_OPENMPTARGET) || !defined(KOKKOS_ARCH_INTEL_GPU)) TestNumericTraits(); #endif } @@ -264,9 +256,8 @@ TEST(TEST_CATEGORY, numeric_traits_norm_min) { TEST(TEST_CATEGORY, numeric_traits_denorm_min) { TestNumericTraits(); TestNumericTraits(); - // FIXME_NVHPC 23.7 long double -#if (!defined(KOKKOS_ENABLE_CUDA) || !defined(KOKKOS_COMPILER_NVHPC)) && \ - (!defined(KOKKOS_ENABLE_OPENMPTARGET) || !defined(KOKKOS_ARCH_INTEL_GPU)) + // FIXME_OPENMPTARGET long double on Intel GPUs +#if 
(!defined(KOKKOS_ENABLE_OPENMPTARGET) || !defined(KOKKOS_ARCH_INTEL_GPU)) TestNumericTraits(); #endif } @@ -303,10 +294,8 @@ TEST(TEST_CATEGORY, numeric_traits_finite_min_max) { TestNumericTraits(); TestNumericTraits(); TestNumericTraits(); - // FIXME_NVHPC 23.7 long double // FIXME_OPENMPTARGET long double on Intel GPUs -#if (!defined(KOKKOS_ENABLE_CUDA) || !defined(KOKKOS_COMPILER_NVHPC)) && \ - (!defined(KOKKOS_ENABLE_OPENMPTARGET) || !defined(KOKKOS_ARCH_INTEL_GPU)) +#if (!defined(KOKKOS_ENABLE_OPENMPTARGET) || !defined(KOKKOS_ARCH_INTEL_GPU)) TestNumericTraits(); TestNumericTraits(); #endif @@ -329,10 +318,8 @@ TEST(TEST_CATEGORY, numeric_traits_digits) { TestNumericTraits(); TestNumericTraits(); TestNumericTraits(); - // FIXME_NVHPC 23.7 long double // FIXME_OPENMPTARGET long double on Intel GPUs -#if (!defined(KOKKOS_ENABLE_CUDA) || !defined(KOKKOS_COMPILER_NVHPC)) && \ - (!defined(KOKKOS_ENABLE_OPENMPTARGET) || !defined(KOKKOS_ARCH_INTEL_GPU)) +#if (!defined(KOKKOS_ENABLE_OPENMPTARGET) || !defined(KOKKOS_ARCH_INTEL_GPU)) TestNumericTraits(); #endif } @@ -354,10 +341,8 @@ TEST(TEST_CATEGORY, numeric_traits_digits10) { TestNumericTraits(); TestNumericTraits(); TestNumericTraits(); - // FIXME_NVHPC 23.7 long double // FIXME_OPENMPTARGET long double on Intel GPUs -#if (!defined(KOKKOS_ENABLE_CUDA) || !defined(KOKKOS_COMPILER_NVHPC)) && \ - (!defined(KOKKOS_ENABLE_OPENMPTARGET) || !defined(KOKKOS_ARCH_INTEL_GPU)) +#if (!defined(KOKKOS_ENABLE_OPENMPTARGET) || !defined(KOKKOS_ARCH_INTEL_GPU)) TestNumericTraits(); #endif } @@ -365,10 +350,8 @@ TEST(TEST_CATEGORY, numeric_traits_digits10) { TEST(TEST_CATEGORY, numeric_traits_max_digits10) { TestNumericTraits(); TestNumericTraits(); - // FIXME_NVHPC 23.7 long double // FIXME_OPENMPTARGET long double on Intel GPUs -#if (!defined(KOKKOS_ENABLE_CUDA) || !defined(KOKKOS_COMPILER_NVHPC)) && \ - (!defined(KOKKOS_ENABLE_OPENMPTARGET) || !defined(KOKKOS_ARCH_INTEL_GPU)) +#if (!defined(KOKKOS_ENABLE_OPENMPTARGET) || 
!defined(KOKKOS_ARCH_INTEL_GPU)) TestNumericTraits(); #endif } @@ -389,10 +372,8 @@ TEST(TEST_CATEGORY, numeric_traits_radix) { TestNumericTraits(); TestNumericTraits(); TestNumericTraits(); - // FIXME_NVHPC 23.7 long double // FIXME_OPENMPTARGET long double on Intel GPUs -#if (!defined(KOKKOS_ENABLE_CUDA) || !defined(KOKKOS_COMPILER_NVHPC)) && \ - (!defined(KOKKOS_ENABLE_OPENMPTARGET) || !defined(KOKKOS_ARCH_INTEL_GPU)) +#if (!defined(KOKKOS_ENABLE_OPENMPTARGET) || !defined(KOKKOS_ARCH_INTEL_GPU)) TestNumericTraits(); #endif } @@ -406,10 +387,8 @@ TEST(TEST_CATEGORY, numeric_traits_min_max_exponent) { TestNumericTraits(); TestNumericTraits(); TestNumericTraits(); - // FIXME_NVHPC 23.7 long double // FIXME_OPENMPTARGET long double on Intel GPUs -#if (!defined(KOKKOS_ENABLE_CUDA) || !defined(KOKKOS_COMPILER_NVHPC)) && \ - (!defined(KOKKOS_ENABLE_OPENMPTARGET) || !defined(KOKKOS_ARCH_INTEL_GPU)) +#if (!defined(KOKKOS_ENABLE_OPENMPTARGET) || !defined(KOKKOS_ARCH_INTEL_GPU)) TestNumericTraits(); TestNumericTraits(); #endif @@ -420,31 +399,29 @@ TEST(TEST_CATEGORY, numeric_traits_min_max_exponent10) { TestNumericTraits(); TestNumericTraits(); TestNumericTraits(); - // FIXME_NVHPC 23.7 long double // FIXME_OPENMPTARGET long double on Intel GPUs -#if (!defined(KOKKOS_ENABLE_CUDA) || !defined(KOKKOS_COMPILER_NVHPC)) && \ - (!defined(KOKKOS_ENABLE_OPENMPTARGET) || !defined(KOKKOS_ARCH_INTEL_GPU)) +#if (!defined(KOKKOS_ENABLE_OPENMPTARGET) || !defined(KOKKOS_ARCH_INTEL_GPU)) TestNumericTraits(); TestNumericTraits(); #endif } TEST(TEST_CATEGORY, numeric_traits_quiet_and_signaling_nan) { -#ifndef KOKKOS_COMPILER_NVHPC // FIXME_NVHPC 23.7 +// FIXME_NVHPC +#ifdef KOKKOS_COMPILER_NVHPC + GTEST_SKIP() << "This test is known to fail with the NVHPC compiler"; +#endif TestNumericTraits(); TestNumericTraits(); TestNumericTraits(); TestNumericTraits(); -#endif TestNumericTraits(); TestNumericTraits(); TestNumericTraits(); TestNumericTraits(); - // FIXME_NVHPC 23.7 long double // 
FIXME_OPENMPTARGET long double on Intel GPUs -#if (!defined(KOKKOS_ENABLE_CUDA) || !defined(KOKKOS_COMPILER_NVHPC)) && \ - (!defined(KOKKOS_ENABLE_OPENMPTARGET) || !defined(KOKKOS_ARCH_INTEL_GPU)) +#if (!defined(KOKKOS_ENABLE_OPENMPTARGET) || !defined(KOKKOS_ARCH_INTEL_GPU)) TestNumericTraits(); TestNumericTraits(); #endif @@ -736,3 +713,13 @@ CHECK_NAN_INSTANTIATED_ON_CV_QUALIFIED_TYPES_FLOATING_POINT(signaling_NaN); #undef CHECK_NAN_INSTANTIATED_ON_CV_QUALIFIED_TYPES_FLOATING_POINT #undef CHECK_NAN_INSTANTIATED_ON_CV_QUALIFIED_TYPES + +#ifdef KOKKOS_COMPILER_NVCC +#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ +#pragma nv_diagnostic pop +#else +#ifdef __CUDA_ARCH__ +#pragma diagnostic pop +#endif +#endif +#endif diff --git a/lib/kokkos/core/unit_test/TestOther.hpp b/lib/kokkos/core/unit_test/TestOther.hpp index fcf0353a88..9daef3ca3f 100644 --- a/lib/kokkos/core/unit_test/TestOther.hpp +++ b/lib/kokkos/core/unit_test/TestOther.hpp @@ -16,13 +16,8 @@ #ifndef KOKKOS_TEST_OTHER_HPP #define KOKKOS_TEST_OTHER_HPP -#include #include #include #include -// with VS 16.11.3 and CUDA 11.4.2 getting cudafe stackoverflow crash -#if !(defined(_WIN32) && defined(KOKKOS_ENABLE_CUDA)) -#include -#endif #endif diff --git a/lib/kokkos/core/unit_test/TestRangePolicyConstructors.hpp b/lib/kokkos/core/unit_test/TestRangePolicyConstructors.hpp index c8c1542af1..d6920beed0 100644 --- a/lib/kokkos/core/unit_test/TestRangePolicyConstructors.hpp +++ b/lib/kokkos/core/unit_test/TestRangePolicyConstructors.hpp @@ -20,6 +20,7 @@ #include #include +#include namespace { @@ -196,4 +197,43 @@ TEST(TEST_CATEGORY_DEATH, range_policy_implicitly_converted_bounds) { #endif } +constexpr bool test_chunk_size_explicit() { + using ExecutionSpace = TEST_EXECSPACE; + using Kokkos::ChunkSize; + +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 + static_assert(std::is_convertible_v); + static_assert(std::is_constructible_v); + // Some execution spaces were implicitly constructible from int + // which made the constructor call 
ambiguous. + static_assert( + std::is_constructible_v || + std::is_constructible_v< + Kokkos::RangePolicy, int, int, int>); + static_assert(std::is_constructible_v< + Kokkos::RangePolicy, int, int, + ChunkSize>); + static_assert(std::is_constructible_v, + ExecutionSpace, int, int, int>); + static_assert(std::is_constructible_v, + ExecutionSpace, int, int, ChunkSize>); +#else + static_assert(!std::is_convertible_v); + static_assert(std::is_constructible_v); + static_assert( + !std::is_constructible_v< + Kokkos::RangePolicy, int, int, int>); + static_assert(std::is_constructible_v< + Kokkos::RangePolicy, int, int, + ChunkSize>); + static_assert(!std::is_constructible_v, + ExecutionSpace, int, int, int>); + static_assert(std::is_constructible_v, + ExecutionSpace, int, int, ChunkSize>); +#endif + return true; +} + +static_assert(test_chunk_size_explicit()); + } // namespace diff --git a/lib/kokkos/core/unit_test/TestRealloc.hpp b/lib/kokkos/core/unit_test/TestRealloc.hpp index 2c9dc5ee47..f30c9e15e1 100644 --- a/lib/kokkos/core/unit_test/TestRealloc.hpp +++ b/lib/kokkos/core/unit_test/TestRealloc.hpp @@ -144,6 +144,11 @@ void impl_testRealloc() { EXPECT_EQ(oldPointer, newPointer); } } +struct NoDefaultConstructor { + int value; + KOKKOS_FUNCTION + NoDefaultConstructor(int x) : value(x) {} +}; template void testRealloc() { @@ -154,6 +159,14 @@ void testRealloc() { impl_testRealloc(); // without data initialization } + // Check #6992 fix (no default initialization in realloc without initializing) + { + using view_type = Kokkos::View; + view_type view_1d_no_default( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "view_1d_no_default"), + 5); + realloc_dispatch(WithoutInitializing{}, view_1d_no_default, 3); + } } } // namespace TestViewRealloc diff --git a/lib/kokkos/core/unit_test/TestResize.hpp b/lib/kokkos/core/unit_test/TestResize.hpp index 13d7e16d58..3102d2b9a1 100644 --- a/lib/kokkos/core/unit_test/TestResize.hpp +++ b/lib/kokkos/core/unit_test/TestResize.hpp @@ 
-358,6 +358,12 @@ void impl_testResize() { } } +struct NoDefaultConstructor { + int value; + KOKKOS_FUNCTION + NoDefaultConstructor(int x) : value(x) {} +}; + template void testResize() { { @@ -367,6 +373,13 @@ void testResize() { impl_testResize(); // without data initialization } + { + using view_type = Kokkos::View; + view_type view_1d_no_default( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "view_1d_no_default"), + 5); + resize_dispatch(WithoutInitializing{}, view_1d_no_default, 3); + } } } // namespace TestViewResize diff --git a/lib/kokkos/core/unit_test/TestSpaceAwareAccessor.hpp b/lib/kokkos/core/unit_test/TestSpaceAwareAccessor.hpp new file mode 100644 index 0000000000..2fad17cb85 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestSpaceAwareAccessor.hpp @@ -0,0 +1,156 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#include +#include + +#include +#ifndef KOKKOS_ENABLE_CXX17 +#include +#endif + +template +struct funky_data_handle { + T* val; + + KOKKOS_FUNCTION + operator T*() { return val; } + KOKKOS_FUNCTION + operator const T*() const { return val; } +}; + +template +struct FunkyAcc { + using element_type = ElementType; + using reference = std::conditional_t, + element_type, element_type&>; + using data_handle_type = funky_data_handle; + using offset_policy = Kokkos::default_accessor; + KOKKOS_FUNCTION + reference access(data_handle_type p, size_t i) const { return p.val[i]; } + KOKKOS_FUNCTION + element_type* offset(data_handle_type p, size_t i) const { return p.val + i; } +}; + +template +void test_space_aware_accessor() { + using memory_space_t = MemorySpace; + using value_type = std::remove_const_t; + Kokkos::View v("V", 100); + + Kokkos::parallel_for( + Kokkos::RangePolicy(0, v.extent(0)), + KOKKOS_LAMBDA(int i) { v(i) = i; }); + + int errors; + using acc_t = Kokkos::Impl::SpaceAwareAccessor>; + acc_t acc{}; + typename acc_t::data_handle_type ptr{v.data()}; + + Kokkos::parallel_reduce( + Kokkos::RangePolicy(0, v.extent(0)), + KOKKOS_LAMBDA(int i, int& error) { + if (acc.access(ptr, i) != ptr[i]) error++; + if (acc.offset(ptr, i) != ptr + i) error++; + static_assert(std::is_same_v); + if constexpr (std::is_const_v) { + static_assert(std::is_same_v); + } else { + static_assert(std::is_same_v); + } + static_assert(std::is_same_v>); + static_assert( + std::is_same_v>>); + if constexpr (std::is_const_v) { + static_assert(std::is_same_v>); + } else { + static_assert(std::is_same_v); + } + static_assert(std::is_same_v); + static_assert(std::is_same_v&>); + static_assert(std::is_nothrow_move_constructible_v); + static_assert(std::is_nothrow_move_assignable_v); + static_assert(std::is_nothrow_swappable_v); + static_assert( + std::is_same_v); + static_assert( + std::is_same_v>); +#ifndef 
KOKKOS_ENABLE_CXX17 + static_assert(std::copyable); + static_assert(std::is_empty_v); +#endif + }, + errors); + ASSERT_EQ(errors, 0); +} + +void test_space_aware_accessor_conversion() { + using ExecutionSpace = TEST_EXECSPACE; + using memory_space_t = typename ExecutionSpace::memory_space; + using T = float; + using acc_t = Kokkos::Impl::SpaceAwareAccessor>; + using const_acc_t = + Kokkos::Impl::SpaceAwareAccessor>; + using int_acc_t = + Kokkos::Impl::SpaceAwareAccessor>; + using host_acc_t = + Kokkos::Impl::SpaceAwareAccessor>; + using anon_acc_t = + Kokkos::Impl::SpaceAwareAccessor>; + + Kokkos::parallel_for( + Kokkos::RangePolicy(0, 1), KOKKOS_LAMBDA(int) { + static_assert(std::is_constructible_v); + static_assert(std::is_convertible_v); + static_assert(!std::is_constructible_v); + static_assert(!std::is_constructible_v); + static_assert( + std::is_constructible_v == + Kokkos::Impl::MemorySpaceAccess::assignable); + static_assert( + std::is_constructible_v == + Kokkos::Impl::MemorySpaceAccess::assignable); + static_assert(std::is_constructible_v); + static_assert(std::is_constructible_v); + static_assert(std::is_convertible_v); + static_assert(std::is_convertible_v); + }); +} + +TEST(TEST_CATEGORY, mdspan_space_aware_accessor) { + using ExecutionSpace = TEST_EXECSPACE; + test_space_aware_accessor(); + test_space_aware_accessor(); + test_space_aware_accessor(); + test_space_aware_accessor(); + test_space_aware_accessor(); + test_space_aware_accessor(); + test_space_aware_accessor_conversion(); +} diff --git a/lib/kokkos/core/unit_test/TestSpaceAwareAccessorAccessViolation.hpp b/lib/kokkos/core/unit_test/TestSpaceAwareAccessorAccessViolation.hpp new file mode 100644 index 0000000000..b9982d5fc4 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestSpaceAwareAccessorAccessViolation.hpp @@ -0,0 +1,128 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#include + +#include + +template +struct TestMemoryAccessViolation { + Kokkos::Impl::SpaceAwareAccessor> + acc; + + KOKKOS_FUNCTION decltype(auto) bad_access() const { + return acc.access(nullptr, 0); + } + + KOKKOS_FUNCTION void operator()(int) const { ++bad_access(); } + + TestMemoryAccessViolation(ExecutionSpace const& s, + std::string const& matcher) { + constexpr bool accessible_from_execution_space = Kokkos::SpaceAccessibility< + /*AccessSpace=*/ExecutionSpace, + /*MemorySpace=*/MemorySpace>::accessible; + EXPECT_FALSE(accessible_from_execution_space); + EXPECT_DEATH( + { + Kokkos::parallel_for(Kokkos::RangePolicy(s, 0, 1), + *this); + Kokkos::fence(); + }, + matcher); + } +}; + +template +void test_memory_access_violation(ExecutionSpace const& s, + std::string const& m) { + TestMemoryAccessViolation(s, m); +} + +template +void test_memory_access_violations_from_host() { + using memory_space_t = typename ExecutionSpace::memory_space; + using exec_space_t = Kokkos::DefaultHostExecutionSpace; + const exec_space_t exec_space{}; + std::string const message = + "Kokkos::SpaceAwareAccessor ERROR: attempt to access inaccessible memory " + "space"; + test_memory_access_violation(exec_space, + message); +} + +template +void test_memory_access_violations_from_device() { + using memory_space_t = Kokkos::HostSpace; + using exec_space_t = ExecutionSpace; + const exec_space_t exec_space{}; + std::string const message = + "Kokkos::SpaceAwareAccessor ERROR: attempt to access inaccessible memory " + "space"; + 
test_memory_access_violation(exec_space, + message); +} + +// FIXME_SYCL +#if !(defined(KOKKOS_COMPILER_INTEL_LLVM) && defined(KOKKOS_ENABLE_SYCL)) +TEST(TEST_CATEGORY_DEATH, + mdspan_space_aware_accessor_invalid_access_from_host) { + ::testing::FLAGS_gtest_death_test_style = "threadsafe"; + + using ExecutionSpace = TEST_EXECSPACE; + + if (Kokkos::SpaceAccessibility< + /*AccessSpace=*/Kokkos::HostSpace, + /*MemorySpace=*/typename ExecutionSpace::memory_space>::accessible) { + GTEST_SKIP() << "skipping since no memory access violation would occur"; + } + + test_memory_access_violations_from_host(); +} +#endif + +TEST(TEST_CATEGORY_DEATH, + mdspan_space_aware_accessor_invalid_access_from_device) { + ::testing::FLAGS_gtest_death_test_style = "threadsafe"; + + using ExecutionSpace = TEST_EXECSPACE; + + if (Kokkos::SpaceAccessibility< + /*AccessSpace=*/ExecutionSpace, + /*MemorySpace=*/Kokkos::HostSpace>::accessible) { + GTEST_SKIP() << "skipping since no memory access violation would occur"; + } + +#if defined(KOKKOS_ENABLE_SYCL) && defined(NDEBUG) // FIXME_SYCL + if (std::is_same::value) { + GTEST_SKIP() << "skipping SYCL device-side abort does not work when NDEBUG " + "is defined"; + } +#endif +#if defined(KOKKOS_ENABLE_OPENMPTARGET) // FIXME_OPENMPTARGET + if (std::is_same::value) { + GTEST_SKIP() << "skipping because OpenMPTarget backend is currently not " + "able to abort from the device"; + } +#endif +#if defined(KOKKOS_ENABLE_OPENACC) // FIXME_OPENACC + if (std::is_same::value) { + GTEST_SKIP() << "skipping because OpenACC backend is currently not " + "able to abort from the device"; + } +#endif + + test_memory_access_violations_from_device(); +} diff --git a/lib/kokkos/core/unit_test/TestTeamMDRangePolicyCTAD.cpp b/lib/kokkos/core/unit_test/TestTeamMDRangePolicyCTAD.cpp new file mode 100644 index 0000000000..0de639e02e --- /dev/null +++ b/lib/kokkos/core/unit_test/TestTeamMDRangePolicyCTAD.cpp @@ -0,0 +1,199 @@ +//@HEADER +// 
************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#include + +namespace { + +struct TestTeamThreadMDRangeCTAD { + using TeamPolicy = Kokkos::TeamPolicy; + using TeamHandle = TeamPolicy::member_type; + + KOKKOS_FUNCTION void operator()(TeamHandle const& team_handle) const { + { + Kokkos::TeamThreadMDRange md_range(team_handle, 0, 0); + static_assert( + std::is_same_v, TeamHandle>, + decltype(md_range)>); + } + + { + Kokkos::TeamThreadMDRange md_range(team_handle, 0, 0, 0); + static_assert( + std::is_same_v, TeamHandle>, + decltype(md_range)>); + } + + { + Kokkos::TeamThreadMDRange md_range(team_handle, 0, 0, 0, 0); + static_assert( + std::is_same_v, TeamHandle>, + decltype(md_range)>); + } + + { + Kokkos::TeamThreadMDRange md_range(team_handle, 0, 0, 0, 0, 0); + static_assert( + std::is_same_v, TeamHandle>, + decltype(md_range)>); + } + + { + Kokkos::TeamThreadMDRange md_range(team_handle, 0, 0, 0, 0, 0, 0); + static_assert( + std::is_same_v, TeamHandle>, + decltype(md_range)>); + } + + { + Kokkos::TeamThreadMDRange md_range(team_handle, 0, 0, 0, 0, 0, 0, 0); + static_assert( + std::is_same_v, TeamHandle>, + decltype(md_range)>); + } + + { + Kokkos::TeamThreadMDRange md_range(team_handle, 0, 0, 0, 0, 0, 0, 0, 0); + static_assert( + std::is_same_v, TeamHandle>, + decltype(md_range)>); + } + } + + TestTeamThreadMDRangeCTAD() { + Kokkos::parallel_for(TeamPolicy(0, Kokkos::AUTO), *this); + } +}; + +struct TestTeamVectorMDRangeCTAD { + using TeamPolicy = Kokkos::TeamPolicy; + using TeamHandle = 
TeamPolicy::member_type; + + KOKKOS_FUNCTION void operator()(TeamHandle const& team_handle) const { + { + Kokkos::TeamVectorMDRange md_range(team_handle, 0, 0); + static_assert( + std::is_same_v, TeamHandle>, + decltype(md_range)>); + } + { + Kokkos::TeamVectorMDRange md_range(team_handle, 0, 0, 0); + static_assert( + std::is_same_v, TeamHandle>, + decltype(md_range)>); + } + + { + Kokkos::TeamVectorMDRange md_range(team_handle, 0, 0, 0, 0); + static_assert( + std::is_same_v, TeamHandle>, + decltype(md_range)>); + } + + { + Kokkos::TeamVectorMDRange md_range(team_handle, 0, 0, 0, 0, 0); + static_assert( + std::is_same_v, TeamHandle>, + decltype(md_range)>); + } + + { + Kokkos::TeamVectorMDRange md_range(team_handle, 0, 0, 0, 0, 0, 0); + static_assert( + std::is_same_v, TeamHandle>, + decltype(md_range)>); + } + + { + Kokkos::TeamVectorMDRange md_range(team_handle, 0, 0, 0, 0, 0, 0, 0); + static_assert( + std::is_same_v, TeamHandle>, + decltype(md_range)>); + } + + { + Kokkos::TeamVectorMDRange md_range(team_handle, 0, 0, 0, 0, 0, 0, 0, 0); + static_assert( + std::is_same_v, TeamHandle>, + decltype(md_range)>); + } + } + + TestTeamVectorMDRangeCTAD() { + Kokkos::parallel_for(TeamPolicy(0, Kokkos::AUTO), *this); + } +}; + +struct TestThreadVectorMDRangeCTAD { + using TeamPolicy = Kokkos::TeamPolicy; + using TeamHandle = TeamPolicy::member_type; + + template + KOKKOS_FUNCTION static void check_types([ + [maybe_unused]] PolicyTypeToCheck const& team_handle) { + static_assert(std::is_same_v); + } + + KOKKOS_FUNCTION void operator()(TeamHandle const& team_handle) const { + { + Kokkos::ThreadVectorMDRange md_range(team_handle, 0, 0); + check_types, TeamHandle>>( + md_range); + } + + { + Kokkos::ThreadVectorMDRange md_range(team_handle, 0, 0, 0); + check_types, TeamHandle>>( + md_range); + } + + { + Kokkos::ThreadVectorMDRange md_range(team_handle, 0, 0, 0, 0); + check_types, TeamHandle>>( + md_range); + } + + { + Kokkos::ThreadVectorMDRange md_range(team_handle, 0, 0, 0, 
0, 0); + check_types, TeamHandle>>( + md_range); + } + + { + Kokkos::ThreadVectorMDRange md_range(team_handle, 0, 0, 0, 0, 0, 0); + check_types, TeamHandle>>( + md_range); + } + + { + Kokkos::ThreadVectorMDRange md_range(team_handle, 0, 0, 0, 0, 0, 0, 0); + check_types, TeamHandle>>( + md_range); + } + + { + Kokkos::ThreadVectorMDRange md_range(team_handle, 0, 0, 0, 0, 0, 0, 0, 0); + check_types, TeamHandle>>( + md_range); + } + } + + TestThreadVectorMDRangeCTAD() { + Kokkos::parallel_for(TeamPolicy(0, Kokkos::AUTO), *this); + } +}; + +} // namespace diff --git a/lib/kokkos/core/unit_test/TestTeamPolicyCTAD.cpp b/lib/kokkos/core/unit_test/TestTeamPolicyCTAD.cpp new file mode 100644 index 0000000000..07aaeae819 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestTeamPolicyCTAD.cpp @@ -0,0 +1,135 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#include + +namespace { + +struct TestTeamPolicyCTAD { + template + static void maybe_unused(Ts&&...) 
{} + + struct SomeExecutionSpace { + using execution_space = SomeExecutionSpace; + using size_type = size_t; + }; + static_assert(Kokkos::is_execution_space_v); + + struct ImplicitlyConvertibleToDefaultExecutionSpace { + [[maybe_unused]] operator Kokkos::DefaultExecutionSpace() const { + return Kokkos::DefaultExecutionSpace(); + } + }; + static_assert(!Kokkos::is_execution_space_v< + ImplicitlyConvertibleToDefaultExecutionSpace>); + + [[maybe_unused]] static inline Kokkos::DefaultExecutionSpace des; + [[maybe_unused]] static inline ImplicitlyConvertibleToDefaultExecutionSpace + notEs; + [[maybe_unused]] static inline SomeExecutionSpace ses; + + [[maybe_unused]] static inline int i; + + // Workaround for nvc++ (CUDA-11.7-NVHPC) ignoring [[maybe_unused]] on + // ImplicitlyConvertibleToDefaultExecutionSpace::operator + // Kokkos::DefaultExecutionSpace() const + [[maybe_unused]] static inline Kokkos::DefaultExecutionSpace notEsToDes = + notEs; + + // Workaround for HIP-ROCm-5.2 warning about was declared but never referenced + TestTeamPolicyCTAD() { maybe_unused(des, notEs, ses, i, notEsToDes); } + + // Default construction deduces to TeamPolicy<> + static_assert( + std::is_same_v, decltype(Kokkos::TeamPolicy{})>); + + // Execution space not provided deduces to TeamPolicy<> + + static_assert( + std::is_same_v, decltype(Kokkos::TeamPolicy(i, i))>); + static_assert(std::is_same_v, + decltype(Kokkos::TeamPolicy(i, i, i))>); + static_assert(std::is_same_v, + decltype(Kokkos::TeamPolicy(i, Kokkos::AUTO))>); + static_assert( + std::is_same_v, + decltype(Kokkos::TeamPolicy(i, Kokkos::AUTO, i))>); + static_assert(std::is_same_v, + decltype(Kokkos::TeamPolicy(i, Kokkos::AUTO, + Kokkos::AUTO))>); + static_assert( + std::is_same_v, + decltype(Kokkos::TeamPolicy(i, i, Kokkos::AUTO))>); + + // DefaultExecutionSpace deduces to TeamPolicy<> + + static_assert(std::is_same_v, + decltype(Kokkos::TeamPolicy(des, i, i))>); + static_assert(std::is_same_v, + decltype(Kokkos::TeamPolicy(des, 
i, i, i))>); + static_assert( + std::is_same_v, + decltype(Kokkos::TeamPolicy(des, i, Kokkos::AUTO))>); + static_assert( + std::is_same_v, + decltype(Kokkos::TeamPolicy(des, i, Kokkos::AUTO, i))>); + static_assert(std::is_same_v, + decltype(Kokkos::TeamPolicy(des, i, Kokkos::AUTO, + Kokkos::AUTO))>); + static_assert( + std::is_same_v, + decltype(Kokkos::TeamPolicy(des, i, i, Kokkos::AUTO))>); + + // Convertible to DefaultExecutionSpace deduces to TeamPolicy<> + + static_assert(std::is_same_v, + decltype(Kokkos::TeamPolicy(notEs, i, i))>); + static_assert(std::is_same_v, + decltype(Kokkos::TeamPolicy(notEs, i, i, i))>); + static_assert( + std::is_same_v, + decltype(Kokkos::TeamPolicy(notEs, i, Kokkos::AUTO))>); + static_assert( + std::is_same_v, + decltype(Kokkos::TeamPolicy(notEs, i, Kokkos::AUTO, i))>); + static_assert(std::is_same_v, + decltype(Kokkos::TeamPolicy( + notEs, i, Kokkos::AUTO, Kokkos::AUTO))>); + static_assert( + std::is_same_v, + decltype(Kokkos::TeamPolicy(notEs, i, i, Kokkos::AUTO))>); + + // SES != DefaultExecutionSpace deduces to TeamPolicy + + static_assert(std::is_same_v, + decltype(Kokkos::TeamPolicy(ses, i, i))>); + static_assert(std::is_same_v, + decltype(Kokkos::TeamPolicy(ses, i, i, i))>); + static_assert( + std::is_same_v, + decltype(Kokkos::TeamPolicy(ses, i, Kokkos::AUTO))>); + static_assert( + std::is_same_v, + decltype(Kokkos::TeamPolicy(ses, i, Kokkos::AUTO, i))>); + static_assert(std::is_same_v, + decltype(Kokkos::TeamPolicy(ses, i, Kokkos::AUTO, + Kokkos::AUTO))>); + static_assert( + std::is_same_v, + decltype(Kokkos::TeamPolicy(ses, i, i, Kokkos::AUTO))>); +}; + +} // namespace diff --git a/lib/kokkos/core/unit_test/TestTeamVector.hpp b/lib/kokkos/core/unit_test/TestTeamVector.hpp index 5e16539d65..4d8f42720d 100644 --- a/lib/kokkos/core/unit_test/TestTeamVector.hpp +++ b/lib/kokkos/core/unit_test/TestTeamVector.hpp @@ -1060,11 +1060,8 @@ TEST(TEST_CATEGORY, parallel_scan_with_reducers) { constexpr int n = 1000000; constexpr int 
n_vector_range = 100; -#if defined(KOKKOS_ENABLE_CUDA) && \ - defined(KOKKOS_COMPILER_NVHPC) // FIXME_NVHPC 23.7 - if constexpr (std::is_same_v) { - GTEST_SKIP() << "All but max inclusive scan differ at index 101"; - } +#ifdef KOKKOS_IMPL_32BIT + GTEST_SKIP() << "Failing KOKKOS_IMPL_32BIT"; // FIXME_32BIT #endif checkScan(0))); -#if defined(KOKKOS_ENABLE_CUDA) && \ - defined(KOKKOS_COMPILER_NVHPC) // FIXME_NVHPC 23.7 - if constexpr (std::is_same_v) { - GTEST_SKIP() << "Disabling 2/3rd of the test for now"; - } -#endif ASSERT_TRUE((TestTeamVectorRange::Test(1))); // FIXME_OPENMPTARGET - Use of kokkos reducers currently results in runtime // memory errors. diff --git a/lib/kokkos/core/unit_test/TestViewAPI.hpp b/lib/kokkos/core/unit_test/TestViewAPI.hpp index ca098dbc24..53c1f01678 100644 --- a/lib/kokkos/core/unit_test/TestViewAPI.hpp +++ b/lib/kokkos/core/unit_test/TestViewAPI.hpp @@ -837,18 +837,15 @@ struct TestViewMirror { view_const_cast(v)); } - template + template struct CopyUnInit { - using mirror_view_type = typename Kokkos::Impl::MirrorViewType< - Space, double *, Layout, Kokkos::HostSpace, MemoryTraits>::view_type; - - mirror_view_type a_d; + View a_d; KOKKOS_INLINE_FUNCTION - CopyUnInit(mirror_view_type &a_d_) : a_d(a_d_) {} + explicit CopyUnInit(View const &a_d_) : a_d(a_d_) {} KOKKOS_INLINE_FUNCTION - void operator()(const typename Space::size_type i) const { + void operator()(const typename View::size_type i) const { a_d(i) = (double)(10 - i); } }; @@ -875,7 +872,8 @@ struct TestViewMirror { Kokkos::parallel_for( Kokkos::RangePolicy(0, int(10)), - CopyUnInit(a_d)); + // decltype required for Intel classics, that doesn't recognize the CTAD + CopyUnInit(a_d)); Kokkos::deep_copy(a_h, a_d); @@ -1339,6 +1337,40 @@ class TestViewAPI { ASSERT_EQ(dz.data(), nullptr); } + struct test_refcount_poison_copy_functor { + using view_type = Kokkos::View; + explicit test_refcount_poison_copy_functor(view_type v) : view(v) {} + + test_refcount_poison_copy_functor( + 
const test_refcount_poison_copy_functor &other) + : view(other.view) { + throw std::bad_alloc(); + } + + KOKKOS_INLINE_FUNCTION void operator()(int) const {} + + view_type view; + }; + + static void run_test_refcount_exception() { + using view_type = typename test_refcount_poison_copy_functor::view_type; + view_type original("original", N0); + ASSERT_EQ(original.use_count(), 1); + + // test_refcount_poison_copy_functor throws during copy construction + try { + Kokkos::parallel_for( + Kokkos::RangePolicy(0, N0), + test_refcount_poison_copy_functor(original)); + } catch (const std::bad_alloc &) { + } + + // Ensure refcounting is enabled, we should increment here + auto copy = original; + ASSERT_EQ(original.use_count(), 2); + ASSERT_EQ(copy.use_count(), 2); + } + static void run_test_deep_copy_empty() { // Check Deep Copy of LayoutLeft to LayoutRight { @@ -1539,56 +1571,6 @@ class TestViewAPI { typename multivector_type::const_type cmvX(cmv); typename const_multivector_type::const_type ccmvX(cmv); } - - static void run_test_error() { -#ifdef KOKKOS_ENABLE_OPENMPTARGET - if (std::is_same::value) - return; -#endif -// FIXME_MSVC_WITH_CUDA -// This test doesn't behave as expected on Windows with CUDA -#if defined(_WIN32) && defined(KOKKOS_ENABLE_CUDA) - if (std::is_same::value) - return; -#endif - bool did_throw = false; - auto alloc_size = std::numeric_limits::max() - 42; - try { - auto should_always_fail = dView1("hello_world_failure", alloc_size); - } catch (std::runtime_error const &error) { - // TODO once we remove the conversion to std::runtime_error, catch the - // appropriate Kokkos error here - std::string msg = error.what(); - ASSERT_PRED_FORMAT2(::testing::IsSubstring, "hello_world_failure", msg); - ASSERT_PRED_FORMAT2(::testing::IsSubstring, - typename device::memory_space{}.name(), msg); - // Can't figure out how to make assertions either/or, so we'll just use - // an if statement here for now. 
Test failure message will be a bit - // misleading, but developers should figure out what's going on pretty - // quickly. - if (msg.find("is not a valid size") != std::string::npos) { - ASSERT_PRED_FORMAT2(::testing::IsSubstring, "is not a valid size", msg); - } else -#ifdef KOKKOS_ENABLE_SYCL - if (msg.find("insufficient memory") != std::string::npos) -#endif - { - ASSERT_PRED_FORMAT2(::testing::IsSubstring, "insufficient memory", msg); - } - // SYCL cannot tell the reason why a memory allocation failed -#ifdef KOKKOS_ENABLE_SYCL - else { - // Otherwise, there has to be some sort of "unknown error" error - ASSERT_PRED_FORMAT2(::testing::IsSubstring, - "because of an unknown error.", msg); - } -#endif - did_throw = true; - } - ASSERT_TRUE(did_throw); - } }; } // namespace Test diff --git a/lib/kokkos/core/unit_test/TestViewAPI_c.hpp b/lib/kokkos/core/unit_test/TestViewAPI_c.hpp index 5efbd95bc9..042da1e984 100644 --- a/lib/kokkos/core/unit_test/TestViewAPI_c.hpp +++ b/lib/kokkos/core/unit_test/TestViewAPI_c.hpp @@ -19,6 +19,7 @@ namespace Test { TEST(TEST_CATEGORY, view_api_c) { + TestViewAPI::run_test_refcount_exception(); TestViewAPI::run_test_deep_copy_empty(); TestViewAPI::run_test_view_operator_b(); } diff --git a/lib/kokkos/core/unit_test/TestViewAPI_d.hpp b/lib/kokkos/core/unit_test/TestViewAPI_d.hpp index b0d759ffcc..075ac3329c 100644 --- a/lib/kokkos/core/unit_test/TestViewAPI_d.hpp +++ b/lib/kokkos/core/unit_test/TestViewAPI_d.hpp @@ -26,22 +26,4 @@ TEST(TEST_CATEGORY, view_api_d) { TestViewAPI::run_test_view_operator_c(); } -TEST(TEST_CATEGORY, view_allocation_error) { -#if defined(__has_feature) -#if __has_feature(address_sanitizer) - GTEST_SKIP() << "AddressSanitzer detects allocating too much memory " - "preventing our checks to run"; -#endif -#endif -#if ((HIP_VERSION_MAJOR == 5) && (HIP_VERSION_MINOR == 3)) - GTEST_SKIP() << "ROCm 5.3 segfaults when trying to allocate too much memory"; -#endif -#if defined(KOKKOS_ENABLE_OPENACC) // FIXME_OPENACC - 
if (std::is_same_v) { - GTEST_SKIP() << "acc_malloc() not properly returning nullptr"; - } -#endif - TestViewAPI::run_test_error(); -} - } // namespace Test diff --git a/lib/kokkos/core/unit_test/TestViewBadAlloc.hpp b/lib/kokkos/core/unit_test/TestViewBadAlloc.hpp new file mode 100644 index 0000000000..7cb2f91655 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestViewBadAlloc.hpp @@ -0,0 +1,86 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#include + +#include + +namespace { + +template +void test_view_bad_alloc() { + bool did_throw = false; + auto too_large = std::numeric_limits::max() - 42; + std::string label = "my_label"; + try { + auto should_always_fail = + Kokkos::View(label, too_large); + } catch (std::runtime_error const &error) { + std::string msg = error.what(); + ASSERT_PRED_FORMAT2( + ::testing::IsSubstring, + std::string(MemorySpace::name()) + " memory space failed to allocate", + msg) + << "memory space name is missing"; + ASSERT_PRED_FORMAT2(::testing::IsSubstring, + std::string("(label=\"") + label + "\")", msg) + << "label is missing"; + did_throw = true; + } + ASSERT_TRUE(did_throw); +} + +TEST(TEST_CATEGORY, view_bad_alloc) { + using ExecutionSpace = TEST_EXECSPACE; + using MemorySpace = ExecutionSpace::memory_space; +#if defined(__has_feature) +#if __has_feature(address_sanitizer) + if (std::is_same_v) { + GTEST_SKIP() << "AddressSanitizer detects allocating too much memory " + "preventing our checks to run"; + } +#endif +#endif +#if ((HIP_VERSION_MAJOR 
== 5) && (HIP_VERSION_MINOR == 3)) + if (std::is_same_v) { + GTEST_SKIP() + << "ROCm 5.3 segfaults when trying to allocate too much memory"; + } +#endif +#if defined(KOKKOS_ENABLE_OPENACC) // FIXME_OPENACC + if (std::is_same_v) { + GTEST_SKIP() << "acc_malloc() not properly returning nullptr"; + } +#endif + + test_view_bad_alloc(); + + constexpr bool execution_space_is_device = + std::is_same_v && + !std::is_same_v; + + if constexpr (execution_space_is_device) { + if constexpr (Kokkos::has_shared_space) { + test_view_bad_alloc(); + } + if constexpr (Kokkos::has_shared_host_pinned_space) { + test_view_bad_alloc(); + } + } +} + +} // namespace diff --git a/lib/kokkos/core/unit_test/TestViewCopy_c.hpp b/lib/kokkos/core/unit_test/TestViewCopy_c.hpp new file mode 100644 index 0000000000..758af13c7d --- /dev/null +++ b/lib/kokkos/core/unit_test/TestViewCopy_c.hpp @@ -0,0 +1,434 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#include + +#include + +namespace { +// Do not rely on deep_copy(0) as we want to test it! 
+template +void reset_view(const ExecSpace& space, ViewType& a, int magic) { + auto policy = Kokkos::RangePolicy(space, 0, a.span()); + + assert(a.span_is_contiguous()); + + Kokkos::parallel_for( + "TestViewCopy::ResetView", policy, + KOKKOS_LAMBDA(int i) { a.data()[i] = magic; }); +} + +template +size_t compute_overall_sum(const ExecSpace& space, ViewType& a) { + auto policy = Kokkos::RangePolicy(space, 0, a.span()); + + assert(a.span_is_contiguous()); + + typename ViewType::value_type sum = 0; + Kokkos::parallel_reduce( + "TestViewCopy::ComputeSum", policy, + KOKKOS_LAMBDA(int i, int& lcl_sum) { lcl_sum += a.data()[i]; }, sum); + + return static_cast(sum); +} + +template +bool check_magic_value( + const ExecSpace& space, const Kokkos::View& a, int magic, + std::enable_if_t::rank == 0>* = nullptr) { + auto policy = Kokkos::RangePolicy(space, 0, 1); + + bool all_elements_are_set; // Uninitialized, set by parallel_reduce + + Kokkos::parallel_reduce( + "TestViewCopy::CheckMagicValueRank0", policy, + KOKKOS_LAMBDA(int, bool& local_check) { local_check &= (a() == magic); }, + Kokkos::LAnd(all_elements_are_set)); + + return all_elements_are_set; +} + +template +bool check_magic_value( + const ExecSpace& space, const Kokkos::View& a, int magic, + std::enable_if_t::rank == 1>* = nullptr) { + auto policy = Kokkos::RangePolicy(space, 0, a.extent(0)); + + bool all_elements_are_set; // Uninitialized, set by parallel_reduce + + Kokkos::parallel_reduce( + "TestViewCopy::CheckMagicValueRank1", policy, + KOKKOS_LAMBDA(int i, bool& local_check) { + local_check &= (a(i) == magic); + }, + Kokkos::LAnd(all_elements_are_set)); + + return all_elements_are_set; +} + +template +bool check_magic_value( + const ExecSpace& space, const Kokkos::View& a, int magic, + std::enable_if_t::rank == 2>* = nullptr) { + auto policy = Kokkos::MDRangePolicy, ExecSpace>( + space, {0, 0}, {a.extent(0), a.extent(1)}); + + bool all_elements_are_set; // Uninitialized, set by parallel_reduce + + 
Kokkos::parallel_reduce( + "TestViewCopy::CheckMagicValueRank2", policy, + KOKKOS_LAMBDA(int i0, int i1, bool& local_check) { + local_check &= (a(i0, i1) == magic); + }, + Kokkos::LAnd(all_elements_are_set)); + + return all_elements_are_set; +} + +template +bool check_magic_value( + const ExecSpace& space, const Kokkos::View& a, int magic, + std::enable_if_t::rank == 3>* = nullptr) { + auto policy = Kokkos::MDRangePolicy, ExecSpace>( + space, {0, 0, 0}, {a.extent(0), a.extent(1), a.extent(2)}); + + bool all_elements_are_set; // Uninitialized, set by parallel_reduce + + Kokkos::parallel_reduce( + "TestViewCopy::CheckMagicValueRank3", policy, + KOKKOS_LAMBDA(int i0, int i1, int i2, bool& local_check) { + local_check &= (a(i0, i1, i2) == magic); + }, + Kokkos::LAnd(all_elements_are_set)); + + return all_elements_are_set; +} + +template +bool check_magic_value( + const ExecSpace& space, const Kokkos::View& a, int magic, + std::enable_if_t::rank == 4>* = nullptr) { + auto policy = Kokkos::MDRangePolicy, ExecSpace>( + space, {0, 0, 0, 0}, + {a.extent(0), a.extent(1), a.extent(2), a.extent(3)}); + + bool all_elements_are_set; // Uninitialized, set by parallel_reduce + + Kokkos::parallel_reduce( + "TestViewCopy::CheckMagicValueRank4", policy, + KOKKOS_LAMBDA(int i0, int i1, int i2, int i3, bool& local_check) { + local_check &= (a(i0, i1, i2, i3) == magic); + }, + Kokkos::LAnd(all_elements_are_set)); + + return all_elements_are_set; +} + +template +bool check_magic_value( + const ExecSpace& space, const Kokkos::View& a, int magic, + std::enable_if_t::rank == 5>* = nullptr) { + auto policy = Kokkos::MDRangePolicy, ExecSpace>( + space, {0, 0, 0, 0, 0}, + {a.extent(0), a.extent(1), a.extent(2), a.extent(3), a.extent(4)}); + + bool all_elements_are_set; // Uninitialized, set by parallel_reduce + + Kokkos::parallel_reduce( + "TestViewCopy::CheckMagicValueRank5", policy, + KOKKOS_LAMBDA(int i0, int i1, int i2, int i3, int i4, bool& local_check) { + local_check &= (a(i0, i1, i2, 
i3, i4) == magic); + }, + Kokkos::LAnd(all_elements_are_set)); + + return all_elements_are_set; +} + +template +bool check_magic_value( + const ExecSpace& space, const Kokkos::View& a, int magic, + std::enable_if_t::rank == 6>* = nullptr) { + auto policy = Kokkos::MDRangePolicy, ExecSpace>( + space, {0, 0, 0, 0, 0, 0}, + {a.extent(0), a.extent(1), a.extent(2), a.extent(3), a.extent(4), + a.extent(5)}); + + bool all_elements_are_set; // Uninitialized, set by parallel_reduce + + Kokkos::parallel_reduce( + "TestViewCopy::CheckMagicValueRank6", policy, + KOKKOS_LAMBDA(int i0, int i1, int i2, int i3, int i4, int i5, + bool& local_check) { + local_check &= (a(i0, i1, i2, i3, i4, i5) == magic); + }, + Kokkos::LAnd(all_elements_are_set)); + + return all_elements_are_set; +} + +template +bool check_magic_value( + const ExecSpace& space, const Kokkos::View& a, int magic, + std::enable_if_t::rank == 7>* = nullptr) { + auto policy = Kokkos::MDRangePolicy, ExecSpace>( + space, {0, 0, 0, 0, 0, 0}, + {a.extent(0), a.extent(1), a.extent(2), a.extent(3), a.extent(4), + a.extent(5)}); + + bool all_elements_are_set = true; + + for (size_t outer = 0; outer < a.extent(6); ++outer) { + bool all_local_elements_are_set; // Uninitialized, set by parallel_reduce + Kokkos::parallel_reduce( + "TestViewCopy::CheckMagicValueRank7", policy, + KOKKOS_LAMBDA(int i0, int i1, int i2, int i3, int i4, int i5, + bool& local_check) { + local_check &= (a(i0, i1, i2, i3, i4, i5, outer) == magic); + }, + Kokkos::LAnd(all_local_elements_are_set)); + + all_elements_are_set = all_elements_are_set && all_local_elements_are_set; + } + return all_elements_are_set; +} + +template +bool check_magic_value( + const ExecSpace& space, const Kokkos::View& a, int magic, + std::enable_if_t::rank == 8>* = nullptr) { + auto policy = Kokkos::MDRangePolicy, ExecSpace>( + space, {0, 0, 0, 0, 0, 0}, + {a.extent(0), a.extent(1), a.extent(2), a.extent(3), a.extent(4), + a.extent(5)}); + + bool all_elements_are_set = true; + + 
for (size_t outer = 0; outer < a.extent(7); ++outer) { + for (size_t inner = 0; inner < a.extent(6); ++inner) { + bool all_local_elements_are_set; // Uninitialized, set by parallel_reduce + Kokkos::parallel_reduce( + "TestViewCopy::CheckMagicValueRank8", policy, + KOKKOS_LAMBDA(int i0, int i1, int i2, int i3, int i4, int i5, + bool& local_check) { + local_check &= (a(i0, i1, i2, i3, i4, i5, inner, outer) == magic); + }, + Kokkos::LAnd(all_local_elements_are_set)); + + all_elements_are_set = all_elements_are_set && all_local_elements_are_set; + } + } + return all_elements_are_set; +} + +template +bool view_fill_test(const ExecSpace& space, ViewType& a, int magic) { + Kokkos::deep_copy(space, a, magic); +#if defined(KOKKOS_ENABLE_OPENMPTARGET) + // FIXME_OPENMPTARGET Does not work with Land reducer + return true; +#else // KOKKOS_ENABLE_OPENMPTARGET + return check_magic_value(space, a, magic); +#endif // KOKKOS_ENABLE_OPENMPTARGET +} + +template +void run_test() { + int magic = 19; + + using ViewType = Kokkos::View; + // Create views with different lengths for each dimension + // We want to test if all loops are over the correct dimensions + // We use prime numbers to make sure that the strides are different + ViewType a_decreasing("a", 23, 19, 17, 13, 11, 7, 5, 3); + // We also test with increasing strides to catch more "out-of-bounds" errors + // within subviews. + ViewType a_increasing("a", 3, 5, 7, 11, 13, 17, 19, 23); + + using exec_space = typename Space::execution_space; + auto space = exec_space(); + + // Use subviews in the tests to have cases with different ranks and + // non-contiguous memory + // Tests have two parts: + // 1. Fill the subview with a magic value and check that all elements are set + // 2. 
Check if only the subview is set by summing all elements in the view and + // comparing to the subview size times the magic value + + // Rank 0 + { + auto sub_dec = Kokkos::subview(a_decreasing, 0, 0, 0, 0, 0, 0, 0, 0); + EXPECT_TRUE(view_fill_test(space, sub_dec, magic)); + EXPECT_EQ(compute_overall_sum(space, a_decreasing), + static_cast(magic)); + + auto sub_inc = Kokkos::subview(a_increasing, 0, 0, 0, 0, 0, 0, 0, 0); + EXPECT_TRUE(view_fill_test(space, sub_inc, magic)); + EXPECT_EQ(compute_overall_sum(space, a_increasing), + static_cast(magic)); + } + reset_view(space, a_decreasing, 0); + reset_view(space, a_increasing, 0); + + // Rank 1 + { + auto sub_dec = + Kokkos::subview(a_decreasing, Kokkos::ALL, 0, 0, 0, 0, 0, 0, 0); + EXPECT_TRUE(view_fill_test(space, sub_dec, magic)); + EXPECT_EQ(compute_overall_sum(space, a_decreasing), sub_dec.size() * magic); + + auto sub_inc = + Kokkos::subview(a_increasing, Kokkos::ALL, 0, 0, 0, 0, 0, 0, 0); + EXPECT_TRUE(view_fill_test(space, sub_inc, magic)); + EXPECT_EQ(compute_overall_sum(space, a_increasing), sub_inc.size() * magic); + } + reset_view(space, a_decreasing, 0); + reset_view(space, a_increasing, 0); + + // Rank 2 + { + auto sub_dec = Kokkos::subview(a_decreasing, Kokkos::ALL, Kokkos::ALL, 0, 0, + 0, 0, 0, 0); + EXPECT_TRUE(view_fill_test(space, sub_dec, magic)); + EXPECT_EQ(compute_overall_sum(space, a_decreasing), sub_dec.size() * magic); + + auto sub_inc = Kokkos::subview(a_increasing, Kokkos::ALL, Kokkos::ALL, 0, 0, + 0, 0, 0, 0); + EXPECT_TRUE(view_fill_test(space, sub_inc, magic)); + EXPECT_EQ(compute_overall_sum(space, a_increasing), sub_inc.size() * magic); + } + reset_view(space, a_decreasing, 0); + reset_view(space, a_increasing, 0); + space.fence(); + + // Rank 3 + { + auto sub_dec = Kokkos::subview(a_decreasing, Kokkos::ALL, Kokkos::ALL, + Kokkos::ALL, 0, 0, 0, 0, 0); + EXPECT_TRUE(view_fill_test(space, sub_dec, magic)); + EXPECT_EQ( + compute_overall_sum(space, a_decreasing), + sub_dec.extent(0) * 
sub_dec.extent(1) * sub_dec.extent(2) * magic); + + auto sub_inc = Kokkos::subview(a_increasing, Kokkos::ALL, Kokkos::ALL, + Kokkos::ALL, 0, 0, 0, 0, 0); + EXPECT_TRUE(view_fill_test(space, sub_inc, magic)); + EXPECT_EQ(compute_overall_sum(space, a_increasing), sub_inc.size() * magic); + } + reset_view(space, a_decreasing, 0); + reset_view(space, a_increasing, 0); + space.fence(); + + // Rank 4 + { + auto sub_dec = Kokkos::subview(a_decreasing, Kokkos::ALL, Kokkos::ALL, + Kokkos::ALL, Kokkos::ALL, 0, 0, 0, 0); + EXPECT_TRUE(view_fill_test(space, sub_dec, magic)); + EXPECT_EQ(compute_overall_sum(space, a_decreasing), + sub_dec.extent(0) * sub_dec.extent(1) * sub_dec.extent(2) * + sub_dec.extent(3) * magic); + + auto sub_inc = Kokkos::subview(a_increasing, Kokkos::ALL, Kokkos::ALL, + Kokkos::ALL, Kokkos::ALL, 0, 0, 0, 0); + EXPECT_TRUE(view_fill_test(space, sub_inc, magic)); + EXPECT_EQ(compute_overall_sum(space, a_increasing), sub_inc.size() * magic); + } + reset_view(space, a_decreasing, 0); + reset_view(space, a_increasing, 0); + space.fence(); + + // Rank 5 + { + auto sub_dec = + Kokkos::subview(a_decreasing, Kokkos::ALL, Kokkos::ALL, Kokkos::ALL, + Kokkos::ALL, Kokkos::ALL, 0, 0, 0); + EXPECT_TRUE(view_fill_test(space, sub_dec, magic)); + EXPECT_EQ(compute_overall_sum(space, a_decreasing), sub_dec.size() * magic); + + auto sub_inc = + Kokkos::subview(a_increasing, Kokkos::ALL, Kokkos::ALL, Kokkos::ALL, + Kokkos::ALL, Kokkos::ALL, 0, 0, 0); + EXPECT_TRUE(view_fill_test(space, sub_inc, magic)); + EXPECT_EQ(compute_overall_sum(space, a_increasing), sub_inc.size() * magic); + } + reset_view(space, a_decreasing, 0); + reset_view(space, a_increasing, 0); + space.fence(); + + // Rank 6 + { + auto sub_dec = + Kokkos::subview(a_decreasing, Kokkos::ALL, Kokkos::ALL, Kokkos::ALL, + Kokkos::ALL, Kokkos::ALL, Kokkos::ALL, 0, 0); + EXPECT_TRUE(view_fill_test(space, sub_dec, magic)); + EXPECT_EQ(compute_overall_sum(space, a_decreasing), sub_dec.size() * magic); + + auto 
sub_inc = + Kokkos::subview(a_increasing, Kokkos::ALL, Kokkos::ALL, Kokkos::ALL, + Kokkos::ALL, Kokkos::ALL, Kokkos::ALL, 0, 0); + EXPECT_TRUE(view_fill_test(space, sub_inc, magic)); + EXPECT_EQ(compute_overall_sum(space, a_increasing), sub_inc.size() * magic); + } + reset_view(space, a_decreasing, 0); + reset_view(space, a_increasing, 0); + space.fence(); + + // Rank 7 + { + auto sub_dec = + Kokkos::subview(a_decreasing, Kokkos::ALL, Kokkos::ALL, Kokkos::ALL, + Kokkos::ALL, Kokkos::ALL, Kokkos::ALL, Kokkos::ALL, 0); + EXPECT_TRUE(view_fill_test(space, sub_dec, magic)); + EXPECT_EQ(compute_overall_sum(space, a_decreasing), sub_dec.size() * magic); + + auto sub_inc = + Kokkos::subview(a_increasing, Kokkos::ALL, Kokkos::ALL, Kokkos::ALL, + Kokkos::ALL, Kokkos::ALL, Kokkos::ALL, Kokkos::ALL, 0); + EXPECT_TRUE(view_fill_test(space, sub_inc, magic)); + EXPECT_EQ(compute_overall_sum(space, a_increasing), sub_inc.size() * magic); + } + reset_view(space, a_decreasing, 0); + reset_view(space, a_increasing, 0); + space.fence(); + + // Rank 8 + { + auto sub_dec = Kokkos::subview( + a_decreasing, Kokkos::ALL, Kokkos::ALL, Kokkos::ALL, Kokkos::ALL, + Kokkos::ALL, Kokkos::ALL, Kokkos::ALL, std::make_pair(0, 2)); + EXPECT_TRUE(view_fill_test(space, sub_dec, magic)); + EXPECT_EQ(compute_overall_sum(space, a_decreasing), sub_dec.size() * magic); + + auto sub_inc = Kokkos::subview( + a_increasing, Kokkos::ALL, Kokkos::ALL, Kokkos::ALL, Kokkos::ALL, + Kokkos::ALL, Kokkos::ALL, Kokkos::ALL, std::make_pair(0, 2)); + EXPECT_TRUE(view_fill_test(space, sub_inc, magic)); + EXPECT_EQ(compute_overall_sum(space, a_increasing), sub_inc.size() * magic); + } +} + +TEST(TEST_CATEGORY, view_fill_tests_layout_right) { + using Space = TEST_EXECSPACE; + using Layout = Kokkos::LayoutRight; + run_test(); +} + +TEST(TEST_CATEGORY, view_fill_tests_layout_left) { + using Space = TEST_EXECSPACE; + using Layout = Kokkos::LayoutLeft; + run_test(); +} + +} // namespace diff --git 
a/lib/kokkos/core/unit_test/TestViewLayoutTiled.hpp b/lib/kokkos/core/unit_test/TestViewLayoutTiled.hpp deleted file mode 100644 index 67308212ee..0000000000 --- a/lib/kokkos/core/unit_test/TestViewLayoutTiled.hpp +++ /dev/null @@ -1,1756 +0,0 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER - -#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE -#define KOKKOS_IMPL_PUBLIC_INCLUDE -#endif - -#include - -#include - -#include -#include - -#include -#include - -namespace Test { - -namespace { - -template -struct TestViewLayoutTiled { - using Scalar = double; - - static constexpr int T0 = 2; - static constexpr int T1 = 4; - static constexpr int T2 = 4; - static constexpr int T3 = 2; - static constexpr int T4 = 2; - static constexpr int T5 = 2; - static constexpr int T6 = 2; - static constexpr int T7 = 2; - - // Rank 2 - using LayoutLL_2D_2x4 = - Kokkos::Experimental::LayoutTiled; - using LayoutRL_2D_2x4 = - Kokkos::Experimental::LayoutTiled; - using LayoutLR_2D_2x4 = - Kokkos::Experimental::LayoutTiled; - using LayoutRR_2D_2x4 = - Kokkos::Experimental::LayoutTiled; - - // Rank 3 - using LayoutLL_3D_2x4x4 = - Kokkos::Experimental::LayoutTiled; - using LayoutRL_3D_2x4x4 = - Kokkos::Experimental::LayoutTiled; - using LayoutLR_3D_2x4x4 = - Kokkos::Experimental::LayoutTiled; - using LayoutRR_3D_2x4x4 = - Kokkos::Experimental::LayoutTiled; - - // Rank 4 - using LayoutLL_4D_2x4x4x2 = - Kokkos::Experimental::LayoutTiled; - using LayoutRL_4D_2x4x4x2 = - Kokkos::Experimental::LayoutTiled; - using 
LayoutLR_4D_2x4x4x2 = - Kokkos::Experimental::LayoutTiled; - using LayoutRR_4D_2x4x4x2 = - Kokkos::Experimental::LayoutTiled; - -#if !defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) - static void test_view_layout_tiled_2d(const int, const int) { -#else - static void test_view_layout_tiled_2d(const int N0, const int N1) { - const int FT = T0 * T1; - - const int NT0 = int(std::ceil(N0 / T0)); - const int NT1 = int(std::ceil(N1 / T1)); - // Test create_mirror_view, deep_copy - // Create LL View - { - using ViewType = - typename Kokkos::View; - ViewType v("v", N0, N1); - - typename ViewType::HostMirror hv = Kokkos::create_mirror_view(v); - - // Initialize host-view - for (int tj = 0; tj < NT1; ++tj) { - for (int ti = 0; ti < NT0; ++ti) { - for (int j = 0; j < T1; ++j) { - for (int i = 0; i < T0; ++i) { - hv(ti * T0 + i, tj * T1 + j) = - (ti + tj * NT0) * FT + (i + j * T0); - } - } - } - } - - // copy to device - Kokkos::deep_copy(v, hv); - - Kokkos::MDRangePolicy< - Kokkos::Rank<2, Kokkos::Iterate::Left, Kokkos::Iterate::Left>, - ExecSpace> - mdrangepolicy({0, 0}, {NT0, NT1}, {T0, T1}); - - // iterate by tile - Kokkos::parallel_for( - "ViewTile rank 2 LL", mdrangepolicy, - KOKKOS_LAMBDA(const int ti, const int tj) { - for (int j = 0; j < T1; ++j) { - for (int i = 0; i < T0; ++i) { - if ((ti * T0 + i < N0) && (tj * T1 + j < N1)) { - v(ti * T0 + i, tj * T1 + j) += 1; - } - } - } - }); - - Kokkos::deep_copy(hv, v); - - long counter_subview = 0; - long counter_inc = 0; - for (int tj = 0; tj < NT1; ++tj) { - for (int ti = 0; ti < NT0; ++ti) { - auto tile_subview = Kokkos::tile_subview(hv, ti, tj); - for (int j = 0; j < T1; ++j) { - for (int i = 0; i < T0; ++i) { - if (tile_subview(i, j) != hv(ti * T0 + i, tj * T1 + j)) { - ++counter_subview; - } - if (tile_subview(i, j) != - ((ti + tj * NT0) * FT + (i + j * T0) + 1)) { - ++counter_inc; - } - } - } - } - } - ASSERT_EQ(counter_subview, long(0)); - ASSERT_EQ(counter_inc, long(0)); - } - - // Create RL View - { - using ViewType = 
- typename Kokkos::View; - Kokkos::View v("v", N0, N1); - - typename ViewType::HostMirror hv = Kokkos::create_mirror_view(v); - - // Initialize host-view - for (int ti = 0; ti < NT0; ++ti) { - for (int tj = 0; tj < NT1; ++tj) { - for (int j = 0; j < T1; ++j) { - for (int i = 0; i < T0; ++i) { - hv(ti * T0 + i, tj * T1 + j) = - (ti * NT1 + tj) * FT + (i + j * T0); - } - } - } - } - - // copy to device - Kokkos::deep_copy(v, hv); - - Kokkos::MDRangePolicy< - Kokkos::Rank<2, Kokkos::Iterate::Right, Kokkos::Iterate::Left>, - ExecSpace> - mdrangepolicy({0, 0}, {NT0, NT1}, {T0, T1}); - - // iterate by tile - Kokkos::parallel_for( - "ViewTile rank 2 RL", mdrangepolicy, - KOKKOS_LAMBDA(const int ti, const int tj) { - for (int j = 0; j < T1; ++j) { - for (int i = 0; i < T0; ++i) { - if ((ti * T0 + i < N0) && (tj * T1 + j < N1)) { - v(ti * T0 + i, tj * T1 + j) += 1; - } - } - } - }); - - Kokkos::deep_copy(hv, v); - - long counter_subview = 0; - long counter_inc = 0; - - for (int ti = 0; ti < NT0; ++ti) { - for (int tj = 0; tj < NT1; ++tj) { - auto tile_subview = Kokkos::tile_subview(hv, ti, tj); - for (int j = 0; j < T1; ++j) { - for (int i = 0; i < T0; ++i) { - if (tile_subview(i, j) != hv(ti * T0 + i, tj * T1 + j)) { - ++counter_subview; - } - if (tile_subview(i, j) != - ((ti * NT1 + tj) * FT + (i + j * T0) + 1)) { - ++counter_inc; - } - } - } - } - } - ASSERT_EQ(counter_subview, long(0)); - ASSERT_EQ(counter_inc, long(0)); - } // end scope - - // Create LR View - { - using ViewType = - typename Kokkos::View; - Kokkos::View v("v", N0, N1); - - typename ViewType::HostMirror hv = Kokkos::create_mirror_view(v); - - // Initialize host-view - for (int tj = 0; tj < NT1; ++tj) { - for (int ti = 0; ti < NT0; ++ti) { - for (int i = 0; i < T0; ++i) { - for (int j = 0; j < T1; ++j) { - hv(ti * T0 + i, tj * T1 + j) = - (ti + tj * NT0) * FT + (i * T1 + j); - } - } - } - } - - // copy to device - Kokkos::deep_copy(v, hv); - - Kokkos::MDRangePolicy< - Kokkos::Rank<2, 
Kokkos::Iterate::Left, Kokkos::Iterate::Right>, - ExecSpace> - mdrangepolicy({0, 0}, {NT0, NT1}, {T0, T1}); - - // iterate by tile - Kokkos::parallel_for( - "ViewTile rank 2 LR", mdrangepolicy, - KOKKOS_LAMBDA(const int ti, const int tj) { - for (int j = 0; j < T1; ++j) { - for (int i = 0; i < T0; ++i) { - if ((ti * T0 + i < N0) && (tj * T1 + j < N1)) { - v(ti * T0 + i, tj * T1 + j) += 1; - } - } - } - }); - - Kokkos::deep_copy(hv, v); - - long counter_subview = 0; - long counter_inc = 0; - - for (int tj = 0; tj < NT1; ++tj) { - for (int ti = 0; ti < NT0; ++ti) { - auto tile_subview = Kokkos::tile_subview(hv, ti, tj); - for (int i = 0; i < T0; ++i) { - for (int j = 0; j < T1; ++j) { - if (tile_subview(i, j) != hv(ti * T0 + i, tj * T1 + j)) { - ++counter_subview; - } - if (tile_subview(i, j) != - ((ti + tj * NT0) * FT + (i * T1 + j) + 1)) { - ++counter_inc; - } - } - } - } - } - ASSERT_EQ(counter_subview, long(0)); - ASSERT_EQ(counter_inc, long(0)); - } // end scope - - // Create RR View - { - using ViewType = - typename Kokkos::View; - Kokkos::View v("v", N0, N1); - - typename ViewType::HostMirror hv = Kokkos::create_mirror_view(v); - - // Initialize host-view - for (int ti = 0; ti < NT0; ++ti) { - for (int tj = 0; tj < NT1; ++tj) { - for (int i = 0; i < T0; ++i) { - for (int j = 0; j < T1; ++j) { - hv(ti * T0 + i, tj * T1 + j) = - (ti * NT1 + tj) * FT + (i * T1 + j); - } - } - } - } - - // copy to device - Kokkos::deep_copy(v, hv); - - Kokkos::MDRangePolicy< - Kokkos::Rank<2, Kokkos::Iterate::Left, Kokkos::Iterate::Right>, - ExecSpace> - mdrangepolicy({0, 0}, {NT0, NT1}, {T0, T1}); - - // iterate by tile - Kokkos::parallel_for( - "ViewTile rank 2 LR", mdrangepolicy, - KOKKOS_LAMBDA(const int ti, const int tj) { - for (int j = 0; j < T1; ++j) { - for (int i = 0; i < T0; ++i) { - if ((ti * T0 + i < N0) && (tj * T1 + j < N1)) { - v(ti * T0 + i, tj * T1 + j) += 1; - } - } - } - }); - - Kokkos::deep_copy(hv, v); - - long counter_subview = 0; - long counter_inc = 0; - - 
for (int ti = 0; ti < NT0; ++ti) { - for (int tj = 0; tj < NT1; ++tj) { - auto tile_subview = Kokkos::tile_subview(hv, ti, tj); - for (int i = 0; i < T0; ++i) { - for (int j = 0; j < T1; ++j) { - if (tile_subview(i, j) != hv(ti * T0 + i, tj * T1 + j)) { - ++counter_subview; - } - if (tile_subview(i, j) != - ((ti * NT1 + tj) * FT + (i * T1 + j) + 1)) { - ++counter_inc; - } - } - } - } - } - ASSERT_EQ(counter_subview, long(0)); - ASSERT_EQ(counter_inc, long(0)); - } // end scope -#endif - } // end test_view_layout_tiled_2d - -#if !defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) - static void test_view_layout_tiled_3d(const int, const int, const int) { -#else - static void test_view_layout_tiled_3d(const int N0, const int N1, - const int N2) { - const int FT = T0 * T1 * T2; - - const int NT0 = int(std::ceil(N0 / T0)); - const int NT1 = int(std::ceil(N1 / T1)); - const int NT2 = int(std::ceil(N2 / T2)); - - // Create LL View - { - using ViewType = Kokkos::View; - Kokkos::View dv("dv", N0, N1, - N2); - - typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv); - - // Initialize on host - for (int tk = 0; tk < NT2; ++tk) { - for (int tj = 0; tj < NT1; ++tj) { - for (int ti = 0; ti < NT0; ++ti) { - for (int k = 0; k < T2; ++k) { - for (int j = 0; j < T1; ++j) { - for (int i = 0; i < T0; ++i) { - v(ti * T0 + i, tj * T1 + j, tk * T2 + k) = - (ti + tj * NT0 + tk * N0 * N1) * FT + - (i + j * T0 + k * T0 * T1); - } - } - } - } - } - } - - // copy to device - Kokkos::deep_copy(dv, v); - - Kokkos::MDRangePolicy< - Kokkos::Rank<3, Kokkos::Iterate::Left, Kokkos::Iterate::Left>, - ExecSpace> - mdrangepolicy({0, 0, 0}, {N0, N1, N2}, {T0, T1, T2}); - - // iterate by tile - Kokkos::parallel_for( - "ViewTile rank 3 LL", mdrangepolicy, - KOKKOS_LAMBDA(const int i, const int j, const int k) { - dv(i, j, k) += 1; - }); - - Kokkos::deep_copy(v, dv); - - long counter_subview = 0; - long counter_inc = 0; - - for (int tk = 0; tk < NT2; ++tk) { - for (int tj = 0; tj < NT1; ++tj) { - for 
(int ti = 0; ti < NT0; ++ti) { - auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk); - for (int k = 0; k < T2; ++k) { - for (int j = 0; j < T1; ++j) { - for (int i = 0; i < T0; ++i) { - if (tile_subview(i, j, k) != - v(ti * T0 + i, tj * T1 + j, tk * T2 + k)) { - ++counter_subview; - } - if (tile_subview(i, j, k) != - ((ti + tj * NT0 + tk * N0 * N1) * FT + - (i + j * T0 + k * T0 * T1) + 1)) { - ++counter_inc; - } - } - } - } - } - } - } - ASSERT_EQ(counter_subview, long(0)); - ASSERT_EQ(counter_inc, long(0)); - } // end scope - - // Create RL View - { - using ViewType = Kokkos::View; - Kokkos::View dv("dv", N0, N1, - N2); - - typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv); - - // Initialize on host - for (int ti = 0; ti < NT0; ++ti) { - for (int tj = 0; tj < NT1; ++tj) { - for (int tk = 0; tk < NT2; ++tk) { - for (int k = 0; k < T2; ++k) { - for (int j = 0; j < T1; ++j) { - for (int i = 0; i < T0; ++i) { - v(ti * T0 + i, tj * T1 + j, tk * T2 + k) = - (ti * NT1 * NT2 + tj * NT2 + tk) * FT + - (i + j * T0 + k * T0 * T1); - } - } - } - } - } - } - - // copy to device - Kokkos::deep_copy(dv, v); - - Kokkos::MDRangePolicy< - Kokkos::Rank<3, Kokkos::Iterate::Right, Kokkos::Iterate::Left>, - ExecSpace> - mdrangepolicy({0, 0, 0}, {N0, N1, N2}, {T0, T1, T2}); - - // iterate by tile - Kokkos::parallel_for( - "ViewTile rank 3 RL", mdrangepolicy, - KOKKOS_LAMBDA(const int i, const int j, const int k) { - dv(i, j, k) += 1; - }); - - Kokkos::deep_copy(v, dv); - - long counter_subview = 0; - long counter_inc = 0; - - for (int ti = 0; ti < NT0; ++ti) { - for (int tj = 0; tj < NT1; ++tj) { - for (int tk = 0; tk < NT2; ++tk) { - auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk); - for (int k = 0; k < T2; ++k) { - for (int j = 0; j < T1; ++j) { - for (int i = 0; i < T0; ++i) { - if (tile_subview(i, j, k) != - v(ti * T0 + i, tj * T1 + j, tk * T2 + k)) { - ++counter_subview; - } - if (tile_subview(i, j, k) != - ((ti * NT1 * NT2 + tj * NT2 + tk) * FT + - (i + 
j * T0 + k * T0 * T1) + 1)) { - ++counter_inc; - } - } - } - } - } - } - } - ASSERT_EQ(counter_subview, long(0)); - ASSERT_EQ(counter_inc, long(0)); - } // end scope - - // Create LR View - { - using ViewType = Kokkos::View; - Kokkos::View dv("dv", N0, N1, - N2); - - typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv); - - // Initialize on host - for (int tk = 0; tk < NT2; ++tk) { - for (int tj = 0; tj < NT1; ++tj) { - for (int ti = 0; ti < NT0; ++ti) { - for (int i = 0; i < T0; ++i) { - for (int j = 0; j < T1; ++j) { - for (int k = 0; k < T2; ++k) { - v(ti * T0 + i, tj * T1 + j, tk * T2 + k) = - (ti + tj * NT0 + tk * NT0 * NT1) * FT + - (i * T1 * T2 + j * T2 + k); - } - } - } - } - } - } - - // copy to device - Kokkos::deep_copy(dv, v); - - Kokkos::MDRangePolicy< - Kokkos::Rank<3, Kokkos::Iterate::Left, Kokkos::Iterate::Right>, - ExecSpace> - mdrangepolicy({0, 0, 0}, {N0, N1, N2}, {T0, T1, T2}); - - // iterate by tile - Kokkos::parallel_for( - "ViewTile rank 3 LR", mdrangepolicy, - KOKKOS_LAMBDA(const int i, const int j, const int k) { - dv(i, j, k) += 1; - }); - - Kokkos::deep_copy(v, dv); - - long counter_subview = 0; - long counter_inc = 0; - - for (int tk = 0; tk < NT2; ++tk) { - for (int tj = 0; tj < NT1; ++tj) { - for (int ti = 0; ti < NT0; ++ti) { - auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk); - for (int i = 0; i < T0; ++i) { - for (int j = 0; j < T1; ++j) { - for (int k = 0; k < T2; ++k) { - if (tile_subview(i, j, k) != - v(ti * T0 + i, tj * T1 + j, tk * T2 + k)) { - ++counter_subview; - } - if (tile_subview(i, j, k) != - ((ti + tj * NT0 + tk * NT0 * NT1) * FT + - (i * T1 * T2 + j * T2 + k) + 1)) { - ++counter_inc; - } - } - } - } - } - } - } - ASSERT_EQ(counter_subview, long(0)); - ASSERT_EQ(counter_inc, long(0)); - } // end scope - - // Create RR View - { - using ViewType = Kokkos::View; - Kokkos::View dv("dv", N0, N1, - N2); - - typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv); - - // Initialize on host - for 
(int ti = 0; ti < NT0; ++ti) { - for (int tj = 0; tj < NT1; ++tj) { - for (int tk = 0; tk < NT2; ++tk) { - for (int i = 0; i < T0; ++i) { - for (int j = 0; j < T1; ++j) { - for (int k = 0; k < T2; ++k) { - v(ti * T0 + i, tj * T1 + j, tk * T2 + k) = - (ti * NT1 * NT2 + tj * NT2 + tk) * FT + - (i * T1 * T2 + j * T2 + k); - } - } - } - } - } - } - - // copy to device - Kokkos::deep_copy(dv, v); - - Kokkos::MDRangePolicy< - Kokkos::Rank<3, Kokkos::Iterate::Right, Kokkos::Iterate::Right>, - ExecSpace> - mdrangepolicy({0, 0, 0}, {N0, N1, N2}, {T0, T1, T2}); - - // iterate by tile - Kokkos::parallel_for( - "ViewTile rank 3 RR", mdrangepolicy, - KOKKOS_LAMBDA(const int i, const int j, const int k) { - dv(i, j, k) += 1; - }); - - Kokkos::deep_copy(v, dv); - - long counter_subview = 0; - long counter_inc = 0; - - for (int ti = 0; ti < NT0; ++ti) { - for (int tj = 0; tj < NT1; ++tj) { - for (int tk = 0; tk < NT2; ++tk) { - auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk); - for (int i = 0; i < T0; ++i) { - for (int j = 0; j < T1; ++j) { - for (int k = 0; k < T2; ++k) { - if (tile_subview(i, j, k) != - v(ti * T0 + i, tj * T1 + j, tk * T2 + k)) { - ++counter_subview; - } - if (tile_subview(i, j, k) != - ((ti * NT1 * NT2 + tj * NT2 + tk) * FT + - (i * T1 * T2 + j * T2 + k) + 1)) { - ++counter_inc; - } - } - } - } - } - } - } - ASSERT_EQ(counter_subview, long(0)); - ASSERT_EQ(counter_inc, long(0)); - } // end scope -#endif - } // end test_view_layout_tiled_3d - -#if !defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) - static void test_view_layout_tiled_4d(const int, const int, const int, - const int){ -#else - static void test_view_layout_tiled_4d(const int N0, const int N1, - const int N2, const int N3) { - const int FT = T0 * T1 * T2 * T3; - - const int NT0 = int(std::ceil(N0 / T0)); - const int NT1 = int(std::ceil(N1 / T1)); - const int NT2 = int(std::ceil(N2 / T2)); - const int NT3 = int(std::ceil(N3 / T3)); - - // Create LL View - { - using ViewType = Kokkos::View; - 
Kokkos::View dv("dv", N0, N1, - N2, N3); - - typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv); - - // Initialize on host - for (int tl = 0; tl < NT3; ++tl) { - for (int tk = 0; tk < NT2; ++tk) { - for (int tj = 0; tj < NT1; ++tj) { - for (int ti = 0; ti < NT0; ++ti) { - for (int l = 0; l < T3; ++l) { - for (int k = 0; k < T2; ++k) { - for (int j = 0; j < T1; ++j) { - for (int i = 0; i < T0; ++i) { - v(ti * T0 + i, tj * T1 + j, tk * T2 + k, tl * T3 + l) = - (ti + tj * NT0 + tk * N0 * N1 + tl * N0 * N1 * N2) * - FT + - (i + j * T0 + k * T0 * T1 + l * T0 * T1 * T2); - } - } - } - } - } - } - } - } - - // copy to device - Kokkos::deep_copy(dv, v); - - Kokkos::MDRangePolicy< - Kokkos::Rank<4, Kokkos::Iterate::Left, Kokkos::Iterate::Left>, - ExecSpace> - mdrangepolicy({0, 0, 0, 0}, {N0, N1, N2, N3}, {T0, T1, T2, T3}); - - // iterate by tile - Kokkos::parallel_for( - "ViewTile rank 4 LL", mdrangepolicy, - KOKKOS_LAMBDA(const int i, const int j, const int k, const int l) { - dv(i, j, k, l) += 1; - }); - - Kokkos::deep_copy(v, dv); - - long counter_subview = 0; - long counter_inc = 0; - - for (int tl = 0; tl < NT3; ++tl) { - for (int tk = 0; tk < NT2; ++tk) { - for (int tj = 0; tj < NT1; ++tj) { - for (int ti = 0; ti < NT0; ++ti) { - auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk, tl); - for (int l = 0; l < T3; ++l) { - for (int k = 0; k < T2; ++k) { - for (int j = 0; j < T1; ++j) { - for (int i = 0; i < T0; ++i) { - if (tile_subview(i, j, k, l) != - v(ti * T0 + i, tj * T1 + j, tk * T2 + k, - tl * T3 + l)) { - ++counter_subview; - } - if (tile_subview(i, j, k, l) != - ((ti + tj * NT0 + tk * N0 * N1 + tl * N0 * N1 * N2) * - FT + - (i + j * T0 + k * T0 * T1 + l * T0 * T1 * T2) + 1)) { - ++counter_inc; - } - } - } - } - } - } - } - } - } - ASSERT_EQ(counter_subview, long(0)); - ASSERT_EQ(counter_inc, long(0)); - } // end scope - - // Create RL View - { - using ViewType = Kokkos::View; - Kokkos::View dv("dv", N0, N1, - N2, N3); - - typename 
ViewType::HostMirror v = Kokkos::create_mirror_view(dv); - - // Initialize on host - for (int ti = 0; ti < NT0; ++ti) { - for (int tj = 0; tj < NT1; ++tj) { - for (int tk = 0; tk < NT2; ++tk) { - for (int tl = 0; tl < NT3; ++tl) { - for (int l = 0; l < T3; ++l) { - for (int k = 0; k < T2; ++k) { - for (int j = 0; j < T1; ++j) { - for (int i = 0; i < T0; ++i) { - v(ti * T0 + i, tj * T1 + j, tk * T2 + k, tl * T3 + l) = - (ti * NT1 * NT2 * N3 + tj * NT2 * N3 + tk * N3 + tl) * - FT + - (i + j * T0 + k * T0 * T1 + l * T0 * T1 * T2); - } - } - } - } - } - } - } - } - - // copy to device - Kokkos::deep_copy(dv, v); - - Kokkos::MDRangePolicy< - Kokkos::Rank<4, Kokkos::Iterate::Right, Kokkos::Iterate::Left>, - ExecSpace> - mdrangepolicy({0, 0, 0, 0}, {N0, N1, N2, N3}, {T0, T1, T2, T3}); - - // iterate by tile - Kokkos::parallel_for( - "ViewTile rank 4 RL", mdrangepolicy, - KOKKOS_LAMBDA(const int i, const int j, const int k, const int l) { - dv(i, j, k, l) += 1; - }); - - Kokkos::deep_copy(v, dv); - - long counter_subview = 0; - long counter_inc = 0; - - for (int ti = 0; ti < NT0; ++ti) { - for (int tj = 0; tj < NT1; ++tj) { - for (int tk = 0; tk < NT2; ++tk) { - for (int tl = 0; tl < NT3; ++tl) { - auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk, tl); - for (int l = 0; l < T3; ++l) { - for (int k = 0; k < T2; ++k) { - for (int j = 0; j < T1; ++j) { - for (int i = 0; i < T0; ++i) { - if (tile_subview(i, j, k, l) != - v(ti * T0 + i, tj * T1 + j, tk * T2 + k, - tl * T3 + l)) { - ++counter_subview; - } - if (tile_subview(i, j, k, l) != - ((ti * NT1 * NT2 * N3 + tj * NT2 * N3 + tk * N3 + - tl) * - FT + - (i + j * T0 + k * T0 * T1 + l * T0 * T1 * T2) + 1)) { - ++counter_inc; - } - } - } - } - } - } - } - } - } - ASSERT_EQ(counter_subview, long(0)); - ASSERT_EQ(counter_inc, long(0)); - } // end scope - - // Create LR View - { - using ViewType = Kokkos::View; - Kokkos::View dv("dv", N0, N1, - N2, N3); - - typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv); - - 
// Initialize on host - for (int tl = 0; tl < NT3; ++tl) { - for (int tk = 0; tk < NT2; ++tk) { - for (int tj = 0; tj < NT1; ++tj) { - for (int ti = 0; ti < NT0; ++ti) { - for (int i = 0; i < T0; ++i) { - for (int j = 0; j < T1; ++j) { - for (int k = 0; k < T2; ++k) { - for (int l = 0; l < T3; ++l) { - v(ti * T0 + i, tj * T1 + j, tk * T2 + k, tl * T3 + l) = - (ti + tj * NT0 + tk * NT0 * NT1 + - tl * NT0 * NT1 * NT2) * - FT + - (i * T1 * T2 * T3 + j * T2 * T3 + k * T3 + l); - } - } - } - } - } - } - } - } - - // copy to device - Kokkos::deep_copy(dv, v); - - Kokkos::MDRangePolicy< - Kokkos::Rank<4, Kokkos::Iterate::Left, Kokkos::Iterate::Right>, - ExecSpace> - mdrangepolicy({0, 0, 0, 0}, {N0, N1, N2, N3}, {T0, T1, T2, T3}); - - // iterate by tile - Kokkos::parallel_for( - "ViewTile rank 4 LR", mdrangepolicy, - KOKKOS_LAMBDA(const int i, const int j, const int k, const int l) { - dv(i, j, k, l) += 1; - }); - - Kokkos::deep_copy(v, dv); - - long counter_subview = 0; - long counter_inc = 0; - - for (int tl = 0; tl < NT3; ++tl) { - for (int tk = 0; tk < NT2; ++tk) { - for (int tj = 0; tj < NT1; ++tj) { - for (int ti = 0; ti < NT0; ++ti) { - auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk, tl); - for (int i = 0; i < T0; ++i) { - for (int j = 0; j < T1; ++j) { - for (int k = 0; k < T2; ++k) { - for (int l = 0; l < T3; ++l) { - if (tile_subview(i, j, k, l) != - v(ti * T0 + i, tj * T1 + j, tk * T2 + k, - tl * T3 + l)) { - ++counter_subview; - } - if (tile_subview(i, j, k, l) != - ((ti + tj * NT0 + tk * NT0 * NT1 + - tl * NT0 * NT1 * NT2) * - FT + - (i * T1 * T2 * T3 + j * T2 * T3 + k * T3 + l) + 1)) { - ++counter_inc; - } - } - } - } - } - } - } - } - } - ASSERT_EQ(counter_subview, long(0)); - ASSERT_EQ(counter_inc, long(0)); - } // end scope - - // Create RR View - { - using ViewType = Kokkos::View; - Kokkos::View dv("dv", N0, N1, - N2, N3); - - typename ViewType::HostMirror v = Kokkos::create_mirror_view(dv); - - // Initialize on host - for (int ti = 0; ti < NT0; 
++ti) { - for (int tj = 0; tj < NT1; ++tj) { - for (int tk = 0; tk < NT2; ++tk) { - for (int tl = 0; tl < NT3; ++tl) { - for (int i = 0; i < T0; ++i) { - for (int j = 0; j < T1; ++j) { - for (int k = 0; k < T2; ++k) { - for (int l = 0; l < T3; ++l) { - v(ti * T0 + i, tj * T1 + j, tk * T2 + k, tl * T3 + l) = - (ti * NT1 * NT2 * NT3 + tj * NT2 * NT3 + tk * NT3 + - tl) * - FT + - (i * T1 * T2 * T3 + j * T2 * T3 + k * T3 + l); - } - } - } - } - } - } - } - } - - // copy to device - Kokkos::deep_copy(dv, v); - - Kokkos::MDRangePolicy< - Kokkos::Rank<4, Kokkos::Iterate::Right, Kokkos::Iterate::Right>, - ExecSpace> - mdrangepolicy({0, 0, 0, 0}, {N0, N1, N2, N3}, {T0, T1, T2, T3}); - - // iterate by tile - Kokkos::parallel_for( - "ViewTile rank 4 RR", mdrangepolicy, - KOKKOS_LAMBDA(const int i, const int j, const int k, const int l) { - dv(i, j, k, l) += 1; - }); - - Kokkos::deep_copy(v, dv); - - long counter_subview = 0; - long counter_inc = 0; - - for (int ti = 0; ti < NT0; ++ti) { - for (int tj = 0; tj < NT1; ++tj) { - for (int tk = 0; tk < NT2; ++tk) { - for (int tl = 0; tl < NT3; ++tl) { - auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk, tl); - for (int i = 0; i < T0; ++i) { - for (int j = 0; j < T1; ++j) { - for (int k = 0; k < T2; ++k) { - for (int l = 0; l < T3; ++l) { - if (tile_subview(i, j, k, l) != - v(ti * T0 + i, tj * T1 + j, tk * T2 + k, - tl * T3 + l)) { - ++counter_subview; - } - if (tile_subview(i, j, k, l) != - ((ti * NT1 * NT2 * NT3 + tj * NT2 * NT3 + tk * NT3 + - tl) * - FT + - (i * T1 * T2 * T3 + j * T2 * T3 + k * T3 + l) + 1)) { - ++counter_inc; - } - } - } - } - } - } - } - } - } - ASSERT_EQ(counter_subview, long(0)); - ASSERT_EQ(counter_inc, long(0)); - } // end scope -#endif - } // end test_view_layout_tiled_4d - - static void test_view_layout_tiled_subtile_2d(const int N0, const int N1) { - const int FT = T0 * T1; - - const int NT0 = int(std::ceil(N0 / T0)); - const int NT1 = int(std::ceil(N1 / T1)); - - // Counter to check for errors at 
the end - long counter[4] = {0}; - - // Create LL View - { - Kokkos::View v("v", N0, N1); - for (int tj = 0; tj < NT1; ++tj) { - for (int ti = 0; ti < NT0; ++ti) { - for (int j = 0; j < T1; ++j) { - for (int i = 0; i < T0; ++i) { - v(ti * T0 + i, tj * T1 + j) = (ti + tj * NT0) * FT + (i + j * T0); - } - } - } - } - - for (int tj = 0; tj < NT1; ++tj) { - for (int ti = 0; ti < NT0; ++ti) { - auto tile_subview = Kokkos::tile_subview(v, ti, tj); - for (int j = 0; j < T1; ++j) { - for (int i = 0; i < T0; ++i) { - if (tile_subview(i, j) != v(ti * T0 + i, tj * T1 + j)) { - ++counter[0]; - } -#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT - std::cout << "idx0,idx1 = " << ti * T0 + i << "," << tj * T1 + j - << std::endl; - std::cout << "ti,tj,i,j: " << ti << "," << tj << "," << i << "," - << j << " v = " << v(ti * T0 + i, tj * T1 + j) - << " flat idx = " - << (ti + tj * NT0) * FT + (i + j * T0) << std::endl; - std::cout << "subview_tile output = " << tile_subview(i, j) - << std::endl; -#endif - } - } - } - } - } // end scope - - // Create RL View - { - Kokkos::View v("v", N0, N1); - for (int ti = 0; ti < NT0; ++ti) { - for (int tj = 0; tj < NT1; ++tj) { - for (int j = 0; j < T1; ++j) { - for (int i = 0; i < T0; ++i) { - v(ti * T0 + i, tj * T1 + j) = (ti * NT1 + tj) * FT + (i + j * T0); - } - } - } - } - - for (int ti = 0; ti < NT0; ++ti) { - for (int tj = 0; tj < NT1; ++tj) { - auto tile_subview = Kokkos::tile_subview(v, ti, tj); - for (int j = 0; j < T1; ++j) { - for (int i = 0; i < T0; ++i) { - if (tile_subview(i, j) != v(ti * T0 + i, tj * T1 + j)) { - ++counter[1]; - } -#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT - std::cout << "idx0,idx1 = " << ti * T0 + i << "," << tj * T1 + j - << std::endl; - std::cout << "ti,tj,i,j: " << ti << "," << tj << "," << i << "," - << j << " v = " << v(ti * T0 + i, tj * T1 + j) - << " flat idx = " - << (ti * NT1 + tj) * FT + (i + j * T0) << std::endl; - std::cout << "subview_tile output = " << tile_subview(i, j) - << std::endl; -#endif - } - } - } - 
} - } // end scope - - // Create LR View - { - Kokkos::View v("v", N0, N1); - for (int tj = 0; tj < NT1; ++tj) { - for (int ti = 0; ti < NT0; ++ti) { - for (int i = 0; i < T0; ++i) { - for (int j = 0; j < T1; ++j) { - v(ti * T0 + i, tj * T1 + j) = (ti + tj * NT0) * FT + (i * T1 + j); - } - } - } - } - - for (int tj = 0; tj < NT1; ++tj) { - for (int ti = 0; ti < NT0; ++ti) { - auto tile_subview = Kokkos::tile_subview(v, ti, tj); - for (int i = 0; i < T0; ++i) { - for (int j = 0; j < T1; ++j) { - if (tile_subview(i, j) != v(ti * T0 + i, tj * T1 + j)) { - ++counter[2]; - } -#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT - std::cout << "idx0,idx1 = " << ti * T0 + i << "," << tj * T1 + j - << std::endl; - std::cout << "ti,tj,i,j: " << ti << "," << tj << "," << i << "," - << j << " v = " << v(ti * T0 + i, tj * T1 + j) - << " flat idx = " - << (ti + tj * NT0) * FT + (i * T1 + j) << std::endl; - std::cout << "subview_tile output = " << tile_subview(i, j) - << std::endl; -#endif - } - } - } - } - } // end scope - - // Create RR View - { - Kokkos::View v("v", N0, N1); - for (int ti = 0; ti < NT0; ++ti) { - for (int tj = 0; tj < NT1; ++tj) { - for (int i = 0; i < T0; ++i) { - for (int j = 0; j < T1; ++j) { - v(ti * T0 + i, tj * T1 + j) = (ti * NT1 + tj) * FT + (i * T1 + j); - } - } - } - } - - for (int ti = 0; ti < NT0; ++ti) { - for (int tj = 0; tj < NT1; ++tj) { - auto tile_subview = Kokkos::tile_subview(v, ti, tj); - for (int i = 0; i < T0; ++i) { - for (int j = 0; j < T1; ++j) { - if (tile_subview(i, j) != v(ti * T0 + i, tj * T1 + j)) { - ++counter[3]; - } -#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT - std::cout << "idx0,idx1 = " << ti * T0 + i << "," << tj * T1 + j - << std::endl; - std::cout << "ti,tj,i,j: " << ti << "," << tj << "," << i << "," - << j << " v = " << v(ti * T0 + i, tj * T1 + j) - << " flat idx = " - << (ti * NT1 + tj) * FT + (i * T1 + j) << std::endl; - std::cout << "subview_tile output = " << tile_subview(i, j) - << std::endl; - std::cout << "subview tile rank = 
" << Kokkos::rank(tile_subview) - << std::endl; -#endif - } - } - } - } - } // end scope - -#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT - std::cout << "subview_tile vs view errors:\n" - << " LL: " << counter[0] << " RL: " << counter[1] - << " LR: " << counter[2] << " RR: " << counter[3] << std::endl; -#endif - - ASSERT_EQ(counter[0], long(0)); - ASSERT_EQ(counter[1], long(0)); - ASSERT_EQ(counter[2], long(0)); - ASSERT_EQ(counter[3], long(0)); - } // end test_view_layout_tiled_subtile_2d - - static void test_view_layout_tiled_subtile_3d(const int N0, const int N1, - const int N2) { - const int FT = T0 * T1 * T2; - - const int NT0 = int(std::ceil(N0 / T0)); - const int NT1 = int(std::ceil(N1 / T1)); - const int NT2 = int(std::ceil(N2 / T2)); - - // Counter to check for errors at the end - long counter[4] = {0}; - // Create LL View - { - Kokkos::View v("v", N0, - N1, N2); - for (int tk = 0; tk < NT2; ++tk) { - for (int tj = 0; tj < NT1; ++tj) { - for (int ti = 0; ti < NT0; ++ti) { - for (int k = 0; k < T2; ++k) { - for (int j = 0; j < T1; ++j) { - for (int i = 0; i < T0; ++i) { - v(ti * T0 + i, tj * T1 + j, tk * T2 + k) = - (ti + tj * NT0 + tk * N0 * N1) * FT + - (i + j * T0 + k * T0 * T1); - } - } - } - } - } - } - - for (int tk = 0; tk < NT2; ++tk) { - for (int tj = 0; tj < NT1; ++tj) { - for (int ti = 0; ti < NT0; ++ti) { - auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk); - for (int k = 0; k < T2; ++k) { - for (int j = 0; j < T1; ++j) { - for (int i = 0; i < T0; ++i) { - if (tile_subview(i, j, k) != - v(ti * T0 + i, tj * T1 + j, tk * T2 + k)) { - ++counter[0]; - } -#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT - std::cout << "idx0,idx1,idx2 = " << ti * T0 + i << "," - << tj * T1 + j << "," << tk * T2 + k << std::endl; - std::cout - << "ti,tj,tk,i,j,k: " << ti << "," << tj << "," << tk - << "," << i << "," << j << "," << k - << " v = " << v(ti * T0 + i, tj * T1 + j, tk * T2 + k) - << " flat idx = " - << (ti + tj * NT0 + tk * N0 * N1) * FT + - (i + j * T0 + k * T0 * 
T1) - << std::endl; - std::cout << "subview_tile output = " << tile_subview(i, j, k) - << std::endl; - std::cout - << "subview tile rank = " << Kokkos::rank(tile_subview) - << std::endl; -#endif - } - } - } - } - } - } - } // end scope - - // Create RL View - { - Kokkos::View v("v", N0, - N1, N2); - for (int ti = 0; ti < NT0; ++ti) { - for (int tj = 0; tj < NT1; ++tj) { - for (int tk = 0; tk < NT2; ++tk) { - for (int k = 0; k < T2; ++k) { - for (int j = 0; j < T1; ++j) { - for (int i = 0; i < T0; ++i) { - v(ti * T0 + i, tj * T1 + j, tk * T2 + k) = - (ti * NT1 * NT2 + tj * NT2 + tk) * FT + - (i + j * T0 + k * T0 * T1); - } - } - } - } - } - } - - for (int ti = 0; ti < NT0; ++ti) { - for (int tj = 0; tj < NT1; ++tj) { - for (int tk = 0; tk < NT2; ++tk) { - auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk); - for (int k = 0; k < T2; ++k) { - for (int j = 0; j < T1; ++j) { - for (int i = 0; i < T0; ++i) { - if (tile_subview(i, j, k) != - v(ti * T0 + i, tj * T1 + j, tk * T2 + k)) { - ++counter[1]; - } -#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT - std::cout << "idx0,idx1,idx2 = " << ti * T0 + i << "," - << tj * T1 + j << "," << tk * T2 + k << std::endl; - std::cout - << "ti,tj,tk,i,j,k: " << ti << "," << tj << "," << tk - << "," << i << "," << j << "," << k - << " v = " << v(ti * T0 + i, tj * T1 + j, tk * T2 + k) - << " flat idx = " - << (ti * NT1 * NT2 + tj * NT2 + tk) * FT + - (i + j * T0 + k * T0 * T1) - << std::endl; - std::cout << "subview_tile output = " << tile_subview(i, j, k) - << std::endl; -#endif - } - } - } - } - } - } - } // end scope - - // Create LR View - { - Kokkos::View v("v", N0, - N1, N2); - for (int tk = 0; tk < NT2; ++tk) { - for (int tj = 0; tj < NT1; ++tj) { - for (int ti = 0; ti < NT0; ++ti) { - for (int i = 0; i < T0; ++i) { - for (int j = 0; j < T1; ++j) { - for (int k = 0; k < T2; ++k) { - v(ti * T0 + i, tj * T1 + j, tk * T2 + k) = - (ti + tj * NT0 + tk * NT0 * NT1) * FT + - (i * T1 * T2 + j * T2 + k); - } - } - } - } - } - } - - for (int 
tk = 0; tk < NT2; ++tk) { - for (int tj = 0; tj < NT1; ++tj) { - for (int ti = 0; ti < NT0; ++ti) { - auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk); - for (int i = 0; i < T0; ++i) { - for (int j = 0; j < T1; ++j) { - for (int k = 0; k < T2; ++k) { - if (tile_subview(i, j, k) != - v(ti * T0 + i, tj * T1 + j, tk * T2 + k)) { - ++counter[2]; - } -#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT - std::cout << "idx0,idx1,idx2 = " << ti * T0 + i << "," - << tj * T1 + j << "," << tk * T2 + k << std::endl; - std::cout - << "ti,tj,tk,i,j,k: " << ti << "," << tj << "," << tk - << "," << i << "," << j << "," << k - << " v = " << v(ti * T0 + i, tj * T1 + j, tk * T2 + k) - << " flat idx = " - << (ti + tj * NT0 + tk * NT0 * NT1) * FT + - (i * T1 * T2 + j * T2 + k) - << std::endl; - std::cout << "subview_tile output = " << tile_subview(i, j, k) - << std::endl; - std::cout - << "subview tile rank = " << Kokkos::rank(tile_subview) - << std::endl; -#endif - } - } - } - } - } - } - } // end scope - - // Create RR View - { - Kokkos::View v("v", N0, - N1, N2); - for (int ti = 0; ti < NT0; ++ti) { - for (int tj = 0; tj < NT1; ++tj) { - for (int tk = 0; tk < NT2; ++tk) { - for (int i = 0; i < T0; ++i) { - for (int j = 0; j < T1; ++j) { - for (int k = 0; k < T2; ++k) { - v(ti * T0 + i, tj * T1 + j, tk * T2 + k) = - (ti * NT1 * NT2 + tj * NT2 + tk) * FT + - (i * T1 * T2 + j * T2 + k); - } - } - } - } - } - } - - for (int ti = 0; ti < NT0; ++ti) { - for (int tj = 0; tj < NT1; ++tj) { - for (int tk = 0; tk < NT2; ++tk) { - auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk); - for (int i = 0; i < T0; ++i) { - for (int j = 0; j < T1; ++j) { - for (int k = 0; k < T2; ++k) { - if (tile_subview(i, j, k) != - v(ti * T0 + i, tj * T1 + j, tk * T2 + k)) { - ++counter[3]; - } -#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT - std::cout << "idx0,idx1,idx2 = " << ti * T0 + i << "," - << tj * T1 + j << "," << tk * T2 + k << std::endl; - std::cout - << "ti,tj,tk,i,j,k: " << ti << "," << tj << "," << tk - 
<< "," << i << "," << j << "," << k - << " v = " << v(ti * T0 + i, tj * T1 + j, tk * T2 + k) - << " flat idx = " - << (ti * NT1 * NT2 + tj * NT2 + tk) * FT + - (i * T1 * T2 + j * T2 + k) - << std::endl; - std::cout << "subview_tile output = " << tile_subview(i, j, k) - << std::endl; - std::cout - << "subview tile rank = " << Kokkos::rank(tile_subview) - << std::endl; -#endif - } - } - } - } - } - } - } // end scope - -#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT - std::cout << "subview_tile vs view errors:\n" - << " LL: " << counter[0] << " RL: " << counter[1] - << " LR: " << counter[2] << " RR: " << counter[3] << std::endl; -#endif - - ASSERT_EQ(counter[0], long(0)); - ASSERT_EQ(counter[1], long(0)); - ASSERT_EQ(counter[2], long(0)); - ASSERT_EQ(counter[3], long(0)); - - } // end test_view_layout_tiled_subtile_3d - - static void test_view_layout_tiled_subtile_4d(const int N0, const int N1, - const int N2, const int N3) { - const int FT = T0 * T1 * T2 * T3; - - const int NT0 = int(std::ceil(N0 / T0)); - const int NT1 = int(std::ceil(N1 / T1)); - const int NT2 = int(std::ceil(N2 / T2)); - const int NT3 = int(std::ceil(N3 / T3)); - - // Counter to check for errors at the end - long counter[4] = {0}; - // Create LL View - { - Kokkos::View v( - "v", N0, N1, N2, N3); - for (int tl = 0; tl < NT3; ++tl) { - for (int tk = 0; tk < NT2; ++tk) { - for (int tj = 0; tj < NT1; ++tj) { - for (int ti = 0; ti < NT0; ++ti) { - for (int l = 0; l < T3; ++l) { - for (int k = 0; k < T2; ++k) { - for (int j = 0; j < T1; ++j) { - for (int i = 0; i < T0; ++i) { - v(ti * T0 + i, tj * T1 + j, tk * T2 + k, tl * T3 + l) = - (ti + tj * NT0 + tk * N0 * N1 + tl * N0 * N1 * N2) * - FT + - (i + j * T0 + k * T0 * T1 + l * T0 * T1 * T2); - } - } - } - } - } - } - } - } - - for (int tl = 0; tl < NT3; ++tl) { - for (int tk = 0; tk < NT2; ++tk) { - for (int tj = 0; tj < NT1; ++tj) { - for (int ti = 0; ti < NT0; ++ti) { - auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk, tl); - for (int l = 0; l < T3; 
++l) { - for (int k = 0; k < T2; ++k) { - for (int j = 0; j < T1; ++j) { - for (int i = 0; i < T0; ++i) { - if (tile_subview(i, j, k, l) != - v(ti * T0 + i, tj * T1 + j, tk * T2 + k, - tl * T3 + l)) { - ++counter[0]; - } -#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT - std::cout << "idx0,idx1,idx2,idx3 = " << ti * T0 + i - << "," << tj * T1 + j << "," << tk * T2 + k - << "," << tl * T3 + l << std::endl; - std::cout - << "ti,tj,tk,tl: " << ti << "," << tj << "," << tk - << "," << tl << "," - << " i,j,k,l: " << i << "," << j << "," << k << "," - << l << " v = " - << v(ti * T0 + i, tj * T1 + j, tk * T2 + k, - tl * T3 + l) - << " flat idx = " - << (ti + tj * NT0 + tk * N0 * N1 + - tl * N0 * N1 * N2) * - FT + - (i + j * T0 + k * T0 * T1 + l * T0 * T1 * T2) - << std::endl; - std::cout << "subview_tile output = " - << tile_subview(i, j, k, l) << std::endl; - std::cout << "subview tile rank = " - << Kokkos::rank(tile_subview) << std::endl; -#endif - } - } - } - } - } - } - } - } - } // end scope - - // Create RL View - { - Kokkos::View v( - "v", N0, N1, N2, N3); - for (int ti = 0; ti < NT0; ++ti) { - for (int tj = 0; tj < NT1; ++tj) { - for (int tk = 0; tk < NT2; ++tk) { - for (int tl = 0; tl < NT3; ++tl) { - for (int l = 0; l < T3; ++l) { - for (int k = 0; k < T2; ++k) { - for (int j = 0; j < T1; ++j) { - for (int i = 0; i < T0; ++i) { - v(ti * T0 + i, tj * T1 + j, tk * T2 + k, tl * T3 + l) = - (ti * NT1 * NT2 * N3 + tj * NT2 * N3 + tk * N3 + tl) * - FT + - (i + j * T0 + k * T0 * T1 + l * T0 * T1 * T2); - } - } - } - } - } - } - } - } - - for (int ti = 0; ti < NT0; ++ti) { - for (int tj = 0; tj < NT1; ++tj) { - for (int tk = 0; tk < NT2; ++tk) { - for (int tl = 0; tl < NT3; ++tl) { - auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk, tl); - for (int l = 0; l < T3; ++l) { - for (int k = 0; k < T2; ++k) { - for (int j = 0; j < T1; ++j) { - for (int i = 0; i < T0; ++i) { - if (tile_subview(i, j, k, l) != - v(ti * T0 + i, tj * T1 + j, tk * T2 + k, - tl * T3 + l)) { - 
++counter[1]; - } -#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT - std::cout << "idx0,idx1,idx2,idx3 = " << ti * T0 + i - << "," << tj * T1 + j << "," << tk * T2 + k - << "," << tl * T3 + l << std::endl; - std::cout - << "ti,tj,tk,tl: " << ti << "," << tj << "," << tk - << "," << tl << "," - << " i,j,k,l: " << i << "," << j << "," << k << "," - << l << " v = " - << v(ti * T0 + i, tj * T1 + j, tk * T2 + k, - tl * T3 + l) - << " flat idx = " - << (ti * NT1 * NT2 * N3 + tj * NT2 * N3 + tk * N3 + - tl) * FT + - (i + j * T0 + k * T0 * T1 + l * T0 * T1 * T2) - << std::endl; - std::cout << "subview_tile output = " - << tile_subview(i, j, k, l) << std::endl; - std::cout << "subview tile rank = " - << Kokkos::rank(tile_subview) << std::endl; -#endif - } - } - } - } - } - } - } - } - } // end scope - - // Create LR View - { - Kokkos::View v( - "v", N0, N1, N2, N3); - for (int tl = 0; tl < NT3; ++tl) { - for (int tk = 0; tk < NT2; ++tk) { - for (int tj = 0; tj < NT1; ++tj) { - for (int ti = 0; ti < NT0; ++ti) { - for (int i = 0; i < T0; ++i) { - for (int j = 0; j < T1; ++j) { - for (int k = 0; k < T2; ++k) { - for (int l = 0; l < T3; ++l) { - v(ti * T0 + i, tj * T1 + j, tk * T2 + k, tl * T3 + l) = - (ti + tj * NT0 + tk * NT0 * NT1 + - tl * NT0 * NT1 * NT2) * - FT + - (i * T1 * T2 * T3 + j * T2 * T3 + k * T3 + l); - } - } - } - } - } - } - } - } - - for (int tl = 0; tl < NT3; ++tl) { - for (int tk = 0; tk < NT2; ++tk) { - for (int tj = 0; tj < NT1; ++tj) { - for (int ti = 0; ti < NT0; ++ti) { - auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk, tl); - for (int i = 0; i < T0; ++i) { - for (int j = 0; j < T1; ++j) { - for (int k = 0; k < T2; ++k) { - for (int l = 0; l < T3; ++l) { - if (tile_subview(i, j, k, l) != - v(ti * T0 + i, tj * T1 + j, tk * T2 + k, - tl * T3 + l)) { - ++counter[2]; - } -#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT - std::cout << "idx0,idx1,idx2,idx3 = " << ti * T0 + i - << "," << tj * T1 + j << "," << tk * T2 + k - << "," << tl * T3 + l << std::endl; - 
std::cout - << "ti,tj,tk,tl: " << ti << "," << tj << "," << tk - << "," << tl << "," - << " i,j,k,l: " << i << "," << j << "," << k << "," - << l << " v = " - << v(ti * T0 + i, tj * T1 + j, tk * T2 + k, - tl * T3 + l) - << " flat idx = " - << (ti + tj * NT0 + tk * NT0 * NT1 + - tl * NT0 * NT1 * NT2) * - FT + - (i * T1 * T2 * T3 + j * T2 * T3 + k * T3 + l) - << std::endl; - std::cout << "subview_tile output = " - << tile_subview(i, j, k, l) << std::endl; - std::cout << "subview tile rank = " - << Kokkos::rank(tile_subview) << std::endl; -#endif - } - } - } - } - } - } - } - } - } // end scope - - // Create RR View - { - Kokkos::View v( - "v", N0, N1, N2, N3); - for (int ti = 0; ti < NT0; ++ti) { - for (int tj = 0; tj < NT1; ++tj) { - for (int tk = 0; tk < NT2; ++tk) { - for (int tl = 0; tl < NT3; ++tl) { - for (int i = 0; i < T0; ++i) { - for (int j = 0; j < T1; ++j) { - for (int k = 0; k < T2; ++k) { - for (int l = 0; l < T3; ++l) { - v(ti * T0 + i, tj * T1 + j, tk * T2 + k, tl * T3 + l) = - (ti * NT1 * NT2 * NT3 + tj * NT2 * NT3 + tk * NT3 + - tl) * - FT + - (i * T1 * T2 * T3 + j * T2 * T3 + k * T3 + l); - } - } - } - } - } - } - } - } - - for (int ti = 0; ti < NT0; ++ti) { - for (int tj = 0; tj < NT1; ++tj) { - for (int tk = 0; tk < NT2; ++tk) { - for (int tl = 0; tl < NT3; ++tl) { - auto tile_subview = Kokkos::tile_subview(v, ti, tj, tk, tl); - for (int i = 0; i < T0; ++i) { - for (int j = 0; j < T1; ++j) { - for (int k = 0; k < T2; ++k) { - for (int l = 0; l < T3; ++l) { - if (tile_subview(i, j, k, l) != - v(ti * T0 + i, tj * T1 + j, tk * T2 + k, - tl * T3 + l)) { - ++counter[3]; - } -#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT - std::cout << "idx0,idx1,idx2,idx3 = " << ti * T0 + i - << "," << tj * T1 + j << "," << tk * T2 + k - << "," << tl * T3 + l << std::endl; - std::cout - << "ti,tj,tk,tl: " << ti << "," << tj << "," << tk - << "," << tl << "," - << " i,j,k,l: " << i << "," << j << "," << k << "," - << l << " v = " - << v(ti * T0 + i, tj * T1 + j, tk * T2 + 
k, - tl * T3 + l) - << " flat idx = " - << (ti * NT1 * NT2 * NT3 + tj * NT2 * NT3 + tk * NT3 + - tl) * FT + - (i * T1 * T2 * T3 + j * T2 * T3 + k * T3 + l) - << std::endl; - std::cout << "subview_tile output = " - << tile_subview(i, j, k, l) << std::endl; - std::cout << "subview tile rank = " - << Kokkos::rank(tile_subview) << std::endl; -#endif - } - } - } - } - } - } - } - } - } // end scope - -#ifdef KOKKOS_VERBOSE_LAYOUTTILED_OUTPUT - std::cout << "subview_tile vs view errors:\n" - << " LL: " << counter[0] << " RL: " << counter[1] - << " LR: " << counter[2] << " RR: " << counter[3] << std::endl; -#endif - - ASSERT_EQ(counter[0], long(0)); - ASSERT_EQ(counter[1], long(0)); - ASSERT_EQ(counter[2], long(0)); - ASSERT_EQ(counter[3], long(0)); - - } // end test_view_layout_tiled_subtile_4d - -}; // end TestViewLayoutTiled struct - -} // namespace - -TEST(TEST_CATEGORY, view_layouttiled) { - // These two examples are iterating by tile, then within a tile - not by - // extents If N# is not a power of two, but want to iterate by tile then - // within a tile, need to check that mapped index is within extent - TestViewLayoutTiled::test_view_layout_tiled_2d(4, 12); - TestViewLayoutTiled::test_view_layout_tiled_3d(4, 12, 16); - TestViewLayoutTiled::test_view_layout_tiled_4d(4, 12, 16, 12); -} -TEST(TEST_CATEGORY, view_layouttiled_subtile) { - // These two examples are iterating by tile, then within a tile - not by - // extents If N# is not a power of two, but want to iterate by tile then - // within a tile, need to check that mapped index is within extent - TestViewLayoutTiled::test_view_layout_tiled_subtile_2d(4, 12); - TestViewLayoutTiled::test_view_layout_tiled_subtile_3d(4, 12, - 16); - TestViewLayoutTiled::test_view_layout_tiled_subtile_4d( - 4, 12, 16, 12); -} -} // namespace Test - -#undef KOKKOS_IMPL_PUBLIC_INCLUDE diff --git a/lib/kokkos/core/unit_test/TestViewOfViews.hpp b/lib/kokkos/core/unit_test/TestViewOfViews.hpp new file mode 100644 index 
0000000000..1d53bca336 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestViewOfViews.hpp @@ -0,0 +1,129 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#include + +#include + +namespace { + +// User-defined types with a View data member +template +class S { + V v_; + + public: + template + S(std::string label, Extents... extents) : v_(std::move(label), extents...) {} + KOKKOS_DEFAULTED_FUNCTION S() = default; +}; + +template +class N { // not default constructible + V v_; + + public: + template + N(std::string label, Extents... extents) : v_(std::move(label), extents...) {} +}; + +template +class H { // constructible and destructible only from on the host side + V v_; + + public: + template + H(std::string label, Extents... extents) : v_(std::move(label), extents...) 
{} + H() {} + ~H() {} +}; + +template +void test_view_of_views_default() { + // assigning a default-constructed view to destruct the inner objects + using VoV = Kokkos::View; + VoV vov("vov", 2, 3); + V a("a"); + V b("b"); + vov(0, 0) = a; + vov(1, 0) = a; + vov(0, 1) = b; +#ifndef KOKKOS_ENABLE_IMPL_VIEW_OF_VIEWS_DESTRUCTOR_PRECONDITION_VIOLATION_WORKAROUND + vov(0, 0) = V(); + vov(1, 0) = V(); + vov(0, 1) = V(); +#endif +} + +template +void test_view_of_views_without_initializing() { + // using placement new to construct the inner objects and explicitly + // calling the destructor + using VoV = Kokkos::View; + VoV vov(Kokkos::view_alloc("vov", Kokkos::WithoutInitializing), 2, 3); + V a("a"); + V b("b"); + new (&vov(0, 0)) V(a); + new (&vov(1, 0)) V(a); + new (&vov(0, 1)) V(b); +#ifndef KOKKOS_ENABLE_IMPL_VIEW_OF_VIEWS_DESTRUCTOR_PRECONDITION_VIOLATION_WORKAROUND + vov(0, 0).~V(); + vov(1, 0).~V(); + vov(0, 1).~V(); +#else + // leaks memory +#endif +} + +template +void test_view_of_views_sequential_host_init() { + // inner views value-initialized sequentially on the host, and also + // sequentially destructed on the host, without the need to cleanup + using VoV = Kokkos::View; + VoV vov(Kokkos::view_alloc("vov", Kokkos::SequentialHostInit), 2, 3); + V a("a"); + V b("b"); + vov(0, 0) = a; + vov(1, 0) = a; + vov(0, 1) = b; +} + +TEST(TEST_CATEGORY, view_of_views_default) { + test_view_of_views_default>(); + test_view_of_views_default>(); + // User-defined type with View data member + test_view_of_views_default>>(); +} + +TEST(TEST_CATEGORY, view_of_views_without_initializing) { + test_view_of_views_without_initializing>(); + test_view_of_views_without_initializing< + S>>(); + test_view_of_views_without_initializing< + N>>(); + test_view_of_views_without_initializing< + H>>(); +} + +TEST(TEST_CATEGORY, test_view_of_views_sequential_host_init) { + test_view_of_views_sequential_host_init>(); + test_view_of_views_sequential_host_init< + S>>(); + 
test_view_of_views_sequential_host_init< + H>>(); +} + +} // namespace diff --git a/lib/kokkos/core/unit_test/TestViewSubview.hpp b/lib/kokkos/core/unit_test/TestViewSubview.hpp index 386887d923..c60aa2fe26 100644 --- a/lib/kokkos/core/unit_test/TestViewSubview.hpp +++ b/lib/kokkos/core/unit_test/TestViewSubview.hpp @@ -2294,9 +2294,8 @@ template struct TestExtentsStaticTests { using test1 = typename static_expect_same< /* expected */ - Kokkos::Experimental::Extents, + Kokkos::Experimental::Extents, /* actual */ typename Kokkos::Impl::ParseViewExtents::type>::type; diff --git a/lib/kokkos/core/unit_test/UnitTest_ScopeGuard.cpp b/lib/kokkos/core/unit_test/UnitTest_ScopeGuard.cpp new file mode 100644 index 0000000000..b2176f3ef0 --- /dev/null +++ b/lib/kokkos/core/unit_test/UnitTest_ScopeGuard.cpp @@ -0,0 +1,155 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#include +#include +#include + +namespace { + +/** + * Fixture that checks Kokkos is neither initialized nor finalized before and + * after the test. + */ +class AssertEnvironmentTest : public ::testing::Test { + protected: + void SetUp() override { + ASSERT_FALSE(Kokkos::is_initialized()); + ASSERT_FALSE(Kokkos::is_finalized()); + } + + void TearDown() override { + ASSERT_FALSE(Kokkos::is_initialized()); + ASSERT_FALSE(Kokkos::is_finalized()); + } +}; + +using scope_guard_DeathTest = AssertEnvironmentTest; + +/** + * Test to create a scope guard normally. 
+ */ +TEST_F(scope_guard_DeathTest, create) { + ::testing::FLAGS_gtest_death_test_style = "threadsafe"; + // run it in a different process so side effects are not kept + EXPECT_EXIT( + { + { + Kokkos::ScopeGuard guard{}; + + if (!Kokkos::is_initialized()) std::exit(EXIT_FAILURE); + if (Kokkos::is_finalized()) std::exit(EXIT_FAILURE); + } + + if (Kokkos::is_initialized()) std::exit(EXIT_FAILURE); + if (!Kokkos::is_finalized()) std::exit(EXIT_FAILURE); + + std::exit(EXIT_SUCCESS); + }, + testing::ExitedWithCode(EXIT_SUCCESS), ""); +} + +/** + * Test to create a scope guard with an argument. + */ +TEST_F(scope_guard_DeathTest, create_argument) { + ::testing::FLAGS_gtest_death_test_style = "threadsafe"; + // run it in a different process so side effects are not kept + EXPECT_EXIT( + { + { + Kokkos::InitializationSettings settings{}; + Kokkos::ScopeGuard guard{settings}; + } + + std::exit(EXIT_SUCCESS); + }, + testing::ExitedWithCode(EXIT_SUCCESS), ""); +} + +/** + * Test to create another scope guard when one has been created. + */ +TEST_F(scope_guard_DeathTest, create_while_initialize) { + ::testing::FLAGS_gtest_death_test_style = "threadsafe"; + EXPECT_DEATH( + { + Kokkos::ScopeGuard guard1{}; + + // create a second scope guard while there is one already existing + Kokkos::ScopeGuard guard2{}; + }, + "Creating a ScopeGuard while Kokkos is initialized"); +} + +/** + * Test to create a scope guard when initialization has been done manually. + */ +TEST_F(scope_guard_DeathTest, create_after_initialize) { + ::testing::FLAGS_gtest_death_test_style = "threadsafe"; + EXPECT_DEATH( + { + Kokkos::initialize(); + + // create a scope guard after manual initialization + Kokkos::ScopeGuard guard{}; + }, + "Creating a ScopeGuard while Kokkos is initialized"); +} + +/** + * Test to create another scope guard when one has been destroyed. 
+ */ +TEST_F(scope_guard_DeathTest, create_after_finalize) { + ::testing::FLAGS_gtest_death_test_style = "threadsafe"; + EXPECT_DEATH( + { + { Kokkos::ScopeGuard guard1{}; } + + // create a second scope guard while the first one has been destroyed + // already + Kokkos::ScopeGuard guard2{}; + }, + "Creating a ScopeGuard after Kokkos was finalized"); +} + +/** + * Test to destroy a scope guard when finalization has been done manually. + */ +TEST_F(scope_guard_DeathTest, destroy_after_finalize) { + ::testing::FLAGS_gtest_death_test_style = "threadsafe"; + EXPECT_DEATH( + { + // create a scope guard and finalize it manually + Kokkos::ScopeGuard guard{}; + Kokkos::finalize(); + }, + "Destroying a ScopeGuard after Kokkos was finalized"); +} + +/** + * Static tests + */ + +// Test scope guard is not copyable. +static_assert(!std::is_copy_assignable()); +static_assert(!std::is_copy_constructible()); + +// Test scope guard is not movable. +static_assert(!std::is_move_assignable()); +static_assert(!std::is_move_constructible()); + +} // namespace diff --git a/lib/kokkos/core/unit_test/category_files/TestHPX_Category.hpp b/lib/kokkos/core/unit_test/category_files/TestHPX_Category.hpp index d3a7cdbea5..c6a2aa9f20 100644 --- a/lib/kokkos/core/unit_test/category_files/TestHPX_Category.hpp +++ b/lib/kokkos/core/unit_test/category_files/TestHPX_Category.hpp @@ -23,5 +23,6 @@ #define TEST_CATEGORY_NUMBER 3 #define TEST_CATEGORY_DEATH hpx_DeathTest #define TEST_EXECSPACE Kokkos::Experimental::HPX +#define TEST_CATEGORY_FIXTURE(name) hpx_##name #endif diff --git a/lib/kokkos/core/unit_test/category_files/TestOpenACC_Category.hpp b/lib/kokkos/core/unit_test/category_files/TestOpenACC_Category.hpp index 0c4e4b7e11..6105eadf14 100644 --- a/lib/kokkos/core/unit_test/category_files/TestOpenACC_Category.hpp +++ b/lib/kokkos/core/unit_test/category_files/TestOpenACC_Category.hpp @@ -23,5 +23,6 @@ #define TEST_CATEGORY_NUMBER 8 #define TEST_CATEGORY_DEATH openacc_DeathTest #define 
TEST_EXECSPACE Kokkos::Experimental::OpenACC +#define TEST_CATEGORY_FIXTURE(name) openacc_##name #endif diff --git a/lib/kokkos/core/unit_test/category_files/TestOpenMPTarget_Category.hpp b/lib/kokkos/core/unit_test/category_files/TestOpenMPTarget_Category.hpp index 235b34ffab..921cff7890 100644 --- a/lib/kokkos/core/unit_test/category_files/TestOpenMPTarget_Category.hpp +++ b/lib/kokkos/core/unit_test/category_files/TestOpenMPTarget_Category.hpp @@ -23,5 +23,6 @@ #define TEST_CATEGORY_NUMBER 4 #define TEST_CATEGORY_DEATH openmptarget_DeathTest #define TEST_EXECSPACE Kokkos::Experimental::OpenMPTarget +#define TEST_CATEGORY_FIXTURE(name) openmptarget_##name #endif diff --git a/lib/kokkos/core/unit_test/category_files/TestSYCL_Category.hpp b/lib/kokkos/core/unit_test/category_files/TestSYCL_Category.hpp index 8e1b18c9ac..59e72c72c7 100644 --- a/lib/kokkos/core/unit_test/category_files/TestSYCL_Category.hpp +++ b/lib/kokkos/core/unit_test/category_files/TestSYCL_Category.hpp @@ -23,5 +23,6 @@ #define TEST_CATEGORY_NUMBER 7 #define TEST_CATEGORY_DEATH sycl_DeathTest #define TEST_EXECSPACE Kokkos::Experimental::SYCL +#define TEST_CATEGORY_FIXTURE(name) sycl_##name #endif diff --git a/lib/kokkos/core/unit_test/category_files/TestThreads_Category.hpp b/lib/kokkos/core/unit_test/category_files/TestThreads_Category.hpp index 13b0b653f2..ae8ac60833 100644 --- a/lib/kokkos/core/unit_test/category_files/TestThreads_Category.hpp +++ b/lib/kokkos/core/unit_test/category_files/TestThreads_Category.hpp @@ -23,5 +23,6 @@ #define TEST_CATEGORY_NUMBER 1 #define TEST_CATEGORY_DEATH threads_DeathTest #define TEST_EXECSPACE Kokkos::Threads +#define TEST_CATEGORY_FIXTURE(name) threads_##name #endif diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_Graph.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_Graph.cpp deleted file mode 100644 index 2720363969..0000000000 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_Graph.cpp +++ /dev/null @@ -1,18 +0,0 @@ -//@HEADER -// 
************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER - -#include -#include diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_InterOp_StreamsMultiGPU.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_InterOp_StreamsMultiGPU.cpp index d94735ceb2..40955e9c7c 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_InterOp_StreamsMultiGPU.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_InterOp_StreamsMultiGPU.cpp @@ -15,7 +15,7 @@ //@HEADER #include -#include +#include namespace { @@ -57,79 +57,6 @@ std::array get_execution_spaces( return {exec0, exec1}; } -// Test Interoperability with Cuda Streams -void test_policies(TEST_EXECSPACE exec0, Kokkos::View v0, - TEST_EXECSPACE exec, Kokkos::View v) { - using MemorySpace = typename TEST_EXECSPACE::memory_space; - - Kokkos::deep_copy(exec, v, 5); - Kokkos::deep_copy(exec0, v0, 5); - - Kokkos::deep_copy(v, v0); - - int sum; - int sum0; - - Kokkos::parallel_for("Test::cuda::raw_cuda_stream::Range_0", - Kokkos::RangePolicy(exec0, 0, 100), - Test::FunctorRange(v0)); - Kokkos::parallel_for("Test::cuda::raw_cuda_stream::Range", - Kokkos::RangePolicy(exec, 0, 100), - Test::FunctorRange(v)); - Kokkos::parallel_reduce( - "Test::cuda::raw_cuda_stream::RangeReduce_0", - Kokkos::RangePolicy>(exec0, - 0, 100), - Test::FunctorRangeReduce(v0), sum0); - Kokkos::parallel_reduce( - "Test::cuda::raw_cuda_stream::RangeReduce", - Kokkos::RangePolicy>(exec, 0, - 100), - Test::FunctorRangeReduce(v), sum); - ASSERT_EQ(600, sum0); - ASSERT_EQ(600, sum); - - 
Kokkos::parallel_for("Test::cuda::raw_cuda_stream::MDRange_0", - Kokkos::MDRangePolicy>( - exec0, {0, 0}, {10, 10}), - Test::FunctorMDRange(v0)); - Kokkos::parallel_for("Test::cuda::raw_cuda_stream::MDRange", - Kokkos::MDRangePolicy>( - exec, {0, 0}, {10, 10}), - Test::FunctorMDRange(v)); - Kokkos::parallel_reduce("Test::cuda::raw_cuda_stream::MDRangeReduce_0", - Kokkos::MDRangePolicy, - Kokkos::LaunchBounds<128, 2>>( - exec0, {0, 0}, {10, 10}), - Test::FunctorMDRangeReduce(v0), sum0); - Kokkos::parallel_reduce("Test::cuda::raw_cuda_stream::MDRangeReduce", - Kokkos::MDRangePolicy, - Kokkos::LaunchBounds<128, 2>>( - exec, {0, 0}, {10, 10}), - Test::FunctorMDRangeReduce(v), sum); - ASSERT_EQ(700, sum0); - ASSERT_EQ(700, sum); - - Kokkos::parallel_for("Test::cuda::raw_cuda_stream::Team_0", - Kokkos::TeamPolicy(exec0, 10, 10), - Test::FunctorTeam(v0)); - Kokkos::parallel_for("Test::cuda::raw_cuda_stream::Team", - Kokkos::TeamPolicy(exec, 10, 10), - Test::FunctorTeam(v)); - Kokkos::parallel_reduce( - "Test::cuda::raw_cuda_stream::Team_0", - Kokkos::TeamPolicy>(exec0, - 10, 10), - Test::FunctorTeamReduce(v0), sum0); - Kokkos::parallel_reduce( - "Test::cuda::raw_cuda_stream::Team", - Kokkos::TeamPolicy>(exec, 10, - 10), - Test::FunctorTeamReduce(v), sum); - ASSERT_EQ(800, sum0); - ASSERT_EQ(800, sum); -} - TEST(cuda_multi_gpu, managed_views) { StreamsAndDevices streams_and_devices; { @@ -169,93 +96,6 @@ TEST(cuda_multi_gpu, unmanaged_views) { } } -struct ScratchFunctor { - int scratch_size; - int R; - - ScratchFunctor(int scratch_size_, int R_) - : scratch_size(scratch_size_), R(R_) {} - - KOKKOS_FUNCTION - void operator()(const Kokkos::TeamPolicy::member_type &team, - int &error_accum) const { - Kokkos::View scratch_mem( - team.team_scratch(1), scratch_size); - - // Initialize scratch memory - Kokkos::parallel_for(Kokkos::TeamVectorRange(team, 0, scratch_size), - [&](int i) { scratch_mem(i) = 0; }); - team.team_barrier(); - - // Increment each entry in scratch memory R 
times - for (int r = 0; r < R; ++r) { - Kokkos::parallel_for(Kokkos::TeamVectorRange(team, 0, scratch_size), - [&](int i) { scratch_mem(i) += 1; }); - } - team.team_barrier(); - - // Check that each scratch entry has been incremented exactly R times - int team_error_accum; - auto R_loc = R; // avoid implicit capture of this - Kokkos::parallel_reduce( - Kokkos::TeamVectorRange(team, 0, scratch_size), - [&](int i, int &tsum) { - if (scratch_mem(i) != R_loc) { - tsum += 1; - } - }, - team_error_accum); - Kokkos::single(Kokkos::PerTeam(team), - [&]() { error_accum += team_error_accum; }); - } -}; - -void test_scratch(TEST_EXECSPACE exec0, TEST_EXECSPACE exec1) { - constexpr int N = 10; - constexpr int R = 1000; - constexpr int scratch_size = 100; - using ScratchType = Kokkos::View; - - // Test allocating and using scratch space - ScratchFunctor f(scratch_size, R); - - auto policy0 = - Kokkos::TeamPolicy(exec0, N, 10) - .set_scratch_size( - 1, Kokkos::PerTeam(ScratchType::shmem_size(scratch_size))); - auto policy1 = - Kokkos::TeamPolicy(exec1, N, 10) - .set_scratch_size( - 1, Kokkos::PerTeam(ScratchType::shmem_size(scratch_size))); - - int error0, error1; - - Kokkos::parallel_reduce("test_scratch_device_0", policy0, f, error0); - Kokkos::parallel_reduce("test_scratch_device_1", policy1, f, error1); - ASSERT_EQ(error0, 0); - ASSERT_EQ(error1, 0); - - // Request larger scratch size to trigger a realloc and test - const auto new_scratch_size = scratch_size + 10; - ScratchFunctor f_more_scratch(new_scratch_size, R); - - auto policy0_more_scratch = - Kokkos::TeamPolicy(exec0, N, 10) - .set_scratch_size( - 1, Kokkos::PerTeam(ScratchType::shmem_size(new_scratch_size))); - auto policy1_more_scratch = - Kokkos::TeamPolicy(exec1, N, 10) - .set_scratch_size( - 1, Kokkos::PerTeam(ScratchType::shmem_size(new_scratch_size))); - - Kokkos::parallel_reduce("test_realloc_scratch_device_0", policy0_more_scratch, - f_more_scratch, error0); - 
Kokkos::parallel_reduce("test_realloc_scratch_device_1", policy1_more_scratch, - f_more_scratch, error1); - ASSERT_EQ(error0, 0); - ASSERT_EQ(error1, 0); -} - TEST(cuda_multi_gpu, scratch_space) { StreamsAndDevices streams_and_devices; { diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_Spaces.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_Spaces.cpp index 11fe6b8555..f40af99e7c 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_Spaces.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_Spaces.cpp @@ -39,9 +39,14 @@ TEST(cuda, space_access) { !Kokkos::Impl::MemorySpaceAccess::assignable); +#ifndef KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY static_assert( !Kokkos::Impl::MemorySpaceAccess::accessible); +#else + static_assert(Kokkos::Impl::MemorySpaceAccess::accessible); +#endif static_assert( !Kokkos::Impl::MemorySpaceAccess::accessible); +#ifndef KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY static_assert(!Kokkos::SpaceAccessibility::accessible); +#else + static_assert(Kokkos::SpaceAccessibility::accessible); +#endif static_assert(Kokkos::SpaceAccessibility::accessible); @@ -157,8 +167,14 @@ TEST(cuda, space_access) { Kokkos::SpaceAccessibility::accessible); +#ifndef KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY static_assert(std::is_same::Space, Kokkos::HostSpace>::value); +#else + static_assert(std::is_same::Space, + Kokkos::Device>::value); +#endif static_assert( std::is_same::Space, diff --git a/lib/kokkos/core/unit_test/headers_self_contained/CMakeLists.txt b/lib/kokkos/core/unit_test/headers_self_contained/CMakeLists.txt index f792b03ed8..4c364ceee7 100644 --- a/lib/kokkos/core/unit_test/headers_self_contained/CMakeLists.txt +++ b/lib/kokkos/core/unit_test/headers_self_contained/CMakeLists.txt @@ -10,7 +10,8 @@ file(GLOB KOKKOS_CONTAINERS_HEADERS RELATIVE ${BASE_DIR}/containers/src file(GLOB KOKKOS_ALGORITHMS_HEADERS RELATIVE ${BASE_DIR}/algorithms/src ${BASE_DIR}/algorithms/src/*.hpp) -if(NOT Kokkos_ENABLE_DEPRECATED_CODE_4) +# erroring out when deprecated code is disabled 
and raising warnings that are treated as errors in the CI otherwise +if(NOT Kokkos_ENABLE_DEPRECATED_CODE_4 OR Kokkos_ENABLE_DEPRECATION_WARNINGS) list(REMOVE_ITEM KOKKOS_CONTAINERS_HEADERS "Kokkos_Vector.hpp") endif() diff --git a/lib/kokkos/core/unit_test/hip/TestHIP_Graph.cpp b/lib/kokkos/core/unit_test/hip/TestHIP_Graph.cpp deleted file mode 100644 index 405cb76c64..0000000000 --- a/lib/kokkos/core/unit_test/hip/TestHIP_Graph.cpp +++ /dev/null @@ -1,18 +0,0 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER - -#include -#include diff --git a/lib/kokkos/core/unit_test/hip/TestHIP_Memory_Requirements.cpp b/lib/kokkos/core/unit_test/hip/TestHIP_Memory_Requirements.cpp index a213453ea1..8c72e9f297 100644 --- a/lib/kokkos/core/unit_test/hip/TestHIP_Memory_Requirements.cpp +++ b/lib/kokkos/core/unit_test/hip/TestHIP_Memory_Requirements.cpp @@ -48,9 +48,6 @@ TEST(hip, memory_requirements) { // we want all user-facing memory in hip to be coarse grained. 
As of // today(07.01.22) the documentation is not reliable/correct, we test the // memory on the device and host - // FIXME_HIP - GTEST_SKIP() << "skipping the test because the CI on MI100 returns: error( " - "hipErrorInvalidValue)"; KOKKOS_TEST_MEMORY_COARSEGRAINEDNESS(Kokkos::HIPSpace, int, 10); KOKKOS_TEST_MEMORY_COARSEGRAINEDNESS(Kokkos::HIPHostPinnedSpace, int, 10); KOKKOS_TEST_MEMORY_COARSEGRAINEDNESS(Kokkos::HIPManagedSpace, int, 10); diff --git a/lib/kokkos/core/unit_test/incremental/Test01_execspace.hpp b/lib/kokkos/core/unit_test/incremental/Test01_execspace.hpp index d7b2a57b44..a7fa26c728 100644 --- a/lib/kokkos/core/unit_test/incremental/Test01_execspace.hpp +++ b/lib/kokkos/core/unit_test/incremental/Test01_execspace.hpp @@ -63,7 +63,9 @@ struct TestIncrExecSpace { ASSERT_GT(concurrency, 0); #ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 + KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_PUSH() int in_parallel = ExecSpace::in_parallel(); + KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_POP() ASSERT_FALSE(in_parallel); #endif diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_Graph.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_Graph.cpp deleted file mode 100644 index 22c8ab1bf8..0000000000 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_Graph.cpp +++ /dev/null @@ -1,18 +0,0 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER - -#include -#include diff --git a/lib/kokkos/core/unit_test/sycl/TestSYCL_InterOp_StreamsMultiGPU.cpp b/lib/kokkos/core/unit_test/sycl/TestSYCL_InterOp_StreamsMultiGPU.cpp new file mode 100644 index 0000000000..d3906e409f --- /dev/null +++ b/lib/kokkos/core/unit_test/sycl/TestSYCL_InterOp_StreamsMultiGPU.cpp @@ -0,0 +1,64 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#include +#include + +namespace { + +std::array get_execution_spaces() { + std::vector gpu_devices = + sycl::device::get_devices(sycl::info::device_type::gpu); + + TEST_EXECSPACE exec0( + sycl::queue{gpu_devices.front(), sycl::property::queue::in_order()}); + TEST_EXECSPACE exec1( + sycl::queue{gpu_devices.back(), sycl::property::queue::in_order()}); + + return {exec0, exec1}; +} + +TEST(sycl_multi_gpu, managed_views) { + std::array execs = get_execution_spaces(); + + Kokkos::View view0(Kokkos::view_alloc("v0", execs[0]), + 100); + Kokkos::View view(Kokkos::view_alloc("v", execs[1]), + 100); + + test_policies(execs[0], view0, execs[1], view); +} + +TEST(sycl_multi_gpu, unmanaged_views) { + std::array execs = get_execution_spaces(); + + int *p0 = sycl::malloc_device(100, execs[0].sycl_queue()); + Kokkos::View view0(p0, 100); + + int *p1 = sycl::malloc_device(100, execs[1].sycl_queue()); + Kokkos::View view1(p1, 100); + + test_policies(execs[0], view0, execs[1], view1); + sycl::free(p0, execs[0].sycl_queue()); + sycl::free(p1, 
execs[1].sycl_queue()); +} + +TEST(sycl_multi_gpu, scratch_space) { + std::array execs = get_execution_spaces(); + + test_scratch(execs[0], execs[1]); +} +} // namespace diff --git a/lib/kokkos/core/unit_test/view/TestExtentsDatatypeConversion.cpp b/lib/kokkos/core/unit_test/view/TestExtentsDatatypeConversion.cpp index b95890614e..1b9b2a3681 100644 --- a/lib/kokkos/core/unit_test/view/TestExtentsDatatypeConversion.cpp +++ b/lib/kokkos/core/unit_test/view/TestExtentsDatatypeConversion.cpp @@ -23,15 +23,14 @@ namespace { // Helper to make static tests more succinct template -constexpr bool datatype_matches_extent = - std::is_same_v::type, - Extent>; +constexpr bool datatype_matches_extent = std::is_same_v< + typename Kokkos::Impl::ExtentsFromDataType::type, + Extent>; template constexpr bool extent_matches_datatype = - std::is_same_v::type>; + std::is_same_v::type>; // Conversion from DataType to extents // 0-rank view diff --git a/lib/kokkos/example/README b/lib/kokkos/example/README index 6686051244..2fe8727648 100644 --- a/lib/kokkos/example/README +++ b/lib/kokkos/example/README @@ -1,7 +1,7 @@ This directory contains example application proxies that use different parts of Kokkos. If you are looking for the FENL ("finite element -nonlinear" solve) example, it has moved into the LinAlg subpackage of -Tpetra. +nonlinear" solve) example, it has moved into the TrilinosCouplings +package in Trilinos. 
MANIFEST: diff --git a/lib/kokkos/example/build_cmake_installed/CMakeLists.txt b/lib/kokkos/example/build_cmake_installed/CMakeLists.txt index aaf745b418..c025f1d7d2 100644 --- a/lib/kokkos/example/build_cmake_installed/CMakeLists.txt +++ b/lib/kokkos/example/build_cmake_installed/CMakeLists.txt @@ -12,6 +12,7 @@ find_package(Kokkos REQUIRED) add_executable(example cmake_example.cpp foo.f) if(CMAKE_Fortran_COMPILER_ID STREQUAL LLVMFlang) set_target_properties(example PROPERTIES LINKER_LANGUAGE Fortran) + target_link_options(example PRIVATE -fno-fortran-main) endif() # This is the only thing required to set up compiler/linker flags diff --git a/lib/kokkos/example/tutorial/01_hello_world/hello_world.cpp b/lib/kokkos/example/tutorial/01_hello_world/hello_world.cpp index 22b8b6d63c..3104003fb4 100644 --- a/lib/kokkos/example/tutorial/01_hello_world/hello_world.cpp +++ b/lib/kokkos/example/tutorial/01_hello_world/hello_world.cpp @@ -16,7 +16,6 @@ #include #include -#include // // "Hello world" parallel_for example: @@ -25,12 +24,12 @@ // using a functor to define the loop body // 3. Shut down Kokkos // -// If Kokkos was built with C++11 enabled, try comparing this example -// to 01_hello_world_lambda. The latter uses C++11 lambdas (anonymous -// functions) to define the loop body of the parallel_for. That makes -// the code much more concise and readable. On the other hand, -// breaking out the loop body into an explicit functor makes it easier -// to test the loop independently of the parallel pattern. +// Try comparing this example to 01_hello_world_lambda, which uses +// C++11 lambdas (anonymous functions) to define the loop body of the +// parallel_for. That makes the code much more concise and readable. +// On the other hand, breaking out the loop body into an explicit +// functor makes it easier to test the loop independently of the +// parallel pattern. // // Functor that defines the parallel_for's loop body. 
@@ -72,11 +71,9 @@ int main(int argc, char* argv[]) { // start with "--kokkos-". Kokkos::initialize(argc, argv); - // Print the name of Kokkos' default execution space. We're using - // typeid here, so the name might get a bit mangled by the linker, - // but you should still be able to figure out what it is. + // Print the name of Kokkos' default execution space. printf("Hello World on Kokkos execution space %s\n", - typeid(Kokkos::DefaultExecutionSpace).name()); + Kokkos::DefaultExecutionSpace::name()); // Run the above functor on the default Kokkos execution space in // parallel, with a parallel for loop count of 15. diff --git a/lib/kokkos/example/tutorial/01_hello_world_lambda/hello_world_lambda.cpp b/lib/kokkos/example/tutorial/01_hello_world_lambda/hello_world_lambda.cpp index 909765e1fc..ad2c258c0f 100644 --- a/lib/kokkos/example/tutorial/01_hello_world_lambda/hello_world_lambda.cpp +++ b/lib/kokkos/example/tutorial/01_hello_world_lambda/hello_world_lambda.cpp @@ -16,7 +16,6 @@ #include #include -#include // // "Hello world" parallel_for example: @@ -25,10 +24,9 @@ // using a C++11 lambda to define the loop body // 3. Shut down Kokkos // -// This example only builds if C++11 is enabled. Compare this example -// to 01_hello_world, which uses functors (explicitly defined classes) -// to define the loop body of the parallel_for. Both functors and -// lambdas have their places. +// Compare this example to 01_hello_world, which uses functors +// (explicitly defined classes) to define the loop body of the +// parallel_for. Both functors and lambdas have their places. // int main(int argc, char* argv[]) { @@ -41,11 +39,9 @@ int main(int argc, char* argv[]) { // start with "--kokkos-". Kokkos::initialize(argc, argv); - // Print the name of Kokkos' default execution space. We're using - // typeid here, so the name might get a bit mangled by the linker, - // but you should still be able to figure out what it is. + // Print the name of Kokkos' default execution space. 
printf("Hello World on Kokkos execution space %s\n", - typeid(Kokkos::DefaultExecutionSpace).name()); + Kokkos::DefaultExecutionSpace::name()); // Run lambda on the default Kokkos execution space in parallel, // with a parallel for loop count of 15. The lambda's argument is diff --git a/lib/kokkos/example/tutorial/02_simple_reduce_lambda/simple_reduce_lambda.cpp b/lib/kokkos/example/tutorial/02_simple_reduce_lambda/simple_reduce_lambda.cpp index 5cae6da16c..1ca30e07e8 100644 --- a/lib/kokkos/example/tutorial/02_simple_reduce_lambda/simple_reduce_lambda.cpp +++ b/lib/kokkos/example/tutorial/02_simple_reduce_lambda/simple_reduce_lambda.cpp @@ -24,9 +24,8 @@ // using a C++11 lambda to define the loop body // 3. Shut down Kokkos // -// This example only builds if C++11 is enabled. Compare this example -// to 02_simple_reduce, which uses a functor to define the loop body -// of the parallel_reduce. +// Compare this example to 02_simple_reduce, which uses a functor to +// define the loop body of the parallel_reduce. 
// int main(int argc, char* argv[]) { diff --git a/lib/kokkos/generate_makefile.bash b/lib/kokkos/generate_makefile.bash index 70dd61f9af..25370daa3f 100755 --- a/lib/kokkos/generate_makefile.bash +++ b/lib/kokkos/generate_makefile.bash @@ -164,7 +164,6 @@ display_help_text() { echo " AMD_GFX942 = AMD GPU MI300 GFX942" echo " AMD_GFX1030 = AMD GPU V620/W6800 GFX1030" echo " AMD_GFX1100 = AMD GPU RX 7900 XT(X) GFX1100" - echo " AMD_GFX1103 = AMD APU Radeon 740M/760M/780M/880M/890M GFX1103" echo " [ARM]" echo " ARMV80 = ARMv8.0 Compatible CPU" echo " ARMV81 = ARMv8.1 Compatible CPU" diff --git a/lib/kokkos/master_history.txt b/lib/kokkos/master_history.txt index 31be925325..f2a4163610 100644 --- a/lib/kokkos/master_history.txt +++ b/lib/kokkos/master_history.txt @@ -37,3 +37,5 @@ tag: 4.2.00 date: 11:09:2023 master: 1a3ea28f release: abe01c88 tag: 4.2.01 date: 01:30:2024 master: 71a9bcae release: 221e5f7a tag: 4.3.00 date: 04:03:2024 master: e0dc0128 release: f08217a4 tag: 4.3.01 date: 05:07:2024 master: 486cc745 release: 262d2d6e +tag: 4.4.00 date: 08:08:2024 master: 6ecdf605 release: 6068673c +tag: 4.4.01 date: 09:12:2024 master: 08ceff92 release: 2d60c039 diff --git a/lib/kokkos/simd/src/Kokkos_SIMD.hpp b/lib/kokkos/simd/src/Kokkos_SIMD.hpp index 57d4afd88b..5e34e51989 100644 --- a/lib/kokkos/simd/src/Kokkos_SIMD.hpp +++ b/lib/kokkos/simd/src/Kokkos_SIMD.hpp @@ -183,15 +183,18 @@ template class data_types {}; #if defined(KOKKOS_ARCH_AVX512XEON) -using host_abi_set = abi_set>; +using host_abi_set = abi_set, + simd_abi::avx512_fixed_size<16>>; using data_type_set = data_types; #elif defined(KOKKOS_ARCH_AVX2) -using host_abi_set = abi_set>; +using host_abi_set = abi_set, + simd_abi::avx2_fixed_size<8>>; using data_type_set = data_types; #elif defined(KOKKOS_ARCH_ARM_NEON) -using host_abi_set = abi_set>; +using host_abi_set = abi_set, + simd_abi::neon_fixed_size<4>>; using data_type_set = data_types; #else diff --git a/lib/kokkos/simd/src/Kokkos_SIMD_AVX2.hpp 
b/lib/kokkos/simd/src/Kokkos_SIMD_AVX2.hpp index 6d0956f383..0525dc8887 100644 --- a/lib/kokkos/simd/src/Kokkos_SIMD_AVX2.hpp +++ b/lib/kokkos/simd/src/Kokkos_SIMD_AVX2.hpp @@ -228,6 +228,106 @@ class simd_mask> { } }; +template <> +class simd_mask> { + __m256 m_value; + + public: + class reference { + __m256& m_mask; + int m_lane; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION __m256 bit_mask() const { + // FIXME_HIP ROCm 5.6, 5.7, and 6.0 can't compile with the intrinsic used + // here. +#ifdef KOKKOS_IMPL_WORKAROUND_ROCM_AVX2_ISSUE + return _mm256_cvtepi32_ps(_mm256_setr_epi32( +#else + return _mm256_castsi256_ps(_mm256_setr_epi32( +#endif + -std::int32_t(m_lane == 0), -std::int32_t(m_lane == 1), + -std::int32_t(m_lane == 2), -std::int32_t(m_lane == 3), + -std::int32_t(m_lane == 4), -std::int32_t(m_lane == 5), + -std::int32_t(m_lane == 6), -std::int32_t(m_lane == 7))); + } + + public: + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION reference(__m256& mask_arg, + int lane_arg) + : m_mask(mask_arg), m_lane(lane_arg) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION reference + operator=(bool value) const { + if (value) { + m_mask = _mm256_or_ps(bit_mask(), m_mask); + } else { + m_mask = _mm256_andnot_ps(bit_mask(), m_mask); + } + return *this; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION operator bool() const { + return (_mm256_movemask_ps(m_mask) & (1 << m_lane)) != 0; + } + }; + using value_type = bool; + using abi_type = simd_abi::avx2_fixed_size<8>; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask() = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION explicit simd_mask(value_type value) + : m_value(_mm256_castsi256_ps(_mm256_set1_epi32(-std::int32_t(value)))) {} + template >, + bool> = false> + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit simd_mask( + G&& gen) noexcept + : m_value(_mm256_castsi256_ps(_mm256_setr_epi32( + -std::int32_t(gen(std::integral_constant())), + -std::int32_t(gen(std::integral_constant())), + -std::int32_t(gen(std::integral_constant())), + 
-std::int32_t(gen(std::integral_constant())), + -std::int32_t(gen(std::integral_constant())), + -std::int32_t(gen(std::integral_constant())), + -std::int32_t(gen(std::integral_constant())), + -std::int32_t(gen(std::integral_constant()))))) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION static constexpr std::size_t size() { + return 8; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit simd_mask( + __m256 const& value_in) + : m_value(value_in) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit operator __m256() + const { + return m_value; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION reference operator[](std::size_t i) { + return reference(m_value, int(i)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION value_type + operator[](std::size_t i) const { + return static_cast( + reference(const_cast<__m256&>(m_value), int(i))); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask + operator||(simd_mask const& other) const { + return simd_mask(_mm256_or_ps(m_value, other.m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask + operator&&(simd_mask const& other) const { + return simd_mask(_mm256_and_ps(m_value, other.m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask operator!() const { + auto const true_value = static_cast<__m256>(simd_mask(true)); + return simd_mask(_mm256_andnot_ps(m_value, true_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION bool operator==( + simd_mask const& other) const { + return _mm256_movemask_ps(m_value) == _mm256_movemask_ps(other.m_value); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION bool operator!=( + simd_mask const& other) const { + return !operator==(other); + } +}; + template <> class simd_mask> { __m128i m_value; @@ -261,9 +361,7 @@ class simd_mask> { }; using value_type = bool; using abi_type = simd_abi::avx2_fixed_size<4>; - KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask() = default; - KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask(simd_mask const&) = default; - 
KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask(simd_mask&&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask() = default; KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION explicit simd_mask(value_type value) : m_value(_mm_set1_epi32(-std::int32_t(value))) {} KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION static constexpr std::size_t size() { @@ -324,6 +422,107 @@ class simd_mask> { } }; +template <> +class simd_mask> { + __m256i m_value; + + public: + class reference { + __m256i& m_mask; + int m_lane; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION __m256i bit_mask() const { + return _mm256_setr_epi32( + -std::int32_t(m_lane == 0), -std::int32_t(m_lane == 1), + -std::int32_t(m_lane == 2), -std::int32_t(m_lane == 3), + -std::int32_t(m_lane == 4), -std::int32_t(m_lane == 5), + -std::int32_t(m_lane == 6), -std::int32_t(m_lane == 7)); + } + + public: + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION reference(__m256i& mask_arg, + int lane_arg) + : m_mask(mask_arg), m_lane(lane_arg) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION reference + operator=(bool value) const { + if (value) { + m_mask = _mm256_or_si256(bit_mask(), m_mask); + } else { + m_mask = _mm256_andnot_si256(bit_mask(), m_mask); + } + return *this; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION operator bool() const { + return (_mm256_movemask_ps(_mm256_castsi256_ps(m_mask)) & + (1 << m_lane)) != 0; + } + }; + using value_type = bool; + using abi_type = simd_abi::avx2_fixed_size<8>; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask() = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION explicit simd_mask(value_type value) + : m_value(_mm256_set1_epi32(-std::int32_t(value))) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION static constexpr std::size_t size() { + return 8; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit simd_mask( + __m256i const& value_in) + : m_value(value_in) {} + template >, + bool> = false> + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit simd_mask( + G&& gen) noexcept + : m_value(_mm256_setr_epi32( 
+ -std::int32_t(gen(std::integral_constant())), + -std::int32_t(gen(std::integral_constant())), + -std::int32_t(gen(std::integral_constant())), + -std::int32_t(gen(std::integral_constant())), + -std::int32_t(gen(std::integral_constant())), + -std::int32_t(gen(std::integral_constant())), + -std::int32_t(gen(std::integral_constant())), + -std::int32_t(gen(std::integral_constant())))) {} + template + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask( + simd_mask const& other) { + for (std::size_t i = 0; i < size(); ++i) (*this)[i] = other[i]; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit operator __m256i() + const { + return m_value; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION reference operator[](std::size_t i) { + return reference(m_value, int(i)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION value_type + operator[](std::size_t i) const { + return static_cast( + reference(const_cast<__m256i&>(m_value), int(i))); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask + operator||(simd_mask const& other) const { + return simd_mask(_mm256_or_si256(m_value, other.m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask + operator&&(simd_mask const& other) const { + return simd_mask(_mm256_and_si256(m_value, other.m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask operator!() const { + auto const true_value = static_cast<__m256i>(simd_mask(true)); + return simd_mask(_mm256_andnot_si256(m_value, true_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION bool operator==( + simd_mask const& other) const { + return _mm256_movemask_ps(_mm256_castsi256_ps(m_value)) == + _mm256_movemask_ps(_mm256_castsi256_ps(other.m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION bool operator!=( + simd_mask const& other) const { + return !operator==(other); + } +}; + template <> class simd_mask> { __m256i m_value; @@ -358,9 +557,7 @@ class simd_mask> { }; using value_type = bool; using abi_type = simd_abi::avx2_fixed_size<4>; - 
KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask() = default; - KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask(simd_mask const&) = default; - KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask(simd_mask&&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask() = default; KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION explicit simd_mask(value_type value) : m_value(_mm256_set1_epi64x(-std::int64_t(value))) {} KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION static constexpr std::size_t size() { @@ -800,11 +997,11 @@ class simd> { KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION static constexpr std::size_t size() { return 4; } - template , - bool> = false> + template , + bool> = false> KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(U&& value) : m_value(_mm_set1_ps(value_type(value))) {} - template >, @@ -1030,6 +1227,264 @@ namespace Experimental { static_cast<__m128>(c), static_cast<__m128>(b), static_cast<__m128>(a))); } +template <> +class simd> { + __m256 m_value; + + public: + using value_type = float; + using abi_type = simd_abi::avx2_fixed_size<8>; + using mask_type = simd_mask; + using reference = value_type&; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd() = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(simd const&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(simd&&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd& operator=(simd const&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd& operator=(simd&&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION static constexpr std::size_t size() { + return 8; + } + template , + bool> = false> + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(U&& value) + : m_value(_mm256_set1_ps(value_type(value))) {} + template >, + bool> = false> + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(G&& gen) + : m_value(_mm256_setr_ps(gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + 
gen(std::integral_constant()), + gen(std::integral_constant()))) { + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit simd( + __m256 const& value_in) + : m_value(value_in) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION reference operator[](std::size_t i) { + return reinterpret_cast(&m_value)[i]; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION value_type + operator[](std::size_t i) const { + return reinterpret_cast(&m_value)[i]; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_from(value_type const* ptr, + element_aligned_tag) { + m_value = _mm256_loadu_ps(ptr); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_from(value_type const* ptr, + vector_aligned_tag) { + m_value = _mm256_load_ps(ptr); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_to( + value_type* ptr, element_aligned_tag) const { + _mm256_storeu_ps(ptr, m_value); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_to(value_type* ptr, + vector_aligned_tag) const { + _mm256_store_ps(ptr, m_value); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit operator __m256() + const { + return m_value; + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd operator-() const + noexcept { + return simd(_mm256_sub_ps(_mm256_set1_ps(0.0), m_value)); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator*( + simd const& lhs, simd const& rhs) noexcept { + return simd(_mm256_mul_ps(lhs.m_value, rhs.m_value)); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator/( + simd const& lhs, simd const& rhs) noexcept { + return simd(_mm256_div_ps(lhs.m_value, rhs.m_value)); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator+( + simd const& lhs, simd const& rhs) noexcept { + return simd(_mm256_add_ps(lhs.m_value, rhs.m_value)); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator-( + simd const& lhs, simd const& rhs) noexcept { + return simd(_mm256_sub_ps(lhs.m_value, rhs.m_value)); + } + 
[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator<(simd const& lhs, simd const& rhs) noexcept { + return mask_type(_mm256_cmp_ps(static_cast<__m256>(lhs), + static_cast<__m256>(rhs), _CMP_LT_OS)); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator>(simd const& lhs, simd const& rhs) noexcept { + return mask_type(_mm256_cmp_ps(static_cast<__m256>(lhs), + static_cast<__m256>(rhs), _CMP_GT_OS)); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator<=(simd const& lhs, simd const& rhs) noexcept { + return mask_type(_mm256_cmp_ps(static_cast<__m256>(lhs), + static_cast<__m256>(rhs), _CMP_LE_OS)); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator>=(simd const& lhs, simd const& rhs) noexcept { + return mask_type(_mm256_cmp_ps(static_cast<__m256>(lhs), + static_cast<__m256>(rhs), _CMP_GE_OS)); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator==(simd const& lhs, simd const& rhs) noexcept { + return mask_type(_mm256_cmp_ps(static_cast<__m256>(lhs), + static_cast<__m256>(rhs), _CMP_EQ_OS)); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator!=(simd const& lhs, simd const& rhs) noexcept { + return mask_type(_mm256_cmp_ps(static_cast<__m256>(lhs), + static_cast<__m256>(rhs), _CMP_NEQ_OS)); + } +}; + +} // namespace Experimental + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION Experimental::simd< + float, Experimental::simd_abi::avx2_fixed_size<8>> +copysign( + Experimental::simd> const& + a, + Experimental::simd> const& + b) { + __m256 const sign_mask = _mm256_set1_ps(-0.0); + return Experimental::simd>( + _mm256_xor_ps(_mm256_andnot_ps(sign_mask, static_cast<__m256>(a)), + _mm256_and_ps(sign_mask, static_cast<__m256>(b)))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + Experimental::simd> + abs(Experimental::simd< + float, 
Experimental::simd_abi::avx2_fixed_size<8>> const& a) { + __m256 const sign_mask = _mm256_set1_ps(-0.0); + return Experimental::simd>( + _mm256_andnot_ps(sign_mask, static_cast<__m256>(a))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + Experimental::simd> + floor(Experimental::simd< + float, Experimental::simd_abi::avx2_fixed_size<8>> const& a) { + return Experimental::simd>( + _mm256_round_ps(static_cast<__m256>(a), + (_MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + Experimental::simd> + ceil(Experimental::simd< + float, Experimental::simd_abi::avx2_fixed_size<8>> const& a) { + return Experimental::simd>( + _mm256_round_ps(static_cast<__m256>(a), + (_MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + Experimental::simd> + round(Experimental::simd< + float, Experimental::simd_abi::avx2_fixed_size<8>> const& a) { + return Experimental::simd>( + _mm256_round_ps(static_cast<__m256>(a), + (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + Experimental::simd> + trunc(Experimental::simd< + float, Experimental::simd_abi::avx2_fixed_size<8>> const& a) { + return Experimental::simd>( + _mm256_round_ps(static_cast<__m256>(a), + (_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + Experimental::simd> + sqrt(Experimental::simd< + float, Experimental::simd_abi::avx2_fixed_size<8>> const& a) { + return Experimental::simd>( + _mm256_sqrt_ps(static_cast<__m256>(a))); +} + +#ifdef __INTEL_COMPILER + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + Experimental::simd> + cbrt(Experimental::simd< + float, Experimental::simd_abi::avx2_fixed_size<8>> const& a) { + return Experimental::simd>( + _mm256_cbrt_ps(static_cast<__m256>(a))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + Experimental::simd> + exp(Experimental::simd< + float, 
Experimental::simd_abi::avx2_fixed_size<8>> const& a) { + return Experimental::simd>( + _mm256_exp_ps(static_cast<__m256>(a))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + Experimental::simd> + log(Experimental::simd< + float, Experimental::simd_abi::avx2_fixed_size<8>> const& a) { + return Experimental::simd>( + _mm256_log_ps(static_cast<__m256>(a))); +} + +#endif + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION Experimental::simd< + float, Experimental::simd_abi::avx2_fixed_size<8>> +fma(Experimental::simd> const& + a, + Experimental::simd> const& + b, + Experimental::simd> const& + c) { + return Experimental::simd>( + _mm256_fmadd_ps(static_cast<__m256>(a), static_cast<__m256>(b), + static_cast<__m256>(c))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION Experimental::simd< + float, Experimental::simd_abi::avx2_fixed_size<8>> +max(Experimental::simd> const& + a, + Experimental::simd> const& + b) { + return Experimental::simd>( + _mm256_max_ps(static_cast<__m256>(a), static_cast<__m256>(b))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION Experimental::simd< + float, Experimental::simd_abi::avx2_fixed_size<8>> +min(Experimental::simd> const& + a, + Experimental::simd> const& + b) { + return Experimental::simd>( + _mm256_min_ps(static_cast<__m256>(a), static_cast<__m256>(b))); +} + +namespace Experimental { + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + simd> + condition(simd_mask> const& a, + simd> const& b, + simd> const& c) { + return simd>(_mm256_blendv_ps( + static_cast<__m256>(c), static_cast<__m256>(b), static_cast<__m256>(a))); +} + template <> class simd> { __m128i m_value; @@ -1229,6 +1684,207 @@ namespace Experimental { _mm_castsi128_ps(static_cast<__m128i>(a))))); } +template <> +class simd> { + __m256i m_value; + + public: + using value_type = std::int32_t; + using abi_type = simd_abi::avx2_fixed_size<8>; + using mask_type = simd_mask; + using reference = value_type&; + 
KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd() = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(simd const&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(simd&&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd& operator=(simd const&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd& operator=(simd&&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION static constexpr std::size_t size() { + return 8; + } + template , + bool> = false> + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(U&& value) + : m_value(_mm256_set1_epi32(value_type(value))) {} + template >, + bool> = false> + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit simd( + G&& gen) noexcept + : m_value( + _mm256_setr_epi32(gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()))) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit simd( + __m256i const& value_in) + : m_value(value_in) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION reference operator[](std::size_t i) { + return reinterpret_cast(&m_value)[i]; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION value_type + operator[](std::size_t i) const { + return reinterpret_cast(&m_value)[i]; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_from(value_type const* ptr, + element_aligned_tag) { + // FIXME_HIP ROCm 5.6, 5.7, and 6.0 can't compile with the intrinsic used + // here. +#ifdef KOKKOS_IMPL_WORKAROUND_ROCM_AVX2_ISSUE + m_value = _mm256_loadu_si256(reinterpret_cast<__m256i const*>(ptr)); +#else + m_value = _mm256_maskload_epi32(ptr, static_cast<__m256i>(mask_type(true))); +#endif + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_from(value_type const* ptr, + vector_aligned_tag) { + // FIXME_HIP ROCm 5.6, 5.7, and 6.0 can't compile with the intrinsic used + // here. 
+#ifdef KOKKOS_IMPL_WORKAROUND_ROCM_AVX2_ISSUE + m_value = _mm256_loadu_si256(reinterpret_cast<__m256i const*>(ptr)); +#else + m_value = _mm256_maskload_epi32(ptr, static_cast<__m256i>(mask_type(true))); +#endif + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_to( + value_type* ptr, element_aligned_tag) const { + _mm256_maskstore_epi32(ptr, static_cast<__m256i>(mask_type(true)), m_value); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_to(value_type* ptr, + vector_aligned_tag) const { + _mm256_maskstore_epi32(ptr, static_cast<__m256i>(mask_type(true)), m_value); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit operator __m256i() + const { + return m_value; + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator==(simd const& lhs, simd const& rhs) noexcept { + return mask_type(_mm256_cmpeq_epi32(static_cast<__m256i>(lhs), + static_cast<__m256i>(rhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator>(simd const& lhs, simd const& rhs) noexcept { + return mask_type(_mm256_cmpgt_epi32(static_cast<__m256i>(lhs), + static_cast<__m256i>(rhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator<(simd const& lhs, simd const& rhs) noexcept { + return !(lhs >= rhs); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator<=(simd const& lhs, simd const& rhs) noexcept { + return (lhs < rhs) || (lhs == rhs); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator>=(simd const& lhs, simd const& rhs) noexcept { + return (lhs > rhs) || (lhs == rhs); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator!=(simd const& lhs, simd const& rhs) noexcept { + return !(lhs == rhs); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator-( + simd const& lhs, simd const& rhs) noexcept { + return simd( + _mm256_sub_epi32(static_cast<__m256i>(lhs), 
static_cast<__m256i>(rhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator+( + simd const& lhs, simd const& rhs) noexcept { + return simd( + _mm256_add_epi32(static_cast<__m256i>(lhs), static_cast<__m256i>(rhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator*( + simd const& lhs, simd const& rhs) noexcept { + return simd(_mm256_mullo_epi32(static_cast<__m256i>(lhs), + static_cast<__m256i>(rhs))); + } + + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator>>( + simd const& lhs, int rhs) noexcept { + return simd(_mm256_srai_epi32(static_cast<__m256i>(lhs), rhs)); + } + + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator>>( + simd const& lhs, simd const& rhs) noexcept { + return simd(_mm256_srav_epi32(static_cast<__m256i>(lhs), + static_cast<__m256i>(rhs))); + } + + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator<<( + simd const& lhs, int rhs) noexcept { + return simd(_mm256_slli_epi32(static_cast<__m256i>(lhs), rhs)); + } + + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator<<( + simd const& lhs, simd const& rhs) noexcept { + return simd(_mm256_sllv_epi32(static_cast<__m256i>(lhs), + static_cast<__m256i>(rhs))); + } +}; + +} // namespace Experimental + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + Experimental::simd> + abs(Experimental::simd< + std::int32_t, Experimental::simd_abi::avx2_fixed_size<8>> const& a) { + __m256i const rhs = static_cast<__m256i>(a); + return Experimental::simd>( + _mm256_abs_epi32(rhs)); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + Experimental::simd> + floor(Experimental::simd< + std::int32_t, Experimental::simd_abi::avx2_fixed_size<8>> const& a) { + return Experimental::simd>( + _mm256_cvtepi32_ps(static_cast<__m256i>(a))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + Experimental::simd> + ceil(Experimental::simd< + std::int32_t, 
Experimental::simd_abi::avx2_fixed_size<8>> const& a) { + return Experimental::simd>( + _mm256_cvtepi32_ps(static_cast<__m256i>(a))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + Experimental::simd> + round(Experimental::simd< + std::int32_t, Experimental::simd_abi::avx2_fixed_size<8>> const& a) { + return Experimental::simd>( + _mm256_cvtepi32_ps(static_cast<__m256i>(a))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + Experimental::simd> + trunc(Experimental::simd< + std::int32_t, Experimental::simd_abi::avx2_fixed_size<8>> const& a) { + return Experimental::simd>( + _mm256_cvtepi32_ps(static_cast<__m256i>(a))); +} + +namespace Experimental { + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + simd> + condition(simd_mask> const& a, + simd> const& b, + simd> const& c) { + return simd>(_mm256_castps_si256( + _mm256_blendv_ps(_mm256_castsi256_ps(static_cast<__m256i>(c)), + _mm256_castsi256_ps(static_cast<__m256i>(b)), + _mm256_castsi256_ps(static_cast<__m256i>(a))))); +} + template <> class simd> { __m256i m_value; @@ -1515,6 +2171,16 @@ class simd> { static_cast<__m256i>(mask_type(true))); #endif } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_to( + value_type* ptr, element_aligned_tag) const { + _mm256_maskstore_epi64(reinterpret_cast(ptr), + static_cast<__m256i>(mask_type(true)), m_value); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_to(value_type* ptr, + vector_aligned_tag) const { + _mm256_maskstore_epi64(reinterpret_cast(ptr), + static_cast<__m256i>(mask_type(true)), m_value); + } KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit operator __m256i() const { return m_value; @@ -1821,6 +2487,94 @@ class where_expression>, } }; +template <> +class const_where_expression>, + simd>> { + public: + using abi_type = simd_abi::avx2_fixed_size<8>; + using value_type = simd; + using mask_type = simd_mask; + + protected: + value_type& m_value; + mask_type const& m_mask; + + public: + const_where_expression(mask_type 
const& mask_arg, value_type const& value_arg) + : m_value(const_cast(value_arg)), m_mask(mask_arg) {} + + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void copy_to(float* mem, element_aligned_tag) const { + _mm256_maskstore_ps(mem, _mm256_castps_si256(static_cast<__m256>(m_mask)), + static_cast<__m256>(m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void copy_to(float* mem, vector_aligned_tag) const { + _mm256_maskstore_ps(mem, _mm256_castps_si256(static_cast<__m256>(m_mask)), + static_cast<__m256>(m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void scatter_to( + float* mem, + simd> const& index) const { + for (std::size_t lane = 0; lane < value_type::size(); ++lane) { + if (m_mask[lane]) mem[index[lane]] = m_value[lane]; + } + } + + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION value_type const& + impl_get_value() const { + return m_value; + } + + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION mask_type const& + impl_get_mask() const { + return m_mask; + } +}; + +template <> +class where_expression>, + simd>> + : public const_where_expression< + simd_mask>, + simd>> { + public: + where_expression( + simd_mask> const& mask_arg, + simd>& value_arg) + : const_where_expression(mask_arg, value_arg) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void copy_from(float const* mem, element_aligned_tag) { + m_value = value_type(_mm256_maskload_ps( + mem, _mm256_castps_si256(static_cast<__m256>(m_mask)))); + } + void copy_from(float const* mem, vector_aligned_tag) { + m_value = value_type(_mm256_maskload_ps( + mem, _mm256_castps_si256(static_cast<__m256>(m_mask)))); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void gather_from( + float const* mem, + simd> const& index) { + m_value = value_type(_mm256_mask_i32gather_ps( + static_cast<__m256>(m_value), mem, static_cast<__m256i>(index), + static_cast<__m256>(m_mask), 4)); + } + template >>, + bool> = false> + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void operator=(U&& x) { + auto const x_as_value_type = + 
static_cast>>( + std::forward(x)); + m_value = simd>(_mm256_blendv_ps( + static_cast<__m256>(m_value), static_cast<__m256>(x_as_value_type), + static_cast<__m256>(m_mask))); + } +}; + template <> class const_where_expression< simd_mask>, @@ -1923,6 +2677,109 @@ class where_expression>, } }; +template <> +class const_where_expression< + simd_mask>, + simd>> { + public: + using abi_type = simd_abi::avx2_fixed_size<8>; + using value_type = simd; + using mask_type = simd_mask; + + protected: + value_type& m_value; + mask_type const& m_mask; + + public: + const_where_expression(mask_type const& mask_arg, value_type const& value_arg) + : m_value(const_cast(value_arg)), m_mask(mask_arg) {} + + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void copy_to(std::int32_t* mem, element_aligned_tag) const { + _mm256_maskstore_epi32(mem, static_cast<__m256i>(m_mask), + static_cast<__m256i>(m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void copy_to(std::int32_t* mem, vector_aligned_tag) const { + _mm256_maskstore_epi32(mem, static_cast<__m256i>(m_mask), + static_cast<__m256i>(m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void scatter_to( + std::int32_t* mem, + simd> const& index) const { + for (std::size_t lane = 0; lane < value_type::size(); ++lane) { + if (m_mask[lane]) mem[index[lane]] = m_value[lane]; + } + } + + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION value_type const& + impl_get_value() const { + return m_value; + } + + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION mask_type const& + impl_get_mask() const { + return m_mask; + } +}; + +template <> +class where_expression>, + simd>> + : public const_where_expression< + simd_mask>, + simd>> { + public: + where_expression( + simd_mask> const& mask_arg, + simd>& value_arg) + : const_where_expression(mask_arg, value_arg) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void copy_from(std::int32_t const* mem, element_aligned_tag) { +#ifdef KOKKOS_IMPL_WORKAROUND_ROCM_AVX2_ISSUE + __m256i tmp = 
_mm256_loadu_si256(reinterpret_cast<__m256i const*>(mem)); + m_value = value_type(_mm256_and_si256(tmp, static_cast<__m256i>(m_mask))); +#else + m_value = + value_type(_mm256_maskload_epi32(mem, static_cast<__m256i>(m_mask))); +#endif + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void copy_from(std::int32_t const* mem, vector_aligned_tag) { +#ifdef KOKKOS_IMPL_WORKAROUND_ROCM_AVX2_ISSUE + __m256i tmp = _mm256_load_si256(reinterpret_cast<__m256i const*>(mem)); + m_value = value_type(_mm256_and_si256(tmp, static_cast<__m256i>(m_mask))); +#else + m_value = + value_type(_mm256_maskload_epi32(mem, static_cast<__m256i>(m_mask))); +#endif + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void gather_from( + std::int32_t const* mem, + simd> const& index) { + m_value = value_type(_mm256_mask_i32gather_epi32( + static_cast<__m256i>(m_value), mem, static_cast<__m256i>(index), + static_cast<__m256i>(m_mask), 4)); + } + template < + class U, + std::enable_if_t>>, + bool> = false> + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void operator=(U&& x) { + auto const x_as_value_type = + static_cast>>( + std::forward(x)); + m_value = simd>( + _mm256_castps_si256(_mm256_blendv_ps( + _mm256_castsi256_ps(static_cast<__m256i>(m_value)), + _mm256_castsi256_ps(static_cast<__m256i>(x_as_value_type)), + _mm256_castsi256_ps(static_cast<__m256i>(m_mask))))); + } +}; + template <> class const_where_expression< simd_mask>, diff --git a/lib/kokkos/simd/src/Kokkos_SIMD_AVX512.hpp b/lib/kokkos/simd/src/Kokkos_SIMD_AVX512.hpp index 7fa35c204a..84e8af3cd7 100644 --- a/lib/kokkos/simd/src/Kokkos_SIMD_AVX512.hpp +++ b/lib/kokkos/simd/src/Kokkos_SIMD_AVX512.hpp @@ -140,6 +140,122 @@ class simd_mask> { } }; +template +class simd_mask> { + __mmask16 m_value; + + public: + class reference { + __mmask16& m_mask; + int m_lane; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION __mmask16 bit_mask() const { + return __mmask16(std::int32_t(1 << m_lane)); + } + + public: + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION reference(__mmask16& 
mask_arg, + int lane_arg) + : m_mask(mask_arg), m_lane(lane_arg) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION reference + operator=(bool value) const { + if (value) { + m_mask |= bit_mask(); + } else { + m_mask &= ~bit_mask(); + } + return *this; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION operator bool() const { + return (m_mask & bit_mask()) != 0; + } + }; + using value_type = bool; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask() = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION explicit simd_mask(value_type value) + : m_value(-std::int32_t(value)) {} + template + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask( + simd_mask> const& other) + : m_value(static_cast<__mmask16>(other)) {} + template >, + bool> = false> + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask(G&& gen) : m_value(false) { + reference(m_value, int(0)) = + static_cast(gen(std::integral_constant())); + reference(m_value, int(1)) = + static_cast(gen(std::integral_constant())); + reference(m_value, int(2)) = + static_cast(gen(std::integral_constant())); + reference(m_value, int(3)) = + static_cast(gen(std::integral_constant())); + reference(m_value, int(4)) = + static_cast(gen(std::integral_constant())); + reference(m_value, int(5)) = + static_cast(gen(std::integral_constant())); + reference(m_value, int(6)) = + static_cast(gen(std::integral_constant())); + reference(m_value, int(7)) = + static_cast(gen(std::integral_constant())); + reference(m_value, int(8)) = + static_cast(gen(std::integral_constant())); + reference(m_value, int(9)) = + static_cast(gen(std::integral_constant())); + reference(m_value, int(10)) = + static_cast(gen(std::integral_constant())); + reference(m_value, int(11)) = + static_cast(gen(std::integral_constant())); + reference(m_value, int(12)) = + static_cast(gen(std::integral_constant())); + reference(m_value, int(13)) = + static_cast(gen(std::integral_constant())); + reference(m_value, int(14)) = + static_cast(gen(std::integral_constant())); + reference(m_value, int(15)) = 
+ static_cast(gen(std::integral_constant())); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION static constexpr std::size_t size() { + return 16; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit simd_mask( + __mmask16 const& value_in) + : m_value(value_in) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit operator __mmask16() + const { + return m_value; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION reference operator[](std::size_t i) { + return reference(m_value, int(i)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION value_type + operator[](std::size_t i) const { + auto const bit_mask = __mmask16(std::int32_t(1 << i)); + return (m_value & bit_mask) != 0; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask + operator||(simd_mask const& other) const { + return simd_mask(_kor_mask16(m_value, other.m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask + operator&&(simd_mask const& other) const { + return simd_mask(_kand_mask16(m_value, other.m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask operator!() const { + static const __mmask16 true_value(static_cast<__mmask16>(simd_mask(true))); + return simd_mask(_kxor_mask16(true_value, m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION bool operator==( + simd_mask const& other) const { + return m_value == other.m_value; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION bool operator!=( + simd_mask const& other) const { + return m_value != other.m_value; + } +}; + template <> class simd> { __m512d m_value; @@ -700,6 +816,280 @@ simd> condition( static_cast<__m256>(b))); } +template <> +class simd> { + __m512 m_value; + + public: + using value_type = float; + using abi_type = simd_abi::avx512_fixed_size<16>; + using mask_type = simd_mask; + using reference = value_type&; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd() = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(simd const&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(simd&&) = default; + 
KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd& operator=(simd const&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd& operator=(simd&&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION static constexpr std::size_t size() { + return 16; + } + template , + bool> = false> + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(U&& value) + : m_value(_mm512_set1_ps(value_type(value))) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit simd( + __m512 const& value_in) + : m_value(value_in) {} + template >, + bool> = false> + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(G&& gen) + : m_value( + _mm512_setr_ps(gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()))) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION reference operator[](std::size_t i) { + return reinterpret_cast(&m_value)[i]; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION value_type + operator[](std::size_t i) const { + return reinterpret_cast(&m_value)[i]; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_from(value_type const* ptr, + element_aligned_tag) { + m_value = _mm512_loadu_ps(ptr); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_from(value_type const* ptr, + vector_aligned_tag) { + m_value = _mm512_load_ps(ptr); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_to( + value_type* ptr, element_aligned_tag) const { + _mm512_storeu_ps(ptr, m_value); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_to(value_type* ptr, + vector_aligned_tag) const { + _mm512_store_ps(ptr, m_value); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION 
constexpr explicit operator __m512() + const { + return m_value; + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd operator-() const + noexcept { + return simd(_mm512_sub_ps(_mm512_set1_ps(0.0), m_value)); + } + + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator*( + simd const& lhs, simd const& rhs) noexcept { + return simd(_mm512_mul_ps(lhs.m_value, rhs.m_value)); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator/( + simd const& lhs, simd const& rhs) noexcept { + return simd(_mm512_div_ps(lhs.m_value, rhs.m_value)); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator+( + simd const& lhs, simd const& rhs) noexcept { + return simd(_mm512_add_ps(lhs.m_value, rhs.m_value)); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator-( + simd const& lhs, simd const& rhs) noexcept { + return simd(_mm512_sub_ps(lhs.m_value, rhs.m_value)); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator<(simd const& lhs, simd const& rhs) noexcept { + return mask_type(_mm512_cmp_ps_mask(lhs.m_value, rhs.m_value, _CMP_LT_OS)); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator>(simd const& lhs, simd const& rhs) noexcept { + return mask_type(_mm512_cmp_ps_mask(lhs.m_value, rhs.m_value, _CMP_GT_OS)); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator<=(simd const& lhs, simd const& rhs) noexcept { + return mask_type(_mm512_cmp_ps_mask(lhs.m_value, rhs.m_value, _CMP_LE_OS)); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator>=(simd const& lhs, simd const& rhs) noexcept { + return mask_type(_mm512_cmp_ps_mask(lhs.m_value, rhs.m_value, _CMP_GE_OS)); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator==(simd const& lhs, simd const& rhs) noexcept { + return mask_type(_mm512_cmp_ps_mask(lhs.m_value, rhs.m_value, 
_CMP_EQ_OS)); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator!=(simd const& lhs, simd const& rhs) noexcept { + return mask_type(_mm512_cmp_ps_mask(lhs.m_value, rhs.m_value, _CMP_NEQ_OS)); + } +}; + +} // namespace Experimental + +KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION +Experimental::simd> +copysign(Experimental::simd< + float, Experimental::simd_abi::avx512_fixed_size<16>> const& a, + Experimental::simd< + float, Experimental::simd_abi::avx512_fixed_size<16>> const& b) { + __m512 const sign_mask = _mm512_set1_ps(-0.0); + return Experimental::simd>( + _mm512_xor_ps(_mm512_andnot_ps(sign_mask, static_cast<__m512>(a)), + _mm512_and_ps(sign_mask, static_cast<__m512>(b)))); +} + +KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION +Experimental::simd> abs( + Experimental::simd< + float, Experimental::simd_abi::avx512_fixed_size<16>> const& a) { + __m512 const sign_mask = _mm512_set1_ps(-0.0); + return Experimental::simd>( + _mm512_andnot_ps(sign_mask, static_cast<__m512>(a))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + Experimental::simd> + floor(Experimental::simd< + float, Experimental::simd_abi::avx512_fixed_size<16>> const& a) { + __m512 const val = static_cast<__m512>(a); + return Experimental::simd>( + _mm512_roundscale_ps(val, _MM_FROUND_TO_NEG_INF)); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + Experimental::simd> + ceil(Experimental::simd< + float, Experimental::simd_abi::avx512_fixed_size<16>> const& a) { + __m512 const val = static_cast<__m512>(a); + return Experimental::simd>( + _mm512_roundscale_ps(val, _MM_FROUND_TO_POS_INF)); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + Experimental::simd> + round(Experimental::simd< + float, Experimental::simd_abi::avx512_fixed_size<16>> const& a) { + __m512 const val = static_cast<__m512>(a); + return Experimental::simd>( + _mm512_roundscale_ps(val, _MM_FROUND_TO_NEAREST_INT)); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + 
Experimental::simd> + trunc(Experimental::simd< + float, Experimental::simd_abi::avx512_fixed_size<16>> const& a) { + __m512 const val = static_cast<__m512>(a); + return Experimental::simd>( + _mm512_roundscale_ps(val, _MM_FROUND_TO_ZERO)); +} + +KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION +Experimental::simd> sqrt( + Experimental::simd< + float, Experimental::simd_abi::avx512_fixed_size<16>> const& a) { + return Experimental::simd>( + _mm512_sqrt_ps(static_cast<__m512>(a))); +} + +#ifdef __INTEL_COMPILER + +KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION +Experimental::simd> cbrt( + Experimental::simd< + float, Experimental::simd_abi::avx512_fixed_size<16>> const& a) { + return Experimental::simd>( + _mm512_cbrt_ps(static_cast<__m512>(a))); +} + +KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION +Experimental::simd> exp( + Experimental::simd< + float, Experimental::simd_abi::avx512_fixed_size<16>> const& a) { + return Experimental::simd>( + _mm512_exp_ps(static_cast<__m512>(a))); +} + +KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION +Experimental::simd> log( + Experimental::simd< + float, Experimental::simd_abi::avx512_fixed_size<16>> const& a) { + return Experimental::simd>( + _mm512_log_ps(static_cast<__m512>(a))); +} + +#endif + +KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION +Experimental::simd> fma( + Experimental::simd> const& a, + Experimental::simd> const& b, + Experimental::simd< + float, Experimental::simd_abi::avx512_fixed_size<16>> const& c) { + return Experimental::simd>( + _mm512_fmadd_ps(static_cast<__m512>(a), static_cast<__m512>(b), + static_cast<__m512>(c))); +} + +KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION +Experimental::simd> max( + Experimental::simd> const& a, + Experimental::simd< + float, Experimental::simd_abi::avx512_fixed_size<16>> const& b) { + return Experimental::simd>( + _mm512_max_ps(static_cast<__m512>(a), static_cast<__m512>(b))); +} + +KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION +Experimental::simd> min( + Experimental::simd> const& a, + Experimental::simd< + float, 
Experimental::simd_abi::avx512_fixed_size<16>> const& b) { + return Experimental::simd>( + _mm512_min_ps(static_cast<__m512>(a), static_cast<__m512>(b))); +} + +namespace Experimental { + +KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION +simd> condition( + simd_mask> const& a, + simd> const& b, + simd> const& c) { + return simd>( + _mm512_mask_blend_ps(static_cast<__mmask16>(a), static_cast<__m512>(c), + static_cast<__m512>(b))); +} + template <> class simd> { __m256i m_value; @@ -907,6 +1297,222 @@ namespace Experimental { static_cast<__m256i>(b))); } +template <> +class simd> { + __m512i m_value; + + public: + using value_type = std::int32_t; + using abi_type = simd_abi::avx512_fixed_size<16>; + using mask_type = simd_mask; + using reference = value_type&; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd() = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(simd const&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(simd&&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd& operator=(simd const&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd& operator=(simd&&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION static constexpr std::size_t size() { + return 16; + } + template , + bool> = false> + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(U&& value) + : m_value(_mm512_set1_epi32(value_type(value))) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit simd( + __m512i const& value_in) + : m_value(value_in) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION explicit simd( + simd const& other); + template ()); } + std::is_invocable_r_v>, + bool> = false> + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit simd( + G&& gen) noexcept + : m_value(_mm512_setr_epi32( + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + 
gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()))) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION reference operator[](std::size_t i) { + return reinterpret_cast(&m_value)[i]; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION value_type + operator[](std::size_t i) const { + return reinterpret_cast(&m_value)[i]; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_to( + value_type* ptr, element_aligned_tag) const { + _mm512_mask_storeu_epi32(ptr, static_cast<__mmask16>(mask_type(true)), + m_value); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_to(value_type* ptr, + vector_aligned_tag) const { + _mm512_mask_store_epi32(ptr, static_cast<__mmask16>(mask_type(true)), + m_value); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_from(value_type const* ptr, + element_aligned_tag) { + m_value = _mm512_mask_loadu_epi32( + _mm512_set1_epi32(0), static_cast<__mmask16>(mask_type(true)), ptr); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_from(value_type const* ptr, + vector_aligned_tag) { + m_value = _mm512_mask_load_epi32( + _mm512_set1_epi32(0), static_cast<__mmask16>(mask_type(true)), ptr); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit operator __m512i() + const { + return m_value; + } + + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd operator-() const + noexcept { + return simd(_mm512_sub_epi32(_mm512_set1_epi32(0), m_value)); + } + + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator*( + simd const& lhs, simd const& rhs) noexcept { + return simd(_mm512_mullo_epi32(static_cast<__m512i>(lhs), + static_cast<__m512i>(rhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator+( + simd const& lhs, simd const& rhs) noexcept { + return simd>( + 
_mm512_add_epi32(static_cast<__m512i>(lhs), static_cast<__m512i>(rhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator-( + simd const& lhs, simd const& rhs) noexcept { + return simd>( + _mm512_sub_epi32(static_cast<__m512i>(lhs), static_cast<__m512i>(rhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator<(simd const& lhs, simd const& rhs) noexcept { + return mask_type(_mm512_cmplt_epi32_mask(static_cast<__m512i>(lhs), + static_cast<__m512i>(rhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator>(simd const& lhs, simd const& rhs) noexcept { + return mask_type(_mm512_cmplt_epi32_mask(static_cast<__m512i>(rhs), + static_cast<__m512i>(lhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator<=(simd const& lhs, simd const& rhs) noexcept { + return mask_type(_mm512_cmple_epi32_mask(static_cast<__m512i>(lhs), + static_cast<__m512i>(rhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator>=(simd const& lhs, simd const& rhs) noexcept { + return mask_type(_mm512_cmple_epi32_mask(static_cast<__m512i>(rhs), + static_cast<__m512i>(lhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator==(simd const& lhs, simd const& rhs) noexcept { + return mask_type(_mm512_cmpeq_epi32_mask(static_cast<__m512i>(lhs), + static_cast<__m512i>(rhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator!=(simd const& lhs, simd const& rhs) noexcept { + return mask_type(_mm512_cmpneq_epi32_mask(static_cast<__m512i>(lhs), + static_cast<__m512i>(rhs))); + } + + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator>>( + simd const& lhs, int rhs) noexcept { + return simd(_mm512_srai_epi32(static_cast<__m512i>(lhs), rhs)); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator>>( + simd const& lhs, simd const& 
rhs) noexcept { + return simd(_mm512_srav_epi32(static_cast<__m512i>(lhs), + static_cast<__m512i>(rhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator<<( + simd const& lhs, int rhs) noexcept { + return simd(_mm512_slli_epi32(static_cast<__m512i>(lhs), rhs)); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator<<( + simd const& lhs, simd const& rhs) noexcept { + return simd(_mm512_sllv_epi32(static_cast<__m512i>(lhs), + static_cast<__m512i>(rhs))); + } +}; + +} // namespace Experimental + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION Experimental::simd< + std::int32_t, Experimental::simd_abi::avx512_fixed_size<16>> +abs(Experimental::simd< + std::int32_t, Experimental::simd_abi::avx512_fixed_size<16>> const& a) { + __m512i const rhs = static_cast<__m512i>(a); + return Experimental::simd>( + _mm512_abs_epi32(rhs)); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION Experimental::simd< + float, Experimental::simd_abi::avx512_fixed_size<16>> +floor(Experimental::simd< + std::int32_t, Experimental::simd_abi::avx512_fixed_size<16>> const& a) { + return Experimental::simd>( + _mm512_cvtepi32_ps(static_cast<__m512i>(a))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION Experimental::simd< + float, Experimental::simd_abi::avx512_fixed_size<16>> +ceil(Experimental::simd< + std::int32_t, Experimental::simd_abi::avx512_fixed_size<16>> const& a) { + return Experimental::simd>( + _mm512_cvtepi32_ps(static_cast<__m512i>(a))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION Experimental::simd< + float, Experimental::simd_abi::avx512_fixed_size<16>> +round(Experimental::simd< + std::int32_t, Experimental::simd_abi::avx512_fixed_size<16>> const& a) { + return Experimental::simd>( + _mm512_cvtepi32_ps(static_cast<__m512i>(a))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION Experimental::simd< + float, Experimental::simd_abi::avx512_fixed_size<16>> +trunc(Experimental::simd< + 
std::int32_t, Experimental::simd_abi::avx512_fixed_size<16>> const& a) { + return Experimental::simd>( + _mm512_cvtepi32_ps(static_cast<__m512i>(a))); +} + +namespace Experimental { + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + simd> + condition(simd_mask> const& a, + simd> const& b, + simd> const& c) { + return simd>( + _mm512_mask_blend_epi32(static_cast<__mmask16>(a), + static_cast<__m512i>(c), + static_cast<__m512i>(b))); +} + template <> class simd> { __m256i m_value; @@ -960,16 +1566,6 @@ class simd> { operator[](std::size_t i) const { return reinterpret_cast(&m_value)[i]; } - KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_from(value_type const* ptr, - element_aligned_tag) { - m_value = _mm256_mask_loadu_epi32( - _mm256_set1_epi32(0), static_cast<__mmask8>(mask_type(true)), ptr); - } - KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_from(value_type const* ptr, - vector_aligned_tag) { - m_value = _mm256_mask_load_epi32( - _mm256_set1_epi32(0), static_cast<__mmask8>(mask_type(true)), ptr); - } KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_to( value_type* ptr, element_aligned_tag) const { _mm256_mask_storeu_epi32(ptr, static_cast<__mmask8>(mask_type(true)), @@ -980,10 +1576,21 @@ class simd> { _mm256_mask_store_epi32(ptr, static_cast<__mmask8>(mask_type(true)), m_value); } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_from(value_type const* ptr, + element_aligned_tag) { + m_value = _mm256_mask_loadu_epi32( + _mm256_set1_epi32(0), static_cast<__mmask8>(mask_type(true)), ptr); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_from(value_type const* ptr, + vector_aligned_tag) { + m_value = _mm256_mask_load_epi32( + _mm256_set1_epi32(0), static_cast<__mmask8>(mask_type(true)), ptr); + } KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit operator __m256i() const { return m_value; } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator*( simd const& lhs, simd const& rhs) noexcept { return 
simd(_mm256_mullo_epi32(static_cast<__m256i>(lhs), @@ -1108,6 +1715,217 @@ namespace Experimental { static_cast<__m256i>(b))); } +template <> +class simd> { + __m512i m_value; + + public: + using value_type = std::uint32_t; + using abi_type = simd_abi::avx512_fixed_size<16>; + using mask_type = simd_mask; + using reference = value_type&; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd() = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(simd const&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(simd&&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd& operator=(simd const&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd& operator=(simd&&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION static constexpr std::size_t size() { + return 16; + } + template , + bool> = false> + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(U&& value) + : m_value(_mm512_set1_epi32( + Kokkos::bit_cast(value_type(value)))) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit simd( + __m512i const& value_in) + : m_value(value_in) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION explicit simd( + simd> const& other) + : m_value(static_cast<__m512i>(other)) {} + template ()); } + std::is_invocable_r_v>, + bool> = false> + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit simd( + G&& gen) noexcept + : m_value(_mm512_setr_epi32( + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()), + gen(std::integral_constant()))) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION reference operator[](std::size_t i) { + return 
reinterpret_cast(&m_value)[i]; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION value_type + operator[](std::size_t i) const { + return reinterpret_cast(&m_value)[i]; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_from(value_type const* ptr, + element_aligned_tag) { + m_value = _mm512_mask_loadu_epi32( + _mm512_set1_epi32(0), static_cast<__mmask16>(mask_type(true)), ptr); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_from(value_type const* ptr, + vector_aligned_tag) { + m_value = _mm512_mask_load_epi32( + _mm512_set1_epi32(0), static_cast<__mmask16>(mask_type(true)), ptr); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_to( + value_type* ptr, element_aligned_tag) const { + _mm512_mask_storeu_epi32(ptr, static_cast<__mmask16>(mask_type(true)), + m_value); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_to(value_type* ptr, + vector_aligned_tag) const { + _mm512_mask_store_epi32(ptr, static_cast<__mmask16>(mask_type(true)), + m_value); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit operator __m512i() + const { + return m_value; + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator*( + simd const& lhs, simd const& rhs) noexcept { + return simd(_mm512_mullo_epi32(static_cast<__m512i>(lhs), + static_cast<__m512i>(rhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator+( + simd const& lhs, simd const& rhs) noexcept { + return simd( + _mm512_add_epi32(static_cast<__m512i>(lhs), static_cast<__m512i>(rhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator-( + simd const& lhs, simd const& rhs) noexcept { + return simd( + _mm512_sub_epi32(static_cast<__m512i>(lhs), static_cast<__m512i>(rhs))); + } + + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator<(simd const& lhs, simd const& rhs) noexcept { + return mask_type(_mm512_cmplt_epu32_mask(static_cast<__m512i>(lhs), + static_cast<__m512i>(rhs))); + } + [[nodiscard]] 
KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator>(simd const& lhs, simd const& rhs) noexcept { + return mask_type(_mm512_cmplt_epu32_mask(static_cast<__m512i>(rhs), + static_cast<__m512i>(lhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator<=(simd const& lhs, simd const& rhs) noexcept { + return mask_type(_mm512_cmple_epu32_mask(static_cast<__m512i>(lhs), + static_cast<__m512i>(rhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator>=(simd const& lhs, simd const& rhs) noexcept { + return mask_type(_mm512_cmple_epu32_mask(static_cast<__m512i>(rhs), + static_cast<__m512i>(lhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator==(simd const& lhs, simd const& rhs) noexcept { + return mask_type(_mm512_cmpeq_epu32_mask(static_cast<__m512i>(lhs), + static_cast<__m512i>(rhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator!=(simd const& lhs, simd const& rhs) noexcept { + return mask_type(_mm512_cmpneq_epu32_mask(static_cast<__m512i>(lhs), + static_cast<__m512i>(rhs))); + } + + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator>>( + simd const& lhs, int rhs) noexcept { + return simd(_mm512_srli_epi32(static_cast<__m512i>(lhs), rhs)); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator>>( + simd const& lhs, simd const& rhs) noexcept { + return simd(_mm512_srlv_epi32(static_cast<__m512i>(lhs), + static_cast<__m512i>(rhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator<<( + simd const& lhs, int rhs) noexcept { + return simd(_mm512_slli_epi32(static_cast<__m512i>(lhs), rhs)); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator<<( + simd const& lhs, simd const& rhs) noexcept { + return simd(_mm512_sllv_epi32(static_cast<__m512i>(lhs), + static_cast<__m512i>(rhs))); + } +}; + +} // namespace 
Experimental + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION Experimental::simd< + std::uint32_t, Experimental::simd_abi::avx512_fixed_size<16>> +abs(Experimental::simd< + std::uint32_t, Experimental::simd_abi::avx512_fixed_size<16>> const& a) { + return a; +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION Experimental::simd< + float, Experimental::simd_abi::avx512_fixed_size<16>> +floor(Experimental::simd< + std::uint32_t, Experimental::simd_abi::avx512_fixed_size<16>> const& a) { + return Experimental::simd>( + _mm512_cvtepu32_ps(static_cast<__m512i>(a))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION Experimental::simd< + float, Experimental::simd_abi::avx512_fixed_size<16>> +ceil(Experimental::simd< + std::uint32_t, Experimental::simd_abi::avx512_fixed_size<16>> const& a) { + return Experimental::simd>( + _mm512_cvtepu32_ps(static_cast<__m512i>(a))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION Experimental::simd< + float, Experimental::simd_abi::avx512_fixed_size<16>> +round(Experimental::simd< + std::uint32_t, Experimental::simd_abi::avx512_fixed_size<16>> const& a) { + return Experimental::simd>( + _mm512_cvtepu32_ps(static_cast<__m512i>(a))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION Experimental::simd< + float, Experimental::simd_abi::avx512_fixed_size<16>> +trunc(Experimental::simd< + std::uint32_t, Experimental::simd_abi::avx512_fixed_size<16>> const& a) { + return Experimental::simd>( + _mm512_cvtepu32_ps(static_cast<__m512i>(a))); +} + +namespace Experimental { + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + simd> + condition( + simd_mask> const& a, + simd> const& b, + simd> const& c) { + return simd>( + _mm512_mask_blend_epi32(static_cast<__mmask16>(a), + static_cast<__m512i>(c), + static_cast<__m512i>(b))); +} + template <> class simd> { __m512i m_value; @@ -1716,6 +2534,95 @@ class where_expression>, } }; +template <> +class const_where_expression>, + simd>> { + public: + using 
abi_type = simd_abi::avx512_fixed_size<16>; + using value_type = simd; + using mask_type = simd_mask; + + protected: + value_type& m_value; + mask_type const& m_mask; + + public: + const_where_expression(mask_type const& mask_arg, value_type const& value_arg) + : m_value(const_cast(value_arg)), m_mask(mask_arg) {} + + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void copy_to(float* mem, element_aligned_tag) const { + _mm512_mask_storeu_ps(mem, static_cast<__mmask16>(m_mask), + static_cast<__m512>(m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void copy_to(float* mem, vector_aligned_tag) const { + _mm512_mask_store_ps(mem, static_cast<__mmask16>(m_mask), + static_cast<__m512>(m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void scatter_to( + float* mem, + simd> const& index) const { + _mm512_mask_i32scatter_ps(mem, static_cast<__mmask16>(m_mask), + static_cast<__m512i>(index), + static_cast<__m512>(m_value), 4); + } + + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION value_type const& + impl_get_value() const { + return m_value; + } + + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION mask_type const& + impl_get_mask() const { + return m_mask; + } +}; + +template <> +class where_expression>, + simd>> + : public const_where_expression< + simd_mask>, + simd>> { + public: + where_expression( + simd_mask> const& mask_arg, + simd>& value_arg) + : const_where_expression(mask_arg, value_arg) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void copy_from(float const* mem, element_aligned_tag) { + m_value = value_type(_mm512_mask_loadu_ps( + _mm512_set1_ps(0.0), static_cast<__mmask16>(m_mask), mem)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void copy_from(float const* mem, vector_aligned_tag) { + m_value = value_type(_mm512_mask_load_ps( + _mm512_set1_ps(0.0), static_cast<__mmask16>(m_mask), mem)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void gather_from( + float const* mem, + simd> const& index) { + m_value = value_type(_mm512_mask_i32gather_ps( 
+ static_cast<__m512>(m_value), static_cast<__mmask16>(m_mask), + static_cast<__m512i>(index), mem, 4)); + } + template >>, + bool> = false> + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void operator=(U&& x) { + auto const x_as_value_type = + static_cast>>( + std::forward(x)); + m_value = simd>(_mm512_mask_blend_ps( + static_cast<__mmask16>(m_mask), static_cast<__m512>(m_value), + static_cast<__m512>(x_as_value_type))); + } +}; + template <> class const_where_expression< simd_mask>, @@ -1810,6 +2717,98 @@ class where_expression>, } }; +template <> +class const_where_expression< + simd_mask>, + simd>> { + public: + using abi_type = simd_abi::avx512_fixed_size<16>; + using value_type = simd; + using mask_type = simd_mask; + + protected: + value_type& m_value; + mask_type const& m_mask; + + public: + const_where_expression(mask_type const& mask_arg, value_type const& value_arg) + : m_value(const_cast(value_arg)), m_mask(mask_arg) {} + + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void copy_to(std::int32_t* mem, element_aligned_tag) const { + _mm512_mask_storeu_epi32(mem, static_cast<__mmask16>(m_mask), + static_cast<__m512i>(m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void copy_to(std::int32_t* mem, vector_aligned_tag) const { + _mm512_mask_store_epi32(mem, static_cast<__mmask16>(m_mask), + static_cast<__m512i>(m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void scatter_to( + std::int32_t* mem, + simd> const& index) const { + _mm512_mask_i32scatter_epi32(mem, static_cast<__mmask16>(m_mask), + static_cast<__m512i>(index), + static_cast<__m512i>(m_value), 4); + } + + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION value_type const& + impl_get_value() const { + return m_value; + } + + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION mask_type const& + impl_get_mask() const { + return m_mask; + } +}; + +template <> +class where_expression>, + simd>> + : public const_where_expression< + simd_mask>, + simd>> { + public: + where_expression( + simd_mask> 
const& mask_arg, + simd>& value_arg) + : const_where_expression(mask_arg, value_arg) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void copy_from(std::int32_t const* mem, element_aligned_tag) { + m_value = value_type(_mm512_mask_loadu_epi32( + _mm512_set1_epi32(0), static_cast<__mmask16>(m_mask), mem)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void copy_from(std::int32_t const* mem, vector_aligned_tag) { + m_value = value_type(_mm512_mask_load_epi32( + _mm512_set1_epi32(0), static_cast<__mmask16>(m_mask), mem)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void gather_from( + std::int32_t const* mem, + simd> const& index) { + m_value = value_type(_mm512_mask_i32gather_epi32( + static_cast<__m512i>(m_value), static_cast<__mmask16>(m_mask), + static_cast<__m512i>(index), mem, 4)); + } + template >>, + bool> = false> + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void operator=(U&& x) { + auto const x_as_value_type = + static_cast>>( + std::forward(x)); + m_value = simd>( + _mm512_mask_blend_epi32(static_cast<__mmask16>(m_mask), + static_cast<__m512i>(m_value), + static_cast<__m512i>(x_as_value_type))); + } +}; + template <> class const_where_expression< simd_mask>, @@ -1905,6 +2904,99 @@ class where_expression>, } }; +template <> +class const_where_expression< + simd_mask>, + simd>> { + public: + using abi_type = simd_abi::avx512_fixed_size<16>; + using value_type = simd; + using mask_type = simd_mask; + + protected: + value_type& m_value; + mask_type const& m_mask; + + public: + const_where_expression(mask_type const& mask_arg, value_type const& value_arg) + : m_value(const_cast(value_arg)), m_mask(mask_arg) {} + + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void copy_to(std::uint32_t* mem, element_aligned_tag) const { + _mm512_mask_storeu_epi32(mem, static_cast<__mmask16>(m_mask), + static_cast<__m512i>(m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void copy_to(std::uint32_t* mem, vector_aligned_tag) const { + _mm512_mask_store_epi32(mem, 
static_cast<__mmask16>(m_mask), + static_cast<__m512i>(m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void scatter_to( + std::uint32_t* mem, + simd> const& index) const { + _mm512_mask_i32scatter_epi32(mem, static_cast<__mmask16>(m_mask), + static_cast<__m512i>(index), + static_cast<__m512i>(m_value), 4); + } + + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION value_type const& + impl_get_value() const { + return m_value; + } + + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION mask_type const& + impl_get_mask() const { + return m_mask; + } +}; + +template <> +class where_expression< + simd_mask>, + simd>> + : public const_where_expression< + simd_mask>, + simd>> { + public: + where_expression( + simd_mask> const& mask_arg, + simd>& value_arg) + : const_where_expression(mask_arg, value_arg) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void copy_from(std::uint32_t const* mem, element_aligned_tag) { + m_value = value_type(_mm512_mask_loadu_epi32( + _mm512_set1_epi32(0), static_cast<__mmask16>(m_mask), mem)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void copy_from(std::uint32_t const* mem, vector_aligned_tag) { + m_value = value_type(_mm512_mask_load_epi32( + _mm512_set1_epi32(0), static_cast<__mmask16>(m_mask), mem)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void gather_from( + std::uint32_t const* mem, + simd> const& index) { + m_value = value_type(_mm512_mask_i32gather_epi32( + static_cast<__m512i>(m_value), static_cast<__mmask16>(m_mask), + static_cast<__m512i>(index), mem, 4)); + } + template >>, + bool> = false> + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void operator=(U&& x) { + auto const x_as_value_type = + static_cast>>( + std::forward(x)); + m_value = simd>( + _mm512_mask_blend_epi32(static_cast<__mmask16>(m_mask), + static_cast<__m512i>(m_value), + static_cast<__m512i>(x_as_value_type))); + } +}; + template <> class const_where_expression< simd_mask>, diff --git a/lib/kokkos/simd/src/Kokkos_SIMD_NEON.hpp 
b/lib/kokkos/simd/src/Kokkos_SIMD_NEON.hpp index efc81135d1..8cb0cc75fc 100644 --- a/lib/kokkos/simd/src/Kokkos_SIMD_NEON.hpp +++ b/lib/kokkos/simd/src/Kokkos_SIMD_NEON.hpp @@ -42,11 +42,11 @@ class neon_fixed_size {}; namespace Impl { -template +template class neon_mask; template -class neon_mask { +class neon_mask { uint64x2_t m_value; public: @@ -104,12 +104,13 @@ class neon_mask { } template KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION neon_mask( - neon_mask const& other) { + neon_mask const& other) { operator[](0) = bool(other[0]); operator[](1) = bool(other[1]); } template - KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION neon_mask(neon_mask const& other) + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION neon_mask( + neon_mask const& other) : neon_mask(static_cast(other)) {} KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION static constexpr std::size_t size() { return 2; @@ -158,7 +159,7 @@ class neon_mask { }; template -class neon_mask { +class neon_mask { uint32x2_t m_value; public: @@ -211,10 +212,12 @@ class neon_mask { m_value, 1); } template - KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION neon_mask(neon_mask const& other) + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION neon_mask( + neon_mask const& other) : m_value(vqmovn_u64(static_cast(other))) {} template - KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION neon_mask(neon_mask const& other) + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION neon_mask( + neon_mask const& other) : m_value(static_cast(other)) {} KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION static constexpr std::size_t size() { return 2; @@ -260,14 +263,125 @@ class neon_mask { } }; +template +class neon_mask { + uint32x4_t m_value; + + public: + class reference { + uint32x4_t& m_mask; + int m_lane; + + public: + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION reference(uint32x4_t& mask_arg, + int lane_arg) + : m_mask(mask_arg), m_lane(lane_arg) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION reference + operator=(bool value) const { + switch (m_lane) { + case 0: + m_mask = vsetq_lane_u32(value ? 
0xFFFFFFFFU : 0, m_mask, 0); + break; + case 1: + m_mask = vsetq_lane_u32(value ? 0xFFFFFFFFU : 0, m_mask, 1); + break; + case 2: + m_mask = vsetq_lane_u32(value ? 0xFFFFFFFFU : 0, m_mask, 2); + break; + case 3: + m_mask = vsetq_lane_u32(value ? 0xFFFFFFFFU : 0, m_mask, 3); + break; + } + return *this; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION operator bool() const { + switch (m_lane) { + case 0: return vgetq_lane_u32(m_mask, 0) != 0; + case 1: return vgetq_lane_u32(m_mask, 1) != 0; + case 2: return vgetq_lane_u32(m_mask, 2) != 0; + case 3: return vgetq_lane_u32(m_mask, 3) != 0; + } + return false; + } + }; + using value_type = bool; + using abi_type = simd_abi::neon_fixed_size<4>; + using implementation_type = uint32x4_t; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION neon_mask() = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION explicit neon_mask(value_type value) + : m_value(vmovq_n_u32(value ? 0xFFFFFFFFU : 0)) {} + template >, + bool> = false> + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit neon_mask( + G&& gen) noexcept { + m_value = vsetq_lane_u32( + (gen(std::integral_constant()) ? 0xFFFFFFFFU : 0), + m_value, 0); + m_value = vsetq_lane_u32( + (gen(std::integral_constant()) ? 0xFFFFFFFFU : 0), + m_value, 1); + m_value = vsetq_lane_u32( + (gen(std::integral_constant()) ? 0xFFFFFFFFU : 0), + m_value, 2); + m_value = vsetq_lane_u32( + (gen(std::integral_constant()) ? 
0xFFFFFFFFU : 0), + m_value, 3); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION static constexpr std::size_t size() { + return 4; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit neon_mask( + uint32x4_t const& value_in) + : m_value(value_in) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit operator uint32x4_t() + const { + return m_value; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION reference operator[](std::size_t i) { + return reference(m_value, int(i)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION value_type + operator[](std::size_t i) const { + return static_cast( + reference(const_cast(m_value), int(i))); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION Derived + operator||(neon_mask const& other) const { + return Derived(vorrq_u32(m_value, other.m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION Derived + operator&&(neon_mask const& other) const { + return Derived(vandq_u32(m_value, other.m_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION Derived operator!() const { + auto const true_value = static_cast(neon_mask(true)); + return Derived(veorq_u32(m_value, true_value)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION bool operator==( + neon_mask const& other) const { + uint32x4_t const elementwise_equality = vceqq_u32(m_value, other.m_value); + uint64x2_t const overall_equality_neon = + vreinterpretq_u64_u32(elementwise_equality); + return (overall_equality_neon[0] == 0xFFFFFFFFFFFFFFFFULL) && + (overall_equality_neon[1] == 0xFFFFFFFFFFFFFFFFULL); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION bool operator!=( + neon_mask const& other) const { + return !operator==(other); + } +}; + } // namespace Impl template class simd_mask> : public Impl::neon_mask>, - sizeof(T) * 8> { + sizeof(T) * 8, 2> { using base_type = Impl::neon_mask>, - sizeof(T) * 8>; + sizeof(T) * 8, 2>; public: using implementation_type = typename base_type::implementation_type; @@ -291,6 +405,35 @@ class simd_mask> : base_type(gen) {} }; +template +class simd_mask> + : public 
Impl::neon_mask>, + sizeof(T) * 8, 4> { + using base_type = Impl::neon_mask>, + sizeof(T) * 8, 4>; + + public: + using implementation_type = typename base_type::implementation_type; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask() = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION explicit simd_mask(bool value) + : base_type(value) {} + template + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask( + simd_mask> const& other) + : base_type(other) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit simd_mask( + implementation_type const& value) + : base_type(value) {} + template >, + bool> = false> + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit simd_mask( + G&& gen) noexcept + : base_type(gen) {} +}; + template <> class simd> { float64x2_t m_value; @@ -788,6 +931,256 @@ namespace Experimental { static_cast(c))); } +template <> +class simd> { + float32x4_t m_value; + + public: + using value_type = float; + using abi_type = simd_abi::neon_fixed_size<4>; + using mask_type = simd_mask; + class reference { + float32x4_t& m_value; + int m_lane; + + public: + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION reference(float32x4_t& value_arg, + int lane_arg) + : m_value(value_arg), m_lane(lane_arg) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION reference + operator=(float value) const { + switch (m_lane) { + case 0: m_value = vsetq_lane_f32(value, m_value, 0); break; + case 1: m_value = vsetq_lane_f32(value, m_value, 1); break; + case 2: m_value = vsetq_lane_f32(value, m_value, 2); break; + case 3: m_value = vsetq_lane_f32(value, m_value, 3); break; + } + return *this; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION operator float() const { + switch (m_lane) { + case 0: return vgetq_lane_f32(m_value, 0); + case 1: return vgetq_lane_f32(m_value, 1); + case 2: return vgetq_lane_f32(m_value, 2); + case 3: return vgetq_lane_f32(m_value, 3); + } + return 0; + } + }; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd() = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(simd 
const&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(simd&&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd& operator=(simd const&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd& operator=(simd&&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION static constexpr std::size_t size() { + return 4; + } + template , + bool> = false> + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(U&& value) + : m_value(vmovq_n_f32(value_type(value))) {} + template >, + bool> = false> + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(G&& gen) { + m_value = vsetq_lane_f32(gen(std::integral_constant()), + m_value, 0); + m_value = vsetq_lane_f32(gen(std::integral_constant()), + m_value, 1); + m_value = vsetq_lane_f32(gen(std::integral_constant()), + m_value, 2); + m_value = vsetq_lane_f32(gen(std::integral_constant()), + m_value, 3); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit simd( + float32x4_t const& value_in) + : m_value(value_in) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION reference operator[](std::size_t i) { + return reference(m_value, int(i)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION value_type + operator[](std::size_t i) const { + return reference(const_cast(this)->m_value, int(i)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_from(value_type const* ptr, + element_aligned_tag) { + m_value = vld1q_f32(ptr); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_from(value_type const* ptr, + vector_aligned_tag) { + m_value = vld1q_f32(ptr); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_to( + value_type* ptr, element_aligned_tag) const { + vst1q_f32(ptr, m_value); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_to(value_type* ptr, + vector_aligned_tag) const { + vst1q_f32(ptr, m_value); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit + operator float32x4_t() const { + return m_value; + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd operator-() const + noexcept { + return 
simd(vnegq_f32(m_value)); + } + + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator*( + simd const& lhs, simd const& rhs) noexcept { + return simd(vmulq_f32(lhs.m_value, rhs.m_value)); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator/( + simd const& lhs, simd const& rhs) noexcept { + return simd(vdivq_f32(lhs.m_value, rhs.m_value)); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator+( + simd const& lhs, simd const& rhs) noexcept { + return simd(vaddq_f32(lhs.m_value, rhs.m_value)); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator-( + simd const& lhs, simd const& rhs) noexcept { + return simd(vsubq_f32(lhs.m_value, rhs.m_value)); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator<(simd const& lhs, simd const& rhs) noexcept { + return mask_type(vcltq_f32(lhs.m_value, rhs.m_value)); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator>(simd const& lhs, simd const& rhs) noexcept { + return mask_type(vcgtq_f32(lhs.m_value, rhs.m_value)); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator<=(simd const& lhs, simd const& rhs) noexcept { + return mask_type(vcleq_f32(lhs.m_value, rhs.m_value)); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator>=(simd const& lhs, simd const& rhs) noexcept { + return mask_type(vcgeq_f32(lhs.m_value, rhs.m_value)); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator==(simd const& lhs, simd const& rhs) noexcept { + return mask_type(vceqq_f32(lhs.m_value, rhs.m_value)); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator!=(simd const& lhs, simd const& rhs) noexcept { + return !(lhs == rhs); + } +}; + +} // namespace Experimental + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + Experimental::simd> + abs(Experimental::simd< + 
float, Experimental::simd_abi::neon_fixed_size<4>> const& a) { + return Experimental::simd>( + vabsq_f32(static_cast(a))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + Experimental::simd> + floor(Experimental::simd< + float, Experimental::simd_abi::neon_fixed_size<4>> const& a) { + return Experimental::simd>( + vrndmq_f32(static_cast(a))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + Experimental::simd> + ceil(Experimental::simd< + float, Experimental::simd_abi::neon_fixed_size<4>> const& a) { + return Experimental::simd>( + vrndpq_f32(static_cast(a))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + Experimental::simd> + round(Experimental::simd< + float, Experimental::simd_abi::neon_fixed_size<4>> const& a) { + return Experimental::simd>( + vrndxq_f32(static_cast(a))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + Experimental::simd> + trunc(Experimental::simd< + float, Experimental::simd_abi::neon_fixed_size<4>> const& a) { + return Experimental::simd>( + vrndq_f32(static_cast(a))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION Experimental::simd< + float, Experimental::simd_abi::neon_fixed_size<4>> +copysign( + Experimental::simd> const& + a, + Experimental::simd> const& + b) { + uint32x4_t const sign_mask = vreinterpretq_u32_f32(vmovq_n_f32(-0.0)); + return Experimental::simd>( + vreinterpretq_f32_u32(vorrq_u32( + vreinterpretq_u32_f32(static_cast(abs(a))), + vandq_u32(sign_mask, + vreinterpretq_u32_f32(static_cast(b)))))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + Experimental::simd> + sqrt(Experimental::simd< + float, Experimental::simd_abi::neon_fixed_size<4>> const& a) { + return Experimental::simd>( + vsqrtq_f32(static_cast(a))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION Experimental::simd< + float, Experimental::simd_abi::neon_fixed_size<4>> +fma(Experimental::simd> const& + a, + Experimental::simd> const& + b, + Experimental::simd> const& + c) { + 
return Experimental::simd>( + vfmaq_f32(static_cast(c), static_cast(b), + static_cast(a))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION Experimental::simd< + float, Experimental::simd_abi::neon_fixed_size<4>> +max(Experimental::simd> const& + a, + Experimental::simd> const& + b) { + return Experimental::simd>( + vmaxq_f32(static_cast(a), static_cast(b))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION Experimental::simd< + float, Experimental::simd_abi::neon_fixed_size<4>> +min(Experimental::simd> const& + a, + Experimental::simd> const& + b) { + return Experimental::simd>( + vminq_f32(static_cast(a), static_cast(b))); +} + +namespace Experimental { + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + simd> + condition(simd_mask> const& a, + simd> const& b, + simd> const& c) { + return simd>( + vbslq_f32(static_cast(a), static_cast(b), + static_cast(c))); +} + template <> class simd> { int32x2_t m_value; @@ -1000,6 +1393,226 @@ namespace Experimental { static_cast(c))); } +template <> +class simd> { + int32x4_t m_value; + + public: + using value_type = std::int32_t; + using abi_type = simd_abi::neon_fixed_size<4>; + using mask_type = simd_mask; + class reference { + int32x4_t& m_value; + int m_lane; + + public: + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION reference(int32x4_t& value_arg, + int lane_arg) + : m_value(value_arg), m_lane(lane_arg) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION reference + operator=(std::int32_t value) const { + switch (m_lane) { + case 0: m_value = vsetq_lane_s32(value, m_value, 0); break; + case 1: m_value = vsetq_lane_s32(value, m_value, 1); break; + case 2: m_value = vsetq_lane_s32(value, m_value, 2); break; + case 3: m_value = vsetq_lane_s32(value, m_value, 3); break; + } + return *this; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION operator std::int32_t() const { + switch (m_lane) { + case 0: return vgetq_lane_s32(m_value, 0); + case 1: return vgetq_lane_s32(m_value, 1); + case 2: return vgetq_lane_s32(m_value, 
2); + case 3: return vgetq_lane_s32(m_value, 3); + } + return 0; + } + }; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd() = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(simd const&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(simd&&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd& operator=(simd const&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd& operator=(simd&&) = default; + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION static constexpr std::size_t size() { + return 4; + } + template , + bool> = false> + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd(U&& value) + : m_value(vmovq_n_s32(value_type(value))) {} + template >, + bool> = false> + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit simd( + G&& gen) noexcept { + m_value = vsetq_lane_s32(gen(std::integral_constant()), + m_value, 0); + m_value = vsetq_lane_s32(gen(std::integral_constant()), + m_value, 1); + m_value = vsetq_lane_s32(gen(std::integral_constant()), + m_value, 2); + m_value = vsetq_lane_s32(gen(std::integral_constant()), + m_value, 3); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit simd( + int32x4_t const& value_in) + : m_value(value_in) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION explicit simd( + simd const& other); + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION reference operator[](std::size_t i) { + return reference(m_value, int(i)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION value_type + operator[](std::size_t i) const { + return reference(const_cast(this)->m_value, int(i)); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_from(value_type const* ptr, + element_aligned_tag) { + m_value = vld1q_s32(ptr); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_from(value_type const* ptr, + vector_aligned_tag) { + m_value = vld1q_s32(ptr); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_to( + value_type* ptr, element_aligned_tag) const { + vst1q_s32(ptr, m_value); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void copy_to(value_type* ptr, + 
vector_aligned_tag) const { + vst1q_s32(ptr, m_value); + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION constexpr explicit operator int32x4_t() + const { + return m_value; + } + + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd operator-() const + noexcept { + return simd(vnegq_s32(m_value)); + } + + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator-( + simd const& lhs, simd const& rhs) noexcept { + return simd( + vsubq_s32(static_cast(lhs), static_cast(rhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator+( + simd const& lhs, simd const& rhs) noexcept { + return simd( + vaddq_s32(static_cast(lhs), static_cast(rhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator*( + simd const& lhs, simd const& rhs) noexcept { + return simd( + vmulq_s32(static_cast(lhs), static_cast(rhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator==(simd const& lhs, simd const& rhs) noexcept { + return mask_type( + vceqq_s32(static_cast(lhs), static_cast(rhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator>(simd const& lhs, simd const& rhs) noexcept { + return mask_type( + vcgtq_s32(static_cast(lhs), static_cast(rhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator<(simd const& lhs, simd const& rhs) noexcept { + return mask_type( + vcltq_s32(static_cast(lhs), static_cast(rhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator<=(simd const& lhs, simd const& rhs) noexcept { + return mask_type( + vcleq_s32(static_cast(lhs), static_cast(rhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator>=(simd const& lhs, simd const& rhs) noexcept { + return mask_type( + vcgeq_s32(static_cast(lhs), static_cast(rhs))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend mask_type + operator!=(simd const& lhs, 
simd const& rhs) noexcept { + return !(lhs == rhs); + } + + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator>>( + simd const& lhs, int rhs) noexcept { + return simd(vshlq_s32(static_cast(lhs), + vnegq_s32(vmovq_n_s32(std::int32_t(rhs))))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator>>( + simd const& lhs, simd const& rhs) noexcept { + return simd(vshlq_s32(static_cast(lhs), + vnegq_s32(static_cast(rhs)))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator<<( + simd const& lhs, int rhs) noexcept { + return simd( + vshlq_s32(static_cast(lhs), vmovq_n_s32(std::int32_t(rhs)))); + } + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION friend simd operator<<( + simd const& lhs, simd const& rhs) noexcept { + return simd( + vshlq_s32(static_cast(lhs), static_cast(rhs))); + } +}; + +} // namespace Experimental + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + Experimental::simd> + abs(Experimental::simd< + std::int32_t, Experimental::simd_abi::neon_fixed_size<4>> const& a) { + return Experimental::simd>( + vabsq_s32(static_cast(a))); +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + Experimental::simd> + floor(Experimental::simd< + std::int32_t, Experimental::simd_abi::neon_fixed_size<4>> const& a) { + return a; +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + Experimental::simd> + ceil(Experimental::simd< + std::int32_t, Experimental::simd_abi::neon_fixed_size<4>> const& a) { + return a; +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + Experimental::simd> + round(Experimental::simd< + std::int32_t, Experimental::simd_abi::neon_fixed_size<4>> const& a) { + return a; +} + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + Experimental::simd> + trunc(Experimental::simd< + std::int32_t, Experimental::simd_abi::neon_fixed_size<4>> const& a) { + return a; +} + +namespace Experimental { + +[[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + simd> + 
condition(simd_mask> const& a, + simd> const& b, + simd> const& c) { + return simd>( + vbslq_s32(static_cast(a), static_cast(b), + static_cast(c))); +} + template <> class simd> { int64x2_t m_value; @@ -1593,6 +2206,106 @@ class where_expression>, } }; +template <> +class const_where_expression>, + simd>> { + public: + using abi_type = simd_abi::neon_fixed_size<4>; + using value_type = simd; + using mask_type = simd_mask; + + protected: + value_type& m_value; + mask_type const& m_mask; + + public: + const_where_expression(mask_type const& mask_arg, value_type const& value_arg) + : m_value(const_cast(value_arg)), m_mask(mask_arg) {} + + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void copy_to(float* mem, element_aligned_tag) const { + if (m_mask[0]) mem[0] = m_value[0]; + if (m_mask[1]) mem[1] = m_value[1]; + if (m_mask[2]) mem[2] = m_value[2]; + if (m_mask[3]) mem[3] = m_value[3]; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void copy_to(float* mem, vector_aligned_tag) const { + if (m_mask[0]) mem[0] = m_value[0]; + if (m_mask[1]) mem[1] = m_value[1]; + if (m_mask[2]) mem[2] = m_value[2]; + if (m_mask[3]) mem[3] = m_value[3]; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void scatter_to( + float* mem, + simd> const& index) const { + if (m_mask[0]) mem[index[0]] = m_value[0]; + if (m_mask[1]) mem[index[1]] = m_value[1]; + if (m_mask[2]) mem[index[2]] = m_value[2]; + if (m_mask[3]) mem[index[3]] = m_value[3]; + } + + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION value_type const& + impl_get_value() const { + return m_value; + } + + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION mask_type const& + impl_get_mask() const { + return m_mask; + } +}; + +template <> +class where_expression>, + simd>> + : public const_where_expression< + simd_mask>, + simd>> { + public: + where_expression( + simd_mask> const& mask_arg, + simd>& value_arg) + : const_where_expression(mask_arg, value_arg) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void copy_from(float const* mem, 
element_aligned_tag) { + if (m_mask[0]) m_value[0] = mem[0]; + if (m_mask[1]) m_value[1] = mem[1]; + if (m_mask[2]) m_value[2] = mem[2]; + if (m_mask[3]) m_value[3] = mem[3]; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void copy_from(float const* mem, vector_aligned_tag) { + if (m_mask[0]) m_value[0] = mem[0]; + if (m_mask[1]) m_value[1] = mem[1]; + if (m_mask[2]) m_value[2] = mem[2]; + if (m_mask[3]) m_value[3] = mem[3]; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void gather_from( + float const* mem, + simd> const& index) { + if (m_mask[0]) m_value[0] = mem[index[0]]; + if (m_mask[1]) m_value[1] = mem[index[1]]; + if (m_mask[2]) m_value[2] = mem[index[2]]; + if (m_mask[3]) m_value[3] = mem[index[3]]; + } + template >>, + bool> = false> + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void operator=(U&& x) { + auto const x_as_value_type = + static_cast>>( + std::forward(x)); + m_value = static_cast>>( + vbslq_f32(static_cast(m_mask), + static_cast(x_as_value_type), + static_cast(m_value))); + } +}; + template <> class const_where_expression< simd_mask>, @@ -1686,6 +2399,108 @@ class where_expression>, } }; +template <> +class const_where_expression< + simd_mask>, + simd>> { + public: + using abi_type = simd_abi::neon_fixed_size<4>; + using value_type = simd; + using mask_type = simd_mask; + + protected: + value_type& m_value; + mask_type const& m_mask; + + public: + const_where_expression(mask_type const& mask_arg, value_type const& value_arg) + : m_value(const_cast(value_arg)), m_mask(mask_arg) {} + + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void copy_to(std::int32_t* mem, element_aligned_tag) const { + if (m_mask[0]) mem[0] = m_value[0]; + if (m_mask[1]) mem[1] = m_value[1]; + if (m_mask[2]) mem[2] = m_value[2]; + if (m_mask[3]) mem[3] = m_value[3]; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void copy_to(std::int32_t* mem, vector_aligned_tag) const { + if (m_mask[0]) mem[0] = m_value[0]; + if (m_mask[1]) mem[1] = m_value[1]; + if (m_mask[2]) mem[2] = m_value[2]; + 
if (m_mask[3]) mem[3] = m_value[3]; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void scatter_to( + std::int32_t* mem, + simd> const& index) const { + if (m_mask[0]) mem[index[0]] = m_value[0]; + if (m_mask[1]) mem[index[1]] = m_value[1]; + if (m_mask[2]) mem[index[2]] = m_value[2]; + if (m_mask[3]) mem[index[3]] = m_value[3]; + } + + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION value_type const& + impl_get_value() const { + return m_value; + } + + [[nodiscard]] KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION mask_type const& + impl_get_mask() const { + return m_mask; + } +}; + +template <> +class where_expression>, + simd>> + : public const_where_expression< + simd_mask>, + simd>> { + public: + where_expression( + simd_mask> const& mask_arg, + simd>& value_arg) + : const_where_expression(mask_arg, value_arg) {} + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void copy_from(std::int32_t const* mem, element_aligned_tag) { + if (m_mask[0]) m_value[0] = mem[0]; + if (m_mask[1]) m_value[1] = mem[1]; + if (m_mask[2]) m_value[2] = mem[2]; + if (m_mask[3]) m_value[3] = mem[3]; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void copy_from(std::int32_t const* mem, vector_aligned_tag) { + if (m_mask[0]) m_value[0] = mem[0]; + if (m_mask[1]) m_value[1] = mem[1]; + if (m_mask[2]) m_value[2] = mem[2]; + if (m_mask[3]) m_value[3] = mem[3]; + } + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION + void gather_from( + std::int32_t const* mem, + simd> const& index) { + if (m_mask[0]) m_value[0] = mem[index[0]]; + if (m_mask[1]) m_value[1] = mem[index[1]]; + if (m_mask[2]) m_value[2] = mem[index[2]]; + if (m_mask[3]) m_value[3] = mem[index[3]]; + } + template < + class U, + std::enable_if_t< + std::is_convertible_v>>, + bool> = false> + KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION void operator=(U&& x) { + auto const x_as_value_type = + static_cast>>( + std::forward(x)); + m_value = static_cast>>( + vbslq_s32(static_cast(m_mask), + static_cast(x_as_value_type), + static_cast(m_value))); + } +}; + template <> 
class const_where_expression< simd_mask>, diff --git a/lib/kokkos/simd/unit_tests/CMakeLists.txt b/lib/kokkos/simd/unit_tests/CMakeLists.txt index 75d557e8b5..109effc710 100644 --- a/lib/kokkos/simd/unit_tests/CMakeLists.txt +++ b/lib/kokkos/simd/unit_tests/CMakeLists.txt @@ -1,7 +1,9 @@ KOKKOS_INCLUDE_DIRECTORIES(${KOKKOS_SOURCE_DIR}/simd/unit_tests/include) -KOKKOS_ADD_EXECUTABLE_AND_TEST( - UnitTest_SIMD - SOURCES - UnitTestMain.cpp - TestSIMD.cpp) +IF((NOT (Kokkos_ENABLE_CUDA AND WIN32))) + KOKKOS_ADD_EXECUTABLE_AND_TEST( + UnitTest_SIMD + SOURCES + UnitTestMain.cpp + TestSIMD.cpp) +ENDIF() diff --git a/lib/kokkos/simd/unit_tests/TestSIMD.cpp b/lib/kokkos/simd/unit_tests/TestSIMD.cpp index 7a1f9be2a0..df18b43c4e 100644 --- a/lib/kokkos/simd/unit_tests/TestSIMD.cpp +++ b/lib/kokkos/simd/unit_tests/TestSIMD.cpp @@ -22,3 +22,4 @@ #include #include #include +#include diff --git a/lib/kokkos/simd/unit_tests/include/SIMDTesting_Ops.hpp b/lib/kokkos/simd/unit_tests/include/SIMDTesting_Ops.hpp index c587ccf304..74141f2531 100644 --- a/lib/kokkos/simd/unit_tests/include/SIMDTesting_Ops.hpp +++ b/lib/kokkos/simd/unit_tests/include/SIMDTesting_Ops.hpp @@ -81,7 +81,9 @@ class absolutes { auto on_host(T const& a) const { if constexpr (std::is_signed_v) { #if defined(KOKKOS_ENABLE_DEPRECATED_CODE_4) + KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_PUSH() return Kokkos::Experimental::abs(a); + KOKKOS_IMPL_DISABLE_DEPRECATED_WARNINGS_POP() #else return Kokkos::abs(a); #endif diff --git a/lib/kokkos/simd/unit_tests/include/SIMDTesting_Utilities.hpp b/lib/kokkos/simd/unit_tests/include/SIMDTesting_Utilities.hpp index d36e1e5afc..9719855f0f 100644 --- a/lib/kokkos/simd/unit_tests/include/SIMDTesting_Utilities.hpp +++ b/lib/kokkos/simd/unit_tests/include/SIMDTesting_Utilities.hpp @@ -135,8 +135,8 @@ class load_masked { for (std::size_t i = 0; i < n; ++i) { mask[i] = true; } + result = T(0); where(mask, result).copy_from(mem, Kokkos::Experimental::simd_flag_default); - where(!mask, 
result) = 0; return true; } template @@ -181,4 +181,14 @@ class load_as_scalars { } }; +// Simple check to loosely test that T is a complete type. +// Some capabilities are only defined for specific data type and abi pairs (i.e. +// extended vector width); this is used to exclude pairs that +// are not defined from being tested. +template +constexpr bool is_type_v = false; + +template +constexpr bool is_type_v = true; + #endif diff --git a/lib/kokkos/simd/unit_tests/include/TestSIMD_Condition.hpp b/lib/kokkos/simd/unit_tests/include/TestSIMD_Condition.hpp index f8d8cc70fa..bf22cf3352 100644 --- a/lib/kokkos/simd/unit_tests/include/TestSIMD_Condition.hpp +++ b/lib/kokkos/simd/unit_tests/include/TestSIMD_Condition.hpp @@ -22,21 +22,23 @@ template inline void host_check_condition() { - using simd_type = typename Kokkos::Experimental::simd; - using mask_type = typename simd_type::mask_type; + if constexpr (is_type_v>) { + using simd_type = typename Kokkos::Experimental::simd; + using mask_type = typename simd_type::mask_type; - auto condition_op = [](mask_type const& mask, simd_type const& a, - simd_type const& b) { - return Kokkos::Experimental::condition(mask, a, b); - }; + auto condition_op = [](mask_type const& mask, simd_type const& a, + simd_type const& b) { + return Kokkos::Experimental::condition(mask, a, b); + }; - simd_type value_a(16); - simd_type value_b(20); + simd_type value_a(16); + simd_type value_b(20); - auto condition_result = condition_op(mask_type(false), value_a, value_b); - EXPECT_TRUE(all_of(condition_result == value_b)); - condition_result = condition_op(mask_type(true), value_a, value_b); - EXPECT_TRUE(all_of(condition_result == value_a)); + auto condition_result = condition_op(mask_type(false), value_a, value_b); + EXPECT_TRUE(all_of(condition_result == value_b)); + condition_result = condition_op(mask_type(true), value_a, value_b); + EXPECT_TRUE(all_of(condition_result == value_a)); + } } template @@ -54,22 +56,24 @@ inline void 
host_check_condition_all_abis( template KOKKOS_INLINE_FUNCTION void device_check_condition() { - using simd_type = typename Kokkos::Experimental::simd; - using mask_type = typename simd_type::mask_type; - kokkos_checker checker; + if constexpr (is_type_v>) { + using simd_type = typename Kokkos::Experimental::simd; + using mask_type = typename simd_type::mask_type; + kokkos_checker checker; - auto condition_op = [](mask_type const& mask, simd_type const& a, - simd_type const& b) { - return Kokkos::Experimental::condition(mask, a, b); - }; + auto condition_op = [](mask_type const& mask, simd_type const& a, + simd_type const& b) { + return Kokkos::Experimental::condition(mask, a, b); + }; - simd_type value_a(16); - simd_type value_b(20); + simd_type value_a(16); + simd_type value_b(20); - auto condition_result = condition_op(mask_type(false), value_a, value_b); - checker.truth(all_of(condition_result == value_b)); - condition_result = condition_op(mask_type(true), value_a, value_b); - checker.truth(all_of(condition_result == value_a)); + auto condition_result = condition_op(mask_type(false), value_a, value_b); + checker.truth(all_of(condition_result == value_b)); + condition_result = condition_op(mask_type(true), value_a, value_b); + checker.truth(all_of(condition_result == value_a)); + } } template diff --git a/lib/kokkos/simd/unit_tests/include/TestSIMD_Construction.hpp b/lib/kokkos/simd/unit_tests/include/TestSIMD_Construction.hpp new file mode 100644 index 0000000000..0ceb1496c4 --- /dev/null +++ b/lib/kokkos/simd/unit_tests/include/TestSIMD_Construction.hpp @@ -0,0 +1,150 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. 
+// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOS_TEST_SIMD_CONSTRUCTION_HPP +#define KOKKOS_TEST_SIMD_CONSTRUCTION_HPP + +#include +#include + +template +inline void host_test_simd_traits() { + using simd_type = Kokkos::Experimental::simd; + + static_assert(std::is_nothrow_default_constructible_v); + static_assert(std::is_nothrow_copy_assignable_v); + static_assert(std::is_nothrow_copy_constructible_v); + static_assert(std::is_nothrow_move_assignable_v); + static_assert(std::is_nothrow_move_constructible_v); + + simd_type default_simd, result; + simd_type test_simd(KOKKOS_LAMBDA(std::size_t i) { return (i % 2 == 0); }); + simd_type copy_simd(test_simd); + simd_type move_simd(std::move(copy_simd)); + default_simd = std::move(move_simd); + result = default_simd; + EXPECT_TRUE(all_of(test_simd == result)); +} + +template +inline void host_test_mask_traits() { + using mask_type = Kokkos::Experimental::simd_mask; + + static_assert(std::is_nothrow_default_constructible_v); + static_assert(std::is_nothrow_copy_assignable_v); + static_assert(std::is_nothrow_copy_constructible_v); + static_assert(std::is_nothrow_move_assignable_v); + static_assert(std::is_nothrow_move_constructible_v); + + mask_type default_mask, result; + mask_type test_mask(KOKKOS_LAMBDA(std::size_t i) { return (i % 2 == 0); }); + mask_type copy_mask(test_mask); + mask_type move_mask(std::move(copy_mask)); + default_mask = std::move(move_mask); + result = default_mask; + EXPECT_EQ(test_mask, result); +} + +template +inline void host_check_construction() { + if constexpr (is_type_v>) { + host_test_simd_traits(); + host_test_mask_traits(); + } +} + +template +inline void host_check_construction_all_types( + Kokkos::Experimental::Impl::data_types) { + (host_check_construction(), ...); +} + +template +inline void 
host_check_construction_all_abis( + Kokkos::Experimental::Impl::abi_set) { + using DataTypes = Kokkos::Experimental::Impl::data_type_set; + (host_check_construction_all_types(DataTypes()), ...); +} + +template +KOKKOS_INLINE_FUNCTION void device_test_simd_traits() { + using simd_type = Kokkos::Experimental::simd; + + simd_type default_simd, result; + simd_type test_simd(KOKKOS_LAMBDA(std::size_t i) { return (i % 2 == 0); }); + simd_type copy_simd(test_simd); + simd_type move_simd(std::move(copy_simd)); + default_simd = std::move(move_simd); + result = default_simd; + + kokkos_checker checker; + checker.truth(all_of(test_simd == result)); +} + +template +KOKKOS_INLINE_FUNCTION void device_test_mask_traits() { + using mask_type = Kokkos::Experimental::simd_mask; + + mask_type default_mask, result; + mask_type test_mask(KOKKOS_LAMBDA(std::size_t i) { return (i % 2 == 0); }); + mask_type copy_mask(test_mask); + mask_type move_mask(std::move(copy_mask)); + default_mask = std::move(move_mask); + result = default_mask; + + kokkos_checker checker; + checker.truth(test_mask == result); +} + +template +KOKKOS_INLINE_FUNCTION void device_check_construction() { + if constexpr (is_type_v>) { + device_test_simd_traits(); + device_test_mask_traits(); + } +} + +template +KOKKOS_INLINE_FUNCTION void device_check_construction_all_types( + Kokkos::Experimental::Impl::data_types) { + (device_check_construction(), ...); +} + +template +KOKKOS_INLINE_FUNCTION void device_check_construction_all_abis( + Kokkos::Experimental::Impl::abi_set) { + using DataTypes = Kokkos::Experimental::Impl::data_type_set; + (device_check_construction_all_types(DataTypes()), ...); +} + +class simd_device_construction_functor { + public: + KOKKOS_INLINE_FUNCTION void operator()(int) const { + device_check_construction_all_abis( + Kokkos::Experimental::Impl::device_abi_set()); + } +}; + +TEST(simd, host_construction) { + host_check_construction_all_abis(Kokkos::Experimental::Impl::host_abi_set()); +} + 
+TEST(simd, device_construction) { + Kokkos::parallel_for(Kokkos::RangePolicy>(0, 1), + simd_device_construction_functor()); +} + +#endif diff --git a/lib/kokkos/simd/unit_tests/include/TestSIMD_Conversions.hpp b/lib/kokkos/simd/unit_tests/include/TestSIMD_Conversions.hpp index b98871bbab..20b0729762 100644 --- a/lib/kokkos/simd/unit_tests/include/TestSIMD_Conversions.hpp +++ b/lib/kokkos/simd/unit_tests/include/TestSIMD_Conversions.hpp @@ -22,40 +22,42 @@ template inline void host_check_conversions() { - { - auto a = Kokkos::Experimental::simd(1); - auto b = Kokkos::Experimental::simd(a); - EXPECT_TRUE(all_of(b == decltype(b)(1))); - } - { - auto a = Kokkos::Experimental::simd(1); - auto b = Kokkos::Experimental::simd(a); - EXPECT_TRUE(all_of(b == decltype(b)(1))); - } - { - auto a = Kokkos::Experimental::simd(1); - auto b = Kokkos::Experimental::simd(a); - EXPECT_TRUE(all_of(b == decltype(b)(1))); - } - { - auto a = Kokkos::Experimental::simd_mask(true); - auto b = Kokkos::Experimental::simd_mask(a); - EXPECT_TRUE(b == decltype(b)(true)); - } - { - auto a = Kokkos::Experimental::simd_mask(true); - auto b = Kokkos::Experimental::simd_mask(a); - EXPECT_TRUE(b == decltype(b)(true)); - } - { - auto a = Kokkos::Experimental::simd_mask(true); - auto b = Kokkos::Experimental::simd_mask(a); - EXPECT_TRUE(b == decltype(b)(true)); - } - { - auto a = Kokkos::Experimental::simd_mask(true); - auto b = Kokkos::Experimental::simd_mask(a); - EXPECT_TRUE(b == decltype(b)(true)); + if constexpr (is_type_v>) { + { + auto a = Kokkos::Experimental::simd(1); + auto b = Kokkos::Experimental::simd(a); + EXPECT_TRUE(all_of(b == decltype(b)(1))); + } + { + auto a = Kokkos::Experimental::simd(1); + auto b = Kokkos::Experimental::simd(a); + EXPECT_TRUE(all_of(b == decltype(b)(1))); + } + { + auto a = Kokkos::Experimental::simd(1); + auto b = Kokkos::Experimental::simd(a); + EXPECT_TRUE(all_of(b == decltype(b)(1))); + } + { + auto a = Kokkos::Experimental::simd_mask(true); + auto b = 
Kokkos::Experimental::simd_mask(a); + EXPECT_TRUE(b == decltype(b)(true)); + } + { + auto a = Kokkos::Experimental::simd_mask(true); + auto b = Kokkos::Experimental::simd_mask(a); + EXPECT_TRUE(b == decltype(b)(true)); + } + { + auto a = Kokkos::Experimental::simd_mask(true); + auto b = Kokkos::Experimental::simd_mask(a); + EXPECT_TRUE(b == decltype(b)(true)); + } + { + auto a = Kokkos::Experimental::simd_mask(true); + auto b = Kokkos::Experimental::simd_mask(a); + EXPECT_TRUE(b == decltype(b)(true)); + } } } @@ -67,41 +69,43 @@ inline void host_check_conversions_all_abis( template KOKKOS_INLINE_FUNCTION void device_check_conversions() { - kokkos_checker checker; - { - auto a = Kokkos::Experimental::simd(1); - auto b = Kokkos::Experimental::simd(a); - checker.truth(all_of(b == decltype(b)(1))); - } - { - auto a = Kokkos::Experimental::simd(1); - auto b = Kokkos::Experimental::simd(a); - checker.truth(all_of(b == decltype(b)(1))); - } - { - auto a = Kokkos::Experimental::simd(1); - auto b = Kokkos::Experimental::simd(a); - checker.truth(all_of(b == decltype(b)(1))); - } - { - auto a = Kokkos::Experimental::simd_mask(true); - auto b = Kokkos::Experimental::simd_mask(a); - checker.truth(b == decltype(b)(true)); - } - { - auto a = Kokkos::Experimental::simd_mask(true); - auto b = Kokkos::Experimental::simd_mask(a); - checker.truth(b == decltype(b)(true)); - } - { - auto a = Kokkos::Experimental::simd_mask(true); - auto b = Kokkos::Experimental::simd_mask(a); - checker.truth(b == decltype(b)(true)); - } - { - auto a = Kokkos::Experimental::simd_mask(true); - auto b = Kokkos::Experimental::simd_mask(a); - checker.truth(b == decltype(b)(true)); + if constexpr (is_type_v>) { + kokkos_checker checker; + { + auto a = Kokkos::Experimental::simd(1); + auto b = Kokkos::Experimental::simd(a); + checker.truth(all_of(b == decltype(b)(1))); + } + { + auto a = Kokkos::Experimental::simd(1); + auto b = Kokkos::Experimental::simd(a); + checker.truth(all_of(b == decltype(b)(1))); + } + 
{ + auto a = Kokkos::Experimental::simd(1); + auto b = Kokkos::Experimental::simd(a); + checker.truth(all_of(b == decltype(b)(1))); + } + { + auto a = Kokkos::Experimental::simd_mask(true); + auto b = Kokkos::Experimental::simd_mask(a); + checker.truth(b == decltype(b)(true)); + } + { + auto a = Kokkos::Experimental::simd_mask(true); + auto b = Kokkos::Experimental::simd_mask(a); + checker.truth(b == decltype(b)(true)); + } + { + auto a = Kokkos::Experimental::simd_mask(true); + auto b = Kokkos::Experimental::simd_mask(a); + checker.truth(b == decltype(b)(true)); + } + { + auto a = Kokkos::Experimental::simd_mask(true); + auto b = Kokkos::Experimental::simd_mask(a); + checker.truth(b == decltype(b)(true)); + } } } diff --git a/lib/kokkos/simd/unit_tests/include/TestSIMD_GeneratorCtors.hpp b/lib/kokkos/simd/unit_tests/include/TestSIMD_GeneratorCtors.hpp index 23e3826c75..1a61fd9cbb 100644 --- a/lib/kokkos/simd/unit_tests/include/TestSIMD_GeneratorCtors.hpp +++ b/lib/kokkos/simd/unit_tests/include/TestSIMD_GeneratorCtors.hpp @@ -22,49 +22,51 @@ template inline void host_check_gen_ctor() { - using simd_type = Kokkos::Experimental::simd; - using mask_type = typename simd_type::mask_type; - constexpr std::size_t lanes = simd_type::size(); + if constexpr (is_type_v>) { + using simd_type = Kokkos::Experimental::simd; + using mask_type = typename simd_type::mask_type; + constexpr std::size_t lanes = simd_type::size(); - DataType init[lanes]; - DataType expected[lanes]; - mask_type init_mask(false); + DataType init[lanes]; + DataType expected[lanes]; + mask_type init_mask(false); - for (std::size_t i = 0; i < lanes; ++i) { - if (i % 3 == 0) init_mask[i] = true; - init[i] = 7; - expected[i] = (init_mask[i]) ? init[i] * 9 : init[i]; - } + for (std::size_t i = 0; i < lanes; ++i) { + if (i % 3 == 0) init_mask[i] = true; + init[i] = 7; + expected[i] = (init_mask[i]) ? 
init[i] * 9 : init[i]; + } - simd_type rhs; - rhs.copy_from(init, Kokkos::Experimental::simd_flag_default); + simd_type rhs; + rhs.copy_from(init, Kokkos::Experimental::simd_flag_default); - simd_type blend; - blend.copy_from(expected, Kokkos::Experimental::simd_flag_default); + simd_type blend; + blend.copy_from(expected, Kokkos::Experimental::simd_flag_default); #if !(defined(KOKKOS_ENABLE_CUDA) && defined(KOKKOS_COMPILER_MSVC)) - if constexpr (std::is_same_v) { - simd_type basic(KOKKOS_LAMBDA(std::size_t i) { return init[i]; }); - host_check_equality(basic, rhs, lanes); + if constexpr (std::is_same_v) { + simd_type basic(KOKKOS_LAMBDA(std::size_t i) { return init[i]; }); + host_check_equality(basic, rhs, lanes); - simd_type lhs(KOKKOS_LAMBDA(std::size_t i) { return init[i] * 9; }); - mask_type mask(KOKKOS_LAMBDA(std::size_t i) { return init_mask[i]; }); - simd_type result( - KOKKOS_LAMBDA(std::size_t i) { return (mask[i]) ? lhs[i] : rhs[i]; }); + simd_type lhs(KOKKOS_LAMBDA(std::size_t i) { return init[i] * 9; }); + mask_type mask(KOKKOS_LAMBDA(std::size_t i) { return init_mask[i]; }); + simd_type result( + KOKKOS_LAMBDA(std::size_t i) { return (mask[i]) ? lhs[i] : rhs[i]; }); - host_check_equality(blend, result, lanes); - } else { - simd_type basic([=](std::size_t i) { return init[i]; }); - host_check_equality(basic, rhs, lanes); + host_check_equality(blend, result, lanes); + } else { + simd_type basic([=](std::size_t i) { return init[i]; }); + host_check_equality(basic, rhs, lanes); - simd_type lhs([=](std::size_t i) { return init[i] * 9; }); - mask_type mask([=](std::size_t i) { return init_mask[i]; }); - simd_type result( - [=](std::size_t i) { return (mask[i]) ? lhs[i] : rhs[i]; }); + simd_type lhs([=](std::size_t i) { return init[i] * 9; }); + mask_type mask([=](std::size_t i) { return init_mask[i]; }); + simd_type result( + [=](std::size_t i) { return (mask[i]) ? 
lhs[i] : rhs[i]; }); - host_check_equality(blend, result, lanes); - } + host_check_equality(blend, result, lanes); + } #endif + } } template @@ -82,32 +84,34 @@ inline void host_check_gen_ctors_all_abis( template KOKKOS_INLINE_FUNCTION void device_check_gen_ctor() { - using simd_type = Kokkos::Experimental::simd; - using mask_type = typename simd_type::mask_type; - constexpr std::size_t lanes = simd_type::size(); + if constexpr (is_type_v>) { + using simd_type = Kokkos::Experimental::simd; + using mask_type = typename simd_type::mask_type; + constexpr std::size_t lanes = simd_type::size(); - DataType init[lanes]; - DataType expected[lanes]; - mask_type mask(false); + DataType init[lanes]; + DataType expected[lanes]; + mask_type mask(false); - for (std::size_t i = 0; i < lanes; ++i) { - if (i % 3 == 0) mask[i] = true; - init[i] = 7; - expected[i] = (mask[i]) ? init[i] * 9 : init[i]; + for (std::size_t i = 0; i < lanes; ++i) { + if (i % 3 == 0) mask[i] = true; + init[i] = 7; + expected[i] = (mask[i]) ? init[i] * 9 : init[i]; + } + + simd_type basic(KOKKOS_LAMBDA(std::size_t i) { return init[i]; }); + simd_type rhs; + rhs.copy_from(init, Kokkos::Experimental::simd_flag_default); + device_check_equality(basic, rhs, lanes); + + simd_type lhs(KOKKOS_LAMBDA(std::size_t i) { return init[i] * 9; }); + simd_type result( + KOKKOS_LAMBDA(std::size_t i) { return (mask[i]) ? lhs[i] : rhs[i]; }); + + simd_type blend; + blend.copy_from(expected, Kokkos::Experimental::simd_flag_default); + device_check_equality(result, blend, lanes); } - - simd_type basic(KOKKOS_LAMBDA(std::size_t i) { return init[i]; }); - simd_type rhs; - rhs.copy_from(init, Kokkos::Experimental::simd_flag_default); - device_check_equality(basic, rhs, lanes); - - simd_type lhs(KOKKOS_LAMBDA(std::size_t i) { return init[i] * 9; }); - simd_type result( - KOKKOS_LAMBDA(std::size_t i) { return (mask[i]) ? 
lhs[i] : rhs[i]; }); - - simd_type blend; - blend.copy_from(expected, Kokkos::Experimental::simd_flag_default); - device_check_equality(result, blend, lanes); } template diff --git a/lib/kokkos/simd/unit_tests/include/TestSIMD_MaskOps.hpp b/lib/kokkos/simd/unit_tests/include/TestSIMD_MaskOps.hpp index a93c52e9a8..c3d4ac594d 100644 --- a/lib/kokkos/simd/unit_tests/include/TestSIMD_MaskOps.hpp +++ b/lib/kokkos/simd/unit_tests/include/TestSIMD_MaskOps.hpp @@ -22,25 +22,27 @@ template inline void host_check_mask_ops() { - using mask_type = Kokkos::Experimental::simd_mask; + if constexpr (is_type_v>) { + using mask_type = Kokkos::Experimental::simd_mask; - EXPECT_FALSE(none_of(mask_type(true))); - EXPECT_TRUE(none_of(mask_type(false))); - EXPECT_TRUE(all_of(mask_type(true))); - EXPECT_FALSE(all_of(mask_type(false))); - EXPECT_TRUE(any_of(mask_type(true))); - EXPECT_FALSE(any_of(mask_type(false))); + EXPECT_FALSE(none_of(mask_type(true))); + EXPECT_TRUE(none_of(mask_type(false))); + EXPECT_TRUE(all_of(mask_type(true))); + EXPECT_FALSE(all_of(mask_type(false))); + EXPECT_TRUE(any_of(mask_type(true))); + EXPECT_FALSE(any_of(mask_type(false))); - for (std::size_t i = 0; i < mask_type::size(); ++i) { - mask_type test_mask(KOKKOS_LAMBDA(std::size_t j) { return i == j; }); + for (std::size_t i = 0; i < mask_type::size(); ++i) { + mask_type test_mask(KOKKOS_LAMBDA(std::size_t j) { return i == j; }); - EXPECT_TRUE(any_of(test_mask)); - EXPECT_FALSE(none_of(test_mask)); + EXPECT_TRUE(any_of(test_mask)); + EXPECT_FALSE(none_of(test_mask)); - if constexpr (mask_type::size() > 1) { - EXPECT_FALSE(all_of(test_mask)); - } else { - EXPECT_TRUE(all_of(test_mask)); + if constexpr (mask_type::size() > 1) { + EXPECT_FALSE(all_of(test_mask)); + } else { + EXPECT_TRUE(all_of(test_mask)); + } } } } @@ -60,25 +62,27 @@ inline void host_check_mask_ops_all_abis( template KOKKOS_INLINE_FUNCTION void device_check_mask_ops() { - using mask_type = Kokkos::Experimental::simd_mask; - kokkos_checker 
checker; - checker.truth(!none_of(mask_type(true))); - checker.truth(none_of(mask_type(false))); - checker.truth(all_of(mask_type(true))); - checker.truth(!all_of(mask_type(false))); - checker.truth(any_of(mask_type(true))); - checker.truth(!any_of(mask_type(false))); + if constexpr (is_type_v>) { + using mask_type = Kokkos::Experimental::simd_mask; + kokkos_checker checker; + checker.truth(!none_of(mask_type(true))); + checker.truth(none_of(mask_type(false))); + checker.truth(all_of(mask_type(true))); + checker.truth(!all_of(mask_type(false))); + checker.truth(any_of(mask_type(true))); + checker.truth(!any_of(mask_type(false))); - for (std::size_t i = 0; i < mask_type::size(); ++i) { - mask_type test_mask(KOKKOS_LAMBDA(std::size_t j) { return i == j; }); + for (std::size_t i = 0; i < mask_type::size(); ++i) { + mask_type test_mask(KOKKOS_LAMBDA(std::size_t j) { return i == j; }); - checker.truth(any_of(test_mask)); - checker.truth(!none_of(test_mask)); + checker.truth(any_of(test_mask)); + checker.truth(!none_of(test_mask)); - if constexpr (mask_type::size() > 1) { - checker.truth(!all_of(test_mask)); - } else { - checker.truth(all_of(test_mask)); + if constexpr (mask_type::size() > 1) { + checker.truth(!all_of(test_mask)); + } else { + checker.truth(all_of(test_mask)); + } } } } diff --git a/lib/kokkos/simd/unit_tests/include/TestSIMD_MathOps.hpp b/lib/kokkos/simd/unit_tests/include/TestSIMD_MathOps.hpp index 59f2f6c18f..4891a54f6c 100644 --- a/lib/kokkos/simd/unit_tests/include/TestSIMD_MathOps.hpp +++ b/lib/kokkos/simd/unit_tests/include/TestSIMD_MathOps.hpp @@ -121,31 +121,34 @@ inline void host_check_abi_size() { template inline void host_check_math_ops() { - constexpr size_t n = 11; - constexpr size_t alignment = - Kokkos::Experimental::simd::size() * sizeof(DataType); + if constexpr (is_type_v>) { + constexpr size_t alignment = + Kokkos::Experimental::simd::size() * sizeof(DataType); - host_check_abi_size(); + host_check_abi_size(); - if constexpr 
(!std::is_integral_v) { - alignas(alignment) DataType const first_args[n] = { - 0.1, 0.4, 0.5, 0.7, 1.0, 1.5, -2.0, 10.0, 0.0, 1.2, -2.8}; - alignas(alignment) DataType const second_args[n] = { - 1.0, 0.2, 1.1, 1.8, -0.1, -3.0, -2.4, 1.0, 13.0, -3.2, -2.1}; - host_check_all_math_ops(first_args, second_args); - } else { - if constexpr (std::is_signed_v) { - alignas(alignment) - DataType const first_args[n] = {1, 2, -1, 10, 0, 1, -2, 10, 0, 1, -2}; - alignas(alignment) DataType const second_args[n] = {1, 2, 1, 1, 1, -3, - -2, 1, 13, -3, -2}; + if constexpr (!std::is_integral_v) { + alignas(alignment) DataType const first_args[] = { + 0.1, 0.4, 0.5, 0.7, 1.0, 1.5, -2.0, 10.0, + 0.0, 1.2, -2.8, 3.0, 4.0, -0.1, 5.0, -0.2}; + alignas(alignment) DataType const second_args[] = { + 1.0, 0.2, 1.1, 1.8, -0.1, -3.0, -2.4, 1.0, + 13.0, -3.2, -2.1, 3.0, -15.0, -0.5, -0.2, -0.2}; host_check_all_math_ops(first_args, second_args); } else { - alignas(alignment) - DataType const first_args[n] = {1, 2, 1, 10, 0, 1, 2, 10, 0, 1, 2}; - alignas(alignment) - DataType const second_args[n] = {1, 2, 1, 1, 1, 3, 2, 1, 13, 3, 2}; - host_check_all_math_ops(first_args, second_args); + if constexpr (std::is_signed_v) { + alignas(alignment) DataType const first_args[] = { + 1, 2, -1, 10, 0, 1, -2, 10, 0, 1, -2, -3, 7, 4, -9, -15}; + alignas(alignment) DataType const second_args[] = { + 1, 2, 1, 1, 1, -3, -2, 1, 13, -3, -2, 10, -15, 7, 2, -10}; + host_check_all_math_ops(first_args, second_args); + } else { + alignas(alignment) DataType const first_args[] = { + 1, 2, 1, 10, 0, 1, 2, 10, 0, 1, 2, 11, 5, 8, 2, 14}; + alignas(alignment) DataType const second_args[] = { + 1, 2, 1, 1, 1, 3, 2, 1, 13, 3, 2, 3, 6, 20, 5, 14}; + host_check_all_math_ops(first_args, second_args); + } } } } @@ -253,25 +256,31 @@ KOKKOS_INLINE_FUNCTION void device_check_abi_size() { template KOKKOS_INLINE_FUNCTION void device_check_math_ops() { - constexpr size_t n = 11; + if constexpr (is_type_v>) { + device_check_abi_size(); 
- device_check_abi_size(); - - if constexpr (!std::is_integral_v) { - DataType const first_args[n] = {0.1, 0.4, 0.5, 0.7, 1.0, 1.5, - -2.0, 10.0, 0.0, 1.2, -2.8}; - DataType const second_args[n] = {1.0, 0.2, 1.1, 1.8, -0.1, -3.0, - -2.4, 1.0, 13.0, -3.2, -2.1}; - device_check_all_math_ops(first_args, second_args); - } else { - if constexpr (std::is_signed_v) { - DataType const first_args[n] = {1, 2, -1, 10, 0, 1, -2, 10, 0, 1, -2}; - DataType const second_args[n] = {1, 2, 1, 1, 1, -3, -2, 1, 13, -3, -2}; + if constexpr (!std::is_integral_v) { + DataType const first_args[] = {0.1, 0.4, 0.5, 0.7, 1.0, 1.5, + -2.0, 10.0, 0.0, 1.2, -2.8, 3.0, + 4.0, -0.1, 5.0, -0.2}; + DataType const second_args[] = {1.0, 0.2, 1.1, 1.8, -0.1, -3.0, + -2.4, 1.0, 13.0, -3.2, -2.1, 3.0, + -15.0, -0.5, -0.2, -0.2}; device_check_all_math_ops(first_args, second_args); } else { - DataType const first_args[n] = {1, 2, 1, 10, 0, 1, 2, 10, 0, 1, 2}; - DataType const second_args[n] = {1, 2, 1, 1, 1, 3, 2, 1, 13, 3, 2}; - device_check_all_math_ops(first_args, second_args); + if constexpr (std::is_signed_v) { + DataType const first_args[] = {1, 2, -1, 10, 0, 1, -2, 10, + 0, 1, -2, -3, 7, 4, -9, -15}; + DataType const second_args[] = {1, 2, 1, 1, 1, -3, -2, 1, + 13, -3, -2, 10, -15, 7, 2, -10}; + device_check_all_math_ops(first_args, second_args); + } else { + DataType const first_args[] = {1, 2, 1, 10, 0, 1, 2, 10, + 0, 1, 2, 11, 5, 8, 2, 14}; + DataType const second_args[] = {1, 2, 1, 1, 1, 3, 2, 1, + 13, 3, 2, 3, 6, 20, 5, 14}; + device_check_all_math_ops(first_args, second_args); + } } } } diff --git a/lib/kokkos/simd/unit_tests/include/TestSIMD_Reductions.hpp b/lib/kokkos/simd/unit_tests/include/TestSIMD_Reductions.hpp index b3c7ac9a01..a3e796a030 100644 --- a/lib/kokkos/simd/unit_tests/include/TestSIMD_Reductions.hpp +++ b/lib/kokkos/simd/unit_tests/include/TestSIMD_Reductions.hpp @@ -65,14 +65,18 @@ inline void host_check_all_reductions(const DataType (&args)[n]) { template inline void 
host_check_reductions() { - constexpr size_t n = 11; + if constexpr (is_type_v>) { + constexpr size_t n = 16; - if constexpr (std::is_signed_v) { - DataType const args[n] = {1, 2, -1, 10, 0, 1, -2, 10, 0, 1, -2}; - host_check_all_reductions(args); - } else { - DataType const args[n] = {1, 2, 1, 10, 0, 1, 2, 10, 0, 1, 2}; - host_check_all_reductions(args); + if constexpr (std::is_signed_v) { + DataType const args[n] = {1, 2, -1, 10, 0, 1, -2, 10, + 0, 1, -2, -15, 5, 17, -22, 20}; + host_check_all_reductions(args); + } else { + DataType const args[n] = {1, 2, 1, 10, 0, 1, 2, 10, + 0, 1, 2, 15, 5, 17, 22, 20}; + host_check_all_reductions(args); + } } } @@ -135,14 +139,18 @@ KOKKOS_INLINE_FUNCTION void device_check_all_reductions( template KOKKOS_INLINE_FUNCTION void device_check_reductions() { - constexpr size_t n = 11; + if constexpr (is_type_v>) { + constexpr size_t n = 16; - if constexpr (std::is_signed_v) { - DataType const args[n] = {1, 2, -1, 10, 0, 1, -2, 10, 0, 1, -2}; - device_check_all_reductions(args); - } else { - DataType const args[n] = {1, 2, 1, 10, 0, 1, 2, 10, 0, 1, 2}; - device_check_all_reductions(args); + if constexpr (std::is_signed_v) { + DataType const args[n] = {1, 2, -1, 10, 0, 1, -2, 10, + 0, 1, -2, -15, 5, 17, -22, 20}; + device_check_all_reductions(args); + } else { + DataType const args[n] = {1, 2, 1, 10, 0, 1, 2, 10, + 0, 1, 2, 15, 5, 17, 22, 20}; + device_check_all_reductions(args); + } } } diff --git a/lib/kokkos/simd/unit_tests/include/TestSIMD_ShiftOps.hpp b/lib/kokkos/simd/unit_tests/include/TestSIMD_ShiftOps.hpp index ffdd2cba4a..7329f08501 100644 --- a/lib/kokkos/simd/unit_tests/include/TestSIMD_ShiftOps.hpp +++ b/lib/kokkos/simd/unit_tests/include/TestSIMD_ShiftOps.hpp @@ -103,34 +103,35 @@ inline void host_check_shift_op_all_loaders(ShiftOp shift_op, template inline void host_check_shift_ops() { - if constexpr (std::is_integral_v) { - using simd_type = Kokkos::Experimental::simd; - constexpr std::size_t width = simd_type::size(); 
- constexpr std::size_t num_cases = 8; - constexpr size_t alignment = - Kokkos::Experimental::simd::size() * sizeof(DataType); + if constexpr (is_type_v>) { + if constexpr (std::is_integral_v) { + using simd_type = Kokkos::Experimental::simd; + constexpr std::size_t width = simd_type::size(); + constexpr std::size_t num_cases = 16; + constexpr size_t alignment = + Kokkos::Experimental::simd::size() * sizeof(DataType); - DataType max = std::numeric_limits::max(); + DataType max = std::numeric_limits::max(); - alignas(alignment) DataType shift_by[num_cases] = { - 0, 1, 3, width / 2, width / 2 + 1, width - 1, width, width + 1}; - alignas(alignment) DataType test_vals[width]; - for (std::size_t i = 0; i < width; ++i) { - DataType inc = max / width; - test_vals[i] = i * inc + 1; - } + alignas(alignment) DataType shift_by[num_cases] = { + 0, 1, 3, width / 2, width / 2 + 1, width - 1, width, width + 1, + 0, 1, 3, width / 2, width / 2 + 1, width - 1, width, width + 1}; + alignas(alignment) DataType test_vals[width]; + for (std::size_t i = 0; i < width; ++i) { + DataType inc = max / width; + test_vals[i] = i * inc + 1; + } - host_check_shift_op_all_loaders(shift_right(), test_vals, shift_by, - num_cases); - host_check_shift_op_all_loaders(shift_left(), test_vals, shift_by, - num_cases); - - if constexpr (std::is_signed_v) { - for (std::size_t i = 0; i < width; ++i) test_vals[i] *= -1; host_check_shift_op_all_loaders(shift_right(), test_vals, shift_by, num_cases); host_check_shift_op_all_loaders(shift_left(), test_vals, shift_by, num_cases); + + if constexpr (std::is_signed_v) { + for (std::size_t i = 0; i < width; ++i) test_vals[i] *= -1; + host_check_shift_op_all_loaders(shift_right(), test_vals, shift_by, + num_cases); + } } } } @@ -224,33 +225,34 @@ KOKKOS_INLINE_FUNCTION void device_check_shift_op_all_loaders( template KOKKOS_INLINE_FUNCTION void device_check_shift_ops() { - if constexpr (std::is_integral_v) { - using simd_type = Kokkos::Experimental::simd; - constexpr 
std::size_t width = simd_type::size(); - constexpr std::size_t num_cases = 8; + if constexpr (is_type_v>) { + if constexpr (std::is_integral_v) { + using simd_type = Kokkos::Experimental::simd; + constexpr std::size_t width = simd_type::size(); + constexpr std::size_t num_cases = 16; - DataType max = Kokkos::reduction_identity::max(); + DataType max = Kokkos::reduction_identity::max(); - DataType shift_by[num_cases] = { - 0, 1, 3, width / 2, width / 2 + 1, width - 1, width, width + 1}; - DataType test_vals[width]; + DataType shift_by[num_cases] = { + 0, 1, 3, width / 2, width / 2 + 1, width - 1, width, width + 1, + 0, 1, 3, width / 2, width / 2 + 1, width - 1, width, width + 1}; + DataType test_vals[width]; - for (std::size_t i = 0; i < width; ++i) { - DataType inc = max / width; - test_vals[i] = i * inc + 1; - } + for (std::size_t i = 0; i < width; ++i) { + DataType inc = max / width; + test_vals[i] = i * inc + 1; + } - device_check_shift_op_all_loaders(shift_right(), test_vals, shift_by, - num_cases); - device_check_shift_op_all_loaders(shift_left(), test_vals, shift_by, - num_cases); - - if constexpr (std::is_signed_v) { - for (std::size_t i = 0; i < width; ++i) test_vals[i] *= -1; device_check_shift_op_all_loaders(shift_right(), test_vals, shift_by, num_cases); device_check_shift_op_all_loaders(shift_left(), test_vals, shift_by, num_cases); + + if constexpr (std::is_signed_v) { + for (std::size_t i = 0; i < width; ++i) test_vals[i] *= -1; + device_check_shift_op_all_loaders(shift_right(), test_vals, + shift_by, num_cases); + } } } } diff --git a/lib/kokkos/simd/unit_tests/include/TestSIMD_WhereExpressions.hpp b/lib/kokkos/simd/unit_tests/include/TestSIMD_WhereExpressions.hpp index 152fd9e984..904b2c665e 100644 --- a/lib/kokkos/simd/unit_tests/include/TestSIMD_WhereExpressions.hpp +++ b/lib/kokkos/simd/unit_tests/include/TestSIMD_WhereExpressions.hpp @@ -22,60 +22,66 @@ template inline void host_check_where_expr_scatter_to() { - using simd_type = 
Kokkos::Experimental::simd; - using index_type = Kokkos::Experimental::simd; - using mask_type = typename simd_type::mask_type; + if constexpr (is_type_v>) { + using simd_type = Kokkos::Experimental::simd; + using index_type = Kokkos::Experimental::simd; + using mask_type = typename simd_type::mask_type; - std::size_t nlanes = simd_type::size(); - DataType init[] = {11, 13, 17, 19, 23, 29, 31, 37}; - simd_type src; - src.copy_from(init, Kokkos::Experimental::simd_flag_default); + std::size_t nlanes = simd_type::size(); + DataType init[] = {11, 13, 17, 19, 23, 29, 31, 37, + 53, 71, 79, 83, 89, 93, 97, 103}; + simd_type src; + src.copy_from(init, Kokkos::Experimental::simd_flag_default); - for (std::size_t idx = 0; idx < nlanes; ++idx) { - mask_type mask(true); - mask[idx] = false; + for (std::size_t idx = 0; idx < nlanes; ++idx) { + mask_type mask(true); + mask[idx] = false; - DataType dst[8] = {0}; - index_type index; - simd_type expected_result; - for (std::size_t i = 0; i < nlanes; ++i) { - dst[i] = (2 + (i * 2)); - index[i] = i; - expected_result[i] = (mask[i]) ? src[index[i]] : dst[i]; + DataType dst[simd_type::size()] = {0}; + index_type index; + simd_type expected_result; + for (std::size_t i = 0; i < nlanes; ++i) { + dst[i] = (2 + (i * 2)); + index[i] = i; + expected_result[i] = (mask[i]) ? 
src[index[i]] : dst[i]; + } + where(mask, src).scatter_to(dst, index); + + simd_type dst_simd; + dst_simd.copy_from(dst, Kokkos::Experimental::simd_flag_default); + + host_check_equality(expected_result, dst_simd, nlanes); } - where(mask, src).scatter_to(dst, index); - - simd_type dst_simd; - dst_simd.copy_from(dst, Kokkos::Experimental::simd_flag_default); - - host_check_equality(expected_result, dst_simd, nlanes); } } template inline void host_check_where_expr_gather_from() { - using simd_type = Kokkos::Experimental::simd; - using index_type = Kokkos::Experimental::simd; - using mask_type = typename simd_type::mask_type; + if constexpr (is_type_v>) { + using simd_type = Kokkos::Experimental::simd; + using index_type = Kokkos::Experimental::simd; + using mask_type = typename simd_type::mask_type; - std::size_t nlanes = simd_type::size(); - DataType src[] = {11, 13, 17, 19, 23, 29, 31, 37}; + std::size_t nlanes = simd_type::size(); + DataType src[] = {11, 13, 17, 19, 23, 29, 31, 37, + 53, 71, 79, 83, 89, 93, 97, 103}; - for (std::size_t idx = 0; idx < nlanes; ++idx) { - mask_type mask(true); - mask[idx] = false; + for (std::size_t idx = 0; idx < nlanes; ++idx) { + mask_type mask(true); + mask[idx] = false; - simd_type dst; - index_type index; - simd_type expected_result; - for (std::size_t i = 0; i < nlanes; ++i) { - dst[i] = (2 + (i * 2)); - index[i] = i; - expected_result[i] = (mask[i]) ? src[index[i]] : dst[i]; + simd_type dst; + index_type index; + simd_type expected_result; + for (std::size_t i = 0; i < nlanes; ++i) { + dst[i] = (2 + (i * 2)); + index[i] = i; + expected_result[i] = (mask[i]) ? 
src[index[i]] : dst[i]; + } + where(mask, dst).gather_from(src, index); + + host_check_equality(expected_result, dst, nlanes); } - where(mask, dst).gather_from(src, index); - - host_check_equality(expected_result, dst, nlanes); } } @@ -100,33 +106,36 @@ inline void host_check_where_expr_all_abis( template KOKKOS_INLINE_FUNCTION void device_check_where_expr_scatter_to() { - using simd_type = Kokkos::Experimental::simd; - using index_type = Kokkos::Experimental::simd; - using mask_type = typename simd_type::mask_type; + if constexpr (is_type_v>) { + using simd_type = Kokkos::Experimental::simd; + using index_type = Kokkos::Experimental::simd; + using mask_type = typename simd_type::mask_type; - std::size_t nlanes = simd_type::size(); - DataType init[] = {11, 13, 17, 19, 23, 29, 31, 37}; - simd_type src; - src.copy_from(init, Kokkos::Experimental::simd_flag_default); + std::size_t nlanes = simd_type::size(); + DataType init[] = {11, 13, 17, 19, 23, 29, 31, 37, + 53, 71, 79, 83, 89, 93, 97, 103}; + simd_type src; + src.copy_from(init, Kokkos::Experimental::simd_flag_default); - for (std::size_t idx = 0; idx < nlanes; ++idx) { - mask_type mask(true); - mask[idx] = false; + for (std::size_t idx = 0; idx < nlanes; ++idx) { + mask_type mask(true); + mask[idx] = false; - DataType dst[8] = {0}; - index_type index; - simd_type expected_result; - for (std::size_t i = 0; i < nlanes; ++i) { - dst[i] = (2 + (i * 2)); - index[i] = i; - expected_result[i] = (mask[i]) ? src[index[i]] : dst[i]; + DataType dst[simd_type::size()] = {0}; + index_type index; + simd_type expected_result; + for (std::size_t i = 0; i < nlanes; ++i) { + dst[i] = (2 + (i * 2)); + index[i] = i; + expected_result[i] = (mask[i]) ? 
src[index[i]] : dst[i]; + } + where(mask, src).scatter_to(dst, index); + + simd_type dst_simd; + dst_simd.copy_from(dst, Kokkos::Experimental::simd_flag_default); + + device_check_equality(expected_result, dst_simd, nlanes); } - where(mask, src).scatter_to(dst, index); - - simd_type dst_simd; - dst_simd.copy_from(dst, Kokkos::Experimental::simd_flag_default); - - device_check_equality(expected_result, dst_simd, nlanes); } } @@ -137,7 +146,8 @@ KOKKOS_INLINE_FUNCTION void device_check_where_expr_gather_from() { using mask_type = typename simd_type::mask_type; std::size_t nlanes = simd_type::size(); - DataType src[] = {11, 13, 17, 19, 23, 29, 31, 37}; + DataType src[] = {11, 13, 17, 19, 23, 29, 31, 37, + 53, 71, 79, 83, 89, 93, 97, 103}; for (std::size_t idx = 0; idx < nlanes; ++idx) { mask_type mask(true); diff --git a/lib/kokkos/tpls/desul/include/desul/atomics/Adapt_HIP.hpp b/lib/kokkos/tpls/desul/include/desul/atomics/Adapt_HIP.hpp new file mode 100644 index 0000000000..0eab27fe98 --- /dev/null +++ b/lib/kokkos/tpls/desul/include/desul/atomics/Adapt_HIP.hpp @@ -0,0 +1,77 @@ +/* +Copyright (c) 2019, Lawrence Livermore National Security, LLC +and DESUL project contributors. See the COPYRIGHT file for details. 
+Source: https://github.com/desul/desul + +SPDX-License-Identifier: (BSD-3-Clause) +*/ + +#ifndef DESUL_ATOMICS_ADAPT_HIP_HPP_ +#define DESUL_ATOMICS_ADAPT_HIP_HPP_ + +#include + +namespace desul { +namespace Impl { + +// FIXME same code as GCCMemoryOrder +template +struct HIPMemoryOrder; + +template <> +struct HIPMemoryOrder { + static constexpr int value = __ATOMIC_RELAXED; +}; + +template <> +struct HIPMemoryOrder { + static constexpr int value = __ATOMIC_ACQUIRE; +}; + +template <> +struct HIPMemoryOrder { + static constexpr int value = __ATOMIC_RELEASE; +}; + +template <> +struct HIPMemoryOrder { + static constexpr int value = __ATOMIC_ACQ_REL; +}; + +template <> +struct HIPMemoryOrder { + static constexpr int value = __ATOMIC_SEQ_CST; +}; + +// __HIP_MEMORY_SCOPE_SYSTEM +// __HIP_MEMORY_SCOPE_AGENT +// __HIP_MEMORY_SCOPE_WORKGROUP +// __HIP_MEMORY_SCOPE_WAVEFRONT +// __HIP_MEMORY_SCOPE_SINGLETHREAD +template +struct HIPMemoryScope; + +template <> +struct HIPMemoryScope { + static constexpr int value = __HIP_MEMORY_SCOPE_WORKGROUP; +}; + +template <> +struct HIPMemoryScope { + static constexpr int value = __HIP_MEMORY_SCOPE_AGENT; +}; + +template <> +struct HIPMemoryScope { + static constexpr int value = __HIP_MEMORY_SCOPE_SYSTEM; +}; + +template <> +struct HIPMemoryScope { + static constexpr int value = __HIP_MEMORY_SCOPE_SYSTEM; +}; + +} // namespace Impl +} // namespace desul + +#endif diff --git a/lib/kokkos/tpls/desul/include/desul/atomics/Atomic_Ref.hpp b/lib/kokkos/tpls/desul/include/desul/atomics/Atomic_Ref.hpp index 3d69dcf6c5..e7f9239e03 100644 --- a/lib/kokkos/tpls/desul/include/desul/atomics/Atomic_Ref.hpp +++ b/lib/kokkos/tpls/desul/include/desul/atomics/Atomic_Ref.hpp @@ -6,533 +6,95 @@ Source: https://github.com/desul/desul SPDX-License-Identifier: (BSD-3-Clause) */ -#ifndef DESUL_ATOMIC_REF_IMPL_HPP_ -#define DESUL_ATOMIC_REF_IMPL_HPP_ +#ifndef DESUL_ATOMIC_REF_HPP_ +#define DESUL_ATOMIC_REF_HPP_ -#include #include #include #include -#include 
-#include namespace desul { -namespace Impl { -// TODO current implementation is missing the following: -// * member functions -// * wait -// * notify_one -// * notify_all - -template {}, - bool = std::is_floating_point{}> -struct basic_atomic_ref; - -// base class for non-integral, non-floating-point, non-pointer types template -struct basic_atomic_ref { - static_assert(std::is_trivially_copyable{}, ""); - - private: - T* _ptr; - - // 1/2/4/8/16-byte types must be aligned to at least their size - static constexpr int _min_alignment = (sizeof(T) & (sizeof(T) - 1)) || sizeof(T) > 16 - ? 0 - : sizeof(T); +class AtomicRef { + T* ptr_; public: using value_type = T; + using memory_order = MemoryOrder; + using memory_scope = MemoryScope; - static constexpr bool is_always_lock_free = atomic_always_lock_free(sizeof(T)); + DESUL_FUNCTION explicit AtomicRef(T& obj) : ptr_(&obj) {} - static constexpr std::size_t required_alignment = _min_alignment > alignof(T) - ? _min_alignment - : alignof(T); - - basic_atomic_ref() = delete; - basic_atomic_ref& operator=(basic_atomic_ref const&) = delete; - - basic_atomic_ref(basic_atomic_ref const&) = default; - - explicit basic_atomic_ref(T& obj) : _ptr(std::addressof(obj)) {} - - T operator=(T desired) const noexcept { - this->store(desired); + DESUL_FUNCTION T operator=(T desired) const noexcept { + store(desired); return desired; } - operator T() const noexcept { return this->load(); } + DESUL_FUNCTION operator T() const noexcept { return load(); } - template - DESUL_FUNCTION void store(T desired, - _MemoryOrder order = _MemoryOrder()) const noexcept { - atomic_store(_ptr, desired, order, MemoryScope()); + DESUL_FUNCTION T load() const noexcept { + return desul::atomic_load(ptr_, MemoryOrder(), MemoryScope()); } - template - DESUL_FUNCTION T load(_MemoryOrder order = _MemoryOrder()) const noexcept { - return atomic_load(_ptr, order, MemoryScope()); + DESUL_FUNCTION void store(T desired) const noexcept { + return 
desul::atomic_store(ptr_, desired, MemoryOrder(), MemoryScope()); } - template - DESUL_FUNCTION T exchange(T desired, - _MemoryOrder order = _MemoryOrder()) const noexcept { - return atomic_load(_ptr, desired, order, MemoryScope()); + DESUL_FUNCTION T exchange(T desired) const noexcept { + return desul::atomic_exchange(ptr_, desired, MemoryOrder(), MemoryScope()); } - DESUL_FUNCTION bool is_lock_free() const noexcept { - return atomic_is_lock_free(); + // TODO compare_exchange_{weak,strong} and is_lock_free + +#define DESUL_IMPL_DEFINE_ATOMIC_FETCH_OP(FETCH_OP, OP_FETCH) \ + DESUL_FUNCTION T FETCH_OP(T arg) const noexcept { \ + return desul::atomic_##FETCH_OP(ptr_, arg, MemoryOrder(), MemoryScope()); \ + } \ + DESUL_FUNCTION T OP_FETCH(T arg) const noexcept { \ + return desul::atomic_##OP_FETCH(ptr_, arg, MemoryOrder(), MemoryScope()); \ } - template - DESUL_FUNCTION bool compare_exchange_weak(T& expected, - T desired, - SuccessMemoryOrder success, - FailureMemoryOrder failure) const noexcept { - return atomic_compare_exchange_weak( - _ptr, expected, desired, success, failure, MemoryScope()); - } - - template - DESUL_FUNCTION bool compare_exchange_weak( - T& expected, T desired, _MemoryOrder order = _MemoryOrder()) const noexcept { - return compare_exchange_weak(expected, - desired, - order, - cmpexch_failure_memory_order<_MemoryOrder>(), - MemoryScope()); - } - - template - DESUL_FUNCTION bool compare_exchange_strong( - T& expected, - T desired, - SuccessMemoryOrder success, - FailureMemoryOrder failure) const noexcept { - return atomic_compare_exchange_strong( - _ptr, expected, desired, success, failure, MemoryScope()); - } - - template - DESUL_FUNCTION bool compare_exchange_strong( - T& expected, T desired, _MemoryOrder order = _MemoryOrder()) const noexcept { - return compare_exchange_strong(expected, - desired, - order, - cmpexch_failure_memory_order<_MemoryOrder>(), - MemoryScope()); - } -}; - -// base class for atomic_ref -template -struct basic_atomic_ref { 
- static_assert(std::is_integral{}, ""); - - private: - T* _ptr; - - public: - using value_type = T; - using difference_type = value_type; - - static constexpr bool is_always_lock_free = atomic_always_lock_free(sizeof(T)); - - static constexpr std::size_t required_alignment = sizeof(T) > alignof(T) ? sizeof(T) - : alignof(T); - - basic_atomic_ref() = delete; - basic_atomic_ref& operator=(basic_atomic_ref const&) = delete; - - explicit basic_atomic_ref(T& obj) : _ptr(&obj) {} - - basic_atomic_ref(basic_atomic_ref const&) = default; - - T operator=(T desired) const noexcept { - this->store(desired); - return desired; - } - - operator T() const noexcept { return this->load(); } - - template - DESUL_FUNCTION void store(T desired, - _MemoryOrder order = _MemoryOrder()) const noexcept { - atomic_store(_ptr, desired, order, MemoryScope()); - } - - template - DESUL_FUNCTION T load(_MemoryOrder order = _MemoryOrder()) const noexcept { - return atomic_load(_ptr, order, MemoryScope()); - } - - template - DESUL_FUNCTION T exchange(T desired, - _MemoryOrder order = _MemoryOrder()) const noexcept { - return atomic_load(_ptr, desired, order, MemoryScope()); - } - - DESUL_FUNCTION bool is_lock_free() const noexcept { - return atomic_is_lock_free(); - } - - template - DESUL_FUNCTION bool compare_exchange_weak(T& expected, - T desired, - SuccessMemoryOrder success, - FailureMemoryOrder failure) const noexcept { - return atomic_compare_exchange_weak( - _ptr, expected, desired, success, failure, MemoryScope()); - } - - template - DESUL_FUNCTION bool compare_exchange_weak( - T& expected, T desired, _MemoryOrder order = _MemoryOrder()) const noexcept { - return compare_exchange_weak(expected, - desired, - order, - cmpexch_failure_memory_order<_MemoryOrder>(), - MemoryScope()); - } - - template - DESUL_FUNCTION bool compare_exchange_strong( - T& expected, - T desired, - SuccessMemoryOrder success, - FailureMemoryOrder failure) const noexcept { - return atomic_compare_exchange_strong( - 
_ptr, expected, desired, success, failure, MemoryScope()); - } - - template - DESUL_FUNCTION bool compare_exchange_strong( - T& expected, T desired, _MemoryOrder order = _MemoryOrder()) const noexcept { - return compare_exchange_strong(expected, - desired, - order, - cmpexch_failure_memory_order<_MemoryOrder>(), - MemoryScope()); - } - - template - DESUL_FUNCTION value_type - fetch_add(value_type arg, _MemoryOrder order = _MemoryOrder()) const noexcept { - return atomic_fetch_add(_ptr, arg, order, MemoryScope()); - } - - template - DESUL_FUNCTION value_type - fetch_sub(value_type arg, _MemoryOrder order = _MemoryOrder()) const noexcept { - return atomic_fetch_sub(_ptr, arg, order, MemoryScope()); - } - - template - DESUL_FUNCTION value_type - fetch_and(value_type arg, _MemoryOrder order = _MemoryOrder()) const noexcept { - return atomic_fetch_and(_ptr, arg, order, MemoryScope()); - } - - template - DESUL_FUNCTION value_type - fetch_or(value_type arg, _MemoryOrder order = _MemoryOrder()) const noexcept { - return atomic_fetch_or(_ptr, arg, order, MemoryScope()); - } - - template - DESUL_FUNCTION value_type - fetch_xor(value_type arg, _MemoryOrder order = _MemoryOrder()) const noexcept { - return atomic_fetch_xor(_ptr, arg, order, MemoryScope()); - } - - DESUL_FUNCTION value_type operator++() const noexcept { - return atomic_add_fetch(_ptr, value_type(1), MemoryOrder(), MemoryScope()); - } - - DESUL_FUNCTION value_type operator++(int) const noexcept { return fetch_add(1); } - - DESUL_FUNCTION value_type operator--() const noexcept { - return atomic_sub_fetch(_ptr, value_type(1), MemoryOrder(), MemoryScope()); - } - - DESUL_FUNCTION value_type operator--(int) const noexcept { return fetch_sub(1); } - - DESUL_FUNCTION value_type operator+=(value_type arg) const noexcept { - atomic_add_fetch(_ptr, arg, MemoryOrder(), MemoryScope()); - } - - DESUL_FUNCTION value_type operator-=(value_type arg) const noexcept { - atomic_sub_fetch(_ptr, arg, MemoryOrder(), MemoryScope()); 
- } - - DESUL_FUNCTION value_type operator&=(value_type arg) const noexcept { - atomic_and_fetch(_ptr, arg, MemoryOrder(), MemoryScope()); - } - - DESUL_FUNCTION value_type operator|=(value_type arg) const noexcept { - atomic_or_fetch(_ptr, arg, MemoryOrder(), MemoryScope()); - } - - DESUL_FUNCTION value_type operator^=(value_type arg) const noexcept { - atomic_xor_fetch(_ptr, arg, MemoryOrder(), MemoryScope()); - } -}; - -// base class for atomic_ref -template -struct basic_atomic_ref { - static_assert(std::is_floating_point{}, ""); - - private: - T* _ptr; - - public: - using value_type = T; - using difference_type = value_type; - - static constexpr bool is_always_lock_free = atomic_always_lock_free(sizeof(T)); - - static constexpr std::size_t required_alignment = alignof(T); - - basic_atomic_ref() = delete; - basic_atomic_ref& operator=(basic_atomic_ref const&) = delete; - - explicit basic_atomic_ref(T& obj) : _ptr(&obj) {} - - basic_atomic_ref(basic_atomic_ref const&) = default; - - T operator=(T desired) const noexcept { - this->store(desired); - return desired; - } - - operator T() const noexcept { return this->load(); } - - template - DESUL_FUNCTION void store(T desired, - _MemoryOrder order = _MemoryOrder()) const noexcept { - atomic_store(_ptr, desired, order, MemoryScope()); - } - - template - DESUL_FUNCTION T load(_MemoryOrder order = _MemoryOrder()) const noexcept { - return atomic_load(_ptr, order, MemoryScope()); - } - - template - DESUL_FUNCTION T exchange(T desired, - _MemoryOrder order = _MemoryOrder()) const noexcept { - return atomic_load(_ptr, desired, order, MemoryScope()); - } - - DESUL_FUNCTION bool is_lock_free() const noexcept { - return atomic_is_lock_free(); - } - - template - DESUL_FUNCTION bool compare_exchange_weak(T& expected, - T desired, - SuccessMemoryOrder success, - FailureMemoryOrder failure) const noexcept { - return atomic_compare_exchange_weak( - _ptr, expected, desired, success, failure, MemoryScope()); - } - - template - 
DESUL_FUNCTION bool compare_exchange_weak( - T& expected, T desired, _MemoryOrder order = _MemoryOrder()) const noexcept { - return compare_exchange_weak(expected, - desired, - order, - cmpexch_failure_memory_order<_MemoryOrder>(), - MemoryScope()); - } - - template - DESUL_FUNCTION bool compare_exchange_strong( - T& expected, - T desired, - SuccessMemoryOrder success, - FailureMemoryOrder failure) const noexcept { - return atomic_compare_exchange_strong( - _ptr, expected, desired, success, failure, MemoryScope()); - } - - template - DESUL_FUNCTION bool compare_exchange_strong( - T& expected, T desired, _MemoryOrder order = _MemoryOrder()) const noexcept { - return compare_exchange_strong(expected, - desired, - order, - cmpexch_failure_memory_order<_MemoryOrder>(), - MemoryScope()); - } - - template - DESUL_FUNCTION value_type - fetch_add(value_type arg, _MemoryOrder order = _MemoryOrder()) const noexcept { - return atomic_fetch_add(_ptr, arg, order, MemoryScope()); - } - - template - DESUL_FUNCTION value_type - fetch_sub(value_type arg, _MemoryOrder order = _MemoryOrder()) const noexcept { - return atomic_fetch_sub(_ptr, arg, order, MemoryScope()); - } - - DESUL_FUNCTION value_type operator+=(value_type arg) const noexcept { - atomic_add_fetch(_ptr, arg, MemoryOrder(), MemoryScope()); - } - - DESUL_FUNCTION value_type operator-=(value_type arg) const noexcept { - atomic_sub_fetch(_ptr, arg, MemoryOrder(), MemoryScope()); - } -}; - -// base class for atomic_ref -template -struct basic_atomic_ref { - private: - T** _ptr; - - public: - using value_type = T*; - using difference_type = std::ptrdiff_t; - - static constexpr bool is_always_lock_free = atomic_always_lock_free(sizeof(T)); - - static constexpr std::size_t required_alignment = alignof(T*); - - basic_atomic_ref() = delete; - basic_atomic_ref& operator=(basic_atomic_ref const&) = delete; - - explicit basic_atomic_ref(T*& arg) : _ptr(std::addressof(arg)) {} - - basic_atomic_ref(basic_atomic_ref const&) = 
default; - - T* operator=(T* desired) const noexcept { - this->store(desired); - return desired; - } - - operator T*() const noexcept { return this->load(); } - - template - DESUL_FUNCTION void store(T* desired, - _MemoryOrder order = _MemoryOrder()) const noexcept { - atomic_store(_ptr, desired, order, MemoryScope()); - } - - template - DESUL_FUNCTION T* load(_MemoryOrder order = _MemoryOrder()) const noexcept { - return atomic_load(_ptr, order, MemoryScope()); - } - - template - DESUL_FUNCTION T* exchange(T* desired, - _MemoryOrder order = _MemoryOrder()) const noexcept { - return atomic_load(_ptr, desired, order, MemoryScope()); - } - - DESUL_FUNCTION bool is_lock_free() const noexcept { - return atomic_is_lock_free(); - } - - template - DESUL_FUNCTION bool compare_exchange_weak(T*& expected, - T* desired, - SuccessMemoryOrder success, - FailureMemoryOrder failure) const noexcept { - return atomic_compare_exchange_weak( - _ptr, expected, desired, success, failure, MemoryScope()); - } - - template - DESUL_FUNCTION bool compare_exchange_weak( - T*& expected, T* desired, _MemoryOrder order = _MemoryOrder()) const noexcept { - return compare_exchange_weak(expected, - desired, - order, - cmpexch_failure_memory_order<_MemoryOrder>(), - MemoryScope()); - } - - template - DESUL_FUNCTION bool compare_exchange_strong( - T*& expected, - T* desired, - SuccessMemoryOrder success, - FailureMemoryOrder failure) const noexcept { - return atomic_compare_exchange_strong( - _ptr, expected, desired, success, failure, MemoryScope()); - } - - template - DESUL_FUNCTION bool compare_exchange_strong( - T*& expected, T* desired, _MemoryOrder order = _MemoryOrder()) const noexcept { - return compare_exchange_strong(expected, - desired, - order, - cmpexch_failure_memory_order<_MemoryOrder>(), - MemoryScope()); - } - - template - DESUL_FUNCTION value_type - fetch_add(difference_type d, _MemoryOrder order = _MemoryOrder()) const noexcept { - return atomic_fetch_add(_ptr, _type_size(d), 
order, MemoryScope()); - } - - template - DESUL_FUNCTION value_type - fetch_sub(difference_type d, _MemoryOrder order = _MemoryOrder()) const noexcept { - return atomic_fetch_sub(_ptr, _type_size(d), order, MemoryScope()); - } - - DESUL_FUNCTION value_type operator++() const noexcept { - return atomic_add_fetch(_ptr, _type_size(1), MemoryOrder(), MemoryScope()); - } - - DESUL_FUNCTION value_type operator++(int) const noexcept { return fetch_add(1); } - - DESUL_FUNCTION value_type operator--() const noexcept { - return atomic_sub_fetch(_ptr, _type_size(1), MemoryOrder(), MemoryScope()); - } - - DESUL_FUNCTION value_type operator--(int) const noexcept { return fetch_sub(1); } - - DESUL_FUNCTION value_type operator+=(difference_type d) const noexcept { - atomic_add_fetch(_ptr, _type_size(d), MemoryOrder(), MemoryScope()); - } - - DESUL_FUNCTION value_type operator-=(difference_type d) const noexcept { - atomic_sub_fetch(_ptr, _type_size(d), MemoryOrder(), MemoryScope()); - } - - private: - static constexpr std::ptrdiff_t _type_size(std::ptrdiff_t d) noexcept { - static_assert(std::is_object{}, ""); - return d * sizeof(T); - } -}; - -} // namespace Impl - -template -struct scoped_atomic_ref : Impl::basic_atomic_ref { - explicit scoped_atomic_ref(T& obj) noexcept - : Impl::basic_atomic_ref(obj) {} - - scoped_atomic_ref& operator=(scoped_atomic_ref const&) = delete; - - scoped_atomic_ref(scoped_atomic_ref const&) = default; - - using Impl::basic_atomic_ref::operator=; +#define DESUL_IMPL_DEFINE_ATOMIC_COMPOUND_ASSIGNMENT_OP(COMPD_ASGMT, OP_FETCH) \ + DESUL_FUNCTION T operator COMPD_ASGMT(T arg) const noexcept { return OP_FETCH(arg); } + + DESUL_IMPL_DEFINE_ATOMIC_FETCH_OP(fetch_add, add_fetch) + DESUL_IMPL_DEFINE_ATOMIC_COMPOUND_ASSIGNMENT_OP(+=, add_fetch) + DESUL_IMPL_DEFINE_ATOMIC_FETCH_OP(fetch_sub, sub_fetch) + DESUL_IMPL_DEFINE_ATOMIC_COMPOUND_ASSIGNMENT_OP(-=, sub_fetch) + DESUL_IMPL_DEFINE_ATOMIC_FETCH_OP(fetch_min, min_fetch) + 
DESUL_IMPL_DEFINE_ATOMIC_FETCH_OP(fetch_max, max_fetch) + DESUL_IMPL_DEFINE_ATOMIC_FETCH_OP(fetch_mul, mul_fetch) + DESUL_IMPL_DEFINE_ATOMIC_COMPOUND_ASSIGNMENT_OP(*=, mul_fetch) + DESUL_IMPL_DEFINE_ATOMIC_FETCH_OP(fetch_div, div_fetch) + DESUL_IMPL_DEFINE_ATOMIC_COMPOUND_ASSIGNMENT_OP(/=, div_fetch) + DESUL_IMPL_DEFINE_ATOMIC_FETCH_OP(fetch_mod, mod_fetch) + DESUL_IMPL_DEFINE_ATOMIC_COMPOUND_ASSIGNMENT_OP(%=, mod_fetch) + DESUL_IMPL_DEFINE_ATOMIC_FETCH_OP(fetch_and, and_fetch) + DESUL_IMPL_DEFINE_ATOMIC_COMPOUND_ASSIGNMENT_OP(&=, and_fetch) + DESUL_IMPL_DEFINE_ATOMIC_FETCH_OP(fetch_or, or_fetch) + DESUL_IMPL_DEFINE_ATOMIC_COMPOUND_ASSIGNMENT_OP(|=, or_fetch) + DESUL_IMPL_DEFINE_ATOMIC_FETCH_OP(fetch_xor, xor_fetch) + DESUL_IMPL_DEFINE_ATOMIC_COMPOUND_ASSIGNMENT_OP(^=, xor_fetch) + DESUL_IMPL_DEFINE_ATOMIC_FETCH_OP(fetch_nand, nand_fetch) + +#undef DESUL_IMPL_DEFINE_ATOMIC_COMPOUND_ASSIGNMENT_OP +#undef DESUL_IMPL_DEFINE_ATOMIC_FETCH_OP + +#define DESUL_IMPL_DEFINE_ATOMIC_INCREMENT_DECREMENT(OPER, NAME) \ + DESUL_FUNCTION T fetch_##NAME() const noexcept { \ + return desul::atomic_fetch_##NAME(ptr_, MemoryOrder(), MemoryScope()); \ + } \ + DESUL_FUNCTION T NAME##_fetch() const noexcept { \ + return desul::atomic_##NAME##_fetch(ptr_, MemoryOrder(), MemoryScope()); \ + } \ + DESUL_FUNCTION T operator OPER() const noexcept { return NAME##_fetch(); } \ + DESUL_FUNCTION T operator OPER(int) const noexcept { return fetch_##NAME(); } + + DESUL_IMPL_DEFINE_ATOMIC_INCREMENT_DECREMENT(++, inc) + DESUL_IMPL_DEFINE_ATOMIC_INCREMENT_DECREMENT(--, dec) + +#undef DESUL_IMPL_DEFINE_ATOMIC_INCREMENT_DECREMENT }; } // namespace desul diff --git a/lib/kokkos/tpls/desul/include/desul/atomics/Compare_Exchange_HIP.hpp b/lib/kokkos/tpls/desul/include/desul/atomics/Compare_Exchange_HIP.hpp index 8c909bacdf..0ade34f25d 100644 --- a/lib/kokkos/tpls/desul/include/desul/atomics/Compare_Exchange_HIP.hpp +++ b/lib/kokkos/tpls/desul/include/desul/atomics/Compare_Exchange_HIP.hpp @@ -9,6 +9,7 @@ 
SPDX-License-Identifier: (BSD-3-Clause) #ifndef DESUL_ATOMICS_COMPARE_EXCHANGE_HIP_HPP_ #define DESUL_ATOMICS_COMPARE_EXCHANGE_HIP_HPP_ +#include #include #include #include @@ -17,130 +18,40 @@ SPDX-License-Identifier: (BSD-3-Clause) namespace desul { namespace Impl { -template -__device__ std::enable_if_t device_atomic_compare_exchange( - T* const dest, T compare, T value, MemoryOrderRelaxed, MemoryScope) { - static_assert(sizeof(unsigned int) == 4, - "this function assumes an unsigned int is 32-bit"); - unsigned int return_val = atomicCAS(reinterpret_cast(dest), - reinterpret_cast(compare), - reinterpret_cast(value)); - return reinterpret_cast(return_val); -} -template -__device__ std::enable_if_t device_atomic_compare_exchange( - T* const dest, T compare, T value, MemoryOrderRelaxed, MemoryScope) { - static_assert(sizeof(unsigned long long int) == 8, - "this function assumes an unsigned long long is 64-bit"); - unsigned long long int return_val = - atomicCAS(reinterpret_cast(dest), - reinterpret_cast(compare), - reinterpret_cast(value)); - return reinterpret_cast(return_val); -} +template +struct atomic_exchange_available_hip { + constexpr static bool value = + ((sizeof(T) == 1 && alignof(T) == 1) || (sizeof(T) == 4 && alignof(T) == 4) || + (sizeof(T) == 8 && alignof(T) == 8)) && + std::is_trivially_copyable::value; +}; -template -__device__ std::enable_if_t +template +__device__ std::enable_if_t::value, T> device_atomic_compare_exchange( - T* const dest, T compare, T value, MemoryOrderRelease, MemoryScope) { - T return_val = atomic_compare_exchange( - dest, compare, value, MemoryOrderRelaxed(), MemoryScope()); - atomic_thread_fence(MemoryOrderRelease(), MemoryScope()); - return return_val; + T* const dest, T compare, T value, MemoryOrder, MemoryScope) { + (void)__hip_atomic_compare_exchange_strong( + dest, + &compare, + value, + HIPMemoryOrder::value, + HIPMemoryOrder>::value, + HIPMemoryScope::value); + return compare; } -template -__device__ std::enable_if_t 
-device_atomic_compare_exchange( - T* const dest, T compare, T value, MemoryOrderAcquire, MemoryScope) { - atomic_thread_fence(MemoryOrderAcquire(), MemoryScope()); - T return_val = atomic_compare_exchange( - dest, compare, value, MemoryOrderRelaxed(), MemoryScope()); - return return_val; -} - -template -__device__ std::enable_if_t -device_atomic_compare_exchange( - T* const dest, T compare, T value, MemoryOrderAcqRel, MemoryScope) { - atomic_thread_fence(MemoryOrderAcquire(), MemoryScope()); - T return_val = atomic_compare_exchange( - dest, compare, value, MemoryOrderRelaxed(), MemoryScope()); - atomic_thread_fence(MemoryOrderRelease(), MemoryScope()); - return return_val; -} - -template -__device__ std::enable_if_t device_atomic_exchange( - T* const dest, T value, MemoryOrderRelaxed, MemoryScope) { - static_assert(sizeof(unsigned int) == 4, - "this function assumes an unsigned int is 32-bit"); - unsigned int return_val = atomicExch(reinterpret_cast(dest), - reinterpret_cast(value)); - return reinterpret_cast(return_val); -} -template -__device__ std::enable_if_t device_atomic_exchange( - T* const dest, T value, MemoryOrderRelaxed, MemoryScope) { - static_assert(sizeof(unsigned long long int) == 8, - "this function assumes an unsigned long long is 64-bit"); - unsigned long long int return_val = - atomicExch(reinterpret_cast(dest), - reinterpret_cast(value)); - return reinterpret_cast(return_val); -} - -template -__device__ std::enable_if_t device_atomic_exchange( - T* const dest, T compare, T value, MemoryOrderRelease, MemoryScope) { - T return_val = device_atomic_compare_exchange( - dest, compare, value, MemoryOrderRelaxed(), MemoryScope()); - device_atomic_thread_fence(MemoryOrderRelease(), MemoryScope()); - return reinterpret_cast(return_val); -} - -template -__device__ std::enable_if_t device_atomic_exchange( - T* const dest, T /*compare*/, T value, MemoryOrderAcquire, MemoryScope) { - device_atomic_thread_fence(MemoryOrderAcquire(), MemoryScope()); - T 
return_val = - device_atomic_exchange(dest, value, MemoryOrderRelaxed(), MemoryScope()); - return reinterpret_cast(return_val); -} - -template -__device__ std::enable_if_t device_atomic_exchange( - T* const dest, T value, MemoryOrderAcqRel, MemoryScope) { - device_atomic_thread_fence(MemoryOrderAcquire(), MemoryScope()); - T return_val = - device_atomic_exchange(dest, value, MemoryOrderRelaxed(), MemoryScope()); - device_atomic_thread_fence(MemoryOrderRelease(), MemoryScope()); - return reinterpret_cast(return_val); -} - -template -__device__ std::enable_if_t device_atomic_exchange( - T* const dest, T value, MemoryOrderSeqCst, MemoryScope) { - device_atomic_thread_fence(MemoryOrderAcquire(), MemoryScope()); - T return_val = - device_atomic_exchange(dest, value, MemoryOrderRelaxed(), MemoryScope()); - device_atomic_thread_fence(MemoryOrderRelease(), MemoryScope()); - return reinterpret_cast(return_val); -} - -template -__device__ std::enable_if_t -device_atomic_compare_exchange( - T* const dest, T compare, T value, MemoryOrderSeqCst, MemoryScope) { - device_atomic_thread_fence(MemoryOrderAcquire(), MemoryScope()); - T return_val = device_atomic_compare_exchange( - dest, compare, value, MemoryOrderRelaxed(), MemoryScope()); - device_atomic_thread_fence(MemoryOrderRelease(), MemoryScope()); +template +__device__ std::enable_if_t::value, T> +device_atomic_exchange(T* const dest, T value, MemoryOrder, MemoryScope) { + T return_val = __hip_atomic_exchange(dest, + value, + HIPMemoryOrder::value, + HIPMemoryScope::value); return return_val; } template -__device__ std::enable_if_t<(sizeof(T) != 8) && (sizeof(T) != 4), T> +__device__ std::enable_if_t::value, T> device_atomic_compare_exchange( T* const dest, T compare, T value, MemoryOrder, MemoryScope scope) { // This is a way to avoid deadlock in a warp or wave front @@ -169,7 +80,7 @@ device_atomic_compare_exchange( } template -__device__ std::enable_if_t<(sizeof(T) != 8) && (sizeof(T) != 4), T> +__device__ 
std::enable_if_t::value, T> device_atomic_exchange(T* const dest, T value, MemoryOrder, MemoryScope scope) { // This is a way to avoid deadlock in a warp or wave front T return_val; diff --git a/lib/kokkos/tpls/desul/include/desul/atomics/Fetch_Op_CUDA.hpp b/lib/kokkos/tpls/desul/include/desul/atomics/Fetch_Op_CUDA.hpp index 69ed8bcb9f..68622758d8 100644 --- a/lib/kokkos/tpls/desul/include/desul/atomics/Fetch_Op_CUDA.hpp +++ b/lib/kokkos/tpls/desul/include/desul/atomics/Fetch_Op_CUDA.hpp @@ -69,56 +69,56 @@ inline __device__ unsigned int device_atomic_fetch_inc_mod( unsigned int* inline __device__ unsigned int device_atomic_fetch_dec_mod( unsigned int* ptr, unsigned int val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicDec(ptr, val); } // clang-format on -#define DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP(OP, TYPE) \ +#define DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP(FETCH_OP, TYPE) \ template \ - __device__ TYPE device_atomic_fetch_##OP( \ + __device__ TYPE device_atomic_##FETCH_OP( \ TYPE* ptr, TYPE val, MemoryOrder, MemoryScopeDevice) { \ __threadfence(); \ TYPE return_val = \ - device_atomic_fetch_##OP(ptr, val, MemoryOrderRelaxed(), MemoryScopeDevice()); \ + device_atomic_##FETCH_OP(ptr, val, MemoryOrderRelaxed(), MemoryScopeDevice()); \ __threadfence(); \ return return_val; \ } \ template \ - __device__ TYPE device_atomic_fetch_##OP( \ + __device__ TYPE device_atomic_##FETCH_OP( \ TYPE* ptr, TYPE val, MemoryOrder, MemoryScopeCore) { \ - return device_atomic_fetch_##OP(ptr, val, MemoryOrder(), MemoryScopeDevice()); \ + return device_atomic_##FETCH_OP(ptr, val, MemoryOrder(), MemoryScopeDevice()); \ } -#define DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(OP) \ - DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP(OP, int) \ - DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP(OP, unsigned int) \ - DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP(OP, unsigned long long) +#define DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(FETCH_OP) \ + DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP(FETCH_OP, 
int) \ + DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP(FETCH_OP, unsigned int) \ + DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP(FETCH_OP, unsigned long long) #ifdef DESUL_CUDA_ARCH_IS_PRE_PASCAL -#define DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_FLOATING_POINT(OP) \ - DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP(OP, float) +#define DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_FLOATING_POINT(FETCH_OP) \ + DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP(FETCH_OP, float) #else -#define DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_FLOATING_POINT(OP) \ - DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP(OP, float) \ - DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP(OP, double) +#define DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_FLOATING_POINT(FETCH_OP) \ + DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP(FETCH_OP, float) \ + DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP(FETCH_OP, double) #endif -DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(min) -DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(max) -DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(and) -DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(or) -DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(xor) +DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(fetch_min) +DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(fetch_max) +DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(fetch_and) +DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(fetch_or) +DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(fetch_xor) -DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_FLOATING_POINT(add) -DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(add) -DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_FLOATING_POINT(sub) -DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(sub) +DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_FLOATING_POINT(fetch_add) +DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(fetch_add) +DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_FLOATING_POINT(fetch_sub) +DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(fetch_sub) -DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(inc) -DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(dec) 
+DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(fetch_inc) +DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(fetch_dec) -DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP(inc_mod, unsigned int) -DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP(dec_mod, unsigned int) +DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP(fetch_inc_mod, unsigned int) +DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP(fetch_dec_mod, unsigned int) #undef DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_FLOATING_POINT #undef DESUL_IMPL_CUDA_DEVICE_ATOMIC_FETCH_OP_INTEGRAL diff --git a/lib/kokkos/tpls/desul/include/desul/atomics/Fetch_Op_Generic.hpp b/lib/kokkos/tpls/desul/include/desul/atomics/Fetch_Op_Generic.hpp index a94ff8ef18..530195a832 100644 --- a/lib/kokkos/tpls/desul/include/desul/atomics/Fetch_Op_Generic.hpp +++ b/lib/kokkos/tpls/desul/include/desul/atomics/Fetch_Op_Generic.hpp @@ -18,38 +18,38 @@ SPDX-License-Identifier: (BSD-3-Clause) namespace desul { namespace Impl { -#define DESUL_IMPL_ATOMIC_FETCH_OP(ANNOTATION, HOST_OR_DEVICE, OP) \ - template \ - ANNOTATION T HOST_OR_DEVICE##_atomic_fetch_##OP( \ - T* const dest, const T val, MemoryOrder order, MemoryScope scope) { \ - return HOST_OR_DEVICE##_atomic_fetch_oper( \ - OP##_operator(), dest, val, order, scope); \ - } \ - template \ - ANNOTATION T HOST_OR_DEVICE##_atomic_##OP##_fetch( \ - T* const dest, const T val, MemoryOrder order, MemoryScope scope) { \ - return HOST_OR_DEVICE##_atomic_oper_fetch( \ - OP##_operator(), dest, val, order, scope); \ +#define DESUL_IMPL_ATOMIC_FETCH_OP(ANNOTATION, HOST_OR_DEVICE, FETCH_OP, OP_FETCH) \ + template \ + ANNOTATION T HOST_OR_DEVICE##_atomic_##FETCH_OP( \ + T* const dest, const T val, MemoryOrder order, MemoryScope scope) { \ + return HOST_OR_DEVICE##_atomic_fetch_oper( \ + OP_FETCH##_operator(), dest, val, order, scope); \ + } \ + template \ + ANNOTATION T HOST_OR_DEVICE##_atomic_##OP_FETCH( \ + T* const dest, const T val, MemoryOrder order, MemoryScope scope) { \ + return HOST_OR_DEVICE##_atomic_oper_fetch( \ + OP_FETCH##_operator(), 
dest, val, order, scope); \ } -#define DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE(OP) \ - DESUL_IMPL_ATOMIC_FETCH_OP(DESUL_IMPL_HOST_FUNCTION, host, OP) \ - DESUL_IMPL_ATOMIC_FETCH_OP(DESUL_IMPL_DEVICE_FUNCTION, device, OP) +#define DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE(FETCH_OP, OP_FETCH) \ + DESUL_IMPL_ATOMIC_FETCH_OP(DESUL_IMPL_HOST_FUNCTION, host, FETCH_OP, OP_FETCH) \ + DESUL_IMPL_ATOMIC_FETCH_OP(DESUL_IMPL_DEVICE_FUNCTION, device, FETCH_OP, OP_FETCH) -DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE(add) -DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE(sub) -DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE(max) -DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE(min) -DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE(mul) -DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE(div) -DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE(mod) -DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE(and) -DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE(or) -DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE(xor) -DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE(nand) +DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE(fetch_add, add_fetch) +DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE(fetch_sub, sub_fetch) +DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE(fetch_max, max_fetch) +DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE(fetch_min, min_fetch) +DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE(fetch_mul, mul_fetch) +DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE(fetch_div, div_fetch) +DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE(fetch_mod, mod_fetch) +DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE(fetch_and, and_fetch) +DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE(fetch_or, or_fetch) +DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE(fetch_xor, xor_fetch) +DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE(fetch_nand, nand_fetch) -DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE(inc_mod) -DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE(dec_mod) +DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE(fetch_inc_mod, inc_mod_fetch) +DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE(fetch_dec_mod, dec_mod_fetch) #undef 
DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE #undef DESUL_IMPL_ATOMIC_FETCH_OP @@ -59,13 +59,13 @@ DESUL_IMPL_ATOMIC_FETCH_OP_HOST_AND_DEVICE(dec_mod) ANNOTATION T HOST_OR_DEVICE##_atomic_fetch_##OP( \ T* const dest, const unsigned int val, MemoryOrder order, MemoryScope scope) { \ return HOST_OR_DEVICE##_atomic_fetch_oper( \ - OP##_operator(), dest, val, order, scope); \ + OP##_fetch_operator(), dest, val, order, scope); \ } \ template \ ANNOTATION T HOST_OR_DEVICE##_atomic_##OP##_fetch( \ T* const dest, const unsigned int val, MemoryOrder order, MemoryScope scope) { \ return HOST_OR_DEVICE##_atomic_oper_fetch( \ - OP##_operator(), dest, val, order, scope); \ + OP##_fetch_operator(), dest, val, order, scope); \ } #define DESUL_IMPL_ATOMIC_FETCH_OP_SHIFT_HOST_AND_DEVICE(OP) \ @@ -78,19 +78,21 @@ DESUL_IMPL_ATOMIC_FETCH_OP_SHIFT_HOST_AND_DEVICE(rshift) #undef DESUL_IMPL_ATOMIC_FETCH_OP_SHIFT_HOST_AND_DEVICE #undef DESUL_IMPL_ATOMIC_FETCH_OP_SHIFT -#define DESUL_IMPL_ATOMIC_LOAD_AND_STORE(ANNOTATION, HOST_OR_DEVICE) \ - template \ - ANNOTATION T HOST_OR_DEVICE##_atomic_load( \ - const T* const dest, MemoryOrder order, MemoryScope scope) { \ - return HOST_OR_DEVICE##_atomic_fetch_oper( \ - load_operator(), const_cast(dest), T(), order, scope); \ - } \ - \ - template \ - ANNOTATION void HOST_OR_DEVICE##_atomic_store( \ - T* const dest, const T val, MemoryOrder order, MemoryScope scope) { \ - (void)HOST_OR_DEVICE##_atomic_fetch_oper( \ - store_operator(), dest, val, order, scope); \ +// NOTE: using atomic_oper_fetch in the fallback implementation of atomic_store to avoid +// reading potentially uninitialized values which would yield undefined behavior. 
+#define DESUL_IMPL_ATOMIC_LOAD_AND_STORE(ANNOTATION, HOST_OR_DEVICE) \ + template \ + ANNOTATION T HOST_OR_DEVICE##_atomic_load( \ + const T* const dest, MemoryOrder order, MemoryScope scope) { \ + return HOST_OR_DEVICE##_atomic_fetch_oper( \ + load_fetch_operator(), const_cast(dest), T(), order, scope); \ + } \ + \ + template \ + ANNOTATION void HOST_OR_DEVICE##_atomic_store( \ + T* const dest, const T val, MemoryOrder order, MemoryScope scope) { \ + (void)HOST_OR_DEVICE##_atomic_oper_fetch( \ + store_fetch_operator(), dest, val, order, scope); \ } DESUL_IMPL_ATOMIC_LOAD_AND_STORE(DESUL_IMPL_HOST_FUNCTION, host) diff --git a/lib/kokkos/tpls/desul/include/desul/atomics/Fetch_Op_HIP.hpp b/lib/kokkos/tpls/desul/include/desul/atomics/Fetch_Op_HIP.hpp index e9c749809d..8d9bd86825 100644 --- a/lib/kokkos/tpls/desul/include/desul/atomics/Fetch_Op_HIP.hpp +++ b/lib/kokkos/tpls/desul/include/desul/atomics/Fetch_Op_HIP.hpp @@ -9,99 +9,108 @@ SPDX-License-Identifier: (BSD-3-Clause) #ifndef DESUL_ATOMICS_FECH_OP_HIP_HPP_ #define DESUL_ATOMICS_FECH_OP_HIP_HPP_ +#include + namespace desul { namespace Impl { -// clang-format off -inline __device__ int device_atomic_fetch_add( int* ptr, int val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicAdd(ptr, val); } -inline __device__ unsigned int device_atomic_fetch_add( unsigned int* ptr, unsigned int val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicAdd(ptr, val); } -inline __device__ unsigned long long device_atomic_fetch_add(unsigned long long* ptr, unsigned long long val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicAdd(ptr, val); } -inline __device__ float device_atomic_fetch_add( float* ptr, float val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicAdd(ptr, val); } -inline __device__ double device_atomic_fetch_add( double* ptr, double val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicAdd(ptr, val); } - -inline __device__ int device_atomic_fetch_sub( int* ptr, int val, MemoryOrderRelaxed, 
MemoryScopeDevice) { return atomicSub(ptr, val); } -inline __device__ unsigned int device_atomic_fetch_sub( unsigned int* ptr, unsigned int val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicSub(ptr, val); } -inline __device__ unsigned long long device_atomic_fetch_sub(unsigned long long* ptr, unsigned long long val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicAdd(ptr, -val); } -inline __device__ float device_atomic_fetch_sub( float* ptr, float val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicAdd(ptr, -val); } -inline __device__ double device_atomic_fetch_sub( double* ptr, double val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicAdd(ptr, -val); } - -inline __device__ int device_atomic_fetch_min( int* ptr, int val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicMin(ptr, val); } -inline __device__ unsigned int device_atomic_fetch_min( unsigned int* ptr, unsigned int val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicMin(ptr, val); } -inline __device__ unsigned long long device_atomic_fetch_min(unsigned long long* ptr, unsigned long long val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicMin(ptr, val); } - -inline __device__ int device_atomic_fetch_max( int* ptr, int val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicMax(ptr, val); } -inline __device__ unsigned int device_atomic_fetch_max( unsigned int* ptr, unsigned int val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicMax(ptr, val); } -inline __device__ unsigned long long device_atomic_fetch_max(unsigned long long* ptr, unsigned long long val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicMax(ptr, val); } - -inline __device__ int device_atomic_fetch_and( int* ptr, int val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicAnd(ptr, val); } -inline __device__ unsigned int device_atomic_fetch_and( unsigned int* ptr, unsigned int val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicAnd(ptr, val); } -inline __device__ unsigned 
long long device_atomic_fetch_and(unsigned long long* ptr, unsigned long long val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicAnd(ptr, val); } - -inline __device__ int device_atomic_fetch_or ( int* ptr, int val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicOr (ptr, val); } -inline __device__ unsigned int device_atomic_fetch_or ( unsigned int* ptr, unsigned int val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicOr (ptr, val); } -inline __device__ unsigned long long device_atomic_fetch_or (unsigned long long* ptr, unsigned long long val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicOr (ptr, val); } - -inline __device__ int device_atomic_fetch_xor( int* ptr, int val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicXor(ptr, val); } -inline __device__ unsigned int device_atomic_fetch_xor( unsigned int* ptr, unsigned int val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicXor(ptr, val); } -inline __device__ unsigned long long device_atomic_fetch_xor(unsigned long long* ptr, unsigned long long val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicXor(ptr, val); } - -inline __device__ int device_atomic_fetch_inc( int* ptr, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicAdd(ptr, 1 ); } -inline __device__ unsigned int device_atomic_fetch_inc( unsigned int* ptr, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicAdd(ptr, 1u ); } -inline __device__ unsigned long long device_atomic_fetch_inc(unsigned long long* ptr, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicAdd(ptr, 1ull); } - -inline __device__ int device_atomic_fetch_dec( int* ptr, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicSub(ptr, 1 ); } -inline __device__ unsigned int device_atomic_fetch_dec( unsigned int* ptr, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicSub(ptr, 1u ); } -inline __device__ unsigned long long device_atomic_fetch_dec(unsigned long long* ptr, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicAdd(ptr, -1 ); } 
- -inline __device__ unsigned int device_atomic_fetch_inc_mod( unsigned int* ptr, unsigned int val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicInc(ptr, val); } -inline __device__ unsigned int device_atomic_fetch_dec_mod( unsigned int* ptr, unsigned int val, MemoryOrderRelaxed, MemoryScopeDevice) { return atomicDec(ptr, val); } -// clang-format on - -#define DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP(OP, TYPE) \ - template \ - __device__ TYPE device_atomic_fetch_##OP( \ - TYPE* ptr, TYPE val, MemoryOrder, MemoryScopeDevice) { \ - __threadfence(); \ - TYPE return_val = \ - device_atomic_fetch_##OP(ptr, val, MemoryOrderRelaxed(), MemoryScopeDevice()); \ - __threadfence(); \ - return return_val; \ - } \ - template \ - __device__ TYPE device_atomic_fetch_##OP( \ - TYPE* ptr, TYPE val, MemoryOrder, MemoryScopeCore) { \ - return device_atomic_fetch_##OP(ptr, val, MemoryOrder(), MemoryScopeDevice()); \ +#define DESUL_IMPL_HIP_ATOMIC_FETCH_OP(OP, T) \ + template \ + __device__ inline T device_atomic_fetch_##OP( \ + T* ptr, T val, MemoryOrder, MemoryScope) { \ + return __hip_atomic_fetch_##OP(ptr, \ + val, \ + HIPMemoryOrder::value, \ + HIPMemoryScope::value); \ } -#define DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(OP) \ - DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP(OP, int) \ - DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP(OP, unsigned int) \ - DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP(OP, unsigned long long) +#define DESUL_IMPL_HIP_ATOMIC_FETCH_OP_INTEGRAL(OP) \ + DESUL_IMPL_HIP_ATOMIC_FETCH_OP(OP, int) \ + DESUL_IMPL_HIP_ATOMIC_FETCH_OP(OP, long long) \ + DESUL_IMPL_HIP_ATOMIC_FETCH_OP(OP, unsigned int) \ + DESUL_IMPL_HIP_ATOMIC_FETCH_OP(OP, unsigned long long) -#define DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP_FLOATING_POINT(OP) \ - DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP(OP, float) \ - DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP(OP, double) +#define DESUL_IMPL_HIP_ATOMIC_FETCH_OP_FLOATING_POINT(OP) \ + DESUL_IMPL_HIP_ATOMIC_FETCH_OP(OP, float) \ + DESUL_IMPL_HIP_ATOMIC_FETCH_OP(OP, double) 
-DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(min) -DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(max) -DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(and) -DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(or) -DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(xor) +DESUL_IMPL_HIP_ATOMIC_FETCH_OP_INTEGRAL(add) +DESUL_IMPL_HIP_ATOMIC_FETCH_OP_INTEGRAL(min) +DESUL_IMPL_HIP_ATOMIC_FETCH_OP_INTEGRAL(max) +DESUL_IMPL_HIP_ATOMIC_FETCH_OP_INTEGRAL(and) +DESUL_IMPL_HIP_ATOMIC_FETCH_OP_INTEGRAL(or) +DESUL_IMPL_HIP_ATOMIC_FETCH_OP_INTEGRAL(xor) +DESUL_IMPL_HIP_ATOMIC_FETCH_OP_FLOATING_POINT(add) +// atomic min/max gives the wrong results (tested with ROCm 6.0 on Frontier) +// DESUL_IMPL_HIP_ATOMIC_FETCH_OP_FLOATING_POINT(min) +// DESUL_IMPL_HIP_ATOMIC_FETCH_OP_FLOATING_POINT(max) -DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP_FLOATING_POINT(add) -DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(add) -DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP_FLOATING_POINT(sub) -DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(sub) +#undef DESUL_IMPL_HIP_ATOMIC_FETCH_OP_FLOATING_POINT +#undef DESUL_IMPL_HIP_ATOMIC_FETCH_OP_INTEGRAL +#undef DESUL_IMPL_HIP_ATOMIC_FETCH_OP -DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(inc) -DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP_INTEGRAL(dec) +#define DESUL_IMPL_HIP_ATOMIC_FETCH_SUB(T) \ + template \ + __device__ inline T device_atomic_fetch_sub( \ + T* ptr, T val, MemoryOrder, MemoryScope) { \ + return __hip_atomic_fetch_add(ptr, \ + -val, \ + HIPMemoryOrder::value, \ + HIPMemoryScope::value); \ + } -DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP(inc_mod, unsigned int) -DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP(dec_mod, unsigned int) +DESUL_IMPL_HIP_ATOMIC_FETCH_SUB(int) +DESUL_IMPL_HIP_ATOMIC_FETCH_SUB(long long) +DESUL_IMPL_HIP_ATOMIC_FETCH_SUB(unsigned int) +DESUL_IMPL_HIP_ATOMIC_FETCH_SUB(unsigned long long) +DESUL_IMPL_HIP_ATOMIC_FETCH_SUB(float) +DESUL_IMPL_HIP_ATOMIC_FETCH_SUB(double) -#undef DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP_FLOATING_POINT -#undef 
DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP_INTEGRAL -#undef DESUL_IMPL_HIP_DEVICE_ATOMIC_FETCH_OP +#undef DESUL_IMPL_HIP_ATOMIC_FETCH_SUB + +#define DESUL_IMPL_HIP_ATOMIC_FETCH_INC(T) \ + template \ + __device__ inline T device_atomic_fetch_inc(T* ptr, MemoryOrder, MemoryScope) { \ + return __hip_atomic_fetch_add(ptr, \ + 1, \ + HIPMemoryOrder::value, \ + HIPMemoryScope::value); \ + } \ + template \ + __device__ inline T device_atomic_fetch_dec(T* ptr, MemoryOrder, MemoryScope) { \ + return __hip_atomic_fetch_add(ptr, \ + -1, \ + HIPMemoryOrder::value, \ + HIPMemoryScope::value); \ + } + +DESUL_IMPL_HIP_ATOMIC_FETCH_INC(int) +DESUL_IMPL_HIP_ATOMIC_FETCH_INC(long long) +DESUL_IMPL_HIP_ATOMIC_FETCH_INC(unsigned int) +DESUL_IMPL_HIP_ATOMIC_FETCH_INC(unsigned long long) + +#undef DESUL_IMPL_HIP_ATOMIC_FETCH_INC + +#define DESUL_IMPL_HIP_ATOMIC_FETCH_INC_MOD(MEMORY_SCOPE, MEMORY_SCOPE_STRING_LITERAL) \ + template \ + __device__ inline unsigned int device_atomic_fetch_inc_mod( \ + unsigned int* ptr, unsigned int val, MemoryOrder, MEMORY_SCOPE) { \ + return __builtin_amdgcn_atomic_inc32( \ + ptr, val, HIPMemoryOrder::value, MEMORY_SCOPE_STRING_LITERAL); \ + } \ + template \ + __device__ inline unsigned int device_atomic_fetch_dec_mod( \ + unsigned int* ptr, unsigned int val, MemoryOrder, MEMORY_SCOPE) { \ + return __builtin_amdgcn_atomic_dec32( \ + ptr, val, HIPMemoryOrder::value, MEMORY_SCOPE_STRING_LITERAL); \ + } + +DESUL_IMPL_HIP_ATOMIC_FETCH_INC_MOD(MemoryScopeCore, "workgroup") +DESUL_IMPL_HIP_ATOMIC_FETCH_INC_MOD(MemoryScopeDevice, "agent") +DESUL_IMPL_HIP_ATOMIC_FETCH_INC_MOD(MemoryScopeNode, "") +DESUL_IMPL_HIP_ATOMIC_FETCH_INC_MOD(MemoryScopeSystem, "") + +#undef DESUL_IMPL_HIP_ATOMIC_FETCH_INC_MOD } // namespace Impl } // namespace desul diff --git a/lib/kokkos/tpls/desul/include/desul/atomics/Operator_Function_Objects.hpp b/lib/kokkos/tpls/desul/include/desul/atomics/Operator_Function_Objects.hpp index be90cdbbd8..1f5159c4f8 100644 --- 
a/lib/kokkos/tpls/desul/include/desul/atomics/Operator_Function_Objects.hpp +++ b/lib/kokkos/tpls/desul/include/desul/atomics/Operator_Function_Objects.hpp @@ -18,7 +18,7 @@ namespace desul { namespace Impl { template -struct max_operator { +struct max_fetch_operator { DESUL_FORCEINLINE_FUNCTION static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) { return (val1 > val2 ? val1 : val2); @@ -30,7 +30,7 @@ struct max_operator { }; template -struct min_operator { +struct min_fetch_operator { DESUL_FORCEINLINE_FUNCTION static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) { return (val1 < val2 ? val1 : val2); @@ -70,55 +70,55 @@ constexpr DESUL_FUNCTION } template -struct add_operator { +struct add_fetch_operator { DESUL_FORCEINLINE_FUNCTION static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) { return val1 + val2; } }; template -struct sub_operator { +struct sub_fetch_operator { DESUL_FORCEINLINE_FUNCTION static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) { return val1 - val2; } }; template -struct mul_operator { +struct mul_fetch_operator { DESUL_FORCEINLINE_FUNCTION static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) { return val1 * val2; } }; template -struct div_operator { +struct div_fetch_operator { DESUL_FORCEINLINE_FUNCTION static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) { return val1 / val2; } }; template -struct mod_operator { +struct mod_fetch_operator { DESUL_FORCEINLINE_FUNCTION static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) { return val1 % val2; } }; template -struct and_operator { +struct and_fetch_operator { DESUL_FORCEINLINE_FUNCTION static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) { return val1 & val2; } }; template -struct or_operator { +struct or_fetch_operator { DESUL_FORCEINLINE_FUNCTION static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) { return val1 | val2; } }; template -struct xor_operator { +struct xor_fetch_operator { 
DESUL_FORCEINLINE_FUNCTION static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) { return val1 ^ val2; } }; template -struct nand_operator { +struct nand_fetch_operator { DESUL_FORCEINLINE_FUNCTION static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) { return ~(val1 & val2); @@ -126,7 +126,7 @@ struct nand_operator { }; template -struct lshift_operator { +struct lshift_fetch_operator { DESUL_FORCEINLINE_FUNCTION static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) { return val1 << val2; @@ -134,7 +134,7 @@ struct lshift_operator { }; template -struct rshift_operator { +struct rshift_fetch_operator { DESUL_FORCEINLINE_FUNCTION static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) { return val1 >> val2; @@ -142,7 +142,7 @@ struct rshift_operator { }; template -struct inc_mod_operator { +struct inc_mod_fetch_operator { DESUL_FORCEINLINE_FUNCTION static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) { return ((val1 >= val2) ? Scalar1(0) : val1 + Scalar1(1)); @@ -150,7 +150,7 @@ struct inc_mod_operator { }; template -struct dec_mod_operator { +struct dec_mod_fetch_operator { DESUL_FORCEINLINE_FUNCTION static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) { return (((val1 == Scalar1(0)) | (val1 > val2)) ? 
val2 : (val1 - Scalar1(1))); @@ -158,13 +158,13 @@ struct dec_mod_operator { }; template -struct store_operator { +struct store_fetch_operator { DESUL_FORCEINLINE_FUNCTION static Scalar1 apply(const Scalar1&, const Scalar2& val2) { return val2; } }; template -struct load_operator { +struct load_fetch_operator { DESUL_FORCEINLINE_FUNCTION static Scalar1 apply(const Scalar1& val1, const Scalar2&) { return val1; } }; diff --git a/lib/kokkos/tpls/mdspan/include/experimental/__p0009_bits/config.hpp b/lib/kokkos/tpls/mdspan/include/experimental/__p0009_bits/config.hpp index 8e42a37ba7..24166462e7 100644 --- a/lib/kokkos/tpls/mdspan/include/experimental/__p0009_bits/config.hpp +++ b/lib/kokkos/tpls/mdspan/include/experimental/__p0009_bits/config.hpp @@ -205,7 +205,7 @@ static_assert(_MDSPAN_CPLUSPLUS >= MDSPAN_CXX_STD_14, "mdspan requires C++14 or #endif #ifndef _MDSPAN_USE_CLASS_TEMPLATE_ARGUMENT_DEDUCTION -# if (!defined(__NVCC__) || (__CUDACC_VER_MAJOR__ >= 11 && __CUDACC_VER_MINOR__ >= 7)) && \ +# if (!defined(__NVCC__) || (__CUDACC_VER_MAJOR__ * 100 + __CUDACC_VER_MINOR__ * 10 >= 1170)) && \ ((defined(__cpp_deduction_guides) && __cpp_deduction_guides >= 201703) || \ (!defined(__cpp_deduction_guides) && MDSPAN_HAS_CXX_17)) # define _MDSPAN_USE_CLASS_TEMPLATE_ARGUMENT_DEDUCTION 1 diff --git a/lib/kokkos/tpls/mdspan/include/experimental/__p0009_bits/extents.hpp b/lib/kokkos/tpls/mdspan/include/experimental/__p0009_bits/extents.hpp index 9a28c3ed5c..d58d37732d 100644 --- a/lib/kokkos/tpls/mdspan/include/experimental/__p0009_bits/extents.hpp +++ b/lib/kokkos/tpls/mdspan/include/experimental/__p0009_bits/extents.hpp @@ -16,12 +16,15 @@ #pragma once #include "dynamic_extent.hpp" +#include "utility.hpp" #ifdef __cpp_lib_span #include #endif #include +#include +#include #include namespace MDSPAN_IMPL_STANDARD_NAMESPACE { @@ -30,6 +33,7 @@ namespace detail { // Function used to check compatibility of extents in converting constructor // can't be a private member function for 
some reason. template +MDSPAN_INLINE_FUNCTION static constexpr std::integral_constant __check_compatible_extents( std::integral_constant, std::integer_sequence, @@ -46,6 +50,7 @@ struct __compare_extent_compatible : std::integral_constant +MDSPAN_INLINE_FUNCTION static constexpr std::integral_constant< bool, _MDSPAN_FOLD_AND(__compare_extent_compatible::value)> __check_compatible_extents( @@ -59,8 +64,8 @@ template MDSPAN_INLINE_FUNCTION static constexpr bool are_valid_indices() { return - (std::is_convertible::value && ... && true) && - (std::is_nothrow_constructible::value && ... && true); + _MDSPAN_FOLD_AND(std::is_convertible::value) && + _MDSPAN_FOLD_AND(std::is_nothrow_constructible::value); } // ------------------------------------------------------------------ @@ -538,14 +543,9 @@ public: MDSPAN_INLINE_FUNCTION friend constexpr bool operator==(const extents &lhs, const extents &rhs) noexcept { - if constexpr (rank() != extents::rank()) { - return false; - } else { - using common_t = std::common_type_t; - for (size_type r = 0; r < m_rank; r++) - if(static_cast(rhs.extent(r)) != static_cast(lhs.extent(r))) return false; - } - return true; + return + rank() == extents::rank() && + detail::rankwise_equal(detail::with_rank{}, rhs, lhs, detail::extent); } #if !(MDSPAN_HAS_CXX_20) @@ -614,5 +614,80 @@ static #endif constexpr bool __is_extents_v = __is_extents::value; +template +MDSPAN_INLINE_FUNCTION +constexpr void +check_lower_bound(InputIndexType user_index, + ExtentsIndexType /* current_extent */, + std::true_type /* is_signed */) +{ + (void) user_index; // prevent unused variable warning +#ifdef _MDSPAN_DEBUG + assert(static_cast(user_index) >= 0); +#endif +} + +template +MDSPAN_INLINE_FUNCTION +constexpr void +check_lower_bound(InputIndexType /* user_index */, + ExtentsIndexType /* current_extent */, + std::false_type /* is_signed */) +{} + +template +MDSPAN_INLINE_FUNCTION +constexpr void +check_upper_bound(InputIndexType user_index, + ExtentsIndexType 
current_extent) +{ + (void) user_index; // prevent unused variable warnings + (void) current_extent; +#ifdef _MDSPAN_DEBUG + assert(static_cast(user_index) < current_extent); +#endif +} + +// Returning true to use AND fold instead of comma +// CPP14 mode doesn't like the use of void expressions +// with the way the _MDSPAN_FOLD_AND is set up +template +MDSPAN_INLINE_FUNCTION +constexpr bool +check_one_index(InputIndex user_index, + ExtentsIndexType current_extent) +{ + check_lower_bound(user_index, current_extent, + std::integral_constant::value>{}); + check_upper_bound(user_index, current_extent); + return true; +} + +template +MDSPAN_INLINE_FUNCTION +constexpr void +check_all_indices_helper(std::index_sequence, + const extents& exts, + Indices... indices) +{ + // Suppress warning about statement has no effect + (void) _MDSPAN_FOLD_AND( + (check_one_index(indices, exts.extent(RankIndices))) + ); +} + +template +MDSPAN_INLINE_FUNCTION +constexpr void +check_all_indices(const extents& exts, + Indices... 
indices) +{ + check_all_indices_helper(std::make_index_sequence(), + exts, indices...); +} + } // namespace detail } // namespace MDSPAN_IMPL_STANDARD_NAMESPACE diff --git a/lib/kokkos/tpls/mdspan/include/experimental/__p0009_bits/layout_left.hpp b/lib/kokkos/tpls/mdspan/include/experimental/__p0009_bits/layout_left.hpp index 83ed9ef7fe..222fba7aa0 100644 --- a/lib/kokkos/tpls/mdspan/include/experimental/__p0009_bits/layout_left.hpp +++ b/lib/kokkos/tpls/mdspan/include/experimental/__p0009_bits/layout_left.hpp @@ -18,8 +18,11 @@ #include "macros.hpp" #include "trait_backports.hpp" #include "extents.hpp" +#include "layout_stride.hpp" +#include "utility.hpp" +#if MDSPAN_HAS_CXX_17 #include "../__p2642_bits/layout_padded_fwd.hpp" -#include +#endif #include namespace MDSPAN_IMPL_STANDARD_NAMESPACE { @@ -133,11 +136,11 @@ class layout_left::mapping { : __extents(__other.extents()) { MDSPAN_IMPL_PROPOSED_NAMESPACE::detail:: - check_padded_layout_converting_constructor_mandates(); + check_padded_layout_converting_constructor_mandates< + extents_type, _Mapping>(detail::with_rank{}); MDSPAN_IMPL_PROPOSED_NAMESPACE::detail:: check_padded_layout_converting_constructor_preconditions< - extents_type>(__other); + extents_type>(detail::with_rank{}, __other); } #endif @@ -156,17 +159,7 @@ class layout_left::mapping { * TODO: check precondition * other.required_span_size() is a representable value of type index_type */ - #if !defined(_MDSPAN_HAS_CUDA) && !defined(_MDSPAN_HAS_HIP) && !defined(NDEBUG) - if constexpr (extents_type::rank() > 0) { - index_type stride = 1; - using common_t = std::common_type_t; - for(rank_type r=0; r<__extents.rank(); r++) { - if(static_cast(stride) != static_cast(other.stride(r))) - std::abort(); // ("Assigning layout_stride to layout_left with invalid strides."); - stride *= __extents.extent(r); - } - } - #endif + detail::validate_strides(detail::with_rank{}, layout_left{}, __extents, other); } MDSPAN_INLINE_FUNCTION_DEFAULTED 
_MDSPAN_CONSTEXPR_14_DEFAULTED mapping& operator=(mapping const&) noexcept = default; @@ -194,6 +187,9 @@ class layout_left::mapping { ) _MDSPAN_HOST_DEVICE constexpr index_type operator()(Indices... idxs) const noexcept { +#if ! defined(NDEBUG) + detail::check_all_indices(this->extents(), idxs...); +#endif // ! NDEBUG return __compute_offset(__rank_count<0, extents_type::rank()>(), static_cast(idxs)...); } diff --git a/lib/kokkos/tpls/mdspan/include/experimental/__p0009_bits/layout_right.hpp b/lib/kokkos/tpls/mdspan/include/experimental/__p0009_bits/layout_right.hpp index 3d3927df7b..284569f653 100644 --- a/lib/kokkos/tpls/mdspan/include/experimental/__p0009_bits/layout_right.hpp +++ b/lib/kokkos/tpls/mdspan/include/experimental/__p0009_bits/layout_right.hpp @@ -18,9 +18,11 @@ #include "macros.hpp" #include "trait_backports.hpp" #include "extents.hpp" -#include #include "layout_stride.hpp" +#include "utility.hpp" +#if MDSPAN_HAS_CXX_17 #include "../__p2642_bits/layout_padded_fwd.hpp" +#endif namespace MDSPAN_IMPL_STANDARD_NAMESPACE { @@ -134,11 +136,11 @@ class layout_right::mapping { : __extents(__other.extents()) { MDSPAN_IMPL_PROPOSED_NAMESPACE::detail:: - check_padded_layout_converting_constructor_mandates(); + check_padded_layout_converting_constructor_mandates< + extents_type, _Mapping>(detail::with_rank{}); MDSPAN_IMPL_PROPOSED_NAMESPACE::detail:: check_padded_layout_converting_constructor_preconditions< - extents_type>(__other); + extents_type>(detail::with_rank{}, __other); } #endif @@ -157,17 +159,7 @@ class layout_right::mapping { * TODO: check precondition * other.required_span_size() is a representable value of type index_type */ - #if !defined(_MDSPAN_HAS_CUDA) && !defined(_MDSPAN_HAS_HIP) && !defined(NDEBUG) - if constexpr (extents_type::rank() > 0) { - index_type stride = 1; - using common_t = std::common_type_t; - for(rank_type r=__extents.rank(); r>0; r--) { - if(static_cast(stride) != static_cast(other.stride(r-1))) - std::abort(); // 
("Assigning layout_stride to layout_right with invalid strides."); - stride *= __extents.extent(r-1); - } - } - #endif + detail::validate_strides(detail::with_rank{}, layout_right{}, __extents, other); } MDSPAN_INLINE_FUNCTION_DEFAULTED _MDSPAN_CONSTEXPR_14_DEFAULTED mapping& operator=(mapping const&) noexcept = default; @@ -195,6 +187,9 @@ class layout_right::mapping { ) _MDSPAN_HOST_DEVICE constexpr index_type operator()(Indices... idxs) const noexcept { +#if ! defined(NDEBUG) + detail::check_all_indices(this->extents(), idxs...); +#endif // ! NDEBUG return __compute_offset(__rank_count<0, extents_type::rank()>(), static_cast(idxs)...); } diff --git a/lib/kokkos/tpls/mdspan/include/experimental/__p0009_bits/layout_stride.hpp b/lib/kokkos/tpls/mdspan/include/experimental/__p0009_bits/layout_stride.hpp index 15ad577d14..d6cdad2ab2 100644 --- a/lib/kokkos/tpls/mdspan/include/experimental/__p0009_bits/layout_stride.hpp +++ b/lib/kokkos/tpls/mdspan/include/experimental/__p0009_bits/layout_stride.hpp @@ -19,14 +19,16 @@ #include "extents.hpp" #include "trait_backports.hpp" #include "compressed_pair.hpp" +#include "utility.hpp" #if !defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) # include "no_unique_address.hpp" #endif -#include -#include #include +#include +#include + #ifdef __cpp_lib_span #include #endif @@ -38,11 +40,11 @@ namespace MDSPAN_IMPL_STANDARD_NAMESPACE { struct layout_left { template - class mapping; + class mapping; }; struct layout_right { template - class mapping; + class mapping; }; namespace detail { @@ -79,6 +81,7 @@ namespace detail { std::bool_constant::value; }; #endif + } // namespace detail struct layout_stride { @@ -199,6 +202,20 @@ struct layout_stride { return __strides_storage_t{static_cast(s[Idxs])...}; } + MDSPAN_TEMPLATE_REQUIRES( + class IntegralType, + // The is_convertible condition is added to make sfinae valid + // the extents_type::rank() > 0 is added to avoid use of non-standard zero length c-array + (std::is_convertible::value 
&& (extents_type::rank() > 0)) + ) + MDSPAN_INLINE_FUNCTION + // despite the requirement some compilers still complain about zero length array during parsing + // making it length 1 now, but since the thing can't be instantiated due to requirement the actual + // instantiation of strides_storage will not fail despite mismatching length + static constexpr const __strides_storage_t fill_strides(mdspan_non_standard_tag, const IntegralType (&s)[extents_type::rank()>0?extents_type::rank():1]) { + return __strides_storage_t{static_cast(s[Idxs])...}; + } + #ifdef __cpp_lib_span template MDSPAN_INLINE_FUNCTION @@ -225,7 +242,11 @@ struct layout_stride { // Can't use defaulted parameter in the __deduction_workaround template because of a bug in MSVC warning C4348. using __impl = __deduction_workaround>; - static constexpr __strides_storage_t strides_storage(std::true_type) { + static constexpr __strides_storage_t strides_storage(detail::with_rank<0>) { + return {}; + } + template + static constexpr __strides_storage_t strides_storage(detail::with_rank) { __strides_storage_t s{}; extents_type e; @@ -237,9 +258,6 @@ struct layout_stride { return s; } - static constexpr __strides_storage_t strides_storage(std::false_type) { - return {}; - } //---------------------------------------------------------------------------- @@ -262,7 +280,7 @@ struct layout_stride { : __base_t(__base_t{__member_pair_t( #endif extents_type(), - __strides_storage_t(strides_storage(std::integral_constant 0)>{})) + __strides_storage_t(strides_storage(detail::with_rank{})) #if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) } #else @@ -309,6 +327,48 @@ struct layout_stride { */ } + MDSPAN_TEMPLATE_REQUIRES( + class IntegralTypes, + /* requires */ ( + // MSVC 19.32 does not like using index_type here, requires the typename Extents::index_type + // error C2641: cannot deduce template arguments for 'MDSPAN_IMPL_STANDARD_NAMESPACE::layout_stride::mapping' + _MDSPAN_TRAIT(std::is_convertible, const 
std::remove_const_t&, typename Extents::index_type) && + _MDSPAN_TRAIT(std::is_nothrow_constructible, typename Extents::index_type, const std::remove_const_t&) && + (Extents::rank() > 0) + ) + ) + MDSPAN_INLINE_FUNCTION + constexpr + mapping( + mdspan_non_standard_tag, + extents_type const& e, + // despite the requirement some compilers still complain about zero length array during parsing + // making it length 1 now, but since the thing can't be instantiated due to requirement the actual + // instantiation of strides_storage will not fail despite mismatching length + IntegralTypes (&s)[extents_type::rank()>0?extents_type::rank():1] + ) noexcept +#if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) + : __members{ +#else + : __base_t(__base_t{__member_pair_t( +#endif + e, __strides_storage_t(__impl::fill_strides(mdspan_non_standard, s)) +#if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) + } +#else + )}) +#endif + { + /* + * TODO: check preconditions + * - s[i] > 0 is true for all i in the range [0, rank_ ). + * - REQUIRED-SPAN-SIZE(e, s) is a representable value of type index_type ([basic.fundamental]). + * - If rank_ is greater than 0, then there exists a permutation P of the integers in the + * range [0, rank_), such that s[ pi ] >= s[ pi − 1 ] * e.extent( pi − 1 ) is true for + * all i in the range [1, rank_ ), where pi is the ith element of P. + */ + } + #ifdef __cpp_lib_span MDSPAN_TEMPLATE_REQUIRES( class IntegralTypes, @@ -434,6 +494,9 @@ struct layout_stride { ) MDSPAN_FORCE_INLINE_FUNCTION constexpr index_type operator()(Indices... idxs) const noexcept { +#if ! defined(NDEBUG) + detail::check_all_indices(this->extents(), idxs...); +#endif // ! 
NDEBUG return static_cast(__impl::_call_op_impl(*this, static_cast(idxs)...)); } @@ -444,32 +507,48 @@ struct layout_stride { MDSPAN_INLINE_FUNCTION static constexpr bool is_always_strided() noexcept { return true; } MDSPAN_INLINE_FUNCTION static constexpr bool is_unique() noexcept { return true; } - MDSPAN_INLINE_FUNCTION _MDSPAN_CONSTEXPR_14 bool is_exhaustive() const noexcept { - if constexpr (extents_type::rank() == 0) - return true; - else { - index_type span_size = required_span_size(); - if (span_size == static_cast(0)) { - if constexpr (extents_type::rank() == 1) { - return stride(0) == 1; - } else { - rank_type r_largest = 0; - for (rank_type r = 1; r < extents_type::rank(); r++) { - if (stride(r) > stride(r_largest)) { - r_largest = r; - } - } - for (rank_type r = 0; r < extents_type::rank(); r++) { - if (extents().extent(r) == 0 && r != r_largest) { - return false; - } - } - return true; - } - } else { - return required_span_size() == __get_size(extents(), std::make_index_sequence()); + + private: + constexpr bool exhaustive_for_nonzero_span_size() const + { + return required_span_size() == __get_size(extents(), std::make_index_sequence()); + } + + constexpr bool is_exhaustive_impl(detail::with_rank<0>) const + { + return true; + } + constexpr bool is_exhaustive_impl(detail::with_rank<1>) const + { + if (required_span_size() != static_cast(0)) { + return exhaustive_for_nonzero_span_size(); + } + return stride(0) == 1; + } + template + constexpr bool is_exhaustive_impl(detail::with_rank) const + { + if (required_span_size() != static_cast(0)) { + return exhaustive_for_nonzero_span_size(); + } + + rank_type r_largest = 0; + for (rank_type r = 1; r < extents_type::rank(); r++) { + if (stride(r) > stride(r_largest)) { + r_largest = r; } } + for (rank_type r = 0; r < extents_type::rank(); r++) { + if (extents().extent(r) == 0 && r != r_largest) { + return false; + } + } + return true; + } + + public: + MDSPAN_INLINE_FUNCTION _MDSPAN_CONSTEXPR_14 bool 
is_exhaustive() const noexcept { + return is_exhaustive_impl(detail::with_rank{}); } MDSPAN_INLINE_FUNCTION static constexpr bool is_strided() noexcept { return true; } @@ -498,15 +577,9 @@ struct layout_stride { #endif MDSPAN_INLINE_FUNCTION friend constexpr bool operator==(const mapping& x, const StridedLayoutMapping& y) noexcept { - bool strides_match = true; - if constexpr (extents_type::rank() > 0) { - using common_t = std::common_type_t; - for(rank_type r = 0; r < extents_type::rank(); r++) - strides_match = strides_match && (static_cast(x.stride(r)) == static_cast(y.stride(r))); - } return (x.extents() == y.extents()) && (__impl::__OFFSET(y) == static_cast(0)) && - strides_match; + detail::rankwise_equal(detail::with_rank{}, x, y, detail::stride); } // This one is not technically part of the proposal. Just here to make implementation a bit more optimal hopefully @@ -532,7 +605,7 @@ struct layout_stride { ) MDSPAN_INLINE_FUNCTION friend constexpr bool operator!=(const mapping& x, const StridedLayoutMapping& y) noexcept { - return not (x == y); + return !(x == y); } MDSPAN_TEMPLATE_REQUIRES( @@ -561,4 +634,34 @@ struct layout_stride { }; }; +namespace detail { + +template +constexpr void validate_strides(with_rank<0>, Layout, const Extents&, const Mapping&) +{} + +template +constexpr void validate_strides(with_rank, Layout, const Extents& ext, const Mapping& other) +{ + static_assert(std::is_same::value && + (std::is_same::value || + std::is_same::value) + , "This function is only intended to validate construction of " + "a layout_left or layout_right mapping from a layout_stride mapping."); + + constexpr auto is_left = std::is_same::value; + + typename Extents::index_type expected_stride = 1; + + for (std::size_t r = 0; r < N; r++) { + const std::size_t s = is_left ? 
r : N - 1 - r; + + MDSPAN_IMPL_PRECONDITION(common_integral_compare(expected_stride, other.stride(s)) + && "invalid strides for layout_{left,right}"); + + expected_stride *= ext.extent(s); + } +} + +} // namespace detail } // end namespace MDSPAN_IMPL_STANDARD_NAMESPACE diff --git a/lib/kokkos/tpls/mdspan/include/experimental/__p0009_bits/macros.hpp b/lib/kokkos/tpls/mdspan/include/experimental/__p0009_bits/macros.hpp index 3eeb39755c..b60c426177 100644 --- a/lib/kokkos/tpls/mdspan/include/experimental/__p0009_bits/macros.hpp +++ b/lib/kokkos/tpls/mdspan/include/experimental/__p0009_bits/macros.hpp @@ -18,7 +18,12 @@ #include "config.hpp" +#include +#include #include // std::is_void +#if defined(_MDSPAN_HAS_CUDA) || defined(_MDSPAN_HAS_HIP) || defined(_MDSPAN_HAS_SYCL) +#include "assert.h" +#endif #ifndef _MDSPAN_HOST_DEVICE # if defined(_MDSPAN_HAS_CUDA) || defined(_MDSPAN_HAS_HIP) @@ -101,6 +106,69 @@ #define MDSPAN_IMPL_STANDARD_NAMESPACE_STRING MDSPAN_PP_STRINGIFY(MDSPAN_IMPL_STANDARD_NAMESPACE) #define MDSPAN_IMPL_PROPOSED_NAMESPACE_STRING MDSPAN_PP_STRINGIFY(MDSPAN_IMPL_STANDARD_NAMESPACE) "::" MDSPAN_PP_STRINGIFY(MDSPAN_IMPL_PROPOSED_NAMESPACE) +namespace MDSPAN_IMPL_STANDARD_NAMESPACE { +namespace detail { + +#if defined(_MDSPAN_HAS_CUDA) || defined(_MDSPAN_HAS_HIP) +MDSPAN_FUNCTION inline void default_precondition_violation_handler(const char* cond, const char* file, unsigned line) +{ + printf("%s:%u: precondition failure: `%s`\n", file, line, cond); + assert(0); +} +#elif defined(_MDSPAN_HAS_SYCL) +MDSPAN_FUNCTION inline void default_precondition_violation_handler(const char* cond, const char* file, unsigned line) +{ + sycl::ext::oneapi::experimental::printf("%s:%u: precondition failure: `%s`\n", file, line, cond); + assert(0); +} +#else +MDSPAN_FUNCTION inline void default_precondition_violation_handler(const char* cond, const char* file, unsigned line) +{ + std::fprintf(stderr, "%s:%u: precondition failure: `%s`\n", file, line, cond); + std::abort(); +} 
+#endif + +} // namespace detail +} // namespace MDSPAN_IMPL_STANDARD_NAMESPACE + +#ifndef MDSPAN_IMPL_PRECONDITION_VIOLATION_HANDLER +#define MDSPAN_IMPL_PRECONDITION_VIOLATION_HANDLER(cond, file, line) \ + MDSPAN_IMPL_STANDARD_NAMESPACE::detail::default_precondition_violation_handler(cond, file, line) +#endif + +#ifndef MDSPAN_IMPL_CHECK_PRECONDITION + #ifndef NDEBUG + #define MDSPAN_IMPL_CHECK_PRECONDITION 0 + #else + #define MDSPAN_IMPL_CHECK_PRECONDITION 1 + #endif +#endif + +namespace MDSPAN_IMPL_STANDARD_NAMESPACE { +namespace detail { + +template +MDSPAN_FUNCTION constexpr void precondition(const char* cond, const char* file, unsigned line) +{ + if (!check) { return; } + // in case the macro doesn't use the arguments for custom macros + (void) cond; + (void) file; + (void) line; + MDSPAN_IMPL_PRECONDITION_VIOLATION_HANDLER(cond, file, line); +} + +} // namespace detail +} // namespace MDSPAN_IMPL_STANDARD_NAMESPACE + +#define MDSPAN_IMPL_PRECONDITION(...) \ + do { \ + if (!(__VA_ARGS__)) { \ + MDSPAN_IMPL_STANDARD_NAMESPACE::detail::precondition(#__VA_ARGS__, __FILE__, __LINE__); \ + } \ + } while (0) + // end Preprocessor helpers }}}1 //============================================================================== @@ -574,7 +642,7 @@ __fold_left_assign_impl(Args&&... args) { template -constexpr __mdspan_enable_fold_comma __fold_comma_impl(Args&&... args) noexcept { return { }; } +constexpr __mdspan_enable_fold_comma __fold_comma_impl(Args&&...) 
noexcept { return { }; } template struct __bools; diff --git a/lib/kokkos/tpls/mdspan/include/experimental/__p0009_bits/mdspan.hpp b/lib/kokkos/tpls/mdspan/include/experimental/__p0009_bits/mdspan.hpp index d6ec49e65b..23114aa550 100644 --- a/lib/kokkos/tpls/mdspan/include/experimental/__p0009_bits/mdspan.hpp +++ b/lib/kokkos/tpls/mdspan/include/experimental/__p0009_bits/mdspan.hpp @@ -34,6 +34,8 @@ class mdspan private: static_assert(detail::__is_extents_v, MDSPAN_IMPL_STANDARD_NAMESPACE_STRING "::mdspan's Extents template parameter must be a specialization of " MDSPAN_IMPL_STANDARD_NAMESPACE_STRING "::extents."); + static_assert(std::is_same::value, + MDSPAN_IMPL_STANDARD_NAMESPACE_STRING "::mdspan's ElementType template parameter must be the same as its AccessorPolicy::element_type."); // Workaround for non-deducibility of the index sequence template parameter if it's given at the top level template @@ -321,7 +323,7 @@ public: #endif // MDSPAN_USE_PAREN_OPERATOR MDSPAN_INLINE_FUNCTION constexpr size_type size() const noexcept { - return __impl::__size(*this); + return static_cast(__impl::__size(*this)); }; MDSPAN_INLINE_FUNCTION constexpr bool empty() const noexcept { diff --git a/lib/kokkos/tpls/mdspan/include/experimental/__p0009_bits/utility.hpp b/lib/kokkos/tpls/mdspan/include/experimental/__p0009_bits/utility.hpp new file mode 100644 index 0000000000..e690cd6939 --- /dev/null +++ b/lib/kokkos/tpls/mdspan/include/experimental/__p0009_bits/utility.hpp @@ -0,0 +1,72 @@ +#pragma once + +#include +#include + +namespace MDSPAN_IMPL_STANDARD_NAMESPACE { +namespace detail { + +// type alias used for rank-based tag dispatch +// +// this is used to enable alternatives to constexpr if when building for C++14 +// +template +using with_rank = std::integral_constant; + +template +MDSPAN_INLINE_FUNCTION +constexpr bool common_integral_compare(I1 x, I2 y) +{ + static_assert(std::is_integral::value && + std::is_integral::value, ""); + + using I = std::common_type_t; + 
return static_cast(x) == static_cast(y); +} + +template +MDSPAN_INLINE_FUNCTION +constexpr bool rankwise_equal(with_rank<0>, const T1&, const T2&, F) +{ + return true; +} + +template +MDSPAN_INLINE_FUNCTION +constexpr bool rankwise_equal(with_rank, const T1& x, const T2& y, F func) +{ + bool match = true; + + for (std::size_t r = 0; r < N; r++) { + match = match && common_integral_compare(func(x, r), func(y, r)); + } + + return match; +} + +constexpr struct +{ + template + MDSPAN_INLINE_FUNCTION + constexpr auto operator()(const T& x, I i) const + { + return x.extent(i); + } +} extent; + +constexpr struct +{ + template + MDSPAN_INLINE_FUNCTION + constexpr auto operator()(const T& x, I i) const + { + return x.stride(i); + } +} stride; + +} // namespace detail + +constexpr struct mdspan_non_standard_tag { +} mdspan_non_standard; + +} // namespace MDSPAN_IMPL_STANDARD_NAMESPACE diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_Graph.cpp b/lib/kokkos/tpls/mdspan/include/experimental/__p2389_bits/dims.hpp similarity index 59% rename from lib/kokkos/core/unit_test/serial/TestSerial_Graph.cpp rename to lib/kokkos/tpls/mdspan/include/experimental/__p2389_bits/dims.hpp index bff64d83e2..00045215c4 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_Graph.cpp +++ b/lib/kokkos/tpls/mdspan/include/experimental/__p2389_bits/dims.hpp @@ -14,5 +14,15 @@ // //@HEADER -#include -#include +#pragma once + +// backward compatibility import into experimental +namespace MDSPAN_IMPL_STANDARD_NAMESPACE { +namespace MDSPAN_IMPL_PROPOSED_NAMESPACE { + +template< ::std::size_t Rank, class IndexType = std::size_t> +using dims = + :: MDSPAN_IMPL_STANDARD_NAMESPACE :: dextents; + +} // namespace MDSPAN_IMPL_PROPOSED_NAMESPACE +} // namespace MDSPAN_IMPL_STANDARD_NAMESPACE diff --git a/lib/kokkos/tpls/mdspan/include/experimental/__p2630_bits/submdspan_mapping.hpp b/lib/kokkos/tpls/mdspan/include/experimental/__p2630_bits/submdspan_mapping.hpp index ca6948c9a9..e1390fdeb5 100644 --- 
a/lib/kokkos/tpls/mdspan/include/experimental/__p2630_bits/submdspan_mapping.hpp +++ b/lib/kokkos/tpls/mdspan/include/experimental/__p2630_bits/submdspan_mapping.hpp @@ -17,10 +17,30 @@ #pragma once #include -#include #include +#include #include // index_sequence +// Suppress spurious warning with NVCC about no return statement. +// This is a known issue in NVCC and NVC++ +// Depending on the CUDA and GCC version we need both the builtin +// and the diagnostic push. I tried really hard to find something shorter +// but no luck ... +#if defined __NVCC__ +#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ +#pragma nv_diagnostic push +#pragma nv_diag_suppress = implicit_return_from_non_void_function +#else +#ifdef __CUDA_ARCH__ +#pragma diagnostic push +#pragma diag_suppress implicit_return_from_non_void_function +#endif +#endif +#elif defined __NVCOMPILER +#pragma diagnostic push +#pragma diag_suppress = implicit_return_from_non_void_function +#endif + namespace MDSPAN_IMPL_STANDARD_NAMESPACE { //****************************************** // Return type of submdspan_mapping overloads @@ -31,18 +51,68 @@ template struct submdspan_mapping_result { }; namespace detail { +// We use const Slice& and not Slice&& because the various +// submdspan_mapping_impl overloads use their slices arguments +// multiple times. This makes perfect forwarding not useful, but we +// still don't want to pass those (possibly of size 64 x 3 bits) +// objects by value. +template +MDSPAN_INLINE_FUNCTION constexpr bool +one_slice_out_of_bounds(const IndexType &ext, const Slice &slice) { + using common_t = + std::common_type_t; + return static_cast(detail::first_of(slice)) == + static_cast(ext); +} + +template +MDSPAN_INLINE_FUNCTION constexpr bool +any_slice_out_of_bounds_helper(std::index_sequence, + const extents &exts, + const Slices &... 
slices) { + return _MDSPAN_FOLD_OR( + (one_slice_out_of_bounds(exts.extent(RankIndices), slices))); +} + +template +MDSPAN_INLINE_FUNCTION constexpr bool +any_slice_out_of_bounds(const extents &exts, + const Slices &... slices) { + return any_slice_out_of_bounds_helper( + std::make_index_sequence(), exts, slices...); +} + // constructs sub strides template -MDSPAN_INLINE_FUNCTION -constexpr auto -construct_sub_strides(const SrcMapping &src_mapping, - std::index_sequence, - const std::tuple &slices_stride_factor) { +MDSPAN_INLINE_FUNCTION constexpr auto construct_sub_strides( + const SrcMapping &src_mapping, std::index_sequence, + const std::tuple &slices_stride_factor) { using index_type = typename SrcMapping::index_type; return std::array{ (static_cast(src_mapping.stride(InvMapIdxs)) * static_cast(std::get(slices_stride_factor)))...}; } + +template +struct is_range_slice { + constexpr static bool value = + std::is_same_v || + std::is_convertible_v>; +}; + +template +constexpr bool is_range_slice_v = is_range_slice::value; + +template +struct is_index_slice { + constexpr static bool value = std::is_convertible_v; +}; + +template +constexpr bool is_index_slice_v = is_index_slice::value; + } // namespace detail //********************************** @@ -51,52 +121,90 @@ construct_sub_strides(const SrcMapping &src_mapping, namespace detail { // Figure out whether to preserve layout_left -template -struct preserve_layout_left_mapping; +template +struct deduce_layout_left_submapping; -template -struct preserve_layout_left_mapping, SubRank, - SliceSpecifiers...> { - constexpr static bool value = - // Preserve layout for rank 0 - (SubRank == 0) || - ( - // Slice specifiers up to subrank need to be full_extent_t - except - // for the last one which could also be tuple but not a strided index - // range slice specifiers after subrank are integrals - ((Idx > SubRank - 1) || // these are only integral slice specifiers - (std::is_same_v) || - ((Idx == SubRank - 1) && - 
std::is_convertible_v>)) && - ...); +template +struct deduce_layout_left_submapping< + IndexType, SubRank, std::index_sequence, SliceSpecifiers...> { + + using count_range = index_sequence_scan_impl< + 0u, (is_index_slice_v ? 0u : 1u)...>; + + constexpr static int gap_len = + (((Idx > 0 && count_range::get(Idx) == 1 && + is_index_slice_v) + ? 1 + : 0) + + ... + 0); + + MDSPAN_INLINE_FUNCTION + constexpr static bool layout_left_value() { + // Use layout_left for rank 0 + if constexpr (SubRank == 0) { + return true; + // Use layout_left for rank 1 result if leftmost slice specifier is range like + } else if constexpr (SubRank == 1) { + return ((Idx > 0 || is_range_slice_v)&&...); + } else { + // Preserve if leftmost SubRank-1 slices are full_extent_t and + // the slice at idx Subrank - 1 is a range and + // for idx > SubRank the slice is an index + return ((((Idx < SubRank - 1) && std::is_same_v) || + ((Idx == SubRank - 1) && is_range_slice_v) || + ((Idx > SubRank - 1) && is_index_slice_v)) && ...); + } +#if defined(__NVCC__) && !defined(__CUDA_ARCH__) && defined(__GNUC__) + __builtin_unreachable(); +#endif + } + + MDSPAN_INLINE_FUNCTION + constexpr static bool layout_left_padded_value() { + // Technically could also keep layout_left_padded for SubRank==0 + // and SubRank==1 with leftmost slice specifier being a contiguous range + // but we intercept these cases separately + + // In all other cases: + // leftmost slice must be range + // then there can be a gap with index slices + // then SubRank - 2 full_extent slices + // then another range slice + // then more index slices + // e.g. R I I I F F F R I I for obtaining a rank-5 from a rank-10 + return ((((Idx == 0) && is_range_slice_v) || + ((Idx > 0 && Idx <= gap_len) && is_index_slice_v) || + ((Idx > gap_len && Idx < gap_len + SubRank - 1) && std::is_same_v) || + ((Idx == gap_len + SubRank - 1) && is_range_slice_v) || + ((Idx > gap_len + SubRank - 1) && is_index_slice_v)) && ... 
); + } }; + +// We are reusing the same thing for layout_left and layout_left_padded +// For layout_left as source StaticStride is static_extent(0) +template +struct compute_s_static_layout_left { + // Neither StaticStride nor any of the provided extents can be zero. + // StaticStride can never be zero, the static_extents we are looking at are associated with + // integral slice specifiers - which wouldn't be valid for zero extent + template + MDSPAN_INLINE_FUNCTION + static constexpr size_t value(std::index_sequence) { + size_t val = ((Idx>0 && Idx<=NumGaps ? (Extents::static_extent(Idx) == dynamic_extent?0:Extents::static_extent(Idx)) : 1) * ... * (StaticStride == dynamic_extent?0:StaticStride)); + return val == 0?dynamic_extent:val; + } +}; + } // namespace detail -// Suppress spurious warning with NVCC about no return statement. -// This is a known issue in NVCC and NVC++ -// Depending on the CUDA and GCC version we need both the builtin -// and the diagnostic push. I tried really hard to find something shorter -// but no luck ... -#if defined __NVCC__ - #ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ - #pragma nv_diagnostic push - #pragma nv_diag_suppress = implicit_return_from_non_void_function - #else - #ifdef __CUDA_ARCH__ - #pragma diagnostic push - #pragma diag_suppress implicit_return_from_non_void_function - #endif - #endif -#elif defined __NVCOMPILER - #pragma diagnostic push - #pragma diag_suppress = implicit_return_from_non_void_function -#endif // Actual submdspan mapping call template template -MDSPAN_INLINE_FUNCTION -constexpr auto -layout_left::mapping::submdspan_mapping_impl(SliceSpecifiers... slices) const { +MDSPAN_INLINE_FUNCTION constexpr auto +layout_left::mapping::submdspan_mapping_impl( + SliceSpecifiers... slices) const { // compute sub extents using src_ext_t = Extents; @@ -104,51 +212,137 @@ layout_left::mapping::submdspan_mapping_impl(SliceSpecifiers... 
slices) using dst_ext_t = decltype(dst_ext); // figure out sub layout type - constexpr bool preserve_layout = detail::preserve_layout_left_mapping< - decltype(std::make_index_sequence()), dst_ext_t::rank(), - SliceSpecifiers...>::value; - using dst_layout_t = - std::conditional_t; - using dst_mapping_t = typename dst_layout_t::template mapping; + using deduce_layout = detail::deduce_layout_left_submapping< + typename dst_ext_t::index_type, dst_ext_t::rank(), + std::make_index_sequence, + SliceSpecifiers...>; - if constexpr (std::is_same_v) { + // Figure out if any slice's lower bound equals the corresponding extent. + // If so, bypass evaluating the layout mapping. This fixes LWG Issue 4060. + const bool out_of_bounds = + detail::any_slice_out_of_bounds(this->extents(), slices...); + auto offset = static_cast( + out_of_bounds ? this->required_span_size() + : this->operator()(detail::first_of(slices)...)); + + if constexpr (deduce_layout::layout_left_value()) { // layout_left case + using dst_mapping_t = typename layout_left::template mapping; + return submdspan_mapping_result{dst_mapping_t(dst_ext), + offset}; + } else if constexpr (deduce_layout::layout_left_padded_value()) { + constexpr size_t S_static = MDSPAN_IMPL_STANDARD_NAMESPACE::detail::compute_s_static_layout_left::value(std::make_index_sequence()); + using dst_mapping_t = typename MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_left_padded::template mapping; return submdspan_mapping_result{ - dst_mapping_t(dst_ext), - static_cast(this->operator()(detail::first_of(slices)...))}; + dst_mapping_t(dst_ext, stride(1 + deduce_layout::gap_len)), offset}; } else { // layout_stride case - auto inv_map = detail::inv_map_rank( - std::integral_constant(), - std::index_sequence<>(), - slices...); - return submdspan_mapping_result{ - dst_mapping_t(dst_ext, detail::construct_sub_strides( - *this, inv_map, - // HIP needs deduction guides to have markups so we need to be explicit - // NVCC 11.0 has a bug with deduction guide 
here, tested that 11.2 does not have the issue - #if defined(_MDSPAN_HAS_HIP) || (defined(__NVCC__) && (__CUDACC_VER_MAJOR__ * 100 + __CUDACC_VER_MINOR__ * 10) < 1120) - std::tuple{detail::stride_of(slices)...})), - #else - std::tuple{detail::stride_of(slices)...})), - #endif - static_cast(this->operator()(detail::first_of(slices)...))}; + using dst_mapping_t = typename layout_stride::mapping; + auto inv_map = detail::inv_map_rank(std::integral_constant(), + std::index_sequence<>(), slices...); + return submdspan_mapping_result { + dst_mapping_t(dst_ext, + detail::construct_sub_strides( + *this, inv_map, +// HIP needs deduction guides to have markups so we need to be explicit +// NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have +// the issue But Clang-CUDA also doesn't accept the use of deduction guide so +// disable it for CUDA altogether +#if defined(_MDSPAN_HAS_HIP) || defined(_MDSPAN_HAS_CUDA) + std::tuple{ + detail::stride_of(slices)...})), +#else + std::tuple{detail::stride_of(slices)...})), +#endif + offset + }; } #if defined(__NVCC__) && !defined(__CUDA_ARCH__) && defined(__GNUC__) __builtin_unreachable(); #endif } -#if defined __NVCC__ - #ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ - #pragma nv_diagnostic pop - #else - #ifdef __CUDA_ARCH__ - #pragma diagnostic pop - #endif - #endif -#elif defined __NVCOMPILER - #pragma diagnostic pop + +template +template +template +MDSPAN_INLINE_FUNCTION constexpr auto +MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_left_padded::mapping::submdspan_mapping_impl( + SliceSpecifiers... 
slices) const { + + // compute sub extents + using src_ext_t = Extents; + auto dst_ext = submdspan_extents(extents(), slices...); + using dst_ext_t = decltype(dst_ext); + + if constexpr (Extents::rank() == 0) { // rank-0 case + using dst_mapping_t = typename MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_left_padded::template mapping; + return submdspan_mapping_result{*this, 0}; + } else { + const bool out_of_bounds = + MDSPAN_IMPL_STANDARD_NAMESPACE::detail::any_slice_out_of_bounds(this->extents(), slices...); + auto offset = static_cast( + out_of_bounds ? this->required_span_size() + : this->operator()(MDSPAN_IMPL_STANDARD_NAMESPACE::detail::first_of(slices)...)); + if constexpr (dst_ext_t::rank() == 0) { // result rank-0 + // The following for some reasons leads to compiler error later, while not using a typedef works: + // Compilers: CUDA 11.2 with GCC 9.1 + // + // using dst_mapping_t = typename layout_left::template mapping; + // return submdspan_mapping_result{dst_mapping_t{dst_ext}, offset}; + // + // Error: submdspan_mapping.hpp:299:23: error: 'dst_mapping_t' does not name a type + // 299 | using dst_mapping_t = typename layout_left::template mapping; + // The same error is given (about dst_mapping_t not naming type) when a different name is used in 299: + // using dst_mapping_t2 = typename layout_left::template mapping; + + return submdspan_mapping_result> + {typename layout_left::template mapping{dst_ext}, offset}; + } else { // general case + // Figure out if any slice's lower bound equals the corresponding extent. + // If so, bypass evaluating the layout mapping. This fixes LWG Issue 4060. 
+ // figure out sub layout type + using deduce_layout = MDSPAN_IMPL_STANDARD_NAMESPACE::detail::deduce_layout_left_submapping< + typename dst_ext_t::index_type, dst_ext_t::rank(), + decltype(std::make_index_sequence()), + SliceSpecifiers...>; + + if constexpr (deduce_layout::layout_left_value() && dst_ext_t::rank() == 1) { // getting rank-1 from leftmost + using dst_mapping_t = typename layout_left::template mapping; + return submdspan_mapping_result{dst_mapping_t{dst_ext}, offset}; + } else if constexpr (deduce_layout::layout_left_padded_value()) { // can keep layout_left_padded + constexpr size_t S_static = MDSPAN_IMPL_STANDARD_NAMESPACE::detail::compute_s_static_layout_left::value(std::make_index_sequence()); + using dst_mapping_t = typename MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_left_padded::template mapping; + return submdspan_mapping_result{ + dst_mapping_t(dst_ext, stride(1 + deduce_layout::gap_len)), offset}; + } else { // layout_stride + auto inv_map = MDSPAN_IMPL_STANDARD_NAMESPACE::detail::inv_map_rank(std::integral_constant(), + std::index_sequence<>(), slices...); + using dst_mapping_t = typename layout_stride::template mapping; + return submdspan_mapping_result { + dst_mapping_t(dst_ext, + MDSPAN_IMPL_STANDARD_NAMESPACE::detail::construct_sub_strides( + *this, inv_map, +// HIP needs deduction guides to have markups so we need to be explicit +// NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have +// the issue But Clang-CUDA also doesn't accept the use of deduction guide so +// disable it for CUDA alltogether +#if defined(_MDSPAN_HAS_HIP) || defined(_MDSPAN_HAS_CUDA) + std::tuple{ + MDSPAN_IMPL_STANDARD_NAMESPACE::detail::stride_of(slices)...})), +#else + std::tuple{MDSPAN_IMPL_STANDARD_NAMESPACE::detail::stride_of(slices)...})), #endif + offset + }; + } + } + } + + +#if defined(__NVCC__) && !defined(__CUDA_ARCH__) && defined(__GNUC__) + __builtin_unreachable(); +#endif +} //********************************** // layout_right 
submdspan_mapping @@ -156,134 +350,276 @@ layout_left::mapping::submdspan_mapping_impl(SliceSpecifiers... slices) namespace detail { // Figure out whether to preserve layout_right -template -struct preserve_layout_right_mapping; +template +struct deduce_layout_right_submapping; -template -struct preserve_layout_right_mapping, SubRank, - SliceSpecifiers...> { - constexpr static size_t SrcRank = sizeof...(SliceSpecifiers); - constexpr static bool value = - // Preserve layout for rank 0 - (SubRank == 0) || - ( - // The last subrank slice specifiers need to be full_extent_t - except - // for the srcrank-subrank one which could also be tuple but not a - // strided index range slice specifiers before srcrank-subrank are - // integrals - ((Idx < - SrcRank - SubRank) || // these are only integral slice specifiers - (std::is_same_v) || - ((Idx == SrcRank - SubRank) && - std::is_convertible_v>)) && - ...); +template +struct deduce_layout_right_submapping< + IndexType, SubRank, std::index_sequence, SliceSpecifiers...> { + + static constexpr size_t Rank = sizeof...(Idx); + using count_range = index_sequence_scan_impl< + 0u, (std::is_convertible_v ? 0u : 1u)...>; + //__static_partial_sums...>; + constexpr static int gap_len = + (((Idx < Rank - 1 && count_range::get(Idx) == SubRank - 1 && + std::is_convertible_v) + ? 1 + : 0) + + ... 
+ 0); + + MDSPAN_INLINE_FUNCTION + constexpr static bool layout_right_value() { + // Use layout_right for rank 0 + if constexpr (SubRank == 0) { + return true; + // Use layout_right for rank 1 result if rightmost slice specifier is range like + } else if constexpr (SubRank == 1) { + return ((Idx < Rank - 1 || is_range_slice_v)&&...); + } else { + // Preserve if rightmost SubRank-1 slices are full_extent_t and + // the slice at idx Rank-Subrank is a range and + // for idx < Rank - SubRank the slice is an index + return ((((Idx >= Rank - SubRank) && std::is_same_v) || + ((Idx == Rank - SubRank) && is_range_slice_v) || + ((Idx < Rank - SubRank) && is_index_slice_v)) && ...); + } +#if defined(__NVCC__) && !defined(__CUDA_ARCH__) && defined(__GNUC__) + __builtin_unreachable(); +#endif + } + + MDSPAN_INLINE_FUNCTION + constexpr static bool layout_right_padded_value() { + // Technically could also keep layout_right_padded for SubRank==0 + // and SubRank==1 with rightmost slice specifier being a contiguous range + // but we intercept these cases separately + + // In all other cases: + // rightmost slice must be range + // then there can be a gap with index slices + // then SubRank - 2 full_extent slices + // then another range slice + // then more index slices + // e.g. I I R F F F I I I R for obtaining a rank-5 from a rank-10 + return ((((Idx == Rank - 1) && is_range_slice_v) || + ((Idx >= Rank - gap_len - 1 && Idx < Rank - 1) && is_index_slice_v) || + ((Idx > Rank - gap_len - SubRank && Idx < Rank - gap_len - 1) && std::is_same_v) || + ((Idx == Rank - gap_len - SubRank) && is_range_slice_v) || + ((Idx < Rank - gap_len - SubRank) && is_index_slice_v)) && ... ); + } }; + +// We are reusing the same thing for layout_right and layout_right_padded +// For layout_right as source StaticStride is static_extent(Rank-1) +template +struct compute_s_static_layout_right { + // Neither StaticStride nor any of the provided extents can be zero. 
+ // StaticStride can never be zero, the static_extents we are looking at are associated with + // integral slice specifiers - which wouldn't be valid for zero extent + template + MDSPAN_INLINE_FUNCTION + static constexpr size_t value(std::index_sequence) { + size_t val = ((Idx >= Extents::rank() - 1 - NumGaps && Idx < Extents::rank() - 1 ? (Extents::static_extent(Idx) == dynamic_extent?0:Extents::static_extent(Idx)) : 1) * ... * (StaticStride == dynamic_extent?0:StaticStride)); + return val == 0?dynamic_extent:val; + } +}; + } // namespace detail -// Suppress spurious warning with NVCC about no return statement. -// This is a known issue in NVCC and NVC++ -// Depending on the CUDA and GCC version we need both the builtin -// and the diagnostic push. I tried really hard to find something shorter -// but no luck ... -#if defined __NVCC__ - #ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ - #pragma nv_diagnostic push - #pragma nv_diag_suppress = implicit_return_from_non_void_function - #else - #ifdef __CUDA_ARCH__ - #pragma diagnostic push - #pragma diag_suppress implicit_return_from_non_void_function - #endif - #endif -#elif defined __NVCOMPILER - #pragma diagnostic push - #pragma diag_suppress = implicit_return_from_non_void_function -#endif +// Actual submdspan mapping call template template -MDSPAN_INLINE_FUNCTION -constexpr auto +MDSPAN_INLINE_FUNCTION constexpr auto layout_right::mapping::submdspan_mapping_impl( - SliceSpecifiers... slices) const { - // get sub extents + SliceSpecifiers... 
slices) const { + + // compute sub extents using src_ext_t = Extents; auto dst_ext = submdspan_extents(extents(), slices...); using dst_ext_t = decltype(dst_ext); - // determine new layout type - constexpr bool preserve_layout = detail::preserve_layout_right_mapping< - decltype(std::make_index_sequence()), dst_ext_t::rank(), - SliceSpecifiers...>::value; - using dst_layout_t = - std::conditional_t; - using dst_mapping_t = typename dst_layout_t::template mapping; + // figure out sub layout type + using deduce_layout = detail::deduce_layout_right_submapping< + typename dst_ext_t::index_type, dst_ext_t::rank(), + std::make_index_sequence, + SliceSpecifiers...>; - if constexpr (std::is_same_v) { + // Figure out if any slice's lower bound equals the corresponding extent. + // If so, bypass evaluating the layout mapping. This fixes LWG Issue 4060. + const bool out_of_bounds = + detail::any_slice_out_of_bounds(this->extents(), slices...); + auto offset = static_cast( + out_of_bounds ? this->required_span_size() + : this->operator()(detail::first_of(slices)...)); + + if constexpr (deduce_layout::layout_right_value()) { // layout_right case + using dst_mapping_t = typename layout_right::mapping; + return submdspan_mapping_result{dst_mapping_t(dst_ext), + offset}; + } else if constexpr (deduce_layout::layout_right_padded_value()) { + constexpr size_t S_static = MDSPAN_IMPL_STANDARD_NAMESPACE::detail::compute_s_static_layout_left::value(std::make_index_sequence()); + using dst_mapping_t = typename MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_right_padded::template mapping; return submdspan_mapping_result{ - dst_mapping_t(dst_ext), - static_cast(this->operator()(detail::first_of(slices)...))}; + dst_mapping_t(dst_ext, + stride(src_ext_t::rank() - 2 - deduce_layout::gap_len)), + offset}; } else { // layout_stride case - auto inv_map = detail::inv_map_rank( - std::integral_constant(), - std::index_sequence<>(), - slices...); - return submdspan_mapping_result{ - dst_mapping_t(dst_ext, 
detail::construct_sub_strides( - *this, inv_map, - // HIP needs deduction guides to have markups so we need to be explicit - // NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have the issue - #if defined(_MDSPAN_HAS_HIP) || (defined(__NVCC__) && (__CUDACC_VER_MAJOR__ * 100 + __CUDACC_VER_MINOR__ * 10) < 1120) - std::tuple{detail::stride_of(slices)...})), - #else - std::tuple{detail::stride_of(slices)...})), - #endif - static_cast(this->operator()(detail::first_of(slices)...))}; + using dst_mapping_t = typename layout_stride::mapping; + auto inv_map = detail::inv_map_rank(std::integral_constant(), + std::index_sequence<>(), slices...); + return submdspan_mapping_result { + dst_mapping_t(dst_ext, + detail::construct_sub_strides( + *this, inv_map, +// HIP needs deduction guides to have markups so we need to be explicit +// NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have +// the issue But Clang-CUDA also doesn't accept the use of deduction guide so +// disable it for CUDA altogether +#if defined(_MDSPAN_HAS_HIP) || defined(_MDSPAN_HAS_CUDA) + std::tuple{ + detail::stride_of(slices)...})), +#else + std::tuple{detail::stride_of(slices)...})), +#endif + offset + }; } #if defined(__NVCC__) && !defined(__CUDA_ARCH__) && defined(__GNUC__) __builtin_unreachable(); #endif } -#if defined __NVCC__ - #ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ - #pragma nv_diagnostic pop - #else - #ifdef __CUDA_ARCH__ - #pragma diagnostic pop - #endif - #endif -#elif defined __NVCOMPILER - #pragma diagnostic pop + +template +template +template +MDSPAN_INLINE_FUNCTION constexpr auto +MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_right_padded::mapping::submdspan_mapping_impl( + SliceSpecifiers... 
slices) const { + + // compute sub extents + using src_ext_t = Extents; + auto dst_ext = submdspan_extents(extents(), slices...); + using dst_ext_t = decltype(dst_ext); + + if constexpr (Extents::rank() == 0) { // rank-0 case + using dst_mapping_t = typename MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_right_padded::template mapping; + return submdspan_mapping_result{*this, 0}; + } else { + // Figure out if any slice's lower bound equals the corresponding extent. + // If so, bypass evaluating the layout mapping. This fixes LWG Issue 4060. + // figure out sub layout type + const bool out_of_bounds = + MDSPAN_IMPL_STANDARD_NAMESPACE::detail::any_slice_out_of_bounds(this->extents(), slices...); + auto offset = static_cast( + out_of_bounds ? this->required_span_size() + : this->operator()(MDSPAN_IMPL_STANDARD_NAMESPACE::detail::first_of(slices)...)); + if constexpr (dst_ext_t::rank() == 0) { // result rank-0 + // Same issue as in layout_left_padded: see comment there + // using dst_mapping_t = typename layout_right::template mapping; + // return submdspan_mapping_result{dst_mapping_t{dst_ext}, offset}; + return submdspan_mapping_result> + {typename layout_right::template mapping{dst_ext}, offset}; + } else { // general case + using deduce_layout = MDSPAN_IMPL_STANDARD_NAMESPACE::detail::deduce_layout_right_submapping< + typename dst_ext_t::index_type, dst_ext_t::rank(), + decltype(std::make_index_sequence()), + SliceSpecifiers...>; + + if constexpr (deduce_layout::layout_right_value() && dst_ext_t::rank() == 1) { // getting rank-1 from rightmost + using dst_mapping_t = typename layout_right::template mapping; + return submdspan_mapping_result{dst_mapping_t{dst_ext}, offset}; + } else if constexpr (deduce_layout::layout_right_padded_value()) { // can keep layout_right_padded + constexpr size_t S_static = MDSPAN_IMPL_STANDARD_NAMESPACE::detail::compute_s_static_layout_right::value(std::make_index_sequence()); + using dst_mapping_t = typename 
MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_right_padded::template mapping; + return submdspan_mapping_result{ + dst_mapping_t(dst_ext, stride(Extents::rank() - 2 - deduce_layout::gap_len)), offset}; + } else { // layout_stride + auto inv_map = MDSPAN_IMPL_STANDARD_NAMESPACE::detail::inv_map_rank(std::integral_constant(), + std::index_sequence<>(), slices...); + using dst_mapping_t = typename layout_stride::template mapping; + return submdspan_mapping_result { + dst_mapping_t(dst_ext, + MDSPAN_IMPL_STANDARD_NAMESPACE::detail::construct_sub_strides( + *this, inv_map, +// HIP needs deduction guides to have markups so we need to be explicit +// NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have +// the issue But Clang-CUDA also doesn't accept the use of deduction guide so +// disable it for CUDA alltogether +#if defined(_MDSPAN_HAS_HIP) || defined(_MDSPAN_HAS_CUDA) + std::tuple{ + MDSPAN_IMPL_STANDARD_NAMESPACE::detail::stride_of(slices)...})), +#else + std::tuple{MDSPAN_IMPL_STANDARD_NAMESPACE::detail::stride_of(slices)...})), #endif + offset + }; + } + } + } + + +#if defined(__NVCC__) && !defined(__CUDA_ARCH__) && defined(__GNUC__) + __builtin_unreachable(); +#endif +} //********************************** // layout_stride submdspan_mapping //********************************* template template -MDSPAN_INLINE_FUNCTION -constexpr auto +MDSPAN_INLINE_FUNCTION constexpr auto layout_stride::mapping::submdspan_mapping_impl( - SliceSpecifiers... slices) const { + SliceSpecifiers... 
slices) const { auto dst_ext = submdspan_extents(extents(), slices...); using dst_ext_t = decltype(dst_ext); - auto inv_map = detail::inv_map_rank( - std::integral_constant(), - std::index_sequence<>(), - slices...); + auto inv_map = detail::inv_map_rank(std::integral_constant(), + std::index_sequence<>(), slices...); using dst_mapping_t = typename layout_stride::template mapping; - return submdspan_mapping_result{ - dst_mapping_t(dst_ext, detail::construct_sub_strides( - *this, inv_map, - // HIP needs deduction guides to have markups so we need to be explicit - // NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have the issue - #if defined(_MDSPAN_HAS_HIP) || (defined(__NVCC__) && (__CUDACC_VER_MAJOR__ * 100 + __CUDACC_VER_MINOR__ * 10) < 1120) - std::tuple(detail::stride_of(slices)...))), + + // Figure out if any slice's lower bound equals the corresponding extent. + // If so, bypass evaluating the layout mapping. This fixes LWG Issue 4060. + const bool out_of_bounds = + detail::any_slice_out_of_bounds(this->extents(), slices...); + auto offset = static_cast( + out_of_bounds ? 
this->required_span_size() + : this->operator()(detail::first_of(slices)...)); + + return submdspan_mapping_result { + dst_mapping_t(dst_ext, + detail::construct_sub_strides( + *this, inv_map, +// HIP needs deduction guides to have markups so we need to be explicit +// NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have +// the issue +#if defined(_MDSPAN_HAS_HIP) || \ + (defined(__NVCC__) && \ + (__CUDACC_VER_MAJOR__ * 100 + __CUDACC_VER_MINOR__ * 10) < 1120) + std::tuple( + detail::stride_of(slices)...))), #else - std::tuple(detail::stride_of(slices)...))), + std::tuple(detail::stride_of(slices)...))), #endif - static_cast(this->operator()(detail::first_of(slices)...))}; + offset + }; } } // namespace MDSPAN_IMPL_STANDARD_NAMESPACE + +#if defined __NVCC__ +#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ +#pragma nv_diagnostic pop +#else +#ifdef __CUDA_ARCH__ +#pragma diagnostic pop +#endif +#endif +#elif defined __NVCOMPILER +#pragma diagnostic pop +#endif diff --git a/lib/kokkos/tpls/mdspan/include/experimental/__p2642_bits/layout_padded.hpp b/lib/kokkos/tpls/mdspan/include/experimental/__p2642_bits/layout_padded.hpp index a801486792..e5f7bee4ca 100644 --- a/lib/kokkos/tpls/mdspan/include/experimental/__p2642_bits/layout_padded.hpp +++ b/lib/kokkos/tpls/mdspan/include/experimental/__p2642_bits/layout_padded.hpp @@ -59,6 +59,10 @@ MDSPAN_INLINE_FUNCTION constexpr size_t get_actual_static_padding_value() { } else { return dynamic_extent; } + // Missing return statement warning from NVCC and ICC +#if defined(__NVCC__) || defined(__INTEL_COMPILER) + return 0; +#endif } template @@ -69,7 +73,7 @@ struct static_array_type_for_padded_extent using extents_type = _Extents; using type = ::MDSPAN_IMPL_STANDARD_NAMESPACE::detail::maybe_static_array< index_type, size_t, dynamic_extent, - detail::get_actual_static_padding_value()>; }; @@ -101,6 +105,10 @@ struct padded_extent { } else { return init_padding(exts, padding_value); } + // Missing return statement 
warning from NVCC and ICC +#if defined(__NVCC__) || defined(__INTEL_COMPILER) + return {}; +#endif } MDSPAN_INLINE_FUNCTION static constexpr static_array_type @@ -112,6 +120,10 @@ struct padded_extent { } else { return {}; } + // Missing return statement warning from NVCC and ICC +#if defined(__NVCC__) || defined(__INTEL_COMPILER) + return {}; +#endif } template @@ -123,6 +135,10 @@ struct padded_extent { } else { return {}; } + // Missing return statement warning from NVCC and ICC +#if defined(__NVCC__) || defined(__INTEL_COMPILER) + return {}; +#endif } }; } // namespace detail @@ -158,19 +174,21 @@ private: typename padded_stride_type::static_array_type padded_stride = {}; extents_type exts = {}; - constexpr index_type compute_offset(std::index_sequence<>) const { + MDSPAN_INLINE_FUNCTION constexpr index_type + compute_offset(std::index_sequence<>) const { return 0; } template - constexpr index_type compute_offset(std::index_sequence, - IndexOffset index_offset) const { + MDSPAN_INLINE_FUNCTION constexpr index_type + compute_offset(std::index_sequence, IndexOffset index_offset) const { return index_offset; } template - constexpr index_type compute_offset(std::index_sequence, - IndexOffsets... index_offsets) const { + MDSPAN_INLINE_FUNCTION constexpr index_type + compute_offset(std::index_sequence, + IndexOffsets... index_offsets) const { index_type indices[] = {static_cast(index_offsets)...}; // self-recursive fold trick from // https://github.com/llvm/llvm-project/blob/96e1914aa2e6d8966acbfbe2f4d184201f1aa318/libcxx/include/mdspan/layout_left.h#L144 @@ -203,7 +221,7 @@ public: #endif MDSPAN_INLINE_FUNCTION_DEFAULTED constexpr mapping(const mapping&) noexcept = default; - MDSPAN_INLINE_FUNCTION_DEFAULTED mapping& operator=(const mapping&) noexcept = default; + MDSPAN_INLINE_FUNCTION_DEFAULTED constexpr mapping& operator=(const mapping&) noexcept = default; /** * Initializes the mapping with the given extents. 
@@ -241,62 +259,71 @@ public: /** * Converting constructor from `layout_left::mapping`. * - * This overload participates in overload resolution only if `is_constructible_v` is true. - * If `OtherExtents::rank() > 1` then one of `padding_value`, `static_extent(0)`, or `OtherExtents::static_extent(0)` must be `dynamic_extent`; - * otherwise, `OtherExtents::static_extent(0)` must be equal to the least multiple of `padding_value` greater than or equal to `extents_type::static_extent(0)` + * This overload participates in overload resolution only if + * `is_constructible_v` is true. If + * `OtherExtents::rank() > 1` then one of `padding_value`, `static_extent(0)`, + * or `OtherExtents::static_extent(0)` must be `dynamic_extent`; otherwise, + * `OtherExtents::static_extent(0)` must be equal to the least multiple of + * `padding_value` greater than or equal to `extents_type::static_extent(0)` */ MDSPAN_TEMPLATE_REQUIRES( - class _OtherExtents, - /* requires */ ( - std::is_constructible_v - ) - ) - MDSPAN_CONDITIONAL_EXPLICIT((!std::is_convertible_v<_OtherExtents, extents_type>)) + class _OtherExtents, + /* requires */ (std::is_constructible_v)) + MDSPAN_CONDITIONAL_EXPLICIT( + (!std::is_convertible_v<_OtherExtents, extents_type>)) + MDSPAN_INLINE_FUNCTION constexpr mapping(const layout_left::mapping<_OtherExtents> &other_mapping) - : padded_stride(padded_stride_type::init_padding(other_mapping, std::integral_constant{})), - exts(other_mapping.extents()) - { - static_assert((_OtherExtents::rank() > 1) || (static_padding_stride != dynamic_extent) || (_OtherExtents::static_extent(extent_to_pad_idx) != dynamic_extent) - || (static_padding_stride == _OtherExtents::static_extent(extent_to_pad_idx))); + : padded_stride(padded_stride_type::init_padding( + other_mapping, + std::integral_constant{})), + exts(other_mapping.extents()) { + static_assert( + (_OtherExtents::rank() > 1) || + (static_padding_stride != dynamic_extent) || + (_OtherExtents::static_extent(extent_to_pad_idx) != 
dynamic_extent) || + (static_padding_stride == + _OtherExtents::static_extent(extent_to_pad_idx))); } /** * Converting constructor from `layout_stride::mapping`. * - * This overload participates in overload resolution only if `is_constructible_v` is true + * This overload participates in overload resolution only if + * `is_constructible_v` is true */ MDSPAN_TEMPLATE_REQUIRES( - class _OtherExtents, - /* requires */ ( - std::is_constructible_v - ) - ) + class _OtherExtents, + /* requires */ (std::is_constructible_v)) MDSPAN_CONDITIONAL_EXPLICIT((extents_type::rank() > 0)) + MDSPAN_INLINE_FUNCTION constexpr mapping(const layout_stride::mapping<_OtherExtents> &other_mapping) - : padded_stride(padded_stride_type::init_padding(other_mapping, std::integral_constant{})), - exts(other_mapping.extents()) - { - } + : padded_stride(padded_stride_type::init_padding( + other_mapping, + std::integral_constant{})), + exts(other_mapping.extents()) {} /** * Converting constructor from `layout_left_padded::mapping`. * - * This overload participates in overload resolution only if `is_constructible_v` is true. - * Either `padding_value` or `OtherPaddingStride` must be `std::dynamic_extent`, or `padding_value == OtherPaddingStride`. + * This overload participates in overload resolution only if + * `is_constructible_v` is true. Either + * `padding_value` or `OtherPaddingStride` must be `std::dynamic_extent`, or + * `padding_value == OtherPaddingStride`. 
*/ MDSPAN_TEMPLATE_REQUIRES( - class _Mapping, - /* requires */ ( - detail::is_layout_left_padded_mapping<_Mapping>::value - && std::is_constructible_v - ) - ) - MDSPAN_CONDITIONAL_EXPLICIT((extents_type::rank() > 1 && (padding_value == dynamic_extent || _Mapping::padding_value == dynamic_extent))) - constexpr - mapping(const _Mapping &other_mapping) - : padded_stride(padded_stride_type::init_padding(other_mapping, std::integral_constant{})), - exts(other_mapping.extents()) - { + class _Mapping, + /* requires */ (detail::is_layout_left_padded_mapping<_Mapping>::value + &&std::is_constructible_v< + extents_type, typename _Mapping::extents_type>)) + MDSPAN_CONDITIONAL_EXPLICIT((extents_type::rank() > 1 && + (padding_value == dynamic_extent || + _Mapping::padding_value == dynamic_extent))) + MDSPAN_INLINE_FUNCTION + constexpr mapping(const _Mapping &other_mapping) + : padded_stride(padded_stride_type::init_padding( + other_mapping, + std::integral_constant{})), + exts(other_mapping.extents()) { static_assert(padding_value == dynamic_extent || _Mapping::padding_value == dynamic_extent || padding_value == _Mapping::padding_value); @@ -305,42 +332,43 @@ public: /** * Converting constructor from `layout_right_padded::mapping`. * - * This overload participates in overload resolution only if `extents_type::rank()` is 0 or 1 and `is_constructible_v` is `true`. + * This overload participates in overload resolution only if + * `extents_type::rank()` is 0 or 1 and `is_constructible_v` is `true`. 
*/ MDSPAN_TEMPLATE_REQUIRES( - class _Mapping, - /* requires */ ( - detail::is_layout_right_padded_mapping<_Mapping>::value - && extents_type::rank() <= 1 - && std::is_constructible_v - ) - ) - MDSPAN_CONDITIONAL_EXPLICIT((!std::is_convertible_v)) - constexpr - mapping(const _Mapping &other_mapping) noexcept - : padded_stride(padded_stride_type::init_padding(other_mapping.extents(), other_mapping.extents().extent(extent_to_pad_idx))), - exts(other_mapping.extents()) - {} + class _Mapping, + /* requires */ (detail::is_layout_right_padded_mapping<_Mapping>::value + &&extents_type::rank() <= 1 && + std::is_constructible_v)) + MDSPAN_CONDITIONAL_EXPLICIT( + (!std::is_convertible_v)) + MDSPAN_INLINE_FUNCTION + constexpr mapping(const _Mapping &other_mapping) noexcept + : padded_stride(padded_stride_type::init_padding( + other_mapping.extents(), + other_mapping.extents().extent(extent_to_pad_idx))), + exts(other_mapping.extents()) {} - constexpr const extents_type &extents() const noexcept - { + MDSPAN_INLINE_FUNCTION constexpr const extents_type & + extents() const noexcept { return exts; } - constexpr std::array - strides() const noexcept - { - if constexpr ( extents_type::rank() == 0 ) { + MDSPAN_INLINE_FUNCTION constexpr std::array + strides() const noexcept { + if constexpr (extents_type::rank() == 0) { return {}; - } else if constexpr ( extents_type::rank() == 1 ) { + } else if constexpr (extents_type::rank() == 1) { return {1}; } else { index_type value = 1; std::array s{}; s[extent_to_pad_idx] = value; value *= padded_stride.value(0); - for (rank_type r = extent_to_pad_idx + 1; r < extents_type::rank() - 1; ++r) - { + for (rank_type r = extent_to_pad_idx + 1; r < extents_type::rank() - 1; + ++r) { s[r] = value; value *= exts.extent(r); } @@ -349,12 +377,11 @@ public: } } - constexpr index_type - required_span_size() const noexcept - { - if constexpr ( extents_type::rank() == 0 ) { + MDSPAN_INLINE_FUNCTION constexpr index_type + required_span_size() const noexcept 
{ + if constexpr (extents_type::rank() == 0) { return 1; - } else if constexpr ( extents_type::rank() == 1 ) { + } else if constexpr (extents_type::rank() == 1) { return exts.extent(0); } else { index_type value = padded_stride.value(0); @@ -375,40 +402,51 @@ public: */ MDSPAN_TEMPLATE_REQUIRES( class... _Indices, - /* requires */ ( - sizeof...(_Indices) == extents_type::rank() && - (::MDSPAN_IMPL_STANDARD_NAMESPACE::detail::are_valid_indices()) - ) - ) - constexpr size_t operator()(_Indices... idxs) const noexcept - { + /* requires */ (sizeof...(_Indices) == extents_type::rank() && + (::MDSPAN_IMPL_STANDARD_NAMESPACE::detail:: + are_valid_indices()))) + MDSPAN_INLINE_FUNCTION constexpr size_t + operator()(_Indices... idxs) const noexcept { +#if !defined(NDEBUG) + ::MDSPAN_IMPL_STANDARD_NAMESPACE::detail::check_all_indices(this->extents(), + idxs...); +#endif // ! NDEBUG return compute_offset(std::index_sequence_for<_Indices...>{}, idxs...); } - static constexpr bool is_always_unique() noexcept { return true; } - static constexpr bool is_always_exhaustive() noexcept - { - return (extents_type::rank() <= rank_type(1)) - || (extents_type::static_extent(extent_to_pad_idx) != dynamic_extent - && extents_type::static_extent(extent_to_pad_idx) == padded_stride_type::static_value()); + MDSPAN_INLINE_FUNCTION static constexpr bool is_always_unique() noexcept { + return true; } - static constexpr bool is_always_strided() noexcept { return true; } - - static constexpr bool is_unique() noexcept { return true; } - constexpr bool is_exhaustive() const noexcept - { - return (extents_type::rank() < 2) - || (exts.extent(extent_to_pad_idx) == padded_stride.value(0)); + MDSPAN_INLINE_FUNCTION static constexpr bool is_always_exhaustive() noexcept { + return (extents_type::rank() <= rank_type(1)) || + (extents_type::static_extent(extent_to_pad_idx) != dynamic_extent && + extents_type::static_extent(extent_to_pad_idx) == + padded_stride_type::static_value()); + } + 
MDSPAN_INLINE_FUNCTION static constexpr bool is_always_strided() noexcept { + return true; } - static constexpr bool is_strided() noexcept { return true; } - constexpr index_type stride(rank_type r) const noexcept - { + MDSPAN_INLINE_FUNCTION static constexpr bool is_unique() noexcept { + return true; + } + MDSPAN_INLINE_FUNCTION constexpr bool is_exhaustive() const noexcept { + return (extents_type::rank() < 2) || + (exts.extent(extent_to_pad_idx) == padded_stride.value(0)); + } + MDSPAN_INLINE_FUNCTION static constexpr bool is_strided() noexcept { + return true; + } + + MDSPAN_INLINE_FUNCTION + constexpr index_type stride(rank_type r) const noexcept { assert(r < extents_type::rank()); - if(r == 0) return index_type(1); + if (r == 0) + return index_type(1); index_type value = padded_stride.value(0); - for (rank_type k = 1; k < r; k++) value *= exts.extent(k); + for (rank_type k = 1; k < r; k++) + value *= exts.extent(k); return value; } @@ -416,26 +454,26 @@ public: /** * Equality operator between `layout_left_padded`s * - * This overload only participates in overload resolution if `OtherExtents::rank() == extents_type::rank()`. + * This overload only participates in overload resolution if + * `OtherExtents::rank() == extents_type::rank()`. * - * \note There is currently a difference from p2642r2, where this function is specified as taking - * `layout_left_padded< padding_value >::mapping< Extents>`. However, this makes `padding_value` non-deducible. + * \note There is currently a difference from p2642r2, where this function is + * specified as taking `layout_left_padded< padding_value >::mapping< + * Extents>`. However, this makes `padding_value` non-deducible. 
*/ MDSPAN_TEMPLATE_REQUIRES( - class _Mapping, - /* requires */ ( - detail::is_layout_left_padded_mapping<_Mapping>::value - && (_Mapping::extents_type::rank() == extents_type::rank()) - ) - ) - friend constexpr bool operator==(const mapping &left, const _Mapping &right) noexcept - { - // Workaround for some compilers not short-circuiting properly with compile-time checks - // i.e. we can't access stride(_padding_stride_idx) of a rank 0 mapping + class _Mapping, + /* requires */ (detail::is_layout_left_padded_mapping<_Mapping>::value && + (_Mapping::extents_type::rank() == extents_type::rank()))) + MDSPAN_INLINE_FUNCTION friend constexpr bool + operator==(const mapping &left, const _Mapping &right) noexcept { + // Workaround for some compilers not short-circuiting properly with + // compile-time checks i.e. we can't access stride(_padding_stride_idx) of a + // rank 0 mapping bool strides_equal = true; - if constexpr (extents_type::rank() > rank_type(1)) - { - strides_equal = left.stride(padded_stride_idx) == right.stride(padded_stride_idx); + if constexpr (extents_type::rank() > rank_type(1)) { + strides_equal = + left.stride(padded_stride_idx) == right.stride(padded_stride_idx); } return (left.extents() == right.extents()) && strides_equal; } @@ -444,20 +482,31 @@ public: /** * Inequality operator between `layout_left_padded`s * - * This overload only participates in overload resolution if `OtherExtents::rank() == extents_type::rank()`. + * This overload only participates in overload resolution if + * `OtherExtents::rank() == extents_type::rank()`. 
*/ MDSPAN_TEMPLATE_REQUIRES( - class _Mapping, - /* requires */ ( - detail::is_layout_left_padded_mapping<_Mapping>::value - && (_Mapping::extents_type::rank() == extents_type::rank()) - ) - ) - friend constexpr bool operator!=(const mapping &left, const _Mapping &right) noexcept - { + class _Mapping, + /* requires */ (detail::is_layout_left_padded_mapping<_Mapping>::value && + (_Mapping::extents_type::rank() == extents_type::rank()))) + MDSPAN_INLINE_FUNCTION friend constexpr bool + operator!=(const mapping &left, const _Mapping &right) noexcept { return !(left == right); } #endif + + // [mdspan.submdspan.mapping], submdspan mapping specialization + template + MDSPAN_INLINE_FUNCTION + constexpr auto submdspan_mapping_impl( + SliceSpecifiers... slices) const; + + template + MDSPAN_INLINE_FUNCTION + friend constexpr auto submdspan_mapping( + const mapping& src, SliceSpecifiers... slices) { + return src.submdspan_mapping_impl(slices...); + } }; template @@ -490,25 +539,27 @@ public: typename padded_stride_type::static_array_type padded_stride = {}; extents_type exts = {}; - constexpr index_type compute_offset(std::index_sequence<>) const { + MDSPAN_INLINE_FUNCTION constexpr index_type + compute_offset(std::index_sequence<>) const { return 0; } template - constexpr index_type compute_offset(std::index_sequence, - IndexOffset index_offset) const { + MDSPAN_INLINE_FUNCTION constexpr index_type + compute_offset(std::index_sequence, IndexOffset index_offset) const { return index_offset; } template - constexpr index_type compute_offset(std::index_sequence, - IndexOffsets... index_offsets) const { + MDSPAN_INLINE_FUNCTION constexpr index_type + compute_offset(std::index_sequence, + IndexOffsets... index_offsets) const { // self-recursive fold trick from // https://github.com/llvm/llvm-project/blob/4d9771741d40cc9cfcccb6b033f43689d36b705a/libcxx/include/mdspan/layout_right.h#L141 index_type res = 0; ((res = static_cast(index_offsets) + (Ranks == extent_to_pad_idx ? 
padded_stride.value(0) - : exts.extent(Ranks)) * + : exts.extent(Ranks)) * res), ...); return res; @@ -533,7 +584,7 @@ public: #endif MDSPAN_INLINE_FUNCTION_DEFAULTED constexpr mapping(const mapping&) noexcept = default; - MDSPAN_INLINE_FUNCTION_DEFAULTED mapping& operator=(const mapping&) noexcept = default; + MDSPAN_INLINE_FUNCTION_DEFAULTED constexpr mapping& operator=(const mapping&) noexcept = default; /** * Initializes the mapping with the given extents. @@ -577,56 +628,62 @@ public: */ MDSPAN_TEMPLATE_REQUIRES( class _OtherExtents, - /* requires */ ( - std::is_constructible_v - ) - ) - MDSPAN_CONDITIONAL_EXPLICIT((!std::is_convertible_v<_OtherExtents, extents_type>)) + /* requires */ (std::is_constructible_v)) + MDSPAN_CONDITIONAL_EXPLICIT( + (!std::is_convertible_v<_OtherExtents, extents_type>)) + MDSPAN_INLINE_FUNCTION constexpr mapping(const layout_right::mapping<_OtherExtents> &other_mapping) - : padded_stride(padded_stride_type::init_padding(other_mapping, std::integral_constant{})), - exts(other_mapping.extents()) - { - static_assert((_OtherExtents::rank() > 1) || (padded_stride_type::static_value() != dynamic_extent) || (_OtherExtents::static_extent(extent_to_pad_idx) != dynamic_extent) - || (padded_stride_type::static_value() == _OtherExtents::static_extent(extent_to_pad_idx))); + : padded_stride(padded_stride_type::init_padding( + other_mapping, + std::integral_constant{})), + exts(other_mapping.extents()) { + static_assert( + (_OtherExtents::rank() > 1) || + (padded_stride_type::static_value() != dynamic_extent) || + (_OtherExtents::static_extent(extent_to_pad_idx) != dynamic_extent) || + (padded_stride_type::static_value() == + _OtherExtents::static_extent(extent_to_pad_idx))); } /** * Converting constructor from `layout_stride::mapping`. 
* - * This overload participates in overload resolution only if `is_constructible_v` is true + * This overload participates in overload resolution only if + * `is_constructible_v` is true */ MDSPAN_TEMPLATE_REQUIRES( class _OtherExtents, - /* requires */ ( - std::is_constructible_v - ) - ) + /* requires */ (std::is_constructible_v)) MDSPAN_CONDITIONAL_EXPLICIT((extents_type::rank() > 0)) + MDSPAN_INLINE_FUNCTION constexpr mapping(const layout_stride::mapping<_OtherExtents> &other_mapping) - : padded_stride(padded_stride_type::init_padding(other_mapping, std::integral_constant{})), - exts(other_mapping.extents()) - {} + : padded_stride(padded_stride_type::init_padding( + other_mapping, + std::integral_constant{})), + exts(other_mapping.extents()) {} /** * Converting constructor from `layout_right_padded::mapping`. * - * This overload participates in overload resolution only if `is_constructible_v` is true. - * Either `padding_value` or `OtherPaddingStride` must be `std::dynamic_extent`, or `padding_value == OtherPaddingStride`. + * This overload participates in overload resolution only if + * `is_constructible_v` is true. Either + * `padding_value` or `OtherPaddingStride` must be `std::dynamic_extent`, or + * `padding_value == OtherPaddingStride`. 
*/ MDSPAN_TEMPLATE_REQUIRES( class _Mapping, - /* requires */ ( - detail::is_layout_right_padded_mapping<_Mapping>::value - && std::is_constructible_v - ) - ) + /* requires */ (detail::is_layout_right_padded_mapping<_Mapping>::value + &&std::is_constructible_v< + extents_type, typename _Mapping::extents_type>)) MDSPAN_CONDITIONAL_EXPLICIT((extents_type::rank() > 1 && (padding_value == dynamic_extent || _Mapping::padding_value == dynamic_extent))) + MDSPAN_INLINE_FUNCTION constexpr mapping(const _Mapping &other_mapping) - : padded_stride(padded_stride_type::init_padding(other_mapping, std::integral_constant{})), - exts(other_mapping.extents()) - { + : padded_stride(padded_stride_type::init_padding( + other_mapping, + std::integral_constant{})), + exts(other_mapping.extents()) { static_assert(padding_value == dynamic_extent || _Mapping::padding_value == dynamic_extent || padding_value == _Mapping::padding_value); @@ -635,41 +692,42 @@ public: /** * Converting constructor from `layout_left_padded::mapping`. * - * This overload participates in overload resolution only if `extents_type::rank()` is 0 or 1 and `is_constructible_v` is `true`. + * This overload participates in overload resolution only if + * `extents_type::rank()` is 0 or 1 and `is_constructible_v` is `true`. 
*/ MDSPAN_TEMPLATE_REQUIRES( class _Mapping, - /* requires */ ( - detail::is_layout_left_padded_mapping<_Mapping>::value - && extents_type::rank() <= 1 - && std::is_constructible_v - ) - ) - MDSPAN_CONDITIONAL_EXPLICIT((!std::is_convertible_v)) + /* requires */ (detail::is_layout_left_padded_mapping<_Mapping>::value + &&extents_type::rank() <= 1 && + std::is_constructible_v)) + MDSPAN_CONDITIONAL_EXPLICIT( + (!std::is_convertible_v)) + MDSPAN_INLINE_FUNCTION constexpr mapping(const _Mapping &other_mapping) noexcept - : padded_stride(padded_stride_type::init_padding(other_mapping.extents(), other_mapping.extents().extent(extent_to_pad_idx))), - exts(other_mapping.extents()) - {} + : padded_stride(padded_stride_type::init_padding( + other_mapping.extents(), + other_mapping.extents().extent(extent_to_pad_idx))), + exts(other_mapping.extents()) {} - constexpr const extents_type &extents() const noexcept - { + MDSPAN_INLINE_FUNCTION constexpr const extents_type & + extents() const noexcept { return exts; } - constexpr std::array - strides() const noexcept - { - if constexpr ( extents_type::rank() == 0 ) { + MDSPAN_INLINE_FUNCTION constexpr std::array + strides() const noexcept { + if constexpr (extents_type::rank() == 0) { return {}; - } else if constexpr ( extents_type::rank() == 1 ) { + } else if constexpr (extents_type::rank() == 1) { return {1}; } else { index_type value = 1; std::array s{}; s[extent_to_pad_idx] = value; value *= padded_stride.value(0); - for (rank_type r = extent_to_pad_idx - 1; r > 0; --r) - { + for (rank_type r = extent_to_pad_idx - 1; r > 0; --r) { s[r] = value; value *= exts.extent(r); } @@ -678,17 +736,15 @@ public: } } - constexpr index_type - required_span_size() const noexcept - { - if constexpr ( extents_type::rank() == 0 ) { + MDSPAN_INLINE_FUNCTION constexpr index_type + required_span_size() const noexcept { + if constexpr (extents_type::rank() == 0) { return 1; - } else if constexpr ( extents_type::rank() == 1 ) { + } else if constexpr 
(extents_type::rank() == 1) { return exts.extent(0); } else { index_type value = 1; - for (rank_type r = 0; r < extent_to_pad_idx; ++r) - { + for (rank_type r = 0; r < extent_to_pad_idx; ++r) { value *= exts.extent(r); } return value * padded_stride.value(0); @@ -705,40 +761,47 @@ public: */ MDSPAN_TEMPLATE_REQUIRES( class... _Indices, - /* requires */ ( - sizeof...(_Indices) == extents_type::rank() && - (::MDSPAN_IMPL_STANDARD_NAMESPACE::detail::are_valid_indices()) - ) - ) - constexpr size_t operator()(_Indices... idxs) const noexcept - { + /* requires */ (sizeof...(_Indices) == extents_type::rank() && + (::MDSPAN_IMPL_STANDARD_NAMESPACE::detail:: + are_valid_indices()))) + MDSPAN_INLINE_FUNCTION constexpr size_t + operator()(_Indices... idxs) const noexcept { return compute_offset(std::index_sequence_for<_Indices...>{}, idxs...); } - static constexpr bool is_always_unique() noexcept { return true; } - static constexpr bool is_always_exhaustive() noexcept - { - return (extents_type::rank() <= rank_type(1)) - || (extents_type::static_extent(extent_to_pad_idx) != dynamic_extent - && extents_type::static_extent(extent_to_pad_idx) == padded_stride_type::static_value()); + MDSPAN_INLINE_FUNCTION static constexpr bool is_always_unique() noexcept { + return true; } - static constexpr bool is_always_strided() noexcept { return true; } - - static constexpr bool is_unique() noexcept { return true; } - constexpr bool is_exhaustive() const noexcept - { - return (extents_type::rank() < 2) - || (exts.extent(extent_to_pad_idx) == padded_stride.value(0)); + MDSPAN_INLINE_FUNCTION static constexpr bool is_always_exhaustive() noexcept { + return (extents_type::rank() <= rank_type(1)) || + (extents_type::static_extent(extent_to_pad_idx) != dynamic_extent && + extents_type::static_extent(extent_to_pad_idx) == + padded_stride_type::static_value()); + } + MDSPAN_INLINE_FUNCTION static constexpr bool is_always_strided() noexcept { + return true; } - static constexpr bool is_strided() 
noexcept { return true; } - constexpr index_type stride(rank_type r) const noexcept - { + MDSPAN_INLINE_FUNCTION static constexpr bool is_unique() noexcept { + return true; + } + MDSPAN_INLINE_FUNCTION constexpr bool is_exhaustive() const noexcept { + return (extents_type::rank() < 2) || + (exts.extent(extent_to_pad_idx) == padded_stride.value(0)); + } + MDSPAN_INLINE_FUNCTION static constexpr bool is_strided() noexcept { + return true; + } + + MDSPAN_INLINE_FUNCTION constexpr index_type + stride(rank_type r) const noexcept { assert(r < extents_type::rank()); - if(r == extents_type::rank() - 1) return index_type(1); + if (r == extents_type::rank() - 1) + return index_type(1); index_type value = padded_stride.value(0); - for (rank_type k = extents_type::rank() - 2; k > r; k--) value *= exts.extent(k); + for (rank_type k = extents_type::rank() - 2; k > r; k--) + value *= exts.extent(k); return value; } @@ -746,26 +809,26 @@ public: /** * Equality operator between `layout_right_padded`s * - * This overload only participates in overload resolution if `OtherExtents::rank() == extents_type::rank()`. + * This overload only participates in overload resolution if + * `OtherExtents::rank() == extents_type::rank()`. * - * \note There is currently a difference from p2642r2, where this function is specified as taking - * `layout_right_padded< padding_value >::mapping< Extents>`. However, this makes `padding_value` non-deducible. + * \note There is currently a difference from p2642r2, where this function is + * specified as taking `layout_right_padded< padding_value >::mapping< + * Extents>`. However, this makes `padding_value` non-deducible. 
*/ MDSPAN_TEMPLATE_REQUIRES( class _Mapping, - /* requires */ ( - detail::is_layout_right_padded_mapping<_Mapping>::value - && (_Mapping::extents_type::rank() == extents_type::rank()) - ) - ) - friend constexpr bool operator==(const mapping &left, const _Mapping &right) noexcept - { - // Workaround for some compilers not short-circuiting properly with compile-time checks - // i.e. we can't access stride(_padding_stride_idx) of a rank 0 mapping + /* requires */ (detail::is_layout_right_padded_mapping<_Mapping>::value && + (_Mapping::extents_type::rank() == extents_type::rank()))) + MDSPAN_INLINE_FUNCTION friend constexpr bool + operator==(const mapping &left, const _Mapping &right) noexcept { + // Workaround for some compilers not short-circuiting properly with + // compile-time checks i.e. we can't access stride(_padding_stride_idx) of a + // rank 0 mapping bool strides_equal = true; - if constexpr (extents_type::rank() > rank_type(1)) - { - strides_equal = left.stride(padded_stride_idx) == right.stride(padded_stride_idx); + if constexpr (extents_type::rank() > rank_type(1)) { + strides_equal = + left.stride(padded_stride_idx) == right.stride(padded_stride_idx); } return (left.extents() == right.extents()) && strides_equal; } @@ -774,20 +837,31 @@ public: /** * Inequality operator between `layout_right_padded`s * - * This overload only participates in overload resolution if `OtherExtents::rank() == extents_type::rank()`. + * This overload only participates in overload resolution if + * `OtherExtents::rank() == extents_type::rank()`. 
*/ MDSPAN_TEMPLATE_REQUIRES( class _Mapping, - /* requires */ ( - detail::is_layout_right_padded_mapping<_Mapping>::value - && (_Mapping::extents_type::rank() == extents_type::rank()) - ) - ) - friend constexpr bool operator!=(const mapping &left, const _Mapping &right) noexcept - { + /* requires */ (detail::is_layout_right_padded_mapping<_Mapping>::value && + (_Mapping::extents_type::rank() == extents_type::rank()))) + MDSPAN_INLINE_FUNCTION friend constexpr bool + operator!=(const mapping &left, const _Mapping &right) noexcept { return !(left == right); } #endif + + // [mdspan.submdspan.mapping], submdspan mapping specialization + template + MDSPAN_INLINE_FUNCTION + constexpr auto submdspan_mapping_impl( + SliceSpecifiers... slices) const; + + template + MDSPAN_INLINE_FUNCTION + friend constexpr auto submdspan_mapping( + const mapping& src, SliceSpecifiers... slices) { + return src.submdspan_mapping_impl(slices...); + } }; } } diff --git a/lib/kokkos/tpls/mdspan/include/experimental/__p2642_bits/layout_padded_fwd.hpp b/lib/kokkos/tpls/mdspan/include/experimental/__p2642_bits/layout_padded_fwd.hpp index 945f091a2d..18daa28cc6 100644 --- a/lib/kokkos/tpls/mdspan/include/experimental/__p2642_bits/layout_padded_fwd.hpp +++ b/lib/kokkos/tpls/mdspan/include/experimental/__p2642_bits/layout_padded_fwd.hpp @@ -17,6 +17,7 @@ #include #include "../__p0009_bits/dynamic_extent.hpp" +#include "../__p0009_bits/utility.hpp" namespace MDSPAN_IMPL_STANDARD_NAMESPACE { namespace MDSPAN_IMPL_PROPOSED_NAMESPACE { @@ -82,36 +83,49 @@ struct is_layout_right_padded_mapping<_Mapping, std::enable_if_t::template mapping>::value>> : std::true_type {}; + template -constexpr void check_padded_layout_converting_constructor_mandates() +constexpr void check_padded_layout_converting_constructor_mandates(MDSPAN_IMPL_STANDARD_NAMESPACE::detail::with_rank<0>) {} + +template +constexpr void check_padded_layout_converting_constructor_mandates(MDSPAN_IMPL_STANDARD_NAMESPACE::detail::with_rank<1>) {} + 
+template +constexpr void check_padded_layout_converting_constructor_mandates(MDSPAN_IMPL_STANDARD_NAMESPACE::detail::with_rank) { - if constexpr (_LayoutExtentsType::rank() > 1) { - using extents_type = typename _PaddedLayoutMappingType::extents_type; - constexpr auto padding_value = _PaddedLayoutMappingType::padding_value; - constexpr auto idx = layout_padded_constants::extent_to_pad_idx; - if constexpr ((_LayoutExtentsType::static_extent(idx) != dynamic_extent) && - (extents_type::static_extent(idx) != dynamic_extent) && - (padding_value != dynamic_extent)) { - if constexpr (padding_value == 0) { - static_assert(_LayoutExtentsType::static_extent(idx) == 0); - } else { - static_assert( - _LayoutExtentsType::static_extent(idx) % padding_value == 0); - } - } - } + using extents_type = typename _PaddedLayoutMappingType::extents_type; + constexpr auto padding_value = _PaddedLayoutMappingType::padding_value; + constexpr auto idx = layout_padded_constants::extent_to_pad_idx; + + constexpr auto statically_determinable = + (_LayoutExtentsType::static_extent(idx) != dynamic_extent) && + (extents_type::static_extent(idx) != dynamic_extent) && + (padding_value != dynamic_extent); + + static_assert(!statically_determinable || + (padding_value == 0 + ? 
_LayoutExtentsType::static_extent(idx) == 0 + : _LayoutExtentsType::static_extent(idx) % padding_value == 0), + ""); } template -constexpr void check_padded_layout_converting_constructor_preconditions([[maybe_unused]] const _OtherMapping &other_mapping) { - if constexpr (_ExtentsType::rank() > 1) { - constexpr auto padded_stride_idx = - layout_padded_constants::padded_stride_idx; - constexpr auto extent_to_pad_idx = layout_padded_constants::extent_to_pad_idx; - assert(other_mapping.stride(padded_stride_idx) == other_mapping.extents().extent(extent_to_pad_idx)); - } -} +constexpr void check_padded_layout_converting_constructor_preconditions(MDSPAN_IMPL_STANDARD_NAMESPACE::detail::with_rank<0>, + const _OtherMapping&) {} +template +constexpr void check_padded_layout_converting_constructor_preconditions(MDSPAN_IMPL_STANDARD_NAMESPACE::detail::with_rank<1>, + const _OtherMapping&) {} +template +constexpr void check_padded_layout_converting_constructor_preconditions(MDSPAN_IMPL_STANDARD_NAMESPACE::detail::with_rank, + const _OtherMapping &other_mapping) { + constexpr auto padded_stride_idx = + layout_padded_constants::padded_stride_idx; + constexpr auto extent_to_pad_idx = layout_padded_constants::extent_to_pad_idx; + MDSPAN_IMPL_PRECONDITION(other_mapping.stride(padded_stride_idx) == other_mapping.extents().extent(extent_to_pad_idx)); +} + + } } } diff --git a/lib/kokkos/tpls/mdspan/include/mdspan/mdspan.hpp b/lib/kokkos/tpls/mdspan/include/mdspan/mdspan.hpp index ac72a1a4e6..4a0e354ffd 100644 --- a/lib/kokkos/tpls/mdspan/include/mdspan/mdspan.hpp +++ b/lib/kokkos/tpls/mdspan/include/mdspan/mdspan.hpp @@ -38,5 +38,6 @@ #include "../experimental/__p2642_bits/layout_padded.hpp" #include "../experimental/__p2630_bits/submdspan.hpp" #endif +#include "../experimental/__p2389_bits/dims.hpp" #endif // MDSPAN_HPP_ diff --git a/lib/linalg/dlauu2.cpp b/lib/linalg/dlauu2.cpp new file mode 100644 index 0000000000..d90a84798d --- /dev/null +++ b/lib/linalg/dlauu2.cpp @@ -0,0 +1,77 
@@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static doublereal c_b7 = 1.; +static integer c__1 = 1; +int dlauu2_(char *uplo, integer *n, doublereal *a, integer *lda, integer *info, ftnlen uplo_len) +{ + integer a_dim1, a_offset, i__1, i__2, i__3; + integer i__; + doublereal aii; + extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, integer *); + extern int dscal_(integer *, doublereal *, doublereal *, integer *); + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen); + logical upper; + extern int xerbla_(char *, integer *, ftnlen); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1, *n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DLAUU2", &i__1, (ftnlen)6); + return 0; + } + if (*n == 0) { + return 0; + } + if (upper) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + aii = a[i__ + i__ * a_dim1]; + if (i__ < *n) { + i__2 = *n - i__ + 1; + a[i__ + i__ * a_dim1] = + ddot_(&i__2, &a[i__ + i__ * a_dim1], lda, &a[i__ + i__ * a_dim1], lda); + i__2 = i__ - 1; + i__3 = *n - i__; + dgemv_((char *)"No transpose", &i__2, &i__3, &c_b7, &a[(i__ + 1) * a_dim1 + 1], lda, + &a[i__ + (i__ + 1) * a_dim1], lda, &aii, &a[i__ * a_dim1 + 1], &c__1, + (ftnlen)12); + } else { + dscal_(&i__, &aii, &a[i__ * a_dim1 + 1], &c__1); + } + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + aii = a[i__ + i__ * a_dim1]; + if (i__ < *n) { + i__2 = *n - i__ + 1; + a[i__ + i__ * a_dim1] = + ddot_(&i__2, &a[i__ + i__ * a_dim1], &c__1, &a[i__ + i__ * a_dim1], &c__1); + i__2 = *n - i__; + i__3 = i__ - 1; + dgemv_((char 
*)"Transpose", &i__2, &i__3, &c_b7, &a[i__ + 1 + a_dim1], lda, + &a[i__ + 1 + i__ * a_dim1], &c__1, &aii, &a[i__ + a_dim1], lda, (ftnlen)9); + } else { + dscal_(&i__, &aii, &a[i__ + a_dim1], lda); + } + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dlauum.cpp b/lib/linalg/dlauum.cpp new file mode 100644 index 0000000000..632bd4ba85 --- /dev/null +++ b/lib/linalg/dlauum.cpp @@ -0,0 +1,101 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +static integer c__1 = 1; +static integer c_n1 = -1; +static doublereal c_b15 = 1.; +int dlauum_(char *uplo, integer *n, doublereal *a, integer *lda, integer *info, ftnlen uplo_len) +{ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + integer i__, ib, nb; + extern int dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, + ftnlen, ftnlen); + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int dtrmm_(char *, char *, char *, char *, integer *, integer *, doublereal *, + doublereal *, integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen, + ftnlen); + logical upper; + extern int dsyrk_(char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, doublereal *, integer *, ftnlen, ftnlen), + dlauu2_(char *, integer *, doublereal *, integer *, integer *, ftnlen), + xerbla_(char *, integer *, ftnlen); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, + ftnlen, ftnlen); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + *info = 0; + upper = lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1); + if (!upper && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1, *n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DLAUUM", &i__1, (ftnlen)6); + return 0; + } + if (*n == 0) { + 
return 0; + } + nb = ilaenv_(&c__1, (char *)"DLAUUM", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + if (nb <= 1 || nb >= *n) { + dlauu2_(uplo, n, &a[a_offset], lda, info, (ftnlen)1); + } else { + if (upper) { + i__1 = *n; + i__2 = nb; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + i__3 = nb, i__4 = *n - i__ + 1; + ib = min(i__3, i__4); + i__3 = i__ - 1; + dtrmm_((char *)"Right", (char *)"Upper", (char *)"Transpose", (char *)"Non-unit", &i__3, &ib, &c_b15, + &a[i__ + i__ * a_dim1], lda, &a[i__ * a_dim1 + 1], lda, (ftnlen)5, (ftnlen)5, + (ftnlen)9, (ftnlen)8); + dlauu2_((char *)"Upper", &ib, &a[i__ + i__ * a_dim1], lda, info, (ftnlen)5); + if (i__ + ib <= *n) { + i__3 = i__ - 1; + i__4 = *n - i__ - ib + 1; + dgemm_((char *)"No transpose", (char *)"Transpose", &i__3, &ib, &i__4, &c_b15, + &a[(i__ + ib) * a_dim1 + 1], lda, &a[i__ + (i__ + ib) * a_dim1], lda, + &c_b15, &a[i__ * a_dim1 + 1], lda, (ftnlen)12, (ftnlen)9); + i__3 = *n - i__ - ib + 1; + dsyrk_((char *)"Upper", (char *)"No transpose", &ib, &i__3, &c_b15, + &a[i__ + (i__ + ib) * a_dim1], lda, &c_b15, &a[i__ + i__ * a_dim1], lda, + (ftnlen)5, (ftnlen)12); + } + } + } else { + i__2 = *n; + i__1 = nb; + for (i__ = 1; i__1 < 0 ? 
i__ >= i__2 : i__ <= i__2; i__ += i__1) { + i__3 = nb, i__4 = *n - i__ + 1; + ib = min(i__3, i__4); + i__3 = i__ - 1; + dtrmm_((char *)"Left", (char *)"Lower", (char *)"Transpose", (char *)"Non-unit", &ib, &i__3, &c_b15, + &a[i__ + i__ * a_dim1], lda, &a[i__ + a_dim1], lda, (ftnlen)4, (ftnlen)5, + (ftnlen)9, (ftnlen)8); + dlauu2_((char *)"Lower", &ib, &a[i__ + i__ * a_dim1], lda, info, (ftnlen)5); + if (i__ + ib <= *n) { + i__3 = i__ - 1; + i__4 = *n - i__ - ib + 1; + dgemm_((char *)"Transpose", (char *)"No transpose", &ib, &i__3, &i__4, &c_b15, + &a[i__ + ib + i__ * a_dim1], lda, &a[i__ + ib + a_dim1], lda, &c_b15, + &a[i__ + a_dim1], lda, (ftnlen)9, (ftnlen)12); + i__3 = *n - i__ - ib + 1; + dsyrk_((char *)"Lower", (char *)"Transpose", &ib, &i__3, &c_b15, &a[i__ + ib + i__ * a_dim1], + lda, &c_b15, &a[i__ + i__ * a_dim1], lda, (ftnlen)5, (ftnlen)9); + } + } + } + } + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/linalg/dpotri.cpp b/lib/linalg/dpotri.cpp new file mode 100644 index 0000000000..9c0a609e1b --- /dev/null +++ b/lib/linalg/dpotri.cpp @@ -0,0 +1,40 @@ +#ifdef __cplusplus +extern "C" { +#endif +#include "lmp_f2c.h" +int dpotri_(char *uplo, integer *n, doublereal *a, integer *lda, integer *info, ftnlen uplo_len) +{ + integer a_dim1, a_offset, i__1; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern int xerbla_(char *, integer *, ftnlen), + dlauum_(char *, integer *, doublereal *, integer *, integer *, ftnlen), + dtrtri_(char *, char *, integer *, doublereal *, integer *, integer *, ftnlen, ftnlen); + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + *info = 0; + if (!lsame_(uplo, (char *)"U", (ftnlen)1, (ftnlen)1) && !lsame_(uplo, (char *)"L", (ftnlen)1, (ftnlen)1)) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1, *n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_((char *)"DPOTRI", &i__1, (ftnlen)6); + return 0; + } + if (*n == 0) { + return 0; + } + dtrtri_(uplo, 
(char *)"Non-unit", n, &a[a_offset], lda, info, (ftnlen)1, (ftnlen)8); + if (*info > 0) { + return 0; + } + dlauum_(uplo, n, &a[a_offset], lda, info, (ftnlen)1); + return 0; +} +#ifdef __cplusplus +} +#endif diff --git a/lib/plumed/Install.py b/lib/plumed/Install.py index 66501a74e9..485845b67a 100644 --- a/lib/plumed/Install.py +++ b/lib/plumed/Install.py @@ -19,7 +19,7 @@ parser = ArgumentParser(prog='Install.py', # Note: must also adjust check for supported API versions in # fix_plumed.cpp when version changes from v2.n.x to v2.n+1.y -version = "2.9.1" +version = "2.9.2" mode = "static" # help message @@ -51,6 +51,7 @@ checksums = { \ '2.8.4' : '9f59c4f9bda86fe5bef19543c295a981', \ '2.9.0' : '661eabeebee05cf84bbf9dc23d7d5f46', \ '2.9.1' : 'c3b2d31479c1e9ce211719d40e9efbd7', \ + '2.9.2' : '04862602a372c1013bdfee2d6d03bace', \ } # parse and process arguments diff --git a/lib/rheo/Makefile.lammps b/lib/rheo/Makefile.lammps index ec58740370..5785f8978b 100644 --- a/lib/rheo/Makefile.lammps +++ b/lib/rheo/Makefile.lammps @@ -1,14 +1,5 @@ -# Settings that the LAMMPS build will import when this package is installed +# Settings that the LAMMPS build will import when this package library is used -ifeq ($(strip $(shell pkg-config --version)),) - # manual configuration w/o pkg-config/pkgconf - # change this to -I/path/to/your/lib/gsl/include/ - rheo_SYSINC = -I../../lib/rheo/gsl/include/ - - # change this to -L/path/to/your/lib/gsl/lib/ - rheo_SYSLIB = -L../../lib/rheo/gsl/lib/ -lgsl -lgslcblas -else - # autodetect GSL settings from pkg-config/pkgconf - rheo_SYSINC = $(shell pkg-config --cflags gsl) - rheo_SYSLIB = $(shell pkg-config --libs gsl) -endif +rheo_SYSINC = +rheo_SYSLIB = -llinalg +rheo_SYSPATH = -L../../lib/linalg$(LIBOBJDIR) diff --git a/lib/rheo/Makefile.lammps.empty b/lib/rheo/Makefile.lammps.empty new file mode 100644 index 0000000000..f71390299c --- /dev/null +++ b/lib/rheo/Makefile.lammps.empty @@ -0,0 +1,5 @@ +# Settings that the LAMMPS build will import 
when this package library is used + +rheo_SYSINC = +rheo_SYSLIB = +rheo_SYSPATH = diff --git a/lib/rheo/Makefile.lammps.installed b/lib/rheo/Makefile.lammps.installed new file mode 100644 index 0000000000..8900470077 --- /dev/null +++ b/lib/rheo/Makefile.lammps.installed @@ -0,0 +1,5 @@ +# Settings that the LAMMPS build will import when this package library is used + +rheo_SYSINC = +rheo_SYSLIB = -lblas -llapack +rheo_SYSPATH = diff --git a/lib/rheo/Makefile.lammps.linalg b/lib/rheo/Makefile.lammps.linalg new file mode 100644 index 0000000000..5785f8978b --- /dev/null +++ b/lib/rheo/Makefile.lammps.linalg @@ -0,0 +1,5 @@ +# Settings that the LAMMPS build will import when this package library is used + +rheo_SYSINC = +rheo_SYSLIB = -llinalg +rheo_SYSPATH = -L../../lib/linalg$(LIBOBJDIR) diff --git a/lib/rheo/README b/lib/rheo/README index ae421b6e80..fe082797f1 100644 --- a/lib/rheo/README +++ b/lib/rheo/README @@ -1,7 +1,5 @@ -This directory has a Makefile.lammps file with settings that allows LAMMPS to -dynamically link to the GSL library. This is required to use the RHEO package -in a LAMMPS input script. If you have the pkg-config command available, it -will automatically import the GSL settings. Otherwise they will have to be -added manually. - -See the header of Makefile.lammps for more info. +This directory has multiple Makefile.lammps variant files with settings that +allow LAMMPS to link with a BLAS/LAPACK or compatible library or the bundled +linalg library (which is a subset of BLAS/LAPACK). Copy the suitable file +to Makefile.lammps and edit, if needed. +This is required to use the RHEO package in a LAMMPS input script. 
diff --git a/python/lammps/core.py b/python/lammps/core.py index 4fcda37d5d..249b4719e0 100644 --- a/python/lammps/core.py +++ b/python/lammps/core.py @@ -18,12 +18,21 @@ from __future__ import print_function import os import sys -from ctypes import * # lgtm [py/polluting-import] -from os.path import dirname,abspath,join +from ctypes import CDLL, POINTER, RTLD_GLOBAL, CFUNCTYPE, py_object, byref, cast, sizeof, \ + create_string_buffer, c_int, c_int32, c_int64, c_double, c_void_p, c_char_p, pythonapi, \ + pointer +from os.path import dirname, abspath, join from inspect import getsourcefile -from .constants import * # lgtm [py/polluting-import] -from .data import * # lgtm [py/polluting-import] +from lammps.constants import LAMMPS_AUTODETECT, LAMMPS_STRING, \ + LAMMPS_INT, LAMMPS_INT_2D, LAMMPS_DOUBLE, LAMMPS_DOUBLE_2D, LAMMPS_INT64, LAMMPS_INT64_2D, \ + LMP_STYLE_GLOBAL, LMP_STYLE_ATOM, LMP_STYLE_LOCAL, \ + LMP_TYPE_SCALAR, LMP_TYPE_VECTOR, LMP_TYPE_ARRAY, \ + LMP_SIZE_VECTOR, LMP_SIZE_ROWS, LMP_SIZE_COLS, \ + LMP_VAR_EQUAL, LMP_VAR_ATOM, LMP_VAR_VECTOR, LMP_VAR_STRING, \ + get_ctypes_int + +from lammps.data import NeighList # ------------------------------------------------------------------------- @@ -318,6 +327,8 @@ class lammps(object): self.lib.lammps_extract_atom.argtypes = [c_void_p, c_char_p] self.lib.lammps_extract_atom_datatype.argtypes = [c_void_p, c_char_p] self.lib.lammps_extract_atom_datatype.restype = c_int + self.lib.lammps_extract_atom_size.argtypes = [c_void_p, c_char_p, c_int] + self.lib.lammps_extract_atom_size.restype = c_int self.lib.lammps_extract_fix.argtypes = [c_void_p, c_char_p, c_int, c_int, c_int, c_int] @@ -941,7 +952,7 @@ class lammps(object): def extract_pair_dimension(self, name): """Retrieve pair style property dimensionality from LAMMPS - .. versionadded:: TBD + .. versionadded:: 29Aug2024 This is a wrapper around the :cpp:func:`lammps_extract_pair_dimension` function of the C-library interface. 
The list of supported keywords @@ -970,7 +981,7 @@ class lammps(object): def extract_pair(self, name): """Extract pair style data from LAMMPS. - .. versionadded:: TBD + .. versionadded:: 29Aug2024 This is a wrapper around the :cpp:func:`lammps_extract_pair` function of the C-library interface. Since there are no pointers in Python, this @@ -1070,31 +1081,59 @@ class lammps(object): else: return None return self.lib.lammps_extract_atom_datatype(self.lmp, newname) + # ------------------------------------------------------------------------- + # extract per-atom info size + + def extract_atom_size(self, name, dtype): + """Retrieve per-atom property dimensions from LAMMPS + + This is a wrapper around the :cpp:func:`lammps_extract_atom_size` + function of the C-library interface. Its documentation includes a + list of the supported keywords. + This function returns ``None`` if the keyword is not + recognized. Otherwise it will return an integer value with the size + of the per-atom vector or array. If *name* corresponds to a per-atom + array, the *dtype* keyword must be either LMP_SIZE_ROWS or LMP_SIZE_COLS + from the :ref:`type ` constants defined in the + :py:mod:`lammps` module. The return value is the requested size. + If *name* corresponds to a per-atom vector the *dtype* keyword is ignored. 
+ + :param name: name of the property + :type name: string + :param dtype: either LMP_SIZE_ROWS or LMP_SIZE_COLS for arrays, otherwise ignored + :type dtype: int + :return: requested size of the per-atom vector or array + :rtype: int + """ + if name: newname = name.encode() + else: return None + return self.lib.lammps_extract_atom_size(self.lmp, newname, dtype) + # ------------------------------------------------------------------------- # extract per-atom info def extract_atom(self, name, dtype=LAMMPS_AUTODETECT): """Retrieve per-atom properties from LAMMPS - This is a wrapper around the :cpp:func:`lammps_extract_atom` - function of the C-library interface. Its documentation includes a - list of the supported keywords and their data types. - Since Python needs to know the data type to be able to interpret - the result, by default, this function will try to auto-detect the data type - by asking the library. You can also force a specific data type by setting ``dtype`` - to one of the :ref:`data type ` constants defined in the - :py:mod:`lammps` module. - This function returns ``None`` if either the keyword is not - recognized, or an invalid data type constant is used. + This is a wrapper around the :cpp:func:`lammps_extract_atom` function of the + C-library interface. Its documentation includes a list of the supported + keywords and their data types. Since Python needs to know the data type to + be able to interpret the result, by default, this function will try to + auto-detect the data type by asking the library. You can also force a + specific data type by setting ``dtype`` to one of the :ref:`data type + ` constants defined in the :py:mod:`lammps` module. + This function returns ``None`` if either the keyword is not recognized, or + an invalid data type constant is used. .. 
note:: - While the returned arrays of per-atom data are dimensioned - for the range [0:nmax] - as is the underlying storage - - the data is usually only valid for the range of [0:nlocal], - unless the property of interest is also updated for ghost - atoms. In some cases, this depends on a LAMMPS setting, see - for example :doc:`comm_modify vel yes `. + While the returned vectors or arrays of per-atom data are dimensioned for + the range [0:nmax] - as is the underlying storage - the data is usually + only valid for the range of [0:nlocal], unless the property of interest + is also updated for ghost atoms. In some cases, this depends on a LAMMPS + setting, see for example :doc:`comm_modify vel yes `. + The actual size can be determined by calling + :py:meth:`lammps.extract_atom_size()`. :param name: name of the property :type name: string @@ -1105,6 +1144,7 @@ class lammps(object): ctypes.POINTER(ctypes.c_int64), ctypes.POINTER(ctypes.POINTER(ctypes.c_int64)), ctypes.POINTER(ctypes.c_double), ctypes.POINTER(ctypes.POINTER(ctypes.c_double)), or NoneType + """ if dtype == LAMMPS_AUTODETECT: dtype = self.extract_atom_datatype(name) @@ -2227,7 +2267,6 @@ class lammps(object): :param caller: reference to some object passed to the callback function :type: object, optional """ - import numpy as np def callback_wrapper(caller, ntimestep, nlocal, tag_ptr, x_ptr, fext_ptr): tag = self.numpy.iarray(self.c_tagint, tag_ptr, nlocal, 1) @@ -2522,3 +2561,7 @@ class lammps(object): newcomputeid = computeid.encode() idx = self.lib.lammps_find_compute_neighlist(self.lmp, newcomputeid, reqid) return idx + +# Local Variables: +# fill-column: 80 +# End: diff --git a/python/lammps/numpy_wrapper.py b/python/lammps/numpy_wrapper.py index 9ab7f538de..5b90cf03de 100644 --- a/python/lammps/numpy_wrapper.py +++ b/python/lammps/numpy_wrapper.py @@ -16,14 +16,15 @@ # Written by Richard Berger ################################################################################ -import warnings from ctypes 
import POINTER, c_void_p, c_char_p, c_double, c_int, c_int32, c_int64, cast +from .constants import LAMMPS_AUTODETECT, LAMMPS_INT, LAMMPS_INT_2D, LAMMPS_DOUBLE, \ + LAMMPS_DOUBLE_2D, LAMMPS_INT64, LAMMPS_INT64_2D, LMP_STYLE_GLOBAL, LMP_STYLE_ATOM, \ + LMP_STYLE_LOCAL, LMP_TYPE_VECTOR, LMP_TYPE_ARRAY, LMP_SIZE_VECTOR, LMP_SIZE_ROWS, \ + LMP_SIZE_COLS, LMP_VAR_EQUAL, LMP_VAR_ATOM -from .constants import * # lgtm [py/polluting-import] from .data import NeighList - class numpy_wrapper: """lammps API NumPy Wrapper @@ -54,7 +55,8 @@ class numpy_wrapper: # ------------------------------------------------------------------------- - def extract_atom(self, name, dtype=LAMMPS_AUTODETECT, nelem=LAMMPS_AUTODETECT, dim=LAMMPS_AUTODETECT): + def extract_atom(self, name, dtype=LAMMPS_AUTODETECT, nelem=LAMMPS_AUTODETECT, + dim=LAMMPS_AUTODETECT): """Retrieve per-atom properties from LAMMPS as NumPy arrays This is a wrapper around the :py:meth:`lammps.extract_atom()` method. @@ -63,16 +65,16 @@ class numpy_wrapper: .. note:: - The returned arrays of per-atom data are by default dimensioned - for the range [0:nlocal] since that data is *always* valid. The - underlying storage for the data, however, is typically allocated - for the range of [0:nmax]. Whether there is valid data in the range - [nlocal:nlocal+nghost] depends on whether the property of interest - is also updated for ghost atoms. This is not often the case. In - some cases, it depends on a LAMMPS setting, see for example - :doc:`comm_modify vel yes `. By using the optional - *nelem* parameter the size of the returned NumPy can be overridden. - There is no check whether the number of elements chosen is valid. + The returned vectors or arrays of per-atom data are dimensioned + according to the return value of :py:meth:`lammps.extract_atom_size()`. + Except for the "mass" property, the underlying storage will always be + dimensioned for the range [0:nmax]. 
The actual usable data may be + only in the range [0:nlocal] or [0:nlocal][0:dim]. Whether there is + valid data in the range [nlocal:nlocal+nghost] or [nlocal:nlocal+nghost][0:dim] + depends on whether the property of interest is also updated for ghost atoms. + Also the value of *dim* depends on the value of *name*. By using the optional + *nelem* and *dim* parameters the dimensions of the returned NumPy array can + be overridden. There is no check whether the number of elements chosen is valid. :param name: name of the property :type name: string @@ -89,21 +91,10 @@ class numpy_wrapper: dtype = self.lmp.extract_atom_datatype(name) if nelem == LAMMPS_AUTODETECT: - if name == "mass": - nelem = self.lmp.extract_global("ntypes") + 1 - else: - nelem = self.lmp.extract_global("nlocal") + nelem = self.lmp.extract_atom_size(name, LMP_SIZE_ROWS) if dim == LAMMPS_AUTODETECT: if dtype in (LAMMPS_INT_2D, LAMMPS_DOUBLE_2D, LAMMPS_INT64_2D): - # TODO add other fields - if name in ("x", "v", "f", "x0","omega", "angmom", "torque", "csforce", "vforce", "vest"): - dim = 3 - elif name == "smd_data_9": - dim = 9 - elif name == "smd_stress": - dim = 6 - else: - dim = 2 + dim = self.lmp.extract_atom_size(name, LMP_SIZE_COLS) else: dim = 1 @@ -119,37 +110,6 @@ class numpy_wrapper: # ------------------------------------------------------------------------- - def extract_atom_iarray(self, name, nelem, dim=1): - warnings.warn("deprecated, use extract_atom instead", DeprecationWarning) - - if name in ['id', 'molecule']: - c_int_type = self.lmp.c_tagint - elif name in ['image']: - c_int_type = self.lmp.c_imageint - else: - c_int_type = c_int - - if dim == 1: - raw_ptr = self.lmp.extract_atom(name, LAMMPS_INT) - else: - raw_ptr = self.lmp.extract_atom(name, LAMMPS_INT_2D) - - return self.iarray(c_int_type, raw_ptr, nelem, dim) - - # ------------------------------------------------------------------------- - - def extract_atom_darray(self, name, nelem, dim=1): - warnings.warn("deprecated, use 
extract_atom instead", DeprecationWarning) - - if dim == 1: - raw_ptr = self.lmp.extract_atom(name, LAMMPS_DOUBLE) - else: - raw_ptr = self.lmp.extract_atom(name, LAMMPS_DOUBLE_2D) - - return self.darray(raw_ptr, nelem, dim) - - # ------------------------------------------------------------------------- - def extract_compute(self, cid, cstyle, ctype): """Retrieve data from a LAMMPS compute diff --git a/python/lammps/pylammps.py b/python/lammps/pylammps.py index 9087305ba3..1f5a1a0db9 100644 --- a/python/lammps/pylammps.py +++ b/python/lammps/pylammps.py @@ -27,8 +27,8 @@ import sys import tempfile from collections import namedtuple -from .core import lammps -from .constants import * # lgtm [py/polluting-import] +from lammps.core import lammps +from lammps.constants import LMP_VAR_EQUAL, LMP_VAR_ATOM, LMP_VAR_VECTOR, LMP_VAR_STRING # ------------------------------------------------------------------------- diff --git a/src/.gitignore b/src/.gitignore index c26eaaba30..e557a8cbb2 100644 --- a/src/.gitignore +++ b/src/.gitignore @@ -192,6 +192,8 @@ /colvarproxy_lammps_version.h /fix_colvars.cpp /fix_colvars.h +/inthash.cpp +/inthash.h /fix_plumed.cpp /fix_plumed.h /dump_molfile.cpp @@ -1078,12 +1080,12 @@ /geturl.cpp /geturl.h /gpu_extra.h -/group_ndx.cpp -/group_ndx.h +/group2ndx.cpp +/group2ndx.h /gz_file_writer.cpp /gz_file_writer.h -/ndx_group.cpp -/ndx_group.h +/ndx2group.cpp +/ndx2group.h /hyper.cpp /hyper.h /improper_class2.cpp diff --git a/src/CG-DNA/atom_vec_oxdna.cpp b/src/CG-DNA/atom_vec_oxdna.cpp index 38f78f94bf..0836e9b47c 100644 --- a/src/CG-DNA/atom_vec_oxdna.cpp +++ b/src/CG-DNA/atom_vec_oxdna.cpp @@ -37,6 +37,7 @@ AtomVecOxdna::AtomVecOxdna(LAMMPS *lmp) : AtomVec(lmp) fields_grow = {"id5p"}; fields_copy = {"id5p"}; fields_border = {"id5p"}; + fields_border_vel = {"id5p"}; fields_exchange = {"id5p"}; fields_restart = {"id5p"}; fields_data_atom = {"id", "type", "x"}; diff --git a/src/CG-SPICA/angle_spica.cpp b/src/CG-SPICA/angle_spica.cpp index 
e315e20f13..913428cd9b 100644 --- a/src/CG-SPICA/angle_spica.cpp +++ b/src/CG-SPICA/angle_spica.cpp @@ -522,3 +522,15 @@ double AngleSPICA::single(int type, int i1, int i2, int i3) double tk = k[type] * dtheta; return tk*dtheta + e13; } + +/* ---------------------------------------------------------------------- + return ptr to internal members upon request +------------------------------------------------------------------------ */ + +void *AngleSPICA::extract(const char *str, int &dim) +{ + dim = 1; + if (strcmp(str, "k") == 0) return (void *) k; + if (strcmp(str, "theta0") == 0) return (void *) theta0; + return nullptr; +} diff --git a/src/CG-SPICA/angle_spica.h b/src/CG-SPICA/angle_spica.h index 539512c0e9..5e590ba7a0 100644 --- a/src/CG-SPICA/angle_spica.h +++ b/src/CG-SPICA/angle_spica.h @@ -37,6 +37,7 @@ class AngleSPICA : public Angle { void read_restart(FILE *) override; void write_data(FILE *) override; double single(int, int, int, int) override; + void *extract(const char *, int &) override; protected: double *k, *theta0; diff --git a/src/CLASS2/angle_class2.cpp b/src/CLASS2/angle_class2.cpp index 1dbaaf0568..5000f9f629 100644 --- a/src/CLASS2/angle_class2.cpp +++ b/src/CLASS2/angle_class2.cpp @@ -18,17 +18,17 @@ #include "angle_class2.h" -#include -#include #include "atom.h" -#include "neighbor.h" -#include "domain.h" #include "comm.h" +#include "domain.h" +#include "error.h" #include "force.h" #include "math_const.h" #include "memory.h" -#include "error.h" +#include "neighbor.h" +#include +#include using namespace LAMMPS_NS; using namespace MathConst; @@ -467,3 +467,17 @@ double AngleClass2::single(int type, int i1, int i2, int i3) return energy; } + +/* ---------------------------------------------------------------------- + return ptr to internal members upon request +------------------------------------------------------------------------ */ + +void *AngleClass2::extract(const char *str, int &dim) +{ + dim = 1; + if (strcmp(str, "k2") == 0) return 
(void *) k2; + if (strcmp(str, "k3") == 0) return (void *) k3; + if (strcmp(str, "k4") == 0) return (void *) k4; + if (strcmp(str, "theta0") == 0) return (void *) theta0; + return nullptr; +} diff --git a/src/CLASS2/angle_class2.h b/src/CLASS2/angle_class2.h index f5fbd62b57..4ed6f344ae 100644 --- a/src/CLASS2/angle_class2.h +++ b/src/CLASS2/angle_class2.h @@ -35,6 +35,7 @@ class AngleClass2 : public Angle { void read_restart(FILE *) override; void write_data(FILE *) override; double single(int, int, int, int) override; + void *extract(const char *, int &) override; protected: double *theta0, *k2, *k3, *k4; diff --git a/src/DIFFRACTION/compute_saed.cpp b/src/DIFFRACTION/compute_saed.cpp index 1350257910..e2a7bb33d9 100644 --- a/src/DIFFRACTION/compute_saed.cpp +++ b/src/DIFFRACTION/compute_saed.cpp @@ -401,7 +401,7 @@ void ComputeSAED::compute_vector() // Setting up OMP #if defined(_OPENMP) - if (me == 0 && echo) utils::logmesg(lmp," using {}OMP threads\n",comm->nthreads); + if (me == 0 && echo) utils::logmesg(lmp," using {} OMP thread(s)\n",comm->nthreads); #endif if (me == 0 && echo) utils::logmesg(lmp,"\n"); @@ -478,7 +478,7 @@ void ComputeSAED::compute_vector() } } } // End of pragma omp for region - delete [] f; + delete[] f; } auto scratch = new double[2*nRows]; @@ -499,10 +499,10 @@ void ComputeSAED::compute_vector() utils::logmesg(lmp," 100% \nTime elapsed during compute_saed = {:.2f} sec " "using {:.2f} Mbytes/processor\n-----\n", t2-t0, bytes/1024.0/1024.0); - delete [] xlocal; - delete [] typelocal; - delete [] scratch; - delete [] Fvec; + delete[] xlocal; + delete[] typelocal; + delete[] scratch; + delete[] Fvec; } /* ---------------------------------------------------------------------- diff --git a/src/DIFFRACTION/compute_xrd.cpp b/src/DIFFRACTION/compute_xrd.cpp index 11e0bb9a9f..a769be7d4f 100644 --- a/src/DIFFRACTION/compute_xrd.cpp +++ b/src/DIFFRACTION/compute_xrd.cpp @@ -332,7 +332,7 @@ void ComputeXRD::compute_array() // Setting up OMP #if 
defined(_OPENMP) - if ((me == 0) && echo) utils::logmesg(lmp," using {} OMP threads\n",comm->nthreads); + if ((me == 0) && echo) utils::logmesg(lmp," using {} OMP thread(s)\n",comm->nthreads); #endif if ((me == 0) && echo) { @@ -482,7 +482,7 @@ void ComputeXRD::compute_array() } } // End of pragma omp for region } // End of if LP=1 check - delete [] f; + delete[] f; } // End of pragma omp parallel region auto scratch = new double[2*size_array_rows]; @@ -503,10 +503,10 @@ void ComputeXRD::compute_array() utils::logmesg(lmp," 100% \nTime elapsed during compute_xrd = {:.2f} sec " "using {:.2f} Mbytes/processor\n-----\n", t2-t0, bytes/1024.0/1024.0); - delete [] scratch; - delete [] Fvec; - delete [] xlocal; - delete [] typelocal; + delete[] scratch; + delete[] Fvec; + delete[] xlocal; + delete[] typelocal; } /* ---------------------------------------------------------------------- diff --git a/src/DIFFRACTION/fix_saed_vtk.cpp b/src/DIFFRACTION/fix_saed_vtk.cpp index b3f6693e9e..693bb925b6 100644 --- a/src/DIFFRACTION/fix_saed_vtk.cpp +++ b/src/DIFFRACTION/fix_saed_vtk.cpp @@ -114,6 +114,7 @@ FixSAEDVTK::FixSAEDVTK(LAMMPS *lmp, int narg, char **arg) : memory->create(vector_total,nrows,"saed/vtk:vector_total"); vector_flag = 1; + extvector = 0; size_vector = nrows; if (nOutput == 0) { @@ -248,8 +249,8 @@ FixSAEDVTK::FixSAEDVTK(LAMMPS *lmp, int narg, char **arg) : FixSAEDVTK::~FixSAEDVTK() { - delete [] filename; - delete [] ids; + delete[] filename; + delete[] ids; memory->destroy(vector); memory->destroy(vector_total); if (fp && comm->me == 0) fclose(fp); diff --git a/src/DIPOLE/angle_dipole.cpp b/src/DIPOLE/angle_dipole.cpp index 6ad4a0fb4c..a66f3e1042 100644 --- a/src/DIPOLE/angle_dipole.cpp +++ b/src/DIPOLE/angle_dipole.cpp @@ -263,3 +263,15 @@ double AngleDipole::single(int type, int iRef, int iDip, int /*iDummy*/) return kdg * deltaGamma; // energy } + +/* ---------------------------------------------------------------------- + return ptr to internal members upon 
request +------------------------------------------------------------------------ */ + +void *AngleDipole::extract(const char *str, int &dim) +{ + dim = 1; + if (strcmp(str, "k") == 0) return (void *) k; + if (strcmp(str, "gamma0") == 0) return (void *) gamma0; + return nullptr; +} diff --git a/src/DIPOLE/angle_dipole.h b/src/DIPOLE/angle_dipole.h index 2e55722673..de0f958f98 100644 --- a/src/DIPOLE/angle_dipole.h +++ b/src/DIPOLE/angle_dipole.h @@ -36,6 +36,7 @@ class AngleDipole : public Angle { void read_restart(FILE *) override; void write_data(FILE *) override; double single(int, int, int, int) override; + void *extract(const char *, int &) override; protected: double *k, *gamma0; diff --git a/src/DIPOLE/pair_lj_sf_dipole_sf.h b/src/DIPOLE/pair_lj_sf_dipole_sf.h index 892c227a7a..df01e3dacd 100644 --- a/src/DIPOLE/pair_lj_sf_dipole_sf.h +++ b/src/DIPOLE/pair_lj_sf_dipole_sf.h @@ -26,7 +26,7 @@ namespace LAMMPS_NS { class PairLJSFDipoleSF : public Pair { public: - PairLJSFDipoleSF(class LAMMPS *_lmp) : Pair(_lmp){}; + PairLJSFDipoleSF(class LAMMPS *_lmp) : Pair(_lmp) {}; ~PairLJSFDipoleSF() override; void compute(int, int) override; void settings(int, char **) override; diff --git a/src/EXTRA-COMMAND/group_ndx.cpp b/src/EXTRA-COMMAND/group2ndx.cpp similarity index 97% rename from src/EXTRA-COMMAND/group_ndx.cpp rename to src/EXTRA-COMMAND/group2ndx.cpp index 1dc0d3af97..56bf848923 100644 --- a/src/EXTRA-COMMAND/group_ndx.cpp +++ b/src/EXTRA-COMMAND/group2ndx.cpp @@ -1,6 +1,4 @@ -// -*- c++ -*- - -/* ---------------------------------------------------------------------- +/* -*- c++ -*-------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator https://www.lammps.org/, Sandia National Laboratories LAMMPS development team: developers@lammps.org @@ -16,7 +14,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ -#include 
"group_ndx.h" +#include "group2ndx.h" #include "atom.h" #include "comm.h" diff --git a/src/EXTRA-COMMAND/group_ndx.h b/src/EXTRA-COMMAND/group2ndx.h similarity index 83% rename from src/EXTRA-COMMAND/group_ndx.h rename to src/EXTRA-COMMAND/group2ndx.h index 685ad82d91..e4926e2d92 100644 --- a/src/EXTRA-COMMAND/group_ndx.h +++ b/src/EXTRA-COMMAND/group2ndx.h @@ -1,6 +1,4 @@ -// -*- c++ -*- - -/* ---------------------------------------------------------- +/* -*- c++ -*----------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator https://www.lammps.org/, Sandia National Laboratories LAMMPS development team: developers@lammps.org @@ -19,8 +17,8 @@ CommandStyle(group2ndx,Group2Ndx); // clang-format on #else -#ifndef LMP_GROUP_NDX_H -#define LMP_GROUP_NDX_H +#ifndef LMP_GROUP2NDX_H +#define LMP_GROUP2NDX_H #include "command.h" @@ -28,7 +26,7 @@ namespace LAMMPS_NS { class Group2Ndx : public Command { public: - Group2Ndx(class LAMMPS *lmp) : Command(lmp){}; + Group2Ndx(class LAMMPS *lmp) : Command(lmp) {}; void command(int, char **) override; private: diff --git a/src/EXTRA-COMMAND/ndx_group.cpp b/src/EXTRA-COMMAND/ndx2group.cpp similarity index 97% rename from src/EXTRA-COMMAND/ndx_group.cpp rename to src/EXTRA-COMMAND/ndx2group.cpp index c5b0d3cf8a..ffe159b3b8 100644 --- a/src/EXTRA-COMMAND/ndx_group.cpp +++ b/src/EXTRA-COMMAND/ndx2group.cpp @@ -1,6 +1,4 @@ -// -*- c++ -*- - -/* ---------------------------------------------------------------------- +/* -*- c++ -*--------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator https://www.lammps.org/, Sandia National Laboratories LAMMPS development team: developers@lammps.org @@ -16,7 +14,7 @@ Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ -#include "ndx_group.h" +#include "ndx2group.h" #include "atom.h" #include 
"comm.h" @@ -33,7 +31,7 @@ static std::string find_section(FILE *fp, const std::string &name) { char linebuf[BUFLEN]; - fgets(linebuf, BUFLEN, fp); + if (!fgets(linebuf, BUFLEN, fp)) throw TokenizerException("Read error", utils::getsyserror()); while (!feof(fp)) { if (utils::strmatch(linebuf, "^\\s*\\[.*\\]\\s*$")) { auto words = Tokenizer(linebuf).as_vector(); diff --git a/src/EXTRA-COMMAND/ndx_group.h b/src/EXTRA-COMMAND/ndx2group.h similarity index 84% rename from src/EXTRA-COMMAND/ndx_group.h rename to src/EXTRA-COMMAND/ndx2group.h index 0b35fb62a0..9a2f3c04b5 100644 --- a/src/EXTRA-COMMAND/ndx_group.h +++ b/src/EXTRA-COMMAND/ndx2group.h @@ -1,6 +1,4 @@ -// -*- c++ -*- - -/* ---------------------------------------------------------- +/* -*- c++ -*------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator https://www.lammps.org/, Sandia National Laboratories LAMMPS development team: developers@lammps.org @@ -19,8 +17,8 @@ CommandStyle(ndx2group,Ndx2Group); // clang-format on #else -#ifndef LMP_NDX_GROUP_H -#define LMP_NDX_GROUP_H +#ifndef LMP_NDX2GROUP_H +#define LMP_NDX2GROUP_H #include "command.h" #include @@ -29,7 +27,7 @@ namespace LAMMPS_NS { class Ndx2Group : public Command { public: - Ndx2Group(class LAMMPS *lmp) : Command(lmp){}; + Ndx2Group(class LAMMPS *lmp) : Command(lmp) {}; void command(int, char **) override; private: diff --git a/src/EXTRA-FIX/fix_ave_correlate_long.cpp b/src/EXTRA-FIX/fix_ave_correlate_long.cpp index 738ae3ae4c..abb1ad87de 100644 --- a/src/EXTRA-FIX/fix_ave_correlate_long.cpp +++ b/src/EXTRA-FIX/fix_ave_correlate_long.cpp @@ -166,12 +166,12 @@ FixAveCorrelateLong::FixAveCorrelateLong(LAMMPS *lmp, int narg, char **arg) : overwrite = 1; iarg += 1; } else if (strcmp(arg[iarg], "title1") == 0) { - if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, "fix ave/correlate/long title1", error); + if (iarg + 2 > nargnew) utils::missing_cmd_args(FLERR, "fix 
ave/correlate/long title1", error); delete[] title1; title1 = utils::strdup(arg[iarg + 1]); iarg += 2; } else if (strcmp(arg[iarg], "title2") == 0) { - if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, "fix ave/correlate/long title2", error); + if (iarg + 2 > nargnew) utils::missing_cmd_args(FLERR, "fix ave/correlate/long title2", error); delete[] title2; title2 = utils::strdup(arg[iarg + 1]); iarg += 2; diff --git a/src/EXTRA-MOLECULE/angle_cosine_periodic.cpp b/src/EXTRA-MOLECULE/angle_cosine_periodic.cpp index 0b2a6d336d..6aae8d3ff4 100644 --- a/src/EXTRA-MOLECULE/angle_cosine_periodic.cpp +++ b/src/EXTRA-MOLECULE/angle_cosine_periodic.cpp @@ -336,3 +336,16 @@ void AngleCosinePeriodic::born_matrix(int type, int i1, int i2, int i3, double & du = prefactor * sin(m_angle) / s; du2 = prefactor * (c * sin(m_angle) - s * cos(m_angle) * multiplicity[type]) / (s * s * s); } + +/* ---------------------------------------------------------------------- + return ptr to internal members upon request +------------------------------------------------------------------------ */ + +void *AngleCosinePeriodic::extract(const char *str, int &dim) +{ + dim = 1; + if (strcmp(str, "k") == 0) return (void *) k; + if (strcmp(str, "b") == 0) return (void *) b; + if (strcmp(str, "multiplicity") == 0) return (void *) multiplicity; + return nullptr; +} diff --git a/src/EXTRA-MOLECULE/angle_cosine_periodic.h b/src/EXTRA-MOLECULE/angle_cosine_periodic.h index f04ed04784..f63029919e 100644 --- a/src/EXTRA-MOLECULE/angle_cosine_periodic.h +++ b/src/EXTRA-MOLECULE/angle_cosine_periodic.h @@ -36,6 +36,7 @@ class AngleCosinePeriodic : public Angle { void write_data(FILE *) override; double single(int, int, int, int) override; void born_matrix(int type, int i1, int i2, int i3, double &du, double &du2) override; + void *extract(const char *, int &) override; protected: double *k; diff --git a/src/EXTRA-MOLECULE/angle_cosine_squared_restricted.cpp 
b/src/EXTRA-MOLECULE/angle_cosine_squared_restricted.cpp index 2da31ef893..c6d78ea133 100644 --- a/src/EXTRA-MOLECULE/angle_cosine_squared_restricted.cpp +++ b/src/EXTRA-MOLECULE/angle_cosine_squared_restricted.cpp @@ -296,3 +296,15 @@ void AngleCosineSquaredRestricted::born_matrix(int type, int i1, int i2, int i3, du2 = 2 * k[type] * numerator / denominator; } + +/* ---------------------------------------------------------------------- + return ptr to internal members upon request +------------------------------------------------------------------------ */ + +void *AngleCosineSquaredRestricted::extract(const char *str, int &dim) +{ + dim = 1; + if (strcmp(str, "k") == 0) return (void *) k; + if (strcmp(str, "theta0") == 0) return (void *) theta0; + return nullptr; +} diff --git a/src/EXTRA-MOLECULE/angle_cosine_squared_restricted.h b/src/EXTRA-MOLECULE/angle_cosine_squared_restricted.h index 674252b7d0..b38b6bc4bd 100644 --- a/src/EXTRA-MOLECULE/angle_cosine_squared_restricted.h +++ b/src/EXTRA-MOLECULE/angle_cosine_squared_restricted.h @@ -36,6 +36,7 @@ class AngleCosineSquaredRestricted : public Angle { void write_data(FILE *) override; double single(int, int, int, int) override; void born_matrix(int type, int i1, int i2, int i3, double &du, double &du2) override; + void *extract(const char *, int &) override; protected: double *k, *theta0; diff --git a/src/EXTRA-MOLECULE/angle_fourier.cpp b/src/EXTRA-MOLECULE/angle_fourier.cpp index da1667c06f..abcda6d036 100644 --- a/src/EXTRA-MOLECULE/angle_fourier.cpp +++ b/src/EXTRA-MOLECULE/angle_fourier.cpp @@ -309,3 +309,16 @@ void AngleFourier::born_matrix(int type, int i1, int i2, int i3, double &du, dou du = k[type] * (C1[type] + 4 * C2[type] * c); } +/* ---------------------------------------------------------------------- + return ptr to internal members upon request +------------------------------------------------------------------------ */ + +void *AngleFourier::extract(const char *str, int &dim) +{ + dim = 1; + 
if (strcmp(str, "k") == 0) return (void *) k; + if (strcmp(str, "C0") == 0) return (void *) C0; + if (strcmp(str, "C1") == 0) return (void *) C1; + if (strcmp(str, "C2") == 0) return (void *) C2; + return nullptr; +} diff --git a/src/EXTRA-MOLECULE/angle_fourier.h b/src/EXTRA-MOLECULE/angle_fourier.h index 8fa5d14b26..c0e30c8e1a 100644 --- a/src/EXTRA-MOLECULE/angle_fourier.h +++ b/src/EXTRA-MOLECULE/angle_fourier.h @@ -36,6 +36,7 @@ class AngleFourier : public Angle { void write_data(FILE *) override; double single(int, int, int, int) override; void born_matrix(int type, int i1, int i2, int i3, double &du, double &du2) override; + void *extract(const char *, int &) override; protected: double *k, *C0, *C1, *C2; diff --git a/src/EXTRA-MOLECULE/angle_fourier_simple.cpp b/src/EXTRA-MOLECULE/angle_fourier_simple.cpp index 6de7956ffa..143a008039 100644 --- a/src/EXTRA-MOLECULE/angle_fourier_simple.cpp +++ b/src/EXTRA-MOLECULE/angle_fourier_simple.cpp @@ -316,3 +316,16 @@ void AngleFourierSimple::born_matrix(int type, int i1, int i2, int i3, double &d du2 = k[type] * C[type] * N[type] * (cos(theta) * sin(N[type] * theta) - N[type] * sin(theta) * cos(N[type] * theta)) / pow(sin(theta),3); } + +/* ---------------------------------------------------------------------- + return ptr to internal members upon request +------------------------------------------------------------------------ */ + +void *AngleFourierSimple::extract(const char *str, int &dim) +{ + dim = 1; + if (strcmp(str, "k") == 0) return (void *) k; + if (strcmp(str, "C") == 0) return (void *) C; + if (strcmp(str, "N") == 0) return (void *) N; + return nullptr; +} diff --git a/src/EXTRA-MOLECULE/angle_fourier_simple.h b/src/EXTRA-MOLECULE/angle_fourier_simple.h index 3296ba6067..d37b3a83a8 100644 --- a/src/EXTRA-MOLECULE/angle_fourier_simple.h +++ b/src/EXTRA-MOLECULE/angle_fourier_simple.h @@ -36,6 +36,7 @@ class AngleFourierSimple : public Angle { void write_data(FILE *) override; double single(int, int, 
int, int) override; void born_matrix(int type, int i1, int i2, int i3, double &du, double &du2) override; + void *extract(const char *, int &) override; protected: double *k, *C, *N; diff --git a/src/EXTRA-MOLECULE/angle_quartic.cpp b/src/EXTRA-MOLECULE/angle_quartic.cpp index aade6b4534..616c81c749 100644 --- a/src/EXTRA-MOLECULE/angle_quartic.cpp +++ b/src/EXTRA-MOLECULE/angle_quartic.cpp @@ -325,3 +325,17 @@ void AngleQuartic::born_matrix(int type, int i1, int i2, int i3, double &du, dou du2 = (2.0 * k2[type] + 6.0 * k3[type] * dtheta + 12.0 * k4[type] * dtheta2) / (s*s) - (2.0 * k2[type] * dtheta + 3.0 * k3[type] * dtheta2 + 4.0 * k4[type] * dtheta3) * c / (s*s*s); } + +/* ---------------------------------------------------------------------- + return ptr to internal members upon request +------------------------------------------------------------------------ */ + +void *AngleQuartic::extract(const char *str, int &dim) +{ + dim = 1; + if (strcmp(str, "k2") == 0) return (void *) k2; + if (strcmp(str, "k3") == 0) return (void *) k3; + if (strcmp(str, "k4") == 0) return (void *) k4; + if (strcmp(str, "theta0") == 0) return (void *) theta0; + return nullptr; +} diff --git a/src/EXTRA-MOLECULE/angle_quartic.h b/src/EXTRA-MOLECULE/angle_quartic.h index 7de51b24d1..3ff7f6f3e4 100644 --- a/src/EXTRA-MOLECULE/angle_quartic.h +++ b/src/EXTRA-MOLECULE/angle_quartic.h @@ -36,6 +36,7 @@ class AngleQuartic : public Angle { void write_data(FILE *) override; double single(int, int, int, int) override; void born_matrix(int type, int i1, int i2, int i3, double &du, double &du2) override; + void *extract(const char *, int &) override; protected: double *k2, *k3, *k4, *theta0; diff --git a/src/EXTRA-MOLECULE/bond_harmonic_shift.cpp b/src/EXTRA-MOLECULE/bond_harmonic_shift.cpp index bd106c8567..6c87d47f5e 100644 --- a/src/EXTRA-MOLECULE/bond_harmonic_shift.cpp +++ b/src/EXTRA-MOLECULE/bond_harmonic_shift.cpp @@ -228,3 +228,16 @@ void BondHarmonicShift::born_matrix(int type, double 
rsq, int /*i*/, int /*j*/, du2 = 2 * k[type]; if (r > 0.0) du = du2 * dr; } + +/* ---------------------------------------------------------------------- + return ptr to internal members upon request +------------------------------------------------------------------------ */ + +void *BondHarmonicShift::extract(const char *str, int &dim) +{ + dim = 1; + if (strcmp(str, "k") == 0) return (void *) k; + if (strcmp(str, "r0") == 0) return (void *) r0; + if (strcmp(str, "r1") == 0) return (void *) r1; + return nullptr; +} diff --git a/src/EXTRA-MOLECULE/bond_harmonic_shift.h b/src/EXTRA-MOLECULE/bond_harmonic_shift.h index 922f1ba00d..68afc57bf7 100644 --- a/src/EXTRA-MOLECULE/bond_harmonic_shift.h +++ b/src/EXTRA-MOLECULE/bond_harmonic_shift.h @@ -36,6 +36,7 @@ class BondHarmonicShift : public Bond { void write_data(FILE *) override; double single(int, double, int, int, double &) override; void born_matrix(int, double, int, int, double &, double &) override; + void *extract(const char *, int &) override; protected: double *k, *r0, *r1; diff --git a/src/GPU/pair_amoeba_gpu.h b/src/GPU/pair_amoeba_gpu.h index c90339585b..3f5f89424c 100644 --- a/src/GPU/pair_amoeba_gpu.h +++ b/src/GPU/pair_amoeba_gpu.h @@ -64,8 +64,7 @@ class PairAmoebaGPU : public PairAmoeba { void udirect2b_cpu(); - template - void compute_force_from_torque(const numtyp*, double**, double*); + template void compute_force_from_torque(const numtyp *, double **, double *); }; } // namespace LAMMPS_NS diff --git a/src/GPU/pair_hippo_gpu.h b/src/GPU/pair_hippo_gpu.h index 5f36d6e71f..d00c490243 100644 --- a/src/GPU/pair_hippo_gpu.h +++ b/src/GPU/pair_hippo_gpu.h @@ -65,8 +65,7 @@ class PairHippoGPU : public PairAmoeba { void udirect2b_cpu(); - template - void compute_force_from_torque(const numtyp*, double**, double*); + template void compute_force_from_torque(const numtyp *, double **, double *); }; } // namespace LAMMPS_NS diff --git a/src/GRANULAR/pair_granular.h b/src/GRANULAR/pair_granular.h index 
46c5570543..f94f4f5dff 100644 --- a/src/GRANULAR/pair_granular.h +++ b/src/GRANULAR/pair_granular.h @@ -75,7 +75,7 @@ class PairGranular : public Pair { // granular models int nmodels, maxmodels; - class Granular_NS::GranularModel** models_list; + class Granular_NS::GranularModel **models_list; int **types_indices; // optional user-specified global cutoff, per-type user-specified cutoffs diff --git a/src/INTERLAYER/pair_aip_water_2dm.h b/src/INTERLAYER/pair_aip_water_2dm.h index 295cdfffb9..91f9395214 100644 --- a/src/INTERLAYER/pair_aip_water_2dm.h +++ b/src/INTERLAYER/pair_aip_water_2dm.h @@ -30,7 +30,6 @@ class PairAIPWATER2DM : virtual public PairILPTMD { protected: void settings(int, char **) override; - }; } // namespace LAMMPS_NS diff --git a/src/INTERLAYER/pair_ilp_graphene_hbn.h b/src/INTERLAYER/pair_ilp_graphene_hbn.h index e151ecc801..5d5c1cce54 100644 --- a/src/INTERLAYER/pair_ilp_graphene_hbn.h +++ b/src/INTERLAYER/pair_ilp_graphene_hbn.h @@ -39,7 +39,12 @@ class PairILPGrapheneHBN : public Pair { static constexpr int NPARAMS_PER_LINE = 13; - enum { ILP_GrhBN, ILP_TMD, SAIP_METAL, AIP_WATER_2DM }; // for telling class variants apart in shared code + enum { + ILP_GrhBN, + ILP_TMD, + SAIP_METAL, + AIP_WATER_2DM + }; // for telling class variants apart in shared code protected: int me; diff --git a/src/KOKKOS/fft3d_kokkos.cpp b/src/KOKKOS/fft3d_kokkos.cpp index 202d46e788..f4c5fa5028 100644 --- a/src/KOKKOS/fft3d_kokkos.cpp +++ b/src/KOKKOS/fft3d_kokkos.cpp @@ -43,14 +43,19 @@ FFT3dKokkos::FFT3dKokkos(LAMMPS *lmp, MPI_Comm comm, int nfast, int #if defined(LMP_KOKKOS_GPU) int ngpus = lmp->kokkos->ngpus; ExecutionSpace execution_space = ExecutionSpaceFromDevice::space; -#endif -#if defined(FFT_KOKKOS_MKL) +#if defined(FFT_KOKKOS_MKL_GPU) + if (ngpus > 0 && execution_space == Host) + lmp->error->all(FLERR,"Cannot use the MKL library with Kokkos on the host CPUs in a GPU build"); +#elif defined(FFT_KOKKOS_MKL) if (ngpus > 0 && execution_space == Device) 
lmp->error->all(FLERR,"Cannot use the MKL library with Kokkos on GPUs"); #elif defined(FFT_KOKKOS_FFTW3) if (ngpus > 0 && execution_space == Device) lmp->error->all(FLERR,"Cannot use the FFTW library with Kokkos on GPUs"); +#elif defined(FFT_KOKKOS_NVPL) + if (ngpus > 0 && execution_space == Device) + lmp->error->all(FLERR,"Cannot use the NVPL FFT library with Kokkos on GPUs"); #elif defined(FFT_KOKKOS_CUFFT) if (ngpus > 0 && execution_space == Host) lmp->error->all(FLERR,"Cannot use the cuFFT library with Kokkos on the host CPUs"); @@ -69,6 +74,8 @@ FFT3dKokkos::FFT3dKokkos(LAMMPS *lmp, MPI_Comm comm, int nfast, int if (stack_size < 2048) cudaDeviceSetLimit(cudaLimitStackSize,2048); #endif +#endif + #endif plan = fft_3d_create_plan_kokkos(comm,nfast,nmid,nslow, @@ -150,7 +157,7 @@ public: KOKKOS_INLINE_FUNCTION void operator() (const int &i) const { -#if defined(FFT_KOKKOS_FFTW3) || defined(FFT_KOKKOS_CUFFT) || defined(FFT_KOKKOS_HIPFFT) +#if defined(FFT_KOKKOS_FFTW3) || defined(FFT_KOKKOS_CUFFT) || defined(FFT_KOKKOS_HIPFFT) || defined(FFT_KOKKOS_MKL_GPU) || defined(FFT_KOKKOS_NVPL) FFT_SCALAR* out_ptr = (FFT_SCALAR *)(d_out.data()+i); *(out_ptr++) *= norm; *(out_ptr++) *= norm; @@ -220,12 +227,17 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, total = plan->total1; length = plan->length1; - #if defined(FFT_KOKKOS_MKL) + #if defined(FFT_KOKKOS_MKL_GPU) + if (flag == 1) + oneapi::mkl::dft::compute_forward(*(plan->desc_fast), (FFT_SCALAR*)d_data.data()); + else + oneapi::mkl::dft::compute_backward(*(plan->desc_fast), (FFT_SCALAR*)d_data.data()); + #elif defined(FFT_KOKKOS_MKL) if (flag == 1) DftiComputeForward(plan->handle_fast,d_data.data()); else DftiComputeBackward(plan->handle_fast,d_data.data()); - #elif defined(FFT_KOKKOS_FFTW3) + #elif defined(FFT_KOKKOS_FFTW3) || defined(FFT_KOKKOS_NVPL) if (flag == 1) FFTW_API(execute_dft)(plan->plan_fast_forward,(FFT_KOKKOS_DATA*)d_data.data(),(FFT_KOKKOS_DATA*)d_data.data()); else @@ -266,12 
+278,17 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, total = plan->total2; length = plan->length2; - #if defined(FFT_KOKKOS_MKL) + #if defined(FFT_KOKKOS_MKL_GPU) + if (flag == 1) + oneapi::mkl::dft::compute_forward(*(plan->desc_mid), (FFT_SCALAR*)d_data.data()); + else + oneapi::mkl::dft::compute_backward(*(plan->desc_mid), (FFT_SCALAR*)d_data.data()); + #elif defined(FFT_KOKKOS_MKL) if (flag == 1) DftiComputeForward(plan->handle_mid,d_data.data()); else DftiComputeBackward(plan->handle_mid,d_data.data()); - #elif defined(FFT_KOKKOS_FFTW3) + #elif defined(FFT_KOKKOS_FFTW3) || defined(FFT_KOKKOS_NVPL) if (flag == 1) FFTW_API(execute_dft)(plan->plan_mid_forward,(FFT_KOKKOS_DATA*)d_data.data(),(FFT_KOKKOS_DATA*)d_data.data()); else @@ -310,12 +327,17 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, total = plan->total3; length = plan->length3; - #if defined(FFT_KOKKOS_MKL) + #if defined(FFT_KOKKOS_MKL_GPU) + if (flag == 1) + oneapi::mkl::dft::compute_forward(*(plan->desc_slow), (FFT_SCALAR*)d_data.data()); + else + oneapi::mkl::dft::compute_backward(*(plan->desc_slow), (FFT_SCALAR*)d_data.data()); + #elif defined(FFT_KOKKOS_MKL) if (flag == 1) DftiComputeForward(plan->handle_slow,d_data.data()); else DftiComputeBackward(plan->handle_slow,d_data.data()); - #elif defined(FFT_KOKKOS_FFTW3) + #elif defined(FFT_KOKKOS_FFTW3) || defined(FFT_KOKKOS_NVPL) if (flag == 1) FFTW_API(execute_dft)(plan->plan_slow_forward,(FFT_KOKKOS_DATA*)d_data.data(),(FFT_KOKKOS_DATA*)d_data.data()); else @@ -609,7 +631,28 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl // system specific pre-computation of 1d FFT coeffs // and scaling normalization -#if defined(FFT_KOKKOS_MKL) +#if defined(FFT_KOKKOS_MKL_GPU) + sycl::queue queue = LMPDeviceType().sycl_queue(); + + plan->desc_fast = new descriptor_t (nfast); + plan->desc_fast->set_value(oneapi::mkl::dft::config_param::NUMBER_OF_TRANSFORMS, plan->total1/nfast); + 
plan->desc_fast->set_value(oneapi::mkl::dft::config_param::FWD_DISTANCE, plan->length1); + plan->desc_fast->set_value(oneapi::mkl::dft::config_param::BWD_DISTANCE, plan->length1); + plan->desc_fast->commit(queue); + + plan->desc_mid = new descriptor_t (nmid); + plan->desc_mid->set_value(oneapi::mkl::dft::config_param::NUMBER_OF_TRANSFORMS, plan->total2/nmid); + plan->desc_mid->set_value(oneapi::mkl::dft::config_param::FWD_DISTANCE, plan->length2); + plan->desc_mid->set_value(oneapi::mkl::dft::config_param::BWD_DISTANCE, plan->length2); + plan->desc_mid->commit(queue); + + plan->desc_slow = new descriptor_t (nslow); + plan->desc_slow->set_value(oneapi::mkl::dft::config_param::NUMBER_OF_TRANSFORMS, plan->total3/nslow); + plan->desc_slow->set_value(oneapi::mkl::dft::config_param::FWD_DISTANCE, plan->length3); + plan->desc_slow->set_value(oneapi::mkl::dft::config_param::BWD_DISTANCE, plan->length3); + plan->desc_slow->commit(queue); + +#elif defined(FFT_KOKKOS_MKL) DftiCreateDescriptor( &(plan->handle_fast), FFT_KOKKOS_MKL_PREC, DFTI_COMPLEX, 1, (MKL_LONG)nfast); DftiSetValue(plan->handle_fast, DFTI_NUMBER_OF_TRANSFORMS, @@ -646,7 +689,7 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl #endif DftiCommitDescriptor(plan->handle_slow); -#elif defined(FFT_KOKKOS_FFTW3) +#elif defined(FFT_KOKKOS_FFTW3) || defined(FFT_KOKKOS_NVPL) #if defined (FFT_KOKKOS_FFTW_THREADS) if (nthreads > 1) { @@ -781,11 +824,15 @@ void FFT3dKokkos::fft_3d_destroy_plan_kokkos(struct fft_plan_3d_kokk if (plan->mid2_plan) remapKK->remap_3d_destroy_plan_kokkos(plan->mid2_plan); if (plan->post_plan) remapKK->remap_3d_destroy_plan_kokkos(plan->post_plan); -#if defined(FFT_KOKKOS_MKL) +#if defined(FFT_KOKKOS_MKL_GPU) + delete plan->desc_fast; + delete plan->desc_mid; + delete plan->desc_slow; +#elif defined(FFT_KOKKOS_MKL) DftiFreeDescriptor(&(plan->handle_fast)); DftiFreeDescriptor(&(plan->handle_mid)); DftiFreeDescriptor(&(plan->handle_slow)); -#elif defined(FFT_KOKKOS_FFTW3) +#elif 
defined(FFT_KOKKOS_FFTW3) || defined(FFT_KOKKOS_NVPL) FFTW_API(destroy_plan)(plan->plan_slow_forward); FFTW_API(destroy_plan)(plan->plan_slow_backward); FFTW_API(destroy_plan)(plan->plan_mid_forward); @@ -856,7 +903,7 @@ void FFT3dKokkos::fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_DATA_ // fftw3 and Dfti in MKL encode the number of transforms // into the plan, so we cannot operate on a smaller data set -#if defined(FFT_KOKKOS_MKL) || defined(FFT_KOKKOS_FFTW3) +#if defined(FFT_KOKKOS_MKL_GPU) || defined(FFT_KOKKOS_MKL) || defined(FFT_KOKKOS_FFTW3) || defined(FFT_KOKKOS_NVPL) if ((total1 > nsize) || (total2 > nsize) || (total3 > nsize)) return; #endif @@ -867,7 +914,17 @@ void FFT3dKokkos::fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_DATA_ // perform 1d FFTs in each of 3 dimensions // data is just an array of 0.0 -#if defined(FFT_KOKKOS_MKL) +#if defined(FFT_KOKKOS_MKL_GPU) + if (flag == -1) { + oneapi::mkl::dft::compute_forward(*(plan->desc_fast), (FFT_SCALAR*)d_data.data()); + oneapi::mkl::dft::compute_forward(*(plan->desc_mid), (FFT_SCALAR*)d_data.data()); + oneapi::mkl::dft::compute_forward(*(plan->desc_slow), (FFT_SCALAR*)d_data.data()); + } else { + oneapi::mkl::dft::compute_backward(*(plan->desc_fast), (FFT_SCALAR*)d_data.data()); + oneapi::mkl::dft::compute_backward(*(plan->desc_mid), (FFT_SCALAR*)d_data.data()); + oneapi::mkl::dft::compute_backward(*(plan->desc_slow), (FFT_SCALAR*)d_data.data()); + } +#elif defined(FFT_KOKKOS_MKL) if (flag == -1) { DftiComputeForward(plan->handle_fast,d_data.data()); DftiComputeForward(plan->handle_mid,d_data.data()); @@ -877,7 +934,7 @@ void FFT3dKokkos::fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_DATA_ DftiComputeBackward(plan->handle_mid,d_data.data()); DftiComputeBackward(plan->handle_slow,d_data.data()); } -#elif defined(FFT_KOKKOS_FFTW3) +#elif defined(FFT_KOKKOS_FFTW3) || defined(FFT_KOKKOS_NVPL) if (flag == -1) { 
FFTW_API(execute_dft)(plan->plan_fast_forward,(FFT_KOKKOS_DATA*)d_data.data(),(FFT_KOKKOS_DATA*)d_data.data()); FFTW_API(execute_dft)(plan->plan_mid_forward,(FFT_KOKKOS_DATA*)d_data.data(),(FFT_KOKKOS_DATA*)d_data.data()); diff --git a/src/KOKKOS/fft3d_kokkos.h b/src/KOKKOS/fft3d_kokkos.h index 48b0fd76de..ae7ef3a6d2 100644 --- a/src/KOKKOS/fft3d_kokkos.h +++ b/src/KOKKOS/fft3d_kokkos.h @@ -21,6 +21,14 @@ namespace LAMMPS_NS { +#if defined(FFT_KOKKOS_MKL_GPU) +#ifdef FFT_SINGLE + typedef oneapi::mkl::dft::descriptor descriptor_t; +#else + typedef oneapi::mkl::dft::descriptor descriptor_t; +#endif +#endif + // ------------------------------------------------------------------------- // plan for how to perform a 3d FFT @@ -45,11 +53,15 @@ struct fft_plan_3d_kokkos { double norm; // normalization factor for rescaling // system specific 1d FFT info -#if defined(FFT_KOKKOS_MKL) +#if defined(FFT_KOKKOS_MKL_GPU) + descriptor_t *desc_fast; + descriptor_t *desc_mid; + descriptor_t *desc_slow; +#elif defined(FFT_KOKKOS_MKL) DFTI_DESCRIPTOR *handle_fast; DFTI_DESCRIPTOR *handle_mid; DFTI_DESCRIPTOR *handle_slow; -#elif defined(FFT_KOKKOS_FFTW3) +#elif defined(FFT_KOKKOS_FFTW3) || defined(FFT_KOKKOS_NVPL) FFTW_API(plan) plan_fast_forward; FFTW_API(plan) plan_fast_backward; FFTW_API(plan) plan_mid_forward; diff --git a/src/KOKKOS/fftdata_kokkos.h b/src/KOKKOS/fftdata_kokkos.h index 0cb59f49cb..be6c8c199d 100644 --- a/src/KOKKOS/fftdata_kokkos.h +++ b/src/KOKKOS/fftdata_kokkos.h @@ -36,8 +36,8 @@ # endif #endif -// with KOKKOS in CUDA or HIP mode we can only have -// CUFFT/HIPFFT or KISS, thus undefine all other +// with KOKKOS in CUDA, HIP, or SYCL mode we can only have +// CUFFT/HIPFFT/MKL_GPU or KISS, thus undefine all other // FFTs here #ifdef KOKKOS_ENABLE_CUDA @@ -60,12 +60,28 @@ # if defined(FFT_KOKKOS_FFTW3) # undef FFT_KOKKOS_FFTW3 # endif +# if defined(FFT_KOKKOS_NVPL) +# undef FFT_KOKKOS_NVPL +# endif # if defined(FFT_KOKKOS_MKL) # undef FFT_KOKKOS_MKL # endif # if 
!defined(FFT_KOKKOS_HIPFFT) && !defined(FFT_KOKKOS_KISS) # define FFT_KOKKOS_KISS # endif +#elif defined(KOKKOS_ENABLE_SYCL) +# if defined(FFT_KOKKOS_FFTW) +# undef FFT_KOKKOS_FFTW +# endif +# if defined(FFT_KOKKOS_FFTW3) +# undef FFT_KOKKOS_FFTW3 +# endif +# if defined(FFT_KOKKOS_MKL) +# undef FFT_KOKKOS_MKL +# endif +# if !defined(FFT_KOKKOS_MKL_GPU) && !defined(FFT_KOKKOS_KISS) +# define FFT_KOKKOS_KISS +# endif #else # if defined(FFT_KOKKOS_CUFFT) # error "Must enable CUDA with KOKKOS to use -DFFT_KOKKOS_CUFFT" @@ -73,6 +89,9 @@ # if defined(FFT_KOKKOS_HIPFFT) # error "Must enable HIP with KOKKOS to use -DFFT_KOKKOS_HIPFFT" # endif +# if defined(FFT_KOKKOS_MKL_GPU) +# error "Must enable SYCL with KOKKOS to use -DFFT_KOKKOS_MKL_GPU" +# endif #endif // set strings for library info output @@ -85,12 +104,27 @@ #define LMP_FFT_KOKKOS_LIB "FFTW3" #elif defined(FFT_KOKKOS_MKL) #define LMP_FFT_KOKKOS_LIB "MKL FFT" +#elif defined(FFT_KOKKOS_MKL_GPU) +#define LMP_FFT_KOKKOS_LIB "MKL_GPU FFT" +#elif defined(FFT_KOKKOS_NVPL) +#define LMP_FFT_KOKKOS_LIB "NVPL FFT" #else #define LMP_FFT_KOKKOS_LIB "KISS FFT" #endif -#if defined(FFT_KOKKOS_MKL) +#if defined(FFT_KOKKOS_MKL_GPU) + #include "CL/sycl.hpp" + #include "oneapi/mkl/dfti.hpp" + #include "mkl.h" + #if defined(FFT_SINGLE) + typedef std::complex FFT_KOKKOS_DATA; + #define FFT_KOKKOS_MKL_PREC DFTI_SINGLE + #else + typedef std::complex FFT_KOKKOS_DATA; + #define FFT_KOKKOS_MKL_PREC DFTI_DOUBLE + #endif +#elif defined(FFT_KOKKOS_MKL) #include "mkl_dfti.h" #if defined(FFT_SINGLE) typedef float _Complex FFT_KOKKOS_DATA; @@ -108,6 +142,15 @@ typedef fftw_complex FFT_KOKKOS_DATA; #define FFTW_API(function) fftw_ ## function #endif +#elif defined(FFT_KOKKOS_NVPL) + #include "nvpl_fftw.h" + #if defined(FFT_SINGLE) + typedef fftwf_complex FFT_KOKKOS_DATA; + #define FFTW_API(function) fftwf_ ## function + #else + typedef fftw_complex FFT_KOKKOS_DATA; + #define FFTW_API(function) fftw_ ## function + #endif #elif 
defined(FFT_KOKKOS_CUFFT) #include "cufft.h" #if defined(FFT_SINGLE) @@ -146,7 +189,7 @@ #endif // (double[2]*) is not a 1D pointer -#if defined(FFT_KOKKOS_FFTW3) +#if defined(FFT_KOKKOS_FFTW3) || defined(FFT_KOKKOS_NVPL) typedef FFT_SCALAR* FFT_KOKKOS_DATA_POINTER; #else typedef FFT_KOKKOS_DATA* FFT_KOKKOS_DATA_POINTER; diff --git a/src/KOKKOS/fix_acks2_reaxff_kokkos.cpp b/src/KOKKOS/fix_acks2_reaxff_kokkos.cpp index 308df20c0e..bfcb66e525 100644 --- a/src/KOKKOS/fix_acks2_reaxff_kokkos.cpp +++ b/src/KOKKOS/fix_acks2_reaxff_kokkos.cpp @@ -53,7 +53,8 @@ FixACKS2ReaxFFKokkos(LAMMPS *lmp, int narg, char **arg) : datamask_read = X_MASK | V_MASK | F_MASK | MASK_MASK | Q_MASK | TYPE_MASK | TAG_MASK; datamask_modify = Q_MASK | X_MASK; - nmax = m_cap = 0; + nmax = 0; + m_cap_big = 0; allocated_flag = 0; nprev = 4; @@ -66,7 +67,7 @@ FixACKS2ReaxFFKokkos(LAMMPS *lmp, int narg, char **arg) : buf = new double[2*nprev]; prev_last_rows_rank = 0; - d_mfill_offset = typename AT::t_int_scalar("acks2/kk:mfill_offset"); + d_mfill_offset = typename AT::t_bigint_scalar("acks2/kk:mfill_offset"); } /* ---------------------------------------------------------------------- */ @@ -364,7 +365,7 @@ void FixACKS2ReaxFFKokkos::pre_force(int /*vflag*/) // free duplicated memory - dup_X_diag = decltype(dup_X_diag)(); + dup_X_diag = {}; } if (neighflag != FULL) { @@ -418,10 +419,10 @@ void FixACKS2ReaxFFKokkos::pre_force(int /*vflag*/) template KOKKOS_INLINE_FUNCTION -void FixACKS2ReaxFFKokkos::num_neigh_item(int ii, int &maxneigh) const +void FixACKS2ReaxFFKokkos::num_neigh_item(int ii, bigint &totneigh) const { const int i = d_ilist[ii]; - maxneigh += d_numneigh[i]; + totneigh += d_numneigh[i]; } /* ---------------------------------------------------------------------- */ @@ -433,39 +434,39 @@ void FixACKS2ReaxFFKokkos::allocate_matrix() // determine the total space for the H matrix - m_cap = 0; + m_cap_big = 0; // limit scope of functor to allow deallocation of views { 
FixACKS2ReaxFFKokkosNumNeighFunctor neigh_functor(this); - Kokkos::parallel_reduce(nn,neigh_functor,m_cap); + Kokkos::parallel_reduce(nn,neigh_functor,m_cap_big); } // deallocate first to reduce memory overhead - d_firstnbr = typename AT::t_int_1d(); + d_firstnbr = typename AT::t_bigint_1d(); d_numnbrs = typename AT::t_int_1d(); d_jlist = typename AT::t_int_1d(); d_val = typename AT::t_ffloat_1d(); - d_firstnbr_X = typename AT::t_int_1d(); + d_firstnbr_X = typename AT::t_bigint_1d(); d_numnbrs_X = typename AT::t_int_1d(); d_jlist_X = typename AT::t_int_1d(); d_val_X = typename AT::t_ffloat_1d(); // H matrix - d_firstnbr = typename AT::t_int_1d("acks2/kk:firstnbr",nmax); + d_firstnbr = typename AT::t_bigint_1d("acks2/kk:firstnbr",nmax); d_numnbrs = typename AT::t_int_1d("acks2/kk:numnbrs",nmax); - d_jlist = typename AT::t_int_1d("acks2/kk:jlist",m_cap); - d_val = typename AT::t_ffloat_1d("acks2/kk:val",m_cap); + d_jlist = typename AT::t_int_1d("acks2/kk:jlist",m_cap_big); + d_val = typename AT::t_ffloat_1d("acks2/kk:val",m_cap_big); // X matrix - d_firstnbr_X = typename AT::t_int_1d("acks2/kk:firstnbr_X",nmax); + d_firstnbr_X = typename AT::t_bigint_1d("acks2/kk:firstnbr_X",nmax); d_numnbrs_X = typename AT::t_int_1d("acks2/kk:numnbrs_X",nmax); - d_jlist_X = typename AT::t_int_1d("acks2/kk:jlist_X",m_cap); - d_val_X = typename AT::t_ffloat_1d("acks2/kk:val_X",m_cap); + d_jlist_X = typename AT::t_int_1d("acks2/kk:jlist_X",m_cap_big); + d_val_X = typename AT::t_ffloat_1d("acks2/kk:val_X",m_cap_big); } /* ---------------------------------------------------------------------- */ @@ -566,7 +567,7 @@ void FixACKS2ReaxFFKokkos::operator() (TagACKS2Zero, const int &ii) template template KOKKOS_INLINE_FUNCTION -void FixACKS2ReaxFFKokkos::compute_h_item(int ii, int &m_fill, const bool &final) const +void FixACKS2ReaxFFKokkos::compute_h_item(int ii, bigint &m_fill, const bool &final) const { const int i = d_ilist[ii]; int j,jj,jtype; @@ -619,7 +620,7 @@ void 
FixACKS2ReaxFFKokkos::compute_h_item(int ii, int &m_fill, const m_fill++; } if (final) - d_numnbrs[i] = m_fill - d_firstnbr[i]; + d_numnbrs[i] = int(m_fill - d_firstnbr[i]); } } @@ -698,9 +699,9 @@ void FixACKS2ReaxFFKokkos::compute_h_team( // calculate the global memory offset from where the H matrix values to be // calculated by the current team will be stored in d_val - int team_firstnbr_idx = 0; + bigint team_firstnbr_idx = 0; Kokkos::single(Kokkos::PerTeam(team), - [=](int &val) { + [=](bigint &val) { int totalnbrs = s_firstnbr[lastatom - firstatom - 1] + s_numnbrs[lastatom - firstatom - 1]; val = Kokkos::atomic_fetch_add(&d_mfill_offset(), totalnbrs); @@ -726,7 +727,7 @@ void FixACKS2ReaxFFKokkos::compute_h_team( int jnum = s_numnbrs[idx]; // calculate the write-offset for atom-i's first neighbor - int atomi_firstnbr_idx = team_firstnbr_idx + s_firstnbr[idx]; + bigint atomi_firstnbr_idx = team_firstnbr_idx + s_firstnbr[idx]; Kokkos::single(Kokkos::PerThread(team), [&]() { d_firstnbr[i] = atomi_firstnbr_idx; }); @@ -739,7 +740,7 @@ void FixACKS2ReaxFFKokkos::compute_h_team( // are processed in batches and the batch size is vector_length for (int jj_start = 0; jj_start < jnum; jj_start += vector_length) { - int atomi_nbr_writeIdx = atomi_firstnbr_idx + atomi_nbrs_inH; + bigint atomi_nbr_writeIdx = atomi_firstnbr_idx + atomi_nbrs_inH; // count the # of neighbor atoms with non-zero electrostatic // interaction coefficients with atom-i in the current batch @@ -782,7 +783,8 @@ void FixACKS2ReaxFFKokkos::compute_h_team( valid = false; if (x(j, 2) == ztmp && x(j, 1) < ytmp) valid = false; - if (x(j, 2) == ztmp && x(j, 1) == ytmp && x(j, 0) < xtmp) + if (x(j, 2) == ztmp && x(j, 1) == ytmp && + x(j, 0) < xtmp) valid = false; } } @@ -851,7 +853,7 @@ double FixACKS2ReaxFFKokkos::calculate_H_k(const F_FLOAT &r, const F taper = taper * r + d_tap[0]; denom = r * r * r + shld; - denom = pow(denom,1.0/3.0); + denom = cbrt(denom); return taper * EV_TO_KCAL_PER_MOL / denom; } 
@@ -861,7 +863,7 @@ double FixACKS2ReaxFFKokkos::calculate_H_k(const F_FLOAT &r, const F template template KOKKOS_INLINE_FUNCTION -void FixACKS2ReaxFFKokkos::compute_x_item(int ii, int &m_fill, const bool &final) const +void FixACKS2ReaxFFKokkos::compute_x_item(int ii, bigint &m_fill, const bool &final) const { // The X_diag array is duplicated for OpenMP, atomic for GPU, and neither for Serial auto v_X_diag = ScatterViewHelper,decltype(dup_X_diag),decltype(ndup_X_diag)>::get(dup_X_diag,ndup_X_diag); @@ -927,7 +929,7 @@ void FixACKS2ReaxFFKokkos::compute_x_item(int ii, int &m_fill, const } if (final) { a_X_diag[i] += tmp; - d_numnbrs_X[i] = m_fill - d_firstnbr_X[i]; + d_numnbrs_X[i] = int(m_fill - d_firstnbr_X[i]); } } } @@ -1005,9 +1007,9 @@ void FixACKS2ReaxFFKokkos::compute_x_team( // calculate the global memory offset from where the H matrix values to be // calculated by the current team will be stored in d_val_X - int team_firstnbr_idx = 0; + bigint team_firstnbr_idx = 0; Kokkos::single(Kokkos::PerTeam(team), - [=](int &val) { + [=](bigint &val) { int totalnbrs = s_firstnbr[lastatom - firstatom - 1] + s_numnbrs[lastatom - firstatom - 1]; val = Kokkos::atomic_fetch_add(&d_mfill_offset(), totalnbrs); @@ -1033,7 +1035,7 @@ void FixACKS2ReaxFFKokkos::compute_x_team( int jnum = s_numnbrs[idx]; // calculate the write-offset for atom-i's first neighbor - int atomi_firstnbr_idx = team_firstnbr_idx + s_firstnbr[idx]; + bigint atomi_firstnbr_idx = team_firstnbr_idx + s_firstnbr[idx]; Kokkos::single(Kokkos::PerThread(team), [&]() { d_firstnbr_X[i] = atomi_firstnbr_idx; }); @@ -1046,7 +1048,7 @@ void FixACKS2ReaxFFKokkos::compute_x_team( // are processed in batches and the batch size is vector_length for (int jj_start = 0; jj_start < jnum; jj_start += vector_length) { - int atomi_nbr_writeIdx = atomi_firstnbr_idx + atomi_nbrs_inH; + bigint atomi_nbr_writeIdx = atomi_firstnbr_idx + atomi_nbrs_inH; // count the # of neighbor atoms with non-zero electrostatic // interaction 
coefficients with atom-i in the current batch @@ -1417,7 +1419,7 @@ void FixACKS2ReaxFFKokkos::sparse_matvec_acks2(typename AT::t_ffloat // free duplicated memory - dup_bb = decltype(dup_bb)(); + dup_bb = {}; } } @@ -1464,7 +1466,7 @@ void FixACKS2ReaxFFKokkos::operator() (TagACKS2SparseMatvec3_Half::operator() (TagACKS2SparseMatvec3_Half::operator() (TagACKS2SparseMatvec3_Full, c F_FLOAT sum; F_FLOAT sum2; - Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team, d_firstnbr[i], d_firstnbr[i] + d_numnbrs[i]), [&] (const int &jj, F_FLOAT &sum) { + Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team, d_firstnbr[i], d_firstnbr[i] + d_numnbrs[i]), [&] (const bigint &jj, F_FLOAT &sum) { const int j = d_jlist(jj); sum += d_val(jj) * d_xx[j]; }, sum); team.team_barrier(); - Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team, d_firstnbr_X[i], d_firstnbr_X[i] + d_numnbrs_X[i]), [&] (const int &jj, F_FLOAT &sum2) { + Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team, d_firstnbr_X[i], d_firstnbr_X[i] + d_numnbrs_X[i]), [&] (const bigint &jj, F_FLOAT &sum2) { const int j = d_jlist_X(jj); sum2 += d_val_X(jj) * d_xx[NN + j]; }, sum2); @@ -1865,8 +1867,8 @@ double FixACKS2ReaxFFKokkos::memory_usage() bytes += nmax*4 * sizeof(double); // storage bytes += size*11 * sizeof(double); // storage bytes += n_cap*4 * sizeof(int); // matrix... 
- bytes += m_cap*2 * sizeof(int); - bytes += m_cap*2 * sizeof(double); + bytes += m_cap_big*2 * sizeof(int); + bytes += m_cap_big*2 * sizeof(double); return bytes; } diff --git a/src/KOKKOS/fix_acks2_reaxff_kokkos.h b/src/KOKKOS/fix_acks2_reaxff_kokkos.h index cb16b4cd24..6adca39d17 100644 --- a/src/KOKKOS/fix_acks2_reaxff_kokkos.h +++ b/src/KOKKOS/fix_acks2_reaxff_kokkos.h @@ -74,7 +74,7 @@ class FixACKS2ReaxFFKokkos : public FixACKS2ReaxFF, public KokkosBase { DAT::tdual_ffloat_1d get_s() {return k_s;} KOKKOS_INLINE_FUNCTION - void num_neigh_item(int, int&) const; + void num_neigh_item(int, bigint&) const; KOKKOS_INLINE_FUNCTION void operator()(TagACKS2Zero, const int&) const; @@ -84,7 +84,7 @@ class FixACKS2ReaxFFKokkos : public FixACKS2ReaxFF, public KokkosBase { template KOKKOS_INLINE_FUNCTION - void compute_h_item(int, int &, const bool &) const; + void compute_h_item(int, bigint &, const bool &) const; template KOKKOS_INLINE_FUNCTION @@ -92,7 +92,7 @@ class FixACKS2ReaxFFKokkos : public FixACKS2ReaxFF, public KokkosBase { template KOKKOS_INLINE_FUNCTION - void compute_x_item(int, int &, const bool &) const; + void compute_x_item(int, bigint &, const bool &) const; template KOKKOS_INLINE_FUNCTION @@ -173,8 +173,9 @@ class FixACKS2ReaxFFKokkos : public FixACKS2ReaxFF, public KokkosBase { int allocated_flag, last_allocate; int need_dup,prev_last_rows_rank; double* buf; + bigint m_cap_big; - typename AT::t_int_scalar d_mfill_offset; + typename AT::t_bigint_scalar d_mfill_offset; typedef Kokkos::DualView tdual_int_1d; Kokkos::DualView k_params; @@ -197,12 +198,12 @@ class FixACKS2ReaxFFKokkos : public FixACKS2ReaxFF, public KokkosBase { DAT::tdual_ffloat_2d k_bcut; typename AT::t_ffloat_2d d_bcut; - typename AT::t_int_1d d_firstnbr; + typename AT::t_bigint_1d d_firstnbr; typename AT::t_int_1d d_numnbrs; typename AT::t_int_1d d_jlist; typename AT::t_ffloat_1d d_val; - typename AT::t_int_1d d_firstnbr_X; + typename AT::t_bigint_1d d_firstnbr_X; typename 
AT::t_int_1d d_numnbrs_X; typename AT::t_int_1d d_jlist_X; typename AT::t_ffloat_1d d_val_X; @@ -264,21 +265,21 @@ class FixACKS2ReaxFFKokkos : public FixACKS2ReaxFF, public KokkosBase { template struct FixACKS2ReaxFFKokkosNumNeighFunctor { typedef DeviceType device_type; - typedef int value_type; + typedef bigint value_type; FixACKS2ReaxFFKokkos c; FixACKS2ReaxFFKokkosNumNeighFunctor(FixACKS2ReaxFFKokkos* c_ptr):c(*c_ptr) { c.cleanup_copy(); }; KOKKOS_INLINE_FUNCTION - void operator()(const int ii, int &maxneigh) const { - c.num_neigh_item(ii, maxneigh); + void operator()(const int ii, bigint &totneigh) const { + c.num_neigh_item(ii, totneigh); } }; template struct FixACKS2ReaxFFKokkosComputeHFunctor { int atoms_per_team, vector_length; - typedef int value_type; + typedef bigint value_type; typedef Kokkos::ScratchMemorySpace scratch_space; FixACKS2ReaxFFKokkos c; @@ -293,7 +294,7 @@ struct FixACKS2ReaxFFKokkosComputeHFunctor { }; KOKKOS_INLINE_FUNCTION - void operator()(const int ii, int &m_fill, const bool &final) const { + void operator()(const int ii, bigint &m_fill, const bool &final) const { c.template compute_h_item(ii,m_fill,final); } @@ -325,7 +326,7 @@ struct FixACKS2ReaxFFKokkosComputeHFunctor { template struct FixACKS2ReaxFFKokkosComputeXFunctor { int atoms_per_team, vector_length; - typedef int value_type; + typedef bigint value_type; typedef Kokkos::ScratchMemorySpace scratch_space; FixACKS2ReaxFFKokkos c; @@ -340,7 +341,7 @@ struct FixACKS2ReaxFFKokkosComputeXFunctor { }; KOKKOS_INLINE_FUNCTION - void operator()(const int ii, int &m_fill, const bool &final) const { + void operator()(const int ii, bigint &m_fill, const bool &final) const { c.template compute_x_item(ii,m_fill,final); } diff --git a/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp b/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp index deb41944bc..7ef4505b06 100644 --- a/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp +++ b/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp @@ -62,7 +62,8 @@ FixQEqReaxFFKokkos(LAMMPS *lmp, int narg, 
char **arg) : datamask_read = X_MASK | V_MASK | F_MASK | Q_MASK | MASK_MASK | TYPE_MASK | TAG_MASK; datamask_modify = X_MASK; - nmax = m_cap = 0; + nmax = 0; + m_cap_big = 0; allocated_flag = 0; nprev = 4; maxexchange = nprev*2; @@ -71,7 +72,7 @@ FixQEqReaxFFKokkos(LAMMPS *lmp, int narg, char **arg) : memory->destroy(t_hist); grow_arrays(atom->nmax); - d_mfill_offset = typename AT::t_int_scalar("qeq/kk:mfill_offset"); + d_mfill_offset = typename AT::t_bigint_scalar("qeq/kk:mfill_offset"); converged = 0; } @@ -290,7 +291,7 @@ void FixQEqReaxFFKokkos::pre_force(int /*vflag*/) // free duplicated memory if (need_dup) - dup_o = decltype(dup_o)(); + dup_o = {}; atomKK->modified(execution_space,datamask_modify); @@ -301,10 +302,10 @@ void FixQEqReaxFFKokkos::pre_force(int /*vflag*/) template KOKKOS_INLINE_FUNCTION -void FixQEqReaxFFKokkos::num_neigh_item(int ii, int &maxneigh) const +void FixQEqReaxFFKokkos::num_neigh_item(int ii, bigint &totneigh) const { const int i = d_ilist[ii]; - maxneigh += d_numneigh[i]; + totneigh += d_numneigh[i]; } /* ---------------------------------------------------------------------- */ @@ -316,25 +317,25 @@ void FixQEqReaxFFKokkos::allocate_matrix() // determine the total space for the H matrix - m_cap = 0; + m_cap_big = 0; // limit scope of functor to allow deallocation of views { FixQEqReaxFFKokkosNumNeighFunctor neigh_functor(this); - Kokkos::parallel_reduce(nn,neigh_functor,m_cap); + Kokkos::parallel_reduce(nn,neigh_functor,m_cap_big); } // deallocate first to reduce memory overhead - d_firstnbr = typename AT::t_int_1d(); + d_firstnbr = typename AT::t_bigint_1d(); d_numnbrs = typename AT::t_int_1d(); d_jlist = typename AT::t_int_1d(); d_val = typename AT::t_ffloat_1d(); - d_firstnbr = typename AT::t_int_1d("qeq/kk:firstnbr",nmax); + d_firstnbr = typename AT::t_bigint_1d("qeq/kk:firstnbr",nmax); d_numnbrs = typename AT::t_int_1d("qeq/kk:numnbrs",nmax); - d_jlist = typename AT::t_int_1d("qeq/kk:jlist",m_cap); - d_val = typename 
AT::t_ffloat_1d("qeq/kk:val",m_cap); + d_jlist = typename AT::t_int_1d("qeq/kk:jlist",m_cap_big); + d_val = typename AT::t_ffloat_1d("qeq/kk:val",m_cap_big); } /* ---------------------------------------------------------------------- */ @@ -405,7 +406,7 @@ void FixQEqReaxFFKokkos::operator()(TagQEqZero, const int &ii) const template template KOKKOS_INLINE_FUNCTION -void FixQEqReaxFFKokkos::compute_h_item(int ii, int &m_fill, const bool &final) const +void FixQEqReaxFFKokkos::compute_h_item(int ii, bigint &m_fill, const bool &final) const { const int i = d_ilist[ii]; int j,jj,jtype; @@ -458,7 +459,7 @@ void FixQEqReaxFFKokkos::compute_h_item(int ii, int &m_fill, const b m_fill++; } if (final) - d_numnbrs[i] = m_fill - d_firstnbr[i]; + d_numnbrs[i] = int(m_fill - d_firstnbr[i]); } } @@ -537,9 +538,9 @@ void FixQEqReaxFFKokkos::compute_h_team( // calculate the global memory offset from where the H matrix values to be // calculated by the current team will be stored in d_val - int team_firstnbr_idx = 0; + bigint team_firstnbr_idx = 0; Kokkos::single(Kokkos::PerTeam(team), - [=](int &val) { + [=](bigint &val) { int totalnbrs = s_firstnbr[lastatom - firstatom - 1] + s_numnbrs[lastatom - firstatom - 1]; val = Kokkos::atomic_fetch_add(&d_mfill_offset(), totalnbrs); @@ -565,7 +566,7 @@ void FixQEqReaxFFKokkos::compute_h_team( int jnum = s_numnbrs[idx]; // removed "const" to work around GCC 7 bug // calculate the write-offset for atom-i's first neighbor - int atomi_firstnbr_idx = team_firstnbr_idx + s_firstnbr[idx]; + bigint atomi_firstnbr_idx = team_firstnbr_idx + s_firstnbr[idx]; Kokkos::single(Kokkos::PerThread(team), [&]() { d_firstnbr[i] = atomi_firstnbr_idx; }); @@ -578,7 +579,7 @@ void FixQEqReaxFFKokkos::compute_h_team( // are processed in batches and the batch size is vector_length for (int jj_start = 0; jj_start < jnum; jj_start += vector_length) { - int atomi_nbr_writeIdx = atomi_firstnbr_idx + atomi_nbrs_inH; + bigint atomi_nbr_writeIdx = atomi_firstnbr_idx + 
atomi_nbrs_inH; // count the # of neighbor atoms with non-zero electrostatic // interaction coefficients with atom-i in the current batch @@ -935,7 +936,7 @@ void FixQEqReaxFFKokkos::operator()(TagQEqSparseMatvec2_Half::operator()(TagQEqSparseMatvec2_Full, const const int i = d_ilist[k]; if (mask[i] & groupbit) { F_FLOAT2 doitmp; - Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team, d_firstnbr[i], d_firstnbr[i] + d_numnbrs[i]), [&] (const int &jj, F_FLOAT2& doi) { + Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team, d_firstnbr[i], d_firstnbr[i] + d_numnbrs[i]), [&] (const bigint &jj, F_FLOAT2& doi) { const int j = d_jlist(jj); const auto d_val_jj = d_val(jj); if (!(converged & 1)) @@ -1286,8 +1287,8 @@ double FixQEqReaxFFKokkos::memory_usage() bytes = atom->nmax*nprev*2 * sizeof(F_FLOAT); // s_hist & t_hist bytes += (double)atom->nmax*8 * sizeof(F_FLOAT); // storage bytes += (double)n_cap*2 * sizeof(int); // matrix... - bytes += (double)m_cap * sizeof(int); - bytes += (double)m_cap * sizeof(F_FLOAT); + bytes += (double)m_cap_big * sizeof(int); + bytes += (double)m_cap_big * sizeof(F_FLOAT); return bytes; } diff --git a/src/KOKKOS/fix_qeq_reaxff_kokkos.h b/src/KOKKOS/fix_qeq_reaxff_kokkos.h index 92026b209d..0733a518a2 100644 --- a/src/KOKKOS/fix_qeq_reaxff_kokkos.h +++ b/src/KOKKOS/fix_qeq_reaxff_kokkos.h @@ -70,7 +70,7 @@ class FixQEqReaxFFKokkos : public FixQEqReaxFF, public KokkosBase { void pre_force(int) override; KOKKOS_INLINE_FUNCTION - void num_neigh_item(int, int&) const; + void num_neigh_item(int, bigint&) const; KOKKOS_INLINE_FUNCTION void operator()(TagQEqZero, const int&) const; @@ -80,7 +80,7 @@ class FixQEqReaxFFKokkos : public FixQEqReaxFF, public KokkosBase { template KOKKOS_INLINE_FUNCTION - void compute_h_item(int, int &, const bool &) const; + void compute_h_item(int, bigint &, const bool &) const; template KOKKOS_INLINE_FUNCTION @@ -201,8 +201,9 @@ class FixQEqReaxFFKokkos : public FixQEqReaxFF, public KokkosBase { int allocated_flag, 
last_allocate; int need_dup; int converged; + bigint m_cap_big; - typename AT::t_int_scalar d_mfill_offset; + typename AT::t_bigint_scalar d_mfill_offset; typedef Kokkos::DualView tdual_int_1d; Kokkos::DualView k_params; @@ -227,7 +228,7 @@ class FixQEqReaxFFKokkos : public FixQEqReaxFF, public KokkosBase { DAT::tdual_ffloat_1d k_tap; typename AT::t_ffloat_1d d_tap; - typename AT::t_int_1d d_firstnbr; + typename AT::t_bigint_1d d_firstnbr; typename AT::t_int_1d d_numnbrs; typename AT::t_int_1d d_jlist; typename AT::t_ffloat_1d d_val; @@ -290,21 +291,21 @@ class FixQEqReaxFFKokkos : public FixQEqReaxFF, public KokkosBase { template struct FixQEqReaxFFKokkosNumNeighFunctor { typedef DeviceType device_type; - typedef int value_type; + typedef bigint value_type; FixQEqReaxFFKokkos c; FixQEqReaxFFKokkosNumNeighFunctor(FixQEqReaxFFKokkos* c_ptr):c(*c_ptr) { c.cleanup_copy(); }; KOKKOS_INLINE_FUNCTION - void operator()(const int ii, int &maxneigh) const { - c.num_neigh_item(ii, maxneigh); + void operator()(const int ii, bigint &totneigh) const { + c.num_neigh_item(ii, totneigh); } }; template struct FixQEqReaxFFKokkosComputeHFunctor { int atoms_per_team, vector_length; - typedef int value_type; + typedef bigint value_type; typedef Kokkos::ScratchMemorySpace scratch_space; FixQEqReaxFFKokkos c; @@ -319,7 +320,7 @@ struct FixQEqReaxFFKokkosComputeHFunctor { }; KOKKOS_INLINE_FUNCTION - void operator()(const int ii, int &m_fill, const bool &final) const { + void operator()(const int ii, bigint &m_fill, const bool &final) const { c.template compute_h_item(ii,m_fill,final); } diff --git a/src/KOKKOS/fix_shake_kokkos.cpp b/src/KOKKOS/fix_shake_kokkos.cpp index b25e2dad59..52826d7b04 100644 --- a/src/KOKKOS/fix_shake_kokkos.cpp +++ b/src/KOKKOS/fix_shake_kokkos.cpp @@ -172,7 +172,6 @@ void FixShakeKokkos::init() k_angle_distance.sync(); } - /* ---------------------------------------------------------------------- run setup for minimization. 
------------------------------------------------------------------------- */ @@ -460,8 +459,8 @@ void FixShakeKokkos::post_force(int vflag) // free duplicated memory if (need_dup) { - dup_f = decltype(dup_f)(); - dup_vatom = decltype(dup_vatom)(); + dup_f = {}; + dup_vatom = {}; } } diff --git a/src/KOKKOS/kokkos.cpp b/src/KOKKOS/kokkos.cpp index f4ba967c9a..b3edb0e6a0 100644 --- a/src/KOKKOS/kokkos.cpp +++ b/src/KOKKOS/kokkos.cpp @@ -149,6 +149,14 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) set_flag = 1; } } + if ((str = getenv("PALS_LOCAL_RANKID"))) { + if (ngpus > 0) { + int local_rank = atoi(str); + device = local_rank % ngpus; + if (device >= skip_gpu) device++; + set_flag = 1; + } + } if (ngpus > 1 && !set_flag) error->all(FLERR,"Could not determine local MPI rank for multiple " @@ -638,10 +646,10 @@ void KokkosLMP::accelerator(int narg, char **arg) called by Finish ------------------------------------------------------------------------- */ -int KokkosLMP::neigh_count(int m) +bigint KokkosLMP::neigh_count(int m) { int inum = 0; - int nneigh = 0; + bigint nneigh = 0; ArrayTypes::t_int_1d h_ilist; ArrayTypes::t_int_1d h_numneigh; diff --git a/src/KOKKOS/kokkos.h b/src/KOKKOS/kokkos.h index 748aff7f83..419de62dec 100644 --- a/src/KOKKOS/kokkos.h +++ b/src/KOKKOS/kokkos.h @@ -64,7 +64,7 @@ class KokkosLMP : protected Pointers { static void initialize(const Kokkos::InitializationSettings&, Error *); static void finalize(); void accelerator(int, char **); - int neigh_count(int); + bigint neigh_count(int); template int need_dup(int qeq_flag = 0) diff --git a/src/KOKKOS/kokkos_type.h b/src/KOKKOS/kokkos_type.h index 7f0eb5c105..9d3e3fca0f 100644 --- a/src/KOKKOS/kokkos_type.h +++ b/src/KOKKOS/kokkos_type.h @@ -341,17 +341,17 @@ public: // define precision // handle global precision, force, energy, positions, kspace separately -#ifndef PRECISION -#define PRECISION 2 +#ifndef LMP_PRECISION +#define LMP_PRECISION 2 #endif -#if 
PRECISION==1 +#if LMP_PRECISION==1 typedef float LMP_FLOAT; #else typedef double LMP_FLOAT; #endif #ifndef PREC_FORCE -#define PREC_FORCE PRECISION +#define PREC_FORCE LMP_PRECISION #endif #if PREC_FORCE==1 @@ -361,7 +361,7 @@ typedef double F_FLOAT; #endif #ifndef PREC_ENERGY -#define PREC_ENERGY PRECISION +#define PREC_ENERGY LMP_PRECISION #endif #if PREC_ENERGY==1 @@ -521,7 +521,7 @@ struct BinOp3DLAMMPS { }; #ifndef PREC_POS -#define PREC_POS PRECISION +#define PREC_POS LMP_PRECISION #endif #if PREC_POS==1 @@ -531,7 +531,7 @@ typedef double X_FLOAT; #endif #ifndef PREC_VELOCITIES -#define PREC_VELOCITIES PRECISION +#define PREC_VELOCITIES LMP_PRECISION #endif #if PREC_VELOCITIES==1 @@ -641,6 +641,14 @@ typedef tdual_int_scalar::t_dev_const t_int_scalar_const; typedef tdual_int_scalar::t_dev_um t_int_scalar_um; typedef tdual_int_scalar::t_dev_const_um t_int_scalar_const_um; +typedef Kokkos:: + DualView tdual_bigint_scalar; +typedef tdual_bigint_scalar::t_dev t_bigint_scalar; +typedef tdual_bigint_scalar::t_dev_const t_bigint_scalar_const; +typedef tdual_bigint_scalar::t_dev_um t_bigint_scalar_um; +typedef tdual_bigint_scalar::t_dev_const_um t_bigint_scalar_const_um; +typedef tdual_bigint_scalar::t_dev_const_randomread t_bigint_scalar_randomread; + typedef Kokkos:: DualView tdual_tagint_scalar; typedef tdual_tagint_scalar::t_dev t_tagint_scalar; @@ -666,6 +674,14 @@ typedef tdual_int_1d::t_dev_um t_int_1d_um; typedef tdual_int_1d::t_dev_const_um t_int_1d_const_um; typedef tdual_int_1d::t_dev_const_randomread t_int_1d_randomread; +typedef Kokkos:: + DualView tdual_bigint_1d; +typedef tdual_bigint_1d::t_dev t_bigint_1d; +typedef tdual_bigint_1d::t_dev_const t_bigint_1d_const; +typedef tdual_bigint_1d::t_dev_um t_bigint_1d_um; +typedef tdual_bigint_1d::t_dev_const_um t_bigint_1d_const_um; +typedef tdual_bigint_1d::t_dev_const_randomread t_bigint_1d_randomread; + typedef Kokkos:: DualView tdual_int_1d_3; typedef tdual_int_1d_3::t_dev t_int_1d_3; @@ -974,6 +990,12 @@ 
typedef tdual_int_scalar::t_host_const t_int_scalar_const; typedef tdual_int_scalar::t_host_um t_int_scalar_um; typedef tdual_int_scalar::t_host_const_um t_int_scalar_const_um; +typedef Kokkos::DualView tdual_bigint_scalar; +typedef tdual_bigint_scalar::t_host t_bigint_scalar; +typedef tdual_bigint_scalar::t_host_const t_bigint_scalar_const; +typedef tdual_bigint_scalar::t_host_um t_bigint_scalar_um; +typedef tdual_bigint_scalar::t_host_const_um t_bigint_scalar_const_um; + typedef Kokkos::DualView tdual_tagint_scalar; typedef tdual_tagint_scalar::t_host t_tagint_scalar; typedef tdual_tagint_scalar::t_host_const t_tagint_scalar_const; @@ -994,6 +1016,13 @@ typedef tdual_int_1d::t_host_um t_int_1d_um; typedef tdual_int_1d::t_host_const_um t_int_1d_const_um; typedef tdual_int_1d::t_host_const_randomread t_int_1d_randomread; +typedef Kokkos::DualView tdual_bigint_1d; +typedef tdual_bigint_1d::t_host t_bigint_1d; +typedef tdual_bigint_1d::t_host_const t_bigint_1d_const; +typedef tdual_bigint_1d::t_host_um t_bigint_1d_um; +typedef tdual_bigint_1d::t_host_const_um t_bigint_1d_const_um; +typedef tdual_bigint_1d::t_host_const_randomread t_bigint_1d_randomread; + typedef Kokkos::DualView tdual_int_1d_3; typedef tdual_int_1d_3::t_host t_int_1d_3; typedef tdual_int_1d_3::t_host_const t_int_1d_3_const; diff --git a/src/KOKKOS/meam_dens_init_kokkos.h b/src/KOKKOS/meam_dens_init_kokkos.h index 68e69430fd..dd63be96bd 100644 --- a/src/KOKKOS/meam_dens_init_kokkos.h +++ b/src/KOKKOS/meam_dens_init_kokkos.h @@ -294,20 +294,20 @@ MEAMKokkos::meam_dens_init(int inum_half, int ntype, typename AT::t_ Kokkos::Experimental::contribute(d_arho3mb, dup_arho3mb); // free duplicated memory - dup_rho0 = decltype(dup_rho0)(); - dup_arho2b = decltype(dup_arho2b)(); - dup_arho1 = decltype(dup_arho1)(); - dup_arho2 = decltype(dup_arho2)(); - dup_arho3 = decltype(dup_arho3)(); - dup_arho3b = decltype(dup_arho3b)(); - dup_t_ave = decltype(dup_t_ave)(); - dup_tsq_ave = decltype(dup_tsq_ave)(); + 
dup_rho0 = {}; + dup_arho2b = {}; + dup_arho1 = {}; + dup_arho2 = {}; + dup_arho3 = {}; + dup_arho3b = {}; + dup_t_ave = {}; + dup_tsq_ave = {}; // msmeam - dup_arho2mb = decltype(dup_arho2mb)(); - dup_arho1m = decltype(dup_arho1m)(); - dup_arho2m = decltype(dup_arho2m)(); - dup_arho3m = decltype(dup_arho3m)(); - dup_arho3mb = decltype(dup_arho3mb)(); + dup_arho2mb = {}; + dup_arho1m = {}; + dup_arho2m = {}; + dup_arho3m = {}; + dup_arho3mb = {}; } } diff --git a/src/KOKKOS/meam_force_kokkos.h b/src/KOKKOS/meam_force_kokkos.h index a546ab54d4..1875e22dcf 100644 --- a/src/KOKKOS/meam_force_kokkos.h +++ b/src/KOKKOS/meam_force_kokkos.h @@ -75,9 +75,9 @@ void MEAMKokkos::meam_force( if (vflag_atom) Kokkos::Experimental::contribute(d_vatom, dup_vatom); // free duplicated memory - dup_f = decltype(dup_f)(); - if (eflag_atom) dup_eatom = decltype(dup_eatom)(); - if (vflag_atom) dup_vatom = decltype(dup_vatom)(); + dup_f = {}; + if (eflag_atom) dup_eatom = {}; + if (vflag_atom) dup_vatom = {}; } } diff --git a/src/KOKKOS/memory_kokkos.h b/src/KOKKOS/memory_kokkos.h index 0c7555875e..026c8afcb4 100644 --- a/src/KOKKOS/memory_kokkos.h +++ b/src/KOKKOS/memory_kokkos.h @@ -327,13 +327,23 @@ void destroy_kokkos(TYPE data, typename TYPE::value_type*** &array) /* ---------------------------------------------------------------------- reallocate Kokkos views without initialization deallocate first to reduce memory use + for the first case, enforce values are given for all dimensions + for the second case, allow zero values given for dimensions ------------------------------------------------------------------------- */ template -static void realloc_kokkos(TYPE &data, const char *name, Indices... ns) +static std::enable_if_t realloc_kokkos(TYPE &data, const char *name, Indices... 
ns) { data = TYPE(); - data = TYPE(Kokkos::NoInit(std::string(name)), ns...); + data = TYPE(std::string(name), ns...); +} + +template +static std::enable_if_t realloc_kokkos_allow_zero(TYPE &data, const char *name, Indices... ns) +{ + data = TYPE(); + if constexpr (sizeof...(Indices) != 0) + data = TYPE(std::string(name), ns...); } /* ---------------------------------------------------------------------- diff --git a/src/KOKKOS/pair_adp_kokkos.cpp b/src/KOKKOS/pair_adp_kokkos.cpp index 1297d62651..999a67ca49 100644 --- a/src/KOKKOS/pair_adp_kokkos.cpp +++ b/src/KOKKOS/pair_adp_kokkos.cpp @@ -297,12 +297,12 @@ void PairADPKokkos::compute(int eflag_in, int vflag_in) // free duplicated memory if (need_dup) { - dup_rho = decltype(dup_rho)(); - dup_mu = decltype(dup_mu)(); - dup_lambda = decltype(dup_lambda)(); - dup_f = decltype(dup_f)(); - dup_eatom = decltype(dup_eatom)(); - dup_vatom = decltype(dup_vatom)(); + dup_rho = {}; + dup_mu = {}; + dup_lambda = {}; + dup_f = {}; + dup_eatom = {}; + dup_vatom = {}; } } diff --git a/src/KOKKOS/pair_dpd_ext_kokkos.cpp b/src/KOKKOS/pair_dpd_ext_kokkos.cpp index 636235d1c8..95c9d304f3 100644 --- a/src/KOKKOS/pair_dpd_ext_kokkos.cpp +++ b/src/KOKKOS/pair_dpd_ext_kokkos.cpp @@ -207,9 +207,9 @@ void PairDPDExtKokkos::compute(int eflagin, int vflagin) // free duplicated memory if (need_dup) { - dup_f = decltype(dup_f)(); - dup_eatom = decltype(dup_eatom)(); - dup_vatom = decltype(dup_vatom)(); + dup_f = {}; + dup_eatom = {}; + dup_vatom = {}; } } diff --git a/src/KOKKOS/pair_dpd_ext_tstat_kokkos.cpp b/src/KOKKOS/pair_dpd_ext_tstat_kokkos.cpp index 213b344fbb..91d1183957 100644 --- a/src/KOKKOS/pair_dpd_ext_tstat_kokkos.cpp +++ b/src/KOKKOS/pair_dpd_ext_tstat_kokkos.cpp @@ -212,8 +212,8 @@ void PairDPDExtTstatKokkos::compute(int eflagin, int vflagin) // free duplicated memory if (need_dup) { - dup_f = decltype(dup_f)(); - dup_vatom = decltype(dup_vatom)(); + dup_f = {}; + dup_vatom = {}; } } diff --git 
a/src/KOKKOS/pair_dpd_kokkos.cpp b/src/KOKKOS/pair_dpd_kokkos.cpp index f888b5f6ce..0ebf8ccae0 100644 --- a/src/KOKKOS/pair_dpd_kokkos.cpp +++ b/src/KOKKOS/pair_dpd_kokkos.cpp @@ -207,9 +207,9 @@ void PairDPDKokkos::compute(int eflagin, int vflagin) // free duplicated memory if (need_dup) { - dup_f = decltype(dup_f)(); - dup_eatom = decltype(dup_eatom)(); - dup_vatom = decltype(dup_vatom)(); + dup_f = {}; + dup_eatom = {}; + dup_vatom = {}; } } diff --git a/src/KOKKOS/pair_dpd_tstat_kokkos.cpp b/src/KOKKOS/pair_dpd_tstat_kokkos.cpp index 63dbda3b59..d51cce629a 100644 --- a/src/KOKKOS/pair_dpd_tstat_kokkos.cpp +++ b/src/KOKKOS/pair_dpd_tstat_kokkos.cpp @@ -211,8 +211,8 @@ void PairDPDTstatKokkos::compute(int eflagin, int vflagin) // free duplicated memory if (need_dup) { - dup_f = decltype(dup_f)(); - dup_vatom = decltype(dup_vatom)(); + dup_f = {}; + dup_vatom = {}; } } diff --git a/src/KOKKOS/pair_eam_alloy_kokkos.cpp b/src/KOKKOS/pair_eam_alloy_kokkos.cpp index b02faced1e..90a82616a6 100644 --- a/src/KOKKOS/pair_eam_alloy_kokkos.cpp +++ b/src/KOKKOS/pair_eam_alloy_kokkos.cpp @@ -309,10 +309,10 @@ void PairEAMAlloyKokkos::compute(int eflag_in, int vflag_in) // free duplicated memory if (need_dup) { - dup_rho = decltype(dup_rho)(); - dup_f = decltype(dup_f)(); - dup_eatom = decltype(dup_eatom)(); - dup_vatom = decltype(dup_vatom)(); + dup_rho = {}; + dup_f = {}; + dup_eatom = {}; + dup_vatom = {}; } } diff --git a/src/KOKKOS/pair_eam_fs_kokkos.cpp b/src/KOKKOS/pair_eam_fs_kokkos.cpp index 4da146e68e..11719a8979 100644 --- a/src/KOKKOS/pair_eam_fs_kokkos.cpp +++ b/src/KOKKOS/pair_eam_fs_kokkos.cpp @@ -309,10 +309,10 @@ void PairEAMFSKokkos::compute(int eflag_in, int vflag_in) // free duplicated memory if (need_dup) { - dup_rho = decltype(dup_rho)(); - dup_f = decltype(dup_f)(); - dup_eatom = decltype(dup_eatom)(); - dup_vatom = decltype(dup_vatom)(); + dup_rho = {}; + dup_f = {}; + dup_eatom = {}; + dup_vatom = {}; } } diff --git a/src/KOKKOS/pair_eam_kokkos.cpp 
b/src/KOKKOS/pair_eam_kokkos.cpp index 54ffa84f2d..1e870555dc 100644 --- a/src/KOKKOS/pair_eam_kokkos.cpp +++ b/src/KOKKOS/pair_eam_kokkos.cpp @@ -304,10 +304,10 @@ void PairEAMKokkos::compute(int eflag_in, int vflag_in) // free duplicated memory if (need_dup) { - dup_rho = decltype(dup_rho)(); - dup_f = decltype(dup_f)(); - dup_eatom = decltype(dup_eatom)(); - dup_vatom = decltype(dup_vatom)(); + dup_rho = {}; + dup_f = {}; + dup_eatom = {}; + dup_vatom = {}; } } diff --git a/src/KOKKOS/pair_pace_extrapolation_kokkos.cpp b/src/KOKKOS/pair_pace_extrapolation_kokkos.cpp index e7d376c870..746055f28c 100644 --- a/src/KOKKOS/pair_pace_extrapolation_kokkos.cpp +++ b/src/KOKKOS/pair_pace_extrapolation_kokkos.cpp @@ -84,7 +84,15 @@ PairPACEExtrapolationKokkos::~PairPACEExtrapolationKokkos() memoryKK->destroy_kokkos(k_eatom,eatom); memoryKK->destroy_kokkos(k_vatom,vatom); - // deallocate views of views in serial to prevent issues in Kokkos tools + deallocate_views_of_views(); +} + +/* ---------------------------------------------------------------------- */ + +template +void PairPACEExtrapolationKokkos::deallocate_views_of_views() +{ + // deallocate views of views in serial to prevent race conditions if (k_splines_gk.h_view.data()) { for (int i = 0; i < nelements; i++) { @@ -244,15 +252,7 @@ void PairPACEExtrapolationKokkos::copy_splines() { auto basis_set = aceimpl->basis_set; - if (k_splines_gk.d_view.data()) { - for (int i = 0; i < nelements; i++) { - for (int j = 0; j < nelements; j++) { - k_splines_gk.h_view(i, j).deallocate(); - k_splines_rnl.h_view(i, j).deallocate(); - k_splines_hc.h_view(i, j).deallocate(); - } - } - } + deallocate_views_of_views(); k_splines_gk = Kokkos::DualView("pace:splines_gk", nelements, nelements); k_splines_rnl = Kokkos::DualView("pace:splines_rnl", nelements, nelements); @@ -808,8 +808,8 @@ void PairPACEExtrapolationKokkos::compute(int eflag_in, int vflag_in // free duplicated memory if (need_dup) { - dup_f = decltype(dup_f)(); - 
dup_vatom = decltype(dup_vatom)(); + dup_f = {}; + dup_vatom = {}; } } diff --git a/src/KOKKOS/pair_pace_extrapolation_kokkos.h b/src/KOKKOS/pair_pace_extrapolation_kokkos.h index df8a0c1740..c1c1debd45 100644 --- a/src/KOKKOS/pair_pace_extrapolation_kokkos.h +++ b/src/KOKKOS/pair_pace_extrapolation_kokkos.h @@ -296,6 +296,8 @@ class PairPACEExtrapolationKokkos : public PairPACEExtrapolation { t_ace_3d3 f_ij; + void deallocate_views_of_views(); + public: struct SplineInterpolatorKokkos { int ntot, nlut, num_of_functions; diff --git a/src/KOKKOS/pair_pace_kokkos.cpp b/src/KOKKOS/pair_pace_kokkos.cpp index 4407d1231e..0afbb7540e 100644 --- a/src/KOKKOS/pair_pace_kokkos.cpp +++ b/src/KOKKOS/pair_pace_kokkos.cpp @@ -84,7 +84,15 @@ PairPACEKokkos::~PairPACEKokkos() memoryKK->destroy_kokkos(k_eatom,eatom); memoryKK->destroy_kokkos(k_vatom,vatom); - // deallocate views of views in serial to prevent issues in Kokkos tools + deallocate_views_of_views(); +} + +/* ---------------------------------------------------------------------- */ + +template +void PairPACEKokkos::deallocate_views_of_views() +{ + // deallocate views of views in serial to prevent race conditions if (k_splines_gk.h_view.data()) { for (int i = 0; i < nelements; i++) { @@ -240,15 +248,7 @@ void PairPACEKokkos::copy_splines() { auto basis_set = aceimpl->basis_set; - if (k_splines_gk.d_view.data()) { - for (int i = 0; i < nelements; i++) { - for (int j = 0; j < nelements; j++) { - k_splines_gk.h_view(i, j).deallocate(); - k_splines_rnl.h_view(i, j).deallocate(); - k_splines_hc.h_view(i, j).deallocate(); - } - } - } + deallocate_views_of_views(); k_splines_gk = Kokkos::DualView("pace:splines_gk", nelements, nelements); k_splines_rnl = Kokkos::DualView("pace:splines_rnl", nelements, nelements); @@ -753,8 +753,8 @@ void PairPACEKokkos::compute(int eflag_in, int vflag_in) // free duplicated memory if (need_dup) { - dup_f = decltype(dup_f)(); - dup_vatom = decltype(dup_vatom)(); + dup_f = {}; + dup_vatom = {}; } } 
diff --git a/src/KOKKOS/pair_pace_kokkos.h b/src/KOKKOS/pair_pace_kokkos.h index e22c61f0ea..6b43e52614 100644 --- a/src/KOKKOS/pair_pace_kokkos.h +++ b/src/KOKKOS/pair_pace_kokkos.h @@ -283,6 +283,8 @@ class PairPACEKokkos : public PairPACE { t_ace_3d3 f_ij; + void deallocate_views_of_views(); + public: struct SplineInterpolatorKokkos { int ntot, nlut, num_of_functions; diff --git a/src/KOKKOS/pair_reaxff_kokkos.cpp b/src/KOKKOS/pair_reaxff_kokkos.cpp index 7dd86e07a9..b0a53a27fd 100644 --- a/src/KOKKOS/pair_reaxff_kokkos.cpp +++ b/src/KOKKOS/pair_reaxff_kokkos.cpp @@ -105,14 +105,23 @@ PairReaxFFKokkos::~PairReaxFFKokkos() memoryKK->destroy_kokkos(k_tmpbo,tmpbo); tmpbo = nullptr; - // deallocate views of views in serial to prevent race condition in profiling tools + deallocate_views_of_views(); +} + +/* ---------------------------------------------------------------------- */ + +template +void PairReaxFFKokkos::deallocate_views_of_views() +{ + + // deallocate views of views in serial to prevent race conditions for (int i = 0; i < (int)k_LR.extent(0); i++) { for (int j = 0; j < (int)k_LR.extent(1); j++) { - k_LR.h_view(i,j).d_vdW = decltype(k_LR.h_view(i,j).d_vdW )(); - k_LR.h_view(i,j).d_CEvd = decltype(k_LR.h_view(i,j).d_CEvd )(); - k_LR.h_view(i,j).d_ele = decltype(k_LR.h_view(i,j).d_ele )(); - k_LR.h_view(i,j).d_CEclmb = decltype(k_LR.h_view(i,j).d_CEclmb)(); + k_LR.h_view(i,j).d_vdW = {}; + k_LR.h_view(i,j).d_CEvd = {}; + k_LR.h_view(i,j).d_ele = {}; + k_LR.h_view(i,j).d_CEclmb = {}; } } } @@ -409,8 +418,8 @@ void PairReaxFFKokkos::init_md() int ntypes = atom->ntypes; Init_Lookup_Tables(); + deallocate_views_of_views(); k_LR = tdual_LR_lookup_table_kk_2d("lookup:LR",ntypes+1,ntypes+1); - d_LR = k_LR.template view(); for (int i = 1; i <= ntypes; ++i) { if (map[i] == -1) continue; @@ -578,7 +587,7 @@ void PairReaxFFKokkos::Deallocate_Lookup_Tables() for (i = 0; i <= ntypes; ++i) { if (map[i] == -1) continue; for (j = i; j <= ntypes; ++j) { - if (map[i] == -1) 
continue; + if (map[j] == -1) continue; if (LR[i][j].n) { sfree(LR[i][j].y); sfree(LR[i][j].H); @@ -1092,19 +1101,19 @@ void PairReaxFFKokkos::compute(int eflag_in, int vflag_in) // free scatterview memory if (need_dup) { - dup_f = decltype(dup_f)(); - dup_eatom = decltype(dup_eatom)(); - dup_vatom = decltype(dup_vatom)(); - dup_dDeltap_self = decltype(dup_dDeltap_self)(); - dup_total_bo = decltype(dup_total_bo)(); - dup_CdDelta = decltype(dup_CdDelta)(); + dup_f = {}; + dup_eatom = {}; + dup_vatom = {}; + dup_dDeltap_self = {}; + dup_total_bo = {}; + dup_CdDelta = {}; } else { - ndup_f = decltype(ndup_f)(); - ndup_eatom = decltype(ndup_eatom)(); - ndup_vatom = decltype(ndup_vatom)(); - ndup_dDeltap_self = decltype(ndup_dDeltap_self)(); - ndup_total_bo = decltype(ndup_total_bo)(); - ndup_CdDelta = decltype(ndup_CdDelta)(); + ndup_f = {}; + ndup_eatom = {}; + ndup_vatom = {}; + ndup_dDeltap_self = {}; + ndup_total_bo = {}; + ndup_CdDelta = {}; } d_neighbors = typename AT::t_neighbors_2d(); @@ -1392,7 +1401,7 @@ void PairReaxFFKokkos::operator()(TagPairReaxComputeTabulatedLJCoulo const int tmin = MIN(itype, jtype); const int tmax = MAX(itype, jtype); - const LR_lookup_table_kk& t = d_LR(tmin,tmax); + const LR_lookup_table_kk& t = k_LR.template view()(tmin,tmax); /* Cubic Spline Interpolation */ @@ -1492,23 +1501,21 @@ void PairReaxFFKokkos::allocate_array() { // free scatterview memory if (need_dup) { - dup_dDeltap_self = decltype(dup_dDeltap_self)(); - dup_total_bo = decltype(dup_total_bo)(); - dup_CdDelta = decltype(dup_CdDelta)(); + dup_dDeltap_self = {}; + dup_total_bo = {}; + dup_CdDelta = {}; } else { - ndup_dDeltap_self = decltype(ndup_dDeltap_self)(); - ndup_total_bo = decltype(ndup_total_bo)(); - ndup_CdDelta = decltype(ndup_CdDelta)(); + ndup_dDeltap_self = {}; + ndup_total_bo = {}; + ndup_CdDelta = {}; } if (cut_hbsq > 0.0) { - MemKK::realloc_kokkos(d_hb_first,"reaxff/kk:hb_first",nmax); MemKK::realloc_kokkos(d_hb_num,"reaxff/kk:hb_num",nmax); - 
MemKK::realloc_kokkos(d_hb_list,"reaxff/kk:hb_list",nmax*maxhb); + MemKK::realloc_kokkos(d_hb_list,"reaxff/kk:hb_list", nmax, maxhb); } - MemKK::realloc_kokkos(d_bo_first,"reaxff/kk:bo_first",nmax); MemKK::realloc_kokkos(d_bo_num,"reaxff/kk:bo_num",nmax); - MemKK::realloc_kokkos(d_bo_list,"reaxff/kk:bo_list",nmax*maxbo); + MemKK::realloc_kokkos(d_bo_list,"reaxff/kk:bo_list", nmax, maxbo); MemKK::realloc_kokkos(d_BO,"reaxff/kk:BO",nmax,maxbo); MemKK::realloc_kokkos(d_BO_s,"reaxff/kk:BO",nmax,maxbo); @@ -1539,9 +1546,9 @@ void PairReaxFFKokkos::allocate_array() MemKK::realloc_kokkos(d_Deltap,"reaxff/kk:Deltap",nmax); MemKK::realloc_kokkos(d_total_bo,"reaxff/kk:total_bo",nmax); - MemKK::realloc_kokkos(d_Cdbo,"reaxff/kk:Cdbo",nmax,3*maxbo); - MemKK::realloc_kokkos(d_Cdbopi,"reaxff/kk:Cdbopi",nmax,3*maxbo); - MemKK::realloc_kokkos(d_Cdbopi2,"reaxff/kk:Cdbopi2",nmax,3*maxbo); + MemKK::realloc_kokkos(d_Cdbo,"reaxff/kk:Cdbo",nmax,maxbo); + MemKK::realloc_kokkos(d_Cdbopi,"reaxff/kk:Cdbopi",nmax,maxbo); + MemKK::realloc_kokkos(d_Cdbopi2,"reaxff/kk:Cdbopi2",nmax,maxbo); MemKK::realloc_kokkos(d_Delta,"reaxff/kk:Delta",nmax); MemKK::realloc_kokkos(d_Delta_boc,"reaxff/kk:Delta_boc",nmax); @@ -1598,19 +1605,10 @@ void PairReaxFFKokkos::operator()(TagPairReaxBuildListsHalfBlocking< F_FLOAT dDeltap_self_i[3] = {0.0,0.0,0.0}; F_FLOAT total_bo_i = 0.0; - d_bo_first[i] = i*maxbo; - const int bo_first_i = d_bo_first[i]; - int ihb = -1; - int hb_first_i; - if (cut_hbsq > 0.0) { + if (cut_hbsq > 0.0) ihb = paramssing(itype).p_hbond; - if (ihb == 1) { - d_hb_first[i] = i*maxhb; - hb_first_i = d_hb_first[i]; - } - } int nnz; blocking_t selected_jj[blocksize]; @@ -1624,9 +1622,6 @@ void PairReaxFFKokkos::operator()(TagPairReaxBuildListsHalfBlocking< int j = d_neighbors(i,jj); j &= NEIGHMASK; - d_bo_first[j] = j*maxbo; - d_hb_first[j] = j*maxhb; - delij[0] = x(j,0) - xtmp; delij[1] = x(j,1) - ytmp; delij[2] = x(j,2) - ztmp; @@ -1655,7 +1650,7 @@ void 
PairReaxFFKokkos::operator()(TagPairReaxBuildListsHalfBlocking< const F_FLOAT rsq = delij[0]*delij[0] + delij[1]*delij[1] + delij[2]*delij[2]; // hbond list - build_hb_list(rsq, i, hb_first_i, ihb, j, jtype); + build_hb_list(rsq, i, ihb, j, jtype); if (rsq > cut_bosq) continue; @@ -1672,23 +1667,23 @@ void PairReaxFFKokkos::operator()(TagPairReaxBuildListsHalfBlocking< BO = BO_s + BO_pi + BO_pi2; if (BO < bo_cut) continue; - int ii_index = -1; - int jj_index = -1; - if (build_bo_list(bo_first_i, i, j, ii_index, jj_index)) { + int i_index = -1; + int j_index = -1; + if (build_bo_list(i, j, i_index, j_index)) { // from BondOrder1 - d_BO(i,jj_index) = BO; - d_BO_s(i,jj_index) = BO_s; + d_BO(i,j_index) = BO; + d_BO_s(i,j_index) = BO_s; - d_BO(j,ii_index) = BO; - d_BO_s(j,ii_index) = BO_s; + d_BO(j,i_index) = BO; + d_BO_s(j,i_index) = BO_s; - d_BO_pi(j,ii_index) = BO_pi; - d_BO_pi2(j,ii_index) = BO_pi2; + d_BO_pi(j,i_index) = BO_pi; + d_BO_pi2(j,i_index) = BO_pi2; - d_BO_pi(i,jj_index) = BO_pi; - d_BO_pi2(i,jj_index) = BO_pi2; + d_BO_pi(i,j_index) = BO_pi; + d_BO_pi2(i,j_index) = BO_pi2; F_FLOAT Cln_BOp_s = p_bo2 * C12 / rij / rij; F_FLOAT Cln_BOp_pi = p_bo4 * C34 / rij / rij; @@ -1701,18 +1696,18 @@ void PairReaxFFKokkos::operator()(TagPairReaxBuildListsHalfBlocking< for (int d = 0; d < 3; d++) dDeltap_self_i[d] += dBOp_i[d]; for (int d = 0; d < 3; d++) a_dDeltap_self(j,d) += -dBOp_i[d]; - d_dln_BOp_pi(i,jj_index) = -(BO_pi*Cln_BOp_pi); - d_dln_BOp_pi(j,ii_index) = -(BO_pi*Cln_BOp_pi); + d_dln_BOp_pi(i,j_index) = -(BO_pi*Cln_BOp_pi); + d_dln_BOp_pi(j,i_index) = -(BO_pi*Cln_BOp_pi); - d_dln_BOp_pi2(i,jj_index) = -(BO_pi2*Cln_BOp_pi2); - d_dln_BOp_pi2(j,ii_index) = -(BO_pi2*Cln_BOp_pi2); + d_dln_BOp_pi2(i,j_index) = -(BO_pi2*Cln_BOp_pi2); + d_dln_BOp_pi2(j,i_index) = -(BO_pi2*Cln_BOp_pi2); - d_dBOp(i,jj_index) = -(BO_s*Cln_BOp_s+BO_pi*Cln_BOp_pi+BO_pi2*Cln_BOp_pi2); - d_dBOp(j,ii_index) = -(BO_s*Cln_BOp_s+BO_pi*Cln_BOp_pi+BO_pi2*Cln_BOp_pi2); - d_BO(i,jj_index) = BO - 
bo_cut; - d_BO(j,ii_index) = BO - bo_cut; - d_BO_s(i,jj_index) = BO_s - bo_cut; - d_BO_s(j,ii_index) = BO_s - bo_cut; + d_dBOp(i,j_index) = -(BO_s*Cln_BOp_s+BO_pi*Cln_BOp_pi+BO_pi2*Cln_BOp_pi2); + d_dBOp(j,i_index) = -(BO_s*Cln_BOp_s+BO_pi*Cln_BOp_pi+BO_pi2*Cln_BOp_pi2); + d_BO(i,j_index) = BO - bo_cut; + d_BO(j,i_index) = BO - bo_cut; + d_BO_s(i,j_index) = BO_s - bo_cut; + d_BO_s(j,i_index) = BO_s - bo_cut; total_bo_i += (BO - bo_cut); a_total_bo[j] += (BO - bo_cut); } @@ -1742,19 +1737,10 @@ void PairReaxFFKokkos::operator()(TagPairReaxBuildListsHalfBlockingP F_FLOAT C12, C34, C56, BO_s, BO_pi, BO_pi2, BO, delij[3]; - d_bo_first[i] = i*maxbo; - const int bo_first_i = d_bo_first[i]; - int ihb = -1; - int hb_first_i; - if (cut_hbsq > 0.0) { + if (cut_hbsq > 0.0) ihb = paramssing(itype).p_hbond; - if (ihb == 1) { - d_hb_first[i] = i*maxhb; - hb_first_i = d_hb_first[i]; - } - } int nnz; blocking_t selected_jj[blocksize]; @@ -1772,9 +1758,6 @@ void PairReaxFFKokkos::operator()(TagPairReaxBuildListsHalfBlockingP int j = d_neighbors(i,jj); j &= NEIGHMASK; - d_bo_first[j] = j*maxbo; - d_hb_first[j] = j*maxhb; - delij[0] = x(j,0) - xtmp; delij[1] = x(j,1) - ytmp; delij[2] = x(j,2) - ztmp; @@ -1800,7 +1783,7 @@ void PairReaxFFKokkos::operator()(TagPairReaxBuildListsHalfBlockingP const F_FLOAT rsq = delij[0]*delij[0] + delij[1]*delij[1] + delij[2]*delij[2]; // hbond list - build_hb_list(rsq, i, hb_first_i, ihb, j, jtype); + build_hb_list(rsq, i, ihb, j, jtype); if (rsq > cut_bosq) continue; @@ -1817,9 +1800,9 @@ void PairReaxFFKokkos::operator()(TagPairReaxBuildListsHalfBlockingP BO = BO_s + BO_pi + BO_pi2; if (BO < bo_cut) continue; - int ii_index = -1; - int jj_index = -1; - build_bo_list(bo_first_i, i, j, ii_index, jj_index); + int i_index = -1; + int j_index = -1; + build_bo_list(i, j, i_index, j_index); } } } @@ -1840,26 +1823,15 @@ void PairReaxFFKokkos::operator()(TagPairReaxBuildListsHalfPreview 0.0) { + if (cut_hbsq > 0.0) ihb = paramssing(itype).p_hbond; - if (ihb 
== 1) { - d_hb_first[i] = i*maxhb; - hb_first_i = d_hb_first[i]; - } - } for (int jj = 0; jj < jnum; jj++) { int j = d_neighbors(i,jj); j &= NEIGHMASK; - d_bo_first[j] = j*maxbo; - d_hb_first[j] = j*maxhb; const int jtype = type(j); delij[0] = x(j,0) - xtmp; @@ -1868,7 +1840,7 @@ void PairReaxFFKokkos::operator()(TagPairReaxBuildListsHalfPreview(rsq, i, hb_first_i, ihb, j, jtype); + build_hb_list(rsq, i, ihb, j, jtype); if (rsq > cut_bosq) continue; @@ -1885,10 +1857,10 @@ void PairReaxFFKokkos::operator()(TagPairReaxBuildListsHalfPreview(bo_first_i, i, j, ii_index, jj_index); + build_bo_list(i, j, i_index, j_index); } } @@ -1897,7 +1869,7 @@ void PairReaxFFKokkos::operator()(TagPairReaxBuildListsHalfPreview template KOKKOS_INLINE_FUNCTION -void PairReaxFFKokkos::build_hb_list(F_FLOAT rsq, int i, int hb_first_i, int ihb, int j, int jtype) const { +void PairReaxFFKokkos::build_hb_list(F_FLOAT rsq, int i, int ihb, int j, int jtype) const { int i_index, j_index; int jhb = -1; @@ -1905,30 +1877,26 @@ void PairReaxFFKokkos::build_hb_list(F_FLOAT rsq, int i, int hb_firs jhb = paramssing(jtype).p_hbond; if (ihb == 1 && jhb == 2) { if (NEIGHFLAG == HALF) { - j_index = hb_first_i + d_hb_num[i]; + j_index = d_hb_num[i]; d_hb_num[i]++; } else - j_index = hb_first_i + Kokkos::atomic_fetch_add(&d_hb_num[i],1); + j_index = Kokkos::atomic_fetch_add(&d_hb_num[i],1); - const int jj_index = j_index - hb_first_i; - - if (jj_index >= maxhb) - d_resize_hb() = MAX(d_resize_hb(),jj_index+1); + if (j_index >= maxhb) + d_resize_hb() = MAX(d_resize_hb(), j_index+1); else - d_hb_list[j_index] = j; + d_hb_list(i, j_index) = j; } else if (j < nlocal && ihb == 2 && jhb == 1) { if (NEIGHFLAG == HALF) { - i_index = d_hb_first[j] + d_hb_num[j]; + i_index = d_hb_num[j]; d_hb_num[j]++; } else - i_index = d_hb_first[j] + Kokkos::atomic_fetch_add(&d_hb_num[j],1); + i_index = Kokkos::atomic_fetch_add(&d_hb_num[j],1); - const int ii_index = i_index - d_hb_first[j]; - - if (ii_index >= maxhb) - 
d_resize_hb() = MAX(d_resize_hb(),ii_index+1); + if (i_index >= maxhb) + d_resize_hb() = MAX(d_resize_hb(), i_index+1); else - d_hb_list[i_index] = i; + d_hb_list(j, i_index) = i; } } @@ -1939,31 +1907,27 @@ void PairReaxFFKokkos::build_hb_list(F_FLOAT rsq, int i, int hb_firs template template KOKKOS_INLINE_FUNCTION -bool PairReaxFFKokkos::build_bo_list(int bo_first_i, int i, int j, int& ii_index, int& jj_index) const { - int i_index, j_index; +bool PairReaxFFKokkos::build_bo_list(int i, int j, int& i_index, int& j_index) const { if (NEIGHFLAG == HALF) { - j_index = bo_first_i + d_bo_num[i]; - i_index = d_bo_first[j] + d_bo_num[j]; + j_index = d_bo_num[i]; + i_index = d_bo_num[j]; d_bo_num[i]++; d_bo_num[j]++; } else { - j_index = bo_first_i + Kokkos::atomic_fetch_add(&d_bo_num[i],1); - i_index = d_bo_first[j] + Kokkos::atomic_fetch_add(&d_bo_num[j],1); + j_index = Kokkos::atomic_fetch_add(&d_bo_num[i],1); + i_index = Kokkos::atomic_fetch_add(&d_bo_num[j],1); } - jj_index = j_index - bo_first_i; - ii_index = i_index - d_bo_first[j]; - bool set_dB_flag = true; - if (jj_index >= maxbo || ii_index >= maxbo) { - const int max_val = MAX(ii_index+1,jj_index+1); + if (j_index >= maxbo || i_index >= maxbo) { + const int max_val = MAX(i_index + 1, j_index + 1); d_resize_bo() = MAX(d_resize_bo(),max_val); set_dB_flag = false; } else { - d_bo_list[j_index] = j; - d_bo_list[i_index] = i; + d_bo_list(i, j_index) = j; + d_bo_list(j, i_index) = i; set_dB_flag = true; } @@ -1987,13 +1951,11 @@ void PairReaxFFKokkos::operator()(TagPairReaxBuildListsFull, const i F_FLOAT dDeltap_self_i[3] = {0.0,0.0,0.0}; F_FLOAT total_bo_i = 0.0; - const int j_start = d_bo_first[i]; - const int j_end = j_start + d_bo_num[i]; - for (int jj = j_start; jj < j_end; jj++) { - int j = d_bo_list[jj]; + const int jnum = d_bo_num[i]; + for (int j_index = 0; j_index < jnum; j_index++) { + int j = d_bo_list(i, j_index); j &= NEIGHMASK; const int jtype = type(j); - const int j_index = jj - j_start; delij[0] = 
x(j,0) - xtmp; delij[1] = x(j,1) - ytmp; delij[2] = x(j,2) - ztmp; @@ -2102,23 +2064,19 @@ void PairReaxFFKokkos::operator()(TagPairReaxBondOrder2, const int & const int i = d_ilist[ii]; const int itype = type(i); - const int j_start = d_bo_first[i]; - const int j_end = j_start + d_bo_num[i]; + const int jnum = d_bo_num[i]; const F_FLOAT val_i = paramssing(itype).valency; d_total_bo[i] = 0.0; F_FLOAT total_bo = 0.0; - for (int jj = j_start; jj < j_end; jj++) { - int j = d_bo_list[jj]; + for (int j_index = 0; j_index < jnum; j_index++) { + int j = d_bo_list(i, j_index); j &= NEIGHMASK; const int jtype = type(j); - const int j_index = jj - j_start; - const int i_index = maxbo+j_index; // calculate corrected BO and total bond order - const F_FLOAT val_j = paramssing(jtype).valency; const F_FLOAT ovc = paramstwbp(itype,jtype).ovc; const F_FLOAT v13cor = paramstwbp(itype,jtype).v13cor; @@ -2213,10 +2171,6 @@ void PairReaxFFKokkos::operator()(TagPairReaxBondOrder2, const int & d_Cdbo(i,j_index) = 0.0; d_Cdbopi(i,j_index) = 0.0; d_Cdbopi2(i,j_index) = 0.0; - d_Cdbo(j,i_index) = 0.0; - d_Cdbopi(j,i_index) = 0.0; - d_Cdbopi2(j,i_index) = 0.0; - d_CdDelta[j] = 0.0; } d_CdDelta[i] = 0.0; @@ -2271,20 +2225,18 @@ void PairReaxFFKokkos::operator()(TagPairReaxComputeMulti1, const in if (imass > 21.0) dfvl = 0.0; else dfvl = 1.0; - const int j_start = d_bo_first[i]; - const int j_end = j_start + d_bo_num[i]; + const int jnum = d_bo_num[i]; F_FLOAT sum_ovun1 = 0.0; F_FLOAT sum_ovun2 = 0.0; - for (int jj = j_start; jj < j_end; jj++) { - int j = d_bo_list[jj]; + for (int j_index = 0; j_index < jnum; j_index++) { + int j = d_bo_list(i, j_index); j &= NEIGHMASK; const int jtype = type(j); - const int j_index = jj - j_start; sum_ovun1 += paramstwbp(itype,jtype).p_ovun1 * paramstwbp(itype,jtype).De_s * d_BO(i,j_index); - sum_ovun2 += (d_Delta[j] - dfvl * d_Delta_lp_temp[j]) * (d_BO_pi(i,j_index) + d_BO_pi2(i,j_index)); + sum_ovun2 += (d_Delta[j] - dfvl * d_Delta_lp_temp[j]) * (d_BO_pi(i, 
j_index) + d_BO_pi2(i,j_index)); } d_sum_ovun(i,1) += sum_ovun1; d_sum_ovun(i,2) += sum_ovun2; @@ -2394,16 +2346,14 @@ void PairReaxFFKokkos::operator()(TagPairReaxComputeMulti2 0 || enobondsflag) a_CdDelta[i] += CEunder3; - const int j_start = d_bo_first[i]; - const int j_end = j_start + d_bo_num[i]; + const int jnum = d_bo_num[i]; F_FLOAT CdDelta_i = 0.0; - for (int jj = j_start; jj < j_end; jj++) { - int j = d_bo_list[jj]; + for (int j_index = 0; j_index < jnum; j_index++) { + int j = d_bo_list(i, j_index); j &= NEIGHMASK; const int jtype = type(j); const F_FLOAT jmass = paramssing(jtype).mass; - const int j_index = jj - j_start; const F_FLOAT De_s = paramstwbp(itype,jtype).De_s; // multibody lone pair: correction for C2 @@ -2453,24 +2403,23 @@ void PairReaxFFKokkos::operator()(TagPairReaxCountAngularTorsion(i, itype, j_start, j_end, location_angular); + int count_angular = preprocess_angular(i, itype, jnum, location_angular); location_angular = Kokkos::atomic_fetch_add(&d_count_angular_torsion(0), count_angular); if (POPULATE) { // Fill buffer for `i` - preprocess_angular(i, itype, j_start, j_end, location_angular); + preprocess_angular(i, itype, jnum, location_angular); } // Torsion @@ -2482,12 +2431,12 @@ void PairReaxFFKokkos::operator()(TagPairReaxCountAngularTorsion(i, itype, itag, xtmp, ytmp, ztmp, j_start, j_end, location_torsion); + int count_torsion = preprocess_torsion(i, itype, itag, xtmp, ytmp, ztmp, jnum, location_torsion); location_torsion = Kokkos::atomic_fetch_add(&d_count_angular_torsion(1), count_torsion); if (POPULATE) { // Fill buffer for `i` - preprocess_torsion(i, itype, itag, xtmp, ytmp, ztmp, j_start, j_end, location_torsion); + preprocess_torsion(i, itype, itag, xtmp, ytmp, ztmp, jnum, location_torsion); } } @@ -2496,7 +2445,7 @@ void PairReaxFFKokkos::operator()(TagPairReaxCountAngularTorsion KOKKOS_INLINE_FUNCTION -void PairReaxFFKokkos::compute_angular_sbo(int i, int itype, int j_start, int j_end) const { +void 
PairReaxFFKokkos::compute_angular_sbo(int i, int itype, int jnum) const { F_FLOAT SBO2, CSBO2, dSBO1, dSBO2; @@ -2506,8 +2455,7 @@ void PairReaxFFKokkos::compute_angular_sbo(int i, int itype, int j_s F_FLOAT SBOp = 0.0; F_FLOAT prod_SBO = 1.0; - for (int jj = j_start; jj < j_end; jj++) { - const int j_index = jj - j_start; + for (int j_index = 0; j_index < jnum; j_index++) { const F_FLOAT bo_ij = d_BO(i,j_index); SBOp += (d_BO_pi(i,j_index) + d_BO_pi2(i,j_index)); @@ -2560,29 +2508,26 @@ void PairReaxFFKokkos::compute_angular_sbo(int i, int itype, int j_s template template KOKKOS_INLINE_FUNCTION -int PairReaxFFKokkos::preprocess_angular(int i, int itype, int j_start, int j_end, int location_angular) const { +int PairReaxFFKokkos::preprocess_angular(int i, int itype, int jnum, int location_angular) const { int count_angular = 0; - for (int jj = j_start; jj < j_end; jj++) { - int j = d_bo_list[jj]; + for (int j_index = 0; j_index < jnum; j_index++) { + int j = d_bo_list(i, j_index); j &= NEIGHMASK; - const int j_index = jj - j_start; const F_FLOAT bo_ij = d_BO(i,j_index); if (bo_ij <= thb_cut) continue; if (i >= nlocal && j >= nlocal) continue; - const int i_index = maxbo + j_index; const int jtype = type(j); - for (int kk = jj+1; kk < j_end; kk++) { + for (int k_index = j_index + 1; k_index < jnum; k_index++) { //for (int kk = j_start; kk < j_end; kk++) { - int k = d_bo_list[kk]; + int k = d_bo_list(i, k_index); k &= NEIGHMASK; if (k == j) continue; - const int k_index = kk - j_start; const F_FLOAT bo_ik = d_BO(i,k_index); if (bo_ij <= thb_cut || bo_ik <= thb_cut || bo_ij * bo_ik <= thb_cutsq) continue; @@ -2600,14 +2545,14 @@ int PairReaxFFKokkos::preprocess_angular(int i, int itype, int j_sta pack.i0 = i; pack.i1 = j; pack.i2 = k; - pack.i3 = j_start; + pack.i3 = jnum; d_angular_pack(location_angular, 0) = pack; - // Second pack stores i_index, j_index, k_index, and j_end - pack.i0 = i_index; + // Second pack stores j_index and k_index + // i0 is unused because 
there's no i_index pack.i1 = j_index; pack.i2 = k_index; - pack.i3 = j_end; + // i3 is unused d_angular_pack(location_angular, 1) = pack; location_angular++; @@ -2626,17 +2571,16 @@ template template KOKKOS_INLINE_FUNCTION int PairReaxFFKokkos::preprocess_torsion(int i, int /*itype*/, tagint itag, - F_FLOAT xtmp, F_FLOAT ytmp, F_FLOAT ztmp, int j_start, int j_end, int location_torsion) const { + F_FLOAT xtmp, F_FLOAT ytmp, F_FLOAT ztmp, int jknum, int location_torsion) const { // in reaxff_torsion_angles: j = i, k = j, i = k; int count_torsion = 0; - for (int jj = j_start; jj < j_end; jj++) { - int j = d_bo_list[jj]; + for (int j_index = 0; j_index < jknum; j_index++) { + int j = d_bo_list(i, j_index); j &= NEIGHMASK; const tagint jtag = tag(j); - const int j_index = jj - j_start; // skip half of the interactions if (itag > jtag) { @@ -2652,23 +2596,20 @@ int PairReaxFFKokkos::preprocess_torsion(int i, int /*itype*/, tagin const F_FLOAT bo_ij = d_BO(i,j_index); if (bo_ij < thb_cut) continue; - const int l_start = d_bo_first[j]; - const int l_end = l_start + d_bo_num[j]; + const int lnum = d_bo_num[j]; - for (int kk = j_start; kk < j_end; kk++) { - int k = d_bo_list[kk]; + for (int k_index = 0; k_index < jknum; k_index++) { + int k = d_bo_list(i, k_index); k &= NEIGHMASK; if (k == j) continue; - const int k_index = kk - j_start; const F_FLOAT bo_ik = d_BO(i,k_index); if (bo_ik < thb_cut) continue; - for (int ll = l_start; ll < l_end; ll++) { - int l = d_bo_list[ll]; + for (int l_index = 0; l_index < lnum; l_index++) { + int l = d_bo_list(j, l_index); l &= NEIGHMASK; if (l == i) continue; - const int l_index = ll - l_start; const F_FLOAT bo_jl = d_BO(j,l_index); if (l == k || bo_jl < thb_cut || bo_ij*bo_ik*bo_jl < thb_cut) continue; @@ -2708,9 +2649,9 @@ void PairReaxFFKokkos::operator()(TagPairReaxComputeAngularPreproces auto v_f = ScatterViewHelper,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); auto a_f = v_f.template access>(); - 
Kokkos::View::value,Kokkos::MemoryTraits::value>> a_Cdbo = d_Cdbo; - Kokkos::View::value,Kokkos::MemoryTraits::value>> a_Cdbopi = d_Cdbopi; - Kokkos::View::value,Kokkos::MemoryTraits::value>> a_Cdbopi2 = d_Cdbopi2; + Kokkos::View::value>> a_Cdbo = d_Cdbo; + Kokkos::View::value>> a_Cdbopi = d_Cdbopi; + Kokkos::View::value>> a_Cdbopi2 = d_Cdbopi2; auto v_CdDelta = ScatterViewHelper,decltype(dup_CdDelta),decltype(ndup_CdDelta)>::get(dup_CdDelta,ndup_CdDelta); auto a_CdDelta = v_CdDelta.template access>(); @@ -2750,13 +2691,13 @@ void PairReaxFFKokkos::operator()(TagPairReaxComputeAngularPreproces const int i = pack.i0; const int j = pack.i1; const int k = pack.i2; - const int j_start = pack.i3; + const int jnum = pack.i3; pack = d_angular_pack(apack, 1); - const int i_index = pack.i0; + // i0 is unused const int j_index = pack.i1; const int k_index = pack.i2; - const int j_end = pack.i3; + // i3 is unused const int itype = type(i); const X_FLOAT xtmp = x(i,0); @@ -2906,17 +2847,13 @@ void PairReaxFFKokkos::operator()(TagPairReaxComputeAngularPreproces // Forces a_Cdbo(i,j_index) += (CEval1 + CEpen2 + (CEcoa1 - CEcoa4)); - a_Cdbo(j,i_index) += (CEval1 + CEpen2 + (CEcoa1 - CEcoa4)); a_Cdbo(i,k_index) += (CEval2 + CEpen3 + (CEcoa2 - CEcoa5)); - a_Cdbo(k,i_index) += (CEval2 + CEpen3 + (CEcoa2 - CEcoa5)); CdDelta_i += ((CEval3 + CEval7) + CEpen1 + CEcoa3); CdDelta_j += CEcoa4; a_CdDelta[k] += CEcoa5; - for (int ll = j_start; ll < j_end; ll++) { - const int l_index = ll - j_start; - + for (int l_index = 0; l_index < jnum; l_index++) { temp_bo_jt = d_BO(i,l_index); temp = temp_bo_jt * temp_bo_jt * temp_bo_jt; pBOjt7 = temp * temp * temp_bo_jt; @@ -2970,8 +2907,8 @@ void PairReaxFFKokkos::operator()(TagPairReaxComputeTorsionPreproces auto v_CdDelta = ScatterViewHelper,decltype(dup_CdDelta),decltype(ndup_CdDelta)>::get(dup_CdDelta,ndup_CdDelta); auto a_CdDelta = v_CdDelta.template access>(); - Kokkos::View::value,Kokkos::MemoryTraits::value>> a_Cdbo = d_Cdbo; - 
Kokkos::View::value,Kokkos::MemoryTraits::value>> a_Cdbopi = d_Cdbopi; + Kokkos::View::value>> a_Cdbo = d_Cdbo; + Kokkos::View::value>> a_Cdbopi = d_Cdbopi; //auto a_Cdbo = dup_Cdbo.template access>(); // in reaxff_torsion_angles: j = i, k = j, i = k; @@ -3332,21 +3269,18 @@ void PairReaxFFKokkos::operator()(TagPairReaxComputeHydrogen= HB_THRESHOLD) { - hblist[top] = jj; + hblist[top] = j_index; top ++; } } @@ -3354,8 +3288,8 @@ void PairReaxFFKokkos::operator()(TagPairReaxComputeHydrogen::operator()(TagPairReaxComputeHydrogen KOKKOS_INLINE_FUNCTION void PairReaxFFKokkos::operator()(TagPairReaxUpdateBond, const int &ii) const { - Kokkos::View::value>> a_Cdbo = d_Cdbo; - Kokkos::View::value>> a_Cdbopi = d_Cdbopi; - Kokkos::View::value>> a_Cdbopi2 = d_Cdbopi2; + Kokkos::View::value>> a_Cdbo = d_Cdbo; + Kokkos::View::value>> a_Cdbopi = d_Cdbopi; + Kokkos::View::value>> a_Cdbopi2 = d_Cdbopi2; const int i = d_ilist[ii]; const X_FLOAT xtmp = x(i,0); const X_FLOAT ytmp = x(i,1); const X_FLOAT ztmp = x(i,2); const tagint itag = tag(i); - const int j_start = d_bo_first[i]; - const int j_end = j_start + d_bo_num[i]; + const int jnum = d_bo_num[i]; - for (int jj = j_start; jj < j_end; jj++) { - int j = d_bo_list[jj]; + for (int j_index = 0; j_index < jnum; j_index++) { + int j = d_bo_list(i, j_index); j &= NEIGHMASK; const tagint jtag = tag(j); @@ -3493,19 +3425,16 @@ void PairReaxFFKokkos::operator()(TagPairReaxUpdateBond, if (!flag) continue; - const int j_index = jj - j_start; const F_FLOAT Cdbo_i = d_Cdbo(i,j_index); const F_FLOAT Cdbopi_i = d_Cdbopi(i,j_index); const F_FLOAT Cdbopi2_i = d_Cdbopi2(i,j_index); - const int k_start = d_bo_first[j]; - const int k_end = k_start + d_bo_num[j]; + const int knum = d_bo_num[j]; - for (int kk = k_start; kk < k_end; kk++) { - int k = d_bo_list[kk]; + for (int k_index = 0; k_index < knum; k_index++) { + int k = d_bo_list(j, k_index); k &= NEIGHMASK; if (k != i) continue; - const int k_index = kk - k_start; a_Cdbo(j,k_index) += 
Cdbo_i; a_Cdbopi(j,k_index) += Cdbopi_i; @@ -3533,13 +3462,12 @@ void PairReaxFFKokkos::operator()(TagPairReaxComputeBond1::operator()(TagPairReaxComputeBond1::operator()(TagPairReaxComputeBond2::operator()(TagPairReaxComputeBond2::operator()(TagPairReaxComputeBond2::operator()(TagPairReaxComputeBond2::calculate_find_bond_item(int ii, int &numbond int nj = 0; if (mask[i] & groupbit) { - const int j_start = d_bo_first[i]; - const int j_end = j_start + d_bo_num[i]; - for (int jj = j_start; jj < j_end; jj++) { - int j = d_bo_list[jj]; + const int jnum = d_bo_num[i]; + for (int j_index = 0; j_index < jnum; j_index++) { + int j = d_bo_list(i, j_index); j &= NEIGHMASK; if (mask[j] & groupbit) { const tagint jtag = tag[j]; - const int j_index = jj - j_start; - double bo_tmp = d_BO(i,j_index); + double bo_tmp = d_BO(i, j_index); if (bo_tmp > bo_cut_bond) { d_neighid(i,nj) = jtag; @@ -4401,15 +4321,13 @@ KOKKOS_INLINE_FUNCTION void PairReaxFFKokkos::operator()(TagPairReaxFindBondSpecies, const int &i) const { int nj = 0; - const int j_start = d_bo_first[i]; - const int j_end = j_start + d_bo_num[i]; - for (int jj = j_start; jj < j_end; jj++) { - int j = d_bo_list[jj]; + const int jnum = d_bo_num[i]; + for (int j_index = 0; j_index < jnum; j_index++) { + int j = d_bo_list(i, j_index); j &= NEIGHMASK; if (j < i) continue; - const int j_index = jj - j_start; - double bo_tmp = d_BO(i,j_index); + double bo_tmp = d_BO(i, j_index); if (bo_tmp >= 0.10) { // Why is this a hardcoded value? 
k_tmpid.view()(i,nj) = j; diff --git a/src/KOKKOS/pair_reaxff_kokkos.h b/src/KOKKOS/pair_reaxff_kokkos.h index 5f228ebd19..4c7127c17b 100644 --- a/src/KOKKOS/pair_reaxff_kokkos.h +++ b/src/KOKKOS/pair_reaxff_kokkos.h @@ -178,14 +178,14 @@ class PairReaxFFKokkos : public PairReaxFF { // TagPairReaxBuildListsHalfBlocking, HalfBlockingPreview, HalfPreview template KOKKOS_INLINE_FUNCTION - void build_hb_list(F_FLOAT, int, int, int, int, int) const; + void build_hb_list(F_FLOAT, int, int, int, int) const; // Isolated function that builds the bond order list, reused across // TagPairReaxBuildListsHalfBlocking, HalfBlockingPreview, HalfPreview // Returns if we need to populate d_d* functions or not template KOKKOS_INLINE_FUNCTION - bool build_bo_list(int, int, int, int&, int&) const; + bool build_bo_list(int, int, int&, int&) const; KOKKOS_INLINE_FUNCTION void operator()(TagPairReaxBuildListsFull, const int&) const; @@ -245,17 +245,17 @@ class PairReaxFFKokkos : public PairReaxFF { // Abstraction for computing SBSO2, CSBO2, dSBO1, dsBO2 KOKKOS_INLINE_FUNCTION - void compute_angular_sbo(int, int, int, int) const; + void compute_angular_sbo(int, int, int) const; // Abstraction for counting and populating angular intermediates template KOKKOS_INLINE_FUNCTION - int preprocess_angular(int, int, int, int, int) const; + int preprocess_angular(int, int, int, int) const; // Abstraction for counting and populating torsion intermediated template KOKKOS_INLINE_FUNCTION - int preprocess_torsion(int, int, tagint, F_FLOAT, F_FLOAT, F_FLOAT, int, int, int) const; + int preprocess_torsion(int, int, tagint, F_FLOAT, F_FLOAT, F_FLOAT, int, int) const; template KOKKOS_INLINE_FUNCTION @@ -384,6 +384,7 @@ class PairReaxFFKokkos : public PairReaxFF { F_FLOAT *fi, F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *dril, F_FLOAT *drjl, F_FLOAT *drkl) const; protected: + void deallocate_views_of_views(); void allocate(); void allocate_array(); void setup(); @@ -436,7 +437,7 @@ class PairReaxFFKokkos : public 
PairReaxFF { typename AT::t_ffloat_2d_dl d_C1dbo, d_C2dbo, d_C3dbo; typename AT::t_ffloat_2d_dl d_C1dbopi, d_C2dbopi, d_C3dbopi, d_C4dbopi; typename AT::t_ffloat_2d_dl d_C1dbopi2, d_C2dbopi2, d_C3dbopi2, d_C4dbopi2; - typename AT::t_ffloat_2d_dl d_Cdbo, d_Cdbopi, d_Cdbopi2, d_dDeltap_self; + typename AT::t_ffloat_2d_dl d_dDeltap_self, d_Cdbo, d_Cdbopi, d_Cdbopi2; int need_dup; @@ -470,7 +471,8 @@ class PairReaxFFKokkos : public PairReaxFF { typename AT::t_int_1d_randomread d_ilist; typename AT::t_int_1d_randomread d_numneigh; - typename AT::t_int_1d d_bo_first, d_bo_num, d_bo_list, d_hb_first, d_hb_num, d_hb_list; + typename AT::t_int_1d d_bo_num, d_hb_num; + typename AT::t_int_2d d_bo_list, d_hb_list; DAT::tdual_int_scalar k_resize_bo, k_resize_hb; typename AT::t_int_scalar d_resize_bo, d_resize_hb; @@ -496,7 +498,6 @@ class PairReaxFFKokkos : public PairReaxFF { typedef typename tdual_LR_lookup_table_kk_2d::t_dev t_LR_lookup_table_kk_2d; tdual_LR_lookup_table_kk_2d k_LR; - t_LR_lookup_table_kk_2d d_LR; DAT::tdual_int_2d k_tmpid; DAT::tdual_ffloat_2d k_tmpbo; diff --git a/src/KOKKOS/pair_snap_kokkos_impl.h b/src/KOKKOS/pair_snap_kokkos_impl.h index 839240c62f..97c7d17ea9 100644 --- a/src/KOKKOS/pair_snap_kokkos_impl.h +++ b/src/KOKKOS/pair_snap_kokkos_impl.h @@ -525,8 +525,8 @@ void PairSNAPKokkos::compute(int eflag_in, // free duplicated memory if (need_dup) { - dup_f = decltype(dup_f)(); - dup_vatom = decltype(dup_vatom)(); + dup_f = {}; + dup_vatom = {}; } } diff --git a/src/KOKKOS/pair_sw_kokkos.cpp b/src/KOKKOS/pair_sw_kokkos.cpp index 01b856a7b5..d62af5a78f 100644 --- a/src/KOKKOS/pair_sw_kokkos.cpp +++ b/src/KOKKOS/pair_sw_kokkos.cpp @@ -186,9 +186,9 @@ void PairSWKokkos::compute(int eflag_in, int vflag_in) // free duplicated memory if (need_dup) { - dup_f = decltype(dup_f)(); - dup_eatom = decltype(dup_eatom)(); - dup_vatom = decltype(dup_vatom)(); + dup_f = {}; + dup_eatom = {}; + dup_vatom = {}; } } diff --git a/src/KOKKOS/pair_tersoff_kokkos.cpp 
b/src/KOKKOS/pair_tersoff_kokkos.cpp index 1a0d45e435..c2099f95b5 100644 --- a/src/KOKKOS/pair_tersoff_kokkos.cpp +++ b/src/KOKKOS/pair_tersoff_kokkos.cpp @@ -293,9 +293,9 @@ void PairTersoffKokkos::compute(int eflag_in, int vflag_in) // free duplicated memory if (need_dup) { - dup_f = decltype(dup_f)(); - dup_eatom = decltype(dup_eatom)(); - dup_vatom = decltype(dup_vatom)(); + dup_f = {}; + dup_eatom = {}; + dup_vatom = {}; } } diff --git a/src/KOKKOS/pair_tersoff_mod_kokkos.cpp b/src/KOKKOS/pair_tersoff_mod_kokkos.cpp index b941755d4b..3e651f1433 100644 --- a/src/KOKKOS/pair_tersoff_mod_kokkos.cpp +++ b/src/KOKKOS/pair_tersoff_mod_kokkos.cpp @@ -283,9 +283,9 @@ void PairTersoffMODKokkos::compute(int eflag_in, int vflag_in) // free duplicated memory if (need_dup) { - dup_f = decltype(dup_f)(); - dup_eatom = decltype(dup_eatom)(); - dup_vatom = decltype(dup_vatom)(); + dup_f = {}; + dup_eatom = {}; + dup_vatom = {}; } } diff --git a/src/KOKKOS/pair_tersoff_zbl_kokkos.cpp b/src/KOKKOS/pair_tersoff_zbl_kokkos.cpp index 08d6cb17d7..3d6d1ea27b 100644 --- a/src/KOKKOS/pair_tersoff_zbl_kokkos.cpp +++ b/src/KOKKOS/pair_tersoff_zbl_kokkos.cpp @@ -296,9 +296,9 @@ void PairTersoffZBLKokkos::compute(int eflag_in, int vflag_in) // free duplicated memory if (need_dup) { - dup_f = decltype(dup_f)(); - dup_eatom = decltype(dup_eatom)(); - dup_vatom = decltype(dup_vatom)(); + dup_f = {}; + dup_eatom = {}; + dup_vatom = {}; } } diff --git a/src/KOKKOS/pair_uf3_kokkos.cpp b/src/KOKKOS/pair_uf3_kokkos.cpp index 59112ddab0..da7660d0d0 100644 --- a/src/KOKKOS/pair_uf3_kokkos.cpp +++ b/src/KOKKOS/pair_uf3_kokkos.cpp @@ -1655,7 +1655,7 @@ double PairUF3Kokkos::single(int /*i*/, int /*j*/, int itype, int jt namespace LAMMPS_NS { template class PairUF3Kokkos; -#ifdef KOKKOS_ENABLE_GPU +#ifdef LMP_KOKKOS_GPU template class PairUF3Kokkos; #endif } // namespace LAMMPS_NS diff --git a/src/KOKKOS/pppm_kokkos.cpp b/src/KOKKOS/pppm_kokkos.cpp index 73e2c1f06f..273a53ab8f 100644 --- 
a/src/KOKKOS/pppm_kokkos.cpp +++ b/src/KOKKOS/pppm_kokkos.cpp @@ -794,7 +794,7 @@ void PPPMKokkos::allocate() // 2nd FFT returns data in 3d brick decomposition // remap takes data from 3d brick to FFT decomposition - int collective_flag = 0; // not yet supported in Kokkos version + int collective_flag = force->kspace->collective_flag; int gpu_aware_flag = lmp->kokkos->gpu_aware_flag; int tmp; diff --git a/src/KOKKOS/remap_kokkos.cpp b/src/KOKKOS/remap_kokkos.cpp index 0d539ada83..573f4c2508 100644 --- a/src/KOKKOS/remap_kokkos.cpp +++ b/src/KOKKOS/remap_kokkos.cpp @@ -103,12 +103,10 @@ template void RemapKokkos::remap_3d_kokkos(typename FFT_AT::t_FFT_SCALAR_1d d_in, typename FFT_AT::t_FFT_SCALAR_1d d_out, typename FFT_AT::t_FFT_SCALAR_1d d_buf, struct remap_plan_3d_kokkos *plan) { - // collective flag not yet supported - // use point-to-point communication - - int i,isend,irecv; typename FFT_AT::t_FFT_SCALAR_1d d_scratch; + int me; + MPI_Comm_rank(plan->comm,&me); if (plan->memory == 0) d_scratch = d_buf; @@ -116,70 +114,132 @@ void RemapKokkos::remap_3d_kokkos(typename FFT_AT::t_FFT_SCALAR_1d d d_scratch = plan->d_scratch; // post all recvs into scratch space + // If not using GPU-aware MPI, mirror data to host FFT_SCALAR* v_scratch = d_scratch.data(); - if (!plan->usegpu_aware) { - plan->h_scratch = Kokkos::create_mirror_view(d_scratch); - v_scratch = plan->h_scratch.data(); - } - - for (irecv = 0; irecv < plan->nrecv; irecv++) { - FFT_SCALAR* scratch = v_scratch + plan->recv_bufloc[irecv]; - MPI_Irecv(scratch,plan->recv_size[irecv], - MPI_FFT_SCALAR,plan->recv_proc[irecv],0, - plan->comm,&plan->request[irecv]); - } - FFT_SCALAR* v_sendbuf = plan->d_sendbuf.data(); if (!plan->usegpu_aware) { + plan->h_scratch = Kokkos::create_mirror_view(d_scratch); plan->h_sendbuf = Kokkos::create_mirror_view(plan->d_sendbuf); + v_scratch = plan->h_scratch.data(); v_sendbuf = plan->h_sendbuf.data(); } - // send all messages to other procs + // use point-to-point communication - 
for (isend = 0; isend < plan->nsend; isend++) { - int in_offset = plan->send_offset[isend]; - plan->pack(d_in,in_offset, - plan->d_sendbuf,0,&plan->packplan[isend]); + if (!plan->usecollective) { + int i,isend,irecv; - if (!plan->usegpu_aware) - Kokkos::deep_copy(plan->h_sendbuf,plan->d_sendbuf); - MPI_Send(v_sendbuf,plan->send_size[isend],MPI_FFT_SCALAR, - plan->send_proc[isend],0,plan->comm); - } + for (irecv = 0; irecv < plan->nrecv; irecv++) { + FFT_SCALAR* scratch = v_scratch + plan->recv_bufloc[irecv]; + MPI_Irecv(scratch,plan->recv_size[irecv], + MPI_FFT_SCALAR,plan->recv_proc[irecv],0, + plan->comm,&plan->request[irecv]); + } - // copy in -> scratch -> out for self data + // send all messages to other procs - if (plan->self) { - isend = plan->nsend; - irecv = plan->nrecv; + for (isend = 0; isend < plan->nsend; isend++) { + int in_offset = plan->send_offset[isend]; + plan->pack(d_in,in_offset, + plan->d_sendbuf,0,&plan->packplan[isend]); - int in_offset = plan->send_offset[isend]; - int scratch_offset = plan->recv_bufloc[irecv]; - int out_offset = plan->recv_offset[irecv]; + if (!plan->usegpu_aware) + Kokkos::deep_copy(plan->h_sendbuf,plan->d_sendbuf); - plan->pack(d_in,in_offset, - d_scratch,scratch_offset, - &plan->packplan[isend]); - plan->unpack(d_scratch,scratch_offset, - d_out,out_offset,&plan->unpackplan[irecv]); - } + MPI_Send(v_sendbuf,plan->send_size[isend],MPI_FFT_SCALAR, + plan->send_proc[isend],0,plan->comm); + } - // unpack all messages from scratch -> out + // copy in -> scratch -> out for self data - for (i = 0; i < plan->nrecv; i++) { - MPI_Waitany(plan->nrecv,plan->request,&irecv,MPI_STATUS_IGNORE); + if (plan->self) { + isend = plan->nsend; + irecv = plan->nrecv; - int scratch_offset = plan->recv_bufloc[irecv]; - int out_offset = plan->recv_offset[irecv]; + int in_offset = plan->send_offset[isend]; + int scratch_offset = plan->recv_bufloc[irecv]; + int out_offset = plan->recv_offset[irecv]; - if (!plan->usegpu_aware) - 
Kokkos::deep_copy(d_scratch,plan->h_scratch); + plan->pack(d_in,in_offset, + d_scratch,scratch_offset, + &plan->packplan[isend]); + plan->unpack(d_scratch,scratch_offset, + d_out,out_offset,&plan->unpackplan[irecv]); + } - plan->unpack(d_scratch,scratch_offset, - d_out,out_offset,&plan->unpackplan[irecv]); + // unpack all messages from scratch -> out + + for (i = 0; i < plan->nrecv; i++) { + MPI_Waitany(plan->nrecv,plan->request,&irecv,MPI_STATUS_IGNORE); + + int scratch_offset = plan->recv_bufloc[irecv]; + int out_offset = plan->recv_offset[irecv]; + + if (!plan->usegpu_aware) + Kokkos::deep_copy(d_scratch,plan->h_scratch); + + plan->unpack(d_scratch,scratch_offset, + d_out,out_offset,&plan->unpackplan[irecv]); + } + } else { + if (plan->commringlen > 0) { + int isend,irecv; + + // populate send data + // buffers are allocated and count/displacement buffers + // are populated in remap_3d_create_plan_kokkos + + int numpacked = 0; + for (isend = 0; isend < plan->commringlen; isend++) { + if (plan->sendcnts[isend]) { + plan->pack(d_in,plan->send_offset[numpacked], + plan->d_sendbuf,plan->sdispls[isend], + &plan->packplan[numpacked]); + numpacked++; + } + else if (plan->commringlist[isend] == me && plan->self) { + numpacked++; + } + } + + if (!plan->usegpu_aware) + Kokkos::deep_copy(plan->h_sendbuf,plan->d_sendbuf); + + MPI_Alltoallv(v_sendbuf, plan->sendcnts, plan->sdispls, + MPI_FFT_SCALAR, v_scratch, plan->rcvcnts, + plan->rdispls, MPI_FFT_SCALAR, plan->comm); + + // unpack the data from the recv buffer into out + + if (!plan->usegpu_aware) + Kokkos::deep_copy(d_scratch,plan->h_scratch); + + // copy in -> scratch -> out for self data + + if (plan->self) { + plan->pack(d_in,plan->send_offset[plan->selfnsendloc], + plan->d_sendbuf,plan->sdispls[plan->selfcommringloc], + &plan->packplan[plan->selfnsendloc]); + plan->unpack(plan->d_sendbuf,plan->sdispls[plan->selfcommringloc], + d_out,plan->recv_offset[plan->selfnrecvloc], + &plan->unpackplan[plan->selfnrecvloc]); + } 
+ + numpacked = 0; + for (irecv = 0; irecv < plan->commringlen; irecv++) { + if (plan->rcvcnts[irecv]) { + plan->unpack(d_scratch,plan->rdispls[irecv], + d_out,plan->recv_offset[numpacked], + &plan->unpackplan[numpacked]); + numpacked++; + } + else if (plan->commringlist[irecv] == me && plan->self) { + numpacked++; + } + } + } } } @@ -263,224 +323,488 @@ struct remap_plan_3d_kokkos* RemapKokkos::remap_3d_creat out.khi = out_khi; out.ksize = out.khi - out.klo + 1; - // combine output extents across all procs - inarray = (struct extent_3d *) malloc(nprocs*sizeof(struct extent_3d)); if (inarray == nullptr) return nullptr; outarray = (struct extent_3d *) malloc(nprocs*sizeof(struct extent_3d)); if (outarray == nullptr) return nullptr; - MPI_Allgather(&out,sizeof(struct extent_3d),MPI_BYTE, - outarray,sizeof(struct extent_3d),MPI_BYTE,comm); - - // count send collides, including self - - nsend = 0; - iproc = me; - for (i = 0; i < nprocs; i++) { - iproc++; - if (iproc == nprocs) iproc = 0; - nsend += remap_3d_collide(&in,&outarray[iproc],&overlap); - } - - // malloc space for send info - - if (nsend) { - plan->pack = PackKokkos::pack_3d; - - plan->send_offset = (int *) malloc(nsend*sizeof(int)); - plan->send_size = (int *) malloc(nsend*sizeof(int)); - plan->send_proc = (int *) malloc(nsend*sizeof(int)); - plan->packplan = (struct pack_plan_3d *) - malloc(nsend*sizeof(struct pack_plan_3d)); - - if (plan->send_offset == nullptr || plan->send_size == nullptr || - plan->send_proc == nullptr || plan->packplan == nullptr) return nullptr; - } - - // store send info, with self as last entry - - nsend = 0; - iproc = me; - for (i = 0; i < nprocs; i++) { - iproc++; - if (iproc == nprocs) iproc = 0; - if (remap_3d_collide(&in,&outarray[iproc],&overlap)) { - plan->send_proc[nsend] = iproc; - plan->send_offset[nsend] = nqty * - ((overlap.klo-in.klo)*in.jsize*in.isize + - ((overlap.jlo-in.jlo)*in.isize + overlap.ilo-in.ilo)); - plan->packplan[nsend].nfast = nqty*overlap.isize; - 
plan->packplan[nsend].nmid = overlap.jsize; - plan->packplan[nsend].nslow = overlap.ksize; - plan->packplan[nsend].nstride_line = nqty*in.isize; - plan->packplan[nsend].nstride_plane = nqty*in.jsize*in.isize; - plan->packplan[nsend].nqty = nqty; - plan->send_size[nsend] = nqty*overlap.isize*overlap.jsize*overlap.ksize; - nsend++; - } - } - - // plan->nsend = # of sends not including self - - if (nsend && plan->send_proc[nsend-1] == me) { - if (plan->usecollective) // for collectives include self in nsend list - plan->nsend = nsend; - else - plan->nsend = nsend - 1; - } else - plan->nsend = nsend; - - // combine input extents across all procs + // combine input & output extents across all procs MPI_Allgather(&in,sizeof(struct extent_3d),MPI_BYTE, inarray,sizeof(struct extent_3d),MPI_BYTE,comm); + MPI_Allgather(&out,sizeof(struct extent_3d),MPI_BYTE, + outarray,sizeof(struct extent_3d),MPI_BYTE,comm); - // count recv collides, including self + // for efficiency, handle collective & non-collective setup separately - nrecv = 0; - iproc = me; - for (i = 0; i < nprocs; i++) { - iproc++; - if (iproc == nprocs) iproc = 0; - nrecv += remap_3d_collide(&out,&inarray[iproc],&overlap); - } + if (!plan->usecollective) { + // count send & recv collides, including self - // malloc space for recv info - - if (nrecv) { - if (permute == 0) - plan->unpack = PackKokkos::unpack_3d; - else if (permute == 1) { - if (nqty == 1) - plan->unpack = PackKokkos::unpack_3d_permute1_1; - else if (nqty == 2) - plan->unpack = PackKokkos::unpack_3d_permute1_2; - else - plan->unpack = PackKokkos::unpack_3d_permute1_n; - } - else if (permute == 2) { - if (nqty == 1) - plan->unpack = PackKokkos::unpack_3d_permute2_1; - else if (nqty == 2) - plan->unpack = PackKokkos::unpack_3d_permute2_2; - else - plan->unpack = PackKokkos::unpack_3d_permute2_n; + nsend = 0; + nrecv = 0; + for (i = 0; i < nprocs; i++) { + nsend += remap_3d_collide(&in,&outarray[i],&overlap); + nrecv += 
remap_3d_collide(&out,&inarray[i],&overlap); } - plan->recv_offset = (int *) malloc(nrecv*sizeof(int)); - plan->recv_size = (int *) malloc(nrecv*sizeof(int)); - plan->recv_proc = (int *) malloc(nrecv*sizeof(int)); - plan->recv_bufloc = (int *) malloc(nrecv*sizeof(int)); - plan->request = (MPI_Request *) malloc(nrecv*sizeof(MPI_Request)); - plan->unpackplan = (struct pack_plan_3d *) - malloc(nrecv*sizeof(struct pack_plan_3d)); + // malloc space for send & recv info - if (plan->recv_offset == nullptr || plan->recv_size == nullptr || - plan->recv_proc == nullptr || plan->recv_bufloc == nullptr || - plan->request == nullptr || plan->unpackplan == nullptr) return nullptr; - } + if (nsend) { + plan->pack = PackKokkos::pack_3d; - // store recv info, with self as last entry + plan->send_offset = (int *) malloc(nsend*sizeof(int)); + plan->send_size = (int *) malloc(nsend*sizeof(int)); + plan->send_proc = (int *) malloc(nsend*sizeof(int)); + plan->packplan = (struct pack_plan_3d *) + malloc(nsend*sizeof(struct pack_plan_3d)); - ibuf = 0; - nrecv = 0; - iproc = me; + if (plan->send_offset == nullptr || plan->send_size == nullptr || + plan->send_proc == nullptr || plan->packplan == nullptr) return nullptr; + } - for (i = 0; i < nprocs; i++) { - iproc++; - if (iproc == nprocs) iproc = 0; - if (remap_3d_collide(&out,&inarray[iproc],&overlap)) { - plan->recv_proc[nrecv] = iproc; - plan->recv_bufloc[nrecv] = ibuf; - - if (permute == 0) { - plan->recv_offset[nrecv] = nqty * - ((overlap.klo-out.klo)*out.jsize*out.isize + - (overlap.jlo-out.jlo)*out.isize + (overlap.ilo-out.ilo)); - plan->unpackplan[nrecv].nfast = nqty*overlap.isize; - plan->unpackplan[nrecv].nmid = overlap.jsize; - plan->unpackplan[nrecv].nslow = overlap.ksize; - plan->unpackplan[nrecv].nstride_line = nqty*out.isize; - plan->unpackplan[nrecv].nstride_plane = nqty*out.jsize*out.isize; - plan->unpackplan[nrecv].nqty = nqty; - } + if (nrecv) { + if (permute == 0) + plan->unpack = PackKokkos::unpack_3d; else if (permute 
== 1) { - plan->recv_offset[nrecv] = nqty * - ((overlap.ilo-out.ilo)*out.ksize*out.jsize + - (overlap.klo-out.klo)*out.jsize + (overlap.jlo-out.jlo)); - plan->unpackplan[nrecv].nfast = overlap.isize; - plan->unpackplan[nrecv].nmid = overlap.jsize; - plan->unpackplan[nrecv].nslow = overlap.ksize; - plan->unpackplan[nrecv].nstride_line = nqty*out.jsize; - plan->unpackplan[nrecv].nstride_plane = nqty*out.ksize*out.jsize; - plan->unpackplan[nrecv].nqty = nqty; + if (nqty == 1) + plan->unpack = PackKokkos::unpack_3d_permute1_1; + else if (nqty == 2) + plan->unpack = PackKokkos::unpack_3d_permute1_2; + else + plan->unpack = PackKokkos::unpack_3d_permute1_n; } - else { - plan->recv_offset[nrecv] = nqty * - ((overlap.jlo-out.jlo)*out.isize*out.ksize + - (overlap.ilo-out.ilo)*out.ksize + (overlap.klo-out.klo)); - plan->unpackplan[nrecv].nfast = overlap.isize; - plan->unpackplan[nrecv].nmid = overlap.jsize; - plan->unpackplan[nrecv].nslow = overlap.ksize; - plan->unpackplan[nrecv].nstride_line = nqty*out.ksize; - plan->unpackplan[nrecv].nstride_plane = nqty*out.isize*out.ksize; - plan->unpackplan[nrecv].nqty = nqty; + else if (permute == 2) { + if (nqty == 1) + plan->unpack = PackKokkos::unpack_3d_permute2_1; + else if (nqty == 2) + plan->unpack = PackKokkos::unpack_3d_permute2_2; + else + plan->unpack = PackKokkos::unpack_3d_permute2_n; } - plan->recv_size[nrecv] = nqty*overlap.isize*overlap.jsize*overlap.ksize; - ibuf += plan->recv_size[nrecv]; - nrecv++; + plan->recv_offset = (int *) malloc(nrecv*sizeof(int)); + plan->recv_size = (int *) malloc(nrecv*sizeof(int)); + plan->recv_proc = (int *) malloc(nrecv*sizeof(int)); + plan->recv_bufloc = (int *) malloc(nrecv*sizeof(int)); + plan->request = (MPI_Request *) malloc(nrecv*sizeof(MPI_Request)); + plan->unpackplan = (struct pack_plan_3d *) + malloc(nrecv*sizeof(struct pack_plan_3d)); + + if (plan->recv_offset == nullptr || plan->recv_size == nullptr || + plan->recv_proc == nullptr || plan->recv_bufloc == nullptr || + 
plan->request == nullptr || plan->unpackplan == nullptr) return nullptr; } + + // store send info, with self as last entry + + nsend = 0; + iproc = me; + for (i = 0; i < nprocs; i++) { + iproc++; + if (iproc == nprocs) iproc = 0; + if (remap_3d_collide(&in,&outarray[iproc],&overlap)) { + plan->send_proc[nsend] = iproc; + plan->send_offset[nsend] = nqty * + ((overlap.klo-in.klo)*in.jsize*in.isize + + ((overlap.jlo-in.jlo)*in.isize + overlap.ilo-in.ilo)); + plan->packplan[nsend].nfast = nqty*overlap.isize; + plan->packplan[nsend].nmid = overlap.jsize; + plan->packplan[nsend].nslow = overlap.ksize; + plan->packplan[nsend].nstride_line = nqty*in.isize; + plan->packplan[nsend].nstride_plane = nqty*in.jsize*in.isize; + plan->packplan[nsend].nqty = nqty; + plan->send_size[nsend] = nqty*overlap.isize*overlap.jsize*overlap.ksize; + nsend++; + } + } + + // plan->nsend = # of sends not including self + + if (nsend && plan->send_proc[nsend-1] == me) plan->nsend = nsend - 1; + else plan->nsend = nsend; + + // store recv info, with self as last entry + + ibuf = 0; + nrecv = 0; + iproc = me; + + for (i = 0; i < nprocs; i++) { + iproc++; + if (iproc == nprocs) iproc = 0; + if (remap_3d_collide(&out,&inarray[iproc],&overlap)) { + plan->recv_proc[nrecv] = iproc; + plan->recv_bufloc[nrecv] = ibuf; + + if (permute == 0) { + plan->recv_offset[nrecv] = nqty * + ((overlap.klo-out.klo)*out.jsize*out.isize + + (overlap.jlo-out.jlo)*out.isize + (overlap.ilo-out.ilo)); + plan->unpackplan[nrecv].nfast = nqty*overlap.isize; + plan->unpackplan[nrecv].nmid = overlap.jsize; + plan->unpackplan[nrecv].nslow = overlap.ksize; + plan->unpackplan[nrecv].nstride_line = nqty*out.isize; + plan->unpackplan[nrecv].nstride_plane = nqty*out.jsize*out.isize; + plan->unpackplan[nrecv].nqty = nqty; + } + else if (permute == 1) { + plan->recv_offset[nrecv] = nqty * + ((overlap.ilo-out.ilo)*out.ksize*out.jsize + + (overlap.klo-out.klo)*out.jsize + (overlap.jlo-out.jlo)); + plan->unpackplan[nrecv].nfast = 
overlap.isize; + plan->unpackplan[nrecv].nmid = overlap.jsize; + plan->unpackplan[nrecv].nslow = overlap.ksize; + plan->unpackplan[nrecv].nstride_line = nqty*out.jsize; + plan->unpackplan[nrecv].nstride_plane = nqty*out.ksize*out.jsize; + plan->unpackplan[nrecv].nqty = nqty; + } + else { + plan->recv_offset[nrecv] = nqty * + ((overlap.jlo-out.jlo)*out.isize*out.ksize + + (overlap.ilo-out.ilo)*out.ksize + (overlap.klo-out.klo)); + plan->unpackplan[nrecv].nfast = overlap.isize; + plan->unpackplan[nrecv].nmid = overlap.jsize; + plan->unpackplan[nrecv].nslow = overlap.ksize; + plan->unpackplan[nrecv].nstride_line = nqty*out.ksize; + plan->unpackplan[nrecv].nstride_plane = nqty*out.isize*out.ksize; + plan->unpackplan[nrecv].nqty = nqty; + } + + plan->recv_size[nrecv] = nqty*overlap.isize*overlap.jsize*overlap.ksize; + ibuf += plan->recv_size[nrecv]; + nrecv++; + } + } + + // plan->nrecv = # of recvs not including self + + if (nrecv && plan->recv_proc[nrecv-1] == me) plan->nrecv = nrecv - 1; + else plan->nrecv = nrecv; + + // init remaining fields in remap plan + + plan->memory = memory; + + if (nrecv == plan->nrecv) plan->self = 0; + else plan->self = 1; + + + // the plan->d_sendbuf and plan->d_recvbuf are used by both the + // collective & non-collective implementations. 
+ // For non-collective, the buffer size is MAX(send_size) for any one send + + // find biggest send message (not including self) and malloc space for it + + size = 0; + for (nsend = 0; nsend < plan->nsend; nsend++) + size = MAX(size,plan->send_size[nsend]); + + if (size) { + plan->d_sendbuf = typename FFT_AT::t_FFT_SCALAR_1d("remap3d:sendbuf",size); + if (!plan->d_sendbuf.data()) return nullptr; + } + + // if requested, allocate internal scratch space for recvs, + // only need it if I will receive any data (including self) + + if (memory == 1) { + if (nrecv > 0) { + plan->d_scratch = + typename FFT_AT::t_FFT_SCALAR_1d("remap3d:scratch",nqty*out.isize*out.jsize*out.ksize); + if (!plan->d_scratch.data()) return nullptr; + } + } + + // Non-collectives do not use MPI Communicator Groups + + MPI_Comm_dup(comm,&plan->comm); + } else { + + // Improved approach - use an AllReduce to aggregate which ranks need to be included + // To do this, we build the local proc's send/receive list, then do an AllReduce + // to create the send/recv count for the Alltoallv + + // local arrays to be used in the allreduce + // start with max length -- nprocs. 
Unused entries will be removed later + + int *local_cnts = (int*) malloc(2*nprocs*sizeof(int)); + if (local_cnts == nullptr) return nullptr; + int *local_sendcnts = local_cnts; + int *local_recvcnts = (local_cnts + nprocs); + + // local arrays used to store the results of the allreduce + + int *global_cnts = (int*) malloc(2*nprocs*sizeof(int)); + if (global_cnts == nullptr) return nullptr; + int *global_sendcnts = global_cnts; + int *global_recvcnts = (global_cnts + nprocs); + + // count send & recv collides, including self + + nsend = 0; + nrecv = 0; + for (i = 0; i < nprocs; i++) { + local_sendcnts[i] = remap_3d_collide(&in,&outarray[i],&overlap); + local_recvcnts[i] = remap_3d_collide(&out,&inarray[i],&overlap); + nsend += local_sendcnts[i]; + nrecv += local_recvcnts[i]; + } + + // perform an AllReduce to get the counts from all other processors and build sendcnts list + + MPI_Allreduce(local_cnts, global_cnts, 2*nprocs, MPI_INT, MPI_SUM, comm); + + // now remove procs that are 0 in send or recv to create minimized sendcnts/recvcnts for AlltoAllv + // also builds commringlist -- which is already sorted + + int *commringlist = (int*) malloc(nprocs * sizeof(int)); + int commringlen = 0; + + for (i = 0; i < nprocs; i++) { + if (global_sendcnts[i] > 0 || global_recvcnts[i] > 0) { + commringlist[commringlen] = i; + commringlen++; + } + } + + // resize commringlist to final size + + commringlist = (int *) realloc(commringlist, commringlen*sizeof(int)); + + // set the plan->commringlist + + plan->commringlen = commringlen; + plan->commringlist = commringlist; + + // clean up local buffers that are finished + + local_sendcnts = nullptr; + local_recvcnts = nullptr; + global_recvcnts = nullptr; + global_sendcnts = nullptr; + free(local_cnts); + free(global_cnts); + + // malloc space for send & recv info + // if the current proc is involved in any way in the communication, allocate space + // because of the Alltoallv, both send and recv have to be initialized even if + // 
only one of those is performed + + if (nsend || nrecv) { + + // send space + + plan->selfcommringloc = -1; + plan->selfnsendloc = -1; + plan->selfnrecvloc = -1; + + plan->nsend = nsend; + plan->pack = PackKokkos::pack_3d; + + plan->send_offset = (int *) malloc(nsend*sizeof(int)); + plan->send_size = (int *) malloc(plan->commringlen*sizeof(int)); + + plan->sendcnts = (int *) malloc(plan->commringlen*sizeof(int)); + plan->sdispls = (int *) malloc(plan->commringlen*sizeof(int)); + + // only used when sendcnt > 0 + + plan->packplan = (struct pack_plan_3d *) + malloc(nsend*sizeof(struct pack_plan_3d)); + + if (plan->send_offset == nullptr || plan->send_size == nullptr || + plan->sendcnts == nullptr || plan->sdispls == nullptr || + plan->packplan == nullptr) return nullptr; + + // recv space + + plan->nrecv = nrecv; + + if (permute == 0) + plan->unpack = PackKokkos::unpack_3d; + else if (permute == 1) { + if (nqty == 1) + plan->unpack = PackKokkos::unpack_3d_permute1_1; + else if (nqty == 2) + plan->unpack = PackKokkos::unpack_3d_permute1_2; + else + plan->unpack = PackKokkos::unpack_3d_permute1_n; + } + else if (permute == 2) { + if (nqty == 1) + plan->unpack = PackKokkos::unpack_3d_permute2_1; + else if (nqty == 2) + plan->unpack = PackKokkos::unpack_3d_permute2_2; + else + plan->unpack = PackKokkos::unpack_3d_permute2_n; + } + + plan->recv_offset = (int *) malloc(nrecv*sizeof(int)); + plan->recv_size = (int *) malloc(plan->commringlen*sizeof(int)); + + plan->rcvcnts = (int *) malloc(plan->commringlen*sizeof(int)); + plan->rdispls = (int *) malloc(plan->commringlen*sizeof(int)); + + // only used when recvcnt > 0 + + plan->unpackplan = (struct pack_plan_3d *) + malloc(nrecv*sizeof(struct pack_plan_3d)); + + if (plan->recv_offset == nullptr || plan->recv_size == nullptr || + plan->rcvcnts == nullptr || plan->rdispls == nullptr || + plan->unpackplan == nullptr) return nullptr; + } + + // store send info, with self as last entry + + nsend = 0; + ibuf = 0; + int 
total_send_size = 0; + for (i = 0; i < plan->commringlen; i++) { + iproc = plan->commringlist[i]; + if (iproc == me) { + plan->selfcommringloc = i; + plan->selfnsendloc = nsend; + } + if (remap_3d_collide(&in,&outarray[iproc],&overlap)) { + //plan->send_proc[nsend] = i; + // number of entries required for this pack's 3-d coords + plan->send_offset[nsend] = nqty * + ((overlap.klo-in.klo)*in.jsize*in.isize + + ((overlap.jlo-in.jlo)*in.isize + overlap.ilo-in.ilo)); + plan->packplan[nsend].nfast = nqty*overlap.isize; + plan->packplan[nsend].nmid = overlap.jsize; + plan->packplan[nsend].nslow = overlap.ksize; + plan->packplan[nsend].nstride_line = nqty*in.isize; + plan->packplan[nsend].nstride_plane = nqty*in.jsize*in.isize; + plan->packplan[nsend].nqty = nqty; + // total amount of overlap + plan->send_size[i] = nqty*overlap.isize*overlap.jsize*overlap.ksize; + plan->sendcnts[i] = plan->send_size[i]; + plan->sdispls[i] = ibuf; + ibuf += plan->send_size[i]; + nsend++; + } else { + plan->send_size[i] = 0; + plan->sdispls[i] = ibuf; + plan->sendcnts[i] = 0; + } + total_send_size += plan->send_size[i]; + } + + if (total_send_size) { + plan->d_sendbuf = typename FFT_AT::t_FFT_SCALAR_1d("remap3d:sendbuf",total_send_size); + if (!plan->d_sendbuf.data()) return nullptr; + } + + // store recv info, with self as last entry + + ibuf = 0; + nrecv = 0; + + for (i = 0; i < plan->commringlen; i++) { + iproc = plan->commringlist[i]; + if (iproc == me) { + plan->selfnrecvloc = nrecv; + } + if (remap_3d_collide(&out,&inarray[iproc],&overlap)) { + + if (permute == 0) { + plan->recv_offset[nrecv] = nqty * + ((overlap.klo-out.klo)*out.jsize*out.isize + + (overlap.jlo-out.jlo)*out.isize + (overlap.ilo-out.ilo)); + plan->unpackplan[nrecv].nfast = nqty*overlap.isize; + plan->unpackplan[nrecv].nmid = overlap.jsize; + plan->unpackplan[nrecv].nslow = overlap.ksize; + plan->unpackplan[nrecv].nstride_line = nqty*out.isize; + plan->unpackplan[nrecv].nstride_plane = nqty*out.jsize*out.isize; + 
plan->unpackplan[nrecv].nqty = nqty; + } + else if (permute == 1) { + plan->recv_offset[nrecv] = nqty * + ((overlap.ilo-out.ilo)*out.ksize*out.jsize + + (overlap.klo-out.klo)*out.jsize + (overlap.jlo-out.jlo)); + plan->unpackplan[nrecv].nfast = overlap.isize; + plan->unpackplan[nrecv].nmid = overlap.jsize; + plan->unpackplan[nrecv].nslow = overlap.ksize; + plan->unpackplan[nrecv].nstride_line = nqty*out.jsize; + plan->unpackplan[nrecv].nstride_plane = nqty*out.ksize*out.jsize; + plan->unpackplan[nrecv].nqty = nqty; + } + else { + plan->recv_offset[nrecv] = nqty * + ((overlap.jlo-out.jlo)*out.isize*out.ksize + + (overlap.ilo-out.ilo)*out.ksize + (overlap.klo-out.klo)); + plan->unpackplan[nrecv].nfast = overlap.isize; + plan->unpackplan[nrecv].nmid = overlap.jsize; + plan->unpackplan[nrecv].nslow = overlap.ksize; + plan->unpackplan[nrecv].nstride_line = nqty*out.ksize; + plan->unpackplan[nrecv].nstride_plane = nqty*out.isize*out.ksize; + plan->unpackplan[nrecv].nqty = nqty; + } + + plan->recv_size[i] = nqty*overlap.isize*overlap.jsize*overlap.ksize; + plan->rcvcnts[i] = plan->recv_size[i]; + plan->rdispls[i] = ibuf; + ibuf += plan->recv_size[i]; + nrecv++; + } else { + plan->recv_size[i] = 0; + plan->rcvcnts[i] = 0; + plan->rdispls[i] = ibuf; + } + } + + // init remaining fields in remap plan + + plan->memory = memory; + + if (plan->sendcnts[plan->selfcommringloc]) { + plan->self = 1; + plan->sendcnts[plan->selfcommringloc] = 0; + plan->rcvcnts[plan->selfcommringloc] = 0; + } + else { + plan->self = 0; + } + + + // if requested, allocate internal scratch space for recvs, + // only need it if I will receive any data (including self) + + if (memory == 1) { + if (nrecv > 0) { + plan->d_scratch = + typename FFT_AT::t_FFT_SCALAR_1d("remap3d:scratch",nqty*out.isize*out.jsize*out.ksize); + if (!plan->d_scratch.data()) return nullptr; + } + } + + // if using collective and the commringlist is NOT empty create a + // communicator for the plan based off an MPI_Group created 
with + // ranks from the commringlist + + if (plan->commringlen > 0) { + MPI_Group orig_group, new_group; + MPI_Comm_group(comm, &orig_group); + MPI_Group_incl(orig_group, plan->commringlen, + plan->commringlist, &new_group); + MPI_Comm_create(comm, new_group, &plan->comm); + } + + // if using collective and the comm ring list is empty create + // a communicator for the plan with an empty group + + else + MPI_Comm_create(comm, MPI_GROUP_EMPTY, &plan->comm); } - // plan->nrecv = # of recvs not including self - // for collectives include self in the nsend list - - if (nrecv && plan->recv_proc[nrecv-1] == me) { - if (plan->usecollective) plan->nrecv = nrecv; - else plan->nrecv = nrecv - 1; - } else plan->nrecv = nrecv; - - // init remaining fields in remap plan - - plan->memory = memory; - - if (nrecv == plan->nrecv) plan->self = 0; - else plan->self = 1; - // free locally malloced space free(inarray); free(outarray); - // find biggest send message (not including self) and malloc space for it - - size = 0; - for (nsend = 0; nsend < plan->nsend; nsend++) - size = MAX(size,plan->send_size[nsend]); - - if (size) { - plan->d_sendbuf = typename FFT_AT::t_FFT_SCALAR_1d("remap3d:sendbuf",size); - if (!plan->d_sendbuf.data()) return nullptr; - } - - // if requested, allocate internal scratch space for recvs, - // only need it if I will receive any data (including self) - - if (memory == 1) { - if (nrecv > 0) { - plan->d_scratch = - typename FFT_AT::t_FFT_SCALAR_1d("remap3d:scratch",nqty*out.isize*out.jsize*out.ksize); - if (!plan->d_scratch.data()) return nullptr; - } - } - - // not using collective - dup comm - - MPI_Comm_dup(comm,&plan->comm); - // return pointer to plan return plan; @@ -500,22 +824,45 @@ void RemapKokkos::remap_3d_destroy_plan_kokkos(struct remap_plan_3d_ if (!((plan->usecollective) && (plan->commringlen == 0))) MPI_Comm_free(&plan->comm); - // free internal arrays + if (plan->usecollective) { + if (plan->commringlist != nullptr) { + 
free(plan->commringlist); + free(plan->sendcnts); + free(plan->rcvcnts); + free(plan->sdispls); + free(plan->rdispls); + } - if (plan->nsend || plan->self) { - free(plan->send_offset); - free(plan->send_size); - free(plan->send_proc); - free(plan->packplan); - } + if (plan->nsend) { + free(plan->send_offset); + free(plan->send_size); + free(plan->packplan); + } - if (plan->nrecv || plan->self) { - free(plan->recv_offset); - free(plan->recv_size); - free(plan->recv_proc); - free(plan->recv_bufloc); - free(plan->request); - free(plan->unpackplan); + if (plan->nrecv) { + free(plan->recv_offset); + free(plan->recv_size); + free(plan->unpackplan); + } + } else { + + // free arrays used in pt2pt communication + + if (plan->nsend || plan->self) { + free(plan->send_offset); + free(plan->send_size); + free(plan->send_proc); + free(plan->packplan); + } + + if (plan->nrecv || plan->self) { + free(plan->recv_offset); + free(plan->recv_size); + free(plan->recv_proc); + free(plan->recv_bufloc); + free(plan->request); + free(plan->unpackplan); + } } // free plan itself diff --git a/src/KOKKOS/remap_kokkos.h b/src/KOKKOS/remap_kokkos.h index 77a3b1a37a..b0ccdb342d 100644 --- a/src/KOKKOS/remap_kokkos.h +++ b/src/KOKKOS/remap_kokkos.h @@ -44,6 +44,7 @@ struct remap_plan_3d_kokkos { int *recv_size; // size of each recv message int *recv_proc; // proc to recv each message from int *recv_bufloc; // offset in scratch buf for each recv + int *nrecvmap; // maps receive index to rank index MPI_Request *request; // MPI request for each posted recv struct pack_plan_3d *unpackplan; // unpack plan for each recv message int nrecv; // # of recvs from other procs @@ -52,9 +53,17 @@ struct remap_plan_3d_kokkos { int memory; // user provides scratch space or not MPI_Comm comm; // group of procs performing remap int usecollective; // use collective or point-to-point MPI + int usegpu_aware; // use GPU-Aware MPI or not + // variables for collective MPI only int commringlen; // length of commringlist 
int *commringlist; // ranks on communication ring of this plan - int usegpu_aware; // use GPU-Aware MPI or not + int *sendcnts; // # of elements in send buffer for each rank + int *rcvcnts; // # of elements in recv buffer for each rank + int *sdispls; // extraction location in send buffer for each rank + int *rdispls; // extraction location in recv buffer for each rank + int selfcommringloc; // current proc's location in commringlist + int selfnsendloc; // current proc's location in send lists + int selfnrecvloc; // current proc's location in recv lists }; template diff --git a/src/KOKKOS/verlet_kokkos.cpp b/src/KOKKOS/verlet_kokkos.cpp index 858df5df6c..d839362aa5 100644 --- a/src/KOKKOS/verlet_kokkos.cpp +++ b/src/KOKKOS/verlet_kokkos.cpp @@ -162,8 +162,10 @@ void VerletKokkos::setup(int flag) lmp->kokkos->auto_sync = 0; modify->setup(vflag); - output->setup(flag); lmp->kokkos->auto_sync = 1; + + atomKK->sync(Host,ALL_MASK); + output->setup(flag); update->setupflag = 0; } @@ -252,6 +254,7 @@ void VerletKokkos::setup_minimal(int flag) lmp->kokkos->auto_sync = 0; modify->setup(vflag); lmp->kokkos->auto_sync = 1; + update->setupflag = 0; } diff --git a/src/KSPACE/fft3d.cpp b/src/KSPACE/fft3d.cpp index a9956f4397..9ee798b50d 100644 --- a/src/KSPACE/fft3d.cpp +++ b/src/KSPACE/fft3d.cpp @@ -72,14 +72,14 @@ void fft_3d(FFT_DATA *in, FFT_DATA *out, int flag, struct fft_plan_3d *plan) { FFT_SCALAR norm; -#if defined(FFT_FFTW3) +#if defined(FFT_FFTW3) || defined(FFT_NVPL) FFT_SCALAR *out_ptr; #endif FFT_DATA *data,*copy; // system specific constants -#if defined(FFT_FFTW3) +#if defined(FFT_FFTW3) || defined(FFT_NVPL) FFTW_API(plan) theplan; #else // nothing to do for other FFTs @@ -105,7 +105,7 @@ void fft_3d(FFT_DATA *in, FFT_DATA *out, int flag, struct fft_plan_3d *plan) DftiComputeForward(plan->handle_fast,data); else DftiComputeBackward(plan->handle_fast,data); -#elif defined(FFT_FFTW3) +#elif defined(FFT_FFTW3) || defined(FFT_NVPL) if (flag == 1) 
theplan=plan->plan_fast_forward; else @@ -139,7 +139,7 @@ void fft_3d(FFT_DATA *in, FFT_DATA *out, int flag, struct fft_plan_3d *plan) DftiComputeForward(plan->handle_mid,data); else DftiComputeBackward(plan->handle_mid,data); -#elif defined(FFT_FFTW3) +#elif defined(FFT_FFTW3) || defined(FFT_NVPL) if (flag == 1) theplan=plan->plan_mid_forward; else @@ -173,7 +173,7 @@ void fft_3d(FFT_DATA *in, FFT_DATA *out, int flag, struct fft_plan_3d *plan) DftiComputeForward(plan->handle_slow,data); else DftiComputeBackward(plan->handle_slow,data); -#elif defined(FFT_FFTW3) +#elif defined(FFT_FFTW3) || defined(FFT_NVPL) if (flag == 1) theplan=plan->plan_slow_forward; else @@ -203,11 +203,11 @@ void fft_3d(FFT_DATA *in, FFT_DATA *out, int flag, struct fft_plan_3d *plan) if (flag == -1 && plan->scaled) { norm = plan->norm; const int num = plan->normnum; -#if defined(FFT_FFTW3) +#if defined(FFT_FFTW3) || defined(FFT_NVPL) out_ptr = (FFT_SCALAR *)out; #endif for (int i = 0; i < num; i++) { -#if defined(FFT_FFTW3) +#if defined(FFT_FFTW3) || defined(FFT_NVPL) *(out_ptr++) *= norm; *(out_ptr++) *= norm; #elif defined(FFT_MKL) @@ -515,7 +515,7 @@ struct fft_plan_3d *fft_3d_create_plan( #endif DftiCommitDescriptor(plan->handle_slow); -#elif defined(FFT_FFTW3) +#elif defined(FFT_FFTW3) || defined(FFT_NVPL) #if defined(FFT_FFTW_THREADS) if (nthreads > 1) { FFTW_API(init_threads)(); @@ -613,7 +613,7 @@ void fft_3d_destroy_plan(struct fft_plan_3d *plan) DftiFreeDescriptor(&(plan->handle_fast)); DftiFreeDescriptor(&(plan->handle_mid)); DftiFreeDescriptor(&(plan->handle_slow)); -#elif defined(FFT_FFTW3) +#elif defined(FFT_FFTW3) || defined(FFT_NVPL) FFTW_API(destroy_plan)(plan->plan_slow_forward); FFTW_API(destroy_plan)(plan->plan_slow_backward); FFTW_API(destroy_plan)(plan->plan_mid_forward); @@ -714,7 +714,7 @@ void fft_1d_only(FFT_DATA *data, int nsize, int flag, struct fft_plan_3d *plan) { int i,num; FFT_SCALAR norm; -#if defined(FFT_FFTW3) +#if defined(FFT_FFTW3) || defined(FFT_NVPL) 
FFT_SCALAR *data_ptr; #endif @@ -733,7 +733,7 @@ void fft_1d_only(FFT_DATA *data, int nsize, int flag, struct fft_plan_3d *plan) // fftw3 and Dfti in MKL encode the number of transforms // into the plan, so we cannot operate on a smaller data set -#if defined(FFT_MKL) || defined(FFT_FFTW3) +#if defined(FFT_MKL) || defined(FFT_FFTW3) || defined(FFT_NVPL) if ((total1 > nsize) || (total2 > nsize) || (total3 > nsize)) return; #endif @@ -754,7 +754,7 @@ void fft_1d_only(FFT_DATA *data, int nsize, int flag, struct fft_plan_3d *plan) DftiComputeBackward(plan->handle_mid,data); DftiComputeBackward(plan->handle_slow,data); } -#elif defined(FFT_FFTW3) +#elif defined(FFT_FFTW3) || defined(FFT_NVPL) FFTW_API(plan) theplan; if (flag == 1) theplan=plan->plan_fast_forward; @@ -795,11 +795,11 @@ void fft_1d_only(FFT_DATA *data, int nsize, int flag, struct fft_plan_3d *plan) if (flag == -1 && plan->scaled) { norm = plan->norm; num = MIN(plan->normnum,nsize); -#if defined(FFT_FFTW3) +#if defined(FFT_FFTW3) || defined(FFT_NVPL) data_ptr = (FFT_SCALAR *)data; #endif for (i = 0; i < num; i++) { -#if defined(FFT_FFTW3) +#if defined(FFT_FFTW3) || defined(FFT_NVPL) *(data_ptr++) *= norm; *(data_ptr++) *= norm; #elif defined(FFT_MKL) diff --git a/src/KSPACE/fft3d.h b/src/KSPACE/fft3d.h index f4ddeebc4d..34b9c3d4fc 100644 --- a/src/KSPACE/fft3d.h +++ b/src/KSPACE/fft3d.h @@ -31,6 +31,10 @@ typedef MKL_Complex8 FFT_DATA; typedef fftwf_complex FFT_DATA; #define FFTW_API(function) fftwf_##function +#elif defined(FFT_NVPL) +#include "nvpl_fftw.h" +typedef fftwf_complex FFT_DATA; +#define FFTW_API(function) fftwf_##function #else /* use a stripped down version of kiss fft as default fft */ @@ -64,6 +68,11 @@ typedef MKL_Complex16 FFT_DATA; typedef fftw_complex FFT_DATA; #define FFTW_API(function) fftw_##function +#elif defined(FFT_NVPL) +#include "nvpl_fftw.h" +typedef fftw_complex FFT_DATA; +#define FFTW_API(function) fftw_##function + #else /* use a stripped down version of kiss fft as default 
fft */ @@ -108,7 +117,7 @@ struct fft_plan_3d { DFTI_DESCRIPTOR *handle_fast; DFTI_DESCRIPTOR *handle_mid; DFTI_DESCRIPTOR *handle_slow; -#elif defined(FFT_FFTW3) +#elif defined(FFT_FFTW3) || defined(FFT_NVPL) FFTW_API(plan) plan_fast_forward; FFTW_API(plan) plan_fast_backward; FFTW_API(plan) plan_mid_forward; diff --git a/src/KSPACE/pppm.cpp b/src/KSPACE/pppm.cpp index 4fe5075f44..9665d2392d 100644 --- a/src/KSPACE/pppm.cpp +++ b/src/KSPACE/pppm.cpp @@ -1384,15 +1384,20 @@ void PPPM::set_grid_local() // npey_fft,npez_fft = # of procs in y,z dims // if nprocs is small enough, proc can own 1 or more entire xy planes, // else proc owns 2d sub-blocks of yz plane + // NOTE: commented out lines support this + // need to ensure fft3d.cpp and remap.cpp support 2D planes // me_y,me_z = which proc (0-npe_fft-1) I am in y,z dimensions // nlo_fft,nhi_fft = lower/upper limit of the section // of the global FFT mesh that I own in x-pencil decomposition int npey_fft,npez_fft; - if (nz_pppm >= nprocs) { - npey_fft = 1; - npez_fft = nprocs; - } else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft); + + //if (nz_pppm >= nprocs) { + // npey_fft = 1; + // npez_fft = nprocs; + //} else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft); + + procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft); int me_y = me % npey_fft; int me_z = me / npey_fft; diff --git a/src/LEPTON/pair_lepton_coul.h b/src/LEPTON/pair_lepton_coul.h index 8153792bd5..c58177c6cb 100644 --- a/src/LEPTON/pair_lepton_coul.h +++ b/src/LEPTON/pair_lepton_coul.h @@ -27,8 +27,8 @@ namespace LAMMPS_NS { class PairLeptonCoul : public PairLepton { public: - PairLeptonCoul(class LAMMPS *_lmp) : PairLepton(_lmp){}; - ~PairLeptonCoul() override{}; + PairLeptonCoul(class LAMMPS *_lmp) : PairLepton(_lmp) {}; + ~PairLeptonCoul() override {}; void compute(int, int) override; void settings(int, char **) override; void init_style() override; diff --git a/src/LEPTON/pair_lepton_sphere.h 
b/src/LEPTON/pair_lepton_sphere.h index ab586a309b..9e2642ac50 100644 --- a/src/LEPTON/pair_lepton_sphere.h +++ b/src/LEPTON/pair_lepton_sphere.h @@ -27,7 +27,7 @@ namespace LAMMPS_NS { class PairLeptonSphere : public PairLepton { public: - PairLeptonSphere(class LAMMPS *_lmp) : PairLepton(_lmp){}; + PairLeptonSphere(class LAMMPS *_lmp) : PairLepton(_lmp) {}; void compute(int, int) override; void settings(int, char **) override; diff --git a/src/MAKE/MACHINES/Makefile.aurora b/src/MAKE/MACHINES/Makefile.aurora new file mode 100644 index 0000000000..7c656e6379 --- /dev/null +++ b/src/MAKE/MACHINES/Makefile.aurora @@ -0,0 +1,121 @@ +# aurora = Intel Sapphire Rapids CPU, mpicxx compiler (compatible w/ GPU package) + +SHELL = /bin/sh + +# --------------------------------------------------------------------- +# compiler/linker settings +# specify flags and libraries needed for your compiler + +CC = mpicxx +OPTFLAGS = -DSHAKEATOMIC -DCOMMPARA -xSAPPHIRERAPIDS -O2 -ffp-model=fast -qoverride-limits -qopt-zmm-usage=high +CCFLAGS = -qopenmp -DLMP_INTEL_USELRT -DLMP_USE_MKL_RNG $(OPTFLAGS) +CCFLAGS += -I$(MKLROOT)/include +SHFLAGS = -fPIC +DEPFLAGS = -M + +LINK = mpicxx +LINKFLAGS = -qopenmp $(OPTFLAGS) +LIB = -ltbbmalloc -lmkl_intel_ilp64 -lmkl_sequential -lmkl_core +SIZE = size + +ARCHIVE = ar +ARFLAGS = -rc +SHLIBFLAGS = -shared + +# --------------------------------------------------------------------- +# LAMMPS-specific settings, all OPTIONAL +# specify settings for LAMMPS features you will use +# if you change any -D setting, do full re-compile after "make clean" + +# LAMMPS ifdef settings +# see possible settings in Section 3.5 of the manual + +LMP_INC = -DLAMMPS_GZIP + +# MPI library +# see discussion in Section 3.4 of the manual +# MPI wrapper compiler/linker can provide this info +# can point to dummy MPI library in src/STUBS as in Makefile.serial +# use -D MPICH and OMPI settings in INC to avoid C++ lib conflicts +# INC = path for mpi.h, MPI compiler settings +# 
PATH = path for MPI library +# LIB = name of MPI library + +MPI_INC = -DMPICH_SKIP_MPICXX -DOMPI_SKIP_MPICXX=1 +MPI_PATH = +MPI_LIB = + +# FFT library +# see discussion in Section 3.5.2 of manual +# can be left blank to use provided KISS FFT library +# INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings +# PATH = path for FFT library +# LIB = name of FFT library + +FFT_INC = -DFFT_MKL -DFFT_SINGLE +FFT_PATH = +FFT_LIB = + +# JPEG and/or PNG library +# see discussion in Section 3.5.4 of manual +# only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC +# INC = path(s) for jpeglib.h and/or png.h +# PATH = path(s) for JPEG library and/or PNG library +# LIB = name(s) of JPEG library and/or PNG library + +JPG_INC = +JPG_PATH = +JPG_LIB = + +# --------------------------------------------------------------------- +# build rules and dependencies +# do not edit this section + +include Makefile.package.settings +include Makefile.package + +EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC) +EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH) +EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB) +EXTRA_CPP_DEPENDS = $(PKG_CPP_DEPENDS) +EXTRA_LINK_DEPENDS = $(PKG_LINK_DEPENDS) + +# Path to src files + +vpath %.cpp .. +vpath %.h .. 
+ +# Link target + +$(EXE): main.o $(LMPLIB) $(EXTRA_LINK_DEPENDS) + $(LINK) $(LINKFLAGS) main.o $(EXTRA_PATH) $(LMPLINK) $(EXTRA_LIB) $(LIB) -o $@ + $(SIZE) $@ + +# Library targets + +$(ARLIB): $(OBJ) $(EXTRA_LINK_DEPENDS) + @rm -f ../$(ARLIB) + $(ARCHIVE) $(ARFLAGS) ../$(ARLIB) $(OBJ) + @rm -f $(ARLIB) + @ln -s ../$(ARLIB) $(ARLIB) + +$(SHLIB): $(OBJ) $(EXTRA_LINK_DEPENDS) + $(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o ../$(SHLIB) \ + $(OBJ) $(EXTRA_LIB) $(LIB) + @rm -f $(SHLIB) + @ln -s ../$(SHLIB) $(SHLIB) + +# Compilation rules + +%.o:%.cpp + $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $< + +# Individual dependencies + +depend : fastdep.exe $(SRC) + @./fastdep.exe $(EXTRA_INC) -- $^ > .depend || exit 1 + +fastdep.exe: ../DEPEND/fastdep.c + icx -O -o $@ $< + +sinclude .depend diff --git a/src/MAKE/MACHINES/Makefile.aurora_kokkos b/src/MAKE/MACHINES/Makefile.aurora_kokkos index a263d4cb8c..f164188dc0 100644 --- a/src/MAKE/MACHINES/Makefile.aurora_kokkos +++ b/src/MAKE/MACHINES/Makefile.aurora_kokkos @@ -7,12 +7,14 @@ SHELL = /bin/sh # specify flags and libraries needed for your compiler CC = mpicxx -CCFLAGS = -g -O3 -DNDEBUG +CCFLAGS = -g -O3 -DNDEBUG -w +CCFLAGS += -fsycl-device-code-split=per_kernel SHFLAGS = -fPIC DEPFLAGS = -M LINK = mpicxx LINKFLAGS = -g -O3 +LINKFLAGS += -flink-huge-device-code -fsycl-max-parallel-link-jobs=64 LIB = SIZE = size @@ -52,9 +54,9 @@ MPI_LIB = # PATH = path for FFT library # LIB = name of FFT library -FFT_INC = -FFT_PATH = -FFT_LIB = +FFT_INC = -DFFT_KOKKOS_MKL_GPU -DFFT_SINGLE -I${MKL_ROOT}/include +FFT_PATH = -L${MKL_ROOT}/lib -L${TBB_ROOT}/lib/intel64/gcc4.8 +FFT_LIB = -lmkl_sycl_dft -lmkl_intel_ilp64 -lmkl_tbb_thread -lmkl_core -ltbb # JPEG and/or PNG library # see discussion in Section 3.5.4 of manual @@ -116,6 +118,6 @@ depend : fastdep.exe $(SRC) @./fastdep.exe $(EXTRA_INC) -- $^ > .depend || exit 1 fastdep.exe: ../DEPEND/fastdep.c - cc -O -o $@ $< + icx -O -o $@ $< sinclude .depend diff --git 
a/src/MAKE/OPTIONS/Makefile.kokkos_mpi_only b/src/MAKE/OPTIONS/Makefile.kokkos_mpi_only index e1f7005617..5c39ac8f3e 100644 --- a/src/MAKE/OPTIONS/Makefile.kokkos_mpi_only +++ b/src/MAKE/OPTIONS/Makefile.kokkos_mpi_only @@ -7,7 +7,7 @@ SHELL = /bin/sh # specify flags and libraries needed for your compiler CC = mpicxx -CCFLAGS = -g -O3 -DNDEBUG +CCFLAGS = -g -O3 -DNDEBUG -DKOKKOS_ENABLE_ATOMICS_BYPASS SHFLAGS = -fPIC # uncomment when compiling with Intel 21.5 or older FMTFLAGS = # -std=c++11 diff --git a/src/MANYBODY/pair_bop.h b/src/MANYBODY/pair_bop.h index b210d1cc07..cdc6033f00 100644 --- a/src/MANYBODY/pair_bop.h +++ b/src/MANYBODY/pair_bop.h @@ -57,18 +57,18 @@ class PairBOP : public Pair { struct PairList1 { double r, dis[3]; double betaS, dBetaS, betaP, dBetaP, rep, dRep; - PairList1(){}; + PairList1() {}; }; struct PairList2 { double r, dis[3]; double rep, dRep; - PairList2(){}; + PairList2() {}; }; struct TripleList { double G, dG, cosAng, dCosAngi[3], dCosAngj[3], dCosAngk[3]; - TripleList(){}; + TripleList() {}; }; struct B_SG { diff --git a/src/MANYBODY/pair_meam_spline.h b/src/MANYBODY/pair_meam_spline.h index ee09b045cf..47f3f3d8df 100644 --- a/src/MANYBODY/pair_meam_spline.h +++ b/src/MANYBODY/pair_meam_spline.h @@ -197,16 +197,10 @@ class PairMEAMSpline : public Pair { } /// Returns the number of bytes used by this function object. - double memory_usage() const - { - return sizeof(*this) + sizeof(X[0]) * N * 3; - } + double memory_usage() const { return sizeof(*this) + sizeof(X[0]) * N * 3; } /// Returns the cutoff radius of this function. - double cutoff() const - { - return X[N - 1]; - } + double cutoff() const { return X[N - 1]; } /// Writes a Gnuplot script that plots the spline function. 
void writeGnuplot(const char *filename, const char *title = nullptr) const; diff --git a/src/MANYBODY/pair_meam_sw_spline.h b/src/MANYBODY/pair_meam_sw_spline.h index 9123f8c560..a5c1b0ffd4 100644 --- a/src/MANYBODY/pair_meam_sw_spline.h +++ b/src/MANYBODY/pair_meam_sw_spline.h @@ -187,16 +187,10 @@ class PairMEAMSWSpline : public Pair { } /// Returns the number of bytes used by this function object. - double memory_usage() const - { - return sizeof(*this) + sizeof(X[0]) * N * 3; - } + double memory_usage() const { return sizeof(*this) + sizeof(X[0]) * N * 3; } /// Returns the cutoff radius of this function. - double cutoff() const - { - return X[N - 1]; - } + double cutoff() const { return X[N - 1]; } /// Writes a Gnuplot script that plots the spline function. void writeGnuplot(const char *filename, const char *title = nullptr) const; diff --git a/src/MANYBODY/pair_rebomos.h b/src/MANYBODY/pair_rebomos.h index 856a52ca81..d36eb41a74 100644 --- a/src/MANYBODY/pair_rebomos.h +++ b/src/MANYBODY/pair_rebomos.h @@ -49,7 +49,7 @@ class PairREBOMoS : public Pair { int *REBO_numneigh; // # of pair neighbors for each atom int **REBO_firstneigh; // ptr to 1st neighbor of each atom - double *nM, *nS; // sum of weighting fns with REBO neighs + double *nM, *nS; // sum of weighting fns with REBO neighs double rcmin[2][2], rcmax[2][2], rcmaxsq[2][2], rcmaxp[2][2]; double Q[2][2], alpha[2][2], A[2][2], BIJc[2][2], Beta[2][2]; diff --git a/src/MANYBODY/pair_tersoff_mod_c.h b/src/MANYBODY/pair_tersoff_mod_c.h index aff1883bbd..8cea97baaf 100644 --- a/src/MANYBODY/pair_tersoff_mod_c.h +++ b/src/MANYBODY/pair_tersoff_mod_c.h @@ -26,7 +26,7 @@ namespace LAMMPS_NS { class PairTersoffMODC : public PairTersoffMOD { public: - PairTersoffMODC(class LAMMPS *lmp) : PairTersoffMOD(lmp){}; + PairTersoffMODC(class LAMMPS *lmp) : PairTersoffMOD(lmp) {}; static constexpr int NPARAMS_PER_LINE = 21; diff --git a/src/MC/fix_charge_regulation.cpp b/src/MC/fix_charge_regulation.cpp index 
cd035781c8..ddf14f6804 100644 --- a/src/MC/fix_charge_regulation.cpp +++ b/src/MC/fix_charge_regulation.cpp @@ -218,7 +218,7 @@ void FixChargeRegulation::init() { int flagall = flag; MPI_Allreduce(&flag, &flagall, 1, MPI_INT, MPI_SUM, world); - if (flagall && comm->me == 0) + if (flagall) error->all(FLERR, "fix charge/regulation cannot exchange " "individual atoms (ions) belonging to a molecule"); } diff --git a/src/MC/fix_gcmc.cpp b/src/MC/fix_gcmc.cpp index e0f1cd243b..8fb778207b 100644 --- a/src/MC/fix_gcmc.cpp +++ b/src/MC/fix_gcmc.cpp @@ -564,7 +564,7 @@ void FixGCMC::init() if (molecule[i]) flag = 1; int flagall; MPI_Allreduce(&flag,&flagall,1,MPI_INT,MPI_SUM,world); - if (flagall && comm->me == 0) + if (flagall) error->all(FLERR, "Fix gcmc cannot exchange individual atoms belonging to a molecule"); } @@ -579,7 +579,7 @@ void FixGCMC::init() if (molecule[i] == 0) flag = 1; int flagall; MPI_Allreduce(&flag,&flagall,1,MPI_INT,MPI_SUM,world); - if (flagall && comm->me == 0) + if (flagall) error->all(FLERR, "All mol IDs should be set for fix gcmc group atoms"); } diff --git a/src/MC/fix_widom.cpp b/src/MC/fix_widom.cpp index 2c76050430..c72beb5051 100644 --- a/src/MC/fix_widom.cpp +++ b/src/MC/fix_widom.cpp @@ -356,7 +356,7 @@ void FixWidom::init() if (molecule[i] == 0) flag = 1; int flagall; MPI_Allreduce(&flag,&flagall,1,MPI_INT,MPI_SUM,world); - if (flagall && comm->me == 0) + if (flagall) error->all(FLERR, "All mol IDs should be set for fix widom group atoms"); } diff --git a/src/ML-IAP/mliap_data.cpp b/src/ML-IAP/mliap_data.cpp index 5d847ee25e..1ca57e2745 100644 --- a/src/ML-IAP/mliap_data.cpp +++ b/src/ML-IAP/mliap_data.cpp @@ -185,7 +185,6 @@ void MLIAPData::generate_neighdata(NeighList *list_in, int eflag_in, int vflag_i int jtype = type[j]; const int jelem = map[jtype]; - lmp_firstneigh[ii][jj] = firstneigh[i][jj]; if (rsq < descriptor->cutsq[ielem][jelem]) { pair_i[ij] = i; jatoms[ij] = j; @@ -193,6 +192,7 @@ void 
MLIAPData::generate_neighdata(NeighList *list_in, int eflag_in, int vflag_i rij[ij][0] = delx; rij[ij][1] = dely; rij[ij][2] = delz; + lmp_firstneigh[ii][ninside] = firstneigh[i][jj]; ij++; ninside++; } @@ -228,6 +228,7 @@ void MLIAPData::grow_neigharrays() memory->grow(ielems, natomneigh, "MLIAPData:ielems"); memory->grow(itypes, natomneigh, "MLIAPData:itypes"); memory->grow(numneighs, natomneigh, "MLIAPData:numneighs"); + memory->grow(lmp_firstneigh, natomneigh, nneigh_max, "MLIAPData:lmp_firstneigh"); natomneigh_max = natomneigh; } diff --git a/src/ML-IAP/mliap_descriptor.cpp b/src/ML-IAP/mliap_descriptor.cpp index 34f81c3902..b8acd6329e 100644 --- a/src/ML-IAP/mliap_descriptor.cpp +++ b/src/ML-IAP/mliap_descriptor.cpp @@ -28,6 +28,7 @@ MLIAPDescriptor::MLIAPDescriptor(LAMMPS *lmp) : cutghost(nullptr), radelem(nullptr), wjelem(nullptr) { cutmax = 0.0; + allocated_elements = 0; } /* ---------------------------------------------------------------------- */ diff --git a/src/ML-IAP/mliap_descriptor_ace.cpp b/src/ML-IAP/mliap_descriptor_ace.cpp index fd059b822b..b13699193c 100644 --- a/src/ML-IAP/mliap_descriptor_ace.cpp +++ b/src/ML-IAP/mliap_descriptor_ace.cpp @@ -57,7 +57,6 @@ MLIAPDescriptorACE::MLIAPDescriptorACE(LAMMPS *_lmp, char *yacefilename) : { acemlimpl = new ACE_ML_impl; - allocated_elements = 0; //read in file with CG coefficients or c_tilde coefficients ctilde_file = yacefilename; delete acemlimpl->basis_set; diff --git a/src/ML-PACE/pair_pace.h b/src/ML-PACE/pair_pace.h index 5cff7045fa..a972e857d2 100644 --- a/src/ML-PACE/pair_pace.h +++ b/src/ML-PACE/pair_pace.h @@ -55,7 +55,7 @@ class PairPACE : public Pair { int nmax_corerep; virtual void allocate(); - double *corerep_factor; //per-atom core-rep factor (= 1 - fcut) + double *corerep_factor; //per-atom core-rep factor (= 1 - fcut) int flag_corerep_factor; double **scale; diff --git a/src/ML-QUIP/pair_quip.cpp b/src/ML-QUIP/pair_quip.cpp index a2de4bf38a..35ba4ff8e4 100644 --- 
a/src/ML-QUIP/pair_quip.cpp +++ b/src/ML-QUIP/pair_quip.cpp @@ -286,7 +286,7 @@ void PairQUIP::coeff(int narg, char **arg) // and returns the necessary size of quip_potential. This behavior // is invoked by setting n_potential_quip to 0. n_quip_potential = 0; - quip_potential = new int[0]; + quip_potential = new int[1]; quip_lammps_potential_initialise(quip_potential, &n_quip_potential, &cutoff, quip_file, &n_quip_file, quip_string, &n_quip_string); delete[] quip_potential; diff --git a/src/ML-QUIP/pair_quip.h b/src/ML-QUIP/pair_quip.h index 2cbbcd4af8..7f23ab4478 100644 --- a/src/ML-QUIP/pair_quip.h +++ b/src/ML-QUIP/pair_quip.h @@ -43,8 +43,10 @@ class PairQUIP : public Pair { double init_one(int, int) override; void allocate(); void *extract(const char *, int &); + protected: double scale; + private: double cutoff; int *quip_potential; diff --git a/src/MOLECULE/angle_charmm.cpp b/src/MOLECULE/angle_charmm.cpp index 1b66260c55..11b5abd699 100644 --- a/src/MOLECULE/angle_charmm.cpp +++ b/src/MOLECULE/angle_charmm.cpp @@ -309,3 +309,15 @@ double AngleCharmm::single(int type, int i1, int i2, int i3) return (tk * dtheta + rk * dr); } + +/* ---------------------------------------------------------------------- + return ptr to internal members upon request +------------------------------------------------------------------------ */ + +void *AngleCharmm::extract(const char *str, int &dim) +{ + dim = 1; + if (strcmp(str, "k") == 0) return (void *) k; + if (strcmp(str, "theta0") == 0) return (void *) theta0; + return nullptr; +} diff --git a/src/MOLECULE/angle_charmm.h b/src/MOLECULE/angle_charmm.h index 2a77ac7864..cc4095e90f 100644 --- a/src/MOLECULE/angle_charmm.h +++ b/src/MOLECULE/angle_charmm.h @@ -35,6 +35,7 @@ class AngleCharmm : public Angle { void read_restart(FILE *) override; void write_data(FILE *) override; double single(int, int, int, int) override; + void *extract(const char *, int &) override; protected: double *k, *theta0, *k_ub, *r_ub; diff --git 
a/src/MOLECULE/angle_harmonic.cpp b/src/MOLECULE/angle_harmonic.cpp index e9f1c528ef..040cbe7530 100644 --- a/src/MOLECULE/angle_harmonic.cpp +++ b/src/MOLECULE/angle_harmonic.cpp @@ -276,17 +276,17 @@ void AngleHarmonic::born_matrix(int type, int i1, int i2, int i3, double &du, do double delx1 = x[i1][0] - x[i2][0]; double dely1 = x[i1][1] - x[i2][1]; double delz1 = x[i1][2] - x[i2][2]; - domain->minimum_image(delx1,dely1,delz1); - double r1 = sqrt(delx1*delx1 + dely1*dely1 + delz1*delz1); + domain->minimum_image(delx1, dely1, delz1); + double r1 = sqrt(delx1 * delx1 + dely1 * dely1 + delz1 * delz1); double delx2 = x[i3][0] - x[i2][0]; double dely2 = x[i3][1] - x[i2][1]; double delz2 = x[i3][2] - x[i2][2]; - domain->minimum_image(delx2,dely2,delz2); - double r2 = sqrt(delx2*delx2 + dely2*dely2 + delz2*delz2); + domain->minimum_image(delx2, dely2, delz2); + double r2 = sqrt(delx2 * delx2 + dely2 * dely2 + delz2 * delz2); - double c = delx1*delx2 + dely1*dely2 + delz1*delz2; - c /= r1*r2; + double c = delx1 * delx2 + dely1 * dely2 + delz1 * delz2; + c /= r1 * r2; if (c > 1.0) c = 1.0; if (c < -1.0) c = -1.0; double theta = acos(c); diff --git a/src/MOLECULE/bond_fene_expand.cpp b/src/MOLECULE/bond_fene_expand.cpp index c7821b1826..e115596eb1 100644 --- a/src/MOLECULE/bond_fene_expand.cpp +++ b/src/MOLECULE/bond_fene_expand.cpp @@ -273,3 +273,18 @@ double BondFENEExpand::single(int type, double rsq, int /*i*/, int /*j*/, double return eng; } + +/* ---------------------------------------------------------------------- + return ptr to internal members upon request +------------------------------------------------------------------------ */ + +void *BondFENEExpand::extract(const char *str, int &dim) +{ + dim = 1; + if (strcmp(str, "k") == 0) return (void *) k; + if (strcmp(str, "r0") == 0) return (void *) r0; + if (strcmp(str, "epsilon") == 0) return (void *) epsilon; + if (strcmp(str, "sigma") == 0) return (void *) sigma; + if (strcmp(str, "shift") == 0) return (void 
*) shift; + return nullptr; +} diff --git a/src/MOLECULE/bond_fene_expand.h b/src/MOLECULE/bond_fene_expand.h index cdce710ea1..13524b0972 100644 --- a/src/MOLECULE/bond_fene_expand.h +++ b/src/MOLECULE/bond_fene_expand.h @@ -36,6 +36,7 @@ class BondFENEExpand : public Bond { void read_restart(FILE *) override; void write_data(FILE *) override; double single(int, double, int, int, double &) override; + void *extract(const char *, int &) override; protected: double *k, *r0, *epsilon, *sigma, *shift; diff --git a/src/MOLECULE/dihedral_multi_harmonic.cpp b/src/MOLECULE/dihedral_multi_harmonic.cpp index 8e6685cac9..2d1e16b9e4 100644 --- a/src/MOLECULE/dihedral_multi_harmonic.cpp +++ b/src/MOLECULE/dihedral_multi_harmonic.cpp @@ -326,8 +326,8 @@ void DihedralMultiHarmonic::write_data(FILE *fp) /* ---------------------------------------------------------------------- */ -void DihedralMultiHarmonic::born_matrix(int nd, int i1, int i2, int i3, int i4, - double &du, double &du2) +void DihedralMultiHarmonic::born_matrix(int nd, int i1, int i2, int i3, int i4, double &du, + double &du2) { double vb1x, vb1y, vb1z, vb2x, vb2y, vb2z, vb3x, vb3y, vb3z, vb2xm, vb2ym, vb2zm; double sb1, sb3, rb1, rb3, c0, b1mag2, b1mag, b2mag2; diff --git a/src/MOLECULE/dihedral_opls.cpp b/src/MOLECULE/dihedral_opls.cpp index eced454d68..e99d83f631 100644 --- a/src/MOLECULE/dihedral_opls.cpp +++ b/src/MOLECULE/dihedral_opls.cpp @@ -336,8 +336,7 @@ void DihedralOPLS::write_data(FILE *fp) /* ----------------------------------------------------------------------*/ -void DihedralOPLS::born_matrix(int nd, int i1, int i2, int i3, int i4, - double &du, double &du2) +void DihedralOPLS::born_matrix(int nd, int i1, int i2, int i3, int i4, double &du, double &du2) { double vb1x, vb1y, vb1z, vb2x, vb2y, vb2z, vb3x, vb3y, vb3z, vb2xm, vb2ym, vb2zm; double sb1, sb3, rb1, rb3, c0, b1mag2, b1mag, b2mag2; @@ -425,9 +424,10 @@ void DihedralOPLS::born_matrix(int nd, int i1, int i2, int i3, int i4, si = sin(phi); if 
(fabs(si) < SMALLER) si = SMALLER; - du = k1[type] - 2.0 * k2[type] * sin(2.0 * phi) / si + 3.0 * k3[type] * sin(3.0 * phi) / si - - 4.0 * k4[type] * sin(4.0 * phi) / si; - du2 = (4.0 * k2[type] * si * cos(2.0 * phi) - 2.0 * k2[type] * sin(2.0 * phi) - - 9.0 * k3[type] * si * cos(3.0 * phi) + 3.0 * k3[type] * sin(3.0 * phi) - + 16.0 * k4[type] * si * cos(4.0 * phi) - 4.0 * k4[type] * sin(4.0 * phi)) / (si * si * si); + du = k1[type] - 2.0 * k2[type] * sin(2.0 * phi) / si + 3.0 * k3[type] * sin(3.0 * phi) / si - + 4.0 * k4[type] * sin(4.0 * phi) / si; + du2 = (4.0 * k2[type] * si * cos(2.0 * phi) - 2.0 * k2[type] * sin(2.0 * phi) - + 9.0 * k3[type] * si * cos(3.0 * phi) + 3.0 * k3[type] * sin(3.0 * phi) + + 16.0 * k4[type] * si * cos(4.0 * phi) - 4.0 * k4[type] * sin(4.0 * phi)) / + (si * si * si); } diff --git a/src/MOLECULE/pair_hbond_dreiding_lj.cpp b/src/MOLECULE/pair_hbond_dreiding_lj.cpp index 274f8bc2a3..4536cc8e05 100644 --- a/src/MOLECULE/pair_hbond_dreiding_lj.cpp +++ b/src/MOLECULE/pair_hbond_dreiding_lj.cpp @@ -396,14 +396,14 @@ void PairHbondDreidingLJ::init_style() // and computing forces on A,H which may be on different procs if (atom->molecular == Atom::ATOMIC) - error->all(FLERR,"Pair style hbond/dreiding requires molecular system"); + error->all(FLERR,"Pair style hbond/dreiding/lj requires molecular system"); if (atom->tag_enable == 0) - error->all(FLERR,"Pair style hbond/dreiding requires atom IDs"); + error->all(FLERR,"Pair style hbond/dreiding/lj requires atom IDs"); if (atom->map_style == Atom::MAP_NONE) - error->all(FLERR,"Pair style hbond/dreiding requires an atom map, " + error->all(FLERR,"Pair style hbond/dreiding/lj requires an atom map, " "see atom_modify"); if (force->newton_pair == 0) - error->all(FLERR,"Pair style hbond/dreiding requires newton pair on"); + error->all(FLERR,"Pair style hbond/dreiding/lj requires newton pair on"); // set donor[M]/acceptor[M] if any atom of type M is a donor/acceptor @@ -419,7 +419,7 @@ void 
PairHbondDreidingLJ::init_style() acceptor[j] = 1; } - if (!anyflag) error->all(FLERR,"No pair hbond/dreiding coefficients set"); + if (!anyflag) error->all(FLERR,"No pair hbond/dreiding/lj coefficients set"); // set additional param values // offset is for LJ only, angle term is not included diff --git a/src/MOLECULE/pair_hbond_dreiding_morse.cpp b/src/MOLECULE/pair_hbond_dreiding_morse.cpp index c8bc0a627d..d976b66460 100644 --- a/src/MOLECULE/pair_hbond_dreiding_morse.cpp +++ b/src/MOLECULE/pair_hbond_dreiding_morse.cpp @@ -323,14 +323,14 @@ void PairHbondDreidingMorse::init_style() // and computing forces on A,H which may be on different procs if (atom->molecular == Atom::ATOMIC) - error->all(FLERR,"Pair style hbond/dreiding requires molecular system"); + error->all(FLERR,"Pair style hbond/dreiding/morse requires molecular system"); if (atom->tag_enable == 0) - error->all(FLERR,"Pair style hbond/dreiding requires atom IDs"); + error->all(FLERR,"Pair style hbond/dreiding/morse requires atom IDs"); if (atom->map_style == Atom::MAP_NONE) - error->all(FLERR,"Pair style hbond/dreiding requires an atom map, " + error->all(FLERR,"Pair style hbond/dreiding/morse requires an atom map, " "see atom_modify"); if (force->newton_pair == 0) - error->all(FLERR,"Pair style hbond/dreiding requires newton pair on"); + error->all(FLERR,"Pair style hbond/dreiding/morse requires newton pair on"); // set donor[M]/acceptor[M] if any atom of type M is a donor/acceptor @@ -346,7 +346,7 @@ void PairHbondDreidingMorse::init_style() acceptor[j] = 1; } - if (!anyflag) error->all(FLERR,"No pair hbond/dreiding coefficients set"); + if (!anyflag) error->all(FLERR,"No pair hbond/dreiding/morse coefficients set"); // set additional param values // offset is for Morse only, angle term is not included diff --git a/src/OPT/pair_aip_water_2dm_opt.h b/src/OPT/pair_aip_water_2dm_opt.h index 50b5043360..18eee58d72 100644 --- a/src/OPT/pair_aip_water_2dm_opt.h +++ b/src/OPT/pair_aip_water_2dm_opt.h @@ 
-1,4 +1,4 @@ - /* -*- c++ -*- ---------------------------------------------------------- +/* -*- c++ -*- ---------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator https://www.lammps.org/, Sandia National Laboratories LAMMPS development team: developers@lammps.org @@ -20,8 +20,8 @@ PairStyle(aip/water/2dm/opt,PairAIPWATER2DMOpt); #ifndef LMP_PAIR_AIP_WATER_2DM_OPT_H #define LMP_PAIR_AIP_WATER_2DM_OPT_H -#include "pair_ilp_graphene_hbn_opt.h" #include "pair_aip_water_2dm.h" +#include "pair_ilp_graphene_hbn_opt.h" namespace LAMMPS_NS { diff --git a/src/OPT/pair_ilp_graphene_hbn_opt.h b/src/OPT/pair_ilp_graphene_hbn_opt.h index 01b66bb2fa..f2fa30d595 100644 --- a/src/OPT/pair_ilp_graphene_hbn_opt.h +++ b/src/OPT/pair_ilp_graphene_hbn_opt.h @@ -35,8 +35,8 @@ class PairILPGrapheneHBNOpt : virtual public PairILPGrapheneHBN { protected: void update_internal_list(); template - void calc_atom_normal(int i, int itype, int *ILP_neigh, int nneigh, double *normal, double (*dnormdri)[3], - double (*dnormdrk)[3][3]); + void calc_atom_normal(int i, int itype, int *ILP_neigh, int nneigh, double *normal, + double (*dnormdri)[3], double (*dnormdrk)[3][3]); template void eval(); int *layered_neigh; @@ -51,7 +51,6 @@ class PairILPGrapheneHBNOpt : virtual public PairILPGrapheneHBN { SAIP_BNCH, WATER, }; - }; } // namespace LAMMPS_NS diff --git a/src/Purge.list b/src/Purge.list index 14708a111e..7098d39e3a 100644 --- a/src/Purge.list +++ b/src/Purge.list @@ -53,6 +53,11 @@ lmpinstalledpkgs.h lmpgitversion.h mliap_model_python_couple.cpp mliap_model_python_couple.h +# renamed in September 2024 +group_ndx.cpp +group_ndx.h +ndx_group.cpp +ndx_group.h # removed in August 2023 dump_atom_mpiio.cpp dump_atom_mpiio.h diff --git a/src/QEQ/fix_qeq.cpp b/src/QEQ/fix_qeq.cpp index 0e1335282c..e09921d11b 100644 --- a/src/QEQ/fix_qeq.cpp +++ b/src/QEQ/fix_qeq.cpp @@ -237,8 +237,9 @@ void FixQEq::reallocate_storage() void 
FixQEq::allocate_matrix() { - int i,ii,inum,m; + int i,ii,inum; int *ilist, *numneigh; + bigint m; int mincap; double safezone; @@ -261,7 +262,10 @@ void FixQEq::allocate_matrix() i = ilist[ii]; m += numneigh[i]; } - m_cap = MAX((int)(m * safezone), mincap * MIN_NBRS); + bigint m_cap_big = (bigint)MAX(m * safezone, mincap * MIN_NBRS); + if (m_cap_big > MAXSMALLINT) + error->one(FLERR,"Too many neighbors in fix {}",style); + m_cap = m_cap_big; H.n = n_cap; H.m = m_cap; diff --git a/src/REACTION/fix_bond_react.cpp b/src/REACTION/fix_bond_react.cpp index b80231cf44..1e4ae33ee6 100644 --- a/src/REACTION/fix_bond_react.cpp +++ b/src/REACTION/fix_bond_react.cpp @@ -3693,7 +3693,6 @@ int FixBondReact::insert_atoms_setup(tagint **my_update_mega_glove, int iupdate) imageint *imageflags; double **coords,lamda[3],rotmat[3][3]; double *newcoord; - double **v = atom->v; double t,delx,dely,delz,rsq; memory->create(coords,twomol->natoms,3,"bond/react:coords"); @@ -3709,12 +3708,11 @@ int FixBondReact::insert_atoms_setup(tagint **my_update_mega_glove, int iupdate) } // find current max atom and molecule IDs - tagint *tag = atom->tag; double **x = atom->x; tagint *molecule = atom->molecule; int nlocal = atom->nlocal; - tagint maxmol_all; + tagint maxmol_all = 0;; for (int i = 0; i < nlocal; i++) maxmol_all = MAX(maxmol_all,molecule[i]); MPI_Allreduce(MPI_IN_PLACE,&maxmol_all,1,MPI_LMP_TAGINT,MPI_MAX,world); diff --git a/src/REAXFF/fix_qeq_reaxff.cpp b/src/REAXFF/fix_qeq_reaxff.cpp index 7e935fd6cd..921f6e0261 100644 --- a/src/REAXFF/fix_qeq_reaxff.cpp +++ b/src/REAXFF/fix_qeq_reaxff.cpp @@ -338,7 +338,8 @@ void FixQEqReaxFF::reallocate_storage() void FixQEqReaxFF::allocate_matrix() { - int i,ii,m; + int i,ii; + bigint m; int mincap; double safezone; @@ -360,7 +361,10 @@ void FixQEqReaxFF::allocate_matrix() i = ilist[ii]; m += numneigh[i]; } - m_cap = MAX((int)(m * safezone), mincap * REAX_MIN_NBRS); + bigint m_cap_big = (bigint)MAX(m * safezone, mincap * REAX_MIN_NBRS); + if 
(m_cap_big > MAXSMALLINT) + error->one(FLERR,"Too many neighbors in fix {}",style); + m_cap = m_cap_big; H.n = n_cap; H.m = m_cap; diff --git a/src/REAXFF/pair_reaxff.cpp b/src/REAXFF/pair_reaxff.cpp index b9f4f6c838..08e90933b2 100644 --- a/src/REAXFF/pair_reaxff.cpp +++ b/src/REAXFF/pair_reaxff.cpp @@ -174,6 +174,7 @@ void PairReaxFF::allocate() memory->create(cutsq,n+1,n+1,"pair:cutsq"); memory->create(cutghost,n+1,n+1,"pair:cutghost"); map = new int[n+1]; + for (int i = 0; i <= n; ++i) map[i] = -1; chi = new double[n+1]; eta = new double[n+1]; diff --git a/src/REAXFF/reaxff_allocate.cpp b/src/REAXFF/reaxff_allocate.cpp index 06ebc20f30..9e125bea05 100644 --- a/src/REAXFF/reaxff_allocate.cpp +++ b/src/REAXFF/reaxff_allocate.cpp @@ -169,16 +169,23 @@ namespace ReaxFF { static int Reallocate_HBonds_List(reax_system *system, reax_list *hbonds) { int i, total_hbonds; + LAMMPS_NS::bigint total_hbonds_big; int mincap = system->mincap; double saferzone = system->saferzone; - total_hbonds = 0; + total_hbonds_big = 0; for (i = 0; i < system->n; ++i) if ((system->my_atoms[i].Hindex) >= 0) { - total_hbonds += system->my_atoms[i].num_hbonds; + total_hbonds_big += system->my_atoms[i].num_hbonds; } - total_hbonds = (int)(MAX(total_hbonds*saferzone, mincap*system->minhbonds)); + total_hbonds_big = (LAMMPS_NS::bigint)(MAX(total_hbonds_big*saferzone, mincap*system->minhbonds)); + + auto error = system->error_ptr; + if (total_hbonds_big > MAXSMALLINT) + error->one(FLERR,"Too many hydrogen bonds in pair reaxff"); + + total_hbonds = total_hbonds_big; Delete_List(hbonds); Make_List(system->Hcap, total_hbonds, TYP_HBOND, hbonds); @@ -190,17 +197,24 @@ namespace ReaxFF { reax_list *bonds, int *total_bonds, int *est_3body) { int i; + LAMMPS_NS::bigint total_bonds_big; int mincap = system->mincap; double safezone = system->safezone; - *total_bonds = 0; + total_bonds_big = 0; *est_3body = 0; for (i = 0; i < system->N; ++i) { *est_3body += SQR(system->my_atoms[i].num_bonds); - 
*total_bonds += system->my_atoms[i].num_bonds; + total_bonds_big += system->my_atoms[i].num_bonds; } - *total_bonds = (int)(MAX(*total_bonds * safezone, mincap*MIN_BONDS)); + total_bonds_big = (LAMMPS_NS::bigint)(MAX(total_bonds_big * safezone, mincap*MIN_BONDS)); + + auto error = system->error_ptr; + if (total_bonds_big > MAXSMALLINT) + error->one(FLERR,"Too many bonds in pair reaxff"); + + *total_bonds = total_bonds_big; if (system->omp_active) for (i = 0; i < bonds->num_intrs; ++i) diff --git a/src/RHEO/Install.sh b/src/RHEO/Install.sh index e34ca3a555..07a439f44b 100644 --- a/src/RHEO/Install.sh +++ b/src/RHEO/Install.sh @@ -47,6 +47,7 @@ if (test $1 = 1) then sed -i -e 's/[^ \t]*rheo[^ \t]* //' ../Makefile.package sed -i -e 's|^PKG_SYSINC =[ \t]*|&$(rheo_SYSINC) |' ../Makefile.package sed -i -e 's|^PKG_SYSLIB =[ \t]*|&$(rheo_SYSLIB) |' ../Makefile.package + sed -i -e 's|^PKG_SYSPATH =[ \t]*|&$(rheo_SYSPATH) |' ../Makefile.package fi if (test -e ../Makefile.package.settings) then diff --git a/src/RHEO/README b/src/RHEO/README index 4b6f2a162a..15b642442a 100644 --- a/src/RHEO/README +++ b/src/RHEO/README @@ -3,8 +3,9 @@ multiphase fluid systems. The authors include Joel Clemmer (Sandia), Thomas O'Connor (Carnegie Mellon), and Eric Palermo (Carnegie Mellon). Bond style rheo/shell, compute style rheo/property/atom, and fix style -rheo/temperature all depend on the BPM package. +rheo/temperature depend on the BPM package, so it is required to install +the BPM package with RHEO. -This package requires the GNU scientific library (GSL). We recommend version -2.7 or later. To build this package, one must first separately install GSL in -a location that can be found by your environment. +This package requires BLAS/LAPACK. This can be either a separate installation +or you can use the bundled "linalg" library. Please see the LAMMPS manual at +https://docs.lammps.org/Build_extras.html#rheo for details. 
diff --git a/src/RHEO/compute_rheo_kernel.cpp b/src/RHEO/compute_rheo_kernel.cpp index dd05901b32..453f2b9028 100644 --- a/src/RHEO/compute_rheo_kernel.cpp +++ b/src/RHEO/compute_rheo_kernel.cpp @@ -37,10 +37,6 @@ #include "utils.h" #include -#include -#include -#include -#include using namespace LAMMPS_NS; using namespace RHEO_NS; @@ -50,6 +46,13 @@ using namespace MathExtra; // max value of Mdim 1 + dim + dim * (dim + 1) / 2 with dim = 3 static constexpr int MAX_MDIM = 12; +// declare LAPACK functions + +extern "C" { + void dpotrf_(const char *uplo, const int *n, double *a, const int *lda, int *info); + void dpotri_(const char *uplo, const int *n, double *a, const int *lda, int *info); +} + /* ---------------------------------------------------------------------- */ ComputeRHEOKernel::ComputeRHEOKernel(LAMMPS *lmp, int narg, char **arg) : @@ -89,7 +92,7 @@ ComputeRHEOKernel::ComputeRHEOKernel(LAMMPS *lmp, int narg, char **arg) : comm_forward_save = comm_forward; corrections_calculated = 0; - gsl_error_flag = 0; + lapack_error_flag = 0; } /* ---------------------------------------------------------------------- */ @@ -156,9 +159,9 @@ void ComputeRHEOKernel::init_list(int /*id*/, NeighList *ptr) int ComputeRHEOKernel::check_corrections(int i) { - // Skip if there were gsl errors for this atom - if (gsl_error_flag) - if (gsl_error_tags.find(atom->tag[i]) != gsl_error_tags.end()) return 0; + // Skip if there were lapack errors for this atom + if (lapack_error_flag) + if (lapack_error_tags.find(atom->tag[i]) != lapack_error_tags.end()) return 0; // Skip if undercoordinated if (coordination[i] < zmin) return 0; @@ -558,19 +561,15 @@ void ComputeRHEOKernel::calc_dw_rk2(int i, double delx, double dely, double delz void ComputeRHEOKernel::compute_peratom() { - gsl_error_flag = 0; - gsl_error_tags.clear(); + lapack_error_flag = 0; + lapack_error_tags.clear(); if (kernel_style == QUINTIC) return; corrections_calculated = 1; - int i, j, ii, jj, inum, jnum, a, b, gsl_error; + 
int i, j, ii, jj, inum, jnum, a, b, lapack_error; double xtmp, ytmp, ztmp, r, rsq, w, vj, rhoj; double dx[3]; - gsl_matrix_view gM; - - // Turn off GSL error handler, revert RK to Quintic when insufficient neighbors - gsl_set_error_handler_off(); double **x = atom->x; int *type = atom->type; @@ -633,7 +632,7 @@ void ComputeRHEOKernel::compute_peratom() } } else if (correction_order > 0) { - // Moment matrix M and polynomial basis vector cut (1d for gsl compatibility) + // Moment matrix M and polynomial basis vector cut (1d for LAPACK compatibility) double H[MAX_MDIM], M[MAX_MDIM * MAX_MDIM]; for (ii = 0; ii < inum; ii++) { @@ -647,7 +646,9 @@ void ComputeRHEOKernel::compute_peratom() // Zero upper-triangle M and cut (will be symmetric): for (a = 0; a < Mdim; a++) { - for (b = a; b < Mdim; b++) { M[a * Mdim + b] = 0; } + for (b = a; b < Mdim; b++) { + M[a * Mdim + b] = 0; + } } for (jj = 0; jj < jnum; jj++) { @@ -700,37 +701,50 @@ void ComputeRHEOKernel::compute_peratom() // Populate the upper triangle for (a = 0; a < Mdim; a++) { - for (b = a; b < Mdim; b++) { M[a * Mdim + b] += H[a] * H[b] * w * vj; } + for (b = a; b < Mdim; b++) { + M[a * Mdim + b] += H[a] * H[b] * w * vj; + } } } } // Populate the lower triangle from the symmetric entries of M: for (a = 0; a < Mdim; a++) { - for (b = a; b < Mdim; b++) { M[b * Mdim + a] = M[a * Mdim + b]; } + for (b = a; b < Mdim; b++) { + M[b * Mdim + a] = M[a * Mdim + b]; + } } // Skip if undercoordinated if (coordination[i] < zmin) continue; - // Use gsl to get Minv, use Cholesky decomposition since the + // Use LAPACK to get Minv, use Cholesky decomposition since the // polynomials are independent, M is symmetrix & positive-definite - gM = gsl_matrix_view_array(M, Mdim, Mdim); - gsl_error = gsl_linalg_cholesky_decomp1(&gM.matrix); + const char uplo = 'U'; + dpotrf_(&uplo, &Mdim, M, &Mdim, &lapack_error); - if (gsl_error) { - //Revert to uncorrected SPH for this particle - gsl_error_flag = 1; - gsl_error_tags.insert(tag[i]); + 
if (lapack_error) { + // Revert to uncorrected SPH for this particle + lapack_error_flag = 1; + lapack_error_tags.insert(tag[i]); - //check if not positive-definite - if (gsl_error != GSL_EDOM) - error->warning(FLERR, "Failed decomposition in rheo/kernel, gsl_error = {}", gsl_error); + // check if not positive-definite + if (lapack_error > 0) + error->warning(FLERR, "Failed DPOTRF2 decomposition in rheo/kernel, info = {}", + lapack_error); continue; } - gsl_linalg_cholesky_invert(&gM.matrix); //M is now M^-1 + // M is now M^-1 + dpotri_(&uplo, &Mdim, M, &Mdim, &lapack_error); + + // make result matrix symmetric + for (int i = 0; i < Mdim; ++i) { + for (int j = i+1; j < Mdim; ++j) { + M[i * Mdim + j] = M[j * Mdim + i]; + } + } // Correction coefficients are columns of M^-1 multiplied by an appropriate coefficient // Solve the linear system several times to get coefficientns diff --git a/src/RHEO/compute_rheo_kernel.h b/src/RHEO/compute_rheo_kernel.h index 20516255be..8b70509e6a 100644 --- a/src/RHEO/compute_rheo_kernel.h +++ b/src/RHEO/compute_rheo_kernel.h @@ -53,8 +53,8 @@ class ComputeRHEOKernel : public Compute { private: int comm_stage, comm_forward_save; int interface_flag; - int gsl_error_flag; - std::unordered_set gsl_error_tags; + int lapack_error_flag; + std::unordered_set lapack_error_tags; int corrections_calculated; int kernel_style, zmin, dim, Mdim, ncor; diff --git a/src/YAFF/angle_mm3.cpp b/src/YAFF/angle_mm3.cpp index 3ff7df1653..920041f7e9 100644 --- a/src/YAFF/angle_mm3.cpp +++ b/src/YAFF/angle_mm3.cpp @@ -327,3 +327,15 @@ void AngleMM3::born_matrix(int type, int i1, int i2, int i3, double &du, double du = -k2[type] * df / s; du2 = k2[type] * (d2f - df * c / s) / (s * s) ; } + +/* ---------------------------------------------------------------------- + return ptr to internal members upon request +------------------------------------------------------------------------ */ + +void *AngleMM3::extract(const char *str, int &dim) +{ + dim = 1; + if 
(strcmp(str, "k2") == 0) return (void *) k2; + if (strcmp(str, "theta0") == 0) return (void *) theta0; + return nullptr; +} diff --git a/src/YAFF/angle_mm3.h b/src/YAFF/angle_mm3.h index 22f5bd746c..126b275e72 100644 --- a/src/YAFF/angle_mm3.h +++ b/src/YAFF/angle_mm3.h @@ -36,6 +36,7 @@ class AngleMM3 : public Angle { void write_data(FILE *) override; double single(int, int, int, int) override; void born_matrix(int type, int i1, int i2, int i3, double &du, double &du2) override; + void *extract(const char *, int &) override; protected: double *theta0, *k2; diff --git a/src/YAFF/bond_mm3.cpp b/src/YAFF/bond_mm3.cpp index 31ce2dad3e..b3e69881e1 100644 --- a/src/YAFF/bond_mm3.cpp +++ b/src/YAFF/bond_mm3.cpp @@ -238,3 +238,15 @@ void BondMM3::born_matrix(int type, double rsq, int /*i*/, int /*j*/, double &du du = 2.0 * k2[type] * dr + 3.0 * K3 * dr2 + 4.0 * K4 * dr3; du2 = 2.0 * k2[type] + 6.0 * K3 * dr + 12.0 * K4 * dr2; } + +/* ---------------------------------------------------------------------- + return ptr to internal members upon request +------------------------------------------------------------------------ */ + +void *BondMM3::extract(const char *str, int &dim) +{ + dim = 1; + if (strcmp(str, "k2") == 0) return (void *) k2; + if (strcmp(str, "r0") == 0) return (void *) r0; + return nullptr; +} diff --git a/src/YAFF/bond_mm3.h b/src/YAFF/bond_mm3.h index ea89ac826d..b9ebf464bb 100644 --- a/src/YAFF/bond_mm3.h +++ b/src/YAFF/bond_mm3.h @@ -36,6 +36,7 @@ class BondMM3 : public Bond { void write_data(FILE *) override; double single(int, double, int, int, double &) override; void born_matrix(int, double, int, int, double &, double &) override; + void *extract(const char *, int &) override; protected: double *r0, *k2; diff --git a/src/angle_hybrid.cpp b/src/angle_hybrid.cpp index a015882a15..1261a78176 100644 --- a/src/angle_hybrid.cpp +++ b/src/angle_hybrid.cpp @@ -320,6 +320,14 @@ void AngleHybrid::init_style() if (styles[m]) styles[m]->init_style(); } +/* 
---------------------------------------------------------------------- */ + +int AngleHybrid::check_itype(int itype, char *substyle) +{ + if (strcmp(keywords[map[itype]], substyle) == 0) return 1; + return 0; +} + /* ---------------------------------------------------------------------- return an equilbrium angle length ------------------------------------------------------------------------- */ diff --git a/src/angle_hybrid.h b/src/angle_hybrid.h index a6da29245e..a84096b297 100644 --- a/src/angle_hybrid.h +++ b/src/angle_hybrid.h @@ -42,8 +42,10 @@ class AngleHybrid : public Angle { double single(int, int, int, int) override; double memory_usage() override; + int check_itype(int, char *); + protected: - int *map; // which style each angle type points to + int *map; // which style each angle type points to int *nanglelist; // # of angles in sub-style anglelists int *maxangle; // max # of angles sub-style lists can store int ***anglelist; // anglelist for each sub-style diff --git a/src/angle_write.cpp b/src/angle_write.cpp index 48420ae7be..1be5f1acac 100644 --- a/src/angle_write.cpp +++ b/src/angle_write.cpp @@ -148,8 +148,10 @@ void AngleWrite::command(int narg, char **arg) FILE *coeffs; char line[MAXLINE] = {'\0'}; coeffs = fopen(coeffs_file.c_str(), "r"); + if (!coeffs) + error->one(FLERR, "Unable to open temporary file {}: {}", coeffs_file, utils::getsyserror()); for (int i = 0; i < atom->nangletypes; ++i) { - fgets(line, MAXLINE, coeffs); + utils::sfgets(FLERR, line, MAXLINE, coeffs, coeffs_file.c_str(), error); writer->input->one(fmt::format("angle_coeff {}", line)); } fclose(coeffs); diff --git a/src/atom.cpp b/src/atom.cpp index 52cc2c9bc9..e0fceffe9c 100644 --- a/src/atom.cpp +++ b/src/atom.cpp @@ -1911,7 +1911,11 @@ void Atom::allocate_type_arrays() if (avec->mass_type == AtomVec::PER_TYPE) { mass = new double[ntypes+1]; mass_setflag = new int[ntypes+1]; - for (int itype = 1; itype <= ntypes; itype++) mass_setflag[itype] = 0; + // start loop from 0 to 
avoid uninitialized access when operating on the whole array + for (int itype = 0; itype <= ntypes; itype++) { + mass_setflag[itype] = 0; + mass[itype] = 0.0; + } } } @@ -2739,20 +2743,21 @@ Classes rarely need to check on ghost communication and so `find_custom` is typically preferred to this function. See :doc:`pair amoeba ` for an example where checking ghost communication is necessary. \endverbatim - * \param name Name of the property (w/o a "i_" or "d_" or "i2_" or "d2_" prefix) - * \param &flag Returns data type of property: 0 for int, 1 for double - * \param &cols Returns number of values: 0 for a single value, 1 or more for a vector of values - * \param &ghost Returns whether property is communicated to ghost atoms: 0 for no, 1 for yes + * \param name Name of the property (w/o a "i_" or "d_" or "i2_" or "d2_" prefix) + * \param &flag Returns data type of property: 0 for int, 1 for double + * \param &cols Returns number of values: 0 for a single value, 1 or more for a vector of values + * \param &ghost Returns whether property is communicated to ghost atoms: 0 for no, 1 for yes * \return index of property in the respective list of properties */ int Atom::find_custom_ghost(const char *name, int &flag, int &cols, int &ghost) { int i = find_custom(name, flag, cols); + ghost = 0; if (i == -1) return i; if ((flag == 0) && (cols == 0)) ghost = ivghost[i]; else if ((flag == 1) && (cols == 0)) ghost = dvghost[i]; - else if ((flag == 0) && (cols == 1)) ghost = iaghost[i]; - else if ((flag == 1) && (cols == 1)) ghost = daghost[i]; + else if ((flag == 0) && (cols > 0)) ghost = iaghost[i]; + else if ((flag == 1) && (cols > 0)) ghost = daghost[i]; return i; } @@ -2999,11 +3004,13 @@ length of the data area, and a short description. 
- N double values defined by fix property/atom array name *See also* - :cpp:func:`lammps_extract_atom` + :cpp:func:`lammps_extract_atom`, + :cpp:func:`lammps_extract_atom_datatype`, + :cpp:func:`lammps_extract_atom_size` \endverbatim * - * \sa extract_datatype + * \sa extract_datatype, extract_size * * \param name string with the keyword of the desired property. Typically the name of the pointer variable returned @@ -3142,7 +3149,7 @@ void *Atom::extract(const char *name) \endverbatim * - * \sa extract + * \sa extract extract_size * * \param name string with the keyword of the desired property. * \return data type constant for desired property or -1 */ @@ -3177,10 +3184,14 @@ int Atom::extract_datatype(const char *name) if (strcmp(name,"temperature") == 0) return LAMMPS_DOUBLE; if (strcmp(name,"heatflow") == 0) return LAMMPS_DOUBLE; + // PERI package (and in part MACHDYN) + if (strcmp(name,"vfrac") == 0) return LAMMPS_DOUBLE; if (strcmp(name,"s0") == 0) return LAMMPS_DOUBLE; if (strcmp(name,"x0") == 0) return LAMMPS_DOUBLE_2D; + // AWPMD package (and in part EFF and ELECTRODE) + if (strcmp(name,"espin") == 0) return LAMMPS_INT; if (strcmp(name,"spin") == 0) return LAMMPS_INT; // backwards compatibility if (strcmp(name,"eradius") == 0) return LAMMPS_DOUBLE; @@ -3248,14 +3259,255 @@ int Atom::extract_datatype(const char *name) if (!array) index = find_custom(&name[2],flag,cols); else index = find_custom(&name[3],flag,cols); + // consistency checks if (index < 0) return -1; if (which != flag) return -1; if ((!array && cols) || (array && !cols)) return -1; - if (which == 0) return LAMMPS_INT; - else return LAMMPS_DOUBLE; + if (!which && !array) return LAMMPS_INT; + if (which && !array) return LAMMPS_DOUBLE; + if (!which && array) return LAMMPS_INT_2D; + if (which && array) return LAMMPS_DOUBLE_2D; } + return -1; +} +/** Provide vector or array size info of internal data of the Atom class + * +\verbatim embed:rst + +.. 
versionadded:: TBD + +\endverbatim + * + * \sa extract extract_datatype + * + * \param name string with the keyword of the desired property. + * \param type either LMP_SIZE_ROWS or LMP_SIZE_COLS for per-atom array or ignored + * \return size of the vector or size of the array for the requested dimension or -1 */ + +int Atom::extract_size(const char *name, int type) +{ + // -------------------------------------------------------------------- + // 6th customization section: customize by adding new variable name + + const auto datatype = extract_datatype(name); + const auto nall = nlocal + nghost; + const auto ghost_vel = comm->ghost_velocity; + + if ((datatype == LAMMPS_DOUBLE_2D) || (datatype == LAMMPS_INT_2D)) { + if (type == LMP_SIZE_ROWS) { + if (strcmp(name,"x") == 0) return nall; + if (strcmp(name,"v") == 0) { + if (ghost_vel) return nall; + else return nlocal; + } + if (strcmp(name,"f") == 0) return nall; + if (strcmp(name,"mu") == 0) return nall; + if (strcmp(name,"omega") == 0) { + if (ghost_vel) return nall; + else return nlocal; + } + if (strcmp(name,"angmom") == 0) { + if (ghost_vel) return nall; + else return nlocal; + } + if (strcmp(name,"torque") == 0) return nlocal; + if (strcmp(name,"quat") == 0) { + if (ghost_vel) return nall; + else return nlocal; + } + + // PERI package + + if (strcmp(name,"x0") == 0) return nall; + + // SPIN package + + if (strcmp(name,"sp") == 0) return nall; + if (strcmp(name,"fm") == 0) return nlocal; + if (strcmp(name,"fm_long") == 0) return nlocal; + + // AWPMD package + + if (strcmp(name,"cs") == 0) { + if (ghost_vel) return nall; + else return nlocal; + } + if (strcmp(name,"csforce") == 0) return nlocal; + if (strcmp(name,"vforce") == 0) return nlocal; + + // SPH package + + if (strcmp(name,"vest") == 0) return nall; + + // MACHDYN package + + if (strcmp(name, "smd_data_9") == 0) return nlocal; + if (strcmp(name, "smd_stress") == 0) return nlocal; + + } else if (type == LMP_SIZE_COLS) { + + if
(strcmp(name,"x") == 0) return 3; + if (strcmp(name,"v") == 0) return 3; + if (strcmp(name,"f") == 0) return 3; + if (strcmp(name,"mu") == 0) return 4; + if (strcmp(name,"omega") == 0) return 3; + if (strcmp(name,"angmom") == 0) return 3; + if (strcmp(name,"torque") == 0) return 3; + if (strcmp(name,"quat") == 0) return 4; + + // PERI package + + if (strcmp(name,"x0") == 0) return 3; + + // SPIN package + + if (strcmp(name,"sp") == 0) return 4; + if (strcmp(name,"fm") == 0) return 3; + if (strcmp(name,"fm_long") == 0) return 3; + + // AWPMD package + + if (strcmp(name,"cs") == 0) return 2; + if (strcmp(name,"csforce") == 0) return 2; + if (strcmp(name,"vforce") == 0) return 3; + + // SPH package + + if (strcmp(name,"vest") == 0) return 3; + + // MACHDYN package + + if (strcmp(name, "smd_data_9") == 0) return 9; + if (strcmp(name, "smd_stress") == 0) return 6; + } + + // custom arrays + + if (utils::strmatch(name,"^[id]2_")) { + int which = 0; + if (name[0] == 'd') which = 1; + + int index,flag,cols,ghost; + index = find_custom_ghost(&name[3],flag,cols,ghost); + + // consistency checks + if (index < 0) return -1; + if (which != flag) return -1; + if (!cols) return -1; + + if (type == LMP_SIZE_ROWS) { + if (ghost) return nall; + else return nlocal; + } else if (type == LMP_SIZE_COLS) { + return cols; + } + } + } else { + + if (strcmp(name,"mass") == 0) return ntypes + 1; + + if (strcmp(name,"id") == 0) return nall; + if (strcmp(name,"type") == 0) return nall; + if (strcmp(name,"mask") == 0) return nall; + if (strcmp(name,"image") == 0) return nlocal; + if (strcmp(name,"molecule") == 0) return nall; + if (strcmp(name,"q") == 0) return nall; + if (strcmp(name,"radius") == 0) return nall; + if (strcmp(name,"rmass") == 0) return nall; + + // ASPHERE package + + if (strcmp(name,"ellipsoid") == 0) return nlocal; + + // BODY package + + if (strcmp(name,"line") == 0) return nlocal; + if (strcmp(name,"tri") == 0) return nlocal; + if (strcmp(name,"body") == 0) return nlocal; + 
+ // PERI package (and in part MACHDYN) + + if (strcmp(name,"vfrac") == 0) return nall; + if (strcmp(name,"s0") == 0) return nall; + + // AWPMD package (and in part EFF and ELECTRODE) + + if (strcmp(name,"espin") == 0) return nall; + if (strcmp(name,"spin") == 0) return nall; // backwards compatibility + if (strcmp(name,"eradius") == 0) return nall; + if (strcmp(name,"ervel") == 0) return nlocal; + if (strcmp(name,"erforce") == 0) return nlocal; + if (strcmp(name,"ervelforce") == 0) return nlocal; + if (strcmp(name,"etag") == 0) return nall; + + // CG-DNA package + + if (strcmp(name,"id5p") == 0) return nall; + + // RHEO package + + if (strcmp(name,"temperature") == 0) return nlocal; + if (strcmp(name,"heatflow") == 0) return nlocal; + if (strcmp(name,"rheo_status") == 0) return nall; + if (strcmp(name,"conductivity") == 0) return nlocal; + if (strcmp(name,"pressure") == 0) return nlocal; + if (strcmp(name,"viscosity") == 0) return nlocal; + + // SPH package + + if (strcmp(name,"rho") == 0) return nall; + if (strcmp(name,"drho") == 0) return nlocal; + if (strcmp(name,"esph") == 0) return nall; + if (strcmp(name,"desph") == 0) return nlocal; + if (strcmp(name,"cv") == 0) return nall; + + // MACHDYN package + + if (strcmp(name, "contact_radius") == 0) return nall; + if (strcmp(name, "eff_plastic_strain") == 0) return nlocal; + if (strcmp(name, "eff_plastic_strain_rate") == 0) return nlocal; + if (strcmp(name, "damage") == 0) return nlocal; + + // DPD-REACT package + + if (strcmp(name,"dpdTheta") == 0) return nall; + + // DPD-MESO package + + if (strcmp(name,"edpd_temp") == 0) return nall; + + // DIELECTRIC package + + if (strcmp(name,"area") == 0) return nall; + if (strcmp(name,"ed") == 0) return nall; + if (strcmp(name,"em") == 0) return nall; + if (strcmp(name,"epsilon") == 0) return nall; + if (strcmp(name,"curvature") == 0) return nall; + if (strcmp(name,"q_unscaled") == 0) return nall; + + // end of customization section + // 
-------------------------------------------------------------------- + + // custom vectors + + if (utils::strmatch(name,"^[id]_")) { + int which = 0; + if (name[0] == 'd') which = 1; + + int index,flag,cols,ghost; + index = find_custom_ghost(&name[2],flag,cols,ghost); + + // consistency checks + if (index < 0) return -1; + if (which != flag) return -1; + if (cols) return -1; + + if (ghost) return nall; + else return nlocal; + } + } return -1; } diff --git a/src/atom.h b/src/atom.h index bd5b352cd0..c98f06cbe8 100644 --- a/src/atom.h +++ b/src/atom.h @@ -378,6 +378,7 @@ class Atom : protected Pointers { void *extract(const char *); int extract_datatype(const char *); + int extract_size(const char *, int); inline int *get_map_array() { return map_array; }; inline int get_map_size() { return map_tag_max + 1; }; diff --git a/src/bond_hybrid.cpp b/src/bond_hybrid.cpp index 307cbd72fd..bd5badb54c 100644 --- a/src/bond_hybrid.cpp +++ b/src/bond_hybrid.cpp @@ -385,6 +385,14 @@ void BondHybrid::init_style() else map[0] = -1; } +/* ---------------------------------------------------------------------- */ + +int BondHybrid::check_itype(int itype, char *substyle) +{ + if (map[itype] < 0) return 0; // negative entry is not a valid index into keywords + return (strcmp(keywords[map[itype]], substyle) == 0) ? 1 : 0; +} + /* ---------------------------------------------------------------------- return an equilbrium bond length ------------------------------------------------------------------------- */ diff --git a/src/bond_hybrid.h b/src/bond_hybrid.h index ba520b81b4..d93b5c7558 100644 --- a/src/bond_hybrid.h +++ b/src/bond_hybrid.h @@ -44,6 +44,8 @@ class BondHybrid : public Bond { double single(int, double, int, int, double &) override; double memory_usage() override; + int check_itype(int, char *); + protected: int *map; // which style each bond type points to int has_quartic; // which style, if any is a quartic bond style diff --git a/src/compute_reduce.cpp b/src/compute_reduce.cpp index ee94c2d9a7..40bb206bd2 100644 --- a/src/compute_reduce.cpp +++
b/src/compute_reduce.cpp @@ -214,8 +214,10 @@ ComputeReduce::ComputeReduce(LAMMPS *lmp, int narg, char **arg) : iarg += 2; } else if (strcmp(arg[iarg], "inputs") == 0) { if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, mycmd + " inputs", error); - if (strcmp(arg[iarg+1], "peratom") == 0) input_mode = PERATOM; - else if (strcmp(arg[iarg+1], "local") == 0) input_mode = LOCAL; + if (strcmp(arg[iarg + 1], "peratom") == 0) + input_mode = PERATOM; + else if (strcmp(arg[iarg + 1], "local") == 0) + input_mode = LOCAL; iarg += 2; } else error->all(FLERR, "Unknown compute {} keyword: {}", style, arg[iarg]); @@ -242,7 +244,7 @@ ComputeReduce::ComputeReduce(LAMMPS *lmp, int narg, char **arg) : for (auto &val : values) { if (val.which == ArgInfo::X || val.which == ArgInfo::V || val.which == ArgInfo::F) { - if (input_mode == LOCAL) error->all(FLERR,"Compute {} inputs must be all local"); + if (input_mode == LOCAL) error->all(FLERR, "Compute {} inputs must be all local"); } else if (val.which == ArgInfo::COMPUTE) { val.val.c = modify->get_compute_by_id(val.id); @@ -251,11 +253,14 @@ ComputeReduce::ComputeReduce(LAMMPS *lmp, int narg, char **arg) : if (input_mode == PERATOM) { if (!val.val.c->peratom_flag) - error->all(FLERR, "Compute {} compute {} does not calculate per-atom values", style, val.id); + error->all(FLERR, "Compute {} compute {} does not calculate per-atom values", style, + val.id); if (val.argindex == 0 && val.val.c->size_peratom_cols != 0) - error->all(FLERR, "Compute {} compute {} does not calculate a per-atom vector", style, val.id); + error->all(FLERR, "Compute {} compute {} does not calculate a per-atom vector", style, + val.id); if (val.argindex && val.val.c->size_peratom_cols == 0) - error->all(FLERR, "Compute {} compute {} does not calculate a per-atom array", style, val.id); + error->all(FLERR, "Compute {} compute {} does not calculate a per-atom array", style, + val.id); if (val.argindex && val.argindex > val.val.c->size_peratom_cols) error->all(FLERR, 
"Compute {} compute {} array is accessed out-of-range", style, val.id); @@ -263,9 +268,11 @@ ComputeReduce::ComputeReduce(LAMMPS *lmp, int narg, char **arg) : if (!val.val.c->local_flag) error->all(FLERR, "Compute {} compute {} does not calculate local values", style, val.id); if (val.argindex == 0 && val.val.c->size_local_cols != 0) - error->all(FLERR, "Compute {} compute {} does not calculate a local vector", style, val.id); + error->all(FLERR, "Compute {} compute {} does not calculate a local vector", style, + val.id); if (val.argindex && val.val.c->size_local_cols == 0) - error->all(FLERR, "Compute {} compute {} does not calculate a local array", style, val.id); + error->all(FLERR, "Compute {} compute {} does not calculate a local array", style, + val.id); if (val.argindex && val.argindex > val.val.c->size_local_cols) error->all(FLERR, "Compute {} compute {} array is accessed out-of-range", style, val.id); } @@ -278,7 +285,8 @@ ComputeReduce::ComputeReduce(LAMMPS *lmp, int narg, char **arg) : if (!val.val.f->peratom_flag) error->all(FLERR, "Compute {} fix {} does not calculate per-atom values", style, val.id); if (val.argindex == 0 && (val.val.f->size_peratom_cols != 0)) - error->all(FLERR, "Compute {} fix {} does not calculate a per-atom vector", style, val.id); + error->all(FLERR, "Compute {} fix {} does not calculate a per-atom vector", style, + val.id); if (val.argindex && (val.val.f->size_peratom_cols == 0)) error->all(FLERR, "Compute {} fix {} does not calculate a per-atom array", style, val.id); if (val.argindex && (val.argindex > val.val.f->size_peratom_cols)) @@ -296,7 +304,7 @@ ComputeReduce::ComputeReduce(LAMMPS *lmp, int narg, char **arg) : } } else if (val.which == ArgInfo::VARIABLE) { - if (input_mode == LOCAL) error->all(FLERR,"Compute {} inputs must be all local"); + if (input_mode == LOCAL) error->all(FLERR, "Compute {} inputs must be all local"); val.val.v = input->variable->find(val.id.c_str()); if (val.val.v < 0) error->all(FLERR, "Variable 
name {} for compute {} does not exist", val.id, style); @@ -417,7 +425,8 @@ void ComputeReduce::compute_vector() } else if (mode == MINN) { if (!replace) { for (int m = 0; m < nvalues; m++) - MPI_Allreduce(&onevec[m], &vector[m], 1, MPI_DOUBLE, this->scalar_reduction_operation, world); + MPI_Allreduce(&onevec[m], &vector[m], 1, MPI_DOUBLE, this->scalar_reduction_operation, + world); } else { for (int m = 0; m < nvalues; m++) @@ -437,7 +446,8 @@ void ComputeReduce::compute_vector() } else if (mode == MAXX) { if (!replace) { for (int m = 0; m < nvalues; m++) - MPI_Allreduce(&onevec[m], &vector[m], 1, MPI_DOUBLE, this->scalar_reduction_operation, world); + MPI_Allreduce(&onevec[m], &vector[m], 1, MPI_DOUBLE, this->scalar_reduction_operation, + world); } else { for (int m = 0; m < nvalues; m++) diff --git a/src/dihedral_write.cpp b/src/dihedral_write.cpp index dd1ca1de6a..1d0f908e2c 100644 --- a/src/dihedral_write.cpp +++ b/src/dihedral_write.cpp @@ -149,8 +149,10 @@ void DihedralWrite::command(int narg, char **arg) FILE *coeffs; char line[MAXLINE] = {'\0'}; coeffs = fopen(coeffs_file.c_str(), "r"); + if (!coeffs) + error->one(FLERR, "Unable to open temporary file {}: {}", coeffs_file, utils::getsyserror()); for (int i = 0; i < atom->ndihedraltypes; ++i) { - fgets(line, MAXLINE, coeffs); + utils::sfgets(FLERR, line, MAXLINE, coeffs, coeffs_file.c_str(), error); writer->input->one(fmt::format("dihedral_coeff {}", line)); } fclose(coeffs); diff --git a/src/error.h b/src/error.h index 89d168652a..805bd4cd0d 100644 --- a/src/error.h +++ b/src/error.h @@ -29,14 +29,14 @@ class Error : protected Pointers { [[noreturn]] void all(const std::string &, int, const std::string &); template - void all(const std::string &file, int line, const std::string &format, Args &&...args) + [[noreturn]] void all(const std::string &file, int line, const std::string &format, Args &&...args) { _all(file, line, format, fmt::make_format_args(args...)); } [[noreturn]] void one(const std::string &, int, const
std::string &); template - void one(const std::string &file, int line, const std::string &format, Args &&...args) + [[noreturn]] void one(const std::string &file, int line, const std::string &format, Args &&...args) { _one(file, line, format, fmt::make_format_args(args...)); } diff --git a/src/fix_adapt.cpp b/src/fix_adapt.cpp index cad157f2be..c725707b29 100644 --- a/src/fix_adapt.cpp +++ b/src/fix_adapt.cpp @@ -15,8 +15,10 @@ #include "fix_adapt.h" #include "angle.h" +#include "angle_hybrid.h" #include "atom.h" #include "bond.h" +#include "bond_hybrid.h" #include "domain.h" #include "error.h" #include "fix_store_atom.h" @@ -386,11 +388,15 @@ void FixAdapt::init() if (utils::strmatch(force->pair_style,"^hybrid")) { auto pair = dynamic_cast(force->pair); - for (i = ad->ilo; i <= ad->ihi; i++) - for (j = MAX(ad->jlo,i); j <= ad->jhi; j++) - if (!pair->check_ijtype(i,j,pstyle)) - error->all(FLERR,"Fix adapt type pair range is not valid " - "for pair hybrid sub-style {}", pstyle); + if (pair) { + for (i = ad->ilo; i <= ad->ihi; i++) { + for (j = MAX(ad->jlo,i); j <= ad->jhi; j++) { + if (!pair->check_ijtype(i,j,pstyle)) + error->all(FLERR,"Fix adapt type pair range is not valid " + "for pair hybrid sub-style {}", pstyle); + } + } + } } delete[] pstyle; @@ -416,8 +422,16 @@ void FixAdapt::init() if (ad->bdim == 1) ad->vector = (double *) ptr; - if (utils::strmatch(force->bond_style,"^hybrid")) - error->all(FLERR,"Fix adapt does not support bond_style hybrid"); + if (utils::strmatch(force->bond_style,"^hybrid")) { + auto bond = dynamic_cast<BondHybrid *>(force->bond); + if (bond) { + for (i = ad->ilo; i <= ad->ihi; i++) { + if (!bond->check_itype(i,bstyle)) + error->all(FLERR,"Fix adapt type bond range is not valid " + "for bond hybrid sub-style {}", bstyle); + } + } + } delete[] bstyle; @@ -442,8 +456,16 @@ void FixAdapt::init() if (ad->adim == 1) ad->vector = (double *) ptr; - if (utils::strmatch(force->angle_style,"^hybrid")) - error->all(FLERR,"Fix adapt does not support
angle_style hybrid"); + if (utils::strmatch(force->angle_style,"^hybrid")) { + auto angle = dynamic_cast<AngleHybrid *>(force->angle); + if (angle) { + for (i = ad->ilo; i <= ad->ihi; i++) { + if (!angle->check_itype(i,astyle)) + error->all(FLERR,"Fix adapt type angle range is not valid " + "for angle hybrid sub-style {}", astyle); + } + } + } delete[] astyle; diff --git a/src/fix_ave_chunk.cpp b/src/fix_ave_chunk.cpp index d9723cec9f..6a3c2e2032 100644 --- a/src/fix_ave_chunk.cpp +++ b/src/fix_ave_chunk.cpp @@ -153,7 +153,7 @@ FixAveChunk::FixAveChunk(LAMMPS *lmp, int narg, char **arg) : while (iarg < nargnew) { if (strcmp(arg[iarg],"norm") == 0) { - if (iarg+2 > narg) utils::missing_cmd_args(FLERR, "fix ave/chunk norm", error); + if (iarg+2 > nargnew) utils::missing_cmd_args(FLERR, "fix ave/chunk norm", error); if (strcmp(arg[iarg+1],"all") == 0) { normflag = ALL; scaleflag = ATOM; @@ -166,13 +166,13 @@ FixAveChunk::FixAveChunk(LAMMPS *lmp, int narg, char **arg) : } else error->all(FLERR,"Unknown fix ave/chunk norm mode: {}", arg[iarg+1]); iarg += 2; } else if (strcmp(arg[iarg],"ave") == 0) { - if (iarg+2 > narg) utils::missing_cmd_args(FLERR, "fix ave/chunk ave", error); + if (iarg+2 > nargnew) utils::missing_cmd_args(FLERR, "fix ave/chunk ave", error); if (strcmp(arg[iarg+1],"one") == 0) ave = ONE; else if (strcmp(arg[iarg+1],"running") == 0) ave = RUNNING; else if (strcmp(arg[iarg+1],"window") == 0) ave = WINDOW; else error->all(FLERR,"Unknown fix ave/chunk ave mode: {}", arg[iarg+1]); if (ave == WINDOW) { - if (iarg+3 > narg) utils::missing_cmd_args(FLERR, "fix ave/chunk ave window", error); + if (iarg+3 > nargnew) utils::missing_cmd_args(FLERR, "fix ave/chunk ave window", error); nwindow = utils::inumeric(FLERR,arg[iarg+2],false,lmp); if (nwindow <= 0) error->all(FLERR,"Illegal fix ave/chunk number of windows: {}", nwindow); } @@ -180,21 +180,21 @@ FixAveChunk::FixAveChunk(LAMMPS *lmp, int narg, char **arg) : if (ave == WINDOW) iarg++; } else if (strcmp(arg[iarg],"bias") ==
0) { - if (iarg+2 > narg) utils::missing_cmd_args(FLERR, "fix ave/chunk bias", error); + if (iarg+2 > nargnew) utils::missing_cmd_args(FLERR, "fix ave/chunk bias", error); biasflag = 1; id_bias = utils::strdup(arg[iarg+1]); iarg += 2; } else if (strcmp(arg[iarg],"adof") == 0) { - if (iarg+2 > narg) utils::missing_cmd_args(FLERR, "fix ave/chunk adof", error); + if (iarg+2 > nargnew) utils::missing_cmd_args(FLERR, "fix ave/chunk adof", error); adof = utils::numeric(FLERR,arg[iarg+1],false,lmp); iarg += 2; } else if (strcmp(arg[iarg],"cdof") == 0) { - if (iarg+2 > narg) utils::missing_cmd_args(FLERR, "fix ave/chunk cdof", error); + if (iarg+2 > nargnew) utils::missing_cmd_args(FLERR, "fix ave/chunk cdof", error); cdof = utils::numeric(FLERR,arg[iarg+1],false,lmp); iarg += 2; } else if ((strcmp(arg[iarg],"file") == 0) || (strcmp(arg[iarg],"append") == 0)) { - if (iarg+2 > narg) + if (iarg+2 > nargnew) utils::missing_cmd_args(FLERR, std::string("fix ave/chunk ")+arg[iarg], error); if (comm->me == 0) { if (strcmp(arg[iarg],"file") == 0) fp = fopen(arg[iarg+1],"w"); @@ -208,23 +208,23 @@ FixAveChunk::FixAveChunk(LAMMPS *lmp, int narg, char **arg) : overwrite = 1; iarg += 1; } else if (strcmp(arg[iarg],"format") == 0) { - if (iarg+2 > narg) utils::missing_cmd_args(FLERR, "fix ave/chunk format", error); + if (iarg+2 > nargnew) utils::missing_cmd_args(FLERR, "fix ave/chunk format", error); delete[] format_user; format_user = utils::strdup(arg[iarg+1]); format = format_user; iarg += 2; } else if (strcmp(arg[iarg],"title1") == 0) { - if (iarg+2 > narg) utils::missing_cmd_args(FLERR, "fix ave/chunk title1", error); + if (iarg+2 > nargnew) utils::missing_cmd_args(FLERR, "fix ave/chunk title1", error); delete[] title1; title1 = utils::strdup(arg[iarg+1]); iarg += 2; } else if (strcmp(arg[iarg],"title2") == 0) { - if (iarg+2 > narg) utils::missing_cmd_args(FLERR, "fix ave/chunk title2", error); + if (iarg+2 > nargnew) utils::missing_cmd_args(FLERR, "fix ave/chunk title2", error); 
delete[] title2; title2 = utils::strdup(arg[iarg+1]); iarg += 2; } else if (strcmp(arg[iarg],"title3") == 0) { - if (iarg+2 > narg) utils::missing_cmd_args(FLERR, "fix ave/chunk title3", error); + if (iarg+2 > nargnew) utils::missing_cmd_args(FLERR, "fix ave/chunk title3", error); delete[] title3; title3 = utils::strdup(arg[iarg+1]); iarg += 2; diff --git a/src/fix_ave_grid.cpp b/src/fix_ave_grid.cpp index ca89c918ba..1b69c5644c 100644 --- a/src/fix_ave_grid.cpp +++ b/src/fix_ave_grid.cpp @@ -199,14 +199,14 @@ FixAveGrid::FixAveGrid(LAMMPS *lmp, int narg, char **arg) : while (iarg < nargnew) { if (strcmp(arg[iarg],"discard") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix ave/grid command"); + if (iarg+2 > nargnew) error->all(FLERR,"Illegal fix ave/grid command"); if (strcmp(arg[iarg+1],"yes") == 0) discardflag = DISCARD; else if (strcmp(arg[iarg+1],"no") == 0) discardflag = KEEP; else error->all(FLERR,"Illegal fix ave/grid command"); iarg += 2; } else if (strcmp(arg[iarg],"norm") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix ave/grid command"); + if (iarg+2 > nargnew) error->all(FLERR,"Illegal fix ave/grid command"); if (strcmp(arg[iarg+1],"all") == 0) normflag = ALL; else if (strcmp(arg[iarg+1],"sample") == 0) normflag = SAMPLE; else if (strcmp(arg[iarg+1],"none") == 0) normflag = NONORM; @@ -214,13 +214,13 @@ FixAveGrid::FixAveGrid(LAMMPS *lmp, int narg, char **arg) : iarg += 2; } else if (strcmp(arg[iarg],"ave") == 0) { - if (iarg+2 > narg) error->all(FLERR,"Illegal fix ave/grid command"); + if (iarg+2 > nargnew) error->all(FLERR,"Illegal fix ave/grid command"); if (strcmp(arg[iarg+1],"one") == 0) aveflag = ONE; else if (strcmp(arg[iarg+1],"running") == 0) aveflag = RUNNING; else if (strcmp(arg[iarg+1],"window") == 0) aveflag = WINDOW; else error->all(FLERR,"Illegal fix ave/grid command"); if (aveflag == WINDOW) { - if (iarg+3 > narg) error->all(FLERR,"Illegal fix ave/grid command"); + if (iarg+3 > nargnew) error->all(FLERR,"Illegal fix 
ave/grid command"); nwindow = utils::inumeric(FLERR,arg[iarg+2],false,lmp); if (nwindow <= 0) error->all(FLERR,"Illegal fix ave/grid command"); iarg++; @@ -228,19 +228,19 @@ FixAveGrid::FixAveGrid(LAMMPS *lmp, int narg, char **arg) : iarg += 2; } else if (strcmp(arg[iarg],"bias") == 0) { - if (iarg+2 > narg) + if (iarg+2 > nargnew) error->all(FLERR,"Illegal fix ave/grid command"); biasflag = 1; id_bias = utils::strdup(arg[iarg+1]); iarg += 2; } else if (strcmp(arg[iarg],"adof") == 0) { - if (iarg+2 > narg) + if (iarg+2 > nargnew) error->all(FLERR,"Illegal fix ave/grid command"); adof = utils::numeric(FLERR,arg[iarg+1],false,lmp); iarg += 2; } else if (strcmp(arg[iarg],"cdof") == 0) { - if (iarg+2 > narg) + if (iarg+2 > nargnew) error->all(FLERR,"Illegal fix ave/grid command"); cdof = utils::numeric(FLERR,arg[iarg+1],false,lmp); iarg += 2; diff --git a/src/fix_efield.cpp b/src/fix_efield.cpp index 81be66b3e3..a5f02cc7c8 100644 --- a/src/fix_efield.cpp +++ b/src/fix_efield.cpp @@ -114,7 +114,8 @@ FixEfield::FixEfield(LAMMPS *lmp, int narg, char **arg) : } if (estr && pstr) - error->all(FLERR, "Must not use energy and potential keywords at the same time with fix efield"); + error->all(FLERR, + "Must not use energy and potential keywords at the same time with fix efield"); force_flag = 0; fsum[0] = fsum[1] = fsum[2] = fsum[3] = 0.0; @@ -171,7 +172,8 @@ void FixEfield::init() if (xstr) { xvar = input->variable->find(xstr); - if (xvar < 0) error->all(FLERR, "Variable {} for x-field in fix {} does not exist", xstr, style); + if (xvar < 0) + error->all(FLERR, "Variable {} for x-field in fix {} does not exist", xstr, style); if (input->variable->equalstyle(xvar)) xstyle = EQUAL; else if (input->variable->atomstyle(xvar)) @@ -182,7 +184,8 @@ void FixEfield::init() if (ystr) { yvar = input->variable->find(ystr); - if (yvar < 0) error->all(FLERR, "Variable {} for y-field in fix {} does not exist", ystr, style); + if (yvar < 0) + error->all(FLERR, "Variable {} for y-field in 
fix {} does not exist", ystr, style); if (input->variable->equalstyle(yvar)) ystyle = EQUAL; else if (input->variable->atomstyle(yvar)) @@ -193,7 +196,8 @@ void FixEfield::init() if (zstr) { zvar = input->variable->find(zstr); - if (zvar < 0) error->all(FLERR, "Variable {} for z-field in fix {} does not exist", zstr, style); + if (zvar < 0) + error->all(FLERR, "Variable {} for z-field in fix {} does not exist", zstr, style); if (input->variable->equalstyle(zvar)) zstyle = EQUAL; else if (input->variable->atomstyle(zvar)) @@ -213,7 +217,8 @@ void FixEfield::init() if (pstr) { pvar = input->variable->find(pstr); - if (pvar < 0) error->all(FLERR, "Variable {} for potential in fix {} does not exist", pstr, style); + if (pvar < 0) + error->all(FLERR, "Variable {} for potential in fix {} does not exist", pstr, style); if (input->variable->atomstyle(pvar)) pstyle = ATOM; else @@ -244,8 +249,10 @@ void FixEfield::init() error->all(FLERR, "Cannot use variable energy with constant efield in fix {}", style); if (varflag == CONSTANT && pstyle != NONE) error->all(FLERR, "Cannot use variable potential with constant efield in fix {}", style); - if ((varflag == EQUAL || varflag == ATOM) && update->whichflag == 2 && estyle == NONE && pstyle == NONE) - error->all(FLERR, "Must use variable energy or potential with fix {} during minimization", style); + if ((varflag == EQUAL || varflag == ATOM) && update->whichflag == 2 && estyle == NONE && + pstyle == NONE) + error->all(FLERR, "Must use variable energy or potential with fix {} during minimization", + style); if (utils::strmatch(update->integrate_style, "^respa")) { ilevel_respa = (dynamic_cast(update->integrate))->nlevels - 1; @@ -403,8 +410,10 @@ void FixEfield::post_force(int vflag) } f[i][2] += fz; fsum[3] += fz; - if (pstyle == ATOM) fsum[0] += qe2f * q[i] * efield[i][3]; - else if (estyle == ATOM) fsum[0] += efield[i][3]; + if (pstyle == ATOM) + fsum[0] += qe2f * q[i] * efield[i][3]; + else if (estyle == ATOM) + fsum[0] += 
efield[i][3]; } } @@ -504,8 +513,10 @@ void FixEfield::update_efield_variables() } else if (zstyle == ATOM) { input->variable->compute_atom(zvar, igroup, &efield[0][2], 4, 0); } - if (pstyle == ATOM) input->variable->compute_atom(pvar, igroup, &efield[0][3], 4, 0); - else if (estyle == ATOM) input->variable->compute_atom(evar, igroup, &efield[0][3], 4, 0); + if (pstyle == ATOM) + input->variable->compute_atom(pvar, igroup, &efield[0][3], 4, 0); + else if (estyle == ATOM) + input->variable->compute_atom(evar, igroup, &efield[0][3], 4, 0); modify->addstep_compute(update->ntimestep + 1); } diff --git a/src/info.cpp b/src/info.cpp index 98ed06f498..2bacea69cf 100644 --- a/src/info.cpp +++ b/src/info.cpp @@ -1316,6 +1316,10 @@ std::string Info::get_fft_info() #else fft_info += "FFT library = MKL\n"; #endif +#elif defined(FFT_MKL_GPU) + fft_info += "FFT library = MKL GPU\n"; +#elif defined(FFT_NVPL) + fft_info += "FFT library = NVPL\n"; #elif defined(FFT_FFTW3) #if defined(FFT_FFTW_THREADS) fft_info += "FFT library = FFTW3 with threads\n"; @@ -1338,12 +1342,16 @@ std::string Info::get_fft_info() #else fft_info += "KOKKOS FFT library = FFTW3\n"; #endif +#elif defined(FFT_KOKKOS_NVPL) + fft_info += "KOKKOS FFT library = NVPL\n"; #elif defined(FFT_KOKKOS_MKL) #if defined(FFT_KOKKOS_MKL_THREADS) fft_info += "KOKKOS FFT library = MKL with threads\n"; #else fft_info += "KOKKOS FFT library = MKL\n"; #endif +#elif defined(FFT_KOKKOS_MKL_GPU) + fft_info += "KOKKOS FFT library = MKL GPU\n"; #else fft_info += "KOKKOS FFT library = KISS\n"; #endif diff --git a/src/library.cpp b/src/library.cpp index 097cffd68a..29cec30488 100644 --- a/src/library.cpp +++ b/src/library.cpp @@ -35,6 +35,7 @@ #include "group.h" #include "info.h" #include "input.h" +#include "lattice.h" #include "lmppython.h" #include "memory.h" #include "modify.h" @@ -1414,6 +1415,9 @@ int lammps_extract_global_datatype(void * /*handle*/, const char *name) if (strcmp(name,"xy") == 0) return LAMMPS_DOUBLE; if 
(strcmp(name,"xz") == 0) return LAMMPS_DOUBLE; if (strcmp(name,"yz") == 0) return LAMMPS_DOUBLE; + if (strcmp(name,"xlattice") == 0) return LAMMPS_DOUBLE; + if (strcmp(name,"ylattice") == 0) return LAMMPS_DOUBLE; + if (strcmp(name,"zlattice") == 0) return LAMMPS_DOUBLE; if (strcmp(name,"procgrid") == 0) return LAMMPS_INT; if (strcmp(name,"natoms") == 0) return LAMMPS_BIGINT; @@ -1510,9 +1514,9 @@ The function :cpp:func:`lammps_extract_global_datatype` will directly report the "native" data type. The following tables are provided: * :ref:`Timestep settings ` -* :ref:`Git revision and version settings ` * :ref:`Simulation box settings ` * :ref:`System property settings ` +* :ref:`Git revision and version settings ` * :ref:`Unit settings ` .. _extract_timestep_settings: @@ -1552,35 +1556,6 @@ report the "native" data type. The following tables are provided: - :math:`N_{respa}` - length of the time steps with r-RESPA. See :doc:`run_style`. -.. _extract_git_settings: - -**Git revision and version settings** - -.. list-table:: - :header-rows: 1 - :widths: 16 14 10 60 - - * - Name - - Type - - Length - - Description - * - git_commit - - const char \* - - 1 - - Git commit hash for the LAMMPS version. - * - git_branch - - const char \* - - 1 - - Git branch for the LAMMPS version. - * - git_descriptor - - const char \* - - 1 - - Combined descriptor for the git revision - * - lammps_version - - const char \* - - 1 - - LAMMPS version string. - .. _extract_box_settings: **Simulation box settings** @@ -1649,6 +1624,18 @@ report the "native" data type. The following tables are provided: - double - 1 - triclinic tilt factor; see :doc:`Howto_triclinic`. + * - xlattice + - double + - 1 + - lattice spacing in x-direction; see :doc:`lattice command `. + * - ylattice + - double + - 1 + - lattice spacing in y-direction; see :doc:`lattice command `. + * - zlattice + - double + - 1 + - lattice spacing in z-direction; see :doc:`lattice command `. 
* - procgrid - int - 3 @@ -1763,6 +1750,35 @@ report the "native" data type. The following tables are provided: - 1 - string with the current KSpace style. +.. _extract_git_settings: + +**Git revision and version settings** + +.. list-table:: + :header-rows: 1 + :widths: 16 14 10 60 + + * - Name + - Type + - Length + - Description + * - git_commit + - const char \* + - 1 + - Git commit hash for the LAMMPS version. + * - git_branch + - const char \* + - 1 + - Git branch for the LAMMPS version. + * - git_descriptor + - const char \* + - 1 + - Combined descriptor for the git revision + * - lammps_version + - const char \* + - 1 + - LAMMPS version string. + .. _extract_unit_settings: **Unit settings** @@ -1917,6 +1933,9 @@ void *lammps_extract_global(void *handle, const char *name) if (strcmp(name,"xy") == 0) return (void *) &lmp->domain->xy; if (strcmp(name,"xz") == 0) return (void *) &lmp->domain->xz; if (strcmp(name,"yz") == 0) return (void *) &lmp->domain->yz; + if (strcmp(name,"xlattice") == 0) return (void *) &lmp->domain->lattice->xlattice; + if (strcmp(name,"ylattice") == 0) return (void *) &lmp->domain->lattice->ylattice; + if (strcmp(name,"zlattice") == 0) return (void *) &lmp->domain->lattice->zlattice; if (((lmp->comm->layout == Comm::LAYOUT_UNIFORM) || (lmp->comm->layout == Comm::LAYOUT_NONUNIFORM)) && (strcmp(name,"procgrid") == 0)) return (void *) &lmp->comm->procgrid; @@ -1972,7 +1991,7 @@ void *lammps_extract_global(void *handle, const char *name) * \verbatim embed:rst -.. versionadded:: TBD +.. versionadded:: 29Aug2024 This function returns an integer that specified the dimensionality of the data that can be extracted from the current pair style with ``Pair::extract()``. @@ -2005,7 +2024,7 @@ int lammps_extract_pair_dimension(void * handle, const char *name) * \verbatim embed:rst -.. versionadded:: TBD +.. versionadded:: 29Aug2024 This function returns a pointer to data available from the current pair style with ``Pair::extract()``. 
The dimensionality of the returned @@ -2068,10 +2087,13 @@ int lammps_map_atom(void *handle, const void *id) .. versionadded:: 18Sep2020 -This function returns an integer that encodes the data type of the per-atom -property with the specified name. See :cpp:enum:`_LMP_DATATYPE_CONST` for valid -values. Callers of :cpp:func:`lammps_extract_atom` can use this information -to then decide how to cast the ``void *`` pointer and access the data. +This function returns an integer that encodes the data type of the +per-atom property with the specified name. See +:cpp:enum:`_LMP_DATATYPE_CONST` for valid values. Callers of +:cpp:func:`lammps_extract_atom` can use this information to decide how +to cast the ``void *`` pointer and access the data. In addition, +:cpp:func:`lammps_extract_atom_size` can be used to get information +about the vector or array dimensions. \endverbatim * @@ -2089,18 +2111,53 @@ int lammps_extract_atom_datatype(void *handle, const char *name) /* ---------------------------------------------------------------------- */ +/** Get dimension info of a LAMMPS per-atom property + * +\verbatim embed:rst + +.. versionadded:: TBD + +This function returns an integer with the size of the per-atom +property with the specified name. This allows to accurately determine +the size of the per-atom data vectors or arrays. For per-atom arrays, +the *type* argument is required to return either the number of rows or the +number of columns. It is ignored for per-atom vectors. + +Callers of :cpp:func:`lammps_extract_atom` can use this information in +combination with the result from :cpp:func:`lammps_extract_atom_datatype` +to decide how to cast the ``void *`` pointer and access the data. 
+ +\endverbatim + * + * \param handle pointer to a previously created LAMMPS instance + * \param name string with the name of the extracted property + * \param type either LMP_SIZE_ROWS or LMP_SIZE_COLS if *name* refers + to a per-atom array otherwise ignored + * \return integer with the size of the vector or array dimension or -1 + * */ + +int lammps_extract_atom_size(void *handle, const char *name, int type) +{ + auto lmp = (LAMMPS *) handle; + return lmp->atom->extract_size(name, type); +} + +/* ---------------------------------------------------------------------- */ + /** Get pointer to a LAMMPS per-atom property. * \verbatim embed:rst -This function returns a pointer to the location of per-atom properties -(and per-atom-type properties in the case of the 'mass' keyword). -Per-atom data is distributed across sub-domains and thus MPI ranks. The -returned pointer is cast to ``void *`` and needs to be cast to a pointer -of data type that the entity represents. +This function returns a pointer to the location of per-atom properties (and +per-atom-type properties in the case of the 'mass' keyword). Per-atom data is +distributed across sub-domains and thus MPI ranks. The returned pointer is cast +to ``void *`` and needs to be cast to a pointer of data type that the entity +represents. You can use the functions :cpp:func:`lammps_extract_atom_datatype` +and :cpp:func:`lammps_extract_atom_size` to determine data type, dimensions and +sizes of the storage pointed to by the returned pointer. -A table with supported keywords is included in the documentation -of the :cpp:func:`Atom::extract() ` function. +A table with supported keywords is included in the documentation of the +:cpp:func:`Atom::extract() ` function. .. warning:: @@ -5859,7 +5916,7 @@ int lammps_config_has_ffmpeg_support() { \verbatim embed:rst -.. versionadded::TBD +.. versionadded:: 29Aug2024 The LAMMPS :doc:`geturl command ` supports downloading files through using `the libcurl library `_. 
@@ -7008,5 +7065,5 @@ int lammps_python_api_version() { } // Local Variables: -// fill-column: 72 +// fill-column: 80 // End: diff --git a/src/library.h b/src/library.h index ff16aaa088..dbfd32a542 100644 --- a/src/library.h +++ b/src/library.h @@ -172,6 +172,7 @@ int lammps_map_atom(void *handle, const void *id); * ---------------------------------------------------------------------- */ int lammps_extract_atom_datatype(void *handle, const char *name); +int lammps_extract_atom_size(void *handle, const char *name, int type); void *lammps_extract_atom(void *handle, const char *name); /* ---------------------------------------------------------------------- diff --git a/src/lmpfftsettings.h b/src/lmpfftsettings.h index 1b9c89274c..3bcab4a61b 100644 --- a/src/lmpfftsettings.h +++ b/src/lmpfftsettings.h @@ -39,6 +39,8 @@ #define LMP_FFT_LIB "FFTW3" #elif defined(FFT_MKL) #define LMP_FFT_LIB "MKL FFT" +#elif defined(FFT_MKL_GPU) +#define LMP_FFT_LIB "MKL GPU FFT" #elif defined(FFT_CUFFT) #define LMP_FFT_LIB "cuFFT" #elif defined(FFT_HIPFFT) diff --git a/src/region.h b/src/region.h index f273485dce..19fdec31c7 100644 --- a/src/region.h +++ b/src/region.h @@ -20,6 +20,8 @@ namespace LAMMPS_NS { class Region : protected Pointers { public: + enum { CONSTANT, VARIABLE }; + char *id, *style; Region **reglist; int interior; // 1 for interior, 0 for exterior diff --git a/src/region_block.cpp b/src/region_block.cpp index efa3d8ca6a..9376016843 100644 --- a/src/region_block.cpp +++ b/src/region_block.cpp @@ -23,8 +23,6 @@ using namespace LAMMPS_NS; -enum { CONSTANT, VARIABLE }; - static constexpr double BIG = 1.0e20; /* ---------------------------------------------------------------------- */ diff --git a/src/region_cone.cpp b/src/region_cone.cpp index dc37eeefe3..401ed53735 100644 --- a/src/region_cone.cpp +++ b/src/region_cone.cpp @@ -27,8 +27,6 @@ using namespace LAMMPS_NS; -enum { CONSTANT, VARIABLE }; - static constexpr double BIG = 1.0e20; /* 
---------------------------------------------------------------------- */ diff --git a/src/region_cylinder.cpp b/src/region_cylinder.cpp index 11783dc125..2ad0ba82f5 100644 --- a/src/region_cylinder.cpp +++ b/src/region_cylinder.cpp @@ -26,8 +26,6 @@ using namespace LAMMPS_NS; static constexpr double BIG = 1.0e20; -enum { CONSTANT, VARIABLE }; - /* ---------------------------------------------------------------------- */ RegCylinder::RegCylinder(LAMMPS *lmp, int narg, char **arg) : diff --git a/src/region_ellipsoid.cpp b/src/region_ellipsoid.cpp index daabd621c8..a0b4b9e544 100644 --- a/src/region_ellipsoid.cpp +++ b/src/region_ellipsoid.cpp @@ -23,8 +23,6 @@ using namespace LAMMPS_NS; -enum { CONSTANT, VARIABLE }; - static double GetRoot2D(double r0, double z0, double z1, double g); static double GetRoot3D(double r0, double r1, double z0, double z1, double z2, double g); diff --git a/src/region_plane.cpp b/src/region_plane.cpp index 154b072633..6dc162eead 100644 --- a/src/region_plane.cpp +++ b/src/region_plane.cpp @@ -14,6 +14,9 @@ #include "region_plane.h" #include "error.h" +#include "input.h" +#include "update.h" +#include "variable.h" #include @@ -21,13 +24,48 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -RegPlane::RegPlane(LAMMPS *lmp, int narg, char **arg) : Region(lmp, narg, arg) +RegPlane::RegPlane(LAMMPS *lmp, int narg, char **arg) : Region(lmp, narg, arg), + xstr(nullptr), ystr(nullptr), zstr(nullptr) { + xvar = yvar = zvar = 0.0; + options(narg - 8, &arg[8]); - xp = xscale * utils::numeric(FLERR, arg[2], false, lmp); - yp = yscale * utils::numeric(FLERR, arg[3], false, lmp); - zp = zscale * utils::numeric(FLERR, arg[4], false, lmp); + if (utils::strmatch(arg[2], "^v_")) { + xstr = utils::strdup(arg[2] + 2); + xp = 0.0; + xstyle = VARIABLE; + varshape = 1; + } else { + xp = xscale * utils::numeric(FLERR, arg[2], false, lmp); + xstyle = CONSTANT; + } + + if (utils::strmatch(arg[3], "^v_")) { + 
ystr = utils::strdup(arg[3] + 2); + yp = 0.0; + ystyle = VARIABLE; + varshape = 1; + } else { + yp = yscale * utils::numeric(FLERR, arg[3], false, lmp); + ystyle = CONSTANT; + } + + if (utils::strmatch(arg[4], "^v_")) { + zstr = utils::strdup(arg[4] + 2); + zp = 0.0; + zstyle = VARIABLE; + varshape = 1; + } else { + zp = zscale * utils::numeric(FLERR, arg[4], false, lmp); + zstyle = CONSTANT; + } + + if (varshape) { + variable_check(); + RegPlane::shape_update(); + } + normal[0] = xscale * utils::numeric(FLERR, arg[5], false, lmp); normal[1] = yscale * utils::numeric(FLERR, arg[6], false, lmp); normal[2] = zscale * utils::numeric(FLERR, arg[7], false, lmp); @@ -54,9 +92,20 @@ RegPlane::RegPlane(LAMMPS *lmp, int narg, char **arg) : Region(lmp, narg, arg) RegPlane::~RegPlane() { + delete[] xstr; + delete[] ystr; + delete[] zstr; delete[] contact; } +/* ---------------------------------------------------------------------- */ + +void RegPlane::init() +{ + Region::init(); + if (varshape) variable_check(); +} + /* ---------------------------------------------------------------------- inside = 1 if x,y,z is on normal side of plane or on plane inside = 0 if x,y,z is on non-normal side of plane and not on plane @@ -113,3 +162,45 @@ int RegPlane::surface_exterior(double *x, double cutoff) } return 0; } + +/* ---------------------------------------------------------------------- + change region shape via variable evaluation +------------------------------------------------------------------------- */ + +void RegPlane::shape_update() +{ + if (xstyle == VARIABLE) xp = xscale * input->variable->compute_equal(xvar); + + if (ystyle == VARIABLE) yp = yscale * input->variable->compute_equal(yvar); + + if (zstyle == VARIABLE) zp = zscale * input->variable->compute_equal(zvar); +} + +/* ---------------------------------------------------------------------- + error check on existence of variable +------------------------------------------------------------------------- */ + +void 
RegPlane::variable_check() +{ + if (xstyle == VARIABLE) { + xvar = input->variable->find(xstr); + if (xvar < 0) error->all(FLERR, "Variable {} for region plane does not exist", xstr); + if (!input->variable->equalstyle(xvar)) + error->all(FLERR, "Variable {} for region plane is invalid style", xstr); + } + + if (ystyle == VARIABLE) { + yvar = input->variable->find(ystr); + if (yvar < 0) error->all(FLERR, "Variable {} for region plane does not exist", ystr); + if (!input->variable->equalstyle(yvar)) + error->all(FLERR, "Variable {} for region plane is invalid style", ystr); + } + + if (zstyle == VARIABLE) { + zvar = input->variable->find(zstr); + if (zvar < 0) error->all(FLERR, "Variable {} for region plane does not exist", zstr); + if (!input->variable->equalstyle(zvar)) + error->all(FLERR, "Variable {} for region plane is invalid style", zstr); + } +} + diff --git a/src/region_plane.h b/src/region_plane.h index 2025586a7c..0e4ecda6d4 100644 --- a/src/region_plane.h +++ b/src/region_plane.h @@ -28,13 +28,23 @@ class RegPlane : public Region { public: RegPlane(class LAMMPS *, int, char **); ~RegPlane() override; + void init() override; int inside(double, double, double) override; int surface_interior(double *, double) override; int surface_exterior(double *, double) override; + void shape_update() override; private: double xp, yp, zp; double normal[3]; + + int xstyle, xvar; + int ystyle, yvar; + int zstyle, zvar; + char *xstr, *ystr, *zstr; + + void variable_check(); + }; } // namespace LAMMPS_NS diff --git a/src/region_sphere.cpp b/src/region_sphere.cpp index cd20a697d4..f449978938 100644 --- a/src/region_sphere.cpp +++ b/src/region_sphere.cpp @@ -22,8 +22,6 @@ using namespace LAMMPS_NS; -enum { CONSTANT, VARIABLE }; - /* ---------------------------------------------------------------------- */ RegSphere::RegSphere(LAMMPS *lmp, int narg, char **arg) : diff --git a/src/variable.cpp b/src/variable.cpp index b2f6c2882c..279c14d999 100644 --- a/src/variable.cpp +++ 
b/src/variable.cpp @@ -613,7 +613,7 @@ void Variable::set(int narg, char **arg) // unrecognized variable style - } else error->all(FLERR,"Unknown variable keyword: {}", arg[1]); + } else error->all(FLERR,"Unknown variable style: {}", arg[1]); // set name of variable, if not replacing one flagged with replaceflag // name must be all alphanumeric chars or underscores diff --git a/src/version.h b/src/version.h index af7e87b61f..9c382b3768 100644 --- a/src/version.h +++ b/src/version.h @@ -1,2 +1,2 @@ -#define LAMMPS_VERSION "27 Jun 2024" +#define LAMMPS_VERSION "29 Aug 2024" #define LAMMPS_UPDATE "Development" diff --git a/tools/lammps-gui/CMakeLists.txt b/tools/lammps-gui/CMakeLists.txt index 73e945820b..fc111f5c64 100644 --- a/tools/lammps-gui/CMakeLists.txt +++ b/tools/lammps-gui/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.16) -project(lammps-gui VERSION 1.6.9 LANGUAGES CXX) +project(lammps-gui VERSION 1.6.11 LANGUAGES CXX) set(CMAKE_AUTOUIC ON) set(CMAKE_AUTOMOC ON) @@ -86,9 +86,7 @@ else() add_compile_options(/wd4244) add_compile_options(/wd4267) add_compile_options(/wd4250) - if(LAMMPS_EXCEPTIONS) - add_compile_options(/EHsc) - endif() + add_compile_options(/EHsc) endif() add_compile_definitions(_CRT_SECURE_NO_WARNINGS) endif() @@ -130,6 +128,8 @@ set(PROJECT_SOURCES chartviewer.h codeeditor.cpp codeeditor.h + findandreplace.cpp + findandreplace.h helpers.cpp highlighter.cpp highlighter.h diff --git a/tools/lammps-gui/TODO.md b/tools/lammps-gui/TODO.md index e3782e6446..b36f0a4ce8 100644 --- a/tools/lammps-gui/TODO.md +++ b/tools/lammps-gui/TODO.md @@ -2,12 +2,21 @@ LAMMPS-GUI TODO list: # Short term goals (v1.x) +- implement a timed "Auto-Save" feature that saves after some idle time. set timeout in Editor preferences. +- add a "Filter data" checkbox to the "Charts" window to select whether data should be dropped. 
+- add a "Charts tab" to the preferences with the following (default) settings: + - default filter data yes/no + - default smooth parameters + - default plot colors + - enable "raw" or "smooth" or "both" +- add QLineEdit field to enter plot title +- add a "Colors" menu to the image viewer to adjust color settings for the + current image (unlike the defaults in the preferences) including assigning + colors to individual atom types. +- Support color by property (e.g. scan computes or fixes with per-atom data), define colormaps etc. +- Add a "Diameters" dialog where diameters can be specified by atom type - figure out how widgets can be resized to fraction of available screen size. - figure out stacking order of frames and whether it can be more flexible -- implement a timed "Auto-Save" feature that saves after some idle time. set timeout in Editor preferences. -- add a "Colors" menu to the image viewer to adjust color settings for the - current image (unlike the defaults in the perferences). Support color by - property (e.g. scan computes or fixes with per-atom data), define colormaps etc. - implement indenting regions for (nested) loops? - implement data file manager GUI with the following features: diff --git a/tools/lammps-gui/chartviewer.cpp b/tools/lammps-gui/chartviewer.cpp index 71ba07de4c..eb5444de8e 100644 --- a/tools/lammps-gui/chartviewer.cpp +++ b/tools/lammps-gui/chartviewer.cpp @@ -424,7 +424,7 @@ void ChartViewer::add_data(int step, double data) if (last_step < step) { last_step = step; - // do not add data that deviates by more than 5 sigma from the average + // do not add data that deviates by more than 4 sigma from the average // over the last 5 to 20 data items.
this is a hack to work around // getting corrupted data from lammps_get_last_thermo() const auto &points = series->points(); diff --git a/tools/lammps-gui/codeeditor.cpp b/tools/lammps-gui/codeeditor.cpp index fd86b5199e..8083f1e2c5 100644 --- a/tools/lammps-gui/codeeditor.cpp +++ b/tools/lammps-gui/codeeditor.cpp @@ -218,7 +218,10 @@ CodeEditor::CodeEditor(QWidget *parent) : help_index.close(); } + setBackgroundRole(QPalette::Light); lineNumberArea = new LineNumberArea(this); + lineNumberArea->setBackgroundRole(QPalette::Dark); + lineNumberArea->setAutoFillBackground(true); connect(this, &CodeEditor::blockCountChanged, this, &CodeEditor::updateLineNumberAreaWidth); connect(this, &CodeEditor::updateRequest, this, &CodeEditor::updateLineNumberArea); updateLineNumberAreaWidth(0); @@ -669,7 +672,7 @@ void CodeEditor::lineNumberAreaPaintEvent(QPaintEvent *event) if (block.isVisible() && bottom >= event->rect().top()) { QString number = QString::number(blockNumber + 1) + " "; if ((highlight == NO_HIGHLIGHT) || (blockNumber != std::abs(highlight))) { - painter.setPen(Qt::black); + painter.setPen(palette().color(QPalette::WindowText)); } else { number = QString(">") + QString::number(blockNumber + 1) + "<"; if (highlight < 0) diff --git a/tools/lammps-gui/findandreplace.cpp b/tools/lammps-gui/findandreplace.cpp new file mode 100644 index 0000000000..e986dc4edf --- /dev/null +++ b/tools/lammps-gui/findandreplace.cpp @@ -0,0 +1,148 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. 
+ + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#include "findandreplace.h" + +#include "codeeditor.h" +#include "lammpsgui.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* ---------------------------------------------------------------------- */ + +FindAndReplace::FindAndReplace(CodeEditor *_editor, QWidget *parent) : + QDialog(parent), editor(_editor), search(nullptr), replace(nullptr), withcase(nullptr), + wrap(nullptr), whole(nullptr) +{ + auto *layout = new QGridLayout; + search = new QLineEdit; + replace = new QLineEdit; + withcase = new QCheckBox("Match case"); + wrap = new QCheckBox("Wrap around"); + whole = new QCheckBox("Whole word"); + auto *next = new QPushButton("&Next"); + auto *replone = new QPushButton("&Replace"); + auto *replall = new QPushButton("Replace &All"); + auto *done = new QPushButton("&Done"); + + layout->addWidget(new QLabel("Find:"), 0, 0, Qt::AlignRight); + layout->addWidget(search, 0, 1, 1, 2, Qt::AlignLeft); + layout->addWidget(new QLabel("Replace with:"), 1, 0, Qt::AlignRight); + layout->addWidget(replace, 1, 1, 1, 2, Qt::AlignLeft); + layout->addWidget(withcase, 2, 0, Qt::AlignLeft); + layout->addWidget(wrap, 2, 1, Qt::AlignLeft); + layout->addWidget(whole, 2, 2, Qt::AlignLeft); + wrap->setChecked(true); + + auto *buttons = new QHBoxLayout; + buttons->addWidget(next); + buttons->addWidget(replone); + buttons->addWidget(replall); + buttons->addWidget(done); + layout->addLayout(buttons, 3, 0, 1, 3, Qt::AlignHCenter); + + connect(next, &QPushButton::released, this, &FindAndReplace::find_next); + connect(replone, &QPushButton::released, this, &FindAndReplace::replace_next); + connect(replall, &QPushButton::released, this, &FindAndReplace::replace_all); + connect(done, &QPushButton::released, this, &QDialog::accept); + + auto action = new 
QShortcut(QKeySequence(Qt::CTRL | Qt::Key_Q), this); + connect(action, &QShortcut::activated, this, &FindAndReplace::quit); + + setLayout(layout); + setWindowIcon(QIcon(":/icons/lammps-icon-128x128.png")); + setWindowTitle("LAMMPS-GUI - Find and Replace"); +} + +/* ---------------------------------------------------------------------- */ + +void FindAndReplace::find_next() +{ + auto text = search->text(); + + int find_flags = 0; + if (withcase->isChecked()) find_flags |= QTextDocument::FindCaseSensitively; + if (whole->isChecked()) find_flags |= QTextDocument::FindWholeWords; + + if (!text.isEmpty()) { + if (!editor->find(text, (QTextDocument::FindFlag)find_flags) && wrap->isChecked()) { + // nothing found from the current position to the end, reposition cursor and beginning + editor->moveCursor(QTextCursor::Start, QTextCursor::MoveAnchor); + editor->find(text, (QTextDocument::FindFlag)find_flags); + } + } +} + +/* ---------------------------------------------------------------------- */ + +void FindAndReplace::replace_next() +{ + auto text = search->text(); + if (text.isEmpty()) return; + + auto cursor = editor->textCursor(); + auto flag = withcase->isChecked() ? 
Qt::CaseSensitive : Qt::CaseInsensitive; + + // if selected text at cursor location matches search text, replace + if (QString::compare(cursor.selectedText(), search->text(), flag) == 0) + cursor.insertText(replace->text()); + + find_next(); +} + +/* ---------------------------------------------------------------------- */ + +void FindAndReplace::replace_all() +{ + auto text = search->text(); + if (text.isEmpty()) return; + + // drop selection if we have one + auto cursor = editor->textCursor(); + if (cursor.hasSelection()) cursor.movePosition(QTextCursor::Left); + + find_next(); + cursor = editor->textCursor(); + + // keep replacing until find_next() does not find anything anymore + while (cursor.hasSelection()) { + cursor.insertText(replace->text()); + find_next(); + cursor = editor->textCursor(); + } +} + +/* ---------------------------------------------------------------------- */ + +void FindAndReplace::quit() +{ + LammpsGui *main = nullptr; + for (QWidget *widget : QApplication::topLevelWidgets()) + if (widget->objectName() == "LammpsGui") main = dynamic_cast(widget); + if (main) main->quit(); +} + +// Local Variables: +// c-basic-offset: 4 +// End: diff --git a/tools/lammps-gui/findandreplace.h b/tools/lammps-gui/findandreplace.h new file mode 100644 index 0000000000..7c34c50543 --- /dev/null +++ b/tools/lammps-gui/findandreplace.h @@ -0,0 +1,46 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#ifndef FIND_AND_REPLACE_H +#define FIND_AND_REPLACE_H + +#include "codeeditor.h" +#include + +class QLineEdit; +class QCheckBox; + +class FindAndReplace : public QDialog { + Q_OBJECT + +public: + explicit FindAndReplace(CodeEditor *_editor, QWidget *parent = nullptr); + ~FindAndReplace() = default; + +private slots: + void find_next(); + void replace_next(); + void replace_all(); + void quit(); + +private: + CodeEditor *editor; + QLineEdit *search, *replace; + QCheckBox *withcase, *wrap, *whole; +}; + +#endif + +// Local Variables: +// c-basic-offset: 4 +// End: diff --git a/tools/lammps-gui/helpers.cpp b/tools/lammps-gui/helpers.cpp index 47d09f4515..cac5d86482 100644 --- a/tools/lammps-gui/helpers.cpp +++ b/tools/lammps-gui/helpers.cpp @@ -13,9 +13,12 @@ #include "helpers.h" +#include +#include #include #include #include +#include #include #include @@ -84,6 +87,16 @@ void purge_directory(const QString &dir) } } +// compare black level of foreground and background color +bool is_light_theme() +{ + QPalette p; + int fg = p.brush(QPalette::Active, QPalette::WindowText).color().black(); + int bg = p.brush(QPalette::Active, QPalette::Window).color().black(); + + return (fg > bg); +} + // Local Variables: // c-basic-offset: 4 // End: diff --git a/tools/lammps-gui/helpers.h b/tools/lammps-gui/helpers.h index b22b6e72c4..b3269e6d61 100644 --- a/tools/lammps-gui/helpers.h +++ b/tools/lammps-gui/helpers.h @@ -28,6 +28,9 @@ extern bool has_exe(const QString &exe); // recursively purge a directory extern void purge_directory(const QString &dir); +// flag if light or dark theme +extern bool is_light_theme(); + #endif // Local Variables: // c-basic-offset: 4 diff --git a/tools/lammps-gui/highlighter.cpp b/tools/lammps-gui/highlighter.cpp index 3f79c7a73e..ec196438d9 100644 --- a/tools/lammps-gui/highlighter.cpp +++ b/tools/lammps-gui/highlighter.cpp @@ -12,6 +12,28 @@ 
------------------------------------------------------------------------- */ #include "highlighter.h" +#include "helpers.h" +#include + +// workaround for Qt-5.12 +#if QT_VERSION < QT_VERSION_CHECK(5, 14, 0) +namespace QColorConstants { +const QColor Red = QColor::fromRgb(0xff, 0x00, 0x00); +const QColor Green = QColor::fromRgb(0x00, 0xff, 0x00); +const QColor Blue = QColor::fromRgb(0x00, 0x00, 0xff); +const QColor Cyan = QColor::fromRgb(0x00, 0xff, 0xff); +const QColor Magenta = QColor::fromRgb(0xff, 0x00, 0xff); +const QColor Yellow = QColor::fromRgb(0xff, 0xff, 0x00); +namespace Svg { +const QColor dodgerblue = QColor::fromRgb(0x1e, 0x90, 0xff); +const QColor indianred = QColor::fromRgb(0xcd, 0x5c, 0x5c); +const QColor lightcoral = QColor::fromRgb(0xf0, 0x80, 0x80); +const QColor lightgray = QColor::fromRgb(0xd3, 0xd3, 0xd3); +const QColor lightgreen = QColor::fromRgb(0x90, 0xee, 0x90); +const QColor lightskyblue = QColor::fromRgb(0x87, 0xce, 0xfa); +} // namespace Svg +} // namespace QColorConstants +#endif Highlighter::Highlighter(QTextDocument *parent) : QSyntaxHighlighter(parent), @@ -54,27 +76,54 @@ Highlighter::Highlighter(QTextDocument *parent) : isTriple(QStringLiteral("[^\"]*\"\"\"[^\"]*")), isString(QStringLiteral("(\".+?\"|'.+?'|\"\"\".*\"\"\")")), in_triple(false) { - formatNumber.setForeground(Qt::blue); - formatString.setForeground(Qt::darkGreen); - formatString.setFontWeight(QFont::Normal); - formatComment.setForeground(Qt::red); - formatSpecial.setForeground(Qt::darkMagenta); - formatSpecial.setFontWeight(QFont::Bold); - formatParticle.setForeground(Qt::darkRed); - formatParticle.setFontWeight(QFont::Bold); - formatRun.setForeground(Qt::darkBlue); - formatRun.setFontWeight(QFont::Bold); - formatVariable.setForeground(Qt::darkGray); - formatVariable.setFontWeight(QFont::Bold); + if (is_light_theme()) { + // syntax colors for light themes + formatNumber.setForeground(Qt::blue); + formatString.setForeground(Qt::darkGreen); + 
formatString.setFontWeight(QFont::Normal); + formatComment.setForeground(Qt::red); + formatSpecial.setForeground(Qt::darkMagenta); + formatSpecial.setFontWeight(QFont::Bold); + formatParticle.setForeground(Qt::darkRed); + formatParticle.setFontWeight(QFont::Bold); + formatRun.setForeground(Qt::darkBlue); + formatRun.setFontWeight(QFont::Bold); + formatVariable.setForeground(Qt::darkGray); + formatVariable.setFontWeight(QFont::Bold); - formatOutput.setForeground(Qt::darkYellow); - formatOutput.setFontWeight(QFont::Bold); - formatRead.setForeground(Qt::magenta); - formatRead.setFontWeight(QFont::Bold); - formatLattice.setForeground(Qt::darkGreen); - formatLattice.setFontWeight(QFont::Bold); - formatSetup.setForeground(Qt::darkCyan); - formatSetup.setFontWeight(QFont::Bold); + formatOutput.setForeground(Qt::darkYellow); + formatOutput.setFontWeight(QFont::Bold); + formatRead.setForeground(Qt::magenta); + formatRead.setFontWeight(QFont::Bold); + formatLattice.setForeground(Qt::darkGreen); + formatLattice.setFontWeight(QFont::Bold); + formatSetup.setForeground(Qt::darkCyan); + formatSetup.setFontWeight(QFont::Bold); + } else { + // syntax colors for dark themes + formatNumber.setForeground(QColorConstants::Svg::dodgerblue); + formatString.setForeground(QColorConstants::Green); + formatString.setFontWeight(QFont::Normal); + formatComment.setForeground(QColorConstants::Red); + formatComment.setFontWeight(QFont::Bold); + formatSpecial.setForeground(QColorConstants::Magenta); + formatSpecial.setFontWeight(QFont::Bold); + formatParticle.setForeground(QColorConstants::Svg::indianred); + formatParticle.setFontWeight(QFont::Bold); + formatRun.setForeground(QColorConstants::Svg::lightskyblue); + formatRun.setFontWeight(QFont::Bold); + formatVariable.setForeground(QColorConstants::Svg::lightgray); + formatVariable.setFontWeight(QFont::Bold); + + formatOutput.setForeground(QColorConstants::Yellow); + formatOutput.setFontWeight(QFont::Bold); + 
formatRead.setForeground(QColorConstants::Svg::lightcoral); + formatRead.setFontWeight(QFont::Bold); + formatLattice.setForeground(QColorConstants::Svg::lightgreen); + formatLattice.setFontWeight(QFont::Bold); + formatSetup.setForeground(QColorConstants::Cyan); + formatSetup.setFontWeight(QFont::Bold); + } } void Highlighter::highlightBlock(const QString &text) diff --git a/tools/lammps-gui/icons/search.png b/tools/lammps-gui/icons/search.png new file mode 100644 index 0000000000..1790200d49 Binary files /dev/null and b/tools/lammps-gui/icons/search.png differ diff --git a/tools/lammps-gui/imageviewer.cpp b/tools/lammps-gui/imageviewer.cpp index 725d557606..0b3c58abd3 100644 --- a/tools/lammps-gui/imageviewer.cpp +++ b/tools/lammps-gui/imageviewer.cpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -31,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -135,9 +137,10 @@ static const QString blank(" "); ImageViewer::ImageViewer(const QString &fileName, LammpsWrapper *_lammps, QWidget *parent) : QDialog(parent), menuBar(new QMenuBar), imageLabel(new QLabel), scrollArea(new QScrollArea), - saveAsAct(nullptr), copyAct(nullptr), cmdAct(nullptr), zoomInAct(nullptr), zoomOutAct(nullptr), - normalSizeAct(nullptr), lammps(_lammps), group("all"), filename(fileName), useelements(false), - usediameter(false), usesigma(false) + buttonBox(nullptr), scaleFactor(1.0), atomSize(1.0), saveAsAct(nullptr), copyAct(nullptr), + cmdAct(nullptr), zoomInAct(nullptr), zoomOutAct(nullptr), normalSizeAct(nullptr), + lammps(_lammps), group("all"), filename(fileName), useelements(false), usediameter(false), + usesigma(false) { imageLabel->setBackgroundRole(QPalette::Base); imageLabel->setSizePolicy(QSizePolicy::Ignored, QSizePolicy::Ignored); @@ -163,6 +166,13 @@ ImageViewer::ImageViewer(const QString &fileName, LammpsWrapper *_lammps, QWidge renderstatus->setEnabled(false); renderstatus->setToolTip("Render status"); 
renderstatus->setObjectName("renderstatus"); + auto *asize = new QLineEdit(QString::number(atomSize)); + auto *valid = new QDoubleValidator(1.0e-10, 1.0e10, 10, asize); + asize->setValidator(valid); + asize->setObjectName("atomSize"); + asize->setToolTip("Set Atom size"); + asize->setEnabled(false); + asize->hide(); settings.beginGroup("snapshot"); auto *xval = new QSpinBox; xval->setRange(100, 10000); @@ -179,6 +189,7 @@ ImageViewer::ImageViewer(const QString &fileName, LammpsWrapper *_lammps, QWidge yval->setToolTip("Set rendered image height"); yval->setMinimumSize(bsize); settings.endGroup(); + connect(asize, &QLineEdit::editingFinished, this, &ImageViewer::set_atom_size); connect(xval, &QAbstractSpinBox::editingFinished, this, &ImageViewer::edit_size); connect(yval, &QAbstractSpinBox::editingFinished, this, &ImageViewer::edit_size); @@ -249,6 +260,11 @@ ImageViewer::ImageViewer(const QString &fileName, LammpsWrapper *_lammps, QWidge menuLayout->addWidget(menuBar); menuLayout->addWidget(renderstatus); + menuLayout->addWidget(new QLabel(" Atom Size: ")); + menuLayout->addWidget(asize); + // hide item initially + menuLayout->itemAt(2)->widget()->setObjectName("AtomLabel"); + menuLayout->itemAt(2)->widget()->hide(); menuLayout->addWidget(new QLabel(" Width: ")); menuLayout->addWidget(xval); menuLayout->addWidget(new QLabel(" Height: ")); @@ -307,7 +323,7 @@ ImageViewer::ImageViewer(const QString &fileName, LammpsWrapper *_lammps, QWidge doanti->setChecked(antialias); scaleFactor = 1.0; - resize(image.width() + 20, image.height() + 75); + resize(image.width() + 25, image.height() + 80); scrollArea->setVisible(true); updateActions(); @@ -356,6 +372,13 @@ void ImageViewer::reset_view() createImage(); } +void ImageViewer::set_atom_size() +{ + auto *field = qobject_cast(sender()); + atomSize = field->text().toDouble(); + createImage(); +} + void ImageViewer::edit_size() { auto *field = qobject_cast(sender()); @@ -560,10 +583,43 @@ void ImageViewer::createImage() if 
(useelements || usediameter || usesigma) { auto *button = findChild("vdw"); if (button) button->setEnabled(true); + auto *edit = findChild("atomSize"); + if (edit) { + edit->setEnabled(false); + edit->hide(); + } + auto *label = findChild("AtomLabel"); + if (label) { + label->setEnabled(false); + label->hide(); + } + } else { adiams.clear(); auto *button = findChild("vdw"); if (button) button->setEnabled(false); + + auto *label = findChild("AtomLabel"); + if (label) { + label->setEnabled(true); + label->show(); + } + auto *edit = findChild("atomSize"); + if (edit) { + if (!edit->isEnabled()) { + edit->setEnabled(true); + edit->show(); + // initialize with lattice spacing + auto *xlattice = (const double *)lammps->extract_global("xlattice"); + if (xlattice) atomSize = *xlattice; + edit->setText(QString::number(atomSize)); + } + atomSize = edit->text().toDouble(); + } + if (atomSize != 1.0) { + for (int i = 1; i <= ntypes; ++i) + adiams += QString("adiam %1 %2 ").arg(i).arg(atomSize); + } } // color @@ -607,6 +663,7 @@ void ImageViewer::createImage() dumpcmd += " backcolor " + settings.value("background", "black").toString(); if (useelements) dumpcmd += blank + elements + blank + adiams + blank; if (usesigma) dumpcmd += blank + adiams + blank; + if (!useelements && !usesigma && (atomSize != 1.0)) dumpcmd += blank + adiams + blank; settings.endGroup(); last_dump_cmd = dumpcmd; @@ -617,10 +674,10 @@ void ImageViewer::createImage() const QImage newImage = reader.read(); dumpfile.remove(); - // read of new image failed. Don't try to scale and load it. + // read of new image failed. nothing left to do. 
if (newImage.isNull()) return; - // scale back to achieve antialiasing + // show show image image = newImage; imageLabel->setPixmap(QPixmap::fromImage(image)); imageLabel->adjustSize(); diff --git a/tools/lammps-gui/imageviewer.h b/tools/lammps-gui/imageviewer.h index 8e72cea7bf..0c175bd03f 100644 --- a/tools/lammps-gui/imageviewer.h +++ b/tools/lammps-gui/imageviewer.h @@ -34,13 +34,15 @@ class ImageViewer : public QDialog { Q_OBJECT public: - explicit ImageViewer(const QString &fileName, LammpsWrapper *_lammps, QWidget *parent = nullptr); + explicit ImageViewer(const QString &fileName, LammpsWrapper *_lammps, + QWidget *parent = nullptr); private slots: void saveAs(); void copy(); void quit(); + void set_atom_size(); void edit_size(); void reset_view(); void toggle_ssao(); @@ -75,7 +77,8 @@ private: QLabel *imageLabel; QScrollArea *scrollArea; QDialogButtonBox *buttonBox; - double scaleFactor = 1.0; + double scaleFactor; + double atomSize; QAction *saveAsAct; QAction *copyAct; diff --git a/tools/lammps-gui/lammps-gui.appdata.xml b/tools/lammps-gui/lammps-gui.appdata.xml index 95652129a1..4c8843957e 100644 --- a/tools/lammps-gui/lammps-gui.appdata.xml +++ b/tools/lammps-gui/lammps-gui.appdata.xml @@ -54,8 +54,22 @@ + + + + + + + Resolve plugin mode issues. 
+ Add -p command line flag to override path to liblammps.so + + + Added search and replace functionality + Converged command line argument parsing using Qt facilities + Added dark mode adjustments to syntax highlighting + Add field to enter Atom size, if not determined otherwise diff --git a/tools/lammps-gui/lammpsgui.cpp b/tools/lammps-gui/lammpsgui.cpp index 8162507d02..fe6b8c5391 100644 --- a/tools/lammps-gui/lammpsgui.cpp +++ b/tools/lammps-gui/lammpsgui.cpp @@ -15,6 +15,7 @@ #include "chartviewer.h" #include "fileviewer.h" +#include "findandreplace.h" #include "helpers.h" #include "highlighter.h" #include "imageviewer.h" @@ -68,18 +69,13 @@ static const QString blank(" "); static constexpr int BUFLEN = 256; -LammpsGui::LammpsGui(QWidget *parent, const char *filename) : +LammpsGui::LammpsGui(QWidget *parent, const QString &filename) : QMainWindow(parent), ui(new Ui::LammpsGui), highlighter(nullptr), capturer(nullptr), status(nullptr), logwindow(nullptr), imagewindow(nullptr), chartwindow(nullptr), slideshow(nullptr), logupdater(nullptr), dirstatus(nullptr), progress(nullptr), prefdialog(nullptr), lammpsstatus(nullptr), varwindow(nullptr), wizard(nullptr), runner(nullptr), is_running(false), run_counter(0) { -#if QT_VERSION < QT_VERSION_CHECK(6, 0, 0) - // register QList only needed for Qt5 - qRegisterMetaTypeStreamOperators>("QList"); -#endif - docver = ""; ui->setupUi(this); this->setCentralWidget(ui->textEdit); @@ -90,27 +86,34 @@ LammpsGui::LammpsGui(QWidget *parent, const char *filename) : // use $HOME if we get dropped to "/" like on macOS if (current_dir == "/") current_dir = QDir::homePath(); inspectList.clear(); + setAutoFillBackground(true); // restore and initialize settings QSettings settings; #if defined(LAMMPS_GUI_USE_PLUGIN) - plugin_path.clear(); - std::string deffile = settings.value("plugin_path", "liblammps.so").toString().toStdString(); - for (const char *libfile : {deffile.c_str(), "./liblammps.so", "liblammps.dylib", - "./liblammps.dylib", 
"liblammps.dll"}) { - if (lammps.load_lib(libfile)) { - auto canonical = QFileInfo(libfile).canonicalFilePath(); - plugin_path = canonical.toStdString(); - settings.setValue("plugin_path", canonical); - break; + plugin_path = + QFileInfo(settings.value("plugin_path", "liblammps.so").toString()).canonicalFilePath(); + if (!lammps.load_lib(plugin_path.toStdString().c_str())) { + // fall back to defaults + for (const char *libfile : + {"./liblammps.so", "liblammps.dylib", "./liblammps.dylib", "liblammps.dll"}) { + if (lammps.load_lib(libfile)) { + plugin_path = QFileInfo(libfile).canonicalFilePath(); + settings.setValue("plugin_path", plugin_path); + break; + } else { + plugin_path.clear(); + } } } - if (plugin_path.empty()) { + if (plugin_path.isEmpty()) { // none of the plugin paths could load, remove key settings.remove("plugin_path"); - QMessageBox::critical(this, "Error", "Cannot open LAMMPS shared library file"); + QMessageBox::critical(this, "Error", + "Cannot open LAMMPS shared library file.\n" + "Use -p command line flag to specify a path to the library."); exit(1); } #endif @@ -205,6 +208,7 @@ LammpsGui::LammpsGui(QWidget *parent, const char *filename) : connect(ui->actionPaste, &QAction::triggered, this, &LammpsGui::paste); connect(ui->actionUndo, &QAction::triggered, this, &LammpsGui::undo); connect(ui->actionRedo, &QAction::triggered, this, &LammpsGui::redo); + connect(ui->actionSearchAndReplace, &QAction::triggered, this, &LammpsGui::findandreplace); connect(ui->actionRun_Buffer, &QAction::triggered, this, &LammpsGui::run_buffer); connect(ui->actionRun_File, &QAction::triggered, this, &LammpsGui::run_file); connect(ui->actionStop_LAMMPS, &QAction::triggered, this, &LammpsGui::stop_run); @@ -278,7 +282,7 @@ LammpsGui::LammpsGui(QWidget *parent, const char *filename) : dirstatus->show(); ui->statusbar->addWidget(progress); - if (filename) { + if (filename.size() > 0) { open_file(filename); } else { setWindowTitle("LAMMPS-GUI - Editor - *unknown*"); @@ 
-502,7 +506,7 @@ void LammpsGui::start_exe() void LammpsGui::update_recents(const QString &filename) { QSettings settings; - recent = settings.value("recent").value>(); + if (settings.contains("recent")) recent = settings.value("recent").value>(); for (int i = 0; i < recent.size(); ++i) { QFileInfo fi(recent[i]); @@ -514,7 +518,10 @@ void LammpsGui::update_recents(const QString &filename) if (!filename.isEmpty() && !recent.contains(filename)) recent.prepend(filename); if (recent.size() > 5) recent.removeLast(); - settings.setValue("recent", QVariant::fromValue(recent)); + if (recent.size() > 0) + settings.setValue("recent", QVariant::fromValue(recent)); + else + settings.remove("recent"); ui->action_1->setVisible(false); if ((recent.size() > 0) && !recent[0].isEmpty()) { @@ -1428,12 +1435,16 @@ void LammpsGui::setFont(const QFont &newfont) void LammpsGui::about() { std::string version = "This is LAMMPS-GUI version " LAMMPS_GUI_VERSION; - version += " using Qt version " QT_VERSION_STR "\n"; + version += " using Qt version " QT_VERSION_STR; + if (is_light_theme()) + version += " using light theme\n"; + else + version += " using dark theme\n"; if (lammps.has_plugin()) { version += "LAMMPS library loaded as plugin"; - if (!plugin_path.empty()) { + if (!plugin_path.isEmpty()) { version += " from file "; - version += plugin_path; + version += plugin_path.toStdString(); } } else { version += "LAMMPS library linked to executable"; @@ -1862,6 +1873,14 @@ void LammpsGui::edit_variables() } } +void LammpsGui::findandreplace() +{ + FindAndReplace find(ui->textEdit, this); + find.setFont(font()); + find.setObjectName("find"); + find.exec(); +} + void LammpsGui::preferences() { QSettings settings; diff --git a/tools/lammps-gui/lammpsgui.h b/tools/lammps-gui/lammpsgui.h index 0cf6677149..a269e1a384 100644 --- a/tools/lammps-gui/lammpsgui.h +++ b/tools/lammps-gui/lammpsgui.h @@ -68,7 +68,7 @@ class LammpsGui : public QMainWindow { friend class Tutorial2Wizard; public: - 
LammpsGui(QWidget *parent = nullptr, const char *filename = nullptr); + LammpsGui(QWidget *parent = nullptr, const QString &filename = QString()); ~LammpsGui() override; protected: @@ -115,6 +115,7 @@ private slots: void paste(); void undo(); void redo(); + void findandreplace(); void run_buffer() { do_run(true); } void run_file() { do_run(false); } @@ -171,7 +172,7 @@ private: LammpsWrapper lammps; LammpsRunner *runner; QString docver; - std::string plugin_path; + QString plugin_path; bool is_running; int run_counter; std::vector lammps_args; diff --git a/tools/lammps-gui/lammpsgui.qrc b/tools/lammps-gui/lammpsgui.qrc index cf9dd20dda..8111edd44b 100644 --- a/tools/lammps-gui/lammpsgui.qrc +++ b/tools/lammps-gui/lammpsgui.qrc @@ -58,6 +58,7 @@ icons/preferences-desktop.png icons/process-stop.png icons/run-file.png + icons/search.png icons/system-box.png icons/system-help.png icons/system-run.png diff --git a/tools/lammps-gui/lammpsgui.ui b/tools/lammps-gui/lammpsgui.ui index 1517168327..045e0f84a8 100644 --- a/tools/lammps-gui/lammpsgui.ui +++ b/tools/lammps-gui/lammpsgui.ui @@ -62,6 +62,8 @@ + + @@ -312,12 +314,23 @@ Ctrl+Shift+H + + + + + + &Find and Replace... + + + Ctrl+F + + - Pre&ferences... + P&references... 
Ctrl+P diff --git a/tools/lammps-gui/lammpswrapper.cpp b/tools/lammps-gui/lammpswrapper.cpp index ed2bde1c9f..70d271f547 100644 --- a/tools/lammps-gui/lammpswrapper.cpp +++ b/tools/lammps-gui/lammpswrapper.cpp @@ -115,7 +115,7 @@ double LammpsWrapper::extract_variable(const char *keyword) } double val = *((double *)ptr); #if defined(LAMMPS_GUI_USE_PLUGIN) - ptr = ((liblammpsplugin_t *)plugin_handle)->free(ptr); + ((liblammpsplugin_t *)plugin_handle)->free(ptr); #else lammps_free(ptr); #endif diff --git a/tools/lammps-gui/main.cpp b/tools/lammps-gui/main.cpp index 736a37d58b..53bdaca8fd 100644 --- a/tools/lammps-gui/main.cpp +++ b/tools/lammps-gui/main.cpp @@ -17,6 +17,9 @@ #include #include #include +#include +#include +#include #include #include @@ -27,6 +30,11 @@ int main(int argc, char *argv[]) { Q_INIT_RESOURCE(lammpsgui); +#if QT_VERSION < QT_VERSION_CHECK(6, 0, 0) + // register QList only needed for Qt5 + qRegisterMetaTypeStreamOperators>("QList"); +#endif + QApplication app(argc, argv); // enforce using the plain ASCII C locale within the GUI. QLocale::setDefault(QLocale::c()); @@ -40,13 +48,32 @@ int main(int argc, char *argv[]) "\nA graphical editor for LAMMPS input files with syntax highlighting and\n" "auto-completion that can run LAMMPS directly. 
It has built-in capabilities\n" "for monitoring, visualization, plotting, and capturing console output."); +#if defined(LAMMPS_GUI_USE_PLUGIN) + QCommandLineOption plugindir(QStringList() << "p" + << "pluginpath", + "Path to LAMMPS shared library", "path"); + parser.addOption(plugindir); +#endif + parser.addHelpOption(); parser.addVersionOption(); parser.addPositionalArgument("file", "The LAMMPS input file to open (optional)."); - parser.process(app); // this removes known arguments + parser.process(app); - const char *infile = nullptr; - if (argc > 1) infile = argv[1]; +#if defined(LAMMPS_GUI_USE_PLUGIN) + if (parser.isSet(plugindir)) { + QStringList pluginpath = parser.values(plugindir); + if (pluginpath.length() > 0) { + QSettings settings; + settings.setValue("plugin_path", QString(pluginpath.at(0))); + settings.sync(); + } + } +#endif + + QString infile; + QStringList args = parser.positionalArguments(); + if (args.size() > 0) infile = args[0]; LammpsGui w(nullptr, infile); w.show(); return app.exec(); diff --git a/tools/lammps-gui/org.lammps.lammps-gui.yml b/tools/lammps-gui/org.lammps.lammps-gui.yml index 26f35531ce..548c4ce93e 100644 --- a/tools/lammps-gui/org.lammps.lammps-gui.yml +++ b/tools/lammps-gui/org.lammps.lammps-gui.yml @@ -108,5 +108,5 @@ modules: - -D BUILD_TOOLS=yes sources: - type: git - url: https://github.com/akohlmey/lammps.git - branch: collected-small-fixes + url: https://github.com/lammps/lammps.git + branch: release diff --git a/tools/msi2lmp/src/GetParameters.c b/tools/msi2lmp/src/GetParameters.c index 921e37491f..b7796de939 100644 --- a/tools/msi2lmp/src/GetParameters.c +++ b/tools/msi2lmp/src/GetParameters.c @@ -44,7 +44,7 @@ void GetParameters() for (i=0; i < no_atom_types; i++) { backwards = -1; - strncpy(potential_types[0],atomtypes[i].potential,5); + memcpy(potential_types[0],atomtypes[i].potential,5); k = find_match(1,potential_types,ff_atomtypes,&backwards); if (k < 0) { printf(" Unable to find mass for 
%s\n",atomtypes[i].potential); @@ -63,7 +63,7 @@ void GetParameters() for (i=0; i < no_atom_types; i++) { backwards = 0; for (j=0; j < 2; j++) atomtypes[i].params[j] = 0.0; - strncpy(potential_types[0],atomtypes[i].potential,5); + memcpy(potential_types[0],atomtypes[i].potential,5); k = find_match(1,potential_types,ff_vdw,&backwards); if (k < 0) { get_equivs(1,potential_types,equiv_types); @@ -101,7 +101,7 @@ void GetParameters() printf("\n Atom Types, Masses and VDW Parameters\n"); for (i=0; i < no_atom_types; i++) { printf(" %3s %8.4f %8.4f %8.4f\n", - atomtypes[i].potential,atomtypes[i].mass, atomtypes[i].params[0],atomtypes[i].params[1]); + atomtypes[i].potential,atomtypes[i].mass,atomtypes[i].params[0],atomtypes[i].params[1]); } } @@ -115,8 +115,7 @@ void GetParameters() backwards = 0; for (j=0; j < 4; j++) bondtypes[i].params[j] = 0.0; for (j=0; j < 2; j++) - strncpy(potential_types[j], - atomtypes[bondtypes[i].types[j]].potential,5); + memcpy(potential_types[j],atomtypes[bondtypes[i].types[j]].potential,5); k = find_match(2,potential_types,ff_bond,&backwards); if (k < 0) { get_equivs(2,potential_types,equiv_types); @@ -172,7 +171,7 @@ void GetParameters() backwards = 0; for (j=0; j < 4; j++) angletypes[i].params[j] = 0.0; for (j=0; j < 3; j++) - strncpy(potential_types[j],atomtypes[angletypes[i].types[j]].potential,5); + memcpy(potential_types[j],atomtypes[angletypes[i].types[j]].potential,5); k = find_match(3,potential_types,ff_ang,&backwards); if (k < 0) { get_equivs(3,potential_types,equiv_types); @@ -295,8 +294,7 @@ void GetParameters() for (j=0; j < 6; j++) dihedraltypes[i].params[j] = 0.0; for (j=0; j < 4; j++) - strncpy(potential_types[j], - atomtypes[dihedraltypes[i].types[j]].potential,5); + memcpy(potential_types[j],atomtypes[dihedraltypes[i].types[j]].potential,5); backwards = 0; k = find_match(4,potential_types,ff_tor,&backwards); @@ -614,8 +612,7 @@ void GetParameters() for (i=0; i < no_oop_types; i++) { for (j=0; j < 3; j++) 
ooptypes[i].params[j] = 0.0; for (j=0; j < 4; j++) - strncpy(potential_types[j], - atomtypes[ooptypes[i].types[j]].potential,5); + memcpy(potential_types[j],atomtypes[ooptypes[i].types[j]].potential,5); k = find_improper_body_data(potential_types,ff_oop,&rearrange); if (k < 0) { @@ -658,8 +655,7 @@ void GetParameters() for (j=0; j < 3; j++) ooptypes[i].params[j] = 0.0; for (j=0; j < 4; j++) - strncpy(potential_types[j], - atomtypes[ooptypes[i].types[j]].potential,5); + memcpy(potential_types[j],atomtypes[ooptypes[i].types[j]].potential,5); k = find_trigonal_body_data(potential_types,ff_oop); if (k < 0) { get_equivs(5,potential_types,equiv_types); @@ -715,8 +711,7 @@ void GetParameters() for (j=0; j < 6; j++) ooptypes[i].angleangle_params[j] = 0.0; for (j=0; j < 4; j++) - strncpy(potential_types[j], - atomtypes[ooptypes[i].types[j]].potential,5); + memcpy(potential_types[j],atomtypes[ooptypes[i].types[j]].potential,5); tabc = get_t0(ooptypes[i].types[0], @@ -763,8 +758,7 @@ void GetParameters() for (i=0; i < no_angleangle_types; i++) { for (j=0; j < 6; j++) angleangletypes[i].params[j] = 0.0; for (j=0; j < 4; j++) - strncpy(potential_types[j], - atomtypes[angleangletypes[i].types[j]].potential,5); + memcpy(potential_types[j],atomtypes[angleangletypes[i].types[j]].potential,5); tabc = get_t0(angleangletypes[i].types[0], angleangletypes[i].types[1], @@ -841,44 +835,44 @@ int find_improper_body_data(char types1[][5],struct FrcFieldItem item, /* a b d c */ *rearrange_ptr = 1; - strncpy(mirror_types[0],types1[0],5); - strncpy(mirror_types[1],types1[1],5); - strncpy(mirror_types[2],types1[3],5); - strncpy(mirror_types[3],types1[2],5); + memcpy(mirror_types[0],types1[0],5); + memcpy(mirror_types[1],types1[1],5); + memcpy(mirror_types[2],types1[3],5); + memcpy(mirror_types[3],types1[2],5); k = find_match(4,mirror_types,item,&backwards); if (k >= 0) return k; /* d b a c */ *rearrange_ptr = 2; - strncpy(mirror_types[0],types1[3],5); - strncpy(mirror_types[2],types1[0],5); - 
strncpy(mirror_types[3],types1[2],5); + memcpy(mirror_types[0],types1[3],5); + memcpy(mirror_types[2],types1[0],5); + memcpy(mirror_types[3],types1[2],5); k = find_match(4,mirror_types,item,&backwards); if (k >= 0) return k; /* d b c a */ *rearrange_ptr = 3; - strncpy(mirror_types[2],types1[2],5); - strncpy(mirror_types[3],types1[0],5); + memcpy(mirror_types[2],types1[2],5); + memcpy(mirror_types[3],types1[0],5); k = find_match(4,mirror_types,item,&backwards); if (k >= 0) return k; /* c b a d */ *rearrange_ptr = 4; - strncpy(mirror_types[0],types1[2],5); - strncpy(mirror_types[2],types1[0],5); - strncpy(mirror_types[3],types1[3],5); + memcpy(mirror_types[0],types1[2],5); + memcpy(mirror_types[2],types1[0],5); + memcpy(mirror_types[3],types1[3],5); k = find_match(4,mirror_types,item,&backwards); if (k >= 0) return k; /* c b d a */ *rearrange_ptr = 5; - strncpy(mirror_types[2],types1[3],5); - strncpy(mirror_types[3],types1[0],5); + memcpy(mirror_types[2],types1[3],5); + memcpy(mirror_types[3],types1[0],5); k = find_match(4,mirror_types,item,&backwards); return k; } @@ -973,39 +967,39 @@ int find_trigonal_body_data(char types1[][5],struct FrcFieldItem item) /* a b d c */ - strncpy(mirror_types[0],types1[0],5); - strncpy(mirror_types[1],types1[1],5); - strncpy(mirror_types[2],types1[3],5); - strncpy(mirror_types[3],types1[2],5); + memcpy(mirror_types[0],types1[0],5); + memcpy(mirror_types[1],types1[1],5); + memcpy(mirror_types[2],types1[3],5); + memcpy(mirror_types[3],types1[2],5); k = find_match(4,mirror_types,item,&backwards); if (k >= 0) return k; /* d b a c */ - strncpy(mirror_types[0],types1[3],5); - strncpy(mirror_types[2],types1[0],5); - strncpy(mirror_types[3],types1[2],5); + memcpy(mirror_types[0],types1[3],5); + memcpy(mirror_types[2],types1[0],5); + memcpy(mirror_types[3],types1[2],5); k = find_match(4,mirror_types,item,&backwards); if (k >= 0) return k; /* d b c a */ - strncpy(mirror_types[2],types1[2],5); - strncpy(mirror_types[3],types1[0],5); + 
memcpy(mirror_types[2],types1[2],5); + memcpy(mirror_types[3],types1[0],5); k = find_match(4,mirror_types,item,&backwards); if (k >= 0) return k; /* c b a d */ - strncpy(mirror_types[0],types1[2],5); - strncpy(mirror_types[2],types1[0],5); - strncpy(mirror_types[3],types1[3],5); + memcpy(mirror_types[0],types1[2],5); + memcpy(mirror_types[2],types1[0],5); + memcpy(mirror_types[3],types1[3],5); k = find_match(4,mirror_types,item,&backwards); if (k >= 0) return k; /* c b d a */ - strncpy(mirror_types[2],types1[3],5); - strncpy(mirror_types[3],types1[0],5); + memcpy(mirror_types[2],types1[3],5); + memcpy(mirror_types[3],types1[0],5); k = find_match(4,mirror_types,item,&backwards); return k; } @@ -1015,41 +1009,41 @@ int find_angleangle_data(char types1[][5],struct FrcFieldItem item,int kloc[3]) int k,backwards = -1; char mirror_types[4][5]; - strncpy(mirror_types[1],types1[1],5); + memcpy(mirror_types[1],types1[1],5); /* go for first parameter a b c d or d b c a */ k = find_match(4,types1,item,&backwards); if (k < 0) { - strncpy(mirror_types[0],types1[3],5); - strncpy(mirror_types[2],types1[2],5); - strncpy(mirror_types[3],types1[0],5); + memcpy(mirror_types[0],types1[3],5); + memcpy(mirror_types[2],types1[2],5); + memcpy(mirror_types[3],types1[0],5); k = find_match(4,mirror_types,item,&backwards); } kloc[0] = k; /* go for second parameter d b a c or c b a d */ - strncpy(mirror_types[0],types1[3],5); - strncpy(mirror_types[2],types1[0],5); - strncpy(mirror_types[3],types1[2],5); + memcpy(mirror_types[0],types1[3],5); + memcpy(mirror_types[2],types1[0],5); + memcpy(mirror_types[3],types1[2],5); k = find_match(4,mirror_types,item,&backwards); if (k < 0) { - strncpy(mirror_types[0],types1[2],5); - strncpy(mirror_types[3],types1[3],5); + memcpy(mirror_types[0],types1[2],5); + memcpy(mirror_types[3],types1[3],5); k = find_match(4,mirror_types,item,&backwards); } kloc[1] = k; /* go for third parameter a b d c or c b d a */ - strncpy(mirror_types[0],types1[0],5); - 
strncpy(mirror_types[2],types1[3],5); - strncpy(mirror_types[3],types1[2],5); + memcpy(mirror_types[0],types1[0],5); + memcpy(mirror_types[2],types1[3],5); + memcpy(mirror_types[3],types1[2],5); k = find_match(4,mirror_types,item,&backwards); if (k < 0) { - strncpy(mirror_types[0],types1[2],5); - strncpy(mirror_types[3],types1[0],5); + memcpy(mirror_types[0],types1[2],5); + memcpy(mirror_types[3],types1[0],5); k = find_match(4,mirror_types,item,&backwards); } kloc[2] = k; @@ -1250,25 +1244,25 @@ void get_equivs(int ic,char potential_types[][5],char equiv_types[][5]) switch (ic) { case 1: k = find_equiv_type(potential_types[0]); - if (k > -1) strncpy(equiv_types[0],equivalence.data[k].ff_types[1],5); + if (k > -1) memcpy(equiv_types[0],equivalence.data[k].ff_types[1],5); break; case 2: for (i=0; i < 2; i++) { k = find_equiv_type(potential_types[i]); - if (k > -1) strncpy(equiv_types[i],equivalence.data[k].ff_types[2],5); + if (k > -1) memcpy(equiv_types[i],equivalence.data[k].ff_types[2],5); } break; case 3: for (i=0; i < 3; i++) { k = find_equiv_type(potential_types[i]); - if (k > -1) strncpy(equiv_types[i],equivalence.data[k].ff_types[3],5); + if (k > -1) memcpy(equiv_types[i],equivalence.data[k].ff_types[3],5); } break; case 4: for (i=0; i < 4; i++) { k = find_equiv_type(potential_types[i]); - if (k > -1) strncpy(equiv_types[i],equivalence.data[k].ff_types[4],5); + if (k > -1) memcpy(equiv_types[i],equivalence.data[k].ff_types[4],5); } break; @@ -1276,7 +1270,7 @@ void get_equivs(int ic,char potential_types[][5],char equiv_types[][5]) for (i=0; i < 4; i++) { k = find_equiv_type(potential_types[i]); if (k > -1) - strncpy(equiv_types[i],equivalence.data[k].ff_types[5],5); + memcpy(equiv_types[i],equivalence.data[k].ff_types[5],5); } break; default: diff --git a/tools/msi2lmp/src/MakeLists.c b/tools/msi2lmp/src/MakeLists.c index 18b261a561..4f9a9f1548 100644 --- a/tools/msi2lmp/src/MakeLists.c +++ b/tools/msi2lmp/src/MakeLists.c @@ -476,7 +476,7 @@ void 
build_atomtypes_list() { int j,k,n,match,atom_type=0; - strncpy(atomtypes[0].potential,atoms[0].potential,5); + memcpy(atomtypes[0].potential,atoms[0].potential,5); atoms[0].type = 0; atomtypes[0].no_connect = atoms[0].no_connect; @@ -497,7 +497,7 @@ void build_atomtypes_list() if (match == 0) { atom_type = n; atomtypes[n].no_connect = atoms[j].no_connect; - strncpy(atomtypes[n++].potential,atoms[j].potential,5); + memcpy(atomtypes[n++].potential,atoms[j].potential,5); } if (n >= MAX_ATOM_TYPES) { fprintf(stderr,"Too many atom types (> 100) - error\n"); diff --git a/tools/msi2lmp/src/ReadMdfFile.c b/tools/msi2lmp/src/ReadMdfFile.c index 96a6a01ab2..253121d001 100644 --- a/tools/msi2lmp/src/ReadMdfFile.c +++ b/tools/msi2lmp/src/ReadMdfFile.c @@ -144,7 +144,7 @@ void ReadMdfFile(void) molecule[n].residue[j].end = i; molecule[n].residue[++j].start = i; - strncpy(molecule[n].residue[j].name,atoms[i].residue_string,MAX_NAME); + memcpy(molecule[n].residue[j].name,atoms[i].residue_string,MAX_NAME); } } molecule[n].residue[j].end = molecule[n].end; @@ -167,10 +167,9 @@ void ReadMdfFile(void) for (n=0; n < no_molecules; n++) { for (j=0; j < molecule[n].no_residues; j++) { - for (i=molecule[n].residue[j].start; i < molecule[n].residue[j].end; - i++) { + for (i=molecule[n].residue[j].start; i < molecule[n].residue[j].end; i++) { for (l=0; l < atoms[i].no_connect; l++) { - strncpy(temp_string,atoms[i].connections[l],MAX_STRING); + memcpy(temp_string,atoms[i].connections[l],MAX_STRING); temp_residue = strtok(temp_string,":"); temp_atom_name = strtok(NULL,"%"); diff --git a/tools/msi2lmp/src/SearchAndFill.c b/tools/msi2lmp/src/SearchAndFill.c index 35de0c81fe..a26554aeaa 100644 --- a/tools/msi2lmp/src/SearchAndFill.c +++ b/tools/msi2lmp/src/SearchAndFill.c @@ -212,7 +212,7 @@ void SearchAndFill(struct FrcFieldItem *item) item->data[replace].ver = version; item->data[replace].ref = reference; for (i=0; i < item->number_of_members; i++) { - 
strncpy(item->data[replace].ff_types[i],atom_types[i],5); + memcpy(item->data[replace].ff_types[i],atom_types[i],5); } for (i=0; i < item->number_of_parameters; i++) { item->data[replace].ff_param[i] = parameters[i]; @@ -230,7 +230,7 @@ void SearchAndFill(struct FrcFieldItem *item) item->data[ctr].ver = version; item->data[ctr].ref = reference; for (i=0; i < item->number_of_members; i++) { - strncpy(item->data[ctr].ff_types[i],atom_types[i],5); + memcpy(item->data[ctr].ff_types[i],atom_types[i],5); } for (i=0; i < item->number_of_parameters; i++) { item->data[ctr].ff_param[i] = parameters[i]; diff --git a/tools/msi2lmp/src/msi2lmp.c b/tools/msi2lmp/src/msi2lmp.c index 68aaf566b2..8228fd0f7f 100644 --- a/tools/msi2lmp/src/msi2lmp.c +++ b/tools/msi2lmp/src/msi2lmp.c @@ -2,6 +2,10 @@ * * msi2lmp.exe * +* v3.9.11 AK - Replace call to strncpy() with memcpy() when copying atom type strings +* to avoid problems with fixed array sizes +* - update tests for newer LAMMPS versions +* * v3.9.10 AK - Substitute UTF-8 characters in .frc files with known ASCII equivalents * - add help message output * diff --git a/tools/msi2lmp/src/msi2lmp.h b/tools/msi2lmp/src/msi2lmp.h index 3e1de85cbe..ca745bdc6e 100644 --- a/tools/msi2lmp/src/msi2lmp.h +++ b/tools/msi2lmp/src/msi2lmp.h @@ -36,7 +36,7 @@ #include /* IWYU pragma: export */ -#define MSI2LMP_VERSION "v3.9.10 / 10 Mar 2023" +#define MSI2LMP_VERSION "v3.9.11 / 6 Sep 2024" #define PI_180 0.01745329251994329576 diff --git a/tools/msi2lmp/test/data-compare.pl b/tools/msi2lmp/test/data-compare.pl index 7411f2a017..38bbed051d 100755 --- a/tools/msi2lmp/test/data-compare.pl +++ b/tools/msi2lmp/test/data-compare.pl @@ -2,11 +2,12 @@ # Tool to validate and compare two LAMMPS data files # with "inexact" floating point comparisons # July 2013 by Axel Kohlmeyer +# last update September 2024 by Axel Kohlmeyer use strict; use warnings; -my $version = 'v0.3'; +my $version = 'v0.4'; # delta for floating point comparisons. 
my $small = 1.0e-4; @@ -596,6 +597,26 @@ sub read_data { last; } + # apply sort + if ($data->{nbonds} > 1) { + my ($did_swap, $num) = (1, $data->{nbonds}); + while ($did_swap) { + $did_swap = 0; + for ($i=0; $i < $num-1; ++$i) { + $j = $i+1; + if (($data->{bond1}[$i] > $data->{bond1}[$j]) + or (($data->{bond1}[$i] == $data->{bond1}[$j]) + and ($data->{bond2}[$i] > $data->{bond2}[$j]))) { + $did_swap = 1; + my @tmp = ($data->{bondt}[$i], $data->{bond1}[$i], $data->{bond2}[$i]); + ($data->{bondt}[$i], $data->{bond1}[$i], $data->{bond2}[$i]) = + ($data->{bondt}[$j], $data->{bond1}[$j], $data->{bond2}[$j]); + ($data->{bondt}[$j], $data->{bond1}[$j], $data->{bond2}[$j]) = @tmp; + } + } + --$num; + } + } } elsif ($1 eq "Angles") { $data->{anglet} = []; $data->{angle1} = []; @@ -638,6 +659,33 @@ sub read_data { last; } + # apply sort + if ($data->{nangles} > 1) { + my ($did_swap, $num) = (1, $data->{nangles}); + while ($did_swap) { + $did_swap = 0; + for ($i=0; $i < $num-1; ++$i) { + $j = $i+1; + if (($data->{angle1}[$i] > $data->{angle1}[$j]) + or (($data->{angle1}[$i] == $data->{angle1}[$j]) + and ($data->{angle2}[$i] > $data->{angle2}[$j])) + or (($data->{angle1}[$i] == $data->{angle1}[$j]) + and ($data->{angle2}[$i] == $data->{angle2}[$j]) + and ($data->{angle3}[$i] > $data->{angle3}[$j]))) { + $did_swap = 1; + my @tmp = ($data->{anglet}[$i], $data->{angle1}[$i], + $data->{angle2}[$i], $data->{angle3}[$i]); + ($data->{anglet}[$i], $data->{angle1}[$i], + $data->{angle2}[$i], $data->{angle3}[$i]) = + ($data->{anglet}[$j], $data->{angle1}[$j], + $data->{angle2}[$j], $data->{angle3}[$j]); + ($data->{anglet}[$j], $data->{angle1}[$j], + $data->{angle2}[$j], $data->{angle3}[$j]) = @tmp; + } + } + --$num; + } + } } elsif ($1 eq "Dihedrals") { $data->{dihedralt} = []; $data->{dihedral1} = []; @@ -684,6 +732,38 @@ sub read_data { last; } + # apply sort + if ($data->{ndihedrals} > 1) { + my ($did_swap, $num) = (1, $data->{ndihedrals}); + while ($did_swap) { + $did_swap = 0; + 
for ($i=0; $i < $num-1; ++$i) { + $j = $i+1; + if (($data->{dihedral1}[$i] > $data->{dihedral1}[$j]) + or (($data->{dihedral1}[$i] == $data->{dihedral1}[$j]) + and ($data->{dihedral2}[$i] > $data->{dihedral2}[$j])) + or (($data->{dihedral1}[$i] == $data->{dihedral1}[$j]) + and ($data->{dihedral2}[$i] == $data->{dihedral2}[$j]) + and ($data->{dihedral3}[$i] > $data->{dihedral3}[$j])) + or (($data->{dihedral1}[$i] == $data->{dihedral1}[$j]) + and ($data->{dihedral2}[$i] == $data->{dihedral2}[$j]) + and ($data->{dihedral3}[$i] == $data->{dihedral3}[$j]) + and ($data->{dihedral4}[$i] > $data->{dihedral4}[$j]))) { + $did_swap = 1; + my @tmp = ($data->{dihedralt}[$i], $data->{dihedral1}[$i], + $data->{dihedral2}[$i], $data->{dihedral3}[$i], + $data->{dihedral4}[$i]); + ($data->{dihedralt}[$i], $data->{dihedral1}[$i], $data->{dihedral2}[$i], + $data->{dihedral3}[$i], $data->{dihedral4}[$i]) = + ($data->{dihedralt}[$j], $data->{dihedral1}[$j], + $data->{dihedral2}[$j], $data->{dihedral3}[$j], $data->{dihedral4}[$j]); + ($data->{dihedralt}[$j], $data->{dihedral1}[$j], $data->{dihedral2}[$j], + $data->{dihedral3}[$j], $data->{dihedral4}[$j]) = @tmp; + } + } + --$num; + } + } } elsif ($1 eq "Impropers") { $data->{impropert} = []; $data->{improper1} = []; @@ -730,6 +810,39 @@ sub read_data { last; } + # apply sort + if ($data->{nimpropers} > 1) { + my ($did_swap, $num) = (1, $data->{nimpropers}); + while ($did_swap) { + $did_swap = 0; + for ($i=0; $i < $num-1; ++$i) { + $j = $i+1; + if (($data->{improper1}[$i] > $data->{improper1}[$j]) + or (($data->{improper1}[$i] == $data->{improper1}[$j]) + and ($data->{improper2}[$i] > $data->{improper2}[$j])) + or (($data->{improper1}[$i] == $data->{improper1}[$j]) + and ($data->{improper2}[$i] == $data->{improper2}[$j]) + and ($data->{improper3}[$i] > $data->{improper3}[$j])) + or (($data->{improper1}[$i] == $data->{improper1}[$j]) + and ($data->{improper2}[$i] == $data->{improper2}[$j]) + and ($data->{improper3}[$i] == 
$data->{improper3}[$j]) + and ($data->{improper4}[$i] > $data->{improper4}[$j]))) { + $did_swap = 1; + my @tmp = ($data->{impropert}[$i], $data->{improper1}[$i], + $data->{improper2}[$i], $data->{improper3}[$i], + $data->{improper4}[$i]); + ($data->{impropert}[$i], $data->{improper1}[$i], $data->{improper2}[$i], + $data->{improper3}[$i], $data->{improper4}[$i]) = + ($data->{impropert}[$j], $data->{improper1}[$j], + $data->{improper2}[$j], $data->{improper3}[$j], $data->{improper4}[$j]); + ($data->{impropert}[$j], $data->{improper1}[$j], $data->{improper2}[$j], + $data->{improper3}[$j], $data->{improper4}[$j]) = @tmp; + } + } + --$num; + } + } + } else { die "Bad data: $_"; } diff --git a/tools/msi2lmp/test/in.PyAC_bulk-clayff b/tools/msi2lmp/test/in.PyAC_bulk-clayff index 1c6663a679..923891b920 100644 --- a/tools/msi2lmp/test/in.PyAC_bulk-clayff +++ b/tools/msi2lmp/test/in.PyAC_bulk-clayff @@ -5,9 +5,9 @@ atom_style full pair_style lj/cut/coul/long 15.0 pair_modify mix geometric bond_style harmonic -kspace_style pppm 1.0e-5 read_data PyAC_bulk-clayff.data +kspace_style pppm 1.0e-5 thermo_style multi minimize 0.0 0.0 100 1000 diff --git a/tools/msi2lmp/test/reference/PyAC_bulk-clayff.data b/tools/msi2lmp/test/reference/PyAC_bulk-clayff.data index 6b6602d69d..0a304ea296 100644 --- a/tools/msi2lmp/test/reference/PyAC_bulk-clayff.data +++ b/tools/msi2lmp/test/reference/PyAC_bulk-clayff.data @@ -1,4 +1,4 @@ -LAMMPS data file. msi2lmp v3.9.8 / 06 Oct 2016 / CGCMM for PyAC_bulk-clayff +LAMMPS data file. msi2lmp v3.9.10 / 10 Mar 2023 / CGCMM for PyAC_bulk-clayff 1280 atoms 128 bonds @@ -11,7 +11,7 @@ LAMMPS data file. 
msi2lmp v3.9.8 / 06 Oct 2016 / CGCMM for PyAC_bulk-clayff -10.320000000 10.320000000 xlo xhi -17.931646038 17.931646038 ylo yhi - -9.196614681 9.196614681 zlo zhi + -9.189871922 9.189871922 zlo zhi 0.225338675 -3.393877748 -0.363656523 xy xz yz Masses @@ -676,27 +676,27 @@ Atoms # full 638 1 3 -1.050000 -0.590722660 -5.885450799 -3.293650032 1 1 0 # ob 639 1 3 -1.050000 0.277898824 -8.390728187 -3.055632561 1 1 0 # ob 640 1 5 0.425000 -4.694842406 -6.184112754 -1.194847627 1 1 0 # ho - 641 1 1 1.575000 0.889889112 1.315464043 9.189872068 0 0 0 # ao - 642 1 2 2.100000 5.101961261 9.067732250 -6.523590700 0 0 1 # st - 643 1 2 2.100000 5.138292081 3.048848378 -6.517157818 0 0 1 # st - 644 1 3 -1.050000 4.852463935 0.176965538 -8.141927041 0 0 1 # ob - 645 1 3 -1.050000 5.291802919 2.921349398 -8.139170101 0 0 1 # ob - 646 1 4 -0.950000 2.692063605 1.889886595 -8.209932150 0 0 1 # oh - 647 1 3 -1.050000 1.366795516 3.605171499 -5.905112111 0 0 1 # ob - 648 1 3 -1.050000 4.839493963 1.584366125 -5.909707262 0 0 1 # ob - 649 1 3 -1.050000 3.970872479 4.089643514 -6.147724732 0 0 1 # ob - 650 1 5 0.425000 3.783613862 1.883028080 -8.008509666 0 0 1 # ho - 651 1 1 1.575000 3.498056417 5.798375353 9.189872068 0 0 0 # ao - 652 1 2 2.100000 2.493793956 4.584820940 -6.523590700 0 0 1 # st - 653 1 2 2.100000 2.586459538 7.531759689 -6.517157818 0 0 1 # st - 654 1 3 -1.050000 2.300631392 4.659876849 -8.141927041 0 0 1 # ob - 655 1 3 -1.050000 2.739970376 7.404260709 -8.139170101 0 0 1 # ob - 656 1 4 -0.950000 5.300230909 6.372797905 -8.209932150 0 0 1 # oh - 657 1 3 -1.050000 3.974962821 8.088082810 -5.905112111 0 0 1 # ob - 658 1 3 -1.050000 2.287661420 6.067277436 -5.909707262 0 0 1 # ob - 659 1 3 -1.050000 1.419039936 8.572554825 -6.147724732 0 0 1 # ob - 660 1 5 0.425000 6.391781167 6.365939391 -8.008509666 0 0 1 # ho - 661 1 1 1.575000 0.932567998 7.286701985 9.189872068 0 0 0 # ao + 641 1 1 1.575000 4.283766860 1.679120566 -9.189871776 0 0 1 # ao + 642 1 2 2.100000 
5.101961261 9.067732250 -6.510105182 0 0 1 # st + 643 1 2 2.100000 5.138292081 3.048848378 -6.503672300 0 0 1 # st + 644 1 3 -1.050000 4.852463935 0.176965538 -8.128441523 0 0 1 # ob + 645 1 3 -1.050000 5.291802919 2.921349398 -8.125684583 0 0 1 # ob + 646 1 4 -0.950000 2.692063605 1.889886595 -8.196446632 0 0 1 # oh + 647 1 3 -1.050000 1.366795516 3.605171499 -5.891626593 0 0 1 # ob + 648 1 3 -1.050000 4.839493963 1.584366125 -5.896221744 0 0 1 # ob + 649 1 3 -1.050000 3.970872479 4.089643514 -6.134239214 0 0 1 # ob + 650 1 5 0.425000 3.783613862 1.883028080 -7.995024148 0 0 1 # ho + 651 1 1 1.575000 6.891934165 6.162031876 -9.189871776 0 0 1 # ao + 652 1 2 2.100000 2.493793956 4.584820940 -6.510105182 0 0 1 # st + 653 1 2 2.100000 2.586459538 7.531759689 -6.503672300 0 0 1 # st + 654 1 3 -1.050000 2.300631392 4.659876849 -8.128441523 0 0 1 # ob + 655 1 3 -1.050000 2.739970376 7.404260709 -8.125684583 0 0 1 # ob + 656 1 4 -0.950000 5.300230909 6.372797905 -8.196446632 0 0 1 # oh + 657 1 3 -1.050000 3.974962821 8.088082810 -5.891626593 0 0 1 # ob + 658 1 3 -1.050000 2.287661420 6.067277436 -5.896221744 0 0 1 # ob + 659 1 3 -1.050000 1.419039936 8.572554825 -6.134239214 0 0 1 # ob + 660 1 5 0.425000 6.391781167 6.365939391 -7.995024148 0 0 1 # ho + 661 1 1 1.575000 4.326445746 7.650358508 -9.189871776 0 0 1 # ao 662 1 2 2.100000 0.114373597 -0.101909699 6.510105475 0 0 0 # st 663 1 2 2.100000 0.078042778 5.916974173 6.503672592 0 0 0 # st 664 1 3 -1.050000 0.363870923 8.788857013 8.128441816 0 0 0 # ob @@ -706,7 +706,7 @@ Atoms # full 668 1 3 -1.050000 0.376840895 7.381456426 5.896222036 0 0 0 # ob 669 1 3 -1.050000 1.245462379 4.876179037 6.134239507 0 0 0 # ob 670 1 5 0.425000 1.432720996 7.082794471 7.995024441 0 0 0 # ho - 671 1 1 1.575000 -1.675599307 2.803790675 9.189872068 0 0 0 # ao + 671 1 1 1.575000 1.718278441 3.167447198 -9.189871776 0 0 1 # ao 672 1 2 2.100000 2.722540902 4.381001611 6.510105475 0 0 0 # st 673 1 2 2.100000 2.629875320 1.434062862 
6.503672592 0 0 0 # st 674 1 3 -1.050000 2.915703466 4.305945702 8.128441816 0 0 0 # ob @@ -716,27 +716,27 @@ Atoms # full 678 1 3 -1.050000 2.928673438 2.898545115 5.896222036 0 0 0 # ob 679 1 3 -1.050000 3.797294922 0.393267726 6.134239507 0 0 0 # ob 680 1 5 0.425000 -1.175446309 2.599883160 7.995024441 0 0 0 # ho - 681 1 1 1.575000 6.049888960 1.315464043 9.189872068 0 0 0 # ao - 682 1 2 2.100000 -10.378038892 9.067732250 -6.523590700 1 0 1 # st - 683 1 2 2.100000 -10.341708072 3.048848378 -6.517157818 1 0 1 # st - 684 1 3 -1.050000 10.012463782 0.176965538 -8.141927041 0 0 1 # ob - 685 1 3 -1.050000 -10.188197233 2.921349398 -8.139170101 1 0 1 # ob - 686 1 4 -0.950000 7.852063452 1.889886595 -8.209932150 0 0 1 # oh - 687 1 3 -1.050000 6.526795364 3.605171499 -5.905112111 0 0 1 # ob - 688 1 3 -1.050000 -10.640506189 1.584366125 -5.909707262 1 0 1 # ob - 689 1 3 -1.050000 9.130872326 4.089643514 -6.147724732 0 0 1 # ob - 690 1 5 0.425000 8.943613710 1.883028080 -8.008509666 0 0 1 # ho - 691 1 1 1.575000 -11.981943736 5.798375353 9.189872068 1 0 0 # ao - 692 1 2 2.100000 7.653793804 4.584820940 -6.523590700 0 0 1 # st - 693 1 2 2.100000 7.746459385 7.531759689 -6.517157818 0 0 1 # st - 694 1 3 -1.050000 7.460631240 4.659876849 -8.141927041 0 0 1 # ob - 695 1 3 -1.050000 7.899970224 7.404260709 -8.139170101 0 0 1 # ob - 696 1 4 -0.950000 -10.179769243 6.372797905 -8.209932150 1 0 1 # oh - 697 1 3 -1.050000 9.134962668 8.088082810 -5.905112111 0 0 1 # ob - 698 1 3 -1.050000 7.447661268 6.067277436 -5.909707262 0 0 1 # ob - 699 1 3 -1.050000 6.579039784 8.572554825 -6.147724732 0 0 1 # ob - 700 1 5 0.425000 -9.088218986 6.365939391 -8.008509666 1 0 1 # ho - 701 1 1 1.575000 6.092567845 7.286701985 9.189872068 0 0 0 # ao + 681 1 1 1.575000 9.443766708 1.679120566 -9.189871776 0 0 1 # ao + 682 1 2 2.100000 -10.378038892 9.067732250 -6.510105182 1 0 1 # st + 683 1 2 2.100000 -10.341708072 3.048848378 -6.503672300 1 0 1 # st + 684 1 3 -1.050000 10.012463782 0.176965538 
-8.128441523 0 0 1 # ob + 685 1 3 -1.050000 -10.188197233 2.921349398 -8.125684583 1 0 1 # ob + 686 1 4 -0.950000 7.852063452 1.889886595 -8.196446632 0 0 1 # oh + 687 1 3 -1.050000 6.526795364 3.605171499 -5.891626593 0 0 1 # ob + 688 1 3 -1.050000 -10.640506189 1.584366125 -5.896221744 1 0 1 # ob + 689 1 3 -1.050000 9.130872326 4.089643514 -6.134239214 0 0 1 # ob + 690 1 5 0.425000 8.943613710 1.883028080 -7.995024148 0 0 1 # ho + 691 1 1 1.575000 -8.588065988 6.162031876 -9.189871776 1 0 1 # ao + 692 1 2 2.100000 7.653793804 4.584820940 -6.510105182 0 0 1 # st + 693 1 2 2.100000 7.746459385 7.531759689 -6.503672300 0 0 1 # st + 694 1 3 -1.050000 7.460631240 4.659876849 -8.128441523 0 0 1 # ob + 695 1 3 -1.050000 7.899970224 7.404260709 -8.125684583 0 0 1 # ob + 696 1 4 -0.950000 -10.179769243 6.372797905 -8.196446632 1 0 1 # oh + 697 1 3 -1.050000 9.134962668 8.088082810 -5.891626593 0 0 1 # ob + 698 1 3 -1.050000 7.447661268 6.067277436 -5.896221744 0 0 1 # ob + 699 1 3 -1.050000 6.579039784 8.572554825 -6.134239214 0 0 1 # ob + 700 1 5 0.425000 -9.088218986 6.365939391 -7.995024148 1 0 1 # ho + 701 1 1 1.575000 9.486445593 7.650358508 -9.189871776 0 0 1 # ao 702 1 2 2.100000 5.274373445 -0.101909699 6.510105475 0 0 0 # st 703 1 2 2.100000 5.238042625 5.916974173 6.503672592 0 0 0 # st 704 1 3 -1.050000 5.523870771 8.788857013 8.128441816 0 0 0 # ob @@ -746,7 +746,7 @@ Atoms # full 708 1 3 -1.050000 5.536840742 7.381456426 5.896222036 0 0 0 # ob 709 1 3 -1.050000 6.405462227 4.876179037 6.134239507 0 0 0 # ob 710 1 5 0.425000 6.592720843 7.082794471 7.995024441 0 0 0 # ho - 711 1 1 1.575000 3.484400541 2.803790675 9.189872068 0 0 0 # ao + 711 1 1 1.575000 6.878278289 3.167447198 -9.189871776 0 0 1 # ao 712 1 2 2.100000 -12.757459251 4.381001611 6.510105475 1 0 0 # st 713 1 2 2.100000 -12.850124832 1.434062862 6.503672592 1 0 0 # st 714 1 3 -1.050000 -12.564296687 4.305945702 8.128441816 1 0 0 # ob @@ -756,27 +756,27 @@ Atoms # full 718 1 3 -1.050000 
-12.551326715 2.898545115 5.896222036 1 0 0 # ob 719 1 3 -1.050000 -11.682705231 0.393267726 6.134239507 1 0 0 # ob 720 1 5 0.425000 3.984553539 2.599883160 7.995024441 0 0 0 # ho - 721 1 1 1.575000 -9.430111193 1.315464043 9.189872068 1 0 0 # ao - 722 1 2 2.100000 -5.218039044 9.067732250 -6.523590700 1 0 1 # st - 723 1 2 2.100000 -5.181708225 3.048848378 -6.517157818 1 0 1 # st - 724 1 3 -1.050000 -5.467536370 0.176965538 -8.141927041 1 0 1 # ob - 725 1 3 -1.050000 -5.028197386 2.921349398 -8.139170101 1 0 1 # ob - 726 1 4 -0.950000 -7.627936701 1.889886595 -8.209932150 1 0 1 # oh - 727 1 3 -1.050000 -8.953204789 3.605171499 -5.905112111 1 0 1 # ob - 728 1 3 -1.050000 -5.480506342 1.584366125 -5.909707262 1 0 1 # ob - 729 1 3 -1.050000 -6.349127826 4.089643514 -6.147724732 1 0 1 # ob - 730 1 5 0.425000 -6.536386443 1.883028080 -8.008509666 1 0 1 # ho - 731 1 1 1.575000 -6.821943888 5.798375353 9.189872068 1 0 0 # ao - 732 1 2 2.100000 -7.826206349 4.584820940 -6.523590700 1 0 1 # st - 733 1 2 2.100000 -7.733540767 7.531759689 -6.517157818 1 0 1 # st - 734 1 3 -1.050000 -8.019368913 4.659876849 -8.141927041 1 0 1 # ob - 735 1 3 -1.050000 -7.580029929 7.404260709 -8.139170101 1 0 1 # ob - 736 1 4 -0.950000 -5.019769396 6.372797905 -8.209932150 1 0 1 # oh - 737 1 3 -1.050000 -6.345037484 8.088082810 -5.905112111 1 0 1 # ob - 738 1 3 -1.050000 -8.032338885 6.067277436 -5.909707262 1 0 1 # ob - 739 1 3 -1.050000 -8.900960369 8.572554825 -6.147724732 1 0 1 # ob - 740 1 5 0.425000 -3.928219138 6.365939391 -8.008509666 1 0 1 # ho - 741 1 1 1.575000 -9.387432307 7.286701985 9.189872068 1 0 0 # ao + 721 1 1 1.575000 -6.036233445 1.679120566 -9.189871776 1 0 1 # ao + 722 1 2 2.100000 -5.218039044 9.067732250 -6.510105182 1 0 1 # st + 723 1 2 2.100000 -5.181708225 3.048848378 -6.503672300 1 0 1 # st + 724 1 3 -1.050000 -5.467536370 0.176965538 -8.128441523 1 0 1 # ob + 725 1 3 -1.050000 -5.028197386 2.921349398 -8.125684583 1 0 1 # ob + 726 1 4 -0.950000 -7.627936701 
1.889886595 -8.196446632 1 0 1 # oh + 727 1 3 -1.050000 -8.953204789 3.605171499 -5.891626593 1 0 1 # ob + 728 1 3 -1.050000 -5.480506342 1.584366125 -5.896221744 1 0 1 # ob + 729 1 3 -1.050000 -6.349127826 4.089643514 -6.134239214 1 0 1 # ob + 730 1 5 0.425000 -6.536386443 1.883028080 -7.995024148 1 0 1 # ho + 731 1 1 1.575000 -3.428066140 6.162031876 -9.189871776 1 0 1 # ao + 732 1 2 2.100000 -7.826206349 4.584820940 -6.510105182 1 0 1 # st + 733 1 2 2.100000 -7.733540767 7.531759689 -6.503672300 1 0 1 # st + 734 1 3 -1.050000 -8.019368913 4.659876849 -8.128441523 1 0 1 # ob + 735 1 3 -1.050000 -7.580029929 7.404260709 -8.125684583 1 0 1 # ob + 736 1 4 -0.950000 -5.019769396 6.372797905 -8.196446632 1 0 1 # oh + 737 1 3 -1.050000 -6.345037484 8.088082810 -5.891626593 1 0 1 # ob + 738 1 3 -1.050000 -8.032338885 6.067277436 -5.896221744 1 0 1 # ob + 739 1 3 -1.050000 -8.900960369 8.572554825 -6.134239214 1 0 1 # ob + 740 1 5 0.425000 -3.928219138 6.365939391 -7.995024148 1 0 1 # ho + 741 1 1 1.575000 -5.993554559 7.650358508 -9.189871776 1 0 1 # ao 742 1 2 2.100000 -10.205626708 -0.101909699 6.510105475 1 0 0 # st 743 1 2 2.100000 -10.241957528 5.916974173 6.503672592 1 0 0 # st 744 1 3 -1.050000 -9.956129382 8.788857013 8.128441816 1 0 0 # ob @@ -786,7 +786,7 @@ Atoms # full 748 1 3 -1.050000 -9.943159410 7.381456426 5.896222036 1 0 0 # ob 749 1 3 -1.050000 -9.074537926 4.876179037 6.134239507 1 0 0 # ob 750 1 5 0.425000 -8.887279309 7.082794471 7.995024441 1 0 0 # ho - 751 1 1 1.575000 -11.995599612 2.803790675 9.189872068 1 0 0 # ao + 751 1 1 1.575000 -8.601721864 3.167447198 -9.189871776 1 0 1 # ao 752 1 2 2.100000 -7.597459403 4.381001611 6.510105475 1 0 0 # st 753 1 2 2.100000 -7.690124985 1.434062862 6.503672592 1 0 0 # st 754 1 3 -1.050000 -7.404296839 4.305945702 8.128441816 1 0 0 # ob @@ -796,27 +796,27 @@ Atoms # full 758 1 3 -1.050000 -7.391326867 2.898545115 5.896222036 1 0 0 # ob 759 1 3 -1.050000 -6.522705383 0.393267726 6.134239507 1 0 0 # ob 760 1 
5 0.425000 -11.495446614 2.599883160 7.995024441 1 0 0 # ho - 761 1 1 1.575000 -4.270111345 1.315464043 9.189872068 1 0 0 # ao - 762 1 2 2.100000 -0.058039197 9.067732250 -6.523590700 1 0 1 # st - 763 1 2 2.100000 -0.021708377 3.048848378 -6.517157818 1 0 1 # st - 764 1 3 -1.050000 -0.307536523 0.176965538 -8.141927041 1 0 1 # ob - 765 1 3 -1.050000 0.131802461 2.921349398 -8.139170101 1 0 1 # ob - 766 1 4 -0.950000 -2.467936853 1.889886595 -8.209932150 1 0 1 # oh - 767 1 3 -1.050000 -3.793204941 3.605171499 -5.905112111 1 0 1 # ob - 768 1 3 -1.050000 -0.320506495 1.584366125 -5.909707262 1 0 1 # ob - 769 1 3 -1.050000 -1.189127979 4.089643514 -6.147724732 1 0 1 # ob - 770 1 5 0.425000 -1.376386596 1.883028080 -8.008509666 1 0 1 # ho - 771 1 1 1.575000 -1.661944041 5.798375353 9.189872068 1 0 0 # ao - 772 1 2 2.100000 -2.666206502 4.584820940 -6.523590700 1 0 1 # st - 773 1 2 2.100000 -2.573540920 7.531759689 -6.517157818 1 0 1 # st - 774 1 3 -1.050000 -2.859369065 4.659876849 -8.141927041 1 0 1 # ob - 775 1 3 -1.050000 -2.420030081 7.404260709 -8.139170101 1 0 1 # ob - 776 1 4 -0.950000 0.140230451 6.372797905 -8.209932150 1 0 1 # oh - 777 1 3 -1.050000 -1.185037637 8.088082810 -5.905112111 1 0 1 # ob - 778 1 3 -1.050000 -2.872339037 6.067277436 -5.909707262 1 0 1 # ob - 779 1 3 -1.050000 -3.740960522 8.572554825 -6.147724732 1 0 1 # ob - 780 1 5 0.425000 1.231780709 6.365939391 -8.008509666 1 0 1 # ho - 781 1 1 1.575000 -4.227432460 7.286701985 9.189872068 1 0 0 # ao + 761 1 1 1.575000 -0.876233597 1.679120566 -9.189871776 1 0 1 # ao + 762 1 2 2.100000 -0.058039197 9.067732250 -6.510105182 1 0 1 # st + 763 1 2 2.100000 -0.021708377 3.048848378 -6.503672300 1 0 1 # st + 764 1 3 -1.050000 -0.307536523 0.176965538 -8.128441523 1 0 1 # ob + 765 1 3 -1.050000 0.131802461 2.921349398 -8.125684583 1 0 1 # ob + 766 1 4 -0.950000 -2.467936853 1.889886595 -8.196446632 1 0 1 # oh + 767 1 3 -1.050000 -3.793204941 3.605171499 -5.891626593 1 0 1 # ob + 768 1 3 -1.050000 
-0.320506495 1.584366125 -5.896221744 1 0 1 # ob + 769 1 3 -1.050000 -1.189127979 4.089643514 -6.134239214 1 0 1 # ob + 770 1 5 0.425000 -1.376386596 1.883028080 -7.995024148 1 0 1 # ho + 771 1 1 1.575000 1.731933707 6.162031876 -9.189871776 1 0 1 # ao + 772 1 2 2.100000 -2.666206502 4.584820940 -6.510105182 1 0 1 # st + 773 1 2 2.100000 -2.573540920 7.531759689 -6.503672300 1 0 1 # st + 774 1 3 -1.050000 -2.859369065 4.659876849 -8.128441523 1 0 1 # ob + 775 1 3 -1.050000 -2.420030081 7.404260709 -8.125684583 1 0 1 # ob + 776 1 4 -0.950000 0.140230451 6.372797905 -8.196446632 1 0 1 # oh + 777 1 3 -1.050000 -1.185037637 8.088082810 -5.891626593 1 0 1 # ob + 778 1 3 -1.050000 -2.872339037 6.067277436 -5.896221744 1 0 1 # ob + 779 1 3 -1.050000 -3.740960522 8.572554825 -6.134239214 1 0 1 # ob + 780 1 5 0.425000 1.231780709 6.365939391 -7.995024148 1 0 1 # ho + 781 1 1 1.575000 -0.833554712 7.650358508 -9.189871776 1 0 1 # ao 782 1 2 2.100000 -5.045626860 -0.101909699 6.510105475 1 0 0 # st 783 1 2 2.100000 -5.081957680 5.916974173 6.503672592 1 0 0 # st 784 1 3 -1.050000 -4.796129535 8.788857013 8.128441816 1 0 0 # ob @@ -826,7 +826,7 @@ Atoms # full 788 1 3 -1.050000 -4.783159563 7.381456426 5.896222036 1 0 0 # ob 789 1 3 -1.050000 -3.914538079 4.876179037 6.134239507 1 0 0 # ob 790 1 5 0.425000 -3.727279462 7.082794471 7.995024441 1 0 0 # ho - 791 1 1 1.575000 -6.835599765 2.803790675 9.189872068 1 0 0 # ao + 791 1 1 1.575000 -3.441722017 3.167447198 -9.189871776 1 0 1 # ao 792 1 2 2.100000 -2.437459556 4.381001611 6.510105475 1 0 0 # st 793 1 2 2.100000 -2.530125137 1.434062862 6.503672592 1 0 0 # st 794 1 3 -1.050000 -2.244296992 4.305945702 8.128441816 1 0 0 # ob @@ -836,27 +836,27 @@ Atoms # full 798 1 3 -1.050000 -2.231327020 2.898545115 5.896222036 1 0 0 # ob 799 1 3 -1.050000 -1.362705536 0.393267726 6.134239507 1 0 0 # ob 800 1 5 0.425000 -6.335446766 2.599883160 7.995024441 1 0 0 # ho - 801 1 1 1.575000 0.946223874 10.281286664 9.189872068 0 0 0 # ao - 802 
1 2 2.100000 4.932957348 -17.829737203 -6.523590700 0 1 1 # st - 803 1 2 2.100000 5.194626842 12.014671000 -6.517157818 0 0 1 # st - 804 1 3 -1.050000 4.908798697 9.142788159 -8.141927041 0 0 1 # ob - 805 1 3 -1.050000 5.348137681 11.887172019 -8.139170101 0 0 1 # ob - 806 1 4 -0.950000 2.748398366 10.855709216 -8.209932150 0 0 1 # oh - 807 1 3 -1.050000 1.423130278 12.570994121 -5.905112111 0 0 1 # ob - 808 1 3 -1.050000 4.895828725 10.550188747 -5.909707262 0 0 1 # ob - 809 1 3 -1.050000 4.027207241 13.055466135 -6.147724732 0 0 1 # ob - 810 1 5 0.425000 3.839948624 10.848850702 -8.008509666 0 0 1 # ho - 811 1 1 1.575000 3.554391179 14.764197975 9.189872068 0 0 0 # ao - 812 1 2 2.100000 2.550128718 13.550643561 -6.523590700 0 0 1 # st - 813 1 2 2.100000 2.642794300 16.497582311 -6.517157818 0 0 1 # st - 814 1 3 -1.050000 2.356966154 13.625699470 -8.141927041 0 0 1 # ob - 815 1 3 -1.050000 2.796305138 16.370083330 -8.139170101 0 0 1 # ob - 816 1 4 -0.950000 5.356565671 15.338620527 -8.209932150 0 0 1 # oh - 817 1 3 -1.050000 4.031297583 17.053905432 -5.905112111 0 0 1 # ob - 818 1 3 -1.050000 2.343996182 15.033100058 -5.909707262 0 0 1 # ob - 819 1 3 -1.050000 1.475374698 17.538377446 -6.147724732 0 0 1 # ob - 820 1 5 0.425000 6.448115929 15.331762013 -8.008509666 0 0 1 # ho - 821 1 1 1.575000 0.988902760 16.252524607 9.189872068 0 0 0 # ao + 801 1 1 1.575000 4.340101622 10.644943187 -9.189871776 0 0 1 # ao + 802 1 2 2.100000 4.932957348 -17.829737203 -6.510105182 0 1 1 # st + 803 1 2 2.100000 5.194626842 12.014671000 -6.503672300 0 0 1 # st + 804 1 3 -1.050000 4.908798697 9.142788159 -8.128441523 0 0 1 # ob + 805 1 3 -1.050000 5.348137681 11.887172019 -8.125684583 0 0 1 # ob + 806 1 4 -0.950000 2.748398366 10.855709216 -8.196446632 0 0 1 # oh + 807 1 3 -1.050000 1.423130278 12.570994121 -5.891626593 0 0 1 # ob + 808 1 3 -1.050000 4.895828725 10.550188747 -5.896221744 0 0 1 # ob + 809 1 3 -1.050000 4.027207241 13.055466135 -6.134239214 0 0 1 # ob + 810 1 5 
0.425000 3.839948624 10.848850702 -7.995024148 0 0 1 # ho + 811 1 1 1.575000 6.948268927 15.127854498 -9.189871776 0 0 1 # ao + 812 1 2 2.100000 2.550128718 13.550643561 -6.510105182 0 0 1 # st + 813 1 2 2.100000 2.642794300 16.497582311 -6.503672300 0 0 1 # st + 814 1 3 -1.050000 2.356966154 13.625699470 -8.128441523 0 0 1 # ob + 815 1 3 -1.050000 2.796305138 16.370083330 -8.125684583 0 0 1 # ob + 816 1 4 -0.950000 5.356565671 15.338620527 -8.196446632 0 0 1 # oh + 817 1 3 -1.050000 4.031297583 17.053905432 -5.891626593 0 0 1 # ob + 818 1 3 -1.050000 2.343996182 15.033100058 -5.896221744 0 0 1 # ob + 819 1 3 -1.050000 1.475374698 17.538377446 -6.134239214 0 0 1 # ob + 820 1 5 0.425000 6.448115929 15.331762013 -7.995024148 0 0 1 # ho + 821 1 1 1.575000 4.382780508 16.616181130 -9.189871776 0 0 1 # ao 822 1 2 2.100000 0.170708359 8.863912922 6.510105475 0 0 0 # st 823 1 2 2.100000 0.134377539 14.882796794 6.503672592 0 0 0 # st 824 1 3 -1.050000 0.194867010 -18.108612440 8.128441816 0 1 0 # ob @@ -866,7 +866,7 @@ Atoms # full 828 1 3 -1.050000 0.433175657 16.347279047 5.896222036 0 0 0 # ob 829 1 3 -1.050000 1.301797141 13.842001659 6.134239507 0 0 0 # ob 830 1 5 0.425000 1.489055758 16.048617092 7.995024441 0 0 0 # ho - 831 1 1 1.575000 -1.619264545 11.769613296 9.189872068 0 0 0 # ao + 831 1 1 1.575000 1.774613203 12.133269819 -9.189871776 0 0 1 # ao 832 1 2 2.100000 2.778875664 13.346824233 6.510105475 0 0 0 # st 833 1 2 2.100000 2.686210082 10.399885483 6.503672592 0 0 0 # st 834 1 3 -1.050000 2.972038228 13.271768324 8.128441816 0 0 0 # ob @@ -876,27 +876,27 @@ Atoms # full 838 1 3 -1.050000 2.985008200 11.864367737 5.896222036 0 0 0 # ob 839 1 3 -1.050000 3.853629684 9.359090348 6.134239507 0 0 0 # ob 840 1 5 0.425000 -1.119111547 11.565705782 7.995024441 0 0 0 # ho - 841 1 1 1.575000 6.106223722 10.281286664 9.189872068 0 0 0 # ao - 842 1 2 2.100000 -10.547042805 -17.829737203 -6.523590700 1 1 1 # st - 843 1 2 2.100000 -10.285373310 12.014671000 -6.517157818 
1 0 1 # st - 844 1 3 -1.050000 10.068798544 9.142788159 -8.141927041 0 0 1 # ob - 845 1 3 -1.050000 -10.131862472 11.887172019 -8.139170101 1 0 1 # ob - 846 1 4 -0.950000 7.908398214 10.855709216 -8.209932150 0 0 1 # oh - 847 1 3 -1.050000 6.583130126 12.570994121 -5.905112111 0 0 1 # ob - 848 1 3 -1.050000 -10.584171428 10.550188747 -5.909707262 1 0 1 # ob - 849 1 3 -1.050000 9.187207088 13.055466135 -6.147724732 0 0 1 # ob - 850 1 5 0.425000 8.999948471 10.848850702 -8.008509666 0 0 1 # ho - 851 1 1 1.575000 -11.925608974 14.764197975 9.189872068 1 0 0 # ao - 852 1 2 2.100000 7.710128565 13.550643561 -6.523590700 0 0 1 # st - 853 1 2 2.100000 7.802794147 16.497582311 -6.517157818 0 0 1 # st - 854 1 3 -1.050000 7.516966002 13.625699470 -8.141927041 0 0 1 # ob - 855 1 3 -1.050000 7.956304986 16.370083330 -8.139170101 0 0 1 # ob - 856 1 4 -0.950000 -10.123434482 15.338620527 -8.209932150 1 0 1 # oh - 857 1 3 -1.050000 9.191297430 17.053905432 -5.905112111 0 0 1 # ob - 858 1 3 -1.050000 7.503996030 15.033100058 -5.909707262 0 0 1 # ob - 859 1 3 -1.050000 6.635374545 17.538377446 -6.147724732 0 0 1 # ob - 860 1 5 0.425000 -9.031884224 15.331762013 -8.008509666 1 0 1 # ho - 861 1 1 1.575000 6.148902607 16.252524607 9.189872068 0 0 0 # ao + 841 1 1 1.575000 9.500101470 10.644943187 -9.189871776 0 0 1 # ao + 842 1 2 2.100000 -10.547042805 -17.829737203 -6.510105182 1 1 1 # st + 843 1 2 2.100000 -10.285373310 12.014671000 -6.503672300 1 0 1 # st + 844 1 3 -1.050000 10.068798544 9.142788159 -8.128441523 0 0 1 # ob + 845 1 3 -1.050000 -10.131862472 11.887172019 -8.125684583 1 0 1 # ob + 846 1 4 -0.950000 7.908398214 10.855709216 -8.196446632 0 0 1 # oh + 847 1 3 -1.050000 6.583130126 12.570994121 -5.891626593 0 0 1 # ob + 848 1 3 -1.050000 -10.584171428 10.550188747 -5.896221744 1 0 1 # ob + 849 1 3 -1.050000 9.187207088 13.055466135 -6.134239214 0 0 1 # ob + 850 1 5 0.425000 8.999948471 10.848850702 -7.995024148 0 0 1 # ho + 851 1 1 1.575000 -8.531731226 15.127854498 
-9.189871776 1 0 1 # ao + 852 1 2 2.100000 7.710128565 13.550643561 -6.510105182 0 0 1 # st + 853 1 2 2.100000 7.802794147 16.497582311 -6.503672300 0 0 1 # st + 854 1 3 -1.050000 7.516966002 13.625699470 -8.128441523 0 0 1 # ob + 855 1 3 -1.050000 7.956304986 16.370083330 -8.125684583 0 0 1 # ob + 856 1 4 -0.950000 -10.123434482 15.338620527 -8.196446632 1 0 1 # oh + 857 1 3 -1.050000 9.191297430 17.053905432 -5.891626593 0 0 1 # ob + 858 1 3 -1.050000 7.503996030 15.033100058 -5.896221744 0 0 1 # ob + 859 1 3 -1.050000 6.635374545 17.538377446 -6.134239214 0 0 1 # ob + 860 1 5 0.425000 -9.031884224 15.331762013 -7.995024148 1 0 1 # ho + 861 1 1 1.575000 9.542780355 16.616181130 -9.189871776 0 0 1 # ao 862 1 2 2.100000 5.330708207 8.863912922 6.510105475 0 0 0 # st 863 1 2 2.100000 5.294377387 14.882796794 6.503672592 0 0 0 # st 864 1 3 -1.050000 5.354866857 -18.108612440 8.128441816 0 1 0 # ob @@ -906,7 +906,7 @@ Atoms # full 868 1 3 -1.050000 5.593175504 16.347279047 5.896222036 0 0 0 # ob 869 1 3 -1.050000 6.461796988 13.842001659 6.134239507 0 0 0 # ob 870 1 5 0.425000 6.649055605 16.048617092 7.995024441 0 0 0 # ho - 871 1 1 1.575000 3.540735302 11.769613296 9.189872068 0 0 0 # ao + 871 1 1 1.575000 6.934613050 12.133269819 -9.189871776 0 0 1 # ao 872 1 2 2.100000 -12.701124489 13.346824233 6.510105475 1 0 0 # st 873 1 2 2.100000 -12.793790070 10.399885483 6.503672592 1 0 0 # st 874 1 3 -1.050000 -12.507961925 13.271768324 8.128441816 1 0 0 # ob @@ -916,27 +916,27 @@ Atoms # full 878 1 3 -1.050000 -12.494991953 11.864367737 5.896222036 1 0 0 # ob 879 1 3 -1.050000 -11.626370469 9.359090348 6.134239507 1 0 0 # ob 880 1 5 0.425000 4.040888301 11.565705782 7.995024441 0 0 0 # ho - 881 1 1 1.575000 -9.373776431 10.281286664 9.189872068 1 0 0 # ao - 882 1 2 2.100000 -5.387042958 -17.829737203 -6.523590700 1 1 1 # st - 883 1 2 2.100000 -5.125373463 12.014671000 -6.517157818 1 0 1 # st - 884 1 3 -1.050000 -5.411201608 9.142788159 -8.141927041 1 0 1 # ob - 885 1 3 
-1.050000 -4.971862624 11.887172019 -8.139170101 1 0 1 # ob - 886 1 4 -0.950000 -7.571601939 10.855709216 -8.209932150 1 0 1 # oh - 887 1 3 -1.050000 -8.896870027 12.570994121 -5.905112111 1 0 1 # ob - 888 1 3 -1.050000 -5.424171580 10.550188747 -5.909707262 1 0 1 # ob - 889 1 3 -1.050000 -6.292793064 13.055466135 -6.147724732 1 0 1 # ob - 890 1 5 0.425000 -6.480051681 10.848850702 -8.008509666 1 0 1 # ho - 891 1 1 1.575000 -6.765609126 14.764197975 9.189872068 1 0 0 # ao - 892 1 2 2.100000 -7.769871587 13.550643561 -6.523590700 1 0 1 # st - 893 1 2 2.100000 -7.677206006 16.497582311 -6.517157818 1 0 1 # st - 894 1 3 -1.050000 -7.963034151 13.625699470 -8.141927041 1 0 1 # ob - 895 1 3 -1.050000 -7.523695167 16.370083330 -8.139170101 1 0 1 # ob - 896 1 4 -0.950000 -4.963434634 15.338620527 -8.209932150 1 0 1 # oh - 897 1 3 -1.050000 -6.288702722 17.053905432 -5.905112111 1 0 1 # ob - 898 1 3 -1.050000 -7.976004123 15.033100058 -5.909707262 1 0 1 # ob - 899 1 3 -1.050000 -8.844625607 17.538377446 -6.147724732 1 0 1 # ob - 900 1 5 0.425000 -3.871884377 15.331762013 -8.008509666 1 0 1 # ho - 901 1 1 1.575000 -9.331097546 16.252524607 9.189872068 1 0 0 # ao + 881 1 1 1.575000 -5.979898683 10.644943187 -9.189871776 1 0 1 # ao + 882 1 2 2.100000 -5.387042958 -17.829737203 -6.510105182 1 1 1 # st + 883 1 2 2.100000 -5.125373463 12.014671000 -6.503672300 1 0 1 # st + 884 1 3 -1.050000 -5.411201608 9.142788159 -8.128441523 1 0 1 # ob + 885 1 3 -1.050000 -4.971862624 11.887172019 -8.125684583 1 0 1 # ob + 886 1 4 -0.950000 -7.571601939 10.855709216 -8.196446632 1 0 1 # oh + 887 1 3 -1.050000 -8.896870027 12.570994121 -5.891626593 1 0 1 # ob + 888 1 3 -1.050000 -5.424171580 10.550188747 -5.896221744 1 0 1 # ob + 889 1 3 -1.050000 -6.292793064 13.055466135 -6.134239214 1 0 1 # ob + 890 1 5 0.425000 -6.480051681 10.848850702 -7.995024148 1 0 1 # ho + 891 1 1 1.575000 -3.371731378 15.127854498 -9.189871776 1 0 1 # ao + 892 1 2 2.100000 -7.769871587 13.550643561 -6.510105182 1 0 
1 # st + 893 1 2 2.100000 -7.677206006 16.497582311 -6.503672300 1 0 1 # st + 894 1 3 -1.050000 -7.963034151 13.625699470 -8.128441523 1 0 1 # ob + 895 1 3 -1.050000 -7.523695167 16.370083330 -8.125684583 1 0 1 # ob + 896 1 4 -0.950000 -4.963434634 15.338620527 -8.196446632 1 0 1 # oh + 897 1 3 -1.050000 -6.288702722 17.053905432 -5.891626593 1 0 1 # ob + 898 1 3 -1.050000 -7.976004123 15.033100058 -5.896221744 1 0 1 # ob + 899 1 3 -1.050000 -8.844625607 17.538377446 -6.134239214 1 0 1 # ob + 900 1 5 0.425000 -3.871884377 15.331762013 -7.995024148 1 0 1 # ho + 901 1 1 1.575000 -5.937219798 16.616181130 -9.189871776 1 0 1 # ao 902 1 2 2.100000 -10.149291946 8.863912922 6.510105475 1 0 0 # st 903 1 2 2.100000 -10.185622766 14.882796794 6.503672592 1 0 0 # st 904 1 3 -1.050000 -10.125133295 -18.108612440 8.128441816 1 1 0 # ob @@ -946,7 +946,7 @@ Atoms # full 908 1 3 -1.050000 -9.886824648 16.347279047 5.896222036 1 0 0 # ob 909 1 3 -1.050000 -9.018203164 13.842001659 6.134239507 1 0 0 # ob 910 1 5 0.425000 -8.830944547 16.048617092 7.995024441 1 0 0 # ho - 911 1 1 1.575000 -11.939264850 11.769613296 9.189872068 1 0 0 # ao + 911 1 1 1.575000 -8.545387102 12.133269819 -9.189871776 1 0 1 # ao 912 1 2 2.100000 -7.541124641 13.346824233 6.510105475 1 0 0 # st 913 1 2 2.100000 -7.633790223 10.399885483 6.503672592 1 0 0 # st 914 1 3 -1.050000 -7.347962077 13.271768324 8.128441816 1 0 0 # ob @@ -956,27 +956,27 @@ Atoms # full 918 1 3 -1.050000 -7.334992106 11.864367737 5.896222036 1 0 0 # ob 919 1 3 -1.050000 -6.466370621 9.359090348 6.134239507 1 0 0 # ob 920 1 5 0.425000 -11.439111852 11.565705782 7.995024441 1 0 0 # ho - 921 1 1 1.575000 -4.213776583 10.281286664 9.189872068 1 0 0 # ao - 922 1 2 2.100000 -0.227043110 -17.829737203 -6.523590700 1 1 1 # st - 923 1 2 2.100000 0.034626385 12.014671000 -6.517157818 1 0 1 # st - 924 1 3 -1.050000 -0.251201761 9.142788159 -8.141927041 1 0 1 # ob - 925 1 3 -1.050000 0.188137223 11.887172019 -8.139170101 1 0 1 # ob - 926 1 4 
-0.950000 -2.411602091 10.855709216 -8.209932150 1 0 1 # oh - 927 1 3 -1.050000 -3.736870180 12.570994121 -5.905112111 1 0 1 # ob - 928 1 3 -1.050000 -0.264171733 10.550188747 -5.909707262 1 0 1 # ob - 929 1 3 -1.050000 -1.132793217 13.055466135 -6.147724732 1 0 1 # ob - 930 1 5 0.425000 -1.320051834 10.848850702 -8.008509666 1 0 1 # ho - 931 1 1 1.575000 -1.605609279 14.764197975 9.189872068 1 0 0 # ao - 932 1 2 2.100000 -2.609871740 13.550643561 -6.523590700 1 0 1 # st - 933 1 2 2.100000 -2.517206158 16.497582311 -6.517157818 1 0 1 # st - 934 1 3 -1.050000 -2.803034304 13.625699470 -8.141927041 1 0 1 # ob - 935 1 3 -1.050000 -2.363695320 16.370083330 -8.139170101 1 0 1 # ob - 936 1 4 -0.950000 0.196565213 15.338620527 -8.209932150 1 0 1 # oh - 937 1 3 -1.050000 -1.128702875 17.053905432 -5.905112111 1 0 1 # ob - 938 1 3 -1.050000 -2.816004275 15.033100058 -5.909707262 1 0 1 # ob - 939 1 3 -1.050000 -3.684625760 17.538377446 -6.147724732 1 0 1 # ob - 940 1 5 0.425000 1.288115471 15.331762013 -8.008509666 1 0 1 # ho - 941 1 1 1.575000 -4.171097698 16.252524607 9.189872068 1 0 0 # ao + 921 1 1 1.575000 -0.819898835 10.644943187 -9.189871776 1 0 1 # ao + 922 1 2 2.100000 -0.227043110 -17.829737203 -6.510105182 1 1 1 # st + 923 1 2 2.100000 0.034626385 12.014671000 -6.503672300 1 0 1 # st + 924 1 3 -1.050000 -0.251201761 9.142788159 -8.128441523 1 0 1 # ob + 925 1 3 -1.050000 0.188137223 11.887172019 -8.125684583 1 0 1 # ob + 926 1 4 -0.950000 -2.411602091 10.855709216 -8.196446632 1 0 1 # oh + 927 1 3 -1.050000 -3.736870180 12.570994121 -5.891626593 1 0 1 # ob + 928 1 3 -1.050000 -0.264171733 10.550188747 -5.896221744 1 0 1 # ob + 929 1 3 -1.050000 -1.132793217 13.055466135 -6.134239214 1 0 1 # ob + 930 1 5 0.425000 -1.320051834 10.848850702 -7.995024148 1 0 1 # ho + 931 1 1 1.575000 1.788268469 15.127854498 -9.189871776 1 0 1 # ao + 932 1 2 2.100000 -2.609871740 13.550643561 -6.510105182 1 0 1 # st + 933 1 2 2.100000 -2.517206158 16.497582311 -6.503672300 1 0 1 # st 
+ 934 1 3 -1.050000 -2.803034304 13.625699470 -8.128441523 1 0 1 # ob + 935 1 3 -1.050000 -2.363695320 16.370083330 -8.125684583 1 0 1 # ob + 936 1 4 -0.950000 0.196565213 15.338620527 -8.196446632 1 0 1 # oh + 937 1 3 -1.050000 -1.128702875 17.053905432 -5.891626593 1 0 1 # ob + 938 1 3 -1.050000 -2.816004275 15.033100058 -5.896221744 1 0 1 # ob + 939 1 3 -1.050000 -3.684625760 17.538377446 -6.134239214 1 0 1 # ob + 940 1 5 0.425000 1.288115471 15.331762013 -7.995024148 1 0 1 # ho + 941 1 1 1.575000 -0.777219950 16.616181130 -9.189871776 1 0 1 # ao 942 1 2 2.100000 -4.989292099 8.863912922 6.510105475 1 0 0 # st 943 1 2 2.100000 -5.025622918 14.882796794 6.503672592 1 0 0 # st 944 1 3 -1.050000 -4.965133448 -18.108612440 8.128441816 1 1 0 # ob @@ -986,7 +986,7 @@ Atoms # full 948 1 3 -1.050000 -4.726824801 16.347279047 5.896222036 1 0 0 # ob 949 1 3 -1.050000 -3.858203317 13.842001659 6.134239507 1 0 0 # ob 950 1 5 0.425000 -3.670944700 16.048617092 7.995024441 1 0 0 # ho - 951 1 1 1.575000 -6.779265003 11.769613296 9.189872068 1 0 0 # ao + 951 1 1 1.575000 -3.385387255 12.133269819 -9.189871776 1 0 1 # ao 952 1 2 2.100000 -2.381124794 13.346824233 6.510105475 1 0 0 # st 953 1 2 2.100000 -2.473790376 10.399885483 6.503672592 1 0 0 # st 954 1 3 -1.050000 -2.187962230 13.271768324 8.128441816 1 0 0 # ob @@ -996,27 +996,27 @@ Atoms # full 958 1 3 -1.050000 -2.174992258 11.864367737 5.896222036 1 0 0 # ob 959 1 3 -1.050000 -1.306370774 9.359090348 6.134239507 1 0 0 # ob 960 1 5 0.425000 -6.279112005 11.565705782 7.995024441 1 0 0 # ho - 961 1 1 1.575000 0.777219961 -16.616182789 9.189872068 0 1 0 # ao - 962 1 2 2.100000 4.989292109 -8.863914582 -6.523590700 0 1 1 # st - 963 1 2 2.100000 5.025622929 -14.882798454 -6.517157818 0 1 1 # st - 964 1 3 -1.050000 4.739794784 -17.754681294 -8.141927041 0 1 1 # ob - 965 1 3 -1.050000 5.179133768 -15.010297434 -8.139170101 0 1 1 # ob - 966 1 4 -0.950000 2.579394453 -16.041760237 -8.209932150 0 1 1 # oh - 967 1 3 -1.050000 
1.254126365 -14.326475333 -5.905112111 0 1 1 # ob - 968 1 3 -1.050000 4.726824812 -16.347280707 -5.909707262 0 1 1 # ob - 969 1 3 -1.050000 3.858203328 -13.842003318 -6.147724732 0 1 1 # ob - 970 1 5 0.425000 3.670944711 -16.048618752 -8.008509666 0 1 1 # ho - 971 1 1 1.575000 3.385387266 -12.133271479 9.189872068 0 1 0 # ao - 972 1 2 2.100000 2.381124805 -13.346825892 -6.523590700 0 1 1 # st - 973 1 2 2.100000 2.473790386 -10.399887143 -6.517157818 0 1 1 # st - 974 1 3 -1.050000 2.187962241 -13.271769983 -8.141927041 0 1 1 # ob - 975 1 3 -1.050000 2.627301225 -10.527386123 -8.139170101 0 1 1 # ob - 976 1 4 -0.950000 5.187561758 -11.558848927 -8.209932150 0 1 1 # oh - 977 1 3 -1.050000 3.862293670 -9.843564022 -5.905112111 0 1 1 # ob - 978 1 3 -1.050000 2.174992269 -11.864369396 -5.909707262 0 1 1 # ob - 979 1 3 -1.050000 1.306370785 -9.359092007 -6.147724732 0 1 1 # ob - 980 1 5 0.425000 6.279112015 -11.565707441 -8.008509666 0 1 1 # ho - 981 1 1 1.575000 0.819898846 -10.644944847 9.189872068 0 1 0 # ao + 961 1 1 1.575000 4.171097709 -16.252526266 -9.189871776 0 1 1 # ao + 962 1 2 2.100000 4.989292109 -8.863914582 -6.510105182 0 1 1 # st + 963 1 2 2.100000 5.025622929 -14.882798454 -6.503672300 0 1 1 # st + 964 1 3 -1.050000 4.739794784 -17.754681294 -8.128441523 0 1 1 # ob + 965 1 3 -1.050000 5.179133768 -15.010297434 -8.125684583 0 1 1 # ob + 966 1 4 -0.950000 2.579394453 -16.041760237 -8.196446632 0 1 1 # oh + 967 1 3 -1.050000 1.254126365 -14.326475333 -5.891626593 0 1 1 # ob + 968 1 3 -1.050000 4.726824812 -16.347280707 -5.896221744 0 1 1 # ob + 969 1 3 -1.050000 3.858203328 -13.842003318 -6.134239214 0 1 1 # ob + 970 1 5 0.425000 3.670944711 -16.048618752 -7.995024148 0 1 1 # ho + 971 1 1 1.575000 6.779265014 -11.769614956 -9.189871776 0 1 1 # ao + 972 1 2 2.100000 2.381124805 -13.346825892 -6.510105182 0 1 1 # st + 973 1 2 2.100000 2.473790386 -10.399887143 -6.503672300 0 1 1 # st + 974 1 3 -1.050000 2.187962241 -13.271769983 -8.128441523 0 1 1 # ob + 975 1 
3 -1.050000 2.627301225 -10.527386123 -8.125684583 0 1 1 # ob + 976 1 4 -0.950000 5.187561758 -11.558848927 -8.196446632 0 1 1 # oh + 977 1 3 -1.050000 3.862293670 -9.843564022 -5.891626593 0 1 1 # ob + 978 1 3 -1.050000 2.174992269 -11.864369396 -5.896221744 0 1 1 # ob + 979 1 3 -1.050000 1.306370785 -9.359092007 -6.134239214 0 1 1 # ob + 980 1 5 0.425000 6.279112015 -11.565707441 -7.995024148 0 1 1 # ho + 981 1 1 1.575000 4.213776594 -10.281288324 -9.189871776 0 1 1 # ao 982 1 2 2.100000 0.001704446 -18.033556531 6.510105475 0 1 0 # st 983 1 2 2.100000 -0.034626374 -12.014672659 6.503672592 0 1 0 # st 984 1 3 -1.050000 0.251201772 -9.142789819 8.128441816 0 1 0 # ob @@ -1026,7 +1026,7 @@ Atoms # full 988 1 3 -1.050000 0.264171744 -10.550190406 5.896222036 0 1 0 # ob 989 1 3 -1.050000 1.132793228 -13.055467795 6.134239507 0 1 0 # ob 990 1 5 0.425000 1.320051845 -10.848852361 7.995024441 0 1 0 # ho - 991 1 1 1.575000 -1.788268458 -15.127856157 9.189872068 0 1 0 # ao + 991 1 1 1.575000 1.605609290 -14.764199634 -9.189871776 0 1 1 # ao 992 1 2 2.100000 2.609871751 -13.550645221 6.510105475 0 1 0 # st 993 1 2 2.100000 2.517206169 -16.497583970 6.503672592 0 1 0 # st 994 1 3 -1.050000 2.803034315 -13.625701130 8.128441816 0 1 0 # ob @@ -1036,27 +1036,27 @@ Atoms # full 998 1 3 -1.050000 2.816004286 -15.033101717 5.896222036 0 1 0 # ob 999 1 3 -1.050000 3.684625771 -17.538379106 6.134239507 0 1 0 # ob 1000 1 5 0.425000 -1.288115460 -15.331763672 7.995024441 0 1 0 # ho - 1001 1 1 1.575000 5.937219808 -16.616182789 9.189872068 0 1 0 # ao - 1002 1 2 2.100000 -10.490708043 -8.863914582 -6.523590700 1 1 1 # st - 1003 1 2 2.100000 -10.454377223 -14.882798454 -6.517157818 1 1 1 # st - 1004 1 3 -1.050000 9.899794631 -17.754681294 -8.141927041 0 1 1 # ob - 1005 1 3 -1.050000 -10.300866385 -15.010297434 -8.139170101 1 1 1 # ob - 1006 1 4 -0.950000 7.739394301 -16.041760237 -8.209932150 0 1 1 # oh - 1007 1 3 -1.050000 6.414126212 -14.326475333 -5.905112111 0 1 1 # ob - 1008 1 3 
-1.050000 -10.753175341 -16.347280707 -5.909707262 1 1 1 # ob - 1009 1 3 -1.050000 9.018203175 -13.842003318 -6.147724732 0 1 1 # ob - 1010 1 5 0.425000 8.830944558 -16.048618752 -8.008509666 0 1 1 # ho - 1011 1 1 1.575000 -12.094612887 -12.133271479 9.189872068 1 1 0 # ao - 1012 1 2 2.100000 7.541124652 -13.346825892 -6.523590700 0 1 1 # st - 1013 1 2 2.100000 7.633790234 -10.399887143 -6.517157818 0 1 1 # st - 1014 1 3 -1.050000 7.347962088 -13.271769983 -8.141927041 0 1 1 # ob - 1015 1 3 -1.050000 7.787301072 -10.527386123 -8.139170101 0 1 1 # ob - 1016 1 4 -0.950000 -10.292438395 -11.558848927 -8.209932150 1 1 1 # oh - 1017 1 3 -1.050000 9.022293517 -9.843564022 -5.905112111 0 1 1 # ob - 1018 1 3 -1.050000 7.334992116 -11.864369396 -5.909707262 0 1 1 # ob - 1019 1 3 -1.050000 6.466370632 -9.359092007 -6.147724732 0 1 1 # ob - 1020 1 5 0.425000 -9.200888137 -11.565707441 -8.008509666 1 1 1 # ho - 1021 1 1 1.575000 5.979898694 -10.644944847 9.189872068 0 1 0 # ao + 1001 1 1 1.575000 9.331097556 -16.252526266 -9.189871776 0 1 1 # ao + 1002 1 2 2.100000 -10.490708043 -8.863914582 -6.510105182 1 1 1 # st + 1003 1 2 2.100000 -10.454377223 -14.882798454 -6.503672300 1 1 1 # st + 1004 1 3 -1.050000 9.899794631 -17.754681294 -8.128441523 0 1 1 # ob + 1005 1 3 -1.050000 -10.300866385 -15.010297434 -8.125684583 1 1 1 # ob + 1006 1 4 -0.950000 7.739394301 -16.041760237 -8.196446632 0 1 1 # oh + 1007 1 3 -1.050000 6.414126212 -14.326475333 -5.891626593 0 1 1 # ob + 1008 1 3 -1.050000 -10.753175341 -16.347280707 -5.896221744 1 1 1 # ob + 1009 1 3 -1.050000 9.018203175 -13.842003318 -6.134239214 0 1 1 # ob + 1010 1 5 0.425000 8.830944558 -16.048618752 -7.995024148 0 1 1 # ho + 1011 1 1 1.575000 -8.700735139 -11.769614956 -9.189871776 1 1 1 # ao + 1012 1 2 2.100000 7.541124652 -13.346825892 -6.510105182 0 1 1 # st + 1013 1 2 2.100000 7.633790234 -10.399887143 -6.503672300 0 1 1 # st + 1014 1 3 -1.050000 7.347962088 -13.271769983 -8.128441523 0 1 1 # ob + 1015 1 3 -1.050000 
7.787301072 -10.527386123 -8.125684583 0 1 1 # ob + 1016 1 4 -0.950000 -10.292438395 -11.558848927 -8.196446632 1 1 1 # oh + 1017 1 3 -1.050000 9.022293517 -9.843564022 -5.891626593 0 1 1 # ob + 1018 1 3 -1.050000 7.334992116 -11.864369396 -5.896221744 0 1 1 # ob + 1019 1 3 -1.050000 6.466370632 -9.359092007 -6.134239214 0 1 1 # ob + 1020 1 5 0.425000 -9.200888137 -11.565707441 -7.995024148 1 1 1 # ho + 1021 1 1 1.575000 9.373776442 -10.281288324 -9.189871776 0 1 1 # ao 1022 1 2 2.100000 5.161704293 -18.033556531 6.510105475 0 1 0 # st 1023 1 2 2.100000 5.125373474 -12.014672659 6.503672592 0 1 0 # st 1024 1 3 -1.050000 5.411201619 -9.142789819 8.128441816 0 1 0 # ob @@ -1066,7 +1066,7 @@ Atoms # full 1028 1 3 -1.050000 5.424171591 -10.550190406 5.896222036 0 1 0 # ob 1029 1 3 -1.050000 6.292793075 -13.055467795 6.134239507 0 1 0 # ob 1030 1 5 0.425000 6.480051692 -10.848852361 7.995024441 0 1 0 # ho - 1031 1 1 1.575000 3.371731389 -15.127856157 9.189872068 0 1 0 # ao + 1031 1 1 1.575000 6.765609137 -14.764199634 -9.189871776 0 1 1 # ao 1032 1 2 2.100000 -12.870128402 -13.550645221 6.510105475 1 1 0 # st 1033 1 2 2.100000 -12.962793984 -16.497583970 6.503672592 1 1 0 # st 1034 1 3 -1.050000 -12.676965838 -13.625701130 8.128441816 1 1 0 # ob @@ -1076,27 +1076,27 @@ Atoms # full 1038 1 3 -1.050000 -12.663995866 -15.033101717 5.896222036 1 1 0 # ob 1039 1 3 -1.050000 -11.795374382 -17.538379106 6.134239507 1 1 0 # ob 1040 1 5 0.425000 3.871884387 -15.331763672 7.995024441 0 1 0 # ho - 1041 1 1 1.575000 -9.542780344 -16.616182789 9.189872068 1 1 0 # ao - 1042 1 2 2.100000 -5.330708196 -8.863914582 -6.523590700 1 1 1 # st - 1043 1 2 2.100000 -5.294377376 -14.882798454 -6.517157818 1 1 1 # st - 1044 1 3 -1.050000 -5.580205521 -17.754681294 -8.141927041 1 1 1 # ob - 1045 1 3 -1.050000 -5.140866537 -15.010297434 -8.139170101 1 1 1 # ob - 1046 1 4 -0.950000 -7.740605852 -16.041760237 -8.209932150 1 1 1 # oh - 1047 1 3 -1.050000 -9.065873940 -14.326475333 -5.905112111 1 1 1 
# ob - 1048 1 3 -1.050000 -5.593175493 -16.347280707 -5.909707262 1 1 1 # ob - 1049 1 3 -1.050000 -6.461796978 -13.842003318 -6.147724732 1 1 1 # ob - 1050 1 5 0.425000 -6.649055594 -16.048618752 -8.008509666 1 1 1 # ho - 1051 1 1 1.575000 -6.934613039 -12.133271479 9.189872068 1 1 0 # ao - 1052 1 2 2.100000 -7.938875500 -13.346825892 -6.523590700 1 1 1 # st - 1053 1 2 2.100000 -7.846209919 -10.399887143 -6.517157818 1 1 1 # st - 1054 1 3 -1.050000 -8.132038064 -13.271769983 -8.141927041 1 1 1 # ob - 1055 1 3 -1.050000 -7.692699080 -10.527386123 -8.139170101 1 1 1 # ob - 1056 1 4 -0.950000 -5.132438547 -11.558848927 -8.209932150 1 1 1 # oh - 1057 1 3 -1.050000 -6.457706636 -9.843564022 -5.905112111 1 1 1 # ob - 1058 1 3 -1.050000 -8.145008036 -11.864369396 -5.909707262 1 1 1 # ob - 1059 1 3 -1.050000 -9.013629520 -9.359092007 -6.147724732 1 1 1 # ob - 1060 1 5 0.425000 -4.040888290 -11.565707441 -8.008509666 1 1 1 # ho - 1061 1 1 1.575000 -9.500101459 -10.644944847 9.189872068 1 1 0 # ao + 1041 1 1 1.575000 -6.148902596 -16.252526266 -9.189871776 1 1 1 # ao + 1042 1 2 2.100000 -5.330708196 -8.863914582 -6.510105182 1 1 1 # st + 1043 1 2 2.100000 -5.294377376 -14.882798454 -6.503672300 1 1 1 # st + 1044 1 3 -1.050000 -5.580205521 -17.754681294 -8.128441523 1 1 1 # ob + 1045 1 3 -1.050000 -5.140866537 -15.010297434 -8.125684583 1 1 1 # ob + 1046 1 4 -0.950000 -7.740605852 -16.041760237 -8.196446632 1 1 1 # oh + 1047 1 3 -1.050000 -9.065873940 -14.326475333 -5.891626593 1 1 1 # ob + 1048 1 3 -1.050000 -5.593175493 -16.347280707 -5.896221744 1 1 1 # ob + 1049 1 3 -1.050000 -6.461796978 -13.842003318 -6.134239214 1 1 1 # ob + 1050 1 5 0.425000 -6.649055594 -16.048618752 -7.995024148 1 1 1 # ho + 1051 1 1 1.575000 -3.540735291 -11.769614956 -9.189871776 1 1 1 # ao + 1052 1 2 2.100000 -7.938875500 -13.346825892 -6.510105182 1 1 1 # st + 1053 1 2 2.100000 -7.846209919 -10.399887143 -6.503672300 1 1 1 # st + 1054 1 3 -1.050000 -8.132038064 -13.271769983 -8.128441523 1 1 1 # 
ob + 1055 1 3 -1.050000 -7.692699080 -10.527386123 -8.125684583 1 1 1 # ob + 1056 1 4 -0.950000 -5.132438547 -11.558848927 -8.196446632 1 1 1 # oh + 1057 1 3 -1.050000 -6.457706636 -9.843564022 -5.891626593 1 1 1 # ob + 1058 1 3 -1.050000 -8.145008036 -11.864369396 -5.896221744 1 1 1 # ob + 1059 1 3 -1.050000 -9.013629520 -9.359092007 -6.134239214 1 1 1 # ob + 1060 1 5 0.425000 -4.040888290 -11.565707441 -7.995024148 1 1 1 # ho + 1061 1 1 1.575000 -6.106223711 -10.281288324 -9.189871776 1 1 1 # ao 1062 1 2 2.100000 -10.318295859 -18.033556531 6.510105475 1 1 0 # st 1063 1 2 2.100000 -10.354626679 -12.014672659 6.503672592 1 1 0 # st 1064 1 3 -1.050000 -10.068798533 -9.142789819 8.128441816 1 1 0 # ob @@ -1106,7 +1106,7 @@ Atoms # full 1068 1 3 -1.050000 -10.055828562 -10.550190406 5.896222036 1 1 0 # ob 1069 1 3 -1.050000 -9.187207077 -13.055467795 6.134239507 1 1 0 # ob 1070 1 5 0.425000 -8.999948461 -10.848852361 7.995024441 1 1 0 # ho - 1071 1 1 1.575000 -12.108268763 -15.127856157 9.189872068 1 1 0 # ao + 1071 1 1 1.575000 -8.714391015 -14.764199634 -9.189871776 1 1 1 # ao 1072 1 2 2.100000 -7.710128554 -13.550645221 6.510105475 1 1 0 # st 1073 1 2 2.100000 -7.802794136 -16.497583970 6.503672592 1 1 0 # st 1074 1 3 -1.050000 -7.516965991 -13.625701130 8.128441816 1 1 0 # ob @@ -1116,27 +1116,27 @@ Atoms # full 1078 1 3 -1.050000 -7.503996019 -15.033101717 5.896222036 1 1 0 # ob 1079 1 3 -1.050000 -6.635374534 -17.538379106 6.134239507 1 1 0 # ob 1080 1 5 0.425000 -11.608115765 -15.331763672 7.995024441 1 1 0 # ho - 1081 1 1 1.575000 -4.382780497 -16.616182789 9.189872068 1 1 0 # ao - 1082 1 2 2.100000 -0.170708348 -8.863914582 -6.523590700 1 1 1 # st - 1083 1 2 2.100000 -0.134377529 -14.882798454 -6.517157818 1 1 1 # st - 1084 1 3 -1.050000 -0.420205674 -17.754681294 -8.141927041 1 1 1 # ob - 1085 1 3 -1.050000 0.019133310 -15.010297434 -8.139170101 1 1 1 # ob - 1086 1 4 -0.950000 -2.580606005 -16.041760237 -8.209932150 1 1 1 # oh - 1087 1 3 -1.050000 
-3.905874093 -14.326475333 -5.905112111 1 1 1 # ob - 1088 1 3 -1.050000 -0.433175646 -16.347280707 -5.909707262 1 1 1 # ob - 1089 1 3 -1.050000 -1.301797130 -13.842003318 -6.147724732 1 1 1 # ob - 1090 1 5 0.425000 -1.489055747 -16.048618752 -8.008509666 1 1 1 # ho - 1091 1 1 1.575000 -1.774613192 -12.133271479 9.189872068 1 1 0 # ao - 1092 1 2 2.100000 -2.778875653 -13.346825892 -6.523590700 1 1 1 # st - 1093 1 2 2.100000 -2.686210071 -10.399887143 -6.517157818 1 1 1 # st - 1094 1 3 -1.050000 -2.972038217 -13.271769983 -8.141927041 1 1 1 # ob - 1095 1 3 -1.050000 -2.532699233 -10.527386123 -8.139170101 1 1 1 # ob - 1096 1 4 -0.950000 0.027561300 -11.558848927 -8.209932150 1 1 1 # oh - 1097 1 3 -1.050000 -1.297706788 -9.843564022 -5.905112111 1 1 1 # ob - 1098 1 3 -1.050000 -2.985008189 -11.864369396 -5.909707262 1 1 1 # ob - 1099 1 3 -1.050000 -3.853629673 -9.359092007 -6.147724732 1 1 1 # ob - 1100 1 5 0.425000 1.119111558 -11.565707441 -8.008509666 1 1 1 # ho - 1101 1 1 1.575000 -4.340101611 -10.644944847 9.189872068 1 1 0 # ao + 1081 1 1 1.575000 -0.988902749 -16.252526266 -9.189871776 1 1 1 # ao + 1082 1 2 2.100000 -0.170708348 -8.863914582 -6.510105182 1 1 1 # st + 1083 1 2 2.100000 -0.134377529 -14.882798454 -6.503672300 1 1 1 # st + 1084 1 3 -1.050000 -0.420205674 -17.754681294 -8.128441523 1 1 1 # ob + 1085 1 3 -1.050000 0.019133310 -15.010297434 -8.125684583 1 1 1 # ob + 1086 1 4 -0.950000 -2.580606005 -16.041760237 -8.196446632 1 1 1 # oh + 1087 1 3 -1.050000 -3.905874093 -14.326475333 -5.891626593 1 1 1 # ob + 1088 1 3 -1.050000 -0.433175646 -16.347280707 -5.896221744 1 1 1 # ob + 1089 1 3 -1.050000 -1.301797130 -13.842003318 -6.134239214 1 1 1 # ob + 1090 1 5 0.425000 -1.489055747 -16.048618752 -7.995024148 1 1 1 # ho + 1091 1 1 1.575000 1.619264556 -11.769614956 -9.189871776 1 1 1 # ao + 1092 1 2 2.100000 -2.778875653 -13.346825892 -6.510105182 1 1 1 # st + 1093 1 2 2.100000 -2.686210071 -10.399887143 -6.503672300 1 1 1 # st + 1094 1 3 -1.050000 
-2.972038217 -13.271769983 -8.128441523 1 1 1 # ob + 1095 1 3 -1.050000 -2.532699233 -10.527386123 -8.125684583 1 1 1 # ob + 1096 1 4 -0.950000 0.027561300 -11.558848927 -8.196446632 1 1 1 # oh + 1097 1 3 -1.050000 -1.297706788 -9.843564022 -5.891626593 1 1 1 # ob + 1098 1 3 -1.050000 -2.985008189 -11.864369396 -5.896221744 1 1 1 # ob + 1099 1 3 -1.050000 -3.853629673 -9.359092007 -6.134239214 1 1 1 # ob + 1100 1 5 0.425000 1.119111558 -11.565707441 -7.995024148 1 1 1 # ho + 1101 1 1 1.575000 -0.946223863 -10.281288324 -9.189871776 1 1 1 # ao 1102 1 2 2.100000 -5.158296012 -18.033556531 6.510105475 1 1 0 # st 1103 1 2 2.100000 -5.194626832 -12.014672659 6.503672592 1 1 0 # st 1104 1 3 -1.050000 -4.908798686 -9.142789819 8.128441816 1 1 0 # ob @@ -1146,7 +1146,7 @@ Atoms # full 1108 1 3 -1.050000 -4.895828714 -10.550190406 5.896222036 1 1 0 # ob 1109 1 3 -1.050000 -4.027207230 -13.055467795 6.134239507 1 1 0 # ob 1110 1 5 0.425000 -3.839948613 -10.848852361 7.995024441 1 1 0 # ho - 1111 1 1 1.575000 -6.948268916 -15.127856157 9.189872068 1 1 0 # ao + 1111 1 1 1.575000 -3.554391168 -14.764199634 -9.189871776 1 1 1 # ao 1112 1 2 2.100000 -2.550128707 -13.550645221 6.510105475 1 1 0 # st 1113 1 2 2.100000 -2.642794289 -16.497583970 6.503672592 1 1 0 # st 1114 1 3 -1.050000 -2.356966143 -13.625701130 8.128441816 1 1 0 # ob @@ -1156,27 +1156,27 @@ Atoms # full 1118 1 3 -1.050000 -2.343996171 -15.033101717 5.896222036 1 1 0 # ob 1119 1 3 -1.050000 -1.475374687 -17.538379106 6.134239507 1 1 0 # ob 1120 1 5 0.425000 -6.448115918 -15.331763672 7.995024441 1 1 0 # ho - 1121 1 1 1.575000 0.833554723 -7.650360168 9.189872068 0 1 0 # ao - 1122 1 2 2.100000 5.045626871 0.101908040 -6.523590700 0 1 1 # st - 1123 1 2 2.100000 5.081957691 -5.916975832 -6.517157818 0 1 1 # st - 1124 1 3 -1.050000 4.796129546 -8.788858673 -8.141927041 0 1 1 # ob - 1125 1 3 -1.050000 5.235468530 -6.044474812 -8.139170101 0 1 1 # ob - 1126 1 4 -0.950000 2.635729215 -7.075937616 -8.209932150 0 1 1 # oh - 
1127 1 3 -1.050000 1.310461127 -5.360652711 -5.905112111 0 1 1 # ob - 1128 1 3 -1.050000 4.783159574 -7.381458085 -5.909707262 0 1 1 # ob - 1129 1 3 -1.050000 3.914538089 -4.876180697 -6.147724732 0 1 1 # ob - 1130 1 5 0.425000 3.727279473 -7.082796130 -8.008509666 0 1 1 # ho - 1131 1 1 1.575000 3.441722028 -3.167448857 9.189872068 0 1 0 # ao - 1132 1 2 2.100000 2.437459567 -4.381003271 -6.523590700 0 1 1 # st - 1133 1 2 2.100000 2.530125148 -1.434064521 -6.517157818 0 1 1 # st - 1134 1 3 -1.050000 2.244297003 -4.305947362 -8.141927041 0 1 1 # ob - 1135 1 3 -1.050000 2.683635987 -1.561563502 -8.139170101 0 1 1 # ob - 1136 1 4 -0.950000 5.243896520 -2.593026305 -8.209932150 0 1 1 # oh - 1137 1 3 -1.050000 3.918628431 -0.877741400 -5.905112111 0 1 1 # ob - 1138 1 3 -1.050000 2.231327031 -2.898546774 -5.909707262 0 1 1 # ob - 1139 1 3 -1.050000 1.362705547 -0.393269386 -6.147724732 0 1 1 # ob - 1140 1 5 0.425000 6.335446777 -2.599884819 -8.008509666 0 1 1 # ho - 1141 1 1 1.575000 0.876233608 -1.679122225 9.189872068 0 1 0 # ao + 1121 1 1 1.575000 4.227432471 -7.286703645 -9.189871776 0 1 1 # ao + 1122 1 2 2.100000 5.045626871 0.101908040 -6.510105182 0 1 1 # st + 1123 1 2 2.100000 5.081957691 -5.916975832 -6.503672300 0 1 1 # st + 1124 1 3 -1.050000 4.796129546 -8.788858673 -8.128441523 0 1 1 # ob + 1125 1 3 -1.050000 5.235468530 -6.044474812 -8.125684583 0 1 1 # ob + 1126 1 4 -0.950000 2.635729215 -7.075937616 -8.196446632 0 1 1 # oh + 1127 1 3 -1.050000 1.310461127 -5.360652711 -5.891626593 0 1 1 # ob + 1128 1 3 -1.050000 4.783159574 -7.381458085 -5.896221744 0 1 1 # ob + 1129 1 3 -1.050000 3.914538089 -4.876180697 -6.134239214 0 1 1 # ob + 1130 1 5 0.425000 3.727279473 -7.082796130 -7.995024148 0 1 1 # ho + 1131 1 1 1.575000 6.835599776 -2.803792334 -9.189871776 0 1 1 # ao + 1132 1 2 2.100000 2.437459567 -4.381003271 -6.510105182 0 1 1 # st + 1133 1 2 2.100000 2.530125148 -1.434064521 -6.503672300 0 1 1 # st + 1134 1 3 -1.050000 2.244297003 -4.305947362 
-8.128441523 0 1 1 # ob + 1135 1 3 -1.050000 2.683635987 -1.561563502 -8.125684583 0 1 1 # ob + 1136 1 4 -0.950000 5.243896520 -2.593026305 -8.196446632 0 1 1 # oh + 1137 1 3 -1.050000 3.918628431 -0.877741400 -5.891626593 0 1 1 # ob + 1138 1 3 -1.050000 2.231327031 -2.898546774 -5.896221744 0 1 1 # ob + 1139 1 3 -1.050000 1.362705547 -0.393269386 -6.134239214 0 1 1 # ob + 1140 1 5 0.425000 6.335446777 -2.599884819 -7.995024148 0 1 1 # ho + 1141 1 1 1.575000 4.270111356 -1.315465702 -9.189871776 0 1 1 # ao 1142 1 2 2.100000 0.058039208 -9.067733910 6.510105475 0 1 0 # st 1143 1 2 2.100000 0.021708388 -3.048850038 6.503672592 0 1 0 # st 1144 1 3 -1.050000 0.307536534 -0.176967197 8.128441816 0 1 0 # ob @@ -1186,7 +1186,7 @@ Atoms # full 1148 1 3 -1.050000 0.320506505 -1.584367785 5.896222036 0 1 0 # ob 1149 1 3 -1.050000 1.189127990 -4.089645173 6.134239507 0 1 0 # ob 1150 1 5 0.425000 1.376386606 -1.883029740 7.995024441 0 1 0 # ho - 1151 1 1 1.575000 -1.731933696 -6.162033536 9.189872068 0 1 0 # ao + 1151 1 1 1.575000 1.661944052 -5.798377013 -9.189871776 0 1 1 # ao 1152 1 2 2.100000 2.666206512 -4.584822599 6.510105475 0 1 0 # st 1153 1 2 2.100000 2.573540931 -7.531761349 6.503672592 0 1 0 # st 1154 1 3 -1.050000 2.859369076 -4.659878508 8.128441816 0 1 0 # ob @@ -1196,27 +1196,27 @@ Atoms # full 1158 1 3 -1.050000 2.872339048 -6.067279095 5.896222036 0 1 0 # ob 1159 1 3 -1.050000 3.740960532 -8.572556484 6.134239507 0 1 0 # ob 1160 1 5 0.425000 -1.231780698 -6.365941050 7.995024441 0 1 0 # ho - 1161 1 1 1.575000 5.993554570 -7.650360168 9.189872068 0 1 0 # ao - 1162 1 2 2.100000 -10.434373281 0.101908040 -6.523590700 1 1 1 # st - 1163 1 2 2.100000 -10.398042462 -5.916975832 -6.517157818 1 1 1 # st - 1164 1 3 -1.050000 9.956129393 -8.788858673 -8.141927041 0 1 1 # ob - 1165 1 3 -1.050000 -10.244531623 -6.044474812 -8.139170101 1 1 1 # ob - 1166 1 4 -0.950000 7.795729062 -7.075937616 -8.209932150 0 1 1 # oh - 1167 1 3 -1.050000 6.470460974 -5.360652711 
-5.905112111 0 1 1 # ob - 1168 1 3 -1.050000 -10.696840579 -7.381458085 -5.909707262 1 1 1 # ob - 1169 1 3 -1.050000 9.074537937 -4.876180697 -6.147724732 0 1 1 # ob - 1170 1 5 0.425000 8.887279320 -7.082796130 -8.008509666 0 1 1 # ho - 1171 1 1 1.575000 -12.038278125 -3.167448857 9.189872068 1 1 0 # ao - 1172 1 2 2.100000 7.597459414 -4.381003271 -6.523590700 0 1 1 # st - 1173 1 2 2.100000 7.690124996 -1.434064521 -6.517157818 0 1 1 # st - 1174 1 3 -1.050000 7.404296850 -4.305947362 -8.141927041 0 1 1 # ob - 1175 1 3 -1.050000 7.843635834 -1.561563502 -8.139170101 0 1 1 # ob - 1176 1 4 -0.950000 -10.236103633 -2.593026305 -8.209932150 1 1 1 # oh - 1177 1 3 -1.050000 9.078628279 -0.877741400 -5.905112111 0 1 1 # ob - 1178 1 3 -1.050000 7.391326878 -2.898546774 -5.909707262 0 1 1 # ob - 1179 1 3 -1.050000 6.522705394 -0.393269386 -6.147724732 0 1 1 # ob - 1180 1 5 0.425000 -9.144553375 -2.599884819 -8.008509666 1 1 1 # ho - 1181 1 1 1.575000 6.036233456 -1.679122225 9.189872068 0 1 0 # ao + 1161 1 1 1.575000 9.387432318 -7.286703645 -9.189871776 0 1 1 # ao + 1162 1 2 2.100000 -10.434373281 0.101908040 -6.510105182 1 1 1 # st + 1163 1 2 2.100000 -10.398042462 -5.916975832 -6.503672300 1 1 1 # st + 1164 1 3 -1.050000 9.956129393 -8.788858673 -8.128441523 0 1 1 # ob + 1165 1 3 -1.050000 -10.244531623 -6.044474812 -8.125684583 1 1 1 # ob + 1166 1 4 -0.950000 7.795729062 -7.075937616 -8.196446632 0 1 1 # oh + 1167 1 3 -1.050000 6.470460974 -5.360652711 -5.891626593 0 1 1 # ob + 1168 1 3 -1.050000 -10.696840579 -7.381458085 -5.896221744 1 1 1 # ob + 1169 1 3 -1.050000 9.074537937 -4.876180697 -6.134239214 0 1 1 # ob + 1170 1 5 0.425000 8.887279320 -7.082796130 -7.995024148 0 1 1 # ho + 1171 1 1 1.575000 -8.644400377 -2.803792334 -9.189871776 1 1 1 # ao + 1172 1 2 2.100000 7.597459414 -4.381003271 -6.510105182 0 1 1 # st + 1173 1 2 2.100000 7.690124996 -1.434064521 -6.503672300 0 1 1 # st + 1174 1 3 -1.050000 7.404296850 -4.305947362 -8.128441523 0 1 1 # ob + 1175 1 3 
-1.050000 7.843635834 -1.561563502 -8.125684583 0 1 1 # ob + 1176 1 4 -0.950000 -10.236103633 -2.593026305 -8.196446632 1 1 1 # oh + 1177 1 3 -1.050000 9.078628279 -0.877741400 -5.891626593 0 1 1 # ob + 1178 1 3 -1.050000 7.391326878 -2.898546774 -5.896221744 0 1 1 # ob + 1179 1 3 -1.050000 6.522705394 -0.393269386 -6.134239214 0 1 1 # ob + 1180 1 5 0.425000 -9.144553375 -2.599884819 -7.995024148 1 1 1 # ho + 1181 1 1 1.575000 9.430111204 -1.315465702 -9.189871776 0 1 1 # ao 1182 1 2 2.100000 5.218039055 -9.067733910 6.510105475 0 1 0 # st 1183 1 2 2.100000 5.181708235 -3.048850038 6.503672592 0 1 0 # st 1184 1 3 -1.050000 5.467536381 -0.176967197 8.128441816 0 1 0 # ob @@ -1226,7 +1226,7 @@ Atoms # full 1188 1 3 -1.050000 5.480506353 -1.584367785 5.896222036 0 1 0 # ob 1189 1 3 -1.050000 6.349127837 -4.089645173 6.134239507 0 1 0 # ob 1190 1 5 0.425000 6.536386454 -1.883029740 7.995024441 0 1 0 # ho - 1191 1 1 1.575000 3.428066151 -6.162033536 9.189872068 0 1 0 # ao + 1191 1 1 1.575000 6.821943899 -5.798377013 -9.189871776 0 1 1 # ao 1192 1 2 2.100000 -12.813793640 -4.584822599 6.510105475 1 1 0 # st 1193 1 2 2.100000 -12.906459222 -7.531761349 6.503672592 1 1 0 # st 1194 1 3 -1.050000 -12.620631076 -4.659878508 8.128441816 1 1 0 # ob @@ -1236,27 +1236,27 @@ Atoms # full 1198 1 3 -1.050000 -12.607661104 -6.067279095 5.896222036 1 1 0 # ob 1199 1 3 -1.050000 -11.739039620 -8.572556484 6.134239507 1 1 0 # ob 1200 1 5 0.425000 3.928219149 -6.365941050 7.995024441 0 1 0 # ho - 1201 1 1 1.575000 -9.486445582 -7.650360168 9.189872068 1 1 0 # ao - 1202 1 2 2.100000 -5.274373434 0.101908040 -6.523590700 1 1 1 # st - 1203 1 2 2.100000 -5.238042614 -5.916975832 -6.517157818 1 1 1 # st - 1204 1 3 -1.050000 -5.523870760 -8.788858673 -8.141927041 1 1 1 # ob - 1205 1 3 -1.050000 -5.084531776 -6.044474812 -8.139170101 1 1 1 # ob - 1206 1 4 -0.950000 -7.684271090 -7.075937616 -8.209932150 1 1 1 # oh - 1207 1 3 -1.050000 -9.009539178 -5.360652711 -5.905112111 1 1 1 # ob - 1208 1 3 
-1.050000 -5.536840731 -7.381458085 -5.909707262 1 1 1 # ob - 1209 1 3 -1.050000 -6.405462216 -4.876180697 -6.147724732 1 1 1 # ob - 1210 1 5 0.425000 -6.592720833 -7.082796130 -8.008509666 1 1 1 # ho - 1211 1 1 1.575000 -6.878278278 -3.167448857 9.189872068 1 1 0 # ao - 1212 1 2 2.100000 -7.882540739 -4.381003271 -6.523590700 1 1 1 # st - 1213 1 2 2.100000 -7.789875157 -1.434064521 -6.517157818 1 1 1 # st - 1214 1 3 -1.050000 -8.075703302 -4.305947362 -8.141927041 1 1 1 # ob - 1215 1 3 -1.050000 -7.636364318 -1.561563502 -8.139170101 1 1 1 # ob - 1216 1 4 -0.950000 -5.076103786 -2.593026305 -8.209932150 1 1 1 # oh - 1217 1 3 -1.050000 -6.401371874 -0.877741400 -5.905112111 1 1 1 # ob - 1218 1 3 -1.050000 -8.088673274 -2.898546774 -5.909707262 1 1 1 # ob - 1219 1 3 -1.050000 -8.957294759 -0.393269386 -6.147724732 1 1 1 # ob - 1220 1 5 0.425000 -3.984553528 -2.599884819 -8.008509666 1 1 1 # ho - 1221 1 1 1.575000 -9.443766697 -1.679122225 9.189872068 1 1 0 # ao + 1201 1 1 1.575000 -6.092567834 -7.286703645 -9.189871776 1 1 1 # ao + 1202 1 2 2.100000 -5.274373434 0.101908040 -6.510105182 1 1 1 # st + 1203 1 2 2.100000 -5.238042614 -5.916975832 -6.503672300 1 1 1 # st + 1204 1 3 -1.050000 -5.523870760 -8.788858673 -8.128441523 1 1 1 # ob + 1205 1 3 -1.050000 -5.084531776 -6.044474812 -8.125684583 1 1 1 # ob + 1206 1 4 -0.950000 -7.684271090 -7.075937616 -8.196446632 1 1 1 # oh + 1207 1 3 -1.050000 -9.009539178 -5.360652711 -5.891626593 1 1 1 # ob + 1208 1 3 -1.050000 -5.536840731 -7.381458085 -5.896221744 1 1 1 # ob + 1209 1 3 -1.050000 -6.405462216 -4.876180697 -6.134239214 1 1 1 # ob + 1210 1 5 0.425000 -6.592720833 -7.082796130 -7.995024148 1 1 1 # ho + 1211 1 1 1.575000 -3.484400530 -2.803792334 -9.189871776 1 1 1 # ao + 1212 1 2 2.100000 -7.882540739 -4.381003271 -6.510105182 1 1 1 # st + 1213 1 2 2.100000 -7.789875157 -1.434064521 -6.503672300 1 1 1 # st + 1214 1 3 -1.050000 -8.075703302 -4.305947362 -8.128441523 1 1 1 # ob + 1215 1 3 -1.050000 -7.636364318 
-1.561563502 -8.125684583 1 1 1 # ob + 1216 1 4 -0.950000 -5.076103786 -2.593026305 -8.196446632 1 1 1 # oh + 1217 1 3 -1.050000 -6.401371874 -0.877741400 -5.891626593 1 1 1 # ob + 1218 1 3 -1.050000 -8.088673274 -2.898546774 -5.896221744 1 1 1 # ob + 1219 1 3 -1.050000 -8.957294759 -0.393269386 -6.134239214 1 1 1 # ob + 1220 1 5 0.425000 -3.984553528 -2.599884819 -7.995024148 1 1 1 # ho + 1221 1 1 1.575000 -6.049888949 -1.315465702 -9.189871776 1 1 1 # ao 1222 1 2 2.100000 -10.261961097 -9.067733910 6.510105475 1 1 0 # st 1223 1 2 2.100000 -10.298291917 -3.048850038 6.503672592 1 1 0 # st 1224 1 3 -1.050000 -10.012463772 -0.176967197 8.128441816 1 1 0 # ob @@ -1266,7 +1266,7 @@ Atoms # full 1228 1 3 -1.050000 -9.999493800 -1.584367785 5.896222036 1 1 0 # ob 1229 1 3 -1.050000 -9.130872315 -4.089645173 6.134239507 1 1 0 # ob 1230 1 5 0.425000 -8.943613699 -1.883029740 7.995024441 1 1 0 # ho - 1231 1 1 1.575000 -12.051934002 -6.162033536 9.189872068 1 1 0 # ao + 1231 1 1 1.575000 -8.658056254 -5.798377013 -9.189871776 1 1 1 # ao 1232 1 2 2.100000 -7.653793793 -4.584822599 6.510105475 1 1 0 # st 1233 1 2 2.100000 -7.746459374 -7.531761349 6.503672592 1 1 0 # st 1234 1 3 -1.050000 -7.460631229 -4.659878508 8.128441816 1 1 0 # ob @@ -1276,27 +1276,27 @@ Atoms # full 1238 1 3 -1.050000 -7.447661257 -6.067279095 5.896222036 1 1 0 # ob 1239 1 3 -1.050000 -6.579039773 -8.572556484 6.134239507 1 1 0 # ob 1240 1 5 0.425000 -11.551781003 -6.365941050 7.995024441 1 1 0 # ho - 1241 1 1 1.575000 -4.326445735 -7.650360168 9.189872068 1 1 0 # ao - 1242 1 2 2.100000 -0.114373587 0.101908040 -6.523590700 1 1 1 # st - 1243 1 2 2.100000 -0.078042767 -5.916975832 -6.517157818 1 1 1 # st - 1244 1 3 -1.050000 -0.363870912 -8.788858673 -8.141927041 1 1 1 # ob - 1245 1 3 -1.050000 0.075468072 -6.044474812 -8.139170101 1 1 1 # ob - 1246 1 4 -0.950000 -2.524271243 -7.075937616 -8.209932150 1 1 1 # oh - 1247 1 3 -1.050000 -3.849539331 -5.360652711 -5.905112111 1 1 1 # ob - 1248 1 3 -1.050000 
-0.376840884 -7.381458085 -5.909707262 1 1 1 # ob - 1249 1 3 -1.050000 -1.245462368 -4.876180697 -6.147724732 1 1 1 # ob - 1250 1 5 0.425000 -1.432720985 -7.082796130 -8.008509666 1 1 1 # ho - 1251 1 1 1.575000 -1.718278430 -3.167448857 9.189872068 1 1 0 # ao - 1252 1 2 2.100000 -2.722540891 -4.381003271 -6.523590700 1 1 1 # st - 1253 1 2 2.100000 -2.629875310 -1.434064521 -6.517157818 1 1 1 # st - 1254 1 3 -1.050000 -2.915703455 -4.305947362 -8.141927041 1 1 1 # ob - 1255 1 3 -1.050000 -2.476364471 -1.561563502 -8.139170101 1 1 1 # ob - 1256 1 4 -0.950000 0.083896062 -2.593026305 -8.209932150 1 1 1 # oh - 1257 1 3 -1.050000 -1.241372026 -0.877741400 -5.905112111 1 1 1 # ob - 1258 1 3 -1.050000 -2.928673427 -2.898546774 -5.909707262 1 1 1 # ob - 1259 1 3 -1.050000 -3.797294911 -0.393269386 -6.147724732 1 1 1 # ob - 1260 1 5 0.425000 1.175446320 -2.599884819 -8.008509666 1 1 1 # ho - 1261 1 1 1.575000 -4.283766850 -1.679122225 9.189872068 1 1 0 # ao + 1241 1 1 1.575000 -0.932567987 -7.286703645 -9.189871776 1 1 1 # ao + 1242 1 2 2.100000 -0.114373587 0.101908040 -6.510105182 1 1 1 # st + 1243 1 2 2.100000 -0.078042767 -5.916975832 -6.503672300 1 1 1 # st + 1244 1 3 -1.050000 -0.363870912 -8.788858673 -8.128441523 1 1 1 # ob + 1245 1 3 -1.050000 0.075468072 -6.044474812 -8.125684583 1 1 1 # ob + 1246 1 4 -0.950000 -2.524271243 -7.075937616 -8.196446632 1 1 1 # oh + 1247 1 3 -1.050000 -3.849539331 -5.360652711 -5.891626593 1 1 1 # ob + 1248 1 3 -1.050000 -0.376840884 -7.381458085 -5.896221744 1 1 1 # ob + 1249 1 3 -1.050000 -1.245462368 -4.876180697 -6.134239214 1 1 1 # ob + 1250 1 5 0.425000 -1.432720985 -7.082796130 -7.995024148 1 1 1 # ho + 1251 1 1 1.575000 1.675599318 -2.803792334 -9.189871776 1 1 1 # ao + 1252 1 2 2.100000 -2.722540891 -4.381003271 -6.510105182 1 1 1 # st + 1253 1 2 2.100000 -2.629875310 -1.434064521 -6.503672300 1 1 1 # st + 1254 1 3 -1.050000 -2.915703455 -4.305947362 -8.128441523 1 1 1 # ob + 1255 1 3 -1.050000 -2.476364471 -1.561563502 
-8.125684583 1 1 1 # ob + 1256 1 4 -0.950000 0.083896062 -2.593026305 -8.196446632 1 1 1 # oh + 1257 1 3 -1.050000 -1.241372026 -0.877741400 -5.891626593 1 1 1 # ob + 1258 1 3 -1.050000 -2.928673427 -2.898546774 -5.896221744 1 1 1 # ob + 1259 1 3 -1.050000 -3.797294911 -0.393269386 -6.134239214 1 1 1 # ob + 1260 1 5 0.425000 1.175446320 -2.599884819 -7.995024148 1 1 1 # ho + 1261 1 1 1.575000 -0.889889102 -1.315465702 -9.189871776 1 1 1 # ao 1262 1 2 2.100000 -5.101961250 -9.067733910 6.510105475 1 1 0 # st 1263 1 2 2.100000 -5.138292070 -3.048850038 6.503672592 1 1 0 # st 1264 1 3 -1.050000 -4.852463924 -0.176967197 8.128441816 1 1 0 # ob @@ -1306,7 +1306,7 @@ Atoms # full 1268 1 3 -1.050000 -4.839493952 -1.584367785 5.896222036 1 1 0 # ob 1269 1 3 -1.050000 -3.970872468 -4.089645173 6.134239507 1 1 0 # ob 1270 1 5 0.425000 -3.783613851 -1.883029740 7.995024441 1 1 0 # ho - 1271 1 1 1.575000 -6.891934154 -6.162033536 9.189872068 1 1 0 # ao + 1271 1 1 1.575000 -3.498056406 -5.798377013 -9.189871776 1 1 1 # ao 1272 1 2 2.100000 -2.493793945 -4.584822599 6.510105475 1 1 0 # st 1273 1 2 2.100000 -2.586459527 -7.531761349 6.503672592 1 1 0 # st 1274 1 3 -1.050000 -2.300631381 -4.659878508 8.128441816 1 1 0 # ob diff --git a/tools/msi2lmp/test/reference/PyAC_bulk-clayff.data2 b/tools/msi2lmp/test/reference/PyAC_bulk-clayff.data2 index 4ee3c34e08..c37bbbfb6e 100644 --- a/tools/msi2lmp/test/reference/PyAC_bulk-clayff.data2 +++ b/tools/msi2lmp/test/reference/PyAC_bulk-clayff.data2 @@ -1,18 +1,18 @@ -LAMMPS data file via write_data, version 5 Oct 2016, timestep = 97 +LAMMPS data file via write_data, version 29 Aug 2024, timestep = 94, units = real 1280 atoms 5 atom types 128 bonds 1 bond types --1.0320000000000000e+01 1.0320000000000000e+01 xlo xhi --1.7931646038000000e+01 1.7931646038000000e+01 ylo yhi --9.1966146809999998e+00 9.1966146809999998e+00 zlo zhi -2.2533867499999999e-01 -3.3938777480000000e+00 -3.6365652300000001e-01 xy xz yz +-10.32 10.32 xlo xhi +-17.931646038 
17.931646038 ylo yhi +-9.189871922 9.189871922 zlo zhi +0.225338675 -3.393877748 -0.363656523 xy xz yz Masses -1 26.9815 +1 26.98154 2 28.0855 3 15.9994 4 15.9994 @@ -32,2697 +32,2697 @@ Bond Coeffs # harmonic Atoms # full -1 1 1 1.5750000000000000e+00 2.5999859887541152e+00 1.4895295600930361e+00 -3.2684095454396100e-03 0 0 0 -2 1 2 2.1000000000000001e+00 3.3890158510276365e+00 8.8633001207774171e+00 2.7539653683351801e+00 0 0 0 -3 1 2 2.1000000000000001e+00 3.4122910819528869e+00 2.8524994338649421e+00 2.7618119426027334e+00 0 0 0 -4 1 3 -1.0500000000000000e+00 3.1917106615190871e+00 -3.1338269933883112e-02 1.1357135267887219e+00 0 0 0 -5 1 3 -1.0500000000000000e+00 3.5469715140592335e+00 2.7595296221569008e+00 1.1370155152705124e+00 0 0 0 -6 1 4 -9.4999999999999996e-01 9.0629865585113478e-01 1.5632893985929819e+00 1.0069802353213007e+00 0 0 0 -7 1 3 -1.0500000000000000e+00 -4.4288757751145802e-01 3.5553420116799757e+00 3.2357631024135358e+00 0 0 0 -8 1 3 -1.0500000000000000e+00 3.3001891233613350e+00 1.3781620274833877e+00 3.2374587487515178e+00 0 0 0 -9 1 3 -1.0500000000000000e+00 2.1467877066985572e+00 3.7041759320173213e+00 3.0459627186065976e+00 0 0 0 -10 1 5 4.2499999999999999e-01 3.9940955153589464e-01 6.8440286676932871e-01 1.1507107600825695e+00 0 0 0 -11 1 1 1.5750000000000000e+00 5.2081610166521486e+00 5.9724603411204953e+00 -3.2678269427819373e-03 0 0 0 -12 1 2 2.1000000000000001e+00 7.8082390717283801e-01 4.3803873151640857e+00 2.7539573196414509e+00 0 0 0 -13 1 2 2.1000000000000001e+00 8.6049184758384811e-01 7.3353954630155727e+00 2.7618077229661608e+00 0 0 0 -14 1 3 -1.0500000000000000e+00 6.3985291874622341e-01 4.4515561398457919e+00 1.1357491950576897e+00 0 0 0 -15 1 3 -1.0500000000000000e+00 9.9516171983727020e-01 7.2424161873666044e+00 1.1369982112775414e+00 0 0 0 -16 1 4 -9.4999999999999996e-01 3.5145105035101025e+00 6.0462386370433165e+00 1.0069819170376704e+00 0 0 0 -17 1 3 -1.0500000000000000e+00 2.1653116942489294e+00 
8.0382453753562579e+00 3.2357864576820852e+00 0 0 0 -18 1 3 -1.0500000000000000e+00 7.4835045014725310e-01 5.8610807683013810e+00 3.2374554227766854e+00 0 0 0 -19 1 3 -1.0500000000000000e+00 -4.0504297107989196e-01 8.1871235044640613e+00 3.0459446104666839e+00 0 0 0 -20 1 5 4.2499999999999999e-01 3.0075427813219715e+00 5.1673094182610200e+00 1.1507483385791737e+00 0 0 0 -21 1 1 1.5750000000000000e+00 2.6161048042733217e+00 7.4761708036165011e+00 -3.7559789002870048e-03 0 0 0 -22 1 2 2.1000000000000001e+00 1.8270583837568388e+00 1.0238224436981014e-01 -2.7609433636234240e+00 0 0 0 -23 1 2 2.1000000000000001e+00 1.8037926928806804e+00 6.1131912799914794e+00 -2.7688607211148550e+00 0 0 0 -24 1 3 -1.0500000000000000e+00 2.0245843840439779e+00 8.9971802013490674e+00 -1.1427990238002010e+00 0 0 0 -25 1 3 -1.0500000000000000e+00 1.6690794936693152e+00 6.2059206004470688e+00 -1.1439630092406396e+00 0 0 0 -26 1 4 -9.4999999999999996e-01 4.3099221099139733e+00 7.4024984100470839e+00 -1.0139954105990832e+00 0 0 0 -27 1 3 -1.0500000000000000e+00 5.6590031416114179e+00 5.4102611437460766e+00 -3.2425548174288927e+00 0 0 0 -28 1 3 -1.0500000000000000e+00 1.9161020068286181e+00 7.5874974627099085e+00 -3.2446533398602568e+00 0 0 0 -29 1 3 -1.0500000000000000e+00 3.0692592992822245e+00 5.2614642342074163e+00 -3.0529614390734992e+00 0 0 0 -30 1 5 4.2499999999999999e-01 4.8166943688168953e+00 8.2812214959912609e+00 -1.1575736167124671e+00 0 0 0 -31 1 1 1.5750000000000000e+00 7.9432589838539513e-03 2.9932322017580155e+00 -3.7548310874804258e-03 0 0 0 -32 1 2 2.1000000000000001e+00 4.4352567674819241e+00 4.5852778048151954e+00 -2.7609429803808379e+00 0 0 0 -33 1 2 2.1000000000000001e+00 4.3556162421312727e+00 1.6303072110574242e+00 -2.7688546110084076e+00 0 0 0 -34 1 3 -1.0500000000000000e+00 4.5764329813965201e+00 4.5142930965604897e+00 -1.1428117296192752e+00 0 0 0 -35 1 3 -1.0500000000000000e+00 4.2208908922051336e+00 1.7230220252737531e+00 -1.1439381753911206e+00 0 0 0 -36 1 4 
-9.4999999999999996e-01 1.7016840076240793e+00 2.9195149987798672e+00 -1.0140049080257008e+00 0 0 0 -37 1 3 -1.0500000000000000e+00 3.0508094436907065e+00 9.2735955111261248e-01 -3.2425715481628181e+00 0 0 0 -38 1 3 -1.0500000000000000e+00 4.4679443003019212e+00 3.1046094096202950e+00 -3.2446642002284571e+00 0 0 0 -39 1 3 -1.0500000000000000e+00 5.6210980414275298e+00 7.7850985095940217e-01 -3.0529442198684507e+00 0 0 0 -40 1 5 4.2499999999999999e-01 2.2085508398324158e+00 3.7983231087392824e+00 -1.1577011829365311e+00 0 0 0 -41 1 1 1.5750000000000000e+00 7.7600001218919523e+00 1.4895196291109798e+00 -3.2657864259189040e-03 0 0 0 -42 1 2 2.1000000000000001e+00 -1.2091006019115589e+01 8.8632974450891417e+00 2.7539547041448795e+00 1 0 0 -43 1 2 2.1000000000000001e+00 -1.2067713232602157e+01 2.8525136078719164e+00 2.7618235199017409e+00 1 0 0 -44 1 3 -1.0500000000000000e+00 8.3516959569061555e+00 -3.1339532310223461e-02 1.1357433570971924e+00 0 0 0 -45 1 3 -1.0500000000000000e+00 -1.1933025492148120e+01 2.7595329973670104e+00 1.1370134112483044e+00 1 0 0 -46 1 4 -9.4999999999999996e-01 6.0662672751172373e+00 1.5632462792111497e+00 1.0069655351602584e+00 0 0 0 -47 1 3 -1.0500000000000000e+00 4.7171183489904180e+00 3.5553451756140646e+00 3.2357682722939334e+00 0 0 0 -48 1 3 -1.0500000000000000e+00 -1.2179814839759992e+01 1.3781454091287202e+00 3.2374743626679336e+00 1 0 0 -49 1 3 -1.0500000000000000e+00 7.3067998243476922e+00 3.7041714141887212e+00 3.0459588478344077e+00 0 0 0 -50 1 5 4.2499999999999999e-01 5.5593955964411492e+00 6.8439387532284712e-01 1.1505669005138230e+00 0 0 0 -51 1 1 1.5750000000000000e+00 -1.0271831701454774e+01 5.9724606597474157e+00 -3.2705090813571758e-03 1 0 0 -52 1 2 2.1000000000000001e+00 5.9408180554345194e+00 4.3804006042748469e+00 2.7539659364818760e+00 0 0 0 -53 1 2 2.1000000000000001e+00 6.0204684195903440e+00 7.3353880878252973e+00 2.7618080745736791e+00 0 0 0 -54 1 3 -1.0500000000000000e+00 5.7998593359670885e+00 
4.4515526262705585e+00 1.1357348262919498e+00 0 0 0 -55 1 3 -1.0500000000000000e+00 6.1551595276671165e+00 7.2424262028151780e+00 1.1369916832629752e+00 0 0 0 -56 1 4 -9.4999999999999996e-01 -1.1965495382161963e+01 6.0462304220183150e+00 1.0070022736298210e+00 1 0 0 -57 1 3 -1.0500000000000000e+00 7.3253562399943561e+00 8.0382687380075382e+00 3.2357743586474470e+00 0 0 0 -58 1 3 -1.0500000000000000e+00 5.9083487186442269e+00 5.8610518812341148e+00 3.2374617422859089e+00 0 0 0 -59 1 3 -1.0500000000000000e+00 4.7549501783979871e+00 8.1871270499566329e+00 3.0459460044387185e+00 0 0 0 -60 1 5 4.2499999999999999e-01 8.1675777506518017e+00 5.1673452186951785e+00 1.1508566812238072e+00 0 0 0 -61 1 1 1.5750000000000000e+00 7.7761138336528575e+00 7.4761697781487406e+00 -3.7580555383147640e-03 0 0 0 -62 1 2 2.1000000000000001e+00 6.9870625419908770e+00 1.0236857881965378e-01 -2.7609530688361730e+00 0 0 0 -63 1 2 2.1000000000000001e+00 6.9638163429426783e+00 6.1131999232440997e+00 -2.7688597574900182e+00 0 0 0 -64 1 3 -1.0500000000000000e+00 7.1845768042068343e+00 8.9971837243373898e+00 -1.1427811518142867e+00 0 0 0 -65 1 3 -1.0500000000000000e+00 6.8290823812761374e+00 6.2059099045834571e+00 -1.1439562764296625e+00 0 0 0 -66 1 4 -9.4999999999999996e-01 -1.1170087733372744e+01 7.4024835932118371e+00 -1.0139778639834489e+00 1 0 0 -67 1 3 -1.0500000000000000e+00 -9.8209500322774037e+00 5.4102853287564159e+00 -3.2425657088525419e+00 1 0 0 -68 1 3 -1.0500000000000000e+00 7.0761041543125955e+00 7.5875255489076068e+00 -3.2446577662945764e+00 0 0 0 -69 1 3 -1.0500000000000000e+00 8.2292664263953057e+00 5.2614600888460927e+00 -3.0529632035400311e+00 0 0 0 -70 1 5 4.2499999999999999e-01 -1.0663272694379867e+01 8.2812544466164262e+00 -1.1574899906021443e+00 1 0 0 -71 1 1 1.5750000000000000e+00 5.1679291867752841e+00 2.9932421670148486e+00 -3.7577228510699001e-03 0 0 0 -72 1 2 2.1000000000000001e+00 -1.1044767300168616e+01 4.5852760408334774e+00 -2.7609521778782176e+00 1 0 0 -73 1 2 
2.1000000000000001e+00 -1.1124388980911734e+01 1.6303195608669299e+00 -2.7688431292616507e+00 1 0 0 -74 1 3 -1.0500000000000000e+00 -1.0903581498310636e+01 4.5142906934311924e+00 -1.1427845473335605e+00 1 0 0 -75 1 3 -1.0500000000000000e+00 -1.1259106369033972e+01 1.7230268384806458e+00 -1.1439410995923396e+00 1 0 0 -76 1 4 -9.4999999999999996e-01 6.8617144958469076e+00 2.9195557673048818e+00 -1.0139879262950160e+00 0 0 0 -77 1 3 -1.0500000000000000e+00 8.2108057946072996e+00 9.2735729230230390e-01 -3.2425776499777719e+00 0 0 0 -78 1 3 -1.0500000000000000e+00 -1.1012059810070339e+01 3.1045919471971004e+00 -3.2446473870783468e+00 1 0 0 -79 1 3 -1.0500000000000000e+00 -9.8588900558710808e+00 7.7850518566728155e-01 -3.0529472339342547e+00 1 0 0 -80 1 5 4.2499999999999999e-01 7.3685681396951637e+00 3.7983364769353116e+00 -1.1575475830507376e+00 0 0 0 -81 1 1 1.5750000000000000e+00 -7.7200137563852609e+00 1.4895294530661403e+00 -3.2684599822623284e-03 1 0 0 -82 1 2 2.1000000000000001e+00 -6.9309835694285180e+00 8.8633000360003891e+00 2.7539655829756615e+00 1 0 0 -83 1 2 2.1000000000000001e+00 -6.9077088390904287e+00 2.8524991867344269e+00 2.7618117534379607e+00 1 0 0 -84 1 3 -1.0500000000000000e+00 -7.1282896122357791e+00 -3.1337790666402299e-02 1.1357138609993100e+00 1 0 0 -85 1 3 -1.0500000000000000e+00 -6.7730290070254648e+00 2.7595293114608452e+00 1.1370158767616427e+00 1 0 0 -86 1 4 -9.4999999999999996e-01 -9.4137011727461637e+00 1.5632889909165790e+00 1.0069804405775873e+00 1 0 0 -87 1 3 -1.0500000000000000e+00 -1.0762886927889904e+01 3.5553424385566323e+00 3.2357631758371692e+00 1 0 0 -88 1 3 -1.0500000000000000e+00 -7.0198114225359447e+00 1.3781627149599167e+00 3.2374581287983730e+00 1 0 0 -89 1 3 -1.0500000000000000e+00 -8.1732123893871496e+00 3.7041758941777481e+00 3.0459624211456457e+00 1 0 0 -90 1 5 4.2499999999999999e-01 -9.9205899642595909e+00 6.8440357171864363e-01 1.1507119739579768e+00 1 0 0 -91 1 1 1.5750000000000000e+00 -5.1118389155552801e+00 
5.9724604992986876e+00 -3.2677938051328681e-03 1 0 0 -92 1 2 2.1000000000000001e+00 -9.5391764189832653e+00 4.3803869998831004e+00 2.7539574271080607e+00 1 0 0 -93 1 2 2.1000000000000001e+00 -9.4595081799096388e+00 7.3353965025317827e+00 2.7618078347600434e+00 1 0 0 -94 1 3 -1.0500000000000000e+00 -9.6801468113930103e+00 4.4515564521540760e+00 1.1357496055372582e+00 1 0 0 -95 1 3 -1.0500000000000000e+00 -9.3248381172301720e+00 7.2424157374410534e+00 1.1369982830380732e+00 1 0 0 -96 1 4 -9.4999999999999996e-01 -6.8054892996665686e+00 6.0462398159612256e+00 1.0069818364520042e+00 1 0 0 -97 1 3 -1.0500000000000000e+00 -8.1546879688645451e+00 8.0382456007117398e+00 3.2357858485300515e+00 1 0 0 -98 1 3 -1.0500000000000000e+00 -9.5716494128412961e+00 5.8610801354623092e+00 3.2374551881368738e+00 1 0 0 -99 1 3 -1.0500000000000000e+00 -1.0725043969209066e+01 8.1871237437456550e+00 3.0459446329304463e+00 1 0 0 -100 1 5 4.2499999999999999e-01 -7.3124574315238666e+00 5.1673091520186851e+00 1.1507497875855961e+00 1 0 0 -101 1 1 1.5750000000000000e+00 -7.7038948864236607e+00 7.4761709779946877e+00 -3.7563858194751276e-03 1 0 0 -102 1 2 2.1000000000000001e+00 -8.4929416250579237e+00 1.0238205190912097e-01 -2.7609433628825437e+00 1 0 0 -103 1 2 2.1000000000000001e+00 -8.5162068339533832e+00 6.1131914931820752e+00 -2.7688610988552806e+00 1 0 0 -104 1 3 -1.0500000000000000e+00 -8.2954155113210621e+00 8.9971804574221004e+00 -1.1427988389805961e+00 1 0 0 -105 1 3 -1.0500000000000000e+00 -8.6509205149498847e+00 6.2059203624978956e+00 -1.1439628590241284e+00 1 0 0 -106 1 4 -9.4999999999999996e-01 -6.0100779546322549e+00 7.4024979040990928e+00 -1.0139958673379255e+00 1 0 0 -107 1 3 -1.0500000000000000e+00 -4.6609961845666854e+00 5.4102617087963587e+00 -3.2425544229802377e+00 1 0 0 -108 1 3 -1.0500000000000000e+00 -8.4038980403496684e+00 7.5874972699614922e+00 -3.2446537367483241e+00 1 0 0 -109 1 3 -1.0500000000000000e+00 -7.2507418722833936e+00 5.2614645688107124e+00 
-3.0529610822747220e+00 1 0 0 -110 1 5 4.2499999999999999e-01 -5.5033059365206407e+00 8.2812208311420328e+00 -1.1575765233774842e+00 1 0 0 -111 1 1 1.5750000000000000e+00 -1.0312056810203035e+01 2.9932322130065998e+00 -3.7546907983969646e-03 1 0 0 -112 1 2 2.1000000000000001e+00 -5.8847433874873287e+00 4.5852785579583113e+00 -2.7609429033193313e+00 1 0 0 -113 1 2 2.1000000000000001e+00 -5.9643839170466686e+00 1.6303066689684940e+00 -2.7688547216593093e+00 1 0 0 -114 1 3 -1.0500000000000000e+00 -5.7435669944029248e+00 4.5142929050558713e+00 -1.1428123357037983e+00 1 0 0 -115 1 3 -1.0500000000000000e+00 -6.0991091928443781e+00 1.7230226164423819e+00 -1.1439386957606192e+00 1 0 0 -116 1 4 -9.4999999999999996e-01 -8.6183165743253909e+00 2.9195138811823007e+00 -1.0140048055866231e+00 1 0 0 -117 1 3 -1.0500000000000000e+00 -7.2691903819614820e+00 9.2736020008046438e-01 -3.2425717834672385e+00 1 0 0 -118 1 3 -1.0500000000000000e+00 -5.8520558193569849e+00 3.1046084246173429e+00 -3.2446638893543165e+00 1 0 0 -119 1 3 -1.0500000000000000e+00 -4.6989020453283867e+00 7.7850970320779922e-01 -3.0529439927969300e+00 1 0 0 -120 1 5 4.2499999999999999e-01 -8.1114486366180678e+00 3.7983239523822867e+00 -1.1577012407700238e+00 1 0 0 -121 1 1 1.5750000000000000e+00 -2.5599994626479976e+00 1.4895195225863134e+00 -3.2660642944506435e-03 1 0 0 -122 1 2 2.1000000000000001e+00 -1.7710062286867227e+00 8.8632972747936982e+00 2.7539546373495405e+00 1 0 0 -123 1 2 2.1000000000000001e+00 -1.7477123009753157e+00 2.8525136185930755e+00 2.7618236550482624e+00 1 0 0 -124 1 3 -1.0500000000000000e+00 -1.9683041681687321e+00 -3.1339314943114260e-02 1.1357434005060227e+00 1 0 0 -125 1 3 -1.0500000000000000e+00 -1.6130256197699246e+00 2.7595328394810750e+00 1.1370135266718489e+00 1 0 0 -126 1 4 -9.4999999999999996e-01 -4.2537337856944379e+00 1.5632441680250189e+00 1.0069648786930525e+00 1 0 0 -127 1 3 -1.0500000000000000e+00 -5.6028806915798572e+00 3.5553457721911457e+00 3.2357688590305571e+00 1 0 0 
-128 1 3 -1.0500000000000000e+00 -1.8598151617417997e+00 1.3781457768821710e+00 3.2374742127375917e+00 1 0 0 -129 1 3 -1.0500000000000000e+00 -3.0132009706882785e+00 3.7041715558434340e+00 3.0459588995882712e+00 1 0 0 -130 1 5 4.2499999999999999e-01 -4.7606046035337171e+00 6.8439349642540748e-01 1.1505607409996657e+00 1 0 0 -131 1 1 1.5750000000000000e+00 4.8168369057153271e-02 5.9724605712145085e+00 -3.2704117388746567e-03 1 0 0 -132 1 2 2.1000000000000001e+00 -4.3791817898634946e+00 4.3804013865484848e+00 2.7539658968809935e+00 1 0 0 -133 1 2 2.1000000000000001e+00 -4.2995319515097883e+00 7.3353882982958218e+00 2.7618078686262244e+00 1 0 0 -134 1 3 -1.0500000000000000e+00 -4.5201407292494871e+00 4.4515523522287275e+00 1.1357346289073345e+00 1 0 0 -135 1 3 -1.0500000000000000e+00 -4.1648406595123815e+00 7.2424265294413708e+00 1.1369912415882819e+00 1 0 0 -136 1 4 -9.4999999999999996e-01 -1.6454953945829232e+00 6.0462304571223804e+00 1.0070026477177905e+00 1 0 0 -137 1 3 -1.0500000000000000e+00 -2.9946439468920527e+00 8.0382690637231917e+00 3.2357743104080114e+00 1 0 0 -138 1 3 -1.0500000000000000e+00 -4.4116515205365854e+00 5.8610507068010520e+00 3.2374621599322246e+00 1 0 0 -139 1 3 -1.0500000000000000e+00 -5.5650494733788616e+00 8.1871266439263444e+00 3.0459458790032787e+00 1 0 0 -140 1 5 4.2499999999999999e-01 -2.1524218145509142e+00 5.1673462038601450e+00 1.1508591924041909e+00 1 0 0 -141 1 1 1.5750000000000000e+00 -2.5438859347542353e+00 7.4761697245415135e+00 -3.7582632092334478e-03 1 0 0 -142 1 2 2.1000000000000001e+00 -3.3329363804842851e+00 1.0236811376637078e-01 -2.7609530240071400e+00 1 0 0 -143 1 2 2.1000000000000001e+00 -3.3561829873008966e+00 6.1132000526679882e+00 -2.7688597295862918e+00 1 0 0 -144 1 3 -1.0500000000000000e+00 -3.1354235356298261e+00 8.9971842040066932e+00 -1.1427807792040543e+00 1 0 0 -145 1 3 -1.0500000000000000e+00 -3.4909182600583835e+00 6.2059094368932080e+00 -1.1439561958179105e+00 1 0 0 -146 1 4 -9.4999999999999996e-01 
-8.5008735848989936e-01 7.4024836920448358e+00 -1.0139778123793981e+00 1 0 0 -147 1 3 -1.0500000000000000e+00 4.9904929440264922e-01 5.4102852955378324e+00 -3.2425656000684357e+00 1 0 0 -148 1 3 -1.0500000000000000e+00 -3.2438965163198175e+00 7.5875255784303768e+00 -3.2446584652783006e+00 1 0 0 -149 1 3 -1.0500000000000000e+00 -2.0907332304654513e+00 5.2614599131077355e+00 -3.0529635108663689e+00 1 0 0 -150 1 5 4.2499999999999999e-01 -3.4327252441785028e-01 8.2812545429943718e+00 -1.1574891817763771e+00 1 0 0 -151 1 1 1.5750000000000000e+00 -5.1520707526539180e+00 2.9932422696247407e+00 -3.7575710303681120e-03 1 0 0 -152 1 2 2.1000000000000001e+00 -7.2476725112305829e-01 4.5852758198773422e+00 -2.7609523624229535e+00 1 0 0 -153 1 2 2.1000000000000001e+00 -8.0438924000571177e-01 1.6303201218117742e+00 -2.7688428916587142e+00 1 0 0 -154 1 3 -1.0500000000000000e+00 -5.8358168128787469e-01 4.5142909571471250e+00 -1.1427842365077652e+00 1 0 0 -155 1 3 -1.0500000000000000e+00 -9.3910616162538574e-01 1.7230269705662700e+00 -1.1439410014752056e+00 1 0 0 -156 1 4 -9.4999999999999996e-01 -3.4582854395786136e+00 2.9195566902968508e+00 -1.0139875801681342e+00 1 0 0 -157 1 3 -1.0500000000000000e+00 -2.1091939503703152e+00 9.2735770730401157e-01 -3.2425784113786129e+00 1 0 0 -158 1 3 -1.0500000000000000e+00 -6.9205988720759315e-01 3.1045918731948028e+00 -3.2446474792644775e+00 1 0 0 -159 1 3 -1.0500000000000000e+00 4.6110929925033162e-01 7.7850537414041909e-01 -3.0529474813034367e+00 1 0 0 -160 1 5 4.2499999999999999e-01 -2.9514312473451580e+00 3.7983374737998545e+00 -1.1575435527823981e+00 1 0 0 -161 1 1 1.5750000000000000e+00 2.6563134001835174e+00 1.0455371516328153e+01 -3.2734775751421807e-03 0 0 0 -162 1 2 2.1000000000000001e+00 3.2200000793507915e+00 -1.8034168055802969e+01 2.7539699954278376e+00 0 1 0 -163 1 2 2.1000000000000001e+00 3.4686304351053021e+00 1.1818330371923647e+01 2.7618152520441335e+00 0 0 0 -164 1 3 -1.0500000000000000e+00 3.2480341177807297e+00 
8.9344903893301542e+00 1.1357344410643542e+00 0 0 0 -165 1 3 -1.0500000000000000e+00 3.6033098122302398e+00 1.1725346434065575e+01 1.1370112044265888e+00 0 0 0 -166 1 4 -9.4999999999999996e-01 9.6267397491767959e-01 1.0529174885771052e+01 1.0070252189892397e+00 0 0 0 -167 1 3 -1.0500000000000000e+00 -3.8653877399151604e-01 1.2521163438190538e+01 3.2357641403694473e+00 0 0 0 -168 1 3 -1.0500000000000000e+00 3.3565301561723722e+00 1.0343985356367806e+01 3.2374269175278840e+00 0 0 0 -169 1 3 -1.0500000000000000e+00 2.2031200709181498e+00 1.2669998832466721e+01 3.0459640112402759e+00 0 0 0 -170 1 5 4.2499999999999999e-01 4.5576637197079073e-01 9.6503202047150296e+00 1.1510612204759685e+00 0 0 0 -171 1 1 1.5750000000000000e+00 5.2644962792255114e+00 1.4938261534530948e+01 -3.2647451325473753e-03 0 0 0 -172 1 2 2.1000000000000001e+00 8.3716356417033211e-01 1.3346218268802954e+01 2.7539470758802835e+00 0 0 0 -173 1 2 2.1000000000000001e+00 9.1681075011299562e-01 1.6301231345167661e+01 2.7618125766893566e+00 0 0 0 -174 1 3 -1.0500000000000000e+00 6.9620077398076852e-01 1.3417388047104328e+01 1.1357329589097631e+00 0 0 0 -175 1 3 -1.0500000000000000e+00 1.0514831822196271e+00 1.6208254021374852e+01 1.1370122634561355e+00 0 0 0 -176 1 4 -9.4999999999999996e-01 3.5707945950799811e+00 1.5012003258438757e+01 1.0069522165247839e+00 0 0 0 -177 1 3 -1.0500000000000000e+00 2.2216102376958187e+00 1.7004067428805609e+01 3.2357767642373751e+00 0 0 0 -178 1 3 -1.0500000000000000e+00 8.0468569905412757e-01 1.4826903246577299e+01 3.2374729366625967e+00 0 0 0 -179 1 3 -1.0500000000000000e+00 -3.4870265184444982e-01 1.7152922875565228e+01 3.0459529855636607e+00 0 0 0 -180 1 5 4.2499999999999999e-01 3.0638743761134464e+00 1.4133083814238685e+01 1.1505085237616779e+00 0 0 0 -181 1 1 1.5750000000000000e+00 2.6724321383913008e+00 1.6441984484756983e+01 -3.7572437476853793e-03 0 0 0 -182 1 2 2.1000000000000001e+00 1.8833962660382699e+00 9.0681931419489921e+00 -2.7609335407081872e+00 0 0 0 -183 
1 2 2.1000000000000001e+00 1.8601111196809121e+00 1.5079040342628350e+01 -2.7688508235497418e+00 0 0 0 -184 1 3 -1.0500000000000000e+00 1.8555857070306878e+00 -1.7900270605410917e+01 -1.1427969927137482e+00 0 1 0 -185 1 3 -1.0500000000000000e+00 1.7253995479611604e+00 1.5171757634659638e+01 -1.1439490826924779e+00 0 0 0 -186 1 4 -9.4999999999999996e-01 4.3662322107158733e+00 1.6368309943641055e+01 -1.0139855229210077e+00 0 0 0 -187 1 3 -1.0500000000000000e+00 5.7153027190516923e+00 1.4376080986155142e+01 -3.2425652845281192e+00 0 0 0 -188 1 3 -1.0500000000000000e+00 1.9724442262540727e+00 1.6553320295057897e+01 -3.2446645540223695e+00 0 0 0 -189 1 3 -1.0500000000000000e+00 3.1255993997228746e+00 1.4227253671506265e+01 -3.0529479201541534e+00 0 0 0 -190 1 5 4.2499999999999999e-01 4.8730503872703981e+00 1.7247084738816252e+01 -1.1575108950696489e+00 0 0 0 -191 1 1 1.5750000000000000e+00 6.4287605771619738e-02 1.1959047499133955e+01 -3.7503865813821591e-03 0 0 0 -192 1 2 2.1000000000000001e+00 4.4915804029907438e+00 1.3551111831722395e+01 -2.7609471625592663e+00 0 0 0 -193 1 2 2.1000000000000001e+00 4.4119577432399595e+00 1.0596109686747351e+01 -2.7688628659227765e+00 0 0 0 -194 1 3 -1.0500000000000000e+00 4.6327720860930999e+00 1.3480101674103746e+01 -1.1428277031521787e+00 0 0 0 -195 1 3 -1.0500000000000000e+00 4.2772319797087590e+00 1.0688840900034439e+01 -1.1439426903633443e+00 0 0 0 -196 1 4 -9.4999999999999996e-01 1.7580042741312081e+00 1.1885300143491271e+01 -1.0140430923412715e+00 0 0 0 -197 1 3 -1.0500000000000000e+00 3.1071547577327969e+00 9.8931852268420180e+00 -3.2425655357851380e+00 0 0 0 -198 1 3 -1.0500000000000000e+00 4.5242699949370255e+00 1.2070431784027537e+01 -3.2446352562949325e+00 0 0 0 -199 1 3 -1.0500000000000000e+00 5.6774301856846563e+00 9.7443531193086770e+00 -3.0529533590117293e+00 0 0 0 -200 1 5 4.2499999999999999e-01 2.2648570458734767e+00 1.2764054876209290e+01 -1.1579888107521015e+00 0 0 0 -201 1 1 1.5750000000000000e+00 
7.8163274667709004e+00 1.0455361822489916e+01 -3.2709891462765484e-03 0 0 0 -202 1 2 2.1000000000000001e+00 -1.2260022563792901e+01 -1.8034171036451074e+01 2.7539593838063130e+00 1 1 0 -203 1 2 2.1000000000000001e+00 -1.2011372927310228e+01 1.1818343457748362e+01 2.7618265468659775e+00 1 0 0 -204 1 3 -1.0500000000000000e+00 8.4080197705526096e+00 8.9344884845612391e+00 1.1357635616008466e+00 0 0 0 -205 1 3 -1.0500000000000000e+00 -1.1876687672220676e+01 1.1725350016313566e+01 1.1370089651291622e+00 1 0 0 -206 1 4 -9.4999999999999996e-01 6.1226432401957958e+00 1.0529133102795765e+01 1.0070115060204152e+00 0 0 0 -207 1 3 -1.0500000000000000e+00 4.7734668544665446e+00 1.2521166817291366e+01 3.2357690806651949e+00 0 0 0 -208 1 3 -1.0500000000000000e+00 -1.2123474030539958e+01 1.0343970332896109e+01 3.2374431734477387e+00 1 0 0 -209 1 3 -1.0500000000000000e+00 7.3631314638252157e+00 1.2669994182427597e+01 3.0459606244004132e+00 0 0 0 -210 1 5 4.2499999999999999e-01 5.6157533562485771e+00 9.6503133176461908e+00 1.1509245775060091e+00 0 0 0 -211 1 1 1.5750000000000000e+00 -1.0215496529349888e+01 1.4938261423331294e+01 -3.2671984669594423e-03 1 0 0 -212 1 2 2.1000000000000001e+00 5.9971582839890871e+00 1.3346232288807276e+01 2.7539563176022028e+00 0 0 0 -213 1 2 2.1000000000000001e+00 6.0767870017495582e+00 1.6301224009304729e+01 2.7618126124878195e+00 0 0 0 -214 1 3 -1.0500000000000000e+00 5.8562070155499164e+00 1.3417384395333375e+01 1.1357183501401593e+00 0 0 0 -215 1 3 -1.0500000000000000e+00 6.2114804261402661e+00 1.6208264386955651e+01 1.1370050309136168e+00 0 0 0 -216 1 4 -9.4999999999999996e-01 -1.1909211620817819e+01 1.5011994029862020e+01 1.0069718593232118e+00 1 0 0 -217 1 3 -1.0500000000000000e+00 7.3816555131360566e+00 1.7004091118074196e+01 3.2357656756583246e+00 0 0 0 -218 1 3 -1.0500000000000000e+00 5.9646835115051537e+00 1.4826873453180109e+01 3.2374785589618078e+00 0 0 0 -219 1 3 -1.0500000000000000e+00 4.8112906275483347e+00 1.7152926240598543e+01 
3.0459544088802879e+00 0 0 0 -220 1 5 4.2499999999999999e-01 8.2239086021434531e+00 1.4133118218483940e+01 1.1506108499324537e+00 0 0 0 -221 1 1 1.5750000000000000e+00 7.8324408920161304e+00 1.6441983610717937e+01 -3.7594297909890884e-03 0 0 0 -222 1 2 2.1000000000000001e+00 7.0433997235143551e+00 9.0681792148270475e+00 -2.7609433793505778e+00 0 0 0 -223 1 2 2.1000000000000001e+00 7.0201340001296231e+00 1.5079048983075904e+01 -2.7688496626705303e+00 0 0 0 -224 1 3 -1.0500000000000000e+00 7.0155782305852874e+00 -1.7900266980861765e+01 -1.1427797504921990e+00 0 1 0 -225 1 3 -1.0500000000000000e+00 6.8854016068099639e+00 1.5171747073972536e+01 -1.1439426401780413e+00 0 0 0 -226 1 4 -9.4999999999999996e-01 -1.1113777147342798e+01 1.6368296182520734e+01 -1.0139673644246923e+00 1 0 0 -227 1 3 -1.0500000000000000e+00 -9.7646511702758900e+00 1.4376104836232383e+01 -3.2425769131107307e+00 1 0 0 -228 1 3 -1.0500000000000000e+00 7.1324461842806457e+00 1.6553349104877181e+01 -3.2446699530389926e+00 0 0 0 -229 1 3 -1.0500000000000000e+00 8.2856077950535507e+00 1.4227248777578755e+01 -3.0529496330623935e+00 0 0 0 -230 1 5 4.2499999999999999e-01 -1.0606916142948355e+01 1.7247118324243235e+01 -1.1574224546860155e+00 1 0 0 -231 1 1 1.5750000000000000e+00 5.2242739244354812e+00 1.1959056994618571e+01 -3.7532727914637576e-03 0 0 0 -232 1 2 2.1000000000000001e+00 -1.0988443129796577e+01 1.3551110560593809e+01 -2.7609569176995983e+00 1 0 0 -233 1 2 2.1000000000000001e+00 -1.1068048695702130e+01 1.0596121856381689e+01 -2.7688513474828484e+00 1 0 0 -234 1 3 -1.0500000000000000e+00 -1.0847241951081806e+01 1.3480099061382369e+01 -1.1428004053747500e+00 1 0 0 -235 1 3 -1.0500000000000000e+00 -1.1202765363241992e+01 1.0688845693686350e+01 -1.1439459591196393e+00 1 0 0 -236 1 4 -9.4999999999999996e-01 6.9180337353417016e+00 1.1885339729941098e+01 -1.0140270633946198e+00 0 0 0 -237 1 3 -1.0500000000000000e+00 8.2671524098729279e+00 9.8931836876833437e+00 -3.2425721304984254e+00 0 0 0 -238 1 3 
-1.0500000000000000e+00 -1.0955734208517388e+01 1.2070414566453039e+01 -3.2446182476609398e+00 1 0 0 -239 1 3 -1.0500000000000000e+00 -9.8025577056732693e+00 9.7443480474548068e+00 -3.0529560570682035e+00 1 0 0 -240 1 5 4.2499999999999999e-01 7.4248734752605259e+00 1.2764066295177155e+01 -1.1578424655573620e+00 0 0 0 -241 1 1 1.5750000000000000e+00 -7.6636863980105723e+00 1.0455371549550875e+01 -3.2736966669304479e-03 1 0 0 -242 1 2 2.1000000000000001e+00 -7.0999990710355192e+00 -1.8034168111282145e+01 2.7539701102788019e+00 1 1 0 -243 1 2 2.1000000000000001e+00 -6.8513690426892451e+00 1.1818330417854160e+01 2.7618149786464521e+00 1 0 0 -244 1 3 -1.0500000000000000e+00 -7.0719662711101812e+00 8.9344903042783628e+00 1.1357345069924225e+00 1 0 0 -245 1 3 -1.0500000000000000e+00 -6.7166907540342233e+00 1.1725346382842613e+01 1.1370110473100272e+00 1 0 0 -246 1 4 -9.4999999999999996e-01 -9.3573260672622727e+00 1.0529174208841528e+01 1.0070252131964725e+00 1 0 0 -247 1 3 -1.0500000000000000e+00 -1.0706539610507846e+01 1.2521163280396088e+01 3.2357640814962956e+00 1 0 0 -248 1 3 -1.0500000000000000e+00 -6.9634703631137729e+00 1.0343985091037393e+01 3.2374266804707119e+00 1 0 0 -249 1 3 -1.0500000000000000e+00 -8.1168803339350379e+00 1.2669999097078030e+01 3.0459638003843619e+00 1 0 0 -250 1 5 4.2499999999999999e-01 -9.8642334722321223e+00 9.6503204443188402e+00 1.1510602007101145e+00 1 0 0 -251 1 1 1.5750000000000000e+00 -5.0555038517207738e+00 1.4938261738890343e+01 -3.2644360310367659e-03 1 0 0 -252 1 2 2.1000000000000001e+00 -9.4828364774811842e+00 1.3346217905042987e+01 2.7539473478098913e+00 1 0 0 -253 1 2 2.1000000000000001e+00 -9.4031892666074217e+00 1.6301231779632818e+01 2.7618124621169056e+00 1 0 0 -254 1 3 -1.0500000000000000e+00 -9.6237991655729793e+00 1.3417388438033317e+01 1.1357331457960171e+00 1 0 0 -255 1 3 -1.0500000000000000e+00 -9.2685168345328872e+00 1.6208253547145770e+01 1.1370123846061553e+00 1 0 0 -256 1 4 -9.4999999999999996e-01 
-6.7492056915652885e+00 1.5012003321026935e+01 1.0069523554452928e+00 1 0 0 -257 1 3 -1.0500000000000000e+00 -8.0983893100633519e+00 1.7004067932473877e+01 3.2357766175726148e+00 1 0 0 -258 1 3 -1.0500000000000000e+00 -9.5153144179962954e+00 1.4826903279627327e+01 3.2374724255443947e+00 1 0 0 -259 1 3 -1.0500000000000000e+00 -1.0668703168553188e+01 1.7152922672989472e+01 3.0459531606477839e+00 1 0 0 -260 1 5 4.2499999999999999e-01 -7.2561252036506980e+00 1.4133084321951994e+01 1.1505098705128987e+00 1 0 0 -261 1 1 1.5750000000000000e+00 -7.6475676736905971e+00 1.6441984614323157e+01 -3.7576410911643876e-03 1 0 0 -262 1 2 2.1000000000000001e+00 -8.4366040275132406e+00 9.0681930278031473e+00 -2.7609335500569747e+00 1 0 0 -263 1 2 2.1000000000000001e+00 -8.4598890146513455e+00 1.5079040601080134e+01 -2.7688510489233167e+00 1 0 0 -264 1 3 -1.0500000000000000e+00 -8.4644143079304612e+00 -1.7900269957740477e+01 -1.1427967775321957e+00 1 1 0 -265 1 3 -1.0500000000000000e+00 -8.5946007116663132e+00 1.5171756827133724e+01 -1.1439485975510628e+00 1 0 0 -266 1 4 -9.4999999999999996e-01 -5.9537679681653151e+00 1.6368309168283670e+01 -1.0139856989316165e+00 1 0 0 -267 1 3 -1.0500000000000000e+00 -4.6046964494860898e+00 1.4376081286353607e+01 -3.2425649205705174e+00 1 0 0 -268 1 3 -1.0500000000000000e+00 -8.3475558754399124e+00 1.6553320859356912e+01 -3.2446655570405536e+00 1 0 0 -269 1 3 -1.0500000000000000e+00 -7.1944009052872069e+00 1.4227253415250768e+01 -3.0529479683150100e+00 1 0 0 -270 1 5 4.2499999999999999e-01 -5.4469496542482680e+00 1.7247084860834857e+01 -1.1575125267203532e+00 1 0 0 -271 1 1 1.5750000000000000e+00 -1.0255712633101528e+01 1.1959047249232842e+01 -3.7503693590110743e-03 1 0 0 -272 1 2 2.1000000000000001e+00 -5.8284201346147073e+00 1.3551112633249122e+01 -2.7609471419709433e+00 1 0 0 -273 1 2 2.1000000000000001e+00 -5.9080424941932153e+00 1.0596109220690689e+01 -2.7688628072186683e+00 1 0 0 -274 1 3 -1.0500000000000000e+00 -5.6872277599586996e+00 
1.3480101435889090e+01 -1.1428283866518854e+00 1 0 0 -275 1 3 -1.0500000000000000e+00 -6.0427680579917293e+00 1.0688841214141384e+01 -1.1439433517717656e+00 1 0 0 -276 1 4 -9.4999999999999996e-01 -8.5619959665734147e+00 1.1885299915121873e+01 -1.0140429279795615e+00 1 0 0 -277 1 3 -1.0500000000000000e+00 -7.2128453343082271e+00 9.8931855774587198e+00 -3.2425657643378525e+00 1 0 0 -278 1 3 -1.0500000000000000e+00 -5.7957299312633053e+00 1.2070431551626321e+01 -3.2446350249445235e+00 1 0 0 -279 1 3 -1.0500000000000000e+00 -4.6425695661462196e+00 9.7443530477275679e+00 -3.0529532196506795e+00 1 0 0 -280 1 5 4.2499999999999999e-01 -8.0551425673563894e+00 1.2764055666864170e+01 -1.1579876909028854e+00 1 0 0 -281 1 1 1.5750000000000000e+00 -2.5036724751688171e+00 1.0455361759083811e+01 -3.2712711680531470e-03 1 0 0 -282 1 2 2.1000000000000001e+00 -1.9400228605798482e+00 -1.8034171454532189e+01 2.7539596865588258e+00 1 1 0 -283 1 2 2.1000000000000001e+00 -1.6913729123417731e+00 1.1818343618664318e+01 2.7618267327017616e+00 1 0 0 -284 1 3 -1.0500000000000000e+00 -1.9119803931372488e+00 8.9344892347650671e+00 1.1357639880848378e+00 1 0 0 -285 1 3 -1.0500000000000000e+00 -1.5566878690749917e+00 1.1725349553088837e+01 1.1370088549317554e+00 1 0 0 -286 1 4 -9.4999999999999996e-01 -4.1973568501655052e+00 1.0529132534062505e+01 1.0070110973834296e+00 1 0 0 -287 1 3 -1.0500000000000000e+00 -5.5465318119038987e+00 1.2521167504631986e+01 3.2357693370264009e+00 1 0 0 -288 1 3 -1.0500000000000000e+00 -1.8034741996403554e+00 1.0343970336471010e+01 3.2374423050401600e+00 1 0 0 -289 1 3 -1.0500000000000000e+00 -2.9568695567301644e+00 1.2669994183537906e+01 3.0459605033388062e+00 1 0 0 -290 1 5 4.2499999999999999e-01 -4.7042469088330439e+00 9.6503130783144044e+00 1.1509221001927727e+00 1 0 0 -291 1 1 1.5750000000000000e+00 1.0450346973463098e-01 1.4938261236297333e+01 -3.2670506730401172e-03 1 0 0 -292 1 2 2.1000000000000001e+00 -4.3228420481760716e+00 1.3346232732007316e+01 
2.7539563932159634e+00 1 0 0 -293 1 2 2.1000000000000001e+00 -4.2432134692497367e+00 1.6301223752354527e+01 2.7618123881160574e+00 1 0 0 -294 1 3 -1.0500000000000000e+00 -4.4637927327409024e+00 1.3417383967610970e+01 1.1357180288639768e+00 1 0 0 -295 1 3 -1.0500000000000000e+00 -4.1085196585471646e+00 1.6208264626532635e+01 1.1370043708840640e+00 1 0 0 -296 1 4 -9.4999999999999996e-01 -1.5892118927513685e+00 1.5011993465855195e+01 1.0069717418888526e+00 1 0 0 -297 1 3 -1.0500000000000000e+00 -2.9383445989094135e+00 1.7004091419961096e+01 3.2357659595260095e+00 1 0 0 -298 1 3 -1.0500000000000000e+00 -4.3553166601485476e+00 1.4826873197424877e+01 3.2374790045700852e+00 1 0 0 -299 1 3 -1.0500000000000000e+00 -5.5087090978359239e+00 1.7152925908147093e+01 3.0459546197474001e+00 1 0 0 -300 1 5 4.2499999999999999e-01 -2.0960913924612612e+00 1.4133118310388799e+01 1.1506099517336104e+00 1 0 0 -301 1 1 1.5750000000000000e+00 -2.4875591017952310e+00 1.6441983631942183e+01 -3.7595151500315893e-03 1 0 0 -302 1 2 2.1000000000000001e+00 -3.2765996605939858e+00 9.0681789268622346e+00 -2.7609433563661456e+00 1 0 0 -303 1 2 2.1000000000000001e+00 -3.2998653023383682e+00 1.5079049184116290e+01 -2.7688496671533649e+00 1 0 0 -304 1 3 -1.0500000000000000e+00 -3.3044219890671753e+00 -1.7900266681096994e+01 -1.1427795585717835e+00 1 1 0 -305 1 3 -1.0500000000000000e+00 -3.4345989454937165e+00 1.5171746655803712e+01 -1.1439426919442308e+00 1 0 0 -306 1 4 -9.4999999999999996e-01 -7.9377665672300779e-01 1.6368296633100169e+01 -1.0139672617067319e+00 1 0 0 -307 1 3 -1.0500000000000000e+00 5.5534868438584084e-01 1.4376105151954103e+01 -3.2425775178456107e+00 1 0 0 -308 1 3 -1.0500000000000000e+00 -3.1875541544200496e+00 1.6553349368867583e+01 -3.2446705490554963e+00 1 0 0 -309 1 3 -1.0500000000000000e+00 -2.0343923260202423e+00 1.4227248534036743e+01 -3.0529497284009599e+00 1 0 0 -310 1 5 4.2499999999999999e-01 -2.8691619587030104e-01 1.7247118382062236e+01 -1.1574212565794415e+00 1 0 0 -311 
1 1 1.5750000000000000e+00 -5.0957261453677205e+00 1.1959057139099524e+01 -3.7531547867057924e-03 1 0 0 -312 1 2 2.1000000000000001e+00 -6.6844354483350799e-01 1.3551110266998439e+01 -2.7609568234960040e+00 1 0 0 -313 1 2 2.1000000000000001e+00 -7.4804896223840167e-01 1.0596122230984061e+01 -2.7688515273933572e+00 1 0 0 -314 1 3 -1.0500000000000000e+00 -5.2724202123535946e-01 1.3480098969688658e+01 -1.1428001598979964e+00 1 0 0 -315 1 3 -1.0500000000000000e+00 -8.8276536122547178e-01 1.0688845631800483e+01 -1.1439460002810460e+00 1 0 0 -316 1 4 -9.4999999999999996e-01 -3.4019662726515918e+00 1.1885340094739142e+01 -1.0140272109664838e+00 1 0 0 -317 1 3 -1.0500000000000000e+00 -2.0528473081502625e+00 9.8931841810449264e+00 -3.2425722407459503e+00 1 0 0 -318 1 3 -1.0500000000000000e+00 -6.3573424619330154e-01 1.2070414396821146e+01 -3.2446180957081046e+00 1 0 0 -319 1 3 -1.0500000000000000e+00 5.1744185669619647e-01 9.7443480289127145e+00 -3.0529559039522471e+00 1 0 0 -320 1 5 4.2499999999999999e-01 -2.8951265260920414e+00 1.2764066156472012e+01 -1.1578424373010439e+00 1 0 0 -321 1 1 1.5750000000000000e+00 2.4873158312812595e+00 -1.6442085140542702e+01 -3.2731044767775330e-03 0 1 0 -322 1 2 2.1000000000000001e+00 3.2763303149493339e+00 -9.0683326576281793e+00 2.7539613504973826e+00 0 1 0 -323 1 2 2.1000000000000001e+00 3.2996440062323522e+00 -1.5079163855001152e+01 2.7618052351303000e+00 0 1 0 -324 1 3 -1.0500000000000000e+00 3.0790227247234743e+00 -1.7962996900519780e+01 1.1357369105052264e+00 0 1 0 -325 1 3 -1.0500000000000000e+00 3.4343217071143606e+00 -1.5172137633996615e+01 1.1369964612814574e+00 0 1 0 -326 1 4 -9.4999999999999996e-01 7.9370044014982000e-01 -1.6368273971208470e+01 1.0070225988534567e+00 0 1 0 -327 1 3 -1.0500000000000000e+00 -5.5550646979719431e-01 -1.4376303482612908e+01 3.2357750050409617e+00 0 1 0 -328 1 3 -1.0500000000000000e+00 3.1875194117149164e+00 -1.6553483052839894e+01 3.2374345381514082e+00 0 1 0 -329 1 3 -1.0500000000000000e+00 
2.0341105779595932e+00 -1.4227436497588396e+01 3.0459506035312476e+00 0 1 0 -330 1 5 4.2499999999999999e-01 2.8674517218617801e-01 -1.7247173634562643e+01 1.1510526978509290e+00 0 1 0 -331 1 1 1.5750000000000000e+00 5.0954822332712713e+00 -1.1959202907500007e+01 -3.2689027687062833e-03 0 1 0 -332 1 2 2.1000000000000001e+00 6.6817149298760725e-01 -1.3551259854316488e+01 2.7539499370113383e+00 0 1 0 -333 1 2 2.1000000000000001e+00 7.4779844304577914e-01 -1.0596215429331199e+01 2.7618213942130811e+00 0 1 0 -334 1 3 -1.0500000000000000e+00 5.2719372455953284e-01 -1.3480064946544015e+01 1.1357459580603049e+00 0 1 0 -335 1 3 -1.0500000000000000e+00 8.8247034442780148e-01 -1.0689208474290135e+01 1.1370185048637591e+00 0 1 0 -336 1 4 -9.4999999999999996e-01 3.4017986335829793e+00 -1.1885434663339312e+01 1.0069865452366784e+00 0 1 0 -337 1 3 -1.0500000000000000e+00 2.0525899417440829e+00 -9.8934041355699218e+00 3.2357693155049283e+00 0 1 0 -338 1 3 -1.0500000000000000e+00 6.3569045021168691e-01 -1.2070565156037954e+01 3.2374462468578749e+00 0 1 0 -339 1 3 -1.0500000000000000e+00 -5.1770556733988826e-01 -9.7445696176577137e+00 3.0459634759630063e+00 0 1 0 -340 1 5 4.2499999999999999e-01 2.8948985199675210e+00 -1.2764300231969152e+01 1.1507672489080729e+00 0 1 0 -341 1 1 1.5750000000000000e+00 2.5034344422152106e+00 -1.0455505379844039e+01 -3.7523281344018500e-03 0 1 0 -342 1 2 2.1000000000000001e+00 1.7144048897053548e+00 -1.7829278827200582e+01 -2.7609375642366514e+00 0 1 0 -343 1 2 2.1000000000000001e+00 1.6910990452522778e+00 -1.1818431965120766e+01 -2.7688521544223557e+00 0 1 0 -344 1 3 -1.0500000000000000e+00 1.9119323284819547e+00 -8.9344496581989912e+00 -1.1428177382481586e+00 0 1 0 -345 1 3 -1.0500000000000000e+00 1.5563892707225317e+00 -1.1725703690663577e+01 -1.1439423397491009e+00 0 1 0 -346 1 4 -9.4999999999999996e-01 4.1971837724422230e+00 -1.0529223437421583e+01 -1.0140304498512833e+00 0 1 0 -347 1 3 -1.0500000000000000e+00 5.5462795339137028e+00 
-1.2521387157719200e+01 -3.2425682558207303e+00 0 1 0 -348 1 3 -1.0500000000000000e+00 1.8034351269669298e+00 -1.0344148858506074e+01 -3.2446357157915351e+00 0 1 0 -349 1 3 -1.0500000000000000e+00 2.9565987372624374e+00 -1.2670221018382025e+01 -3.0529474743907752e+00 0 1 0 -350 1 5 4.2499999999999999e-01 4.7040264803948517e+00 -9.6504758402318025e+00 -1.1578593135248099e+00 0 1 0 -351 1 1 1.5750000000000000e+00 -1.0471558842234785e-01 -1.4938400364160536e+01 -3.7532330236462741e-03 0 1 0 -352 1 2 2.1000000000000001e+00 4.3225692029115947e+00 -1.3346363976966739e+01 -2.7609380283517364e+00 0 1 0 -353 1 2 2.1000000000000001e+00 4.2429709170616423e+00 -1.6301375789705347e+01 -2.7688688655989422e+00 0 1 0 -354 1 3 -1.0500000000000000e+00 4.4637558178452430e+00 -1.3417379075377282e+01 -1.1428141432543626e+00 0 1 0 -355 1 3 -1.0500000000000000e+00 4.1082428565530851e+00 -1.6208643768110964e+01 -1.1439575855412993e+00 0 1 0 -356 1 4 -9.4999999999999996e-01 1.5890483233411512e+00 -1.5012116338313390e+01 -1.0140187450303042e+00 0 1 0 -357 1 3 -1.0500000000000000e+00 2.9381893438004383e+00 -1.7004282362058166e+01 -3.2425562113891360e+00 0 1 0 -358 1 3 -1.0500000000000000e+00 4.3552644383759880e+00 -1.4827036483227031e+01 -3.2446472860660487e+00 0 1 0 -359 1 3 -1.0500000000000000e+00 5.5084200493482207e+00 -1.7153088410673128e+01 -3.0529633116519523e+00 0 1 0 -360 1 5 4.2499999999999999e-01 2.0958519709362449e+00 -1.4133378407095304e+01 -1.1577903583398630e+00 0 1 0 -361 1 1 1.5750000000000000e+00 7.6473300699624751e+00 -1.6442095286891750e+01 -3.2703635846598189e-03 0 1 0 -362 1 2 2.1000000000000001e+00 -1.2203692618505173e+01 -9.0683357616495428e+00 2.7539509521771546e+00 1 1 0 -363 1 2 2.1000000000000001e+00 -1.2180359496617088e+01 -1.5079150525191121e+01 2.7618176073756171e+00 1 1 0 -364 1 3 -1.0500000000000000e+00 8.2390082609686672e+00 -1.7962998453343072e+01 1.1357656996613876e+00 0 1 0 -365 1 3 -1.0500000000000000e+00 -1.2045675817277496e+01 -1.5172133958248716e+01 
1.1369942170488319e+00 1 1 0 -366 1 4 -9.4999999999999996e-01 5.9536694527401863e+00 -1.6368316601029903e+01 1.0070080197153732e+00 0 1 0 -367 1 3 -1.0500000000000000e+00 4.6044999502781359e+00 -1.4376299637797334e+01 3.2357807367951334e+00 0 1 0 -368 1 3 -1.0500000000000000e+00 -1.2292484484628849e+01 -1.6553497884316052e+01 3.2374506520275279e+00 1 1 0 -369 1 3 -1.0500000000000000e+00 7.1941227993251466e+00 -1.4227441518171391e+01 3.0459466724115387e+00 0 1 0 -370 1 5 4.2499999999999999e-01 5.4467316809031381e+00 -1.7247181780075401e+01 1.1509108228447804e+00 0 1 0 -371 1 1 1.5750000000000000e+00 -1.0384509878063577e+01 -1.1959202847679125e+01 -3.2711064931607581e-03 1 1 0 -372 1 2 2.1000000000000001e+00 5.8281658705053552e+00 -1.3551245908771351e+01 2.7539592284405323e+00 0 1 0 -373 1 2 2.1000000000000001e+00 5.9077738247842859e+00 -1.0596222346036228e+01 2.7618212472175525e+00 0 1 0 -374 1 3 -1.0500000000000000e+00 5.6872004561341356e+00 -1.3480068502065599e+01 1.1357317122361454e+00 0 1 0 -375 1 3 -1.0500000000000000e+00 6.0424675752151202e+00 -1.0689198215110313e+01 1.1370119790707562e+00 0 1 0 -376 1 4 -9.4999999999999996e-01 -1.2078208028614750e+01 -1.1885444143137754e+01 1.0070061177644991e+00 1 1 0 -377 1 3 -1.0500000000000000e+00 7.2126361390250970e+00 -9.8933805239822377e+00 3.2357588745576784e+00 0 1 0 -378 1 3 -1.0500000000000000e+00 5.7956884111784390e+00 -1.2070595254996373e+01 3.2374511312518859e+00 0 1 0 -379 1 3 -1.0500000000000000e+00 4.6422879410256392e+00 -9.7445663758382555e+00 3.0459651484494685e+00 0 1 0 -380 1 5 4.2499999999999999e-01 8.0549327596562676e+00 -1.2764265216608282e+01 1.1508699292293620e+00 0 1 0 -381 1 1 1.5750000000000000e+00 7.6634431466777784e+00 -1.0455505708640878e+01 -3.7548033538037373e-03 0 1 0 -382 1 2 2.1000000000000001e+00 6.8744083301107217e+00 -1.7829293052197659e+01 -2.7609466521728914e+00 0 1 0 -383 1 2 2.1000000000000001e+00 6.8511227655710272e+00 -1.1818423654764077e+01 -2.7688514413745127e+00 0 1 0 -384 1 3 
-1.0500000000000000e+00 7.0719252612919341e+00 -8.9344460915840020e+00 -1.1428009362290084e+00 0 1 0 -385 1 3 -1.0500000000000000e+00 6.7163919273608208e+00 -1.1725714227503657e+01 -1.1439356358725981e+00 0 1 0 -386 1 4 -9.4999999999999996e-01 -1.1282825542264309e+01 -1.0529236312045516e+01 -1.0140115555323934e+00 1 1 0 -387 1 3 -1.0500000000000000e+00 -9.9336734334620846e+00 -1.2521362616099530e+01 -3.2425798517804285e+00 1 1 0 -388 1 3 -1.0500000000000000e+00 6.9634372372059339e+00 -1.0344120541373844e+01 -3.2446411197658893e+00 0 1 0 -389 1 3 -1.0500000000000000e+00 8.1166063278059539e+00 -1.2670225192721546e+01 -3.0529487788076324e+00 0 1 0 -390 1 5 4.2499999999999999e-01 -1.0775939522237591e+01 -9.6504409144075929e+00 -1.1577656550448125e+00 1 1 0 -391 1 1 1.5750000000000000e+00 5.0552705213441183e+00 -1.4938390885101004e+01 -3.7556755525152141e-03 0 1 0 -392 1 2 2.1000000000000001e+00 -1.1157454505789488e+01 -1.3346365259602514e+01 -2.7609477580902304e+00 1 1 0 -393 1 2 2.1000000000000001e+00 -1.1237035180060985e+01 -1.6301363128204752e+01 -2.7688574109066444e+00 1 1 0 -394 1 3 -1.0500000000000000e+00 -1.1016258515593059e+01 -1.3417381597335801e+01 -1.1427868565141956e+00 1 1 0 -395 1 3 -1.0500000000000000e+00 -1.1371754569044754e+01 -1.6208638414131233e+01 -1.1439607996348045e+00 1 1 0 -396 1 4 -9.4999999999999996e-01 6.7490784560440211e+00 -1.5012076202167444e+01 -1.0140027578594140e+00 0 1 0 -397 1 3 -1.0500000000000000e+00 8.0981866850365734e+00 -1.7004283665969044e+01 -3.2425625386787402e+00 0 1 0 -398 1 3 -1.0500000000000000e+00 -1.1124739912558308e+01 -1.4827054551632816e+01 -3.2446301023047264e+00 1 1 0 -399 1 3 -1.0500000000000000e+00 -9.9715680099552539e+00 -1.7153093163802694e+01 -3.0529662797145232e+00 1 1 0 -400 1 5 4.2499999999999999e-01 7.2558685014983197e+00 -1.4133366923834902e+01 -1.1576429969303437e+00 0 1 0 -401 1 1 1.5750000000000000e+00 -7.8326838930931588e+00 -1.6442085371931373e+01 -3.2732823819223711e-03 1 1 0 -402 1 2 
2.1000000000000001e+00 -7.0436687618137706e+00 -9.0683330137854199e+00 2.7539612939828899e+00 1 1 0 -403 1 2 2.1000000000000001e+00 -7.0203552020415820e+00 -1.5079163689271947e+01 2.7618054084239976e+00 1 1 0 -404 1 3 -1.0500000000000000e+00 -7.2409778107259708e+00 -1.7962996502787455e+01 1.1357372000778803e+00 1 1 0 -405 1 3 -1.0500000000000000e+00 -6.8856791733648324e+00 -1.5172137885807103e+01 1.1369963196178823e+00 1 1 0 -406 1 4 -9.4999999999999996e-01 -9.5262993670970957e+00 -1.6368274059361951e+01 1.0070224743079983e+00 1 1 0 -407 1 3 -1.0500000000000000e+00 -1.0875507123551024e+01 -1.4376303441124719e+01 3.2357750518877921e+00 1 1 0 -408 1 3 -1.0500000000000000e+00 -7.1324812203291952e+00 -1.6553483309654993e+01 3.2374341141377876e+00 1 1 0 -409 1 3 -1.0500000000000000e+00 -8.2858895255051763e+00 -1.4227436498303321e+01 3.0459500397149455e+00 1 1 0 -410 1 5 4.2499999999999999e-01 -1.0033255030691558e+01 -1.7247173624416206e+01 1.1510522172719124e+00 1 1 0 -411 1 1 1.5750000000000000e+00 -5.2245178771227252e+00 -1.1959202758526017e+01 -3.2686596132034396e-03 1 1 0 -412 1 2 2.1000000000000001e+00 -9.6518281984812564e+00 -1.3551260332238776e+01 2.7539503258911147e+00 1 1 0 -413 1 2 2.1000000000000001e+00 -9.5722019707608350e+00 -1.0596214958141957e+01 2.7618209577011115e+00 1 1 0 -414 1 3 -1.0500000000000000e+00 -9.7928062006518743e+00 -1.3480064638111777e+01 1.1357461718012836e+00 1 1 0 -415 1 3 -1.0500000000000000e+00 -9.4375295209633592e+00 -1.0689208902166378e+01 1.1370191291599561e+00 1 1 0 -416 1 4 -9.4999999999999996e-01 -6.9182015257625187e+00 -1.1885434141823438e+01 1.0069865076337194e+00 1 1 0 -417 1 3 -1.0500000000000000e+00 -8.2674096516605591e+00 -9.8934036664961500e+00 3.2357692780579193e+00 1 1 0 -418 1 3 -1.0500000000000000e+00 -9.6843094688930229e+00 -1.2070565339252298e+01 3.2374457243298291e+00 1 1 0 -419 1 3 -1.0500000000000000e+00 -1.0837706493502655e+01 -9.7445692971577635e+00 3.0459634859134095e+00 1 1 0 -420 1 5 4.2499999999999999e-01 
-7.4251012846331328e+00 -1.2764299901878989e+01 1.1507685978831965e+00 1 1 0 -421 1 1 1.5750000000000000e+00 -7.8165653710802498e+00 -1.0455505239212570e+01 -3.7526247607146956e-03 1 1 0 -422 1 2 2.1000000000000001e+00 -8.6055950659194451e+00 -1.7829279483493142e+01 -2.7609372605772293e+00 1 1 0 -423 1 2 2.1000000000000001e+00 -8.6289008782796017e+00 -1.1818431879204407e+01 -2.7688521292950892e+00 1 1 0 -424 1 3 -1.0500000000000000e+00 -8.4080675550031589e+00 -8.9344488405749729e+00 -1.1428176752375947e+00 1 1 0 -425 1 3 -1.0500000000000000e+00 -8.7636107163403185e+00 -1.1725704386940036e+01 -1.1439420773679156e+00 1 1 0 -426 1 4 -9.4999999999999996e-01 -6.1228163916760510e+00 -1.0529224175237184e+01 -1.0140309851989482e+00 1 1 0 -427 1 3 -1.0500000000000000e+00 -4.7737195815235491e+00 -1.2521386711486187e+01 -3.2425678127633280e+00 1 1 0 -428 1 3 -1.0500000000000000e+00 -8.5165649126724521e+00 -1.0344149018842892e+01 -3.2446367291315585e+00 1 1 0 -429 1 3 -1.0500000000000000e+00 -7.3634018650483775e+00 -1.2670221015317008e+01 -3.0529473079508476e+00 1 1 0 -430 1 5 4.2499999999999999e-01 -5.6159739075346202e+00 -9.6504763623609211e+00 -1.1578626433938446e+00 1 1 0 -431 1 1 1.5750000000000000e+00 -1.0424715562756791e+01 -1.4938400553082197e+01 -3.7529812015879571e-03 1 1 0 -432 1 2 2.1000000000000001e+00 -5.9974311083098977e+00 -1.3346363613537441e+01 -2.7609379380798256e+00 1 1 0 -433 1 2 2.1000000000000001e+00 -6.0770293492200240e+00 -1.6301376203040785e+01 -2.7688686473689419e+00 1 1 0 -434 1 3 -1.0500000000000000e+00 -5.8562441201571032e+00 -1.3417379284137553e+01 -1.1428148565171732e+00 1 1 0 -435 1 3 -1.0500000000000000e+00 -6.2117573594784972e+00 -1.6208643250903787e+01 -1.1439581431602814e+00 1 1 0 -436 1 4 -9.4999999999999996e-01 -8.7309519339587123e+00 -1.5012116780663984e+01 -1.0140186145754715e+00 1 1 0 -437 1 3 -1.0500000000000000e+00 -7.3818111353884994e+00 -1.7004282131950792e+01 -3.2425561129342411e+00 1 1 0 -438 1 3 -1.0500000000000000e+00 
-5.9647356400559168e+00 -1.4827036902064780e+01 -3.2446470576896891e+00 1 1 0 -439 1 3 -1.0500000000000000e+00 -4.8115794831903598e+00 -1.7153088674562756e+01 -3.0529634737335600e+00 1 1 0 -440 1 5 4.2499999999999999e-01 -8.2241476630143868e+00 -1.4133377854348987e+01 -1.1577899116091555e+00 1 1 0 -441 1 1 1.5750000000000000e+00 -2.6726699900750228e+00 -1.6442095184071334e+01 -3.2708152303708715e-03 1 1 0 -442 1 2 2.1000000000000001e+00 -1.8836926728028871e+00 -9.0683360706481988e+00 2.7539509370400470e+00 1 1 0 -443 1 2 2.1000000000000001e+00 -1.8603595713516334e+00 -1.5079150161517486e+01 2.7618175803065235e+00 1 1 0 -444 1 3 -1.0500000000000000e+00 -2.0809915940246260e+00 -1.7962997831100317e+01 1.1357659249768037e+00 1 1 0 -445 1 3 -1.0500000000000000e+00 -1.7256757375340950e+00 -1.5172134571584408e+01 1.1369946994743039e+00 1 1 0 -446 1 4 -9.4999999999999996e-01 -4.3663303959113087e+00 -1.6368316840061311e+01 1.0070078045087598e+00 1 1 0 -447 1 3 -1.0500000000000000e+00 -5.7154995299878735e+00 -1.4376299278351169e+01 3.2357812249751525e+00 1 1 0 -448 1 3 -1.0500000000000000e+00 -1.9724845002142448e+00 -1.6553498331239307e+01 3.2374496312659460e+00 1 1 0 -449 1 3 -1.0500000000000000e+00 -3.1258773791373198e+00 -1.4227441660105004e+01 3.0459467420251194e+00 1 1 0 -450 1 5 4.2499999999999999e-01 -4.8732684553684695e+00 -1.7247182150638707e+01 1.1509092295157224e+00 1 1 0 -451 1 1 1.5750000000000000e+00 -6.4509876999837346e-02 -1.1959203109449142e+01 -3.2708769273028793e-03 1 1 0 -452 1 2 2.1000000000000001e+00 -4.4918347776990135e+00 -1.3551245280844766e+01 2.7539590818090893e+00 1 1 0 -453 1 2 2.1000000000000001e+00 -4.4122264005535730e+00 -1.0596223265618471e+01 2.7618213123579540e+00 1 1 0 -454 1 3 -1.0500000000000000e+00 -4.6327993770775722e+00 -1.3480068776200241e+01 1.1357306843562007e+00 1 1 0 -455 1 3 -1.0500000000000000e+00 -4.2775323301208692e+00 -1.0689197415521184e+01 1.1370113492700824e+00 1 1 0 -456 1 4 -9.4999999999999996e-01 
-1.7582085114615111e+00 -1.1885444828152252e+01 1.0070062173292218e+00 1 1 0 -457 1 3 -1.0500000000000000e+00 -3.1073643655971166e+00 -9.8933800969646732e+00 3.2357583287275880e+00 1 1 0 -458 1 3 -1.0500000000000000e+00 -4.5243114909096018e+00 -1.2070595204559629e+01 3.2374516197245775e+00 1 1 0 -459 1 3 -1.0500000000000000e+00 -5.6777118011680381e+00 -9.7445664568842254e+00 3.0459653648294243e+00 1 1 0 -460 1 5 4.2499999999999999e-01 -2.2650668296403058e+00 -1.2764264287005354e+01 1.1508705187769799e+00 1 1 0 -461 1 1 1.5750000000000000e+00 -2.6565565918664173e+00 -1.0455505879441525e+01 -3.7549348428154161e-03 1 1 0 -462 1 2 2.1000000000000001e+00 -3.4455908391566643e+00 -1.7829293092612534e+01 -2.7609464806422892e+00 1 1 0 -463 1 2 2.1000000000000001e+00 -3.4688767904129696e+00 -1.1818423511849518e+01 -2.7688516686671711e+00 1 1 0 -464 1 3 -1.0500000000000000e+00 -3.2480752505918087e+00 -8.9344459968549881e+00 -1.1428004913510801e+00 1 1 0 -465 1 3 -1.0500000000000000e+00 -3.6036086619536984e+00 -1.1725714425851610e+01 -1.1439355502562485e+00 1 1 0 -466 1 4 -9.4999999999999996e-01 -9.6282526002298496e-01 -1.0529236900614441e+01 -1.0140115755842860e+00 1 1 0 -467 1 3 -1.0500000000000000e+00 3.8632613444500308e-01 -1.2521362577138564e+01 -3.2425797939291590e+00 1 1 0 -468 1 3 -1.0500000000000000e+00 -3.3565633803144257e+00 -1.0344120627751945e+01 -3.2446417136325554e+00 1 1 0 -469 1 3 -1.0500000000000000e+00 -2.2033940096388758e+00 -1.2670225163556584e+01 -3.0529490801396841e+00 1 1 0 -470 1 5 4.2499999999999999e-01 -4.5593933850753388e-01 -9.6504406470385522e+00 -1.1577660591579200e+00 1 1 0 -471 1 1 1.5750000000000000e+00 -5.2647294099161668e+00 -1.4938390849710533e+01 -3.7554421477974387e-03 1 1 0 -472 1 2 2.1000000000000001e+00 -8.3745420972569029e-01 -1.3346365774570319e+01 -2.7609475596234478e+00 1 1 0 -473 1 2 2.1000000000000001e+00 -9.1703537310051608e-01 -1.6301362578024104e+01 -2.7688573923577673e+00 1 1 0 -474 1 3 -1.0500000000000000e+00 
-6.9625852330998939e-01 -1.3417381412652670e+01 -1.1427865734610130e+00 1 1 0 -475 1 3 -1.0500000000000000e+00 -1.0517542431933027e+00 -1.6208638871829955e+01 -1.1439604905627423e+00 1 1 0 -476 1 4 -9.4999999999999996e-01 -3.5709214848472897e+00 -1.5012075671946061e+01 -1.0140027621155667e+00 1 1 0 -477 1 3 -1.0500000000000000e+00 -2.2218134642601797e+00 -1.7004283740992047e+01 -3.2425628740632551e+00 1 1 0 -478 1 3 -1.0500000000000000e+00 -8.0473984685049871e-01 -1.4827054452774018e+01 -3.2446303282137938e+00 1 1 0 -479 1 3 -1.0500000000000000e+00 3.4843154859986214e-01 -1.7153093111175764e+01 -3.0529662393152108e+00 1 1 0 -480 1 5 4.2499999999999999e-01 -3.0641314229533227e+00 -1.4133366897183697e+01 -1.1576420001890888e+00 1 1 0 -481 1 1 1.5750000000000000e+00 2.5436575698973183e+00 -7.4762807567511658e+00 -3.2681608649411942e-03 0 1 0 -482 1 2 2.1000000000000001e+00 3.3326771644182021e+00 -1.0251055584260627e-01 2.7539562495680912e+00 0 1 0 -483 1 2 2.1000000000000001e+00 3.3559732863378500e+00 -6.1133477231204605e+00 2.7618019815700912e+00 0 1 0 -484 1 3 -1.0500000000000000e+00 3.1353688973965141e+00 -8.9971784534458905e+00 1.1357161192366334e+00 0 1 0 -485 1 3 -1.0500000000000000e+00 3.4906521933711527e+00 -6.2063093440944606e+00 1.1370013682474660e+00 0 1 0 -486 1 4 -9.4999999999999996e-01 8.4999416594662414e-01 -7.4025141664959477e+00 1.0069772303784212e+00 0 1 0 -487 1 3 -1.0500000000000000e+00 -4.9918531211513084e-01 -5.4104789806646600e+00 3.2357748645072224e+00 0 1 0 -488 1 3 -1.0500000000000000e+00 3.2438478921430658e+00 -7.5876613926832288e+00 3.2374650513379368e+00 0 1 0 -489 1 3 -1.0500000000000000e+00 2.0904479338721824e+00 -5.2616143373361730e+00 3.0459494998857473e+00 0 1 0 -490 1 5 4.2499999999999999e-01 3.4305735839570417e-01 -8.2814451510232985e+00 1.1506997919603279e+00 0 1 0 -491 1 1 1.5750000000000000e+00 5.1518162806579664e+00 -2.9933577707057264e+00 -3.2719737650719338e-03 0 1 0 -492 1 2 2.1000000000000001e+00 7.2450056662052376e-01 
-4.5854449159198332e+00 2.7539595529080376e+00 0 1 0 -493 1 2 2.1000000000000001e+00 8.0414918868178020e-01 -1.6304050196450568e+00 2.7618170330682865e+00 0 1 0 -494 1 3 -1.0500000000000000e+00 5.8351556926552206e-01 -4.5142509399890525e+00 1.1357618537192042e+00 0 1 0 -495 1 3 -1.0500000000000000e+00 9.3881877218889187e-01 -1.7234004054205592e+00 1.1370055641638572e+00 0 1 0 -496 1 4 -9.4999999999999996e-01 3.4581831713214264e+00 -2.9195538038595128e+00 1.0070161949531542e+00 0 1 0 -497 1 3 -1.0500000000000000e+00 2.1089597637650321e+00 -9.2758109333887262e-01 3.2357778931579837e+00 0 1 0 -498 1 3 -1.0500000000000000e+00 6.9202493621660821e-01 -3.1047407556425863e+00 3.2374286426884460e+00 0 1 0 -499 1 3 -1.0500000000000000e+00 -4.6137640541853386e-01 -7.7872267536189810e-01 3.0459549651529940e+00 0 1 0 -500 1 5 4.2499999999999999e-01 2.9512366629883733e+00 -3.7984283160022585e+00 1.1510066071998590e+00 0 1 0 -501 1 1 1.5750000000000000e+00 2.5597764890479056e+00 -1.4896728825261896e+00 -3.7510557474558226e-03 0 1 0 -502 1 2 2.1000000000000001e+00 1.7707367658906890e+00 -8.8634435099187865e+00 -2.7609471549488367e+00 0 1 0 -503 1 2 2.1000000000000001e+00 1.7474499480691374e+00 -2.8526347731487149e+00 -2.7688625207492397e+00 0 1 0 -504 1 3 -1.0500000000000000e+00 1.9682617548841872e+00 3.1354766712723858e-02 -1.1428194347213658e+00 0 1 0 -505 1 3 -1.0500000000000000e+00 1.6127391948302812e+00 -2.7598942364919772e+00 -1.1439556954086907e+00 0 1 0 -506 1 4 -9.4999999999999996e-01 4.2535424365009327e+00 -1.5633894774555195e+00 -1.0140400418681299e+00 0 1 0 -507 1 3 -1.0500000000000000e+00 5.6026491085120664e+00 -3.5555609449644461e+00 -3.2425576785307024e+00 0 1 0 -508 1 3 -1.0500000000000000e+00 1.8597625543053109e+00 -1.3783257788357659e+00 -3.2446235882093193e+00 0 1 0 -509 1 3 -1.0500000000000000e+00 3.0129281343278276e+00 -3.7043645719831666e+00 -3.0529605899078920e+00 0 1 0 -510 1 5 4.2499999999999999e-01 4.7603403166335863e+00 -6.8469223618015818e-01 
-1.1579216441180478e+00 0 1 0 -511 1 1 1.5750000000000000e+00 -4.8390666169016328e-02 -5.9725694760793182e+00 -3.7574054098818976e-03 0 1 0 -512 1 2 2.1000000000000001e+00 4.3789145761620833e+00 -4.3805514632713773e+00 -2.7609339008297651e+00 0 1 0 -513 1 2 2.1000000000000001e+00 4.2992990506123103e+00 -7.3355316180595338e+00 -2.7688607932569447e+00 0 1 0 -514 1 3 -1.0500000000000000e+00 4.5200860118647945e+00 -4.4515414547653052e+00 -1.1427984036885626e+00 0 1 0 -515 1 3 -1.0500000000000000e+00 4.1645706838576562e+00 -7.2428167611125112e+00 -1.1439527343219709e+00 0 1 0 -516 1 4 -9.4999999999999996e-01 1.6453970630033830e+00 -6.0462561133443451e+00 -1.0139809143366438e+00 0 1 0 -517 1 3 -1.0500000000000000e+00 2.9945122334512657e+00 -8.0384625674130401e+00 -3.2425618392344440e+00 0 1 0 -518 1 3 -1.0500000000000000e+00 4.4116077983548383e+00 -5.8612132939321402e+00 -3.2446764319665711e+00 0 1 0 -519 1 3 -1.0500000000000000e+00 5.5647564535540042e+00 -8.1872858674545341e+00 -3.0529539697099937e+00 0 1 0 -520 1 5 4.2499999999999999e-01 2.1522145539965809e+00 -5.1674642083276847e+00 -1.1575048043055780e+00 0 1 0 -521 1 1 1.5750000000000000e+00 7.7036718214034785e+00 -7.4762910233615827e+00 -3.2653078128408453e-03 0 1 0 -522 1 2 2.1000000000000001e+00 -1.2147345593282450e+01 -1.0251359496978196e-01 2.7539461566052985e+00 1 1 0 -523 1 2 2.1000000000000001e+00 -1.2124030375241905e+01 -6.1133337616709902e+00 2.7618143580363075e+00 1 1 0 -524 1 3 -1.0500000000000000e+00 8.2953539893182153e+00 -8.9971801103008087e+00 1.1357452560675760e+00 0 1 0 -525 1 3 -1.0500000000000000e+00 -1.1989344710863094e+01 -6.2063052996612100e+00 1.1369992978851666e+00 1 1 0 -526 1 4 -9.4999999999999996e-01 6.0099622042933412e+00 -7.4025579776391464e+00 1.0069620216105442e+00 0 1 0 -527 1 3 -1.0500000000000000e+00 4.6608198109043748e+00 -5.4104755502326540e+00 3.2357801936372788e+00 0 1 0 -528 1 3 -1.0500000000000000e+00 -1.2236155841755377e+01 -7.5876771945797863e+00 3.2374811955831060e+00 1 1 
0 -529 1 3 -1.0500000000000000e+00 7.2504604436044637e+00 -5.2616189118410208e+00 3.0459453948467168e+00 0 1 0 -530 1 5 4.2499999999999999e-01 5.5030432428799969e+00 -8.2814548631033578e+00 1.1505519475195651e+00 0 1 0 -531 1 1 1.5750000000000000e+00 -1.0328175895563996e+01 -2.9933575426158416e+00 -3.2744817930581149e-03 1 1 0 -532 1 2 2.1000000000000001e+00 5.8844947594629744e+00 -4.5854310172663979e+00 2.7539684166484335e+00 0 1 0 -533 1 2 2.1000000000000001e+00 5.9641241815446477e+00 -1.6304119441692926e+00 2.7618170105232309e+00 0 1 0 -534 1 3 -1.0500000000000000e+00 5.7435221593356864e+00 -4.5142545324410897e+00 1.1357475728004847e+00 0 1 0 -535 1 3 -1.0500000000000000e+00 6.0988162421821528e+00 -1.7233902325835047e+00 1.1369987706733902e+00 0 1 0 -536 1 4 -9.4999999999999996e-01 -1.2021823199294937e+01 -2.9195626861295096e+00 1.0070363114076599e+00 1 1 0 -537 1 3 -1.0500000000000000e+00 7.2690066204394697e+00 -9.2755674730927851e-01 3.2357668612914487e+00 0 1 0 -538 1 3 -1.0500000000000000e+00 5.8520231027928773e+00 -3.1047706349297179e+00 3.2374344835306346e+00 0 1 0 -539 1 3 -1.0500000000000000e+00 4.6986174638786373e+00 -7.7871980724010115e-01 3.0459566060165137e+00 0 1 0 -540 1 5 4.2499999999999999e-01 8.1112714877724414e+00 -3.7983924620060368e+00 1.1511131184559726e+00 0 1 0 -541 1 1 1.5750000000000000e+00 7.7197852579331645e+00 -1.4896736083571689e+00 -3.7533060240022564e-03 0 1 0 -542 1 2 2.1000000000000001e+00 6.9307402907651401e+00 -8.8634573665418070e+00 -2.7609563100003145e+00 0 1 0 -543 1 2 2.1000000000000001e+00 6.9074738694113726e+00 -2.8526269036832552e+00 -2.7688615983007780e+00 0 1 0 -544 1 3 -1.0500000000000000e+00 7.1282543967930359e+00 3.1358883413265204e-02 -1.1428023230370439e+00 0 1 0 -545 1 3 -1.0500000000000000e+00 6.7727419364633441e+00 -2.7599051276576141e+00 -1.1439490121146996e+00 0 1 0 -546 1 4 -9.4999999999999996e-01 -1.1226466741815884e+01 -1.5634024412045342e+00 -1.0140216083145610e+00 1 1 0 -547 1 3 -1.0500000000000000e+00 
-9.8773043947366048e+00 -3.5555368621683208e+00 -3.2425683285331646e+00 1 1 0 -548 1 3 -1.0500000000000000e+00 7.0197645195179135e+00 -1.3782969003001959e+00 -3.2446289771148944e+00 0 1 0 -549 1 3 -1.0500000000000000e+00 8.1729347482067567e+00 -3.7043681527678913e+00 -3.0529625378704122e+00 0 1 0 -550 1 5 4.2499999999999999e-01 -1.0719626421899999e+01 -6.8465756426241953e-01 -1.1578295867776784e+00 1 1 0 -551 1 1 1.5750000000000000e+00 5.1115953590272625e+00 -5.9725597637293362e+00 -3.7601916249858647e-03 0 1 0 -552 1 2 2.1000000000000001e+00 -1.1101108931968247e+01 -4.3805528895496302e+00 -2.7609432872668886e+00 1 1 0 -553 1 2 2.1000000000000001e+00 -1.1180707067380455e+01 -7.3355188643798535e+00 -2.7688492591590554e+00 1 1 0 -554 1 3 -1.0500000000000000e+00 -1.0959928694497696e+01 -4.4515436898974556e+00 -1.1427711527976001e+00 1 1 0 -555 1 3 -1.0500000000000000e+00 -1.1315426415312240e+01 -7.2428113900070752e+00 -1.1439555106248189e+00 1 1 0 -556 1 4 -9.4999999999999996e-01 6.8054277571583519e+00 -6.0462150488380164e+00 -1.0139639895733374e+00 0 1 0 -557 1 3 -1.0500000000000000e+00 8.1545095181602107e+00 -8.0384640840914869e+00 -3.2425680420518717e+00 0 1 0 -558 1 3 -1.0500000000000000e+00 -1.1068396240802272e+01 -5.8612309436472234e+00 -3.2446597028975566e+00 1 1 0 -559 1 3 -1.0500000000000000e+00 -9.9152309072003355e+00 -8.1872904891822973e+00 -3.0529571692262092e+00 1 1 0 -560 1 5 4.2499999999999999e-01 7.3122321320503794e+00 -5.1674508614420542e+00 -1.1573511315660259e+00 0 1 0 -561 1 1 1.5750000000000000e+00 -7.7763421850555563e+00 -7.4762809230994502e+00 -3.2682934864940449e-03 1 1 0 -562 1 2 2.1000000000000001e+00 -6.9873219863206382e+00 -1.0251132671459473e-01 2.7539563597780923e+00 1 1 0 -563 1 2 2.1000000000000001e+00 -6.9640259840326628e+00 -6.1133472592188820e+00 2.7618022956295629e+00 1 1 0 -564 1 3 -1.0500000000000000e+00 -7.1846314193998664e+00 -8.9971779600544295e+00 1.1357163882947159e+00 1 1 0 -565 1 3 -1.0500000000000000e+00 
-6.8293485880591112e+00 -6.2063096894111425e+00 1.1370012333877977e+00 1 1 0 -566 1 4 -9.4999999999999996e-01 -9.4700055755924701e+00 -7.4025139876644594e+00 1.0069773362883989e+00 1 1 0 -567 1 3 -1.0500000000000000e+00 -1.0819186074870123e+01 -5.4104790908412692e+00 3.2357742683187052e+00 1 1 0 -568 1 3 -1.0500000000000000e+00 -7.0761528036457531e+00 -7.5876609775127299e+00 3.2374644576831670e+00 1 1 0 -569 1 3 -1.0500000000000000e+00 -8.2295514763541018e+00 -5.2616148976108672e+00 3.0459489432022515e+00 1 1 0 -570 1 5 4.2499999999999999e-01 -9.9769424641830984e+00 -8.2814450142459481e+00 1.1507004566801982e+00 1 1 0 -571 1 1 1.5750000000000000e+00 -5.1681838127975688e+00 -2.9933576384022018e+00 -3.2717369706762867e-03 1 1 0 -572 1 2 2.1000000000000001e+00 -9.5954996993209818e+00 -4.5854449940775464e+00 2.7539595842160942e+00 1 1 0 -573 1 2 2.1000000000000001e+00 -9.5158507875170635e+00 -1.6304043820163336e+00 2.7618168255845852e+00 1 1 0 -574 1 3 -1.0500000000000000e+00 -9.7364845194737679e+00 -4.5142510709812989e+00 1.1357620952221374e+00 1 1 0 -575 1 3 -1.0500000000000000e+00 -9.3811811495596569e+00 -1.7234005945692772e+00 1.1370052607441838e+00 1 1 0 -576 1 4 -9.4999999999999996e-01 -6.8618165785256515e+00 -2.9195528214605435e+00 1.0070162671680265e+00 1 1 0 -577 1 3 -1.0500000000000000e+00 -8.2110400227350908e+00 -9.2758055861539646e-01 3.2357776111175447e+00 1 1 0 -578 1 3 -1.0500000000000000e+00 -9.6279751712075452e+00 -3.1047410652821092e+00 3.2374288782542173e+00 1 1 0 -579 1 3 -1.0500000000000000e+00 -1.0781377378976435e+01 -7.7872238917679937e-01 3.0459548222147177e+00 1 1 0 -580 1 5 4.2499999999999999e-01 -7.3687631344134621e+00 -3.7984280683325675e+00 1.1510087811947294e+00 1 1 0 -581 1 1 1.5750000000000000e+00 -7.7602231493103115e+00 -1.4896729773895174e+00 -3.7514049036762032e-03 1 1 0 -582 1 2 2.1000000000000001e+00 -8.5492636448235881e+00 -8.8634434759155383e+00 -2.7609469383151142e+00 1 1 0 -583 1 2 2.1000000000000001e+00 -8.5725493910960839e+00 
-2.8526345568910330e+00 -2.7688621512718061e+00 1 1 0 -584 1 3 -1.0500000000000000e+00 -8.3517386022975746e+00 3.1355306993937404e-02 -1.1428188664021803e+00 1 1 0 -585 1 3 -1.0500000000000000e+00 -8.7072613660041132e+00 -2.7598948801617009e+00 -1.1439559119420721e+00 1 1 0 -586 1 4 -9.4999999999999996e-01 -6.0664578280469037e+00 -1.5633906786741285e+00 -1.0140407761717736e+00 1 1 0 -587 1 3 -1.0500000000000000e+00 -4.7173504441606600e+00 -3.5555605571846858e+00 -3.2425569990613576e+00 1 1 0 -588 1 3 -1.0500000000000000e+00 -8.4602377735195269e+00 -1.3783256930774925e+00 -3.2446245341017974e+00 1 1 0 -589 1 3 -1.0500000000000000e+00 -7.3070727313816795e+00 -3.7043646163271404e+00 -3.0529607277228852e+00 1 1 0 -590 1 5 4.2499999999999999e-01 -5.5596602828094399e+00 -6.8469307887884057e-01 -1.1579270767641230e+00 1 1 0 -591 1 1 1.5750000000000000e+00 -1.0368390725810965e+01 -5.9725695262430403e+00 -3.7573993430903840e-03 1 1 0 -592 1 2 2.1000000000000001e+00 -5.9410852260952272e+00 -4.3805507393552645e+00 -2.7609332752915812e+00 1 1 0 -593 1 2 2.1000000000000001e+00 -6.0207013841734955e+00 -7.3355318748997771e+00 -2.7688608337867908e+00 1 1 0 -594 1 3 -1.0500000000000000e+00 -5.7999138374145174e+00 -4.4515417632928145e+00 -1.1427994454172925e+00 1 1 0 -595 1 3 -1.0500000000000000e+00 -6.1554296040035208e+00 -7.2428163652707518e+00 -1.1439534513399927e+00 1 1 0 -596 1 4 -9.4999999999999996e-01 -8.6746031854828516e+00 -6.0462567207300957e+00 -1.0139806757747039e+00 1 1 0 -597 1 3 -1.0500000000000000e+00 -7.3254880189215017e+00 -8.0384622206913434e+00 -3.2425615735922868e+00 1 1 0 -598 1 3 -1.0500000000000000e+00 -5.9083924192090498e+00 -5.8612138215664249e+00 -3.2446762232977573e+00 1 1 0 -599 1 3 -1.0500000000000000e+00 -4.7552429311707556e+00 -8.1872863652617180e+00 -3.0529541602386461e+00 1 1 0 -600 1 5 4.2499999999999999e-01 -8.1677849706477055e+00 -5.1674634384285980e+00 -1.1575040236693965e+00 1 1 0 -601 1 1 1.5750000000000000e+00 -2.6163277838675221e+00 
-7.4762909455420772e+00 -3.2656709391893912e-03 1 1 0 -602 1 2 2.1000000000000001e+00 -1.8273462383235461e+00 -1.0251424082924032e-01 2.7539460950975592e+00 1 1 0 -603 1 2 2.1000000000000001e+00 -1.8040299652667589e+00 -6.1133335430346563e+00 2.7618141317410618e+00 1 1 0 -604 1 3 -1.0500000000000000e+00 -2.0246462781465269e+00 -8.9971797593687484e+00 1.1357458882768405e+00 1 1 0 -605 1 3 -1.0500000000000000e+00 -1.6693450475600429e+00 -6.2063057861451725e+00 1.1369993464425061e+00 1 1 0 -606 1 4 -9.4999999999999996e-01 -4.3100379988542743e+00 -7.4025589029402692e+00 1.0069615315099991e+00 1 1 0 -607 1 3 -1.0500000000000000e+00 -5.6591796712508087e+00 -5.4104753627400228e+00 3.2357806674433647e+00 1 1 0 -608 1 3 -1.0500000000000000e+00 -1.9161562222272543e+00 -7.5876772803970987e+00 3.2374804258171608e+00 1 1 0 -609 1 3 -1.0500000000000000e+00 -3.0695406692448417e+00 -5.2616188661581340e+00 3.0459453864236909e+00 1 1 0 -610 1 5 4.2499999999999999e-01 -4.8169571538695717e+00 -8.2814553617741105e+00 1.1505481493262373e+00 1 1 0 -611 1 1 1.5750000000000000e+00 -8.1760772968380024e-03 -2.9933575181058600e+00 -3.2743711147666943e-03 1 1 0 -612 1 2 2.1000000000000001e+00 -4.4355051118200439e+00 -4.5854299392358087e+00 2.7539688922472791e+00 1 1 0 -613 1 2 2.1000000000000001e+00 -4.3558760344290208e+00 -1.6304124598534635e+00 2.7618174106720375e+00 1 1 0 -614 1 3 -1.0500000000000000e+00 -4.5764777419565466e+00 -4.5142548759855465e+00 1.1357463260141714e+00 1 1 0 -615 1 3 -1.0500000000000000e+00 -4.2211840342906868e+00 -1.7233895441759479e+00 1.1369977086180825e+00 1 1 0 -616 1 4 -9.4999999999999996e-01 -1.7018236124385560e+00 -2.9195635165858569e+00 1.0070367229964496e+00 1 1 0 -617 1 3 -1.0500000000000000e+00 -3.0509931277018758e+00 -9.2755589773917535e-01 3.2357665495488988e+00 1 1 0 -618 1 3 -1.0500000000000000e+00 -4.4679770346247647e+00 -3.1047713732493420e+00 3.2374347240366337e+00 1 1 0 -619 1 3 -1.0500000000000000e+00 -5.6213822534512161e+00 -7.7872002325319301e-01 
3.0459565887809354e+00 1 1 0 -620 1 5 4.2499999999999999e-01 -2.2087277522865865e+00 -3.7983910799744223e+00 1.1511149260038867e+00 1 1 0 -621 1 1 1.5750000000000000e+00 -2.6002141654952915e+00 -1.4896737094533599e+00 -3.7533128050117881e-03 1 1 0 -622 1 2 2.1000000000000001e+00 -3.3892590053672063e+00 -8.8634574857208097e+00 -2.7609561680299493e+00 1 1 0 -623 1 2 2.1000000000000001e+00 -3.4125251930059894e+00 -2.8526268846259306e+00 -2.7688615286285891e+00 1 1 0 -624 1 3 -1.0500000000000000e+00 -3.1917462126351293e+00 3.1358788246325986e-02 -1.1428022026067133e+00 1 1 0 -625 1 3 -1.0500000000000000e+00 -3.5472590403252218e+00 -2.7599052235913142e+00 -1.1439491794483505e+00 1 1 0 -626 1 4 -9.4999999999999996e-01 -9.0646691514089461e-01 -1.5634032584722597e+00 -1.0140215315146257e+00 1 1 0 -627 1 3 -1.0500000000000000e+00 4.4269580016512577e-01 -3.5555364857703218e+00 -3.2425687501749536e+00 1 1 0 -628 1 3 -1.0500000000000000e+00 -3.3002363165323549e+00 -1.3782961327508190e+00 -3.2446289526978092e+00 1 1 0 -629 1 3 -1.0500000000000000e+00 -2.1470649566305173e+00 -3.7043686941940379e+00 -3.0529629239873710e+00 1 1 0 -630 1 5 4.2499999999999999e-01 -3.9962609031332441e-01 -6.8465733157457720e-01 -1.1578307953461699e+00 1 1 0 -631 1 1 1.5750000000000000e+00 -5.2084047404047134e+00 -5.9725595364899284e+00 -3.7600979616296826e-03 1 1 0 -632 1 2 2.1000000000000001e+00 -7.8110919308991456e-01 -4.3805529373242500e+00 -2.7609430555638124e+00 1 1 0 -633 1 2 2.1000000000000001e+00 -8.6070716056939567e-01 -7.3355179831783488e+00 -2.7688491805898519e+00 1 1 0 -634 1 3 -1.0500000000000000e+00 -6.3992886880595101e-01 -4.4515437816825045e+00 -1.1427704613142957e+00 1 1 0 -635 1 3 -1.0500000000000000e+00 -9.9542614064111312e-01 -7.2428116519511683e+00 -1.1439560424635076e+00 1 1 0 -636 1 4 -9.4999999999999996e-01 -3.5145719480547983e+00 -6.0462140630651522e+00 -1.0139637930845495e+00 1 1 0 -637 1 3 -1.0500000000000000e+00 -2.1654899354619133e+00 -8.0384635502983741e+00 
-3.2425685873817418e+00 1 1 0 -638 1 3 -1.0500000000000000e+00 -7.4839646312622143e-01 -5.8612317329932946e+00 -3.2446596299773693e+00 1 1 0 -639 1 3 -1.0500000000000000e+00 4.0476806768967499e-01 -8.1872903970557633e+00 -3.0529571025848705e+00 1 1 0 -640 1 5 4.2499999999999999e-01 -3.0077675350384983e+00 -5.1674504112187361e+00 -1.1573483949331340e+00 1 1 0 -641 1 1 1.5750000000000000e+00 9.0327999603982079e-01 1.3078268179656156e+00 9.1936285392393344e+00 0 0 0 -642 1 2 2.1000000000000001e+00 5.0862246908167243e+00 9.0452767941395287e+00 -6.4424047165866876e+00 0 0 1 -643 1 2 2.1000000000000001e+00 5.1094714654176485e+00 3.0344442077888871e+00 -6.4345036943507274e+00 0 0 1 -644 1 3 -1.0500000000000000e+00 4.8887010421652999e+00 1.5046638157599901e-01 -8.0605839637767165e+00 0 0 1 -645 1 3 -1.0500000000000000e+00 5.2441927721705479e+00 2.9417321911529406e+00 -8.0593973812488873e+00 0 0 1 -646 1 4 -9.4999999999999996e-01 2.6033704126610484e+00 1.7451758067380290e+00 -8.1893676020018926e+00 0 0 1 -647 1 3 -1.0500000000000000e+00 1.2542455852135834e+00 3.7373752120048742e+00 -5.9607992485127665e+00 0 0 1 -648 1 3 -1.0500000000000000e+00 4.9971716802184414e+00 1.5601454347120267e+00 -5.9587058378165576e+00 0 0 1 -649 1 3 -1.0500000000000000e+00 3.8440041612115703e+00 3.8861931285291931e+00 -6.1503945630703374e+00 0 0 1 -650 1 5 4.2499999999999999e-01 2.0965664266306856e+00 8.6640571709351022e-01 -8.0457855564755221e+00 0 0 1 -651 1 1 1.5750000000000000e+00 3.5114550282895003e+00 5.7907578289563908e+00 9.1936291799320600e+00 0 0 0 -652 1 2 2.1000000000000001e+00 2.4780321755723715e+00 4.5623640127866523e+00 -6.4424127795872970e+00 0 0 1 -653 1 2 2.1000000000000001e+00 2.5576727978274061e+00 7.5173398896816757e+00 -6.4345086110749499e+00 0 0 1 -654 1 3 -1.0500000000000000e+00 2.3368431861690446e+00 4.6333600597891369e+00 -8.0605492211226508e+00 0 0 1 -655 1 3 -1.0500000000000000e+00 2.6923826235310759e+00 7.4246193194823391e+00 -8.0594146702702183e+00 0 0 1 -656 1 4 
-9.4999999999999996e-01 5.2115829731287189e+00 6.2281267873758885e+00 -8.1893654320210807e+00 0 0 1 -657 1 3 -1.0500000000000000e+00 3.8624444953521344e+00 8.2202786364954399e+00 -5.9607758363894661e+00 0 0 1 -658 1 3 -1.0500000000000000e+00 2.4453330767289607e+00 6.0430641736627777e+00 -5.9587080529496408e+00 0 0 1 -659 1 3 -1.0500000000000000e+00 1.2921732996260165e+00 8.3691407396663671e+00 -6.1504119881455370e+00 0 0 1 -660 1 5 4.2499999999999999e-01 4.7046988428178977e+00 5.3493130177618120e+00 -8.0457432166301501e+00 0 0 1 -661 1 1 1.5750000000000000e+00 9.1939897374337853e-01 7.2944681040671533e+00 9.1931409599603455e+00 0 0 0 -662 1 2 2.1000000000000001e+00 1.3038993986497616e-01 -7.9297399937615864e-02 6.4359157158029880e+00 0 0 0 -663 1 2 2.1000000000000001e+00 1.0709548741821351e-01 5.9314795901314916e+00 6.4280530942119469e+00 0 0 0 -664 1 3 -1.0500000000000000e+00 3.2769702380019616e-01 8.8153278361017087e+00 8.0541328072809932e+00 0 0 0 -665 1 3 -1.0500000000000000e+00 -2.7577048002024540e-02 6.0244668918843125e+00 8.0528534019165114e+00 0 0 0 -666 1 4 -9.4999999999999996e-01 2.6131161534111662e+00 7.2207287036538688e+00 8.1828861539669902e+00 0 0 0 -667 1 3 -1.0500000000000000e+00 3.9622586469205352e+00 5.2286377281547125e+00 5.9541122763207728e+00 0 0 0 -668 1 3 -1.0500000000000000e+00 2.1920683743605807e-01 7.4058249891069572e+00 5.9524118713538332e+00 0 0 0 -669 1 3 -1.0500000000000000e+00 1.3725990807979294e+00 5.0798240702830135e+00 6.1439110162023720e+00 0 0 0 -670 1 5 4.2499999999999999e-01 3.1199732920356418e+00 8.0995675675041703e+00 8.0391592725870282e+00 0 0 0 -671 1 1 1.5750000000000000e+00 -1.6887626179515607e+00 2.8115295078010867e+00 9.1931424252045630e+00 0 0 0 -672 1 2 2.1000000000000001e+00 2.7385869519563499e+00 4.4035977810533353e+00 6.4359159181916219e+00 0 0 0 -673 1 2 2.1000000000000001e+00 2.6589194294892575e+00 1.4485953760994725e+00 6.4280582079700412e+00 0 0 0 -674 1 3 -1.0500000000000000e+00 2.8795450934696767e+00 
4.3324405226142382e+00 8.0541191847946116e+00 0 0 0 -675 1 3 -1.0500000000000000e+00 2.5242342524813459e+00 1.5415689319063191e+00 8.0528783307282445e+00 0 0 0 -676 1 4 -9.4999999999999996e-01 4.8784689801895098e-03 2.7377462205787744e+00 8.1828769724994572e+00 0 0 0 -677 1 3 -1.0500000000000000e+00 1.3540645414120398e+00 7.4573621369630771e-01 5.9540951763794290e+00 0 0 0 -678 1 3 -1.0500000000000000e+00 2.7710487894445812e+00 2.9229363552675593e+00 5.9524016589185003e+00 0 0 0 -679 1 3 -1.0500000000000000e+00 3.9244363315488222e+00 5.9687068067378490e-01 6.1439283855687208e+00 0 0 0 -680 1 5 4.2499999999999999e-01 5.1182958888196950e-01 3.6166698539281406e+00 8.0390353352204258e+00 0 0 0 -681 1 1 1.5750000000000000e+00 6.0632942270537633e+00 1.3078169876851220e+00 9.1936311772266670e+00 0 0 0 -682 1 2 2.1000000000000001e+00 -1.0393798325235846e+01 9.0452739485367424e+00 -6.4424156068686536e+00 1 0 1 -683 1 2 2.1000000000000001e+00 -1.0370532688021308e+01 3.0344580928070393e+00 -6.4344922061203995e+00 1 0 1 -684 1 3 -1.0500000000000000e+00 1.0048686020539265e+01 1.5046462355047652e-01 -8.0605545916265591e+00 0 0 1 -685 1 3 -1.0500000000000000e+00 -1.0235804675645195e+01 2.9417358723974196e+00 -8.0593999402563039e+00 1 0 1 -686 1 4 -9.4999999999999996e-01 7.7633395990307577e+00 1.7451343070668237e+00 -8.1893820944806723e+00 0 0 1 -687 1 3 -1.0500000000000000e+00 6.4142512718313398e+00 3.7373784423628784e+00 -5.9607944539472024e+00 0 0 1 -688 1 3 -1.0500000000000000e+00 -1.0482832407337161e+01 1.5601299826680055e+00 -5.9586900611972622e+00 1 0 1 -689 1 3 -1.0500000000000000e+00 9.0040166486961724e+00 3.8861879123933356e+00 -6.1503979527282322e+00 0 0 1 -690 1 5 4.2499999999999999e-01 7.2565522149876038e+00 8.6639701182161843e-01 -8.0459259751794452e+00 0 0 1 -691 1 1 1.5750000000000000e+00 -1.1968537431183220e+01 5.7907580951124764e+00 9.1936264813234629e+00 1 0 0 -692 1 2 2.1000000000000001e+00 7.6380264848568480e+00 4.5623772756766172e+00 -6.4424042946661455e+00 0 
0 1 -693 1 2 2.1000000000000001e+00 7.7176486578355963e+00 7.5173331294719183e+00 -6.4345083071021740e+00 0 0 1 -694 1 3 -1.0500000000000000e+00 7.4968492694646329e+00 4.6333567441294718e+00 -8.0605633819113667e+00 0 0 1 -695 1 3 -1.0500000000000000e+00 7.8523804276681837e+00 7.4246294275852094e+00 -8.0594213971891069e+00 0 0 1 -696 1 4 -9.4999999999999996e-01 -1.0268423694159754e+01 6.2281174442746980e+00 -8.1893456261947950e+00 1 0 1 -697 1 3 -1.0500000000000000e+00 9.0224907290535974e+00 8.2203025512221188e+00 -5.9607878799717344e+00 0 0 1 -698 1 3 -1.0500000000000000e+00 7.6053307981435196e+00 6.0430347893748895e+00 -5.9587019740450744e+00 0 0 1 -699 1 3 -1.0500000000000000e+00 6.4521656412247061e+00 8.3691447635715939e+00 -6.1504109506876619e+00 0 0 1 -700 1 5 4.2499999999999999e-01 9.8647336224214257e+00 5.3493476067839048e+00 -8.0456403223888646e+00 0 0 1 -701 1 1 1.5750000000000000e+00 6.0794078571688708e+00 7.2944672054170034e+00 9.1931387604710437e+00 0 0 0 -702 1 2 2.1000000000000001e+00 5.2903927590288387e+00 -7.9312034344852123e-02 6.4359061568910381e+00 0 0 0 -703 1 2 2.1000000000000001e+00 5.2671187145264451e+00 5.9314885138761895e+00 6.4280540558924795e+00 0 0 0 -704 1 3 -1.0500000000000000e+00 5.4876891838999047e+00 8.8153320262813786e+00 8.0541505226321348e+00 0 0 0 -705 1 3 -1.0500000000000000e+00 5.1324255766353062e+00 6.0244559010705814e+00 8.0528602469110595e+00 0 0 0 -706 1 4 -9.4999999999999996e-01 -1.2866893134241742e+01 7.2207151463837640e+00 8.1829043644805566e+00 1 0 0 -707 1 3 -1.0500000000000000e+00 -1.1517695582926537e+01 5.2286614618147631e+00 5.9541014619513177e+00 1 0 0 -708 1 3 -1.0500000000000000e+00 5.3792088967617762e+00 7.4058530000867115e+00 5.9524066869292191e+00 0 0 0 -709 1 3 -1.0500000000000000e+00 6.5326055526273592e+00 5.0798202391399414e+00 6.1439090309796480e+00 0 0 0 -710 1 5 4.2499999999999999e-01 -1.2359993555416519e+01 8.0996018552035594e+00 8.0392491603506926e+00 1 0 0 -711 1 1 1.5750000000000000e+00 
3.4712234255278887e+00 2.8115394997158880e+00 9.1931391128052944e+00 0 0 0 -712 1 2 2.1000000000000001e+00 -1.2741436473488831e+01 4.4035966072164179e+00 6.4359068114368210e+00 1 0 0 -713 1 2 2.1000000000000001e+00 -1.2821086026369231e+01 1.4486074172151717e+00 6.4280700878493633e+00 1 0 0 -714 1 3 -1.0500000000000000e+00 -1.2600468968641490e+01 4.3324381244139616e+00 8.0541460499971507e+00 1 0 0 -715 1 3 -1.0500000000000000e+00 -1.2955762910609096e+01 1.5415738718932701e+00 8.0528750796115318e+00 1 0 0 -716 1 4 -9.4999999999999996e-01 5.1649091672213814e+00 2.7377878492092051e+00 8.1828935067407187e+00 0 0 0 -717 1 3 -1.0500000000000000e+00 6.5140624838503562e+00 7.4573449160736516e-01 5.9540890408837548e+00 0 0 0 -718 1 3 -1.0500000000000000e+00 -1.2708954990084692e+01 2.9229186261097801e+00 5.9524185176229256e+00 1 0 0 -719 1 3 -1.0500000000000000e+00 -1.1555552596604405e+01 5.9686666841561120e-01 6.1439252012806840e+00 1 0 0 -720 1 5 4.2499999999999999e-01 5.6718460531567594e+00 3.6166822258108873e+00 8.0391876250502783e+00 0 0 0 -721 1 1 1.5750000000000000e+00 -9.4167199060881881e+00 1.3078268191493692e+00 9.1936284569148015e+00 1 0 0 -722 1 2 2.1000000000000001e+00 -5.2337744657239433e+00 9.0452766042384738e+00 -6.4424045702369455e+00 1 0 1 -723 1 2 2.1000000000000001e+00 -5.2105281225591042e+00 3.0344444952698204e+00 -6.4345035089905984e+00 1 0 1 -724 1 3 -1.0500000000000000e+00 -5.4312992074936357e+00 1.5046681141750895e-01 -8.0605839113224356e+00 1 0 1 -725 1 3 -1.0500000000000000e+00 -5.0758078374764404e+00 2.9417318484454675e+00 -8.0593973266164873e+00 1 0 1 -726 1 4 -9.4999999999999996e-01 -7.7166293890810103e+00 1.7451757670354233e+00 -8.1893673368413982e+00 1 0 1 -727 1 3 -1.0500000000000000e+00 -9.0657538566382421e+00 3.7373757159528864e+00 -5.9607990251074803e+00 1 0 1 -728 1 3 -1.0500000000000000e+00 -5.3228288100664747e+00 1.5601455206964303e+00 -5.9587064079961714e+00 1 0 1 -729 1 3 -1.0500000000000000e+00 -6.4759962422743165e+00 
3.8861930601011174e+00 -6.1503948993679884e+00 1 0 1 -730 1 5 4.2499999999999999e-01 -8.2234331102634997e+00 8.6640653809297419e-01 -8.0457837360048821e+00 1 0 1 -731 1 1 1.5750000000000000e+00 -6.8085449284088639e+00 5.7907577807183372e+00 9.1936293269548521e+00 1 0 0 -732 1 2 2.1000000000000001e+00 -7.8419680626851189e+00 4.5623634416592758e+00 -6.4424126333628751e+00 1 0 1 -733 1 2 2.1000000000000001e+00 -7.7623275299073544e+00 7.5173406513668297e+00 -6.4345089088641538e+00 1 0 1 -734 1 3 -1.0500000000000000e+00 -7.9831563836472403e+00 4.6333602950605197e+00 -8.0605491278584473e+00 1 0 1 -735 1 3 -1.0500000000000000e+00 -7.6276171712668432e+00 7.4246189825803306e+00 -8.0594145553904770e+00 1 0 1 -736 1 4 -9.4999999999999996e-01 -5.1084172118972777e+00 6.2281269736840663e+00 -8.1893656974078635e+00 1 0 1 -737 1 3 -1.0500000000000000e+00 -6.4575553159047985e+00 8.2202787726173803e+00 -5.9607759162796174e+00 1 0 1 -738 1 3 -1.0500000000000000e+00 -7.8746669235491940e+00 6.0430635684913305e+00 -5.9587081543805747e+00 1 0 1 -739 1 3 -1.0500000000000000e+00 -9.0278274510389611e+00 8.3691408883945471e+00 -6.1504119619142976e+00 1 0 1 -740 1 5 4.2499999999999999e-01 -5.6153013914286491e+00 5.3493123886781433e+00 -8.0457448175864990e+00 1 0 1 -741 1 1 1.5750000000000000e+00 -9.4006010736587182e+00 7.2944682033683108e+00 9.1931405770212429e+00 1 0 0 -742 1 2 2.1000000000000001e+00 -1.0189610402198802e+01 -7.9297605759123257e-02 6.4359160010201411e+00 1 0 0 -743 1 2 2.1000000000000001e+00 -1.0212904137937610e+01 5.9314797115273628e+00 6.4280531027538306e+00 1 0 0 -744 1 3 -1.0500000000000000e+00 -9.9923029717657226e+00 8.8153286205910888e+00 8.0541331394482896e+00 1 0 0 -745 1 3 -1.0500000000000000e+00 -1.0347577370841730e+01 6.0244660873837574e+00 8.0528534336309434e+00 1 0 0 -746 1 4 -9.4999999999999996e-01 -7.7068834885402087e+00 7.2207286744038370e+00 8.1828858968542733e+00 1 0 0 -747 1 3 -1.0500000000000000e+00 -6.3577402971397685e+00 5.2286383860826824e+00 
5.9541124938145380e+00 1 0 0 -748 1 3 -1.0500000000000000e+00 -1.0100793183120766e+01 7.4058245081335947e+00 5.9524106541055115e+00 1 0 0 -749 1 3 -1.0500000000000000e+00 -8.9474022065604224e+00 5.0798241454221369e+00 6.1439110956024354e+00 1 0 0 -750 1 5 4.2499999999999999e-01 -7.2000270353828046e+00 8.0995672068003088e+00 8.0391580946381538e+00 1 0 0 -751 1 1 1.5750000000000000e+00 -1.2008762881451348e+01 2.8115295177180428e+00 9.1931425674069622e+00 1 0 0 -752 1 2 2.1000000000000001e+00 -7.5814135479309908e+00 4.4035983789351896e+00 6.4359165483385254e+00 1 0 0 -753 1 2 2.1000000000000001e+00 -7.6610808670997494e+00 1.4485945780712797e+00 6.4280582613292712e+00 1 0 0 -754 1 3 -1.0500000000000000e+00 -7.4404546055668366e+00 4.3324402056750237e+00 8.0541182772999065e+00 1 0 0 -755 1 3 -1.0500000000000000e+00 -7.7957660342353634e+00 1.5415692603738904e+00 8.0528775182214503e+00 1 0 0 -756 1 4 -9.4999999999999996e-01 -1.0315122227353797e+01 2.7377447118390101e+00 8.1828767909309512e+00 1 0 0 -757 1 3 -1.0500000000000000e+00 -8.9659352915333912e+00 7.4573683553695247e-01 5.9540956560325071e+00 1 0 0 -758 1 3 -1.0500000000000000e+00 -7.5489512806955084e+00 2.9229361730432970e+00 5.9524018601671909e+00 1 0 0 -759 1 3 -1.0500000000000000e+00 -6.3955632518685768e+00 5.9687042658164202e-01 6.1439287069185120e+00 1 0 0 -760 1 5 4.2499999999999999e-01 -9.8081700960310911e+00 3.6166702227703560e+00 8.0390328438422074e+00 1 0 0 -761 1 1 1.5750000000000000e+00 -4.2567053371850658e+00 1.3078168179231042e+00 9.1936310535964232e+00 1 0 0 -762 1 2 2.1000000000000001e+00 -7.3798358768387473e-02 9.0452733241225367e+00 -6.4424155682759778e+00 1 0 1 -763 1 2 2.1000000000000001e+00 -5.0532229357781233e-02 3.0344580784693349e+00 -6.4344918700685572e+00 1 0 1 -764 1 3 -1.0500000000000000e+00 -2.7131416236533745e-01 1.5046495859890285e-01 -8.0605544059037157e+00 1 0 1 -765 1 3 -1.0500000000000000e+00 8.4195332462293493e-02 2.9417357818207392e+00 -8.0593997996762017e+00 1 0 1 -766 1 4 
-9.4999999999999996e-01 -2.5566610233707436e+00 1.7451330035208805e+00 -8.1893824664560277e+00 1 0 1 -767 1 3 -1.0500000000000000e+00 -3.9057480740250670e+00 3.7373787164792880e+00 -5.9607940658222685e+00 1 0 1 -768 1 3 -1.0500000000000000e+00 -1.6283268367977044e-01 1.5601301436753872e+00 -5.9586903754265217e+00 1 0 1 -769 1 3 -1.0500000000000000e+00 -1.3159834872646030e+00 3.8861877175565063e+00 -6.1503980963921077e+00 1 0 1 -770 1 5 4.2499999999999999e-01 -3.0634476704969611e+00 8.6639700750263771e-01 -8.0459289853123099e+00 1 0 1 -771 1 1 1.5750000000000000e+00 -1.6485375220706722e+00 5.7907579502462809e+00 9.1936265404422031e+00 1 0 0 -772 1 2 2.1000000000000001e+00 -2.6819735576082735e+00 4.5623781023095979e+00 -6.4424040572249073e+00 1 0 1 -773 1 2 2.1000000000000001e+00 -2.6023515605367979e+00 7.5173327298043446e+00 -6.4345082402300129e+00 1 0 1 -774 1 3 -1.0500000000000000e+00 -2.8231506835232665e+00 4.6333563194435925e+00 -8.0605641951789604e+00 1 0 1 -775 1 3 -1.0500000000000000e+00 -2.4676198319304250e+00 7.4246300333344308e+00 -8.0594222945481242e+00 1 0 1 -776 1 4 -9.4999999999999996e-01 5.1576495784171783e-02 6.2281174409833078e+00 -8.1893454451251166e+00 1 0 1 -777 1 3 -1.0500000000000000e+00 -1.2975095016776308e+00 8.2203029875449154e+00 -5.9607878139978627e+00 1 0 1 -778 1 3 -1.0500000000000000e+00 -2.7146694253441410e+00 6.0430341006248725e+00 -5.9587013238385174e+00 1 0 1 -779 1 3 -1.0500000000000000e+00 -3.8678341546705104e+00 8.3691444919325306e+00 -6.1504110733339727e+00 1 0 1 -780 1 5 4.2499999999999999e-01 -4.5526617430967775e-01 5.3493482680335944e+00 -8.0456383324324694e+00 1 0 1 -781 1 1 1.5750000000000000e+00 -4.2405919059825807e+00 7.2944670077944807e+00 9.1931388109336041e+00 1 0 0 -782 1 2 2.1000000000000001e+00 -5.0296064030418073e+00 -7.9312429721781541e-02 6.4359061250102680e+00 1 0 0 -783 1 2 2.1000000000000001e+00 -5.0528803208991677e+00 5.9314885847207393e+00 6.4280541425652125e+00 1 0 0 -784 1 3 -1.0500000000000000e+00 
-4.8323113190408655e+00 8.8153320489722837e+00 8.0541507220343149e+00 1 0 0 -785 1 3 -1.0500000000000000e+00 -5.1875750890510588e+00 6.0244559180381110e+00 8.0528596115841111e+00 1 0 0 -786 1 4 -9.4999999999999996e-01 -2.5468930396501683e+00 7.2207149872186704e+00 8.1829042004208787e+00 1 0 0 -787 1 3 -1.0500000000000000e+00 -1.1976965541013911e+00 5.2286614070351156e+00 5.9541011082395521e+00 1 0 0 -788 1 3 -1.0500000000000000e+00 -4.9407917338772620e+00 7.4058524302343827e+00 5.9524067025542333e+00 1 0 0 -789 1 3 -1.0500000000000000e+00 -3.7873946085238082e+00 5.0798202478013827e+00 6.1439086823543967e+00 1 0 0 -790 1 5 4.2499999999999999e-01 -2.0399936634126092e+00 8.0996016485694859e+00 8.0392480188451927e+00 1 0 0 -791 1 1 1.5750000000000000e+00 -6.8487764459721472e+00 2.8115396349174162e+00 9.1931393353000779e+00 1 0 0 -792 1 2 2.1000000000000001e+00 -2.4214360684733940e+00 4.4035963531915776e+00 6.4359069157849582e+00 1 0 0 -793 1 2 2.1000000000000001e+00 -2.5010867592299606e+00 1.4486079821071627e+00 6.4280700334665895e+00 1 0 0 -794 1 3 -1.0500000000000000e+00 -2.2804693176190796e+00 4.3324382110458508e+00 8.0541466060331786e+00 1 0 0 -795 1 3 -1.0500000000000000e+00 -2.6357629333121757e+00 1.5415738251322431e+00 8.0528749502136669e+00 1 0 0 -796 1 4 -9.4999999999999996e-01 -5.1550909689645312e+00 2.7377879547736299e+00 8.1828937813504332e+00 1 0 0 -797 1 3 -1.0500000000000000e+00 -3.8059367645404496e+00 7.4573545262324359e-01 5.9540888558950602e+00 1 0 0 -798 1 3 -1.0500000000000000e+00 -2.3889553911064834e+00 2.9229182779601004e+00 5.9524183742338437e+00 1 0 0 -799 1 3 -1.0500000000000000e+00 -1.2355527417253249e+00 5.9686629625387155e-01 6.1439250256775200e+00 1 0 0 -800 1 5 4.2499999999999999e-01 -4.6481532735579094e+00 3.6166830920668787e+00 8.0391899192038352e+00 1 0 0 -801 1 1 1.5750000000000000e+00 9.5960721862191178e-01 1.0273669203704241e+01 9.1936235733136655e+00 0 0 0 -802 1 2 2.1000000000000001e+00 4.9172083055255325e+00 
-1.7852191623671651e+01 -6.4424003148067897e+00 0 1 1 -803 1 2 2.1000000000000001e+00 5.1658109241490635e+00 1.2000275388808074e+01 -6.4345007360704409e+00 0 0 1 -804 1 3 -1.0500000000000000e+00 4.9450244688133118e+00 9.1162949204304589e+00 -8.0605632730520096e+00 0 0 1 -805 1 3 -1.0500000000000000e+00 5.3005308774925517e+00 1.1907548980836420e+01 -8.0594017059932295e+00 0 0 1 -806 1 4 -9.4999999999999996e-01 2.6597454135294925e+00 1.0711060731609876e+01 -8.1893222259048226e+00 0 0 1 -807 1 3 -1.0500000000000000e+00 1.3105929168482859e+00 1.2703195772710561e+01 -5.9607978618737851e+00 0 0 1 -808 1 3 -1.0500000000000000e+00 5.0535126422700269e+00 1.0525967976026514e+01 -5.9587374189296218e+00 0 0 1 -809 1 3 -1.0500000000000000e+00 3.9003367713006512e+00 1.2852015907184782e+01 -6.1503929107381827e+00 0 0 1 -810 1 5 4.2499999999999999e-01 2.1529230807864241e+00 9.8323236639573999e+00 -8.0454353340627449e+00 0 0 1 -811 1 1 1.5750000000000000e+00 3.5677901403638028e+00 1.4756558886709637e+01 9.1936320859504761e+00 0 0 0 -812 1 2 2.1000000000000001e+00 2.5343724058499628e+00 1.3528195803657599e+01 -6.4424233387050833e+00 0 0 1 -813 1 2 2.1000000000000001e+00 2.6139913107597703e+00 1.6483175576855384e+01 -6.4345033330073518e+00 0 0 1 -814 1 3 -1.0500000000000000e+00 2.3931907288388619e+00 1.3599192072250563e+01 -8.0605653726222926e+00 0 0 1 -815 1 3 -1.0500000000000000e+00 2.7487038780850757e+00 1.6390457170443742e+01 -8.0594014632146163e+00 0 0 1 -816 1 4 -9.4999999999999996e-01 5.2678674489775172e+00 1.5193891941576492e+01 -8.1893953522173568e+00 0 0 1 -817 1 3 -1.0500000000000000e+00 3.9187441878934699e+00 1.7186101345028060e+01 -5.9607852712427771e+00 0 0 1 -818 1 3 -1.0500000000000000e+00 2.5016682244761341e+00 1.5008886897788546e+01 -5.9586911872320503e+00 0 0 1 -819 1 3 -1.0500000000000000e+00 1.3485131765279306e+00 1.7334940064883479e+01 -6.1504041699538501e+00 0 0 1 -820 1 5 4.2499999999999999e-01 4.7610305469518792e+00 1.4315086371947789e+01 
-8.0459836447046609e+00 0 0 1 -821 1 1 1.5750000000000000e+00 9.7572650104669378e-01 1.6260281732774981e+01 9.1931398694629891e+00 0 0 0 -822 1 2 2.1000000000000001e+00 1.8672671622704407e-01 8.8865134574184275e+00 6.4359252403327680e+00 0 0 0 -823 1 2 2.1000000000000001e+00 1.6341323909156813e-01 1.4897327838108385e+01 6.4280628738807160e+00 0 0 0 -824 1 3 -1.0500000000000000e+00 1.5869811845495363e-01 -1.8082123015369124e+01 8.0541340213658152e+00 0 1 0 -825 1 3 -1.0500000000000000e+00 2.8742433885017959e-02 1.4990304244778233e+01 8.0528666545587271e+00 0 0 0 -826 1 4 -9.4999999999999996e-01 2.6694272128344245e+00 1.6186542002547892e+01 8.1828957582626032e+00 0 0 0 -827 1 3 -1.0500000000000000e+00 4.0185585050563635e+00 1.4194457856414292e+01 5.9541011149118788e+00 0 0 0 -828 1 3 -1.0500000000000000e+00 2.7554898326217625e-01 1.6371647903721165e+01 5.9524005485591456e+00 0 0 0 -829 1 3 -1.0500000000000000e+00 1.4289385783169752e+00 1.4045613821719037e+01 6.1439242276787418e+00 0 0 0 -830 1 5 4.2499999999999999e-01 3.1763286179058490e+00 1.7065430306293440e+01 8.0392248483087627e+00 0 0 0 -831 1 1 1.5750000000000000e+00 -1.6324184683740341e+00 1.1777344934266068e+01 9.1931465772030379e+00 0 0 0 -832 1 2 2.1000000000000001e+00 2.7949109368093250e+00 1.3369431615951214e+01 6.4359123446196520e+00 0 0 0 -833 1 2 2.1000000000000001e+00 2.7152603396729695e+00 1.0414397731227350e+01 6.4280500078867284e+00 0 0 0 -834 1 3 -1.0500000000000000e+00 2.9358844870702256e+00 1.3298249254921139e+01 8.0541035265692571e+00 0 0 0 -835 1 3 -1.0500000000000000e+00 2.5805749833761542e+00 1.0507387417059693e+01 8.0528735429538472e+00 0 0 0 -836 1 4 -9.4999999999999996e-01 6.1197499880652373e-02 1.1703528856756993e+01 8.1828384666384260e+00 0 0 0 -837 1 3 -1.0500000000000000e+00 1.4104102567525914e+00 9.7115620528564612e+00 5.9541021771064706e+00 0 0 0 -838 1 3 -1.0500000000000000e+00 2.8273745913960777e+00 1.1888758720918936e+01 5.9524302538497906e+00 0 0 0 -839 1 3 
-1.0500000000000000e+00 3.9807696072751568e+00 9.5627134385235948e+00 6.1439195726972784e+00 0 0 0 -840 1 5 4.2499999999999999e-01 5.6813568547824111e-01 1.2582401681601350e+01 8.0387418936318475e+00 0 0 0 -841 1 1 1.5750000000000000e+00 6.1196215253814046e+00 1.0273659065047806e+01 9.1936258533248534e+00 0 0 0 -842 1 2 2.1000000000000001e+00 -1.0562814009027809e+01 -1.7852194770053313e+01 -6.4424105685276851e+00 1 1 1 -843 1 2 2.1000000000000001e+00 -1.0314192648491979e+01 1.2000288960951057e+01 -6.4344887365136625e+00 1 0 1 -844 1 3 -1.0500000000000000e+00 1.0105009678123249e+01 9.1162929018207564e+00 -8.0605338552440937e+00 0 0 1 -845 1 3 -1.0500000000000000e+00 -1.0179466880668443e+01 1.1907552881889412e+01 -8.0594043468605676e+00 1 0 1 -846 1 4 -9.4999999999999996e-01 7.8197150885211819e+00 1.0711020034930083e+01 -8.1893365530719642e+00 0 0 1 -847 1 3 -1.0500000000000000e+00 6.4705998763924484e+00 1.2703199920926114e+01 -5.9607930505693743e+00 0 0 1 -848 1 3 -1.0500000000000000e+00 -1.0426491548728604e+01 1.0525953700197832e+01 -5.9587210876857588e+00 1 0 1 -849 1 3 -1.0500000000000000e+00 9.0603500318690493e+00 1.2852009950045630e+01 -6.1503965426825715e+00 0 0 1 -850 1 5 4.2499999999999999e-01 7.3129093231943401e+00 9.8323158297765794e+00 -8.0455729433718481e+00 0 0 1 -851 1 1 1.5750000000000000e+00 -1.1912201911242379e+01 1.4756558626125500e+01 9.1936299374457917e+00 1 0 0 -852 1 2 2.1000000000000001e+00 7.6943659531364794e+00 1.3528208886055431e+01 -6.4424143301466863e+00 0 0 1 -853 1 2 2.1000000000000001e+00 7.7739676680323946e+00 1.6483168696691120e+01 -6.4345034954359610e+00 0 0 1 -854 1 3 -1.0500000000000000e+00 7.5531973024030030e+00 1.3599188471640346e+01 -8.0605797041230307e+00 0 0 1 -855 1 3 -1.0500000000000000e+00 7.9087020351309256e+00 1.6390467445460896e+01 -8.0594079096168336e+00 0 0 1 -856 1 4 -9.4999999999999996e-01 -1.0212139335865229e+01 1.5193882091436695e+01 -8.1893761679918828e+00 1 0 1 -857 1 3 -1.0500000000000000e+00 
9.0787892522491873e+00 1.7186124575197912e+01 -5.9607974326211624e+00 0 0 1 -858 1 3 -1.0500000000000000e+00 7.6616656495931785e+00 1.5008857806803714e+01 -5.9586855554315239e+00 0 0 1 -859 1 3 -1.0500000000000000e+00 6.5085065607252339e+00 1.7334943758700970e+01 -6.1504023223690254e+00 0 0 1 -860 1 5 4.2499999999999999e-01 9.9210652194241220e+00 1.4315120680602522e+01 -8.0458834643735155e+00 0 0 1 -861 1 1 1.5750000000000000e+00 6.1357349670429358e+00 1.6260281068792370e+01 9.1931373680824571e+00 0 0 0 -862 1 2 2.1000000000000001e+00 5.3467301429260417e+00 8.8864991034834802e+00 6.4359154980254800e+00 0 0 0 -863 1 2 2.1000000000000001e+00 5.3234375680067387e+00 1.4897337020707621e+01 6.4280633305033579e+00 0 0 0 -864 1 3 -1.0500000000000000e+00 5.3186906165474923e+00 -1.8082119281913194e+01 8.0541512151208217e+00 0 1 0 -865 1 3 -1.0500000000000000e+00 5.1887452446427620e+00 1.4990293685008570e+01 8.0528738710979937e+00 0 0 0 -866 1 4 -9.4999999999999996e-01 -1.2810582246898260e+01 1.6186528254419503e+01 8.1829142774136940e+00 1 0 0 -867 1 3 -1.0500000000000000e+00 -1.1461395078265998e+01 1.4194482279531240e+01 5.9540899004490573e+00 1 0 0 -868 1 3 -1.0500000000000000e+00 5.4355508226374880e+00 1.6371676240781067e+01 5.9523953371586007e+00 0 0 0 -869 1 3 -1.0500000000000000e+00 6.5889448393182057e+00 1.4045610487796125e+01 6.1439231486978052e+00 0 0 0 -870 1 5 4.2499999999999999e-01 -1.2303637921787351e+01 1.7065464446927333e+01 8.0393139102372757e+00 1 0 0 -871 1 1 1.5750000000000000e+00 3.5275677694768159e+00 1.1777354695333322e+01 9.1931437178279722e+00 0 0 0 -872 1 2 2.1000000000000001e+00 -1.2685112265527499e+01 1.3369430390314744e+01 6.4359024875393231e+00 1 0 0 -873 1 2 2.1000000000000001e+00 -1.2764745750720230e+01 1.0414410392944081e+01 6.4280618196758432e+00 1 0 0 -874 1 3 -1.0500000000000000e+00 -1.2544129418050623e+01 1.3298246737339653e+01 8.0541308893949868e+00 1 0 0 -875 1 3 -1.0500000000000000e+00 -1.2899422299851542e+01 1.0507392314703441e+01 
8.0528703655478253e+00 1 0 0 -876 1 4 -9.4999999999999996e-01 5.2212278834502932e+00 1.1703569772888418e+01 8.1828544574580064e+00 0 0 0 -877 1 3 -1.0500000000000000e+00 6.5704071848659780e+00 9.7115602039361448e+00 5.9540954146707854e+00 0 0 0 -878 1 3 -1.0500000000000000e+00 -1.2652629524673211e+01 1.1888740289825776e+01 5.9524475845137914e+00 1 0 0 -879 1 3 -1.0500000000000000e+00 -1.1499218746205949e+01 9.5627087106832143e+00 6.1439160615276585e+00 1 0 0 -880 1 5 4.2499999999999999e-01 5.7281515933265581e+00 1.2582412438419141e+01 8.0388892666496652e+00 0 0 0 -881 1 1 1.5750000000000000e+00 -9.3603927864488465e+00 1.0273669083913415e+01 9.1936233661194393e+00 1 0 0 -882 1 2 2.1000000000000001e+00 -5.4027908311748014e+00 -1.7852191591920860e+01 -6.4423999875116174e+00 1 1 1 -883 1 2 2.1000000000000001e+00 -5.1541886950179352e+00 1.2000275601569015e+01 -6.4345007553289859e+00 1 0 1 -884 1 3 -1.0500000000000000e+00 -5.3749756973895453e+00 9.1162952266749038e+00 -8.0605627427916939e+00 1 0 1 -885 1 3 -1.0500000000000000e+00 -5.0194697033740354e+00 1.1907548341371790e+01 -8.0594018260737599e+00 1 0 1 -886 1 4 -9.4999999999999996e-01 -7.6602542409070491e+00 1.0711060750080897e+01 -8.1893221273874310e+00 1 0 1 -887 1 3 -1.0500000000000000e+00 -9.0094074701008555e+00 1.2703195909859126e+01 -5.9607981827342975e+00 1 0 1 -888 1 3 -1.0500000000000000e+00 -5.2664879046879927e+00 1.0525968292949354e+01 -5.9587382228044579e+00 1 0 1 -889 1 3 -1.0500000000000000e+00 -6.4196629439277020e+00 1.2852015502374297e+01 -6.1503932553102407e+00 1 0 1 -890 1 5 4.2499999999999999e-01 -8.1670767718813568e+00 9.8323240619096737e+00 -8.0454342284319758e+00 1 0 1 -891 1 1 1.5750000000000000e+00 -6.7522099413499017e+00 1.4756558947084972e+01 9.1936323727388007e+00 1 0 0 -892 1 2 2.1000000000000001e+00 -7.7856279740588574e+00 1.3528195363213435e+01 -6.4424232658091753e+00 1 0 1 -893 1 2 2.1000000000000001e+00 -7.7060087959671275e+00 1.6483175890169566e+01 -6.4345031930632031e+00 1 0 1 -894 1 
3 -1.0500000000000000e+00 -7.9268092601655393e+00 1.3599192115246709e+01 -8.0605653924195604e+00 1 0 1 -895 1 3 -1.0500000000000000e+00 -7.5712960116532937e+00 1.6390457132414308e+01 -8.0594014597887931e+00 1 0 1 -896 1 4 -9.4999999999999996e-01 -5.0521325823369825e+00 1.5193892557836794e+01 -8.1893952534646299e+00 1 0 1 -897 1 3 -1.0500000000000000e+00 -6.4012557391469294e+00 1.7186101532233774e+01 -5.9607858793978696e+00 1 0 1 -898 1 3 -1.0500000000000000e+00 -7.8183317428747330e+00 1.5008887211419324e+01 -5.9586911269344274e+00 1 0 1 -899 1 3 -1.0500000000000000e+00 -8.9714872285006528e+00 1.7334939869012782e+01 -6.1504040245545895e+00 1 0 1 -900 1 5 4.2499999999999999e-01 -5.5589691759804651e+00 1.4315086850375383e+01 -8.0459819775647716e+00 1 0 1 -901 1 1 1.5750000000000000e+00 -9.3442733480072278e+00 1.6260281622445302e+01 9.1931395426557110e+00 1 0 0 -902 1 2 2.1000000000000001e+00 -1.0133273070639682e+01 8.8865135585351283e+00 6.4359254506516130e+00 1 0 0 -903 1 2 2.1000000000000001e+00 -1.0156586250533065e+01 1.4897327970275054e+01 6.4280630778184857e+00 1 0 0 -904 1 3 -1.0500000000000000e+00 -1.0161302019375981e+01 -1.8082122656764607e+01 8.0541343934514202e+00 1 1 0 -905 1 3 -1.0500000000000000e+00 -1.0291257777350937e+01 1.4990303857072181e+01 8.0528666793281509e+00 1 0 0 -906 1 4 -9.4999999999999996e-01 -7.6505730793458735e+00 1.6186540928105540e+01 8.1828953469210077e+00 1 0 0 -907 1 3 -1.0500000000000000e+00 -6.3014405501587794e+00 1.4194458390634342e+01 5.9541017585236222e+00 1 0 0 -908 1 3 -1.0500000000000000e+00 -1.0044451276249209e+01 1.6371648291125627e+01 5.9523999323485963e+00 1 0 0 -909 1 3 -1.0500000000000000e+00 -8.8910624691465792e+00 1.4045613984800305e+01 6.1439241727047040e+00 1 0 0 -910 1 5 4.2499999999999999e-01 -7.1436715781015101e+00 1.7065429938006378e+01 8.0392210249142231e+00 1 0 0 -911 1 1 1.5750000000000000e+00 -1.1952418365405634e+01 1.1777344769910933e+01 9.1931467279769166e+00 1 0 0 -912 1 2 2.1000000000000001e+00 
-7.5250893067069100e+00 1.3369431895710402e+01 6.4359125458494333e+00 1 0 0 -913 1 2 2.1000000000000001e+00 -7.6047401369908787e+00 1.0414397698876499e+01 6.4280497411191835e+00 1 0 0 -914 1 3 -1.0500000000000000e+00 -7.3841152575290465e+00 1.3298249173792914e+01 8.0541030584957483e+00 1 0 0 -915 1 3 -1.0500000000000000e+00 -7.7394250224018784e+00 1.0507387430609697e+01 8.0528732915954180e+00 1 0 0 -916 1 4 -9.4999999999999996e-01 -1.0258802640316311e+01 1.1703528516691946e+01 8.1828385357148221e+00 1 0 0 -917 1 3 -1.0500000000000000e+00 -8.9095901142621763e+00 9.7115620830072906e+00 5.9541024500382438e+00 1 0 0 -918 1 3 -1.0500000000000000e+00 -7.4926254257397229e+00 1.1888758199297200e+01 5.9524304645907566e+00 1 0 0 -919 1 3 -1.0500000000000000e+00 -6.3392300077947583e+00 9.5627130728697161e+00 6.1439197965438819e+00 1 0 0 -920 1 5 4.2499999999999999e-01 -9.7518639894871963e+00 1.2582402105100961e+01 8.0387424963826071e+00 1 0 0 -921 1 1 1.5750000000000000e+00 -4.2003781666602471e+00 1.0273659178062957e+01 9.1936255865073093e+00 1 0 0 -922 1 2 2.1000000000000001e+00 -2.4281380657306251e-01 -1.7852194978795438e+01 -6.4424104817889924e+00 1 1 1 -923 1 2 2.1000000000000001e+00 5.8073311176265463e-03 1.2000289091306779e+01 -6.4344887684447798e+00 1 0 1 -924 1 3 -1.0500000000000000e+00 -2.1499032318097377e-01 9.1162934469930086e+00 -8.0605336122239901e+00 1 0 1 -925 1 3 -1.0500000000000000e+00 1.4053305155494300e-01 1.1907552348135759e+01 -8.0594038296133501e+00 1 0 1 -926 1 4 -9.4999999999999996e-01 -2.5002851790322218e+00 1.0711019049960040e+01 -8.1893367336811629e+00 1 0 1 -927 1 3 -1.0500000000000000e+00 -3.8493992000608142e+00 1.2703200254186580e+01 -5.9607924527654337e+00 1 0 1 -928 1 3 -1.0500000000000000e+00 -1.0649173895659203e-01 1.0525953880245513e+01 -5.9587217128014212e+00 1 0 1 -929 1 3 -1.0500000000000000e+00 -1.2596509514345406e+00 1.2852010139898166e+01 -6.1503965479066123e+00 1 0 1 -930 1 5 4.2499999999999999e-01 -3.0070906737129794e+00 
9.8323157767275227e+00 -8.0455750813439302e+00 1 0 1 -931 1 1 1.5750000000000000e+00 -1.5922020057366204e+00 1.4756558606205918e+01 9.1936300371528645e+00 1 0 0 -932 1 2 2.1000000000000001e+00 -2.6256342849090215e+00 1.3528209535884709e+01 -6.4424139428101448e+00 1 0 1 -933 1 2 2.1000000000000001e+00 -2.5460327371080291e+00 1.6483168361853000e+01 -6.4345035627546698e+00 1 0 1 -934 1 3 -1.0500000000000000e+00 -2.7668025513118506e+00 1.3599188282117982e+01 -8.0605804580563216e+00 1 0 1 -935 1 3 -1.0500000000000000e+00 -2.4112982996207268e+00 1.6390467789148698e+01 -8.0594086153186755e+00 1 0 1 -936 1 4 -9.4999999999999996e-01 1.0786065194175976e-01 1.5193881944029133e+01 -8.1893757702455225e+00 1 0 1 -937 1 3 -1.0500000000000000e+00 -1.2412111852687584e+00 1.7186124642348755e+01 -5.9607969511890113e+00 1 0 1 -938 1 3 -1.0500000000000000e+00 -2.6583342475974394e+00 1.5008857192020724e+01 -5.9586852469727489e+00 1 0 1 -939 1 3 -1.0500000000000000e+00 -3.8114931308841973e+00 1.7334943467465425e+01 -6.1504023276487931e+00 1 0 1 -940 1 5 4.2499999999999999e-01 -3.9893427220104982e-01 1.4315121673056158e+01 -8.0458809439058001e+00 1 0 1 -941 1 1 1.5750000000000000e+00 -4.1842647733189811e+00 1.6260280900738014e+01 9.1931373541656640e+00 1 0 0 -942 1 2 2.1000000000000001e+00 -4.9732694000809214e+00 8.8864994991107871e+00 6.4359156339852426e+00 1 0 0 -943 1 2 2.1000000000000001e+00 -4.9965619586163417e+00 1.4897336869865573e+01 6.4280633746036724e+00 1 0 0 -944 1 3 -1.0500000000000000e+00 -5.0013096301578557e+00 -1.8082119246981069e+01 8.0541515174107730e+00 1 1 0 -945 1 3 -1.0500000000000000e+00 -5.1312553567957622e+00 1.4990293360305930e+01 8.0528735880481079e+00 1 0 0 -946 1 4 -9.4999999999999996e-01 -2.4905823373432447e+00 1.6186527481653730e+01 8.1829144834701140e+00 1 0 0 -947 1 3 -1.0500000000000000e+00 -1.1413951190011318e+00 1.4194482448426054e+01 5.9540897749393551e+00 1 0 0 -948 1 3 -1.0500000000000000e+00 -4.8844497340440238e+00 1.6371676351109574e+01 
5.9523950906844671e+00 1 0 0 -949 1 3 -1.0500000000000000e+00 -3.7310550651778556e+00 1.4045610398815395e+01 6.1439228472924778e+00 1 0 0 -950 1 5 4.2499999999999999e-01 -1.9836375134124715e+00 1.7065464974264582e+01 8.0393139158871101e+00 1 0 0 -951 1 1 1.5750000000000000e+00 -6.7924322972263633e+00 1.1777354850443714e+01 9.1931438146612265e+00 1 0 0 -952 1 2 2.1000000000000001e+00 -2.3651126781701990e+00 1.3369430006310449e+01 6.4359026051072608e+00 1 0 0 -953 1 2 2.1000000000000001e+00 -2.4447460827353300e+00 1.0414411083296347e+01 6.4280619889271122e+00 1 0 0 -954 1 3 -1.0500000000000000e+00 -2.2241294262454936e+00 1.3298247110263322e+01 8.0541315514418770e+00 1 0 0 -955 1 3 -1.0500000000000000e+00 -2.5794223333527002e+00 1.0507391785141490e+01 8.0528704188601807e+00 1 0 0 -956 1 4 -9.4999999999999996e-01 -5.0987720093367859e+00 1.1703570724197846e+01 8.1828547460418974e+00 1 0 0 -957 1 3 -1.0500000000000000e+00 -3.7495921260073386e+00 9.7115606887493051e+00 5.9540950468016760e+00 1 0 0 -958 1 3 -1.0500000000000000e+00 -2.3326296115329681e+00 1.1888740063197147e+01 5.9524471808153692e+00 1 0 0 -959 1 3 -1.0500000000000000e+00 -1.1792192073374927e+00 9.5627085299782664e+00 6.1439162617713929e+00 1 0 0 -960 1 5 4.2499999999999999e-01 -4.5918481348798545e+00 1.2582412837426194e+01 8.0388923615312642e+00 1 0 0 -961 1 1 1.5750000000000000e+00 7.9060983098709947e-01 -1.6623787873959273e+01 9.1936241221863249e+00 0 1 0 -962 1 2 2.1000000000000001e+00 4.9735388822158324e+00 -8.8863555261994307e+00 -6.4424092998091940e+00 0 1 1 -963 1 2 2.1000000000000001e+00 4.9968241249244869e+00 -1.4897218761397980e+01 -6.4345103916532667e+00 0 1 1 -964 1 3 -1.0500000000000000e+00 4.7760131551751499e+00 -1.7781192163992994e+01 -8.0605609776657161e+00 0 1 1 -965 1 3 -1.0500000000000000e+00 5.1315425565977346e+00 -1.4989934535773086e+01 -8.0594167673316317e+00 0 1 1 -966 1 4 -9.4999999999999996e-01 2.4907718354025317e+00 -1.6186388039551588e+01 -8.1893254397424453e+00 0 1 1 -967 1 3 
-1.0500000000000000e+00 1.1416263133331963e+00 -1.4194270381621900e+01 -5.9607865702829335e+00 0 1 1 -968 1 3 -1.0500000000000000e+00 4.8845019877675924e+00 -1.6371500937997865e+01 -5.9587293889869883e+00 0 1 1 -969 1 3 -1.0500000000000000e+00 3.7313268130514974e+00 -1.4045419052787359e+01 -6.1504066950524141e+00 0 1 1 -970 1 5 4.2499999999999999e-01 1.9839009717795477e+00 -1.7065170720648808e+01 -8.0454457650585169e+00 0 1 1 -971 1 1 1.5750000000000000e+00 3.3987763835280393e+00 -1.2140905346190326e+01 9.1936279670263232e+00 0 1 0 -972 1 2 2.1000000000000001e+00 2.3653801838590205e+00 -1.3369283232827510e+01 -6.4424200206570612e+00 0 1 1 -973 1 2 2.1000000000000001e+00 2.4449786524018275e+00 -1.0414270762708139e+01 -6.4344947097414282e+00 0 1 1 -974 1 3 -1.0500000000000000e+00 2.2241844110169264e+00 -1.3298260551948676e+01 -8.0605520411220617e+00 0 1 1 -975 1 3 -1.0500000000000000e+00 2.5796912969422721e+00 -1.0507005922241635e+01 -8.0593940590064257e+00 0 1 1 -976 1 4 -9.4999999999999996e-01 5.0988702527267655e+00 -1.1703548043198262e+01 -8.1893609630806132e+00 0 1 1 -977 1 3 -1.0500000000000000e+00 3.7497226337578446e+00 -9.7113711982098554e+00 -5.9607926003920886e+00 0 1 1 -978 1 3 -1.0500000000000000e+00 2.3326731922370776e+00 -1.1888581465336951e+01 -5.9587182867267821e+00 0 1 1 -979 1 3 -1.0500000000000000e+00 1.1795119362153041e+00 -9.5625531841926961e+00 -6.1503935088776673e+00 0 1 1 -980 1 5 4.2499999999999999e-01 4.5920550844469084e+00 -1.2582296526462123e+01 -8.0457276875216497e+00 0 1 1 -981 1 1 1.5750000000000000e+00 8.0672847412836290e-01 -1.0637207792244846e+01 9.1931446898855285e+00 0 1 0 -982 1 2 2.1000000000000001e+00 1.7735241745661767e-02 -1.8010958050662659e+01 6.4359214102136661e+00 0 1 0 -983 1 2 2.1000000000000001e+00 -5.5984692701720462e-03 -1.2000144101042064e+01 6.4280608641863495e+00 0 1 0 -984 1 3 -1.0500000000000000e+00 2.1504497388601607e-01 -9.1163020892610103e+00 8.0541133501916313e+00 0 1 0 -985 1 3 -1.0500000000000000e+00 
-1.4026752501513329e-01 -1.1907157403873917e+01 8.0528737619875344e+00 0 1 0 -986 1 4 -9.4999999999999996e-01 2.5003778817693600e+00 -1.0710992981939540e+01 8.1828509244580552e+00 0 1 0 -987 1 3 -1.0500000000000000e+00 3.8495354817159484e+00 -1.2703010397069693e+01 5.9540985576838814e+00 0 1 0 -988 1 3 -1.0500000000000000e+00 1.0654000383107132e-01 -1.0525822550974517e+01 5.9524296977026996e+00 0 1 0 -989 1 3 -1.0500000000000000e+00 1.2599376891911795e+00 -1.2851860677111500e+01 6.1439254063881670e+00 0 1 0 -990 1 5 4.2499999999999999e-01 3.0073045113687797e+00 -9.8321299051036668e+00 8.0388731267178173e+00 0 1 0 -991 1 1 1.5750000000000000e+00 -1.8014219144703780e+00 -1.5120102745243090e+01 9.1931438625465880e+00 0 1 0 -992 1 2 2.1000000000000001e+00 2.6258996467147959e+00 -1.3528044251547879e+01 6.4359209604334673e+00 0 1 0 -993 1 2 2.1000000000000001e+00 2.5462735712870828e+00 -1.6483086989741455e+01 6.4280448059299271e+00 0 1 0 -994 1 3 -1.0500000000000000e+00 2.7668677793957492e+00 -1.3599231581151475e+01 8.0541173845552336e+00 0 1 0 -995 1 3 -1.0500000000000000e+00 2.4115859888696161e+00 -1.6390097180934195e+01 8.0528587154913431e+00 0 1 0 -996 1 4 -9.4999999999999996e-01 -1.0775774466886645e-01 -1.5193886699232316e+01 8.1828630796662409e+00 0 1 0 -997 1 3 -1.0500000000000000e+00 1.2414435679215039e+00 -1.7185906236511268e+01 5.9541111590081126e+00 0 1 0 -998 1 3 -1.0500000000000000e+00 2.6583690581402628e+00 -1.5008709077371243e+01 5.9524182019502625e+00 0 1 0 -999 1 3 -1.0500000000000000e+00 3.8117604092858510e+00 -1.7334728765347329e+01 6.1439089845392907e+00 0 1 0 -1000 1 5 4.2499999999999999e-01 3.9913087790997537e-01 -1.4315031645600069e+01 8.0389424860193159e+00 0 1 0 -1001 1 1 1.5750000000000000e+00 5.9506242267994338e+00 -1.6623797722718635e+01 9.1936266671202027e+00 0 1 0 -1002 1 2 2.1000000000000001e+00 -1.0506484231044222e+01 -8.8863590027846140e+00 -6.4424197062906847e+00 1 1 1 -1003 1 2 2.1000000000000001e+00 -1.0483178876911868e+01 
-1.4897205772908450e+01 -6.4344990737011551e+00 1 1 1 -1004 1 3 -1.0500000000000000e+00 9.9359983241624121e+00 -1.7781194811616796e+01 -8.0605324246202574e+00 0 1 1 -1005 1 3 -1.0500000000000000e+00 -1.0348454886651519e+01 -1.4989930517170308e+01 -8.0594191130608888e+00 1 1 1 -1006 1 4 -9.4999999999999996e-01 7.6507412164599700e+00 -1.6186429673609464e+01 -8.1893398094371470e+00 0 1 1 -1007 1 3 -1.0500000000000000e+00 6.3016333555911750e+00 -1.4194266019203811e+01 -5.9607817823241254e+00 0 1 1 -1008 1 3 -1.0500000000000000e+00 -1.0595502565248184e+01 -1.6371514961221607e+01 -5.9587127227690981e+00 1 1 1 -1009 1 3 -1.0500000000000000e+00 8.8913398738443519e+00 -1.4045424689210986e+01 -6.1504098107569085e+00 0 1 1 -1010 1 5 4.2499999999999999e-01 7.1438876889407545e+00 -1.7065178919134084e+01 -8.0455854917667171e+00 0 1 1 -1011 1 1 1.5750000000000000e+00 -1.2081215850500879e+01 -1.2140905395609227e+01 9.1936259420414075e+00 1 1 0 -1012 1 2 2.1000000000000001e+00 7.5253738692339525e+00 -1.3369269642659075e+01 -6.4424111597687768e+00 0 1 1 -1013 1 2 2.1000000000000001e+00 7.6049545385687836e+00 -1.0414276974615465e+01 -6.4344945980992394e+00 0 1 1 -1014 1 3 -1.0500000000000000e+00 7.3841902361699852e+00 -1.3298264178152394e+01 -8.0605659001293244e+00 0 1 1 -1015 1 3 -1.0500000000000000e+00 7.7396886594995777e+00 -1.0506995362793919e+01 -8.0594009666794690e+00 0 1 1 -1016 1 4 -9.4999999999999996e-01 -1.0381136033782756e+01 -1.1703557162467805e+01 -8.1893415275727754e+00 1 1 1 -1017 1 3 -1.0500000000000000e+00 8.9097683875275777e+00 -9.7113477429998554e+00 -5.9608036880169948e+00 0 1 1 -1018 1 3 -1.0500000000000000e+00 7.4926707750249442e+00 -1.1888611603990626e+01 -5.9587129622138315e+00 0 1 1 -1019 1 3 -1.0500000000000000e+00 6.3395050264705226e+00 -9.5625498048806605e+00 -6.1503917722438839e+00 0 1 1 -1020 1 5 4.2499999999999999e-01 9.7520894232513022e+00 -1.2582261952400280e+01 -8.0456251567187103e+00 0 1 1 -1021 1 1 1.5750000000000000e+00 5.9667370778486841e+00 
-1.0637208378655330e+01 9.1931422048264544e+00 0 1 0 -1022 1 2 2.1000000000000001e+00 5.1777388423707791e+00 -1.8010972751178073e+01 6.4359121754500244e+00 0 1 0 -1023 1 2 2.1000000000000001e+00 5.1544257741572128e+00 -1.2000135136612140e+01 6.4280617964993638e+00 0 1 0 -1024 1 3 -1.0500000000000000e+00 5.3750375393141656e+00 -9.1162982390217753e+00 8.0541305365780431e+00 0 1 0 -1025 1 3 -1.0500000000000000e+00 5.0197352902290149e+00 -1.1907167532088971e+01 8.0528807818657455e+00 0 1 0 -1026 1 4 -9.4999999999999996e-01 -1.2979631380919937e+01 -1.0711006291129422e+01 8.1828700503208935e+00 1 1 0 -1027 1 3 -1.0500000000000000e+00 -1.1630417926343309e+01 -1.2702985659105673e+01 5.9540877407865125e+00 1 1 0 -1028 1 3 -1.0500000000000000e+00 5.2665416753772529e+00 -1.0525794189955786e+01 5.9524244742663583e+00 0 1 0 -1029 1 3 -1.0500000000000000e+00 6.4199440092225899e+00 -1.2851863874618857e+01 6.1439235679236717e+00 0 1 0 -1030 1 5 4.2499999999999999e-01 -1.2472661329901149e+01 -9.8320942016287756e+00 8.0389673105729180e+00 1 1 0 -1031 1 1 1.5750000000000000e+00 3.3585649047189889e+00 -1.5120093348845725e+01 9.1931411954480566e+00 0 1 0 -1032 1 2 2.1000000000000001e+00 -1.2854123594330217e+01 -1.3528045164039147e+01 6.4359116103016181e+00 1 1 0 -1033 1 2 2.1000000000000001e+00 -1.2933732590309141e+01 -1.6483074437729339e+01 6.4280564286910398e+00 1 1 0 -1034 1 3 -1.0500000000000000e+00 -1.2713145906083197e+01 -1.3599233632746742e+01 8.0541449495269859e+00 1 1 0 -1035 1 3 -1.0500000000000000e+00 -1.3068411221795856e+01 -1.6390092450304788e+01 8.0528554659863651e+00 1 1 0 -1036 1 4 -9.4999999999999996e-01 5.0522727200298299e+00 -1.5193845531977976e+01 8.1828789851171209e+00 0 1 0 -1037 1 3 -1.0500000000000000e+00 6.4014411247152445e+00 -1.7185907439712917e+01 5.9541046118211618e+00 0 1 0 -1038 1 3 -1.0500000000000000e+00 -1.2821635126045146e+01 -1.5008727883675213e+01 5.9524348860327301e+00 1 1 0 -1039 1 3 -1.0500000000000000e+00 -1.1668227901039566e+01 
-1.7334733486964044e+01 6.1439059255523709e+00 1 1 0 -1040 1 5 4.2499999999999999e-01 5.5591473737227073e+00 -1.4315020006199346e+01 8.0390921650199161e+00 0 1 0 -1041 1 1 1.5750000000000000e+00 -9.5293899139307694e+00 -1.6623787874027308e+01 9.1936237921326978e+00 1 1 0 -1042 1 2 2.1000000000000001e+00 -5.3464608308947765e+00 -8.8863556911962096e+00 -6.4424092176216048e+00 1 1 1 -1043 1 2 2.1000000000000001e+00 -5.3231748952036835e+00 -1.4897218973116527e+01 -6.4345104241940962e+00 1 1 1 -1044 1 3 -1.0500000000000000e+00 -5.5439872108319319e+00 -1.7781192038243685e+01 -8.0605608257717840e+00 1 1 1 -1045 1 3 -1.0500000000000000e+00 -5.1884582357209057e+00 -1.4989934983843813e+01 -8.0594168014332030e+00 1 1 1 -1046 1 4 -9.4999999999999996e-01 -7.8292281421606535e+00 -1.6186388852721340e+01 -8.1893254252454835e+00 1 1 1 -1047 1 3 -1.0500000000000000e+00 -9.1783739996627851e+00 -1.4194270057895784e+01 -5.9607866851716160e+00 1 1 1 -1048 1 3 -1.0500000000000000e+00 -5.4354986326112078e+00 -1.6371500383854073e+01 -5.9587300155577916e+00 1 1 1 -1049 1 3 -1.0500000000000000e+00 -6.5886729673149071e+00 -1.4045419390766746e+01 -6.1504069109579946e+00 1 1 1 -1050 1 5 4.2499999999999999e-01 -8.3360989250281836e+00 -1.7065170615191157e+01 -8.0454468626928328e+00 1 1 1 -1051 1 1 1.5750000000000000e+00 -6.9212237057752120e+00 -1.2140905279613698e+01 9.1936281507656687e+00 1 1 0 -1052 1 2 2.1000000000000001e+00 -7.9546199637027923e+00 -1.3369283212241633e+01 -6.4424198458964543e+00 1 1 1 -1053 1 2 2.1000000000000001e+00 -7.8750220126689445e+00 -1.0414269957845629e+01 -6.4344945451348980e+00 1 1 1 -1054 1 3 -1.0500000000000000e+00 -8.0958159273297188e+00 -1.3298260211663301e+01 -8.0605513382597493e+00 1 1 1 -1055 1 3 -1.0500000000000000e+00 -7.7403088366973201e+00 -1.0507006356603398e+01 -8.0593941765754753e+00 1 1 1 -1056 1 4 -9.4999999999999996e-01 -5.2211297865863031e+00 -1.1703547420839435e+01 -8.1893607408242381e+00 1 1 1 -1057 1 3 -1.0500000000000000e+00 
-6.5702769425775092e+00 -9.7113707796203723e+00 -5.9607925028876929e+00 1 1 1 -1058 1 3 -1.0500000000000000e+00 -7.9873270257999014e+00 -1.1888581912789562e+01 -5.9587188809513618e+00 1 1 1 -1059 1 3 -1.0500000000000000e+00 -9.1404883090905678e+00 -9.5625534027958778e+00 -6.1503936739305480e+00 1 1 1 -1060 1 5 4.2499999999999999e-01 -5.7279445458000602e+00 -1.2582296132873992e+01 -8.0457254720891491e+00 1 1 1 -1061 1 1 1.5750000000000000e+00 -9.5132712414377334e+00 -1.0637207901225382e+01 9.1931444515859475e+00 1 1 0 -1062 1 2 2.1000000000000001e+00 -1.0302264392719938e+01 -1.8010958511740707e+01 6.4359215057736137e+00 1 1 0 -1063 1 2 2.1000000000000001e+00 -1.0325597763836218e+01 -1.2000143885660425e+01 6.4280610801444311e+00 1 1 0 -1064 1 3 -1.0500000000000000e+00 -1.0104955208427846e+01 -9.1163013593801878e+00 8.0541136736620658e+00 1 1 0 -1065 1 3 -1.0500000000000000e+00 -1.0460267735536464e+01 -1.1907157783691902e+01 8.0528740633627187e+00 1 1 0 -1066 1 4 -9.4999999999999996e-01 -7.8196223206297599e+00 -1.0710993626792270e+01 8.1828507388406777e+00 1 1 0 -1067 1 3 -1.0500000000000000e+00 -6.4704642052545438e+00 -1.2703010171579383e+01 5.9540992478015298e+00 1 1 0 -1068 1 3 -1.0500000000000000e+00 -1.0213460429886737e+01 -1.0525822438026575e+01 5.9524290073249873e+00 1 1 0 -1069 1 3 -1.0500000000000000e+00 -9.0600631078363385e+00 -1.2851860507306093e+01 6.1439252136403280e+00 1 1 0 -1070 1 5 4.2499999999999999e-01 -7.3126955303015109e+00 -9.8321299462400891e+00 8.0388711780317124e+00 1 1 0 -1071 1 1 1.5750000000000000e+00 -1.2121421966983748e+01 -1.5120103097951539e+01 9.1931440500472021e+00 1 1 0 -1072 1 2 2.1000000000000001e+00 -7.6941002854421603e+00 -1.3528043511188180e+01 6.4359209951073471e+00 1 1 0 -1073 1 2 2.1000000000000001e+00 -7.7737266980076587e+00 -1.6483087090498845e+01 6.4280445509300250e+00 1 1 0 -1074 1 3 -1.0500000000000000e+00 -7.5531320715693777e+00 -1.3599231720429650e+01 8.0541168058333739e+00 1 1 0 -1075 1 3 -1.0500000000000000e+00 
-7.9084140951184896e+00 -1.6390096710853786e+01 8.0528583515780205e+00 1 1 0 -1076 1 4 -9.4999999999999996e-01 -1.0427757546752302e+01 -1.5193886230508655e+01 8.1828631556698568e+00 1 1 0 -1077 1 3 -1.0500000000000000e+00 -9.0785565111582986e+00 -1.7185905911303450e+01 5.9541108650454166e+00 1 1 0 -1078 1 3 -1.0500000000000000e+00 -7.6616310609012928e+00 -1.5008709807568991e+01 5.9524186852294534e+00 1 1 0 -1079 1 3 -1.0500000000000000e+00 -6.5082396808725838e+00 -1.7334728922656321e+01 6.1439092326832387e+00 1 1 0 -1080 1 5 4.2499999999999999e-01 -9.9208689362280111e+00 -1.4315031144537746e+01 8.0389449231468681e+00 1 1 0 -1081 1 1 1.5750000000000000e+00 -4.3693757223617666e+00 -1.6623797501862640e+01 9.1936263177833482e+00 1 1 0 -1082 1 2 2.1000000000000001e+00 -1.8648460984142190e-01 -8.8863586813278168e+00 -6.4424195624530487e+00 1 1 1 -1083 1 2 2.1000000000000001e+00 -1.6317858587872180e-01 -1.4897205481208472e+01 -6.4344990010154151e+00 1 1 1 -1084 1 3 -1.0500000000000000e+00 -3.8400153394278114e-01 -1.7781193820588967e+01 -8.0605319542434177e+00 1 1 1 -1085 1 3 -1.0500000000000000e+00 -2.8454999893785526e-02 -1.4989931453633501e+01 -8.0594185181939455e+00 1 1 1 -1086 1 4 -9.4999999999999996e-01 -2.6692591084942503e+00 -1.6186430683271997e+01 -8.1893402866282834e+00 1 1 1 -1087 1 3 -1.0500000000000000e+00 -4.0183661906291688e+00 -1.4194265831610833e+01 -5.9607808745353807e+00 1 1 1 -1088 1 3 -1.0500000000000000e+00 -2.7550238935744531e-01 -1.6371514908008283e+01 -5.9587140310484852e+00 1 1 1 -1089 1 3 -1.0500000000000000e+00 -1.4286614237111728e+00 -1.4045424230641748e+01 -6.1504096183736543e+00 1 1 1 -1090 1 5 4.2499999999999999e-01 -3.1761126419061068e+00 -1.7065179317613428e+01 -8.0455892156983246e+00 1 1 1 -1091 1 1 1.5750000000000000e+00 -1.7612158961272968e+00 -1.2140905479144390e+01 9.1936260759968853e+00 1 1 0 -1092 1 2 2.1000000000000001e+00 -2.7946263215985852e+00 -1.3369269256921584e+01 -6.4424110744123801e+00 1 1 1 -1093 1 2 2.1000000000000001e+00 
-2.7150458550199357e+00 -1.0414277514160078e+01 -6.4344944256095680e+00 1 1 1 -1094 1 3 -1.0500000000000000e+00 -2.9358094427693269e+00 -1.3298264285990225e+01 -8.0605669808735332e+00 1 1 1 -1095 1 3 -1.0500000000000000e+00 -2.5803112238951149e+00 -1.0506994842385364e+01 -8.0594011087445736e+00 1 1 1 -1096 1 4 -9.4999999999999996e-01 -6.1136511004701077e-02 -1.1703557808529808e+01 -8.1893413821177354e+00 1 1 1 -1097 1 3 -1.0500000000000000e+00 -1.4102319921708553e+00 -9.7113476086937851e+00 -5.9608039429212827e+00 1 1 1 -1098 1 3 -1.0500000000000000e+00 -2.8273291376600058e+00 -1.1888611910680980e+01 -5.9587126204693064e+00 1 1 1 -1099 1 3 -1.0500000000000000e+00 -3.9804942634357019e+00 -9.5625498131225903e+00 -6.1503916166810431e+00 1 1 1 -1100 1 5 4.2499999999999999e-01 -5.6791014020577890e-01 -1.2582261570234339e+01 -8.0456254549334609e+00 1 1 1 -1101 1 1 1.5750000000000000e+00 -4.3532626822440168e+00 -1.0637208536663195e+01 9.1931420264587729e+00 1 1 0 -1102 1 2 2.1000000000000001e+00 -5.1422605340029284e+00 -1.8010972814526962e+01 6.4359120870863116e+00 1 1 0 -1103 1 2 2.1000000000000001e+00 -5.1655741403261368e+00 -1.2000135207445506e+01 6.4280620600064111e+00 1 1 0 -1104 1 3 -1.0500000000000000e+00 -4.9449629476468537e+00 -9.1162979298605773e+00 8.0541307780712827e+00 1 1 0 -1105 1 3 -1.0500000000000000e+00 -5.3002653908317328e+00 -1.1907167772463211e+01 8.0528808659252675e+00 1 1 0 -1106 1 4 -9.4999999999999996e-01 -2.6596311317213690e+00 -1.0711006131470690e+01 8.1828701429922646e+00 1 1 0 -1107 1 3 -1.0500000000000000e+00 -1.3104180489574517e+00 -1.2702985515202105e+01 5.9540876691461300e+00 1 1 0 -1108 1 3 -1.0500000000000000e+00 -5.0534589320363521e+00 -1.0525793717166195e+01 5.9524239638193972e+00 1 1 0 -1109 1 3 -1.0500000000000000e+00 -3.9000552799467680e+00 -1.2851864283020131e+01 6.1439229758272287e+00 1 1 0 -1110 1 5 4.2499999999999999e-01 -2.1526612288429927e+00 -9.8320937887400692e+00 8.0389686940673108e+00 1 1 0 -1111 1 1 1.5750000000000000e+00 
-6.9614351741271125e+00 -1.5120093227433923e+01 9.1931413794662760e+00 1 1 0 -1112 1 2 2.1000000000000001e+00 -2.5341240788726989e+00 -1.3528045681933817e+01 6.4359119500026907e+00 1 1 0 -1113 1 2 2.1000000000000001e+00 -2.6137328955640973e+00 -1.6483073671230674e+01 6.4280561366629687e+00 1 1 0 -1114 1 3 -1.0500000000000000e+00 -2.3931455622464295e+00 -1.3599233318386894e+01 8.0541451348176771e+00 1 1 0 -1115 1 3 -1.0500000000000000e+00 -2.7484109330614652e+00 -1.6390092942449684e+01 8.0528560015686210e+00 1 1 0 -1116 1 4 -9.4999999999999996e-01 -5.2677274043188298e+00 -1.5193844862391190e+01 8.1828790257998492e+00 1 1 0 -1117 1 3 -1.0500000000000000e+00 -3.9185585465782733e+00 -1.7185907339500744e+01 5.9541042049943371e+00 1 1 0 -1118 1 3 -1.0500000000000000e+00 -2.5016349383751777e+00 -1.5008727970228067e+01 5.9524344787992991e+00 1 1 0 -1119 1 3 -1.0500000000000000e+00 -1.3482285748874521e+00 -1.7334733258778282e+01 6.1439062228621797e+00 1 1 0 -1120 1 5 4.2499999999999999e-01 -4.7608525391156977e+00 -1.4315020002600260e+01 8.0390931457615267e+00 1 1 0 -1121 1 1 1.5750000000000000e+00 8.4695165366353464e-01 -7.6579835812050305e+00 9.1936288874475025e+00 0 1 0 -1122 1 2 2.1000000000000001e+00 5.0298854047814761e+00 7.9466019669347077e-02 -6.4424144556152356e+00 0 1 1 -1123 1 2 2.1000000000000001e+00 5.0531536696119623e+00 -5.9314030709104628e+00 -6.4345137045491718e+00 0 1 1 -1124 1 3 -1.0500000000000000e+00 4.8323591387051597e+00 -8.8153742682486840e+00 -8.0605820239111772e+00 0 1 1 -1125 1 3 -1.0500000000000000e+00 5.1878732372825311e+00 -6.0241061595734156e+00 -8.0594120212256826e+00 0 1 1 -1126 1 4 -9.4999999999999996e-01 2.5470666548004353e+00 -7.2206258319811312e+00 -8.1893702843091258e+00 0 1 1 -1127 1 3 -1.0500000000000000e+00 1.1979474982824225e+00 -5.2284455720511929e+00 -5.9607876425440542e+00 0 1 1 -1128 1 3 -1.0500000000000000e+00 4.9408304873956261e+00 -7.4056780595905938e+00 -5.9586989539517790e+00 0 1 1 -1129 1 3 -1.0500000000000000e+00 
3.7876635127476206e+00 -5.0795967839315637e+00 -6.1504077701680906e+00 0 1 1 -1130 1 5 4.2499999999999999e-01 2.0402139399280124e+00 -8.0994421271782215e+00 -8.0457925512527595e+00 0 1 1 -1131 1 1 1.5750000000000000e+00 3.4551102318040119e+00 -3.1750602373809240e+00 9.1936249963033703e+00 0 1 0 -1132 1 2 2.1000000000000001e+00 2.4217092647353429e+00 -4.4034682343922888e+00 -6.4424100556567545e+00 0 1 1 -1133 1 2 2.1000000000000001e+00 2.5013293209638849e+00 -1.4484603202585618e+00 -6.4344992392404317e+00 0 1 1 -1134 1 3 -1.0500000000000000e+00 2.2805060226959757e+00 -4.3324466752627728e+00 -8.0605363775723244e+00 0 1 1 -1135 1 3 -1.0500000000000000e+00 2.6360394328354282e+00 -1.5411974066626648e+00 -8.0594077778299429e+00 0 1 1 -1136 1 4 -9.4999999999999996e-01 5.1552544755530079e+00 -2.7376679909019241e+00 -8.1893317299450903e+00 0 1 1 -1137 1 3 -1.0500000000000000e+00 3.8060930080128710e+00 -7.4554775156575559e-01 -5.9607837224071307e+00 0 1 1 -1138 1 3 -1.0500000000000000e+00 2.3890075299204767e+00 -2.9227581668061351e+00 -5.9587353346909673e+00 0 1 1 -1139 1 3 -1.0500000000000000e+00 1.2358408994011736e+00 -5.9670616076288141e-01 -6.1504018482632379e+00 0 1 1 -1140 1 5 4.2499999999999999e-01 4.6483926264102795e+00 -3.6164252934885788e+00 -8.0454920049899243e+00 0 1 1 -1141 1 1 1.5750000000000000e+00 8.6307055672640232e-01 -1.6713754640994978e+00 9.1931460949904640e+00 0 1 0 -1142 1 2 2.1000000000000001e+00 7.4067440539106855e-02 -9.0451227689768317e+00 6.4359123217975327e+00 0 1 0 -1143 1 2 2.1000000000000001e+00 5.0752316497217009e-02 -3.0343466932277092e+00 6.4280512560994687e+00 0 1 0 -1144 1 3 -1.0500000000000000e+00 2.7137419103710414e-01 -1.5049744182525160e-01 8.0541123492163607e+00 0 1 0 -1145 1 3 -1.0500000000000000e+00 -8.3917914878398392e-02 -2.9413479949048895e+00 8.0528600947866131e+00 0 1 0 -1146 1 4 -9.4999999999999996e-01 2.5567365503950334e+00 -1.7451597138855846e+00 8.1828414199204254e+00 0 1 0 -1147 1 3 -1.0500000000000000e+00 
3.9059034396848951e+00 -3.7371848336857560e+00 5.9541096293006248e+00 0 1 0 -1148 1 3 -1.0500000000000000e+00 1.6286724521224549e-01 -1.5599986393757455e+00 5.9524414687461746e+00 0 1 0 -1149 1 3 -1.0500000000000000e+00 1.3162680069554646e+00 -3.8860046896279545e+00 6.1439117370450713e+00 0 1 0 -1150 1 5 4.2499999999999999e-01 3.0636187093141718e+00 -8.6634582863043264e-01 8.0388105803397210e+00 0 1 0 -1151 1 1 1.5750000000000000e+00 -1.7450965412416704e+00 -6.1542721885858267e+00 9.1931396400753584e+00 0 1 0 -1152 1 2 2.1000000000000001e+00 2.6822454390238182e+00 -4.5622313652435196e+00 6.4359253215799903e+00 0 1 0 -1153 1 2 2.1000000000000001e+00 2.6026017072422256e+00 -7.5172435230657051e+00 6.4280525353786011e+00 0 1 0 -1154 1 3 -1.0500000000000000e+00 2.8231982980173314e+00 -4.6333936871729211e+00 8.0541324435706265e+00 0 1 0 -1155 1 3 -1.0500000000000000e+00 2.4679139242312864e+00 -7.4242702203153303e+00 8.0528635321919779e+00 0 1 0 -1156 1 4 -9.4999999999999996e-01 -5.1408619376051234e-02 -6.2280255545507099e+00 8.1829007040250481e+00 0 1 0 -1157 1 3 -1.0500000000000000e+00 1.2977674624764681e+00 -8.2200857847518538e+00 5.9541052800705856e+00 0 1 0 -1158 1 3 -1.0500000000000000e+00 2.7147125497670821e+00 -6.0428853524192228e+00 5.9523886568029081e+00 0 1 0 -1159 1 3 -1.0500000000000000e+00 3.8680955259932048e+00 -8.3689256292345569e+00 6.1439186514543209e+00 0 1 0 -1160 1 5 4.2499999999999999e-01 4.5549352909885421e-01 -5.3491180364217001e+00 8.0392291387924644e+00 0 1 0 -1161 1 1 1.5750000000000000e+00 6.0069661936035175e+00 -7.6579937548320185e+00 9.1936316044666739e+00 0 1 0 -1162 1 2 2.1000000000000001e+00 -1.0450137639744804e+01 7.9463299035463564e-02 -6.4424243710353402e+00 1 1 1 -1163 1 2 2.1000000000000001e+00 -1.0426849451533693e+01 -5.9313896376259745e+00 -6.4345018460644283e+00 1 1 1 -1164 1 3 -1.0500000000000000e+00 9.9923438028197751e+00 -8.8153761390063448e+00 -8.0605530520838684e+00 0 1 1 -1165 1 3 -1.0500000000000000e+00 
-1.0292123773060068e+01 -6.0241018369519264e+00 -8.0594139253470427e+00 1 1 1 -1166 1 4 -9.4999999999999996e-01 7.7070346980525173e+00 -7.2206693221354694e+00 -8.1893857246142900e+00 0 1 1 -1167 1 3 -1.0500000000000000e+00 6.3579542845899226e+00 -5.2284417436908193e+00 -5.9607824168769161e+00 0 1 1 -1168 1 3 -1.0500000000000000e+00 -1.0539173651568435e+01 -7.4056923085044541e+00 -5.9586827688544748e+00 1 1 1 -1169 1 3 -1.0500000000000000e+00 8.9476763873501461e+00 -5.0796014856025877e+00 -6.1504115174662424e+00 0 1 1 -1170 1 5 4.2499999999999999e-01 7.2001993237605930e+00 -8.0994521615003521e+00 -8.0459408726587203e+00 0 1 1 -1171 1 1 1.5750000000000000e+00 -1.2024881927933214e+01 -3.1750599741083469e+00 9.1936226503397407e+00 1 1 0 -1172 1 2 2.1000000000000001e+00 7.5817028306539775e+00 -4.4034548242472553e+00 -6.4424012953316998e+00 0 1 1 -1173 1 2 2.1000000000000001e+00 7.6613050161260396e+00 -1.4484663840709260e+00 -6.4344990879805639e+00 0 1 1 -1174 1 3 -1.0500000000000000e+00 7.4405122110527309e+00 -4.3324502105469040e+00 -8.0605503307031601e+00 0 1 1 -1175 1 3 -1.0500000000000000e+00 7.7960372596729002e+00 -1.5411873078722387e+00 -8.0594145656169474e+00 0 1 1 -1176 1 4 -9.4999999999999996e-01 -1.0324751332420679e+01 -2.7376760680379526e+00 -8.1893113790367398e+00 1 1 1 -1177 1 3 -1.0500000000000000e+00 8.9661392654718526e+00 -7.4552372525409538e-01 -5.9607953656302595e+00 0 1 1 -1178 1 3 -1.0500000000000000e+00 7.5490052861507984e+00 -2.9227882817179367e+00 -5.9587292162986722e+00 0 1 1 -1179 1 3 -1.0500000000000000e+00 6.3958341876680791e+00 -5.9670249615731663e-01 -6.1504001903853247e+00 0 1 1 -1180 1 5 4.2499999999999999e-01 9.8084275444973414e+00 -3.6163895221434412e+00 -8.0453831689662785e+00 0 1 1 -1181 1 1 1.5750000000000000e+00 6.0230793220357306e+00 -1.6713760727027740e+00 9.1931436640286570e+00 0 1 0 -1182 1 2 2.1000000000000001e+00 5.2340703084782234e+00 -9.0451373900601055e+00 6.4359026509421398e+00 0 1 0 -1183 1 2 2.1000000000000001e+00 
5.2107768285475249e+00 -3.0343378220597543e+00 6.4280520030326400e+00 0 1 0 -1184 1 3 -1.0500000000000000e+00 5.4313667981565832e+00 -1.5049347755671505e-01 8.0541295280698009e+00 0 1 0 -1185 1 3 -1.0500000000000000e+00 5.0760850477762016e+00 -2.9413590110961252e+00 8.0528672520399454e+00 0 1 0 -1186 1 4 -9.4999999999999996e-01 -1.2923273102973829e+01 -1.7451732121810899e+00 8.1828599921332490e+00 1 1 0 -1187 1 3 -1.0500000000000000e+00 -1.1574049043750753e+01 -3.7371601802106404e+00 5.9540991225733642e+00 1 1 0 -1188 1 3 -1.0500000000000000e+00 5.3228691356732281e+00 -1.5599701988768047e+00 5.9524365669383243e+00 0 1 0 -1189 1 3 -1.0500000000000000e+00 6.4762747623866446e+00 -3.8860082222617720e+00 6.1439098279782520e+00 0 1 0 -1190 1 5 4.2499999999999999e-01 -1.2416347906588287e+01 -8.6631108188533545e-01 8.0389014904005194e+00 1 1 0 -1191 1 1 1.5750000000000000e+00 3.4148894200107200e+00 -6.1542622930241908e+00 9.1931366055821719e+00 0 1 0 -1192 1 2 2.1000000000000001e+00 -1.2797778702965626e+01 -4.5622320761539967e+00 6.4359157700181306e+00 1 1 0 -1193 1 2 2.1000000000000001e+00 -1.2877404237061196e+01 -7.5172309934459047e+00 6.4280643167095981e+00 1 1 0 -1194 1 3 -1.0500000000000000e+00 -1.2656816171316713e+01 -4.6333960070141416e+00 8.0541603707338680e+00 1 1 0 -1195 1 3 -1.0500000000000000e+00 -1.3012083101506889e+01 -7.4242649986939213e+00 8.0528602947442778e+00 1 1 0 -1196 1 4 -9.4999999999999996e-01 5.1086228485452754e+00 -6.2279825781387714e+00 8.1829178979826871e+00 0 1 0 -1197 1 3 -1.0500000000000000e+00 6.4577656205449188e+00 -8.2200870263303347e+00 5.9540987494082156e+00 0 1 0 -1198 1 3 -1.0500000000000000e+00 -1.2765291524992206e+01 -6.0429045661158707e+00 5.9524054565834668e+00 1 1 0 -1199 1 3 -1.0500000000000000e+00 -1.1611893334617134e+01 -8.3689294997385222e+00 6.1439150437810142e+00 1 1 0 -1200 1 5 4.2499999999999999e-01 5.6155110306614677e+00 -5.3491040803435190e+00 8.0393878066564533e+00 0 1 0 -1201 1 1 1.5750000000000000e+00 
-9.4730482558538363e+00 -7.6579837804608353e+00 9.1936288584421817e+00 1 1 0 -1202 1 2 2.1000000000000001e+00 -5.2901137812505343e+00 7.9465578875971943e-02 -6.4424141224911704e+00 1 1 1 -1203 1 2 2.1000000000000001e+00 -5.2668457657427714e+00 -5.9314030152575619e+00 -6.4345136025435696e+00 1 1 1 -1204 1 3 -1.0500000000000000e+00 -5.4876411141658021e+00 -8.8153737446390359e+00 -8.0605818622866803e+00 1 1 1 -1205 1 3 -1.0500000000000000e+00 -5.1321272527986546e+00 -6.0241062466034769e+00 -8.0594120618712797e+00 1 1 1 -1206 1 4 -9.4999999999999996e-01 -7.7729334170461382e+00 -7.2206262093781639e+00 -8.1893704172959332e+00 1 1 1 -1207 1 3 -1.0500000000000000e+00 -9.1220532114999084e+00 -5.2284458130184035e+00 -5.9607879501683279e+00 1 1 1 -1208 1 3 -1.0500000000000000e+00 -5.3791700396304432e+00 -7.4056785499597986e+00 -5.9586994192034970e+00 1 1 1 -1209 1 3 -1.0500000000000000e+00 -6.5323364101775097e+00 -5.0795968883872256e+00 -6.1504080690006111e+00 1 1 1 -1210 1 5 4.2499999999999999e-01 -8.2797860458884607e+00 -8.0994421096420997e+00 -8.0457936049874998e+00 1 1 1 -1211 1 1 1.5750000000000000e+00 -6.8648895692884251e+00 -3.1750601185026568e+00 9.1936250796126728e+00 1 1 0 -1212 1 2 2.1000000000000001e+00 -7.8982904263320277e+00 -4.4034684490191758e+00 -6.4424103941693547e+00 1 1 1 -1213 1 2 2.1000000000000001e+00 -7.8186708591875442e+00 -1.4484597541953050e+00 -6.4344993359968523e+00 1 1 1 -1214 1 3 -1.0500000000000000e+00 -8.0394941486076519e+00 -4.3324466787230307e+00 -8.0605359249190123e+00 1 1 1 -1215 1 3 -1.0500000000000000e+00 -7.6839605876128365e+00 -1.5411977245256949e+00 -8.0594077717620856e+00 1 1 1 -1216 1 4 -9.4999999999999996e-01 -5.1647454040881282e+00 -2.7376674564410823e+00 -8.1893315607054049e+00 1 1 1 -1217 1 3 -1.0500000000000000e+00 -6.5139066907486161e+00 -7.4554731891745618e-01 -5.9607837703642241e+00 1 1 1 -1218 1 3 -1.0500000000000000e+00 -7.9309926183745523e+00 -2.9227576840502572e+00 -5.9587353580265621e+00 1 1 1 -1219 1 3 
-1.0500000000000000e+00 -9.0841593543895360e+00 -5.9670620551901621e-01 -6.1504020768312335e+00 1 1 1 -1220 1 5 4.2499999999999999e-01 -5.6716072349666886e+00 -3.6164251093405397e+00 -8.0454906260480783e+00 1 1 1 -1221 1 1 1.5750000000000000e+00 -9.4569292783711720e+00 -1.6713754920263852e+00 9.1931457295587080e+00 1 1 0 -1222 1 2 2.1000000000000001e+00 -1.0245932376290614e+01 -9.0451235386775792e+00 6.4359124866582462e+00 1 1 0 -1223 1 2 2.1000000000000001e+00 -1.0269246952870757e+01 -3.0343462448060272e+00 6.4280513194600832e+00 1 1 0 -1224 1 3 -1.0500000000000000e+00 -1.0048625782907161e+01 -1.5049666929490613e-01 8.0541125763199020e+00 1 1 0 -1225 1 3 -1.0500000000000000e+00 -1.0403918287278653e+01 -2.9413486509055247e+00 8.0528602350875680e+00 1 1 0 -1226 1 4 -9.4999999999999996e-01 -7.7632636474169150e+00 -1.7451601758199367e+00 8.1828410557435127e+00 1 1 0 -1227 1 3 -1.0500000000000000e+00 -6.4140961797655454e+00 -3.7371846807874363e+00 5.9541096999475336e+00 1 1 0 -1228 1 3 -1.0500000000000000e+00 -1.0157133031150034e+01 -1.5599995082696054e+00 5.9524404810865761e+00 1 1 0 -1229 1 3 -1.0500000000000000e+00 -9.0037325780420030e+00 -3.8860049270011618e+00 6.1439117463650170e+00 1 1 0 -1230 1 5 4.2499999999999999e-01 -7.2563815502586255e+00 -8.6634628906294964e-01 8.0388078115699031e+00 1 1 0 -1231 1 1 1.5750000000000000e+00 -1.2065096650972574e+01 -6.1542722772754583e+00 9.1931397726777710e+00 1 1 0 -1232 1 2 2.1000000000000001e+00 -7.6377546012995223e+00 -4.5622303585522523e+00 6.4359252178928603e+00 1 1 0 -1233 1 2 2.1000000000000001e+00 -7.7173985866608206e+00 -7.5172441124993092e+00 6.4280522853100841e+00 1 1 0 -1234 1 3 -1.0500000000000000e+00 -7.4968018474351847e+00 -4.6333944697759453e+00 8.0541317707799216e+00 1 1 0 -1235 1 3 -1.0500000000000000e+00 -7.8520862052053939e+00 -7.4242695437274406e+00 8.0528631393504213e+00 1 1 0 -1236 1 4 -9.4999999999999996e-01 -1.0371408885631821e+01 -6.2280260117520783e+00 8.1829008047331513e+00 1 1 0 -1237 1 3 
-1.0500000000000000e+00 -9.0222328910023926e+00 -8.2200853313923243e+00 5.9541052342574226e+00 1 1 0 -1238 1 3 -1.0500000000000000e+00 -7.6052877106027434e+00 -6.0428855091388112e+00 5.9523894388192833e+00 1 1 0 -1239 1 3 -1.0500000000000000e+00 -6.4519043738629689e+00 -8.3689258339210468e+00 6.1439186596132167e+00 1 1 0 -1240 1 5 4.2499999999999999e-01 -9.8645060505972708e+00 -5.3491171533452899e+00 8.0392299847806328e+00 1 1 0 -1241 1 1 1.5750000000000000e+00 -4.3130335719799664e+00 -7.6579937038842196e+00 9.1936314019332954e+00 1 1 0 -1242 1 2 2.1000000000000001e+00 -1.3013744627974866e-01 7.9462913475630614e-02 -6.4424246376464893e+00 1 1 1 -1243 1 2 2.1000000000000001e+00 -1.0684893537057505e-01 -5.9313893508898428e+00 -6.4345017895585537e+00 1 1 1 -1244 1 3 -1.0500000000000000e+00 -3.2765635106517799e-01 -8.8153758391408683e+00 -8.0605526742101183e+00 1 1 1 -1245 1 3 -1.0500000000000000e+00 2.7875841094589404e-02 -6.0241024867204338e+00 -8.0594144214765731e+00 1 1 1 -1246 1 4 -9.4999999999999996e-01 -2.6129651405688552e+00 -7.2206691446852744e+00 -8.1893860636445819e+00 1 1 1 -1247 1 3 -1.0500000000000000e+00 -3.9620450314031350e+00 -5.2284410824498675e+00 -5.9607825544872162e+00 1 1 1 -1248 1 3 -1.0500000000000000e+00 -2.1917390278932380e-01 -7.4056927498705267e+00 -5.9586833079493466e+00 1 1 1 -1249 1 3 -1.0500000000000000e+00 -1.3723243147179893e+00 -5.0796017338512662e+00 -6.1504112915507374e+00 1 1 1 -1250 1 5 4.2499999999999999e-01 -3.1198011720613454e+00 -8.0994528008667253e+00 -8.0459422208187643e+00 1 1 1 -1251 1 1 1.5750000000000000e+00 -1.7048821048822589e+00 -3.1750600233974851e+00 9.1936227443404306e+00 1 1 0 -1252 1 2 2.1000000000000001e+00 -2.7382971234679294e+00 -4.4034543566522828e+00 -6.4424013859544047e+00 1 1 1 -1253 1 2 2.1000000000000001e+00 -2.6586950753550083e+00 -1.4484668500258664e+00 -6.4344993053995712e+00 1 1 1 -1254 1 3 -1.0500000000000000e+00 -2.8794880275479802e+00 -4.3324506611342049e+00 -8.0605508713604959e+00 1 1 1 -1255 1 3 
-1.0500000000000000e+00 -2.5239630752470426e+00 -1.5411866297456065e+00 -8.0594147792074153e+00 1 1 1 -1256 1 4 -9.4999999999999996e-01 -4.7517837259558604e-03 -2.7376768429853673e+00 -8.1893112828875445e+00 1 1 1 -1257 1 3 -1.0500000000000000e+00 -1.3538609137908306e+00 -7.4552332505261631e-01 -5.9607953412511501e+00 1 1 1 -1258 1 3 -1.0500000000000000e+00 -2.7709951342120993e+00 -2.9227884148814525e+00 -5.9587287907420370e+00 1 1 1 -1259 1 3 -1.0500000000000000e+00 -3.9241656194851835e+00 -5.9670276827438684e-01 -6.1504002860173532e+00 1 1 1 -1260 1 5 4.2499999999999999e-01 -5.1157204379419063e-01 -3.6163887288381531e+00 -8.0453830198534977e+00 1 1 1 -1261 1 1 1.5750000000000000e+00 -4.2969203280827042e+00 -1.6713763688959737e+00 9.1931435527350907e+00 1 1 0 -1262 1 2 2.1000000000000001e+00 -5.0859285809060815e+00 -9.0451378610562490e+00 6.4359028036352743e+00 1 1 0 -1263 1 2 2.1000000000000001e+00 -5.1092223160746624e+00 -3.0343377842534469e+00 6.4280523464977879e+00 1 1 0 -1264 1 3 -1.0500000000000000e+00 -4.8886336868453881e+00 -1.5049298815613810e-01 8.0541300922261030e+00 1 1 0 -1265 1 3 -1.0500000000000000e+00 -5.2439155117684226e+00 -2.9413591861830017e+00 8.0528672575652891e+00 1 1 0 -1266 1 4 -9.4999999999999996e-01 -2.6032732264105301e+00 -1.7451740386941132e+00 8.1828598914546760e+00 1 1 0 -1267 1 3 -1.0500000000000000e+00 -1.2540490739490959e+00 -3.7371600128887970e+00 5.9540989905604338e+00 1 1 0 -1268 1 3 -1.0500000000000000e+00 -4.9971316149433278e+00 -1.5599701828983008e+00 5.9524360634314295e+00 1 1 0 -1269 1 3 -1.0500000000000000e+00 -3.8437253850415853e+00 -3.8860083322717820e+00 6.1439094566028505e+00 1 1 0 -1270 1 5 4.2499999999999999e-01 -2.0963478358362906e+00 -8.6631093607094911e-01 8.0388999212996310e+00 1 1 0 -1271 1 1 1.5750000000000000e+00 -6.9051103097426907e+00 -6.1542621657147709e+00 9.1931367563616888e+00 1 1 0 -1272 1 2 2.1000000000000001e+00 -2.4777787674654199e+00 -4.5622327426858931e+00 6.4359160209892181e+00 1 1 0 -1273 1 2 
2.1000000000000001e+00 -2.5574043892191067e+00 -7.5172298950838190e+00 6.4280642763354940e+00 1 1 0 -1274 1 3 -1.0500000000000000e+00 -2.3368161191143999e+00 -4.6333959836381862e+00 8.0541606972964814e+00 1 1 0 -1275 1 3 -1.0500000000000000e+00 -2.6920831091204462e+00 -7.4242654010828897e+00 8.0528604112015607e+00 1 1 0 -1276 1 4 -9.4999999999999996e-01 -5.2113770815501210e+00 -6.2279820619212582e+00 8.1829178539255096e+00 1 1 0 -1277 1 3 -1.0500000000000000e+00 -3.8622341308639339e+00 -8.2200867295059776e+00 5.9540986555569546e+00 1 1 0 -1278 1 3 -1.0500000000000000e+00 -2.4452916356073002e+00 -6.0429049672168205e+00 5.9524054018626558e+00 1 1 0 -1279 1 3 -1.0500000000000000e+00 -1.2918938182515838e+00 -8.3689297483017420e+00 6.1439149539106701e+00 1 1 0 -1280 1 5 4.2499999999999999e-01 -4.7044890687991749e+00 -5.3491044489278803e+00 8.0393882099475782e+00 1 1 0 +264 1 3 -1.05 -8.46491586606061 -17.901292325442792 -1.1390414842434584 1 1 0 +304 1 3 -1.05 -3.3049186986576506 -17.901290750068977 -1.1390437890122005 1 1 0 +386 1 4 -0.95 -11.28157790356712 -10.528150577554554 -1.0092882779490928 1 1 0 +387 1 3 -1.05 -9.935357744081871 -12.520946280855286 -3.2389916781308177 1 1 0 +392 1 2 2.1 -11.15811099398584 -13.347486038255383 -2.757315613614664 1 1 0 +393 1 2 2.1 -11.237889362245781 -16.302594662783648 -2.7652874030664893 1 1 0 +394 1 3 -1.05 -11.016750509593795 -13.41838356845754 -1.1390376370894852 1 1 0 +395 1 3 -1.05 -11.372121493310306 -16.209812211718564 -1.1403956987186419 1 1 0 +398 1 3 -1.05 -11.123698820481325 -14.828288395652272 -3.2407370008852503 1 1 0 +399 1 3 -1.05 -9.973058453394058 -17.155713166817623 -3.0491809998142045 1 1 0 +422 1 2 2.1 -8.60626977865343 -17.83040303949202 -2.757307688224758 1 1 0 +423 1 2 2.1 -8.629758159651553 -11.819745199168645 -2.7652839948923624 1 1 0 +425 1 3 -1.05 -8.763971912039096 -11.726883125298963 -1.1403800557050356 1 1 0 +426 1 4 -0.95 -6.121574906086911 -10.528150365511065 -1.0092950354343362 1 1 0 +427 1 3 
-1.05 -4.775388146251923 -12.520964201416565 -3.238989168491626 1 1 0 +428 1 3 -1.05 -8.515522554581228 -10.345254400323427 -3.2407468259276673 1 1 0 +429 1 3 -1.05 -7.364884681634946 -12.672823688539507 -3.0491633890428966 1 1 0 +430 1 5 0.425 -5.613050421649238 -9.64905707477696 -1.1481351133572844 1 1 0 +432 1 2 2.1 -5.998102242208308 -13.347479307389396 -2.7573203771857013 1 1 0 +433 1 2 2.1 -6.077895683838097 -16.302607571672493 -2.7652940728794455 1 1 0 +434 1 3 -1.05 -5.856746304305593 -13.418383628427902 -1.1390485082470203 1 1 0 +435 1 3 -1.05 -6.212119786812962 -16.209812802990335 -1.1403998993912392 1 1 0 +436 1 4 -0.95 -8.729708502581238 -15.011039091449069 -1.009285880112964 1 1 0 +437 1 3 -1.05 -7.383491201121386 -17.003869454733763 -3.2389871522032028 1 1 0 +438 1 3 -1.05 -5.963695048103962 -14.828277302058277 -3.240739000854397 1 1 0 +439 1 3 -1.05 -4.813069783015718 -17.155706888160015 -3.049179484681825 1 1 0 +440 1 5 0.425 -8.221237365494655 -14.13197978175737 -1.148104642070665 1 1 0 +462 1 2 2.1 -3.4462548346618433 -17.830406583197693 -2.7573102223982113 1 1 0 +463 1 2 2.1 -3.469735283388167 -11.819752050513955 -2.7652804467485357 1 1 0 +465 1 3 -1.05 -3.603971352452068 -11.726883408388186 -1.1403764365310618 1 1 0 +468 1 3 -1.05 -3.355521162154167 -10.345233316904825 -3.2407460058544304 1 1 0 +469 1 3 -1.05 -2.2048968961935245 -12.67281584124379 -3.0491647524307126 1 1 0 +470 1 5 0.425 -0.4530336944118556 -9.649058436806532 -1.1481079681882829 1 1 0 +476 1 4 -0.95 -3.569702194052315 -15.011029442444274 -1.0092829982342053 1 1 0 +477 1 3 -1.05 -2.223510954503034 -17.003879928264908 -3.238987017751225 1 1 0 +480 1 5 0.425 -3.061226086377273 -14.131977973418296 -1.1480614741617021 1 1 0 +842 1 2 2.1 -10.562132297329805 -17.851014175793086 -6.432546285956645 1 1 1 +882 1 2 2.1 -5.402127266641829 -17.85100681359648 -6.4325501191668755 1 1 1 +1003 1 2 2.1 -10.482315844290621 -14.895888504252758 -6.4245862304386065 1 1 1 +1005 1 3 -1.05 
-10.348095162304375 -14.98875808661767 -8.049491681798669 1 1 1 +1008 1 3 -1.05 -10.596544769602936 -16.37040254747953 -5.949125564063066 1 1 1 +1011 1 1 1.575 -8.68725474195775 -11.77759774974404 -9.1893849316929 1 1 1 +1016 1 4 -0.95 -10.382364613954495 -11.704612098178766 -8.180586191286995 1 1 1 +1020 1 5 0.425 9.749175400148882 -12.583669666698384 -8.041796462307946 0 1 1 +1041 1 1 1.575 -6.135425776818094 -16.260473936348028 -9.189383231154757 1 1 1 +1043 1 2 2.1 -5.3223230647806945 -14.895898704501215 -6.424593124897623 1 1 1 +1044 1 3 -1.05 -5.543484086381093 -17.780185221481208 -8.050814994171715 1 1 1 +1045 1 3 -1.05 -5.188093428747346 -14.988758733832059 -8.04949629788937 1 1 1 +1046 1 4 -0.95 -7.8304849276464115 -16.187485278543456 -8.180577316436402 1 1 1 +1047 1 3 -1.05 -9.176703799441476 -14.19469188744462 -5.95088180516822 1 1 1 +1048 1 3 -1.05 -5.436541172095698 -16.370396928539293 -5.9491275765252185 1 1 1 +1049 1 3 -1.05 -6.587179230640963 -14.042820482901044 -6.140706408704635 1 1 1 +1050 1 5 0.425 -8.339016864185075 -17.06657758066529 -8.041710773044175 1 1 1 +1051 1 1 1.575 -3.527261903235889 -11.77759444673707 -9.189385423079939 1 1 1 +1052 1 2 2.1 -7.953952109972265 -13.36816612248871 -6.432556712138617 1 1 1 +1053 1 2 2.1 -7.8741625097250605 -10.413049444820782 -6.4245790461347605 1 1 1 +1054 1 3 -1.05 -8.095317714508074 -13.297256855654805 -8.050832188461564 1 1 1 +1055 1 3 -1.05 -7.739944593076132 -10.505829863398748 -8.049470808969227 1 1 1 +1056 1 4 -0.95 -5.22236234575324 -11.704613397559287 -8.180592735251716 1 1 1 +1057 1 3 -1.05 -6.568578215806069 -9.711775687113166 -5.950883008183602 1 1 1 +1058 1 3 -1.05 -7.988367489366465 -11.887339802933614 -5.949132603449773 1 1 1 +1059 1 3 -1.05 -9.13900815725675 -9.559928450728057 -6.140691573977905 1 1 1 +1060 1 5 0.425 -5.730842071322643 -12.583668202662341 -8.041826700411479 1 1 1 +1061 1 1 1.575 -9.51358678931899 -10.63699289645826 9.189382244602578 1 1 0 +1071 1 1 1.575 
-12.121741592199127 -15.119878495438652 9.18938580037907 1 1 0 +1086 1 4 -0.95 -2.670491270081923 -16.187494416422872 -8.180579828027994 1 1 1 +1087 1 3 -1.05 -4.0166814766710734 -14.194679692266455 -5.950882081820672 1 1 1 +1092 1 2 2.1 -2.793967359717164 -13.368163663673295 -6.432553729688593 1 1 1 +1093 1 2 2.1 -2.714183453046749 -10.413041630295027 -6.4245816020409485 1 1 1 +1094 1 3 -1.05 -2.935315187147549 -13.297258489782635 -8.050829092232332 1 1 1 +1095 1 3 -1.05 -2.5799455030118272 -10.505829398335198 -8.049473780055107 1 1 1 +1098 1 3 -1.05 -2.828369189735337 -11.887360068177586 -5.949133477017218 1 1 1 +1099 1 3 -1.05 -3.978994149665634 -9.559937349167438 -6.140690350890794 1 1 1 +1111 1 1 1.575 -6.961748817696581 -15.11987630783114 9.189384651241914 1 1 0 +184 1 3 -1.05 1.8550841706248615 -17.90129235456656 -1.139041394638964 0 1 0 +224 1 3 -1.05 7.015081266257585 -17.90129077481551 -1.1390443875216096 0 1 0 +342 1 2 2.1 1.7137286735549981 -17.830403990336592 -2.7573075994425302 0 1 0 +343 1 2 2.1 1.6902433706862166 -11.81974490284625 -2.765283591611176 0 1 0 +345 1 3 -1.05 1.5560281171673136 -11.726883064897962 -1.1403796474735532 0 1 0 +346 1 4 -0.95 4.19842475094546 -10.528150984297781 -1.0092947548256248 0 1 0 +347 1 3 -1.05 5.544610676800708 -12.520964874446697 -3.238989158956352 0 1 0 +348 1 3 -1.05 1.8044775438647918 -10.34525379637876 -3.2407469149941974 0 1 0 +349 1 3 -1.05 2.9551154623676954 -12.672823617460397 -3.0491633747353077 0 1 0 +350 1 5 0.425 4.70694993483294 -9.649056414846337 -1.1481352776498177 0 1 0 +352 1 2 2.1 4.321898281880586 -13.347477423653608 -2.7573203124497994 0 1 0 +353 1 2 2.1 4.242103458566291 -16.302606453938324 -2.7652944924881524 0 1 0 +354 1 3 -1.05 4.463253605388115 -13.418383717750396 -1.1390481950715863 0 1 0 +355 1 3 -1.05 4.107880173799165 -16.20981289781871 -1.1404006770584267 0 1 0 +356 1 4 -0.95 1.590291661406587 -15.011038808742594 -1.0092858875728705 0 1 0 +357 1 3 -1.05 2.936511486433403 
-17.003867946178126 -3.2389871417941283 0 1 0 +358 1 3 -1.05 4.356304894039823 -14.82828075681348 -3.240738966883275 0 1 0 +359 1 3 -1.05 5.5069296575236155 -17.15570655268637 -3.0491796518303165 0 1 0 +360 1 5 0.425 2.0987625163240065 -14.131980219839427 -1.1481045890306945 0 1 0 +382 1 2 2.1 6.873746152856757 -17.83040759592042 -2.7573103659838294 0 1 0 +383 1 2 2.1 6.850264705504596 -11.819752523567601 -2.765280196679372 0 1 0 +385 1 3 -1.05 6.716028840751633 -11.726883416657042 -1.1403757891844322 0 1 0 +388 1 3 -1.05 6.9644790606265055 -10.345231078604485 -3.2407460256393845 0 1 0 +389 1 3 -1.05 8.115103706656491 -12.67281636337119 -3.049164810920919 0 1 0 +390 1 5 0.425 -10.773033349967385 -9.649057938720548 -1.1481081159206 1 1 0 +396 1 4 -0.95 6.750298277493279 -15.011028676607136 -1.009283156622482 0 1 0 +397 1 3 -1.05 8.0964871902111 -17.003881231209515 -3.2389870762613597 0 1 0 +400 1 5 0.425 7.2587734956592485 -14.13197867342077 -1.1480612275841189 0 1 0 +466 1 4 -0.95 -0.9615776661893243 -10.528149916413177 -1.0092884371126996 1 1 0 +467 1 3 -1.05 0.384639153569287 -12.520948194899127 -3.238991755885184 1 1 0 +472 1 2 2.1 -0.8381092882996519 -13.347486331441 -2.757316117320512 1 1 0 +473 1 2 2.1 -0.9178906756593168 -16.302595966027578 -2.7652875692926067 1 1 0 +474 1 3 -1.05 -0.6967505864201584 -13.418383565182262 -1.1390380998660525 1 1 0 +475 1 3 -1.05 -1.052121569574025 -16.2098121280094 -1.1403955470516056 1 1 0 +478 1 3 -1.05 -0.80369877564749 -14.828286105827077 -3.240737085742131 1 1 0 +479 1 3 -1.05 0.3469420057265431 -17.155713599291026 -3.0491809320772196 1 1 0 +802 1 2 2.1 4.917871554153482 -17.85100620844276 -6.432550144635655 0 1 1 +922 1 2 2.1 -0.24213302537487635 -17.851014271362367 -6.432546231091103 1 1 1 +961 1 1 1.575 4.184574182271406 -16.26047384481049 -9.189383148012734 0 1 1 +963 1 2 2.1 4.997676106105352 -14.89589820439723 -6.4245934413401775 0 1 1 +964 1 3 -1.05 4.776515931907607 -17.780185218865757 -8.050814805633557 0 1 1 
+965 1 3 -1.05 5.131906621044807 -14.988758753951338 -8.049496747093013 0 1 1 +966 1 4 -0.95 2.4895150021685026 -16.187485328567735 -8.18057734079336 0 1 1 +967 1 3 -1.05 1.143296887105647 -14.194691428548982 -5.9508816939542974 0 1 1 +968 1 3 -1.05 4.883458746547756 -16.37039893676453 -5.949127198225527 0 1 1 +969 1 3 -1.05 3.732820257087102 -14.042820299512814 -6.140706422631954 0 1 1 +970 1 5 0.425 1.9809831558045818 -17.066577464624032 -8.041710794411177 0 1 1 +971 1 1 1.575 6.7927381146326375 -11.77759445236754 -9.189385500475517 0 1 1 +972 1 2 2.1 2.366048010875998 -13.368167275390114 -6.432556655123134 0 1 1 +973 1 2 2.1 2.4458378380486767 -10.41305019631907 -6.424578726466713 0 1 1 +974 1 3 -1.05 2.2246822195755787 -13.29725686242322 -8.05083232247889 0 1 1 +975 1 3 -1.05 2.5800552709418074 -10.505829765473539 -8.049470445822946 0 1 1 +976 1 4 -0.95 5.0976375270724485 -11.704613678089164 -8.180592647516727 0 1 1 +977 1 3 -1.05 3.7514206411706486 -9.711776314023226 -5.950882972545097 0 1 1 +978 1 3 -1.05 2.3316324623441567 -11.887337544400147 -5.949132835505473 0 1 1 +979 1 3 -1.05 1.1809926474831354 -9.559928961691705 -6.140691595020608 0 1 1 +980 1 5 0.425 4.589158058033442 -12.583667977364943 -8.041826644512858 0 1 1 +981 1 1 1.575 0.8064132734883156 -10.636992854297397 9.189382072755725 0 1 0 +991 1 1 1.575 -1.8017416248975664 -15.119878549552123 9.189385972861375 0 1 0 +1001 1 1 1.575 9.344581867349657 -16.26047640418962 -9.189381749309328 0 1 1 +1004 1 3 -1.05 9.936511499778717 -17.780185040060882 -8.05080542913734 0 1 1 +1006 1 4 -0.95 7.649509037996275 -16.18749394104903 -8.180579812342321 0 1 1 +1007 1 3 -1.05 6.303320211197779 -14.1946787391996 -5.950882102933339 0 1 1 +1009 1 3 -1.05 8.892830645210495 -14.042826183342902 -6.140708128407816 0 1 1 +1010 1 5 0.425 7.140972750324373 -17.06658046434351 -8.041751694671362 0 1 1 +1012 1 2 2.1 7.526031845452174 -13.36816391655739 -6.432553511023912 0 1 1 +1013 1 2 2.1 7.605817828327925 -10.413041177382924 
-6.424581346822816 0 1 1 +1014 1 3 -1.05 7.38468487052144 -13.29725845871807 -8.050828733960508 0 1 1 +1015 1 3 -1.05 7.7400544850575095 -10.50582940361992 -8.04947339734067 0 1 1 +1017 1 3 -1.05 8.911445176635418 -9.711761973761018 -5.950885381272831 0 1 1 +1018 1 3 -1.05 7.491630912269503 -11.887360305407935 -5.949133684133898 0 1 1 +1019 1 3 -1.05 6.3410053156834145 -9.559936868934592 -6.140690348261861 0 1 1 +1021 1 1 1.575 5.966421032045936 -10.636996344651585 9.18938264851844 0 1 0 +1031 1 1 1.575 3.358251150778086 -15.119876228292004 9.189384489321482 0 1 0 +1081 1 1 1.575 -0.9754180121607927 -16.260476400180153 -9.189381721845933 1 1 1 +1083 1 2 2.1 -0.16231620481821452 -14.89588855547083 -6.424586299599195 1 1 1 +1084 1 3 -1.05 -0.3834885547438134 -17.780184943603096 -8.050805349197944 1 1 1 +1085 1 3 -1.05 -0.028095111453609434 -14.988758081242285 -8.049491887024741 1 1 1 +1088 1 3 -1.05 -0.2765447418097864 -16.37040300834762 -5.9491255041968065 1 1 1 +1089 1 3 -1.05 -1.4271693938816554 -14.042826105628727 -6.140708097208753 1 1 1 +1090 1 5 0.425 -3.179027007517461 -17.066580020974854 -8.041751915389574 1 1 1 +1091 1 1 1.575 1.6327453228612008 -11.777597747951123 -9.189384953878353 1 1 1 +1096 1 4 -0.95 -0.0623645826637933 -11.704612065115022 -8.180586180092902 1 1 1 +1097 1 3 -1.05 -1.4085524757194428 -9.711760380898674 -5.950885471300883 1 1 1 +1100 1 5 0.425 -0.5708246238514203 -12.583669638729017 -8.04179655219608 1 1 1 +1101 1 1 1.575 -4.353578939913898 -10.636996303988589 9.18938270030422 1 1 0 +424 1 3 -1.05 -8.408577734808924 -8.935458287782806 -1.139059751375683 1 1 0 +464 1 3 -1.05 -3.248580635072857 -8.935456708467136 -1.1390620639004094 1 1 0 +546 1 4 -0.95 -11.22522470896448 -1.5623206043777742 -1.009290789576525 1 1 0 +547 1 3 -1.05 -9.878997905733707 -3.5551322132945415 -3.2389873081377836 1 1 0 +552 1 2 2.1 -11.101767117492052 -4.381650113675265 -2.757304877477752 1 1 0 +553 1 2 2.1 -11.18156269540419 -7.336765995632838 -2.765288344254607 
1 1 0 +554 1 3 -1.05 -10.960418221812578 -4.452562993782461 -1.1390300904548987 1 1 0 +555 1 3 -1.05 -11.31579151270627 -7.243982991685186 -1.1404006413513326 1 1 0 +558 1 3 -1.05 -11.06735994354583 -5.86248222073808 -3.2407468195561364 1 1 0 +559 1 3 -1.05 -9.916720989567086 -8.189896133020403 -3.0491777269547473 1 1 0 +582 1 2 2.1 -8.549941681999599 -8.864628616853421 -2.7573212395646927 1 1 0 +583 1 2 2.1 -8.573408346697407 -2.853890413154632 -2.765288711323917 1 1 0 +585 1 3 -1.05 -8.707624618283598 -2.761073008594156 -1.1403895124190147 1 1 0 +586 1 4 -0.95 -6.065222151349898 -1.5623213449494315 -1.0092973063700743 1 1 0 +587 1 3 -1.05 -4.719027607075699 -3.5551497395403047 -3.2389848295174266 1 1 0 +588 1 3 -1.05 -8.459194165968885 -1.379498682372585 -3.2407378892779697 1 1 0 +589 1 3 -1.05 -7.308554273668932 -3.7069858317422746 -3.049174267227535 1 1 0 +590 1 5 0.425 -5.556733431736472 -0.6832639340674191 -1.1481901639195726 1 1 0 +592 1 2 2.1 -5.941758795256964 -4.381643258523525 -2.757309447300389 1 1 0 +593 1 2 2.1 -6.0215675995468345 -7.336778252386646 -2.7652948891186604 1 1 0 +594 1 3 -1.05 -5.800414010022665 -4.452562979690537 -1.1390409601576525 1 1 0 +595 1 3 -1.05 -6.155789757574291 -7.243983669469674 -1.1404049422805471 1 1 0 +596 1 4 -0.95 -8.67336350603957 -6.045192015790269 -1.009270118053955 1 1 0 +597 1 3 -1.05 -7.3271615151700535 -8.038035790495682 -3.238992981860023 1 1 0 +598 1 3 -1.05 -5.907356197440085 -5.862471934911557 -3.240748636610774 1 1 0 +599 1 3 -1.05 -4.756733445681928 -8.189889175673681 -3.0491762653397636 1 1 0 +600 1 5 0.425 -8.164891614098584 -5.166098789059271 -1.1479179100126835 1 1 0 +622 1 2 2.1 -3.389926071141417 -8.86463251987464 -2.757323741452172 1 1 0 +623 1 2 2.1 -3.413385787633488 -2.8538972774550295 -2.765285166795646 1 1 0 +625 1 3 -1.05 -3.5476240557786918 -2.7610733325609367 -1.1403860582710532 1 1 0 +628 1 3 -1.05 -3.2991927884903163 -1.3794780433085272 -3.240737139143919 1 1 0 +629 1 3 -1.05 
-2.148566845248455 -3.70697790414607 -3.0491756564843833 1 1 0 +630 1 5 0.425 -0.39671665581836635 -0.6832652429943806 -1.1481630022982454 1 1 0 +636 1 4 -0.95 -3.5133574867319632 -6.045182758574995 -1.0092671746550455 1 1 0 +637 1 3 -1.05 -2.16718148458315 -8.038046305327988 -3.2389929701897184 1 1 0 +640 1 5 0.425 -3.004880126821485 -5.166096444308325 -1.147874732161542 1 1 0 +1002 1 2 2.1 -10.505804536592649 -8.88523539174835 -6.432560037265368 1 1 1 +1042 1 2 2.1 -5.345800298480642 -8.885227442477921 -6.432563806365232 1 1 1 +1163 1 2 2.1 -10.425983846443055 -5.930101828666643 -6.4245809302442 1 1 1 +1165 1 3 -1.05 -10.29176457220165 -6.022932645020992 -8.049476504290835 1 1 1 +1168 1 3 -1.05 -10.540210510252924 -7.404496027077366 -5.949119337484573 1 1 1 +1171 1 1 1.575 -8.63091986135657 -2.8117449377960053 -9.189382864542884 1 1 1 +1176 1 4 -0.95 -10.32599221206828 -2.738752318079948 -8.180571473894362 1 1 1 +1180 1 5 0.425 9.80549758975939 -3.6178227408875205 -8.041659890758954 0 1 1 +1201 1 1 1.575 -6.0790863142234715 -7.294676574746575 -9.189383710571903 1 1 1 +1203 1 2 2.1 -5.265992010385035 -5.9301120964551135 -6.424588121260333 1 1 1 +1204 1 3 -1.05 -5.487146068587514 -8.814352398208634 -8.050831889549796 1 1 1 +1205 1 3 -1.05 -5.13176290972859 -6.022933332502667 -8.049481513448013 1 1 1 +1206 1 4 -0.95 -7.774180195115953 -7.221701310871847 -8.180597375280326 1 1 1 +1207 1 3 -1.05 -9.120384514697443 -5.22887681444276 -5.950877168633751 1 1 1 +1208 1 3 -1.05 -5.380207122065727 -7.404491441720323 -5.949121385614578 1 1 1 +1209 1 3 -1.05 -6.530843795028404 -5.077002060288615 -6.140701804367323 1 1 1 +1210 1 5 0.425 -8.282682482860066 -8.100810146883555 -8.041925546941595 1 1 1 +1211 1 1 1.575 -3.4709270920462876 -2.8117417906836764 -9.189383656264441 1 1 1 +1212 1 2 2.1 -7.897616037858306 -4.402286956958651 -6.43254710358858 1 1 1 +1213 1 2 2.1 -7.8178154207096355 -1.447175155438515 -6.4245867930103415 1 1 1 +1214 1 3 -1.05 -8.038985872482693 
-4.331447222326599 -8.050813553596454 1 1 1 +1215 1 3 -1.05 -7.683597598118229 -1.5400178951253878 -8.049489918538265 1 1 1 +1216 1 4 -0.95 -5.165990407443373 -2.738754409726031 -8.18057793517165 1 1 1 +1217 1 3 -1.05 -6.512212271258566 -0.7459521192234924 -5.950883480489781 1 1 1 +1218 1 3 -1.05 -7.932036772316762 -2.9216307666036787 -5.949131277993494 1 1 1 +1219 1 3 -1.05 -9.08267694565555 -0.5940914943799172 -6.140702906817449 1 1 1 +1220 1 5 0.425 -5.674519498759423 -3.617820687506949 -8.04169029858849 1 1 1 +1221 1 1 1.575 -9.457246412717982 -1.671157609032207 9.189383876214114 1 1 0 +1231 1 1 1.575 -12.065413830807493 -6.15404471574257 9.18938539391855 1 1 0 +1246 1 4 -0.95 -2.614186286838043 -7.2217101201307425 -8.180599793839612 1 1 1 +1247 1 3 -1.05 -3.960360277202871 -5.228863633164128 -5.950877486276319 1 1 1 +1252 1 2 2.1 -2.737632361906618 -4.402284731749566 -6.432544106737809 1 1 1 +1253 1 2 2.1 -2.6578351242773337 -1.4471666923434476 -6.424589226412993 1 1 1 +1254 1 3 -1.05 -2.8789833754375103 -4.331448893865138 -8.050810085224123 1 1 1 +1255 1 3 -1.05 -2.52359850535919 -1.5400175072708464 -8.049492401815417 1 1 1 +1258 1 3 -1.05 -2.7720385307922406 -2.9216515367363893 -5.949132112636712 1 1 1 +1259 1 3 -1.05 -3.9226633957335055 -0.5941000488864461 -6.140701563447556 1 1 1 +1271 1 1 1.575 -6.9054209708246175 -6.154042407692234 9.189383855475354 1 1 0 +344 1 3 -1.05 1.9114222200240487 -8.935458279277533 -1.139059564533735 0 1 0 +384 1 3 -1.05 7.071419484747558 -8.93545662912592 -1.139062289794058 0 1 0 +502 1 2 2.1 1.770058118239774 -8.864629279480386 -2.7573211125975057 0 1 0 +503 1 2 2.1 1.746593525661929 -2.8538905356736315 -2.7652882528616427 0 1 0 +505 1 3 -1.05 1.6123753212274217 -2.761072943936796 -1.1403890497030549 0 1 0 +506 1 4 -0.95 4.254777803494033 -1.5623214773049696 -1.009297090882841 0 1 0 +507 1 3 -1.05 5.60097265039013 -3.555149476103077 -3.2389847675130836 0 1 0 +508 1 3 -1.05 1.8608059611492145 -1.3794973995029132 
-3.2407379362518247 0 1 0 +509 1 3 -1.05 3.0114454619351942 -3.706985652875458 -3.0491742347163786 0 1 0 +510 1 5 0.425 4.763266707487849 -0.6832636557343186 -1.1481903079338203 0 1 0 +512 1 2 2.1 4.37824108680676 -4.381642215703053 -2.7573092994894903 0 1 0 +513 1 2 2.1 4.298431828182256 -7.33677721573053 -2.7652951589904955 0 1 0 +514 1 3 -1.05 4.519586070169236 -4.452562991066955 -1.1390403197679557 0 1 0 +515 1 3 -1.05 4.164210351308228 -7.243983732757602 -1.1404051524282774 0 1 0 +516 1 4 -0.95 1.6466363953447338 -6.0451921005838205 -1.0092700800785774 0 1 0 +517 1 3 -1.05 2.9928396471856065 -8.038035104886228 -3.2389931344831044 0 1 0 +518 1 3 -1.05 4.412643745498574 -5.862474549618074 -3.2407487484486985 0 1 0 +519 1 3 -1.05 5.563266561391211 -8.18988915546985 -3.049176356081455 0 1 0 +520 1 5 0.425 2.1551084204706203 -5.16609866467239 -1.1479179157471826 0 1 0 +542 1 2 2.1 6.930073760633853 -8.864633805826598 -2.7573239225921 0 1 0 +543 1 2 2.1 6.906613517162718 -2.853897481917162 -2.7652852790953517 0 1 0 +545 1 3 -1.05 6.7723761157288465 -2.7610732877001496 -1.1403856281466087 0 1 0 +548 1 3 -1.05 7.020807272771009 -1.379476841625241 -3.240737136594837 0 1 0 +549 1 3 -1.05 8.171434172237035 -3.7069785563504976 -3.0491755848729465 0 1 0 +550 1 5 0.425 -10.71671674630369 -0.6832654187033356 -1.1481629281547843 1 1 0 +556 1 4 -0.95 6.806642683806004 -6.045182652289 -1.009267250406083 0 1 0 +557 1 3 -1.05 8.152818693761645 -8.038046252697145 -3.238992939192382 0 1 0 +560 1 5 0.425 7.315119719776252 -5.166096588082807 -1.1478746942687081 0 1 0 +626 1 4 -0.95 -0.9052247640192803 -1.5623207270903876 -1.0092908139849168 1 1 0 +627 1 3 -1.05 0.4409986064558229 -3.55513430014612 -3.238987362520838 1 1 0 +632 1 2 2.1 -0.781765198312538 -4.381650335007297 -2.757305136503125 1 1 0 +633 1 2 2.1 -0.8615624137678708 -7.3367669283109755 -2.765288206868874 1 1 0 +634 1 3 -1.05 -0.64041820705304 -4.452562941095179 -1.1390306805857744 1 1 0 +635 1 3 -1.05 -0.99579148699058 
-7.243983089295831 -1.1404005503235641 1 1 0 +638 1 3 -1.05 -0.7473598652504947 -5.862480258450255 -3.2407468109284423 1 1 0 +639 1 3 -1.05 0.4032783081311422 -8.189895897628547 -3.0491778997613777 1 1 0 +962 1 2 2.1 4.974200302039463 -8.885226777627686 -6.432563729132441 0 1 1 +1082 1 2 2.1 -0.185806274293137 -8.88523494640969 -6.432559660140516 1 1 1 +1121 1 1 1.575 4.240913714129455 -7.294676590686793 -9.189383695860338 0 1 1 +1123 1 2 2.1 5.054007902786916 -5.930111693030472 -6.424588129023356 0 1 1 +1124 1 3 -1.05 4.832853936182163 -8.814352442842836 -8.050831879699462 0 1 1 +1125 1 3 -1.05 5.188237146181489 -6.022933335225861 -8.04948169103402 0 1 1 +1126 1 4 -0.95 2.545820092137145 -7.221700886779187 -8.180597400605782 0 1 1 +1127 1 3 -1.05 1.1996162029904571 -5.228876391762018 -5.950877255533969 0 1 1 +1128 1 3 -1.05 4.939792907189725 -7.404492472702664 -5.949121217373286 0 1 1 +1129 1 3 -1.05 3.7891559709552 -5.077001971456189 -6.140701857527632 0 1 1 +1130 1 5 0.425 2.037317272972258 -8.100810548569898 -8.041925509572767 0 1 1 +1131 1 1 1.575 6.849072888825457 -2.811741689480723 -9.189383537844549 0 1 1 +1132 1 2 2.1 2.4223841271020863 -4.402287509447001 -6.432547026529044 0 1 1 +1133 1 2 2.1 2.502185680946525 -1.4471749599701873 -6.42458639136697 0 1 1 +1134 1 3 -1.05 2.2810141007135005 -4.3314471971634685 -8.050813463045493 0 1 1 +1135 1 3 -1.05 2.6364022779876706 -1.5400179185302143 -8.04948944504933 0 1 1 +1136 1 4 -0.95 5.154009380117879 -2.73875462121606 -8.180577998655997 0 1 1 +1137 1 3 -1.05 3.807785149367101 -0.7459535839548899 -5.950883537936585 0 1 1 +1138 1 3 -1.05 2.387963148489902 -2.9216298569088384 -5.949131508409803 0 1 1 +1139 1 3 -1.05 1.2373236894784867 -0.5940918355153002 -6.140702909127059 0 1 1 +1140 1 5 0.425 4.645480590911754 -3.617820504422353 -8.041690386363467 0 1 1 +1141 1 1 1.575 0.8627536087070986 -1.6711577052294828 9.18938406619552 0 1 0 +1151 1 1 1.575 -1.7454137744760594 -6.154044591809328 9.18938520735365 0 1 0 +1161 1 
1 1.575 9.400921345592693 -7.294679183739749 -9.189382152847024 0 1 1 +1164 1 3 -1.05 9.992849429211155 -8.81435221076156 -8.050822440181973 0 1 1 +1166 1 4 -0.95 7.705813430829789 -7.221710674457647 -8.18059968821399 0 1 1 +1167 1 3 -1.05 6.3596398781576156 -5.228863629458221 -5.950877516883361 0 1 1 +1169 1 3 -1.05 8.94916626015327 -5.077007792880384 -6.140703499406546 0 1 1 +1170 1 5 0.425 7.197307460595866 -8.100812243136945 -8.041966681414198 0 1 1 +1172 1 2 2.1 7.582367683488243 -4.402284032411057 -6.432543750269571 0 1 1 +1173 1 2 2.1 7.662165094646056 -1.447166201878126 -6.424589375468346 0 1 1 +1174 1 3 -1.05 7.441016703458011 -4.33144872801018 -8.05080991887099 0 1 1 +1175 1 3 -1.05 7.796401552064157 -1.5400176490207613 -8.049492711113222 0 1 1 +1177 1 3 -1.05 8.967811472553645 -0.7459382621993349 -5.950885996764763 0 1 1 +1178 1 3 -1.05 7.547961458135202 -2.921653116508409 -5.949132270533429 0 1 1 +1179 1 3 -1.05 6.397335934009757 -0.5940995052845253 -6.140701722863454 0 1 1 +1181 1 1 1.575 6.022761452224014 -1.6711610325038038 9.189384733881601 0 1 0 +1191 1 1 1.575 3.4145789253607752 -6.154042373460836 9.189383895405408 0 1 0 +1241 1 1 1.575 -0.919078623965504 -7.294679079021275 -9.189382380319893 1 1 1 +1243 1 2 2.1 -0.10598440370268136 -5.930100932531092 -6.424581119725159 1 1 1 +1244 1 3 -1.05 -0.32715050640343435 -8.814352196195246 -8.05082183294797 1 1 1 +1245 1 3 -1.05 0.028235486212588867 -6.022932681205882 -8.049476719692466 1 1 1 +1248 1 3 -1.05 -0.22021063902228377 -7.404498240171787 -5.949119370717186 1 1 1 +1249 1 3 -1.05 -1.370833370356804 -5.077008018598221 -6.140703488710428 1 1 1 +1250 1 5 0.425 -3.1226928311026017 -8.100812730925876 -8.041966561917656 1 1 1 +1251 1 1 1.575 1.6890802359826953 -2.811744966880619 -9.189383163839267 1 1 1 +1256 1 4 -0.95 -0.005992758242417295 -2.738753216631558 -8.180571330642541 1 1 1 +1257 1 3 -1.05 -1.3521890001903145 -0.7459384409832701 -5.950885927432735 1 1 1 +1260 1 5 0.425 -0.5145019540468603 
-3.6178218687876402 -8.041660116739482 1 1 1 +1261 1 1 1.575 -4.297238698912295 -1.671161039098081 9.189384626544689 1 1 0 +66 1 4 -0.95 -11.168857413411244 7.40354230473303 -1.009270621918711 1 0 0 +67 1 3 -1.05 -9.822645460808385 5.410696592905623 -3.238991317700341 1 0 0 +72 1 2 2.1 -11.045431902700038 4.584118883669678 -2.7573129595669927 1 0 0 +73 1 2 2.1 -11.125242586862766 1.6290062999736783 -2.7652808348657647 1 0 0 +74 1 3 -1.05 -10.904080920918116 4.513273040814628 -1.139048034178586 1 0 0 +75 1 3 -1.05 -11.259470010039587 1.7218521674576586 -1.140382566465906 1 0 0 +78 1 3 -1.05 -11.011020631747801 3.1034522866952905 -3.2407497089701973 1 0 0 +79 1 3 -1.05 -9.860382334314748 0.7759117274275305 -3.049165720829433 1 0 0 +102 1 2 2.1 -8.493613424986258 0.10122409846838565 -2.757323192975578 1 0 0 +103 1 2 2.1 -8.517068108693376 6.111974095994508 -2.765294523187803 1 0 0 +105 1 3 -1.05 -8.651283926478486 6.204745693345419 -1.140405943066158 1 0 0 +106 1 4 -0.95 -6.008854360601466 7.403542287020748 -1.0092772192840815 1 0 0 +107 1 3 -1.05 -4.662672238122222 5.41068100659405 -3.238988681070188 1 0 0 +108 1 3 -1.05 -8.402859804670333 7.586231166902241 -3.2407428765350357 1 0 0 +109 1 3 -1.05 -7.252220538727875 5.258843554485125 -3.0491804233363666 1 0 0 +110 1 5 0.425 -5.500400910059637 8.282611476985458 -1.1479800750695226 1 0 0 +112 1 2 2.1 -5.885425009588118 4.584125237437899 -2.757317352913315 1 0 0 +113 1 2 2.1 -5.9652471268907 1.6289946463227452 -2.7652873359068035 1 0 0 +114 1 3 -1.05 -5.7440766738855835 4.513273091979116 -1.1390583802266399 1 0 0 +115 1 3 -1.05 -6.099468233788193 1.7218514621970193 -1.140386585906322 1 0 0 +116 1 4 -0.95 -8.617064557281656 2.9205985811717774 -1.0092826841011693 1 0 0 +117 1 3 -1.05 -7.270858314853548 0.9277887665995053 -3.2389934936208906 1 0 0 +118 1 3 -1.05 -5.851016824061415 3.103461965248542 -3.2407514463188676 1 0 0 +119 1 3 -1.05 -4.700394398805081 0.7759184548700553 -3.049164281105149 1 0 0 +120 1 5 0.425 
-8.108543295227213 3.7997076991375103 -1.1480274788545408 1 0 0 +142 1 2 2.1 -3.3335981782845536 0.10122011566864941 -2.757325467057764 1 0 0 +143 1 2 2.1 -3.357047142472161 6.11196782113624 -2.7652915386543686 1 0 0 +145 1 3 -1.05 -3.491283130074649 6.204745345271121 -1.1404029948656156 1 0 0 +148 1 3 -1.05 -3.2428585701820545 7.586249294640197 -3.240741946764139 1 0 0 +149 1 3 -1.05 -2.092232840778756 5.258851348198554 -3.049181809306372 1 0 0 +150 1 5 0.425 -0.3403842372794994 8.28260966896027 -1.147952880973289 1 0 0 +156 1 4 -0.95 -3.457058959682409 2.9206070634285233 -1.0092797708444152 1 0 0 +157 1 3 -1.05 -2.1108774684637233 0.9277784951989752 -3.238993287738875 1 0 0 +160 1 5 0.425 -2.9485314187216485 3.799710624823639 -1.1479843692043108 1 0 0 +584 1 3 -1.05 -8.352243293361232 0.030357255394267924 -1.1390524737995804 1 1 0 +624 1 3 -1.05 -3.1922461439355594 0.030358867266201628 -1.1390547665052146 1 1 0 +683 1 2 2.1 -10.369664422889349 3.0356838773287826 -6.424575215502184 1 0 1 +685 1 3 -1.05 -10.23544329651916 2.9429037405897773 -8.049464594434008 1 0 1 +688 1 3 -1.05 -10.483869565823827 1.5614073964043058 -5.949127164061662 1 0 1 +691 1 1 1.575 -8.574578130087637 6.154071208432605 -9.189382180610352 1 0 1 +696 1 4 -0.95 -10.26966239527055 6.227051952939341 -8.18058521084057 1 0 1 +700 1 5 0.425 9.861819220954118 5.347945210496121 -8.041828398642311 0 0 1 +721 1 1 1.575 -6.022756736308421 1.6711303517316303 -9.189385968117815 1 0 1 +723 1 2 2.1 -5.209671651803681 3.0356748124450768 -6.424582436808366 1 0 1 +724 1 3 -1.05 -5.430810675667206 0.15147978126713468 -8.050841477729787 1 0 1 +725 1 3 -1.05 -5.075441675948434 2.9429030094494593 -8.049469596004098 1 0 1 +726 1 4 -0.95 -7.717868211333117 1.7441099614575464 -8.180598129872275 1 0 1 +727 1 3 -1.05 -9.064078445575394 3.7369535813240518 -5.950881069909556 1 0 1 +728 1 3 -1.05 -5.323866211609281 1.5614101422204314 -5.949129186293185 1 0 1 +729 1 3 -1.05 -6.474505361873171 3.888805922950688 
-6.140690242051968 1 0 1 +730 1 5 0.425 -8.226330365821905 0.8650337528317387 -8.04190334906801 1 0 1 +731 1 1 1.575 -3.414585332552538 6.154074477881739 -9.189382566209709 1 0 1 +732 1 2 2.1 -7.841289477842381 4.563531901090041 -6.432556537458721 1 0 1 +733 1 2 2.1 -7.761472051295536 7.5186496363365265 -6.424587203245381 1 0 1 +734 1 3 -1.05 -7.982649355928281 4.634376331833035 -8.050818102751318 1 0 1 +735 1 3 -1.05 -7.627256350140505 7.425797358610215 -8.049487688430835 1 0 1 +736 1 4 -0.95 -5.109660696087643 6.227049833368586 -8.180591643035886 1 0 1 +737 1 3 -1.05 -6.455868157279019 8.219860147310026 -5.95087771030675 1 0 1 +738 1 3 -1.05 -7.875707074793935 6.044192212608447 -5.94912141317001 1 0 1 +739 1 3 -1.05 -9.026344217684544 8.371738975760678 -6.140708086311505 1 0 1 +740 1 5 0.425 -5.6181977533352105 5.3479473553048 -8.041858759758451 1 0 1 +741 1 1 1.575 -9.400915706306321 7.294693070513674 9.189384926206701 1 0 0 +751 1 1 1.575 -12.009080441958165 2.811750177053689 9.18938314266052 1 0 0 +766 1 4 -0.95 -2.557874232771691 1.7441011023401245 -8.180600685373042 1 0 1 +767 1 3 -1.05 -3.904055434708212 3.7369659883295796 -5.95088122604723 1 0 1 +772 1 2 2.1 -2.6813056080742177 4.563533847624484 -6.432553914027515 1 0 1 +773 1 2 2.1 -2.601490329276581 7.518657095971896 -6.424589458180317 1 0 1 +774 1 3 -1.05 -2.8226468844219257 4.634374668663277 -8.050815326905791 1 0 1 +775 1 3 -1.05 -2.4672573177963235 7.425797891173772 -8.049490259445886 1 0 1 +778 1 3 -1.05 -2.7157085450829808 6.044173304923305 -5.949121943750433 1 0 1 +779 1 3 -1.05 -3.8663325104492907 8.371731480896397 -6.1407067178695645 1 0 1 +791 1 1 1.575 -6.849087529751488 2.8117525878767076 9.1893818738603 1 0 0 +1162 1 2 2.1 -10.449465120471645 0.08055078594262355 -6.432560152381424 1 1 1 +1202 1 2 2.1 -5.289460289979266 0.08055961753068175 -6.432563855616946 1 1 1 +22 1 2 2.1 1.8263872057743669 0.10122323982130865 -2.757323069657862 0 0 0 +23 1 2 2.1 1.802934745269436 6.111973633171797 
-2.765294216508333 0 0 0 +25 1 3 -1.05 1.668716070201313 6.204745778974964 -1.1404054664058876 0 0 0 +26 1 4 -0.95 4.311144928833087 7.403541337708113 -1.0092768860747352 0 0 0 +27 1 3 -1.05 5.657326890881341 5.410680424130639 -3.238988834418395 0 0 0 +28 1 3 -1.05 1.9171402669985689 7.586233181934492 -3.240742714496779 0 0 0 +29 1 3 -1.05 3.0677776400331744 5.2588447310193835 -3.0491803607990793 0 0 0 +30 1 5 0.425 4.8195997233909065 8.28261243314935 -1.1479801940850205 0 0 0 +32 1 2 2.1 4.434576150427134 4.584125949994959 -2.757317218978379 0 0 0 +33 1 2 2.1 4.354752672996783 1.6289957720393495 -2.7652874840181454 0 0 0 +34 1 3 -1.05 4.5759233376029655 4.513273129126759 -1.139057813284337 0 0 0 +35 1 3 -1.05 4.22053164114371 1.7218514201119213 -1.1403866240843445 0 0 0 +36 1 4 -0.95 1.7029355240571267 2.9205989386970366 -1.0092827600960685 0 0 0 +37 1 3 -1.05 3.0491419813654197 0.9277888380035257 -3.2389933763391117 0 0 0 +38 1 3 -1.05 4.468983158494831 3.1034597839279527 -3.2407515601519075 0 0 0 +39 1 3 -1.05 5.619605927640823 0.7759183556409575 -3.049164220104494 0 0 0 +40 1 5 0.425 2.2114564850808733 3.799707313511526 -1.1480272683057233 0 0 0 +62 1 2 2.1 6.98640117943874 0.10121957566898487 -2.757325568981271 0 0 0 +63 1 2 2.1 6.962952872691414 6.111965565108033 -2.765291053290289 0 0 0 +65 1 3 -1.05 6.828716908336894 6.204745500725302 -1.140402317732672 0 0 0 +68 1 3 -1.05 7.077141690972795 7.586254503483634 -3.240741979621399 0 0 0 +69 1 3 -1.05 8.22776739587561 5.258851157636421 -3.0491818244113524 0 0 0 +70 1 5 0.425 -10.660383820606539 8.282610501019679 -1.147952957708176 1 0 0 +76 1 4 -0.95 6.862941628467034 2.9206079904103746 -1.0092797796811954 0 0 0 +77 1 3 -1.05 8.209123056757985 0.9277789976030242 -3.238993264209034 0 0 0 +80 1 5 0.425 7.371467964909829 3.799709712130774 -1.1479841901184216 0 0 0 +146 1 4 -0.95 -0.848856980688371 7.403543065635521 -1.009270824726272 1 0 0 +147 1 3 -1.05 0.49734997703464323 5.410693694185699 -3.2389910874000423 1 0 
0 +152 1 2 2.1 -0.7254293332831061 4.584118530889224 -2.7573137024694017 1 0 0 +153 1 2 2.1 -0.8052417523067312 1.6290045848501222 -2.7652807440033573 1 0 0 +154 1 3 -1.05 -0.5840810386894884 4.513273006817702 -1.139048972722227 1 0 0 +155 1 3 -1.05 -0.9394700517706553 1.72185203484268 -1.140382327667803 1 0 0 +158 1 3 -1.05 -0.6910205720729508 3.103455656179019 -3.240749615640384 1 0 0 +159 1 3 -1.05 0.459616131096908 0.7759123793662148 -3.049165738355901 1 0 0 +504 1 3 -1.05 1.9677567762458388 0.030357116215178337 -1.1390524463935598 0 1 0 +544 1 3 -1.05 7.12775394829735 0.03035871942021373 -1.1390547635028394 0 1 0 +641 1 1 1.575 4.297243280430267 1.671130298800211 -9.189385641244018 0 0 1 +643 1 2 2.1 5.110326224583382 3.0356739402471575 -6.424582352677525 0 0 1 +644 1 3 -1.05 4.889189397603788 0.15147969781351733 -8.050841862301551 0 0 1 +645 1 3 -1.05 5.244558465032792 2.9429031552053644 -8.049469782410847 0 0 1 +646 1 4 -0.95 2.6021319091520816 1.744110022530954 -8.180598314731402 0 0 1 +647 1 3 -1.05 1.2559239052415947 3.7369549667711297 -5.950880998844742 0 0 1 +648 1 3 -1.05 4.996133819794588 1.5614110520094115 -5.949129347804236 0 0 1 +649 1 3 -1.05 3.845495927210548 3.888805031760729 -6.140690357525236 0 0 1 +650 1 5 0.425 2.093669549271727 0.8650335817131634 -8.041903350027011 0 0 1 +651 1 1 1.575 6.9054145512570955 6.154074540458179 -9.189382656499738 0 0 1 +652 1 2 2.1 2.4787088179628842 4.563531561622252 -6.432556181635265 0 0 1 +653 1 2 2.1 2.5585280519123863 7.518649657399113 -6.424586980258276 0 0 1 +654 1 3 -1.05 2.337350595388921 4.634376273058173 -8.050818213443714 0 0 1 +655 1 3 -1.05 2.6927435931419677 7.425797306887876 -8.049487673491134 0 0 1 +656 1 4 -0.95 5.210339517630658 6.227049915612355 -8.180591746443643 0 0 1 +657 1 3 -1.05 3.864131301319931 8.21985992889757 -5.95087766666106 0 0 1 +658 1 3 -1.05 2.4442928575967446 6.0441923323782305 -5.949121469473265 0 0 1 +659 1 3 -1.05 1.293656047976766 8.371738811992955 -6.14070806309522 0 0 1 
+660 1 5 0.425 4.701802056134319 5.3479472107053745 -8.041858727818608 0 0 1 +661 1 1 1.575 0.9190842362638474 7.294693176083765 9.18938507480893 0 0 0 +671 1 1 1.575 -1.6890804391306382 2.8117504093218777 9.18938332505079 0 0 0 +681 1 1 1.575 9.457250970166566 1.6711276693326198 -9.18938446512876 0 0 1 +684 1 3 -1.05 10.049184911415573 0.15147980327706279 -8.050832179942425 0 0 1 +686 1 4 -0.95 7.762125313928461 1.7441003699159374 -8.180600491077195 0 0 1 +687 1 3 -1.05 6.415947469477651 3.7369678311064902 -5.950881518055825 0 0 1 +689 1 3 -1.05 9.00550526381668 3.888799817051286 -6.140691886923977 0 0 1 +690 1 5 0.425 7.2536596052110625 0.8650317110006327 -8.0419445547798 0 0 1 +692 1 2 2.1 7.638693043216929 4.563534397426547 -6.4325531536019 0 0 1 +693 1 2 2.1 7.718508628628836 7.518659026560055 -6.424589752422065 0 0 1 +694 1 3 -1.05 7.497353200810323 4.6343747301491796 -8.050814243532022 0 0 1 +695 1 3 -1.05 7.852742644013961 7.425797704432085 -8.049490509348363 0 0 1 +697 1 3 -1.05 9.024155028039374 8.219873732752141 -5.950880032041851 0 0 1 +698 1 3 -1.05 7.604291305384475 6.044169570929853 -5.9491223139608955 0 0 1 +699 1 3 -1.05 6.4536687395115315 8.371730865840878 -6.140706656787863 0 0 1 +701 1 1 1.575 6.079092109884446 7.294689732957998 9.18938567229783 0 0 0 +711 1 1 1.575 3.470912432260338 2.811752520407829 9.18938169427584 0 0 0 +761 1 1 1.575 -0.862748970625967 1.6711279104285026 -9.189384196227254 1 0 1 +763 1 2 2.1 -0.04966552243351785 3.0356854531893447 -6.424575713127194 1 0 1 +764 1 3 -1.05 -0.27081511421111415 0.15147985204080072 -8.050831834106292 1 0 1 +765 1 3 -1.05 0.08455673703883626 2.942903550076892 -8.049465159099038 1 0 1 +768 1 3 -1.05 -0.1638698065461348 1.5614034363256728 -5.949127300886313 1 0 1 +769 1 3 -1.05 -1.314494472039442 3.888799611479218 -6.140691815778049 1 0 1 +770 1 5 0.425 -3.066340835168454 0.8650309510303416 -8.041944499248094 1 0 1 +771 1 1 1.575 1.7454217382651063 6.154071312403289 -9.189382010954455 1 0 1 +776 1 
4 -0.95 0.05033727790187115 6.227051569791222 -8.180585248517964 1 0 1 +777 1 3 -1.05 -1.2958462848877286 8.219873003854904 -5.9508799815340065 1 0 1 +780 1 5 0.425 -0.45818049467738753 5.347945702077382 -8.041828574124134 1 0 1 +781 1 1 1.575 -4.24090790530673 7.294689732506644 9.189385751111567 1 0 0 +1122 1 2 2.1 5.030540831106158 0.08055969575320887 -6.432563931728475 0 1 1 +1242 1 2 2.1 -0.12946529950842844 0.08055281636946887 -6.432559861989349 1 1 1 +104 1 3 -1.05 -8.2959121065224 8.996169249695711 -1.1390343890689216 1 0 0 +144 1 3 -1.05 -3.135914908937057 8.996170757432672 -1.1390363095158538 1 0 0 +226 1 4 -0.95 -11.11254078166963 16.369359515899372 -1.0092685783280562 1 0 0 +227 1 3 -1.05 -9.766336767287553 14.376528090276803 -3.238995751451072 1 0 0 +232 1 2 2.1 -10.989105741717744 13.549928023588016 -2.757323935025683 1 0 0 +233 1 2 2.1 -11.068899971135107 10.594822188451765 -2.765279879336142 1 0 0 +234 1 3 -1.05 -10.847743946171107 13.479098577596996 -1.139055897700814 1 0 0 +235 1 3 -1.05 -11.203130777002407 10.687669044069192 -1.1403772521551225 1 0 0 +238 1 3 -1.05 -10.954690024169619 12.0692952367427 -3.2407400224236813 1 0 0 +239 1 3 -1.05 -9.804050408061553 9.741740732019732 -3.0491690386393895 1 0 0 +262 1 2 2.1 -8.437271860709437 9.067095538704638 -2.757309714415854 1 0 0 +263 1 2 2.1 -8.460747726831363 15.077765756381456 -2.7652898300476085 1 0 0 +265 1 3 -1.05 -8.59496192705347 15.170581558375392 -1.1403965373644613 1 0 0 +266 1 4 -0.95 -5.952537888276847 16.369358966890967 -1.009275025184838 1 0 0 +267 1 3 -1.05 -4.606364507026352 14.376511915388033 -3.2389931337439766 1 0 0 +268 1 3 -1.05 -8.346518937455452 16.552120589815434 -3.2407517060692665 1 0 0 +269 1 3 -1.05 -7.195882464907534 14.224652440524341 -3.049169641913795 1 0 0 +270 1 5 0.425 -5.444048709356704 17.248463720191207 -1.147926072458759 1 0 0 +272 1 2 2.1 -5.82909830220383 13.54993452321374 -2.7573283893462746 1 0 0 +273 1 2 2.1 -5.908905613187655 10.594810706455835 
-2.7652862916855394 1 0 0 +274 1 3 -1.05 -5.687739797179347 13.479098509652726 -1.1390661905436907 1 0 0 +275 1 3 -1.05 -6.043129004198134 10.687668498390035 -1.1403811302142177 1 0 0 +276 1 4 -0.95 -8.560740308552909 11.886397563127215 -1.0092984324169958 1 0 0 +277 1 3 -1.05 -7.2145196644512986 9.893600558555839 -3.2389874855870726 1 0 0 +278 1 3 -1.05 -5.794686347127319 12.069305033408408 -3.2407418681182447 1 0 0 +279 1 3 -1.05 -4.644060718572748 9.741746448813768 -3.049167473148536 1 0 0 +280 1 5 0.425 -8.052219545977914 12.76547280470178 -1.1482142578089327 1 0 0 +302 1 2 2.1 -3.277258168611019 9.06709268329568 -2.757311979744827 1 0 0 +303 1 2 2.1 -3.300726568040103 15.077758676128251 -2.7652865730743468 1 0 0 +305 1 3 -1.05 -3.4349611231128563 15.170581239105989 -1.140393118347875 1 0 0 +308 1 3 -1.05 -3.1865176799692243 16.552141258442898 -3.2407508225636663 1 0 0 +309 1 3 -1.05 -2.0358940608778617 14.224659879221303 -3.049171103603271 1 0 0 +310 1 5 0.425 -0.2840321731062527 17.248462066835817 -1.147899000522239 1 0 0 +316 1 4 -0.95 -3.400734418826705 11.88640656459975 -1.009295426410807 1 0 0 +317 1 3 -1.05 -2.0545374153539004 9.893591389941722 -3.238987396892891 1 0 0 +320 1 5 0.425 -2.892208151349891 12.765475346667152 -1.1481709903587394 1 0 0 +682 1 2 2.1 -10.393123499696863 9.046417716708568 -6.432546443741804 1 0 1 +722 1 2 2.1 -5.233118165742026 9.046425415362691 -6.432550064074174 1 0 1 +843 1 2 2.1 -10.313327211997532 12.001542453469945 -6.424580425058928 1 0 1 +845 1 3 -1.05 -10.179104461980216 11.908724309692506 -8.049479685803213 1 0 1 +848 1 3 -1.05 -10.427534414878945 10.527148371504556 -5.949133527898292 1 0 1 +851 1 1 1.575 -8.51824369157916 15.119864900254253 -9.189383813337964 1 0 1 +856 1 4 -0.95 -10.213364798519171 15.19283911129094 -8.1805999456996 1 0 1 +860 1 5 0.425 9.918165904359903 14.31374353290386 -8.041964406905526 0 0 1 +881 1 1 1.575 -5.966426869062609 10.63697892126217 -9.189385316838635 1 0 1 +883 1 2 2.1 
-5.153334349175274 12.001533504730254 -6.4245873656476835 1 0 1 +884 1 3 -1.05 -5.37447932514466 9.117293092454549 -8.05082458509691 1 0 1 +885 1 3 -1.05 -5.019102860091463 11.908723625594156 -8.049484444657638 1 0 1 +886 1 4 -0.95 -7.661503944640748 10.709971405747378 -8.180577991074518 1 0 1 +887 1 3 -1.05 -9.007728176997928 12.702784770767579 -5.950885590714915 1 0 1 +888 1 3 -1.05 -5.267530930954242 10.527151999311467 -5.949135635771657 1 0 1 +889 1 3 -1.05 -6.41817044734872 12.85463275660672 -6.140694819991762 1 0 1 +890 1 5 0.425 -8.169994851836138 9.830913491173146 -8.04168744838706 1 0 1 +891 1 1 1.575 -3.3582508216134324 15.119868110955334 -9.189384343267172 1 0 1 +892 1 2 2.1 -7.78495678012643 13.529298477845082 -6.43256609319971 1 0 1 +893 1 2 2.1 -7.705149478241227 16.484421351166855 -6.4245793302851455 1 0 1 +894 1 3 -1.05 -7.926311841895801 13.600212556274936 -8.050837106470572 1 0 1 +895 1 3 -1.05 -7.570934159096701 16.391631488705965 -8.049468672465693 1 0 1 +896 1 4 -0.95 -5.053362791171019 15.192837358764411 -8.180606557550988 1 0 1 +897 1 3 -1.05 -6.39956503035528 17.185682318971356 -5.950877092454645 1 0 1 +898 1 3 -1.05 -7.819368343047313 15.010129619847799 -5.949122570305669 1 0 1 +899 1 3 -1.05 -8.970006576215036 17.33754814394433 -6.140696561990261 1 0 1 +900 1 5 0.425 -5.561851311372902 14.31374529314498 -8.041994715875438 1 0 1 +901 1 1 1.575 -9.344586773498397 16.260504033010424 9.189383061469888 1 0 0 +911 1 1 1.575 -11.952738939766169 11.777562142070536 9.189383933133433 1 0 0 +926 1 4 -0.95 -2.501509910995111 10.709962944319543 -8.180580504155689 1 0 1 +927 1 3 -1.05 -3.847706337399428 12.702796475235434 -5.95088592683273 1 0 1 +932 1 2 2.1 -2.6249716110823975 13.529300956978918 -6.432563378051501 1 0 1 +933 1 2 2.1 -2.545169473198447 16.484429053990457 -6.424581927314836 1 0 1 +934 1 3 -1.05 -2.766309288557113 13.60021091940969 -8.050834014660039 1 0 1 +935 1 3 -1.05 -2.4109350640925493 16.391631918688784 -8.049471644682779 1 0 1 +938 
1 3 -1.05 -2.6593699486230253 15.01010932633989 -5.949123442093706 1 0 1 +939 1 3 -1.05 -3.8099936729346524 17.337539995470653 -6.1406953820272605 1 0 1 +951 1 1 1.575 -6.792745980839589 11.77756446289758 9.18938260060447 1 0 0 +24 1 3 -1.05 2.024087914529936 8.996169156423772 -1.1390344040655176 0 0 0 +64 1 3 -1.05 7.1840851787278694 8.996170799073049 -1.1390371246747328 0 0 0 +182 1 2 2.1 1.8827269267658568 9.067094010459222 -2.7573098488458445 0 0 0 +183 1 2 2.1 1.8592529639379158 15.077765641437093 -2.765289826325004 0 0 0 +185 1 3 -1.05 1.7250381794570995 15.170581615306336 -1.140396317987694 0 0 0 +186 1 4 -0.95 4.367461639099476 16.369358398717548 -1.0092749502092584 0 0 0 +187 1 3 -1.05 5.713635253023561 14.376511780801248 -3.2389931627830233 0 0 0 +188 1 3 -1.05 1.973481145796077 16.552121393137707 -3.240751532677078 0 0 0 +189 1 3 -1.05 3.1241173355918086 14.224652737956877 -3.049169549865817 0 0 0 +190 1 5 0.425 4.875951715036274 17.248464394280933 -1.1479261082563532 0 0 0 +192 1 2 2.1 4.490901958219151 13.549935878555889 -2.7573283335925183 0 0 0 +193 1 2 2.1 4.411093600085744 10.594811332515622 -2.7652867782753123 0 0 0 +194 1 3 -1.05 4.632260285297237 13.47909860671924 -1.1390658297944274 0 0 0 +195 1 3 -1.05 4.276871012971121 10.687668403574609 -1.1403818756309079 0 0 0 +196 1 4 -0.95 1.7592596338728619 11.886397590084986 -1.009298531294947 0 0 0 +197 1 3 -1.05 3.105482884951323 9.893602100379187 -3.238987419660816 0 0 0 +198 1 3 -1.05 4.525313593280881 12.06930177005012 -3.240741819055666 0 0 0 +199 1 3 -1.05 5.675938131533806 9.741747191339616 -3.049167537714908 0 0 0 +200 1 5 0.425 2.2677804650185784 12.765472764023784 -1.1482141972846076 0 0 0 +222 1 2 2.1 7.042742997773182 9.06709065048177 -2.757312355283716 0 0 0 +223 1 2 2.1 7.01927329241666 15.077757124097925 -2.7652861994385667 0 0 0 +225 1 3 -1.05 6.885038862750001 15.170581286095011 -1.1403927842743506 0 0 0 +228 1 3 -1.05 7.133482597267005 16.552144500578226 -3.24075083839441 0 0 0 +229 
1 3 -1.05 8.284105934673054 14.224659825474038 -3.049171073901001 0 0 0 +230 1 5 0.425 -10.604031977218472 17.248462460723108 -1.1478989225176974 1 0 0 +236 1 4 -0.95 6.919266019605498 11.886407164285622 -1.0092956224084197 0 0 0 +237 1 3 -1.05 8.26546077991545 9.893590106942362 -3.2389873515609295 0 0 0 +240 1 5 0.425 7.427791685687939 12.765474859759728 -1.1481709088514993 0 0 0 +306 1 4 -0.95 -0.7925404329129417 16.36935978204208 -1.0092685234917038 1 0 0 +307 1 3 -1.05 0.5536617436637812 14.376527208127765 -3.238995583926612 1 0 0 +312 1 2 2.1 -0.669105152749097 13.549928062179351 -2.757324364009518 1 0 0 +313 1 2 2.1 -0.7489012635521384 10.594821654108735 -2.7652800741043446 1 0 0 +314 1 3 -1.05 -0.5277440456768847 13.479098585859202 -1.1390561242138784 1 0 0 +315 1 3 -1.05 -0.8831307002281701 10.687669040991018 -1.140377271846452 1 0 0 +318 1 3 -1.05 -0.6346900144781458 12.069295967999206 -3.240739844147214 1 0 0 +319 1 3 -1.05 0.5159499812446136 9.741740306242114 -3.0491689677433413 1 0 0 +642 1 2 2.1 5.086881183770259 9.046425911742144 -6.432550150515191 0 0 1 +762 1 2 2.1 -0.07312306646521094 9.046418567335255 -6.432546469896677 1 0 1 +801 1 1 1.575 4.353573033337209 10.636979042290022 -9.189385175536927 0 0 1 +803 1 2 2.1 5.166664061273867 12.001532910814223 -6.424587670201428 0 0 1 +804 1 3 -1.05 4.945520713988733 9.117292956379355 -8.050824916593145 0 0 1 +805 1 3 -1.05 5.3008973690342565 11.908723709427026 -8.049484733585496 0 0 1 +806 1 4 -0.95 2.6584959262536785 10.709971246531975 -8.18057805604267 0 0 1 +807 1 3 -1.05 1.3122728430088326 12.702785257177823 -5.950885474338808 0 0 1 +808 1 3 -1.05 5.052469129605173 10.527151794250187 -5.949135352014368 0 0 1 +809 1 3 -1.05 3.9018297035645837 12.854632641977553 -6.1406947910058385 0 0 1 +810 1 5 0.425 2.1500052491708654 9.8309136506718 -8.041687402015095 0 0 1 +811 1 1 1.575 6.961748976029714 15.11986798997648 -9.189384482358879 0 0 1 +812 1 2 2.1 2.535042687523287 13.529297970232026 -6.4325661347972565 
0 0 1 +813 1 2 2.1 2.614849740198629 16.48442051022965 -6.424579413308912 0 0 1 +814 1 3 -1.05 2.3936882153036816 13.60021257463752 -8.050837163771524 0 0 1 +815 1 3 -1.05 2.7490658487965707 16.391631453766433 -8.049468717992344 0 0 1 +816 1 4 -0.95 5.266637005508615 15.192836965901162 -8.18060636473161 0 0 1 +817 1 3 -1.05 3.920436314988857 17.185683167433734 -5.950877119319536 0 0 1 +818 1 3 -1.05 2.5006316587760526 15.010130254796007 -5.949122772496109 0 0 1 +819 1 3 -1.05 1.3499937471668204 17.337547912493786 -6.140696589330803 0 0 1 +820 1 5 0.425 4.758148953436567 14.313745713621554 -8.04199471755077 0 0 1 +821 1 1 1.575 0.9754132074566062 16.260504016462573 9.189383008011989 0 0 0 +831 1 1 1.575 -1.632738974614906 11.777562083519168 9.18938401267412 0 0 0 +841 1 1 1.575 9.513580811205571 10.636976340496151 -9.189383726764085 0 0 1 +844 1 3 -1.05 10.105516190294331 9.117293135343541 -8.050815507300172 0 0 1 +846 1 4 -0.95 7.818489772082437 10.709962301298077 -8.180580332939817 0 0 1 +847 1 3 -1.05 6.472296617251764 12.702798330503999 -5.950885979768901 0 0 1 +849 1 3 -1.05 9.061839113308423 12.85462734482412 -6.140696399395345 0 0 1 +850 1 5 0.425 7.309994958903577 9.830911036100812 -8.041728700792529 0 0 1 +852 1 2 2.1 7.695026696212686 13.529300436217842 -6.432563050299859 0 0 1 +853 1 2 2.1 7.774830524031032 16.484429805628476 -6.4245820389886 0 0 1 +854 1 3 -1.05 7.553690820056456 13.600211019443176 -8.050833425414133 0 0 1 +855 1 3 -1.05 7.90906499883684 16.391631831273738 -8.049471300477997 0 0 1 +857 1 3 -1.05 9.080458785689217 17.18569631092181 -5.950879504982494 0 0 1 +858 1 3 -1.05 7.660630099640052 15.010108645803708 -5.949123539735778 0 0 1 +859 1 3 -1.05 6.510007127986675 17.337539652224454 -6.140695210856152 0 0 1 +861 1 1 1.575 6.135421058173524 16.260500644067545 9.18938364090062 0 0 0 +871 1 1 1.575 3.5272538816622223 11.777564383420493 9.189382493802137 0 0 0 +921 1 1 1.575 -0.806419090335142 10.6369764569408 -9.189383713930892 1 0 1 +923 1 
2 2.1 0.006672498480664757 12.001542880171968 -6.424580735660621 1 0 1 +924 1 3 -1.05 -0.21448392537694794 9.11729318150578 -8.050815514872385 1 0 1 +925 1 3 -1.05 0.1408954660648405 11.908724218193651 -8.04948002701215 1 0 1 +928 1 3 -1.05 -0.10753447832841978 10.52714708281717 -5.9491334742097575 1 0 1 +929 1 3 -1.05 -1.2581612100588444 12.854627509935817 -6.140696384375066 1 0 1 +930 1 5 0.425 -3.0100053531165374 9.830910509518201 -8.041728647622413 1 0 1 +931 1 1 1.575 1.8017562269381084 15.11986483513633 -9.189383868712579 1 0 1 +936 1 4 -0.95 0.1066350030095542 15.192838843972428 -8.18059994641965 1 0 1 +937 1 3 -1.05 -1.2395403472651836 17.185696825023822 -5.950879632472325 1 0 1 +940 1 5 0.425 -0.4018339418639325 14.313743809074847 -8.041964435447152 1 0 1 +941 1 1 1.575 -4.184578902062027 16.260500609273645 9.18938369837054 1 0 0 +202 1 2 2.1 -12.25907142254093 -18.03284292800733 2.7573256699420927 1 1 0 +242 1 2 2.1 -7.099067653695995 -18.032834660131254 2.757321949202108 1 1 0 +282 1 2 2.1 -1.9390718808592098 -18.032841752433647 2.7573260151961314 1 1 0 +363 1 2 2.1 -12.17925475263019 -15.077717311058388 2.765285829341023 1 1 0 +365 1 3 -1.05 -12.045034136641693 -15.17058638521851 1.140380284474201 1 1 0 +368 1 3 -1.05 -12.293483708242846 -16.552230120486666 3.2407464397069514 1 1 0 +371 1 1 1.575 -10.384193742378155 -11.959425939444456 0.00048685249345048476 1 1 0 +376 1 4 -0.95 -12.079303598777397 -11.886440401085338 1.0092857120331082 1 1 0 +380 1 5 0.425 8.05223641778737 -12.765497942560684 1.1480754613364113 0 1 0 +401 1 1 1.575 -7.832364735753668 -16.442302159158654 0.0004887404677624829 1 1 0 +403 1 2 2.1 -7.0192621716443195 -15.077726395299486 2.7652785593097526 1 1 0 +404 1 3 -1.05 -7.240423003632657 -17.962013523315086 1.1390571695805392 1 1 0 +405 1 3 -1.05 -6.88503232555705 -15.170587037025818 1.1403751249754315 1 1 0 +406 1 4 -0.95 -9.52742392394671 -16.369313757792415 1.0092944834335125 1 1 0 +407 1 3 -1.05 -10.873642789988851 
-14.376520147083449 3.238990044343934 1 1 0 +408 1 3 -1.05 -7.133480193318928 -16.55222721372571 3.240744460593115 1 1 0 +409 1 3 -1.05 -8.284118927456472 -14.224648350829375 3.0491654057163835 1 1 0 +410 1 5 0.425 -10.035955666435909 -17.24840570373585 1.1481612082133505 1 1 0 +411 1 1 1.575 -5.22420079757442 -11.959422800114778 0.00048645201034069885 1 1 0 +412 1 2 2.1 -9.650890626615093 -13.549995402180539 2.7573154149176524 1 1 0 +413 1 2 2.1 -9.571101081554449 -10.59487844511782 2.7652931515755252 1 1 0 +414 1 3 -1.05 -9.79225658896784 -13.479085120313027 1.1390395402492484 1 1 0 +415 1 3 -1.05 -9.43688362912373 -10.687658124577233 1.1404014440605117 1 1 0 +416 1 4 -0.95 -6.919301425568876 -11.886441838598635 1.0092790865815378 1 1 0 +417 1 3 -1.05 -8.26551829041191 -9.893604601648414 3.23898900788749 1 1 0 +418 1 3 -1.05 -9.68530634639146 -12.06916633578844 3.2407391018412675 1 1 0 +419 1 3 -1.05 -10.835946889124276 -9.741756916972987 3.0491804516627496 1 1 0 +420 1 5 0.425 -7.427780896515371 -12.765496313753136 1.148045263075046 1 1 0 +421 1 1 1.575 -7.816647968665705 -10.455164560774879 -0.0004897073322478462 1 1 0 +431 1 1 1.575 -10.424802848423498 -14.93805036694446 -0.0004859056005166451 1 1 0 +441 1 1 1.575 -2.672356985637104 -16.442304661398456 0.0004900967852954352 1 1 0 +443 1 2 2.1 -1.859254766057532 -15.077715497267564 2.7652856485819424 1 1 0 +444 1 3 -1.05 -2.080427517107452 -17.96201341457353 1.1390669364709627 1 1 0 +445 1 3 -1.05 -1.7250341319023796 -15.170586397099527 1.1403800605829684 1 1 0 +446 1 4 -0.95 -4.367430017015452 -16.369322564554952 1.009292140799971 1 1 0 +447 1 3 -1.05 -5.713619531094013 -14.376507547027593 3.2389899091549132 1 1 0 +448 1 3 -1.05 -1.9734837675643213 -16.552233362306946 3.2407464072217795 1 1 0 +449 1 3 -1.05 -3.124107903450609 -14.224654635100869 3.0491639457169573 1 1 0 +450 1 5 0.425 -4.875966104966851 -17.24840844446412 1.1481201137788588 1 1 0 +452 1 2 2.1 -4.490906768877101 -13.549992289876716 
2.7573182139731394 1 1 0 +453 1 2 2.1 -4.411121706661005 -10.594869786965624 2.7652903571435505 1 1 0 +454 1 3 -1.05 -4.632254153063665 -13.479086778990123 1.1390428907981356 1 1 0 +455 1 3 -1.05 -4.276884510841384 -10.687657631952451 1.140398470795125 1 1 0 +456 1 4 -0.95 -1.759303475003291 -11.886440295102556 1.0092857335364354 1 1 0 +457 1 3 -1.05 -3.1054929638501587 -9.893589696855543 3.238986417750816 1 1 0 +458 1 3 -1.05 -4.5253080460816815 -12.069187701329469 3.240738460509835 1 1 0 +459 1 3 -1.05 -5.6759330828412295 -9.741765530480448 3.049181725968289 1 1 0 +461 1 1 1.575 -2.6566402814800893 -10.455168034309882 -0.0004892102139120169 1 1 0 +471 1 1 1.575 -5.264810010804999 -14.938048096264206 -0.000487296179734642 1 1 0 +904 1 3 -1.05 -10.161854685695545 -18.083120592662027 8.050830551575691 1 1 0 +944 1 3 -1.05 -5.0018574844249635 -18.08311895062402 8.050827904580979 1 1 0 +1026 1 4 -0.95 -12.978516922048088 -10.709978874953269 8.180583642606527 1 1 0 +1027 1 3 -1.05 -11.63229786651004 -12.702775234356054 5.950880154637874 1 1 0 +1032 1 2 2.1 -12.855048702475655 -13.529314203926136 6.432556100815381 1 1 0 +1033 1 2 2.1 -12.934828913170918 -16.484422806588526 6.424584210403742 1 1 0 +1034 1 3 -1.05 -12.713689435153274 -13.600211848270453 8.050834252924775 1 1 0 +1035 1 3 -1.05 -13.069060384643878 -16.39164043985936 8.04947631781359 1 1 0 +1038 1 3 -1.05 -12.820637718787896 -15.010116398098276 5.949134768177112 1 1 0 +1039 1 3 -1.05 -11.66999655483321 -17.337541956749448 6.140690977240167 1 1 0 +1062 1 2 2.1 -10.303209469423026 -18.012230849774276 6.432564214130078 1 1 0 +1063 1 2 2.1 -10.326696781513265 -12.001574504575316 6.424588327366143 1 1 0 +1065 1 3 -1.05 -10.46091070201314 -11.908711333552967 8.04949252107611 1 1 0 +1066 1 4 -0.95 -7.818513991591397 -10.709979211148491 8.180577084620788 1 1 0 +1067 1 3 -1.05 -6.472326004187935 -12.702791670718351 5.9508827006348675 1 1 0 +1068 1 3 -1.05 -10.212461185927816 -10.527079940778258 5.9491249291990975 1 1 
0 +1069 1 3 -1.05 -9.061822886101059 -12.854652084676546 6.140708572507496 1 1 0 +1070 1 5 0.425 -7.309988942363981 -9.83088467575225 8.041736795520984 1 1 0 +1072 1 2 2.1 -7.695041897866641 -13.529307071510981 6.432551577978835 1 1 0 +1073 1 2 2.1 -7.774834793344618 -16.484434939947647 6.424577603346634 1 1 0 +1074 1 3 -1.05 -7.553685084393516 -13.60021181043779 8.050823752129004 1 1 0 +1075 1 3 -1.05 -7.90905859095466 -16.39164117482209 8.049471719485046 1 1 0 +1076 1 4 -0.95 -10.426647145797157 -15.192866902601361 8.180586106999344 1 1 0 +1077 1 3 -1.05 -9.080429028015885 -17.18569724875303 5.950884797992369 1 1 0 +1078 1 3 -1.05 -7.660633858427131 -15.010107503057135 5.949133029699604 1 1 0 +1079 1 3 -1.05 -6.510008950950501 -17.337534855285654 6.140692414529196 1 1 0 +1080 1 5 0.425 -9.918176441334206 -14.313808476712383 8.041767318325997 1 1 0 +1102 1 2 2.1 -5.143192688598495 -18.01223469020368 6.43256163865242 1 1 0 +1103 1 2 2.1 -5.166674659914734 -12.001581247949602 6.42459150745173 1 1 0 +1105 1 3 -1.05 -5.30091011073581 -11.90871163472071 8.049495593212923 1 1 0 +1106 1 4 -0.95 -2.6585163778844034 -10.709977949816722 8.18058344571133 1 1 0 +1108 1 3 -1.05 -5.052460029806349 -10.527060001226648 5.949125945371383 1 1 0 +1109 1 3 -1.05 -3.9018353937420382 -12.854644320517036 6.140707119099776 1 1 0 +1110 1 5 0.425 -2.149972520489074 -9.830886800682975 8.041764004644078 1 1 0 +1112 1 2 2.1 -2.535048249868213 -13.529314790919454 6.4325557752826 1 1 0 +1113 1 2 2.1 -2.614828592705191 -16.484424453660697 6.424584582139241 1 1 0 +1114 1 3 -1.05 -2.3936893961900143 -13.60021174374376 8.050833520746858 1 1 0 +1115 1 3 -1.05 -2.7490603036216505 -16.391640281660283 8.049476486761636 1 1 0 +1116 1 4 -0.95 -5.26664099130191 -15.192857666732323 8.180588944438727 1 1 0 +1117 1 3 -1.05 -3.92045104663105 -17.18570897645179 5.950884811850528 1 1 0 +1118 1 3 -1.05 -2.5006374505687665 -15.010113340479645 5.949134814048831 1 1 0 +1120 1 5 0.425 -4.758164912029445 
-14.313806137693177 8.041810512450352 1 1 0 +162 1 2 2.1 3.2209340369483392 -18.032833757481832 2.7573218012303666 0 1 0 +321 1 1 1.575 2.4876351465059408 -16.442302154784315 0.0004888543282763891 0 1 0 +323 1 2 2.1 3.300736726127152 -15.077726538699793 2.7652785057231757 0 1 0 +324 1 3 -1.05 3.079577082904713 -17.962013555013442 1.1390567851692257 0 1 0 +325 1 3 -1.05 3.4349676885715965 -15.17058703603631 1.1403750102570616 0 1 0 +326 1 4 -0.95 0.7925763172482458 -16.369313154371326 1.0092944072974692 0 1 0 +327 1 3 -1.05 -0.5536431863284719 -14.376520315382402 3.2389901201446865 0 1 0 +328 1 3 -1.05 3.186519855031559 -16.552227249856152 3.240744515086007 0 1 0 +329 1 3 -1.05 2.0358818673234893 -14.224648915409531 3.0491654939012367 0 1 0 +330 1 5 0.425 0.2840440343041557 -17.248406206415737 1.1481612674588284 0 1 0 +331 1 1 1.575 5.095799077392401 -11.95942278124406 0.0004864450394208575 0 1 0 +332 1 2 2.1 0.669109464863638 -13.549995278717017 2.757315210932033 0 1 0 +333 1 2 2.1 0.7488989807994404 -10.594878651163997 2.765293220622354 0 1 0 +334 1 3 -1.05 0.5277433362522466 -13.479085130961657 1.1390393870012794 0 1 0 +335 1 3 -1.05 0.8831162860029735 -10.68765806499949 1.1404014490301666 0 1 0 +336 1 4 -0.95 3.4006984714464785 -11.886441915318503 1.009279146529078 0 1 0 +337 1 3 -1.05 2.05448186886235 -9.89360444283963 3.2389889279451722 0 1 0 +338 1 3 -1.05 0.6346935230510518 -12.069165638686224 3.240739110626995 0 1 0 +339 1 3 -1.05 -0.5159464707475578 -9.741757212644893 3.0491802687542187 0 1 0 +340 1 5 0.425 2.892219107482818 -12.76549622369036 1.1480452697934158 0 1 0 +341 1 1 1.575 2.5033520639114464 -10.45516467544924 -0.0004897125671803337 0 1 0 +351 1 1 1.575 -0.10480281833843286 -14.938050279884052 -0.00048588915579017566 0 1 0 +361 1 1 1.575 7.6476430809013145 -16.442304686736612 0.00049038023432324 0 1 0 +364 1 3 -1.05 8.239572543766378 -17.962013382666825 1.1390663747804606 0 1 0 +366 1 4 -0.95 5.952569848038642 -16.369322613306085 
1.009292148021041 0 1 0 +367 1 3 -1.05 4.60638206298816 -14.376506603214507 3.2389897937288765 0 1 0 +369 1 3 -1.05 7.195891896594336 -14.224654560331162 3.049163809309336 0 1 0 +370 1 5 0.425 5.444033924435217 -17.248408369389658 1.14812008570199 0 1 0 +372 1 2 2.1 5.829092303814921 -13.54999199012334 2.757318610834842 0 1 0 +373 1 2 2.1 5.90887749264844 -10.594869830017455 2.7652903021076654 0 1 0 +374 1 3 -1.05 5.6877459594373825 -13.479086764973678 1.1390432596793474 0 1 0 +375 1 3 -1.05 6.043115538450444 -10.687657699182285 1.1403981434855215 0 1 0 +377 1 3 -1.05 7.214508812427347 -9.893588586527766 3.2389864017305623 0 1 0 +378 1 3 -1.05 5.794691970346413 -12.069189428924563 3.240738236405365 0 1 0 +379 1 3 -1.05 4.644066780347256 -9.741765351175818 3.0491815811632907 0 1 0 +381 1 1 1.575 7.663359745050162 -10.455168066687937 -0.0004892393395401484 0 1 0 +391 1 1 1.575 5.055189932412219 -14.938048027032371 -0.0004872860180871186 0 1 0 +451 1 1 1.575 -0.06419364919500303 -11.959425986989281 0.0004869662395279306 1 1 0 +460 1 5 0.425 -2.2677636282949134 -12.76549803894805 1.148075362714911 1 1 0 +824 1 3 -1.05 0.1581453413464491 -18.083120565372205 8.050830868008626 0 1 0 +864 1 3 -1.05 5.318142529558035 -18.083119031532103 8.050827887575677 0 1 0 +982 1 2 2.1 0.01679048455388532 -18.01223102881715 6.432564492236768 0 1 0 +983 1 2 2.1 -0.0066952224139544825 -12.001572696194678 6.424588219561466 0 1 0 +985 1 3 -1.05 -0.14091080181982996 -11.90871125431586 8.049492287832761 0 1 0 +986 1 4 -0.95 2.501485979919419 -10.709979047674722 8.180577187844913 0 1 0 +987 1 3 -1.05 3.8476721766409234 -12.702792935609825 5.9508827991561954 0 1 0 +988 1 3 -1.05 0.10753867741342837 -10.527082522856396 5.949125116992775 0 1 0 +989 1 3 -1.05 1.2581766829054715 -12.854651901566331 6.140708678514933 0 1 0 +990 1 5 0.425 3.010011029658287 -9.83088482771142 8.041736728168713 0 1 0 +992 1 2 2.1 2.624959204051148 -13.529306355082204 6.43255142132813 0 1 0 +993 1 2 2.1 2.545164633028703 
-16.48443503403599 6.42457746374043 0 1 0 +994 1 3 -1.05 2.7663148605711587 -13.600211881262618 8.050823710986089 0 1 0 +995 1 3 -1.05 2.4109413776533053 -16.391641111187187 8.049471733472652 0 1 0 +996 1 4 -0.95 -0.1066473719781591 -15.192867242576972 8.180586172547835 0 1 0 +997 1 3 -1.05 1.2395719193215697 -17.185696569519184 5.950884733429518 0 1 0 +998 1 3 -1.05 2.659366057346901 -15.010107680149012 5.949132937132072 0 1 0 +999 1 3 -1.05 3.809991544530318 -17.337535214117086 6.140692372135046 0 1 0 +1000 1 5 0.425 0.40182382453956933 -14.313808115235338 8.041767358684657 0 1 0 +1022 1 2 2.1 5.176806602000145 -18.01223478887661 6.432561551916441 0 1 0 +1023 1 2 2.1 5.153325970226241 -12.001581171312154 6.424591592957077 0 1 0 +1025 1 3 -1.05 5.019090010771908 -11.908711671328648 8.04949592036308 0 1 0 +1028 1 3 -1.05 5.267540194467005 -10.527058835775907 5.9491260060330475 0 1 0 +1029 1 3 -1.05 6.418164130946273 -12.854644032937445 6.140707132764247 0 1 0 +1030 1 5 0.425 -12.469972004930579 -9.830886011965626 8.04176389538827 1 1 0 +1036 1 4 -0.95 5.053359381870932 -15.192857077416427 8.18058886353535 0 1 0 +1037 1 3 -1.05 6.399549290938857 -17.18570880002915 5.95088487910323 0 1 0 +1040 1 5 0.425 5.561834684837105 -14.313806813221188 8.041810608039574 0 1 0 +1107 1 3 -1.05 -1.3122999739981296 -12.70277646304137 5.950880254538765 1 1 0 +1119 1 3 -1.05 -1.3499973134743044 -17.337541538579533 6.140690843939177 1 1 0 +362 1 2 2.1 -12.202744771981509 -9.067064814200972 2.7573119706868425 1 1 0 +402 1 2 2.1 -7.04273866849521 -9.067055088426352 2.757307939589456 1 1 0 +442 1 2 2.1 -1.8827442162924566 -9.067062967529901 2.7573120570116707 1 1 0 +523 1 2 2.1 -12.122921769822408 -6.111929983949841 2.765291027036895 1 1 0 +525 1 3 -1.05 -11.988703560905819 -6.204760843795567 1.1403957796501452 1 1 0 +528 1 3 -1.05 -12.237149424227058 -7.5863238195216365 3.2407527420150988 1 1 0 +531 1 1 1.575 -10.327858771543793 -2.9935731989533902 0.0004888265444922268 1 1 0 +536 1 4 
-0.95 -12.022931150481051 -2.920580487258352 1.0093004267478776 1 1 0 +540 1 5 0.425 8.108558594315166 -3.7996512054948504 1.148212040044502 0 1 0 +561 1 1 1.575 -7.776025262711729 -7.4765049298112505 0.0004881991149989773 1 1 0 +563 1 2 2.1 -6.962930286971465 -6.111939824800135 2.765283881410019 1 1 0 +564 1 3 -1.05 -7.184085023643926 -8.99618073281571 1.1390399029648446 1 1 0 +565 1 3 -1.05 -6.828701808117627 -6.204761586454861 1.1403901964143177 1 1 0 +566 1 4 -0.95 -9.471118879929094 -7.403529138856177 1.0092745530484954 1 1 0 +567 1 3 -1.05 -10.817325824488437 -5.4107065420677785 3.2389947847718457 1 1 0 +568 1 3 -1.05 -7.07714607080063 -7.586320418897543 3.240750734425326 1 1 0 +569 1 3 -1.05 -8.227783475860987 -5.258830022264039 3.0491700040117298 1 1 0 +570 1 5 0.425 -9.979621696041477 -8.282638958710844 1.14794634211753 1 1 0 +571 1 1 1.575 -5.167865926094019 -2.9935700449751117 0.0004884982994557419 1 1 0 +572 1 2 2.1 -9.594553208812169 -4.584114830084795 2.7573247242772645 1 1 0 +573 1 2 2.1 -9.514752964904952 -1.6290031107655665 2.76528557294675 1 1 0 +574 1 3 -1.05 -9.735924934424778 -4.5132754309991014 1.1390582524844408 1 1 0 +575 1 3 -1.05 -9.380536654292207 -1.7218462684346072 1.140382320970689 1 1 0 +576 1 4 -0.95 -6.862929549230458 -2.92058282567767 1.0092939379568069 1 1 0 +577 1 3 -1.05 -8.209153404187742 -0.9277818004229132 3.238988388541747 1 1 0 +578 1 3 -1.05 -9.62897585444286 -3.103458654146092 3.240740428512092 1 1 0 +579 1 3 -1.05 -10.779616213891364 -0.7759196991747856 3.049168880743542 1 1 0 +580 1 5 0.425 -7.371458392180491 -3.799648965132267 1.1481816702921979 1 1 0 +581 1 1 1.575 -7.760307453210186 -1.4893294292081407 -0.000487591834845702 1 1 0 +591 1 1 1.575 -10.368475011993443 -5.972216450792448 -0.0004866657689603926 1 1 0 +601 1 1 1.575 -2.6160175804425334 -7.476507331401965 0.000489554412521187 1 1 0 +603 1 2 2.1 -1.8029234809179044 -6.111929469506379 2.765290938817035 1 1 0 +604 1 3 -1.05 -2.0240895156060503 
-8.996180475658797 1.1390497095690097 1 1 0 +605 1 3 -1.05 -1.6687035111919997 -6.204760947315812 1.1403951214536825 1 1 0 +606 1 4 -0.95 -4.311125240437589 -7.40353853925054 1.0092720638271473 1 1 0 +607 1 3 -1.05 -5.657299784603431 -5.4106924097233176 3.2389943904105305 1 1 0 +608 1 3 -1.05 -1.9171495422821216 -7.586325941483118 3.240752466809111 1 1 0 +609 1 3 -1.05 -3.0677722086720367 -5.258836440196976 3.049168256994184 1 1 0 +610 1 5 0.425 -4.819631740010255 -8.28264108406943 1.147905363302021 1 1 0 +612 1 2 2.1 -4.434571167364412 -4.584112138551882 2.757328005439472 1 1 0 +613 1 2 2.1 -4.35477408472807 -1.6289950360376295 2.765282611037348 1 1 0 +614 1 3 -1.05 -4.575922294211849 -4.513277152884633 1.1390617721334308 1 1 0 +615 1 3 -1.05 -4.220537459731567 -1.7218458350906225 1.1403790499316706 1 1 0 +616 1 4 -0.95 -1.7029315494609438 -2.920581286855313 1.009300527997441 1 1 0 +617 1 3 -1.05 -3.049127074634125 -0.9277662650700513 3.238986045411176 1 1 0 +618 1 3 -1.05 -4.468977447340732 -3.1034809491568094 3.2407397732533028 1 1 0 +619 1 3 -1.05 -5.619602858246474 -0.7759278254115536 3.049170295315518 1 1 0 +621 1 1 1.575 -2.600299716589224 -1.4893328740682321 -0.0004870341551566071 1 1 0 +631 1 1 1.575 -5.208482201015078 -5.972214134547372 -0.00048794280472819196 1 1 0 +1064 1 3 -1.05 -10.105516488566403 -9.1172864545765 8.050812017095526 1 1 0 +1104 1 3 -1.05 -4.94551950969972 -9.117284875156573 8.050809425873682 1 1 0 +1186 1 4 -0.95 -12.922163181072401 -1.7441480862858398 8.180580989048229 1 1 0 +1187 1 3 -1.05 -11.57594038211662 -3.736962709780661 5.950884644536357 1 1 0 +1192 1 2 2.1 -12.798703976852076 -4.563477223935855 6.4325671062022085 1 1 0 +1193 1 2 2.1 -12.87850157426794 -7.518594866582022 6.4245838131711785 1 1 0 +1194 1 3 -1.05 -12.657357075069436 -4.634391310577776 8.050841207753615 1 1 0 +1195 1 3 -1.05 -13.012730287598115 -7.425811152679454 8.049471003457095 1 1 0 +1198 1 3 -1.05 -12.764298791768608 -6.044310212087346 5.949124918515372 1 1 
0 +1199 1 3 -1.05 -11.613660044905913 -8.371724217557164 6.1406940776695595 1 1 0 +1222 1 2 2.1 -10.246880790109062 -9.046458105402944 6.432550667813766 1 1 0 +1223 1 2 2.1 -10.270345504929077 -3.0357170181300006 6.4245835383340175 1 1 0 +1225 1 3 -1.05 -10.404563440926587 -2.942901247124354 8.049482833097352 1 1 0 +1226 1 4 -0.95 -7.762161044690064 -1.744149770475424 8.180574812461733 1 1 0 +1227 1 3 -1.05 -6.415966116593134 -3.73697764701976 5.9508871807473405 1 1 0 +1228 1 3 -1.05 -10.156132949827974 -1.561327845464941 5.94913393800597 1 1 0 +1229 1 3 -1.05 -9.005492655293018 -3.8888143704750213 6.140697709391448 1 1 0 +1230 1 5 0.425 -7.2536721407950715 -0.8650918391092688 8.041681601019869 1 1 0 +1232 1 2 2.1 -7.638697839125399 -4.56346976541589 6.432562555622445 1 1 0 +1233 1 2 2.1 -7.718506635912108 -7.518604577415662 6.424576758095252 1 1 0 +1234 1 3 -1.05 -7.497352805807434 -4.634391240549473 8.05083153086409 1 1 0 +1235 1 3 -1.05 -7.852728552110221 -7.425811958730723 8.04946655754084 1 1 0 +1236 1 4 -0.95 -10.370302228905821 -6.227020102282678 8.180601687842392 1 1 0 +1237 1 3 -1.05 -9.024099865185532 -8.219863735211376 5.950878868492627 1 1 0 +1238 1 3 -1.05 -7.604295208524743 -6.044303920000971 5.949123349425319 1 1 0 +1239 1 3 -1.05 -6.453672021320111 -8.371717534647717 6.140695622062312 1 1 0 +1240 1 5 0.425 -9.861830566687258 -5.347927183164366 8.041954081983627 1 1 0 +1262 1 2 2.1 -5.086864118735355 -9.046460824361695 6.432547962331752 1 1 0 +1263 1 2 2.1 -5.110325060390764 -3.0357264779902415 6.4245866001619785 1 1 0 +1265 1 3 -1.05 -5.2445627758652025 -2.942901539041836 8.049486192414905 1 1 0 +1266 1 4 -0.95 -2.6021638449486817 -1.7441493140479167 8.180581250684453 1 1 0 +1268 1 3 -1.05 -4.996131563332382 -1.561304142262447 5.949134847146771 1 1 0 +1269 1 3 -1.05 -3.845505208560062 -3.888806435255866 6.140696375915624 1 1 0 +1270 1 5 0.425 -2.0936552768300416 -0.8650931226092098 8.041708750676337 1 1 0 +1272 1 2 2.1 -2.478705083415977 
-4.563478530750267 6.432566771448263 1 1 0 +1273 1 2 2.1 -2.5585004268402667 -7.518594042662386 6.424583683031381 1 1 0 +1274 1 3 -1.05 -2.337357058590765 -4.6343911683899695 8.050841730975934 1 1 0 +1275 1 3 -1.05 -2.692730278146584 -7.4258112933116 8.049471495897967 1 1 0 +1276 1 4 -0.95 -5.210295954483267 -6.227010516473889 8.180604547038248 1 1 0 +1277 1 3 -1.05 -3.86412183624161 -8.219875620457026 5.950879044037668 1 1 0 +1278 1 3 -1.05 -2.4442987304969392 -6.044309813785304 5.949125141897687 1 1 0 +1280 1 5 0.425 -4.701819271798077 -5.3479252345724095 8.041997228930375 1 1 0 +322 1 2 2.1 3.2772610098674093 -9.067055693067015 2.7573080000114008 0 1 0 +481 1 1 1.575 2.5439746362228775 -7.476504835357023 0.000488107312015984 0 1 0 +483 1 2 2.1 3.3570675650653765 -6.1119409913516805 2.765283658968608 0 1 0 +484 1 3 -1.05 3.1359149983488273 -8.996180653887704 1.1390399010220378 0 1 0 +485 1 3 -1.05 3.4912982864764803 -6.204761586269585 1.1403902809350122 0 1 0 +486 1 4 -0.95 0.848880935670131 -7.403529456906714 1.0092745179848404 0 1 0 +487 1 3 -1.05 -0.4973223654953767 -5.4107043825084595 3.2389946445963442 0 1 0 +488 1 3 -1.05 3.2428541215929787 -7.586319587782093 3.2407506030975757 0 1 0 +489 1 3 -1.05 2.0922173637001134 -5.258830509141244 3.049170069120102 0 1 0 +490 1 5 0.425 0.34037849430467304 -8.282638585494373 1.1479463507303809 0 1 0 +491 1 1 1.575 5.152133947531471 -2.9935699784041603 0.0004881234041693716 0 1 0 +492 1 2 2.1 0.7254445666488039 -4.58411548963857 2.7573250239711573 0 1 0 +493 1 2 2.1 0.8052450939405311 -1.6290034167113632 2.7652851130343965 0 1 0 +494 1 3 -1.05 0.5840751961211161 -4.513275537476634 1.139058491185871 0 1 0 +495 1 3 -1.05 0.9394635072903537 -1.7218461634728612 1.1403819751435265 0 1 0 +496 1 4 -0.95 3.457070537219302 -2.9205829409048096 1.0092940260285932 0 1 0 +497 1 3 -1.05 2.1108492600941755 -0.9277801182782781 3.2389884882436597 0 1 0 +498 1 3 -1.05 0.6910242115697844 -3.103459227797069 3.240740522829608 0 1 0 +499 1 3 
-1.05 -0.45961486738614177 -0.7759203181741157 3.049168941857401 0 1 0 +500 1 5 0.425 2.9485416775480626 -3.7996487609851872 1.148181457575495 0 1 0 +501 1 1 1.575 2.5596923414053414 -1.4893294760794085 -0.00048791691717831043 0 1 0 +511 1 1 1.575 -0.048475029845510775 -5.9722163822729275 -0.000486610148280775 0 1 0 +521 1 1 1.575 7.703982404569835 -7.47650743047992 0.0004894906663004406 0 1 0 +524 1 3 -1.05 8.295910521844277 -8.996180577499706 1.1390498499011983 0 1 0 +526 1 4 -0.95 6.008874247849864 -7.403539256615227 1.009272347205897 0 1 0 +527 1 3 -1.05 4.662703122988951 -5.410690550977408 3.238994424282833 0 1 0 +529 1 3 -1.05 7.25222715620599 -5.258835994797554 3.0491686153219923 0 1 0 +530 1 5 0.425 5.500368804549042 -8.28264025862713 1.1479051971829524 0 1 0 +532 1 2 2.1 5.885427782213334 -4.584112735919096 2.7573281465655857 0 1 0 +533 1 2 2.1 5.965226467012172 -1.628993361989938 2.7652825479136283 0 1 0 +534 1 3 -1.05 5.74407777792679 -4.5132770146071675 1.1390625883841388 0 1 0 +535 1 3 -1.05 6.0994626404410575 -1.7218459113188977 1.1403796100854198 0 1 0 +537 1 3 -1.05 7.270871098481489 -0.9277673866200544 3.2389858821101694 0 1 0 +538 1 3 -1.05 5.8510226007914845 -3.103482215879012 3.240739632064548 0 1 0 +539 1 3 -1.05 4.7003977117094315 -0.7759282649032926 3.0491702761655297 0 1 0 +541 1 1 1.575 7.71970016267743 -1.4893327968293981 -0.0004872882145861013 0 1 0 +551 1 1 1.575 5.111517638351028 -5.972214169324591 -0.00048815723114969956 0 1 0 +611 1 1 1.575 -0.0078587283292606 -2.9935732581044245 0.0004888922267749507 1 1 0 +620 1 5 0.425 -2.211440966107384 -3.7996504259139385 1.1482119261243664 1 1 0 +984 1 3 -1.05 0.21448346242866556 -9.117286626928372 8.050812484857788 0 1 0 +1024 1 3 -1.05 5.374480667058524 -9.1172847465118 8.050809703370929 0 1 0 +1142 1 2 2.1 0.07311912259206466 -9.046457539168534 6.432550791979782 0 1 0 +1143 1 2 2.1 0.049653605296850145 -3.0357192834675537 6.424583355385289 0 1 0 +1145 1 3 -1.05 -0.0845634540740896 
-2.9429011311913413 8.049482748208861 0 1 0 +1146 1 4 -0.95 2.5578388527215647 -1.7441499838946513 8.180574701561904 0 1 0 +1147 1 3 -1.05 3.904034789903749 -3.7369772016518894 5.950886974857758 0 1 0 +1148 1 3 -1.05 0.1638671308906794 -1.5613255896995106 5.949134016742535 0 1 0 +1149 1 3 -1.05 1.3145065630773924 -3.8888138098807907 6.140697691304018 0 1 0 +1150 1 5 0.425 3.0663280045002406 -0.8650917177898982 8.041681658582819 0 1 0 +1152 1 2 2.1 2.681301696828717 -4.563470951334201 6.432562677791433 0 1 0 +1153 1 2 2.1 2.6014924415491016 -7.518605844210883 6.42457670393048 0 1 0 +1154 1 3 -1.05 2.8226473559855396 -4.634391279296553 8.050831493737332 0 1 0 +1155 1 3 -1.05 2.467271583937638 -7.425811889492945 8.049466671649265 0 1 0 +1156 1 4 -0.95 -0.050302377863054915 -6.227020357623573 8.18060186788391 0 1 0 +1157 1 3 -1.05 1.2959016487701724 -8.219862803300558 5.950878899595457 0 1 0 +1158 1 3 -1.05 2.715705046173925 -6.0443024015894125 5.949123183070215 0 1 0 +1159 1 3 -1.05 3.866328082096553 -8.371717396852594 6.14069563666442 0 1 0 +1160 1 5 0.425 0.45816960045136135 -5.347926987828261 8.041953993827088 0 1 0 +1182 1 2 2.1 5.233134204042219 -9.046462615939763 6.432548060968001 0 1 0 +1183 1 2 2.1 5.209674691687077 -3.035725777150798 6.42458687581861 0 1 0 +1185 1 3 -1.05 5.075437238345243 -2.9429016177355063 8.0494859105205 0 1 0 +1188 1 3 -1.05 5.323868516938102 -1.5613057900237983 5.949134653550393 0 1 0 +1189 1 3 -1.05 6.474494628261173 -3.8888064118199335 6.140696088678768 0 1 0 +1190 1 5 0.425 -12.413655946990202 -0.8650943030890836 8.041708999745945 1 1 0 +1196 1 4 -0.95 5.109703718829179 -6.227011235494134 8.180604680522054 0 1 0 +1197 1 3 -1.05 6.455880799282653 -8.219873725450045 5.950878826176888 0 1 0 +1200 1 5 0.425 5.618181133036563 -5.347924456811681 8.0419973088112 0 1 0 +1267 1 3 -1.05 -1.255938118796582 -3.736961290112877 5.95088464227411 1 1 0 +1279 1 3 -1.05 -1.293660683856988 -8.371724040207935 6.140694136436416 1 1 0 +43 1 2 2.1 
-12.066603589864952 2.8538562635186473 2.765296766562731 1 0 0 +45 1 3 -1.05 -11.932382242280934 2.7610755310900856 1.1404071534983906 1 0 0 +48 1 3 -1.05 -12.180808606969753 1.3795772494375846 3.2407446680776673 1 0 0 +51 1 1 1.575 -10.271517178678891 5.972243043476983 0.0004897808600361486 1 0 0 +56 1 4 -0.95 -11.966601677508535 6.045223061069063 1.0092868522084313 1 0 0 +60 1 5 0.425 8.164880641989306 5.166117461622548 1.1480432930863778 0 0 0 +81 1 1 1.575 -7.719695704081541 1.489301984598221 0.00048620424004042206 1 0 0 +83 1 2 2.1 -6.906611830118906 2.853845289659727 2.765289513921303 1 0 0 +84 1 3 -1.05 -7.1277496439282455 -0.03034856107450068 1.1390300687638835 1 0 0 +85 1 3 -1.05 -6.772380470440207 2.7610747346770914 1.1404021270114981 1 0 0 +86 1 4 -0.95 -9.414807068563295 1.5622817433164506 1.0092737068243878 1 0 0 +87 1 3 -1.05 -10.761016708915546 3.5551257680232844 3.2389908036271358 1 0 0 +88 1 3 -1.05 -7.020805055588193 1.3795831051103633 3.2407426091838403 1 0 0 +89 1 3 -1.05 -8.171444330233143 3.706977523559388 3.0491815779021074 1 0 0 +90 1 5 0.425 -9.92326936174715 0.6832053400645641 1.1479687029894574 1 0 0 +91 1 1 1.575 -5.111524290678346 5.972246284049945 0.0004894123271252937 1 0 0 +92 1 2 2.1 -9.53822873762858 4.381702400713934 2.7573155444923465 1 0 0 +93 1 2 2.1 -9.458410238013869 7.33682068730624 2.765285098253017 1 0 0 +94 1 3 -1.05 -9.679588290829658 4.45254795692637 1.139053534311218 1 0 0 +95 1 3 -1.05 -9.324195367300185 7.243969020237035 1.140384524236497 1 0 0 +96 1 4 -0.95 -6.806599492258481 6.045221693669681 1.009280187757005 1 0 0 +97 1 3 -1.05 -8.152808576570859 8.038030966821896 3.2389942981918 1 0 0 +98 1 3 -1.05 -9.572645830808028 5.862365851389207 3.2407504502190747 1 0 0 +99 1 3 -1.05 -10.723283583484175 8.18991092687909 3.0491638601374564 1 0 0 +100 1 5 0.425 -7.315136856531225 5.166118893998583 1.1480131499026296 1 0 0 +101 1 1 1.575 -7.703976890906669 7.476521396452124 -0.0004869418693242977 1 0 0 +111 1 1 1.575 
-10.312141649453219 2.9935784879242497 -0.0004884588124696165 1 0 0 +121 1 1 1.575 -2.5596879766050353 1.4892994022197463 0.0004876271542961774 1 0 0 +123 1 2 2.1 -1.7466035808200573 2.8538558037971953 2.7652965023822897 1 0 0 +124 1 3 -1.05 -1.9677540888021028 -0.03034837344207375 1.1390396821506403 1 0 0 +126 1 4 -0.95 -4.25481364789182 1.5622718466590193 1.009271365196117 1 0 0 +127 1 3 -1.05 -5.6009918988674805 3.55513912460707 3.2389905823258385 1 0 0 +128 1 3 -1.05 -1.860808583710753 1.3795779024677586 3.240744602344904 1 0 0 +129 1 3 -1.05 -3.0114343038263804 3.7069717528642663 3.04917999216428 1 0 0 +130 1 5 0.425 -4.763279165866927 0.6832035674608612 1.1479273728367207 1 0 0 +132 1 2 2.1 -4.378245338999152 4.38170538944145 2.7573185647135965 1 0 0 +133 1 2 2.1 -4.298430131568423 7.336829694841267 2.765282372169972 1 0 0 +134 1 3 -1.05 -4.519585788114593 4.452546370179327 1.1390572508955117 1 0 0 +135 1 3 -1.05 -4.164196247857843 7.243969567402647 1.1403816035253787 1 0 0 +137 1 3 -1.05 -2.992784551459188 8.03804507272551 3.238991727054886 1 0 0 +138 1 3 -1.05 -4.412647569640212 5.86234321615736 3.240749786939869 1 0 0 +139 1 3 -1.05 -5.563270268907923 8.189902706625428 3.049165190799913 1 0 0 +141 1 1 1.575 -2.543969087429206 7.476517985115045 -0.0004862871680106906 1 0 0 +151 1 1 1.575 -5.1521487769677075 2.9935806762264434 -0.0004901006836668387 1 0 0 +522 1 2 2.1 -12.14640321562067 -0.10127600189234087 2.7573120460055964 1 1 0 +562 1 2 2.1 -6.986398207724864 -0.1012687687102094 2.757308016998312 1 1 0 +602 1 2 2.1 -1.826404614623005 -0.10127704899093004 2.757311825180798 1 1 0 +706 1 4 -0.95 -12.865796022624288 7.221714492772325 8.180601207822123 1 0 0 +707 1 3 -1.05 -11.519585625908963 5.228867443825511 5.950880624420204 1 0 0 +712 1 2 2.1 -12.74236927286661 4.402289337375663 6.432558526054466 1 0 0 +713 1 2 2.1 -12.822179576740067 1.447177436755961 6.424591364431221 1 0 0 +714 1 3 -1.05 -12.601019912922663 4.331444765726772 8.050823657348074 1 0 0 
+715 1 3 -1.05 -12.9564089613341 1.5400239285377246 8.049490036152141 1 0 0 +718 1 3 -1.05 -12.707959347486435 2.9216270213267173 5.949122287867622 1 0 0 +719 1 3 -1.05 -11.557321491073434 0.5940835324113181 6.140706214927789 1 0 0 +742 1 2 2.1 -10.190553470512224 -0.08060471526624369 6.432549047518183 1 0 0 +743 1 2 2.1 -10.214006128678415 5.930145239857023 6.424577571546896 1 0 0 +744 1 3 -1.05 -9.992850905501367 8.814341090685996 8.050837610229921 1 0 0 +745 1 3 -1.05 -10.348222732989003 6.022917560970665 8.049466320772085 1 0 0 +746 1 4 -0.95 -7.705793610382077 7.221713363185231 8.180594909971527 1 0 0 +747 1 3 -1.05 -6.35961233585525 5.228852016801575 5.9508831678014555 1 0 0 +748 1 3 -1.05 -10.099798574650077 7.404404225630994 5.9491289200407405 1 0 0 +749 1 3 -1.05 -8.949159917306673 5.077015823046679 6.140691533239114 1 0 0 +750 1 5 0.425 -7.197339347246486 8.100783891909586 8.041891733507144 1 0 0 +752 1 2 2.1 -7.582363175185557 4.40229803595107 6.4325545097206405 1 0 0 +753 1 2 2.1 -7.66218773373044 1.447166595599981 6.424584171396804 1 0 0 +754 1 3 -1.05 -7.441015439936198 4.33144478045222 8.050813764214633 1 0 0 +755 1 3 -1.05 -7.796407081280542 1.5400232594796925 8.04948484594699 1 0 0 +756 1 4 -0.95 -10.314003547549689 2.7387701229844446 8.180589327508356 1 0 0 +757 1 3 -1.05 -8.96779392146795 0.7459625241507837 5.950878464150257 1 0 0 +758 1 3 -1.05 -7.547955743292837 2.9216319108547033 5.949120478977855 1 0 0 +759 1 3 -1.05 -6.3973326315710946 0.5940898261857264 6.140707747743219 1 0 0 +760 1 5 0.425 -9.805482010992158 3.617879643362997 8.041844519342556 1 0 0 +782 1 2 2.1 -5.03053752216915 -0.08060905658925677 6.432546163227055 1 0 0 +783 1 2 2.1 -5.053985551934414 5.930137706277755 6.424580953091224 1 0 0 +784 1 3 -1.05 -4.832853756870046 8.814342658293356 8.050834914156995 1 0 0 +785 1 3 -1.05 -5.188222049286011 6.022917177987889 8.049469621886916 1 0 0 +786 1 4 -0.95 -2.54579593414841 7.221714309210501 8.180601184681164 1 0 0 +788 1 3 -1.05 
-4.939797262145818 7.404425893631185 5.949129853518169 1 0 0 +789 1 3 -1.05 -3.7891712049732114 5.0770228189984365 6.140690056642459 1 0 0 +790 1 5 0.425 -2.0373227716303397 8.10078202006681 8.041919069850836 1 0 0 +792 1 2 2.1 -2.4223707917632114 4.4022905679840285 6.432558910582037 1 0 0 +793 1 2 2.1 -2.502180902736985 1.4471777819542027 6.424591034963024 1 0 0 +794 1 3 -1.05 -2.281019735917397 4.331444825132113 8.05082378781694 1 0 0 +795 1 3 -1.05 -2.636408851343842 1.5400238125742547 8.049489411652035 1 0 0 +796 1 4 -0.95 -5.15399710310596 2.7387798895711164 8.180592034792975 1 0 0 +797 1 3 -1.05 -3.8078161189370086 0.7459504928387659 5.950878669052271 1 0 0 +798 1 3 -1.05 -2.3879594652644123 2.921624344781165 5.949122369199204 1 0 0 +800 1 5 0.425 -4.645471004162632 3.6178812975920884 8.04188765083781 1 0 0 +902 1 2 2.1 -10.13421204796398 8.885266130047459 6.432562263168402 1 0 0 +942 1 2 2.1 -4.974195624506329 8.885262572218696 6.432559619577779 1 0 0 +1224 1 3 -1.05 -10.049182067561132 -0.1514710749753796 8.050819995602664 1 1 0 +1264 1 3 -1.05 -4.889184949315168 -0.15146950774747125 8.050816947112386 1 1 0 +1 1 1 1.575 2.6003042619820214 1.489301985987158 0.00048617890439572875 0 0 0 +3 1 2 2.1 3.413388030780098 2.853846993110462 2.7652894411413254 0 0 0 +4 1 3 -1.05 3.192250473216461 -0.030348573470597273 1.1390305483599548 0 0 0 +5 1 3 -1.05 3.5476195187832538 2.7610747149761714 1.1404022330734858 0 0 0 +6 1 4 -0.95 0.9051929855174699 1.5622817529618267 1.0092737393651063 0 0 0 +7 1 3 -1.05 -0.44101638859063463 3.5551258758509157 3.238990910738922 0 0 0 +8 1 3 -1.05 3.2991948449339965 1.3795809791892317 3.240742744153204 0 0 0 +9 1 3 -1.05 2.1485570809573353 3.706976696393621 3.0491817246607322 0 0 0 +10 1 5 0.425 0.39673058856233645 0.6832052188137645 1.1479686487541496 0 0 0 +11 1 1 1.575 5.208475641812152 5.972246199066358 0.0004892910943752327 0 0 0 +12 1 2 2.1 0.7817706041936265 4.38170355408236 2.7573153336218788 0 0 0 +13 1 2 2.1 
0.8615899292224434 7.336820998825839 2.7652848366160274 0 0 0 +14 1 3 -1.05 0.6404116649888252 4.452548002411522 1.1390536212036224 0 0 0 +15 1 3 -1.05 0.9958046216279932 7.243969023731864 1.1403840581284594 0 0 0 +16 1 4 -0.95 3.513400407792714 6.045221555334809 1.0092802794076725 0 0 0 +17 1 3 -1.05 2.1671922380352218 8.038031393312668 3.238994437034231 0 0 0 +18 1 3 -1.05 0.7473539186626681 5.862364303969713 3.2407507095117243 0 0 0 +19 1 3 -1.05 -0.40328409120112596 8.189911245998463 3.049163882753348 0 0 0 +20 1 5 0.425 3.004863256879574 5.166119171117636 1.148013285968382 0 0 0 +21 1 1 1.575 2.6160231450750047 7.476521308095869 -0.0004868964845172741 0 0 0 +31 1 1 1.575 0.007858486069929782 2.9935784838606985 -0.0004886229058236324 0 0 0 +41 1 1 1.575 7.7603119800836495 1.4892994847808687 0.0004876409956668937 0 0 0 +44 1 3 -1.05 8.352245958178369 -0.03034852708280411 1.1390398478533665 0 0 0 +46 1 4 -0.95 6.065186680101519 1.5622726468860186 1.0092713469534331 0 0 0 +47 1 3 -1.05 4.719006945859089 3.5551385256659387 3.2389905613454264 0 0 0 +49 1 3 -1.05 7.308566510413744 3.7069713740300188 3.0491800116188568 0 0 0 +50 1 5 0.425 5.55672029157318 0.6832027274139385 1.1479273774212633 0 0 0 +52 1 2 2.1 5.941754691612481 4.381705907161461 2.7573185281719876 0 0 0 +53 1 2 2.1 6.021569156148551 7.336829473327171 2.765282260596237 0 0 0 +54 1 3 -1.05 5.800414227629432 4.452546389521338 1.1390571190245424 0 0 0 +55 1 3 -1.05 6.155803761978959 7.243969506923385 1.1403815022420147 0 0 0 +57 1 3 -1.05 7.327216945332573 8.038046038385499 3.2389918433149454 0 0 0 +58 1 3 -1.05 5.90735237093768 5.8623431966956865 3.2407497213706336 0 0 0 +59 1 3 -1.05 4.756730041223639 8.189902512772619 3.0491652979454305 0 0 0 +61 1 1 1.575 7.776030915957783 7.4765179856259465 -0.00048630439592045605 0 0 0 +71 1 1 1.575 5.1678511824652595 2.993580880857408 -0.0004901999814084235 0 0 0 +125 1 3 -1.05 -1.6123821854260463 2.761075314653997 1.1404070154366295 1 0 0 +131 1 1 1.575 
0.04848294209673121 5.972242935779885 0.000490037179456948 1 0 0 +136 1 4 -0.95 -1.6466013343857568 6.0452237602823296 1.009286692899492 1 0 0 +140 1 5 0.425 -2.1551197559159263 5.166116856956847 1.1480434751617512 1 0 0 +482 1 2 2.1 3.333600286305689 -0.10126833528058299 2.7573081542690936 0 1 0 +662 1 2 2.1 0.1294481221970365 -0.0806049883627118 6.432548631586574 0 0 0 +663 1 2 2.1 0.10599334680137673 5.930145648028347 6.424577282515786 0 0 0 +664 1 3 -1.05 0.3271491025311537 8.814340912836013 8.050837562340648 0 0 0 +665 1 3 -1.05 -0.02822261760979572 6.022917492990221 8.049465873202388 0 0 0 +666 1 4 -0.95 2.6142064843337565 7.221713579662975 8.180594739766134 0 0 0 +667 1 3 -1.05 3.9603863448931165 5.228851139494434 5.950883261310734 0 0 0 +668 1 3 -1.05 0.22020140066630312 7.404402962508492 5.9491292107555385 0 0 0 +669 1 3 -1.05 1.370839907147385 5.0770158086882695 6.140691527571985 0 0 0 +670 1 5 0.425 3.122660489762424 8.10078354117201 8.041891815867482 0 0 0 +672 1 2 2.1 2.7376378509887616 4.402297453839999 6.432554269024626 0 0 0 +673 1 2 2.1 2.657814143905327 1.4471664246497618 6.4245845586209835 0 0 0 +674 1 3 -1.05 2.8789843747672226 4.331444892670223 8.050813478586486 0 0 0 +675 1 3 -1.05 2.5235929276811255 1.5400231451238362 8.049485296604866 0 0 0 +676 1 4 -0.95 0.005996469612352229 2.7387702634425644 8.180589156812536 0 0 0 +677 1 3 -1.05 1.35220333360148 0.7459606544277797 5.9508784991572234 0 0 0 +678 1 3 -1.05 2.772044403479459 2.921633738753382 5.949120410621978 0 0 0 +679 1 3 -1.05 3.9226663496817054 0.5940905195502779 6.140707539547966 0 0 0 +680 1 5 0.425 0.5145179216393423 3.6178796125064707 8.041844457004787 0 0 0 +702 1 2 2.1 5.2894644486370055 -0.08060750115534532 6.432546281853535 0 0 0 +703 1 2 2.1 5.266015505409589 5.930137771849527 6.424580792396204 0 0 0 +704 1 3 -1.05 5.487146353425208 8.814342589672009 8.050834969489033 0 0 0 +705 1 3 -1.05 5.131778084715114 6.022917199990019 8.049469586914329 0 0 0 +708 1 3 -1.05 
5.380202829540677 7.404425443770609 5.949129963862305 0 0 0 +709 1 3 -1.05 6.530827312385206 5.077023734536393 6.14069010936865 0 0 0 +710 1 5 0.425 -12.357322833708851 8.100781892429946 8.041919043812685 1 0 0 +716 1 4 -0.95 5.166002670346272 2.7387795855326154 8.180592061839635 0 0 0 +717 1 3 -1.05 6.512180758213354 0.7459485736595788 5.95087865003107 0 0 0 +720 1 5 0.425 5.674529320524975 3.6178817424015435 8.041887704595718 0 0 0 +787 1 3 -1.05 -1.1995843894029825 5.228868218664132 5.950880728607876 1 0 0 +799 1 3 -1.05 -1.2373222098897543 0.594084068338713 6.140706246118169 1 0 0 +822 1 2 2.1 0.18578805441223878 8.885266915444351 6.432562197797376 0 0 0 +862 1 2 2.1 5.345804278048195 8.885262760376076 6.43255959737005 0 0 0 +1144 1 3 -1.05 0.2708179763475371 -0.15147101591706758 8.050819426100988 0 1 0 +1184 1 3 -1.05 5.430815094845618 -0.1514693876244877 8.050816796018031 0 1 0 +42 1 2 2.1 -12.09006260584677 8.864589177393615 2.7573256224033216 1 0 0 +82 1 2 2.1 -6.930057050782033 8.864597863376343 2.75732164447156 1 0 0 +122 1 2 2.1 -1.7700616265708433 8.864589076582348 2.757325473341277 1 0 0 +203 1 2 2.1 -12.010265577762329 11.819715092153334 2.765291360502074 1 0 0 +205 1 3 -1.05 -11.87604343181911 11.726896004685383 1.1403924325820025 1 0 0 +208 1 3 -1.05 -12.124473345038734 10.345319342352141 3.2407384604436444 1 0 0 +211 1 1 1.575 -10.215182621282619 14.938036557984727 0.00048802927475399827 1 0 0 +216 1 4 -0.95 -11.910303760846821 15.011010860547525 1.009271885162475 1 0 0 +220 1 5 0.425 8.221226954671451 14.131915169973812 1.1479074717195452 0 0 0 +241 1 1 1.575 -7.6633658312451125 10.455150665603007 0.00048674450156838134 1 0 0 +243 1 2 2.1 -6.850274802226975 11.819704709836884 2.765284103462397 1 0 0 +244 1 3 -1.05 -7.071418315464653 8.935464760159874 1.1390470826570223 1 0 0 +245 1 3 -1.05 -6.716041717351411 11.726895262208341 1.1403871825276504 1 0 0 +246 1 4 -0.95 -9.358442661161195 10.528143597063906 1.0092939108167762 1 0 0 +247 1 3 -1.05 
-10.704668560055994 12.520955558501814 3.238986452455382 1 0 0 +248 1 3 -1.05 -6.964469919678031 10.345323343327664 3.240736447098582 1 0 0 +249 1 3 -1.05 -8.115109303492375 12.672804410356928 3.0491772060606532 1 0 0 +250 1 5 0.425 -9.86693405976031 9.649084784979703 1.1481844532688399 1 0 0 +251 1 1 1.575 -5.055189905145113 14.938039766588904 0.00048748737759041205 1 0 0 +252 1 2 2.1 -9.481895140515539 13.347470616334537 2.7573054912819455 1 0 0 +253 1 2 2.1 -9.402089479039432 16.30259246114154 2.7652923834422634 1 0 0 +254 1 3 -1.05 -9.623250645681043 13.418384253474247 1.1390345417105667 1 0 0 +255 1 3 -1.05 -9.267873053886584 16.209803149203925 1.1404029722314721 1 0 0 +256 1 4 -0.95 -6.750301966919627 15.011008793832236 1.0092655645710042 1 0 0 +257 1 3 -1.05 -8.096502014882379 17.003855189336587 3.2389947631306786 1 0 0 +258 1 3 -1.05 -9.516307273099796 14.828301501434499 3.2407492682947225 1 0 0 +259 1 3 -1.05 -10.666945431166095 17.155719681097477 3.049175296686906 1 0 0 +260 1 5 0.425 -7.258790173497739 14.131917299137772 1.1478771519031028 1 0 0 +261 1 1 1.575 -7.647647928463388 16.442332287146424 -0.000488821392487182 1 0 0 +271 1 1 1.575 -10.255800064478802 11.959390414342575 -0.0004879826669004217 1 0 0 +281 1 1 1.575 -2.5033581396528186 10.455148150790972 0.0004880508656999183 1 0 0 +283 1 2 2.1 -1.6902660097883206 11.819714042088183 2.765291463935222 1 0 0 +284 1 3 -1.05 -1.9114228701738583 8.935464925204077 1.1390564976711524 1 0 0 +286 1 4 -0.95 -4.198449098102025 10.528133983598376 1.0092915759019423 1 0 0 +287 1 3 -1.05 -5.544642042234459 12.520970094327684 3.238986094778216 1 0 0 +288 1 3 -1.05 -1.8044733565556257 10.345320519191759 3.240738331572988 1 0 0 +289 1 3 -1.05 -2.9550994549008944 12.672798772092221 3.049175605830589 1 0 0 +290 1 5 0.425 -4.706943981189049 9.649082683942861 1.1481432534485005 1 0 0 +292 1 2 2.1 -4.321912545284045 13.347472426595289 2.7573087615778036 1 0 0 +293 1 2 2.1 -4.242109237221538 16.302601544499094 
2.765289869034204 1 0 0 +294 1 3 -1.05 -4.463248142025066 13.418382607713053 1.139038490940214 1 0 0 +295 1 3 -1.05 -4.107873920942626 16.209803614349052 1.140400471186064 1 0 0 +297 1 3 -1.05 -2.9364789165855 17.003868750171616 3.2389922567955534 1 0 0 +298 1 3 -1.05 -4.356308934359183 14.828279979627037 3.240748540873943 1 0 0 +299 1 3 -1.05 -5.506931509883287 17.155711082805635 3.049176629135548 1 0 0 +301 1 1 1.575 -2.487640147269909 16.442328816418733 -0.0004883367723138576 1 0 0 +311 1 1 1.575 -5.095807262339934 11.959392592244821 -0.0004895582360084916 1 0 0 +866 1 4 -0.95 -12.809479347924743 16.18753152254509 8.180603331692547 1 0 0 +867 1 3 -1.05 -11.463275683185296 14.194699747323508 5.950876300797919 1 0 0 +872 1 2 2.1 -12.686044332292026 13.368099433193311 6.432547920762799 1 0 0 +873 1 2 2.1 -12.765838937433683 10.41299358974631 6.424592111086573 1 0 0 +874 1 3 -1.05 -12.544682823574899 13.297270345420436 8.050815957208696 1 0 0 +875 1 3 -1.05 -12.900069677630333 10.5058408528262 8.049494880803467 1 0 0 +878 1 3 -1.05 -12.651628832221212 11.887467867095268 5.9491319671647975 1 0 0 +879 1 3 -1.05 -11.500988895646206 9.559912285205048 6.140702884052089 1 0 0 +903 1 2 2.1 -10.15768664396984 14.895937931749142 6.4245820529389 1 0 0 +905 1 3 -1.05 -10.29190066164717 14.988753381285374 8.049475307293187 1 0 0 +906 1 4 -0.95 -7.649476933430761 16.18753061890442 8.180596856550588 1 0 0 +907 1 3 -1.05 -6.303303846872613 14.194683336474714 5.950878682998182 1 0 0 +908 1 3 -1.05 -10.043457772052527 16.370291645323054 5.949120441329194 1 0 0 +909 1 3 -1.05 -8.892821020091677 14.04282418271502 6.140702337720942 1 0 0 +910 1 5 0.425 -7.140987432429453 17.0666357882375 8.041945898966334 1 0 0 +912 1 2 2.1 -7.526036934568197 13.368107272741312 6.4325434453563695 1 0 0 +913 1 2 2.1 -7.605845200775825 10.412982355545267 6.424585246251738 1 0 0 +914 1 3 -1.05 -7.384678562846151 13.297270278808188 8.05080567732344 1 0 0 +915 1 3 -1.05 -7.740067845155377 10.505840284961845 
8.049490055106476 1 0 0 +916 1 4 -0.95 -10.257679256880722 11.704569167331858 8.180573427892023 1 0 0 +917 1 3 -1.05 -8.911455963414486 9.711773926551299 5.950884469289509 1 0 0 +918 1 3 -1.05 -7.491625174558855 11.887475216511863 5.949130124537872 1 0 0 +919 1 3 -1.05 -6.341000561150384 9.559918847426733 6.140704372264844 1 0 0 +920 1 5 0.425 -9.749158350506987 12.583644683468911 8.041657698866539 1 0 0 +943 1 2 2.1 -4.997664858400203 14.895930244835991 6.424585607692931 1 0 0 +945 1 3 -1.05 -5.131900069757357 14.98875297667713 8.049478722615055 1 0 0 +946 1 4 -0.95 -2.4894793258925247 16.187531494753113 8.18060331455739 1 0 0 +948 1 3 -1.05 -4.883456385777334 16.37031336364964 5.94912114642281 1 0 0 +949 1 3 -1.05 -3.73283340764449 14.042831878604474 6.140700750137892 1 0 0 +950 1 5 0.425 -1.980970887878998 17.06663388813786 8.041972936030264 1 0 0 +952 1 2 2.1 -2.366044208847562 13.368099483410205 6.432548100519332 1 0 0 +953 1 2 2.1 -2.4458390869474993 10.41299379338496 6.424592232856112 1 0 0 +954 1 3 -1.05 -2.2246828141337502 13.297270286553317 8.050815880203343 1 0 0 +955 1 3 -1.05 -2.580069543536629 10.505840925682026 8.04949477487744 1 0 0 +956 1 4 -0.95 -5.097672768092682 11.704578991964201 8.180576300900766 1 0 0 +957 1 3 -1.05 -3.7514781712957763 9.711761903734494 5.950884468909207 1 0 0 +958 1 3 -1.05 -2.331628855394513 11.887467661981258 5.949131825798181 1 0 0 +960 1 5 0.425 -4.589147136714461 12.583646517910942 8.04170102397289 1 0 0 +2 1 2 2.1 3.3899422731256763 8.864597132006875 2.7573215253057963 0 0 0 +161 1 1 1.575 2.6566341651466274 10.455150667308867 0.00048679133445439504 0 0 0 +163 1 2 2.1 3.469725611191784 11.819703973175962 2.7652842672918947 0 0 0 +164 1 3 -1.05 3.248581755192575 8.935464749145556 1.139046873812159 0 0 0 +165 1 3 -1.05 3.603958382521821 11.726895331931285 1.1403873843045993 0 0 0 +166 1 4 -0.95 0.9615570622632923 10.528143207441804 1.009293869122109 0 0 0 +167 1 3 -1.05 -0.38466753745755256 12.520956284187964 
3.23898646303282 0 0 0 +168 1 3 -1.05 3.3555302887687066 10.345324712237673 3.240736497944564 0 0 0 +169 1 3 -1.05 2.2048898258862977 12.672804867739217 3.0491771504206753 0 0 0 +170 1 5 0.425 0.4530661645712417 9.649085132213575 1.1481844303253137 0 0 0 +171 1 1 1.575 5.26481011625984 14.93803969584685 0.00048748398004327953 0 0 0 +172 1 2 2.1 0.8381050018951335 13.34746956355098 2.7573056112375216 0 0 0 +173 1 2 2.1 0.917912298353329 16.302592791352506 2.7652928068938305 0 0 0 +174 1 3 -1.05 0.6967492081717079 13.418384331397867 1.139034719167146 0 0 0 +175 1 3 -1.05 1.0521268357949296 16.209803156886135 1.140403453608636 0 0 0 +176 1 4 -0.95 3.5696981167895174 15.011008782764922 1.0092654803188346 0 0 0 +177 1 3 -1.05 2.2234949075541905 17.003853295689932 3.238994790958916 0 0 0 +178 1 3 -1.05 0.8036927614304492 14.828302477588569 3.24074919764605 0 0 0 +179 1 3 -1.05 -0.346945556400458 17.155719849990422 3.049175316321957 0 0 0 +180 1 5 0.425 3.0612098947390933 14.131917293649405 1.1478771488772814 0 0 0 +181 1 1 1.575 2.672352132552927 16.44233222954824 -0.00048893065522293 0 0 0 +191 1 1 1.575 0.06419990661849084 11.959390323353912 -0.00048797842278069936 0 0 0 +201 1 1 1.575 7.816641875063663 10.455148129073955 0.00048818953626117434 0 0 0 +204 1 3 -1.05 8.408577227951085 8.935464962316985 1.1390566838791205 0 0 0 +206 1 4 -0.95 6.121550861150677 10.528133978087624 1.0092915549395602 0 0 0 +207 1 3 -1.05 4.775356948747724 12.520969520585183 3.2389861172974115 0 0 0 +209 1 3 -1.05 7.364900489893909 12.672798839171666 3.0491755919080212 0 0 0 +210 1 5 0.425 5.61305603153591 9.649082778114593 1.1481433153431642 0 0 0 +212 1 2 2.1 5.998088138826827 13.347473093028178 2.757308731405036 0 0 0 +213 1 2 2.1 6.077891819286037 16.30260193787044 2.765289869913314 0 0 0 +214 1 3 -1.05 5.856751818752759 13.418382710971027 1.1390384696391855 0 0 0 +215 1 3 -1.05 6.212125957670043 16.209803423133504 1.1404002385609324 0 0 0 +217 1 3 -1.05 7.383520043027556 
17.003868000384625 3.2389922772992907 0 0 0 +218 1 3 -1.05 5.963690997777473 14.828279007765143 3.2407484945718856 0 0 0 +219 1 3 -1.05 4.81306751453938 17.15571169886196 3.049176642461564 0 0 0 +221 1 1 1.575 7.832359959333601 16.442328863944606 -0.00048820826442330656 0 0 0 +231 1 1 1.575 5.224192755472295 11.9593926983067 -0.0004895452984730042 0 0 0 +285 1 3 -1.05 -1.5560434921058235 11.7268960105203 1.1403925019894405 1 0 0 +291 1 1 1.575 0.10481734176789637 14.938036477244676 0.0004878663631728841 1 0 0 +296 1 4 -0.95 -1.5903037992881508 15.011010762815896 1.0092720957491625 1 0 0 +300 1 5 0.425 -2.098773053271172 14.131915286048635 1.1479073358599035 1 0 0 +823 1 2 2.1 0.16231342663308013 14.895938457827786 6.424582090151786 0 0 0 +825 1 3 -1.05 0.028099381747342278 14.988753389323616 8.049475282091281 0 0 0 +826 1 4 -0.95 2.6705230911081035 16.187530512616757 8.180596938009801 0 0 0 +827 1 3 -1.05 4.01669641754593 14.194683450538559 5.950878753854479 0 0 0 +828 1 3 -1.05 0.27654220878196156 16.37029094078053 5.94912022193599 0 0 0 +829 1 3 -1.05 1.4271792302815047 14.04282401595217 6.140702235142053 0 0 0 +830 1 5 0.425 3.1790125843134582 17.06663589040934 8.04194593753139 0 0 0 +832 1 2 2.1 2.79396271801315 13.368107134362024 6.432543570568198 0 0 0 +833 1 2 2.1 2.714155029782006 10.41298252966632 6.424585299216295 0 0 0 +834 1 3 -1.05 2.9353214295031798 13.297270348704007 8.050805811867614 0 0 0 +835 1 3 -1.05 2.5799322816150294 10.50584016157514 8.049490292842309 0 0 0 +836 1 4 -0.95 0.06232056055442037 11.704568865571552 8.180573482122442 0 0 0 +837 1 3 -1.05 1.4085436097406188 9.711773557309051 5.950884536837345 0 0 0 +838 1 3 -1.05 2.8283748897425482 11.88747503498794 5.949130056988739 0 0 0 +839 1 3 -1.05 3.9789992841497615 9.559918964820955 6.140704411314749 0 0 0 +840 1 5 0.425 0.570841912948957 12.58364508171616 8.04165770071049 0 0 0 +863 1 2 2.1 5.322334696359004 14.895930247507533 6.424585536161565 0 0 0 +865 1 3 -1.05 5.18810009801547 
14.988752969144539 8.049478754913183 0 0 0 +868 1 3 -1.05 5.4365436852018405 16.370313581914186 5.949121173646114 0 0 0 +869 1 3 -1.05 6.587166884334117 14.04283169075152 6.140700804578795 0 0 0 +870 1 5 0.425 -12.300970922825949 17.06663390893986 8.041972903281911 1 0 0 +876 1 4 -0.95 5.222326990852581 11.704578679828789 8.180576328926842 0 0 0 +877 1 3 -1.05 6.5685219413996165 9.71176198584331 5.950884504654278 0 0 0 +880 1 5 0.425 5.7308529675779845 12.58364688731844 8.041700994215164 0 0 0 +947 1 3 -1.05 -1.14327593789511 14.194699627136107 5.950876109055651 1 0 0 +959 1 3 -1.05 -1.1809884299906663 9.55991198753285 6.140702871950106 1 0 0 Velocities -1 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -2 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -3 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -4 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -5 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -6 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -7 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -8 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -9 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -10 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -11 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -12 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -13 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -14 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -15 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -16 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -17 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -18 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -19 0.0000000000000000e+00 
0.0000000000000000e+00 0.0000000000000000e+00 -20 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -21 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -22 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -23 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -24 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -25 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -26 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -27 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -28 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -29 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -30 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -31 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -32 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -33 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -34 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -35 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -36 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -37 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -38 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -39 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -40 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -41 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -42 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -43 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -44 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -45 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -46 0.0000000000000000e+00 0.0000000000000000e+00 
0.0000000000000000e+00 -47 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -48 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -49 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -50 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -51 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -52 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -53 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -54 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -55 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -56 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -57 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -58 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -59 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -60 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -61 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -62 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -63 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -64 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -65 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -66 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -67 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -68 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -69 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -70 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -71 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -72 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -73 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -74 
0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -75 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -76 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -77 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -78 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -79 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -80 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -81 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -82 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -83 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -84 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -85 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -86 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -87 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -88 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -89 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -90 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -91 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -92 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -93 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -94 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -95 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -96 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -97 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -98 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -99 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -100 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -101 0.0000000000000000e+00 
0.0000000000000000e+00 0.0000000000000000e+00 -102 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -103 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -104 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -105 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -106 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -107 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -108 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -109 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -110 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -111 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -112 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -113 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -114 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -115 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -116 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -117 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -118 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -119 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -120 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -121 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -122 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -123 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -124 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -125 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -126 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -127 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -128 0.0000000000000000e+00 
0.0000000000000000e+00 0.0000000000000000e+00 -129 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -130 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -131 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -132 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -133 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -134 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -135 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -136 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -137 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -138 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -139 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -140 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -141 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -142 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -143 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -144 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -145 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -146 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -147 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -148 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -149 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -150 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -151 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -152 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -153 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -154 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -155 0.0000000000000000e+00 
0.0000000000000000e+00 0.0000000000000000e+00 -156 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -157 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -158 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -159 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -160 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -161 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -162 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -163 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -164 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -165 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -166 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -167 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -168 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -169 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -170 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -171 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -172 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -173 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -174 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -175 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -176 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -177 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -178 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -179 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -180 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -181 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -182 0.0000000000000000e+00 
0.0000000000000000e+00 0.0000000000000000e+00 -183 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -184 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -185 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -186 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -187 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -188 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -189 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -190 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -191 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -192 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -193 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -194 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -195 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -196 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -197 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -198 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -199 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -200 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -201 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -202 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -203 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -204 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -205 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -206 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -207 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -208 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -209 0.0000000000000000e+00 
0.0000000000000000e+00 0.0000000000000000e+00 -210 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -211 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -212 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -213 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -214 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -215 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -216 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -217 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -218 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -219 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -220 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -221 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -222 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -223 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -224 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -225 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -226 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -227 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -228 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -229 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -230 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -231 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -232 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -233 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -234 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -235 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -236 0.0000000000000000e+00 
0.0000000000000000e+00 0.0000000000000000e+00 -237 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -238 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -239 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -240 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -241 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -242 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -243 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -244 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -245 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -246 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -247 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -248 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -249 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -250 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -251 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -252 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -253 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -254 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -255 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -256 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -257 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -258 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -259 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -260 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -261 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -262 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -263 0.0000000000000000e+00 
0.0000000000000000e+00 0.0000000000000000e+00 -264 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -265 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -266 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -267 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -268 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -269 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -270 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -271 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -272 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -273 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -274 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -275 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -276 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -277 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -278 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -279 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -280 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -281 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -282 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -283 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -284 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -285 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -286 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -287 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -288 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -289 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -290 0.0000000000000000e+00 
0.0000000000000000e+00 0.0000000000000000e+00 -291 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -292 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -293 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -294 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -295 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -296 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -297 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -298 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -299 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -300 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -301 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -302 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -303 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -304 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -305 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -306 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -307 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -308 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -309 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -310 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -311 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -312 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -313 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -314 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -315 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -316 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -317 0.0000000000000000e+00 
0.0000000000000000e+00 0.0000000000000000e+00 -318 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -319 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -320 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -321 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -322 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -323 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -324 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -325 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -326 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -327 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -328 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -329 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -330 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -331 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -332 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -333 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -334 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -335 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -336 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -337 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -338 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -339 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -340 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -341 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -342 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -343 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -344 0.0000000000000000e+00 
0.0000000000000000e+00 0.0000000000000000e+00 -345 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -346 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -347 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -348 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -349 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -350 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -351 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -352 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -353 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -354 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -355 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -356 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -357 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -358 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -359 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -360 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -361 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -362 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -363 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -364 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -365 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -366 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -367 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -368 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -369 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -370 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -371 0.0000000000000000e+00 
0.0000000000000000e+00 0.0000000000000000e+00 -372 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -373 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -374 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -375 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -376 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -377 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -378 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -379 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -380 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -381 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -382 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -383 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -384 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -385 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -386 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -387 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -388 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -389 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -390 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -391 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -392 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -393 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -394 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -395 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -396 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -397 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -398 0.0000000000000000e+00 
0.0000000000000000e+00 0.0000000000000000e+00 -399 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -400 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -401 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -402 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -403 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -404 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -405 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -406 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -407 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -408 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -409 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -410 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -411 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -412 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -413 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -414 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -415 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -416 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -417 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -418 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -419 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -420 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -421 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -422 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -423 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -424 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -425 0.0000000000000000e+00 
0.0000000000000000e+00 0.0000000000000000e+00 -426 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -427 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -428 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -429 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -430 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -431 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -432 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -433 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -434 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -435 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -436 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -437 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -438 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -439 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -440 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -441 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -442 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -443 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -444 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -445 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -446 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -447 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -448 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -449 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -450 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -451 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -452 0.0000000000000000e+00 
0.0000000000000000e+00 0.0000000000000000e+00 -453 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -454 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -455 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -456 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -457 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -458 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -459 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -460 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -461 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -462 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -463 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -464 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -465 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -466 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -467 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -468 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -469 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -470 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -471 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -472 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -473 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -474 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -475 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -476 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -477 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -478 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -479 0.0000000000000000e+00 
0.0000000000000000e+00 0.0000000000000000e+00 -480 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -481 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -482 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -483 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -484 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -485 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -486 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -487 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -488 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -489 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -490 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -491 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -492 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -493 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -494 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -495 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -496 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -497 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -498 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -499 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -500 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -501 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -502 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -503 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -504 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -505 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -506 0.0000000000000000e+00 
0.0000000000000000e+00 0.0000000000000000e+00 -507 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -508 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -509 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -510 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -511 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -512 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -513 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -514 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -515 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -516 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -517 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -518 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -519 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -520 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -521 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -522 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -523 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -524 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -525 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -526 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -527 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -528 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -529 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -530 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -531 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -532 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -533 0.0000000000000000e+00 
0.0000000000000000e+00 0.0000000000000000e+00 -534 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -535 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -536 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -537 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -538 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -539 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -540 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -541 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -542 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -543 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -544 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -545 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -546 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -547 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -548 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -549 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -550 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -551 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -552 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -553 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -554 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -555 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -556 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -557 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -558 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -559 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -560 0.0000000000000000e+00 
0.0000000000000000e+00 0.0000000000000000e+00 -561 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -562 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -563 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -564 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -565 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -566 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -567 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -568 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -569 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -570 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -571 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -572 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -573 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -574 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -575 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -576 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -577 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -578 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -579 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -580 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -581 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -582 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -583 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -584 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -585 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -586 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -587 0.0000000000000000e+00 
0.0000000000000000e+00 0.0000000000000000e+00 -588 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -589 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -590 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -591 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -592 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -593 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -594 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -595 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -596 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -597 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -598 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -599 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -600 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -601 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -602 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -603 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -604 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -605 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -606 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -607 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -608 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -609 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -610 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -611 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -612 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -613 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -614 0.0000000000000000e+00 
0.0000000000000000e+00 0.0000000000000000e+00 -615 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -616 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -617 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -618 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -619 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -620 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -621 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -622 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -623 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -624 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -625 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -626 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -627 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -628 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -629 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -630 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -631 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -632 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -633 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -634 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -635 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -636 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -637 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -638 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -639 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -640 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -641 0.0000000000000000e+00 
0.0000000000000000e+00 0.0000000000000000e+00 -642 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -643 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -644 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -645 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -646 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -647 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -648 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -649 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -650 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -651 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -652 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -653 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -654 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -655 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -656 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -657 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -658 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -659 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -660 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -661 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -662 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -663 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -664 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -665 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -666 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -667 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -668 0.0000000000000000e+00 
0.0000000000000000e+00 0.0000000000000000e+00 -669 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -670 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -671 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -672 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -673 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -674 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -675 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -676 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -677 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -678 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -679 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -680 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -681 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -682 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -683 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -684 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -685 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -686 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -687 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -688 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -689 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -690 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -691 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -692 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -693 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -694 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -695 0.0000000000000000e+00 
0.0000000000000000e+00 0.0000000000000000e+00 -696 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -697 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -698 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -699 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -700 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -701 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -702 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -703 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -704 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -705 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -706 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -707 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -708 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -709 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -710 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -711 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -712 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -713 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -714 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -715 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -716 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -717 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -718 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -719 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -720 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -721 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -722 0.0000000000000000e+00 
0.0000000000000000e+00 0.0000000000000000e+00 -723 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -724 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -725 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -726 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -727 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -728 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -729 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -730 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -731 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -732 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -733 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -734 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -735 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -736 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -737 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -738 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -739 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -740 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -741 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -742 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -743 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -744 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -745 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -746 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -747 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -748 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -749 0.0000000000000000e+00 
0.0000000000000000e+00 0.0000000000000000e+00 -750 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -751 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -752 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -753 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -754 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -755 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -756 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -757 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -758 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -759 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -760 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -761 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -762 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -763 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -764 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -765 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -766 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -767 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -768 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -769 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -770 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -771 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -772 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -773 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -774 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -775 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -776 0.0000000000000000e+00 
0.0000000000000000e+00 0.0000000000000000e+00 -777 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -778 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -779 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -780 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -781 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -782 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -783 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -784 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -785 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -786 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -787 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -788 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -789 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -790 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -791 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -792 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -793 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -794 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -795 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -796 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -797 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -798 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -799 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -800 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -801 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -802 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -803 0.0000000000000000e+00 
0.0000000000000000e+00 0.0000000000000000e+00 -804 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -805 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -806 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -807 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -808 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -809 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -810 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -811 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -812 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -813 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -814 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -815 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -816 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -817 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -818 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -819 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -820 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -821 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -822 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -823 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -824 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -825 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -826 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -827 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -828 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -829 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -830 0.0000000000000000e+00 
0.0000000000000000e+00 0.0000000000000000e+00 -831 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -832 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -833 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -834 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -835 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -836 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -837 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -838 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -839 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -840 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -841 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -842 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -843 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -844 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -845 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -846 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -847 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -848 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -849 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -850 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -851 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -852 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -853 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -854 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -855 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -856 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -857 0.0000000000000000e+00 
0.0000000000000000e+00 0.0000000000000000e+00 -858 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -859 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -860 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -861 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -862 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -863 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -864 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -865 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -866 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -867 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -868 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -869 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -870 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -871 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -872 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -873 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -874 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -875 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -876 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -877 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -878 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -879 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -880 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -881 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -882 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -883 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -884 0.0000000000000000e+00 
0.0000000000000000e+00 0.0000000000000000e+00 -885 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -886 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -887 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -888 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -889 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -890 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -891 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -892 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -893 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -894 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -895 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -896 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -897 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -898 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -899 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -900 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -901 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -902 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -903 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -904 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -905 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -906 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -907 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -908 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -909 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -910 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -911 0.0000000000000000e+00 
0.0000000000000000e+00 0.0000000000000000e+00 -912 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -913 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -914 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -915 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -916 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -917 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -918 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -919 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -920 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -921 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -922 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -923 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -924 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -925 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -926 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -927 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -928 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -929 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -930 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -931 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -932 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -933 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -934 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -935 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -936 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -937 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -938 0.0000000000000000e+00 
0.0000000000000000e+00 0.0000000000000000e+00 -939 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -940 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -941 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -942 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -943 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -944 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -945 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -946 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -947 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -948 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -949 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -950 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -951 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -952 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -953 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -954 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -955 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -956 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -957 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -958 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -959 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -960 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -961 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -962 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -963 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -964 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -965 0.0000000000000000e+00 
0.0000000000000000e+00 0.0000000000000000e+00 -966 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -967 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -968 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -969 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -970 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -971 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -972 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -973 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -974 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -975 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -976 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -977 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -978 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -979 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -980 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -981 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -982 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -983 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -984 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -985 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -986 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -987 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -988 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -989 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -990 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -991 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -992 0.0000000000000000e+00 
0.0000000000000000e+00 0.0000000000000000e+00 -993 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -994 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -995 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -996 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -997 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -998 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -999 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1000 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1001 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1002 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1003 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1004 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1005 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1006 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1007 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1008 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1009 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1010 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1011 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1012 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1013 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1014 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1015 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1016 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1017 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1018 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1019 
0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1020 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1021 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1022 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1023 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1024 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1025 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1026 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1027 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1028 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1029 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1030 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1031 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1032 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1033 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1034 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1035 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1036 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1037 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1038 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1039 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1040 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1041 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1042 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1043 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1044 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1045 0.0000000000000000e+00 0.0000000000000000e+00 
0.0000000000000000e+00 -1046 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1047 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1048 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1049 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1050 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1051 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1052 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1053 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1054 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1055 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1056 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1057 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1058 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1059 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1060 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1061 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1062 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1063 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1064 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1065 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1066 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1067 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1068 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1069 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1070 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1071 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1072 
0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1073 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1074 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1075 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1076 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1077 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1078 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1079 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1080 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1081 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1082 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1083 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1084 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1085 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1086 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1087 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1088 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1089 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1090 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1091 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1092 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1093 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1094 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1095 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1096 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1097 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1098 0.0000000000000000e+00 0.0000000000000000e+00 
0.0000000000000000e+00 -1099 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1100 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1101 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1102 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1103 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1104 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1105 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1106 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1107 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1108 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1109 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1110 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1111 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1112 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1113 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1114 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1115 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1116 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1117 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1118 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1119 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1120 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1121 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1122 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1123 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1124 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1125 
0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1126 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1127 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1128 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1129 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1130 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1131 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1132 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1133 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1134 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1135 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1136 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1137 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1138 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1139 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1140 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1141 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1142 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1143 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1144 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1145 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1146 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1147 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1148 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1149 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1150 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1151 0.0000000000000000e+00 0.0000000000000000e+00 
0.0000000000000000e+00 -1152 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1153 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1154 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1155 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1156 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1157 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1158 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1159 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1160 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1161 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1162 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1163 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1164 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1165 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1166 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1167 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1168 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1169 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1170 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1171 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1172 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1173 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1174 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1175 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1176 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1177 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1178 
0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1179 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1180 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1181 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1182 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1183 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1184 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1185 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1186 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1187 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1188 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1189 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1190 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1191 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1192 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1193 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1194 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1195 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1196 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1197 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1198 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1199 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1200 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1201 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1202 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1203 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1204 0.0000000000000000e+00 0.0000000000000000e+00 
0.0000000000000000e+00 -1205 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1206 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1207 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1208 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1209 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1210 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1211 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1212 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1213 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1214 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1215 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1216 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1217 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1218 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1219 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1220 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1221 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1222 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1223 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1224 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1225 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1226 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1227 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1228 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1229 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1230 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1231 
0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1232 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1233 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1234 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1235 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1236 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1237 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1238 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1239 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1240 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1241 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1242 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1243 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1244 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1245 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1246 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1247 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1248 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1249 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1250 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1251 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1252 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1253 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1254 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1255 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1256 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1257 0.0000000000000000e+00 0.0000000000000000e+00 
0.0000000000000000e+00 -1258 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1259 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1260 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1261 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1262 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1263 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1264 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1265 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1266 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1267 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1268 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1269 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1270 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1271 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1272 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1273 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1274 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1275 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1276 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1277 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1278 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1279 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1280 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +264 0 0 0 +304 0 0 0 +386 0 0 0 +387 0 0 0 +392 0 0 0 +393 0 0 0 +394 0 0 0 +395 0 0 0 +398 0 0 0 +399 0 0 0 +422 0 0 0 +423 0 0 0 +425 0 0 0 +426 0 0 0 +427 0 0 0 +428 0 0 0 +429 0 0 0 +430 0 0 0 +432 0 0 0 +433 0 0 0 +434 0 0 0 +435 0 0 0 +436 0 0 
0 +437 0 0 0 +438 0 0 0 +439 0 0 0 +440 0 0 0 +462 0 0 0 +463 0 0 0 +465 0 0 0 +468 0 0 0 +469 0 0 0 +470 0 0 0 +476 0 0 0 +477 0 0 0 +480 0 0 0 +842 0 0 0 +882 0 0 0 +1003 0 0 0 +1005 0 0 0 +1008 0 0 0 +1011 0 0 0 +1016 0 0 0 +1020 0 0 0 +1041 0 0 0 +1043 0 0 0 +1044 0 0 0 +1045 0 0 0 +1046 0 0 0 +1047 0 0 0 +1048 0 0 0 +1049 0 0 0 +1050 0 0 0 +1051 0 0 0 +1052 0 0 0 +1053 0 0 0 +1054 0 0 0 +1055 0 0 0 +1056 0 0 0 +1057 0 0 0 +1058 0 0 0 +1059 0 0 0 +1060 0 0 0 +1061 0 0 0 +1071 0 0 0 +1086 0 0 0 +1087 0 0 0 +1092 0 0 0 +1093 0 0 0 +1094 0 0 0 +1095 0 0 0 +1098 0 0 0 +1099 0 0 0 +1111 0 0 0 +184 0 0 0 +224 0 0 0 +342 0 0 0 +343 0 0 0 +345 0 0 0 +346 0 0 0 +347 0 0 0 +348 0 0 0 +349 0 0 0 +350 0 0 0 +352 0 0 0 +353 0 0 0 +354 0 0 0 +355 0 0 0 +356 0 0 0 +357 0 0 0 +358 0 0 0 +359 0 0 0 +360 0 0 0 +382 0 0 0 +383 0 0 0 +385 0 0 0 +388 0 0 0 +389 0 0 0 +390 0 0 0 +396 0 0 0 +397 0 0 0 +400 0 0 0 +466 0 0 0 +467 0 0 0 +472 0 0 0 +473 0 0 0 +474 0 0 0 +475 0 0 0 +478 0 0 0 +479 0 0 0 +802 0 0 0 +922 0 0 0 +961 0 0 0 +963 0 0 0 +964 0 0 0 +965 0 0 0 +966 0 0 0 +967 0 0 0 +968 0 0 0 +969 0 0 0 +970 0 0 0 +971 0 0 0 +972 0 0 0 +973 0 0 0 +974 0 0 0 +975 0 0 0 +976 0 0 0 +977 0 0 0 +978 0 0 0 +979 0 0 0 +980 0 0 0 +981 0 0 0 +991 0 0 0 +1001 0 0 0 +1004 0 0 0 +1006 0 0 0 +1007 0 0 0 +1009 0 0 0 +1010 0 0 0 +1012 0 0 0 +1013 0 0 0 +1014 0 0 0 +1015 0 0 0 +1017 0 0 0 +1018 0 0 0 +1019 0 0 0 +1021 0 0 0 +1031 0 0 0 +1081 0 0 0 +1083 0 0 0 +1084 0 0 0 +1085 0 0 0 +1088 0 0 0 +1089 0 0 0 +1090 0 0 0 +1091 0 0 0 +1096 0 0 0 +1097 0 0 0 +1100 0 0 0 +1101 0 0 0 +424 0 0 0 +464 0 0 0 +546 0 0 0 +547 0 0 0 +552 0 0 0 +553 0 0 0 +554 0 0 0 +555 0 0 0 +558 0 0 0 +559 0 0 0 +582 0 0 0 +583 0 0 0 +585 0 0 0 +586 0 0 0 +587 0 0 0 +588 0 0 0 +589 0 0 0 +590 0 0 0 +592 0 0 0 +593 0 0 0 +594 0 0 0 +595 0 0 0 +596 0 0 0 +597 0 0 0 +598 0 0 0 +599 0 0 0 +600 0 0 0 +622 0 0 0 +623 0 0 0 +625 0 0 0 +628 0 0 0 +629 0 0 0 +630 0 0 0 +636 0 0 0 +637 0 0 0 +640 0 0 0 +1002 0 0 0 +1042 0 0 0 +1163 0 
0 0 +1165 0 0 0 +1168 0 0 0 +1171 0 0 0 +1176 0 0 0 +1180 0 0 0 +1201 0 0 0 +1203 0 0 0 +1204 0 0 0 +1205 0 0 0 +1206 0 0 0 +1207 0 0 0 +1208 0 0 0 +1209 0 0 0 +1210 0 0 0 +1211 0 0 0 +1212 0 0 0 +1213 0 0 0 +1214 0 0 0 +1215 0 0 0 +1216 0 0 0 +1217 0 0 0 +1218 0 0 0 +1219 0 0 0 +1220 0 0 0 +1221 0 0 0 +1231 0 0 0 +1246 0 0 0 +1247 0 0 0 +1252 0 0 0 +1253 0 0 0 +1254 0 0 0 +1255 0 0 0 +1258 0 0 0 +1259 0 0 0 +1271 0 0 0 +344 0 0 0 +384 0 0 0 +502 0 0 0 +503 0 0 0 +505 0 0 0 +506 0 0 0 +507 0 0 0 +508 0 0 0 +509 0 0 0 +510 0 0 0 +512 0 0 0 +513 0 0 0 +514 0 0 0 +515 0 0 0 +516 0 0 0 +517 0 0 0 +518 0 0 0 +519 0 0 0 +520 0 0 0 +542 0 0 0 +543 0 0 0 +545 0 0 0 +548 0 0 0 +549 0 0 0 +550 0 0 0 +556 0 0 0 +557 0 0 0 +560 0 0 0 +626 0 0 0 +627 0 0 0 +632 0 0 0 +633 0 0 0 +634 0 0 0 +635 0 0 0 +638 0 0 0 +639 0 0 0 +962 0 0 0 +1082 0 0 0 +1121 0 0 0 +1123 0 0 0 +1124 0 0 0 +1125 0 0 0 +1126 0 0 0 +1127 0 0 0 +1128 0 0 0 +1129 0 0 0 +1130 0 0 0 +1131 0 0 0 +1132 0 0 0 +1133 0 0 0 +1134 0 0 0 +1135 0 0 0 +1136 0 0 0 +1137 0 0 0 +1138 0 0 0 +1139 0 0 0 +1140 0 0 0 +1141 0 0 0 +1151 0 0 0 +1161 0 0 0 +1164 0 0 0 +1166 0 0 0 +1167 0 0 0 +1169 0 0 0 +1170 0 0 0 +1172 0 0 0 +1173 0 0 0 +1174 0 0 0 +1175 0 0 0 +1177 0 0 0 +1178 0 0 0 +1179 0 0 0 +1181 0 0 0 +1191 0 0 0 +1241 0 0 0 +1243 0 0 0 +1244 0 0 0 +1245 0 0 0 +1248 0 0 0 +1249 0 0 0 +1250 0 0 0 +1251 0 0 0 +1256 0 0 0 +1257 0 0 0 +1260 0 0 0 +1261 0 0 0 +66 0 0 0 +67 0 0 0 +72 0 0 0 +73 0 0 0 +74 0 0 0 +75 0 0 0 +78 0 0 0 +79 0 0 0 +102 0 0 0 +103 0 0 0 +105 0 0 0 +106 0 0 0 +107 0 0 0 +108 0 0 0 +109 0 0 0 +110 0 0 0 +112 0 0 0 +113 0 0 0 +114 0 0 0 +115 0 0 0 +116 0 0 0 +117 0 0 0 +118 0 0 0 +119 0 0 0 +120 0 0 0 +142 0 0 0 +143 0 0 0 +145 0 0 0 +148 0 0 0 +149 0 0 0 +150 0 0 0 +156 0 0 0 +157 0 0 0 +160 0 0 0 +584 0 0 0 +624 0 0 0 +683 0 0 0 +685 0 0 0 +688 0 0 0 +691 0 0 0 +696 0 0 0 +700 0 0 0 +721 0 0 0 +723 0 0 0 +724 0 0 0 +725 0 0 0 +726 0 0 0 +727 0 0 0 +728 0 0 0 +729 0 0 0 +730 0 0 0 +731 0 0 0 +732 0 0 0 +733 
0 0 0 +734 0 0 0 +735 0 0 0 +736 0 0 0 +737 0 0 0 +738 0 0 0 +739 0 0 0 +740 0 0 0 +741 0 0 0 +751 0 0 0 +766 0 0 0 +767 0 0 0 +772 0 0 0 +773 0 0 0 +774 0 0 0 +775 0 0 0 +778 0 0 0 +779 0 0 0 +791 0 0 0 +1162 0 0 0 +1202 0 0 0 +22 0 0 0 +23 0 0 0 +25 0 0 0 +26 0 0 0 +27 0 0 0 +28 0 0 0 +29 0 0 0 +30 0 0 0 +32 0 0 0 +33 0 0 0 +34 0 0 0 +35 0 0 0 +36 0 0 0 +37 0 0 0 +38 0 0 0 +39 0 0 0 +40 0 0 0 +62 0 0 0 +63 0 0 0 +65 0 0 0 +68 0 0 0 +69 0 0 0 +70 0 0 0 +76 0 0 0 +77 0 0 0 +80 0 0 0 +146 0 0 0 +147 0 0 0 +152 0 0 0 +153 0 0 0 +154 0 0 0 +155 0 0 0 +158 0 0 0 +159 0 0 0 +504 0 0 0 +544 0 0 0 +641 0 0 0 +643 0 0 0 +644 0 0 0 +645 0 0 0 +646 0 0 0 +647 0 0 0 +648 0 0 0 +649 0 0 0 +650 0 0 0 +651 0 0 0 +652 0 0 0 +653 0 0 0 +654 0 0 0 +655 0 0 0 +656 0 0 0 +657 0 0 0 +658 0 0 0 +659 0 0 0 +660 0 0 0 +661 0 0 0 +671 0 0 0 +681 0 0 0 +684 0 0 0 +686 0 0 0 +687 0 0 0 +689 0 0 0 +690 0 0 0 +692 0 0 0 +693 0 0 0 +694 0 0 0 +695 0 0 0 +697 0 0 0 +698 0 0 0 +699 0 0 0 +701 0 0 0 +711 0 0 0 +761 0 0 0 +763 0 0 0 +764 0 0 0 +765 0 0 0 +768 0 0 0 +769 0 0 0 +770 0 0 0 +771 0 0 0 +776 0 0 0 +777 0 0 0 +780 0 0 0 +781 0 0 0 +1122 0 0 0 +1242 0 0 0 +104 0 0 0 +144 0 0 0 +226 0 0 0 +227 0 0 0 +232 0 0 0 +233 0 0 0 +234 0 0 0 +235 0 0 0 +238 0 0 0 +239 0 0 0 +262 0 0 0 +263 0 0 0 +265 0 0 0 +266 0 0 0 +267 0 0 0 +268 0 0 0 +269 0 0 0 +270 0 0 0 +272 0 0 0 +273 0 0 0 +274 0 0 0 +275 0 0 0 +276 0 0 0 +277 0 0 0 +278 0 0 0 +279 0 0 0 +280 0 0 0 +302 0 0 0 +303 0 0 0 +305 0 0 0 +308 0 0 0 +309 0 0 0 +310 0 0 0 +316 0 0 0 +317 0 0 0 +320 0 0 0 +682 0 0 0 +722 0 0 0 +843 0 0 0 +845 0 0 0 +848 0 0 0 +851 0 0 0 +856 0 0 0 +860 0 0 0 +881 0 0 0 +883 0 0 0 +884 0 0 0 +885 0 0 0 +886 0 0 0 +887 0 0 0 +888 0 0 0 +889 0 0 0 +890 0 0 0 +891 0 0 0 +892 0 0 0 +893 0 0 0 +894 0 0 0 +895 0 0 0 +896 0 0 0 +897 0 0 0 +898 0 0 0 +899 0 0 0 +900 0 0 0 +901 0 0 0 +911 0 0 0 +926 0 0 0 +927 0 0 0 +932 0 0 0 +933 0 0 0 +934 0 0 0 +935 0 0 0 +938 0 0 0 +939 0 0 0 +951 0 0 0 +24 0 0 0 +64 0 0 0 +182 0 0 0 +183 
0 0 0 +185 0 0 0 +186 0 0 0 +187 0 0 0 +188 0 0 0 +189 0 0 0 +190 0 0 0 +192 0 0 0 +193 0 0 0 +194 0 0 0 +195 0 0 0 +196 0 0 0 +197 0 0 0 +198 0 0 0 +199 0 0 0 +200 0 0 0 +222 0 0 0 +223 0 0 0 +225 0 0 0 +228 0 0 0 +229 0 0 0 +230 0 0 0 +236 0 0 0 +237 0 0 0 +240 0 0 0 +306 0 0 0 +307 0 0 0 +312 0 0 0 +313 0 0 0 +314 0 0 0 +315 0 0 0 +318 0 0 0 +319 0 0 0 +642 0 0 0 +762 0 0 0 +801 0 0 0 +803 0 0 0 +804 0 0 0 +805 0 0 0 +806 0 0 0 +807 0 0 0 +808 0 0 0 +809 0 0 0 +810 0 0 0 +811 0 0 0 +812 0 0 0 +813 0 0 0 +814 0 0 0 +815 0 0 0 +816 0 0 0 +817 0 0 0 +818 0 0 0 +819 0 0 0 +820 0 0 0 +821 0 0 0 +831 0 0 0 +841 0 0 0 +844 0 0 0 +846 0 0 0 +847 0 0 0 +849 0 0 0 +850 0 0 0 +852 0 0 0 +853 0 0 0 +854 0 0 0 +855 0 0 0 +857 0 0 0 +858 0 0 0 +859 0 0 0 +861 0 0 0 +871 0 0 0 +921 0 0 0 +923 0 0 0 +924 0 0 0 +925 0 0 0 +928 0 0 0 +929 0 0 0 +930 0 0 0 +931 0 0 0 +936 0 0 0 +937 0 0 0 +940 0 0 0 +941 0 0 0 +202 0 0 0 +242 0 0 0 +282 0 0 0 +363 0 0 0 +365 0 0 0 +368 0 0 0 +371 0 0 0 +376 0 0 0 +380 0 0 0 +401 0 0 0 +403 0 0 0 +404 0 0 0 +405 0 0 0 +406 0 0 0 +407 0 0 0 +408 0 0 0 +409 0 0 0 +410 0 0 0 +411 0 0 0 +412 0 0 0 +413 0 0 0 +414 0 0 0 +415 0 0 0 +416 0 0 0 +417 0 0 0 +418 0 0 0 +419 0 0 0 +420 0 0 0 +421 0 0 0 +431 0 0 0 +441 0 0 0 +443 0 0 0 +444 0 0 0 +445 0 0 0 +446 0 0 0 +447 0 0 0 +448 0 0 0 +449 0 0 0 +450 0 0 0 +452 0 0 0 +453 0 0 0 +454 0 0 0 +455 0 0 0 +456 0 0 0 +457 0 0 0 +458 0 0 0 +459 0 0 0 +461 0 0 0 +471 0 0 0 +904 0 0 0 +944 0 0 0 +1026 0 0 0 +1027 0 0 0 +1032 0 0 0 +1033 0 0 0 +1034 0 0 0 +1035 0 0 0 +1038 0 0 0 +1039 0 0 0 +1062 0 0 0 +1063 0 0 0 +1065 0 0 0 +1066 0 0 0 +1067 0 0 0 +1068 0 0 0 +1069 0 0 0 +1070 0 0 0 +1072 0 0 0 +1073 0 0 0 +1074 0 0 0 +1075 0 0 0 +1076 0 0 0 +1077 0 0 0 +1078 0 0 0 +1079 0 0 0 +1080 0 0 0 +1102 0 0 0 +1103 0 0 0 +1105 0 0 0 +1106 0 0 0 +1108 0 0 0 +1109 0 0 0 +1110 0 0 0 +1112 0 0 0 +1113 0 0 0 +1114 0 0 0 +1115 0 0 0 +1116 0 0 0 +1117 0 0 0 +1118 0 0 0 +1120 0 0 0 +162 0 0 0 +321 0 0 0 +323 0 0 0 +324 0 0 0 +325 0 
0 0 +326 0 0 0 +327 0 0 0 +328 0 0 0 +329 0 0 0 +330 0 0 0 +331 0 0 0 +332 0 0 0 +333 0 0 0 +334 0 0 0 +335 0 0 0 +336 0 0 0 +337 0 0 0 +338 0 0 0 +339 0 0 0 +340 0 0 0 +341 0 0 0 +351 0 0 0 +361 0 0 0 +364 0 0 0 +366 0 0 0 +367 0 0 0 +369 0 0 0 +370 0 0 0 +372 0 0 0 +373 0 0 0 +374 0 0 0 +375 0 0 0 +377 0 0 0 +378 0 0 0 +379 0 0 0 +381 0 0 0 +391 0 0 0 +451 0 0 0 +460 0 0 0 +824 0 0 0 +864 0 0 0 +982 0 0 0 +983 0 0 0 +985 0 0 0 +986 0 0 0 +987 0 0 0 +988 0 0 0 +989 0 0 0 +990 0 0 0 +992 0 0 0 +993 0 0 0 +994 0 0 0 +995 0 0 0 +996 0 0 0 +997 0 0 0 +998 0 0 0 +999 0 0 0 +1000 0 0 0 +1022 0 0 0 +1023 0 0 0 +1025 0 0 0 +1028 0 0 0 +1029 0 0 0 +1030 0 0 0 +1036 0 0 0 +1037 0 0 0 +1040 0 0 0 +1107 0 0 0 +1119 0 0 0 +362 0 0 0 +402 0 0 0 +442 0 0 0 +523 0 0 0 +525 0 0 0 +528 0 0 0 +531 0 0 0 +536 0 0 0 +540 0 0 0 +561 0 0 0 +563 0 0 0 +564 0 0 0 +565 0 0 0 +566 0 0 0 +567 0 0 0 +568 0 0 0 +569 0 0 0 +570 0 0 0 +571 0 0 0 +572 0 0 0 +573 0 0 0 +574 0 0 0 +575 0 0 0 +576 0 0 0 +577 0 0 0 +578 0 0 0 +579 0 0 0 +580 0 0 0 +581 0 0 0 +591 0 0 0 +601 0 0 0 +603 0 0 0 +604 0 0 0 +605 0 0 0 +606 0 0 0 +607 0 0 0 +608 0 0 0 +609 0 0 0 +610 0 0 0 +612 0 0 0 +613 0 0 0 +614 0 0 0 +615 0 0 0 +616 0 0 0 +617 0 0 0 +618 0 0 0 +619 0 0 0 +621 0 0 0 +631 0 0 0 +1064 0 0 0 +1104 0 0 0 +1186 0 0 0 +1187 0 0 0 +1192 0 0 0 +1193 0 0 0 +1194 0 0 0 +1195 0 0 0 +1198 0 0 0 +1199 0 0 0 +1222 0 0 0 +1223 0 0 0 +1225 0 0 0 +1226 0 0 0 +1227 0 0 0 +1228 0 0 0 +1229 0 0 0 +1230 0 0 0 +1232 0 0 0 +1233 0 0 0 +1234 0 0 0 +1235 0 0 0 +1236 0 0 0 +1237 0 0 0 +1238 0 0 0 +1239 0 0 0 +1240 0 0 0 +1262 0 0 0 +1263 0 0 0 +1265 0 0 0 +1266 0 0 0 +1268 0 0 0 +1269 0 0 0 +1270 0 0 0 +1272 0 0 0 +1273 0 0 0 +1274 0 0 0 +1275 0 0 0 +1276 0 0 0 +1277 0 0 0 +1278 0 0 0 +1280 0 0 0 +322 0 0 0 +481 0 0 0 +483 0 0 0 +484 0 0 0 +485 0 0 0 +486 0 0 0 +487 0 0 0 +488 0 0 0 +489 0 0 0 +490 0 0 0 +491 0 0 0 +492 0 0 0 +493 0 0 0 +494 0 0 0 +495 0 0 0 +496 0 0 0 +497 0 0 0 +498 0 0 0 +499 0 0 0 +500 0 0 0 +501 0 0 0 +511 
0 0 0 +521 0 0 0 +524 0 0 0 +526 0 0 0 +527 0 0 0 +529 0 0 0 +530 0 0 0 +532 0 0 0 +533 0 0 0 +534 0 0 0 +535 0 0 0 +537 0 0 0 +538 0 0 0 +539 0 0 0 +541 0 0 0 +551 0 0 0 +611 0 0 0 +620 0 0 0 +984 0 0 0 +1024 0 0 0 +1142 0 0 0 +1143 0 0 0 +1145 0 0 0 +1146 0 0 0 +1147 0 0 0 +1148 0 0 0 +1149 0 0 0 +1150 0 0 0 +1152 0 0 0 +1153 0 0 0 +1154 0 0 0 +1155 0 0 0 +1156 0 0 0 +1157 0 0 0 +1158 0 0 0 +1159 0 0 0 +1160 0 0 0 +1182 0 0 0 +1183 0 0 0 +1185 0 0 0 +1188 0 0 0 +1189 0 0 0 +1190 0 0 0 +1196 0 0 0 +1197 0 0 0 +1200 0 0 0 +1267 0 0 0 +1279 0 0 0 +43 0 0 0 +45 0 0 0 +48 0 0 0 +51 0 0 0 +56 0 0 0 +60 0 0 0 +81 0 0 0 +83 0 0 0 +84 0 0 0 +85 0 0 0 +86 0 0 0 +87 0 0 0 +88 0 0 0 +89 0 0 0 +90 0 0 0 +91 0 0 0 +92 0 0 0 +93 0 0 0 +94 0 0 0 +95 0 0 0 +96 0 0 0 +97 0 0 0 +98 0 0 0 +99 0 0 0 +100 0 0 0 +101 0 0 0 +111 0 0 0 +121 0 0 0 +123 0 0 0 +124 0 0 0 +126 0 0 0 +127 0 0 0 +128 0 0 0 +129 0 0 0 +130 0 0 0 +132 0 0 0 +133 0 0 0 +134 0 0 0 +135 0 0 0 +137 0 0 0 +138 0 0 0 +139 0 0 0 +141 0 0 0 +151 0 0 0 +522 0 0 0 +562 0 0 0 +602 0 0 0 +706 0 0 0 +707 0 0 0 +712 0 0 0 +713 0 0 0 +714 0 0 0 +715 0 0 0 +718 0 0 0 +719 0 0 0 +742 0 0 0 +743 0 0 0 +744 0 0 0 +745 0 0 0 +746 0 0 0 +747 0 0 0 +748 0 0 0 +749 0 0 0 +750 0 0 0 +752 0 0 0 +753 0 0 0 +754 0 0 0 +755 0 0 0 +756 0 0 0 +757 0 0 0 +758 0 0 0 +759 0 0 0 +760 0 0 0 +782 0 0 0 +783 0 0 0 +784 0 0 0 +785 0 0 0 +786 0 0 0 +788 0 0 0 +789 0 0 0 +790 0 0 0 +792 0 0 0 +793 0 0 0 +794 0 0 0 +795 0 0 0 +796 0 0 0 +797 0 0 0 +798 0 0 0 +800 0 0 0 +902 0 0 0 +942 0 0 0 +1224 0 0 0 +1264 0 0 0 +1 0 0 0 +3 0 0 0 +4 0 0 0 +5 0 0 0 +6 0 0 0 +7 0 0 0 +8 0 0 0 +9 0 0 0 +10 0 0 0 +11 0 0 0 +12 0 0 0 +13 0 0 0 +14 0 0 0 +15 0 0 0 +16 0 0 0 +17 0 0 0 +18 0 0 0 +19 0 0 0 +20 0 0 0 +21 0 0 0 +31 0 0 0 +41 0 0 0 +44 0 0 0 +46 0 0 0 +47 0 0 0 +49 0 0 0 +50 0 0 0 +52 0 0 0 +53 0 0 0 +54 0 0 0 +55 0 0 0 +57 0 0 0 +58 0 0 0 +59 0 0 0 +61 0 0 0 +71 0 0 0 +125 0 0 0 +131 0 0 0 +136 0 0 0 +140 0 0 0 +482 0 0 0 +662 0 0 0 +663 0 0 0 +664 0 0 0 +665 0 
0 0 +666 0 0 0 +667 0 0 0 +668 0 0 0 +669 0 0 0 +670 0 0 0 +672 0 0 0 +673 0 0 0 +674 0 0 0 +675 0 0 0 +676 0 0 0 +677 0 0 0 +678 0 0 0 +679 0 0 0 +680 0 0 0 +702 0 0 0 +703 0 0 0 +704 0 0 0 +705 0 0 0 +708 0 0 0 +709 0 0 0 +710 0 0 0 +716 0 0 0 +717 0 0 0 +720 0 0 0 +787 0 0 0 +799 0 0 0 +822 0 0 0 +862 0 0 0 +1144 0 0 0 +1184 0 0 0 +42 0 0 0 +82 0 0 0 +122 0 0 0 +203 0 0 0 +205 0 0 0 +208 0 0 0 +211 0 0 0 +216 0 0 0 +220 0 0 0 +241 0 0 0 +243 0 0 0 +244 0 0 0 +245 0 0 0 +246 0 0 0 +247 0 0 0 +248 0 0 0 +249 0 0 0 +250 0 0 0 +251 0 0 0 +252 0 0 0 +253 0 0 0 +254 0 0 0 +255 0 0 0 +256 0 0 0 +257 0 0 0 +258 0 0 0 +259 0 0 0 +260 0 0 0 +261 0 0 0 +271 0 0 0 +281 0 0 0 +283 0 0 0 +284 0 0 0 +286 0 0 0 +287 0 0 0 +288 0 0 0 +289 0 0 0 +290 0 0 0 +292 0 0 0 +293 0 0 0 +294 0 0 0 +295 0 0 0 +297 0 0 0 +298 0 0 0 +299 0 0 0 +301 0 0 0 +311 0 0 0 +866 0 0 0 +867 0 0 0 +872 0 0 0 +873 0 0 0 +874 0 0 0 +875 0 0 0 +878 0 0 0 +879 0 0 0 +903 0 0 0 +905 0 0 0 +906 0 0 0 +907 0 0 0 +908 0 0 0 +909 0 0 0 +910 0 0 0 +912 0 0 0 +913 0 0 0 +914 0 0 0 +915 0 0 0 +916 0 0 0 +917 0 0 0 +918 0 0 0 +919 0 0 0 +920 0 0 0 +943 0 0 0 +945 0 0 0 +946 0 0 0 +948 0 0 0 +949 0 0 0 +950 0 0 0 +952 0 0 0 +953 0 0 0 +954 0 0 0 +955 0 0 0 +956 0 0 0 +957 0 0 0 +958 0 0 0 +960 0 0 0 +2 0 0 0 +161 0 0 0 +163 0 0 0 +164 0 0 0 +165 0 0 0 +166 0 0 0 +167 0 0 0 +168 0 0 0 +169 0 0 0 +170 0 0 0 +171 0 0 0 +172 0 0 0 +173 0 0 0 +174 0 0 0 +175 0 0 0 +176 0 0 0 +177 0 0 0 +178 0 0 0 +179 0 0 0 +180 0 0 0 +181 0 0 0 +191 0 0 0 +201 0 0 0 +204 0 0 0 +206 0 0 0 +207 0 0 0 +209 0 0 0 +210 0 0 0 +212 0 0 0 +213 0 0 0 +214 0 0 0 +215 0 0 0 +217 0 0 0 +218 0 0 0 +219 0 0 0 +221 0 0 0 +231 0 0 0 +285 0 0 0 +291 0 0 0 +296 0 0 0 +300 0 0 0 +823 0 0 0 +825 0 0 0 +826 0 0 0 +827 0 0 0 +828 0 0 0 +829 0 0 0 +830 0 0 0 +832 0 0 0 +833 0 0 0 +834 0 0 0 +835 0 0 0 +836 0 0 0 +837 0 0 0 +838 0 0 0 +839 0 0 0 +840 0 0 0 +863 0 0 0 +865 0 0 0 +868 0 0 0 +869 0 0 0 +870 0 0 0 +876 0 0 0 +877 0 0 0 +880 0 0 0 +947 0 0 0 +959 0 
0 0 Bonds -1 1 6 10 -2 1 16 20 -3 1 26 30 -4 1 36 40 -5 1 46 50 -6 1 56 60 -7 1 66 70 -8 1 76 80 -9 1 86 90 -10 1 96 100 -11 1 106 110 -12 1 116 120 -13 1 126 130 -14 1 136 140 -15 1 146 150 -16 1 156 160 -17 1 166 170 -18 1 176 180 -19 1 186 190 -20 1 196 200 -21 1 206 210 -22 1 216 220 -23 1 226 230 -24 1 236 240 -25 1 246 250 -26 1 256 260 -27 1 266 270 -28 1 276 280 -29 1 286 290 -30 1 296 300 -31 1 306 310 -32 1 316 320 -33 1 326 330 -34 1 336 340 -35 1 346 350 -36 1 356 360 -37 1 366 370 -38 1 376 380 -39 1 386 390 -40 1 396 400 -41 1 406 410 -42 1 416 420 -43 1 426 430 -44 1 436 440 -45 1 446 450 -46 1 456 460 -47 1 466 470 -48 1 476 480 -49 1 486 490 -50 1 496 500 -51 1 506 510 -52 1 516 520 -53 1 526 530 -54 1 536 540 -55 1 546 550 -56 1 556 560 -57 1 566 570 -58 1 576 580 -59 1 586 590 -60 1 596 600 -61 1 606 610 -62 1 616 620 -63 1 626 630 -64 1 636 640 -65 1 646 650 -66 1 656 660 -67 1 666 670 -68 1 676 680 -69 1 686 690 -70 1 696 700 -71 1 706 710 -72 1 716 720 -73 1 726 730 -74 1 736 740 -75 1 746 750 -76 1 756 760 -77 1 766 770 -78 1 776 780 -79 1 786 790 -80 1 796 800 -81 1 806 810 -82 1 816 820 -83 1 826 830 -84 1 836 840 -85 1 846 850 -86 1 856 860 -87 1 866 870 -88 1 876 880 -89 1 886 890 -90 1 896 900 -91 1 906 910 -92 1 916 920 -93 1 926 930 -94 1 936 940 -95 1 946 950 -96 1 956 960 -97 1 966 970 -98 1 976 980 -99 1 986 990 -100 1 996 1000 -101 1 1006 1010 -102 1 1016 1020 -103 1 1026 1030 -104 1 1036 1040 -105 1 1046 1050 -106 1 1056 1060 -107 1 1066 1070 -108 1 1076 1080 -109 1 1086 1090 -110 1 1096 1100 -111 1 1106 1110 -112 1 1116 1120 -113 1 1126 1130 -114 1 1136 1140 -115 1 1146 1150 -116 1 1156 1160 -117 1 1166 1170 -118 1 1176 1180 -119 1 1186 1190 -120 1 1196 1200 -121 1 1206 1210 -122 1 1216 1220 -123 1 1226 1230 -124 1 1236 1240 -125 1 1246 1250 -126 1 1256 1260 -127 1 1266 1270 -128 1 1276 1280 +1 1 386 390 +2 1 426 430 +3 1 436 440 +4 1 476 480 +5 1 1016 1020 +6 1 1046 1050 +7 1 1056 1060 +8 1 1086 1090 +9 1 346 350 +10 1 356 360 
+11 1 396 400 +12 1 466 470 +13 1 966 970 +14 1 976 980 +15 1 1006 1010 +16 1 1096 1100 +17 1 546 550 +18 1 586 590 +19 1 596 600 +20 1 636 640 +21 1 1176 1180 +22 1 1206 1210 +23 1 1216 1220 +24 1 1246 1250 +25 1 506 510 +26 1 516 520 +27 1 556 560 +28 1 626 630 +29 1 1126 1130 +30 1 1136 1140 +31 1 1166 1170 +32 1 1256 1260 +33 1 66 70 +34 1 106 110 +35 1 116 120 +36 1 156 160 +37 1 696 700 +38 1 726 730 +39 1 736 740 +40 1 766 770 +41 1 26 30 +42 1 36 40 +43 1 76 80 +44 1 146 150 +45 1 646 650 +46 1 656 660 +47 1 686 690 +48 1 776 780 +49 1 226 230 +50 1 266 270 +51 1 276 280 +52 1 316 320 +53 1 856 860 +54 1 886 890 +55 1 896 900 +56 1 926 930 +57 1 186 190 +58 1 196 200 +59 1 236 240 +60 1 306 310 +61 1 806 810 +62 1 816 820 +63 1 846 850 +64 1 936 940 +65 1 376 380 +66 1 406 410 +67 1 416 420 +68 1 446 450 +69 1 456 460 +70 1 1026 1030 +71 1 1066 1070 +72 1 1076 1080 +73 1 1106 1110 +74 1 1116 1120 +75 1 326 330 +76 1 336 340 +77 1 366 370 +78 1 986 990 +79 1 996 1000 +80 1 1036 1040 +81 1 536 540 +82 1 566 570 +83 1 576 580 +84 1 606 610 +85 1 616 620 +86 1 1186 1190 +87 1 1226 1230 +88 1 1236 1240 +89 1 1266 1270 +90 1 1276 1280 +91 1 486 490 +92 1 496 500 +93 1 526 530 +94 1 1146 1150 +95 1 1156 1160 +96 1 1196 1200 +97 1 56 60 +98 1 86 90 +99 1 96 100 +100 1 126 130 +101 1 706 710 +102 1 746 750 +103 1 756 760 +104 1 786 790 +105 1 796 800 +106 1 6 10 +107 1 16 20 +108 1 46 50 +109 1 136 140 +110 1 666 670 +111 1 676 680 +112 1 716 720 +113 1 216 220 +114 1 246 250 +115 1 256 260 +116 1 286 290 +117 1 866 870 +118 1 906 910 +119 1 916 920 +120 1 946 950 +121 1 956 960 +122 1 166 170 +123 1 176 180 +124 1 206 210 +125 1 296 300 +126 1 826 830 +127 1 836 840 +128 1 876 880 diff --git a/tools/msi2lmp/test/reference/ethane-oplsaa.data2 b/tools/msi2lmp/test/reference/ethane-oplsaa.data2 index 7932115e6e..cf058a4ca9 100644 --- a/tools/msi2lmp/test/reference/ethane-oplsaa.data2 +++ b/tools/msi2lmp/test/reference/ethane-oplsaa.data2 @@ -1,4 +1,4 @@ -LAMMPS data 
file via write_data, version 24 Oct 2015-ICMS, timestep = 60 +LAMMPS data file via write_data, version 29 Aug 2024, timestep = 64, units = real 8 atoms 2 atom types @@ -9,13 +9,13 @@ LAMMPS data file via write_data, version 24 Oct 2015-ICMS, timestep = 60 9 dihedrals 1 dihedral types --5.0000000000000000e+00 5.0000000000000000e+00 xlo xhi --5.0000000000000000e+00 5.0000000000000000e+00 ylo yhi --5.0000000000000000e+00 5.0000000000000000e+00 zlo zhi +-5 5 xlo xhi +-5 5 ylo yhi +-5 5 zlo zhi Masses -1 12.0112 +1 12.01115 2 1.00797 Pair Coeffs # lj/cut/coul/cut @@ -39,25 +39,25 @@ Dihedral Coeffs # opls Atoms # full -1 1 1 -1.7999999999999999e-01 4.4520961794662339e+00 -4.8331316055118139e+00 4.9921953697666774e+00 0 1 -1 -2 1 1 -1.7999999999999999e-01 -4.0208267456132498e+00 -4.9408970766661060e+00 -4.9962668603193716e+00 1 1 0 -3 1 2 5.9999999999999998e-02 4.1241637410618237e+00 -3.9187715229527775e+00 4.4953501727331462e+00 0 1 -1 -4 1 2 5.9999999999999998e-02 3.9927847754165149e+00 4.3246628372301563e+00 4.4708516600772406e+00 0 0 -1 -5 1 2 5.9999999999999998e-02 4.0591846610420355e+00 -4.8176278098672096e+00 -3.9904372631272924e+00 0 1 0 -6 1 2 5.9999999999999998e-02 -3.5762405194770461e+00 -4.0716394256255244e+00 -4.5137080084717223e+00 1 1 0 -7 1 2 5.9999999999999998e-02 -3.6936651570105905e+00 4.1740778258698201e+00 -4.4511582014826949e+00 1 0 0 -8 1 2 5.9999999999999998e-02 -3.6211369348857190e+00 4.9967667775234554e+00 3.9920131308240276e+00 1 0 -1 +1 1 1 -0.18 4.450690671089314 -4.83133647736812 4.992428212375868 0 1 -1 +2 1 1 -0.18 -4.022291647324266 -4.942324611094984 -4.995836359730008 1 1 0 +3 1 2 0.06 4.1294147504666165 -3.9160894986014068 4.495489435092607 0 1 -1 +4 1 2 0.06 3.9961668185396193 4.3269540139496785 4.470098784261843 0 0 -1 +5 1 2 0.06 4.059021520856406 -4.816007407764785 -3.990885907539094 0 1 0 +6 1 2 0.06 -3.5752328984263166 -4.071671221696795 -4.5159098704661425 1 1 0 +7 1 2 0.06 -3.6980390300174055 4.172581688097945 
-4.448408184535642 1 0 0 +8 1 2 0.06 -3.6233701851839633 4.991333514478468 3.9918638905405723 1 0 -1 Velocities -1 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -2 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -3 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -4 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -5 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -6 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -7 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 -8 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +1 0 0 0 +2 0 0 0 +3 0 0 0 +4 0 0 0 +5 0 0 0 +6 0 0 0 +7 0 0 0 +8 0 0 0 Bonds diff --git a/tools/msi2lmp/test/runtests.sh b/tools/msi2lmp/test/runtests.sh index b81c8637cd..2cfbcb9f72 100755 --- a/tools/msi2lmp/test/runtests.sh +++ b/tools/msi2lmp/test/runtests.sh @@ -2,8 +2,8 @@ MSI2LMP_LIBRARY=../frc_files VALGRIND='valgrind -v --track-origins=yes --show-reachable=yes --leak-check=full' -MSI2LMP=../src/msi2lmp.exe -LAMMPS=../../../src/lmp_serial +MSI2LMP=../../../build-test/msi2lmp +LAMMPS=../../../build-test/lmp CHECKDATA=./data-compare.pl if [ ! -x $MSI2LMP ] diff --git a/tools/regression-tests/README b/tools/regression-tests/README index 810b96e87c..1342e50310 100644 --- a/tools/regression-tests/README +++ b/tools/regression-tests/README @@ -1,5 +1,5 @@ The script `run_tests.py` in this folder is used to perform regression tests -using in-place example scripts. +using in-place example scripts and provided log files as reference. What this single script does is to launch the selected LAMMPS binary using a testing configuration defined in a `.yaml` file (e.g., `config.yaml`) @@ -19,60 +19,88 @@ within the specified tolerances in the test configuration `config.yaml` file. 
With the current features, users can: + specify which LAMMPS binary version to test (e.g., the version from a commit, or those from `lammps-testing`) - + specify the examples subfolders (thus the reference log files) seperately (e.g. from other LAMMPS versions or commits) - + specify tolerances for individual quantities for any input script to override the global values - + launch tests with `mpirun` with all supported command line features (multiple procs, multiple paritions, and suffices) - + skip certain input files if not interested, or no reference log file exists - + simplify the main LAMMPS builds, as long as a LAMMPS binary is available + + specify the examples subfolders (thus the reference log files) separately (e.g. from other LAMMPS versions or commits), or + + specify a file that lists the example input scripts to test + + specify tolerances for individual quantities for any input script to override the global values in the config file + + launch tests with `mpirun` with all supported command line features (multiple procs, multiple partitions, and suffixes) + + skip certain input files (whose names match specified patterns) if not interested, or packages are not installed, or no reference log file exists + + set a timeout for every input script run if they may take too long + + skip numerical checks if the goal is just to check if the runs do not fail + +Some benefits include: + + + separating regression testing from building LAMMPS + + performing quick and full regression tests + + keeping track of the testing progress to resume the testing from the last checkpoint (skipping completed runs) + + distributing the input list across multiple processes by + splitting the list of input scripts into separate runs (there are ~800 input scripts under the top-level examples) + +Input arguments: + + + the path to a LAMMPS binary (can be relative to the working directory) + + a test configuration file (see tools/regression-tests/config.yaml for an example) + + a 
text file that lists the folders where the input scripts reside and the number of input scripts in each, one folder per line, or + a text file that lists the input scripts, or + the path to the top-level examples + +Output: + + + failure.yaml : a dictionary of the failed runs and reasons + + progress.yaml: full testing results of the tested input scripts with the status (completed, failed or skipped) + with error messages (for failed runs), and walltime (in seconds) + + output.xml : testing results in the JUnit XML format + + run.log : screen output and error of individual runs Limitations: - - input scripts use thermo style multi (e.g., examples/peptide) do not work with the expected thermo output format - - input scripts that require partition runs (e.g. examples/neb) need a separate config file, e.g. "args: --partition 2x1" - - testing accelerator packages (GPU, INTEL, KOKKOS, OPENMP) need separate config files, "args: -sf omp -pk omp 4" - -TODO: - - + keep track of the testing progress to resume the testing from the last checkpoint - + distribute the input list across multiple processes via multiprocessing, or - split the list of input scripts into separate runs (there are 800+ input script under the top-level examples) - + be able to be invoked from run_tests in the lammps-testing infrastruture - + + input scripts that use thermo style multi (e.g., examples/peptide) do not work with the expected thermo output format + + input scripts that require partition runs (e.g. 
examples/neb) need a separate config file, e.g., args: "--partition 3x1" + + testing with accelerator packages (GPU, INTEL, KOKKOS, OPENMP) need separate config files, e.g., args: "-sf omp -pk omp 4" The following Python packages need to be installed into an activated environment: - + python3 -m venv testing-env source testing-env/bin/activate pip install numpy pyyaml junit_xml +For all the supported arguments, run: -Example uses: + python3 tools/regression-tests/run_tests.py -h + +Example uses (aka, tests for this script): 1) Simple use (using the provided tools/regression-tests/config.yaml and the examples/ folder at the top level) - python3 run_tests.py --lmp-bin=/path/to/lmp_binary + python3 run_tests.py --lmp-bin=build/lmp --config-file=tools/regression-tests/config.yaml 2) Use a custom testing configuration python3 run_tests.py --lmp-bin=/path/to/lmp_binary --config-file=/path/to/config/file/config.yaml 3) Specify a list of example folders python3 run_tests.py --lmp-bin=/path/to/lmp_binary --config-file=/path/to/config/file/config.yaml \ - --example-folders="/path/to/examples/folder1;/path/to/examples/folder2" + --example-folders="/path/to/examples/melt;/path/to/examples/rigid" - The example folders can also be loaded from a text file list_subfolders1.txt: + The example subfolders can also be loaded from a text file list_subfolders1.txt: python3 run_tests.py --lmp-bin=/path/to/lmp_binary --config-file=/path/to/config/file/config.yaml \ - --list-input=list_subfolders1.txt --output-file=output1.txt --progress-file=progress1.yaml \ + --list-subfolders=list_subfolders1.txt --output-file=output1.txt --progress-file=progress1.yaml \ --log-file=run1.log - - 4) Test a LAMMPS binary with the whole top-level /examples folder in a LAMMPS source tree - python3 run_tests.py --lmp-bin=/path/to/lmp_binary --examples-top-level=/path/to/lammps/examples - 5) Analyze (dry run) the LAMMPS binary annd whole top-level /examples folder in a LAMMPS source tree + 4) Specify a list of 
example input scripts (e.g. obtained from running tools/regression-tests/get_quick_list.py) + python3 run_tests.py --lmp-bin=/path/to/lmp_binary --config-file=/path/to/config/file/config.yaml \ + --list-input=input_list.txt + + 5) Test a LAMMPS binary with the whole top-level /examples folder in a LAMMPS source tree + python3 run_tests.py --lmp-bin=/path/to/lmp_binary --examples-top-level=/path/to/lammps/examples \ + --config-file=tools/regression-tests/config.yaml + + 6) Analyze the LAMMPS binary and whole top-level /examples folder in a LAMMPS source tree and generate separate input lists for 8 workers: python3 run_tests.py --lmp-bin=/path/to/lmp_binary --examples-top-level=/path/to/lammps/examples \ - --dry-run --num-workers=8 + --analyze --num-workers=8 - This is used for splitting the subfolders into separate input lists and launching different instances - of run_tests.py simultaneously. + The output of this run is 8 files folder-list-[0-7].txt that list the subfolders + and 8 files input-list-[0-7].txt that list the input scripts under the top-level example folders. + With these lists, one can launch multiple instances of run_tests.py simultaneously + each with a list of example subfolders (Case 3), or with a list of input scripts (Case 4). An example of the test configuration `config.yaml` is given as below. @@ -107,17 +135,31 @@ An example of the test configuration `config.yaml` is given as below. 
abs: 1e-2 rel: 1e-4 skip: - [ in.rigid.poems3, - in.rigid.poems4, - in.rigid.poems5, + [ in.displ, + in.displ2, + in.*_imd*, ] nugget: 1.0 epsilon: 1e-16 + timeout: 180 -An example of the list of input scripts in a text file `list_subfolders1.txt` +An example of the list of example subfolders in a text file `list_subfolders1.txt` + + /home/codes/lammps/examples/melt 1 + /home/codes/lammps/examples/body 5 + /home/codes/lammps/examples/PACKAGES/dielectric 2 + /home/codes/lammps/examples/PACKAGES/tally 3 + +where the numbers are the number of input scripts (in.*) in the folders. + + +An example of the list of input scripts in a text file `input_list.txt` + + /home/codes/lammps/examples/melt/in.melt + /home/codes/lammps/examples/body/in.body + /home/codes/lammps/examples/body/in.cubes + /home/codes/lammps/examples/PACKAGES/dielectric/in.confined + /home/codes/lammps/examples/PACKAGES/tally/in.pe + /home/codes/lammps/examples/PACKAGES/tally/in.force -/home/codes/lammps/examples/melt -/home/codes/lammps/examples/body -/home/codes/lammps/examples/PACKAGES/dielectric -/home/codes/lammps/examples/PACKAGES/tally diff --git a/tools/regression-tests/config.yaml b/tools/regression-tests/config.yaml index 24f1ab0d67..dd7ffe0b03 100644 --- a/tools/regression-tests/config.yaml +++ b/tools/regression-tests/config.yaml @@ -3,7 +3,7 @@ nprocs: "4" args: "-cite none" mpiexec: "mpirun" - mpiexec_numproc_flag: "-np" + mpiexec_numproc_flag: "--host localhost:4 -np" tolerance: PotEng: abs: 1e-4 @@ -29,13 +29,10 @@ abs: 1e-2 rel: 1e-4 skip: - [ in.rigid.poems3, - in.rigid.poems4, - in.rigid.poems5, - in.peptide, - in.voronoi, - in.voronoi.2d, - in.voronoi.data, + [ + in.disp, + in.disp2, + in.dos, in.*_imd*, in.bucky-plus-cnt*, ] diff --git a/tools/regression-tests/config_quick.yaml b/tools/regression-tests/config_quick.yaml new file mode 100644 index 0000000000..bc6e19b730 --- /dev/null +++ b/tools/regression-tests/config_quick.yaml @@ -0,0 +1,44 @@ +--- + lmp_binary: "" + nprocs: "1" + 
args: "-cite none" + mpiexec: "" + mpiexec_numproc_flag: "" + tolerance: + PotEng: + abs: 1e-4 + rel: 1e-7 + TotEng: + abs: 1e-4 + rel: 1e-7 + Press: + abs: 1e-4 + rel: 1e-7 + Temp: + abs: 1e-4 + rel: 1e-7 + E_vdwl: + abs: 1e-3 + rel: 1e-7 + overrides: + in.rigid.tnr: + Temp: + abs: 1e-3 + rel: 1e-5 + Press: + abs: 1e-2 + rel: 1e-4 + skip: + [ + in.disp, + in.disp2, + in.dos, + in.*_imd*, + in.bucky-plus-cnt*, + ] + + timeout: 30 + nugget: 1.0 + epsilon: 1e-16 + + diff --git a/tools/regression-tests/config_serial.yaml b/tools/regression-tests/config_serial.yaml new file mode 100644 index 0000000000..c685815ff0 --- /dev/null +++ b/tools/regression-tests/config_serial.yaml @@ -0,0 +1,44 @@ +--- + lmp_binary: "" + nprocs: "1" + args: "-cite none" + mpiexec: "" + mpiexec_numproc_flag: "" + tolerance: + PotEng: + abs: 1e-4 + rel: 1e-7 + TotEng: + abs: 1e-4 + rel: 1e-7 + Press: + abs: 1e-4 + rel: 1e-7 + Temp: + abs: 1e-4 + rel: 1e-7 + E_vdwl: + abs: 1e-3 + rel: 1e-7 + overrides: + in.rigid.tnr: + Temp: + abs: 1e-3 + rel: 1e-5 + Press: + abs: 1e-2 + rel: 1e-4 + skip: + [ + in.disp, + in.disp2, + in.dos, + in.*_imd*, + in.bucky-plus-cnt*, + ] + + timeout: 180 + nugget: 1.0 + epsilon: 1e-16 + + diff --git a/tools/regression-tests/get_quick_list.py b/tools/regression-tests/get_quick_list.py new file mode 100644 index 0000000000..9ebcce0aa2 --- /dev/null +++ b/tools/regression-tests/get_quick_list.py @@ -0,0 +1,281 @@ +#!/usr/bin/env python3 +""" +Find all example input files containing commands changed in this branch versus develop. +Companion script to run_tests.py regression tester. 
+""" + +import os, re, sys, subprocess +from pathlib import Path + +if sys.version_info < (3,5): + raise BaseException("Must use at least Python 3.5") + +# infer top level LAMMPS dir from filename +LAMMPS_DIR = os.path.realpath(os.path.join(os.path.dirname(__file__), '..', '..')) + +# ---------------------------------------------------------------------- + +def changed_files_from_git(branch='develop'): + """ + Return list of changed file from git. + + This function queries git to return the list of changed files on + the current branch relative to a given branch (default is 'develop'). + + param branch: branch to compare with + type branch: string + return: path names of files with changes relative to the repository root + rtype: list of strings + """ + + # get list of changed files relative to the develop branch from git + output = None + try: + output = subprocess.run('git diff --diff-filter=MA --name-status ' + branch, + shell=True, capture_output=True) + except: + pass + + # collect header files to check for styles + # - skip files that don't end in '.h' or '.cpp' + # - skip paths that don't start with 'src/' + # - replace '.cpp' with '.h' w/o checking it exists + headers = [] + # output will have a letter 'A' or 'M' for added or modified files followed by pathname + # append iterms to list and return it + if output: + for changed in output.stdout.decode().split(): + if (changed == 'A') or (changed == 'M'): continue + if not changed.startswith('src/'): continue + if changed.endswith('.h'): headers.append(changed) + if changed.endswith('.cpp'): headers.append(changed.replace('.cpp','.h')) + return headers + +# ---------------------------------------------------------------------- + +def get_command_from_header(headers, topdir="."): + """ + Loop over list of header files and extract style names, if present. + + LAMMPS commands have macros XxxxStyle() that connects a string with a class. 
+ We search the header files for those macros, extract the string and append + it to a list in a dictionary of different types of styles. We skip over known + suffixes and deprecated commands. + + param headers: header files to check for commands + type headers: + return: dictionary with lists of style names + rtype: dict + """ + + styles = {} + styles['command'] = [] + styles['atom'] = [] + styles['compute'] = [] + styles['fix'] = [] + styles['pair'] = [] + styles['body'] = [] + styles['bond'] = [] + styles['angle'] = [] + styles['dihedral'] = [] + styles['improper'] = [] + styles['kspace'] = [] + styles['dump'] = [] + styles['region'] = [] + styles['integrate'] = [] + styles['minimize'] = [] + + # some regex + style_pattern = re.compile(r"(.+)Style\((.+),(.+)\)") + upper = re.compile("[A-Z]+") + gpu = re.compile("(.+)/gpu$") + intel = re.compile("(.+)/intel$") + kokkos = re.compile("(.+)/kk$") + kokkos_skip = re.compile("(.+)/kk/(host|device)$") + omp = re.compile("(.+)/omp$") + opt = re.compile("(.+)/opt$") + removed = re.compile("(.*)Deprecated$") + + for file in headers: + # don't fail if file is not present + try: + with open(os.path.join(topdir,file)) as f: + for line in f: + matches = style_pattern.findall(line) + for m in matches: + # skip over internal styles w/o explicit documentation + style = m[1] + if upper.match(style): + continue + + # skip over suffix styles: + suffix = kokkos_skip.match(style) + if suffix: + continue + suffix = gpu.match(style) + if suffix: + continue + suffix = intel.match(style) + if suffix: + continue + suffix = kokkos.match(style) + if suffix: + continue + suffix = omp.match(style) + if suffix: + continue + suffix = opt.match(style) + if suffix: + continue + deprecated = removed.match(m[2]) + if deprecated: + continue + + # register style and suffix flags + if m[0] == 'Angle': + styles['angle'].append(style) + elif m[0] == 'Atom': + styles['atom'].append(style) + elif m[0] == 'Body': + styles['body'].append(style) + elif m[0] 
== 'Bond': + styles['bond'].applend(style) + elif m[0] == 'Command': + styles['command'].append(style) + elif m[0] == 'Compute': + styles['compute'].append(style) + elif m[0] == 'Dihedral': + styles['dihedral'].append(style) + elif m[0] == 'Dump': + styles['dump'].append(style) + elif m[0] == 'Fix': + styles['fix'].append(style) + elif m[0] == 'Improper': + styles['improper'].append(style) + elif m[0] == 'Integrate': + styles['integrate'].append(style) + elif m[0] == 'KSpace': + styles['kspace'].append(style) + elif m[0] == 'Minimize': + styles['minimize'].append(style) + elif m[0] == 'Pair': + styles['pair'].append(style) + elif m[0] == 'Region': + styles['region'].append(style) + else: + pass + # header file not found or not readable + except: + pass + return styles + +# ---------------------------------------------------------------------- + +def make_regex(styles): + """Convert dictionary with styles into a regular expression to scan input files with + + This will construct a regular expression matching LAMMPS commands. 
Ignores continuation + + param styles: dictionary with style names + type styles: dict + return: combined regular expression string + rtype: string + """ + + restring = "^\\s*(" + if len(styles['command']): + restring += '(' + '|'.join(styles['command']) + ')|' + if len(styles['atom']): + restring += '(atom_style\\s+(' + '|'.join(styles['atom']) + '))|' + if len(styles['compute']): + restring += '(compute\\s+\\S+\\s+\\S+\\s+(' + '|'.join(styles['compute']) + '))|' + if len(styles['fix']): + restring += '(fix\\s+\\S+\\s+\\S+\\s+(' + '|'.join(styles['fix']) + '))|' + if len(styles['pair']): + restring += '(pair_style\\s+(' + '|'.join(styles['pair']) + '))|' + if len(styles['body']): + restring += '(atom_style\\s+body\\s+(' + '|'.join(styles['body']) + '))|' + if len(styles['bond']): + restring += '(bond_style\\s+(' + '|'.join(styles['bond']) + '))|' + if len(styles['angle']): + restring += '(angle_style\\s+(' + '|'.join(styles['angle']) + '))|' + if len(styles['dihedral']): + restring += '(dihedral_style\\s+(' + '|'.join(styles['dihedral']) + '))|' + if len(styles['improper']): + restring += '(improper_style\\s+(' + '|'.join(styles['improper']) + '))|' + if len(styles['kspace']): + restring += '(kspace_style\\s+(' + '|'.join(styles['kspace']) + '))|' + if len(styles['dump']): + restring += '(dump\\s+\\S+\\s+\\S+\\s+(' + '|'.join(styles['dump']) + '))|' + if len(styles['region']): + restring += '(region\\s+(' + '|'.join(styles['region']) + '))|' + if len(styles['integrate']): + restring += '(run_style\\s+(' + '|'.join(styles['integrate']) + '))|' + if len(styles['minimize']): + restring += '(min_style\\s+(' + '|'.join(styles['minimize']) + '))|' + + # replace last (pipe) character with closing parenthesis + length = len(restring) + restring = restring[:length-1] + ')' + # return combined regex string + if length > 5: + return restring + else: + return None + +# ---------------------------------------------------------------------- + +def 
def get_examples_using_styles(regex, examples='examples'):
    """
    Loop through LAMMPS examples tree and find all files starting with 'in.'
    that have at least one line matching the regex.

    param regex: pattern matching LAMMPS commands (it is passed to re.compile())
    type regex: string
    param examples: path where to start looking for examples recursively
    type examples: string
    return: list of matching example inputs
    rtype: list of strings
    """

    commands = re.compile(regex)
    inputs = []
    for filename in Path(examples).rglob('in.*'):
        # rglob() also returns directories with a matching name; those cannot be read
        if not filename.is_file():
            continue
        # replace undecodable bytes so that one odd file does not abort the whole scan
        with open(filename, errors='replace') as f:
            # one matching line is sufficient, any() stops reading at the first hit
            if any(commands.match(line) for line in f):
                inputs.append(str(filename))
    return inputs

# ----------------------------------------------------------------------
# ----------------------------------------------------------------------

if __name__ == "__main__":

    # determine which styles are affected by the changes relative to the
    # upstream development branch and which example inputs use those styles
    headers = changed_files_from_git('origin/develop')
    styles = get_command_from_header(headers, LAMMPS_DIR)
    regex = make_regex(styles)
    if regex:
        inputs = get_examples_using_styles(regex, os.path.join(LAMMPS_DIR,'examples'))
    else:
        # make_regex() returns None when no style was changed
        inputs = []
    print("Found changes to the following styles:")
    print("Commands: ", styles['command'])
    print("Atom styles: ", styles['atom'])
    print("Compute styles: ", styles['compute'])
    print("Fix styles: ", styles['fix'])
    print("Pair styles: ", styles['pair'])
    print("Body styles: ", styles['body'])
    print("Bond styles: ", styles['bond'])
    print("Angle styles: ", styles['angle'])
    print("Dihedral styles: ", styles['dihedral'])
    print("Improper styles: ", styles['improper'])
    print("Kspace styles: ", styles['kspace'])
    print("Dump styles: ", styles['dump'])
    print("Region styles: ", styles['region'])
    print("Integrate styles: ", styles['integrate'])
    print("Minimize styles: ", styles['minimize'])

    print("Example input files affected: ", len(inputs))
    # list.sort() sorts in place and returns None, so print a sorted copy instead
    print("inputs: ", sorted(inputs))
mode 100644 index 0000000000..c18883f375 --- /dev/null +++ b/tools/regression-tests/reference.yaml @@ -0,0 +1,932 @@ +in.granregion.box: { folder: examples/granregion, status: "completed", failed_checks: { abs_diff_failed: 5, rel_diff_failed: 8 }, walltime: 0.0, walltime_norm: 0.0 } +in.granregion.funnel: { folder: examples/granregion, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 106.0, walltime_norm: 17.666666666666668 } +in.granregion.mixer: { folder: examples/granregion, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 84.0, walltime_norm: 14.0 } +in.melt: { folder: examples/melt, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.airebo: { folder: examples/airebo, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 60.0, walltime_norm: 10.0 } +in.airebo-0-0: { folder: examples/airebo, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 5.0, walltime_norm: 0.8333333333333334 } +in.airebo-m: { folder: examples/airebo, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 63.0, walltime_norm: 10.5 } +in.rebo2: { folder: examples/airebo, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 5.0, walltime_norm: 0.8333333333333334 } +in.hybrid: { folder: examples/template, status: "completed", failed_checks: { abs_diff_failed: 3, rel_diff_failed: 3 }, walltime: 7.0, walltime_norm: 1.1666666666666667 } +in.mol-data-mix: { folder: examples/template, status: "completed", failed_checks: { abs_diff_failed: 3, rel_diff_failed: 3 }, walltime: 18.0, walltime_norm: 3.0 } +in.mol-restart-mix: { folder: examples/template, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 45.0, walltime_norm: 7.5 } +in.molecular-mix: { folder: 
examples/template, status: "completed", failed_checks: { abs_diff_failed: 5, rel_diff_failed: 5 }, walltime: 26.0, walltime_norm: 4.333333333333333 } +in.template-mix: { folder: examples/template, status: "completed", failed_checks: { abs_diff_failed: 5, rel_diff_failed: 5 }, walltime: 26.0, walltime_norm: 4.333333333333333 } +in.tmpl-data-mix: { folder: examples/template, status: "completed", failed_checks: { abs_diff_failed: 3, rel_diff_failed: 3 }, walltime: 18.0, walltime_norm: 3.0 } +in.tmpl-restart-mix: { folder: examples/template, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 45.0, walltime_norm: 7.5 } +in.first: { folder: examples/rerun, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 16.0, walltime_norm: 2.6666666666666665 } +in.rdf.first: { folder: examples/rerun, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 16.0, walltime_norm: 2.6666666666666665 } +in.rdf.rerun: { folder: examples/rerun, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 5.0, walltime_norm: 0.8333333333333334 } +in.read_dump: { folder: examples/rerun, status: "failed, no Total wall time in the output, -------------------------------------------------------------------------- +MPI_ABORT was invoked on rank 0 in communicator MPI_COMM_WORLD +with errorcode 1. + +NOTE: invoking MPI_ABORT causes Open MPI to kill all MPI processes. +You may or may not see output from other processes, depending on +exactly when Open MPI kills them. +-------------------------------------------------------------------------- +", walltime: -1 } +in.rerun: { folder: examples/rerun, status: "failed, no Total wall time in the output, -------------------------------------------------------------------------- +MPI_ABORT was invoked on rank 0 in communicator MPI_COMM_WORLD +with errorcode 1. 
+ +NOTE: invoking MPI_ABORT causes Open MPI to kill all MPI processes. +You may or may not see output from other processes, depending on +exactly when Open MPI kills them. +-------------------------------------------------------------------------- +", walltime: -1 } +in.lj.ehex: { folder: examples/HEAT, status: "failed, no Total wall time in the output, timeout (180s expired)", walltime: -1 } +in.lj.hex: { folder: examples/HEAT, status: "failed, no Total wall time in the output, timeout (180s expired)", walltime: -1 } +in.spce.ehex: { folder: examples/HEAT, status: "failed, no Total wall time in the output, timeout (180s expired)", walltime: -1 } +in.spce.hex: { folder: examples/HEAT, status: "failed, no Total wall time in the output, timeout (180s expired)", walltime: -1 } +in.vashishta.inp: { folder: examples/vashishta, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.vashishta.sio2: { folder: examples/vashishta, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.vashishta.table.inp: { folder: examples/vashishta, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.vashishta.table.sio2: { folder: examples/vashishta, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.rigid: { folder: examples/rigid, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.rigid.atomfile: { folder: examples/rigid, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.rigid.atomvar: { folder: examples/rigid, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.rigid.early: { folder: examples/rigid, status: "completed", 
failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.rigid.gravity: { folder: examples/rigid, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 0.0, walltime_norm: 0.0 } +in.rigid.infile: { folder: examples/rigid, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.rigid.molecule: { folder: examples/rigid, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.rigid.nve: { folder: examples/rigid, status: "completed", failed_checks: { abs_diff_failed: 3, rel_diff_failed: 3 }, walltime: 0.0, walltime_norm: 0.0 } +in.rigid.nve.early: { folder: examples/rigid, status: "completed", failed_checks: { abs_diff_failed: 3, rel_diff_failed: 3 }, walltime: 0.0, walltime_norm: 0.0 } +in.rigid.poems: { folder: examples/rigid, status: "completed", failed_checks: { abs_diff_failed: 3, rel_diff_failed: 3 }, walltime: 3.0, walltime_norm: 0.5 } +in.rigid.poems2: { folder: examples/rigid, status: "completed", failed_checks: { abs_diff_failed: 3, rel_diff_failed: 3 }, walltime: 3.0, walltime_norm: 0.5 } +in.rigid.poems3: { folder: examples/rigid, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 4.0, walltime_norm: 0.6666666666666666 } +in.rigid.poems4: { folder: examples/rigid, status: "completed", failed_checks: { abs_diff_failed: 3, rel_diff_failed: 3 }, walltime: 3.0, walltime_norm: 0.5 } +in.rigid.poems5: { folder: examples/rigid, status: "completed", failed_checks: { abs_diff_failed: 3, rel_diff_failed: 3 }, walltime: 3.0, walltime_norm: 0.5 } +in.rigid.property: { folder: examples/rigid, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.rigid.small: { folder: examples/rigid, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 
}, walltime: 0.0, walltime_norm: 0.0 } +in.rigid.small.infile: { folder: examples/rigid, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.rigid.tnr: { folder: examples/rigid, status: "completed", failed_checks: { abs_diff_failed: 18, rel_diff_failed: 22 }, walltime: 21.0, walltime_norm: 3.5 } +in.voronoi: { folder: examples/voronoi, status: "completed, but no Step nor Loop in the output.", walltime: 6.0, walltime_norm: 1.0 } +in.voronoi.2d: { folder: examples/voronoi, status: "completed, but no Step nor Loop in the output.", walltime: 0.0, walltime_norm: 0.0 } +in.voronoi.data: { folder: examples/voronoi, status: "completed, but no Step nor Loop in the output.", walltime: 0.0, walltime_norm: 0.0 } +in.ehex: { folder: examples/KAPPA, status: "failed, no Total wall time in the output, timeout (180s expired)", walltime: -1 } +in.heat: { folder: examples/KAPPA, status: "failed, no Total wall time in the output, timeout (180s expired)", walltime: -1 } +in.heatflux: { folder: examples/KAPPA, status: "failed, no Total wall time in the output, timeout (180s expired)", walltime: -1 } +in.langevin: { folder: examples/KAPPA, status: "failed, no Total wall time in the output, timeout (180s expired)", walltime: -1 } +in.mp: { folder: examples/KAPPA, status: "failed, no Total wall time in the output, timeout (180s expired)", walltime: -1 } +in.pour: { folder: examples/pour, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 20.0, walltime_norm: 3.3333333333333335 } +in.pour.2d: { folder: examples/pour, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 1.0, walltime_norm: 0.16666666666666666 } +in.pour.2d.molecule: { folder: examples/pour, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 2.0, walltime_norm: 0.3333333333333333 } +in.deposit.atom: { folder: examples/deposit, status: 
"completed", failed_checks: { abs_diff_failed: 3, rel_diff_failed: 3 }, walltime: 3.0, walltime_norm: 0.5 } +in.deposit.molecule: { folder: examples/deposit, status: "completed", failed_checks: { abs_diff_failed: 3, rel_diff_failed: 3 }, walltime: 4.0, walltime_norm: 0.6666666666666666 } +in.deposit.molecule.rigid-nve-small: { folder: examples/deposit, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 4.0, walltime_norm: 0.6666666666666666 } +in.deposit.molecule.rigid-nvt-small: { folder: examples/deposit, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 4.0, walltime_norm: 0.6666666666666666 } +in.deposit.molecule.rigid-small: { folder: examples/deposit, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 4.0, walltime_norm: 0.6666666666666666 } +in.deposit.molecule.shake: { folder: examples/deposit, status: "completed", failed_checks: { abs_diff_failed: 3, rel_diff_failed: 3 }, walltime: 4.0, walltime_norm: 0.6666666666666666 } +in.charmmfsw: { folder: examples/charmmfsw, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 17.0, walltime_norm: 2.8333333333333335 } +in.indent: { folder: examples/indent, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 2.0, walltime_norm: 0.3333333333333333 } +in.indent.min: { folder: examples/indent, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.qeq.buck: { folder: examples/qeq, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 21.0, walltime_norm: 3.5 } +in.qeq.reaxff: { folder: examples/qeq, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 0.0, walltime_norm: 0.0 } +in.dreiding: { folder: examples/dreiding, status: "completed, error parsing log.lammps into YAML", walltime: 0.0, 
walltime_norm: 0.0 } +in.22DMH.real: { folder: examples/relres, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 57.0, walltime_norm: 9.5 } +in.22DMH.relres: { folder: examples/relres, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 8.0, walltime_norm: 1.3333333333333333 } +in.22DMH.respa: { folder: examples/relres, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 6.0, walltime_norm: 1.0 } +in.track: { folder: examples/tracker, status: "failed, ERROR: Illegal pair_style command (src/MISC/pair_tracker.cpp:221).", walltime: -1 } +in.pour.drum: { folder: examples/granular, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 10.0, walltime_norm: 1.6666666666666667 } +in.pour.flatwall: { folder: examples/granular, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 10.0, walltime_norm: 1.6666666666666667 } +in.pour.heat: { folder: examples/granular, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 83.0, walltime_norm: 13.833333333333334 } +in.restitution: { folder: examples/granular, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 5.0, walltime_norm: 0.8333333333333334 } +in.micelle: { folder: examples/micelle, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 1 }, walltime: 0.0, walltime_norm: 0.0 } +in.micelle-rigid: { folder: examples/micelle, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.replicate.bond.x: { folder: examples/replicate, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.replicate.bond.x.noloop: { folder: examples/replicate, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, 
walltime: 0.0, walltime_norm: 0.0 } +in.replicate.bond.x.y: { folder: examples/replicate, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.replicate.bond.xy: { folder: examples/replicate, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.replicate.cnt: { folder: examples/replicate, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 8.0, walltime_norm: 1.3333333333333333 } +in.srd.mixture: { folder: examples/srd, status: "completed", failed_checks: { abs_diff_failed: 3, rel_diff_failed: 3 }, walltime: 2.0, walltime_norm: 0.3333333333333333 } +in.srd.pure: { folder: examples/srd, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 4.0, walltime_norm: 0.6666666666666666 } +in.ttm: { folder: examples/ttm, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 11.0, walltime_norm: 1.8333333333333333 } +in.ttm.grid: { folder: examples/ttm, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 11.0, walltime_norm: 1.8333333333333333 } +in.ttm.mod: { folder: examples/ttm, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 4.0, walltime_norm: 0.6666666666666666 } +in.colloid: { folder: examples/multi, status: "completed", failed_checks: { abs_diff_failed: 3, rel_diff_failed: 3 }, walltime: 0.0, walltime_norm: 0.0 } +in.granular: { folder: examples/multi, status: "completed", failed_checks: { abs_diff_failed: 3, rel_diff_failed: 3 }, walltime: 1.0, walltime_norm: 0.16666666666666666 } +in.powerlaw: { folder: examples/multi, status: "completed", failed_checks: { abs_diff_failed: 3, rel_diff_failed: 3 }, walltime: 2.0, walltime_norm: 0.3333333333333333 } +in.msst: { folder: examples/msst, status: "completed", failed_checks: { abs_diff_failed: 0, 
rel_diff_failed: 0 }, walltime: 6.0, walltime_norm: 1.0 } +in.gjf.vfull: { folder: examples/gjf, status: "completed", failed_checks: { abs_diff_failed: 3, rel_diff_failed: 3 }, walltime: 1.0, walltime_norm: 0.16666666666666666 } +in.gjf.vhalf: { folder: examples/gjf, status: "completed", failed_checks: { abs_diff_failed: 3, rel_diff_failed: 3 }, walltime: 1.0, walltime_norm: 0.16666666666666666 } +in.spin.cobalt_fcc: { folder: examples/SPIN/cobalt_fcc, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 2.0, walltime_norm: 0.3333333333333333 } +in.spin.nickel: { folder: examples/SPIN/nickel, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 1.0, walltime_norm: 0.16666666666666666 } +in.spin.nickel_cubic: { folder: examples/SPIN/nickel, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 1.0, walltime_norm: 0.16666666666666666 } +in.spin.cobalt_hcp: { folder: examples/SPIN/cobalt_hcp, status: "completed", failed_checks: { abs_diff_failed: 4, rel_diff_failed: 4 }, walltime: 2.0, walltime_norm: 0.3333333333333333 } +in.spin.iron: { folder: examples/SPIN/iron, status: "completed", failed_checks: { abs_diff_failed: 4, rel_diff_failed: 4 }, walltime: 0.0, walltime_norm: 0.0 } +in.spin.iron_cubic: { folder: examples/SPIN/iron, status: "completed", failed_checks: { abs_diff_failed: 2, rel_diff_failed: 2 }, walltime: 0.0, walltime_norm: 0.0 } +in.gneb.skyrmion: { folder: examples/SPIN/gneb/skyrmion, status: "failed, ERROR: Did not assign all atoms correctly (src/read_data.cpp:1562).", walltime: -1 } +in.gneb.iron: { folder: examples/SPIN/gneb/iron, status: "failed, ERROR: Cannot use NEBSpin with a single replica (src/SPIN/neb_spin.cpp:133).", walltime: -1 } +in.spin.read_data: { folder: examples/SPIN/read_restart, status: "failed, no Total wall time in the output, -------------------------------------------------------------------------- +MPI_ABORT was invoked 
on rank 0 in communicator MPI_COMM_WORLD +with errorcode 1. + +NOTE: invoking MPI_ABORT causes Open MPI to kill all MPI processes. +You may or may not see output from other processes, depending on +exactly when Open MPI kills them. +-------------------------------------------------------------------------- +", walltime: -1 } +in.spin.restart: { folder: examples/SPIN/read_restart, status: "failed, ERROR: Invalid flag in force field section of restart file (src/read_restart.cpp:948).", walltime: -1 } +in.spin.write_restart: { folder: examples/SPIN/read_restart, status: "completed", failed_checks: { abs_diff_failed: 2, rel_diff_failed: 2 }, walltime: 2.0, walltime_norm: 0.3333333333333333 } +in.spin.bfo_min: { folder: examples/SPIN/spinmin, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 6.0, walltime_norm: 1.0 } +in.spin.bfo_min_cg: { folder: examples/SPIN/spinmin, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 7.0, walltime_norm: 1.1666666666666667 } +in.spin.bfo_min_lbfgs: { folder: examples/SPIN/spinmin, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.spin.iron_min: { folder: examples/SPIN/spinmin, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.spin.setforce: { folder: examples/SPIN/setforce_spin, status: "failed, no Total wall time in the output, [fv-az1014-42:16323] *** Process received signal *** +[fv-az1014-42:16323] Signal: Segmentation fault (11) +[fv-az1014-42:16323] Signal code: Address not mapped (1) +[fv-az1014-42:16323] Failing at address: 0x390 +[fv-az1014-42:16323] [ 0] /lib/x86_64-linux-gnu/libc.so.6(+0x42520)[0x7f09e7842520] +[fv-az1014-42:16323] [ 1] /home/runner/work/lammps/lammps/build/lmp(_ZN9LAMMPS_NS11ComputeSpin14compute_vectorEv+0x2d8)[0x5590ad415268] +[fv-az1014-42:16323] [ 2] 
/home/runner/work/lammps/lammps/build/lmp(_ZN9LAMMPS_NS8Variable8evaluateEPcPPNS0_4TreeEi+0x6e7f)[0x5590ad0078ef] +[fv-az1014-42:16323] [ 3] /home/runner/work/lammps/lammps/build/lmp(_ZN9LAMMPS_NS8Variable13compute_equalEi+0x22b)[0x5590ad00d2ab] +[fv-az1014-42:16323] [ 4] /home/runner/work/lammps/lammps/build/lmp(_ZN9LAMMPS_NS6Thermo16compute_variableEv+0x5b)[0x5590acfbfa6b] +[fv-az1014-42:16323] [ 5] /home/runner/work/lammps/lammps/build/lmp(_ZN9LAMMPS_NS6Thermo7computeEi+0x203)[0x5590acfc9dc3] +[fv-az1014-42:16323] [ 6] /home/runner/work/lammps/lammps/build/lmp(_ZN9LAMMPS_NS6Output5setupEi+0x64)[0x5590acf57f14] +[fv-az1014-42:16323] [ 7] /home/runner/work/lammps/lammps/build/lmp(_ZN9LAMMPS_NS3Min5setupEi+0x57d)[0x5590acee421d] +[fv-az1014-42:16323] [ 8] /home/runner/work/lammps/lammps/build/lmp(_ZN9LAMMPS_NS8Minimize7commandEiPPc+0x1d7)[0x5590acee5a67] +[fv-az1014-42:16323] [ 9] /home/runner/work/lammps/lammps/build/lmp(_ZN9LAMMPS_NS5Input15execute_commandEv+0xb1d)[0x5590ace91b9d] +[fv-az1014-42:16323] [10] /home/runner/work/lammps/lammps/build/lmp(_ZN9LAMMPS_NS5Input4fileEv+0x19e)[0x5590ace91f5e] +[fv-az1014-42:16323] [11] /home/runner/work/lammps/lammps/build/lmp(main+0x51)[0x5590ace7ed41] +[fv-az1014-42:16323] [12] /lib/x86_64-linux-gnu/libc.so.6(+0x29d90)[0x7f09e7829d90] +[fv-az1014-42:16323] [13] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0x80)[0x7f09e7829e40] +[fv-az1014-42:16323] [14] /home/runner/work/lammps/lammps/build/lmp(_start+0x25)[0x5590ace834e5] +[fv-az1014-42:16323] *** End of error message *** +Segmentation fault (core dumped) +", walltime: -1 } +in.spin.bfo: { folder: examples/SPIN/bfo, status: "completed", failed_checks: { abs_diff_failed: 2, rel_diff_failed: 2 }, walltime: 7.0, walltime_norm: 1.1666666666666667 } +in.spin.iron_dipole_cut: { folder: examples/SPIN/dipole_spin, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 6.0, walltime_norm: 1.0 } +in.spin.iron_dipole_ewald: { folder: 
examples/SPIN/dipole_spin, status: "completed", failed_checks: { abs_diff_failed: 1, rel_diff_failed: 0 }, walltime: 20.0, walltime_norm: 3.3333333333333335 } +in.spin.iron_dipole_pppm: { folder: examples/SPIN/dipole_spin, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 11.0, walltime_norm: 1.8333333333333333 } +in.spin.iron-nve: { folder: examples/SPIN/test_problems/validation_nve, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 19.0, walltime_norm: 3.1666666666666665 } +in.spin.nvt_lattice: { folder: examples/SPIN/test_problems/validation_nvt, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 95.0, walltime_norm: 15.833333333333334 } +in.spin.nvt_spin: { folder: examples/SPIN/test_problems/validation_nvt, status: "failed, ERROR: Fix langevin period must be > 0.0 (src/fix_langevin.cpp:80).", walltime: -1 } +in.mliap.ace.compute: { folder: examples/mliap, status: "completed, but no Step nor Loop in the output.", walltime: 0.0, walltime_norm: 0.0 } +in.mliap.nn.Cu: { folder: examples/mliap, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 4.0, walltime_norm: 0.6666666666666666 } +in.mliap.nn.Ta06A: { folder: examples/mliap, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 1.0, walltime_norm: 0.16666666666666666 } +in.mliap.pytorch.Ta06A: { folder: examples/mliap, status: "failed, ERROR: Using pair_style mliap model mliappy requires ML-IAP with python support (src/ML-IAP/pair_mliap.cpp:173).", walltime: -1 } +in.mliap.pytorch.ace: { folder: examples/mliap, status: "failed, ERROR: Using pair_style mliap model mliappy requires ML-IAP with python support (src/ML-IAP/pair_mliap.cpp:173).", walltime: -1 } +in.mliap.pytorch.ace.NN: { folder: examples/mliap, status: "failed, ERROR: Using pair_style mliap model mliappy requires ML-IAP with python support 
(src/ML-IAP/pair_mliap.cpp:173).", walltime: -1 } +in.mliap.pytorch.relu1hidden: { folder: examples/mliap, status: "failed, ERROR: Using pair_style mliap model mliappy requires ML-IAP with python support (src/ML-IAP/pair_mliap.cpp:173).", walltime: -1 } +in.mliap.quadratic.compute: { folder: examples/mliap, status: "failed, no Total wall time in the output, munmap_chunk(): invalid pointer +[fv-az1014-42:16535] *** Process received signal *** +[fv-az1014-42:16535] Signal: Aborted (6) +[fv-az1014-42:16535] Signal code: (-6) +corrupted double-linked list +Aborted (core dumped) +", walltime: -1 } +in.mliap.snap.Ta06A: { folder: examples/mliap, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.mliap.snap.WBe.PRB2019: { folder: examples/mliap, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 2.0, walltime_norm: 0.3333333333333333 } +in.mliap.snap.chem: { folder: examples/mliap, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 15.0, walltime_norm: 2.5 } +in.mliap.snap.compute: { folder: examples/mliap, status: "failed, no Total wall time in the output, timeout (180s expired)", walltime: -1 } +in.mliap.snap.quadratic: { folder: examples/mliap, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 1.0, walltime_norm: 0.16666666666666666 } +in.mliap.so3.Ni_Mo: { folder: examples/mliap, status: "completed, but no Step nor Loop in the output.", walltime: 0.0, walltime_norm: 0.0 } +in.mliap.so3.nn.Si: { folder: examples/mliap, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 3.0, walltime_norm: 0.5 } +in.mliap.unified.lj.Ar: { folder: examples/mliap, status: "failed, ERROR: Could not process Python string: .", walltime: -1 } +in.run: { folder: examples/mliap/jax, status: "failed, ERROR: Using pair_style mliap unified requires ML-IAP with python support 
(src/ML-IAP/pair_mliap.cpp:213).", walltime: -1 } +in.eim: { folder: examples/eim, status: "completed", failed_checks: { abs_diff_failed: 2, rel_diff_failed: 2 }, walltime: 3.0, walltime_norm: 0.5 } +in.shear: { folder: examples/shear, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 4.0, walltime_norm: 0.6666666666666666 } +in.shear.void: { folder: examples/shear, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 3.0, walltime_norm: 0.5 } +in.aimd.alone: { folder: examples/mdi, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 0.0, walltime_norm: 0.0 } +in.aimd.driver: { folder: examples/mdi, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'mdi/qm' is part of the MDI package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.aimd.driver.plugin: { folder: examples/mdi, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'mdi/qm' is part of the MDI package which is not enabled in this LAMMPS binary. 
(src/modify.cpp:924)", walltime: -1 } +in.aimd.engine: { folder: examples/mdi, status: "failed, unknown command, package not installed, ERROR: Unknown command: mdi engine (src/input.cpp:314)", walltime: -1 } +in.aimdpy.mm: { folder: examples/mdi, status: "failed, unknown command, package not installed, ERROR: Unknown command: mdi engine (src/input.cpp:314)", walltime: -1 } +in.aimdpy.qm: { folder: examples/mdi, status: "failed, unknown command, package not installed, ERROR: Unknown command: mdi engine (src/input.cpp:314)", walltime: -1 } +in.sequence.python: { folder: examples/mdi, status: "failed, unknown command, package not installed, ERROR: Unknown command: mdi engine (src/input.cpp:314)", walltime: -1 } +in.series.alone: { folder: examples/mdi, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 0.0, walltime_norm: 0.0 } +in.series.driver: { folder: examples/mdi, status: "failed, unknown command, package not installed, ERROR: Unknown command: mdi connect (src/input.cpp:314)", walltime: -1 } +in.series.driver.plugin: { folder: examples/mdi, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'mdi/qm' is part of the MDI package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.series.engine: { folder: examples/mdi, status: "failed, unknown command, package not installed, ERROR: Unknown command: mdi engine (src/input.cpp:314)", walltime: -1 } +in.snapshot.alone: { folder: examples/mdi, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 0.0, walltime_norm: 0.0 } +in.snapshot.driver: { folder: examples/mdi, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'mdi/qm' is part of the MDI package which is not enabled in this LAMMPS binary. 
(src/modify.cpp:924)", walltime: -1 } +in.snapshot.driver.plugin: { folder: examples/mdi, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'mdi/qm' is part of the MDI package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.snapshot.engine: { folder: examples/mdi, status: "failed, unknown command, package not installed, ERROR: Unknown command: mdi engine (src/input.cpp:314)", walltime: -1 } +in.lammps: { folder: examples/PACKAGES/dpd-smooth/2d-diffusion, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 57.0, walltime_norm: 9.5 } +in.lammps: { folder: examples/PACKAGES/dpd-smooth/2d-diffusion-in-shear-flow, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 102.0, walltime_norm: 17.0 } +in.lammps: { folder: examples/PACKAGES/dpd-smooth/equipartition-verification, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 41.0, walltime_norm: 6.833333333333333 } +in.fitpod: { folder: examples/PACKAGES/pod/InP, status: "failed, ERROR: Cannot fit potential without data files. The data paths may not be valid. Please check the data paths in the POD data file. 
(src/ML-POD/fitpod_command.cpp:718).", walltime: -1 } +in.pod: { folder: examples/PACKAGES/pod/InP, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 2.0, walltime_norm: 0.3333333333333333 } +in.fitpod: { folder: examples/PACKAGES/pod/Ta, status: "completed, but no Step nor Loop in the output.", walltime: 0.0, walltime_norm: 0.0 } +in.pod: { folder: examples/PACKAGES/pod/Ta, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.pod.compute: { folder: examples/PACKAGES/pod/Ta, status: "failed, ERROR: Per-atom data too large (src/ML-POD/compute_podd_atom.cpp:62).", walltime: -1 } +in.duplex2: { folder: examples/PACKAGES/cgdna/examples/real_units/oxRNA2/duplex2, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 1 }, walltime: 22.0, walltime_norm: 3.6666666666666665 } +in.duplex2: { folder: examples/PACKAGES/cgdna/examples/real_units/oxRNA2/potential_file, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 1 }, walltime: 19.0, walltime_norm: 3.1666666666666665 } +in.duplex2: { folder: examples/PACKAGES/cgdna/examples/real_units/oxDNA/duplex2, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 21.0, walltime_norm: 3.5 } +in.duplex1: { folder: examples/PACKAGES/cgdna/examples/real_units/oxDNA/duplex1, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 10.0, walltime_norm: 1.6666666666666667 } +in.duplex1: { folder: examples/PACKAGES/cgdna/examples/real_units/oxDNA/potential_file, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 10.0, walltime_norm: 1.6666666666666667 } +in.duplex2: { folder: examples/PACKAGES/cgdna/examples/real_units/oxDNA2/duplex2, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 24.0, walltime_norm: 4.0 } +in.duplex1: { folder: 
examples/PACKAGES/cgdna/examples/real_units/oxDNA2/duplex1, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 11.0, walltime_norm: 1.8333333333333333 } +in.dsring: { folder: examples/PACKAGES/cgdna/examples/real_units/oxDNA2/dsring, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 46.0, walltime_norm: 7.666666666666667 } +in.duplex1: { folder: examples/PACKAGES/cgdna/examples/real_units/oxDNA2/potential_file, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 11.0, walltime_norm: 1.8333333333333333 } +in.duplex4.4type: { folder: examples/PACKAGES/cgdna/examples/real_units/oxDNA2/unique_bp, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 51.0, walltime_norm: 8.5 } +in.duplex4.8type: { folder: examples/PACKAGES/cgdna/examples/real_units/oxDNA2/unique_bp, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 50.0, walltime_norm: 8.333333333333334 } +in.duplex3: { folder: examples/PACKAGES/cgdna/examples/real_units/oxDNA2/duplex3, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 11.0, walltime_norm: 1.8333333333333333 } +in.duplex2: { folder: examples/PACKAGES/cgdna/examples/lj_units/oxRNA2/duplex2, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 19.0, walltime_norm: 3.1666666666666665 } +in.duplex2: { folder: examples/PACKAGES/cgdna/examples/lj_units/oxRNA2/potential_file, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 19.0, walltime_norm: 3.1666666666666665 } +in.duplex2: { folder: examples/PACKAGES/cgdna/examples/lj_units/oxDNA/duplex2, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 22.0, walltime_norm: 3.6666666666666665 } +in.duplex1: { folder: examples/PACKAGES/cgdna/examples/lj_units/oxDNA/duplex1, 
status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 10.0, walltime_norm: 1.6666666666666667 } +in.duplex1: { folder: examples/PACKAGES/cgdna/examples/lj_units/oxDNA/potential_file, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 10.0, walltime_norm: 1.6666666666666667 } +in.duplex2: { folder: examples/PACKAGES/cgdna/examples/lj_units/oxDNA2/duplex2, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 25.0, walltime_norm: 4.166666666666667 } +in.duplex1: { folder: examples/PACKAGES/cgdna/examples/lj_units/oxDNA2/duplex1, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 11.0, walltime_norm: 1.8333333333333333 } +in.dsring: { folder: examples/PACKAGES/cgdna/examples/lj_units/oxDNA2/dsring, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 46.0, walltime_norm: 7.666666666666667 } +in.duplex1: { folder: examples/PACKAGES/cgdna/examples/lj_units/oxDNA2/potential_file, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 11.0, walltime_norm: 1.8333333333333333 } +in.duplex4.4type: { folder: examples/PACKAGES/cgdna/examples/lj_units/oxDNA2/unique_bp, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 50.0, walltime_norm: 8.333333333333334 } +in.duplex4.8type: { folder: examples/PACKAGES/cgdna/examples/lj_units/oxDNA2/unique_bp, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 50.0, walltime_norm: 8.333333333333334 } +in.duplex3: { folder: examples/PACKAGES/cgdna/examples/lj_units/oxDNA2/duplex3, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 11.0, walltime_norm: 1.8333333333333333 } +in.temper_npt: { folder: examples/PACKAGES/temper_npt, status: "failed, ERROR: World variable count doesn't match # of partitions 
(src/variable.cpp:255).", walltime: -1 } +in.peptide-plumed: { folder: examples/PACKAGES/plumed, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'plumed' is part of the PLUMED package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.methanol: { folder: examples/PACKAGES/bocs, status: "completed", failed_checks: { abs_diff_failed: 4, rel_diff_failed: 4 }, walltime: 23.0, walltime_norm: 3.8333333333333335 } +in.pedone.melt: { folder: examples/PACKAGES/pedone, status: "completed", failed_checks: { abs_diff_failed: 3, rel_diff_failed: 2 }, walltime: 13.0, walltime_norm: 2.1666666666666665 } +in.pedone.relax: { folder: examples/PACKAGES/pedone, status: "completed", failed_checks: { abs_diff_failed: 2, rel_diff_failed: 2 }, walltime: 15.0, walltime_norm: 2.5 } +in.methanol_implicit_water: { folder: examples/PACKAGES/local_density/methanol_implicit_water, status: "completed, but no Step nor Loop in the output.", walltime: 62.0, walltime_norm: 10.333333333333334 } +in.benzene_water: { folder: examples/PACKAGES/local_density/benzene_water, status: "completed, but no Step nor Loop in the output.", walltime: 25.0, walltime_norm: 4.166666666666667 } +in.gauss-diel: { folder: examples/PACKAGES/gauss_diel, status: "completed, error parsing log.lammps into YAML", walltime: 8.0, walltime_norm: 1.3333333333333333 } +in.gauss-diel-cg: { folder: examples/PACKAGES/gauss_diel, status: "completed, error parsing log.lammps into YAML", walltime: 3.0, walltime_norm: 0.5 } +in.gauss-diel-split: { folder: examples/PACKAGES/gauss_diel, status: "completed, error parsing log.lammps into YAML", walltime: 3.0, walltime_norm: 0.5 } +in.alloy: { folder: examples/PACKAGES/alchemy, status: "failed, ERROR: World variable count doesn't match # of partitions (src/variable.cpp:255).", walltime: -1 } +in.twowater: { folder: examples/PACKAGES/alchemy, status: "failed, ERROR: World variable count doesn't match # of partitions 
(src/variable.cpp:255).", walltime: -1 } +in.sds-hybrid: { folder: examples/PACKAGES/cgspica/sds-monolayer, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 6.0, walltime_norm: 1.0 } +in.sds-regular: { folder: examples/PACKAGES/cgspica/sds-monolayer, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 6.0, walltime_norm: 1.0 } +in.pegc12e8: { folder: examples/PACKAGES/cgspica/peg-verlet, status: "completed, error parsing log.lammps into YAML", walltime: 69.0, walltime_norm: 11.5 } +in.pegc12e8-angle: { folder: examples/PACKAGES/cgspica/peg-verlet, status: "completed, error parsing log.lammps into YAML", walltime: 69.0, walltime_norm: 11.5 } +in.hkust1: { folder: examples/PACKAGES/mofff, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.hkust1_long: { folder: examples/PACKAGES/mofff, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.e3b-tip4p2005: { folder: examples/PACKAGES/e3b, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 2.0, walltime_norm: 0.3333333333333333 } +in.uf3.Nb: { folder: examples/PACKAGES/uf3, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 0.0, walltime_norm: 0.0 } +in.fep01.lmp: { folder: examples/PACKAGES/fep/CC-CO/fep01, status: "failed, no Total wall time in the output, timeout (180s expired)", walltime: -1 } +in.fep10.lmp: { folder: examples/PACKAGES/fep/CC-CO/fep10, status: "failed, no Total wall time in the output, timeout (180s expired)", walltime: -1 } +in.insertion: { folder: examples/PACKAGES/fep/C7inEthanol/fep01, status: "failed, no Total wall time in the output, timeout (180s expired)", walltime: -1 } +in.deletion: { folder: examples/PACKAGES/fep/C7inEthanol/fep10, status: "failed, no Total wall time in the output, timeout (180s 
expired)", walltime: -1 } +in.fep01.lmp: { folder: examples/PACKAGES/fep/CH4-CF4/fep01, status: "failed, no Total wall time in the output, timeout (180s expired)", walltime: -1 } +in.fep10.lmp: { folder: examples/PACKAGES/fep/CH4-CF4/fep10, status: "failed, no Total wall time in the output, timeout (180s expired)", walltime: -1 } +in.bar10.lmp: { folder: examples/PACKAGES/fep/CH4-CF4/bar10, status: "failed, no Total wall time in the output, timeout (180s expired)", walltime: -1 } +in.bar01.lmp: { folder: examples/PACKAGES/fep/CH4-CF4/bar01, status: "failed, no Total wall time in the output, timeout (180s expired)", walltime: -1 } +in.fep01.lmp: { folder: examples/PACKAGES/fep/CH4hyd/fep01, status: "failed, no Total wall time in the output, timeout (180s expired)", walltime: -1 } +in.fdti01.lmp: { folder: examples/PACKAGES/fep/CH4hyd/fdti01, status: "failed, no Total wall time in the output, timeout (180s expired)", walltime: -1 } +in.fep10.lmp: { folder: examples/PACKAGES/fep/CH4hyd/fep10, status: "failed, no Total wall time in the output, timeout (180s expired)", walltime: -1 } +in.fdti10.lmp: { folder: examples/PACKAGES/fep/CH4hyd/fdti10, status: "failed, no Total wall time in the output, timeout (180s expired)", walltime: -1 } +in.spce.lmp: { folder: examples/PACKAGES/fep/ta, status: "failed, no Total wall time in the output, timeout (180s expired)", walltime: -1 } +in.gap: { folder: examples/PACKAGES/quip, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized pair style 'quip' is part of the ML-QUIP package which is not enabled in this LAMMPS binary. (src/force.cpp:275)", walltime: -1 } +in.molecular: { folder: examples/PACKAGES/quip, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized pair style 'quip' is part of the ML-QUIP package which is not enabled in this LAMMPS binary. 
(src/force.cpp:275)", walltime: -1 } +in.sw: { folder: examples/PACKAGES/quip, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized pair style 'quip' is part of the ML-QUIP package which is not enabled in this LAMMPS binary. (src/force.cpp:275)", walltime: -1 } +in.srp_react: { folder: examples/PACKAGES/srp_react, status: "failed, ERROR: Invalid bond type 0 for pair style srp (src/MISC/pair_srp.cpp:403).", walltime: -1 } +in.spce: { folder: examples/PACKAGES/manybody_table, status: "completed", failed_checks: { abs_diff_failed: 3, rel_diff_failed: 3 }, walltime: 5.0, walltime_norm: 0.8333333333333334 } +in.spce2: { folder: examples/PACKAGES/manybody_table, status: "completed", failed_checks: { abs_diff_failed: 3, rel_diff_failed: 3 }, walltime: 6.0, walltime_norm: 1.0 } +in.spce_sw: { folder: examples/PACKAGES/manybody_table, status: "completed", failed_checks: { abs_diff_failed: 1, rel_diff_failed: 1 }, walltime: 5.0, walltime_norm: 0.8333333333333334 } +in.confined: { folder: examples/PACKAGES/dielectric, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 0.0, walltime_norm: 0.0 } +in.nopbc: { folder: examples/PACKAGES/dielectric, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 0.0, walltime_norm: 0.0 } +in.methane_qtb: { folder: examples/PACKAGES/qtb/methane_qtb, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 9.0, walltime_norm: 1.5 } +in.alpha_quartz_qtb: { folder: examples/PACKAGES/qtb/alpha_quartz_qtb, status: "completed", failed_checks: { abs_diff_failed: 2, rel_diff_failed: 2 }, walltime: 10.0, walltime_norm: 1.6666666666666667 } +in.alpha_quartz_qbmsst: { folder: examples/PACKAGES/qtb/alpha_quartz_qbmsst, status: "completed", failed_checks: { abs_diff_failed: 3, rel_diff_failed: 3 }, walltime: 42.0, walltime_norm: 7.0 } +in.methane_qbmsst: { folder: examples/PACKAGES/qtb/methane_qbmsst, status: 
"completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 97.0, walltime_norm: 16.166666666666668 } +in.tmd: { folder: examples/PACKAGES/tmd, status: "completed, error parsing log.lammps into YAML", walltime: 4.0, walltime_norm: 0.6666666666666666 } +in.meam-spline.Si: { folder: examples/PACKAGES/meam_spline, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 2.0, walltime_norm: 0.3333333333333333 } +in.meam-spline.TiO2: { folder: examples/PACKAGES/meam_spline, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 79.0, walltime_norm: 13.166666666666666 } +in.silicon: { folder: examples/PACKAGES/phonon/dynamical_matrix_command/Silicon, status: "completed, but no Step nor Loop in the output.", walltime: 0.0, walltime_norm: 0.0 } +in.EAM3D: { folder: examples/PACKAGES/phonon/3-3D-FCC-Cu-EAM, status: "failed, no Total wall time in the output, timeout (180s expired)", walltime: -1 } +in.disp: { folder: examples/PACKAGES/phonon/3-3D-FCC-Cu-EAM, status: "skipped", walltime: -2 } +in.disp2: { folder: examples/PACKAGES/phonon/3-3D-FCC-Cu-EAM, status: "skipped", walltime: -2 } +in.dos: { folder: examples/PACKAGES/phonon/3-3D-FCC-Cu-EAM, status: "skipped", walltime: -2 } +in.Ana: { folder: examples/PACKAGES/phonon/1-1D-mono, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 6.0, walltime_norm: 1.0 } +in.disp: { folder: examples/PACKAGES/phonon/1-1D-mono, status: "skipped", walltime: -2 } +in.Ana: { folder: examples/PACKAGES/phonon/2-1D-diatomic, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 17.0, walltime_norm: 2.8333333333333335 } +in.disp: { folder: examples/PACKAGES/phonon/2-1D-diatomic, status: "skipped", walltime: -2 } +in.disp: { folder: examples/PACKAGES/phonon/4-Graphene, status: "skipped", walltime: -2 } +in.graphene: { folder: examples/PACKAGES/phonon/4-Graphene, status: "failed, 
no Total wall time in the output, timeout (180s expired)", walltime: -1 } +in.dpde-shardlow: { folder: examples/PACKAGES/dpd-react/dpde-shardlow, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 0.0, walltime_norm: 0.0 } +in.dpde-vv: { folder: examples/PACKAGES/dpd-react/dpde-vv, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 14.0, walltime_norm: 2.3333333333333335 } +in.dpd-shardlow: { folder: examples/PACKAGES/dpd-react/dpd-shardlow, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 0.0, walltime_norm: 0.0 } +in.dpd-vv: { folder: examples/PACKAGES/dpd-react/dpd-vv, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 11.0, walltime_norm: 1.8333333333333333 } +in.dpdp-shardlow: { folder: examples/PACKAGES/dpd-react/dpdp-shardlow, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 0.0, walltime_norm: 0.0 } +in.multi-lucy: { folder: examples/PACKAGES/dpd-react/multi-lucy, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 1.0, walltime_norm: 0.16666666666666666 } +in.dpdh-shardlow: { folder: examples/PACKAGES/dpd-react/dpdh-shardlow, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 3.0, walltime_norm: 0.5 } +in.dpdrx-shardlow: { folder: examples/PACKAGES/dpd-react/dpdrx-shardlow, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 0.0, walltime_norm: 0.0 } +in.GD: { folder: examples/PACKAGES/flow_gauss, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 1.0, walltime_norm: 0.16666666666666666 } +in.basal: { folder: examples/PACKAGES/basal, status: "failed, no Total wall time in the output, timeout (180s expired)", walltime: -1 } +in.cascade_AlCu: { folder: 
examples/PACKAGES/electron_stopping, status: "failed, ERROR: Must set 'extscalar' when setting 'scalar_flag' for fix electron/stopping/fit. Contact the developer. (src/fix.cpp:135).", walltime: -1 } +in.cascade_SiSi: { folder: examples/PACKAGES/electron_stopping, status: "failed, ERROR: Must set 'extscalar' when setting 'scalar_flag' for fix electron/stopping/fit. Contact the developer. (src/fix.cpp:135).", walltime: -1 } +in.elstop: { folder: examples/PACKAGES/electron_stopping, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 26.0, walltime_norm: 4.333333333333333 } +in.elstop.only: { folder: examples/PACKAGES/electron_stopping, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.chreg-acid: { folder: examples/PACKAGES/charge_regulation, status: "completed", failed_checks: { abs_diff_failed: 1, rel_diff_failed: 1 }, walltime: 1.0, walltime_norm: 0.16666666666666666 } +in.chreg-acid-real: { folder: examples/PACKAGES/charge_regulation, status: "completed", failed_checks: { abs_diff_failed: 1, rel_diff_failed: 1 }, walltime: 0.0, walltime_norm: 0.0 } +in.chreg-polymer: { folder: examples/PACKAGES/charge_regulation, status: "completed", failed_checks: { abs_diff_failed: 1, rel_diff_failed: 1 }, walltime: 1.0, walltime_norm: 0.16666666666666666 } +in.adatom: { folder: examples/PACKAGES/agni, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 16.0, walltime_norm: 2.6666666666666665 } +in.vacancy: { folder: examples/PACKAGES/agni, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 2.0, walltime_norm: 0.3333333333333333 } +in.bucky-plus-cnt: { folder: examples/PACKAGES/imd, status: "skipped", walltime: -2 } +in.bucky-plus-cnt-gpu: { folder: examples/PACKAGES/imd, status: "skipped", walltime: -2 } +in.deca-ala-solv-filter_imd: { folder: examples/PACKAGES/imd, status: "skipped", walltime: 
-2 } +in.deca-ala-solv_imd: { folder: examples/PACKAGES/imd, status: "skipped", walltime: -2 } +in.deca-ala_imd: { folder: examples/PACKAGES/imd, status: "skipped", walltime: -2 } +in.deca-ala_imd-gpu: { folder: examples/PACKAGES/imd, status: "skipped", walltime: -2 } +in.melt_imd: { folder: examples/PACKAGES/imd, status: "skipped", walltime: -2 } +in.melt_imd-gpu: { folder: examples/PACKAGES/imd, status: "skipped", walltime: -2 } +in.first: { folder: examples/PACKAGES/adios/rerun, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized dump style 'custom/adios' is part of the ADIOS package which is not enabled in this LAMMPS binary. (src/output.cpp:776)", walltime: -1 } +in.read_dump: { folder: examples/PACKAGES/adios/rerun, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized reader style 'adios' is part of the ADIOS package which is not enabled in this LAMMPS binary. (src/read_dump.cpp:236)", walltime: -1 } +in.rerun: { folder: examples/PACKAGES/adios/rerun, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized reader style 'adios' is part of the ADIOS package which is not enabled in this LAMMPS binary. (src/read_dump.cpp:236)", walltime: -1 } +in.adios_balance: { folder: examples/PACKAGES/adios/balance, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized dump style 'custom/adios' is part of the ADIOS package which is not enabled in this LAMMPS binary. (src/output.cpp:776)", walltime: -1 } +in.adios_balance2: { folder: examples/PACKAGES/adios/balance, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized reader style 'adios' is part of the ADIOS package which is not enabled in this LAMMPS binary. 
(src/read_dump.cpp:236)", walltime: -1 } +in.bcc0: { folder: examples/PACKAGES/mgpt, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized pair style 'mgpt' is part of the MGPT package which is not enabled in this LAMMPS binary. (src/force.cpp:275)", walltime: -1 } +in.vac0-bcc: { folder: examples/PACKAGES/mgpt, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized pair style 'mgpt' is part of the MGPT package which is not enabled in this LAMMPS binary. (src/force.cpp:275)", walltime: -1 } +in.vacmin-bcc: { folder: examples/PACKAGES/mgpt, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized pair style 'mgpt' is part of the MGPT package which is not enabled in this LAMMPS binary. (src/force.cpp:275)", walltime: -1 } +in.vtk: { folder: examples/PACKAGES/vtk, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized dump style 'vtk' is part of the VTK package which is not enabled in this LAMMPS binary. (src/output.cpp:776)", walltime: -1 } +in.vtp: { folder: examples/PACKAGES/vtk, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized dump style 'vtk' is part of the VTK package which is not enabled in this LAMMPS binary. 
(src/output.cpp:776)", walltime: -1 } +in.dpdext: { folder: examples/PACKAGES/dpd-basic/dpdext, status: "completed", failed_checks: { abs_diff_failed: 2, rel_diff_failed: 2 }, walltime: 1.0, walltime_norm: 0.16666666666666666 } +in.dpd: { folder: examples/PACKAGES/dpd-basic/dpd, status: "completed", failed_checks: { abs_diff_failed: 2, rel_diff_failed: 2 }, walltime: 0.0, walltime_norm: 0.0 } +in.dpdext_tstat: { folder: examples/PACKAGES/dpd-basic/dpdext_tstat, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 30.0, walltime_norm: 5.0 } +in.dpd_tstat: { folder: examples/PACKAGES/dpd-basic/dpd_tstat, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 11.0, walltime_norm: 1.8333333333333333 } +in.dpd_coul_slater_long: { folder: examples/PACKAGES/dpd-basic/dpd_coul_slater_long, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 2.0, walltime_norm: 0.3333333333333333 } +in.piston: { folder: examples/PACKAGES/electrode/piston, status: "failed, no Total wall time in the output, timeout (180s expired)", walltime: -1 } +in.cg: { folder: examples/PACKAGES/electrode/madelung, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 0.0, walltime_norm: 0.0 } +in.eta: { folder: examples/PACKAGES/electrode/madelung, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.eta_cg: { folder: examples/PACKAGES/electrode/madelung, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.eta_mix: { folder: examples/PACKAGES/electrode/madelung, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.ewald-ew2d: { folder: examples/PACKAGES/electrode/madelung, status: "completed, numerical checks skipped due to missing the reference log 
file", walltime: 0.0, walltime_norm: 0.0 } +in.ewald-ew3dc: { folder: examples/PACKAGES/electrode/madelung, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 0.0, walltime_norm: 0.0 } +in.ewald-ffield: { folder: examples/PACKAGES/electrode/madelung, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 0.0, walltime_norm: 0.0 } +in.pppm-ew3dc: { folder: examples/PACKAGES/electrode/madelung, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 0.0, walltime_norm: 0.0 } +in.pppm-ffield: { folder: examples/PACKAGES/electrode/madelung, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 0.0, walltime_norm: 0.0 } +in.ffield: { folder: examples/PACKAGES/electrode/au-aq, status: "failed, no Total wall time in the output, timeout (180s expired)", walltime: -1 } +in.tf: { folder: examples/PACKAGES/electrode/au-aq, status: "failed, no Total wall time in the output, timeout (180s expired)", walltime: -1 } +in.conp: { folder: examples/PACKAGES/electrode/graph-il, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 96.0, walltime_norm: 16.0 } +in.conq: { folder: examples/PACKAGES/electrode/graph-il, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 90.0, walltime_norm: 15.0 } +in.conq2: { folder: examples/PACKAGES/electrode/graph-il, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 62.0, walltime_norm: 10.333333333333334 } +in.etypes: { folder: examples/PACKAGES/electrode/graph-il, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 58.0, walltime_norm: 9.666666666666666 } +in.ffield: { folder: examples/PACKAGES/electrode/graph-il, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 41.0, 
walltime_norm: 6.833333333333333 } +in.ramp: { folder: examples/PACKAGES/electrode/graph-il, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 60.0, walltime_norm: 10.0 } +in.thermo: { folder: examples/PACKAGES/electrode/graph-il, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 57.0, walltime_norm: 9.5 } +in.planar-ewald-ew2d: { folder: examples/PACKAGES/electrode/planar, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.planar-ewald-ew3dc: { folder: examples/PACKAGES/electrode/planar, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.planar-ewald-ffield: { folder: examples/PACKAGES/electrode/planar, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.planar-pppm-ew3dc: { folder: examples/PACKAGES/electrode/planar, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.planar-pppm-ffield: { folder: examples/PACKAGES/electrode/planar, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.convective_pulse: { folder: examples/PACKAGES/atc/drift_diffusion, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.ddm_schrodinger: { folder: examples/PACKAGES/atc/drift_diffusion, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. 
(src/modify.cpp:924)", walltime: -1 } +in.finite_well: { folder: examples/PACKAGES/atc/drift_diffusion, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.no_atoms_ddm: { folder: examples/PACKAGES/atc/drift_diffusion, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.null_material_ddm: { folder: examples/PACKAGES/atc/drift_diffusion, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.poisson1d_noatoms: { folder: examples/PACKAGES/atc/drift_diffusion, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.poisson2d_noatoms: { folder: examples/PACKAGES/atc/drift_diffusion, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.schrodinger-poisson2d_Jconstraint: { folder: examples/PACKAGES/atc/drift_diffusion, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.schrodinger-poisson2d_convective: { folder: examples/PACKAGES/atc/drift_diffusion, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. 
(src/modify.cpp:924)", walltime: -1 } +in.schrodinger-poisson2d_noatoms: { folder: examples/PACKAGES/atc/drift_diffusion, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.cb_biaxial: { folder: examples/PACKAGES/atc/cauchy_born, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.cb_shear: { folder: examples/PACKAGES/atc/cauchy_born, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.cb_unistrain: { folder: examples/PACKAGES/atc/cauchy_born, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.cb_unistrain_eam: { folder: examples/PACKAGES/atc/cauchy_born, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.cb_unistrain_eam_linear: { folder: examples/PACKAGES/atc/cauchy_born, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.cb_unistrain_linear: { folder: examples/PACKAGES/atc/cauchy_born, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. 
(src/modify.cpp:924)", walltime: -1 } +in.cb_volumetric: { folder: examples/PACKAGES/atc/cauchy_born, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.flying_cube: { folder: examples/PACKAGES/atc/cauchy_born, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.ftcb_constV: { folder: examples/PACKAGES/atc/cauchy_born, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.read_xref: { folder: examples/PACKAGES/atc/cauchy_born, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.consistency: { folder: examples/PACKAGES/atc/hardy, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.eam_kernel_convergence: { folder: examples/PACKAGES/atc/hardy, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.eam_unistrain_cell: { folder: examples/PACKAGES/atc/hardy, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. 
(src/modify.cpp:924)", walltime: -1 } +in.eam_unistrain_mesh: { folder: examples/PACKAGES/atc/hardy, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.eam_unistrain_qcylinder: { folder: examples/PACKAGES/atc/hardy, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.eam_unistrain_qsphere: { folder: examples/PACKAGES/atc/hardy, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.eam_unistrain_step: { folder: examples/PACKAGES/atc/hardy, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.eam_volume_stretch: { folder: examples/PACKAGES/atc/hardy, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.eshelby_static: { folder: examples/PACKAGES/atc/hardy, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.nvt: { folder: examples/PACKAGES/atc/hardy, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. 
(src/modify.cpp:924)", walltime: -1 } +in.bar1d_fluids: { folder: examples/PACKAGES/atc/fluids, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.concentration: { folder: examples/PACKAGES/atc/fluids, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.conducting_interface: { folder: examples/PACKAGES/atc/fluids, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.dielectric_interface: { folder: examples/PACKAGES/atc/fluids, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.double_layer: { folder: examples/PACKAGES/atc/fluids, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.liquid_electrostatic: { folder: examples/PACKAGES/atc/fluids, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.opp_force: { folder: examples/PACKAGES/atc/fluids, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. 
(src/modify.cpp:924)", walltime: -1 } +in.poisson: { folder: examples/PACKAGES/atc/fluids, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.shear_flow: { folder: examples/PACKAGES/atc/fluids, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.shear_no_atoms: { folder: examples/PACKAGES/atc/fluids, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.harmonic_bonds: { folder: examples/PACKAGES/atc/molecule, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.polarize: { folder: examples/PACKAGES/atc/molecule, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.quartic_bonds: { folder: examples/PACKAGES/atc/molecule, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.water: { folder: examples/PACKAGES/atc/molecule, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. 
(src/modify.cpp:924)", walltime: -1 } +in.bar1d: { folder: examples/PACKAGES/atc/thermal, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.bar1d_all_atoms: { folder: examples/PACKAGES/atc/thermal, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.bar1d_combined: { folder: examples/PACKAGES/atc/thermal, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.bar1d_flux: { folder: examples/PACKAGES/atc/thermal, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.bar1d_frac_step: { folder: examples/PACKAGES/atc/thermal, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.bar1d_hoover: { folder: examples/PACKAGES/atc/thermal, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.bar1d_interpolate: { folder: examples/PACKAGES/atc/thermal, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. 
(src/modify.cpp:924)", walltime: -1 } +in.bar1d_lumped: { folder: examples/PACKAGES/atc/thermal, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.no_atoms: { folder: examples/PACKAGES/atc/thermal, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.gaussianIC1d_hex: { folder: examples/PACKAGES/atc/mesh, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.gaussianIC2d_hex: { folder: examples/PACKAGES/atc/mesh, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.gaussianIC2d_hex20_uniform: { folder: examples/PACKAGES/atc/mesh, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.gaussianIC2d_hex27_uniform: { folder: examples/PACKAGES/atc/mesh, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.gaussianIC2d_tet: { folder: examples/PACKAGES/atc/mesh, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. 
(src/modify.cpp:924)", walltime: -1 } +in.kernel2d_hex: { folder: examples/PACKAGES/atc/mesh, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.kernel2d_tet: { folder: examples/PACKAGES/atc/mesh, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.mesh2d_tet: { folder: examples/PACKAGES/atc/mesh, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.semicircle: { folder: examples/PACKAGES/atc/mesh, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.bar1d: { folder: examples/PACKAGES/atc/elastic, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.bar1d_damped: { folder: examples/PACKAGES/atc/elastic, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.bar1d_flux: { folder: examples/PACKAGES/atc/elastic, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. 
(src/modify.cpp:924)", walltime: -1 } +in.bar1d_frac_step: { folder: examples/PACKAGES/atc/elastic, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.bar1d_ghost_flux: { folder: examples/PACKAGES/atc/elastic, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.bar1d_thermo_elastic: { folder: examples/PACKAGES/atc/elastic, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.eam_energy: { folder: examples/PACKAGES/atc/elastic, status: "failed, no Total wall time in the output, -------------------------------------------------------------------------- +MPI_ABORT was invoked on rank 0 in communicator MPI_COMM_WORLD +with errorcode 1. + +NOTE: invoking MPI_ABORT causes Open MPI to kill all MPI processes. +You may or may not see output from other processes, depending on +exactly when Open MPI kills them. +-------------------------------------------------------------------------- +", walltime: -1 } +in.electron_density: { folder: examples/PACKAGES/atc/elastic, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.no_atoms: { folder: examples/PACKAGES/atc/elastic, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. 
(src/modify.cpp:924)", walltime: -1 } +in.no_atoms_cb: { folder: examples/PACKAGES/atc/elastic, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.no_atoms_cb_linear: { folder: examples/PACKAGES/atc/elastic, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.bar1d_ttm: { folder: examples/PACKAGES/atc/two_temperature, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.cutout: { folder: examples/PACKAGES/atc/two_temperature, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.gaussianIC_ttm: { folder: examples/PACKAGES/atc/two_temperature, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.no_atoms: { folder: examples/PACKAGES/atc/two_temperature, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.restart: { folder: examples/PACKAGES/atc/two_temperature, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. 
(src/modify.cpp:924)", walltime: -1 } +in.uniform_exchange: { folder: examples/PACKAGES/atc/two_temperature, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.uniform_heating: { folder: examples/PACKAGES/atc/two_temperature, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'atc' is part of the ATC package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.pits: { folder: examples/PACKAGES/latboltz/pit_geometry, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'lb/fluid' is part of the LATBOLTZ package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.polymer: { folder: examples/PACKAGES/latboltz/polymer, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'lb/fluid' is part of the LATBOLTZ package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.confined_colloids: { folder: examples/PACKAGES/latboltz/confined_colloid, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'lb/fluid' is part of the LATBOLTZ package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.trapnewsphere: { folder: examples/PACKAGES/latboltz/diffusingsphere, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'lb/fluid' is part of the LATBOLTZ package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.translocation: { folder: examples/PACKAGES/latboltz/translocation, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'lb/fluid' is part of the LATBOLTZ package which is not enabled in this LAMMPS binary. 
(src/modify.cpp:924)", walltime: -1 } +in.toycar: { folder: examples/PACKAGES/latboltz/toycar, status: "failed, no Total wall time in the output, timeout (180s expired)", walltime: -1 } +in.microrheology: { folder: examples/PACKAGES/latboltz/microrheology, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'lb/fluid' is part of the LATBOLTZ package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.dragtest: { folder: examples/PACKAGES/latboltz/dragforce, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'lb/fluid' is part of the LATBOLTZ package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.planewall: { folder: examples/PACKAGES/latboltz/planewall, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'lb/fluid' is part of the LATBOLTZ package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.compute: { folder: examples/PACKAGES/pace/compute, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.pace.product: { folder: examples/PACKAGES/pace, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 9.0, walltime_norm: 1.5 } +in.pace.recursive: { folder: examples/PACKAGES/pace, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 7.0, walltime_norm: 1.1666666666666667 } +in.addtorque: { folder: examples/PACKAGES/addtorque, status: "completed", failed_checks: { abs_diff_failed: 2, rel_diff_failed: 3 }, walltime: 51.0, walltime_norm: 8.5 } +in.cnp: { folder: examples/PACKAGES/cnp, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 18.0, walltime_norm: 3.0 } +in.CH4fc.ang: { folder: examples/PACKAGES/eff/fixed-core/CH4, status: "completed, numerical checks skipped 
due to missing the reference log file", walltime: 3.0, walltime_norm: 0.5 } +in.CH4fc.bohr: { folder: examples/PACKAGES/eff/fixed-core/CH4, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 0.0, walltime_norm: 0.0 } +in.CH4fc.spe.ang: { folder: examples/PACKAGES/eff/fixed-core/CH4, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 0.0, walltime_norm: 0.0 } +in.CH4fc.spe.bohr: { folder: examples/PACKAGES/eff/fixed-core/CH4, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 0.0, walltime_norm: 0.0 } +in.C2H6fc.ang: { folder: examples/PACKAGES/eff/fixed-core/C2H6, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 2.0, walltime_norm: 0.3333333333333333 } +in.C2H6fc.bohr: { folder: examples/PACKAGES/eff/fixed-core/C2H6, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 0.0, walltime_norm: 0.0 } +in.ch4.dynamics: { folder: examples/PACKAGES/eff/CH4, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.ch4.min: { folder: examples/PACKAGES/eff/CH4, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.ch4_ionized.dynamics: { folder: examples/PACKAGES/eff/CH4, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 16.0, walltime_norm: 2.6666666666666665 } +in.Be-solid.spe: { folder: examples/PACKAGES/eff/Be-solid, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 14.0, walltime_norm: 2.3333333333333335 } +in.adamantane_ionized.nve: { folder: examples/PACKAGES/eff/Auger-Adamantane, status: "failed, ERROR: Lost atoms: original 101 current 100 (src/thermo.cpp:494).", walltime: -1 } +in.SiH4: { folder: examples/PACKAGES/eff/ECP/SiH4, status: 
"completed, error parsing log.lammps into YAML", walltime: 0.0, walltime_norm: 0.0 } +in.SiH4.ang: { folder: examples/PACKAGES/eff/ECP/SiH4, status: "completed, error parsing log.lammps into YAML", walltime: 0.0, walltime_norm: 0.0 } +in.Si2H6: { folder: examples/PACKAGES/eff/ECP/Si2H6, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.Si2H6.ang: { folder: examples/PACKAGES/eff/ECP/Si2H6, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 4.0, walltime_norm: 0.6666666666666666 } +in.SiC: { folder: examples/PACKAGES/eff/ECP/SiC/bulk, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 31.0, walltime_norm: 5.166666666666667 } +in.h2bulk.npt: { folder: examples/PACKAGES/eff/H_plasma, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 46.0, walltime_norm: 7.666666666666667 } +in.h2bulk.nve: { folder: examples/PACKAGES/eff/H_plasma, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 78.0, walltime_norm: 13.0 } +in.h2bulk.nve.ang: { folder: examples/PACKAGES/eff/H_plasma, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 87.0, walltime_norm: 14.5 } +in.Li-dendritic.min: { folder: examples/PACKAGES/eff/Li-dendritic, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 35.0, walltime_norm: 5.833333333333333 } +in.Li-dendritic.nvt: { folder: examples/PACKAGES/eff/Li-dendritic, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 69.0, walltime_norm: 11.5 } +in.Li.ang: { folder: examples/PACKAGES/eff/Li-solid, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 74.0, walltime_norm: 12.333333333333334 } +in.Li.bohr: { folder: examples/PACKAGES/eff/Li-solid, status: 
"completed, numerical checks skipped due to missing the reference log file", walltime: 53.0, walltime_norm: 8.833333333333334 } +in.h2: { folder: examples/PACKAGES/eff/H2, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 4.0, walltime_norm: 0.6666666666666666 } +in.h_atom.spe.ang: { folder: examples/PACKAGES/eff/H, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 0.0, walltime_norm: 0.0 } +in.h_atom.spe.bohr: { folder: examples/PACKAGES/eff/H, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 0.0, walltime_norm: 0.0 } +in.npt_biaxial: { folder: examples/PACKAGES/uef/npt_biaxial, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 3.0, walltime_norm: 0.5 } +in.nvt_uniaxial: { folder: examples/PACKAGES/uef/nvt_uniaxial, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 3.0, walltime_norm: 0.5 } +in.crystal: { folder: examples/PACKAGES/rhok, status: "completed", failed_checks: { abs_diff_failed: 2, rel_diff_failed: 2 }, walltime: 23.0, walltime_norm: 3.8333333333333335 } +in.pinning: { folder: examples/PACKAGES/rhok, status: "failed, ERROR: Cannot open file data.halfhalf: No such file or directory (src/read_data.cpp:367).", walltime: -1 } +in.setup: { folder: examples/PACKAGES/rhok, status: "completed", failed_checks: { abs_diff_failed: 2, rel_diff_failed: 2 }, walltime: 17.0, walltime_norm: 2.8333333333333335 } +in.peptide-colvars: { folder: examples/PACKAGES/colvars, status: "completed, error parsing log.lammps into YAML", walltime: 4.0, walltime_norm: 0.6666666666666666 } +in.peptide-colvars2: { folder: examples/PACKAGES/colvars, status: "completed, error parsing log.lammps into YAML", walltime: 1.0, walltime_norm: 0.16666666666666666 } +in.peptide-spring: { folder: examples/PACKAGES/colvars, status: "completed", failed_checks: { abs_diff_failed: 0, 
rel_diff_failed: 0 }, walltime: 1.0, walltime_norm: 0.16666666666666666 } +in.peptide-spring2: { folder: examples/PACKAGES/colvars, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 1.0, walltime_norm: 0.16666666666666666 } +in.hdnnp: { folder: examples/PACKAGES/hdnnp, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized pair style 'hdnnp' is part of the ML-HDNNP package which is not enabled in this LAMMPS binary. (src/force.cpp:275)", walltime: -1 } +in.hybrid: { folder: examples/PACKAGES/hdnnp, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized pair style 'hdnnp' is part of the ML-HDNNP package which is not enabled in this LAMMPS binary. (src/force.cpp:275)", walltime: -1 } +in.edip-Si: { folder: examples/PACKAGES/edip, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 0.0, walltime_norm: 0.0 } +in.edip-Si-multi: { folder: examples/PACKAGES/edip, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 0.0, walltime_norm: 0.0 } +in.edip-SiC: { folder: examples/PACKAGES/edip, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 0.0, walltime_norm: 0.0 } +in.large_nylon_melt: { folder: examples/PACKAGES/reaction/nylon,6-6_melt, status: "completed", failed_checks: { abs_diff_failed: 2, rel_diff_failed: 2 }, walltime: 60.0, walltime_norm: 10.0 } +in.tiny_polystyrene.stabilized: { folder: examples/PACKAGES/reaction/tiny_polystyrene, status: "completed", failed_checks: { abs_diff_failed: 2, rel_diff_failed: 2 }, walltime: 18.0, walltime_norm: 3.0 } +in.tiny_epoxy.stabilized: { folder: examples/PACKAGES/reaction/tiny_epoxy, status: "completed", failed_checks: { abs_diff_failed: 1, rel_diff_failed: 1 }, walltime: 0.0, walltime_norm: 0.0 } +in.grow_styrene: { folder: examples/PACKAGES/reaction/create_atoms_polystyrene, status: "completed", 
failed_checks: { abs_diff_failed: 2, rel_diff_failed: 2 }, walltime: 6.0, walltime_norm: 1.0 } +in.tiny_nylon.stabilized: { folder: examples/PACKAGES/reaction/tiny_nylon, status: "failed, unknown command, package not installed, ERROR: Unknown command: react rxn2 all 1 0.0 5.0 mol3 mol4 rxn1_stp2_map rescale_charges yes (src/input.cpp:314)", walltime: -1 } +in.tiny_nylon.stabilized_variable_probability: { folder: examples/PACKAGES/reaction/tiny_nylon, status: "completed", failed_checks: { abs_diff_failed: 2, rel_diff_failed: 2 }, walltime: 0.0, walltime_norm: 0.0 } +in.tiny_nylon.unstabilized: { folder: examples/PACKAGES/reaction/tiny_nylon, status: "completed", failed_checks: { abs_diff_failed: 2, rel_diff_failed: 2 }, walltime: 0.0, walltime_norm: 0.0 } +in.BulkNi: { folder: examples/PACKAGES/diffraction, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 158.0, walltime_norm: 26.333333333333332 } +in.tdpd: { folder: examples/PACKAGES/dpd-meso/tdpd, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 64.0, walltime_norm: 10.666666666666666 } +in.tdpd-region: { folder: examples/PACKAGES/dpd-meso/tdpd, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 63.0, walltime_norm: 10.5 } +in.mdpd: { folder: examples/PACKAGES/dpd-meso/mdpd, status: "completed", failed_checks: { abs_diff_failed: 3, rel_diff_failed: 3 }, walltime: 100.0, walltime_norm: 16.666666666666668 } +in.edpd: { folder: examples/PACKAGES/dpd-meso/edpd, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 87.0, walltime_norm: 14.5 } +in.edpd-region: { folder: examples/PACKAGES/dpd-meso/edpd, status: "completed", failed_checks: { abs_diff_failed: 6, rel_diff_failed: 6 }, walltime: 87.0, walltime_norm: 14.5 } +in.cylinder: { folder: examples/PACKAGES/stressprofile, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 
4.0, walltime_norm: 0.6666666666666666 } +in.flat: { folder: examples/PACKAGES/stressprofile, status: "failed, ERROR: Illegal compute stress/cartesian command: missing argument(s) (src/EXTRA-COMPUTE/compute_stress_cartesian.cpp:65).", walltime: -1 } +in.sphere: { folder: examples/PACKAGES/stressprofile, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 1.0, walltime_norm: 0.16666666666666666 } +in.srp: { folder: examples/PACKAGES/srp, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 6.0, walltime_norm: 1.0 } +in.scafacos: { folder: examples/PACKAGES/scafacos, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized kspace style 'scafacos' is part of the SCAFACOS package which is not enabled in this LAMMPS binary. (src/force.cpp:660)", walltime: -1 } +in.scafacos.cw.ewald: { folder: examples/PACKAGES/scafacos, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized kspace style 'scafacos' is part of the SCAFACOS package which is not enabled in this LAMMPS binary. (src/force.cpp:660)", walltime: -1 } +in.scafacos.cw.fmm: { folder: examples/PACKAGES/scafacos, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized kspace style 'scafacos' is part of the SCAFACOS package which is not enabled in this LAMMPS binary. (src/force.cpp:660)", walltime: -1 } +in.scafacos.cw.p2nfft: { folder: examples/PACKAGES/scafacos, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized kspace style 'scafacos' is part of the SCAFACOS package which is not enabled in this LAMMPS binary. (src/force.cpp:660)", walltime: -1 } +in.scafacos.cw.p3m: { folder: examples/PACKAGES/scafacos, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized kspace style 'scafacos' is part of the SCAFACOS package which is not enabled in this LAMMPS binary. 
(src/force.cpp:660)", walltime: -1 } +in.scafacos.ewald: { folder: examples/PACKAGES/scafacos, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized kspace style 'scafacos' is part of the SCAFACOS package which is not enabled in this LAMMPS binary. (src/force.cpp:660)", walltime: -1 } +in.scafacos.fmm: { folder: examples/PACKAGES/scafacos, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized kspace style 'scafacos' is part of the SCAFACOS package which is not enabled in this LAMMPS binary. (src/force.cpp:660)", walltime: -1 } +in.scafacos.hsph.direct: { folder: examples/PACKAGES/scafacos, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized kspace style 'scafacos' is part of the SCAFACOS package which is not enabled in this LAMMPS binary. (src/force.cpp:660)", walltime: -1 } +in.scafacos.hsph.fmm: { folder: examples/PACKAGES/scafacos, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized kspace style 'scafacos' is part of the SCAFACOS package which is not enabled in this LAMMPS binary. (src/force.cpp:660)", walltime: -1 } +in.scafacos.hsph.p2nfft: { folder: examples/PACKAGES/scafacos, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized kspace style 'scafacos' is part of the SCAFACOS package which is not enabled in this LAMMPS binary. (src/force.cpp:660)", walltime: -1 } +in.scafacos.p2nfft: { folder: examples/PACKAGES/scafacos, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized kspace style 'scafacos' is part of the SCAFACOS package which is not enabled in this LAMMPS binary. (src/force.cpp:660)", walltime: -1 } +in.scafacos.p3m: { folder: examples/PACKAGES/scafacos, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized kspace style 'scafacos' is part of the SCAFACOS package which is not enabled in this LAMMPS binary. 
(src/force.cpp:660)", walltime: -1 } +in.h_atom: { folder: examples/PACKAGES/awpmd, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized atom style 'wavepacket' is part of the AWPMD package which is not enabled in this LAMMPS binary. (src/atom.cpp:745)", walltime: -1 } +in.h_molecule: { folder: examples/PACKAGES/awpmd, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized atom style 'wavepacket' is part of the AWPMD package which is not enabled in this LAMMPS binary. (src/atom.cpp:745)", walltime: -1 } +in.gold_gr: { folder: examples/PACKAGES/interlayer/saip_metal, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 10.0, walltime_norm: 1.6666666666666667 } +in.atom-diffusion: { folder: examples/PACKAGES/interlayer/kolmogorov_crespi_z, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.bilayer-graphene: { folder: examples/PACKAGES/interlayer/kolmogorov_crespi_z, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 1.0, walltime_norm: 0.16666666666666666 } +in.gr_water: { folder: examples/PACKAGES/interlayer/aip_water_2dm, status: "completed", failed_checks: { abs_diff_failed: 2, rel_diff_failed: 2 }, walltime: 19.0, walltime_norm: 3.1666666666666665 } +in.gr_water.opt: { folder: examples/PACKAGES/interlayer/aip_water_2dm, status: "completed", failed_checks: { abs_diff_failed: 2, rel_diff_failed: 2 }, walltime: 10.0, walltime_norm: 1.6666666666666667 } +in.bilayer-graphene: { folder: examples/PACKAGES/interlayer/kolmogorov_crespi_full, status: "failed, mismatched columns in the log files", walltime: 121.0, walltime_norm: 20.166666666666668 } +in.CH_drip: { folder: examples/PACKAGES/interlayer/drip, status: "completed", failed_checks: { abs_diff_failed: 2, rel_diff_failed: 1 }, walltime: 1.0, walltime_norm: 0.16666666666666666 } +in.C_drip: { folder: 
examples/PACKAGES/interlayer/drip, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 1.0, walltime_norm: 0.16666666666666666 } +in.mos2: { folder: examples/PACKAGES/interlayer/ilp_tmds, status: "completed", failed_checks: { abs_diff_failed: 1, rel_diff_failed: 1 }, walltime: 57.0, walltime_norm: 9.5 } +in.bilayer-graphene: { folder: examples/PACKAGES/interlayer/ilp_graphene_hbn, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 70.0, walltime_norm: 11.666666666666666 } +in.bilayer-hBN: { folder: examples/PACKAGES/interlayer/ilp_graphene_hbn, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 134.0, walltime_norm: 22.333333333333332 } +in.grhBN: { folder: examples/PACKAGES/interlayer/ilp_graphene_hbn, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 123.0, walltime_norm: 20.5 } +in.ilp_graphene_hbn: { folder: examples/PACKAGES/interlayer/ilp_graphene_hbn, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 129.0, walltime_norm: 21.5 } +in.smatbAgCuPancake: { folder: examples/PACKAGES/smtbq, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 0.0, walltime_norm: 0.0 } +in.smatbBulkFCC: { folder: examples/PACKAGES/smtbq, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 0.0, walltime_norm: 0.0 } +in.smtbq.Al: { folder: examples/PACKAGES/smtbq, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 3.0, walltime_norm: 0.5 } +in.smtbq.Al2O3: { folder: examples/PACKAGES/smtbq, status: "failed, no Total wall time in the output, timeout (180s expired)", walltime: -1 } +in.smtbq.TiO2: { folder: examples/PACKAGES/smtbq, status: "failed, no Total wall time in the output, timeout (180s expired)", walltime: -1 } +in.slater: { folder: 
examples/PACKAGES/slater, status: "failed, no Total wall time in the output, timeout (180s expired)", walltime: -1 } +in.slcsa: { folder: examples/PACKAGES/sna_nnn_slcsa, status: "completed, error parsing log.lammps into YAML", walltime: 42.0, walltime_norm: 7.0 } +in.orient_eco: { folder: examples/PACKAGES/orient_eco, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 128.0, walltime_norm: 21.333333333333332 } +in.entropy: { folder: examples/PACKAGES/entropy, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 13.0, walltime_norm: 2.1666666666666665 } +in.bpti: { folder: examples/PACKAGES/filter_corotate, status: "completed, error parsing log.lammps into YAML", walltime: 24.0, walltime_norm: 4.0 } +in.peptide: { folder: examples/PACKAGES/filter_corotate, status: "completed", failed_checks: { abs_diff_failed: 3, rel_diff_failed: 3 }, walltime: 98.0, walltime_norm: 16.333333333333332 } +in.graphene: { folder: examples/PACKAGES/ipi, status: "failed, no Total wall time in the output, -------------------------------------------------------------------------- +MPI_ABORT was invoked on rank 0 in communicator MPI_COMM_WORLD +with errorcode 1. + +NOTE: invoking MPI_ABORT causes Open MPI to kill all MPI processes. +You may or may not see output from other processes, depending on +exactly when Open MPI kills them. 
+-------------------------------------------------------------------------- +", walltime: -1 } +in.gREM-npt: { folder: examples/PACKAGES/grem/lj-single, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 0.0, walltime_norm: 0.0 } +in.gREM-nvt: { folder: examples/PACKAGES/grem/lj-single, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 0.0, walltime_norm: 0.0 } +in.gREM: { folder: examples/PACKAGES/grem/lj-6rep, status: "failed, ERROR: Cannot open file restart_file: No such file or directory (src/read_data.cpp:367).", walltime: -1 } +in.gREM-temper: { folder: examples/PACKAGES/grem/lj-temper, status: "failed, ERROR: World variable count doesn't match # of partitions (src/variable.cpp:255).", walltime: -1 } +in.compute_stress_mop: { folder: examples/PACKAGES/mop, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.fix_wall: { folder: examples/PACKAGES/ees, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.fix_wall_region: { folder: examples/PACKAGES/ees, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.ti_spring: { folder: examples/PACKAGES/ti, status: "failed, no Total wall time in the output, -------------------------------------------------------------------------- +MPI_ABORT was invoked on rank 0 in communicator MPI_COMM_WORLD +with errorcode 1. + +NOTE: invoking MPI_ABORT causes Open MPI to kill all MPI processes. +You may or may not see output from other processes, depending on +exactly when Open MPI kills them. 
+-------------------------------------------------------------------------- +", walltime: -1 } +in.extep-bn: { folder: examples/PACKAGES/extep, status: "completed", failed_checks: { abs_diff_failed: 3, rel_diff_failed: 3 }, walltime: 0.0, walltime_norm: 0.0 } +in.toluene.lang: { folder: examples/PACKAGES/drude/toluene, status: "completed", failed_checks: { abs_diff_failed: 4, rel_diff_failed: 5 }, walltime: 68.0, walltime_norm: 11.333333333333334 } +in.toluene.nh: { folder: examples/PACKAGES/drude/toluene, status: "completed", failed_checks: { abs_diff_failed: 5, rel_diff_failed: 5 }, walltime: 68.0, walltime_norm: 11.333333333333334 } +in.butane.lang: { folder: examples/PACKAGES/drude/butane, status: "completed", failed_checks: { abs_diff_failed: 3, rel_diff_failed: 3 }, walltime: 50.0, walltime_norm: 8.333333333333334 } +in.butane.nh: { folder: examples/PACKAGES/drude/butane, status: "completed", failed_checks: { abs_diff_failed: 3, rel_diff_failed: 5 }, walltime: 49.0, walltime_norm: 8.166666666666666 } +in.butane.tgnh: { folder: examples/PACKAGES/drude/butane, status: "completed", failed_checks: { abs_diff_failed: 4, rel_diff_failed: 4 }, walltime: 49.0, walltime_norm: 8.166666666666666 } +in.swm4-ndp.lang: { folder: examples/PACKAGES/drude/swm4-ndp, status: "completed", failed_checks: { abs_diff_failed: 5, rel_diff_failed: 5 }, walltime: 65.0, walltime_norm: 10.833333333333334 } +in.swm4-ndp.nh: { folder: examples/PACKAGES/drude/swm4-ndp, status: "completed", failed_checks: { abs_diff_failed: 5, rel_diff_failed: 5 }, walltime: 54.0, walltime_norm: 9.0 } +in.ethylene_glycol: { folder: examples/PACKAGES/drude/ethylene_glycol, status: "completed", failed_checks: { abs_diff_failed: 4, rel_diff_failed: 4 }, walltime: 25.0, walltime_norm: 4.166666666666667 } +in.ethanol.lang: { folder: examples/PACKAGES/drude/ethanol, status: "completed", failed_checks: { abs_diff_failed: 5, rel_diff_failed: 5 }, walltime: 43.0, walltime_norm: 7.166666666666667 } +in.ethanol.nh: { 
folder: examples/PACKAGES/drude/ethanol, status: "completed", failed_checks: { abs_diff_failed: 5, rel_diff_failed: 5 }, walltime: 42.0, walltime_norm: 7.0 } +in.ethanol.tgnh: { folder: examples/PACKAGES/drude/ethanol, status: "completed", failed_checks: { abs_diff_failed: 5, rel_diff_failed: 5 }, walltime: 44.0, walltime_norm: 7.333333333333333 } +in.force: { folder: examples/PACKAGES/tally, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 2.0, walltime_norm: 0.3333333333333333 } +in.pe: { folder: examples/PACKAGES/tally, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 2.0, walltime_norm: 0.3333333333333333 } +in.stress: { folder: examples/PACKAGES/tally, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 2.0, walltime_norm: 0.3333333333333333 } +in.system: { folder: examples/PACKAGES/momb, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 13.0, walltime_norm: 2.1666666666666665 } +in.momentum: { folder: examples/PACKAGES/momentum, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 41.0, walltime_norm: 6.833333333333333 } +in.alpha: { folder: examples/PACKAGES/meam_sw_spline/Ti, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.alpha_relaxation: { folder: examples/PACKAGES/meam_sw_spline/Ti, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.beta: { folder: examples/PACKAGES/meam_sw_spline/Ti, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.energy_conservation.meam.sw: { folder: examples/PACKAGES/meam_sw_spline/Ti, status: "completed", failed_checks: { abs_diff_failed: 2, rel_diff_failed: 2 }, walltime: 74.0, walltime_norm: 
12.333333333333334 } +in.fcc: { folder: examples/PACKAGES/meam_sw_spline/Ti, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.hexagonal: { folder: examples/PACKAGES/meam_sw_spline/Ti, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.omega: { folder: examples/PACKAGES/meam_sw_spline/Ti, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.single_atom: { folder: examples/PACKAGES/meam_sw_spline/Ti, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.bcc: { folder: examples/PACKAGES/meam_sw_spline/Si, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.bcc_relax: { folder: examples/PACKAGES/meam_sw_spline/Si, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.dc: { folder: examples/PACKAGES/meam_sw_spline/Si, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.dc_relax: { folder: examples/PACKAGES/meam_sw_spline/Si, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 1.0, walltime_norm: 0.16666666666666666 } +in.energy_conservation.meam.sw: { folder: examples/PACKAGES/meam_sw_spline/Si, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 11.0, walltime_norm: 1.8333333333333333 } +in.fcc: { folder: examples/PACKAGES/meam_sw_spline/Si, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.fcc_relax: { folder: examples/PACKAGES/meam_sw_spline/Si, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 
} +in.hcp_relax: { folder: examples/PACKAGES/meam_sw_spline/Si, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.sc: { folder: examples/PACKAGES/meam_sw_spline/Si, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.sc_relax: { folder: examples/PACKAGES/meam_sw_spline/Si, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.single_atom: { folder: examples/PACKAGES/meam_sw_spline/Si, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.film_mesocnt: { folder: examples/PACKAGES/mesont, status: "failed, no Total wall time in the output, -------------------------------------------------------------------------- +MPI_ABORT was invoked on rank 0 in communicator MPI_COMM_WORLD +with errorcode 1. + +NOTE: invoking MPI_ABORT causes Open MPI to kill all MPI processes. +You may or may not see output from other processes, depending on +exactly when Open MPI kills them. 
+-------------------------------------------------------------------------- +", walltime: -1 } +in.cauchystat: { folder: examples/PACKAGES/cauchy, status: "completed", failed_checks: { abs_diff_failed: 1, rel_diff_failed: 1 }, walltime: 1.0, walltime_norm: 0.16666666666666666 } +in.rubber_strip_pull: { folder: examples/PACKAGES/machdyn/rubber_strip_pull, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 2.0, walltime_norm: 0.3333333333333333 } +in.aluminum_strip_pull: { folder: examples/PACKAGES/machdyn/aluminum_strip_pull, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 1.0, walltime_norm: 0.16666666666666666 } +in.funnel_flow: { folder: examples/PACKAGES/machdyn/funnel_flow, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 29.0, walltime_norm: 4.833333333333333 } +in.fluid_structure_interaction: { folder: examples/PACKAGES/machdyn/fluid_structure_interaction, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 32.0, walltime_norm: 5.333333333333333 } +in.rubber_rings_3d: { folder: examples/PACKAGES/machdyn/rubber_rings_3d, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 21.0, walltime_norm: 3.5 } +in.h2o-quantum: { folder: examples/PACKAGES/gle, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 13.0, walltime_norm: 2.1666666666666665 } +in.h2o-smart: { folder: examples/PACKAGES/gle, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 13.0, walltime_norm: 2.1666666666666665 } +in.pafi: { folder: examples/PACKAGES/pafi, status: "failed, no Total wall time in the output, -------------------------------------------------------------------------- +MPI_ABORT was invoked on rank 0 in communicator MPI_COMM_WORLD +with errorcode 1. 
+ +NOTE: invoking MPI_ABORT causes Open MPI to kill all MPI processes. +You may or may not see output from other processes, depending on +exactly when Open MPI kills them. +-------------------------------------------------------------------------- +", walltime: -1 } +in.scp: { folder: examples/PACKAGES/pimd/prot-hairpin, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 15.0, walltime_norm: 2.5 } +in.scp: { folder: examples/PACKAGES/pimd/para-h2, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 6.0, walltime_norm: 1.0 } +in.lmp: { folder: examples/PACKAGES/pimd/langevin_reduced_units, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 0.0, walltime_norm: 0.0 } +in.langevin.metal: { folder: examples/PACKAGES/pimd/langevin_metal_units, status: "completed", failed_checks: { abs_diff_failed: 2, rel_diff_failed: 2 }, walltime: 0.0, walltime_norm: 0.0 } +in.pimd-langevin.metal: { folder: examples/PACKAGES/pimd/langevin_metal_units, status: "completed", failed_checks: { abs_diff_failed: 2, rel_diff_failed: 2 }, walltime: 0.0, walltime_norm: 0.0 } +in.rann: { folder: examples/PACKAGES/rann, status: "failed, no Total wall time in the output, -------------------------------------------------------------------------- +MPI_ABORT was invoked on rank 0 in communicator MPI_COMM_WORLD +with errorcode 1. + +NOTE: invoking MPI_ABORT causes Open MPI to kill all MPI processes. +You may or may not see output from other processes, depending on +exactly when Open MPI kills them. 
+-------------------------------------------------------------------------- +", walltime: -1 } +in.msd.2d: { folder: examples/DIFFUSE, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 36.0, walltime_norm: 6.0 } +in.vacf.2d: { folder: examples/DIFFUSE, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 39.0, walltime_norm: 6.5 } +in.numdiff: { folder: examples/numdiff, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 0.0, walltime_norm: 0.0 } +in.bpm.pour: { folder: examples/bpm/pour, status: "failed, no Total wall time in the output, timeout (180s expired)", walltime: -1 } +in.bpm.impact.rotational: { folder: examples/bpm/impact, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 102.0, walltime_norm: 17.0 } +in.bpm.impact.spring: { folder: examples/bpm/impact, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 12.0, walltime_norm: 2.0 } +in.rheo.balloon: { folder: examples/rheo/balloon, status: "failed, no Total wall time in the output, timeout (180s expired)", walltime: -1 } +in.rheo.oxidation: { folder: examples/rheo/oxidation, status: "failed, no Total wall time in the output, timeout (180s expired)", walltime: -1 } +in.rheo.taylor.green: { folder: examples/rheo/taylor-green, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 113.0, walltime_norm: 18.833333333333332 } +in.rheo.ice.cubes: { folder: examples/rheo/ice-cubes, status: "failed, no Total wall time in the output, -------------------------------------------------------------------------- +MPI_ABORT was invoked on rank 0 in communicator MPI_COMM_WORLD +with errorcode 1. + +NOTE: invoking MPI_ABORT causes Open MPI to kill all MPI processes. +You may or may not see output from other processes, depending on +exactly when Open MPI kills them. 
+-------------------------------------------------------------------------- +", walltime: -1 } +in.rheo.poiseuille: { folder: examples/rheo/poiseuille, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 29.0, walltime_norm: 4.833333333333333 } +in.rheo.dam.break: { folder: examples/rheo/dam-break, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 159.0, walltime_norm: 26.5 } +in.peptide: { folder: examples/peptide, status: "completed, error parsing log.lammps into YAML", walltime: 4.0, walltime_norm: 0.6666666666666666 } +in.coreshell: { folder: examples/coreshell, status: "completed", failed_checks: { abs_diff_failed: 7, rel_diff_failed: 9 }, walltime: 10.0, walltime_norm: 1.6666666666666667 } +in.coreshell.dsf: { folder: examples/coreshell, status: "completed", failed_checks: { abs_diff_failed: 7, rel_diff_failed: 8 }, walltime: 20.0, walltime_norm: 3.3333333333333335 } +in.coreshell.thermostats: { folder: examples/coreshell, status: "completed", failed_checks: { abs_diff_failed: 12, rel_diff_failed: 14 }, walltime: 14.0, walltime_norm: 2.3333333333333335 } +in.coreshell.wolf: { folder: examples/coreshell, status: "completed", failed_checks: { abs_diff_failed: 5, rel_diff_failed: 5 }, walltime: 22.0, walltime_norm: 3.6666666666666665 } +in.marble_race: { folder: examples/mesh, status: "completed", failed_checks: { abs_diff_failed: 5, rel_diff_failed: 5 }, walltime: 131.0, walltime_norm: 21.833333333333332 } +in.mesh_box: { folder: examples/mesh, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 10.0, walltime_norm: 1.6666666666666667 } +in.abcfire: { folder: examples/fire, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.abcfire_mod: { folder: examples/fire, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 
0.0 } +in.cg: { folder: examples/fire, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.fire: { folder: examples/fire, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.fire_mod: { folder: examples/fire, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.meam.abcfire: { folder: examples/fire, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 43.0, walltime_norm: 7.166666666666667 } +in.meam.fire: { folder: examples/fire, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 62.0, walltime_norm: 10.333333333333334 } +in.neb.sivac.abcfire: { folder: examples/fire, status: "failed, ERROR: Cannot use NEB with a single replica (src/REPLICA/neb.cpp:141).", walltime: -1 } +in.neb.sivac.abcfire_mod: { folder: examples/fire, status: "failed, ERROR: Cannot use NEB with a single replica (src/REPLICA/neb.cpp:141).", walltime: -1 } +in.neb.sivac.fire: { folder: examples/fire, status: "failed, ERROR: Cannot use NEB with a single replica (src/REPLICA/neb.cpp:141).", walltime: -1 } +in.neb.sivac.fire_mod: { folder: examples/fire, status: "failed, ERROR: Cannot use NEB with a single replica (src/REPLICA/neb.cpp:141).", walltime: -1 } +in.neb.sivac.qm: { folder: examples/fire, status: "failed, ERROR: Cannot use NEB with a single replica (src/REPLICA/neb.cpp:141).", walltime: -1 } +in.bcc.orthog: { folder: examples/triclinic, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.bcc.primitive: { folder: examples/triclinic, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.data.general: { folder: examples/triclinic, status: "completed", failed_checks: { abs_diff_failed: 
0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.fcc.orthog: { folder: examples/triclinic, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.fcc.primitive: { folder: examples/triclinic, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.hex.orthog: { folder: examples/triclinic, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.hex.primitive: { folder: examples/triclinic, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.sq2.orthog: { folder: examples/triclinic, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.sq2.primitive: { folder: examples/triclinic, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.tri.srd: { folder: examples/ASPHERE/tri, status: "completed", failed_checks: { abs_diff_failed: 4, rel_diff_failed: 4 }, walltime: 143.0, walltime_norm: 23.833333333333332 } +in.star: { folder: examples/ASPHERE/star, status: "completed", failed_checks: { abs_diff_failed: 3, rel_diff_failed: 3 }, walltime: 12.0, walltime_norm: 2.0 } +in.star.mp: { folder: examples/ASPHERE/star, status: "completed", failed_checks: { abs_diff_failed: 3, rel_diff_failed: 3 }, walltime: 12.0, walltime_norm: 2.0 } +in.box: { folder: examples/ASPHERE/box, status: "completed", failed_checks: { abs_diff_failed: 2, rel_diff_failed: 3 }, walltime: 18.0, walltime_norm: 3.0 } +in.box.mp: { folder: examples/ASPHERE/box, status: "completed", failed_checks: { abs_diff_failed: 3, rel_diff_failed: 3 }, walltime: 19.0, walltime_norm: 3.1666666666666665 } +in.dimer: { folder: examples/ASPHERE/dimer, status: "completed", failed_checks: { abs_diff_failed: 3, rel_diff_failed: 3 
}, walltime: 6.0, walltime_norm: 1.0 } +in.dimer.mp: { folder: examples/ASPHERE/dimer, status: "completed", failed_checks: { abs_diff_failed: 3, rel_diff_failed: 3 }, walltime: 16.0, walltime_norm: 2.6666666666666665 } +in.vesicle: { folder: examples/ASPHERE/vesicle, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 9.0, walltime_norm: 1.5 } +in.line: { folder: examples/ASPHERE/line, status: "failed, no Total wall time in the output, timeout (180s expired)", walltime: -1 } +in.line.srd: { folder: examples/ASPHERE/line, status: "failed, no Total wall time in the output, -------------------------------------------------------------------------- +MPI_ABORT was invoked on rank 0 in communicator MPI_COMM_WORLD +with errorcode 1. + +NOTE: invoking MPI_ABORT causes Open MPI to kill all MPI processes. +You may or may not see output from other processes, depending on +exactly when Open MPI kills them. +-------------------------------------------------------------------------- +", walltime: -1 } +in.poly: { folder: examples/ASPHERE/poly, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 8.0, walltime_norm: 1.3333333333333333 } +in.poly.mp: { folder: examples/ASPHERE/poly, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 8.0, walltime_norm: 1.3333333333333333 } +in.flat_membrane: { folder: examples/ASPHERE/flat_membrane, status: "completed", failed_checks: { abs_diff_failed: 2, rel_diff_failed: 2 }, walltime: 7.0, walltime_norm: 1.1666666666666667 } +in.ellipsoid: { folder: examples/ASPHERE/ellipsoid, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 9.0, walltime_norm: 1.5 } +in.ellipsoid.mp: { folder: examples/ASPHERE/ellipsoid, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 9.0, walltime_norm: 1.5 } +in.ubiquitin: { folder: examples/amoeba, status: "completed", 
failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 12.0, walltime_norm: 2.0 } +in.water_box.amoeba: { folder: examples/amoeba, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 8.0, walltime_norm: 1.3333333333333333 } +in.water_box.hippo: { folder: examples/amoeba, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 6.0, walltime_norm: 1.0 } +in.water_dimer.amoeba: { folder: examples/amoeba, status: "completed", failed_checks: { abs_diff_failed: 1, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.water_dimer.hippo: { folder: examples/amoeba, status: "completed", failed_checks: { abs_diff_failed: 1, rel_diff_failed: 1 }, walltime: 0.0, walltime_norm: 0.0 } +in.water_hexamer.amoeba: { folder: examples/amoeba, status: "completed", failed_checks: { abs_diff_failed: 1, rel_diff_failed: 1 }, walltime: 0.0, walltime_norm: 0.0 } +in.water_hexamer.hippo: { folder: examples/amoeba, status: "completed", failed_checks: { abs_diff_failed: 1, rel_diff_failed: 1 }, walltime: 0.0, walltime_norm: 0.0 } +in.nb3b: { folder: examples/nb3b, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 5.0, walltime_norm: 0.8333333333333334 } +in.nb3b.screened: { folder: examples/nb3b, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 29.0, walltime_norm: 4.833333333333333 } +in.min: { folder: examples/min, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.min.box: { folder: examples/min, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 2.0, walltime_norm: 0.3333333333333333 } +in.balance: { folder: examples/balance, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 0.0, walltime_norm: 0.0 } +in.balance.bond.fast: { folder: examples/balance, status: 
"completed, numerical checks skipped due to missing the reference log file", walltime: 0.0, walltime_norm: 0.0 } +in.balance.bond.slow: { folder: examples/balance, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 0.0, walltime_norm: 0.0 } +in.balance.clock.dynamic: { folder: examples/balance, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 16.0, walltime_norm: 2.6666666666666665 } +in.balance.clock.static: { folder: examples/balance, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 6.0, walltime_norm: 1.0 } +in.balance.group.dynamic: { folder: examples/balance, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 2.0, walltime_norm: 0.3333333333333333 } +in.balance.group.static: { folder: examples/balance, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 2.0, walltime_norm: 0.3333333333333333 } +in.balance.kspace: { folder: examples/balance, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 6.0, walltime_norm: 1.0 } +in.balance.neigh.dynamic: { folder: examples/balance, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 16.0, walltime_norm: 2.6666666666666665 } +in.balance.neigh.rcb: { folder: examples/balance, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 4.0, walltime_norm: 0.6666666666666666 } +in.balance.neigh.static: { folder: examples/balance, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 10.0, walltime_norm: 1.6666666666666667 } +in.balance.var.dynamic: { folder: examples/balance, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 16.0, walltime_norm: 2.6666666666666665 } +in.ch4: { folder: 
examples/QUANTUM/LATTE, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'mdi/qm' is part of the MDI package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.ch4.plugin: { folder: examples/QUANTUM/LATTE, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'mdi/qm' is part of the MDI package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.graphene: { folder: examples/QUANTUM/LATTE, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'mdi/qm' is part of the MDI package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.graphene.plugin: { folder: examples/QUANTUM/LATTE, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'mdi/qm' is part of the MDI package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.series: { folder: examples/QUANTUM/LATTE, status: "failed, unknown command, package not installed, ERROR: Unknown command: mdi connect (src/input.cpp:314)", walltime: -1 } +in.series.plugin: { folder: examples/QUANTUM/LATTE, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'mdi/qm' is part of the MDI package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.sucrose: { folder: examples/QUANTUM/LATTE, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'mdi/qm' is part of the MDI package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.sucrose.plugin: { folder: examples/QUANTUM/LATTE, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'mdi/qm' is part of the MDI package which is not enabled in this LAMMPS binary. 
(src/modify.cpp:924)", walltime: -1 } +in.uo2: { folder: examples/QUANTUM/LATTE, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'mdi/qm' is part of the MDI package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.uo2.plugin: { folder: examples/QUANTUM/LATTE, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'mdi/qm' is part of the MDI package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.water: { folder: examples/QUANTUM/LATTE, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'mdi/qm' is part of the MDI package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.water.min: { folder: examples/QUANTUM/LATTE, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'mdi/qm' is part of the MDI package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.water.min.plugin: { folder: examples/QUANTUM/LATTE, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'mdi/qm' is part of the MDI package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.water.plugin: { folder: examples/QUANTUM/LATTE, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'mdi/qm' is part of the MDI package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.mixture.mm: { folder: examples/QUANTUM/PySCF, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 0.0, walltime_norm: 0.0 } +in.mixture.qmmm: { folder: examples/QUANTUM/PySCF, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'mdi/qmmm' is part of the MDI package which is not enabled in this LAMMPS binary. 
(src/modify.cpp:924)", walltime: -1 } +in.mixture.qmmm.plugin: { folder: examples/QUANTUM/PySCF, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'mdi/qmmm' is part of the MDI package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.water.qmmm: { folder: examples/QUANTUM/PySCF, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'mdi/qmmm' is part of the MDI package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.water.qmmm.plugin: { folder: examples/QUANTUM/PySCF, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'mdi/qmmm' is part of the MDI package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.series: { folder: examples/QUANTUM/NWChem, status: "failed, unknown command, package not installed, ERROR: Unknown command: mdi connect (src/input.cpp:314)", walltime: -1 } +in.series.plugin: { folder: examples/QUANTUM/NWChem, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'mdi/qm' is part of the MDI package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.water.mm: { folder: examples/QUANTUM/NWChem, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 0.0, walltime_norm: 0.0 } +in.water.qmmm: { folder: examples/QUANTUM/NWChem, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'mdi/qmmm' is part of the MDI package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.water.qmmm.plugin: { folder: examples/QUANTUM/NWChem, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'mdi/qmmm' is part of the MDI package which is not enabled in this LAMMPS binary. 
(src/modify.cpp:924)", walltime: -1 } +in.zeolite.mm: { folder: examples/QUANTUM/NWChem, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 0.0, walltime_norm: 0.0 } +in.zeolite.qmmm: { folder: examples/QUANTUM/NWChem, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'mdi/qmmm' is part of the MDI package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.zeolite.qmmm.plugin: { folder: examples/QUANTUM/NWChem, status: "failed, unrecognized command, package not installed, ERROR: Unrecognized fix style 'mdi/qmmm' is part of the MDI package which is not enabled in this LAMMPS binary. (src/modify.cpp:924)", walltime: -1 } +in.wall.ccl: { folder: examples/wall, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.wall.diffusive: { folder: examples/wall, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.wall.flow: { folder: examples/wall, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 5.0, walltime_norm: 0.8333333333333334 } +in.wall.lepton: { folder: examples/wall, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.wall.maxwell: { folder: examples/wall, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.wall.specular: { folder: examples/wall, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.wall.table: { folder: examples/wall, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.meam: { folder: examples/meam, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, 
walltime_norm: 0.0 } +in.meam.shear: { folder: examples/meam, status: "completed", failed_checks: { abs_diff_failed: 3, rel_diff_failed: 3 }, walltime: 31.0, walltime_norm: 5.166666666666667 } +in.msmeam: { folder: examples/meam/msmeam, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.hugoniostat: { folder: examples/hugoniostat, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 10.0, walltime_norm: 1.6666666666666667 } +in.comb.Cu: { folder: examples/comb, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.comb.Cu2O.elastic: { folder: examples/comb, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 21.0, walltime_norm: 3.5 } +in.comb.HfO2: { folder: examples/comb, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 4.0, walltime_norm: 0.6666666666666666 } +in.comb.Si: { folder: examples/comb, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.comb.Si.elastic: { folder: examples/comb, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.comb3: { folder: examples/comb, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 11.0, walltime_norm: 1.8333333333333333 } +in.tad: { folder: examples/tad, status: "failed, ERROR: Cannot use TAD with a single replica for NEB (src/REPLICA/tad.cpp:79).", walltime: -1 } +in.controller.temp: { folder: examples/controller, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 6.0, walltime_norm: 1.0 } +in.controller.wall: { folder: examples/controller, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 19.0, walltime_norm: 
3.1666666666666665 } +in.reaxff.rdx: { folder: examples/reaxff, status: "completed", failed_checks: { abs_diff_failed: 3, rel_diff_failed: 3 }, walltime: 0.0, walltime_norm: 0.0 } +in.reaxff.rdx-shielded: { folder: examples/reaxff, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.reaxff.tatb: { folder: examples/reaxff, status: "completed", failed_checks: { abs_diff_failed: 1, rel_diff_failed: 2 }, walltime: 2.0, walltime_norm: 0.3333333333333333 } +in.reaxff.tatb-shielded: { folder: examples/reaxff, status: "completed", failed_checks: { abs_diff_failed: 1, rel_diff_failed: 2 }, walltime: 2.0, walltime_norm: 0.3333333333333333 } +in.ci-reax.CH: { folder: examples/reaxff/ci-reaxFF, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 12.0, walltime_norm: 2.0 } +in.reaxff.hns: { folder: examples/reaxff/HNS, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 20.0, walltime_norm: 3.3333333333333335 } +in.VOH: { folder: examples/reaxff/VOH, status: "completed", failed_checks: { abs_diff_failed: 3, rel_diff_failed: 2 }, walltime: 2.0, walltime_norm: 0.3333333333333333 } +in.water.acks2: { folder: examples/reaxff/water, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 13.0, walltime_norm: 2.1666666666666665 } +in.water.acks2.field: { folder: examples/reaxff/water, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 17.0, walltime_norm: 2.8333333333333335 } +in.water.qeq: { folder: examples/reaxff/water, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 6.0, walltime_norm: 1.0 } +in.water.qeq.field: { folder: examples/reaxff/water, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 5.0, walltime_norm: 0.8333333333333334 } +in.ZnOH2: { folder: 
examples/reaxff/ZnOH2, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.FC: { folder: examples/reaxff/FC, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 15.0, walltime_norm: 2.5 } +in.RDX: { folder: examples/reaxff/RDX, status: "completed", failed_checks: { abs_diff_failed: 3, rel_diff_failed: 3 }, walltime: 6.0, walltime_norm: 1.0 } +in.AuO: { folder: examples/reaxff/AuO, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 6.0, walltime_norm: 1.0 } +in.CHO: { folder: examples/reaxff/CHO, status: "completed", failed_checks: { abs_diff_failed: 3, rel_diff_failed: 2 }, walltime: 3.0, walltime_norm: 0.5 } +in.FeOH3: { folder: examples/reaxff/FeOH3, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 2.0, walltime_norm: 0.3333333333333333 } +in.AB: { folder: examples/reaxff/AB, status: "completed", failed_checks: { abs_diff_failed: 3, rel_diff_failed: 3 }, walltime: 2.0, walltime_norm: 0.3333333333333333 } +in.grid.2d: { folder: examples/grid, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.grid.3d: { folder: examples/grid, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 1.0, walltime_norm: 0.16666666666666666 } +in.sph: { folder: examples/grid, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 36.0, walltime_norm: 6.0 } +in.yaml: { folder: examples/yaml, status: "completed, error parsing log.lammps into YAML", walltime: 3.0, walltime_norm: 0.5 } +in.hBN_shift: { folder: examples/tersoff, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.tersoff: { folder: examples/tersoff, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 
0.0, walltime_norm: 0.0 } +in.friction: { folder: examples/friction, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 2.0, walltime_norm: 0.3333333333333333 } +in.cmap: { folder: examples/cmap, status: "completed", failed_checks: { abs_diff_failed: 1, rel_diff_failed: 1 }, walltime: 1.0, walltime_norm: 0.16666666666666666 } +in.dipole: { folder: examples/dipole, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.colloid: { folder: examples/colloid, status: "completed", failed_checks: { abs_diff_failed: 3, rel_diff_failed: 3 }, walltime: 4.0, walltime_norm: 0.6666666666666666 } +in.streitz.ewald: { folder: examples/streitz, status: "completed", failed_checks: { abs_diff_failed: 1, rel_diff_failed: 2 }, walltime: 53.0, walltime_norm: 8.833333333333334 } +in.streitz.wolf: { folder: examples/streitz, status: "failed, mismatched columns in the log files", walltime: 57.0, walltime_norm: 9.5 } +in.neb.hop1: { folder: examples/neb, status: "failed, ERROR: Cannot use NEB with a single replica (src/REPLICA/neb.cpp:141).", walltime: -1 } +in.neb.hop1.end: { folder: examples/neb, status: "failed, ERROR: Cannot use NEB with a single replica (src/REPLICA/neb.cpp:141).", walltime: -1 } +in.neb.hop2: { folder: examples/neb, status: "failed, ERROR: Cannot use NEB with a single replica (src/REPLICA/neb.cpp:141).", walltime: -1 } +in.neb.sivac: { folder: examples/neb, status: "failed, ERROR: Cannot use NEB with a single replica (src/REPLICA/neb.cpp:141).", walltime: -1 } +in.fix_python_invoke: { folder: examples/python, status: "failed, ERROR: Could not process Python string: .", walltime: -1 } +in.fix_python_invoke_neighlist: { folder: examples/python, status: "failed, ERROR: Could not process Python string: .", walltime: -1 } +in.fix_python_move_nve_melt: { folder: examples/python, status: "failed, ERROR: Loading python integrator module failure 
(src/PYTHON/fix_python_move.cpp:64).", walltime: -1 } +in.fix_python_move_nve_melt_opt: { folder: examples/python, status: "failed, ERROR: Loading python integrator module failure (src/PYTHON/fix_python_move.cpp:64).", walltime: -1 } +in.pair_python_coulomb: { folder: examples/python, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 3.0, walltime_norm: 0.5 } +in.pair_python_harmonic: { folder: examples/python, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 45.0, walltime_norm: 7.5 } +in.pair_python_hybrid: { folder: examples/python, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 3 }, walltime: 13.0, walltime_norm: 2.1666666666666665 } +in.pair_python_long: { folder: examples/python, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 4.0, walltime_norm: 0.6666666666666666 } +in.pair_python_melt: { folder: examples/python, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 3 }, walltime: 28.0, walltime_norm: 4.666666666666667 } +in.pair_python_spce: { folder: examples/python, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 11.0, walltime_norm: 1.8333333333333333 } +in.pair_python_table: { folder: examples/python, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.python: { folder: examples/python, status: "failed, ERROR on proc 0: Python evaluation of function loop failed (src/PYTHON/python_impl.cpp:384).", walltime: -1 } +in.bcc: { folder: examples/steinhardt, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.fcc: { folder: examples/steinhardt, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.icos: { folder: examples/steinhardt, status: 
"completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 1.0, walltime_norm: 0.16666666666666666 } +in.kim-ex.melt: { folder: examples/kim, status: "failed, unknown command, package not installed, ERROR: Unknown command: kim init LennardJones_Ar real (src/input.cpp:314)", walltime: -1 } +in.kim-pm-property: { folder: examples/kim, status: "failed, unknown command, package not installed, ERROR: Unknown command: kim init LJ_Shifted_Bernardes_1958MedCutoff_Ar__MO_126566794224_004 metal (src/input.cpp:314)", walltime: -1 } +in.kim-pm-query.melt: { folder: examples/kim, status: "failed, unknown command, package not installed, ERROR: Unknown command: kim init SW_StillingerWeber_1985_Si__MO_405512056662_005 real (src/input.cpp:314)", walltime: -1 } +in.kim-pm.melt: { folder: examples/kim, status: "failed, unknown command, package not installed, ERROR: Unknown command: kim init SW_StillingerWeber_1985_Si__MO_405512056662_005 real (src/input.cpp:314)", walltime: -1 } +in.kim-query: { folder: examples/kim, status: "failed, unknown command, package not installed, ERROR: Unknown command: kim init EAM_Dynamo_ErcolessiAdams_1994_Al__MO_123629422045_005 metal (src/input.cpp:314)", walltime: -1 } +in.kim-sm.melt: { folder: examples/kim, status: "failed, unknown command, package not installed, ERROR: Unknown command: kim init Sim_LAMMPS_ReaxFF_StrachanVanDuinChakraborty_2003_CHNO__SM_107643900657_000 real (src/input.cpp:314)", walltime: -1 } +in.lammps.melt: { folder: examples/kim, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 2.0, walltime_norm: 0.3333333333333333 } +in.ellipse.gayberne: { folder: examples/ellipse, status: "completed", failed_checks: { abs_diff_failed: 2, rel_diff_failed: 2 }, walltime: 1.0, walltime_norm: 0.16666666666666666 } +in.ellipse.resquared: { folder: examples/ellipse, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 2 }, walltime: 2.0, walltime_norm: 
0.3333333333333333 } +in.cos.1000SPCE: { folder: examples/VISCOSITY, status: "completed", failed_checks: { abs_diff_failed: 3, rel_diff_failed: 3 }, walltime: 36.0, walltime_norm: 6.0 } +in.einstein.2d: { folder: examples/VISCOSITY, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 39.0, walltime_norm: 6.5 } +in.gk.2d: { folder: examples/VISCOSITY, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 39.0, walltime_norm: 6.5 } +in.mp.2d: { folder: examples/VISCOSITY, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 5.0, walltime_norm: 0.8333333333333334 } +in.nemd.2d: { folder: examples/VISCOSITY, status: "completed", failed_checks: { abs_diff_failed: 8, rel_diff_failed: 8 }, walltime: 17.0, walltime_norm: 2.8333333333333335 } +in.wall.2d: { folder: examples/VISCOSITY, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 10.0, walltime_norm: 1.6666666666666667 } +in.gcmc.co2: { folder: examples/mc, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 11.0, walltime_norm: 1.8333333333333333 } +in.gcmc.h2o: { folder: examples/mc, status: "completed", failed_checks: { abs_diff_failed: 8, rel_diff_failed: 8 }, walltime: 138.0, walltime_norm: 23.0 } +in.gcmc.lj: { folder: examples/mc, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 11.0, walltime_norm: 1.8333333333333333 } +in.mixed: { folder: examples/mc, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 57.0, walltime_norm: 9.5 } +in.pure: { folder: examples/mc, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 57.0, walltime_norm: 9.5 } +in.sgcmc.eam: { folder: examples/mc, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 43.0, walltime_norm: 
7.166666666666667 } +in.widom.lj: { folder: examples/mc, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 8.0, walltime_norm: 1.3333333333333333 } +in.widom.spce: { folder: examples/mc, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 44.0, walltime_norm: 7.333333333333333 } +in.mc: { folder: examples/MC-LOOP, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.flow.couette: { folder: examples/flow, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.flow.pois: { folder: examples/flow, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.prd: { folder: examples/prd, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 3.0, walltime_norm: 0.5 } +in.C_SNAP: { folder: examples/snap, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 34.0, walltime_norm: 5.666666666666667 } +in.grid.snap: { folder: examples/snap, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.grid.tri: { folder: examples/snap, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.snap.InP.JCPA2020: { folder: examples/snap, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 15.0, walltime_norm: 2.5 } +in.snap.Mo_Chen: { folder: examples/snap, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 1.0, walltime_norm: 0.16666666666666666 } +in.snap.Ta06A: { folder: examples/snap, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.snap.W.2940: { folder: examples/snap, 
status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 2.0, walltime_norm: 0.3333333333333333 } +in.snap.WBe.PRB2019: { folder: examples/snap, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 2.0, walltime_norm: 0.3333333333333333 } +in.snap.compute: { folder: examples/snap, status: "completed, but no Step nor Loop in the output.", walltime: 0.0, walltime_norm: 0.0 } +in.snap.compute.quadratic: { folder: examples/snap, status: "completed, but no Step nor Loop in the output.", walltime: 0.0, walltime_norm: 0.0 } +in.snap.hybrid.WSNAP.HePair: { folder: examples/snap, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 2.0, walltime_norm: 0.3333333333333333 } +in.snap.scale.Ni_Zuo_JCPA2020: { folder: examples/snap, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 3.0, walltime_norm: 0.5 } +in.lammps: { folder: examples/COUPLE/lammps_spparks, status: "failed, ERROR: Cannot open file data.lammps: No such file or directory (src/read_data.cpp:367).", walltime: -1 } +in.spparks: { folder: examples/COUPLE/lammps_spparks, status: "failed, unknown command, package not installed, ERROR: Unknown command: seed 56789 (src/input.cpp:314)", walltime: -1 } +in.lj: { folder: examples/COUPLE/plugin, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 0.0, walltime_norm: 0.0 } +in.fix_external: { folder: examples/COUPLE/python, status: "completed, but no Step nor Loop in the output.", walltime: 0.0, walltime_norm: 0.0 } +in.chain: { folder: examples/COUPLE/multiple, status: "failed, no Total wall time in the output, -------------------------------------------------------------------------- +MPI_ABORT was invoked on rank 0 in communicator MPI_COMM_WORLD +with errorcode 1. + +NOTE: invoking MPI_ABORT causes Open MPI to kill all MPI processes. 
+You may or may not see output from other processes, depending on +exactly when Open MPI kills them. +-------------------------------------------------------------------------- +", walltime: -1 } +in.lj: { folder: examples/COUPLE/simple, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 0.0, walltime_norm: 0.0 } +in.body: { folder: examples/body, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.cubes: { folder: examples/body, status: "completed", failed_checks: { abs_diff_failed: 2, rel_diff_failed: 2 }, walltime: 36.0, walltime_norm: 6.0 } +in.pour3d: { folder: examples/body, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.squares: { folder: examples/body, status: "completed", failed_checks: { abs_diff_failed: 3, rel_diff_failed: 3 }, walltime: 3.0, walltime_norm: 0.5 } +in.wall2d: { folder: examples/body, status: "completed", failed_checks: { abs_diff_failed: 3, rel_diff_failed: 3 }, walltime: 2.0, walltime_norm: 0.3333333333333333 } +in.atm: { folder: examples/atm, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 13.0, walltime_norm: 2.1666666666666665 } +in.ar.lj: { folder: examples/UNITS, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 0.0, walltime_norm: 0.0 } +in.ar.metal: { folder: examples/UNITS, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 0.0, walltime_norm: 0.0 } +in.ar.real: { folder: examples/UNITS, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 0.0, walltime_norm: 0.0 } +in.mos2-bulk: { folder: examples/threebody, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.mos2.rebomos: { folder: examples/threebody, status: 
"completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 21.0, walltime_norm: 3.5 } +in.mos2.sw.mod: { folder: examples/threebody, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 1.0, walltime_norm: 0.16666666666666666 } +in.threebody: { folder: examples/threebody, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 1.0, walltime_norm: 0.16666666666666666 } +in.nemd: { folder: examples/nemd, status: "completed", failed_checks: { abs_diff_failed: 3, rel_diff_failed: 3 }, walltime: 0.0, walltime_norm: 0.0 } +in.obstacle: { folder: examples/obstacle, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 0.0, walltime_norm: 0.0 } +in.crack: { folder: examples/crack, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 3.0, walltime_norm: 0.5 } +in.elastic: { folder: examples/ELASTIC, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 0.0, walltime_norm: 0.0 } +in.peri-pmb: { folder: examples/peri, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 9.0, walltime_norm: 1.5 } +in.peri.eps: { folder: examples/peri, status: "completed", failed_checks: { abs_diff_failed: 3, rel_diff_failed: 3 }, walltime: 21.0, walltime_norm: 3.5 } +in.peri.lps: { folder: examples/peri, status: "completed", failed_checks: { abs_diff_failed: 3, rel_diff_failed: 3 }, walltime: 17.0, walltime_norm: 2.8333333333333335 } +in.peri.pmb: { folder: examples/peri, status: "completed", failed_checks: { abs_diff_failed: 0, rel_diff_failed: 0 }, walltime: 9.0, walltime_norm: 1.5 } +in.peri.ves: { folder: examples/peri, status: "completed", failed_checks: { abs_diff_failed: 3, rel_diff_failed: 3 }, walltime: 21.0, walltime_norm: 3.5 } +in.hyper.global: { folder: examples/hyper, status: "completed, numerical checks skipped due to missing 
the reference log file", walltime: 53.0, walltime_norm: 8.833333333333334 } +in.hyper.local: { folder: examples/hyper, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 13.0, walltime_norm: 2.1666666666666665 } +in.spce: { folder: examples/rdf-adf, status: "completed", failed_checks: { abs_diff_failed: 6, rel_diff_failed: 6 }, walltime: 88.0, walltime_norm: 14.666666666666666 } +in.elastic: { folder: examples/ELASTIC_T/BORN_MATRIX/Silicon, status: "completed, error parsing log.lammps into YAML", walltime: 62.0, walltime_norm: 10.333333333333334 } +in.ljcov: { folder: examples/ELASTIC_T/BORN_MATRIX/Argon/Analytical, status: "failed, no Total wall time in the output, timeout (180s expired)", walltime: -1 } +in.ljcov: { folder: examples/ELASTIC_T/BORN_MATRIX/Argon/Numdiff, status: "failed, no Total wall time in the output, timeout (180s expired)", walltime: -1 } +in.elastic: { folder: examples/ELASTIC_T/DEFORMATION/Silicon, status: "completed, numerical checks skipped due to missing the reference log file", walltime: 6.0, walltime_norm: 1.0 } diff --git a/tools/regression-tests/run_tests.py b/tools/regression-tests/run_tests.py old mode 100644 new mode 100755 index b2144478ec..63fa8c59c1 --- a/tools/regression-tests/run_tests.py +++ b/tools/regression-tests/run_tests.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 ''' -UPDATE: August 13, 2024: +UPDATE: September 4, 2024: Launching the LAMMPS binary under testing using a configuration defined in a yaml file (e.g. config.yaml). Comparing the output thermo with that in the existing log file (with the same nprocs) + data in the log files are extracted and converted into yaml data structure @@ -9,55 +9,81 @@ UPDATE: August 13, 2024: With the current features, users can: + specify which LAMMPS binary version to test (e.g., the version from a commit, or those from `lammps-testing`) + specify the examples subfolders (thus the reference log files) seperately (e.g. 
from other LAMMPS versions or commits) + + specify the list of examples input scripts to test + specify tolerances for individual quantities for any input script to override the global values - + launch tests with `mpirun` with all supported command line features (multiple procs, multiple paritions, and suffices) + + launch tests with `mpirun` with all supported command line features (multiple procs, multiple partitions, and suffixes) + skip certain input files (whose names match specified patterns) if not interested, or packaged not installed, or no reference log file exists - + simplify the main LAMMPS builds, as long as a LAMMPS binary is available - + keep track of the testing progress to resume the testing from the last checkpoint (skipping completed runs) - + distribute the input list across multiple processes via multiprocessing, or - split the list of input scripts into separate runs (there are 800+ input script under the top-level examples) + + set a timeout for every input script run if they may take too long + + skip numerical checks if the goal is just to check if the runs do not fail + +Some benefits include: + + + separating regression testing from building LAMMPS + + performing quick and full regression tests + + keeping track of the testing progress to resume the testing from the last checkpoint (skipping completed runs) + + distributing the input list across multiple processes by + splitting the list of input scripts into separate runs (there are ~800 input scripts under the top-level examples) + + generating new reference log files if desirable + +Input arguments: + + the path to a LAMMPS binary (can be relative to the working directory) + + a test configuration file (see tools/regression-tests/config.yaml for an example) + + a text file that lists the folders where the input scripts reside and how many of them line by line, or + a text file that lists the input scripts, or + the path to the top-level examples + +Output: + + failure.yaml : list of the 
failed runs and reasons + + progress.yaml: full testing results of the tested input scripts with the status (completed, failed or skipped) + with error messages (for failed runs), and walltime (in seconds) + + output.xml : testing results in the JUnit XML format + + run.log : screen output and error of individual runs Limitations: - input scripts use thermo style multi (e.g., examples/peptide) do not work with the expected thermo output format - input scripts that require partition runs (e.g. examples/neb) need a separate config file, e.g. args: "--partition 3x1" - testing accelerator packages (GPU, INTEL, KOKKOS, OPENMP) need separate config files, "args: -sf omp -pk omp 4" -TODO: - + be able to be invoked from run_tests in the lammps-testing infrastruture - The following Python packages need to be installed into an activated environment: - + python3 -m venv testing-env source testing-env/bin/activate pip install numpy pyyaml junit_xml -Example usage: +Example usage (aka, tests for this script): 1) Simple use (using the provided tools/regression-tests/config.yaml and the examples/ folder at the top level) - python3 run_tests.py --lmp-bin=/path/to/lmp_binary + python3 run_tests.py --lmp-bin=build/lmp --config-file=tools/regression-tests/config.yaml 2) Use a custom testing configuration python3 run_tests.py --lmp-bin=/path/to/lmp_binary --config-file=/path/to/config/file/config.yaml 3) Specify a list of example folders python3 run_tests.py --lmp-bin=/path/to/lmp_binary --config-file=/path/to/config/file/config.yaml \ - --example-folders="/path/to/examples/folder1;/path/to/examples/folder2" + --example-folders="/path/to/examples/melt;/path/to/examples/rigid" - The example folders can also be loaded from a text file list_subfolders1.txt: + The example subfolders can also be loaded from a text file list_subfolders1.txt: python3 run_tests.py --lmp-bin=/path/to/lmp_binary --config-file=/path/to/config/file/config.yaml \ - --list-input=list_subfolders1.txt 
--output-file=output1.txt --progress-file=progress1.yaml \ + --list-subfolders=list_subfolders1.txt --output-file=output1.txt --progress-file=progress1.yaml \ --log-file=run1.log - - 4) Test a LAMMPS binary with the whole top-level /examples folder in a LAMMPS source tree - python3 run_tests.py --lmp-bin=/path/to/lmp_binary --examples-top-level=/path/to/lammps/examples - 5) Analyze the LAMMPS binary annd whole top-level /examples folder in a LAMMPS source tree + 4) Specify a list of example input scripts (e.g. obtained from running tools/regression-tests/get-quick-list.py) + python3 run_tests.py --lmp-bin=/path/to/lmp_binary --config-file=/path/to/config/file/config.yaml \ + --list-input=input_list.txt + + 5) Test a LAMMPS binary with the whole top-level /examples folder in a LAMMPS source tree + python3 run_tests.py --lmp-bin=/path/to/lmp_binary --examples-top-level=/path/to/lammps/examples + --config-file=tools/regression-tests/config_serial.yaml + + 6) Analyze the LAMMPS binary and whole top-level /examples folder in a LAMMPS source tree and generate separate input lists for 8 workers: python3 run_tests.py --lmp-bin=/path/to/lmp_binary --examples-top-level=/path/to/lammps/examples \ --analyze --num-workers=8 - This is used for splitting the subfolders into separate input lists and launching different instances - of run_tests.py simultaneously. + The output of this run is 8 files folder-list-[0-7].txt that lists the subfolders + and 8 files input-list-[0-7].txt that lists the input scripts under the top-level example folders. + With these lists, one can launch multiple instances of run_tests.py simultaneously + each with a list of example subfolders (Case 3), or with a list of input scripts (Case 4). 
''' from argparse import ArgumentParser @@ -65,8 +91,10 @@ import datetime import fnmatch import logging import os +import random import re import subprocess +import sys #from multiprocessing import Pool # need "pip install numpy pyyaml" @@ -81,6 +109,13 @@ try: except ImportError: from yaml import SafeLoader as Loader +# infer top level LAMMPS dir from filename +LAMMPS_DIR = os.path.realpath(os.path.join(os.path.dirname(__file__), '..', '..')) + +# import git interface module +sys.path.append(os.path.realpath(os.path.join(LAMMPS_DIR, 'tools', 'regression-tests'))) +import get_quick_list + ''' data structure to store the test result ''' @@ -95,20 +130,21 @@ class TestResult: ''' Iterate over a list of input folders and scripts using the given lmp_binary and the testing configuration - lmp_binary : full path to the LAMMPS binary + lmp_binary : full path to the LAMMPS binary input_folder : the absolute path to the input files input_list : list of the input scripts under the input_folder config : the dict that contains the test configuration - - output_buf: placeholder for storing the output of a given worker + walltime_ref : reference walltime return results : a list of TestResult objects stat : a dictionary that lists the number of passed, skipped, failed tests progress_file: yaml file that stores the tested input script and status + failure_file : file that reports the failed runs (a subset of progress_file) last_progress: the dictionary that shows the status of the last tests + output_buf: placeholder for storing the output of a given worker ''' -def iterate(lmp_binary, input_folder, input_list, config, results, progress_file, last_progress=None, output_buf=None): +def iterate(lmp_binary, input_folder, input_list, config, results, progress_file, failure_file, walltime_ref=1, verbose=False, last_progress=None, output_buf=None): num_tests = len(input_list) num_completed = 0 @@ -122,10 +158,15 @@ def iterate(lmp_binary, input_folder, input_list, config, results, 
progress_file using_markers = False EPSILON = np.float64(config['epsilon']) nugget = float(config['nugget']) + genref = config['genref'] + compiler = config['compiler'] use_valgrind = False if 'valgrind' in config['mpiexec']: use_valgrind = True + # record all the failed runs + failure = open(failure_file, "a") + # iterate over the input scripts for input in input_list: @@ -135,13 +176,18 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file else: progress = open(progress_file, "w") + # walltime = -2: skipped tests + # -1: failed tests + # >= 0: walltime in seconds (e.g. in.melt walltime = 0.2 seconds) + walltime = -2 + # skip the input file if listed in the config file or matched with a pattern if 'skip' in config: if input in config['skip']: msg = " + " + input + f" ({test_id+1}/{num_tests}): skipped as specified in {configFileName}" print(msg) logger.info(msg) - progress.write(f"{input}: {{ folder: {input_folder}, status: \"skipped\" }}\n") + progress.write(f"{input}: {{ folder: {input_folder}, status: \"skipped\", walltime: {walltime} }}\n") progress.close() num_skipped = num_skipped + 1 test_id = test_id + 1 @@ -150,6 +196,7 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file matched_pattern = False for skipped_files in config['skip']: if '*' in skipped_files: + # check input script name e.g. 
in.*_imd* if fnmatch.fnmatch(input, skipped_files): matched_pattern = True break @@ -158,7 +205,7 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file msg = " + " + input + f" ({test_id+1}/{num_tests}): skipped as specified in {configFileName}" print(msg) logger.info(msg) - progress.write(f"{input}: {{ folder: {input_folder}, status: \"skipped\" }}\n") + progress.write(f"{input}: {{ folder: {input_folder}, status: \"skipped\", walltime: {walltime} }}\n") progress.close() num_skipped = num_skipped + 1 test_id = test_id + 1 @@ -176,7 +223,7 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file num_skipped = num_skipped + 1 test_id = test_id + 1 continue - + if 'packaged not installed' in status: msg = " + " + input + f" ({test_id+1}/{num_tests}): due to package not installed (see {progress_file})" logger.info(msg) @@ -186,14 +233,14 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file num_skipped = num_skipped + 1 test_id = test_id + 1 continue - + # if annotating input scripts with REG markers is True if using_markers == True: input_test = 'test.' + input if os.path.isfile(input) == True: if has_markers(input): process_markers(input, input_test) - + else: print(f"WARNING: {input} does not have REG markers") input_markers = input + '.markers' @@ -204,7 +251,7 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file os.system(cmd_str) generate_markers(input, input_markers) process_markers(input_markers, input_test) - + else: # else the same file name for testing input_test = input @@ -212,7 +259,7 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file str_t = " + " + input_test + f" ({test_id+1}/{num_tests})" logger.info(str_t) print(str_t) - + # check if a reference log file exists in the current folder: log.DDMMMYY.basename.g++.[nprocs] # assuming that input file names start with "in." 
(except in.disp, in.disp2 and in.dos in phonon/) basename = input_test[3:] @@ -234,7 +281,8 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file if fnmatch.fnmatch(file, pattern): p = file.rsplit('.', 1) if p[1].isnumeric(): - if use_valgrind == True: + # if using valgrind or running in serial, then use the log file with 1 proc + if use_valgrind == True or config['mpiexec'] == "": if int(p[1]) == 1: max_np = int(p[1]) ref_logfile_exist = True @@ -249,14 +297,15 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file # if there is no ref log file and not running with valgrind if ref_logfile_exist == False and use_valgrind == False: max_np = 4 - - # if the maximum number of procs is different from the value in the configuration file - # then override the setting for this input script + saved_nprocs = config['nprocs'] + + # if the maximum number of procs is different from the value in the configuration file + # then override the setting for this particular input script if max_np != int(config['nprocs']): config['nprocs'] = str(max_np) - # store the value of nprocs + # store the value of nprocs nprocs = int(config['nprocs']) # if valgrind is used for mem check, the run command will be @@ -267,16 +316,19 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file config['mpiexec_numproc_flag'] = "" nprocs = 1 + # default walltime value of failed tests + walltime = -1 + result = TestResult(name=input, output="", time="", status="passed") # run the LAMMPS binary with the input script - cmd_str, output, error, returncode = execute(lmp_binary, config, input_test) + cmd_str, output, error, returncode, logfilename = execute(lmp_binary, config, input_test) # restore the nprocs value in the configuration config['nprocs'] = saved_nprocs # check if the output contains ERROR - # there might not be a log.lammps generated at this point, or only log.lammps contains only the date line + # there might 
not be a log file generated at this point, or only the log file contains only the date line if "ERROR" in output: error_line = "" for line in output.split('\n'): @@ -284,13 +336,13 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file error_line = line break logger.info(f" The run terminated with {input_test} gives the following output:") - logger.info(f" {error_line}") + logger.info(f" {error_line}") if "Unrecognized" in output: - result.status = f"error, unrecognized command, package not installed, {error_line}" + result.status = f"failed, unrecognized command, package not installed, {error_line}" elif "Unknown" in output: - result.status = f"error, unknown command, package not installed, {error_line}" + result.status = f"failed, unknown command, package not installed, {error_line}" else: - result.status = f"error, {error_line}." + result.status = f"failed, {error_line}." logger.info(f" Output:") logger.info(f" {output}") @@ -298,57 +350,119 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file num_error = num_error + 1 results.append(result) - progress.write(f"{input}: {{ folder: {input_folder}, status: \"{result.status}\" }}\n") - progress.close() + print(f"{result.status}") + msg = f"{input}: {{ folder: {input_folder}, status: \"{result.status}\", walltime: {walltime} }}\n" + progress.write(msg) + progress.close() + failure.write(msg) + + test_id = test_id + 1 + continue + + # check if a log file log.{basename}.{nprocs} exists in the current folder + if os.path.isfile(logfilename) == False: + msg = f" failed, no log.{basename}.{nprocs} generated with {input_test} with return code {returncode}.\n" + print(msg) + logger.info(msg) + logger.info(f" Output:") + logger.info(f" {output}") + logger.info(f" Error:\n{error}") + + msg = f"{input}: {{ folder: {input_folder}, status: \"failed, no log file generated\", walltime: {walltime} }}\n" + progress.write(msg) + progress.close() + failure.write(msg) + + 
num_error = num_error + 1 + test_id = test_id + 1 + continue + else: + # generate a new log file whose name has the format of log.{date}.{basename}.{compiler}.{nprocs} + if genref == True: + dmy = datetime.datetime.now() + date = dmy.strftime("%d%b%y") + # assume g++ for now, but is be available from running "lmp_binary -h" + compiler = "g++" + cmd_str = f"cp log.{basename}.{nprocs} log.{date}.{basename}.{compiler}.{nprocs}" + p = subprocess.run(cmd_str, shell=True, text=True, capture_output=True) + + # if skip numerical checks, then skip the rest + if skip_numerical_check == True: + msg = "completed, skipping numerical checks" + if use_valgrind == True: + if "All heap blocks were freed" in error: + msg += ", no memory leak" + else: + msg += ", memory leaks detected" + num_memleak = num_memleak + 1 + result.status = msg + results.append(result) + + msg = f"{input}: {{ folder: {input_folder}, status: \"{msg}\", walltime: {walltime} }}\n" + progress.write(msg) + progress.close() + failure.write(msg) + + # count the number of completed runs + num_completed = num_completed + 1 test_id = test_id + 1 continue # if there is no ERROR in the output, but there is no Total wall time printed out if "Total wall time" not in output: - logger.info(f" ERROR: no Total wall time in the output.\n") + msg = f" failed, no Total wall time in the output.\n" + print(msg) + logger.info(msg) logger.info(f"\n{input_test}:") logger.info(f"\n Output:\n{output}") logger.info(f"\n Error:\n{error}") - progress.write(f"{input}: {{ folder: {input_folder}, status: \"error, no Total wall time in the output.\" }}\n") + + msg = f"{input}: {{ folder: {input_folder}, status: \"failed, no Total wall time in the output, {error}\", walltime: {walltime} }}\n" + progress.write(msg) progress.close() + failure.write(msg) + num_error = num_error + 1 test_id = test_id + 1 continue + # NOTE: Total wall time could be 00:00:00 whereas Loop time is non-zero seconds + walltime_norm = 1.0 + for line in 
output.split('\n'): + if "Total wall time" in line: + walltime_str = line.split('time:')[1] + hms = walltime_str.split(':') + hours = float(hms[0]) + minutes = float(hms[1]) + seconds = float(hms[2]) + walltime = hours * 3600.0 + minutes * 60.0 + seconds + walltime_norm = float(walltime) / float(walltime_ref) + break + # if there is no Step or no Loop printed out if "Step" not in output or "Loop" not in output: - logger.info(f" ERROR: no Step nor Loop in the output.\n") + msg = f" completed, but no Step nor Loop in the output.\n" + print(msg) + logger.info(msg) logger.info(f"\n{input_test}:") logger.info(f"\n Output:\n{output}") logger.info(f"\n Error:\n{error}") - progress.write(f"{input}: {{ folder: {input_folder}, status: \"error, no Step nor Loop in the output.\" }}\n") + + msg = f"{input}: {{ folder: {input_folder}, status: \"completed, but no Step nor Loop in the output.\", walltime: {walltime}, walltime_norm: {walltime_norm} }}\n" + progress.write(msg) progress.close() + failure.write(msg) + num_error = num_error + 1 test_id = test_id + 1 continue - # check if a log.lammps file exists in the current folder - if os.path.isfile("log.lammps") == False: - logger.info(f" ERROR: No log.lammps generated with {input_test} with return code {returncode}.\n") - logger.info(f" Output:") - logger.info(f" {output}") - logger.info(f" Error:\n{error}") - progress.write(f"{input}: {{ folder: {input_folder}, status: \"error, no log.lammps\" }}\n") - progress.close() - num_error = num_error + 1 - test_id = test_id + 1 - continue - else: - # save a copy of the log file for further inspection - cmd_str = f"cp log.lammps log.{basename}.{nprocs}" - p = subprocess.run(cmd_str, shell=True, text=True, capture_output=True) - - # parse thermo output in log.lammps from the run - thermo = extract_data_to_yaml("log.lammps") + # parse thermo output in the log file from the run + thermo = extract_data_to_yaml(logfilename) num_runs = len(thermo) - # the run completed normally but log.lammps 
may not be friendly for parsing into YAML format if num_runs == 0: logger.info(f" The run terminated with {input_test} gives the following output:") logger.info(f" {output}") @@ -361,16 +475,19 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file msg += ", memory leaks detected" num_memleak = num_memleak + 1 - result.status = msg + ", error parsing log.lammps into YAML" + result.status = msg + f", error parsing {logfilename} into YAML" results.append(result) - progress.write(f"{input}: {{ folder: {input_folder}, status: \"{result.status}\" }}\n") + progress.write(f"{input}: {{ folder: {input_folder}, status: \"{result.status}\", walltime: {walltime}, walltime_norm: {walltime_norm} }}\n") progress.close() + if verbose == True: + print(result.status) + num_completed = num_completed + 1 test_id = test_id + 1 continue - # At this point, the run completed without trivial errors, proceed with numerical checks + # At this point, the run completed without trivial errors, proceed with numerical checks for thermo output # check if there is a reference log file for this input if ref_logfile_exist: # parse the thermo output in reference log file @@ -378,17 +495,18 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file if thermo_ref: num_runs_ref = len(thermo_ref) else: - # dictionary is empty - logger.info(f" ERROR: Error parsing the reference log file {thermo_ref_file}.") + # the thermo_ref dictionary is empty + logger.info(f" failed, error parsing the reference log file {thermo_ref_file}.") result.status = "skipped numerical checks due to parsing the reference log file" results.append(result) - progress.write(f"{input}: {{ folder: {input_folder}, status: \"completed, numerical checks skipped, unsupported log file format\" }}\n") + progress.write(f"{input}: {{ folder: {input_folder}, status: \"completed, numerical checks 
skipped, unsupported log file format\", walltime: {walltime}, walltime_norm: {walltime_norm} }}\n") progress.close() + num_completed = num_completed + 1 num_error = num_error + 1 test_id = test_id + 1 continue else: - msg = f" Cannot find the reference log file for {input_test} with the expected format log.[date].{basename}.*.[nprocs]" + msg = f" failed, cannot find the reference log file for {input_test} with the expected format log.[date].{basename}.*.[nprocs]" logger.info(msg) print(msg) # attempt to read in the thermo yaml output from the working directory (the following section will be deprecated) @@ -398,26 +516,30 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file thermo_ref = extract_thermo(thermo_ref_file) num_runs_ref = len(thermo_ref) else: - # mostly will come to here if the reference log file does not exist + # most likely to reach here if the reference log file does not exist logger.info(f" {thermo_ref_file} also does not exist in the working directory.") result.status = "skipped due to missing the reference log file" results.append(result) - progress.write(f"{input}: {{ folder: {input_folder}, status: \"completed, numerical checks skipped, missing the reference log file\" }}\n") + + msg = f"{input}: {{ folder: {input_folder}, status: \"completed, numerical checks skipped due to missing the reference log file\", walltime: {walltime}, walltime_norm: {walltime_norm} }}\n" + progress.write(msg) progress.close() + failure.write(msg) + num_completed = num_completed + 1 num_error = num_error + 1 test_id = test_id + 1 continue - logger.info(f" Comparing thermo output from log.lammps against the reference log file {thermo_ref_file}") + logger.info(f" Comparing thermo output from {logfilename} against the reference log file {thermo_ref_file}") # check if the number of runs matches with that in the reference log file # maybe due to some changes to the input where the ref log file is not updated yet if num_runs != num_runs_ref: - 
logger.info(f" ERROR: Number of runs in log.lammps ({num_runs}) is different from that in the reference log ({num_runs_ref})." + logger.info(f" ERROR: Number of runs in {logfilename} ({num_runs}) is different from that in the reference log ({num_runs_ref})." " Check README in the folder, possibly due to using mpirun with partitions or parsing the wrong reference log file.") - result.status = "error, incomplete runs" + result.status = "failed, incomplete runs" results.append(result) - progress.write(f"{input}: {{ folder: {input_folder}, status: \"{result.status}\" }}\n") + progress.write(f"{input}: {{ folder: {input_folder}, status: \"{result.status}\", walltime: {walltime}, walltime_norm: {walltime_norm} }}\n") progress.close() num_error = num_error + 1 test_id = test_id + 1 @@ -429,11 +551,11 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file num_fields = len(thermo[0]['keywords']) num_fields_ref = len(thermo_ref[0]['keywords']) if num_fields != num_fields_ref: - logger.info(f" ERROR: Number of thermo colums in log.lammps ({num_fields}) is different from that in the reference log ({num_fields_ref}) in the first run.") + logger.info(f" failed, number of thermo colums in {logfilename} ({num_fields}) is different from that in the reference log ({num_fields_ref}) in the first run.") logger.info(f" Check both log files for more details.") - result.status = "error, mismatched columns in the log files" + result.status = "failed, mismatched columns in the log files" results.append(result) - progress.write(f"{input}: {{ folder: {input_folder}, status: \"{result.status}\" }}\n") + progress.write(f"{input}: {{ folder: {input_folder}, status: \"{result.status}\", walltime: {walltime}, walltime_norm: {walltime_norm} }}\n") progress.close() num_error = num_error + 1 test_id = test_id + 1 @@ -442,9 +564,9 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file # comparing output vs reference values width = 20 if 
verbose == True: - print("Quantities".ljust(width) + "Output".center(width) + "Reference".center(width) + + print(" Quantities".ljust(width) + "Output".center(width) + "Reference".center(width) + "Abs Diff Check".center(width) + "Rel Diff Check".center(width)) - + # check if overrides for this input scipt is specified overrides = {} if 'overrides' in config: @@ -464,7 +586,7 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file num_fields = len(thermo[irun]['keywords']) num_fields_ref = len(thermo_ref[irun]['keywords']) if num_fields != num_fields_ref: - logger.info(f" ERROR: Number of thermo columns in log.lammps ({num_fields})") + logger.info(f" failed: Number of thermo columns in {logfilename} ({num_fields})") logger.info(f" is different from that in the reference log ({num_fields_ref}) in run {irun}.") mismatched_columns = True continue @@ -490,7 +612,7 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file abs_diff_check = "PASSED" rel_diff_check = "PASSED" - + if quantity in config['tolerance'] or quantity in overrides: if quantity in config['tolerance']: @@ -516,42 +638,48 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file else: # N/A means that tolerances are not defined in the config file abs_diff_check = "N/A" - rel_diff_check = "N/A" + rel_diff_check = "N/A" if verbose == True and abs_diff_check != "N/A" and rel_diff_check != "N/A": - print(f"{thermo[irun]['keywords'][i].ljust(width)} {str(val).rjust(20)} {str(ref).rjust(20)} " - "{abs_diff_check.rjust(20)} {rel_diff_check.rjust(20)}") + print(f" {thermo[irun]['keywords'][i].ljust(width)} {str(val).rjust(20)} {str(ref).rjust(20)} {abs_diff_check.rjust(20)} {rel_diff_check.rjust(20)}") # after all runs completed, or are interrupted in one of the runs (mismatched_columns = True) + if mismatched_columns == True: - msg = f" mismatched log files after the first run. Check both log files for more details." 
+ msg = f" mismatched log files after the first run. Check both log files for more details." print(msg) logger.info(msg) - result.status = "failed" + result.status = "thermo checks failed due to mismatched log files after the first run" + result.status = "" if num_abs_failed > 0: - msg = f" {num_abs_failed} abs diff thermo checks failed." + msg = f" {num_abs_failed} abs diff checks failed." print(msg) logger.info(msg) - result.status = "failed" + #result.status = f"abs_diff_failed: {num_abs_failed}, " if verbose == True: - for i in failed_abs_output: - print(f"- {i}") - if num_rel_failed > 0: - msg = f" {num_rel_failed} rel diff thermo checks failed." - print(msg) - logger.info(msg) - result.status = "failed" - if verbose == True: - for i in failed_rel_output: - print(f"- {i}") - if num_abs_failed == 0 and num_rel_failed == 0: - msg = f" all {num_checks} thermo checks passed." - print(msg) - logger.info(msg) - result.status = "passed" - num_passed = num_passed + 1 + for out in failed_abs_output: + print(f" - {out}") + if num_rel_failed > 0: + msg = f" {num_rel_failed} rel diff checks failed." + print(msg) + logger.info(msg) + #result.status += f"rel_diff_failed: {num_rel_failed}" + if verbose == True: + for out in failed_rel_output: + print(f" - {out}") + + if num_abs_failed == 0 and num_rel_failed == 0: + msg = f" all {num_checks} checks passed." + print(msg) + logger.info(msg) + #result.status = f"all {num_checks} checks passed." 
+ num_passed = num_passed + 1 + else: + num_error = num_error + 1 + + result.status = f"abs_diff_failed: {num_abs_failed}, rel_diff_failed: {num_rel_failed}" results.append(result) # check if memleak detects from valgrind run (need to replace "mpirun" -> valgrind --leak-check=yes mpirun") @@ -563,13 +691,20 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file msg += ", memory leaks detected" num_memleak = num_memleak + 1 - progress.write(f"{input}: {{ folder: {input_folder}, status: \"{msg}\" }}\n") + progress.write(f"{input}: {{ folder: {input_folder}, status: \"{msg}\", failed_checks: {{ {result.status} }}, walltime: {walltime}, walltime_norm: {walltime_norm} }}\n") progress.close() + # write to failure if there is any numerical failed check + if num_abs_failed > 0 or num_rel_failed > 0: + failure.write(f"{input}: {{ folder: {input_folder}, status: \"{msg}\", failed_checks: {{ {result.status} }}, walltime: {walltime}, walltime_norm: {walltime_norm} }}\n") + # count the number of completed runs num_completed = num_completed + 1 test_id = test_id + 1 + # close the failure file + failure.close() + stat = { 'num_completed': num_completed, 'num_passed': num_passed, 'num_skipped': num_skipped, @@ -590,7 +725,7 @@ def iterate(lmp_binary, input_folder, input_list, config, results, progress_file of output and the inner list the values of the columns matching the header keywords for that step. 
''' def extract_thermo(yamlFileName): - docs = "" + docs = "" with open(yamlFileName) as f: for line in f: m = re.search(r"^(keywords:.*$|data:$|---$|\.\.\.$| - \[.*\]$)", line) @@ -627,7 +762,7 @@ def extract_data_to_yaml(inputFileName): if "Loop" in line: reading = False docs += "...\n" - + if reading == True and "Step" not in line: if "WARNING" in line: continue @@ -662,7 +797,7 @@ def extract_data_to_yaml(inputFileName): return thermo ''' - return a tuple of the list of installed packages, OS, GitInfo and compile_flags + return a dictionary of the list of installed packages, OS, GitInfo, compiler and compile_flags ''' def get_lammps_build_configuration(lmp_binary): cmd_str = lmp_binary + " -h" @@ -672,6 +807,8 @@ def get_lammps_build_configuration(lmp_binary): reading = False operating_system = "" GitInfo = "" + compiler = "g++" + compiler_full = "" row = 0 for line in output: if line != "": @@ -687,7 +824,12 @@ def get_lammps_build_configuration(lmp_binary): operating_system = line if "Git info" in line: GitInfo = line - + if "Compiler" in line: + compiler_full = line + if "GNU" in line: + compiler = "g++" + if "Intel" in line: + compiler = "icc" row += 1 packages = packages.strip() @@ -698,24 +840,124 @@ def get_lammps_build_configuration(lmp_binary): if line != "": if "-DLAMMPS" in line: compile_flags += " " + line.strip() - + row += 1 - return packages.split(" "), operating_system, GitInfo, compile_flags + installed_packages = packages.split(" ") + build_config = { + 'installed_packages': installed_packages, + 'operating_system': operating_system, + 'git_info': GitInfo, + 'compiler': compiler, + 'compiler_full': compiler_full, + 'compile_flags': compile_flags, + } + + return build_config ''' launch LAMMPS using the configuration defined in the dictionary config with an input file - TODO: - - generate new reference values if needed - - wrap subprocess with try/catch to handle exceptions + return + - cmd_str: the complete command used to launch LAMMPS with 
the input + - stdout: stdout of the process + - stderr: stderr of the process + - errorcode: error code returned by the process + - logfilename: the log file name for the given input + to avoid duplicate writes to log.lammps if multiple workers execute in the same folder ''' -def execute(lmp_binary, config, input_file_name, generate_ref_yaml=False): - cmd_str = config['mpiexec'] + " " + config['mpiexec_numproc_flag'] + " " + config['nprocs'] + " " - cmd_str += lmp_binary + " -in " + input_file_name + " " + config['args'] - logger.info(f" Executing: {cmd_str}") - p = subprocess.run(cmd_str, shell=True, text=True, capture_output=True) +def execute(lmp_binary, config, input_file_name, generate_ref=False): + cmd_str = "" + # check if mpiexec/mpirun is used + if config['mpiexec']: + cmd_str += config['mpiexec'] + " " + config['mpiexec_numproc_flag'] + " " + config['nprocs'] + " " - return cmd_str, p.stdout, p.stderr, p.returncode + # write to a log file with format log.{basename}.{nprocs} + basename = input_file_name[3:] + logfilename = f"log.{basename}.{config['nprocs']}" + + cmd_str += lmp_binary + " -in " + input_file_name + " " + config['args'] + " -log " + logfilename + + logger.info(f" Executing: {cmd_str}") + # set a timeout (in seconds) for each run + timeout = 60 + if 'timeout' in config: + if config['timeout'] != "": + timeout = int(config['timeout']) + + try: + p = subprocess.run(cmd_str, shell=True, text=True, capture_output=True, timeout=timeout) + return cmd_str, p.stdout, p.stderr, p.returncode, logfilename + + except subprocess.TimeoutExpired: + msg = f" Timeout for: {cmd_str} ({timeout}s expired)" + logger.info(msg) + print(msg) + + error_str = f"timeout ({timeout}s expired)" + return cmd_str, "", error_str, -1, logfilename + +''' + get the reference walltime by running the lmp_binary with config with an input script in the bench/ folder + in.lj is suitable as it doesn't need any potential file, nor any extra packages +''' +def 
get_reference_walltime(lmp_binary, config): + cmd_str = "" + # check if mpiexec/mpirun is used + if config['mpiexec']: + cmd_str += config['mpiexec'] + " " + config['mpiexec_numproc_flag'] + " " + config['nprocs'] + " " + + # guess the build folder path + lmp_build_folder = lmp_binary.rsplit('/', 1)[0] + + # guess the bench folder + lmp_bench_folder = lmp_build_folder + "/../bench/" + + # run with replicate for a copple of seconds long run + cmd_str += lmp_binary + " -in " + lmp_bench_folder + "in.lj -v x 2 -v y 2 -v z 1 " + config['args'] + + logger.info(f" Executing for reference walltime: {cmd_str}") + + # walltime = -1 indicates some timeout (issues) + walltime = -1 + + # set a timeout for this reference run + timeout = 60 + output = "" + try: + p = subprocess.run(cmd_str, shell=True, text=True, capture_output=True, timeout=timeout) + output = p.stdout + + except subprocess.TimeoutExpired: + msg = f" Timeout for: {cmd_str} ({timeout}s expired)" + logger.info(msg) + print(msg) + + for line in output.split('\n'): + if "Total wall time" in line: + walltime_str = line.split('time:')[1] + hms = walltime_str.split(':') + hours = float(hms[0]) + minutes = float(hms[1]) + seconds = float(hms[2]) + walltime = hours * 3600.0 + minutes * 60.0 + seconds + + logger.info(f" Reference walltime, sec = {walltime}") + + return walltime + +''' + infer the tools/regression-tests folder from the absolute path to lmp_binary + return the default config file path tools/regression-tests/config.yaml +''' +def get_default_config(lmp_binary): + # guess the build folder path + lmp_build_folder = lmp_binary.rsplit('/', 1)[0] + + # guess the tools/regression-tests folder + regression_tests_folder = lmp_build_folder + "/../tools/regression-tests/" + + defaultConfigFile = regression_tests_folder + "config.yaml" + return defaultConfigFile ''' split a list into a list of N sublists @@ -732,7 +974,7 @@ def execute(lmp_binary, config, input_file_name, generate_ref_yaml=False): for i in 
range(num_workers): args.append((input1, input2, output_buf)) - with Pool(num_workers) as pool: + with Pool(num_workers) as pool: results = pool.starmap(func, args) ''' def divide_into_N(original_list, N): @@ -759,7 +1001,7 @@ def process_markers(inputFileName, outputFileName): # replace #REG:ADD with empty string (i.e. adding the text at the end of the line) data = data.replace("#REG:ADD", "") - # replace the line contaning #REG:SUB with a line with the text that follows this marker + # replace the line contaning #REG:SUB with a line with the text that follows this marker data = data.splitlines() separator="#REG:SUB" out = [] @@ -823,14 +1065,21 @@ if __name__ == "__main__": lmp_binary = "" configFileName = "config.yaml" example_subfolders = [] + example_inputs = [] example_toplevel = "" genref = False verbose = False output_file = "output.xml" progress_file = "progress.yaml" + failure_file = "failure.yaml" log_file = "run.log" list_input = "" + list_subfolders = "" analyze = False + quick = False + quick_branch = "origin/develop" + quick_max = 50 + quick_reference = os.path.join(LAMMPS_DIR, 'tools', 'regression-tests', 'reference.yaml') # distribute the total number of input scripts over the workers num_workers = 1 @@ -838,93 +1087,178 @@ if __name__ == "__main__": # parse the arguments parser = ArgumentParser() parser.add_argument("--lmp-bin", dest="lmp_binary", default="", help="LAMMPS binary") - parser.add_argument("--config-file", dest="config_file", default=configFileName, - help="Configuration YAML file") + parser.add_argument("--config-file", dest="config_file", default="", help="Configuration YAML file") parser.add_argument("--examples-top-level", dest="example_toplevel", default="", help="Examples top-level") parser.add_argument("--example-folders", dest="example_folders", default="", help="Example subfolders") - parser.add_argument("--list-input", dest="list_input", default="", help="File that lists the subfolders") + parser.add_argument("--list-input", 
dest="list_input", default="", help="File that lists the input scripts") + parser.add_argument("--list-subfolders", dest="list_subfolders", default="", help="File that lists the subfolders") parser.add_argument("--num-workers", dest="num_workers", default=1, help="Number of workers") - parser.add_argument("--gen-ref",dest="genref", action='store_true', default=False, - help="Generating reference data") - parser.add_argument("--verbose",dest="verbose", action='store_true', default=False, - help="Verbose output") - parser.add_argument("--resume",dest="resume", action='store_true', default=False, - help="Resume the test run") parser.add_argument("--output-file",dest="output", default=output_file, help="Output file") parser.add_argument("--log-file",dest="logfile", default=log_file, help="Log file") parser.add_argument("--progress-file",dest="progress_file", default=progress_file, help="Progress file") - parser.add_argument("--analyze",dest="analyze", action='store_true', default=False, + parser.add_argument("--failure-file",dest="failure_file", default=failure_file, help="Failure file") + analyze = parser.add_mutually_exclusive_group() + analyze.add_argument("--analyze",dest="analyze", action='store_true', default=False, help="Analyze the testing folders and report statistics, not running the tests") + analyze.add_argument("--quick", dest="quick", action='store_true', default=False, + help="Determine which test inputs have commands changed between a branch and the head") + parser.add_argument("--quick-branch", dest="quick_branch", default=quick_branch, + help="Branch to which compare the current head to for changed styles") + parser.add_argument("--quick-max", dest="quick_max", default=50, + help="Maximum number of inputs to randomly select") + parser.add_argument("--quick-reference", dest="quick_reference", default=quick_reference, + help="Reference YAML file with progress data from full regression test run") + 
parser.add_argument("--skip-numerical-check",dest="skip_numerical_check", action='store_true', default=False, + help="Skip numerical checks") + parser.add_argument("--gen-ref",dest="genref", action='store_true', default=False, + help="Generating reference log files") + parser.add_argument("--verbose",dest="verbose", action='store_true', default=False, + help="Verbose screen output") + parser.add_argument("--resume",dest="resume", action='store_true', default=False, + help="Resume the test run from the list of inputs given the progress in progress.yaml") args = parser.parse_args() lmp_binary = os.path.abspath(args.lmp_binary) - configFileName = args.config_file + if len(args.config_file) > 0: + configFileName = args.config_file + else: + configFileName = get_default_config(lmp_binary) + output_file = args.output if int(args.num_workers) > 0: num_workers = int(args.num_workers) list_input = args.list_input + list_subfolders = args.list_subfolders # example_toplevel is where all the examples subfolders reside if args.example_toplevel != "": example_toplevel = args.example_toplevel if args.example_folders != "": example_subfolders = args.example_folders.split(';') - + genref = args.genref verbose = args.verbose log_file = args.logfile analyze = args.analyze + quick = args.quick + quick_branch = args.quick_branch + quick_max = int(args.quick_max) + quick_reference = args.quick_reference + skip_numerical_check = args.skip_numerical_check resume = args.resume progress_file = args.progress_file + failure_file = args.failure_file # logging logger = logging.getLogger(__name__) logging.basicConfig(filename=log_file, level=logging.INFO, filemode="w") - # read in the configuration of the tests - with open(configFileName, 'r') as f: - config = yaml.load(f, Loader=Loader) - absolute_path = os.path.abspath(configFileName) - print(f"\nRegression tests with the settings defined in the configuration file:\n {absolute_path}") - f.close() - - # check if lmp_binary is specified in the 
config yaml - if lmp_binary == "": - if config['lmp_binary'] == "": - print("Needs a valid LAMMPS binary") - quit() - else: - lmp_binary = os.path.abspath(config['lmp_binary']) - - # print out the binary info - packages, operating_system, GitInfo, compile_flags = get_lammps_build_configuration(lmp_binary) - print("\nLAMMPS build info:") - print(f" - {operating_system}") - print(f" - {GitInfo}") - print(f" - Active compile flags: {compile_flags}") - print(f" - List of {len(packages)} installed packages:") - all_pkgs = "" - for p in packages: - all_pkgs += p + " " - print(all_pkgs) - if len(example_subfolders) > 0: print("\nExample folders to test:") print(*example_subfolders, sep='\n') if example_toplevel != "": print("\nTop-level example folder:") print(f" {example_toplevel}") + if list_input != "": + print("\nInput scripts to test as listed in the file:") + print(f" {list_input}") # Using in place input scripts inplace_input = True test_cases = [] + # generate list of input scripts with commands that have been changed + if quick: + headers = get_quick_list.changed_files_from_git(quick_branch) + styles = get_quick_list.get_command_from_header(headers, LAMMPS_DIR) + regex = get_quick_list.make_regex(styles) + if regex: + if not example_toplevel: example_toplevel = os.path.join(LAMMPS_DIR, 'examples') + input_list = get_quick_list.get_examples_using_styles(regex, example_toplevel) + msg = f"\nThere are {len(input_list)} input scripts with changed styles relative to branch {quick_branch}." 
+ msg += "\nChanged styles: " + str(styles) + + # read in refrence data from a previous test run + with open(quick_reference, 'r') as f: + reference = yaml.load(f, Loader=Loader) + f.close() + + # trim previously failing run and runs that would take too long + new_list = [] + keys = reference.keys() + msg += "\nTrimming inputs using reference data from " + str(len(keys)) + " previous runs: " + for infile in input_list: + input = os.path.split(infile)[1] + if input in keys: + if (reference[input]['walltime'] < 0.0): + # print("Skipping ", input, " for previous failure") + pass + elif (reference[input]['walltime'] > 29.0): + # print("Skipping ", input, " for wall time limit") + pass + else: + new_list.append(infile) + else: + new_list.append(infile) + input_list = new_list + msg += "trimmed list has " + str(len(input_list)) + " entries" + + if len(input_list) > quick_max: + input_list = random.sample(input_list, quick_max) + msg += "\nTesting " + str(quick_max) + " randomly selected inputs" + + print(msg) + logger.info(msg) + + # divide the list of input scripts into num_workers chunks + sublists = divide_into_N(input_list, num_workers) + + # write each chunk to a file + idx = 0 + for list_input in sublists: + filename = f"input-list-{idx}.txt" + with open(filename, "w") as f: + for inp in list_input: + f.write(inp + '\n') + f.close() + idx = idx + 1 + else: + msg = f"\nThere are no input scripts with changed styles relative to branch {quick_branch}." 
+ print(msg) + logger.info(msg) + for idx in range(0, num_workers): + try: + os.remove(f"folder-list-{idx}.txt") + except: + pass + try: + os.remove(f"input-list-{idx}.txt") + except: + pass + filename = f"run-{idx}.log" + with open(filename, "w") as f: + f.write('\n') + f.close() + filename = f"progress-{idx}.yaml" + with open(filename, "w") as f: + f.write('\n') + f.close() + filename = f"output-{idx}.xml" + with open(filename, "w") as f: + f.write('\n') + f.close() + filename = f"failure-{idx}.yaml" + with open(filename, "w") as f: + f.write('\n') + f.close() + quit() + # if the example folders are not specified from the command-line argument --example-folders # then use the path from --example-top-folder, or from the input-list read from a text file - if len(example_subfolders) == 0: + elif len(example_subfolders) == 0: - # need top level specified + # if the top level is specified if len(example_toplevel) != 0: # getting the list of all the input files because there are subfolders (e.g. 
PACKAGES) under the top level cmd_str = f"find {example_toplevel} -name \"in.*\" " @@ -951,7 +1285,7 @@ if __name__ == "__main__": # write each chunk to a file idx = 0 for list_input in sublists: - filename = f"input-list-{idx}.txt" + filename = f"folder-list-{idx}.txt" with open(filename, "w") as f: for folder in list_input: # count the number of input scripts in each folder @@ -965,14 +1299,28 @@ if __name__ == "__main__": # working on all the folders for now example_subfolders = folder_list - # if a list of subfolders are provided from a text file (list_input from the command-line argument) - elif len(list_input) != 0: + # divide the list of input scripts into num_workers chunks + sublists = divide_into_N(input_list, num_workers) + + # write each chunk to a file + idx = 0 + for list_input in sublists: + filename = f"input-list-{idx}.txt" + with open(filename, "w") as f: + for inp in list_input: + f.write(inp + '\n') + f.close() + idx = idx + 1 + + # if a list of subfolders is provided from a text file (list_subfolders from the command-line argument) + elif len(list_subfolders) != 0: num_inputscripts = 0 - with open(list_input, "r") as f: + with open(list_subfolders, "r") as f: all_subfolders = f.read().splitlines() f.close() for line in all_subfolders: if len(line) > 0: + # skip subfolders if line[0] == '#': continue folder = line.split()[0] @@ -981,6 +1329,33 @@ if __name__ == "__main__": msg = f"\nThere are {len(example_subfolders)} folders with {num_inputscripts} input scripts in total listed in {list_input}." 
print(msg) logger.info(msg) + + # if a list of input scripts is provided from a text file (list_input from the command-line argument) + elif len(list_input) != 0: + num_inputscripts = 0 + folder_list = [] + with open(list_input, "r") as f: + all_inputs = f.read().splitlines() + f.close() + + for line in all_inputs: + if len(line) > 0: + # skip input scripts + if line[0] == '#': + continue + input = line.split()[0] + folder = input.rsplit('/', 1)[0] + # unique folders in the list + if folder not in folder_list: + folder_list.append(folder) + example_inputs.append(input) + num_inputscripts += 1 + + example_subfolders = folder_list + msg = f"\nThere are {num_inputscripts} input scripts listed in {list_input}." + print(msg) + logger.info(msg) + else: inplace_input = False @@ -988,6 +1363,45 @@ if __name__ == "__main__": if analyze == True: quit() + # read in the configuration of the tests + with open(configFileName, 'r') as f: + config = yaml.load(f, Loader=Loader) + absolute_path = os.path.abspath(configFileName) + print(f"\nRegression test configuration file:\n {absolute_path}") + f.close() + + # check if lmp_binary is specified in the config yaml + if lmp_binary == "": + if config['lmp_binary'] == "": + print("Needs a valid LAMMPS binary") + quit() + else: + lmp_binary = os.path.abspath(config['lmp_binary']) + + # print out the binary info + build_config = get_lammps_build_configuration(lmp_binary) + packages = build_config['installed_packages'] + operating_system = build_config['operating_system'] + GitInfo = build_config['git_info'] + compiler = build_config['compiler'] + compiler_full = build_config['compiler_full'] + compile_flags = build_config['compile_flags'] + + print("\nLAMMPS build info:") + print(f" - {operating_system}") + print(f" - {GitInfo}") + print(f" - {compiler_full}") + print(f" - Active compile flags: {compile_flags}") + print(f" - List of {len(packages)} installed packages:") + all_pkgs = "" + for p in packages: + all_pkgs += p + " " + 
print(all_pkgs) + + # augment config with additional keys + config['compiler'] = compiler + config['genref'] = genref + all_results = [] # save current working dir @@ -995,7 +1409,7 @@ if __name__ == "__main__": pwd = p.stdout.split('\n')[0] pwd = os.path.abspath(pwd) print("\nWorking directory: " + pwd) - + progress_file_abs = pwd + "/" + progress_file last_progress = {} if resume == False: @@ -1009,6 +1423,14 @@ if __name__ == "__main__": except Exception: print(f" Cannot open progress file {progress_file_abs} to resume, rerun all the tests") + # get a reference walltime + walltime_ref = get_reference_walltime(lmp_binary, config) + + # record all the failure cases (overwrite if the file exists) + failure_file_abs = pwd + "/" + failure_file + failure = open(failure_file_abs, "w") + failure.close() + # initialize all the counters total_tests = 0 completed_tests = 0 @@ -1028,7 +1450,7 @@ if __name__ == "__main__": for i in range(num_workers): args.append((input1, input2, output)) - with Pool(num_workers) as pool: + with Pool(num_workers) as pool: results = pool.starmap(func, args) ''' @@ -1042,15 +1464,27 @@ if __name__ == "__main__": cmd_str = "ls in.*" p = subprocess.run(cmd_str, shell=True, text=True, capture_output=True) - input_list = p.stdout.split('\n') - input_list.remove('') + all_input_list = p.stdout.split('\n') + all_input_list.remove('') - print(f"{len(input_list)} input script(s): {input_list}") + # if the list of example input scripts is provided + # if an input script is not in the list, then remove it from input_list + input_list = [] + if len(example_inputs) > 0: + for inp in all_input_list: + full_path = directory + "/" + inp + if full_path in example_inputs: + input_list.append(inp) + else: + input_list = all_input_list + + print(f"{len(input_list)} input script(s) to be tested: {input_list}") total_tests += len(input_list) # iterate through the input scripts results = [] - stat = iterate(lmp_binary, directory, input_list, config, results, 
progress_file_abs, last_progress) + stat = iterate(lmp_binary, directory, input_list, config, + results, progress_file_abs, failure_file_abs, walltime_ref, verbose, last_progress) completed_tests += stat['num_completed'] skipped_tests += stat['num_skipped'] @@ -1090,16 +1524,17 @@ if __name__ == "__main__": if passed_tests <= completed_tests: msg += f" - numerical tests passed: {passed_tests}\n" msg += "\nOutput:\n" + msg += f" - Failed inputs and reasons : {failure_file}\n" + msg += f" - Status of the tested inputs : {progress_file}\n" msg += f" - Running log with screen output: {log_file}\n" - msg += f" - Progress with the input list : {progress_file}\n" - msg += f" - Regression test results : {output_file}\n" + msg += f" - Testing result in JUnit XML : {output_file}\n" print(msg) # optional: need to check if junit_xml packaged is already installed in the env - # generate a JUnit XML file + # generate a JUnit XML file with open(output_file, 'w') as f: - test_cases = [] + test_cases = [] for result in all_results: #print(f"{result.name}: {result.status}") case = TestCase(name=result.name, classname=result.name) diff --git a/tools/swig/lammps.i b/tools/swig/lammps.i index 9bef047da4..11f5a270a1 100644 --- a/tools/swig/lammps.i +++ b/tools/swig/lammps.i @@ -130,6 +130,7 @@ extern void *lammps_extract_pair(void *handle, const char *name); extern int lammps_map_atom(void *handle, const void *id); extern int lammps_extract_atom_datatype(void *handle, const char *name); +extern int lammps_extract_atom_size(void *handle, const char *name, int type); extern void *lammps_extract_atom(void *handle, const char *name); extern void *lammps_extract_compute(void *handle, const char *id, int, int); @@ -319,6 +320,7 @@ extern void *lammps_extract_pair(void *handle, const char *name); extern int lammps_map_atom(void *handle, const void *id); extern int lammps_extract_atom_datatype(void *handle, const char *name); +extern int lammps_extract_atom_size(void *handle, const char *name, 
int type); extern void *lammps_extract_atom(void *handle, const char *name); extern void *lammps_extract_compute(void *handle, const char *id, int, int); diff --git a/unittest/c-library/test_library_properties.cpp b/unittest/c-library/test_library_properties.cpp index 96d63629df..737015ccdc 100644 --- a/unittest/c-library/test_library_properties.cpp +++ b/unittest/c-library/test_library_properties.cpp @@ -49,6 +49,7 @@ protected: if (verbose) std::cout << output; EXPECT_THAT(output, StartsWith("LAMMPS (")); } + void TearDown() override { ::testing::internal::CaptureStdout(); @@ -466,6 +467,33 @@ TEST_F(LibraryProperties, global) if (!verbose) ::testing::internal::GetCapturedStdout(); map_style = *(int *)lammps_extract_global(lmp, "map_style"); EXPECT_EQ(map_style, Atom::MAP_ARRAY); + + EXPECT_EQ(lammps_extract_global_datatype(lmp, "xlattice"), LAMMPS_DOUBLE); + EXPECT_EQ(lammps_extract_global_datatype(lmp, "ylattice"), LAMMPS_DOUBLE); + EXPECT_EQ(lammps_extract_global_datatype(lmp, "zlattice"), LAMMPS_DOUBLE); + auto *xlattice = (double *)lammps_extract_global(lmp, "xlattice"); + auto *ylattice = (double *)lammps_extract_global(lmp, "ylattice"); + auto *zlattice = (double *)lammps_extract_global(lmp, "zlattice"); + EXPECT_NE(xlattice, nullptr); + EXPECT_NE(ylattice, nullptr); + EXPECT_NE(zlattice, nullptr); + EXPECT_DOUBLE_EQ(*xlattice, 1.0); + EXPECT_DOUBLE_EQ(*ylattice, 1.0); + EXPECT_DOUBLE_EQ(*zlattice, 1.0); + if (!verbose) ::testing::internal::CaptureStdout(); + lammps_command(lmp, "clear"); + lammps_command(lmp, "units real"); + lammps_command(lmp, "lattice fcc 2.0"); + if (!verbose) ::testing::internal::GetCapturedStdout(); + xlattice = (double *)lammps_extract_global(lmp, "xlattice"); + ylattice = (double *)lammps_extract_global(lmp, "ylattice"); + zlattice = (double *)lammps_extract_global(lmp, "zlattice"); + EXPECT_NE(xlattice, nullptr); + EXPECT_NE(ylattice, nullptr); + EXPECT_NE(zlattice, nullptr); + EXPECT_DOUBLE_EQ(*xlattice, 2.0); + 
EXPECT_DOUBLE_EQ(*ylattice, 2.0); + EXPECT_DOUBLE_EQ(*zlattice, 2.0); }; TEST_F(LibraryProperties, pair1) @@ -667,11 +695,10 @@ TEST_F(LibraryProperties, has_error) class AtomProperties : public ::testing::Test { protected: void *lmp; + int ntypes, nlocal, nall; - AtomProperties() = default; - ; + AtomProperties() = default; ~AtomProperties() override = default; - ; void SetUp() override { @@ -686,11 +713,30 @@ protected: if (verbose) std::cout << output; EXPECT_THAT(output, StartsWith("LAMMPS (")); ::testing::internal::CaptureStdout(); + lammps_command(lmp, "fix props all property/atom i_one i2_two 2 d_three d2_four 2"); + lammps_command(lmp, "fix rmass all property/atom mol q rmass ghost yes"); lammps_command(lmp, "region box block 0 2 0 2 0 2"); lammps_command(lmp, "create_box 1 box"); lammps_command(lmp, "mass 1 3.0"); lammps_command(lmp, "create_atoms 1 single 1.0 1.0 1.5"); lammps_command(lmp, "create_atoms 1 single 0.2 0.1 0.1"); + lammps_command(lmp, "set group all mass 2.0"); + lammps_command(lmp, "set atom 1 charge -1"); + lammps_command(lmp, "set atom 2 charge 1"); + lammps_command(lmp, "set atom 1 mol 2"); + lammps_command(lmp, "set atom 2 mol 1"); + lammps_command(lmp, "set atom 1 i_one -3"); + lammps_command(lmp, "set atom 2 i_one 3"); + lammps_command(lmp, "set atom 1 d_three -1.3"); + lammps_command(lmp, "set atom 2 d_three 3.5"); + lammps_command(lmp, "set atom 1 i_two[1] -3"); + lammps_command(lmp, "set atom 2 i_two[2] 3"); + lammps_command(lmp, "set atom * d_four[1] -1.3"); + lammps_command(lmp, "set atom * d_four[2] 3.5"); + ntypes = lammps_extract_setting(lmp, "ntypes"); + nlocal = lammps_extract_setting(lmp, "nlocal"); + nall = lammps_extract_setting(lmp, "nall"); + output = ::testing::internal::GetCapturedStdout(); if (verbose) std::cout << output; } @@ -713,14 +759,42 @@ TEST_F(AtomProperties, invalid) TEST_F(AtomProperties, mass) { EXPECT_EQ(lammps_extract_atom_datatype(lmp, "mass"), LAMMPS_DOUBLE); + EXPECT_EQ(lammps_extract_atom_size(lmp, 
"mass", 0), ntypes + 1); auto *mass = (double *)lammps_extract_atom(lmp, "mass"); ASSERT_NE(mass, nullptr); ASSERT_DOUBLE_EQ(mass[1], 3.0); + EXPECT_EQ(lammps_extract_atom_datatype(lmp, "rmass"), LAMMPS_DOUBLE); + EXPECT_EQ(lammps_extract_atom_size(lmp, "rmass", 0), nall); + mass = (double *)lammps_extract_atom(lmp, "rmass"); + ASSERT_NE(mass, nullptr); + ASSERT_DOUBLE_EQ(mass[0], 2.0); + ASSERT_DOUBLE_EQ(mass[1], 2.0); +} + +TEST_F(AtomProperties, charge) +{ + EXPECT_EQ(lammps_extract_atom_datatype(lmp, "q"), LAMMPS_DOUBLE); + EXPECT_EQ(lammps_extract_atom_size(lmp, "rmass", 0), nall); + auto *charge = (double *)lammps_extract_atom(lmp, "q"); + ASSERT_NE(charge, nullptr); + ASSERT_DOUBLE_EQ(charge[0], -1.0); + ASSERT_DOUBLE_EQ(charge[1], 1.0); +} + +TEST_F(AtomProperties, molecule) +{ + EXPECT_EQ(lammps_extract_atom_datatype(lmp, "molecule"), LAMMPS_TAGINT); + EXPECT_EQ(lammps_extract_atom_size(lmp, "molecule", 0), nall); + auto *molecule = (tagint *)lammps_extract_atom(lmp, "molecule"); + ASSERT_NE(molecule, nullptr); + ASSERT_EQ(molecule[0], 2); + ASSERT_EQ(molecule[1], 1); } TEST_F(AtomProperties, id) { EXPECT_EQ(lammps_extract_atom_datatype(lmp, "id"), LAMMPS_TAGINT); + EXPECT_EQ(lammps_extract_atom_size(lmp, "id", 0), nall); auto *id = (tagint *)lammps_extract_atom(lmp, "id"); ASSERT_NE(id, nullptr); ASSERT_EQ(id[0], 1); @@ -730,6 +804,7 @@ TEST_F(AtomProperties, id) TEST_F(AtomProperties, type) { EXPECT_EQ(lammps_extract_atom_datatype(lmp, "type"), LAMMPS_INT); + EXPECT_EQ(lammps_extract_atom_size(lmp, "type", 0), nall); int *type = (int *)lammps_extract_atom(lmp, "type"); ASSERT_NE(type, nullptr); ASSERT_EQ(type[0], 1); @@ -739,6 +814,8 @@ TEST_F(AtomProperties, type) TEST_F(AtomProperties, position) { EXPECT_EQ(lammps_extract_atom_datatype(lmp, "x"), LAMMPS_DOUBLE_2D); + EXPECT_EQ(lammps_extract_atom_size(lmp, "x", LMP_SIZE_ROWS), nall); + EXPECT_EQ(lammps_extract_atom_size(lmp, "x", LMP_SIZE_COLS), 3); auto **x = (double **)lammps_extract_atom(lmp, "x"); 
ASSERT_NE(x, nullptr); EXPECT_DOUBLE_EQ(x[0][0], 1.0); @@ -749,6 +826,41 @@ TEST_F(AtomProperties, position) EXPECT_DOUBLE_EQ(x[1][2], 0.1); } +TEST_F(AtomProperties, custom) +{ + EXPECT_EQ(lammps_extract_atom_datatype(lmp, "i_one"), LAMMPS_INT); + EXPECT_EQ(lammps_extract_atom_size(lmp, "i_one", 0), nlocal); + auto *one = (int *)lammps_extract_atom(lmp, "i_one"); + ASSERT_NE(one, nullptr); + EXPECT_EQ(lammps_extract_atom_datatype(lmp, "i2_two"), LAMMPS_INT_2D); + EXPECT_EQ(lammps_extract_atom_size(lmp, "i2_two", LMP_SIZE_ROWS), nlocal); + EXPECT_EQ(lammps_extract_atom_size(lmp, "i2_two", LMP_SIZE_COLS), 2); + auto **two = (int **)lammps_extract_atom(lmp, "i2_two"); + ASSERT_NE(two, nullptr); + EXPECT_EQ(lammps_extract_atom_datatype(lmp, "d_three"), LAMMPS_DOUBLE); + EXPECT_EQ(lammps_extract_atom_size(lmp, "d_three", 0), nlocal); + auto *three = (double *)lammps_extract_atom(lmp, "d_three"); + ASSERT_NE(three, nullptr); + EXPECT_EQ(lammps_extract_atom_datatype(lmp, "d2_four"), LAMMPS_DOUBLE_2D); + EXPECT_EQ(lammps_extract_atom_size(lmp, "d2_four", LMP_SIZE_ROWS), nlocal); + EXPECT_EQ(lammps_extract_atom_size(lmp, "d2_four", LMP_SIZE_COLS), 2); + auto **four = (double **)lammps_extract_atom(lmp, "d2_four"); + ASSERT_NE(four, nullptr); + + EXPECT_EQ(one[0], -3); + EXPECT_EQ(one[1], 3); + EXPECT_EQ(two[0][0], -3); + EXPECT_EQ(two[0][1], 0); + EXPECT_EQ(two[1][0], 0); + EXPECT_EQ(two[1][1], 3); + EXPECT_DOUBLE_EQ(three[0], -1.3); + EXPECT_DOUBLE_EQ(three[1], 3.5); + EXPECT_DOUBLE_EQ(four[0][0], -1.3); + EXPECT_DOUBLE_EQ(four[0][1], 3.5); + EXPECT_DOUBLE_EQ(four[1][0], -1.3); + EXPECT_DOUBLE_EQ(four[1][1], 3.5); +} + TEST(SystemSettings, kokkos) { if (!lammps_config_has_package("KOKKOS")) GTEST_SKIP(); diff --git a/unittest/commands/test_variables.cpp b/unittest/commands/test_variables.cpp index 2390b1b675..c7686cbf12 100644 --- a/unittest/commands/test_variables.cpp +++ b/unittest/commands/test_variables.cpp @@ -206,7 +206,7 @@ TEST_F(VariableTest, CreateDelete) 
TEST_FAILURE(".*ERROR: Invalid variable loop argument: -1.*", command("variable dummy loop -1");); TEST_FAILURE(".*ERROR: Illegal variable loop command.*", command("variable dummy loop 10 1");); - TEST_FAILURE(".*ERROR: Unknown variable keyword: xxx.*", command("variable dummy xxxx");); + TEST_FAILURE(".*ERROR: Unknown variable style: xxx.*", command("variable dummy xxxx");); TEST_FAILURE(".*ERROR: Cannot redefine variable as a different style.*", command("variable two string xxx");); TEST_FAILURE(".*ERROR: Cannot redefine variable as a different style.*", diff --git a/unittest/force-styles/check_tests.py b/unittest/force-styles/check_tests.py index 4dba8f9b9e..7305168ecb 100755 --- a/unittest/force-styles/check_tests.py +++ b/unittest/force-styles/check_tests.py @@ -88,10 +88,9 @@ for header in headers: style = m[1] if upper.match(style): continue - if style in ['reax/c', 'reax/c/omp', 'reax/c/kk', - 'reax/c/kk/device', 'reax/c/kk/host', - 'reax/c/species', 'reax/c/bonds', - 'reax/c/species/kk', 'reax/c/bonds/kk', 'meam/c']: + if style in ['lj/sdk', 'lj/sdk/coul/long', 'lj/sdk/coul/msm', 'sdk', 'lj/sdk/gpu', + 'lj/sdk/coul/long/gpu', 'lj/sdk/omp', 'lj/sdk/coul/long/omp', 'sdk/omp', + 'lj/sdk/coul/msm/omp', 'lj/sdk/kk', 'lj/sdk/coul/long/kk', 'sdk/kk']: continue # detect, process, and flag suffix styles: @@ -176,11 +175,12 @@ def check_tests(name,styles,yaml,search,skip=()): counter = 0 counter += check_tests('pair',pair,'*-pair-*.yaml', - '.*pair_style:\\s*((\\S+).*)?',skip=('meam','lj/sf')) + '.*pair_style:\\s*((\\S+).*)?', + skip=('lj/sf','lj/sdk', 'lj/sdk/coul/long', 'lj/sdk/coul/msm')) counter += check_tests('bond',bond,'bond-*.yaml', '.*bond_style:\\s*((\\S+).*)?') counter += check_tests('angle',angle,'angle-*.yaml', - '.*angle_style:\\s*((\\S+).*)?') + '.*angle_style:\\s*((\\S+).*)?', skip=('sdk')) counter += check_tests('dihedral',dihedral,'dihedral-*.yaml', '.*dihedral_style:\\s*((\\S+).*)?') counter += check_tests('improper',improper,'improper-*.yaml', 
diff --git a/unittest/force-styles/tests/angle-charmm.yaml b/unittest/force-styles/tests/angle-charmm.yaml index 52e78abaf6..8ff5118390 100644 --- a/unittest/force-styles/tests/angle-charmm.yaml +++ b/unittest/force-styles/tests/angle-charmm.yaml @@ -15,7 +15,9 @@ angle_coeff: ! | 3 40.0 120.0 35.0 2.410 4 33.0 108.5 30.0 2.163 equilibrium: 4 1.9216075064457567 1.9425514574696887 2.0943951023931953 1.8936822384138476 -extract: ! "" +extract: ! | + k 1 + theta0 1 natoms: 29 init_energy: 85.42486388459771 init_stress: ! |2- diff --git a/unittest/force-styles/tests/angle-class2.yaml b/unittest/force-styles/tests/angle-class2.yaml index ae2e3ff5ee..8901157d17 100644 --- a/unittest/force-styles/tests/angle-class2.yaml +++ b/unittest/force-styles/tests/angle-class2.yaml @@ -23,7 +23,11 @@ angle_coeff: ! | 3 ba 10.0 10.0 1.5 1.5 4 ba 0.0 20.0 1.5 1.5 equilibrium: 4 1.9216075064457565 1.9373154697137058 2.0943951023931953 1.8936822384138474 -extract: ! "" +extract: ! | + k2 1 + k3 1 + k4 1 + theta0 1 natoms: 29 init_energy: 46.44089683774903 init_stress: ! |2- diff --git a/unittest/force-styles/tests/angle-cosine_periodic.yaml b/unittest/force-styles/tests/angle-cosine_periodic.yaml index 5c8227fcbd..ee3e5c1469 100644 --- a/unittest/force-styles/tests/angle-cosine_periodic.yaml +++ b/unittest/force-styles/tests/angle-cosine_periodic.yaml @@ -15,7 +15,10 @@ angle_coeff: ! | 3 50.0 -1 3 4 100.0 -1 4 equilibrium: 4 3.141592653589793 3.141592653589793 2.0943951023931957 2.356194490192345 -extract: ! "" +extract: ! | + k 1 + b 1 + multiplicity 1 natoms: 29 init_energy: 1178.5476942873006 init_stress: ! |2- diff --git a/unittest/force-styles/tests/angle-cosine_squared_restricted.yaml b/unittest/force-styles/tests/angle-cosine_squared_restricted.yaml index 400babb3c0..341ccb3919 100644 --- a/unittest/force-styles/tests/angle-cosine_squared_restricted.yaml +++ b/unittest/force-styles/tests/angle-cosine_squared_restricted.yaml @@ -17,7 +17,9 @@ angle_coeff: ! 
| 3 50.0 120.0 4 100.0 108.5 equilibrium: 4 1.9216075064457567 1.9373154697137058 2.0943951023931953 1.8936822384138476 -extract: ! "" +extract: ! | + k 1 + theta0 1 natoms: 29 init_energy: 43.16721849625078 init_stress: ! |2- diff --git a/unittest/force-styles/tests/angle-dipole.yaml b/unittest/force-styles/tests/angle-dipole.yaml index 877ffa19c7..3914973f0d 100644 --- a/unittest/force-styles/tests/angle-dipole.yaml +++ b/unittest/force-styles/tests/angle-dipole.yaml @@ -20,7 +20,9 @@ angle_coeff: ! | 3 50.0 120.0 4 100.0 108.5 equilibrium: 4 1.9216075064457565 1.9373154697137058 2.0943951023931953 1.8936822384138474 -extract: ! "" +extract: ! | + k 1 + gamma0 1 natoms: 29 init_energy: 1003.6681304854917 init_stress: ! |2- diff --git a/unittest/force-styles/tests/angle-fourier.yaml b/unittest/force-styles/tests/angle-fourier.yaml index 61165c5a92..275d62beca 100644 --- a/unittest/force-styles/tests/angle-fourier.yaml +++ b/unittest/force-styles/tests/angle-fourier.yaml @@ -16,7 +16,11 @@ angle_coeff: ! | 3 50.0 0.0 0.0 1.0 4 100.0 0.3 0.3 0.3 equilibrium: 4 3.141592653589793 1.5707963267948966 1.5707963267948966 1.8234765819369754 -extract: ! "" +extract: ! | + k 1 + C0 1 + C1 1 + C2 1 natoms: 29 init_energy: 400.84036632010225 init_stress: ! |- diff --git a/unittest/force-styles/tests/angle-fourier_simple.yaml b/unittest/force-styles/tests/angle-fourier_simple.yaml index e1a394ee3a..bd72e67912 100644 --- a/unittest/force-styles/tests/angle-fourier_simple.yaml +++ b/unittest/force-styles/tests/angle-fourier_simple.yaml @@ -16,7 +16,10 @@ angle_coeff: ! | 3 50.0 1.0 3.0 4 100.0 -0.5 1.5 equilibrium: 4 3.141592653589793 1.5707963267948966 1.0471975511965976 2.0943951023931953 -extract: ! "" +extract: ! | + k 1 + C 1 + N 1 natoms: 29 init_energy: 2474.0748013590646 init_stress: ! 
|- diff --git a/unittest/force-styles/tests/angle-mm3.yaml b/unittest/force-styles/tests/angle-mm3.yaml index 9fb9460183..381f43187e 100644 --- a/unittest/force-styles/tests/angle-mm3.yaml +++ b/unittest/force-styles/tests/angle-mm3.yaml @@ -16,7 +16,9 @@ angle_coeff: ! | 3 50.0 120.0 4 100.0 108.5 equilibrium: 4 1.9216075064457565 1.9373154697137058 2.0943951023931953 1.8936822384138474 -extract: ! "" +extract: ! | + k2 1 + theta0 1 natoms: 29 init_energy: 44.72461548562619 init_stress: ! |2- diff --git a/unittest/force-styles/tests/angle-quartic.yaml b/unittest/force-styles/tests/angle-quartic.yaml index 6ded709e84..15ec06d82a 100644 --- a/unittest/force-styles/tests/angle-quartic.yaml +++ b/unittest/force-styles/tests/angle-quartic.yaml @@ -16,7 +16,11 @@ angle_coeff: ! | 3 120.0 50.0 -9.5 -1.5 4 108.5 100.0 5.0 -2.0 equilibrium: 4 1.9216075064457565 1.9373154697137058 2.0943951023931953 1.8936822384138474 -extract: ! "" +extract: ! | + k2 1 + k3 1 + k4 1 + theta0 1 natoms: 29 init_energy: 41.0458477552901 init_stress: ! |2- diff --git a/unittest/force-styles/tests/angle-spica.yaml b/unittest/force-styles/tests/angle-spica.yaml index 7f88553c70..46e8238349 100644 --- a/unittest/force-styles/tests/angle-spica.yaml +++ b/unittest/force-styles/tests/angle-spica.yaml @@ -20,7 +20,9 @@ angle_coeff: ! | 3 40.0 120.0 4 33.0 108.5 equilibrium: 4 1.9216075064457565 1.9425514574696887 2.0943951023931953 1.8936822384138474 -extract: ! "" +extract: ! | + k 1 + theta0 1 natoms: 29 init_energy: 38.36438529349082 init_stress: ! |2- diff --git a/unittest/force-styles/tests/atomic-pair-meam_ms.yaml b/unittest/force-styles/tests/atomic-pair-meam_ms.yaml index fff938d940..d8dcd7b1eb 100644 --- a/unittest/force-styles/tests/atomic-pair-meam_ms.yaml +++ b/unittest/force-styles/tests/atomic-pair-meam_ms.yaml @@ -2,7 +2,7 @@ lammps_version: 7 Feb 2024 tags: slow date_generated: Wed Feb 28 17:07:42 2024 -epsilon: 2.5e-12 +epsilon: 2.5e-11 skip_tests: prerequisites: ! 
| pair meam/ms diff --git a/unittest/force-styles/tests/atomic-pair-pedone.yaml b/unittest/force-styles/tests/atomic-pair-pedone.yaml index ea97d9ee8c..82c6405e65 100644 --- a/unittest/force-styles/tests/atomic-pair-pedone.yaml +++ b/unittest/force-styles/tests/atomic-pair-pedone.yaml @@ -1,6 +1,6 @@ --- lammps_version: 7 Feb 2024 -tags: +tags: unstable date_generated: Tue Apr 9 07:44:34 2024 epsilon: 7.5e-13 skip_tests: diff --git a/unittest/force-styles/tests/bond-fene_expand.yaml b/unittest/force-styles/tests/bond-fene_expand.yaml index fc859d477c..250f89af15 100644 --- a/unittest/force-styles/tests/bond-fene_expand.yaml +++ b/unittest/force-styles/tests/bond-fene_expand.yaml @@ -17,7 +17,12 @@ bond_coeff: ! | 4 650 2.4 0.015 1.2 0.15 5 450 2 0.018 1 0.09 equilibrium: 5 1.5550000000000002 1.117 1.321 1.3139999999999998 1.06 -extract: ! "" +extract: ! | + k 1 + r0 1 + epsilon 1 + sigma 1 + shift 1 natoms: 29 init_energy: 5926.020859124294 init_stress: ! |- diff --git a/unittest/force-styles/tests/bond-harmonic_shift.yaml b/unittest/force-styles/tests/bond-harmonic_shift.yaml index 7a41c2c3cd..61212a468b 100644 --- a/unittest/force-styles/tests/bond-harmonic_shift.yaml +++ b/unittest/force-styles/tests/bond-harmonic_shift.yaml @@ -17,7 +17,10 @@ bond_coeff: ! | 4 650.0 1.2 0.2 5 450.0 1.0 0.0 equilibrium: 5 1.5 1.1 1.3 1.2 1 -extract: ! "" +extract: ! | + k 1 + r0 1 + r1 1 natoms: 29 init_energy: -9395.519982389222 init_stress: ! |- diff --git a/unittest/force-styles/tests/bond-mm3.yaml b/unittest/force-styles/tests/bond-mm3.yaml index eb7443f1c2..f5ba5c237c 100644 --- a/unittest/force-styles/tests/bond-mm3.yaml +++ b/unittest/force-styles/tests/bond-mm3.yaml @@ -17,7 +17,9 @@ bond_coeff: ! | 4 650.0 1.2 5 450.0 1.0 equilibrium: 5 1.5 1.1 1.3 1.2 1 -extract: ! "" +extract: ! | + k2 1 + r0 1 natoms: 29 init_energy: 4.247265008273143 init_stress: ! 
|- diff --git a/unittest/force-styles/tests/dihedral-cosine_squared_restricted.yaml b/unittest/force-styles/tests/dihedral-cosine_squared_restricted.yaml index f67a093017..3f4d217b9a 100644 --- a/unittest/force-styles/tests/dihedral-cosine_squared_restricted.yaml +++ b/unittest/force-styles/tests/dihedral-cosine_squared_restricted.yaml @@ -1,8 +1,8 @@ --- lammps_version: 7 Feb 2024 -tags: +tags: date_generated: Sat Apr 13 11:41:16 2024 -epsilon: 2.5e-13 +epsilon: 1.0e-11 skip_tests: prerequisites: ! | atom full diff --git a/unittest/fortran/keepstuff.f90 b/unittest/fortran/keepstuff.f90 index 63184e1006..c964c8a9c6 100644 --- a/unittest/fortran/keepstuff.f90 +++ b/unittest/fortran/keepstuff.f90 @@ -4,9 +4,9 @@ MODULE keepstuff TYPE(LAMMPS), SAVE :: lmp INTEGER, SAVE :: mycomm CHARACTER(LEN=40), DIMENSION(3), PARAMETER :: demo_input = & - [ CHARACTER(LEN=40) :: & - 'region box block 0 $x 0 2 0 2', & - 'create_box 1 box', & + [ CHARACTER(LEN=40) :: & + 'region box block 0 $x 0 2 0 2', & + 'create_box 1 box', & 'create_atoms 1 single 1.0 1.0 ${zpos}' ] CHARACTER(LEN=40), DIMENSION(3), PARAMETER :: big_input = & [ CHARACTER(LEN=40) :: & @@ -14,15 +14,26 @@ MODULE keepstuff 'create_box 1 box', & 'create_atoms 1 single 1.0 1.0 ${zpos}' ] CHARACTER(LEN=40), DIMENSION(2), PARAMETER :: cont_input = & - [ CHARACTER(LEN=40) :: & - 'create_atoms 1 single &', & + [ CHARACTER(LEN=40) :: & + 'create_atoms 1 single &', & ' 0.2 0.1 0.1' ] + CHARACTER(LEN=60), DIMENSION(18), PARAMETER :: prop_input = & + [ CHARACTER(LEN=60) :: 'fix 1 all nve', 'mass 1 3.0', & + 'fix 2 all property/atom mol q rmass ghost yes', & + 'fix 3 all property/atom i_one i2_two 2 d_three d2_four 2', & + 'set group all mass 2.0', 'set atom 1 charge -1', & + 'set atom 2 charge 1', 'set atom 1 mol 2', 'set atom 2 mol 1', & + 'set atom 1 i_one -3', 'set atom 2 i_one 3', & + 'set atom 1 d_three -1.3', 'set atom 2 d_three 3.5', & + 'set atom 1 i_two[1] -3', 'set atom 2 i_two[2] 3', & + 'set atom * d_four[1] -1.3', 
'set atom * d_four[2] 3.5', & + 'run 0 post no' ] CHARACTER(LEN=40), DIMENSION(1), PARAMETER :: more_input = & [ CHARACTER(LEN=40) :: 'create_atoms 1 single 0.5 0.5 0.5' ] CHARACTER(LEN=40), DIMENSION(3), PARAMETER :: pair_input = & - [ CHARACTER(LEN=40) :: & - 'pair_style lj/cut 2.5', & - 'pair_coeff 1 1 1.0 1.0', & + [ CHARACTER(LEN=40) :: & + 'pair_style lj/cut 2.5', & + 'pair_coeff 1 1 1.0 1.0', & 'mass 1 2.0' ] INTERFACE @@ -63,4 +74,3 @@ CONTAINS END FUNCTION f2c_string END MODULE keepstuff - diff --git a/unittest/fortran/test_fortran_extract_atom.f90 b/unittest/fortran/test_fortran_extract_atom.f90 index 262e5de47d..0c5a52ef25 100644 --- a/unittest/fortran/test_fortran_extract_atom.f90 +++ b/unittest/fortran/test_fortran_extract_atom.f90 @@ -24,12 +24,13 @@ END SUBROUTINE f_lammps_close SUBROUTINE f_lammps_setup_extract_atom() BIND(C) USE LIBLAMMPS - USE keepstuff, ONLY : lmp, big_input, cont_input, pair_input + USE keepstuff, ONLY : lmp, big_input, cont_input, pair_input, prop_input IMPLICIT NONE CALL lmp%commands_list(big_input) CALL lmp%commands_list(cont_input) CALL lmp%commands_list(pair_input) + CALL lmp%commands_list(prop_input) END SUBROUTINE f_lammps_setup_extract_atom FUNCTION f_lammps_extract_atom_mass() BIND(C) @@ -44,6 +45,19 @@ FUNCTION f_lammps_extract_atom_mass() BIND(C) f_lammps_extract_atom_mass = mass(1) END FUNCTION f_lammps_extract_atom_mass +FUNCTION f_lammps_extract_atom_mass_size() BIND(C) + USE, INTRINSIC :: ISO_C_BINDING, ONLY : c_double, c_int + USE LIBLAMMPS + USE keepstuff, ONLY : lmp + IMPLICIT NONE + INTEGER(c_int) :: f_lammps_extract_atom_mass_size, ntypes + REAL(c_double), DIMENSION(:), POINTER :: mass => NULL() + + ntypes = lmp%extract_setting('ntypes') + mass = lmp%extract_atom('mass') + f_lammps_extract_atom_mass_size = SIZE(mass) +END FUNCTION f_lammps_extract_atom_mass_size + FUNCTION f_lammps_extract_atom_tag_int(i) BIND(C) USE, INTRINSIC :: ISO_C_BINDING, ONLY : c_double, c_int USE LIBLAMMPS @@ -83,6 +97,18 @@ FUNCTION 
f_lammps_extract_atom_type(i) BIND(C) f_lammps_extract_atom_type = atype(i) END FUNCTION f_lammps_extract_atom_type +FUNCTION f_lammps_extract_atom_type_size() BIND(C) + USE, INTRINSIC :: ISO_C_BINDING, ONLY : c_int + USE LIBLAMMPS + USE keepstuff, ONLY : lmp + IMPLICIT NONE + INTEGER(c_int) :: f_lammps_extract_atom_type_size + INTEGER(c_int), DIMENSION(:), POINTER :: atype => NULL() + + atype = lmp%extract_atom('type') + f_lammps_extract_atom_type_size = size(atype) +END FUNCTION f_lammps_extract_atom_type_size + FUNCTION f_lammps_extract_atom_mask(i) BIND(C) USE, INTRINSIC :: ISO_C_BINDING, ONLY : c_int USE LIBLAMMPS @@ -109,6 +135,19 @@ SUBROUTINE f_lammps_extract_atom_x(i, x) BIND(C) x = xptr(:,i) END SUBROUTINE f_lammps_extract_atom_x +FUNCTION f_lammps_extract_atom_x_size(i) BIND(C) + USE, INTRINSIC :: ISO_C_BINDING, ONLY : c_double, c_int + USE LIBLAMMPS + USE keepstuff, ONLY : lmp + IMPLICIT NONE + INTEGER(c_int), INTENT(IN), VALUE :: i + INTEGER(c_int) :: f_lammps_extract_atom_x_size + REAL(c_double), DIMENSION(:,:), POINTER :: xptr => NULL() + + xptr = lmp%extract_atom('x') + f_lammps_extract_atom_x_size = SIZE(xptr, i) +END FUNCTION f_lammps_extract_atom_x_size + SUBROUTINE f_lammps_extract_atom_v(i, v) BIND(C) USE, INTRINSIC :: ISO_C_BINDING, ONLY : c_double, c_int USE LIBLAMMPS @@ -121,3 +160,16 @@ SUBROUTINE f_lammps_extract_atom_v(i, v) BIND(C) vptr = lmp%extract_atom('v') v = vptr(:,i) END SUBROUTINE f_lammps_extract_atom_v + +FUNCTION f_lammps_extract_atom_v_size(i) BIND(C) + USE, INTRINSIC :: ISO_C_BINDING, ONLY : c_double, c_int + USE LIBLAMMPS + USE keepstuff, ONLY : lmp + IMPLICIT NONE + INTEGER(c_int), INTENT(IN), VALUE :: i + INTEGER(c_int) :: f_lammps_extract_atom_v_size + REAL(c_double), DIMENSION(:,:), POINTER :: xptr => NULL() + + xptr = lmp%extract_atom('v') + f_lammps_extract_atom_v_size = SIZE(xptr, i) +END FUNCTION f_lammps_extract_atom_v_size diff --git a/unittest/fortran/wrap_extract_atom.cpp b/unittest/fortran/wrap_extract_atom.cpp 
index 2552d6a10f..9430959b2b 100644 --- a/unittest/fortran/wrap_extract_atom.cpp +++ b/unittest/fortran/wrap_extract_atom.cpp @@ -1,6 +1,7 @@ // unit tests for extracting Atom class data from a LAMMPS instance through the // Fortran wrapper +#include "atom.h" #include "lammps.h" #include "library.h" #include @@ -16,12 +17,16 @@ void *f_lammps_with_args(); void f_lammps_close(); void f_lammps_setup_extract_atom(); double f_lammps_extract_atom_mass(); +int f_lammps_extract_atom_mass_size(); int f_lammps_extract_atom_tag_int(int); int64_t f_lammps_extract_atom_tag_int64(int64_t); int f_lammps_extract_atom_type(int); +int f_lammps_extract_atom_type_size(); int f_lammps_extract_atom_mask(int); void f_lammps_extract_atom_x(int, double *); +int f_lammps_extract_atom_x_size(int); void f_lammps_extract_atom_v(int, double *); +int f_lammps_extract_atom_v_size(int); } class LAMMPS_extract_atom : public ::testing::Test { @@ -50,7 +55,9 @@ protected: TEST_F(LAMMPS_extract_atom, mass) { f_lammps_setup_extract_atom(); - EXPECT_DOUBLE_EQ(f_lammps_extract_atom_mass(), 2.0); + int ntypes = lmp->atom->ntypes; + EXPECT_DOUBLE_EQ(f_lammps_extract_atom_mass(), 3.0); + EXPECT_EQ(f_lammps_extract_atom_mass_size(), ntypes + 1); }; TEST_F(LAMMPS_extract_atom, tag) @@ -68,8 +75,10 @@ TEST_F(LAMMPS_extract_atom, tag) TEST_F(LAMMPS_extract_atom, type) { f_lammps_setup_extract_atom(); + int nall = lmp->atom->nlocal + lmp->atom->nghost; EXPECT_EQ(f_lammps_extract_atom_type(1), 1); EXPECT_EQ(f_lammps_extract_atom_type(2), 1); + EXPECT_EQ(f_lammps_extract_atom_type_size(), nall); }; TEST_F(LAMMPS_extract_atom, mask) @@ -86,6 +95,7 @@ TEST_F(LAMMPS_extract_atom, mask) TEST_F(LAMMPS_extract_atom, x) { f_lammps_setup_extract_atom(); + int nall = lmp->atom->nlocal + lmp->atom->nghost; double x1[3]; double x2[3]; f_lammps_extract_atom_x(1, x1); @@ -96,11 +106,15 @@ TEST_F(LAMMPS_extract_atom, x) EXPECT_DOUBLE_EQ(x2[0], 0.2); EXPECT_DOUBLE_EQ(x2[1], 0.1); EXPECT_DOUBLE_EQ(x2[2], 0.1); + // in Fortran 
row and column are swapped + EXPECT_EQ(f_lammps_extract_atom_x_size(1), 3); + EXPECT_EQ(f_lammps_extract_atom_x_size(2), nall); } TEST_F(LAMMPS_extract_atom, v) { f_lammps_setup_extract_atom(); + int nall = lmp->atom->nlocal + lmp->atom->nghost; double v1[3]; double v2[3]; f_lammps_extract_atom_v(1, v1); @@ -117,4 +131,13 @@ TEST_F(LAMMPS_extract_atom, v) EXPECT_DOUBLE_EQ(v1[0], 1.0); EXPECT_DOUBLE_EQ(v1[1], 2.0); EXPECT_DOUBLE_EQ(v1[2], 3.0); + // in Fortran row and column are swapped! + EXPECT_EQ(f_lammps_extract_atom_v_size(1), 3); + EXPECT_EQ(f_lammps_extract_atom_v_size(2), lmp->atom->nlocal); + lammps_command(lmp, "comm_modify vel yes"); + lammps_command(lmp, "run 0 post no"); + EXPECT_EQ(f_lammps_extract_atom_v_size(1), 3); + EXPECT_EQ(f_lammps_extract_atom_v_size(2), nall); } + +// TODO: write tests for custom properties diff --git a/unittest/python/python-commands.py b/unittest/python/python-commands.py index fcf731bf3f..b1432e67b9 100644 --- a/unittest/python/python-commands.py +++ b/unittest/python/python-commands.py @@ -656,6 +656,9 @@ create_atoms 1 single & self.assertEqual(self.lmp.extract_global("map_tag_max"), -1) self.assertEqual(self.lmp.extract_global("sortfreq"), 1000) self.assertEqual(self.lmp.extract_global("nextsort"), 0) + self.assertEqual(self.lmp.extract_global("xlattice"), 1.0) + self.assertEqual(self.lmp.extract_global("ylattice"), 1.0) + self.assertEqual(self.lmp.extract_global("zlattice"), 1.0) # set and initialize r-RESPA self.lmp.command("run_style respa 3 5 2 pair 2 kspace 3") diff --git a/unittest/python/python-numpy.py b/unittest/python/python-numpy.py index 839e5d03af..4930527a61 100644 --- a/unittest/python/python-numpy.py +++ b/unittest/python/python-numpy.py @@ -155,67 +155,104 @@ class PythonNumpy(unittest.TestCase): self.assertEqual(values[1,0], 1.5) self.assertEqual(values[1,3], 1.5) - def testExtractAtomDeprecated(self): - self.lmp.command("units lj") - self.lmp.command("atom_style atomic") - self.lmp.command("atom_modify 
map array") - self.lmp.command("region box block 0 2 0 2 0 2") - self.lmp.command("create_box 1 box") - - x = [ - 1.0, 1.0, 1.0, - 1.0, 1.0, 1.5 - ] - - types = [1, 1] - - self.assertEqual(self.lmp.create_atoms(2, id=None, type=types, x=x), 2) - nlocal = self.lmp.extract_global("nlocal", LAMMPS_INT) - self.assertEqual(nlocal, 2) - - ident = self.lmp.numpy.extract_atom_iarray("id", nlocal, dim=1) - self.assertEqual(len(ident), 2) - - ntypes = self.lmp.extract_global("ntypes", LAMMPS_INT) - self.assertEqual(ntypes, 1) - - x = self.lmp.numpy.extract_atom_darray("x", nlocal, dim=3) - v = self.lmp.numpy.extract_atom_darray("v", nlocal, dim=3) - self.assertEqual(len(x), 2) - self.assertTrue((x[0] == (1.0, 1.0, 1.0)).all()) - self.assertTrue((x[1] == (1.0, 1.0, 1.5)).all()) - self.assertEqual(len(v), 2) - def testExtractAtom(self): self.lmp.command("units lj") self.lmp.command("atom_style atomic") self.lmp.command("atom_modify map array") self.lmp.command("region box block 0 2 0 2 0 2") - self.lmp.command("create_box 1 box") + self.lmp.command("create_box 2 box") - x = [ - 1.0, 1.0, 1.0, - 1.0, 1.0, 1.5 - ] + x = [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.5, 1.5, 1.0, 1.0 ] + types = [1, 2, 1] + ids = [1, 2, 3] + self.assertEqual(self.lmp.create_atoms(3, id=ids, type=types, x=x), 3) + self.lmp.command("mass * 2.0") + self.lmp.command("pair_style zero 1.1") + self.lmp.command("pair_coeff * *") + self.lmp.command("fix props all property/atom i_one i2_two 2 d_three d2_four 2"); + self.lmp.command("fix rmass all property/atom mol q rmass ghost yes"); + self.lmp.command("fix 1 all nve") + self.lmp.command("run 0 post no") + ntypes = self.lmp.extract_setting("ntypes"); + nlocal = self.lmp.extract_setting("nlocal"); + nall = self.lmp.extract_setting("nall"); + self.assertEqual(nlocal, 3) + self.assertEqual(ntypes, 2) + self.assertEqual(nall, 63) - types = [1, 1] + self.lmp.command("set atom 1 charge -1"); + self.lmp.command("set atom 2 charge 1"); + self.lmp.command("set atom 3 charge 0"); + 
self.lmp.command("set atom * mol 2"); + self.lmp.command("set atom 2 mol 1"); + self.lmp.command("set atom 1 i_one -3"); + self.lmp.command("set atom 2 i_one 3"); + self.lmp.command("set atom 2 d_three -1.3"); + self.lmp.command("set atom 3 d_three 3.5"); + self.lmp.command("set atom 1 i_two[1] -3"); + self.lmp.command("set atom 2 i_two[2] 3"); + self.lmp.command("set atom * d_four[1] -1.3"); + self.lmp.command("set atom * d_four[2] 3.5"); + self.lmp.command("run 0 post no") - self.assertEqual(self.lmp.create_atoms(2, id=None, type=types, x=x), 2) - nlocal = self.lmp.extract_global("nlocal") - self.assertEqual(nlocal, 2) + mass = self.lmp.numpy.extract_atom("mass") + self.assertEqual(len(mass), ntypes + 1) + self.assertTrue((mass == (0.0, 2.0, 2.0)).all()) + + rmass = self.lmp.numpy.extract_atom("rmass") + self.assertEqual(len(rmass), nall) + self.assertTrue((rmass[0:3] == (0.0, 0.0, 0.0)).all()) + + charge = self.lmp.numpy.extract_atom("q") + self.assertEqual(len(charge), nall) + self.assertTrue((charge[0:3] == (-1.0, 1.0, 0.0)).all()) + + molecule = self.lmp.numpy.extract_atom("molecule") + self.assertEqual(len(molecule), nall) + self.assertTrue((molecule[0:3] == (2, 1, 2)).all()) ident = self.lmp.numpy.extract_atom("id") - self.assertEqual(len(ident), 2) + self.assertEqual(len(ident), nall) + self.assertTrue((ident[0:3] == (1, 2, 3)).all()) - ntypes = self.lmp.extract_global("ntypes") - self.assertEqual(ntypes, 1) + atype = self.lmp.numpy.extract_atom("type") + self.assertEqual(len(atype), nall) + self.assertTrue((atype[0:3] == (1, 2, 1)).all()) x = self.lmp.numpy.extract_atom("x") v = self.lmp.numpy.extract_atom("v") - self.assertEqual(len(x), 2) + self.assertEqual(len(x), nall) + self.assertEqual(len(x[0]), 3) self.assertTrue((x[0] == (1.0, 1.0, 1.0)).all()) self.assertTrue((x[1] == (1.0, 1.0, 1.5)).all()) - self.assertEqual(len(v), 2) + self.assertTrue((x[2] == (1.5, 1.0, 1.0)).all()) + self.assertEqual(len(v), nlocal) + self.assertEqual(len(v[0]), 3) + + 
self.lmp.command("comm_modify vel yes"); + self.lmp.command("run 0 post no") + + v = self.lmp.numpy.extract_atom("v") + self.assertEqual(len(v), nall) + + one = self.lmp.numpy.extract_atom("i_one") + two = self.lmp.numpy.extract_atom("i2_two") + three = self.lmp.numpy.extract_atom("d_three") + four = self.lmp.numpy.extract_atom("d2_four") + self.assertEqual(len(one), nlocal) + self.assertTrue((one == (-3, 3, 0)).all()) + self.assertEqual(len(two), nlocal) + self.assertEqual(len(two[0]), 2) + self.assertTrue((two[0] == (-3, 0)).all()) + self.assertTrue((two[1] == (0, 3)).all()) + self.assertTrue((two[2] == (0, 0)).all()) + self.assertEqual(len(three), nlocal) + self.assertTrue((three == (0.0, -1.3, 3.5)).all()) + self.assertEqual(len(four), nlocal) + self.assertEqual(len(four[0]), 2) + self.assertTrue((four[0] == (-1.3, 3.5)).all()) + self.assertTrue((four[1] == (-1.3, 3.5)).all()) + self.assertTrue((four[2] == (-1.3, 3.5)).all()) @unittest.skipIf(not has_full,"Gather bonds test") def testGatherBond_newton_on(self):