diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index b6892aa4ee..1b4cae3aaa 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -153,12 +153,12 @@ tools/vim/* @hammondkd unittest/* @akohlmey # cmake -cmake/* @rbberger +cmake/* @akohlmey cmake/Modules/LAMMPSInterfacePlugin.cmake @akohlmey cmake/Modules/MPI4WIN.cmake @akohlmey cmake/Modules/OpenCLLoader.cmake @akohlmey -cmake/Modules/Packages/COLVARS.cmake @rbberger @giacomofiorin -cmake/Modules/Packages/KIM.cmake @rbberger @ellio167 +cmake/Modules/Packages/COLVARS.cmake @giacomofiorin +cmake/Modules/Packages/KIM.cmake @ellio167 cmake/presets/*.cmake @akohlmey # python diff --git a/.github/workflows/coverity.yml b/.github/workflows/coverity.yml index b1c23e1f6a..00a4596cc8 100644 --- a/.github/workflows/coverity.yml +++ b/.github/workflows/coverity.yml @@ -59,16 +59,13 @@ jobs: -D BUILD_SHARED_LIBS=on \ -D LAMMPS_SIZES=SMALLBIG \ -D LAMMPS_EXCEPTIONS=off \ - -D PKG_MESSAGE=on \ - -D PKG_MPIIO=on \ -D PKG_ATC=on \ -D PKG_AWPMD=on \ - -D PKG_BOCS=on \ - -D PKG_EFF=on \ -D PKG_H5MD=on \ -D PKG_INTEL=on \ -D PKG_LATBOLTZ=on \ -D PKG_MANIFOLD=on \ + -D PKG_MDI=on \ -D PKG_MGPT=on \ -D PKG_ML-PACE=on \ -D PKG_ML-RANN=on \ @@ -77,7 +74,6 @@ jobs: -D PKG_PTM=on \ -D PKG_QTB=on \ -D PKG_SMTBQ=on \ - -D PKG_TALLY=on \ ../cmake - name: Run Coverity Scan diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 95d738d279..f7e9b314bd 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -2,7 +2,6 @@ ######################################## # CMake build system # This file is part of LAMMPS -# Created by Christoph Junghans and Richard Berger cmake_minimum_required(VERSION 3.16) ######################################## # set policy to silence warnings about ignoring _ROOT but use it @@ -106,7 +105,7 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel") if(CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 17.3 OR CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 17.4) set(CMAKE_TUNE_DEFAULT "-xCOMMON-AVX512") else() - 
set(CMAKE_TUNE_DEFAULT "-xHost -fp-model fast=2 -no-prec-div -qoverride-limits -diag-disable=10441 -diag-disable=2196") + set(CMAKE_TUNE_DEFAULT "-xHost -fp-model fast=2 -no-prec-div -qoverride-limits -diag-disable=10441 -diag-disable=11074 -diag-disable=11076 -diag-disable=2196") endif() endif() endif() @@ -428,6 +427,18 @@ if(BUILD_OMP) target_link_libraries(lmp PRIVATE OpenMP::OpenMP_CXX) endif() +# lower C++ standard for fmtlib sources when using Intel classic compiler +if((CMAKE_CXX_COMPILER_ID STREQUAL "Intel") AND (CMAKE_CXX_STANDARD GREATER_EQUAL 17) + AND (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 2021.10)) + message(STATUS "Lowering C++ standard for compiling fmtlib sources with Intel Classic compiler") + get_filename_component(LMP_UTILS_SRC "${LAMMPS_SOURCE_DIR}/utils.cpp" ABSOLUTE) + get_filename_component(LMP_VARIABLE_SRC "${LAMMPS_SOURCE_DIR}/variable.cpp" ABSOLUTE) + get_filename_component(FMT_FORMAT_SRC "${LAMMPS_SOURCE_DIR}/fmtlib_format.cpp" ABSOLUTE) + get_filename_component(FMT_OS_SRC "${LAMMPS_SOURCE_DIR}/fmtlib_os.cpp" ABSOLUTE) + set_source_files_properties("${FMT_FORMAT_SRC}" "${FMT_OS_SRC}" "${LMP_VARIABLE_SRC}" "${LMP_UTILS_SRC}" + PROPERTIES COMPILE_OPTIONS "-std=c++14") +endif() + if(PKG_ATC OR PKG_AWPMD OR PKG_ML-QUIP OR PKG_ML-POD OR PKG_ELECTRODE OR BUILD_TOOLS) enable_language(C) if (NOT USE_INTERNAL_LINALG) @@ -960,33 +971,53 @@ if(PKG_KOKKOS) endif() endif() if(PKG_KSPACE) - message(STATUS "<<< FFT settings >>> --- Primary FFT lib: ${FFT}") - if(FFT_SINGLE) - message(STATUS "Using single precision FFTs") - else() - message(STATUS "Using double precision FFTs") - endif() - if(FFT_FFTW_THREADS OR FFT_MKL_THREADS) - message(STATUS "Using threaded FFTs") - else() - message(STATUS "Using non-threaded FFTs") - endif() - if(PKG_KOKKOS) - if(Kokkos_ENABLE_CUDA) - if(FFT STREQUAL "KISS") - message(STATUS "Kokkos FFT: KISS") - else() - message(STATUS "Kokkos FFT: cuFFT") - endif() - elseif(Kokkos_ENABLE_HIP) - if(FFT STREQUAL "KISS") - 
message(STATUS "Kokkos FFT: KISS") - else() - message(STATUS "Kokkos FFT: hipFFT") - endif() + if (LMP_HEFFTE) + message(STATUS "<<< FFT settings >>> +-- Primary FFT lib: heFFTe") + if (HEFFTE_BACKEND) + message(STATUS "heFFTe backend: ${HEFFTE_BACKEND}") else() - message(STATUS "Kokkos FFT: ${FFT}") + message(STATUS "heFFTe backend: stock (builtin FFT implementation, tested for correctness but not optimized for production)") + endif() + if(FFT_SINGLE) + message(STATUS "Using single precision FFTs") + else() + message(STATUS "Using double precision FFTs") + endif() + else() + message(STATUS "<<< FFT settings >>> +-- Primary FFT lib: ${FFT}") + if(FFT_SINGLE) + message(STATUS "Using single precision FFTs") + else() + message(STATUS "Using double precision FFTs") + endif() + if(FFT_FFTW_THREADS OR FFT_MKL_THREADS) + message(STATUS "Using threaded FFTs") + else() + message(STATUS "Using non-threaded FFTs") + endif() + if (FFT_HEFFTE) + message(STATUS "Using distributed algorithms from heFFTe") + else() + message(STATUS "Using builtin distributed algorithms") + endif() + if(PKG_KOKKOS) + if(Kokkos_ENABLE_CUDA) + if(FFT STREQUAL "KISS") + message(STATUS "Kokkos FFT: KISS") + else() + message(STATUS "Kokkos FFT: cuFFT") + endif() + elseif(Kokkos_ENABLE_HIP) + if(FFT STREQUAL "KISS") + message(STATUS "Kokkos FFT: KISS") + else() + message(STATUS "Kokkos FFT: hipFFT") + endif() + else() + message(STATUS "Kokkos FFT: ${FFT}") + endif() endif() endif() endif() diff --git a/cmake/Modules/LAMMPSUtils.cmake b/cmake/Modules/LAMMPSUtils.cmake index bb5ea07609..2ec9d1b706 100644 --- a/cmake/Modules/LAMMPSUtils.cmake +++ b/cmake/Modules/LAMMPSUtils.cmake @@ -83,17 +83,17 @@ function(check_for_autogen_files source_dir) file(GLOB SRC_AUTOGEN_FILES CONFIGURE_DEPENDS ${source_dir}/style_*.h) file(GLOB SRC_AUTOGEN_PACKAGES CONFIGURE_DEPENDS ${source_dir}/packages_*.h) list(APPEND SRC_AUTOGEN_FILES ${SRC_AUTOGEN_PACKAGES} ${source_dir}/lmpinstalledpkgs.h ${source_dir}/lmpgitversion.h) -
list(APPEND SRC_AUTOGEN_FILES ${SRC_AUTOGEN_PACKAGES} ${source_dir}/mliap_model_python_couple.h ${source_dir}/mliap_model_python_couple.cpp) + list(APPEND SRC_AUTOGEN_FILES ${source_dir}/mliap_model_python_couple.h ${source_dir}/mliap_model_python_couple.cpp) foreach(_SRC ${SRC_AUTOGEN_FILES}) get_filename_component(FILENAME "${_SRC}" NAME) if(EXISTS ${source_dir}/${FILENAME}) message(FATAL_ERROR "\n########################################################################\n" - "Found header file(s) generated by the make-based build system\n" - "\n" - "Please run\n" - "make -C ${source_dir} purge\n" - "to remove\n" - "########################################################################") + "Found header file ${source_dir}/${FILENAME} generated by the make-based build system\n" + "\n" + "Please run\n" + "make -C ${source_dir} purge\n" + "to remove\n" + "########################################################################") endif() endforeach() endfunction() diff --git a/cmake/Modules/Packages/GPU.cmake b/cmake/Modules/Packages/GPU.cmake index 47be8b8538..4c3288df84 100644 --- a/cmake/Modules/Packages/GPU.cmake +++ b/cmake/Modules/Packages/GPU.cmake @@ -151,10 +151,10 @@ if(GPU_API STREQUAL "CUDA") endif() cuda_compile_fatbin(GPU_GEN_OBJS ${GPU_LIB_CU} OPTIONS ${CUDA_REQUEST_PIC} - -DUNIX -O3 --use_fast_math -Wno-deprecated-gpu-targets -DNV_KERNEL -DUCL_CUDADR ${GPU_CUDA_GENCODE} -D_${GPU_PREC_SETTING} -DLAMMPS_${LAMMPS_SIZES}) + -DUNIX -O3 --use_fast_math -Wno-deprecated-gpu-targets -allow-unsupported-compiler -DNV_KERNEL -DUCL_CUDADR ${GPU_CUDA_GENCODE} -D_${GPU_PREC_SETTING} -DLAMMPS_${LAMMPS_SIZES}) cuda_compile(GPU_OBJS ${GPU_LIB_CUDPP_CU} OPTIONS ${CUDA_REQUEST_PIC} - -DUNIX -O3 --use_fast_math -Wno-deprecated-gpu-targets -DUCL_CUDADR ${GPU_CUDA_GENCODE} -D_${GPU_PREC_SETTING} -DLAMMPS_${LAMMPS_SIZES}) + -DUNIX -O3 --use_fast_math -Wno-deprecated-gpu-targets -allow-unsupported-compiler -DUCL_CUDADR ${GPU_CUDA_GENCODE} -D_${GPU_PREC_SETTING} 
-DLAMMPS_${LAMMPS_SIZES}) foreach(CU_OBJ ${GPU_GEN_OBJS}) get_filename_component(CU_NAME ${CU_OBJ} NAME_WE) diff --git a/cmake/Modules/Packages/KOKKOS.cmake b/cmake/Modules/Packages/KOKKOS.cmake index 6359d9e615..0edd9a3baa 100644 --- a/cmake/Modules/Packages/KOKKOS.cmake +++ b/cmake/Modules/Packages/KOKKOS.cmake @@ -50,8 +50,8 @@ if(DOWNLOAD_KOKKOS) list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_CXX_EXTENSIONS=${CMAKE_CXX_EXTENSIONS}") list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}") include(ExternalProject) - set(KOKKOS_URL "https://github.com/kokkos/kokkos/archive/4.1.00.tar.gz" CACHE STRING "URL for KOKKOS tarball") - set(KOKKOS_MD5 "a5f096bd8ad01b97fdc7a32583b17a33" CACHE STRING "MD5 checksum of KOKKOS tarball") + set(KOKKOS_URL "https://github.com/kokkos/kokkos/archive/4.2.00.tar.gz" CACHE STRING "URL for KOKKOS tarball") + set(KOKKOS_MD5 "731647b61a4233f568d583702e9cd6d1" CACHE STRING "MD5 checksum of KOKKOS tarball") mark_as_advanced(KOKKOS_URL) mark_as_advanced(KOKKOS_MD5) GetFallbackURL(KOKKOS_URL KOKKOS_FALLBACK) @@ -76,7 +76,7 @@ if(DOWNLOAD_KOKKOS) add_dependencies(LAMMPS::KOKKOSCORE kokkos_build) add_dependencies(LAMMPS::KOKKOSCONTAINERS kokkos_build) elseif(EXTERNAL_KOKKOS) - find_package(Kokkos 4.1.00 REQUIRED CONFIG) + find_package(Kokkos 4.2.00 REQUIRED CONFIG) target_link_libraries(lammps PRIVATE Kokkos::kokkos) else() set(LAMMPS_LIB_KOKKOS_SRC_DIR ${LAMMPS_LIB_SOURCE_DIR}/kokkos) diff --git a/cmake/Modules/Packages/KSPACE.cmake b/cmake/Modules/Packages/KSPACE.cmake index de7e7e5b20..9c9c879cd4 100644 --- a/cmake/Modules/Packages/KSPACE.cmake +++ b/cmake/Modules/Packages/KSPACE.cmake @@ -46,6 +46,42 @@ else() target_compile_definitions(lammps PRIVATE -DFFT_KISS) endif() +option(FFT_USE_HEFFTE "Use heFFTe as the distributed FFT engine, overrides the FFT option." 
OFF) +if(FFT_USE_HEFFTE) + # if FFT_HEFFTE is enabled, switch the builtin FFT engine with Heffte + set(FFT_HEFFTE_BACKEND_VALUES FFTW MKL) + set(FFT_HEFFTE_BACKEND "" CACHE STRING "Select heFFTe backend, e.g., FFTW or MKL") + set_property(CACHE FFT_HEFFTE_BACKEND PROPERTY STRINGS ${FFT_HEFFTE_BACKEND_VALUES}) + + if(FFT_HEFFTE_BACKEND STREQUAL "FFTW") # respect the backend choice, FFTW or MKL + set(HEFFTE_COMPONENTS "FFTW") + set(Heffte_ENABLE_FFTW "ON" CACHE BOOL "Enables FFTW backend for heFFTe") + elseif(FFT_HEFFTE_BACKEND STREQUAL "MKL") + set(HEFFTE_COMPONENTS "MKL") + set(Heffte_ENABLE_MKL "ON" CACHE BOOL "Enables MKL backend for heFFTe") + else() + message(WARNING "FFT_HEFFTE_BACKEND not selected, defaulting to the builtin 'stock' backend, which is intended for testing and is not optimized for production runs") + endif() + + find_package(Heffte 2.4.0 QUIET COMPONENTS ${HEFFTE_COMPONENTS}) + if (NOT Heffte_FOUND) # download and build + include(FetchContent) + FetchContent_Declare(HEFFTE_PROJECT # using v2.4.0 + URL "https://github.com/icl-utk-edu/heffte/archive/refs/tags/v2.4.0.tar.gz" + URL_HASH SHA256=02310fb4f9688df02f7181667e61c3adb7e38baf79611d80919d47452ff7881d + ) + FetchContent_Populate(HEFFTE_PROJECT) + add_subdirectory(${heffte_project_SOURCE_DIR} ${heffte_project_BINARY_DIR}) + set_target_properties(lmp PROPERTIES INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib") + set_target_properties(lammps PROPERTIES INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib") + add_library(Heffte::Heffte INTERFACE IMPORTED GLOBAL) + target_link_libraries(Heffte::Heffte INTERFACE Heffte) + endif() + + target_compile_definitions(lammps PRIVATE -DFFT_HEFFTE "-DFFT_HEFFTE_${FFT_HEFFTE_BACKEND}") + target_link_libraries(lammps PRIVATE Heffte::Heffte) +endif() + set(FFT_PACK "array" CACHE STRING "Optimization for FFT") set(FFT_PACK_VALUES array pointer memcpy) set_property(CACHE FFT_PACK PROPERTY STRINGS ${FFT_PACK_VALUES}) diff --git a/cmake/Modules/Packages/ML-PACE.cmake 
b/cmake/Modules/Packages/ML-PACE.cmake index ce8f02f5f4..248b8eea76 100644 --- a/cmake/Modules/Packages/ML-PACE.cmake +++ b/cmake/Modules/Packages/ML-PACE.cmake @@ -1,33 +1,40 @@ -set(PACELIB_URL "https://github.com/ICAMS/lammps-user-pace/archive/refs/tags/v.2023.10.04.tar.gz" CACHE STRING "URL for PACE evaluator library sources") +set(PACELIB_URL "https://github.com/ICAMS/lammps-user-pace/archive/refs/tags/v.2023.11.25.fix.tar.gz" CACHE STRING "URL for PACE evaluator library sources") -set(PACELIB_MD5 "70ff79f4e59af175e55d24f3243ad1ff" CACHE STRING "MD5 checksum of PACE evaluator library tarball") +set(PACELIB_MD5 "b45de9a633f42ed65422567e3ce56f9f" CACHE STRING "MD5 checksum of PACE evaluator library tarball") mark_as_advanced(PACELIB_URL) mark_as_advanced(PACELIB_MD5) GetFallbackURL(PACELIB_URL PACELIB_FALLBACK) -# download library sources to build folder -if(EXISTS ${CMAKE_BINARY_DIR}/libpace.tar.gz) - file(MD5 ${CMAKE_BINARY_DIR}/libpace.tar.gz DL_MD5) -endif() -if(NOT "${DL_MD5}" STREQUAL "${PACELIB_MD5}") - message(STATUS "Downloading ${PACELIB_URL}") - file(DOWNLOAD ${PACELIB_URL} ${CMAKE_BINARY_DIR}/libpace.tar.gz STATUS DL_STATUS SHOW_PROGRESS) - file(MD5 ${CMAKE_BINARY_DIR}/libpace.tar.gz DL_MD5) - if((NOT DL_STATUS EQUAL 0) OR (NOT "${DL_MD5}" STREQUAL "${PACELIB_MD5}")) - message(WARNING "Download from primary URL ${PACELIB_URL} failed\nTrying fallback URL ${PACELIB_FALLBACK}") - file(DOWNLOAD ${PACELIB_FALLBACK} ${CMAKE_BINARY_DIR}/libpace.tar.gz EXPECTED_HASH MD5=${PACELIB_MD5} SHOW_PROGRESS) - endif() +# LOCAL_ML-PACE points to top-level dir with local lammps-user-pace repo, +# to make it easier to check local build without going through the public github releases +if(LOCAL_ML-PACE) + set(lib-pace "${LOCAL_ML-PACE}") else() - message(STATUS "Using already downloaded archive ${CMAKE_BINARY_DIR}/libpace.tar.gz") -endif() + # download library sources to build folder + if(EXISTS ${CMAKE_BINARY_DIR}/libpace.tar.gz) + file(MD5 
${CMAKE_BINARY_DIR}/libpace.tar.gz DL_MD5) + endif() + if(NOT "${DL_MD5}" STREQUAL "${PACELIB_MD5}") + message(STATUS "Downloading ${PACELIB_URL}") + file(DOWNLOAD ${PACELIB_URL} ${CMAKE_BINARY_DIR}/libpace.tar.gz STATUS DL_STATUS SHOW_PROGRESS) + file(MD5 ${CMAKE_BINARY_DIR}/libpace.tar.gz DL_MD5) + if((NOT DL_STATUS EQUAL 0) OR (NOT "${DL_MD5}" STREQUAL "${PACELIB_MD5}")) + message(WARNING "Download from primary URL ${PACELIB_URL} failed\nTrying fallback URL ${PACELIB_FALLBACK}") + file(DOWNLOAD ${PACELIB_FALLBACK} ${CMAKE_BINARY_DIR}/libpace.tar.gz EXPECTED_HASH MD5=${PACELIB_MD5} SHOW_PROGRESS) + endif() + else() + message(STATUS "Using already downloaded archive ${CMAKE_BINARY_DIR}/libpace.tar.gz") + endif() -# uncompress downloaded sources -execute_process( - COMMAND ${CMAKE_COMMAND} -E remove_directory lammps-user-pace* - COMMAND ${CMAKE_COMMAND} -E tar xzf libpace.tar.gz - WORKING_DIRECTORY ${CMAKE_BINARY_DIR} -) -get_newest_file(${CMAKE_BINARY_DIR}/lammps-user-pace-* lib-pace) + + # uncompress downloaded sources + execute_process( + COMMAND ${CMAKE_COMMAND} -E remove_directory lammps-user-pace* + COMMAND ${CMAKE_COMMAND} -E tar xzf libpace.tar.gz + WORKING_DIRECTORY ${CMAKE_BINARY_DIR} + ) + get_newest_file(${CMAKE_BINARY_DIR}/lammps-user-pace-* lib-pace) +endif() add_subdirectory(${lib-pace} build-pace) set_target_properties(pace PROPERTIES CXX_EXTENSIONS ON OUTPUT_NAME lammps_pace${LAMMPS_MACHINE}) diff --git a/cmake/presets/gpu-cuda.cmake b/cmake/presets/gpu-cuda.cmake new file mode 100644 index 0000000000..2ac6bd9ea6 --- /dev/null +++ b/cmake/presets/gpu-cuda.cmake @@ -0,0 +1,11 @@ +# preset that enables GPU and selects CUDA API + +set(PKG_GPU ON CACHE BOOL "Build GPU package" FORCE) +set(GPU_API "cuda" CACHE STRING "API used by GPU package" FORCE) +set(GPU_PREC "mixed" CACHE STRING "" FORCE) + +set(CUDA_NVCC_FLAGS "-allow-unsupported-compiler" CACHE STRING "" FORCE) +set(CUDA_NVCC_FLAGS_DEBUG "-allow-unsupported-compiler" CACHE STRING "" FORCE)
+set(CUDA_NVCC_FLAGS_MINSIZEREL "-allow-unsupported-compiler" CACHE STRING "" FORCE) +set(CUDA_NVCC_FLAGS_RELWITHDEBINFO "-allow-unsupported-compiler" CACHE STRING "" FORCE) +set(CUDA_NVCC_FLAGS_RELEASE "-allow-unsupported-compiler" CACHE STRING "" FORCE) diff --git a/cmake/presets/kokkos-cuda.cmake b/cmake/presets/kokkos-cuda.cmake index ace8ff0879..c3ee081898 100644 --- a/cmake/presets/kokkos-cuda.cmake +++ b/cmake/presets/kokkos-cuda.cmake @@ -6,6 +6,8 @@ set(Kokkos_ENABLE_SERIAL ON CACHE BOOL "" FORCE) set(Kokkos_ENABLE_CUDA ON CACHE BOOL "" FORCE) set(Kokkos_ARCH_PASCAL60 ON CACHE BOOL "" FORCE) set(BUILD_OMP ON CACHE BOOL "" FORCE) +get_filename_component(NVCC_WRAPPER_CMD ${CMAKE_CURRENT_SOURCE_DIR}/../lib/kokkos/bin/nvcc_wrapper ABSOLUTE) +set(CMAKE_CXX_COMPILER ${NVCC_WRAPPER_CMD} CACHE FILEPATH "" FORCE) # hide deprecation warnings temporarily for stable release set(Kokkos_ENABLE_DEPRECATION_WARNINGS OFF CACHE BOOL "" FORCE) diff --git a/doc/lammps.1 b/doc/lammps.1 index 79964d1680..100ea9b663 100644 --- a/doc/lammps.1 +++ b/doc/lammps.1 @@ -1,7 +1,7 @@ -.TH LAMMPS "1" "2 August 2023" "2023-08-2" +.TH LAMMPS "1" "21 November 2023" "2023-11-21" .SH NAME .B LAMMPS -\- Molecular Dynamics Simulator. Version 2 August 2023 +\- Molecular Dynamics Simulator. Version 21 November 2023 .SH SYNOPSIS .B lmp diff --git a/doc/src/Build_extras.rst b/doc/src/Build_extras.rst index 7a7b5cf0d5..1f643a9d14 100644 --- a/doc/src/Build_extras.rst +++ b/doc/src/Build_extras.rst @@ -626,22 +626,22 @@ They must be specified in uppercase. 
* - HOPPER90 - GPU - NVIDIA Hopper generation CC 9.0 GPU - * - VEGA900 + * - AMD_GFX906 - GPU - - AMD GPU MI25 GFX900 - * - VEGA906 + - AMD GPU MI50/MI60 + * - AMD_GFX908 - GPU - - AMD GPU MI50/MI60 GFX906 - * - VEGA908 + - AMD GPU MI100 + * - AMD_GFX90A - GPU - - AMD GPU MI100 GFX908 - * - VEGA90A + - AMD GPU MI200 + * - AMD_GFX942 - GPU - - AMD GPU MI200 GFX90A - * - NAVI1030 + - AMD GPU MI300 + * - AMD_GFX1030 - GPU - AMD GPU V620/W6800 - * - NAVI1100 + * - AMD_GFX1100 - GPU - AMD GPU RX7900XTX * - INTEL_GEN @@ -666,7 +666,7 @@ They must be specified in uppercase. - GPU - Intel GPU Ponte Vecchio -This list was last updated for version 4.0.1 of the Kokkos library. +This list was last updated for version 4.2 of the Kokkos library. .. tabs:: diff --git a/doc/src/Build_settings.rst b/doc/src/Build_settings.rst index 7576cae3eb..e2b096c6e0 100644 --- a/doc/src/Build_settings.rst +++ b/doc/src/Build_settings.rst @@ -43,6 +43,12 @@ When the KSPACE package is included in a LAMMPS build, the require use of an FFT library to compute 1d FFTs. The KISS FFT library is included with LAMMPS, but other libraries can be faster. LAMMPS can use them if they are available on your system. +Alternatively, LAMMPS can use the +`heFFTe `_ +library for the MPI communication algorithms, +which comes with many optimizations for special cases, +e.g., leveraging 2D and 3D backend transforms and +better pipelining for packing and communication. .. tabs:: @@ -53,6 +59,7 @@ LAMMPS can use them if they are available on your system. -D FFT=value # FFTW3 or MKL or KISS, default is FFTW3 if found, else KISS -D FFT_SINGLE=value # yes or no (default), no = double precision -D FFT_PACK=value # array (default) or pointer or memcpy + -D FFT_USE_HEFFTE=value # yes or no (default), yes links to heFFTe .. note:: @@ -76,6 +83,15 @@ LAMMPS can use them if they are available on your system. 
-D MKL_INCLUDE_DIR=path # ditto for Intel MKL library -D FFT_MKL_THREADS=on # enable using threaded FFTs with MKL libraries -D MKL_LIBRARY=path # path to MKL libraries + -D FFT_HEFFTE_BACKEND=value # FFTW or MKL or empty/undefined for the stock backend + -D Heffte_ROOT=path # path to an existing heFFTe installation + + .. note:: + + heFFTe comes with a builtin stock backend for FFTs; however, the backend + is intended for testing purposes and is not performance optimized + for large scale production runs. + .. tab:: Traditional make @@ -111,6 +127,24 @@ LAMMPS can use them if they are available on your system. files in its default search path. You must specify ``FFT_LIB`` with the appropriate FFT libraries to include in the link. + Traditional make can also link to heFFTe using an existing installation + + .. code-block:: make + + include <path-to-heffte-installation>/share/heffte/HeffteMakefile.in + FFT_INC = -DFFT_HEFFTE -DFFT_HEFFTE_FFTW $(heffte_include) + FFT_PATH = + FFT_LIB = $(heffte_link) $(heffte_libs) + + The heFFTe install path will contain `HeffteMakefile.in`, + which will define the `heffte_` include variables needed to link to heFFTe from + an external project using traditional make. + The `-DFFT_HEFFTE` define is required to switch to using heFFTe, while the optional `-DFFT_HEFFTE_FFTW` + selects the desired heFFTe backend, e.g., `-DFFT_HEFFTE_FFTW` or `-DFFT_HEFFTE_MKL`; + omitting the backend define will default to the `stock` backend. + The heFFTe `stock` backend is intended to be used for testing and debugging, + but is not performance optimized for large scale production runs. + The `KISS FFT library `_ is included in the LAMMPS distribution. It is portable across all platforms. Depending on the size of the FFTs and the number of @@ -170,6 +204,16 @@ Depending on the machine, the size of the FFT grid, the number of processors used, one option may be slightly faster. The default is ARRAY mode.
+When using ``-DFFT_USE_HEFFTE=yes``, CMake will first look for an existing install +with hints provided by ``-DHeffte_ROOT``, as recommended by the CMake +standard; note that the name is case sensitive. If CMake cannot find +a heFFTe installation with the correct backend (e.g., FFTW or MKL), +it will attempt to download and build the library automatically. +In this case, LAMMPS CMake will also accept all heFFTe specific variables +listed in the +`heFFTe documentation `_ +and those variables will be passed into the heFFTe build. + ---------- .. _size: diff --git a/doc/src/Commands_compute.rst b/doc/src/Commands_compute.rst index 819a1b10d8..af76ff3f68 100644 --- a/doc/src/Commands_compute.rst +++ b/doc/src/Commands_compute.rst @@ -100,6 +100,7 @@ KOKKOS, o = OPENMP, t = OPT. * :doc:`nbond/atom ` * :doc:`omega/chunk ` * :doc:`orientorder/atom (k) ` + * :doc:`pace ` * :doc:`pair ` * :doc:`pair/local ` * :doc:`pe ` @@ -115,8 +116,9 @@ KOKKOS, o = OPENMP, t = OPT. * :doc:`property/grid ` * :doc:`property/local ` * :doc:`ptm/atom ` - * :doc:`reaxff/atom (k) ` + * :doc:`rattlers/atom ` * :doc:`rdf ` + * :doc:`reaxff/atom (k) ` * :doc:`reduce ` * :doc:`reduce/chunk ` * :doc:`reduce/region ` diff --git a/doc/src/Commands_fix.rst b/doc/src/Commands_fix.rst index 7301d1345e..e89e302673 100644 --- a/doc/src/Commands_fix.rst +++ b/doc/src/Commands_fix.rst @@ -122,6 +122,7 @@ OPT. * :doc:`mvv/tdpd ` * :doc:`neb ` * :doc:`neb/spin ` + * :doc:`nonaffine/displacement ` * :doc:`nph (ko) ` * :doc:`nph/asphere (o) ` * :doc:`nph/body ` @@ -238,10 +239,10 @@ OPT.
* :doc:`store/force ` * :doc:`store/state ` * :doc:`tdpd/source ` - * :doc:`temp/berendsen ` + * :doc:`temp/berendsen (k) ` * :doc:`temp/csld ` * :doc:`temp/csvr ` - * :doc:`temp/rescale ` + * :doc:`temp/rescale (k) ` * :doc:`temp/rescale/eff ` * :doc:`tfmc ` * :doc:`tgnpt/drude ` diff --git a/doc/src/Commands_pair.rst b/doc/src/Commands_pair.rst index 828f0b10d9..e7761e7bee 100644 --- a/doc/src/Commands_pair.rst +++ b/doc/src/Commands_pair.rst @@ -87,7 +87,7 @@ OPT. * :doc:`coul/long/soft (o) ` * :doc:`coul/msm (o) ` * :doc:`coul/slater/cut ` - * :doc:`coul/slater/long ` + * :doc:`coul/slater/long (g) ` * :doc:`coul/shield ` * :doc:`coul/streitz ` * :doc:`coul/tt ` @@ -110,7 +110,7 @@ OPT. * :doc:`eam/he ` * :doc:`edip (o) ` * :doc:`edip/multi ` - * :doc:`edpd ` + * :doc:`edpd (g) ` * :doc:`eff/cut ` * :doc:`eim (o) ` * :doc:`exp6/rx (k) ` @@ -158,14 +158,14 @@ OPT. * :doc:`lj/cut (gikot) ` * :doc:`lj/cut/coul/cut (gko) ` * :doc:`lj/cut/coul/cut/dielectric (o) ` - * :doc:`lj/cut/coul/cut/soft (o) ` + * :doc:`lj/cut/coul/cut/soft (go) ` * :doc:`lj/cut/coul/debye (gko) ` * :doc:`lj/cut/coul/debye/dielectric (o) ` * :doc:`lj/cut/coul/dsf (gko) ` * :doc:`lj/cut/coul/long (gikot) ` * :doc:`lj/cut/coul/long/cs ` * :doc:`lj/cut/coul/long/dielectric (o) ` - * :doc:`lj/cut/coul/long/soft (o) ` + * :doc:`lj/cut/coul/long/soft (go) ` * :doc:`lj/cut/coul/msm (go) ` * :doc:`lj/cut/coul/msm/dielectric ` * :doc:`lj/cut/coul/wolf (o) ` @@ -202,7 +202,7 @@ OPT. * :doc:`lubricate/poly (o) ` * :doc:`lubricateU ` * :doc:`lubricateU/poly ` - * :doc:`mdpd ` + * :doc:`mdpd (g) ` * :doc:`mdpd/rhosum ` * :doc:`meam (k) ` * :doc:`meam/ms (k) ` @@ -268,11 +268,11 @@ OPT. 
* :doc:`smtbq ` * :doc:`snap (ik) ` * :doc:`soft (go) ` - * :doc:`sph/heatconduction ` + * :doc:`sph/heatconduction (g) ` * :doc:`sph/idealgas ` - * :doc:`sph/lj ` + * :doc:`sph/lj (g) ` * :doc:`sph/rhosum ` - * :doc:`sph/taitwater ` + * :doc:`sph/taitwater (g) ` * :doc:`sph/taitwater/morris ` * :doc:`spin/dipole/cut ` * :doc:`spin/dipole/long ` diff --git a/doc/src/Commands_removed.rst b/doc/src/Commands_removed.rst index 84cc534304..98a52fc2d7 100644 --- a/doc/src/Commands_removed.rst +++ b/doc/src/Commands_removed.rst @@ -88,7 +88,7 @@ The same functionality is available through MPIIO package ------------- -.. deprecated:: TBD +.. deprecated:: 21Nov2023 The MPIIO package has been removed from LAMMPS since it was unmaintained for many years and thus not updated to incorporate required changes that @@ -107,7 +107,7 @@ see :doc:`restart `, :doc:`read_restart `, MSCG package ------------ -.. deprecated:: TBD +.. deprecated:: 21Nov2023 The MSCG package has been removed from LAMMPS since it was unmaintained for many years and instead superseded by the `OpenMSCG software @@ -126,6 +126,17 @@ syntax compatible with the removed reax pair style, so input files will have to be adapted. The REAXFF package was originally called USER-REAXC. +USER-REAXC package +------------------ + +.. deprecated:: TBD + +The USER-REAXC package has been renamed to :ref:`REAXFF `. +In the process also the pair style and related fixes were renamed to use +the "reaxff" string instead of "reax/c". For a while LAMMPS was maintaining +backward compatibility by providing aliases for the styles. These have +been removed, so using "reaxff" is now *required*. 
+ USER-CUDA package ----------------- diff --git a/doc/src/Developer_unittest.rst b/doc/src/Developer_unittest.rst index 9886e9e4b4..67c5ce365a 100644 --- a/doc/src/Developer_unittest.rst +++ b/doc/src/Developer_unittest.rst @@ -180,19 +180,11 @@ discarded but by setting the verbose flag (via setting the ``TEST_ARGS`` environment variable, ``TEST_ARGS=-v``) it can be printed and used to understand why tests fail unexpectedly. -Another complexity of these tests stems from the need to capture -situations where LAMMPS will stop with an error, i.e. handle so-called -"death tests". Here the LAMMPS code will operate differently depending -on whether it was configured to throw C++ exceptions on errors or call -either ``exit()`` or ``MPI_Abort()``. In the latter case, the test code -also needs to detect whether LAMMPS was compiled with the OpenMPI -library, as OpenMPI is **only** compatible the death test options of the -GoogleTest library when C++ exceptions are enabled; otherwise those -"death tests" must be skipped to avoid reporting bogus failures. The -specifics of this step are implemented in the ``TEST_FAILURE()`` -macro. These tests operate by capturing the screen output when executing -the failing command and then comparing that with a provided regular -expression string pattern. Example: +The specifics of so-called "death tests", i.e. conditions where LAMMPS +should fail and throw an exception, are implemented in the +``TEST_FAILURE()`` macro. These tests operate by capturing the screen +output when executing the failing command and then comparing that with a +provided regular expression string pattern. Example: .. code-block:: c++ diff --git a/doc/src/Fortran.rst b/doc/src/Fortran.rst index 913c31842e..76fdff753a 100644 --- a/doc/src/Fortran.rst +++ b/doc/src/Fortran.rst @@ -3038,14 +3038,6 @@ Procedures Bound to the :f:type:`lammps` Derived Type This function can be used to query if an error inside of LAMMPS has thrown a :ref:`C++ exception `. - .. 
note:: - - This function will always report "no error" when the LAMMPS library - has been compiled without ``-DLAMMPS_EXCEPTIONS``, which turns fatal - errors aborting LAMMPS into C++ exceptions. You can use the library - function :cpp:func:`lammps_config_has_exceptions` to check if this is - the case. - :to: :cpp:func:`lammps_has_error` :r has_error: ``.TRUE.`` if there is an error. :rtype has_error: logical @@ -3068,13 +3060,6 @@ Procedures Bound to the :f:type:`lammps` Derived Type would happen only in a single MPI rank and thus may not be recoverable, as other MPI ranks may be waiting on the failing MPI rank(s) to send messages. - .. note:: - - This function will do nothing when the LAMMPS library has been - compiled without ``-DLAMMPS_EXCEPTIONS``, which turns errors aborting - LAMMPS into C++ exceptions. You can use the function - :f:func:`config_has_exceptions` to check whether this is the case. - :p character(len=\*) buffer: string buffer to copy the error message into :o integer(c_int) status [optional]: 1 when all ranks had the error, 2 on a single-rank error. diff --git a/doc/src/Howto.rst b/doc/src/Howto.rst index b1f5da8abc..85c98bd6de 100644 --- a/doc/src/Howto.rst +++ b/doc/src/Howto.rst @@ -101,6 +101,7 @@ Tutorials howto Howto_cmake Howto_github Howto_lammps_gui + Howto_moltemplate Howto_pylammps Howto_wsl diff --git a/doc/src/Howto_body.rst b/doc/src/Howto_body.rst index 115b7797c8..968e10edd8 100644 --- a/doc/src/Howto_body.rst +++ b/doc/src/Howto_body.rst @@ -335,7 +335,7 @@ faces are listed, so that M = 6 + 3\*N + 1. The integer line has three values: number of vertices (N), number of edges (E) and number of faces (F). The floating point line(s) list 6 moments of inertia followed by the coordinates of the N vertices (x1 -to zN) as 3N values, followed by 2N vertex indices corresponding to +to zN) as 3N values, followed by 2E vertex indices corresponding to the end points of the E edges, then 4\*F vertex indices defining F faces. 
The last value is the diameter value = the rounded diameter of the sphere that surrounds each vertex. The diameter value can be diff --git a/doc/src/Howto_moltemplate.rst b/doc/src/Howto_moltemplate.rst new file mode 100644 index 0000000000..bb068a2e93 --- /dev/null +++ b/doc/src/Howto_moltemplate.rst @@ -0,0 +1,371 @@ +Moltemplate Tutorial +==================== + +In this tutorial, we are going to use the tool :ref:`Moltemplate +` to set up a classical molecular dynamics simulation using +the :ref:`OPLS-AA force field `. The first +task is to describe an organic compound and create a complete input deck +for LAMMPS. The second task is to map the OPLS-AA force field to a +molecular sample created with an external tool, e.g. PACKMOL, and +exported as a PDB file. The files used in this tutorial can be found +in the ``tools/moltemplate/tutorial-files`` folder of the LAMMPS +source code distribution. + +Simulating an organic solvent +""""""""""""""""""""""""""""" + +This example aims to create a cubic box of the organic solvent +formamide. + +The first step is to create a molecular topology in the +LAMMPS-template (LT) file format representing a single molecule, which +will be stored in a Moltemplate object called ``_FAM inherits OPLSAA {}``. +This command states that the object ``_FAM`` is based on an existing +object called ``OPLSAA``, which contains OPLS-AA parameters, atom type +definitions, partial charges, masses and bond-angle rules for many organic +and biological compounds. + +The atomic structure is the starting point to populate the command +``write('Data Atoms') {}``, which will write the ``Atoms`` section in the +LAMMPS data file. The OPLS-AA force field uses the ``atom_style full``; +therefore, this column format is used: +``# atomID molID atomType charge coordX coordY coordZ``. +The ``atomID``\ s are replaced with Moltemplate ``$``-type variables, which +are then substituted with unique numerical IDs.
The same logic is applied +to the ``molID``, except that the same variable is used for the whole +molecule. The atom types are assigned using ``@``-type variables. The +assignment of atom types (e.g. ``@atom:177``, ``@atom:178``) is done using +the OPLS-AA atom types defined in the "In Charges" section of the file +``oplsaa.lt``, looking for a reasonable match with the description of the atom. +The resulting file (``formamide.lt``) follows: + +.. code-block:: bash + + _FAM inherits OPLSAA { + + # atomID molID atomType charge coordX coordY coordZ + write('Data Atoms') { + $atom:C00 $mol @atom:177 0.00 0.100 0.490 0.0 + $atom:O01 $mol @atom:178 0.00 1.091 -0.250 0.0 + $atom:N02 $mol @atom:179 0.00 -1.121 -0.181 0.0 + $atom:H03 $mol @atom:182 0.00 -2.013 0.272 0.0 + $atom:H04 $mol @atom:182 0.00 -1.056 -1.190 0.0 + $atom:H05 $mol @atom:221 0.00 0.144 1.570 0.0 + } + + # A list of the bonds in the molecule: + # BondID AtomID1 AtomID2 + write('Data Bond List') { + $bond:C1 $atom:C00 $atom:O01 + $bond:C2 $atom:C00 $atom:H05 + $bond:C3 $atom:C00 $atom:N02 + $bond:C4 $atom:N02 $atom:H03 + $bond:C5 $atom:N02 $atom:H04 + } + } + +You don't have to specify the charges in this example because they will +be assigned according to the atom type. Analogously, only a +"Data Bond List" section is needed as the atom type will determine the +bond type. The other bonded interactions (e.g. angles, +dihedrals, and impropers) will be automatically generated by +Moltemplate. + +If the system is not charge-neutral, or Moltemplate complains that you have +missing bond, angle, or dihedral types, this means at least one of your +atom types is incorrect. + +The second step is to create a master file with instructions to build a +starting structure and the LAMMPS commands to run an NPT simulation. The +master file (``solv_01.lt``) follows: + +.. code-block:: bash + + # Import the force field.
+ import /usr/local/moltemplate/moltemplate/force_fields/oplsaa.lt + import formamide.lt # after oplsaa.lt, as it depends on it. + + # Create the input sample. + solv = new _FAM [5].move( 4.6, 0, 0) + [5].move( 0, 4.6, 0) + [5].move( 0, 0, 4.6) + solv[*][*][*].move(-11.5, -11.5, -11.5) + + # Set the simulation box. + write_once("Data Boundary") { + -11.5 11.5 xlo xhi + -11.5 11.5 ylo yhi + -11.5 11.5 zlo zhi + } + + # Create an input deck for LAMMPS. + write_once("In Init"){ + # Input variables. + variable run string solv_01 # output name + variable ts equal 1 # timestep + variable temp equal 300 # equilibrium temperature + variable p equal 1. # equilibrium pressure + variable d equal 1000 # output frequency + variable equi equal 5000 # Equilibration steps + variable prod equal 30000 # Production steps + + # PBC (set them before the creation of the box). + boundary p p p + } + + # Run an NPT simulation. + write_once("In Run"){ + # Derived variables. + variable tcouple equal \$\{ts\}*100 + variable pcouple equal \$\{ts\}*1000 + + # Output. + thermo \$d + thermo_style custom step etotal evdwl ecoul elong ebond eangle & + edihed eimp ke pe temp press vol density cpu + thermo_modify flush yes + + # Trajectory. + dump TRJ all dcd \$d \$\{run\}.dcd + dump_modify TRJ unwrap yes + + # Thermalisation and relaxation, NPT ensemble. + timestep \$\{ts\} + fix NPT all npt temp \$\{temp\} \$\{temp\} \$\{tcouple\} iso \$p \$p \$\{pcouple\} + velocity all create \$\{temp\} 858096 dist gaussian + # Short runs to update the PPPM settings as the box shrinks. + run \$\{equi\} post no + run \$\{equi\} post no + run \$\{equi\} post no + run \$\{equi\} + # From now on, the density shouldn't change too much. + run \$\{prod\} + unfix NPT + } + +The first two commands insert the content of files ``oplsaa.lt`` and +``formamide.lt`` into the master file. At this point, we can use the +command ``solv = new _FAM [N]`` to create N copies of a molecule of type +``_FAM``. 
In this case, we create an array of 5*5*5 molecules on a cubic +grid using the coordinate transformation command ``.move( 4.6, 0, 0)``. +See the Moltemplate documentation to learn more about the syntax. As +the sample was created from scratch, we also specify the simulation box +size in the "Data Boundary" section. + +The LAMMPS settings for the force field are specified in the file +``oplsaa.lt`` and are written automatically in the input deck. We also +specify the boundary conditions and a set of variables in +the "In Init" section. The remaining commands to run an NPT simulation +are written in the "In Run" section. Note that in this script, LAMMPS +variables are protected with the escape character ``\`` to distinguish +them from Moltemplate variables, e.g. ``\$\{run\}`` is a LAMMPS +variable that is written in the input deck as ``${run}``. + +Compile the master file with: + +.. code-block:: bash + + moltemplate.sh -overlay-all solv_01.lt + +And execute the simulation with the following: + +.. code-block:: bash + + mpirun -np 4 lmp -in solv_01.in -l solv_01.log + +.. figure:: JPG/solv_01.png + :figwidth: 80% + :figclass: align-center + + Snapshot of the sample at the beginning and end of the simulation. + Rendered with Ovito. + +Mapping an existing structure +""""""""""""""""""""""""""""" + +Another helpful way to use Moltemplate is mapping an existing molecular +sample to a force field. This is useful when a complex sample is +assembled from different simulations or created with specialized +software (e.g. PACKMOL). As in the previous example, all molecular +species in the sample must be defined using single-molecule Moltemplate +objects. For this example, we use a short polymer in a box containing +water molecules and ions in the PDB file ``model.pdb``. + +It is essential to understand that the order of atoms in the PDB file +and in the Moltemplate master script must match, as we are using the +coordinates from the PDB file in the order they appear. 
The order of +atoms and molecules in the PDB file provided is as follows: + +- 500 water molecules, with atoms ordered in this sequence: + + .. parsed-literal:: + + ATOM 1 O MOL D 1 5.901 7.384 1.103 0.00 0.00 DUM + ATOM 2 H MOL D 1 6.047 8.238 0.581 0.00 0.00 DUM + ATOM 3 H MOL D 1 6.188 7.533 2.057 0.00 0.00 DUM + +- 1 polymer molecule. +- 1 Ca\ :sup:`2+` ion. +- 2 Cl\ :sup:`-` ions. + +In the master LT file, this sequence of molecules is matched with the +following commands: + +.. code-block:: bash + + # Create the sample. + wat=new SPC[500] + pol=new PolyNIPAM[1] + cat=new Ca[1] + ani=new Cl[2] + +Note that the first command would create 500 water molecules in the +same position in space, and the other commands will use the coordinates +specified in the corresponding molecular topology block. However, the +coordinates will be overwritten by rendering an external atomic +structure file. Note that if molecules of the same species are scattered in +the input structure, it is recommended to reorder and group them +by molecule type to facilitate the creation of the input sample. + +The molecular topology for the polymer is created as in the previous +example, with the atom types assigned as in the following schema: + +.. figure:: JPG/PolyNIPAM.jpg + :scale: 30% + :align: center + + Atom types assigned to the polymer's repeating unit. + +The molecular topology of the water and ions is stated directly in +the master file for the sake of space, but they could also be written +in separate file(s) and imported before the sample is created. + +The resulting master LT file defining short annealing at a fixed volume +(NVT) follows: + +.. code-block:: bash + + # Use the OPLS-AA force field for all species. + import /usr/local/moltemplate/moltemplate/force_fields/oplsaa.lt + import PolyNIPAM.lt + + # Define the SPC water and ions as in the OPLS-AA + Ca inherits OPLSAA { + write("Data Atoms"){ + $atom:a1 $mol:. 
@atom:354 0.0 0.00000 0.00000 0.000000 + } + } + Cl inherits OPLSAA { + write("Data Atoms"){ + $atom:a1 $mol:. @atom:344 0.0 0.00000 0.00000 0.000000 + } + } + SPC inherits OPLSAA { + write("Data Atoms"){ + $atom:O $mol:. @atom:76 0. 0.0000000 0.00000 0.000000 + $atom:H1 $mol:. @atom:77 0. 0.8164904 0.00000 0.5773590 + $atom:H2 $mol:. @atom:77 0. -0.8164904 0.00000 0.5773590 + } + write("Data Bond List") { + $bond:OH1 $atom:O $atom:H1 + $bond:OH2 $atom:O $atom:H2 + } + } + + # Create the sample. + wat=new SPC[500] + pol=new PolyNIPAM[1] + cat=new Ca[1] + ani=new Cl[2] + + # Periodic boundary conditions: + write_once("Data Boundary"){ + 0 26 xlo xhi + 0 26 ylo yhi + 0 26 zlo zhi + } + + # Define the input variables. + write_once("In Init"){ + # Input variables. + variable run string sample01 # output name + variable ts equal 2 # timestep + variable temp equal 298.15 # equilibrium temperature + variable p equal 1. # equilibrium pressure + variable equi equal 30000 # equilibration steps + + # PBC (set them before the creation of the box). + boundary p p p + neighbor 3 bin + } + + # Run an NVT simulation. + write_once("In Run"){ + # Set the output. + thermo 1000 + thermo_style custom step etotal evdwl ecoul elong ebond eangle & + edihed eimp pe ke temp press atoms vol density cpu + thermo_modify flush yes + compute pe1 all pe/atom pair + dump TRJ all custom 100 \$\{run\}.dump id xu yu zu c_pe1 + + # Minimise the input structure, just in case. + minimize .01 .001 1000 100000 + write_data \$\{run\}.min + + # Set the constraints. + group watergroup type @atom:76 @atom:77 + fix 0 watergroup shake 0.0001 10 0 b @bond:042_043 a @angle:043_042_043 + + # Short annealing. + timestep \$\{ts\} + fix 1 all nvt temp \$\{temp\} \$\{temp\} \$(100*dt) + velocity all create \$\{temp\} 315443 + run \$\{equi\} + unfix 1 + } + + +In this example, the water model is SPC and it is defined in the +``oplsaa.lt`` file with atom types ``@atom:76`` and ``@atom:77``. 
For +water we also use the ``group`` and ``fix shake`` commands with +Moltemplate ``@``-type variables, to ensure consistency with the +numerical values assigned during compilation. To identify the bond and +angle types, look for the extended ``@atom`` IDs, which in this case +are: + +.. code-block:: bash + + replace{ @atom:76 @atom:76_b042_a042_d042_i042 } + replace{ @atom:77 @atom:77_b043_a043_d043_i043 } + +From which we can identify the following "Data Bonds By Type": +``@bond:042_043 @atom:*_b042*_a*_d*_i* @atom:*_b043*_a*_d*_i*`` and +"Data Angles By Type": ``@angle:043_042_043 @atom:*_b*_a043*_d*_i* +@atom:*_b*_a042*_d*_i* @atom:*_b*_a043*_d*_i*`` + +Compile the master file with: + +.. code-block:: bash + + moltemplate.sh -overlay-all -pdb model.pdb sample01.lt + +And execute the simulation with the following: + +.. code-block:: bash + + mpirun -np 4 lmp -in sample01.in -l sample01.log + +.. figure:: JPG/sample01.png + :figwidth: 50% + :figclass: align-center + + Sample visualized with Ovito loading the trajectory into the DATA + file written after minimization. + +------------ + +.. _OPLSAA96: + +**(OPLS-AA)** Jorgensen, Maxwell, Tirado-Rives, J Am Chem Soc, +118(45), 11225-11236 (1996). diff --git a/doc/src/JPG/PolyNIPAM.jpg b/doc/src/JPG/PolyNIPAM.jpg new file mode 100644 index 0000000000..4ad3ce8274 Binary files /dev/null and b/doc/src/JPG/PolyNIPAM.jpg differ diff --git a/doc/src/JPG/sample01.png b/doc/src/JPG/sample01.png new file mode 100644 index 0000000000..3a00176edd Binary files /dev/null and b/doc/src/JPG/sample01.png differ diff --git a/doc/src/JPG/solv_01.png b/doc/src/JPG/solv_01.png new file mode 100644 index 0000000000..fc52d44928 Binary files /dev/null and b/doc/src/JPG/solv_01.png differ diff --git a/doc/src/Library.rst b/doc/src/Library.rst index 09561cda82..50c28b7fcd 100644 --- a/doc/src/Library.rst +++ b/doc/src/Library.rst @@ -80,13 +80,15 @@ run LAMMPS in serial mode. 
:class: note If the LAMMPS executable encounters an error condition, it will abort - after printing an error message. For a library interface this is - usually not desirable. Thus LAMMPS can be compiled to to :ref:`throw - a C++ exception ` instead. If enabled, the library - functions will catch those exceptions and return. The error status - :cpp:func:`can be queried ` and an :cpp:func:`error - message retrieved `. We thus - recommend enabling C++ exceptions when using the library interface, + after printing an error message. It does so by catching the + exceptions that LAMMPS could throw. For a C library interface this + is usually not desirable since the calling code might lack the + ability to catch such exceptions. Thus, the library functions will + catch those exceptions and return from the affected functions. The + error status :cpp:func:`can be queried ` and an + :cpp:func:`error message retrieved `. + This is, for example, used by the :doc:`LAMMPS Python module + ` and then a suitable Python exception is thrown. .. admonition:: Using the C library interface as a plugin :class: note diff --git a/doc/src/Python_error.rst b/doc/src/Python_error.rst index f6a94c0e82..6aec8df391 100644 --- a/doc/src/Python_error.rst +++ b/doc/src/Python_error.rst @@ -15,9 +15,7 @@ Python exception handling mechanism. try: # LAMMPS will normally terminate itself and the running process if an error - # occurs. This would kill the Python interpreter. To avoid this, make sure to - # compile with LAMMPS_EXCEPTIONS enabled. This ensures the library API calls - # will not terminate the parent process. Instead, the library wrapper will + # occurs. This would kill the Python interpreter. 
The library wrapper will # detect that an error has occured and throw a Python exception lmp.command('unknown') diff --git a/doc/src/Python_install.rst b/doc/src/Python_install.rst index c4fbec0be4..01610b84f0 100644 --- a/doc/src/Python_install.rst +++ b/doc/src/Python_install.rst @@ -5,8 +5,7 @@ The LAMMPS Python module enables calling the :ref:`LAMMPS C library API ` from Python by dynamically loading functions in the LAMMPS shared library through the Python `ctypes `_ module. Because of the dynamic loading, it is required that LAMMPS is -compiled in :ref:`"shared" mode `. It is also recommended to -compile LAMMPS with :ref:`C++ exceptions ` enabled. +compiled in :ref:`"shared" mode `. Two components are necessary for Python to be able to invoke LAMMPS code: diff --git a/doc/src/compute.rst b/doc/src/compute.rst index 6ef093c16d..a7fc997d0f 100644 --- a/doc/src/compute.rst +++ b/doc/src/compute.rst @@ -264,6 +264,7 @@ The individual style names on the :doc:`Commands compute ` pag * :doc:`nbond/atom ` - calculates number of bonds per atom * :doc:`omega/chunk ` - angular velocity for each chunk * :doc:`orientorder/atom ` - Steinhardt bond orientational order parameters Ql +* :doc:`pace ` - atomic cluster expansion descriptors and related quantities * :doc:`pair ` - values computed by a pair style * :doc:`pair/local ` - distance/energy/force of each pairwise interaction * :doc:`pe ` - potential energy @@ -279,6 +280,7 @@ The individual style names on the :doc:`Commands compute ` pag * :doc:`property/grid ` - convert per-grid attributes to per-grid vectors/arrays * :doc:`property/local ` - convert local attributes to local vectors/arrays * :doc:`ptm/atom ` - determines the local lattice structure based on the Polyhedral Template Matching method +* :doc:`rattlers/atom ` - identify under-coordinated rattler atoms * :doc:`rdf ` - radial distribution function :math:`g(r)` histogram of group of atoms * :doc:`reaxff/atom ` - extract ReaxFF bond information * :doc:`reduce 
` - combine per-atom quantities into a single global value diff --git a/doc/src/compute_composition_atom.rst b/doc/src/compute_composition_atom.rst index b7890fff8b..e973eaa234 100644 --- a/doc/src/compute_composition_atom.rst +++ b/doc/src/compute_composition_atom.rst @@ -36,7 +36,7 @@ Examples Description """"""""""" -.. versionadded:: TBD +.. versionadded:: 21Nov2023 Define a computation that calculates a local composition vector for each atom. For a central atom with :math:`M` neighbors within the neighbor cutoff sphere, diff --git a/doc/src/compute_contact_atom.rst b/doc/src/compute_contact_atom.rst index 31aa24aa60..b7ed062ff6 100644 --- a/doc/src/compute_contact_atom.rst +++ b/doc/src/compute_contact_atom.rst @@ -36,6 +36,9 @@ sum of the radii of the two particles. The value of the contact number will be 0.0 for atoms not in the specified compute group. +The optional *group2-ID* argument allows to specify from which group atoms +contribute to the coordination number. Default setting is group 'all'. + Output info """"""""""" @@ -47,9 +50,6 @@ overview of LAMMPS output options. The per-atom vector values will be a number :math:`\ge 0.0`, as explained above. -The optional *group2-ID* argument allows to specify from which group atoms -contribute to the coordination number. Default setting is group 'all.' - Restrictions """""""""""" @@ -69,6 +69,3 @@ Default """"""" *group2-ID* = all - - -none diff --git a/doc/src/compute_pace.rst b/doc/src/compute_pace.rst new file mode 100644 index 0000000000..0c4a295f1b --- /dev/null +++ b/doc/src/compute_pace.rst @@ -0,0 +1,251 @@ +.. index:: compute pace + +compute pace command +======================== + +Syntax +"""""" + +.. code-block:: LAMMPS + + compute ID group-ID pace ace_potential_filename ... keyword values ... 
+ +* ID, group-ID are documented in :doc:`compute ` command +* pace = style name of this compute command +* ace_potential_filename = file name (in the .yace or .ace format from :doc:`pace pair_style `) including ACE hyperparameters, bonds, and generalized coupling coefficients +* keyword = *bikflag* or *dgradflag* + + .. parsed-literal:: + + *bikflag* value = *0* or *1* + *0* = descriptors are summed over atoms of each type + *1* = descriptors are listed separately for each atom + *dgradflag* value = *0* or *1* + *0* = descriptor gradients are summed over atoms of each type + *1* = descriptor gradients are listed separately for each atom pair + +Examples +"""""""" + +.. code-block:: LAMMPS + + compute pace all pace coupling_coefficients.yace + compute pace all pace coupling_coefficients.yace 0 1 + compute pace all pace coupling_coefficients.yace 1 1 + +Description +""""""""""" + +.. versionadded:: TBD + +This compute calculates a set of quantities related to the atomic cluster +expansion (ACE) descriptors of the atoms in a group. ACE descriptors are +highly generalizable atomic descriptors, encoding the radial and angular +distribution of neighbor atoms, up to arbitrary bond order (rank). The +detailed mathematical definition is given in the paper by +:ref:`(Drautz) <Drautz19>`. These descriptors are used in the +:doc:`pace pair_style `. Quantities obtained from `compute pace` +are related to those used in :doc:`pace pair_style ` to +evaluate atomic energies, forces, and stresses for linear ACE models. +For example, the energy for a linear ACE model is calculated as: +:math:`E=\sum_i^{N\_atoms} \sum_{\boldsymbol{\nu}} c_{\boldsymbol{\nu}} B_{i,\boldsymbol{\nu}}`. +Here, :math:`B_{i,\boldsymbol{\nu}}` are the ACE descriptors for atom *i*, and +:math:`c_{\boldsymbol{\nu}}` are linear model parameters. The detailed definition +and indexing convention for ACE descriptors are given in :ref:`(Drautz) <Drautz19>`. 
+In short, body order :math:`N`, angular character, radial character, +and chemical elements in the *N-body* descriptor are encoded by :math:`\nu`. +In the :doc:`pace pair_style `, the linear model parameters +and the ACE descriptors are combined for efficient evaluation of energies +and forces. The details and benefits of this efficient implementation are +given in :ref:`(Lysogorskiy) <Lysogorskiy21>` et al., but the combined +descriptors and linear model parameters for the purposes of `compute pace` +may be expressed in terms of the ACE descriptors mentioned above. + +:math:`c_{\boldsymbol{\nu}} B_{i,\boldsymbol{\nu}}= \sum_{\boldsymbol{\nu}' \in \boldsymbol{\nu} } \big[ c_{\boldsymbol{\nu}} C(\boldsymbol{\nu}') \big] A_{i,\boldsymbol{\nu}'}` + +where the bracketed terms on the right-hand side are the combined functions +with linear model parameters typically provided in the `.yace` potential +file for `pace pair_style`. When these bracketed terms are multiplied by the +products of the atomic base from :ref:`(Drautz) <Drautz19>`, +:math:`A_{i,\boldsymbol{\nu'}}`, the ACE descriptors are recovered but they +are also scaled by linear model parameters. The generalized coupling coefficients, +written in short-hand here as :math:`C(\boldsymbol{\nu}')`, are the generalized +Clebsch-Gordan or generalized Wigner symbols. It may be desirable to reverse the +combination of these descriptors and the linear model parameters so that the +ACE descriptors themselves may be used. The ACE descriptors and their gradients +are often used when training ACE models, performing custom data analysis, +generalizing ACE model forms, and other tasks that involve direct computation of +descriptors. The key utility of `compute pace` is that it can compute the ACE +descriptors and gradients so that these tasks can be performed during a LAMMPS +simulation or so that LAMMPS can be used as a driver for tasks like ACE model +parameterization. 
To see how this command can be used within a Python workflow +to train ACE potentials, see the examples in +`FitSNAP `_. Examples of using outputs from +this compute to construct general ACE potential forms are demonstrated in +:ref:`(Goff) <Goff23>`. The various keywords and inputs to `compute pace` +determine what ACE descriptors and related quantities are returned in a compute +array. + +The coefficient file, `.yace`, ultimately defines the number of ACE +descriptors to be computed, their maximum body-order, the degree of angular +character they have, the degree of radial character they have, the chemical +character (which element-element interactions are encoded by descriptors), +and other hyperparameters defined in :ref:`(Drautz) <Drautz19>`. These may +be modeled after the potential files in :doc:`pace pair_style `, +and have the same format. Details on how to generate the coefficient files +to train ACE models may be found in `FitSNAP `_. + +The keyword *bikflag* determines whether or not to list the descriptors of +each atom separately, or sum them together and list in a single row. If +*bikflag* is set to *0* then a single descriptor row is used, which contains +the per-atom ACE descriptors :math:`B_{i,\boldsymbol{\nu}}` summed over all +atoms *i* to produce :math:`B_{\boldsymbol{\nu}}`. If *bikflag* is set to +*1* this is replaced by a separate per-atom ACE descriptor row for each atom. +In this case, the entries in the final column for these rows are set to zero. + +The keyword *dgradflag* determines whether to sum atom gradients or list +them separately. If *dgradflag* is set to 0, the ACE +descriptor gradients w.r.t. atom *j* are summed over all atoms *i'* +of each type, which may be useful when training linear ACE models on atomic forces. +If *dgradflag* is set to 1, gradients are listed separately for each pair of atoms. 
+Each row corresponds +to a single term :math:`\frac{\partial {B_{i,\boldsymbol{\nu}}}}{\partial {r}^a_j}` +where :math:`{r}^a_j` is the *a-th* position coordinate of the atom with global +index *j*. This also changes the number of columns to be equal to the number of +ACE descriptors, with 3 additional columns representing the indices :math:`i`, +:math:`j`, and :math:`a`, as explained more in the Output info section below. +The option *dgradflag=1* requires that *bikflag=1*. + +.. note:: + + It is noted here that in contrast to :doc:`pace pair_style `, + the *.yace* file for `compute pace` typically should not contain linear + parameters for an ACE potential. If :math:`c_{\nu}` are included, + the value of the descriptor will not be returned in the `compute` array, + but instead, the energy contribution from that descriptor will be returned. + Do not do this unless it is the desired behavior. + *In short, you should not plug in a '.yace' for a pace potential into this + compute to evaluate descriptors.* + +.. note:: + + *Generalized Clebsch-Gordan or Generalized Wigner symbols (with appropriate + factors) must be used to evaluate ACE descriptors with this compute.* There + are multiple ways to define the generalized coupling coefficients. Because + of this, this compute will not revert your potential file to a coupling + coefficient file. Instead this compute allows the user to supply coupling + coefficients that follow any convention. + +.. note:: + + Using *dgradflag* = 1 produces a global array with :math:`N + 3N^2 + 1` rows + which becomes expensive for systems with more than 1000 atoms. + +.. note:: + + If you have a bonded system, then the settings of :doc:`special_bonds + ` command can remove pairwise interactions between + atoms in the same bond, angle, or dihedral. This is the default + setting for the :doc:`special_bonds ` command, and + means those pairwise interactions do not appear in the neighbor list. 
+ Because this compute uses the neighbor list, it also means those pairs + will not be included in the calculation. One way to get around this + is to write a dump file, and use the :doc:`rerun ` command to + compute the ACE descriptors for snapshots in the dump file. + The rerun script can use a :doc:`special_bonds ` + command that includes all pairs in the neighbor list. + +---------- + +Output info +""""""""""" + +Compute *pace* evaluates a global array. The columns are arranged into +*ntypes* blocks, listed in order of atom type *I*\ . Each block contains +one column for each ACE descriptor, the same as for compute +*sna/atom*\ in :doc:`compute snap `. A final column contains the corresponding energy, force +component on an atom, or virial stress component. The rows of the array +appear in the following order: + +* 1 row: *pace* average descriptor values for all atoms of type *I* +* 3\*\ *n* force rows: quantities, with derivatives w.r.t. x, y, and z coordinate of atom *i* appearing in consecutive rows. The atoms are sorted based on atom ID and run up to the total number of atoms, *n*. +* 6 rows: *virial* quantities summed for all atoms of type *I* + +For example, if :math:`\# \; B_{i, \boldsymbol{\nu}}` =30 and ntypes=1, +the number of columns in the global array generated by *pace* is 31, +while the number of rows is 1+3\*\ *n*\ +6, where *n* +is the total number of atoms. + +If the *bikflag* keyword is set to 1, the structure of the pace array is expanded. +The first :math:`N` rows of the pace array +correspond to the per-atom descriptors :math:`B_{i,\boldsymbol{\nu}}` instead of a single row summed over atoms :math:`i`. +In this case, the entries in the final column for these rows +are set to zero. Also, each row contains only non-zero entries for the +columns corresponding to the type of that atom. This is not true in the case +of *dgradflag* keyword = 1 (see below). 
+ +If the *dgradflag* keyword is set to 1, this changes the structure of the +global array completely. +Here the per-atom quantities are replaced with rows corresponding to +descriptor gradient components on single atoms: + +.. math:: + + \frac{\partial {B_{i,\boldsymbol{\nu}} }}{\partial {r}^a_j} + +where :math:`{r}^a_j` is the *a-th* position coordinate of the atom with global +index *j*. The rows are +organized in chunks, where each chunk corresponds to an atom with global index +:math:`j`. The rows in an atom :math:`j` chunk correspond to +atoms with global index :math:`i`. The total number of rows for +these descriptor gradients is therefore :math:`3N^2`. +The number of columns is equal to the number of ACE descriptors, +plus 3 additional left-most columns representing the global atom indices +:math:`i`, :math:`j`, +and Cartesian direction :math:`a` (0, 1, 2, for x, y, z). +The first 3 columns of the first :math:`N` rows belong to the reference +potential force components. The remaining :math:`K` columns contain the +:math:`B_{i,\boldsymbol{\nu}}` per-atom descriptors corresponding to the non-zero entries +obtained when *bikflag* = 1. +The first column of the last row, after the first +:math:`N + 3N^2` rows, contains the reference potential +energy. The virial components are not used with this option. The total number of +rows is therefore :math:`N + 3N^2 + 1` and the number of columns is :math:`K + 3`. + +These values can be accessed by any command that uses global values +from a compute as input. See the :doc:`Howto output ` doc +page for an overview of LAMMPS output options. + +Restrictions +"""""""""""" + +This compute is part of the ML-PACE package. It is only enabled +if LAMMPS was built with that package. See the :doc:`Build package +` page for more info. 
+ +Related commands +"""""""""""""""" + +:doc:`pair_style pace ` +:doc:`pair_style snap ` +:doc:`compute snap ` + +Default +""""""" + +The optional keyword defaults are *bikflag* = 0, +*dgradflag* = 0 + +---------- + +.. _Drautz19: + +**(Drautz)** Drautz, Phys Rev B, 99, 014104 (2019). + +.. _Lysogorskiy21: + +**(Lysogorskiy)** Lysogorskiy, van der Oord, Bochkarev, Menon, Rinaldi, Hammerschmidt, Mrovec, Thompson, Csanyi, Ortner, Drautz, npj Comp Mat, 7, 97 (2021). + +.. _Goff23: + +**(Goff)** Goff, Zhang, Negre, Rohskopf, Niklasson, Journal of Chemical Theory and Computation 19, no. 13 (2023). diff --git a/doc/src/compute_property_atom.rst b/doc/src/compute_property_atom.rst index 5dbf600c36..b03d6eb74e 100644 --- a/doc/src/compute_property_atom.rst +++ b/doc/src/compute_property_atom.rst @@ -128,9 +128,9 @@ Attributes *i_name*, *d_name*, *i2_name*, *d2_name* refer to custom per-atom integer and floating-point vectors or arrays that have been added via the :doc:`fix property/atom ` command. When that command is used specific names are given to each attribute -which are the "name" portion of these attributes. For arrays *i2_name* -and *d2_name*, the column of the array must also be included following -the name in brackets (e.g., d2_xyz[2] or i2_mySpin[3]). +which are the "name" portion of these attributes. For arrays +*i2_name* and *d2_name*, the column of the array must also be included +following the name in brackets (e.g., d2_xyz[2] or i2_mySpin[3]). The additional quantities only accessible via this command, and not directly via the :doc:`dump custom ` command, are as follows. diff --git a/doc/src/compute_property_grid.rst b/doc/src/compute_property_grid.rst index 20a4f19605..dfdce220c6 100644 --- a/doc/src/compute_property_grid.rst +++ b/doc/src/compute_property_grid.rst @@ -61,7 +61,7 @@ varying fastest, then Y, then Z slowest. For 2d grids (in 2d simulations), the grid IDs range from 1 to Nx*Ny, with X varying fastest and Y slowest. -.. versionadded:: TBD +.. 
versionadded:: 21Nov2023 The *proc* attribute is the ID of the processor which owns the grid cell. Processor IDs range from 0 to Nprocs - 1, where Nprocs is the diff --git a/doc/src/compute_rattlers_atom.rst b/doc/src/compute_rattlers_atom.rst new file mode 100644 index 0000000000..a69d091466 --- /dev/null +++ b/doc/src/compute_rattlers_atom.rst @@ -0,0 +1,92 @@ +.. index:: compute rattlers/atom + +compute rattlers/atom command +============================= + +Syntax +"""""" + +.. parsed-literal:: + + compute ID group-ID rattlers/atom cutoff zmin ntries + +* ID, group-ID are documented in :doc:`compute ` command +* rattlers/atom = style name of this compute command +* cutoff = *type* or *radius* + + .. parsed-literal:: + + *type* = cutoffs determined based on atom types + *radius* = cutoffs determined based on atom diameters (atom style sphere) + +* zmin = minimum coordination for a non-rattler atom +* ntries = maximum number of iterations to remove rattlers + +Examples +"""""""" + +.. code-block:: LAMMPS + + compute 1 all rattlers/atom type 4 10 + +Description +""""""""""" + +.. versionadded:: TBD + +Define a compute that identifies rattlers in a system. Rattlers are often +identified in granular or glassy packings as undercoordinated atoms that +do not have the required number of contacts to constrain their translational +degrees of freedom. Such atoms are not considered rigid and can often freely +rattle around in the system. This compute identifies rattlers, which can be +helpful for excluding them from analysis or providing extra damping forces +to accelerate relaxation processes. + +Rattlers are identified using an iterative approach. The coordination +number of all atoms is first calculated. The *type* and *radius* settings +are used to select whether interaction cutoffs are determined by atom +types or by the sum of atomic radii (atom style sphere), respectively. 
+Rattlers are then identified as atoms with a coordination number less +than *zmin* and are removed from consideration. Atomic coordination +numbers are then recalculated, excluding previously identified rattlers, +to identify a new set of rattlers. This process is iterated up to a maximum +of *ntries* or until no new rattlers are identified and the remaining +atoms form a stable network of contacts. + +In dense homogeneous systems where the average atom coordination number +is expected to be larger than *zmin*, this process usually only takes a few +iterations and a value of *ntries* around ten may be sufficient. In systems +with significant heterogeneity or average coordination numbers less than +*zmin*, an appropriate value of *ntries* depends heavily on the specific +system. For instance, a linear chain of N rattler atoms with a *zmin* of 2 +would take N/2 iterations to identify that all the atoms are rattlers. + +Output info +""""""""""" + +This compute calculates a per-atom vector and a global scalar. The vector +designates which atoms are rattlers, indicated by a value 1. Non-rattlers +have a value of 0. The global scalar returns the total number of rattlers +in the system. See the :doc:`Howto output ` page for an +overview of LAMMPS output options. + +Restrictions +"""""""""""" + +This compute is part of the EXTRA-COMPUTE package. It is only enabled if +LAMMPS was built with that package. See the +:doc:`Build package ` page for more info. + +The *radius* cutoff option requires that atoms store a radius as defined by the +:doc:`atom_style sphere ` or similar commands. 
+ +Related commands +"""""""""""""""" + +:doc:`compute coord/atom ` +:doc:`compute contact/atom ` + +Default +""""""" + +none diff --git a/doc/src/compute_reduce.rst b/doc/src/compute_reduce.rst index 6820d2ee04..604b1c1571 100644 --- a/doc/src/compute_reduce.rst +++ b/doc/src/compute_reduce.rst @@ -201,7 +201,7 @@ information in this context, the *replace* keywords will extract the atom IDs for the two atoms in the bond of maximum stretch. These atom IDs and the bond stretch will be printed with thermodynamic output. -.. versionadded:: TBD +.. versionadded:: 21Nov2023 The *inputs* keyword allows selection of whether all the inputs are per-atom or local quantities. As noted above, all the inputs must be diff --git a/doc/src/compute_voronoi_atom.rst b/doc/src/compute_voronoi_atom.rst index 9607401ccd..3bada09518 100644 --- a/doc/src/compute_voronoi_atom.rst +++ b/doc/src/compute_voronoi_atom.rst @@ -190,7 +190,7 @@ Voro++ software in the src/VORONOI/README file. Output info """"""""""" -.. deprecated:: TBD +.. deprecated:: 21Nov2023 The *peratom* keyword was removed as it is no longer required. diff --git a/doc/src/compute_xrd.rst b/doc/src/compute_xrd.rst index 8673ce9199..18bb4c886f 100644 --- a/doc/src/compute_xrd.rst +++ b/doc/src/compute_xrd.rst @@ -62,28 +62,29 @@ equations: \frac{\sin(\theta)}{\lambda} &= \frac{\left\lVert\mathbf{k}\right\rVert}{2} Here, :math:`\mathbf{k}` is the location of the reciprocal lattice node, -:math:`r_j` is the position of each atom, :math:`f_j` are atomic scattering -factors, *Lp* is the Lorentz-polarization factor, and :math:`\theta` is the -scattering angle of diffraction. The Lorentz-polarization factor can be turned -off using the optional *LP* keyword. +:math:`r_j` is the position of each atom, :math:`f_j` are atomic +scattering factors, *Lp* is the Lorentz-polarization factor, and +:math:`\theta` is the scattering angle of diffraction. The +Lorentz-polarization factor can be turned off using the optional *LP* +keyword. 
Diffraction intensities are calculated on a three-dimensional mesh of -reciprocal lattice nodes. The mesh spacing is defined either (a) by the entire -simulation domain or (b) manually using selected values as -shown in the 2D diagram below. +reciprocal lattice nodes. The mesh spacing is defined either (a) by the +entire simulation domain or (b) manually using selected values as shown +in the 2D diagram below. -.. image:: img/xrd_mesh.jpg +.. image:: img/xrd_mesh.png :scale: 75% :align: center For a mesh defined by the simulation domain, a rectilinear grid is constructed with spacing :math:`c A^{-1}` along each reciprocal lattice -axis, where :math:`A` is a matrix containing the vectors corresponding to the -edges of the simulation cell. If one or two directions has non-periodic -boundary conditions, then the spacing in these directions is defined from the -average of the (inversed) box lengths with periodic boundary conditions. -Meshes defined by the simulation domain must contain at least one periodic -boundary. +axis, where :math:`A` is a matrix containing the vectors corresponding +to the edges of the simulation cell. If one or two directions has +non-periodic boundary conditions, then the spacing in these directions +is defined from the average of the (inversed) box lengths with periodic +boundary conditions. Meshes defined by the simulation domain must +contain at least one periodic boundary. If the *manual* flag is included, the mesh of reciprocal lattice nodes will be defined using the *c* values for the spacing along each diff --git a/doc/src/dump.rst b/doc/src/dump.rst index e5885dc25d..2d1598e493 100644 --- a/doc/src/dump.rst +++ b/doc/src/dump.rst @@ -613,7 +613,7 @@ when running on large numbers of processors. Note that using the "\*" and "%" characters together can produce a large number of small dump files! -.. deprecated:: TBD +.. 
deprecated:: 21Nov2023 The MPIIO package and the the corresponding "/mpiio" dump styles, except for the unrelated "netcdf/mpiio" style were removed from LAMMPS. @@ -805,16 +805,16 @@ computes, fixes, or variables when they are evaluated, so this is a very general means of creating quantities to output to a dump file. The *i_name*, *d_name*, *i2_name*, *d2_name* attributes refer to -per-atom integer and floating-point vectors or arrays that have been -added via the :doc:`fix property/atom ` command. -When that command is used specific names are given to each attribute -which are the "name" portion of these keywords. For arrays *i2_name* -and *d2_name*, the column of the array must also be included following -the name in brackets (e.g., d2_xyz[i], i2_mySpin[i], where :math:`i` is -in the range from 1 to :math:`M`, where :math:`M` is the number of -columns in the custom array). See the discussion above for how :math:`i` -can be specified with a wildcard asterisk to effectively specify -multiple values. +custom per-atom integer and floating-point vectors or arrays that have +been added via the :doc:`fix property/atom ` +command. When that command is used specific names are given to each +attribute which are the "name" portion of these keywords. For arrays +*i2_name* and *d2_name*, the column of the array must also be included +following the name in brackets (e.g., d2_xyz[i], i2_mySpin[i], where +:math:`i` is in the range from 1 to :math:`M`, where :math:`M` is the +number of columns in the custom array). See the discussion above for +how :math:`i` can be specified with a wildcard asterisk to effectively +specify multiple values. See the :doc:`Modify ` page for information on how to add new compute and fix styles to LAMMPS to calculate per-atom quantities diff --git a/doc/src/dump_image.rst b/doc/src/dump_image.rst index 4e227d2f72..3102caaa97 100644 --- a/doc/src/dump_image.rst +++ b/doc/src/dump_image.rst @@ -599,7 +599,7 @@ image will appear. 
The *sfactor* value must be a value 0.0 <= *sfactor* <= 1.0, where *sfactor* = 1 is a highly reflective surface and *sfactor* = 0 is a rough non-shiny surface. -.. versionadded:: TBD +.. versionadded:: 21Nov2023 The *fsaa* keyword can be used with the dump image command to improve the image quality by enabling full scene anti-aliasing. Internally the diff --git a/doc/src/fix.rst b/doc/src/fix.rst index 0889fe281f..69a7212487 100644 --- a/doc/src/fix.rst +++ b/doc/src/fix.rst @@ -287,6 +287,7 @@ accelerated styles exist. * :doc:`mvv/tdpd ` - constant temperature DPD using the modified velocity-Verlet algorithm * :doc:`neb ` - nudged elastic band (NEB) spring forces * :doc:`neb/spin ` - nudged elastic band (NEB) spring forces for spins +* :doc:`nonaffine/displacement ` - calculate nonaffine displacement of atoms * :doc:`nph ` - constant NPH time integration via Nose/Hoover * :doc:`nph/asphere ` - NPH for aspherical particles * :doc:`nph/body ` - NPH for body particles diff --git a/doc/src/fix_adapt.rst b/doc/src/fix_adapt.rst index 86eec3eadb..9cfbef7a11 100644 --- a/doc/src/fix_adapt.rst +++ b/doc/src/fix_adapt.rst @@ -205,6 +205,8 @@ formulas for the meaning of these parameters: +------------------------------------------------------------------------------+--------------------------------------------------+-------------+ | :doc:`pace, pace/extrapolation ` | scale | type pairs | +------------------------------------------------------------------------------+--------------------------------------------------+-------------+ +| :doc:`quip ` | scale | type global | ++------------------------------------------------------------------------------+--------------------------------------------------+-------------+ | :doc:`snap ` | scale | type pairs | +------------------------------------------------------------------------------+--------------------------------------------------+-------------+ | :doc:`spin/dmi ` | coulombic_cutoff | type global | @@ -315,21 +317,21 @@ 
Currently *bond* does not support bond_style hybrid nor bond_style hybrid/overlay as bond styles. The bond styles that currently work with fix_adapt are -+------------------------------------+-------+-----------------+ ++------------------------------------+------------+------------+ | :doc:`class2 ` | r0 | type bonds | -+------------------------------------+-------+-----------------+ ++------------------------------------+------------+------------+ | :doc:`fene ` | k,r0 | type bonds | -+------------------------------------+-------+-----------------+ ++------------------------------------+------------+------------+ | :doc:`fene/nm ` | k,r0 | type bonds | -+------------------------------------+-------+-----------------+ ++------------------------------------+------------+------------+ | :doc:`gromos ` | k,r0 | type bonds | -+------------------------------------+-------+-----------------+ ++------------------------------------+------------+------------+ | :doc:`harmonic ` | k,r0 | type bonds | -+------------------------------------+-------+-----------------+ ++------------------------------------+------------+------------+ | :doc:`morse ` | r0 | type bonds | -+------------------------------------+-------+-----------------+ ++------------------------------------+------------+------------+ | :doc:`nonlinear ` | epsilon,r0 | type bonds | -+------------------------------------+-------+-----------------+ ++------------------------------------+------------+------------+ ---------- @@ -353,11 +355,11 @@ Currently *angle* does not support angle_style hybrid nor angle_style hybrid/overlay as angle styles. 
The angle styles that currently work with fix_adapt are -+------------------------------------+-------+-----------------+ -| :doc:`harmonic ` | k,theta0 | type angles | -+------------------------------------+-------+-----------------+ -| :doc:`cosine ` | k | type angles | -+------------------------------------+-------+-----------------+ ++------------------------------------+----------+-------------+ +| :doc:`harmonic ` | k,theta0 | type angles | ++------------------------------------+----------+-------------+ +| :doc:`cosine ` | k | type angles | ++------------------------------------+----------+-------------+ Note that internally, theta0 is stored in radians, so the variable this fix uses to reset theta0 needs to generate values in radians. @@ -482,7 +484,7 @@ Restrictions Related commands """""""""""""""" -:doc:`compute ti ` +:doc:`compute ti `, :doc:`fix adapt/fep ` Default """"""" diff --git a/doc/src/fix_adapt_fep.rst b/doc/src/fix_adapt_fep.rst index c35986de49..1b2298cd96 100644 --- a/doc/src/fix_adapt_fep.rst +++ b/doc/src/fix_adapt_fep.rst @@ -307,7 +307,9 @@ the :doc:`run ` command. This fix is not invoked during Restrictions """""""""""" - none + +The keyword "scale yes" is not supported for scaling per-atom parameters +diameter and charge. You can use :doc:`fix adapt ` for those. Related commands """""""""""""""" diff --git a/doc/src/fix_atom_swap.rst b/doc/src/fix_atom_swap.rst index ffd14ebb0f..aa8127561c 100644 --- a/doc/src/fix_atom_swap.rst +++ b/doc/src/fix_atom_swap.rst @@ -181,6 +181,12 @@ This fix is part of the MC package. It is only enabled if LAMMPS was built with that package. See the :doc:`Build package ` doc page for more info. +This fix cannot be used with systems that do not have per-type masses +(e.g. atom style sphere) since the implemented algorithm pre-computes +velocity rescaling factors from per-type masses and ignores any per-atom +masses, if present. In case both, per-type and per-atom masses are +present, a warning is printed.
+ Related commands """""""""""""""" diff --git a/doc/src/fix_deposit.rst b/doc/src/fix_deposit.rst index 4c256f524f..5264999839 100644 --- a/doc/src/fix_deposit.rst +++ b/doc/src/fix_deposit.rst @@ -220,7 +220,7 @@ rotated configuration of the molecule. existing particle. LAMMPS will issue a warning if R is smaller than this value, based on the radii of existing and inserted particles. -.. versionadded:: TBD +.. versionadded:: 21Nov2023 The *var* and *set* keywords can be used together to provide a criterion for accepting or rejecting the addition of an individual atom, based on its diff --git a/doc/src/fix_nh_uef.rst b/doc/src/fix_nh_uef.rst index a515375746..60d47994f0 100644 --- a/doc/src/fix_nh_uef.rst +++ b/doc/src/fix_nh_uef.rst @@ -23,7 +23,7 @@ Syntax .. parsed-literal:: keyword = *erate* or *ext* or *strain* or *temp* or *iso* or *x* or *y* or *z* or *tchain* or *pchain* or *tloop* or *ploop* or *mtk* - *erate* values = e_x e_y = engineering strain rates (required) + *erate* values = e_x e_y = true strain rates (required) *ext* value = *x* or *y* or *z* or *xy* or *yz* or *xz* = external dimensions sets the external dimensions used to calculate the scalar pressure *strain* values = e_x e_y = initial strain @@ -62,7 +62,7 @@ performed using the :doc:`fix deform `, :doc:`fix nvt/sllod `, and :doc:`compute temp/deform ` commands. -The applied flow field is set by the *eps* keyword. The values +The applied flow field is set by the *erate* keyword. The values *edot_x* and *edot_y* correspond to the strain rates in the xx and yy directions. It is implicitly assumed that the flow field is traceless, and therefore the strain rate in the zz direction is eqal diff --git a/doc/src/fix_nonaffine_displacement.rst b/doc/src/fix_nonaffine_displacement.rst new file mode 100644 index 0000000000..363b0a747a --- /dev/null +++ b/doc/src/fix_nonaffine_displacement.rst @@ -0,0 +1,133 @@ +.. 
index:: fix nonaffine/displacement + +fix nonaffine/displacement command +================================== + +Syntax +"""""" + +.. parsed-literal:: + + fix ID group nonaffine/displacement nevery style args reference/style nstep + +* ID, group are documented in :doc:`fix ` command +* nonaffine/displacement = style name of this fix command +* nevery = calculate nonaffine displacement every this many timesteps +* style = *d2min* or *integrated* + + .. parsed-literal:: + + *d2min* args = cutoff args + cutoff = *type* or *radius* or *custom* + *type* args = none, cutoffs determined by atom types + *radius* args = none, cutoffs determined based on atom diameters (atom style sphere) + *custom* args = *rmax*, cutoff set by a constant numeric value *rmax* (distance units) + *integrated* args = none + +* reference/style = *fixed* or *update* or *offset* + + .. parsed-literal:: + + *fixed* = use a fixed reference frame at *nstep* + *update* = update the reference frame every *nstep* timesteps + *offset* = update the reference frame *nstep* timesteps before calculating the nonaffine displacement + +Examples +"""""""" + +.. code-block:: LAMMPS + + fix 1 all nonaffine/displacement 100 integrated update 100 + fix 1 all nonaffine/displacement 1000 d2min type fixed 0 + fix 1 all nonaffine/displacement 1000 d2min custom 2.0 offset 100 + +Description +""""""""""" + +.. versionadded:: TBD + +This fix computes different metrics of the nonaffine displacement of +particles. The first metric, *d2min*, calculates the :math:`D^2_\mathrm{min}` +nonaffine displacement by Falk and Langer in :ref:`(Falk) `. +For each atom, the fix computes the two tensors + +.. math:: + + X = \sum_{\mathrm{neighbors}} \vec{r} \left(\vec{r}_{0} \right)^T + +and + +..
math:: + + Y = \sum_{\mathrm{neighbors}} \vec{r}_0 \left(\vec{r}_{0} \right)^T + +where the neighbors include all other atoms within the distance criterion +set by the cutoff option, discussed below, :math:`\vec{r}` is the current +displacement between particles, and :math:`\vec{r}_0` is the reference +displacement. A deformation gradient tensor is then calculated as +:math:`F = X Y^{-1}` from which + +.. math:: + + D^2_\mathrm{min} = \sum_{\mathrm{neighbors}} \left| \vec{r} - F \vec{r}_0 \right|^2 + +and a strain tensor is calculated :math:`E = F F^{T} - I` where :math:`I` +is the identity tensor. This calculation is only performed on timesteps that +are a multiple of *nevery* (including timestep zero). Data accessed before +this occurs will simply be zeroed. + +The *integrated* style simply integrates the velocity of particles +every timestep to calculate a displacement. This style only works if +used in conjunction with another fix that deforms the box and displaces +atom positions such as :doc:`fix deform ` with remap x, +:doc:`fix press/berendsen `, or :doc:`fix nh `. + +Both of these methods require defining a reference state. With the *fixed* reference +style, the user picks a specific timestep *nstep* at which particle positions are saved. +If peratom data is accessed from this fix prior to this timestep, it will simply be +zeroed. The *update* reference style implies the reference state will be updated every +*nstep* timesteps. The *offset* reference only applies to the *d2min* metric and will +update the reference state *nstep* timesteps before a multiple of *nevery* timesteps. + + +---------- + +Restart, fix_modify, output, run start/stop, minimize info +""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +The reference state is saved to :doc:`binary restart files `. + +None of the :doc:`fix_modify ` options are relevant to this +fix.
+ +This fix computes a peratom array with 3 columns, which can be accessed +by indices 1-3 using any command that uses per-atom values from a fix +as input. + +For the *integrated* style, the three columns are the nonaffine +displacements in the x, y, and z directions. For the *d2min* style, +the three columns are the calculated :math:`\sqrt{D^2_\mathrm{min}}`, the +volumetric strain, and the deviatoric strain. + +Restrictions +"""""""""""" + +This fix is part of the EXTRA-FIX package. It is only enabled if +LAMMPS was built with that package. See the +:doc:`Build package ` page for more info. + +Related commands +"""""""""""""""" + +none + +Default +""""""" + +none + +---------- + +.. _d2min-Falk: + +**(Falk)** Falk and Langer PRE, 57, 7192 (1998). diff --git a/doc/src/fix_pimd.rst b/doc/src/fix_pimd.rst index 91c5e58add..a2e137da25 100644 --- a/doc/src/fix_pimd.rst +++ b/doc/src/fix_pimd.rst @@ -149,7 +149,7 @@ normal-mode PIMD. A value of *cmd* is for centroid molecular dynamics only the k > 0 modes are thermostatted, not the centroid degrees of freedom. -.. versionadded:: TBD +.. versionadded:: 21Nov2023 Mode *pimd* added to fix pimd/langevin. diff --git a/doc/src/fix_rigid.rst b/doc/src/fix_rigid.rst index a50e215681..3174a0929c 100644 --- a/doc/src/fix_rigid.rst +++ b/doc/src/fix_rigid.rst @@ -80,7 +80,7 @@ Syntax groupID1, groupID2, ... = list of N group IDs * zero or more keyword/value pairs may be appended -* keyword = *langevin* or *reinit* or *temp* or *iso* or *aniso* or *x* or *y* or *z* or *couple* or *tparam* or *pchain* or *dilate* or *force* or *torque* or *infile* or *gravity* +* keyword = *langevin* or *reinit* or *temp* or *mol* or *iso* or *aniso* or *x* or *y* or *z* or *couple* or *tparam* or *pchain* or *dilate* or *force* or *torque* or *infile* or *gravity* ..
parsed-literal:: @@ -92,6 +92,8 @@ Syntax *temp* values = Tstart Tstop Tdamp Tstart,Tstop = desired temperature at start/stop of run (temperature units) Tdamp = temperature damping parameter (time units) + *mol* value = template-ID + template-ID = ID of molecule template specified in a separate :doc:`molecule ` command *iso* or *aniso* values = Pstart Pstop Pdamp Pstart,Pstop = scalar external pressure at start/end of run (pressure units) Pdamp = pressure damping parameter (time units) diff --git a/doc/src/fix_temp_berendsen.rst b/doc/src/fix_temp_berendsen.rst index 67e496e6c5..541f3191d5 100644 --- a/doc/src/fix_temp_berendsen.rst +++ b/doc/src/fix_temp_berendsen.rst @@ -1,8 +1,11 @@ .. index:: fix temp/berendsen +.. index:: fix temp/berendsen/kk fix temp/berendsen command ========================== +Accelerator Variants: *temp/berendsen/kk* + Syntax """""" @@ -118,6 +121,10 @@ remaining thermal degrees of freedom, and the bias is added back in. ---------- +.. include:: accel_styles.rst + +---------- + Restart, fix_modify, output, run start/stop, minimize info """"""""""""""""""""""""""""""""""""""""""""""""""""""""""" diff --git a/doc/src/fix_temp_rescale.rst b/doc/src/fix_temp_rescale.rst index bfdcaa90f8..2dd2178346 100644 --- a/doc/src/fix_temp_rescale.rst +++ b/doc/src/fix_temp_rescale.rst @@ -1,8 +1,11 @@ .. index:: fix temp/rescale +.. index:: fix temp/rescale/kk fix temp/rescale command ======================== +Accelerator Variants: *temp/rescale/kk* + Syntax """""" @@ -125,6 +128,10 @@ remaining thermal degrees of freedom, and the bias is added back in. ---------- +.. 
include:: accel_styles.rst + +---------- + Restart, fix_modify, output, run start/stop, minimize info """"""""""""""""""""""""""""""""""""""""""""""""""""""""""" diff --git a/doc/src/img/xrd_mesh.jpg b/doc/src/img/xrd_mesh.jpg deleted file mode 100644 index 677234caab..0000000000 Binary files a/doc/src/img/xrd_mesh.jpg and /dev/null differ diff --git a/doc/src/img/xrd_mesh.png b/doc/src/img/xrd_mesh.png new file mode 100644 index 0000000000..e58c7322a5 Binary files /dev/null and b/doc/src/img/xrd_mesh.png differ diff --git a/doc/src/molecule.rst b/doc/src/molecule.rst index 480e175e7b..1de905886c 100644 --- a/doc/src/molecule.rst +++ b/doc/src/molecule.rst @@ -154,21 +154,25 @@ These are the recognized header keywords. Header lines can come in any order. The numeric value(s) are read from the beginning of the line. The keyword should appear at the end of the line. All these settings have default values, as explained below. A line need only -appear if the value(s) are different than the default. +appear if the value(s) are different than the default, except when +defining a *body* particle, which requires setting the number of +*atoms* to 1, and setting the *inertia* in a specific section (see below). 
* N *atoms* = # of atoms N in molecule, default = 0 * Nb *bonds* = # of bonds Nb in molecule, default = 0 * Na *angles* = # of angles Na in molecule, default = 0 * Nd *dihedrals* = # of dihedrals Nd in molecule, default = 0 * Ni *impropers* = # of impropers Ni in molecule, default = 0 -* Nf *fragments* = # of fragments in molecule, default = 0 +* Nf *fragments* = # of fragments Nf in molecule, default = 0 +* Ninteger Ndouble *body* = # of integer and floating-point values in body +  particle, default = 0 * Mtotal *mass* = total mass of molecule * Xc Yc Zc *com* = coordinates of center-of-mass of molecule * Ixx Iyy Izz Ixy Ixz Iyz *inertia* = 6 components of inertia tensor of molecule For *mass*, *com*, and *inertia*, the default is for LAMMPS to calculate this quantity itself if needed, assuming the molecules -consists of a set of point particles or finite-size particles (with a +consist of a set of point particles or finite-size particles (with a non-zero diameter) that do not overlap. If finite-size particles in the molecule do overlap, LAMMPS will not account for the overlap effects when calculating any of these 3 quantities, so you should @@ -188,6 +192,7 @@ These are the allowed section keywords for the body of the file. * *Bonds, Angles, Dihedrals, Impropers* = molecular topology sections * *Special Bond Counts, Special Bonds* = special neighbor info * *Shake Flags, Shake Atoms, Shake Bond Types* = SHAKE info +* *Body Integers, Body Doubles* = body-property sections For the Types, Bonds, Angles, Dihedrals, and Impropers sections, each atom/bond/angle/etc type can be specified either as a number (numeric @@ -515,6 +520,67 @@ of SHAKE clusters. ---------- +*Body Integers* section: + +* one line +* line syntax: N E F +* N = number of sub-particles or number of vertices +* E,F = number of edges and faces + +This section is only needed when the molecule is a body particle. The other +Body section must also appear in the file.
+ +The total number of values that must appear is determined by the body style, and +must be equal to the Ninteger value given in the *body* header. + +For *nparticle* and *rounded/polygon*, only the number of sub-particles or +vertices N is required, and Ninteger should have a value of 1. + +For *rounded/polyhedron*, the number of edges E and faces F are also required, and +Ninteger should have a value of 3. + +See the :doc:`Howto body ` page for a further description of +the file format. + +---------- + +*Body Doubles* section: + +* first line +* line syntax: Ixx Iyy Izz Ixy Ixz Iyz +* Ixx Iyy Izz Ixy Ixz Iyz = 6 components of inertia tensor of body particle +* one line per sub-particle or vertex +* line syntax: x y z +* x, y, z = coordinates of sub-particle or vertex +* one line per edge +* line syntax: N1 N2 +* N1, N2 = vertex indices +* one line per face +* line syntax: N1 N2 N3 N4 +* N1, N2, N3, N4 = vertex indices +* last line +* line syntax: diam +* diam = rounded diameter that surrounds each vertex + +This section is only needed when the molecule is a body particle. The other +Body section must also appear in the file. + +The total number of values that must appear is determined by the body style, and +must be equal to the Ndouble value given in the *body* header. The 6 moments of +inertia and the 3N coordinates of the sub-particles or vertices are required +for all body styles. + +For *rounded/polygon*, the E = 6 + 3*N + 1 edges are automatically determined +from the vertices. + +For *rounded/polyhedron*, the 2E vertex indices for the end points of the edges +and 4F vertex indices defining the faces are required. + +See the :doc:`Howto body ` page for a further description of +the file format.
+ +---------- + Restrictions """""""""""" diff --git a/doc/src/pair_beck.rst b/doc/src/pair_beck.rst index 2bf027515c..6bb4afdc80 100644 --- a/doc/src/pair_beck.rst +++ b/doc/src/pair_beck.rst @@ -30,11 +30,11 @@ Description Style *beck* computes interactions based on the potential by :ref:`(Beck) `, originally designed for simulation of Helium. It -includes truncation at a cutoff distance Rc. +includes truncation at a cutoff distance :math:`r_c`. .. math:: - E(r) &= A \exp\left[-\alpha r - \beta r^6\right] - \frac{B}{\left(r^2+a^2\right)^3} \left(1+\frac{2.709+3a^2}{r^2+a^2}\right) \qquad r < R_c \\ + E(r) &= A \exp\left[-\alpha r - \beta r^6\right] - \frac{B}{\left(r^2+a^2\right)^3} \left(1+\frac{2.709+3a^2}{r^2+a^2}\right) \qquad r < r_c \\ The following coefficients must be defined for each pair of atoms types via the :doc:`pair_coeff ` command as in the examples @@ -50,7 +50,7 @@ commands. * cutoff (distance units) The last coefficient is optional. If not specified, the global cutoff -:math:`R_c` is used. +:math:`r_c` is used. ---------- diff --git a/doc/src/pair_coul_slater.rst b/doc/src/pair_coul_slater.rst index 443de4262b..bde14276db 100644 --- a/doc/src/pair_coul_slater.rst +++ b/doc/src/pair_coul_slater.rst @@ -1,6 +1,7 @@ .. index:: pair_style coul/slater .. index:: pair_style coul/slater/cut .. index:: pair_style coul/slater/long +.. index:: pair_style coul/slater/long/gpu pair_style coul/slater command ============================== @@ -11,6 +12,8 @@ pair_style coul/slater/cut command pair_style coul/slater/long command =================================== +Accelerator Variants: *coul/slater/long/gpu* + Syntax """""" diff --git a/doc/src/pair_fep_soft.rst b/doc/src/pair_fep_soft.rst index 400ad0cc4a..20e17ce0b4 100644 --- a/doc/src/pair_fep_soft.rst +++ b/doc/src/pair_fep_soft.rst @@ -1,8 +1,10 @@ .. index:: pair_style lj/cut/soft .. index:: pair_style lj/cut/soft/omp .. index:: pair_style lj/cut/coul/cut/soft +.. 
index:: pair_style lj/cut/coul/cut/soft/gpu .. index:: pair_style lj/cut/coul/cut/soft/omp .. index:: pair_style lj/cut/coul/long/soft +.. index:: pair_style lj/cut/coul/long/soft/gpu .. index:: pair_style lj/cut/coul/long/soft/omp .. index:: pair_style lj/cut/tip4p/long/soft .. index:: pair_style lj/cut/tip4p/long/soft/omp @@ -27,12 +29,12 @@ Accelerator Variants: *lj/cut/soft/omp* pair_style lj/cut/coul/cut/soft command ======================================= -Accelerator Variants: *lj/cut/coul/cut/soft/omp* +Accelerator Variants: *lj/cut/coul/cut/soft/gpu*, *lj/cut/coul/cut/soft/omp* pair_style lj/cut/coul/long/soft command ======================================== -Accelerator Variants: *lj/cut/coul/long/soft/omp* +Accelerator Variants: *lj/cut/coul/long/soft/gpu*, *lj/cut/coul/long/soft/omp* pair_style lj/cut/tip4p/long/soft command ========================================= diff --git a/doc/src/pair_lj_cut_tip4p.rst b/doc/src/pair_lj_cut_tip4p.rst index 7198b60159..d95f356ab7 100644 --- a/doc/src/pair_lj_cut_tip4p.rst +++ b/doc/src/pair_lj_cut_tip4p.rst @@ -58,6 +58,40 @@ Examples Description """"""""""" +The *lj/cut/tip4p* styles implement the TIP4P water model of +:ref:`(Jorgensen) ` and similar models, which introduce a +massless site M located a short distance away from the oxygen atom along +the bisector of the HOH angle. The atomic types of the oxygen and +hydrogen atoms, the bond and angle types for OH and HOH interactions, +and the distance to the massless charge site are specified as pair_style +arguments and are used to identify the TIP4P-like molecules and +determine the position of the M site from the positions of the hydrogen +and oxygen atoms of the water molecules. The M site location is used +for all Coulomb interactions instead of the oxygen atom location, also +with all other atom types, while the location of the oxygen atom is used +for the Lennard-Jones interactions. 
Style *lj/cut/tip4p/cut* uses a +cutoff for Coulomb interactions; style *lj/cut/tip4p/long* is for use +with a long-range Coulombic solver (Ewald or PPPM). + +.. note:: + + For each TIP4P water molecule in your system, the atom IDs for + the O and 2 H atoms must be consecutive, with the O atom first. This + is to enable LAMMPS to "find" the 2 H atoms associated with each O + atom. For example, if the atom ID of an O atom in a TIP4P water + molecule is 500, then its 2 H atoms must have IDs 501 and 502. + +See the :doc:`Howto tip4p ` page for more information +on how to use the TIP4P pair styles and lists of parameters to set. +Note that the neighbor list cutoff for Coulomb interactions is +effectively extended by a distance 2\*qdist when using the TIP4P pair +style, to account for the offset distance of the fictitious charges on +O atoms in water molecules. Thus it is typically best in an +efficiency sense to use a LJ cutoff >= Coulombic cutoff + 2\*qdist, to +shrink the size of the neighbor list. This leads to slightly larger +cost for the long-range calculation, so you can test the trade-off for +your model. + The *lj/cut/tip4p* styles compute the standard 12/6 Lennard-Jones potential, given by @@ -91,34 +125,6 @@ specified for this style means that pairwise interactions within this distance are computed directly; interactions outside that distance are computed in reciprocal space. -The *lj/cut/tip4p* styles implement the TIP4P -water model of :ref:`(Jorgensen) `, which introduces a massless -site located a short distance away from the oxygen atom along the -bisector of the HOH angle. The atomic types of the oxygen and -hydrogen atoms, the bond and angle types for OH and HOH interactions, -and the distance to the massless charge site are specified as -pair_style arguments. Style *lj/cut/tip4p/cut* uses a cutoff for -Coulomb interactions; style *lj/cut/tip4p/long* is for use with a -long-range Coulombic solver (Ewald or PPPM). - -.. 
note:: - - For each TIP4P water molecule in your system, the atom IDs for - the O and 2 H atoms must be consecutive, with the O atom first. This - is to enable LAMMPS to "find" the 2 H atoms associated with each O - atom. For example, if the atom ID of an O atom in a TIP4P water - molecule is 500, then its 2 H atoms must have IDs 501 and 502. - -See the :doc:`Howto tip4p ` page for more information -on how to use the TIP4P pair styles and lists of parameters to set. -Note that the neighbor list cutoff for Coulomb interactions is -effectively extended by a distance 2\*qdist when using the TIP4P pair -style, to account for the offset distance of the fictitious charges on -O atoms in water molecules. Thus it is typically best in an -efficiency sense to use a LJ cutoff >= Coulombic cutoff + 2\*qdist, to -shrink the size of the neighbor list. This leads to slightly larger -cost for the long-range calculation, so you can test the trade-off for -your model. Coefficients """""""""""" diff --git a/doc/src/pair_lj_smooth_linear.rst b/doc/src/pair_lj_smooth_linear.rst index 7a3ba7a3d5..20b5e6cbda 100644 --- a/doc/src/pair_lj_smooth_linear.rst +++ b/doc/src/pair_lj_smooth_linear.rst @@ -31,13 +31,13 @@ Style *lj/smooth/linear* computes a truncated and force-shifted LJ interaction (aka Shifted Force Lennard-Jones) that combines the standard 12/6 Lennard-Jones function and subtracts a linear term based on the cutoff distance, so that both, the potential and the force, go -continuously to zero at the cutoff Rc :ref:`(Toxvaerd) `: +continuously to zero at the cutoff :math:`r_c` :ref:`(Toxvaerd) `: .. 
math:: \phi\left(r\right) & = 4 \epsilon \left[ \left(\frac{\sigma}{r}\right)^{12} - \left(\frac{\sigma}{r}\right)^6 \right] \\ - E\left(r\right) & = \phi\left(r\right) - \phi\left(R_c\right) - \left(r - R_c\right) \left.\frac{d\phi}{d r} \right|_{r=R_c} \qquad r < R_c + E\left(r\right) & = \phi\left(r\right) - \phi\left(r_c\right) - \left(r - r_c\right) \left.\frac{d\phi}{d r} \right|_{r=r_c} \qquad r < r_c The following coefficients must be defined for each pair of atoms types via the :doc:`pair_coeff ` command as in the examples @@ -77,8 +77,9 @@ tail option for adding long-range tail corrections to energy and pressure, since the energy of the pair interaction is smoothed to 0.0 at the cutoff. -This pair style writes its information to :doc:`binary restart files `, so pair_style and pair_coeff commands do not need -to be specified in an input script that reads a restart file. +This pair style writes its information to :doc:`binary restart files `, +so pair_style and pair_coeff commands do not need to be specified +in an input script that reads a restart file. This pair style can only be used via the *pair* keyword of the :doc:`run_style respa ` command. It does not support the diff --git a/doc/src/pair_mesodpd.rst b/doc/src/pair_mesodpd.rst index 5d244f3b1d..28a398754f 100644 --- a/doc/src/pair_mesodpd.rst +++ b/doc/src/pair_mesodpd.rst @@ -1,14 +1,20 @@ .. index:: pair_style edpd +.. index:: pair_style edpd/gpu .. index:: pair_style mdpd +.. index:: pair_style mdpd/gpu .. index:: pair_style mdpd/rhosum .. 
index:: pair_style tdpd pair_style edpd command ======================= +Accelerator Variants: *edpd/gpu* + pair_style mdpd command ======================= +Accelerator Variants: *mdpd/gpu* + pair_style mdpd/rhosum command ============================== diff --git a/doc/src/pair_mie.rst b/doc/src/pair_mie.rst index 089f8d3d29..6e9eec1f5c 100644 --- a/doc/src/pair_mie.rst +++ b/doc/src/pair_mie.rst @@ -35,7 +35,7 @@ The *mie/cut* style computes the Mie potential, given by E = C \epsilon \left[ \left(\frac{\sigma}{r}\right)^{\gamma_{rep}} - \left(\frac{\sigma}{r}\right)^{\gamma_{att}} \right] \qquad r < r_c -Rc is the cutoff and C is a function that depends on the repulsive and +:math:`r_c` is the cutoff and C is a function that depends on the repulsive and attractive exponents, given by: .. math:: diff --git a/doc/src/pair_morse.rst b/doc/src/pair_morse.rst index 807882980d..4b93d182bb 100644 --- a/doc/src/pair_morse.rst +++ b/doc/src/pair_morse.rst @@ -53,7 +53,7 @@ Style *morse* computes pairwise interactions with the formula E = D_0 \left[ e^{- 2 \alpha (r - r_0)} - 2 e^{- \alpha (r - r_0)} \right] \qquad r < r_c -Rc is the cutoff. +:math:`r_c` is the cutoff. The following coefficients must be defined for each pair of atoms types via the :doc:`pair_coeff ` command as in the examples @@ -78,7 +78,7 @@ so that both, potential energy and force, go to zero at the cut-off: .. math:: \phi\left(r\right) & = D_0 \left[ e^{- 2 \alpha (r - r_0)} - 2 e^{- \alpha (r - r_0)} \right] \qquad r < r_c \\ - E\left(r\right) & = \phi\left(r\right) - \phi\left(R_c\right) - \left(r - R_c\right) \left.\frac{d\phi}{d r} \right|_{r=R_c} \qquad r < R_c + E\left(r\right) & = \phi\left(r\right) - \phi\left(r_c\right) - \left(r - r_c\right) \left.\frac{d\phi}{d r} \right|_{r=r_c} \qquad r < r_c The syntax of the pair_style and pair_coeff commands are the same for the *morse* and *morse/smooth/linear* styles. 
diff --git a/doc/src/pair_pace.rst b/doc/src/pair_pace.rst index d815f663fe..001214370c 100644 --- a/doc/src/pair_pace.rst +++ b/doc/src/pair_pace.rst @@ -40,6 +40,9 @@ Examples pair_style pace product chunksize 2048 pair_coeff * * Cu-PBE-core-rep.ace Cu + pair_style pace + pair_coeff * * Cu.yaml Cu + pair_style pace/extrapolation pair_coeff * * Cu.yaml Cu.asi Cu @@ -64,7 +67,7 @@ specifies an ACE coefficient file followed by N additional arguments specifying the mapping of ACE elements to LAMMPS atom types, where N is the number of LAMMPS atom types: -* ACE coefficient file +* ACE coefficient file (.yaml or .yace/.ace format) * N element names = mapping of ACE elements to atom types Only a single pair_coeff command is used with the *pace* style which @@ -136,6 +139,22 @@ product B-basis evaluator is always used and only *linear* ASI is supported. See the :doc:`pair_coeff ` page for alternate ways to specify the path for the ACE coefficient file. +Core repulsion +""""""""""""""""""" +The ACE potential can be configured to initiate core-repulsion from an inner cutoff, +seamlessly transitioning from ACE to ZBL. The core repulsion factor can be accessed +as a per-atom quantity, as demonstrated in the example below: + +.. code-block:: LAMMPS + + pair_style pace + pair_coeff * * CuNi.yaml Cu Ni + + fix pace_corerep all pair 1 pace corerep 1 + +In this case, per-atom `f_pace_corerep` quantities represent the fraction of ZBL +core-repulsion for each atom. + Mixing, shift, table, tail correction, restart, rRESPA info """"""""""""""""""""""""""""""""""""""""""""""""""""""""""" diff --git a/doc/src/pair_soft.rst b/doc/src/pair_soft.rst index 1702811ed9..e21ae28432 100644 --- a/doc/src/pair_soft.rst +++ b/doc/src/pair_soft.rst @@ -44,8 +44,9 @@ It is useful for pushing apart overlapping atoms, since it does not blow up as r goes to 0. A is a prefactor that can be made to vary in time from the start to the end of the run (see discussion below), e.g. 
to start with a very soft potential and slowly harden the -interactions over time. Rc is the cutoff. See the :doc:`fix nve/limit ` command for another way to push apart -overlapping atoms. +interactions over time. :math:`r_c` is the cutoff. +See the :doc:`fix nve/limit ` command for another way +to push apart overlapping atoms. The following coefficients must be defined for each pair of atom types via the :doc:`pair_coeff ` command as in the examples above, diff --git a/doc/src/pair_sph_heatconduction.rst b/doc/src/pair_sph_heatconduction.rst index 4716ed54fb..e9004cb5a4 100644 --- a/doc/src/pair_sph_heatconduction.rst +++ b/doc/src/pair_sph_heatconduction.rst @@ -1,8 +1,11 @@ .. index:: pair_style sph/heatconduction +.. index:: pair_style sph/heatconduction/gpu pair_style sph/heatconduction command ===================================== +Accelerator Variants: *sph/heatconduction/gpu* + Syntax """""" diff --git a/doc/src/pair_sph_lj.rst b/doc/src/pair_sph_lj.rst index b5c02c41ff..5ac7ab9c6b 100644 --- a/doc/src/pair_sph_lj.rst +++ b/doc/src/pair_sph_lj.rst @@ -1,8 +1,11 @@ .. index:: pair_style sph/lj +.. index:: pair_style sph/lj/gpu pair_style sph/lj command ========================= +Accelerator Variants: *sph/lj/gpu* + Syntax """""" diff --git a/doc/src/pair_sph_taitwater.rst b/doc/src/pair_sph_taitwater.rst index 34eb65f005..79972660c4 100644 --- a/doc/src/pair_sph_taitwater.rst +++ b/doc/src/pair_sph_taitwater.rst @@ -1,8 +1,11 @@ .. index:: pair_style sph/taitwater +.. 
index:: pair_style sph/taitwater/gpu pair_style sph/taitwater command ================================ +Accelerator Variants: *sph/taitwater/gpu* + Syntax """""" diff --git a/doc/src/pair_spica.rst b/doc/src/pair_spica.rst index 74a069d8a2..859506593f 100644 --- a/doc/src/pair_spica.rst +++ b/doc/src/pair_spica.rst @@ -81,7 +81,7 @@ given by as required for the SPICA (formerly called SDK) and the pSPICA Coarse-grained MD parameterization discussed in :ref:`(Shinoda) `, :ref:`(DeVane) `, :ref:`(Seo) `, and :ref:`(Miyazaki) `. -Rc is the cutoff. +:math:`r_c` is the cutoff. Summary information on these force fields can be found at https://www.spica-ff.org Style *lj/spica/coul/long* adds Coulombic interactions diff --git a/doc/src/variable.rst b/doc/src/variable.rst index 92a78ee3c1..a70ac25836 100644 --- a/doc/src/variable.rst +++ b/doc/src/variable.rst @@ -53,7 +53,7 @@ Syntax x == y, x != y, x < y, x <= y, x > y, x >= y, x && y, x \|\| y, x \|\^ y, !x math functions = sqrt(x), exp(x), ln(x), log(x), abs(x), sin(x), cos(x), tan(x), asin(x), acos(x), atan(x), atan2(y,x), - random(x,y,z), normal(x,y,z), ceil(x), floor(x), round(x) + random(x,y,z), normal(x,y,z), ceil(x), floor(x), round(x), ternary(x,y,z), ramp(x,y), stagger(x,y), logfreq(x,y,z), logfreq2(x,y,z), logfreq3(x,y,z), stride(x,y,z), stride2(x,y,z,a,b,c), vdisplace(x,y), swiggle(x,y,z), cwiggle(x,y,z) @@ -71,6 +71,7 @@ Syntax feature functions = is_available(category,feature), is_active(category,feature), is_defined(category,id) atom value = id[i], mass[i], type[i], mol[i], x[i], y[i], z[i], vx[i], vy[i], vz[i], fx[i], fy[i], fz[i], q[i] atom vector = id, mass, type, mol, radius, q, x, y, z, vx, vy, vz, fx, fy, fz + custom atom property = i_name, d_name, i_name[i], d_name[i], i2_name[i], d2_name[i], i2_name[i][j], d2_name[i][j] compute references = c_ID, c_ID[i], c_ID[i][j], C_ID, C_ID[i] fix references = f_ID, f_ID[i], f_ID[i][j], F_ID, F_ID[i] variable references = v_name, v_name[i] @@ -514,38 
+515,40 @@ is a valid (though strange) variable formula: Specifically, a formula can contain numbers, constants, thermo keywords, math operators, math functions, group functions, region functions, special functions, feature functions, atom values, atom -vectors, compute references, fix references, and references to other +vectors, custom atom properties, compute references, fix references, and references to other variables. -+--------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Number | 0.2, 100, 1.0e20, -15.4, etc | -+--------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Constant | PI, version, on, off, true, false, yes, no | -+--------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Thermo keywords | vol, pe, ebond, etc | -+--------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Math operators | (), -x, 
x+y, x-y, x\*y, x/y, x\^y, x%y, x == y, x != y, x < y, x <= y, x > y, x >= y, x && y, x \|\| y, x \|\^ y, !x | -+--------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Math functions | sqrt(x), exp(x), ln(x), log(x), abs(x), sin(x), cos(x), tan(x), asin(x), acos(x), atan(x), atan2(y,x), random(x,y,z), normal(x,y,z), ceil(x), floor(x), round(x), ramp(x,y), stagger(x,y), logfreq(x,y,z), logfreq2(x,y,z), logfreq3(x,y,z), stride(x,y,z), stride2(x,y,z,a,b,c), vdisplace(x,y), swiggle(x,y,z), cwiggle(x,y,z) | -+--------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Group functions | count(ID), mass(ID), charge(ID), xcm(ID,dim), vcm(ID,dim), fcm(ID,dim), bound(ID,dir), gyration(ID), ke(ID), angmom(ID,dim), torque(ID,dim), inertia(ID,dimdim), omega(ID,dim) | -+--------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Region functions | count(ID,IDR), mass(ID,IDR), charge(ID,IDR), xcm(ID,dim,IDR), vcm(ID,dim,IDR), fcm(ID,dim,IDR), bound(ID,dir,IDR), gyration(ID,IDR), ke(ID,IDR), angmom(ID,dim,IDR), torque(ID,dim,IDR), inertia(ID,dimdim,IDR), omega(ID,dim,IDR) | 
-+--------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Special functions | sum(x), min(x), max(x), ave(x), trap(x), slope(x), gmask(x), rmask(x), grmask(x,y), next(x), is_file(name), is_os(name), extract_setting(name), label2type(kind,label), is_typelabel(kind,label) | -+--------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Feature functions | is_available(category,feature), is_active(category,feature), is_defined(category,id) | -+--------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Atom values | id[i], mass[i], type[i], mol[i], x[i], y[i], z[i], vx[i], vy[i], vz[i], fx[i], fy[i], fz[i], q[i] | -+--------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Atom vectors | id, mass, type, mol, x, y, z, vx, vy, vz, fx, fy, fz, q | 
-+--------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Compute references | c_ID, c_ID[i], c_ID[i][j], C_ID, C_ID[i] | -+--------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Fix references | f_ID, f_ID[i], f_ID[i][j], F_ID, F_ID[i] | -+--------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Other variables | v_name, v_name[i] | -+--------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Number | 0.2, 100, 1.0e20, -15.4, etc | 
++------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Constant | PI, version, on, off, true, false, yes, no | ++------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Thermo keywords | vol, pe, ebond, etc | ++------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Math operators | (), -x, x+y, x-y, x\*y, x/y, x\^y, x%y, x == y, x != y, x < y, x <= y, x > y, x >= y, x && y, x \|\| y, x \|\^ y, !x | ++------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Math functions | sqrt(x), exp(x), ln(x), log(x), abs(x), sin(x), cos(x), tan(x), asin(x), acos(x), atan(x), atan2(y,x), random(x,y,z), normal(x,y,z), ceil(x), floor(x), round(x), ternary(x,y,z), ramp(x,y), stagger(x,y), logfreq(x,y,z), logfreq2(x,y,z), logfreq3(x,y,z), 
stride(x,y,z), stride2(x,y,z,a,b,c), vdisplace(x,y), swiggle(x,y,z), cwiggle(x,y,z) | ++------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Group functions | count(ID), mass(ID), charge(ID), xcm(ID,dim), vcm(ID,dim), fcm(ID,dim), bound(ID,dir), gyration(ID), ke(ID), angmom(ID,dim), torque(ID,dim), inertia(ID,dimdim), omega(ID,dim) | ++------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Region functions | count(ID,IDR), mass(ID,IDR), charge(ID,IDR), xcm(ID,dim,IDR), vcm(ID,dim,IDR), fcm(ID,dim,IDR), bound(ID,dir,IDR), gyration(ID,IDR), ke(ID,IDR), angmom(ID,dim,IDR), torque(ID,dim,IDR), inertia(ID,dimdim,IDR), omega(ID,dim,IDR) | ++------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Special functions | sum(x), min(x), max(x), ave(x), trap(x), slope(x), gmask(x), rmask(x), grmask(x,y), next(x), is_file(name), is_os(name), extract_setting(name), label2type(kind,label), is_typelabel(kind,label) | 
++------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Feature functions | is_available(category,feature), is_active(category,feature), is_defined(category,id) | ++------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Atom values | id[i], mass[i], type[i], mol[i], x[i], y[i], z[i], vx[i], vy[i], vz[i], fx[i], fy[i], fz[i], q[i] | ++------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Atom vectors | id, mass, type, mol, x, y, z, vx, vy, vz, fx, fy, fz, q | ++------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Custom atom properties | i_name, d_name, i_name[i], d_name[i], i2_name[i], d2_name[i], i2_name[i][j], d_name[i][j] | 
++------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Compute references | c_ID, c_ID[i], c_ID[i][j], C_ID, C_ID[i] | ++------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Fix references | f_ID, f_ID[i], f_ID[i][j], F_ID, F_ID[i] | ++------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Other variables | v_name, v_name[i] | ++------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ Most of the formula elements produce a scalar value. Some produce a global or per-atom vector of values. Global vectors can be produced @@ -703,6 +706,13 @@ library. Ceil() is the smallest integer not less than its argument. Floor() if the largest integer not greater than its argument. Round() is the nearest integer to its argument. +.. 
versionadded:: TBD + +The ternary(x,y,z) function is the equivalent of the ternary operator +(? and :) in C or C++. It takes 3 arguments. The first argument is a +conditional. The result of the function is y if x evaluates to true +(non-zero). The result is z if x evaluates to false (zero). + The ramp(x,y) function uses the current timestep to generate a value linearly interpolated between the specified x,y values over the course of a run, according to this formula: @@ -1034,10 +1044,9 @@ to built-in commands. For all of these styles except *command*, appending of active suffixes is also tried before reporting failure. The *feature* category checks the availability of the following -compile-time enabled features: GZIP support, PNG support, JPEG -support, FFMPEG support, and C++ exceptions for error -handling. Corresponding names are *gzip*, *png*, *jpeg*, *ffmpeg* and -*exceptions*\ . +compile-time enabled features: GZIP support, PNG support, JPEG support, +FFMPEG support, and C++ exceptions for error handling. Corresponding +names are *gzip*, *png*, *jpeg*, *ffmpeg* and *exceptions*\ . Example: Only dump in a given format if the compiled binary supports it. @@ -1139,8 +1148,45 @@ defines molecule IDs. Note that many other atom attributes can be used as inputs to a variable by using the :doc:`compute property/atom -` command and then specifying a quantity from -that compute. +` command and then referencing that compute. + +---------- + +Custom atom properties +---------------------- + +.. versionadded:: TBD + +Custom atom properties refer to per-atom integer and floating point +vectors or arrays that have been added via the :doc:`fix property/atom +` command. When that command is used, specific +names are given to each attribute; these names are the "name" portion of +these references. References beginning with *i* and *d* refer to +integer and floating point properties respectively. 
Per-atom vectors +are referenced by *i_name* and *d_name*; per-atom arrays are +referenced by *i2_name* and *d2_name*. + +The various allowed references to integer custom atom properties in +the variable formulas for equal-, vector-, and atom-style variables +are listed in the following table. References to floating point +custom atom properties are the same; just replace the leading "i" with +"d". + ++--------+---------------+------------------------------------------+ +| equal | i_name[I] | element of per-atom vector (I = atom ID) | +| equal | i2_name[I][J] | element of per-atom array (I = atom ID) | ++--------+---------------+------------------------------------------+ +| vector | i_name[I] | element of per-atom vector (I = atom ID) | +| vector | i2_name[I][J] | element of per-atom array (I = atom ID) | ++--------+---------------+------------------------------------------+ +| atom | i_name | per-atom vector | +| atom | i2_name[I] | column of per-atom array | ++--------+---------------+------------------------------------------+ + +The I and J indices in these custom atom property references can be +integers or can be a variable name, specified as v_name, where name is +the name of the variable. The rules for this syntax are the same as +for indices in the "Atom Values and Vectors" discussion above. 
---------- diff --git a/doc/utils/sphinx-config/false_positives.txt b/doc/utils/sphinx-config/false_positives.txt index 75589e3115..c31c6a0a70 100644 --- a/doc/utils/sphinx-config/false_positives.txt +++ b/doc/utils/sphinx-config/false_positives.txt @@ -1074,6 +1074,7 @@ facesets factorizable factorizations Fahrenberger +Falk Faken Farago Fasolino @@ -1173,6 +1174,7 @@ Foiles fopenmp forceclear forestgreen +formamide formatarg formulae Forschungszentrum @@ -1821,6 +1823,7 @@ Lanczos Lande Landron Landsgesell +Langer langevin Langevin Langston @@ -1969,6 +1972,7 @@ lps lpsapi lrt lsfftw +lt ltbbmalloc Lua lubricateU @@ -2497,6 +2501,7 @@ noforce noguess Noid nolib +nonaffine nonequilibrium nongauss nonGaussian @@ -2569,6 +2574,7 @@ nthreads ntimestep Ntptask Ntriples +ntries ntris Ntype ntypes @@ -3373,6 +3379,7 @@ Sodani Soderlind Solaris Solida +solv solvated solvation someuser @@ -3619,6 +3626,7 @@ timestepping timesteps TiN TiO +Tirado Tirrell Titer Tji diff --git a/examples/PACKAGES/dpd-meso/mdpd/in.mdpd b/examples/PACKAGES/dpd-meso/mdpd/in.mdpd index b0740c8227..2c740f4127 100644 --- a/examples/PACKAGES/dpd-meso/mdpd/in.mdpd +++ b/examples/PACKAGES/dpd-meso/mdpd/in.mdpd @@ -16,6 +16,7 @@ neighbor 0.3 bin neigh_modify every 1 delay 0 check yes atom_style mdpd +comm_modify vel yes region mdpd block -25 25 -10 10 -10 10 units box create_box 1 mdpd diff --git a/examples/PACKAGES/pace/README.md b/examples/PACKAGES/pace/README.md new file mode 100644 index 0000000000..66254b5241 --- /dev/null +++ b/examples/PACKAGES/pace/README.md @@ -0,0 +1,9 @@ +# This folder contains examples for pace in LAMMPS + + +## Compute pace usage +compute/latte_cell_0.data # lammps data file with C-H-O structure +compute/latte_cell_0.xyz # xyz file with C-H-O structure +compute/coupling_coefficients.yace # .yace file containing coupling coefficients (or ACE potential parameters) +compute/in.compute # input file for calling `compute pace` + diff --git 
a/examples/PACKAGES/pace/compute/coupling_coefficients.yace b/examples/PACKAGES/pace/compute/coupling_coefficients.yace new file mode 100644 index 0000000000..2953222216 --- /dev/null +++ b/examples/PACKAGES/pace/compute/coupling_coefficients.yace @@ -0,0 +1,294 @@ +elements: [H, N, O] +E0: [0.000000, 0.000000, 0.000000] +deltaSplineBins: 0.001000 +embeddings: + 0: {ndensity: 1, FS_parameters: [1.0, 1.0], npoti: FinnisSinclair, rho_core_cutoff: 100000, drho_core_cutoff: 250} + 1: {ndensity: 1, FS_parameters: [1.0, 1.0], npoti: FinnisSinclair, rho_core_cutoff: 100000, drho_core_cutoff: 250} + 2: {ndensity: 1, FS_parameters: [1.0, 1.0], npoti: FinnisSinclair, rho_core_cutoff: 100000, drho_core_cutoff: 250} +bonds: + [0, 0]: {nradmax: 2, lmax: 2, nradbasemax: 2, radbasename: ChebExpCos, radparameters: [3.3], radcoefficients: [[[1, 0], [1, 0], [1, 0]], [[0, 1], [0, 1], [0, 1]]], prehc: 0, lambdahc: 3.3, rcut: 5.0, dcut: 0.01, rcut_in: 0.1, dcut_in: 0.01, inner_cutoff_type: distance} + [0, 1]: {nradmax: 2, lmax: 2, nradbasemax: 2, radbasename: ChebExpCos, radparameters: [3.3], radcoefficients: [[[1, 0], [1, 0], [1, 0]], [[0, 1], [0, 1], [0, 1]]], prehc: 0, lambdahc: 3.3, rcut: 5.5, dcut: 0.01, rcut_in: 0.1, dcut_in: 0.01, inner_cutoff_type: distance} + [0, 2]: {nradmax: 2, lmax: 2, nradbasemax: 2, radbasename: ChebExpCos, radparameters: [3.3], radcoefficients: [[[1, 0], [1, 0], [1, 0]], [[0, 1], [0, 1], [0, 1]]], prehc: 0, lambdahc: 3.3, rcut: 5.7, dcut: 0.01, rcut_in: 0.1, dcut_in: 0.01, inner_cutoff_type: distance} + [1, 0]: {nradmax: 2, lmax: 2, nradbasemax: 2, radbasename: ChebExpCos, radparameters: [3.3], radcoefficients: [[[1, 0], [1, 0], [1, 0]], [[0, 1], [0, 1], [0, 1]]], prehc: 0, lambdahc: 3.3, rcut: 5.5, dcut: 0.01, rcut_in: 0.1, dcut_in: 0.01, inner_cutoff_type: distance} + [1, 1]: {nradmax: 2, lmax: 2, nradbasemax: 2, radbasename: ChebExpCos, radparameters: [3.3], radcoefficients: [[[1, 0], [1, 0], [1, 0]], [[0, 1], [0, 1], [0, 1]]], prehc: 0, lambdahc: 
3.3, rcut: 4.4, dcut: 0.01, rcut_in: 0.1, dcut_in: 0.01, inner_cutoff_type: distance} + [1, 2]: {nradmax: 2, lmax: 2, nradbasemax: 2, radbasename: ChebExpCos, radparameters: [3.3], radcoefficients: [[[1, 0], [1, 0], [1, 0]], [[0, 1], [0, 1], [0, 1]]], prehc: 0, lambdahc: 3.3, rcut: 5.7, dcut: 0.01, rcut_in: 0.1, dcut_in: 0.01, inner_cutoff_type: distance} + [2, 0]: {nradmax: 2, lmax: 2, nradbasemax: 2, radbasename: ChebExpCos, radparameters: [3.3], radcoefficients: [[[1, 0], [1, 0], [1, 0]], [[0, 1], [0, 1], [0, 1]]], prehc: 0, lambdahc: 3.3, rcut: 5.7, dcut: 0.01, rcut_in: 0.1, dcut_in: 0.01, inner_cutoff_type: distance} + [2, 1]: {nradmax: 2, lmax: 2, nradbasemax: 2, radbasename: ChebExpCos, radparameters: [3.3], radcoefficients: [[[1, 0], [1, 0], [1, 0]], [[0, 1], [0, 1], [0, 1]]], prehc: 0, lambdahc: 3.3, rcut: 5.7, dcut: 0.01, rcut_in: 0.1, dcut_in: 0.01, inner_cutoff_type: distance} + [2, 2]: {nradmax: 2, lmax: 2, nradbasemax: 2, radbasename: ChebExpCos, radparameters: [3.3], radcoefficients: [[[1, 0], [1, 0], [1, 0]], [[0, 1], [0, 1], [0, 1]]], prehc: 0, lambdahc: 3.3, rcut: 5.5, dcut: 0.01, rcut_in: 0.1, dcut_in: 0.01, inner_cutoff_type: distance} +functions: + 0: + - {mu0: 0, rank: 1, ndensity: 1, num_ms_combs: 1, mus: [0], ns: [1], ls: [0], ms_combs: [0], ctildes: [1.0]} + - {mu0: 0, rank: 1, ndensity: 1, num_ms_combs: 1, mus: [0], ns: [2], ls: [0], ms_combs: [0], ctildes: [1.0]} + - {mu0: 0, rank: 1, ndensity: 1, num_ms_combs: 1, mus: [1], ns: [2], ls: [0], ms_combs: [0], ctildes: [1.0]} + - {mu0: 0, rank: 1, ndensity: 1, num_ms_combs: 1, mus: [1], ns: [1], ls: [0], ms_combs: [0], ctildes: [1.0]} + - {mu0: 0, rank: 1, ndensity: 1, num_ms_combs: 1, mus: [2], ns: [1], ls: [0], ms_combs: [0], ctildes: [1.0]} + - {mu0: 0, rank: 1, ndensity: 1, num_ms_combs: 1, mus: [2], ns: [2], ls: [0], ms_combs: [0], ctildes: [1.0]} + - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 2], ns: [1, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: 
[0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 2], ns: [1, 1], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [1, 1], ns: [1, 1], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 1], ns: [1, 1], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [1, 2], ns: [1, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 2], ns: [2, 1], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [1, 1], ns: [2, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 2], ns: [1, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [1, 2], ns: [2, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 0], ns: [1, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, 
-0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 0], ns: [1, 1], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 1], ns: [2, 1], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [1, 2], ns: [2, 1], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [2, 2], ns: [1, 1], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 1], ns: [2, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 0], ns: [2, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [1, 1], ns: [2, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 1], ns: [1, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 2], ns: [2, 1], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, 
-0.5773502691896257, 0.5773502691896257]} + - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 0], ns: [2, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 1], ns: [2, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [2, 2], ns: [2, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [2, 2], ns: [1, 1], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 2], ns: [2, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [1, 1], ns: [1, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [1, 2], ns: [1, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 0], ns: [1, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 1], ns: [2, 1], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: 
[0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 2], ns: [2, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [1, 2], ns: [2, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 1], ns: [1, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 0], ns: [1, 1], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [1, 2], ns: [1, 1], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [2, 2], ns: [1, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [1, 1], ns: [1, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 1], ns: [1, 1], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [1, 2], ns: [2, 1], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 
0.5773502691896257]} + - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 2], ns: [1, 1], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [1, 1], ns: [1, 1], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [2, 2], ns: [2, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [2, 2], ns: [1, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [1, 2], ns: [1, 1], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 0, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [1, 1, 2], ns: [1, 1, 1], ls: [1, 2, 1], ms_combs: [-1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -2, 1, 1, -1, 0, 1, 0, -1], ctildes: [0.10540925533894599, -0.18257418583505536, 0.25819888974716115, -0.18257418583505536, 0.21081851067789198, -0.18257418583505536, 0.25819888974716115, -0.18257418583505536, 0.10540925533894599]} + - {mu0: 0, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 2, 2], ns: [1, 1, 1], ls: [2, 1, 1], ms_combs: [-2, 1, 1, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 2, -1, -1], ctildes: [0.25819888974716115, -0.18257418583505536, -0.18257418583505536, 0.10540925533894599, 0.21081851067789198, 0.10540925533894599, -0.18257418583505536, -0.18257418583505536, 0.25819888974716115]} + - {mu0: 0, rank: 3, ndensity: 1, 
num_ms_combs: 9, mus: [0, 1, 2], ns: [1, 1, 1], ls: [1, 2, 1], ms_combs: [-1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -2, 1, 1, -1, 0, 1, 0, -1], ctildes: [0.10540925533894599, -0.18257418583505536, 0.25819888974716115, -0.18257418583505536, 0.21081851067789198, -0.18257418583505536, 0.25819888974716115, -0.18257418583505536, 0.10540925533894599]} + - {mu0: 0, rank: 3, ndensity: 1, num_ms_combs: 19, mus: [0, 0, 0], ns: [1, 1, 1], ls: [2, 2, 2], ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 2, -1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: [0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.10690449676496976]} + - {mu0: 0, rank: 3, ndensity: 1, num_ms_combs: 19, mus: [1, 1, 2], ns: [1, 1, 1], ls: [2, 2, 2], ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 2, -1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: [0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.10690449676496976]} + - {mu0: 0, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [1, 1, 2], ns: [1, 1, 1], ls: [1, 1, 2], ms_combs: [-1, -1, 2, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 1, 1, -2], 
ctildes: [0.19999999999999998, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]} + - {mu0: 0, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [1, 2, 2], ns: [1, 1, 1], ls: [1, 1, 2], ms_combs: [-1, -1, 2, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 1, 1, -2], ctildes: [0.19999999999999998, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]} + - {mu0: 0, rank: 3, ndensity: 1, num_ms_combs: 19, mus: [0, 0, 2], ns: [1, 1, 1], ls: [2, 2, 2], ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 2, -1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: [0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.10690449676496976]} + - {mu0: 0, rank: 3, ndensity: 1, num_ms_combs: 19, mus: [0, 1, 2], ns: [1, 1, 1], ls: [2, 2, 2], ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 2, -1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: [0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 
-0.1309307341415954, 0.10690449676496976]} + - {mu0: 0, rank: 3, ndensity: 1, num_ms_combs: 19, mus: [1, 2, 2], ns: [1, 1, 1], ls: [2, 2, 2], ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 2, -1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: [0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.10690449676496976]} + - {mu0: 0, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 1, 1], ns: [1, 1, 1], ls: [1, 1, 2], ms_combs: [-1, -1, 2, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 1, 1, -2], ctildes: [0.19999999999999998, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]} + - {mu0: 0, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [1, 1, 1], ns: [1, 1, 1], ls: [1, 1, 2], ms_combs: [-1, -1, 2, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 1, 1, -2], ctildes: [0.19999999999999998, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]} + - {mu0: 0, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 0, 2], ns: [1, 1, 1], ls: [1, 1, 2], ms_combs: [-1, -1, 2, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 1, 1, -2], ctildes: [0.19999999999999998, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]} + - {mu0: 0, rank: 3, ndensity: 1, 
num_ms_combs: 19, mus: [2, 2, 2], ns: [1, 1, 1], ls: [2, 2, 2], ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 2, -1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: [0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.10690449676496976]} + - {mu0: 0, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 0, 0], ns: [1, 1, 1], ls: [1, 1, 2], ms_combs: [-1, -1, 2, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 1, 1, -2], ctildes: [0.19999999999999998, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]} + - {mu0: 0, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 0, 2], ns: [1, 1, 1], ls: [1, 2, 1], ms_combs: [-1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -2, 1, 1, -1, 0, 1, 0, -1], ctildes: [0.10540925533894599, -0.18257418583505536, 0.25819888974716115, -0.18257418583505536, 0.21081851067789198, -0.18257418583505536, 0.25819888974716115, -0.18257418583505536, 0.10540925533894599]} + - {mu0: 0, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 1, 2], ns: [1, 1, 1], ls: [2, 1, 1], ms_combs: [-2, 1, 1, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 2, -1, -1], ctildes: [0.25819888974716115, -0.18257418583505536, -0.18257418583505536, 0.10540925533894599, 0.21081851067789198, 0.10540925533894599, -0.18257418583505536, -0.18257418583505536, 0.25819888974716115]} + - {mu0: 0, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 2, 2], ns: [1, 1, 1], ls: [1, 1, 2], ms_combs: [-1, 
-1, 2, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 1, 1, -2], ctildes: [0.19999999999999998, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]} + - {mu0: 0, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 0, 1], ns: [1, 1, 1], ls: [1, 2, 1], ms_combs: [-1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -2, 1, 1, -1, 0, 1, 0, -1], ctildes: [0.10540925533894599, -0.18257418583505536, 0.25819888974716115, -0.18257418583505536, 0.21081851067789198, -0.18257418583505536, 0.25819888974716115, -0.18257418583505536, 0.10540925533894599]} + - {mu0: 0, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 1, 1], ns: [1, 1, 1], ls: [2, 1, 1], ms_combs: [-2, 1, 1, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 2, -1, -1], ctildes: [0.25819888974716115, -0.18257418583505536, -0.18257418583505536, 0.10540925533894599, 0.21081851067789198, 0.10540925533894599, -0.18257418583505536, -0.18257418583505536, 0.25819888974716115]} + - {mu0: 0, rank: 3, ndensity: 1, num_ms_combs: 19, mus: [1, 1, 1], ns: [1, 1, 1], ls: [2, 2, 2], ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 2, -1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: [0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.10690449676496976]} + - {mu0: 0, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 0, 1], ns: [1, 1, 1], ls: [1, 1, 2], ms_combs: [-1, -1, 2, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 
1, 1, -2], ctildes: [0.19999999999999998, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]} + - {mu0: 0, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 1, 2], ns: [1, 1, 1], ls: [1, 1, 2], ms_combs: [-1, -1, 2, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 1, 1, -2], ctildes: [0.19999999999999998, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]} + - {mu0: 0, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [1, 2, 2], ns: [1, 1, 1], ls: [2, 1, 1], ms_combs: [-2, 1, 1, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 2, -1, -1], ctildes: [0.25819888974716115, -0.18257418583505536, -0.18257418583505536, 0.10540925533894599, 0.21081851067789198, 0.10540925533894599, -0.18257418583505536, -0.18257418583505536, 0.25819888974716115]} + - {mu0: 0, rank: 3, ndensity: 1, num_ms_combs: 19, mus: [0, 1, 1], ns: [1, 1, 1], ls: [2, 2, 2], ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 2, -1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: [0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.10690449676496976]} + - {mu0: 0, rank: 3, ndensity: 1, num_ms_combs: 19, mus: [0, 2, 2], ns: [1, 1, 1], ls: [2, 2, 2], ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 2, -1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 
2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: [0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.10690449676496976]} + - {mu0: 0, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [2, 2, 2], ns: [1, 1, 1], ls: [1, 1, 2], ms_combs: [-1, -1, 2, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 1, 1, -2], ctildes: [0.19999999999999998, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]} + - {mu0: 0, rank: 3, ndensity: 1, num_ms_combs: 19, mus: [0, 0, 1], ns: [1, 1, 1], ls: [2, 2, 2], ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 2, -1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: [0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.10690449676496976]} + - {mu0: 0, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [0, 0, 0, 0], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 
0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]} + - {mu0: 0, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [0, 0, 0, 1], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]} + - {mu0: 0, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [0, 0, 0, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]} + - {mu0: 0, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [0, 0, 1, 1], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 
0.3333333333333333, 0.0, 0.0, 0.0]} + - {mu0: 0, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [0, 0, 1, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]} + - {mu0: 0, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [0, 0, 2, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]} + - {mu0: 0, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [0, 1, 1, 1], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]} + - {mu0: 0, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [0, 1, 1, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], 
ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]} + - {mu0: 0, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [0, 1, 2, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]} + - {mu0: 0, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [0, 2, 2, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]} + - {mu0: 0, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [1, 1, 1, 1], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 
1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]} + - {mu0: 0, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [1, 1, 1, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]} + - {mu0: 0, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [1, 1, 2, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]} + - {mu0: 0, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [1, 2, 2, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 
0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]} + - {mu0: 0, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [2, 2, 2, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]} + 1: + - {mu0: 1, rank: 1, ndensity: 1, num_ms_combs: 1, mus: [1], ns: [1], ls: [0], ms_combs: [0], ctildes: [1.0]} + - {mu0: 1, rank: 1, ndensity: 1, num_ms_combs: 1, mus: [1], ns: [2], ls: [0], ms_combs: [0], ctildes: [1.0]} + - {mu0: 1, rank: 1, ndensity: 1, num_ms_combs: 1, mus: [2], ns: [1], ls: [0], ms_combs: [0], ctildes: [1.0]} + - {mu0: 1, rank: 1, ndensity: 1, num_ms_combs: 1, mus: [2], ns: [2], ls: [0], ms_combs: [0], ctildes: [1.0]} + - {mu0: 1, rank: 1, ndensity: 1, num_ms_combs: 1, mus: [0], ns: [2], ls: [0], ms_combs: [0], ctildes: [1.0]} + - {mu0: 1, rank: 1, ndensity: 1, num_ms_combs: 1, mus: [0], ns: [1], ls: [0], ms_combs: [0], ctildes: [1.0]} + - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 1], ns: [1, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [2, 2], ns: [1, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 
0.4472135954999579]} + - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [1, 2], ns: [1, 1], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [1, 1], ns: [2, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [2, 2], ns: [2, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 1], ns: [2, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [1, 2], ns: [1, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 2], ns: [1, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [1, 1], ns: [1, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [1, 1], ns: [1, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 1], ns: [1, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 1, rank: 2, 
ndensity: 1, num_ms_combs: 3, mus: [0, 1], ns: [2, 1], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 2], ns: [2, 1], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [1, 1], ns: [1, 1], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 2], ns: [2, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 0], ns: [2, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 2], ns: [1, 1], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [1, 1], ns: [2, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [2, 2], ns: [1, 1], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 2], ns: [1, 1], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 1, 
rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 1], ns: [2, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [2, 2], ns: [2, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 0], ns: [2, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 0], ns: [1, 1], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [2, 2], ns: [1, 1], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [1, 2], ns: [2, 1], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 2], ns: [1, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 0], ns: [1, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [1, 2], ns: [1, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 3, mus: 
[0, 0], ns: [1, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [1, 2], ns: [2, 1], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 2], ns: [2, 1], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [2, 2], ns: [1, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [1, 2], ns: [1, 1], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 2], ns: [2, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [1, 1], ns: [1, 1], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [1, 2], ns: [2, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 0], ns: [1, 1], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 1], ns: [2, 1], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: 
[0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 1], ns: [1, 1], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [1, 2], ns: [2, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 1], ns: [1, 1], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 1, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 0, 1], ns: [1, 1, 1], ls: [1, 1, 2], ms_combs: [-1, -1, 2, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 1, 1, -2], ctildes: [0.19999999999999998, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]} + - {mu0: 1, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [1, 1, 2], ns: [1, 1, 1], ls: [1, 2, 1], ms_combs: [-1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -2, 1, 1, -1, 0, 1, 0, -1], ctildes: [0.10540925533894599, -0.18257418583505536, 0.25819888974716115, -0.18257418583505536, 0.21081851067789198, -0.18257418583505536, 0.25819888974716115, -0.18257418583505536, 0.10540925533894599]} + - {mu0: 1, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [1, 1, 2], ns: [1, 1, 1], ls: [1, 1, 2], ms_combs: [-1, -1, 2, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 1, 1, -2], ctildes: [0.19999999999999998, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]} + - 
{mu0: 1, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 0, 2], ns: [1, 1, 1], ls: [1, 1, 2], ms_combs: [-1, -1, 2, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 1, 1, -2], ctildes: [0.19999999999999998, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]} + - {mu0: 1, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [1, 2, 2], ns: [1, 1, 1], ls: [2, 1, 1], ms_combs: [-2, 1, 1, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 2, -1, -1], ctildes: [0.25819888974716115, -0.18257418583505536, -0.18257418583505536, 0.10540925533894599, 0.21081851067789198, 0.10540925533894599, -0.18257418583505536, -0.18257418583505536, 0.25819888974716115]} + - {mu0: 1, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 1, 1], ns: [1, 1, 1], ls: [2, 1, 1], ms_combs: [-2, 1, 1, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 2, -1, -1], ctildes: [0.25819888974716115, -0.18257418583505536, -0.18257418583505536, 0.10540925533894599, 0.21081851067789198, 0.10540925533894599, -0.18257418583505536, -0.18257418583505536, 0.25819888974716115]} + - {mu0: 1, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 0, 0], ns: [1, 1, 1], ls: [1, 1, 2], ms_combs: [-1, -1, 2, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 1, 1, -2], ctildes: [0.19999999999999998, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]} + - {mu0: 1, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 1, 2], ns: [1, 1, 1], ls: [2, 1, 1], ms_combs: [-2, 1, 1, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 2, -1, -1], ctildes: [0.25819888974716115, -0.18257418583505536, -0.18257418583505536, 0.10540925533894599, 0.21081851067789198, 0.10540925533894599, -0.18257418583505536, -0.18257418583505536, 
0.25819888974716115]} + - {mu0: 1, rank: 3, ndensity: 1, num_ms_combs: 19, mus: [1, 1, 1], ns: [1, 1, 1], ls: [2, 2, 2], ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 2, -1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: [0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.10690449676496976]} + - {mu0: 1, rank: 3, ndensity: 1, num_ms_combs: 19, mus: [1, 1, 2], ns: [1, 1, 1], ls: [2, 2, 2], ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 2, -1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: [0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.10690449676496976]} + - {mu0: 1, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 1, 1], ns: [1, 1, 1], ls: [1, 1, 2], ms_combs: [-1, -1, 2, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 1, 1, -2], ctildes: [0.19999999999999998, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]} + - {mu0: 1, rank: 3, ndensity: 1, num_ms_combs: 19, mus: [0, 1, 1], ns: [1, 1, 1], ls: [2, 2, 2], ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 
2, -1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: [0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.10690449676496976]} + - {mu0: 1, rank: 3, ndensity: 1, num_ms_combs: 19, mus: [1, 2, 2], ns: [1, 1, 1], ls: [2, 2, 2], ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 2, -1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: [0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.10690449676496976]} + - {mu0: 1, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 1, 2], ns: [1, 1, 1], ls: [1, 1, 2], ms_combs: [-1, -1, 2, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 1, 1, -2], ctildes: [0.19999999999999998, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]} + - {mu0: 1, rank: 3, ndensity: 1, num_ms_combs: 19, mus: [0, 2, 2], ns: [1, 1, 1], ls: [2, 2, 2], ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 2, -1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: 
[0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.10690449676496976]} + - {mu0: 1, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 1, 2], ns: [1, 1, 1], ls: [1, 2, 1], ms_combs: [-1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -2, 1, 1, -1, 0, 1, 0, -1], ctildes: [0.10540925533894599, -0.18257418583505536, 0.25819888974716115, -0.18257418583505536, 0.21081851067789198, -0.18257418583505536, 0.25819888974716115, -0.18257418583505536, 0.10540925533894599]} + - {mu0: 1, rank: 3, ndensity: 1, num_ms_combs: 19, mus: [0, 0, 0], ns: [1, 1, 1], ls: [2, 2, 2], ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 2, -1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: [0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.10690449676496976]} + - {mu0: 1, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 0, 1], ns: [1, 1, 1], ls: [1, 2, 1], ms_combs: [-1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -2, 1, 1, -1, 0, 1, 0, -1], ctildes: [0.10540925533894599, -0.18257418583505536, 0.25819888974716115, -0.18257418583505536, 0.21081851067789198, -0.18257418583505536, 0.25819888974716115, -0.18257418583505536, 0.10540925533894599]} + - {mu0: 1, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 2, 2], ns: [1, 
1, 1], ls: [1, 1, 2], ms_combs: [-1, -1, 2, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 1, 1, -2], ctildes: [0.19999999999999998, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]} + - {mu0: 1, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [2, 2, 2], ns: [1, 1, 1], ls: [1, 1, 2], ms_combs: [-1, -1, 2, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 1, 1, -2], ctildes: [0.19999999999999998, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]} + - {mu0: 1, rank: 3, ndensity: 1, num_ms_combs: 19, mus: [0, 1, 2], ns: [1, 1, 1], ls: [2, 2, 2], ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 2, -1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: [0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.10690449676496976]} + - {mu0: 1, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 2, 2], ns: [1, 1, 1], ls: [2, 1, 1], ms_combs: [-2, 1, 1, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 2, -1, -1], ctildes: [0.25819888974716115, -0.18257418583505536, -0.18257418583505536, 0.10540925533894599, 0.21081851067789198, 0.10540925533894599, -0.18257418583505536, -0.18257418583505536, 0.25819888974716115]} + - {mu0: 1, rank: 3, ndensity: 1, num_ms_combs: 19, mus: [0, 0, 2], ns: [1, 1, 1], ls: [2, 2, 2], ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 2, -1, 0, 
1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: [0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.10690449676496976]} + - {mu0: 1, rank: 3, ndensity: 1, num_ms_combs: 19, mus: [0, 0, 1], ns: [1, 1, 1], ls: [2, 2, 2], ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 2, -1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: [0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.10690449676496976]} + - {mu0: 1, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 0, 2], ns: [1, 1, 1], ls: [1, 2, 1], ms_combs: [-1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -2, 1, 1, -1, 0, 1, 0, -1], ctildes: [0.10540925533894599, -0.18257418583505536, 0.25819888974716115, -0.18257418583505536, 0.21081851067789198, -0.18257418583505536, 0.25819888974716115, -0.18257418583505536, 0.10540925533894599]} + - {mu0: 1, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [1, 2, 2], ns: [1, 1, 1], ls: [1, 1, 2], ms_combs: [-1, -1, 2, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 1, 1, -2], ctildes: [0.19999999999999998, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, 
-0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]} + - {mu0: 1, rank: 3, ndensity: 1, num_ms_combs: 19, mus: [2, 2, 2], ns: [1, 1, 1], ls: [2, 2, 2], ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 2, -1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: [0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.10690449676496976]} + - {mu0: 1, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [1, 1, 1], ns: [1, 1, 1], ls: [1, 1, 2], ms_combs: [-1, -1, 2, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 1, 1, -2], ctildes: [0.19999999999999998, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]} + - {mu0: 1, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [0, 0, 0, 0], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]} + - {mu0: 1, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [0, 0, 0, 1], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 
0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]} + - {mu0: 1, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [0, 0, 0, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]} + - {mu0: 1, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [0, 0, 1, 1], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]} + - {mu0: 1, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [0, 0, 1, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 
1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]} + - {mu0: 1, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [0, 0, 2, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]} + - {mu0: 1, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [0, 1, 1, 1], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]} + - {mu0: 1, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [0, 1, 1, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 
-0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]} + - {mu0: 1, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [0, 1, 2, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]} + - {mu0: 1, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [0, 2, 2, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]} + - {mu0: 1, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [1, 1, 1, 1], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]} 
+ - {mu0: 1, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [1, 1, 1, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]} + - {mu0: 1, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [1, 1, 2, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]} + - {mu0: 1, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [1, 2, 2, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]} + - {mu0: 1, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [2, 2, 2, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, 
-1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]} + 2: + - {mu0: 2, rank: 1, ndensity: 1, num_ms_combs: 1, mus: [0], ns: [1], ls: [0], ms_combs: [0], ctildes: [1.0]} + - {mu0: 2, rank: 1, ndensity: 1, num_ms_combs: 1, mus: [1], ns: [1], ls: [0], ms_combs: [0], ctildes: [1.0]} + - {mu0: 2, rank: 1, ndensity: 1, num_ms_combs: 1, mus: [0], ns: [2], ls: [0], ms_combs: [0], ctildes: [1.0]} + - {mu0: 2, rank: 1, ndensity: 1, num_ms_combs: 1, mus: [1], ns: [2], ls: [0], ms_combs: [0], ctildes: [1.0]} + - {mu0: 2, rank: 1, ndensity: 1, num_ms_combs: 1, mus: [2], ns: [1], ls: [0], ms_combs: [0], ctildes: [1.0]} + - {mu0: 2, rank: 1, ndensity: 1, num_ms_combs: 1, mus: [2], ns: [2], ls: [0], ms_combs: [0], ctildes: [1.0]} + - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [1, 1], ns: [1, 1], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 2], ns: [2, 1], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 1], ns: [1, 1], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [1, 1], ns: [1, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, 
-0.5773502691896257, 0.5773502691896257]} + - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [1, 2], ns: [1, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 1], ns: [1, 1], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [1, 1], ns: [2, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 1], ns: [1, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [1, 1], ns: [1, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 1], ns: [2, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [1, 2], ns: [2, 1], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [1, 1], ns: [1, 1], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 2], ns: [2, 1], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 
0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [2, 2], ns: [1, 1], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 0], ns: [2, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 2], ns: [1, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 1], ns: [2, 1], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 2], ns: [2, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [1, 2], ns: [1, 1], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [2, 2], ns: [1, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 1], ns: [2, 1], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [2, 2], ns: [2, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, 
-0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 0], ns: [1, 1], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [1, 2], ns: [1, 1], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 0], ns: [1, 1], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 0], ns: [2, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [1, 1], ns: [2, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 2], ns: [1, 1], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 0], ns: [1, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 1], ns: [2, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [1, 2], ns: [2, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 2, rank: 
2, ndensity: 1, num_ms_combs: 3, mus: [0, 2], ns: [2, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [1, 2], ns: [1, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [2, 2], ns: [1, 1], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 1], ns: [1, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [2, 2], ns: [2, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 0], ns: [1, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 2], ns: [1, 1], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [1, 2], ns: [2, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [1, 2], ns: [2, 1], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]} + - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 2], 
ns: [1, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [2, 2], ns: [1, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]} + - {mu0: 2, rank: 3, ndensity: 1, num_ms_combs: 19, mus: [1, 2, 2], ns: [1, 1, 1], ls: [2, 2, 2], ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 2, -1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: [0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.10690449676496976]} + - {mu0: 2, rank: 3, ndensity: 1, num_ms_combs: 19, mus: [0, 2, 2], ns: [1, 1, 1], ls: [2, 2, 2], ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 2, -1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: [0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.10690449676496976]} + - {mu0: 2, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 0, 0], ns: [1, 1, 1], ls: [1, 1, 2], ms_combs: [-1, -1, 2, -1, 0, 1, -1, 1, 0, 0, 
-1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 1, 1, -2], ctildes: [0.19999999999999998, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]} + - {mu0: 2, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 2, 2], ns: [1, 1, 1], ls: [2, 1, 1], ms_combs: [-2, 1, 1, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 2, -1, -1], ctildes: [0.25819888974716115, -0.18257418583505536, -0.18257418583505536, 0.10540925533894599, 0.21081851067789198, 0.10540925533894599, -0.18257418583505536, -0.18257418583505536, 0.25819888974716115]} + - {mu0: 2, rank: 3, ndensity: 1, num_ms_combs: 19, mus: [1, 1, 2], ns: [1, 1, 1], ls: [2, 2, 2], ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 2, -1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: [0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.10690449676496976]} + - {mu0: 2, rank: 3, ndensity: 1, num_ms_combs: 19, mus: [0, 0, 2], ns: [1, 1, 1], ls: [2, 2, 2], ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 2, -1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: [0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 
0.05345224838248488, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.10690449676496976]} + - {mu0: 2, rank: 3, ndensity: 1, num_ms_combs: 19, mus: [1, 1, 1], ns: [1, 1, 1], ls: [2, 2, 2], ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 2, -1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: [0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.10690449676496976]} + - {mu0: 2, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 0, 2], ns: [1, 1, 1], ls: [1, 2, 1], ms_combs: [-1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -2, 1, 1, -1, 0, 1, 0, -1], ctildes: [0.10540925533894599, -0.18257418583505536, 0.25819888974716115, -0.18257418583505536, 0.21081851067789198, -0.18257418583505536, 0.25819888974716115, -0.18257418583505536, 0.10540925533894599]} + - {mu0: 2, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 1, 1], ns: [1, 1, 1], ls: [2, 1, 1], ms_combs: [-2, 1, 1, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 2, -1, -1], ctildes: [0.25819888974716115, -0.18257418583505536, -0.18257418583505536, 0.10540925533894599, 0.21081851067789198, 0.10540925533894599, -0.18257418583505536, -0.18257418583505536, 0.25819888974716115]} + - {mu0: 2, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 1, 2], ns: [1, 1, 1], ls: [2, 1, 1], ms_combs: [-2, 1, 1, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 2, -1, -1], ctildes: [0.25819888974716115, -0.18257418583505536, -0.18257418583505536, 0.10540925533894599, 0.21081851067789198, 0.10540925533894599, -0.18257418583505536, 
-0.18257418583505536, 0.25819888974716115]} + - {mu0: 2, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 0, 1], ns: [1, 1, 1], ls: [1, 1, 2], ms_combs: [-1, -1, 2, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 1, 1, -2], ctildes: [0.19999999999999998, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]} + - {mu0: 2, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 2, 2], ns: [1, 1, 1], ls: [1, 1, 2], ms_combs: [-1, -1, 2, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 1, 1, -2], ctildes: [0.19999999999999998, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]} + - {mu0: 2, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 0, 2], ns: [1, 1, 1], ls: [1, 1, 2], ms_combs: [-1, -1, 2, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 1, 1, -2], ctildes: [0.19999999999999998, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]} + - {mu0: 2, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 0, 1], ns: [1, 1, 1], ls: [1, 2, 1], ms_combs: [-1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -2, 1, 1, -1, 0, 1, 0, -1], ctildes: [0.10540925533894599, -0.18257418583505536, 0.25819888974716115, -0.18257418583505536, 0.21081851067789198, -0.18257418583505536, 0.25819888974716115, -0.18257418583505536, 0.10540925533894599]} + - {mu0: 2, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 1, 2], ns: [1, 1, 1], ls: [1, 2, 1], ms_combs: [-1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -2, 1, 1, -1, 0, 1, 0, -1], ctildes: [0.10540925533894599, -0.18257418583505536, 0.25819888974716115, -0.18257418583505536, 0.21081851067789198, -0.18257418583505536, 
0.25819888974716115, -0.18257418583505536, 0.10540925533894599]} + - {mu0: 2, rank: 3, ndensity: 1, num_ms_combs: 19, mus: [0, 0, 1], ns: [1, 1, 1], ls: [2, 2, 2], ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 2, -1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: [0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.10690449676496976]} + - {mu0: 2, rank: 3, ndensity: 1, num_ms_combs: 19, mus: [0, 0, 0], ns: [1, 1, 1], ls: [2, 2, 2], ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 2, -1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: [0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.10690449676496976]} + - {mu0: 2, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 1, 2], ns: [1, 1, 1], ls: [1, 1, 2], ms_combs: [-1, -1, 2, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 1, 1, -2], ctildes: [0.19999999999999998, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]} + - {mu0: 2, rank: 3, ndensity: 1, num_ms_combs: 19, mus: [2, 2, 2], ns: [1, 1, 1], ls: [2, 2, 2], 
ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 2, -1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: [0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.10690449676496976]} + - {mu0: 2, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [1, 1, 2], ns: [1, 1, 1], ls: [1, 1, 2], ms_combs: [-1, -1, 2, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 1, 1, -2], ctildes: [0.19999999999999998, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]} + - {mu0: 2, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [2, 2, 2], ns: [1, 1, 1], ls: [1, 1, 2], ms_combs: [-1, -1, 2, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 1, 1, -2], ctildes: [0.19999999999999998, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]} + - {mu0: 2, rank: 3, ndensity: 1, num_ms_combs: 19, mus: [0, 1, 2], ns: [1, 1, 1], ls: [2, 2, 2], ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 2, -1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: [0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, 
-0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.10690449676496976]} + - {mu0: 2, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 1, 1], ns: [1, 1, 1], ls: [1, 1, 2], ms_combs: [-1, -1, 2, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 1, 1, -2], ctildes: [0.19999999999999998, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]} + - {mu0: 2, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [1, 1, 2], ns: [1, 1, 1], ls: [1, 2, 1], ms_combs: [-1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -2, 1, 1, -1, 0, 1, 0, -1], ctildes: [0.10540925533894599, -0.18257418583505536, 0.25819888974716115, -0.18257418583505536, 0.21081851067789198, -0.18257418583505536, 0.25819888974716115, -0.18257418583505536, 0.10540925533894599]} + - {mu0: 2, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [1, 1, 1], ns: [1, 1, 1], ls: [1, 1, 2], ms_combs: [-1, -1, 2, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 1, 1, -2], ctildes: [0.19999999999999998, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]} + - {mu0: 2, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [1, 2, 2], ns: [1, 1, 1], ls: [2, 1, 1], ms_combs: [-2, 1, 1, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 2, -1, -1], ctildes: [0.25819888974716115, -0.18257418583505536, -0.18257418583505536, 0.10540925533894599, 0.21081851067789198, 0.10540925533894599, -0.18257418583505536, -0.18257418583505536, 0.25819888974716115]} + - {mu0: 2, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [1, 2, 2], ns: [1, 1, 1], ls: [1, 1, 2], ms_combs: [-1, -1, 2, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 1, 1, -2], ctildes: [0.19999999999999998, 
-0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]} + - {mu0: 2, rank: 3, ndensity: 1, num_ms_combs: 19, mus: [0, 1, 1], ns: [1, 1, 1], ls: [2, 2, 2], ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 2, -1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: [0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.10690449676496976]} + - {mu0: 2, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [0, 0, 0, 0], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]} + - {mu0: 2, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [0, 0, 0, 1], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 
0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]} + - {mu0: 2, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [0, 0, 0, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]} + - {mu0: 2, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [0, 0, 1, 1], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]} + - {mu0: 2, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [0, 0, 1, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]} + - {mu0: 2, rank: 
4, ndensity: 1, num_ms_combs: 19, mus: [0, 0, 2, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]} + - {mu0: 2, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [0, 1, 1, 1], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]} + - {mu0: 2, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [0, 1, 1, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]} + - {mu0: 2, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [0, 1, 2, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, 
-1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]} + - {mu0: 2, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [0, 2, 2, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]} + - {mu0: 2, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [1, 1, 1, 1], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]} + - {mu0: 2, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [1, 1, 1, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 
0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]} + - {mu0: 2, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [1, 1, 2, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]} + - {mu0: 2, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [1, 2, 2, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]} + - {mu0: 2, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [2, 2, 2, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 
0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]} diff --git a/examples/PACKAGES/pace/compute/in.compute b/examples/PACKAGES/pace/compute/in.compute new file mode 100644 index 0000000000..a0ef25f606 --- /dev/null +++ b/examples/PACKAGES/pace/compute/in.compute @@ -0,0 +1,22 @@ +#info all out log +units metal +atom_style atomic +boundary p p p +atom_modify map hash +boundary p p p +read_data latte_cell_0.data +mass 1 1.00 +mass 2 14.00 +mass 3 15.999 + + # potential settings + +pair_style zero 5.7 +pair_coeff * * + +compute pace all pace coupling_coefficients.yace 1 0 + +thermo 1 +thermo_style custom step temp c_pace[1][183] + +run 0 diff --git a/examples/PACKAGES/pace/compute/latte_cell_0.data b/examples/PACKAGES/pace/compute/latte_cell_0.data new file mode 100644 index 0000000000..b18ec13a4d --- /dev/null +++ b/examples/PACKAGES/pace/compute/latte_cell_0.data @@ -0,0 +1,172 @@ +latte_cell_0.data (written by ASE) + +161 atoms +3 atom types +0.0 12 xlo xhi +0.0 12 ylo yhi +0.0 12 zlo zhi + + +Atoms + + 1 3 1.2688096799999999 2.0079938400000001 2.7446829899999998 + 2 1 1.5343068200000001 2.0638766500000001 3.7105626900000002 + 3 1 1.7848279600000001 2.6755003400000001 2.2268847200000002 + 4 1 1.56251195 1.1089126899999999 2.3978115199999999 + 5 1 11.61728216 5.71881094 2.4732045999999999 + 6 2 6.5501865600000002 4.7439566800000001 3.6526025500000001 + 7 1 6.4564895299999998 4.1571673000000002 2.6975267999999999 + 8 2 2.0835561 1.59406078 8.5498047600000007 + 9 1 1.1041162499999999 1.4971771599999999 8.1507879200000009 + 10 1 2.60115534 2.2945960400000001 7.95374187 + 11 1 1.9817723300000001 2.0194066400000001 9.5128239400000005 + 12 1 0.99333338000000004 3.6983907299999998 8.1903947899999991 + 13 3 4.9484070999999998 5.3645501400000004 9.16152503 + 14 1 9.0716170599999995 9.3748453999999999 4.2276462400000003 + 15 2 0.30864418999999998 7.7136657499999997 
2.9274995599999998 + 16 1 0.47661671 10.1807211 3.71160091 + 17 1 1.07465334 7.8226921999999997 3.5771466900000002 + 18 1 0.38402249999999999 8.3770493300000002 2.1748437100000002 + 19 1 11.435413410000001 7.7903735999999997 3.4040245499999999 + 20 3 6.1570384599999999 10.25988474 3.50899568 + 21 1 5.5932224399999999 9.5632944700000007 3.1446559000000001 + 22 2 1.7785569000000001 7.6312579300000003 9.1488452299999992 + 23 1 2.5594048599999999 6.96832838 9.3069700199999996 + 24 1 2.12441551 8.4547986999999996 8.6428622900000001 + 25 1 1.04552782 7.1697722800000001 8.5894244999999998 + 26 1 0.34824445999999998 10.17844028 9.1629463799999993 + 27 3 5.9638830399999998 10.723709400000001 9.4568803900000002 + 28 1 6.5890835699999997 10.926486110000001 8.7981925800000003 + 29 2 7.1065890400000002 1.83029753 3.3452543600000002 + 30 1 6.9229304999999997 1.8465022099999999 4.3089037100000001 + 31 1 8.0780433600000006 1.9303052199999999 3.2089521400000001 + 32 1 5.6795373600000003 10.471831630000001 4.3244390499999996 + 33 1 6.82999417 0.95850113000000003 2.9815288199999999 + 34 2 11.383805349999999 4.6301225199999996 2.5393688399999998 + 35 1 0.37927047000000003 4.1943216300000001 2.59073807 + 36 3 5.2376410099999999 1.91523463 9.7240636400000007 + 37 1 4.7887202499999999 2.7036936499999999 9.5698142300000004 + 38 1 9.8129906699999996 9.2075140700000002 4.08265499 + 39 1 4.7980879500000002 1.1403494700000001 9.6739962800000008 + 40 1 5.4455845600000004 2.0102099999999998 10.620773509999999 + 41 3 0.90954338999999995 4.6240093199999999 8.3108110600000007 + 42 1 11.909735319999999 4.7483814000000004 8.2500624600000005 + 43 2 7.3223424499999998 7.5866457 3.0245226500000002 + 44 1 7.4470362200000002 8.3169646700000008 3.7148003300000001 + 45 1 6.9073805300000002 7.9385021 2.1723768699999999 + 46 1 5.5542868500000004 5.1176065800000003 3.7655251999999999 + 47 1 6.8124309500000004 6.7778811599999997 3.3973232499999999 + 48 2 0.29575823000000001 11.04303794 3.1016142499999999 + 49 
1 0.86490721999999998 11.83879228 3.6389974500000002 + 50 3 6.85201686 8.0846369300000003 8.8762878799999996 + 51 1 7.3351430100000004 7.4263498700000001 9.3821674799999997 + 52 1 6.7919613300000004 7.7595477199999996 7.9716174799999999 + 53 1 3.8990487699999998 6.4283490399999996 8.8832409600000002 + 54 1 5.95997296 9.9329723199999993 9.4746654699999997 + 55 3 11.403658979999999 10.371960359999999 9.2766092199999992 + 56 1 10.983666360000001 9.5157199800000001 9.1478757300000009 + 57 3 1.5223279700000001 5.3327331100000004 0.57537605999999997 + 58 1 2.3815113999999999 5.7251991200000001 0.77945295999999997 + 59 1 0.92079957000000001 6.0931282299999996 0.62203253000000003 + 60 3 11.23490924 2.9153355200000002 6.7585064099999999 + 61 1 10.792340190000001 2.9755225099999998 5.9000018399999998 + 62 1 10.751242059999999 2.1896156000000002 7.1807401500000001 + 63 3 11.39027944 7.3462855600000001 6.7258299499999996 + 64 1 10.92025679 6.69831954 7.2776696599999999 + 65 1 11.12238028 7.0632020999999998 5.8394107799999997 + 66 3 8.4684319499999994 10.71736286 10.60018556 + 67 1 8.5672201599999998 11.420466080000001 11.25794033 + 68 1 9.0803109800000001 10.04804949 10.9406517 + 69 3 6.5851757299999996 9.9940623399999993 6.5574614899999997 + 70 1 7.0276325799999997 10.76096604 6.1723333699999996 + 71 1 5.7419327400000002 10.384583920000001 6.8228822999999998 + 72 3 1.7600546399999999 1.01771919 5.4926787700000004 + 73 1 1.9704209100000001 1.01748419 6.4429703700000003 + 74 1 1.42973007 0.11076352 5.3470644900000002 + 75 1 2.61130613 9.7034123700000006 10.450306830000001 + 76 3 3.05086908 10.48131334 10.085189310000001 + 77 1 3.0032693199999998 10.93357295 9.3652121000000008 + 78 3 6.4631532199999997 8.7652058299999993 11.967847969999999 + 79 1 6.4506808400000004 9.6596595300000008 11.588956019999999 + 80 1 5.6611629700000003 8.3535737700000006 11.638443329999999 + 81 3 1.1745999300000001 5.2420690800000003 5.1001449699999997 + 82 1 1.31932881 5.5236392399999996 
6.0128966300000002 + 83 1 0.58053834999999998 4.4898134599999997 5.2325565000000003 + 84 3 6.7275549699999999 0.78840874999999999 7.3817280900000002 + 85 1 6.3887965600000003 1.54670982 6.8634520400000003 + 86 1 7.6791783999999996 0.94039024000000004 7.2649461000000004 + 87 3 8.5476657199999995 0.0064750299999999997 5.0450514100000001 + 88 1 8.8736290899999997 11.10484108 4.8601807900000003 + 89 1 8.0477597599999999 0.20198361000000001 4.2357399400000002 + 90 3 1.2895030000000001 8.4280097900000008 11.82038504 + 91 1 1.4766666399999999 8.1087866399999999 10.87290333 + 92 1 2.10220669 8.1947620200000006 0.29510553 + 93 3 9.6797907599999995 6.4207335499999996 4.3469150599999997 + 94 1 8.9271530099999996 6.72940235 3.7974122399999999 + 95 1 10.20024126 5.9167739199999998 3.66976111 + 96 3 3.57411616 6.7041021699999996 3.8825478499999999 + 97 1 2.8894899500000002 6.1560529800000001 4.2980848099999998 + 98 1 4.3613707699999997 6.4304732400000004 4.3804965400000002 + 99 3 4.7506556 11.441853350000001 1.12537088 + 100 1 4.0861192800000001 10.748523670000001 1.1923347099999999 + 101 1 5.5035301600000004 10.965688249999999 0.73651277000000004 + 102 3 9.5254526399999992 4.8994443900000002 8.3732284099999994 + 103 1 8.7885959800000002 4.3508043900000004 8.6632831400000008 + 104 1 9.6149067499999994 4.6084911499999999 7.4540068699999997 + 105 3 4.1970746700000001 1.34592128 3.67401439 + 106 1 4.9437011999999996 0.74406280999999996 3.514068 + 107 1 4.1905534900000001 1.7730376000000001 2.7963049400000002 + 108 3 1.88232618 11.95451227 0.60024434000000004 + 109 1 2.0464587299999999 11.02454723 0.38329541 + 110 1 1.1518493700000001 0.17494340999999999 11.99928285 + 111 3 3.7593842199999998 11.01685511 6.4562050800000002 + 112 1 3.2125414299999999 10.4553747 5.8894917099999997 + 113 1 3.4166026899999999 10.821557670000001 7.3296563900000002 + 114 3 9.7039841399999993 3.95001545 11.894743249999999 + 115 1 10.461666060000001 3.9163117999999999 11.285435229999999 + 116 1 10.09834695 
4.4026997400000001 0.68193007999999999 + 117 3 8.5639596400000002 3.5169507499999999 5.6224104199999996 + 118 1 8.3966650299999994 2.6262214699999999 5.2638164300000003 + 119 1 7.9695371399999999 4.0825059799999996 5.0049407400000003 + 120 3 9.6736245000000007 0.48030482000000002 7.9257577799999996 + 121 1 9.6131980400000003 11.883419180000001 7.1680923999999999 + 122 1 9.9784050299999993 11.90238635 8.63894187 + 123 3 3.9424153099999999 6.9650296699999998 11.60258943 + 124 1 4.2767152700000004 6.8460048999999996 10.670225220000001 + 125 1 4.6570638500000001 6.5129461500000003 0.091159879999999999 + 126 3 3.0570173199999999 9.6631958499999993 3.6611250599999998 + 127 1 2.5400490100000002 9.5743355000000001 2.8444047600000002 + 128 1 2.9314874400000002 8.7809807200000005 4.0425234200000002 + 129 3 7.4549612700000001 5.8430850799999998 11.011384720000001 + 130 1 8.1675884100000005 5.4639182799999997 10.47644287 + 131 1 6.7135573700000002 5.8393818399999997 10.361099749999999 + 132 3 9.8029139300000008 7.9578901699999998 10.21404942 + 133 1 10.38910242 8.3400641400000008 10.87949429 + 134 1 9.0637612000000001 7.6392374099999998 10.756928869999999 + 135 3 4.4963435599999997 4.1067935799999997 11.73387805 + 136 1 4.5473727899999998 4.9577970899999997 11.19223377 + 137 1 5.3588818399999996 4.1756111699999998 0.20355936999999999 + 138 3 9.5923448100000002 7.3418014600000001 1.34856172 + 139 1 8.8715593300000002 7.4776837199999999 2.05040471 + 140 1 9.0443221699999992 7.2732200799999998 0.54011714 + 141 3 7.0350963100000001 3.22348773 0.7070824 + 142 1 7.1784470499999999 4.1340314300000003 1.0184109699999999 + 143 1 7.7787854400000001 2.7888888399999998 1.15838887 + 144 3 9.2124107800000008 0.48085899999999998 1.21751966 + 145 1 9.6620436499999993 11.657271079999999 1.45318397 + 146 1 9.9404883900000005 1.11619136 1.18684594 + 147 3 1.19704207 9.5859959200000002 6.6190888899999996 + 148 1 0.25606413 9.6737366500000004 6.8319340899999998 + 149 1 1.2690051899999999 
8.6249354900000004 6.5480112500000001 + 150 3 0.78256133999999999 2.6040609300000002 11.453408359999999 + 151 1 0.61502181 3.5607405999999999 11.40300991 + 152 1 1.55655312 2.5457368800000002 10.866733030000001 + 153 3 5.8627936099999998 7.1217054800000001 5.89173203 + 154 1 6.3432410700000004 7.9400136699999999 6.0855840299999997 + 155 1 5.5077296699999998 6.8468306800000001 6.7436875799999996 + 156 3 10.887828150000001 9.9637482500000001 0.51092815999999996 + 157 1 11.78841776 10.322043069999999 0.44704989000000001 + 158 1 11.02688182 9.2051906700000004 1.0976661299999999 + 159 3 3.93073389 4.1645674499999998 5.7137877000000001 + 160 1 4.6884062999999996 3.5788913299999998 5.5644605800000004 + 161 1 4.2956948500000003 4.7644888099999996 6.3801669700000003 diff --git a/examples/PACKAGES/pace/compute/latte_cell_0.xyz b/examples/PACKAGES/pace/compute/latte_cell_0.xyz new file mode 100644 index 0000000000..afe0a27f35 --- /dev/null +++ b/examples/PACKAGES/pace/compute/latte_cell_0.xyz @@ -0,0 +1,163 @@ +161 +Lattice="12.0 0.0 0.0 0.0 12.0 0.0 0.0 0.0 12.0" Properties=species:S:1:pos:R:3 pbc="T T T" +O 1.26880968 2.00799384 2.74468299 +H 1.53430682 2.06387665 3.71056269 +H 1.78482796 2.67550034 2.22688472 +H 1.56251195 1.10891269 2.39781152 +H 11.61728216 5.71881094 2.47320460 +N 6.55018656 4.74395668 3.65260255 +H 6.45648953 4.15716730 2.69752680 +N 2.08355610 1.59406078 8.54980476 +H 1.10411625 1.49717716 8.15078792 +H 2.60115534 2.29459604 7.95374187 +H 1.98177233 2.01940664 9.51282394 +H 0.99333338 3.69839073 8.19039479 +O 4.94840710 5.36455014 9.16152503 +H 9.07161706 9.37484540 4.22764624 +N 0.30864419 7.71366575 2.92749956 +H 0.47661671 10.18072110 3.71160091 +H 1.07465334 7.82269220 3.57714669 +H 0.38402250 8.37704933 2.17484371 +H 11.43541341 7.79037360 3.40402455 +O 6.15703846 10.25988474 3.50899568 +H 5.59322244 9.56329447 3.14465590 +N 1.77855690 7.63125793 9.14884523 +H 2.55940486 6.96832838 9.30697002 +H 2.12441551 8.45479870 8.64286229 +H 1.04552782 
7.16977228 8.58942450 +H 0.34824446 10.17844028 9.16294638 +O 5.96388304 10.72370940 9.45688039 +H 6.58908357 10.92648611 8.79819258 +N 7.10658904 1.83029753 3.34525436 +H 6.92293050 1.84650221 4.30890371 +H 8.07804336 1.93030522 3.20895214 +H 5.67953736 10.47183163 4.32443905 +H 6.82999417 0.95850113 2.98152882 +N 11.38380535 4.63012252 2.53936884 +H 0.37927047 4.19432163 2.59073807 +O 5.23764101 1.91523463 9.72406364 +H 4.78872025 2.70369365 9.56981423 +H 9.81299067 9.20751407 4.08265499 +H 4.79808795 1.14034947 9.67399628 +H 5.44558456 2.01021000 10.62077351 +O 0.90954339 4.62400932 8.31081106 +H 11.90973532 4.74838140 8.25006246 +N 7.32234245 7.58664570 3.02452265 +H 7.44703622 8.31696467 3.71480033 +H 6.90738053 7.93850210 2.17237687 +H 5.55428685 5.11760658 3.76552520 +H 6.81243095 6.77788116 3.39732325 +N 0.29575823 11.04303794 3.10161425 +H 0.86490722 11.83879228 3.63899745 +O 6.85201686 8.08463693 8.87628788 +H 7.33514301 7.42634987 9.38216748 +H 6.79196133 7.75954772 7.97161748 +H 3.89904877 6.42834904 8.88324096 +H 5.95997296 9.93297232 9.47466547 +O 11.40365898 10.37196036 9.27660922 +H 10.98366636 9.51571998 9.14787573 +O 1.52232797 5.33273311 0.57537606 +H 2.38151140 5.72519912 0.77945296 +H 0.92079957 6.09312823 0.62203253 +O 11.23490924 2.91533552 6.75850641 +H 10.79234019 2.97552251 5.90000184 +H 10.75124206 2.18961560 7.18074015 +O 11.39027944 7.34628556 6.72582995 +H 10.92025679 6.69831954 7.27766966 +H 11.12238028 7.06320210 5.83941078 +O 8.46843195 10.71736286 10.60018556 +H 8.56722016 11.42046608 11.25794033 +H 9.08031098 10.04804949 10.94065170 +O 6.58517573 9.99406234 6.55746149 +H 7.02763258 10.76096604 6.17233337 +H 5.74193274 10.38458392 6.82288230 +O 1.76005464 1.01771919 5.49267877 +H 1.97042091 1.01748419 6.44297037 +H 1.42973007 0.11076352 5.34706449 +H 2.61130613 9.70341237 10.45030683 +O 3.05086908 10.48131334 10.08518931 +H 3.00326932 10.93357295 9.36521210 +O 6.46315322 8.76520583 11.96784797 +H 6.45068084 9.65965953 11.58895602 
+H 5.66116297 8.35357377 11.63844333 +O 1.17459993 5.24206908 5.10014497 +H 1.31932881 5.52363924 6.01289663 +H 0.58053835 4.48981346 5.23255650 +O 6.72755497 0.78840875 7.38172809 +H 6.38879656 1.54670982 6.86345204 +H 7.67917840 0.94039024 7.26494610 +O 8.54766572 0.00647503 5.04505141 +H 8.87362909 11.10484108 4.86018079 +H 8.04775976 0.20198361 4.23573994 +O 1.28950300 8.42800979 11.82038504 +H 1.47666664 8.10878664 10.87290333 +H 2.10220669 8.19476202 0.29510553 +O 9.67979076 6.42073355 4.34691506 +H 8.92715301 6.72940235 3.79741224 +H 10.20024126 5.91677392 3.66976111 +O 3.57411616 6.70410217 3.88254785 +H 2.88948995 6.15605298 4.29808481 +H 4.36137077 6.43047324 4.38049654 +O 4.75065560 11.44185335 1.12537088 +H 4.08611928 10.74852367 1.19233471 +H 5.50353016 10.96568825 0.73651277 +O 9.52545264 4.89944439 8.37322841 +H 8.78859598 4.35080439 8.66328314 +H 9.61490675 4.60849115 7.45400687 +O 4.19707467 1.34592128 3.67401439 +H 4.94370120 0.74406281 3.51406800 +H 4.19055349 1.77303760 2.79630494 +O 1.88232618 11.95451227 0.60024434 +H 2.04645873 11.02454723 0.38329541 +H 1.15184937 0.17494341 11.99928285 +O 3.75938422 11.01685511 6.45620508 +H 3.21254143 10.45537470 5.88949171 +H 3.41660269 10.82155767 7.32965639 +O 9.70398414 3.95001545 11.89474325 +H 10.46166606 3.91631180 11.28543523 +H 10.09834695 4.40269974 0.68193008 +O 8.56395964 3.51695075 5.62241042 +H 8.39666503 2.62622147 5.26381643 +H 7.96953714 4.08250598 5.00494074 +O 9.67362450 0.48030482 7.92575778 +H 9.61319804 11.88341918 7.16809240 +H 9.97840503 11.90238635 8.63894187 +O 3.94241531 6.96502967 11.60258943 +H 4.27671527 6.84600490 10.67022522 +H 4.65706385 6.51294615 0.09115988 +O 3.05701732 9.66319585 3.66112506 +H 2.54004901 9.57433550 2.84440476 +H 2.93148744 8.78098072 4.04252342 +O 7.45496127 5.84308508 11.01138472 +H 8.16758841 5.46391828 10.47644287 +H 6.71355737 5.83938184 10.36109975 +O 9.80291393 7.95789017 10.21404942 +H 10.38910242 8.34006414 10.87949429 +H 9.06376120 7.63923741 
10.75692887 +O 4.49634356 4.10679358 11.73387805 +H 4.54737279 4.95779709 11.19223377 +H 5.35888184 4.17561117 0.20355937 +O 9.59234481 7.34180146 1.34856172 +H 8.87155933 7.47768372 2.05040471 +H 9.04432217 7.27322008 0.54011714 +O 7.03509631 3.22348773 0.70708240 +H 7.17844705 4.13403143 1.01841097 +H 7.77878544 2.78888884 1.15838887 +O 9.21241078 0.48085900 1.21751966 +H 9.66204365 11.65727108 1.45318397 +H 9.94048839 1.11619136 1.18684594 +O 1.19704207 9.58599592 6.61908889 +H 0.25606413 9.67373665 6.83193409 +H 1.26900519 8.62493549 6.54801125 +O 0.78256134 2.60406093 11.45340836 +H 0.61502181 3.56074060 11.40300991 +H 1.55655312 2.54573688 10.86673303 +O 5.86279361 7.12170548 5.89173203 +H 6.34324107 7.94001367 6.08558403 +H 5.50772967 6.84683068 6.74368758 +O 10.88782815 9.96374825 0.51092816 +H 11.78841776 10.32204307 0.44704989 +H 11.02688182 9.20519067 1.09766613 +O 3.93073389 4.16456745 5.71378770 +H 4.68840630 3.57889133 5.56446058 +H 4.29569485 4.76448881 6.38016697 diff --git a/examples/PACKAGES/pace/compute/log.5Dec23.compute.g++.1 b/examples/PACKAGES/pace/compute/log.5Dec23.compute.g++.1 new file mode 100644 index 0000000000..e5036cfe5b --- /dev/null +++ b/examples/PACKAGES/pace/compute/log.5Dec23.compute.g++.1 @@ -0,0 +1,81 @@ +LAMMPS (21 Nov 2023) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +#info all out log +units metal +atom_style atomic +boundary p p p +atom_modify map hash +boundary p p p +read_data latte_cell_0.data +Reading data file ... + orthogonal box = (0 0 0) to (12 12 12) + 1 by 1 by 1 MPI processor grid + reading atoms ... 
+ 161 atoms + read_data CPU = 0.001 seconds +mass 1 1.00 +mass 2 14.00 +mass 3 15.999 + + # potential settings + +pair_style zero 5.7 +pair_coeff * * + +compute pace all pace coupling_coefficients.yace 1 0 + +thermo 1 +thermo_style custom step temp c_pace[1][183] + +run 0 +WARNING: No fixes with time integration, atoms won't move (src/verlet.cpp:60) +Generated 0 of 3 mixed pair_coeff terms from geometric mixing rule +Neighbor list info ... + update: every = 1 steps, delay = 0 steps, check = yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 7.7 + ghost atom cutoff = 7.7 + binsize = 3.85, bins = 4 4 4 + 2 neighbor lists, perpetual/occasional/extra = 1 1 0 + (1) pair zero, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/3d + bin: standard + (2) compute pace, occasional + attributes: full, newton on + pair build: full/bin/atomonly + stencil: full/bin/3d + bin: standard +Per MPI rank memory allocation (min/avg/max) = 6.993 | 6.993 | 6.993 Mbytes + Step Temp c_pace[1][183] + 0 0 8.6885642 +Loop time of 1.217e-06 on 1 procs for 0 steps with 161 atoms + +164.3% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0 | 0 | 0 | 0.0 | 0.00 +Neigh | 0 | 0 | 0 | 0.0 | 0.00 +Comm | 0 | 0 | 0 | 0.0 | 0.00 +Output | 0 | 0 | 0 | 0.0 | 0.00 +Modify | 0 | 0 | 0 | 0.0 | 0.00 +Other | | 1.217e-06 | | |100.00 + +Nlocal: 161 ave 161 max 161 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 1754 ave 1754 max 1754 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 14230 ave 14230 max 14230 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +FullNghs: 28460 ave 28460 max 28460 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 28460 +Ave neighs/atom = 176.77019 +Neighbor list builds = 0 +Dangerous builds = 0 +Total wall time: 0:00:00 diff --git 
a/examples/PACKAGES/pace/compute/log.5Dec23.compute.g++.4 b/examples/PACKAGES/pace/compute/log.5Dec23.compute.g++.4 new file mode 100644 index 0000000000..49ca6129b6 --- /dev/null +++ b/examples/PACKAGES/pace/compute/log.5Dec23.compute.g++.4 @@ -0,0 +1,81 @@ +LAMMPS (21 Nov 2023) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +#info all out log +units metal +atom_style atomic +boundary p p p +atom_modify map hash +boundary p p p +read_data latte_cell_0.data +Reading data file ... + orthogonal box = (0 0 0) to (12 12 12) + 1 by 2 by 2 MPI processor grid + reading atoms ... + 161 atoms + read_data CPU = 0.001 seconds +mass 1 1.00 +mass 2 14.00 +mass 3 15.999 + + # potential settings + +pair_style zero 5.7 +pair_coeff * * + +compute pace all pace coupling_coefficients.yace 1 0 + +thermo 1 +thermo_style custom step temp c_pace[1][183] + +run 0 +WARNING: No fixes with time integration, atoms won't move (src/verlet.cpp:60) +Generated 0 of 3 mixed pair_coeff terms from geometric mixing rule +Neighbor list info ... 
+ update: every = 1 steps, delay = 0 steps, check = yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 7.7 + ghost atom cutoff = 7.7 + binsize = 3.85, bins = 4 4 4 + 2 neighbor lists, perpetual/occasional/extra = 1 1 0 + (1) pair zero, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/3d + bin: standard + (2) compute pace, occasional + attributes: full, newton on + pair build: full/bin/atomonly + stencil: full/bin/3d + bin: standard +Per MPI rank memory allocation (min/avg/max) = 6.97 | 6.97 | 6.971 Mbytes + Step Temp c_pace[1][183] + 0 0 8.6885642 +Loop time of 1.979e-06 on 4 procs for 0 steps with 161 atoms + +164.2% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0 | 0 | 0 | 0.0 | 0.00 +Neigh | 0 | 0 | 0 | 0.0 | 0.00 +Comm | 0 | 0 | 0 | 0.0 | 0.00 +Output | 0 | 0 | 0 | 0.0 | 0.00 +Modify | 0 | 0 | 0 | 0.0 | 0.00 +Other | | 1.979e-06 | | |100.00 + +Nlocal: 40.25 ave 44 max 35 min +Histogram: 1 0 0 0 1 0 0 0 1 1 +Nghost: 1134.5 ave 1159 max 1117 min +Histogram: 1 1 0 0 1 0 0 0 0 1 +Neighs: 3557.5 ave 4115 max 3189 min +Histogram: 2 0 0 0 0 1 0 0 0 1 +FullNghs: 7115 ave 7755 max 6158 min +Histogram: 1 0 0 0 1 0 0 0 0 2 + +Total # of neighbors = 28460 +Ave neighs/atom = 176.77019 +Neighbor list builds = 0 +Dangerous builds = 0 +Total wall time: 0:00:00 diff --git a/examples/PACKAGES/phonon/2-1D-diatomic/in.Ana b/examples/PACKAGES/phonon/2-1D-diatomic/in.Ana index 6312466e58..ca49cd50c6 100644 --- a/examples/PACKAGES/phonon/2-1D-diatomic/in.Ana +++ b/examples/PACKAGES/phonon/2-1D-diatomic/in.Ana @@ -2,15 +2,15 @@ dimension 2 boundary p f p -units lj -atom_style bond +units lj +atom_style bond atom_modify sort 0 1. 
bond_style harmonic pair_style none -communicate single cutoff 2.0 +comm_modify cutoff 2.0 # geometry -read_data data.pos +read_data data.pos # neighbor 1.0 nsq @@ -43,4 +43,4 @@ thermo_modify temp MyTemp thermo 100 # -run 2000000 +run 2000000 diff --git a/examples/PACKAGES/qtb/methane_qbmsst/in.methane_qbmsst b/examples/PACKAGES/qtb/methane_qbmsst/in.methane_qbmsst new file mode 100644 index 0000000000..99d60e52d7 --- /dev/null +++ b/examples/PACKAGES/qtb/methane_qbmsst/in.methane_qbmsst @@ -0,0 +1,33 @@ +## This script first uses fix qtb to equilibrate liquid methane to an initial state with quantum nuclear correction and then simulate shock induced chemical reactions through the quantum thermal bath multi-scale shock technique +#The default system size may take a while to run you can change to a smaller size +variable x_rep equal 5 #x-direction replication number +variable y_rep equal 5 #y-direction replication number +variable z_rep equal 10 #z-direction replication number +variable temperature equal 110.0 #Target quantum temperature (K in real units) +variable delta_t equal 0.25 #MD timestep length (fs in real units) +variable damp_qtb equal 200 #1/gamma where gamma is the friction coefficient in quantum thermal bath (fs in real units) +variable v_msst equal 0.122 #Shock velocity (Angstrom/fs in metal units) +variable q_msst equal 25.0 #Box mass-like parameter in the MSST (mass^2/length^4, where mass=grams/mole and length=Angstrom in real units) +variable mu_msst equal 0.9 #Artificial viscosity in the MSST (mass/length/time, where mass=grams/mole, length=Angstrom and time=fs in real units) +variable tscale_msst equal 0.01 #Temperature reduction parameter in the MSST (unitless) +variable eta_qbmsst equal 1.0 #Coupling constant between the shock and the quantum thermal bath (unitless constant) + + +##The included part first constructs a liquid methane structure of a given size. 
It then uses fix qtb to equilibrate the computational cell to the specified temperature and pressure. +include methane_qtb.mod + + +##Shock compression with quantum nuclear corrections +reset_timestep 0 +fix shock all qbmsst z ${v_msst} q ${q_msst} mu ${mu_msst} tscale ${tscale_msst} damp ${damp_qtb} f_max 0.3 N_f 50 seed 35082 eta ${eta_qbmsst} beta 400 T_init ${temperature} +fix_modify shock energy yes +variable dhug equal f_shock[1] +variable dray equal f_shock[2] +variable lgr_vel equal f_shock[3] +variable lgr_pos equal f_shock[4] +variable T_qm equal f_shock[5] #Temperature with quantum nuclear correction +thermo_style custom step v_T_qm press etotal vol lx ly lz pzz v_dhug v_dray v_lgr_vel v_lgr_pos +thermo 20 +timestep ${delta_t} +#restart 1000 restart +run 500 diff --git a/examples/PACKAGES/qtb/methane_qbmsst/log.30Nov23.methane_qbmsst.g++.1 b/examples/PACKAGES/qtb/methane_qbmsst/log.30Nov23.methane_qbmsst.g++.1 new file mode 100644 index 0000000000..d46e62cfd5 --- /dev/null +++ b/examples/PACKAGES/qtb/methane_qbmsst/log.30Nov23.methane_qbmsst.g++.1 @@ -0,0 +1,280 @@ +LAMMPS (21 Nov 2023) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. 
(src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +## This script first uses fix qtb to equilibrate liquid methane to an initial state with quantum nuclear correction and then simulate shock induced chemical reactions through the quantum thermal bath multi-scale shock technique +#The default system size may take a while to run you can change to a smaller size +variable x_rep equal 5 #x-direction replication number +variable y_rep equal 5 #y-direction replication number +variable z_rep equal 10 #z-direction replication number +variable temperature equal 110.0 #Target quantum temperature (K in real units) +variable delta_t equal 0.25 #MD timestep length (fs in real units) +variable damp_qtb equal 200 #1/gamma where gamma is the friction coefficient in quantum thermal bath (fs in real units) +variable v_msst equal 0.122 #Shock velocity (Angstrom/fs in metal units) +variable q_msst equal 25.0 #Box mass-like parameter in the MSST (mass^2/length^4, where mass=grams/mole and length=Angstrom in real units) +variable mu_msst equal 0.9 #Artificial viscosity in the MSST (mass/length/time, where mass=grams/mole, length=Angstrom and time=fs in real units) +variable tscale_msst equal 0.01 #Temperature reduction parameter in the MSST (unitless) +variable eta_qbmsst equal 1.0 #Coupling constant between the shock and the quantum thermal bath (unitless constant) + + +##The included part first constructs a liquid methane structure of a given size. It then uses fix qtb to equilibrate the computational cell to the specified temperature and pressure. +include methane_qtb.mod +## This script first constructs a liquid methane structure of a given size. It then uses fix qtb to equilibrate the computational cell to the specified temperature and pressure. 
+ + +## This part defines units, methane structure, and atomic information +#General +units real +dimension 3 +boundary p p p +atom_style charge + +#Lattice +lattice custom 1.0 a1 3.9783624 0 0 a2 0 3.9783624 0 a3 0 0 3.9783624 basis 0.5 0.5 0.5 basis 0.663 0.663 0.663 basis 0.337 0.337 0.663 basis 0.663 0.337 0.337 basis 0.337 0.663 0.337 +Lattice spacing in x,y,z = 3.9783624 3.9783624 3.9783624 + +#Computational Cell +region simbox block 0 3.9783624 0 3.9783624 0 3.9783624 units box +create_box 2 simbox +Created orthogonal box = (0 0 0) to (3.9783624 3.9783624 3.9783624) + 1 by 1 by 1 MPI processor grid +create_atoms 1 box basis 1 1 basis 2 2 basis 3 2 basis 4 2 basis 5 2 +Created 5 atoms + using lattice units in orthogonal box = (0 0 0) to (3.9783624 3.9783624 3.9783624) + create_atoms CPU = 0.000 seconds +replicate ${x_rep} ${y_rep} ${z_rep} +replicate 5 ${y_rep} ${z_rep} +replicate 5 5 ${z_rep} +replicate 5 5 10 +Replication is creating a 5x5x10 = 250 times larger system... + orthogonal box = (0 0 0) to (19.891812 19.891812 39.783624) + 1 by 1 by 1 MPI processor grid + 1250 atoms + replicate CPU = 0.000 seconds + +#Atomic Information +mass 1 12.011150 +mass 2 1.007970 + + +## This part defines the reax pair potential in methane, force field coefficients are specified in "ffield.reax" +#Pair Potentials +pair_style reaxff NULL +pair_coeff * * ffield.reax C H +fix 0 all qeq/reax 1 0.0 10.0 1.0e-6 reaxff + +#Neighbor Style +neighbor 2.5 bin +neigh_modify every 10 delay 0 check no + + +## This part equilibrates liquid methane to a temperature of ${temperature}(unit temperatureture) with quantum nuclear effects +#Initialization +velocity all create ${temperature} 93 dist gaussian sum no mom yes rot yes loop all +velocity all create 110 93 dist gaussian sum no mom yes rot yes loop all + +#Setup output +thermo_style custom step temp press etotal vol +thermo 20 + +#Colored thermal bath +fix scapegoat_qtb all nve #NVE does the time integration +fix methane_qtb all qtb 
temp ${temperature} damp ${damp_qtb} seed 35082 f_max 0.3 N_f 50 #Change f_max if your Debye frequency is higher +fix methane_qtb all qtb temp 110 damp ${damp_qtb} seed 35082 f_max 0.3 N_f 50 +fix methane_qtb all qtb temp 110 damp 200 seed 35082 f_max 0.3 N_f 50 +timestep ${delta_t} +timestep 0.25 +run 500 #500 fs + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +- fix qeq/reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Neighbor list info ... 
+ update: every = 10 steps, delay = 0 steps, check = no + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 12.5 + ghost atom cutoff = 12.5 + binsize = 6.25, bins = 4 4 7 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/newtoff/ghost + stencil: full/ghost/bin/3d + bin: standard + (2) fix qeq/reax, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 201.3 | 201.3 | 201.3 Mbytes + Step Temp Press TotEng Volume + 0 110 -15717.706 -110869.31 15741.751 + 20 133.92166 8773.5364 -110569.51 15741.751 + 40 184.43244 -12136.835 -110378.92 15741.751 + 60 203.58164 6527.2188 -110190.9 15741.751 + 80 183.0518 -9667.6163 -110095.24 15741.751 + 100 236.07378 4393.5089 -109905.8 15741.751 + 120 226.94599 -5612.6845 -109708.46 15741.751 + 140 249.34156 988.50573 -109631.88 15741.751 + 160 255.08331 -1397.98 -109469.09 15741.751 + 180 281.64743 -1682.598 -109285.53 15741.751 + 200 303.76929 2594.8345 -109206.84 15741.751 + 220 311.6547 -4566.4307 -109053.21 15741.751 + 240 350.68316 5132.0272 -108918.26 15741.751 + 260 347.11102 -6078.5078 -108828.31 15741.751 + 280 366.56298 6373.2426 -108694.64 15741.751 + 300 393.62524 -6438.9321 -108521.5 15741.751 + 320 403.64821 5946.6873 -108487.83 15741.751 + 340 406.12883 -5053.5592 -108331.25 15741.751 + 360 450.60139 4323.0942 -108185.06 15741.751 + 380 429.46056 -3317.8604 -108146.84 15741.751 + 400 448.11876 3264.6165 -108048.01 15741.751 + 420 485.98657 -3047.3542 -107882.88 15741.751 + 440 463.23761 3088.3325 -107853.09 15741.751 + 460 504.27223 -1966.5888 -107689.56 15741.751 + 480 515.66783 2915.6322 -107550.83 15741.751 + 500 516.26369 -1733.2701 -107498.06 15741.751 +Loop time of 41.4818 on 1 procs for 500 steps with 1250 atoms + +Performance: 0.260 ns/day, 92.182 hours/ns, 12.053 timesteps/s, 15.067 katom-step/s 
+99.8% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 30.707 | 30.707 | 30.707 | 0.0 | 74.03 +Neigh | 2.2815 | 2.2815 | 2.2815 | 0.0 | 5.50 +Comm | 0.023963 | 0.023963 | 0.023963 | 0.0 | 0.06 +Output | 0.00073327 | 0.00073327 | 0.00073327 | 0.0 | 0.00 +Modify | 8.4653 | 8.4653 | 8.4653 | 0.0 | 20.41 +Other | | 0.00334 | | | 0.01 + +Nlocal: 1250 ave 1250 max 1250 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 8444 ave 8444 max 8444 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 601915 ave 601915 max 601915 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 601915 +Ave neighs/atom = 481.532 +Neighbor list builds = 50 +Dangerous builds not checked +unfix methane_qtb +unfix scapegoat_qtb + + +##Shock compression with quantum nuclear corrections +reset_timestep 0 +fix shock all qbmsst z ${v_msst} q ${q_msst} mu ${mu_msst} tscale ${tscale_msst} damp ${damp_qtb} f_max 0.3 N_f 50 seed 35082 eta ${eta_qbmsst} beta 400 T_init ${temperature} +fix shock all qbmsst z 0.122 q ${q_msst} mu ${mu_msst} tscale ${tscale_msst} damp ${damp_qtb} f_max 0.3 N_f 50 seed 35082 eta ${eta_qbmsst} beta 400 T_init ${temperature} +fix shock all qbmsst z 0.122 q 25 mu ${mu_msst} tscale ${tscale_msst} damp ${damp_qtb} f_max 0.3 N_f 50 seed 35082 eta ${eta_qbmsst} beta 400 T_init ${temperature} +fix shock all qbmsst z 0.122 q 25 mu 0.9 tscale ${tscale_msst} damp ${damp_qtb} f_max 0.3 N_f 50 seed 35082 eta ${eta_qbmsst} beta 400 T_init ${temperature} +fix shock all qbmsst z 0.122 q 25 mu 0.9 tscale 0.01 damp ${damp_qtb} f_max 0.3 N_f 50 seed 35082 eta ${eta_qbmsst} beta 400 T_init ${temperature} +fix shock all qbmsst z 0.122 q 25 mu 0.9 tscale 0.01 damp 200 f_max 0.3 N_f 50 seed 35082 eta ${eta_qbmsst} beta 400 T_init ${temperature} +fix shock all qbmsst z 0.122 q 25 mu 0.9 tscale 0.01 damp 200 f_max 0.3 N_f 50 seed 35082 eta 1 beta 400 
T_init ${temperature} +fix shock all qbmsst z 0.122 q 25 mu 0.9 tscale 0.01 damp 200 f_max 0.3 N_f 50 seed 35082 eta 1 beta 400 T_init 110 +QBMSST parameters: + Shock in z direction + Cell mass-like parameter qmass (units of mass^2/length^4) = 2.50000e+01 + Shock velocity = 1.22000e-01 + Artificial viscosity (units of mass/length/time) = 9.00000e-01 + Initial pressure calculated on first step + Initial volume calculated on first step + Initial energy calculated on first step +fix_modify shock energy yes +variable dhug equal f_shock[1] +variable dray equal f_shock[2] +variable lgr_vel equal f_shock[3] +variable lgr_pos equal f_shock[4] +variable T_qm equal f_shock[5] #Temperature with quantum nuclear correction +thermo_style custom step v_T_qm press etotal vol lx ly lz pzz v_dhug v_dray v_lgr_vel v_lgr_pos +thermo 20 +timestep ${delta_t} +timestep 0.25 +#restart 1000 restart +run 500 +Fix QBMSST v0 = 1.57418e+04 +Fix QBMSST p0 = -3.03801e+03 +Fix QBMSST e0 = to be -1.07498e+05 +Fix QBMSST initial strain rate of -1.02043e-04 established by reducing temperature by factor of 1.00000e-02 +Per MPI rank memory allocation (min/avg/max) = 201.4 | 201.4 | 201.4 Mbytes + Step v_T_qm Press TotEng Volume Lx Ly Lz Pzz v_dhug v_dray v_lgr_vel v_lgr_pos + 0 110 -1789.091 -107498.06 15741.751 19.891812 19.891812 39.783624 -3095.1546 1.9543098e-12 -57.148468 0 0 + 20 110 313.41128 -107231.57 15733.908 19.891812 19.891812 39.763803 1026.815 -35.805172 3755.1834 6.0783853e-05 -0.60983919 + 40 110 1248.5771 -107106.23 15726.494 19.891812 19.891812 39.745066 -277.53233 -52.672766 2158.1479 0.00011824041 -1.219383 + 60 110 -944.55947 -107017.75 15719.482 19.891812 19.891812 39.727345 1006.8843 -64.550247 3165.7346 0.00017258388 -1.8286479 + 80 110 2164.646 -107053.82 15712.848 19.891812 19.891812 39.710579 686.99949 -59.728513 2583.9345 0.00022399951 -2.4376489 + 100 110 -332.40946 -106996.04 15706.579 19.891812 19.891812 39.694734 1555.274 -67.472889 3204.6947 0.00027258815 -3.0464001 + 
120 110 2556.8172 -106828.33 15700.655 19.891812 19.891812 39.679765 -1406.2492 -90.123866 9.330762 0.00031849257 -3.6549157 + 140 110 -649.1633 -106851.95 15695.029 19.891812 19.891812 39.665545 3704.8784 -86.742267 4898.3193 0.00036209988 -4.2632077 + 160 110 2301.4774 -106787.04 15689.738 19.891812 19.891812 39.652174 -893.31294 -95.690383 91.247096 0.00040310452 -4.8712886 + 180 110 -701.59672 -106639.61 15684.711 19.891812 19.891812 39.63947 3211.2065 -115.27944 3997.3199 0.00044206086 -5.47917 + 200 110 3857.6228 -106696.51 15679.975 19.891812 19.891812 39.627501 -1722.9124 -107.93584 -1123.778 0.00047876602 -6.0868625 + 220 110 -1057.1346 -106590.95 15675.462 19.891812 19.891812 39.616094 3285.0876 -121.80821 3706.0326 0.00051374575 -6.6943761 + 240 110 2748.5299 -106428.9 15671.216 19.891812 19.891812 39.605364 172.15717 -143.78629 425.48974 0.00054664912 -7.3017201 + 260 110 64.99143 -106442.23 15667.188 19.891812 19.891812 39.595183 981.21139 -141.94851 1075.4979 0.00057787086 -7.9089043 + 280 110 1612.9607 -106412.77 15663.362 19.891812 19.891812 39.585514 662.48897 -145.93658 605.73218 0.00060752164 -8.5159364 + 300 110 1435.9566 -106307.06 15659.725 19.891812 19.891812 39.576323 759.46794 -160.13403 559.12791 0.00063570794 -9.1228243 + 320 110 -890.72712 -106332.6 15656.258 19.891812 19.891812 39.56756 234.14376 -156.75496 -103.07714 0.00066257852 -9.7295747 + 340 110 4270.0983 -106252.72 15652.976 19.891812 19.891812 39.559265 5411.2268 -167.0427 4944.423 0.00068801647 -10.336194 + 360 110 -2801.0763 -106105.96 15649.905 19.891812 19.891812 39.551504 -3276.3824 -187.5258 -3864.4213 0.00071181569 -10.942691 + 380 110 5566.9116 -106139.88 15646.926 19.891812 19.891812 39.543977 2737.1121 -182.43141 2031.4929 0.00073489745 -11.549071 + 400 110 -4432.9416 -106074.79 15644.09 19.891812 19.891812 39.536808 -4946.1908 -191.90759 -5763.8068 0.00075688314 -12.155339 + 420 52.599535 5582.8126 -105959.96 15641.311 19.891812 19.891812 39.529786 7869.5301 
-206.09135 6942.2136 0.00077841805 -12.761497 + 440 52.599535 -2861.6332 -106017.66 15638.758 19.891812 19.891812 39.523335 -1820.4742 -199.30721 -2848.5648 0.00079820063 -13.367553 + 460 52.599535 3942.7505 -105984.45 15636.294 19.891812 19.891812 39.517106 3327.0393 -203.24794 2201.6559 0.00081729985 -13.973511 + 480 52.599535 419.18442 -105827.32 15633.955 19.891812 19.891812 39.511194 -1910.6109 -224.9021 -3128.3482 0.00083542949 -14.579377 + 500 52.599535 117.60016 -105904.83 15631.655 19.891812 19.891812 39.505383 -603.40365 -214.36236 -1911.9203 0.00085325005 -15.185153 +Loop time of 41.8312 on 1 procs for 500 steps with 1250 atoms + +Performance: 0.258 ns/day, 92.958 hours/ns, 11.953 timesteps/s, 14.941 katom-step/s +99.8% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 31.016 | 31.016 | 31.016 | 0.0 | 74.15 +Neigh | 2.2849 | 2.2849 | 2.2849 | 0.0 | 5.46 +Comm | 0.020391 | 0.020391 | 0.020391 | 0.0 | 0.05 +Output | 0.0019403 | 0.0019403 | 0.0019403 | 0.0 | 0.00 +Modify | 8.505 | 8.505 | 8.505 | 0.0 | 20.33 +Other | | 0.003238 | | | 0.01 + +Nlocal: 1250 ave 1250 max 1250 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 8489 ave 8489 max 8489 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 606382 ave 606382 max 606382 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 606382 +Ave neighs/atom = 485.1056 +Neighbor list builds = 50 +Dangerous builds not checked +Total wall time: 0:01:23 diff --git a/examples/PACKAGES/qtb/methane_qbmsst/log.30Nov23.methane_qbmsst.g++.4 b/examples/PACKAGES/qtb/methane_qbmsst/log.30Nov23.methane_qbmsst.g++.4 new file mode 100644 index 0000000000..357f31a300 --- /dev/null +++ b/examples/PACKAGES/qtb/methane_qbmsst/log.30Nov23.methane_qbmsst.g++.4 @@ -0,0 +1,280 @@ +LAMMPS (21 Nov 2023) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. 
(src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +## This script first uses fix qtb to equilibrate liquid methane to an initial state with quantum nuclear correction and then simulate shock induced chemical reactions through the quantum thermal bath multi-scale shock technique +#The default system size may take a while to run you can change to a smaller size +variable x_rep equal 5 #x-direction replication number +variable y_rep equal 5 #y-direction replication number +variable z_rep equal 10 #z-direction replication number +variable temperature equal 110.0 #Target quantum temperature (K in real units) +variable delta_t equal 0.25 #MD timestep length (fs in real units) +variable damp_qtb equal 200 #1/gamma where gamma is the friction coefficient in quantum thermal bath (fs in real units) +variable v_msst equal 0.122 #Shock velocity (Angstrom/fs in metal units) +variable q_msst equal 25.0 #Box mass-like parameter in the MSST (mass^2/length^4, where mass=grams/mole and length=Angstrom in real units) +variable mu_msst equal 0.9 #Artificial viscosity in the MSST (mass/length/time, where mass=grams/mole, length=Angstrom and time=fs in real units) +variable tscale_msst equal 0.01 #Temperature reduction parameter in the MSST (unitless) +variable eta_qbmsst equal 1.0 #Coupling constant between the shock and the quantum thermal bath (unitless constant) + + +##The included part first constructs a liquid methane structure of a given size. It then uses fix qtb to equilibrate the computational cell to the specified temperature and pressure. +include methane_qtb.mod +## This script first constructs a liquid methane structure of a given size. It then uses fix qtb to equilibrate the computational cell to the specified temperature and pressure. 
+ + +## This part defines units, methane structure, and atomic information +#General +units real +dimension 3 +boundary p p p +atom_style charge + +#Lattice +lattice custom 1.0 a1 3.9783624 0 0 a2 0 3.9783624 0 a3 0 0 3.9783624 basis 0.5 0.5 0.5 basis 0.663 0.663 0.663 basis 0.337 0.337 0.663 basis 0.663 0.337 0.337 basis 0.337 0.663 0.337 +Lattice spacing in x,y,z = 3.9783624 3.9783624 3.9783624 + +#Computational Cell +region simbox block 0 3.9783624 0 3.9783624 0 3.9783624 units box +create_box 2 simbox +Created orthogonal box = (0 0 0) to (3.9783624 3.9783624 3.9783624) + 1 by 2 by 2 MPI processor grid +create_atoms 1 box basis 1 1 basis 2 2 basis 3 2 basis 4 2 basis 5 2 +Created 5 atoms + using lattice units in orthogonal box = (0 0 0) to (3.9783624 3.9783624 3.9783624) + create_atoms CPU = 0.000 seconds +replicate ${x_rep} ${y_rep} ${z_rep} +replicate 5 ${y_rep} ${z_rep} +replicate 5 5 ${z_rep} +replicate 5 5 10 +Replication is creating a 5x5x10 = 250 times larger system... + orthogonal box = (0 0 0) to (19.891812 19.891812 39.783624) + 1 by 1 by 4 MPI processor grid + 1250 atoms + replicate CPU = 0.000 seconds + +#Atomic Information +mass 1 12.011150 +mass 2 1.007970 + + +## This part defines the reax pair potential in methane, force field coefficients are specified in "ffield.reax" +#Pair Potentials +pair_style reaxff NULL +pair_coeff * * ffield.reax C H +fix 0 all qeq/reax 1 0.0 10.0 1.0e-6 reaxff + +#Neighbor Style +neighbor 2.5 bin +neigh_modify every 10 delay 0 check no + + +## This part equilibrates liquid methane to a temperature of ${temperature}(unit temperatureture) with quantum nuclear effects +#Initialization +velocity all create ${temperature} 93 dist gaussian sum no mom yes rot yes loop all +velocity all create 110 93 dist gaussian sum no mom yes rot yes loop all + +#Setup output +thermo_style custom step temp press etotal vol +thermo 20 + +#Colored thermal bath +fix scapegoat_qtb all nve #NVE does the time integration +fix methane_qtb all qtb 
temp ${temperature} damp ${damp_qtb} seed 35082 f_max 0.3 N_f 50 #Change f_max if your Debye frequency is higher +fix methane_qtb all qtb temp 110 damp ${damp_qtb} seed 35082 f_max 0.3 N_f 50 +fix methane_qtb all qtb temp 110 damp 200 seed 35082 f_max 0.3 N_f 50 +timestep ${delta_t} +timestep 0.25 +run 500 #500 fs + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +- fix qeq/reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Neighbor list info ... 
+ update: every = 10 steps, delay = 0 steps, check = no + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 12.5 + ghost atom cutoff = 12.5 + binsize = 6.25, bins = 4 4 7 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/newtoff/ghost + stencil: full/ghost/bin/3d + bin: standard + (2) fix qeq/reax, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 125.2 | 125.3 | 125.4 Mbytes + Step Temp Press TotEng Volume + 0 110 -15717.706 -110869.31 15741.751 + 20 133.92621 9503.0083 -110548.47 15741.751 + 40 188.1524 -13687.131 -110344.93 15741.751 + 60 205.85747 8421.3906 -110165.58 15741.751 + 80 185.08989 -11337.006 -110026.24 15741.751 + 100 245.36524 5805.0694 -109841.66 15741.751 + 120 218.83661 -7740.8838 -109674.15 15741.751 + 140 254.6075 3396.3936 -109589.89 15741.751 + 160 262.20963 -3574.2575 -109413.81 15741.751 + 180 297.89271 917.40867 -109204.79 15741.751 + 200 315.54026 -371.17448 -109129.45 15741.751 + 220 323.90745 -2811.4367 -108988.12 15741.751 + 240 358.28478 3972.8358 -108848.95 15741.751 + 260 359.12673 -6289.689 -108788.08 15741.751 + 280 376.47656 6851.3186 -108664.07 15741.751 + 300 404.30975 -7805.7238 -108482.75 15741.751 + 320 410.9097 7696.2518 -108421.87 15741.751 + 340 406.19092 -8175.1703 -108311.84 15741.751 + 360 460.37085 7630.6182 -108139.6 15741.751 + 380 413.96355 -7515.2307 -108150.73 15741.751 + 400 452.17428 7148.0954 -108027.39 15741.751 + 420 467.1725 -6662.4113 -107842.71 15741.751 + 440 481.03775 6117.6862 -107759.03 15741.751 + 460 509.03937 -4095.0215 -107648.46 15741.751 + 480 533.22373 2211.9169 -107481.89 15741.751 + 500 517.71195 -214.23969 -107489.48 15741.751 +Loop time of 22.2711 on 4 procs for 500 steps with 1250 atoms + +Performance: 0.485 ns/day, 49.491 hours/ns, 22.451 timesteps/s, 28.063 
katom-step/s +99.3% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 13.689 | 15.195 | 16.732 | 27.7 | 68.23 +Neigh | 1.5325 | 1.5496 | 1.5658 | 1.0 | 6.96 +Comm | 0.073366 | 1.6105 | 3.116 | 85.1 | 7.23 +Output | 0.00052192 | 0.00057642 | 0.00073657 | 0.0 | 0.00 +Modify | 3.896 | 3.9129 | 3.9306 | 0.6 | 17.57 +Other | | 0.00241 | | | 0.01 + +Nlocal: 312.5 ave 317 max 308 min +Histogram: 1 0 0 1 0 0 1 0 0 1 +Nghost: 4982 ave 4995 max 4967 min +Histogram: 1 0 0 0 0 1 1 0 0 1 +Neighs: 172509 ave 174182 max 170676 min +Histogram: 1 0 0 1 0 0 0 1 0 1 + +Total # of neighbors = 690037 +Ave neighs/atom = 552.0296 +Neighbor list builds = 50 +Dangerous builds not checked +unfix methane_qtb +unfix scapegoat_qtb + + +##Shock compression with quantum nuclear corrections +reset_timestep 0 +fix shock all qbmsst z ${v_msst} q ${q_msst} mu ${mu_msst} tscale ${tscale_msst} damp ${damp_qtb} f_max 0.3 N_f 50 seed 35082 eta ${eta_qbmsst} beta 400 T_init ${temperature} +fix shock all qbmsst z 0.122 q ${q_msst} mu ${mu_msst} tscale ${tscale_msst} damp ${damp_qtb} f_max 0.3 N_f 50 seed 35082 eta ${eta_qbmsst} beta 400 T_init ${temperature} +fix shock all qbmsst z 0.122 q 25 mu ${mu_msst} tscale ${tscale_msst} damp ${damp_qtb} f_max 0.3 N_f 50 seed 35082 eta ${eta_qbmsst} beta 400 T_init ${temperature} +fix shock all qbmsst z 0.122 q 25 mu 0.9 tscale ${tscale_msst} damp ${damp_qtb} f_max 0.3 N_f 50 seed 35082 eta ${eta_qbmsst} beta 400 T_init ${temperature} +fix shock all qbmsst z 0.122 q 25 mu 0.9 tscale 0.01 damp ${damp_qtb} f_max 0.3 N_f 50 seed 35082 eta ${eta_qbmsst} beta 400 T_init ${temperature} +fix shock all qbmsst z 0.122 q 25 mu 0.9 tscale 0.01 damp 200 f_max 0.3 N_f 50 seed 35082 eta ${eta_qbmsst} beta 400 T_init ${temperature} +fix shock all qbmsst z 0.122 q 25 mu 0.9 tscale 0.01 damp 200 f_max 0.3 N_f 50 seed 35082 eta 1 
beta 400 T_init ${temperature} +fix shock all qbmsst z 0.122 q 25 mu 0.9 tscale 0.01 damp 200 f_max 0.3 N_f 50 seed 35082 eta 1 beta 400 T_init 110 +QBMSST parameters: + Shock in z direction + Cell mass-like parameter qmass (units of mass^2/length^4) = 2.50000e+01 + Shock velocity = 1.22000e-01 + Artificial viscosity (units of mass/length/time) = 9.00000e-01 + Initial pressure calculated on first step + Initial volume calculated on first step + Initial energy calculated on first step +fix_modify shock energy yes +variable dhug equal f_shock[1] +variable dray equal f_shock[2] +variable lgr_vel equal f_shock[3] +variable lgr_pos equal f_shock[4] +variable T_qm equal f_shock[5] #Temperature with quantum nuclear correction +thermo_style custom step v_T_qm press etotal vol lx ly lz pzz v_dhug v_dray v_lgr_vel v_lgr_pos +thermo 20 +timestep ${delta_t} +timestep 0.25 +#restart 1000 restart +run 500 +Fix QBMSST v0 = 1.57418e+04 +Fix QBMSST p0 = -5.88788e+01 +Fix QBMSST e0 = to be -1.07489e+05 +Fix QBMSST initial strain rate of -1.02186e-04 established by reducing temperature by factor of 1.00000e-02 +Per MPI rank memory allocation (min/avg/max) = 126.1 | 126.1 | 126.1 Mbytes + Step v_T_qm Press TotEng Volume Lx Ly Lz Pzz v_dhug v_dray v_lgr_vel v_lgr_pos + 0 110 -270.21489 -107489.48 15741.751 19.891812 19.891812 39.783624 -118.93551 0 -60.056661 0 0 + 20 110 -2180.5877 -107208.2 15733.847 19.891812 19.891812 39.763648 223.47326 -37.773571 -29.703539 6.1258507e-05 -0.60983836 + 40 110 5004.864 -107109.95 15726.306 19.891812 19.891812 39.744592 3335.6341 -50.920246 2784.77 0.00011969641 -1.2193771 + 60 110 -4549.1199 -106949.22 15719.136 19.891812 19.891812 39.72647 -4461.4212 -72.656651 -5295.3675 0.00017526726 -1.8286321 + 80 110 6695.6833 -106942.12 15712.214 19.891812 19.891812 39.708976 5333.2741 -73.356417 4226.043 0.00022891479 -2.4376137 + 100 110 -5337.7671 -106930.78 15705.644 19.891812 19.891812 39.692373 -2682.4224 -75.129348 -4049.0157 0.00027982924 -3.0463347 
+ 120 110 6526.5587 -106736.15 15699.334 19.891812 19.891812 39.676424 7038.2375 -100.8809 5422.5046 0.00032873694 -3.6548061 + 140 110 -3284.0472 -106761.36 15693.36 19.891812 19.891812 39.661329 -3999.8116 -97.977739 -5851.3636 0.00037502973 -4.2630401 + 160 110 4792.0537 -106662.24 15687.56 19.891812 19.891812 39.64667 4484.6905 -110.86184 2404.1579 0.00041998006 -4.8710464 + 180 110 -1253.5849 -106532.38 15682.037 19.891812 19.891812 39.632711 -723.78287 -128.58314 -3022.3825 0.00046278801 -5.4788331 + 200 110 3276.2225 -106488.13 15676.725 19.891812 19.891812 39.619286 5117.4749 -134.15782 2609.1518 0.00050395806 -6.0864105 + 220 110 -553.17982 -106421.17 15671.675 19.891812 19.891812 39.606524 -1360.8796 -143.56979 -4068.5641 0.00054309397 -6.6937871 + 240 110 1329.8793 -106309.56 15666.794 19.891812 19.891812 39.594187 775.35326 -158.40869 -2125.0508 0.00058092605 -7.300972 + 260 110 1809.8974 -106360.42 15662.075 19.891812 19.891812 39.582262 3075.2725 -151.39659 -11.4097 0.00061749364 -7.9079706 + 280 110 24.534819 -106310.46 15657.56 19.891812 19.891812 39.570852 1043.8352 -158.25965 -2221.0935 0.00065248454 -8.5147908 + 300 110 2854.2862 -106150.2 15653.217 19.891812 19.891812 39.559874 3727.6844 -179.54521 291.27132 0.00068614803 -9.1214393 + 320 110 -776.61228 -106199.04 15649.041 19.891812 19.891812 39.549322 -1285.3999 -173.42703 -4886.655 0.00071850756 -9.7279234 + 340 110 3778.2238 -106201.03 15644.958 19.891812 19.891812 39.539001 3694.462 -172.6926 -68.017561 0.00075015694 -10.334247 + 360 110 -1505.9413 -106025.15 15641.031 19.891812 19.891812 39.529078 -1491.3768 -196.81063 -5408.8787 0.00078058882 -10.940416 + 380 110 3414.9599 -106071.49 15637.176 19.891812 19.891812 39.519335 4956.6752 -189.93327 886.98409 0.00081046454 -11.546435 + 400 110 -947.2273 -106003.34 15633.49 19.891812 19.891812 39.510021 726.91825 -199.51619 -3488.2795 0.0008390284 -12.152307 + 420 46.681884 1610.2414 -105884.37 15629.905 19.891812 19.891812 39.500961 -1377.8364 
-215.72223 -5734.5653 0.00086681188 -12.758039 + 440 46.681884 2290.4653 -105923.83 15626.371 19.891812 19.891812 39.492029 6296.7177 -209.55961 1800.4591 0.00089420243 -13.363632 + 460 46.681884 -2068.0472 -105879.44 15622.969 19.891812 19.891812 39.483432 -5629.8405 -216.88862 -10260.4 0.00092056659 -13.969092 + 480 46.681884 5011.06 -105748.92 15619.556 19.891812 19.891812 39.474805 8649.5097 -232.72756 3884.1859 0.00094702163 -14.574419 + 500 46.681884 -3314.8335 -105829.23 15616.305 19.891812 19.891812 39.46659 -5120.4784 -223.60669 -10014.132 0.00097221364 -15.179618 +Loop time of 26.5748 on 4 procs for 500 steps with 1250 atoms + +Performance: 0.406 ns/day, 59.055 hours/ns, 18.815 timesteps/s, 23.519 katom-step/s +99.3% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 16.259 | 18.109 | 19.999 | 31.1 | 68.14 +Neigh | 1.8265 | 1.8477 | 1.8638 | 1.0 | 6.95 +Comm | 0.045073 | 1.9349 | 3.7845 | 95.1 | 7.28 +Output | 0.0019058 | 0.0019666 | 0.0021202 | 0.2 | 0.01 +Modify | 4.6619 | 4.6782 | 4.699 | 0.6 | 17.60 +Other | | 0.002774 | | | 0.01 + +Nlocal: 312.5 ave 318 max 307 min +Histogram: 1 0 0 0 1 1 0 0 0 1 +Nghost: 5059 ave 5080 max 5039 min +Histogram: 1 0 1 0 0 0 1 0 0 1 +Neighs: 173854 ave 176807 max 170839 min +Histogram: 1 0 0 1 0 0 1 0 0 1 + +Total # of neighbors = 695414 +Ave neighs/atom = 556.3312 +Neighbor list builds = 50 +Dangerous builds not checked +Total wall time: 0:00:49 diff --git a/examples/PACKAGES/qtb/methane_qbmsst/methane_qbmsst.in b/examples/PACKAGES/qtb/methane_qbmsst/methane_qbmsst.in deleted file mode 100644 index 507164a732..0000000000 --- a/examples/PACKAGES/qtb/methane_qbmsst/methane_qbmsst.in +++ /dev/null @@ -1,33 +0,0 @@ -## This script first uses fix qtb to equilibrate liquid methane to an initial state with quantum nuclear correction and then simulate shock induced chemical 
reactions through the quantum thermal bath multi-scale shock technique -#The default system size may take a while to run you can change to a smaller size -variable x_rep equal 5 #x-direction replication number -variable y_rep equal 5 #y-direction replication number -variable z_rep equal 10 #z-direction replication number -variable temperature equal 110.0 #Target quantum temperature (K in real units) -variable delta_t equal 0.25 #MD timestep length (fs in real units) -variable damp_qtb equal 200 #1/gamma where gamma is the friction coefficient in quantum thermal bath (fs in real units) -variable v_msst equal 0.122 #Shock velocity (Angstrom/fs in metal units) -variable q_msst equal 25.0 #Box mass-like parameter in the MSST (mass^2/length^4, where mass=grams/mole and length=Angstrom in real units) -variable mu_msst equal 0.9 #Artificial viscosity in the MSST (mass/length/time, where mass=grams/mole, length=Angstrom and time=fs in real units) -variable tscale_msst equal 0.01 #Temperature reduction parameter in the MSST (unitless) -variable eta_qbmsst equal 1.0 #Coupling constant between the shock and the quantum thermal bath (unitless constant) - - -##The included part first constructs a liquid methane structure of a given size. It then uses fix qtb to equilibrate the computational cell to the specified temperature and pressure. 
-include methane_qtb.mod - - -##Shock compression with quantum nuclear corrections -reset_timestep 0 -fix shock all qbmsst z ${v_msst} q ${q_msst} mu ${mu_msst} tscale ${tscale_msst} damp ${damp_qtb} f_max 0.3 N_f 50 seed 35082 eta ${eta_qbmsst} beta 400 T_init ${temperature} -fix_modify shock energy yes -variable dhug equal f_shock[1] -variable dray equal f_shock[2] -variable lgr_vel equal f_shock[3] -variable lgr_pos equal f_shock[4] -variable T_qm equal f_shock[5] #Temperature with quantum nuclear correction -thermo_style custom step v_T_qm press etotal vol lx ly lz pzz v_dhug v_dray v_lgr_vel v_lgr_pos -thermo 100 -timestep ${delta_t} -restart 1000 restart -run 5000 diff --git a/examples/PACKAGES/qtb/methane_qbmsst/methane_qtb.mod b/examples/PACKAGES/qtb/methane_qbmsst/methane_qtb.mod index 65bfc5d7f7..ac1735ca7b 100644 --- a/examples/PACKAGES/qtb/methane_qbmsst/methane_qtb.mod +++ b/examples/PACKAGES/qtb/methane_qbmsst/methane_qtb.mod @@ -3,62 +3,62 @@ ## This part defines units, methane structure, and atomic information #General -units real -dimension 3 -boundary p p p -atom_style charge +units real +dimension 3 +boundary p p p +atom_style charge #Lattice -lattice custom 1.0 & - a1 3.9783624 0 0 & - a2 0 3.9783624 0 & - a3 0 0 3.9783624 & - & - basis 0.5 0.5 0.5 & - basis 0.663 0.663 0.663 & - basis 0.337 0.337 0.663 & - basis 0.663 0.337 0.337 & - basis 0.337 0.663 0.337 +lattice custom 1.0 & + a1 3.9783624 0 0 & + a2 0 3.9783624 0 & + a3 0 0 3.9783624 & + & + basis 0.5 0.5 0.5 & + basis 0.663 0.663 0.663 & + basis 0.337 0.337 0.663 & + basis 0.663 0.337 0.337 & + basis 0.337 0.663 0.337 #Computational Cell -region simbox block 0 3.9783624 0 3.9783624 0 3.9783624 units box -create_box 2 simbox -create_atoms 1 box & - basis 1 1 & - basis 2 2 & - basis 3 2 & - basis 4 2 & - basis 5 2 -replicate ${x_rep} ${y_rep} ${z_rep} +region simbox block 0 3.9783624 0 3.9783624 0 3.9783624 units box +create_box 2 simbox +create_atoms 1 box & + basis 1 1 & + basis 2 2 & + 
basis 3 2 & + basis 4 2 & + basis 5 2 +replicate ${x_rep} ${y_rep} ${z_rep} #Atomic Information -mass 1 12.011150 -mass 2 1.007970 +mass 1 12.011150 +mass 2 1.007970 ## This part defines the reax pair potential in methane, force field coefficients are specified in "ffield.reax" #Pair Potentials -pair_style reax/c NULL -pair_coeff * * ffield.reax C H -fix 0 all qeq/reax 1 0.0 10.0 1.0e-6 reax/c +pair_style reaxff NULL +pair_coeff * * ffield.reax C H +fix 0 all qeq/reax 1 0.0 10.0 1.0e-6 reaxff #Neighbor Style -neighbor 2.5 bin -neigh_modify every 10 delay 0 check no +neighbor 2.5 bin +neigh_modify every 10 delay 0 check no ## This part equilibrates liquid methane to a temperature of ${temperature}(unit temperatureture) with quantum nuclear effects #Initialization -velocity all create ${temperature} 93 dist gaussian sum no mom yes rot yes loop all +velocity all create ${temperature} 93 dist gaussian sum no mom yes rot yes loop all #Setup output -thermo_style custom step temp press etotal vol -thermo 100 +thermo_style custom step temp press etotal vol +thermo 20 #Colored thermal bath -fix scapegoat_qtb all nve #NVE does the time integration -fix methane_qtb all qtb temp ${temperature} damp ${damp_qtb} seed 35082 f_max 0.3 N_f 50 #Change f_max if your Debye frequency is higher -timestep ${delta_t} -run 2000 #500 fs -unfix methane_qtb -unfix scapegoat_qtb +fix scapegoat_qtb all nve #NVE does the time integration +fix methane_qtb all qtb temp ${temperature} damp ${damp_qtb} seed 35082 f_max 0.3 N_f 50 #Change f_max if your Debye frequency is higher +timestep ${delta_t} +run 500 #500 fs +unfix methane_qtb +unfix scapegoat_qtb diff --git a/examples/PACKAGES/qtb/methane_qtb/in.methane_qtb b/examples/PACKAGES/qtb/methane_qtb/in.methane_qtb new file mode 100644 index 0000000000..bfaa8706c4 --- /dev/null +++ b/examples/PACKAGES/qtb/methane_qtb/in.methane_qtb @@ -0,0 +1,70 @@ +## This script first constructs a liquid methane structure of a given size. 
It then uses fix qtb to equilibrate the computational cell to the specified temperature and pressure. +variable x_rep equal 2 #x-direction replication number +variable y_rep equal 2 #y-direction replication number +variable z_rep equal 2 #z-direction replication number +variable temperature equal 110.0 #Target quantum temperature (K in real units) +variable delta_t equal 0.25 #MD timestep length (fs in real units) +variable damp_qtb equal 200 #1/gamma where gamma is the friction coefficient in quantum thermal bath (fs in real units) + + +## This part defines units, methane structure, and atomic information +#General +units real +dimension 3 +boundary p p p +atom_style charge + +#Lattice +lattice custom 1.0 & + a1 3.9783624 0 0 & + a2 0 3.9783624 0 & + a3 0 0 3.9783624 & + & + basis 0.5 0.5 0.5 & + basis 0.663 0.663 0.663 & + basis 0.337 0.337 0.663 & + basis 0.663 0.337 0.337 & + basis 0.337 0.663 0.337 + +#Computational Cell +region simbox block 0 3.9783624 0 3.9783624 0 3.9783624 units box +create_box 2 simbox +create_atoms 1 box & + basis 1 1 & + basis 2 2 & + basis 3 2 & + basis 4 2 & + basis 5 2 +replicate ${x_rep} ${y_rep} ${z_rep} + +#Atomic Information +mass 1 12.011150 +mass 2 1.007970 + + +## This part defines the reax pair potential in methane, force field coefficients are specified in "ffield.reax" +#Pair Potentials +pair_style reaxff NULL +pair_coeff * * ffield.reax C H +fix 0 all qeq/reax 1 0.0 10.0 1.0e-6 reaxff + +#Neighbor Style +neighbor 2.5 bin +neigh_modify every 10 delay 0 check no + + +## This part equilibrates liquid methane to a temperature of ${temperature}(unit temperatureture) with quantum nuclear effects +#Initialization +velocity all create ${temperature} 93 dist gaussian sum no mom yes rot yes loop all + +#Setup output +thermo_style custom step temp press etotal vol +thermo 50 + +#Colored thermal bath +fix scapegoat_qtb all nve #NVE does the time integration +fix methane_qtb all qtb temp ${temperature} damp ${damp_qtb} seed 35082 f_max 
0.3 N_f 50 #Change f_max if your Debye frequency is higher +timestep ${delta_t} +run 1000 +unfix methane_qtb +unfix scapegoat_qtb diff --git a/examples/PACKAGES/qtb/methane_qtb/log.30Nov23.methane_qtb.g++.1 b/examples/PACKAGES/qtb/methane_qtb/log.30Nov23.methane_qtb.g++.1 new file mode 100644 index 0000000000..b6efdb6360 --- /dev/null +++ b/examples/PACKAGES/qtb/methane_qtb/log.30Nov23.methane_qtb.g++.1 @@ -0,0 +1,174 @@ +LAMMPS (21 Nov 2023) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +## This script first constructs a liquid methane structure of a given size. It then uses fix qtb to equilibrate the computational cell to the specified temperature and pressure. +variable x_rep equal 2 #x-direction replication number +variable y_rep equal 2 #y-direction replication number +variable z_rep equal 2 #z-direction replication number +variable temperature equal 110.0 #Target quantum temperature (K in real units) +variable delta_t equal 0.25 #MD timestep length (fs in real units) +variable damp_qtb equal 200 #1/gamma where gamma is the friction coefficient in quantum thermal bath (fs in real units) + + +## This part defines units, methane structure, and atomic information +#General +units real +dimension 3 +boundary p p p +atom_style charge + +#Lattice +lattice custom 1.0 a1 3.9783624 0 0 a2 0 3.9783624 0 a3 0 0 3.9783624 basis 0.5 0.5 0.5 basis 0.663 0.663 0.663 basis 0.337 0.337 0.663 basis 0.663 0.337 0.337 basis 0.337 0.663 0.337 +Lattice spacing in x,y,z = 3.9783624 3.9783624 3.9783624 + +#Computational Cell +region simbox block 0 3.9783624 0 3.9783624 0 3.9783624 units box +create_box 2 simbox +Created orthogonal box = (0 0 0) to (3.9783624 3.9783624 3.9783624) + 1 by 1 by 1 MPI processor grid +create_atoms 1 box basis 1 1 basis 2 2 basis 3 2 basis 4 2 basis 5 2 +Created 5 atoms + using lattice units in orthogonal box = (0 0 0) to (3.9783624 3.9783624 3.9783624) + create_atoms CPU = 0.000 
seconds +replicate ${x_rep} ${y_rep} ${z_rep} +replicate 2 ${y_rep} ${z_rep} +replicate 2 2 ${z_rep} +replicate 2 2 2 +Replication is creating a 2x2x2 = 8 times larger system... + orthogonal box = (0 0 0) to (7.9567248 7.9567248 7.9567248) + 1 by 1 by 1 MPI processor grid + 40 atoms + replicate CPU = 0.001 seconds + +#Atomic Information +mass 1 12.011150 +mass 2 1.007970 + + +## This part defines the reax pair potential in methane, force field coefficients are specified in "ffield.reax" +#Pair Potentials +pair_style reaxff NULL +pair_coeff * * ffield.reax C H +fix 0 all qeq/reax 1 0.0 10.0 1.0e-6 reaxff + +#Neighbor Style +neighbor 2.5 bin +neigh_modify every 10 delay 0 check no + + +## This part equilibrates liquid methane to a temperature of ${temperature}(unit temperatureture) with quantum nuclear effects +#Initialization +velocity all create ${temperature} 93 dist gaussian sum no mom yes rot yes loop all +velocity all create 110 93 dist gaussian sum no mom yes rot yes loop all + +#Setup output +thermo_style custom step temp press etotal vol +thermo 50 + +#Colored thermal bath +fix scapegoat_qtb all nve #NVE does the time integration +fix methane_qtb all qtb temp ${temperature} damp ${damp_qtb} seed 35082 f_max 0.3 N_f 50 #Change f_max if your Debye frequency is higher +fix methane_qtb all qtb temp 110 damp ${damp_qtb} seed 35082 f_max 0.3 N_f 50 +fix methane_qtb all qtb temp 110 damp 200 seed 35082 f_max 0.3 N_f 50 +timestep ${delta_t} +timestep 0.25 +run 1000 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. 
Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +- fix qeq/reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Neighbor list info ... + update: every = 10 steps, delay = 0 steps, check = no + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 12.5 + ghost atom cutoff = 12.5 + binsize = 6.25, bins = 2 2 2 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/newtoff/ghost + stencil: full/ghost/bin/3d + bin: standard + (2) fix qeq/reax, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 82.45 | 82.45 | 82.45 Mbytes + Step Temp Press TotEng Volume + 0 110 -15746.508 -3548.1354 503.73603 + 50 191.27715 -7523.7503 -3530.4179 503.73603 + 100 214.09982 12016.892 -3517.4544 503.73603 + 150 317.38272 3098.2254 -3499.5793 503.73603 + 200 338.76362 -4484.9241 -3490.3649 503.73603 + 250 402.05826 3973.0488 -3474.81 503.73603 + 300 340.80076 11193.4 -3470.8029 503.73603 + 350 556.19747 8086.3266 -3451.5937 503.73603 + 400 566.8737 5499.5505 -3439.2335 503.73603 + 450 643.2883 -8270.5736 -3426.0767 503.73603 + 500 613.09742 -12406.229 -3419.8547 503.73603 + 550 669.28891 -9757.601 -3410.7281 503.73603 + 600 600.66922 10407.403 -3408.3776 503.73603 + 650 573.1485 30971.977 -3405.0744 503.73603 + 700 726.22146 29573.798 -3386.3167 503.73603 + 750 777.22659 
13265.88 -3378.8462 503.73603 + 800 652.46476 -9231.9331 -3388.7229 503.73603 + 850 679.18414 -19802.254 -3384.6321 503.73603 + 900 711.60594 -18792.396 -3373.2944 503.73603 + 950 865.79013 -2837.6042 -3363.2971 503.73603 + 1000 884.14995 6160.4875 -3360.6295 503.73603 +Loop time of 7.87 on 1 procs for 1000 steps with 40 atoms + +Performance: 2.745 ns/day, 8.744 hours/ns, 127.065 timesteps/s, 5.083 katom-step/s +99.7% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 6.3662 | 6.3662 | 6.3662 | 0.0 | 80.89 +Neigh | 0.97757 | 0.97757 | 0.97757 | 0.0 | 12.42 +Comm | 0.013653 | 0.013653 | 0.013653 | 0.0 | 0.17 +Output | 0.00042319 | 0.00042319 | 0.00042319 | 0.0 | 0.01 +Modify | 0.50971 | 0.50971 | 0.50971 | 0.0 | 6.48 +Other | | 0.00248 | | | 0.03 + +Nlocal: 40 ave 40 max 40 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 2552 ave 2552 max 2552 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 25110 ave 25110 max 25110 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 25110 +Ave neighs/atom = 627.75 +Neighbor list builds = 100 +Dangerous builds not checked +unfix methane_qtb +unfix scapegoat_qtb +Total wall time: 0:00:07 diff --git a/examples/PACKAGES/qtb/methane_qtb/log.30Nov23.methane_qtb.g++.4 b/examples/PACKAGES/qtb/methane_qtb/log.30Nov23.methane_qtb.g++.4 new file mode 100644 index 0000000000..c68935df87 --- /dev/null +++ b/examples/PACKAGES/qtb/methane_qtb/log.30Nov23.methane_qtb.g++.4 @@ -0,0 +1,174 @@ +LAMMPS (21 Nov 2023) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +## This script first constructs a liquid methane structure of a given size. It then uses fix qtb to equilibrate the computational cell to the specified temperature and pressure. 
+variable x_rep equal 2 #x-direction replication number +variable y_rep equal 2 #y-direction replication number +variable z_rep equal 2 #z-direction replication number +variable temperature equal 110.0 #Target quantum temperature (K in real units) +variable delta_t equal 0.25 #MD timestep length (fs in real units) +variable damp_qtb equal 200 #1/gamma where gamma is the friction coefficient in quantum thermal bath (fs in real units) + + +## This part defines units, methane structure, and atomic information +#General +units real +dimension 3 +boundary p p p +atom_style charge + +#Lattice +lattice custom 1.0 a1 3.9783624 0 0 a2 0 3.9783624 0 a3 0 0 3.9783624 basis 0.5 0.5 0.5 basis 0.663 0.663 0.663 basis 0.337 0.337 0.663 basis 0.663 0.337 0.337 basis 0.337 0.663 0.337 +Lattice spacing in x,y,z = 3.9783624 3.9783624 3.9783624 + +#Computational Cell +region simbox block 0 3.9783624 0 3.9783624 0 3.9783624 units box +create_box 2 simbox +Created orthogonal box = (0 0 0) to (3.9783624 3.9783624 3.9783624) + 1 by 2 by 2 MPI processor grid +create_atoms 1 box basis 1 1 basis 2 2 basis 3 2 basis 4 2 basis 5 2 +Created 5 atoms + using lattice units in orthogonal box = (0 0 0) to (3.9783624 3.9783624 3.9783624) + create_atoms CPU = 0.000 seconds +replicate ${x_rep} ${y_rep} ${z_rep} +replicate 2 ${y_rep} ${z_rep} +replicate 2 2 ${z_rep} +replicate 2 2 2 +Replication is creating a 2x2x2 = 8 times larger system... 
+ orthogonal box = (0 0 0) to (7.9567248 7.9567248 7.9567248) + 1 by 2 by 2 MPI processor grid + 40 atoms + replicate CPU = 0.000 seconds + +#Atomic Information +mass 1 12.011150 +mass 2 1.007970 + + +## This part defines the reax pair potential in methane, force field coefficients are specified in "ffield.reax" +#Pair Potentials +pair_style reaxff NULL +pair_coeff * * ffield.reax C H +fix 0 all qeq/reax 1 0.0 10.0 1.0e-6 reaxff + +#Neighbor Style +neighbor 2.5 bin +neigh_modify every 10 delay 0 check no + + +## This part equilibrates liquid methane to a temperature of ${temperature}(unit temperatureture) with quantum nuclear effects +#Initialization +velocity all create ${temperature} 93 dist gaussian sum no mom yes rot yes loop all +velocity all create 110 93 dist gaussian sum no mom yes rot yes loop all + +#Setup output +thermo_style custom step temp press etotal vol +thermo 50 + +#Colored thermal bath +fix scapegoat_qtb all nve #NVE does the time integration +fix methane_qtb all qtb temp ${temperature} damp ${damp_qtb} seed 35082 f_max 0.3 N_f 50 #Change f_max if your Debye frequency is higher +fix methane_qtb all qtb temp 110 damp ${damp_qtb} seed 35082 f_max 0.3 N_f 50 +fix methane_qtb all qtb temp 110 damp 200 seed 35082 f_max 0.3 N_f 50 +timestep ${delta_t} +timestep 0.25 +run 1000 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +- fix qeq/reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. 
Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Neighbor list info ... + update: every = 10 steps, delay = 0 steps, check = no + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 12.5 + ghost atom cutoff = 12.5 + binsize = 6.25, bins = 2 2 2 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/newtoff/ghost + stencil: full/ghost/bin/3d + bin: standard + (2) fix qeq/reax, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 72.47 | 72.47 | 72.47 Mbytes + Step Temp Press TotEng Volume + 0 110 -15746.508 -3548.1354 503.73603 + 50 183.74482 -917.59204 -3534.8518 503.73603 + 100 200.96363 16464.403 -3517.1456 503.73603 + 150 255.33305 14801.963 -3507.7299 503.73603 + 200 328.11626 5119.3618 -3498.0388 503.73603 + 250 356.88626 -11306.151 -3485.1746 503.73603 + 300 284.7363 -25276.091 -3479.4732 503.73603 + 350 434.79382 -23326.29 -3471.7491 503.73603 + 400 414.69602 2800.9047 -3465.7225 503.73603 + 450 464.61242 20775.398 -3449.1675 503.73603 + 500 671.43369 15272.581 -3433.9453 503.73603 + 550 534.01157 -8545.4173 -3427.6672 503.73603 + 600 512.69648 -15904.052 -3417.8071 503.73603 + 650 604.62051 -1777.9242 -3419.4324 503.73603 + 700 650.2196 20108.199 -3415.8902 503.73603 + 750 677.45644 21721.335 -3409.1253 503.73603 + 800 707.98295 171.53756 -3413.4048 503.73603 + 850 740.68522 -23846.627 -3384.7024 503.73603 + 900 739.55514 -22742.841 -3377.091 503.73603 + 950 769.44821 -7060.9388 -3389.817 503.73603 + 1000 987.6246 -0.47618437 -3373.9263 503.73603 +Loop time of 6.80367 on 4 procs for 1000 steps with 40 atoms + +Performance: 3.175 ns/day, 
7.560 hours/ns, 146.980 timesteps/s, 5.879 katom-step/s +99.3% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 5.1801 | 5.3491 | 5.5417 | 6.1 | 78.62 +Neigh | 0.76934 | 0.77281 | 0.77567 | 0.3 | 11.36 +Comm | 0.072213 | 0.26492 | 0.4339 | 27.2 | 3.89 +Output | 0.00032365 | 0.00035547 | 0.00044739 | 0.0 | 0.01 +Modify | 0.41139 | 0.41424 | 0.4179 | 0.4 | 6.09 +Other | | 0.00226 | | | 0.03 + +Nlocal: 10 ave 10 max 10 min +Histogram: 4 0 0 0 0 0 0 0 0 0 +Nghost: 1950 ave 1950 max 1950 min +Histogram: 4 0 0 0 0 0 0 0 0 0 +Neighs: 6434.5 ave 6447 max 6427 min +Histogram: 2 0 0 0 0 1 0 0 0 1 + +Total # of neighbors = 25738 +Ave neighs/atom = 643.45 +Neighbor list builds = 100 +Dangerous builds not checked +unfix methane_qtb +unfix scapegoat_qtb +Total wall time: 0:00:06 diff --git a/examples/PACKAGES/qtb/methane_qtb/methane_qtb.in b/examples/PACKAGES/qtb/methane_qtb/methane_qtb.in deleted file mode 100644 index e31f0695b9..0000000000 --- a/examples/PACKAGES/qtb/methane_qtb/methane_qtb.in +++ /dev/null @@ -1,70 +0,0 @@ -## This script first constructs a liquid methane structure of a given size. It then uses fix qtb to equilibrate the computational cell to the specified temperature and pressure. 
-variable x_rep equal 2 #x-direction replication number -variable y_rep equal 2 #y-direction replication number -variable z_rep equal 2 #z-direction replication number -variable temperature equal 110.0 #Target quantum temperature (K in real units) -variable delta_t equal 0.25 #MD timestep length (fs in real units) -variable damp_qtb equal 200 #1/gamma where gamma is the friction coefficient in quantum thermal bath (fs in real units) - - -## This part defines units, methane structure, and atomic information -#General -units real -dimension 3 -boundary p p p -atom_style charge - -#Lattice -lattice custom 1.0 & - a1 3.9783624 0 0 & - a2 0 3.9783624 0 & - a3 0 0 3.9783624 & - & - basis 0.5 0.5 0.5 & - basis 0.663 0.663 0.663 & - basis 0.337 0.337 0.663 & - basis 0.663 0.337 0.337 & - basis 0.337 0.663 0.337 - -#Computational Cell -region simbox block 0 3.9783624 0 3.9783624 0 3.9783624 units box -create_box 2 simbox -create_atoms 1 box & - basis 1 1 & - basis 2 2 & - basis 3 2 & - basis 4 2 & - basis 5 2 -replicate ${x_rep} ${y_rep} ${z_rep} - -#Atomic Information -mass 1 12.011150 -mass 2 1.007970 - - -## This part defines the reax pair potential in methane, force field coefficients are specified in "ffield.reax" -#Pair Potentials -pair_style reax/c NULL -pair_coeff * * ffield.reax C H -fix 0 all qeq/reax 1 0.0 10.0 1.0e-6 reax/c - -#Neighbor Style -neighbor 2.5 bin -neigh_modify every 10 delay 0 check no - - -## This part equilibrates liquid methane to a temperature of ${temperature}(unit temperatureture) with quantum nuclear effects -#Initialization -velocity all create ${temperature} 93 dist gaussian sum no mom yes rot yes loop all - -#Setup output -thermo_style custom step temp press etotal vol -thermo 100 - -#Colored thermal bath -fix scapegoat_qtb all nve #NVE does the time integration -fix methane_qtb all qtb temp ${temperature} damp ${damp_qtb} seed 35082 f_max 0.3 N_f 50 #Change f_max if your Debye frequency is higher -timestep ${delta_t} -run 3000 #750 fs 
-unfix methane_qtb -unfix scapegoat_qtb diff --git a/examples/amoeba/amoeba_ubiquitin.key b/examples/amoeba/amoeba_ubiquitin.key index 2870d071d4..3d63525258 100644 --- a/examples/amoeba/amoeba_ubiquitin.key +++ b/examples/amoeba/amoeba_ubiquitin.key @@ -12,7 +12,8 @@ ewald ewald-alpha 0.4 pewald-alpha 0.5 ewald-cutoff 7.0 -#pme-grid 60 45 45 pme-grid 60 48 48 -pme-order 5 polar-eps 0.00001 +#pme-grid 15 12 12 +#polar-eps 0.0002 +pme-order 5 diff --git a/examples/qeq/in.qeq.reaxc b/examples/qeq/in.qeq.reaxff similarity index 90% rename from examples/qeq/in.qeq.reaxc rename to examples/qeq/in.qeq.reaxff index 5271c74671..a60cca269e 100644 --- a/examples/qeq/in.qeq.reaxc +++ b/examples/qeq/in.qeq.reaxff @@ -1,19 +1,19 @@ -# This example demonstrates the use of various fix qeq variants with pair reax/c +# This example demonstrates the use of various fix qeq variants with pair reaxff # You can comment in/out various versions below # # 1) Fix qeq/shielded generates the same results compared to fix qeq/reax when -# used with pair_style reax/c, provided that the QEq parameters are the same. +# used with pair_style reaxff, provided that the QEq parameters are the same. # # 2) Fix qeq/point and fix qeq/dynamic generate comparable results provided that # the QEq parameters are the same. These two styles can also be used with -# pair_style reax/c. +# pair_style reaxff. 
units real atom_style charge read_data data.CHO -pair_style reax/c NULL checkqeq no +pair_style reaxff NULL checkqeq no pair_coeff * * ffield.reax.cho H C O neighbor 1 bin diff --git a/examples/qeq/log.27Nov18.qeq.reaxc.g++.1 b/examples/qeq/log.27Nov18.qeq.reaxc.g++.1 deleted file mode 100644 index c88acc39f2..0000000000 --- a/examples/qeq/log.27Nov18.qeq.reaxc.g++.1 +++ /dev/null @@ -1,116 +0,0 @@ -LAMMPS (27 Nov 2018) - using 1 OpenMP thread(s) per MPI task -# This example demonstrates the use of various fix qeq variants with pair reax/c -# You can comment in/out various versions below -# -# 1) Fix qeq/shielded generates the same results compared to fix qeq/reax when -# used with pair_style reax/c, provided that the QEq parameters are the same. -# -# 2) Fix qeq/point and fix qeq/dynamic generate comparable results provided that -# the QEq parameters are the same. These two styles can also be used with -# pair_style reax/c. - -units real -atom_style charge - -read_data data.CHO - orthogonal box = (0 0 0) to (25 25 25) - 1 by 1 by 1 MPI processor grid - reading atoms ... 
- 105 atoms - -pair_style reax/c NULL checkqeq no -pair_coeff * * ffield.reax.cho H C O -Reading potential file ffield.reax.cho with DATE: 2011-02-18 - -neighbor 1 bin -neigh_modify every 1 delay 0 check yes - -group type1 type 1 -60 atoms in group type1 -compute charge1 type1 property/atom q -compute q1 type1 reduce ave c_charge1 -group type2 type 2 -25 atoms in group type2 -compute charge2 type2 property/atom q -compute q2 type2 reduce ave c_charge2 -group type3 type 3 -20 atoms in group type3 -compute charge3 type3 property/atom q -compute q3 type3 reduce ave c_charge3 -variable qtot equal count(type1)*c_q1+count(type2)*c_q2+count(type3)*c_q3 - -thermo_style custom step pe c_q1 c_q2 c_q3 v_qtot -thermo 1 - -velocity all create 300.0 1281937 -fix 1 all nve - -fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq.reax -#fix 2 all qeq/shielded 1 10.0 1e-6 400 param.qeq1 -#fix 2 all qeq/point 1 10.0 1e-6 400 param.qeq1 -#fix 2 all qeq/dynamic 1 10.0 1e-3 100 param.qeq1 - -timestep 0.25 - -run 10 -Neighbor list info ... 
- update every 1 steps, delay 0 steps, check yes - max neighbors/atom: 2000, page size: 100000 - master list distance cutoff = 11 - ghost atom cutoff = 11 - binsize = 5.5, bins = 5 5 5 - 2 neighbor lists, perpetual/occasional/extra = 2 0 0 - (1) pair reax/c, perpetual - attributes: half, newton off, ghost - pair build: half/bin/newtoff/ghost - stencil: half/ghost/bin/3d/newtoff - bin: standard - (2) fix qeq/reax, perpetual, copy from (1) - attributes: half, newton off, ghost - pair build: copy - stencil: none - bin: none -Per MPI rank memory allocation (min/avg/max) = 16.65 | 16.65 | 16.65 Mbytes -Step PotEng c_q1 c_q2 c_q3 v_qtot - 0 -10226.557 0.095634063 -0.15658793 -0.091167279 4.4408921e-16 - 1 -10225.799 0.095649584 -0.1566219 -0.091171371 7.1054274e-15 - 2 -10223.656 0.095669731 -0.15666714 -0.091175264 -6.4392935e-15 - 3 -10220.276 0.095691257 -0.15671597 -0.091178813 2.6645353e-15 - 4 -10215.894 0.095714363 -0.15676887 -0.091182006 -3.1086245e-15 - 5 -10210.804 0.095733863 -0.15681398 -0.09118412 6.6613381e-16 - 6 -10205.342 0.095751253 -0.15685427 -0.091185918 -1.110223e-15 - 7 -10199.848 0.095762028 -0.1568795 -0.091186707 8.8817842e-15 - 8 -10194.646 0.095767243 -0.15689184 -0.091186932 -2.4424907e-15 - 9 -10190.016 0.095760528 -0.15687664 -0.091185782 -4.4408921e-16 - 10 -10186.168 0.095748006 -0.15684815 -0.09118383 1.110223e-15 -Loop time of 0.0322483 on 1 procs for 10 steps with 105 atoms - -Performance: 6.698 ns/day, 3.583 hours/ns, 310.094 timesteps/s -99.8% CPU use with 1 MPI tasks x 1 OpenMP threads - -MPI task timing breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 0.026229 | 0.026229 | 0.026229 | 0.0 | 81.34 -Neigh | 0 | 0 | 0 | 0.0 | 0.00 -Comm | 5.2214e-05 | 5.2214e-05 | 5.2214e-05 | 0.0 | 0.16 -Output | 0.00027299 | 0.00027299 | 0.00027299 | 0.0 | 0.85 -Modify | 0.0056667 | 0.0056667 | 0.0056667 | 0.0 | 17.57 -Other | | 2.694e-05 | | | 0.08 - 
-Nlocal: 105 ave 105 max 105 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Nghost: 512 ave 512 max 512 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Neighs: 3417 ave 3417 max 3417 min -Histogram: 1 0 0 0 0 0 0 0 0 0 - -Total # of neighbors = 3417 -Ave neighs/atom = 32.5429 -Neighbor list builds = 0 -Dangerous builds = 0 - -Please see the log.cite file for references relevant to this simulation - -Total wall time: 0:00:00 diff --git a/examples/qeq/log.27Nov18.qeq.reaxc.g++.4 b/examples/qeq/log.27Nov18.qeq.reaxc.g++.4 deleted file mode 100644 index c54a99577e..0000000000 --- a/examples/qeq/log.27Nov18.qeq.reaxc.g++.4 +++ /dev/null @@ -1,116 +0,0 @@ -LAMMPS (27 Nov 2018) - using 1 OpenMP thread(s) per MPI task -# This example demonstrates the use of various fix qeq variants with pair reax/c -# You can comment in/out various versions below -# -# 1) Fix qeq/shielded generates the same results compared to fix qeq/reax when -# used with pair_style reax/c, provided that the QEq parameters are the same. -# -# 2) Fix qeq/point and fix qeq/dynamic generate comparable results provided that -# the QEq parameters are the same. These two styles can also be used with -# pair_style reax/c. - -units real -atom_style charge - -read_data data.CHO - orthogonal box = (0 0 0) to (25 25 25) - 1 by 2 by 2 MPI processor grid - reading atoms ... 
- 105 atoms - -pair_style reax/c NULL checkqeq no -pair_coeff * * ffield.reax.cho H C O -Reading potential file ffield.reax.cho with DATE: 2011-02-18 - -neighbor 1 bin -neigh_modify every 1 delay 0 check yes - -group type1 type 1 -60 atoms in group type1 -compute charge1 type1 property/atom q -compute q1 type1 reduce ave c_charge1 -group type2 type 2 -25 atoms in group type2 -compute charge2 type2 property/atom q -compute q2 type2 reduce ave c_charge2 -group type3 type 3 -20 atoms in group type3 -compute charge3 type3 property/atom q -compute q3 type3 reduce ave c_charge3 -variable qtot equal count(type1)*c_q1+count(type2)*c_q2+count(type3)*c_q3 - -thermo_style custom step pe c_q1 c_q2 c_q3 v_qtot -thermo 1 - -velocity all create 300.0 1281937 -fix 1 all nve - -fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq.reax -#fix 2 all qeq/shielded 1 10.0 1e-6 400 param.qeq1 -#fix 2 all qeq/point 1 10.0 1e-6 400 param.qeq1 -#fix 2 all qeq/dynamic 1 10.0 1e-3 100 param.qeq1 - -timestep 0.25 - -run 10 -Neighbor list info ... 
- update every 1 steps, delay 0 steps, check yes - max neighbors/atom: 2000, page size: 100000 - master list distance cutoff = 11 - ghost atom cutoff = 11 - binsize = 5.5, bins = 5 5 5 - 2 neighbor lists, perpetual/occasional/extra = 2 0 0 - (1) pair reax/c, perpetual - attributes: half, newton off, ghost - pair build: half/bin/newtoff/ghost - stencil: half/ghost/bin/3d/newtoff - bin: standard - (2) fix qeq/reax, perpetual, copy from (1) - attributes: half, newton off, ghost - pair build: copy - stencil: none - bin: none -Per MPI rank memory allocation (min/avg/max) = 10.83 | 11.69 | 12.52 Mbytes -Step PotEng c_q1 c_q2 c_q3 v_qtot - 0 -10226.557 0.095633919 -0.15658765 -0.091167194 1.7763568e-15 - 1 -10225.799 0.0956503 -0.15662357 -0.09117143 2.8865799e-15 - 2 -10223.656 0.095669684 -0.15666698 -0.091175327 1.110223e-15 - 3 -10220.276 0.095691296 -0.15671615 -0.091178696 0 - 4 -10215.894 0.09571384 -0.15676787 -0.091181678 8.8817842e-16 - 5 -10210.804 0.095734178 -0.15681468 -0.09118418 1.3322676e-15 - 6 -10205.342 0.095751126 -0.15685409 -0.091185769 4.4408921e-16 - 7 -10199.848 0.095762403 -0.15688037 -0.091186751 0 - 8 -10194.646 0.095766449 -0.15689014 -0.091186673 -4.4408921e-16 - 9 -10190.016 0.095761078 -0.15687818 -0.09118551 -4.4408921e-16 - 10 -10186.168 0.095747223 -0.15684634 -0.091183742 0 -Loop time of 0.0185181 on 4 procs for 10 steps with 105 atoms - -Performance: 11.664 ns/day, 2.058 hours/ns, 540.011 timesteps/s -92.5% CPU use with 4 MPI tasks x 1 OpenMP threads - -MPI task timing breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 0.0097179 | 0.01078 | 0.012052 | 0.8 | 58.21 -Neigh | 0 | 0 | 0 | 0.0 | 0.00 -Comm | 0.00041604 | 0.0017492 | 0.0028496 | 2.1 | 9.45 -Output | 0.00041103 | 0.00046283 | 0.00051498 | 0.0 | 2.50 -Modify | 0.0051849 | 0.0052357 | 0.0052917 | 0.1 | 28.27 -Other | | 0.0002902 | | | 1.57 - -Nlocal: 26.25 ave 35 max 15 min -Histogram: 1 
0 0 1 0 0 0 0 0 2 -Nghost: 300 ave 357 max 239 min -Histogram: 2 0 0 0 0 0 0 0 0 2 -Neighs: 1025.25 ave 1468 max 405 min -Histogram: 1 0 0 0 1 0 0 0 0 2 - -Total # of neighbors = 4101 -Ave neighs/atom = 39.0571 -Neighbor list builds = 0 -Dangerous builds = 0 - -Please see the log.cite file for references relevant to this simulation - -Total wall time: 0:00:00 diff --git a/examples/qeq/log.30Nov23.reaxff.g++.1 b/examples/qeq/log.30Nov23.reaxff.g++.1 new file mode 100644 index 0000000000..c79d9fa662 --- /dev/null +++ b/examples/qeq/log.30Nov23.reaxff.g++.1 @@ -0,0 +1,146 @@ +LAMMPS (21 Nov 2023) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +# This example demonstrates the use of various fix qeq variants with pair reaxff +# You can comment in/out various versions below +# +# 1) Fix qeq/shielded generates the same results compared to fix qeq/reax when +# used with pair_style reaxff, provided that the QEq parameters are the same. +# +# 2) Fix qeq/point and fix qeq/dynamic generate comparable results provided that +# the QEq parameters are the same. These two styles can also be used with +# pair_style reaxff. + +units real +atom_style charge + +read_data data.CHO +Reading data file ... + orthogonal box = (0 0 0) to (25 25 25) + 1 by 1 by 1 MPI processor grid + reading atoms ... 
+ 105 atoms + read_data CPU = 0.001 seconds + +pair_style reaxff NULL checkqeq no +pair_coeff * * ffield.reax.cho H C O +Reading potential file ffield.reax.cho with DATE: 2011-02-18 + +neighbor 1 bin +neigh_modify every 1 delay 0 check yes + +group type1 type 1 +60 atoms in group type1 +compute charge1 type1 property/atom q +compute q1 type1 reduce ave c_charge1 +group type2 type 2 +25 atoms in group type2 +compute charge2 type2 property/atom q +compute q2 type2 reduce ave c_charge2 +group type3 type 3 +20 atoms in group type3 +compute charge3 type3 property/atom q +compute q3 type3 reduce ave c_charge3 +variable qtot equal count(type1)*c_q1+count(type2)*c_q2+count(type3)*c_q3 + +thermo_style custom step pe c_q1 c_q2 c_q3 v_qtot +thermo 1 + +velocity all create 300.0 1281937 +fix 1 all nve + +fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq.reax +#fix 2 all qeq/shielded 1 10.0 1e-6 400 param.qeq1 +#fix 2 all qeq/point 1 10.0 1e-6 400 param.qeq1 +#fix 2 all qeq/dynamic 1 10.0 1e-3 100 param.qeq1 + +timestep 0.25 + +run 10 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +- fix qeq/reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Neighbor list info ... 
+ update: every = 1 steps, delay = 0 steps, check = yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 11 + ghost atom cutoff = 11 + binsize = 5.5, bins = 5 5 5 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/newtoff/ghost + stencil: full/ghost/bin/3d + bin: standard + (2) fix qeq/reax, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 14.54 | 14.54 | 14.54 Mbytes + Step PotEng c_q1 c_q2 c_q3 v_qtot + 0 -10226.557 0.095633909 -0.15658753 -0.091167311 -2.8865799e-15 + 1 -10225.799 0.095650157 -0.15662321 -0.091171465 5.3290705e-15 + 2 -10223.656 0.095669727 -0.15666713 -0.091175264 -2.8865799e-15 + 3 -10220.276 0.095691262 -0.15671593 -0.09117887 4.6629367e-15 + 4 -10215.894 0.095714037 -0.15676816 -0.091181914 4.4408921e-16 + 5 -10210.804 0.095733939 -0.15681378 -0.091184589 -3.9968029e-15 + 6 -10205.342 0.09575102 -0.15685378 -0.091185835 -3.5527137e-15 + 7 -10199.848 0.095762356 -0.1568802 -0.091186815 2.220446e-15 + 8 -10194.646 0.095766731 -0.15689071 -0.091186805 -3.9968029e-15 + 9 -10190.016 0.095761083 -0.15687817 -0.091185537 -2.6645353e-15 + 10 -10186.168 0.095747444 -0.15684695 -0.091183644 -1.5543122e-15 +Loop time of 0.013327 on 1 procs for 10 steps with 105 atoms + +Performance: 16.208 ns/day, 1.481 hours/ns, 750.359 timesteps/s, 78.788 katom-step/s +97.3% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0.010565 | 0.010565 | 0.010565 | 0.0 | 79.28 +Neigh | 0 | 0 | 0 | 0.0 | 0.00 +Comm | 2.3272e-05 | 2.3272e-05 | 2.3272e-05 | 0.0 | 0.17 +Output | 0.00023198 | 0.00023198 | 0.00023198 | 0.0 | 1.74 +Modify | 0.0024913 | 0.0024913 | 0.0024913 | 0.0 | 18.69 +Other | | 1.529e-05 
| | | 0.11 + +Nlocal: 105 ave 105 max 105 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 512 ave 512 max 512 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 3417 ave 3417 max 3417 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 3417 +Ave neighs/atom = 32.542857 +Neighbor list builds = 0 +Dangerous builds = 0 +Total wall time: 0:00:00 diff --git a/examples/qeq/log.30Nov23.reaxff.g++.4 b/examples/qeq/log.30Nov23.reaxff.g++.4 new file mode 100644 index 0000000000..6f58a13a7f --- /dev/null +++ b/examples/qeq/log.30Nov23.reaxff.g++.4 @@ -0,0 +1,146 @@ +LAMMPS (21 Nov 2023) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +# This example demonstrates the use of various fix qeq variants with pair reaxff +# You can comment in/out various versions below +# +# 1) Fix qeq/shielded generates the same results compared to fix qeq/reax when +# used with pair_style reaxff, provided that the QEq parameters are the same. +# +# 2) Fix qeq/point and fix qeq/dynamic generate comparable results provided that +# the QEq parameters are the same. These two styles can also be used with +# pair_style reaxff. + +units real +atom_style charge + +read_data data.CHO +Reading data file ... + orthogonal box = (0 0 0) to (25 25 25) + 1 by 2 by 2 MPI processor grid + reading atoms ... 
+ 105 atoms + read_data CPU = 0.000 seconds + +pair_style reaxff NULL checkqeq no +pair_coeff * * ffield.reax.cho H C O +Reading potential file ffield.reax.cho with DATE: 2011-02-18 + +neighbor 1 bin +neigh_modify every 1 delay 0 check yes + +group type1 type 1 +60 atoms in group type1 +compute charge1 type1 property/atom q +compute q1 type1 reduce ave c_charge1 +group type2 type 2 +25 atoms in group type2 +compute charge2 type2 property/atom q +compute q2 type2 reduce ave c_charge2 +group type3 type 3 +20 atoms in group type3 +compute charge3 type3 property/atom q +compute q3 type3 reduce ave c_charge3 +variable qtot equal count(type1)*c_q1+count(type2)*c_q2+count(type3)*c_q3 + +thermo_style custom step pe c_q1 c_q2 c_q3 v_qtot +thermo 1 + +velocity all create 300.0 1281937 +fix 1 all nve + +fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq.reax +#fix 2 all qeq/shielded 1 10.0 1e-6 400 param.qeq1 +#fix 2 all qeq/point 1 10.0 1e-6 400 param.qeq1 +#fix 2 all qeq/dynamic 1 10.0 1e-3 100 param.qeq1 + +timestep 0.25 + +run 10 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +- fix qeq/reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Neighbor list info ... 
+ update: every = 1 steps, delay = 0 steps, check = yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 11 + ghost atom cutoff = 11 + binsize = 5.5, bins = 5 5 5 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/newtoff/ghost + stencil: full/ghost/bin/3d + bin: standard + (2) fix qeq/reax, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 9.845 | 10.57 | 11.28 Mbytes + Step PotEng c_q1 c_q2 c_q3 v_qtot + 0 -10226.557 0.095633904 -0.15658758 -0.091167237 -8.8817842e-16 + 1 -10225.799 0.095650278 -0.1566235 -0.091171458 -1.7763568e-15 + 2 -10223.656 0.095669806 -0.15666728 -0.091175321 0 + 3 -10220.276 0.095691215 -0.15671588 -0.091178792 1.7763568e-15 + 4 -10215.894 0.09571392 -0.15676795 -0.091181826 1.7763568e-15 + 5 -10210.804 0.095734058 -0.15681436 -0.091184227 1.3322676e-15 + 6 -10205.342 0.095751113 -0.15685409 -0.091185731 -4.4408921e-16 + 7 -10199.848 0.095762524 -0.15688062 -0.091186803 -1.3322676e-15 + 8 -10194.646 0.095766647 -0.15689045 -0.091186875 2.8865799e-15 + 9 -10190.016 0.095760978 -0.15687772 -0.09118579 -4.4408921e-16 + 10 -10186.168 0.095747037 -0.15684594 -0.091183687 -1.5543122e-15 +Loop time of 0.00732332 on 4 procs for 10 steps with 105 atoms + +Performance: 29.495 ns/day, 0.814 hours/ns, 1365.500 timesteps/s, 143.378 katom-step/s +99.2% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0.0033186 | 0.0038166 | 0.0041063 | 0.5 | 52.12 +Neigh | 0 | 0 | 0 | 0.0 | 0.00 +Comm | 0.0002671 | 0.00057126 | 0.0010608 | 0.0 | 7.80 +Output | 0.00019157 | 0.0002237 | 0.00028058 | 0.0 | 3.05 +Modify | 0.0026446 | 0.0026528 | 0.0026604 | 0.0 | 36.22 +Other | | 5.9e-05 | | | 0.81 
+ +Nlocal: 26.25 ave 35 max 15 min +Histogram: 1 0 0 1 0 0 0 0 0 2 +Nghost: 300 ave 357 max 239 min +Histogram: 2 0 0 0 0 0 0 0 0 2 +Neighs: 1025.25 ave 1468 max 405 min +Histogram: 1 0 0 0 1 0 0 0 0 2 + +Total # of neighbors = 4101 +Ave neighs/atom = 39.057143 +Neighbor list builds = 0 +Dangerous builds = 0 +Total wall time: 0:00:00 diff --git a/examples/reaxff/AB/in.AB b/examples/reaxff/AB/in.AB index 5f19b30b6c..30d96f2622 100644 --- a/examples/reaxff/AB/in.AB +++ b/examples/reaxff/AB/in.AB @@ -1,23 +1,23 @@ # REAX potential for Nitroamines system # ..... -units real +units real -atom_style charge -read_data data.AB +atom_style charge +read_data data.AB -pair_style reax/c lmp_control -pair_coeff * * ffield.reax.AB H B N +pair_style reaxff lmp_control +pair_coeff * * ffield.reax.AB H B N -neighbor 2 bin -neigh_modify every 10 delay 0 check no +neighbor 2 bin +neigh_modify every 10 delay 0 check no -fix 1 all nve +fix 1 all nve fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq fix 3 all temp/berendsen 500.0 500.0 100.0 -timestep 0.25 +timestep 0.25 +thermo 100 +#dump 1 all atom 30 dump.reax.ab -#dump 1 all atom 30 dump.reax.ab - -run 3000 +run 2000 diff --git a/examples/reaxff/AB/lmp_control b/examples/reaxff/AB/lmp_control index c5d98968a5..b97ad8a67f 100644 --- a/examples/reaxff/AB/lmp_control +++ b/examples/reaxff/AB/lmp_control @@ -1,17 +1,8 @@ -simulation_name AB_example ! output files will carry this name + their specific ext -tabulate_long_range 10000 ! denotes the granularity of long range tabulation, 0 means no tabulation -energy_update_freq 1 +tabulate_long_range 10000 ! denotes the granularity of long range tabulation, 0 means no tabulation -nbrhood_cutoff 4.5 ! near neighbors cutoff for bond calculations in A -hbond_cutoff 6.0 ! cutoff distance for hydrogen bond interactions -bond_graph_cutoff 0.3 ! bond strength cutoff for bond graphs -thb_cutoff 0.001 ! cutoff value for three body interactions +nbrhood_cutoff 4.5 ! 
near neighbors cutoff for bond calculations in A +hbond_cutoff 6.0 ! cutoff distance for hydrogen bond interactions +bond_graph_cutoff 0.3 ! bond strength cutoff for bond graphs +thb_cutoff 0.001 ! cutoff value for three body interactions -write_freq 1 ! write trajectory after so many steps -traj_title AB ! (no white spaces) -atom_info 1 ! 0: no atom info, 1: print basic atom info in the trajectory file -atom_forces 1 ! 0: basic atom format, 1: print force on each atom in the trajectory file -atom_velocities 0 ! 0: basic atom format, 1: print the velocity of each atom in the trajectory file -bond_info 1 ! 0: do not print bonds, 1: print bonds in the trajectory file -angle_info 1 ! 0: do not print angles, 1: print angles in the trajectory file diff --git a/examples/reaxff/AB/log.30Nov23.AB.g++.1 b/examples/reaxff/AB/log.30Nov23.AB.g++.1 new file mode 100644 index 0000000000..e966977f54 --- /dev/null +++ b/examples/reaxff/AB/log.30Nov23.AB.g++.1 @@ -0,0 +1,131 @@ +LAMMPS (21 Nov 2023) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +# REAX potential for Nitroamines system +# ..... + +units real + +atom_style charge +read_data data.AB +Reading data file ... + orthogonal box = (0 0 0) to (25 25 25) + 1 by 1 by 1 MPI processor grid + reading atoms ... 
+ 104 atoms + read_data CPU = 0.001 seconds + +pair_style reaxff lmp_control +pair_coeff * * ffield.reax.AB H B N +Reading potential file ffield.reax.AB with DATE: 2011-02-18 +WARNING: Changed valency_val to valency_boc for X (src/REAXFF/reaxff_ffield.cpp:289) + +neighbor 2 bin +neigh_modify every 10 delay 0 check no + +fix 1 all nve +fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq +fix 3 all temp/berendsen 500.0 500.0 100.0 + +timestep 0.25 +thermo 100 +#dump 1 all atom 30 dump.reax.ab + +run 2000 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +- fix qeq/reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Neighbor list info ... 
+ update: every = 10 steps, delay = 0 steps, check = no + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 12 + ghost atom cutoff = 12 + binsize = 6, bins = 5 5 5 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/newtoff/ghost + stencil: full/ghost/bin/3d + bin: standard + (2) fix qeq/reax, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 16.54 | 16.54 | 16.54 Mbytes + Step Temp E_pair E_mol TotEng Press + 0 0 -8505.1816 0 -8505.1816 -673.36566 + 100 83.873108 -8497.003 0 -8471.252 -609.71138 + 200 125.22992 -8479.8879 0 -8441.4394 -1069.4072 + 300 202.34273 -8479.1321 0 -8417.0081 -707.7946 + 400 260.53055 -8476.7914 0 -8396.8025 221.10403 + 500 282.47043 -8466.8576 0 -8380.1326 -223.61988 + 600 288.72043 -8452.9503 0 -8364.3064 681.87761 + 700 379.03381 -8467.4869 0 -8351.1146 921.82426 + 800 382.0856 -8458.717 0 -8341.4078 253.69164 + 900 380.10802 -8449.5745 0 -8332.8725 1199.5539 + 1000 377.60669 -8440.3419 0 -8324.4078 -365.02585 + 1100 372.89451 -8428.8743 0 -8314.387 -1401.9593 + 1200 392.77958 -8426.3492 0 -8305.7567 -572.78319 + 1300 429.04209 -8430.6839 0 -8298.958 -409.55236 + 1400 471.52489 -8438.2785 0 -8293.5093 -16.649651 + 1500 404.49399 -8411.1192 0 -8286.93 338.99191 + 1600 443.77567 -8418.1237 0 -8281.8741 -774.22575 + 1700 479.8234 -8424.6901 0 -8277.3731 65.260334 + 1800 386.73299 -8390.8969 0 -8272.1608 70.076616 + 1900 431.57275 -8401.0671 0 -8268.5641 30.882406 + 2000 454.96043 -8406.0467 0 -8266.3632 728.1499 +Loop time of 2.35094 on 1 procs for 2000 steps with 104 atoms + +Performance: 18.376 ns/day, 1.306 hours/ns, 850.725 timesteps/s, 88.475 katom-step/s +99.7% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total 
+--------------------------------------------------------------- +Pair | 1.9254 | 1.9254 | 1.9254 | 0.0 | 81.90 +Neigh | 0.10479 | 0.10479 | 0.10479 | 0.0 | 4.46 +Comm | 0.0067523 | 0.0067523 | 0.0067523 | 0.0 | 0.29 +Output | 0.0005375 | 0.0005375 | 0.0005375 | 0.0 | 0.02 +Modify | 0.31152 | 0.31152 | 0.31152 | 0.0 | 13.25 +Other | | 0.001934 | | | 0.08 + +Nlocal: 104 ave 104 max 104 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 710 ave 710 max 710 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 3076 ave 3076 max 3076 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 3076 +Ave neighs/atom = 29.576923 +Neighbor list builds = 200 +Dangerous builds not checked +Total wall time: 0:00:02 diff --git a/examples/reaxff/AB/log.30Nov23.AB.g++.4 b/examples/reaxff/AB/log.30Nov23.AB.g++.4 new file mode 100644 index 0000000000..5b3ae33fbe --- /dev/null +++ b/examples/reaxff/AB/log.30Nov23.AB.g++.4 @@ -0,0 +1,131 @@ +LAMMPS (21 Nov 2023) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +# REAX potential for Nitroamines system +# ..... + +units real + +atom_style charge +read_data data.AB +Reading data file ... + orthogonal box = (0 0 0) to (25 25 25) + 1 by 2 by 2 MPI processor grid + reading atoms ... 
+ 104 atoms + read_data CPU = 0.001 seconds + +pair_style reaxff lmp_control +pair_coeff * * ffield.reax.AB H B N +Reading potential file ffield.reax.AB with DATE: 2011-02-18 +WARNING: Changed valency_val to valency_boc for X (src/REAXFF/reaxff_ffield.cpp:289) + +neighbor 2 bin +neigh_modify every 10 delay 0 check no + +fix 1 all nve +fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq +fix 3 all temp/berendsen 500.0 500.0 100.0 + +timestep 0.25 +thermo 100 +#dump 1 all atom 30 dump.reax.ab + +run 2000 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +- fix qeq/reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Neighbor list info ... 
+ update: every = 10 steps, delay = 0 steps, check = no + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 12 + ghost atom cutoff = 12 + binsize = 6, bins = 5 5 5 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/newtoff/ghost + stencil: full/ghost/bin/3d + bin: standard + (2) fix qeq/reax, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 11.06 | 11.68 | 11.96 Mbytes + Step Temp E_pair E_mol TotEng Press + 0 0 -8505.1816 0 -8505.1816 -673.36566 + 100 83.873123 -8497.0031 0 -8471.252 -609.71119 + 200 125.23001 -8479.8879 0 -8441.4394 -1069.4122 + 300 202.34219 -8479.1319 0 -8417.0081 -707.82246 + 400 260.52726 -8476.7906 0 -8396.8026 221.14446 + 500 282.4624 -8466.8556 0 -8380.133 -223.17501 + 600 288.8059 -8452.9729 0 -8364.3028 679.38441 + 700 378.87007 -8467.429 0 -8351.107 920.99401 + 800 382.10004 -8458.7194 0 -8341.4058 256.06383 + 900 379.69698 -8449.4416 0 -8332.8657 1266.1715 + 1000 379.63496 -8440.9584 0 -8324.4015 -604.987 + 1100 372.82256 -8428.7507 0 -8314.2854 -1236.8451 + 1200 397.12809 -8427.4286 0 -8305.501 -356.42394 + 1300 413.36951 -8425.3861 0 -8298.472 -47.619729 + 1400 428.68835 -8424.4328 0 -8292.8154 -812.52975 + 1500 403.59408 -8411.0829 0 -8287.1701 71.054401 + 1600 448.76276 -8419.8186 0 -8282.0379 -339.19148 + 1700 450.87444 -8416.1981 0 -8277.769 -44.043208 + 1800 485.33509 -8421.3776 0 -8272.3684 -848.94941 + 1900 481.36374 -8416.1719 0 -8268.382 -282.62675 + 2000 437.25967 -8398.9233 0 -8264.6743 -217.40762 +Loop time of 1.44368 on 4 procs for 2000 steps with 104 atoms + +Performance: 29.924 ns/day, 0.802 hours/ns, 1385.350 timesteps/s, 144.076 katom-step/s +99.1% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total 
+--------------------------------------------------------------- +Pair | 0.77999 | 0.87212 | 0.96576 | 8.1 | 60.41 +Neigh | 0.054058 | 0.059726 | 0.06287 | 1.4 | 4.14 +Comm | 0.031767 | 0.12609 | 0.21802 | 21.3 | 8.73 +Output | 0.00041377 | 0.00045661 | 0.00058001 | 0.0 | 0.03 +Modify | 0.3805 | 0.38348 | 0.3894 | 0.6 | 26.56 +Other | | 0.001808 | | | 0.13 + +Nlocal: 26 ave 34 max 14 min +Histogram: 1 0 0 0 0 1 0 0 0 2 +Nghost: 429.25 ave 457 max 386 min +Histogram: 1 0 0 0 0 1 0 0 0 2 +Neighs: 922.5 ave 1238 max 496 min +Histogram: 1 0 0 0 1 0 0 0 1 1 + +Total # of neighbors = 3690 +Ave neighs/atom = 35.480769 +Neighbor list builds = 200 +Dangerous builds not checked +Total wall time: 0:00:01 diff --git a/examples/reaxff/AB/log.8Mar18.AB.g++.1 b/examples/reaxff/AB/log.8Mar18.AB.g++.1 deleted file mode 100644 index 065b1a1e67..0000000000 --- a/examples/reaxff/AB/log.8Mar18.AB.g++.1 +++ /dev/null @@ -1,81 +0,0 @@ -LAMMPS (8 Mar 2018) - using 1 OpenMP thread(s) per MPI task -# REAX potential for Nitroamines system -# ..... - -units real - -atom_style charge -read_data data.AB - orthogonal box = (0 0 0) to (25 25 25) - 1 by 1 by 1 MPI processor grid - reading atoms ... - 104 atoms - -pair_style reax/c lmp_control -pair_coeff * * ffield.reax.AB H B N -Reading potential file ffield.reax.AB with DATE: 2011-02-18 - -neighbor 2 bin -neigh_modify every 10 delay 0 check no - -fix 1 all nve -fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq -fix 3 all temp/berendsen 500.0 500.0 100.0 - -timestep 0.25 - -#dump 1 all atom 30 dump.reax.ab - -run 3000 -Neighbor list info ... 
- update every 10 steps, delay 0 steps, check no - max neighbors/atom: 2000, page size: 100000 - master list distance cutoff = 12 - ghost atom cutoff = 12 - binsize = 6, bins = 5 5 5 - 2 neighbor lists, perpetual/occasional/extra = 2 0 0 - (1) pair reax/c, perpetual - attributes: half, newton off, ghost - pair build: half/bin/newtoff/ghost - stencil: half/ghost/bin/3d/newtoff - bin: standard - (2) fix qeq/reax, perpetual, copy from (1) - attributes: half, newton off, ghost - pair build: copy - stencil: none - bin: none -Per MPI rank memory allocation (min/avg/max) = 19.3 | 19.3 | 19.3 Mbytes -Step Temp E_pair E_mol TotEng Press - 0 0 -8505.1816 0 -8505.1816 -673.36566 - 3000 478.18595 -8398.4168 0 -8251.6025 1452.6935 -Loop time of 14.3573 on 1 procs for 3000 steps with 104 atoms - -Performance: 4.513 ns/day, 5.318 hours/ns, 208.952 timesteps/s -96.6% CPU use with 1 MPI tasks x 1 OpenMP threads - -MPI task timing breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 12.709 | 12.709 | 12.709 | 0.0 | 88.52 -Neigh | 0.36804 | 0.36804 | 0.36804 | 0.0 | 2.56 -Comm | 0.022419 | 0.022419 | 0.022419 | 0.0 | 0.16 -Output | 2.8133e-05 | 2.8133e-05 | 2.8133e-05 | 0.0 | 0.00 -Modify | 1.2513 | 1.2513 | 1.2513 | 0.0 | 8.72 -Other | | 0.006263 | | | 0.04 - -Nlocal: 104 ave 104 max 104 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Nghost: 694 ave 694 max 694 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Neighs: 2866 ave 2866 max 2866 min -Histogram: 1 0 0 0 0 0 0 0 0 0 - -Total # of neighbors = 2866 -Ave neighs/atom = 27.5577 -Neighbor list builds = 300 -Dangerous builds not checked - -Please see the log.cite file for references relevant to this simulation - -Total wall time: 0:00:14 diff --git a/examples/reaxff/AB/log.8Mar18.AB.g++.4 b/examples/reaxff/AB/log.8Mar18.AB.g++.4 deleted file mode 100644 index 1e02ec5725..0000000000 --- a/examples/reaxff/AB/log.8Mar18.AB.g++.4 +++ /dev/null @@ -1,81 +0,0 @@ -LAMMPS 
(8 Mar 2018) - using 1 OpenMP thread(s) per MPI task -# REAX potential for Nitroamines system -# ..... - -units real - -atom_style charge -read_data data.AB - orthogonal box = (0 0 0) to (25 25 25) - 1 by 2 by 2 MPI processor grid - reading atoms ... - 104 atoms - -pair_style reax/c lmp_control -pair_coeff * * ffield.reax.AB H B N -Reading potential file ffield.reax.AB with DATE: 2011-02-18 - -neighbor 2 bin -neigh_modify every 10 delay 0 check no - -fix 1 all nve -fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq -fix 3 all temp/berendsen 500.0 500.0 100.0 - -timestep 0.25 - -#dump 1 all atom 30 dump.reax.ab - -run 3000 -Neighbor list info ... - update every 10 steps, delay 0 steps, check no - max neighbors/atom: 2000, page size: 100000 - master list distance cutoff = 12 - ghost atom cutoff = 12 - binsize = 6, bins = 5 5 5 - 2 neighbor lists, perpetual/occasional/extra = 2 0 0 - (1) pair reax/c, perpetual - attributes: half, newton off, ghost - pair build: half/bin/newtoff/ghost - stencil: half/ghost/bin/3d/newtoff - bin: standard - (2) fix qeq/reax, perpetual, copy from (1) - attributes: half, newton off, ghost - pair build: copy - stencil: none - bin: none -Per MPI rank memory allocation (min/avg/max) = 12.38 | 13.22 | 13.64 Mbytes -Step Temp E_pair E_mol TotEng Press - 0 0 -8505.1816 0 -8505.1816 -673.36566 - 3000 555.17702 -8426.5541 0 -8256.1017 219.26856 -Loop time of 9.03521 on 4 procs for 3000 steps with 104 atoms - -Performance: 7.172 ns/day, 3.346 hours/ns, 332.034 timesteps/s -94.6% CPU use with 4 MPI tasks x 1 OpenMP threads - -MPI task timing breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 7.0347 | 7.0652 | 7.1049 | 1.0 | 78.20 -Neigh | 0.18481 | 0.20727 | 0.22108 | 3.0 | 2.29 -Comm | 0.075175 | 0.11496 | 0.14517 | 7.4 | 1.27 -Output | 2.2888e-05 | 2.569e-05 | 3.1948e-05 | 0.0 | 0.00 -Modify | 1.6286 | 1.6421 | 1.6649 | 1.1 | 18.17 -Other | | 0.005646 | | | 0.06 
- -Nlocal: 26 ave 35 max 13 min -Histogram: 1 0 0 0 0 1 0 0 1 1 -Nghost: 420.25 ave 454 max 370 min -Histogram: 1 0 0 0 0 1 0 0 1 1 -Neighs: 862.5 ave 1178 max 444 min -Histogram: 1 0 0 0 1 0 0 0 1 1 - -Total # of neighbors = 3450 -Ave neighs/atom = 33.1731 -Neighbor list builds = 300 -Dangerous builds not checked - -Please see the log.cite file for references relevant to this simulation - -Total wall time: 0:00:09 diff --git a/examples/reaxff/AuO/in.AuO b/examples/reaxff/AuO/in.AuO index 90ae812f7d..4e5162d620 100644 --- a/examples/reaxff/AuO/in.AuO +++ b/examples/reaxff/AuO/in.AuO @@ -1,23 +1,25 @@ # REAX potential for AuO system # ..... -units real +units real -atom_style charge -read_data data.AuO +atom_style charge +read_data data.AuO -pair_style reax/c lmp_control -pair_coeff * * ffield.reax.AuO O Au +pair_style reaxff lmp_control +pair_coeff * * ffield.reax.AuO O Au -neighbor 2 bin -neigh_modify every 10 delay 0 check no +neighbor 2 bin +neigh_modify every 10 delay 0 check no -fix 1 all nve +fix 1 all nve fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq fix 3 all temp/berendsen 500.0 500.0 100.0 -timestep 0.25 +timestep 0.25 -#dump 1 all atom 30 dump.reax.auo +thermo 5 -run 100 +#dump 1 all atom 30 dump.reax.auo + +run 100 diff --git a/examples/reaxff/AuO/lmp_control b/examples/reaxff/AuO/lmp_control index d24ae0a055..ecf22940b7 100644 --- a/examples/reaxff/AuO/lmp_control +++ b/examples/reaxff/AuO/lmp_control @@ -1,17 +1,7 @@ -simulation_name AuO_example ! output files will carry this name + their specific ext +tabulate_long_range 10000 ! denotes the granularity of long range tabulation, 0 means no tabulation -tabulate_long_range 10000 ! denotes the granularity of long range tabulation, 0 means no tabulation -energy_update_freq 1 +nbrhood_cutoff 4.5 ! near neighbors cutoff for bond calculations in A +hbond_cutoff 6.0 ! cutoff distance for hydrogen bond interactions +bond_graph_cutoff 0.3 ! bond strength cutoff for bond graphs +thb_cutoff 0.001 ! 
cutoff value for three body interactions -nbrhood_cutoff 4.5 ! near neighbors cutoff for bond calculations in A -hbond_cutoff 6.0 ! cutoff distance for hydrogen bond interactions -bond_graph_cutoff 0.3 ! bond strength cutoff for bond graphs -thb_cutoff 0.001 ! cutoff value for three body interactions - -write_freq 1 ! write trajectory after so many steps -traj_title AuO ! (no white spaces) -atom_info 1 ! 0: no atom info, 1: print basic atom info in the trajectory file -atom_forces 1 ! 0: basic atom format, 1: print force on each atom in the trajectory file -atom_velocities 0 ! 0: basic atom format, 1: print the velocity of each atom in the trajectory file -bond_info 1 ! 0: do not print bonds, 1: print bonds in the trajectory file -angle_info 1 ! 0: do not print angles, 1: print angles in the trajectory file diff --git a/examples/reaxff/AuO/log.30Nov23.AuO.g++.1 b/examples/reaxff/AuO/log.30Nov23.AuO.g++.1 new file mode 100644 index 0000000000..8896ef1015 --- /dev/null +++ b/examples/reaxff/AuO/log.30Nov23.AuO.g++.1 @@ -0,0 +1,132 @@ +LAMMPS (21 Nov 2023) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +# REAX potential for AuO system +# ..... + +units real + +atom_style charge +read_data data.AuO +Reading data file ... + orthogonal box = (0 0 0) to (26.15618 21.54252 24.00246) + 1 by 1 by 1 MPI processor grid + reading atoms ... 
+ 960 atoms + read_data CPU = 0.004 seconds + +pair_style reaxff lmp_control +pair_coeff * * ffield.reax.AuO O Au +Reading potential file ffield.reax.AuO with DATE: 2011-02-18 + +neighbor 2 bin +neigh_modify every 10 delay 0 check no + +fix 1 all nve +fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq +fix 3 all temp/berendsen 500.0 500.0 100.0 + +timestep 0.25 + +thermo 5 + +#dump 1 all atom 30 dump.reax.auo + +run 100 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +- fix qeq/reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Neighbor list info ... 
+ update: every = 10 steps, delay = 0 steps, check = no + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 12 + ghost atom cutoff = 12 + binsize = 6, bins = 5 4 5 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/newtoff/ghost + stencil: full/ghost/bin/3d + bin: standard + (2) fix qeq/reax, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 129.2 | 129.2 | 129.2 Mbytes + Step Temp E_pair E_mol TotEng Press + 0 0 -72201.743 0 -72201.743 -166.19508 + 5 6.5398577 -72202.679 0 -72183.984 71.658901 + 10 13.280881 -72204.445 0 -72166.481 515.28836 + 15 19.951637 -72206.24 0 -72149.206 886.438 + 20 26.441301 -72207.78 0 -72132.195 1549.914 + 25 32.580167 -72208.5 0 -72115.367 2309.8004 + 30 38.264935 -72208.14 0 -72098.756 3148.7379 + 35 43.433009 -72206.523 0 -72082.366 3853.4389 + 40 48.028176 -72203.472 0 -72066.178 4830.1846 + 45 52.019459 -72198.85 0 -72050.147 5881.5166 + 50 55.407353 -72192.638 0 -72034.251 6996.89 + 55 58.218407 -72184.89 0 -72018.467 8191.8057 + 60 60.499102 -72175.717 0 -72002.774 9470.0601 + 65 62.309031 -72165.271 0 -71987.155 10831.309 + 70 63.72857 -72153.749 0 -71971.575 12270.345 + 75 64.847533 -72141.43 0 -71956.057 13791.775 + 80 65.755809 -72128.548 0 -71940.579 15397.406 + 85 66.547696 -72115.362 0 -71925.129 17100.883 + 90 67.309412 -72102.119 0 -71909.708 18888.699 + 95 68.120206 -72089.043 0 -71894.315 20757.038 + 100 69.043359 -72076.31 0 -71878.942 22702.463 +Loop time of 5.72003 on 1 procs for 100 steps with 960 atoms + +Performance: 0.378 ns/day, 63.556 hours/ns, 17.482 timesteps/s, 16.783 katom-step/s +99.8% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 
4.2903 | 4.2903 | 4.2903 | 0.0 | 75.00 +Neigh | 0.31194 | 0.31194 | 0.31194 | 0.0 | 5.45 +Comm | 0.0034139 | 0.0034139 | 0.0034139 | 0.0 | 0.06 +Output | 0.0005041 | 0.0005041 | 0.0005041 | 0.0 | 0.01 +Modify | 1.1134 | 1.1134 | 1.1134 | 0.0 | 19.46 +Other | | 0.0005147 | | | 0.01 + +Nlocal: 960 ave 960 max 960 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 6708 ave 6708 max 6708 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 369128 ave 369128 max 369128 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 369128 +Ave neighs/atom = 384.50833 +Neighbor list builds = 10 +Dangerous builds not checked +Total wall time: 0:00:05 diff --git a/examples/reaxff/AuO/log.30Nov23.AuO.g++.4 b/examples/reaxff/AuO/log.30Nov23.AuO.g++.4 new file mode 100644 index 0000000000..ba3b81ea1b --- /dev/null +++ b/examples/reaxff/AuO/log.30Nov23.AuO.g++.4 @@ -0,0 +1,132 @@ +LAMMPS (21 Nov 2023) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +# REAX potential for AuO system +# ..... + +units real + +atom_style charge +read_data data.AuO +Reading data file ... + orthogonal box = (0 0 0) to (26.15618 21.54252 24.00246) + 2 by 1 by 2 MPI processor grid + reading atoms ... + 960 atoms + read_data CPU = 0.002 seconds + +pair_style reaxff lmp_control +pair_coeff * * ffield.reax.AuO O Au +Reading potential file ffield.reax.AuO with DATE: 2011-02-18 + +neighbor 2 bin +neigh_modify every 10 delay 0 check no + +fix 1 all nve +fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq +fix 3 all temp/berendsen 500.0 500.0 100.0 + +timestep 0.25 + +thermo 5 + +#dump 1 all atom 30 dump.reax.auo + +run 100 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. 
Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +- fix qeq/reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Neighbor list info ... + update: every = 10 steps, delay = 0 steps, check = no + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 12 + ghost atom cutoff = 12 + binsize = 6, bins = 5 4 5 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/newtoff/ghost + stencil: full/ghost/bin/3d + bin: standard + (2) fix qeq/reax, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 71.65 | 71.65 | 71.65 Mbytes + Step Temp E_pair E_mol TotEng Press + 0 0 -72201.743 0 -72201.743 -166.19214 + 5 6.5398578 -72202.679 0 -72183.984 71.651708 + 10 13.280883 -72204.445 0 -72166.481 515.29601 + 15 19.951639 -72206.24 0 -72149.206 886.53083 + 20 26.441291 -72207.78 0 -72132.195 1550.0745 + 25 32.580153 -72208.5 0 -72115.366 2309.9393 + 30 38.264928 -72208.14 0 -72098.756 3148.6036 + 35 43.432999 -72206.523 0 -72082.365 3853.6963 + 40 48.028158 -72203.472 0 -72066.179 4830.1407 + 45 52.019436 -72198.85 0 -72050.147 5881.1916 + 50 55.407331 -72192.638 0 -72034.251 6996.6661 + 55 58.218406 -72184.89 0 -72018.467 8191.9075 + 60 60.499115 -72175.716 0 -72002.774 9470.4845 + 65 62.309058 -72165.271 0 -71987.154 10831.926 + 70 63.728581 -72153.75 0 -71971.575 
12269.823 + 75 64.847544 -72141.431 0 -71956.058 13791.586 + 80 65.755816 -72128.549 0 -71940.579 15396.822 + 85 66.547694 -72115.363 0 -71925.13 17100.27 + 90 67.309401 -72102.119 0 -71909.708 18888.633 + 95 68.120175 -72089.042 0 -71894.314 20757.565 + 100 69.043333 -72076.31 0 -71878.943 22701.953 +Loop time of 2.52972 on 4 procs for 100 steps with 960 atoms + +Performance: 0.854 ns/day, 28.108 hours/ns, 39.530 timesteps/s, 37.949 katom-step/s +99.5% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 1.9911 | 2.0015 | 2.0084 | 0.5 | 79.12 +Neigh | 0.16044 | 0.16105 | 0.16228 | 0.2 | 6.37 +Comm | 0.018211 | 0.025417 | 0.03612 | 4.2 | 1.00 +Output | 0.00039837 | 0.00043613 | 0.00054664 | 0.0 | 0.02 +Modify | 0.34008 | 0.34101 | 0.34154 | 0.1 | 13.48 +Other | | 0.0003489 | | | 0.01 + +Nlocal: 240 ave 240 max 240 min +Histogram: 4 0 0 0 0 0 0 0 0 0 +Nghost: 3981 ave 3981 max 3981 min +Histogram: 4 0 0 0 0 0 0 0 0 0 +Neighs: 105979 ave 105979 max 105979 min +Histogram: 4 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 423916 +Ave neighs/atom = 441.57917 +Neighbor list builds = 10 +Dangerous builds not checked +Total wall time: 0:00:02 diff --git a/examples/reaxff/AuO/log.8Mar18.AuO.g++.1 b/examples/reaxff/AuO/log.8Mar18.AuO.g++.1 deleted file mode 100644 index 3c609ac023..0000000000 --- a/examples/reaxff/AuO/log.8Mar18.AuO.g++.1 +++ /dev/null @@ -1,81 +0,0 @@ -LAMMPS (8 Mar 2018) - using 1 OpenMP thread(s) per MPI task -# REAX potential for AuO system -# ..... - -units real - -atom_style charge -read_data data.AuO - orthogonal box = (0 0 0) to (26.1562 21.5425 24.0025) - 1 by 1 by 1 MPI processor grid - reading atoms ... 
- 960 atoms - -pair_style reax/c lmp_control -pair_coeff * * ffield.reax.AuO O Au -Reading potential file ffield.reax.AuO with DATE: 2011-02-18 - -neighbor 2 bin -neigh_modify every 10 delay 0 check no - -fix 1 all nve -fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq -fix 3 all temp/berendsen 500.0 500.0 100.0 - -timestep 0.25 - -#dump 1 all atom 30 dump.reax.auo - -run 100 -Neighbor list info ... - update every 10 steps, delay 0 steps, check no - max neighbors/atom: 2000, page size: 100000 - master list distance cutoff = 12 - ghost atom cutoff = 12 - binsize = 6, bins = 5 4 5 - 2 neighbor lists, perpetual/occasional/extra = 2 0 0 - (1) pair reax/c, perpetual - attributes: half, newton off, ghost - pair build: half/bin/newtoff/ghost - stencil: half/ghost/bin/3d/newtoff - bin: standard - (2) fix qeq/reax, perpetual, copy from (1) - attributes: half, newton off, ghost - pair build: copy - stencil: none - bin: none -Per MPI rank memory allocation (min/avg/max) = 157.6 | 157.6 | 157.6 Mbytes -Step Temp E_pair E_mol TotEng Press - 0 0 -72201.743 0 -72201.743 -166.19482 - 100 69.043331 -72076.309 0 -71878.942 22702.89 -Loop time of 18.4369 on 1 procs for 100 steps with 960 atoms - -Performance: 0.117 ns/day, 204.854 hours/ns, 5.424 timesteps/s -98.7% CPU use with 1 MPI tasks x 1 OpenMP threads - -MPI task timing breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 15.373 | 15.373 | 15.373 | 0.0 | 83.38 -Neigh | 0.58774 | 0.58774 | 0.58774 | 0.0 | 3.19 -Comm | 0.0079026 | 0.0079026 | 0.0079026 | 0.0 | 0.04 -Output | 3.171e-05 | 3.171e-05 | 3.171e-05 | 0.0 | 0.00 -Modify | 2.4665 | 2.4665 | 2.4665 | 0.0 | 13.38 -Other | | 0.001366 | | | 0.01 - -Nlocal: 960 ave 960 max 960 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Nghost: 6708 ave 6708 max 6708 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Neighs: 369128 ave 369128 max 369128 min -Histogram: 1 0 0 0 0 0 0 0 0 0 - -Total # of neighbors = 369128 -Ave 
neighs/atom = 384.508 -Neighbor list builds = 10 -Dangerous builds not checked - -Please see the log.cite file for references relevant to this simulation - -Total wall time: 0:00:18 diff --git a/examples/reaxff/AuO/log.8Mar18.AuO.g++.4 b/examples/reaxff/AuO/log.8Mar18.AuO.g++.4 deleted file mode 100644 index ed98e1f2f4..0000000000 --- a/examples/reaxff/AuO/log.8Mar18.AuO.g++.4 +++ /dev/null @@ -1,81 +0,0 @@ -LAMMPS (8 Mar 2018) - using 1 OpenMP thread(s) per MPI task -# REAX potential for AuO system -# ..... - -units real - -atom_style charge -read_data data.AuO - orthogonal box = (0 0 0) to (26.1562 21.5425 24.0025) - 2 by 1 by 2 MPI processor grid - reading atoms ... - 960 atoms - -pair_style reax/c lmp_control -pair_coeff * * ffield.reax.AuO O Au -Reading potential file ffield.reax.AuO with DATE: 2011-02-18 - -neighbor 2 bin -neigh_modify every 10 delay 0 check no - -fix 1 all nve -fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq -fix 3 all temp/berendsen 500.0 500.0 100.0 - -timestep 0.25 - -#dump 1 all atom 30 dump.reax.auo - -run 100 -Neighbor list info ... 
- update every 10 steps, delay 0 steps, check no - max neighbors/atom: 2000, page size: 100000 - master list distance cutoff = 12 - ghost atom cutoff = 12 - binsize = 6, bins = 5 4 5 - 2 neighbor lists, perpetual/occasional/extra = 2 0 0 - (1) pair reax/c, perpetual - attributes: half, newton off, ghost - pair build: half/bin/newtoff/ghost - stencil: half/ghost/bin/3d/newtoff - bin: standard - (2) fix qeq/reax, perpetual, copy from (1) - attributes: half, newton off, ghost - pair build: copy - stencil: none - bin: none -Per MPI rank memory allocation (min/avg/max) = 87.17 | 87.17 | 87.17 Mbytes -Step Temp E_pair E_mol TotEng Press - 0 0 -72201.743 0 -72201.743 -166.2027 - 100 69.043379 -72076.31 0 -71878.943 22701.771 -Loop time of 8.44797 on 4 procs for 100 steps with 960 atoms - -Performance: 0.256 ns/day, 93.866 hours/ns, 11.837 timesteps/s -96.5% CPU use with 4 MPI tasks x 1 OpenMP threads - -MPI task timing breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 7.3702 | 7.3757 | 7.3879 | 0.3 | 87.31 -Neigh | 0.28875 | 0.29449 | 0.29747 | 0.6 | 3.49 -Comm | 0.015008 | 0.027055 | 0.032681 | 4.3 | 0.32 -Output | 2.4319e-05 | 2.8551e-05 | 3.8624e-05 | 0.0 | 0.00 -Modify | 0.74721 | 0.74985 | 0.75539 | 0.4 | 8.88 -Other | | 0.0008975 | | | 0.01 - -Nlocal: 240 ave 240 max 240 min -Histogram: 4 0 0 0 0 0 0 0 0 0 -Nghost: 3981 ave 3981 max 3981 min -Histogram: 4 0 0 0 0 0 0 0 0 0 -Neighs: 105979 ave 105979 max 105979 min -Histogram: 4 0 0 0 0 0 0 0 0 0 - -Total # of neighbors = 423916 -Ave neighs/atom = 441.579 -Neighbor list builds = 10 -Dangerous builds not checked - -Please see the log.cite file for references relevant to this simulation - -Total wall time: 0:00:08 diff --git a/examples/reaxff/CHO/in.CHO b/examples/reaxff/CHO/in.CHO index 668be5eee0..3baa885ae5 100644 --- a/examples/reaxff/CHO/in.CHO +++ b/examples/reaxff/CHO/in.CHO @@ -1,23 +1,24 @@ # REAX potential for CHO 
system # ..... -units real +units real -atom_style charge -read_data data.CHO +atom_style charge +read_data data.CHO -pair_style reax/c lmp_control -pair_coeff * * ffield.reax.cho H C O +pair_style reaxff lmp_control +pair_coeff * * ffield.reax.cho H C O -neighbor 2 bin -neigh_modify every 10 delay 0 check no +neighbor 2 bin +neigh_modify every 10 delay 0 check no -fix 1 all nve +fix 1 all nve fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq fix 3 all temp/berendsen 500.0 500.0 100.0 -timestep 0.25 +timestep 0.25 -#dump 1 all atom 30 dump.reax.cho +thermo 100 +#dump 1 all atom 30 dump.reax.cho -run 3000 +run 3000 diff --git a/examples/reaxff/CHO/lmp_control b/examples/reaxff/CHO/lmp_control index 6db169bf70..b97ad8a67f 100644 --- a/examples/reaxff/CHO/lmp_control +++ b/examples/reaxff/CHO/lmp_control @@ -1,17 +1,8 @@ -simulation_name CHO_example ! output files will carry this name + their specific ext -tabulate_long_range 10000 ! denotes the granularity of long range tabulation, 0 means no tabulation -energy_update_freq 1 +tabulate_long_range 10000 ! denotes the granularity of long range tabulation, 0 means no tabulation -nbrhood_cutoff 4.5 ! near neighbors cutoff for bond calculations in A -hbond_cutoff 6.0 ! cutoff distance for hydrogen bond interactions -bond_graph_cutoff 0.3 ! bond strength cutoff for bond graphs -thb_cutoff 0.001 ! cutoff value for three body interactions +nbrhood_cutoff 4.5 ! near neighbors cutoff for bond calculations in A +hbond_cutoff 6.0 ! cutoff distance for hydrogen bond interactions +bond_graph_cutoff 0.3 ! bond strength cutoff for bond graphs +thb_cutoff 0.001 ! cutoff value for three body interactions -write_freq 1 ! write trajectory after so many steps -traj_title CHO ! (no white spaces) -atom_info 1 ! 0: no atom info, 1: print basic atom info in the trajectory file -atom_forces 1 ! 0: basic atom format, 1: print force on each atom in the trajectory file -atom_velocities 0 ! 
0: basic atom format, 1: print the velocity of each atom in the trajectory file -bond_info 1 ! 0: do not print bonds, 1: print bonds in the trajectory file -angle_info 1 ! 0: do not print angles, 1: print angles in the trajectory file diff --git a/examples/reaxff/CHO/log.30Nov23.CHO.g++.1 b/examples/reaxff/CHO/log.30Nov23.CHO.g++.1 new file mode 100644 index 0000000000..de8da4edad --- /dev/null +++ b/examples/reaxff/CHO/log.30Nov23.CHO.g++.1 @@ -0,0 +1,141 @@ +LAMMPS (21 Nov 2023) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +# REAX potential for CHO system +# ..... + +units real + +atom_style charge +read_data data.CHO +Reading data file ... + orthogonal box = (0 0 0) to (25 25 25) + 1 by 1 by 1 MPI processor grid + reading atoms ... + 105 atoms + read_data CPU = 0.001 seconds + +pair_style reaxff lmp_control +pair_coeff * * ffield.reax.cho H C O +Reading potential file ffield.reax.cho with DATE: 2011-02-18 + +neighbor 2 bin +neigh_modify every 10 delay 0 check no + +fix 1 all nve +fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq +fix 3 all temp/berendsen 500.0 500.0 100.0 + +timestep 0.25 + +thermo 100 +#dump 1 all atom 30 dump.reax.cho + +run 3000 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +- fix qeq/reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. 
Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Neighbor list info ... + update: every = 10 steps, delay = 0 steps, check = no + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 12 + ghost atom cutoff = 12 + binsize = 6, bins = 5 5 5 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/newtoff/ghost + stencil: full/ghost/bin/3d + bin: standard + (2) fix qeq/reax, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 16.04 | 16.04 | 16.04 Mbytes + Step Temp E_pair E_mol TotEng Press + 0 0 -10226.557 0 -10226.557 -106.09742 + 100 54.051992 -10207.393 0 -10190.636 -291.38729 + 200 134.81151 -10200.411 0 -10158.619 -1637.1719 + 300 140.9118 -10177.136 0 -10133.452 -1668.5701 + 400 254.70109 -10189.927 0 -10110.969 -2522.3829 + 500 228.22383 -10162.396 0 -10091.646 404.00518 + 600 393.48635 -10197.284 0 -10075.301 394.0729 + 700 305.82675 -10156.708 0 -10061.9 362.69731 + 800 375.9566 -10170.288 0 -10053.74 -664.01093 + 900 361.59639 -10155.849 0 -10043.752 458.54613 + 1000 445.46183 -10176.602 0 -10038.507 251.38181 + 1100 475.46673 -10180.119 0 -10032.723 839.6649 + 1200 406.78262 -10155.498 0 -10029.394 62.559824 + 1300 461.0773 -10167.129 0 -10024.193 266.27742 + 1400 408.15446 -10148.62 0 -10022.091 -1187.1776 + 1500 514.43707 -10178.34 0 -10018.863 -616.2329 + 1600 432.19202 -10151.16 0 -10017.179 -677.67834 + 1700 521.01474 -10175.583 0 -10014.066 97.420991 + 1800 409.79407 -10138.825 0 -10011.787 1883.8131 + 1900 481.84667 -10160.146 0 -10010.772 1059.6448 + 2000 423.61284 -10138.538 0 -10007.216 -434.24008 + 2100 521.01756 -10169.192 0 
-10007.674 376.95207 + 2200 477.03314 -10153.033 0 -10005.151 -114.09514 + 2300 477.80526 -10153.294 0 -10005.172 869.97281 + 2400 471.49741 -10149.165 0 -10002.999 689.65295 + 2500 482.38958 -10152.956 0 -10003.413 352.08649 + 2600 505.57503 -10159.507 0 -10002.777 -812.75272 + 2700 498.41415 -10156.448 0 -10001.937 -458.03311 + 2800 534.65278 -10166.893 0 -10001.149 169.20767 + 2900 432.93717 -10134.759 0 -10000.546 -184.75627 + 3000 548.46832 -10170.375 0 -10000.347 41.765546 +Loop time of 3.49376 on 1 procs for 3000 steps with 105 atoms + +Performance: 18.547 ns/day, 1.294 hours/ns, 858.673 timesteps/s, 90.161 katom-step/s +99.7% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 2.8082 | 2.8082 | 2.8082 | 0.0 | 80.38 +Neigh | 0.15477 | 0.15477 | 0.15477 | 0.0 | 4.43 +Comm | 0.0097478 | 0.0097478 | 0.0097478 | 0.0 | 0.28 +Output | 0.00081006 | 0.00081006 | 0.00081006 | 0.0 | 0.02 +Modify | 0.51773 | 0.51773 | 0.51773 | 0.0 | 14.82 +Other | | 0.002538 | | | 0.07 + +Nlocal: 105 ave 105 max 105 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 643 ave 643 max 643 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 4237 ave 4237 max 4237 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 4237 +Ave neighs/atom = 40.352381 +Neighbor list builds = 300 +Dangerous builds not checked +Total wall time: 0:00:03 diff --git a/examples/reaxff/CHO/log.30Nov23.CHO.g++.4 b/examples/reaxff/CHO/log.30Nov23.CHO.g++.4 new file mode 100644 index 0000000000..158b1aa657 --- /dev/null +++ b/examples/reaxff/CHO/log.30Nov23.CHO.g++.4 @@ -0,0 +1,141 @@ +LAMMPS (21 Nov 2023) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +# REAX potential for CHO system +# ..... + +units real + +atom_style charge +read_data data.CHO +Reading data file ... 
+ orthogonal box = (0 0 0) to (25 25 25) + 1 by 2 by 2 MPI processor grid + reading atoms ... + 105 atoms + read_data CPU = 0.001 seconds + +pair_style reaxff lmp_control +pair_coeff * * ffield.reax.cho H C O +Reading potential file ffield.reax.cho with DATE: 2011-02-18 + +neighbor 2 bin +neigh_modify every 10 delay 0 check no + +fix 1 all nve +fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq +fix 3 all temp/berendsen 500.0 500.0 100.0 + +timestep 0.25 + +thermo 100 +#dump 1 all atom 30 dump.reax.cho + +run 3000 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +- fix qeq/reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Neighbor list info ... 
+ update: every = 10 steps, delay = 0 steps, check = no + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 12 + ghost atom cutoff = 12 + binsize = 6, bins = 5 5 5 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/newtoff/ghost + stencil: full/ghost/bin/3d + bin: standard + (2) fix qeq/reax, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 10.47 | 11.39 | 12.19 Mbytes + Step Temp E_pair E_mol TotEng Press + 0 0 -10226.557 0 -10226.557 -106.09736 + 100 54.051902 -10207.393 0 -10190.636 -291.39467 + 200 134.81172 -10200.411 0 -10158.619 -1637.1599 + 300 140.91215 -10177.136 0 -10133.452 -1668.5676 + 400 254.70123 -10189.927 0 -10110.968 -2522.3655 + 500 228.22204 -10162.396 0 -10091.646 403.98879 + 600 393.48756 -10197.284 0 -10075.301 394.11243 + 700 305.82625 -10156.707 0 -10061.9 362.73212 + 800 375.95634 -10170.288 0 -10053.74 -664.10079 + 900 361.59143 -10155.847 0 -10043.752 458.52018 + 1000 445.4582 -10176.601 0 -10038.507 251.4509 + 1100 475.47 -10180.12 0 -10032.722 840.09331 + 1200 406.77476 -10155.496 0 -10029.394 62.656622 + 1300 461.06079 -10167.123 0 -10024.192 265.91062 + 1400 408.15869 -10148.621 0 -10022.09 -1187.4869 + 1500 514.43021 -10178.337 0 -10018.862 -616.07216 + 1600 432.22013 -10151.168 0 -10017.178 -678.01121 + 1700 521.0846 -10175.605 0 -10014.067 98.591699 + 1800 409.72383 -10138.803 0 -10011.787 1884.7989 + 1900 481.86369 -10160.152 0 -10010.773 1058.5554 + 2000 423.60058 -10138.532 0 -10007.214 -437.22408 + 2100 520.96555 -10169.169 0 -10007.668 376.18619 + 2200 477.21351 -10153.089 0 -10005.15 -113.43512 + 2300 477.86263 -10153.309 0 -10005.17 868.89369 + 2400 471.46466 -10149.152 0 -10002.996 688.76379 + 2500 482.61616 -10153.025 0 -10003.412 350.03715 + 2600 505.68439 -10159.544 0 -10002.78 -810.94974 + 2700 
498.37307 -10156.441 0 -10001.944 -460.12105 + 2800 535.06218 -10167.029 0 -10001.157 152.85379 + 2900 432.98591 -10134.778 0 -10000.55 -170.46638 + 3000 547.92956 -10170.199 0 -10000.339 60.201766 +Loop time of 2.03179 on 4 procs for 3000 steps with 105 atoms + +Performance: 31.893 ns/day, 0.753 hours/ns, 1476.533 timesteps/s, 155.036 katom-step/s +99.0% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 1.213 | 1.2396 | 1.2505 | 1.4 | 61.01 +Neigh | 0.074765 | 0.08966 | 0.10323 | 4.3 | 4.41 +Comm | 0.12894 | 0.14116 | 0.16833 | 4.2 | 6.95 +Output | 0.0006079 | 0.00066664 | 0.00083802 | 0.0 | 0.03 +Modify | 0.54589 | 0.55836 | 0.57217 | 1.6 | 27.48 +Other | | 0.002368 | | | 0.12 + +Nlocal: 26.25 ave 45 max 6 min +Histogram: 1 0 1 0 0 0 0 0 1 1 +Nghost: 380.75 ave 495 max 261 min +Histogram: 1 0 1 0 0 0 0 0 1 1 +Neighs: 1269.5 ave 2197 max 179 min +Histogram: 1 0 1 0 0 0 0 0 1 1 + +Total # of neighbors = 5078 +Ave neighs/atom = 48.361905 +Neighbor list builds = 300 +Dangerous builds not checked +Total wall time: 0:00:02 diff --git a/examples/reaxff/CHO/log.8Mar18.CHO.g++.1 b/examples/reaxff/CHO/log.8Mar18.CHO.g++.1 deleted file mode 100644 index 305ccbf3a0..0000000000 --- a/examples/reaxff/CHO/log.8Mar18.CHO.g++.1 +++ /dev/null @@ -1,81 +0,0 @@ -LAMMPS (8 Mar 2018) - using 1 OpenMP thread(s) per MPI task -# REAX potential for CHO system -# ..... - -units real - -atom_style charge -read_data data.CHO - orthogonal box = (0 0 0) to (25 25 25) - 1 by 1 by 1 MPI processor grid - reading atoms ... 
- 105 atoms - -pair_style reax/c lmp_control -pair_coeff * * ffield.reax.cho H C O -Reading potential file ffield.reax.cho with DATE: 2011-02-18 - -neighbor 2 bin -neigh_modify every 10 delay 0 check no - -fix 1 all nve -fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq -fix 3 all temp/berendsen 500.0 500.0 100.0 - -timestep 0.25 - -#dump 1 all atom 30 dump.reax.cho - -run 3000 -Neighbor list info ... - update every 10 steps, delay 0 steps, check no - max neighbors/atom: 2000, page size: 100000 - master list distance cutoff = 12 - ghost atom cutoff = 12 - binsize = 6, bins = 5 5 5 - 2 neighbor lists, perpetual/occasional/extra = 2 0 0 - (1) pair reax/c, perpetual - attributes: half, newton off, ghost - pair build: half/bin/newtoff/ghost - stencil: half/ghost/bin/3d/newtoff - bin: standard - (2) fix qeq/reax, perpetual, copy from (1) - attributes: half, newton off, ghost - pair build: copy - stencil: none - bin: none -Per MPI rank memory allocation (min/avg/max) = 18.68 | 18.68 | 18.68 Mbytes -Step Temp E_pair E_mol TotEng Press - 0 0 -10226.557 0 -10226.557 -106.09755 - 3000 548.5116 -10170.389 0 -10000.348 40.372297 -Loop time of 12.6046 on 1 procs for 3000 steps with 105 atoms - -Performance: 5.141 ns/day, 4.668 hours/ns, 238.008 timesteps/s -98.9% CPU use with 1 MPI tasks x 1 OpenMP threads - -MPI task timing breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 10.931 | 10.931 | 10.931 | 0.0 | 86.72 -Neigh | 0.33107 | 0.33107 | 0.33107 | 0.0 | 2.63 -Comm | 0.017975 | 0.017975 | 0.017975 | 0.0 | 0.14 -Output | 2.0742e-05 | 2.0742e-05 | 2.0742e-05 | 0.0 | 0.00 -Modify | 1.3197 | 1.3197 | 1.3197 | 0.0 | 10.47 -Other | | 0.005059 | | | 0.04 - -Nlocal: 105 ave 105 max 105 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Nghost: 643 ave 643 max 643 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Neighs: 4237 ave 4237 max 4237 min -Histogram: 1 0 0 0 0 0 0 0 0 0 - -Total # of neighbors = 4237 -Ave 
neighs/atom = 40.3524 -Neighbor list builds = 300 -Dangerous builds not checked - -Please see the log.cite file for references relevant to this simulation - -Total wall time: 0:00:12 diff --git a/examples/reaxff/CHO/log.8Mar18.CHO.g++.4 b/examples/reaxff/CHO/log.8Mar18.CHO.g++.4 deleted file mode 100644 index 2bc19dc789..0000000000 --- a/examples/reaxff/CHO/log.8Mar18.CHO.g++.4 +++ /dev/null @@ -1,81 +0,0 @@ -LAMMPS (8 Mar 2018) - using 1 OpenMP thread(s) per MPI task -# REAX potential for CHO system -# ..... - -units real - -atom_style charge -read_data data.CHO - orthogonal box = (0 0 0) to (25 25 25) - 1 by 2 by 2 MPI processor grid - reading atoms ... - 105 atoms - -pair_style reax/c lmp_control -pair_coeff * * ffield.reax.cho H C O -Reading potential file ffield.reax.cho with DATE: 2011-02-18 - -neighbor 2 bin -neigh_modify every 10 delay 0 check no - -fix 1 all nve -fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq -fix 3 all temp/berendsen 500.0 500.0 100.0 - -timestep 0.25 - -#dump 1 all atom 30 dump.reax.cho - -run 3000 -Neighbor list info ... 
- update every 10 steps, delay 0 steps, check no - max neighbors/atom: 2000, page size: 100000 - master list distance cutoff = 12 - ghost atom cutoff = 12 - binsize = 6, bins = 5 5 5 - 2 neighbor lists, perpetual/occasional/extra = 2 0 0 - (1) pair reax/c, perpetual - attributes: half, newton off, ghost - pair build: half/bin/newtoff/ghost - stencil: half/ghost/bin/3d/newtoff - bin: standard - (2) fix qeq/reax, perpetual, copy from (1) - attributes: half, newton off, ghost - pair build: copy - stencil: none - bin: none -Per MPI rank memory allocation (min/avg/max) = 11.75 | 12.85 | 13.81 Mbytes -Step Temp E_pair E_mol TotEng Press - 0 0 -10226.557 0 -10226.557 -106.09745 - 3000 548.30567 -10170.323 0 -10000.346 47.794514 -Loop time of 7.42367 on 4 procs for 3000 steps with 105 atoms - -Performance: 8.729 ns/day, 2.750 hours/ns, 404.113 timesteps/s -97.7% CPU use with 4 MPI tasks x 1 OpenMP threads - -MPI task timing breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 5.3058 | 5.4086 | 5.4922 | 3.1 | 72.86 -Neigh | 0.14791 | 0.17866 | 0.2106 | 6.5 | 2.41 -Comm | 0.080185 | 0.16666 | 0.26933 | 17.7 | 2.24 -Output | 2.5988e-05 | 2.8491e-05 | 3.4571e-05 | 0.0 | 0.00 -Modify | 1.6364 | 1.6658 | 1.6941 | 2.0 | 22.44 -Other | | 0.003964 | | | 0.05 - -Nlocal: 26.25 ave 45 max 6 min -Histogram: 1 0 1 0 0 0 0 0 1 1 -Nghost: 380.75 ave 495 max 261 min -Histogram: 1 0 1 0 0 0 0 0 1 1 -Neighs: 1269.5 ave 2197 max 179 min -Histogram: 1 0 1 0 0 0 0 0 1 1 - -Total # of neighbors = 5078 -Ave neighs/atom = 48.3619 -Neighbor list builds = 300 -Dangerous builds not checked - -Please see the log.cite file for references relevant to this simulation - -Total wall time: 0:00:07 diff --git a/examples/reaxff/FC/in.FC b/examples/reaxff/FC/in.FC index 3679a9bc19..eaa2b3c444 100644 --- a/examples/reaxff/FC/in.FC +++ b/examples/reaxff/FC/in.FC @@ -3,33 +3,33 @@ dimension 3 boundary p p p -units real 
+units real -atom_style charge -read_data data.FC +atom_style charge +read_data data.FC -pair_style reax/c NULL -pair_coeff * * ffield.reax.FC C F -neighbor 2. bin -neigh_modify every 10 delay 0 check no -fix 2 all qeq/reax 1 0.0 10.0 1e-6 reax/c +pair_style reaxff NULL +pair_coeff * * ffield.reax.FC C F +neighbor 2. bin +neigh_modify every 10 delay 0 check no +fix 2 all qeq/reax 1 0.0 10.0 1e-6 reaxff # should equilibrate much longer in practice -fix 1 all npt temp 100.0 100.0 10.0 iso 1.0 1. 2000.0 +fix 1 all npt temp 100.0 100.0 10.0 iso 1.0 1. 2000.0 timestep 0.2 thermo_style custom step temp epair etotal press thermo 1 dump 4 all xyz 5000 dumpnpt.xyz -run 10 +run 10 unfix 1 fix 1 all nvt temp 100.0 100.0 100.0 thermo_style custom step temp epair etotal press -timestep 0.2 +timestep 0.2 -#dump 5 all xyz 5000 dumpnvt.xyz +#dump 5 all xyz 5000 dumpnvt.xyz #dump 6 all custom 5000 dumpidtype.dat id type x y z -run 10 +run 10 diff --git a/examples/reaxff/FC/log.30Nov23.FC.g++.1 b/examples/reaxff/FC/log.30Nov23.FC.g++.1 new file mode 100644 index 0000000000..a8f8c0ff83 --- /dev/null +++ b/examples/reaxff/FC/log.30Nov23.FC.g++.1 @@ -0,0 +1,172 @@ +LAMMPS (21 Nov 2023) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +# REAX potential for Nitroamines system +# ..... + +dimension 3 +boundary p p p +units real + +atom_style charge +read_data data.FC +Reading data file ... + orthogonal box = (-82.62 -79.5011 -50) to (82.62 79.5011 50) + 1 by 1 by 1 MPI processor grid + reading atoms ... + 17280 atoms + read_data CPU = 0.025 seconds + +pair_style reaxff NULL +pair_coeff * * ffield.reax.FC C F +Reading potential file ffield.reax.FC with DATE: 2013-06-28 +WARNING: Changed valency_val to valency_boc for X (src/REAXFF/reaxff_ffield.cpp:289) +neighbor 2. 
bin +neigh_modify every 10 delay 0 check no +fix 2 all qeq/reax 1 0.0 10.0 1e-6 reaxff + +# should equilibrate much longer in practice + +fix 1 all npt temp 100.0 100.0 10.0 iso 1.0 1. 2000.0 +timestep 0.2 +thermo_style custom step temp epair etotal press +thermo 1 +dump 4 all xyz 5000 dumpnpt.xyz +run 10 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +- fix qeq/reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Neighbor list info ... 
+ update: every = 10 steps, delay = 0 steps, check = no + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 12 + ghost atom cutoff = 12 + binsize = 6, bins = 28 27 17 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/newtoff/ghost + stencil: full/ghost/bin/3d + bin: standard + (2) fix qeq/reax, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 384.3 | 384.3 | 384.3 Mbytes + Step Temp E_pair TotEng Press + 0 0 -808525.04 -808525.04 58194.694 + 1 4.9935726 -808803.88 -808546.69 58205.825 + 2 19.98696 -809640.53 -808611.1 58239.155 + 3 45.012616 -811035.3 -808716.9 58294.499 + 4 80.103613 -812988.58 -808862.81 58371.548 + 5 125.26228 -815500.68 -809049 58469.872 + 6 180.4316 -818571.56 -809278.36 58588.936 + 7 245.47913 -822200.73 -809557.22 58728.144 + 8 320.17692 -826387.19 -809896.34 58886.879 + 9 404.17073 -831129.38 -810312.4 59064.554 + 10 497.02486 -836425.06 -810825.59 59260.717 +Loop time of 6.13793 on 1 procs for 10 steps with 17280 atoms + +Performance: 0.028 ns/day, 852.491 hours/ns, 1.629 timesteps/s, 28.153 katom-step/s +99.8% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 5.1752 | 5.1752 | 5.1752 | 0.0 | 84.31 +Neigh | 0.039453 | 0.039453 | 0.039453 | 0.0 | 0.64 +Comm | 0.00042596 | 0.00042596 | 0.00042596 | 0.0 | 0.01 +Output | 0.00064013 | 0.00064013 | 0.00064013 | 0.0 | 0.01 +Modify | 0.92205 | 0.92205 | 0.92205 | 0.0 | 15.02 +Other | | 0.0002045 | | | 0.00 + +Nlocal: 17280 ave 17280 max 17280 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 5352 ave 5352 max 5352 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 2.62136e+06 ave 2.62136e+06 max 2.62136e+06 min +Histogram: 1 0 0 0 0 0 0 
0 0 0 + +Total # of neighbors = 2621360 +Ave neighs/atom = 151.69907 +Neighbor list builds = 1 +Dangerous builds not checked + +unfix 1 + +fix 1 all nvt temp 100.0 100.0 100.0 +thermo_style custom step temp epair etotal press +timestep 0.2 + +#dump 5 all xyz 5000 dumpnvt.xyz +#dump 6 all custom 5000 dumpidtype.dat id type x y z + +run 10 +Per MPI rank memory allocation (min/avg/max) = 386.9 | 386.9 | 386.9 Mbytes + Step Temp E_pair TotEng Press + 10 497.02486 -836425.06 -810825.59 59260.717 + 11 601.6514 -841814.09 -810825.78 59489.425 + 12 716.37597 -847724.6 -810827.35 59738.298 + 13 841.27959 -854161.62 -810831.16 60008.164 + 14 976.4666 -861131.68 -810838.36 60300.364 + 15 1122.0668 -868642.96 -810850.45 60616.793 + 16 1278.2373 -876705.43 -810869.28 60959.942 + 17 1445.1655 -885331.03 -810897.18 61332.932 + 18 1623.072 -894533.91 -810936.92 61739.541 + 19 1812.1864 -904337.86 -811000.45 62200.561 + 20 2011.5898 -915379.05 -811771.28 63361.15 +Loop time of 6.11372 on 1 procs for 10 steps with 17280 atoms + +Performance: 0.028 ns/day, 849.127 hours/ns, 1.636 timesteps/s, 28.264 katom-step/s +99.8% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 5.0783 | 5.0783 | 5.0783 | 0.0 | 83.06 +Neigh | 0.03596 | 0.03596 | 0.03596 | 0.0 | 0.59 +Comm | 0.00041578 | 0.00041578 | 0.00041578 | 0.0 | 0.01 +Output | 0.00062133 | 0.00062133 | 0.00062133 | 0.0 | 0.01 +Modify | 0.99825 | 0.99825 | 0.99825 | 0.0 | 16.33 +Other | | 0.0002171 | | | 0.00 + +Nlocal: 17280 ave 17280 max 17280 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 5352 ave 5352 max 5352 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 2.62136e+06 ave 2.62136e+06 max 2.62136e+06 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 2621360 +Ave neighs/atom = 151.69907 +Neighbor list builds = 1 +Dangerous builds not checked +Total wall time: 0:00:13 diff 
--git a/examples/reaxff/FC/log.30Nov23.FC.g++.4 b/examples/reaxff/FC/log.30Nov23.FC.g++.4 new file mode 100644 index 0000000000..1a53bb8c03 --- /dev/null +++ b/examples/reaxff/FC/log.30Nov23.FC.g++.4 @@ -0,0 +1,172 @@ +LAMMPS (21 Nov 2023) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +# REAX potential for Nitroamines system +# ..... + +dimension 3 +boundary p p p +units real + +atom_style charge +read_data data.FC +Reading data file ... + orthogonal box = (-82.62 -79.5011 -50) to (82.62 79.5011 50) + 2 by 2 by 1 MPI processor grid + reading atoms ... + 17280 atoms + read_data CPU = 0.030 seconds + +pair_style reaxff NULL +pair_coeff * * ffield.reax.FC C F +Reading potential file ffield.reax.FC with DATE: 2013-06-28 +WARNING: Changed valency_val to valency_boc for X (src/REAXFF/reaxff_ffield.cpp:289) +neighbor 2. bin +neigh_modify every 10 delay 0 check no +fix 2 all qeq/reax 1 0.0 10.0 1e-6 reaxff + +# should equilibrate much longer in practice + +fix 1 all npt temp 100.0 100.0 10.0 iso 1.0 1. 2000.0 +timestep 0.2 +thermo_style custom step temp epair etotal press +thermo 1 +dump 4 all xyz 5000 dumpnpt.xyz +run 10 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +- fix qeq/reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. 
Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Neighbor list info ... + update: every = 10 steps, delay = 0 steps, check = no + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 12 + ghost atom cutoff = 12 + binsize = 6, bins = 28 27 17 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/newtoff/ghost + stencil: full/ghost/bin/3d + bin: standard + (2) fix qeq/reax, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 121.6 | 121.6 | 121.6 Mbytes + Step Temp E_pair TotEng Press + 0 0 -808525.04 -808525.04 58194.694 + 1 4.9935726 -808803.88 -808546.69 58205.825 + 2 19.98696 -809640.53 -808611.1 58239.155 + 3 45.012616 -811035.3 -808716.9 58294.499 + 4 80.103613 -812988.58 -808862.81 58371.548 + 5 125.26228 -815500.68 -809049 58469.872 + 6 180.4316 -818571.56 -809278.36 58588.936 + 7 245.47913 -822200.73 -809557.22 58728.144 + 8 320.17692 -826387.19 -809896.34 58886.879 + 9 404.17073 -831129.38 -810312.4 59064.554 + 10 497.02486 -836425.06 -810825.59 59260.717 +Loop time of 1.75962 on 4 procs for 10 steps with 17280 atoms + +Performance: 0.098 ns/day, 244.392 hours/ns, 5.683 timesteps/s, 98.203 katom-step/s +99.6% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 1.402 | 1.4417 | 1.4815 | 3.0 | 81.93 +Neigh | 0.012815 | 0.013047 | 0.01323 | 0.2 | 0.74 +Comm | 0.0006609 | 0.040482 | 0.080149 | 17.9 | 2.30 +Output | 0.00028041 | 0.00029538 | 0.00033093 | 0.0 | 0.02 +Modify | 0.26389 | 
0.26407 | 0.26425 | 0.0 | 15.01 +Other | | 7.451e-05 | | | 0.00 + +Nlocal: 4320 ave 4320 max 4320 min +Histogram: 4 0 0 0 0 0 0 0 0 0 +Nghost: 2856 ave 2856 max 2856 min +Histogram: 4 0 0 0 0 0 0 0 0 0 +Neighs: 691892 ave 691892 max 691892 min +Histogram: 4 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 2767568 +Ave neighs/atom = 160.16019 +Neighbor list builds = 1 +Dangerous builds not checked + +unfix 1 + +fix 1 all nvt temp 100.0 100.0 100.0 +thermo_style custom step temp epair etotal press +timestep 0.2 + +#dump 5 all xyz 5000 dumpnvt.xyz +#dump 6 all custom 5000 dumpidtype.dat id type x y z + +run 10 +Per MPI rank memory allocation (min/avg/max) = 123 | 123 | 123 Mbytes + Step Temp E_pair TotEng Press + 10 497.02486 -836425.06 -810825.59 59260.717 + 11 601.6514 -841814.09 -810825.78 59489.425 + 12 716.37597 -847724.6 -810827.35 59738.298 + 13 841.27959 -854161.62 -810831.16 60008.164 + 14 976.4666 -861131.68 -810838.36 60300.364 + 15 1122.0668 -868642.96 -810850.45 60616.793 + 16 1278.2373 -876705.43 -810869.28 60959.942 + 17 1445.1655 -885331.03 -810897.18 61332.932 + 18 1623.072 -894533.91 -810936.92 61739.541 + 19 1812.1864 -904337.86 -811000.45 62200.561 + 20 2011.5898 -915379.05 -811771.28 63361.15 +Loop time of 1.8322 on 4 procs for 10 steps with 17280 atoms + +Performance: 0.094 ns/day, 254.473 hours/ns, 5.458 timesteps/s, 94.313 katom-step/s +99.5% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 1.3846 | 1.4468 | 1.524 | 4.2 | 78.96 +Neigh | 0.012048 | 0.012239 | 0.012522 | 0.2 | 0.67 +Comm | 0.00082283 | 0.07804 | 0.14024 | 17.9 | 4.26 +Output | 0.00029695 | 0.00031243 | 0.00035323 | 0.0 | 0.02 +Modify | 0.29449 | 0.29478 | 0.29497 | 0.0 | 16.09 +Other | | 7.342e-05 | | | 0.00 + +Nlocal: 4320 ave 4320 max 4320 min +Histogram: 4 0 0 0 0 0 0 0 0 0 +Nghost: 2856 ave 2856 max 2856 min +Histogram: 4 0 0 
0 0 0 0 0 0 0 +Neighs: 691892 ave 691892 max 691892 min +Histogram: 4 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 2767568 +Ave neighs/atom = 160.16019 +Neighbor list builds = 1 +Dangerous builds not checked +Total wall time: 0:00:04 diff --git a/examples/reaxff/FC/log.8Mar18.FC.g++.1 b/examples/reaxff/FC/log.8Mar18.FC.g++.1 deleted file mode 100644 index 1e2f723966..0000000000 --- a/examples/reaxff/FC/log.8Mar18.FC.g++.1 +++ /dev/null @@ -1,141 +0,0 @@ -LAMMPS (8 Mar 2018) - using 1 OpenMP thread(s) per MPI task -# REAX potential for Nitroamines system -# ..... - -dimension 3 -boundary p p p -units real - -atom_style charge -read_data data.FC - orthogonal box = (-82.62 -79.5011 -50) to (82.62 79.5011 50) - 1 by 1 by 1 MPI processor grid - reading atoms ... - 17280 atoms - -pair_style reax/c NULL -pair_coeff * * ffield.reax.FC C F -Reading potential file ffield.reax.FC with DATE: 2013-06-28 -neighbor 2. bin -neigh_modify every 10 delay 0 check no -fix 2 all qeq/reax 1 0.0 10.0 1e-6 reax/c - -# should equilibrate much longer in practice - -fix 1 all npt temp 100.0 100.0 10.0 iso 1.0 1. 2000.0 -timestep 0.2 -thermo_style custom step temp epair etotal press -thermo 1 -dump 4 all xyz 5000 dumpnpt.xyz -run 10 -Neighbor list info ... 
- update every 10 steps, delay 0 steps, check no - max neighbors/atom: 2000, page size: 100000 - master list distance cutoff = 12 - ghost atom cutoff = 12 - binsize = 6, bins = 28 27 17 - 2 neighbor lists, perpetual/occasional/extra = 2 0 0 - (1) pair reax/c, perpetual - attributes: half, newton off, ghost - pair build: half/bin/newtoff/ghost - stencil: half/ghost/bin/3d/newtoff - bin: standard - (2) fix qeq/reax, perpetual, copy from (1) - attributes: half, newton off, ghost - pair build: copy - stencil: none - bin: none -Per MPI rank memory allocation (min/avg/max) = 470 | 470 | 470 Mbytes -Step Temp E_pair TotEng Press - 0 0 -808525.04 -808525.04 58194.694 - 1 4.9935726 -808803.89 -808546.69 58205.825 - 2 19.98696 -809640.54 -808611.1 58239.155 - 3 45.012616 -811035.31 -808716.91 58294.499 - 4 80.103613 -812988.6 -808862.83 58371.547 - 5 125.26228 -815500.71 -809049.03 58469.871 - 6 180.4316 -818571.61 -809278.4 58588.935 - 7 245.47913 -822200.79 -809557.28 58728.142 - 8 320.17692 -826387.27 -809896.43 58886.877 - 9 404.17073 -831129.48 -810312.5 59064.551 - 10 497.02486 -836425.19 -810825.72 59260.714 -Loop time of 21.5054 on 1 procs for 10 steps with 17280 atoms - -Performance: 0.008 ns/day, 2986.857 hours/ns, 0.465 timesteps/s -98.8% CPU use with 1 MPI tasks x 1 OpenMP threads - -MPI task timing breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 19.008 | 19.008 | 19.008 | 0.0 | 88.39 -Neigh | 0.084401 | 0.084401 | 0.084401 | 0.0 | 0.39 -Comm | 0.00080419 | 0.00080419 | 0.00080419 | 0.0 | 0.00 -Output | 0.00095367 | 0.00095367 | 0.00095367 | 0.0 | 0.00 -Modify | 2.4109 | 2.4109 | 2.4109 | 0.0 | 11.21 -Other | | 0.0004592 | | | 0.00 - -Nlocal: 17280 ave 17280 max 17280 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Nghost: 5352 ave 5352 max 5352 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Neighs: 2.62136e+06 ave 2.62136e+06 max 2.62136e+06 min -Histogram: 1 0 0 0 0 0 0 0 0 0 - -Total # 
of neighbors = 2621360 -Ave neighs/atom = 151.699 -Neighbor list builds = 1 -Dangerous builds not checked - -unfix 1 - -fix 1 all nvt temp 100.0 100.0 100.0 -thermo_style custom step temp epair etotal press -timestep 0.2 - -#dump 5 all xyz 5000 dumpnvt.xyz -#dump 6 all custom 5000 dumpidtype.dat id type x y z - -run 10 -Per MPI rank memory allocation (min/avg/max) = 470 | 470 | 470 Mbytes -Step Temp E_pair TotEng Press - 10 497.02486 -836425.19 -810825.72 59260.714 - 11 601.65141 -841814.22 -810825.91 59489.422 - 12 716.37599 -847724.72 -810827.48 59738.295 - 13 841.27961 -854161.75 -810831.29 60008.162 - 14 976.46663 -861131.81 -810838.49 60300.362 - 15 1122.0668 -868643.09 -810850.57 60616.791 - 16 1278.2373 -876705.56 -810869.41 60959.94 - 17 1445.1655 -885331.16 -810897.31 61332.931 - 18 1623.072 -894534.04 -810937.04 61739.541 - 19 1812.1865 -904337.99 -811000.57 62200.561 - 20 2011.5899 -915379.19 -811771.41 63361.151 -Loop time of 21.362 on 1 procs for 10 steps with 17280 atoms - -Performance: 0.008 ns/day, 2966.945 hours/ns, 0.468 timesteps/s -98.9% CPU use with 1 MPI tasks x 1 OpenMP threads - -MPI task timing breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 18.793 | 18.793 | 18.793 | 0.0 | 87.97 -Neigh | 0.077047 | 0.077047 | 0.077047 | 0.0 | 0.36 -Comm | 0.00080276 | 0.00080276 | 0.00080276 | 0.0 | 0.00 -Output | 0.0010097 | 0.0010097 | 0.0010097 | 0.0 | 0.00 -Modify | 2.4897 | 2.4897 | 2.4897 | 0.0 | 11.65 -Other | | 0.0004568 | | | 0.00 - -Nlocal: 17280 ave 17280 max 17280 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Nghost: 5352 ave 5352 max 5352 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Neighs: 2.62136e+06 ave 2.62136e+06 max 2.62136e+06 min -Histogram: 1 0 0 0 0 0 0 0 0 0 - -Total # of neighbors = 2621360 -Ave neighs/atom = 151.699 -Neighbor list builds = 1 -Dangerous builds not checked - -Please see the log.cite file for references relevant to this simulation - -Total 
wall time: 0:00:47 diff --git a/examples/reaxff/FC/log.8Mar18.FC.g++.4 b/examples/reaxff/FC/log.8Mar18.FC.g++.4 deleted file mode 100644 index 76dcadfb0f..0000000000 --- a/examples/reaxff/FC/log.8Mar18.FC.g++.4 +++ /dev/null @@ -1,141 +0,0 @@ -LAMMPS (8 Mar 2018) - using 1 OpenMP thread(s) per MPI task -# REAX potential for Nitroamines system -# ..... - -dimension 3 -boundary p p p -units real - -atom_style charge -read_data data.FC - orthogonal box = (-82.62 -79.5011 -50) to (82.62 79.5011 50) - 2 by 2 by 1 MPI processor grid - reading atoms ... - 17280 atoms - -pair_style reax/c NULL -pair_coeff * * ffield.reax.FC C F -Reading potential file ffield.reax.FC with DATE: 2013-06-28 -neighbor 2. bin -neigh_modify every 10 delay 0 check no -fix 2 all qeq/reax 1 0.0 10.0 1e-6 reax/c - -# should equilibrate much longer in practice - -fix 1 all npt temp 100.0 100.0 10.0 iso 1.0 1. 2000.0 -timestep 0.2 -thermo_style custom step temp epair etotal press -thermo 1 -dump 4 all xyz 5000 dumpnpt.xyz -run 10 -Neighbor list info ... 
- update every 10 steps, delay 0 steps, check no - max neighbors/atom: 2000, page size: 100000 - master list distance cutoff = 12 - ghost atom cutoff = 12 - binsize = 6, bins = 28 27 17 - 2 neighbor lists, perpetual/occasional/extra = 2 0 0 - (1) pair reax/c, perpetual - attributes: half, newton off, ghost - pair build: half/bin/newtoff/ghost - stencil: half/ghost/bin/3d/newtoff - bin: standard - (2) fix qeq/reax, perpetual, copy from (1) - attributes: half, newton off, ghost - pair build: copy - stencil: none - bin: none -Per MPI rank memory allocation (min/avg/max) = 149.3 | 149.3 | 149.3 Mbytes -Step Temp E_pair TotEng Press - 0 0 -808525.04 -808525.04 58194.694 - 1 4.9935726 -808803.89 -808546.69 58205.825 - 2 19.98696 -809640.54 -808611.1 58239.155 - 3 45.012616 -811035.31 -808716.91 58294.499 - 4 80.103613 -812988.6 -808862.83 58371.547 - 5 125.26228 -815500.71 -809049.03 58469.871 - 6 180.4316 -818571.61 -809278.4 58588.935 - 7 245.47913 -822200.79 -809557.28 58728.142 - 8 320.17692 -826387.27 -809896.43 58886.877 - 9 404.17073 -831129.48 -810312.5 59064.551 - 10 497.02486 -836425.19 -810825.72 59260.714 -Loop time of 6.02109 on 4 procs for 10 steps with 17280 atoms - -Performance: 0.029 ns/day, 836.262 hours/ns, 1.661 timesteps/s -99.0% CPU use with 4 MPI tasks x 1 OpenMP threads - -MPI task timing breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 4.9482 | 5.1186 | 5.3113 | 7.4 | 85.01 -Neigh | 0.024811 | 0.025702 | 0.027556 | 0.7 | 0.43 -Comm | 0.0027421 | 0.19541 | 0.36565 | 38.1 | 3.25 -Output | 0.00053239 | 0.00057119 | 0.00067186 | 0.0 | 0.01 -Modify | 0.67876 | 0.68059 | 0.68165 | 0.1 | 11.30 -Other | | 0.0001779 | | | 0.00 - -Nlocal: 4320 ave 4320 max 4320 min -Histogram: 4 0 0 0 0 0 0 0 0 0 -Nghost: 2856 ave 2856 max 2856 min -Histogram: 4 0 0 0 0 0 0 0 0 0 -Neighs: 691892 ave 691892 max 691892 min -Histogram: 4 0 0 0 0 0 0 0 0 0 - -Total # of neighbors = 
2767568 -Ave neighs/atom = 160.16 -Neighbor list builds = 1 -Dangerous builds not checked - -unfix 1 - -fix 1 all nvt temp 100.0 100.0 100.0 -thermo_style custom step temp epair etotal press -timestep 0.2 - -#dump 5 all xyz 5000 dumpnvt.xyz -#dump 6 all custom 5000 dumpidtype.dat id type x y z - -run 10 -Per MPI rank memory allocation (min/avg/max) = 149.3 | 149.3 | 149.3 Mbytes -Step Temp E_pair TotEng Press - 10 497.02486 -836425.19 -810825.72 59260.714 - 11 601.65141 -841814.22 -810825.91 59489.422 - 12 716.37599 -847724.72 -810827.48 59738.295 - 13 841.27961 -854161.75 -810831.29 60008.162 - 14 976.46663 -861131.81 -810838.49 60300.362 - 15 1122.0668 -868643.09 -810850.57 60616.791 - 16 1278.2373 -876705.56 -810869.41 60959.94 - 17 1445.1655 -885331.16 -810897.31 61332.931 - 18 1623.072 -894534.04 -810937.04 61739.541 - 19 1812.1865 -904337.99 -811000.57 62200.561 - 20 2011.5899 -915379.19 -811771.41 63361.151 -Loop time of 6.08805 on 4 procs for 10 steps with 17280 atoms - -Performance: 0.028 ns/day, 845.563 hours/ns, 1.643 timesteps/s -99.2% CPU use with 4 MPI tasks x 1 OpenMP threads - -MPI task timing breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 4.9124 | 5.1008 | 5.3405 | 8.3 | 83.78 -Neigh | 0.023652 | 0.024473 | 0.025996 | 0.6 | 0.40 -Comm | 0.0020971 | 0.24171 | 0.43023 | 38.0 | 3.97 -Output | 0.00056076 | 0.00060701 | 0.00072312 | 0.0 | 0.01 -Modify | 0.71869 | 0.72023 | 0.72107 | 0.1 | 11.83 -Other | | 0.0001827 | | | 0.00 - -Nlocal: 4320 ave 4320 max 4320 min -Histogram: 4 0 0 0 0 0 0 0 0 0 -Nghost: 2856 ave 2856 max 2856 min -Histogram: 4 0 0 0 0 0 0 0 0 0 -Neighs: 691892 ave 691892 max 691892 min -Histogram: 4 0 0 0 0 0 0 0 0 0 - -Total # of neighbors = 2767568 -Ave neighs/atom = 160.16 -Neighbor list builds = 1 -Dangerous builds not checked - -Please see the log.cite file for references relevant to this simulation - -Total wall time: 0:00:13 diff --git 
a/examples/reaxff/FeOH3/in.FeOH3 b/examples/reaxff/FeOH3/in.FeOH3 index 8b56f2a7d2..72afbe5416 100644 --- a/examples/reaxff/FeOH3/in.FeOH3 +++ b/examples/reaxff/FeOH3/in.FeOH3 @@ -1,23 +1,24 @@ # REAX potential for Fe/O/H system # ..... -units real +units real -atom_style charge -read_data data.FeOH3 +atom_style charge +read_data data.FeOH3 -pair_style reax/c lmp_control -pair_coeff * * ffield.reax.Fe_O_C_H H O Fe +pair_style reaxff lmp_control +pair_coeff * * ffield.reax.Fe_O_C_H H O Fe -neighbor 2 bin -neigh_modify every 10 delay 0 check no +neighbor 2 bin +neigh_modify every 10 delay 0 check no -fix 1 all nve +fix 1 all nve fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq fix 3 all temp/berendsen 500.0 500.0 100.0 -timestep 0.25 +timestep 0.25 +thermo 100 -#dump 1 all atom 30 dump.reax.feoh +#dump 1 all atom 30 dump.reax.feoh -run 3000 +run 3000 diff --git a/examples/reaxff/FeOH3/lmp_control b/examples/reaxff/FeOH3/lmp_control index 779c7da7ec..6b26c6427a 100644 --- a/examples/reaxff/FeOH3/lmp_control +++ b/examples/reaxff/FeOH3/lmp_control @@ -1,17 +1,7 @@ -simulation_name FeOH3_example ! output files will carry this name + their specific ext -tabulate_long_range 10000 ! denotes the granularity of long range tabulation, 0 means no tabulation -energy_update_freq 1 +tabulate_long_range 10000 ! denotes the granularity of long range tabulation, 0 means no tabulation -nbrhood_cutoff 4.5 ! near neighbors cutoff for bond calculations in A -hbond_cutoff 6.0 ! cutoff distance for hydrogen bond interactions -bond_graph_cutoff 0.3 ! bond strength cutoff for bond graphs -thb_cutoff 0.001 ! cutoff value for three body interactions - -write_freq 1 ! write trajectory after so many steps -traj_title Fe_OH3 ! (no white spaces) -atom_info 1 ! 0: no atom info, 1: print basic atom info in the trajectory file -atom_forces 1 ! 0: basic atom format, 1: print force on each atom in the trajectory file -atom_velocities 0 ! 
0: basic atom format, 1: print the velocity of each atom in the trajectory file -bond_info 1 ! 0: do not print bonds, 1: print bonds in the trajectory file -angle_info 1 ! 0: do not print angles, 1: print angles in the trajectory file +nbrhood_cutoff 4.5 ! near neighbors cutoff for bond calculations in A +hbond_cutoff 6.0 ! cutoff distance for hydrogen bond interactions +bond_graph_cutoff 0.3 ! bond strength cutoff for bond graphs +thb_cutoff 0.001 ! cutoff value for three body interactions diff --git a/examples/reaxff/FeOH3/log.30Nov23.FeOH3.g++.1 b/examples/reaxff/FeOH3/log.30Nov23.FeOH3.g++.1 new file mode 100644 index 0000000000..0acb09bdc0 --- /dev/null +++ b/examples/reaxff/FeOH3/log.30Nov23.FeOH3.g++.1 @@ -0,0 +1,141 @@ +LAMMPS (21 Nov 2023) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +# REAX potential for Fe/O/H system +# ..... + +units real + +atom_style charge +read_data data.FeOH3 +Reading data file ... + orthogonal box = (0 0 0) to (25 25 25) + 1 by 1 by 1 MPI processor grid + reading atoms ... + 105 atoms + read_data CPU = 0.000 seconds + +pair_style reaxff lmp_control +pair_coeff * * ffield.reax.Fe_O_C_H H O Fe +Reading potential file ffield.reax.Fe_O_C_H with DATE: 2011-02-18 + +neighbor 2 bin +neigh_modify every 10 delay 0 check no + +fix 1 all nve +fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq +fix 3 all temp/berendsen 500.0 500.0 100.0 + +timestep 0.25 +thermo 100 + +#dump 1 all atom 30 dump.reax.feoh + +run 3000 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. 
Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +- fix qeq/reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Neighbor list info ... + update: every = 10 steps, delay = 0 steps, check = no + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 12 + ghost atom cutoff = 12 + binsize = 6, bins = 5 5 5 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/newtoff/ghost + stencil: full/ghost/bin/3d + bin: standard + (2) fix qeq/reax, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 15.99 | 15.99 | 15.99 Mbytes + Step Temp E_pair E_mol TotEng Press + 0 0 -9715.3326 0 -9715.3326 -139.61126 + 100 127.38829 -9720.5854 0 -9681.0945 -933.74373 + 200 141.21008 -9696.3143 0 -9652.5386 -831.74241 + 300 176.81083 -9681.3376 0 -9626.5255 -520.30966 + 400 220.75236 -9672.6196 0 -9604.1854 -388.85436 + 500 301.29415 -9678.8463 0 -9585.4438 -545.22735 + 600 320.36877 -9670.3054 0 -9570.9897 -609.44044 + 700 414.53699 -9688.649 0 -9560.1408 -259.51791 + 800 391.93073 -9675.1212 0 -9553.621 77.352757 + 900 413.52476 -9673.7372 0 -9545.5428 369.71918 + 1000 382.03337 -9656.3848 0 -9537.9528 236.61186 + 1100 381.68223 -9647.4372 0 -9529.1141 -432.67374 + 1200 470.68889 -9671.5116 0 -9525.596 448.90781 + 1300 436.34973 -9659.2277 0 -9523.9574 188.12079 + 1400 
422.25034 -9651.2639 0 -9520.3645 48.988693 + 1500 363.49223 -9625.6588 0 -9512.9746 -977.83513 + 1600 450.39155 -9646.4742 0 -9506.8509 44.80204 + 1700 461.44884 -9648.1215 0 -9505.0704 -29.381385 + 1800 457.01538 -9644.6842 0 -9503.0075 -29.157643 + 1900 461.56497 -9642.8457 0 -9499.7586 -608.58801 + 2000 491.20199 -9648.6637 0 -9496.389 -99.409356 + 2100 461.60295 -9636.4878 0 -9493.3889 753.00956 + 2200 480.92601 -9640.304 0 -9491.2149 -176.4371 + 2300 450.00958 -9627.8875 0 -9488.3826 -210.21397 + 2400 475.97134 -9634.1577 0 -9486.6046 -364.46797 + 2500 478.0174 -9631.5069 0 -9483.3194 557.79107 + 2600 500.26141 -9636.8606 0 -9481.7774 115.84535 + 2700 455.06433 -9620.0151 0 -9478.9433 -963.22 + 2800 441.50799 -9612.6546 0 -9475.7852 -177.60856 + 2900 471.67031 -9618.9817 0 -9472.7619 -294.38595 + 3000 526.94336 -9635.8664 0 -9472.5117 119.05777 +Loop time of 2.33387 on 1 procs for 3000 steps with 105 atoms + +Performance: 27.765 ns/day, 0.864 hours/ns, 1285.420 timesteps/s, 134.969 katom-step/s +99.7% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 1.8409 | 1.8409 | 1.8409 | 0.0 | 78.88 +Neigh | 0.15998 | 0.15998 | 0.15998 | 0.0 | 6.85 +Comm | 0.0090909 | 0.0090909 | 0.0090909 | 0.0 | 0.39 +Output | 0.00069968 | 0.00069968 | 0.00069968 | 0.0 | 0.03 +Modify | 0.32099 | 0.32099 | 0.32099 | 0.0 | 13.75 +Other | | 0.002244 | | | 0.10 + +Nlocal: 105 ave 105 max 105 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 651 ave 651 max 651 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 3388 ave 3388 max 3388 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 3388 +Ave neighs/atom = 32.266667 +Neighbor list builds = 300 +Dangerous builds not checked +Total wall time: 0:00:02 diff --git a/examples/reaxff/FeOH3/log.30Nov23.FeOH3.g++.4 b/examples/reaxff/FeOH3/log.30Nov23.FeOH3.g++.4 new file mode 100644 index 
0000000000..3357947749 --- /dev/null +++ b/examples/reaxff/FeOH3/log.30Nov23.FeOH3.g++.4 @@ -0,0 +1,141 @@ +LAMMPS (21 Nov 2023) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +# REAX potential for Fe/O/H system +# ..... + +units real + +atom_style charge +read_data data.FeOH3 +Reading data file ... + orthogonal box = (0 0 0) to (25 25 25) + 1 by 2 by 2 MPI processor grid + reading atoms ... + 105 atoms + read_data CPU = 0.001 seconds + +pair_style reaxff lmp_control +pair_coeff * * ffield.reax.Fe_O_C_H H O Fe +Reading potential file ffield.reax.Fe_O_C_H with DATE: 2011-02-18 + +neighbor 2 bin +neigh_modify every 10 delay 0 check no + +fix 1 all nve +fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq +fix 3 all temp/berendsen 500.0 500.0 100.0 + +timestep 0.25 +thermo 100 + +#dump 1 all atom 30 dump.reax.feoh + +run 3000 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +- fix qeq/reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Neighbor list info ... 
+ update: every = 10 steps, delay = 0 steps, check = no + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 12 + ghost atom cutoff = 12 + binsize = 6, bins = 5 5 5 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/newtoff/ghost + stencil: full/ghost/bin/3d + bin: standard + (2) fix qeq/reax, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 10.56 | 11.55 | 12.17 Mbytes + Step Temp E_pair E_mol TotEng Press + 0 0 -9715.3326 0 -9715.3326 -139.61126 + 100 127.3884 -9720.5854 0 -9681.0945 -933.74975 + 200 141.21023 -9696.3143 0 -9652.5385 -831.74859 + 300 176.81092 -9681.3376 0 -9626.5254 -520.29734 + 400 220.75237 -9672.6195 0 -9604.1853 -388.89122 + 500 301.29434 -9678.8461 0 -9585.4436 -545.24883 + 600 320.36921 -9670.3055 0 -9570.9897 -609.45071 + 700 414.5366 -9688.649 0 -9560.1409 -259.54271 + 800 391.93079 -9675.1212 0 -9553.621 77.314405 + 900 413.52641 -9673.738 0 -9545.5431 369.67477 + 1000 382.02987 -9656.384 0 -9537.9531 236.57634 + 1100 381.6811 -9647.4372 0 -9529.1144 -432.72725 + 1200 470.68578 -9671.511 0 -9525.5964 448.88885 + 1300 436.3616 -9659.2312 0 -9523.9573 188.07625 + 1400 422.26867 -9651.2709 0 -9520.3658 48.829055 + 1500 363.49419 -9625.6611 0 -9512.9764 -977.70396 + 1600 450.39497 -9646.4762 0 -9506.8518 45.000339 + 1700 461.44368 -9648.121 0 -9505.0715 -29.332359 + 1800 457.02327 -9644.687 0 -9503.0078 -29.48478 + 1900 461.60004 -9642.8571 0 -9499.7592 -608.0786 + 2000 491.19069 -9648.6599 0 -9496.3887 -99.479995 + 2100 461.50901 -9636.4581 0 -9493.3884 752.86874 + 2200 480.7646 -9640.2431 0 -9491.204 -175.99562 + 2300 450.00669 -9627.875 0 -9488.3711 -209.83065 + 2400 475.84946 -9634.1191 0 -9486.6038 -366.65233 + 2500 477.75601 -9631.4196 0 -9483.3132 558.18557 + 2600 500.64305 -9636.9676 0 -9481.7661 111.76394 + 2700 
455.78826 -9620.2513 0 -9478.955 -962.65771 + 2800 438.72349 -9611.8395 0 -9475.8334 -180.94976 + 2900 471.33135 -9618.8641 0 -9472.7494 -291.14764 + 3000 528.64651 -9636.4232 0 -9472.5405 111.18605 +Loop time of 1.42723 on 4 procs for 3000 steps with 105 atoms + +Performance: 45.403 ns/day, 0.529 hours/ns, 2101.973 timesteps/s, 220.707 katom-step/s +99.2% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0.79715 | 0.88663 | 0.96735 | 7.5 | 62.12 +Neigh | 0.083068 | 0.096787 | 0.10679 | 2.8 | 6.78 +Comm | 0.058539 | 0.13831 | 0.22776 | 19.0 | 9.69 +Output | 0.0006518 | 0.00071197 | 0.00088964 | 0.0 | 0.05 +Modify | 0.29308 | 0.30291 | 0.31706 | 1.6 | 21.22 +Other | | 0.001886 | | | 0.13 + +Nlocal: 26.25 ave 34 max 12 min +Histogram: 1 0 0 0 0 0 0 1 1 1 +Nghost: 408 ave 462 max 347 min +Histogram: 1 0 0 0 1 0 1 0 0 1 +Neighs: 1109 ave 1419 max 453 min +Histogram: 1 0 0 0 0 0 0 1 0 2 + +Total # of neighbors = 4436 +Ave neighs/atom = 42.247619 +Neighbor list builds = 300 +Dangerous builds not checked +Total wall time: 0:00:01 diff --git a/examples/reaxff/FeOH3/log.5Oct16.FeOH3.g++.1 b/examples/reaxff/FeOH3/log.5Oct16.FeOH3.g++.1 deleted file mode 100644 index fd9b310bb2..0000000000 --- a/examples/reaxff/FeOH3/log.5Oct16.FeOH3.g++.1 +++ /dev/null @@ -1,70 +0,0 @@ -LAMMPS (5 Oct 2016) -# REAX potential for Fe/O/H system -# ..... - -units real - -atom_style charge -read_data data.FeOH3 - orthogonal box = (0 0 0) to (25 25 25) - 1 by 1 by 1 MPI processor grid - reading atoms ... 
- 105 atoms - -pair_style reax/c lmp_control -pair_coeff * * ffield.reax.Fe_O_C_H H O Fe -Reading potential file ffield.reax.Fe_O_C_H with DATE: 2011-02-18 - -neighbor 2 bin -neigh_modify every 10 delay 0 check no - -fix 1 all nve -fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq -fix 3 all temp/berendsen 500.0 500.0 100.0 - -timestep 0.25 - -#dump 1 all atom 30 dump.reax.feoh - -run 3000 -Neighbor list info ... - 2 neighbor list requests - update every 10 steps, delay 0 steps, check no - max neighbors/atom: 2000, page size: 100000 - master list distance cutoff = 12 - ghost atom cutoff = 12 - binsize = 6 -> bins = 5 5 5 -Memory usage per processor = 17.7294 Mbytes -Step Temp E_pair E_mol TotEng Press - 0 0 -9715.3326 0 -9715.3326 -139.61126 - 3000 529.72301 -9636.7144 0 -9472.498 127.52152 -Loop time of 8.40814 on 1 procs for 3000 steps with 105 atoms - -Performance: 7.707 ns/day, 3.114 hours/ns, 356.797 timesteps/s -99.2% CPU use with 1 MPI tasks x no OpenMP threads - -MPI task timing breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 7.3193 | 7.3193 | 7.3193 | 0.0 | 87.05 -Neigh | 0.29032 | 0.29032 | 0.29032 | 0.0 | 3.45 -Comm | 0.016032 | 0.016032 | 0.016032 | 0.0 | 0.19 -Output | 1.2159e-05 | 1.2159e-05 | 1.2159e-05 | 0.0 | 0.00 -Modify | 0.77846 | 0.77846 | 0.77846 | 0.0 | 9.26 -Other | | 0.004053 | | | 0.05 - -Nlocal: 105 ave 105 max 105 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Nghost: 651 ave 651 max 651 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Neighs: 3389 ave 3389 max 3389 min -Histogram: 1 0 0 0 0 0 0 0 0 0 - -Total # of neighbors = 3389 -Ave neighs/atom = 32.2762 -Neighbor list builds = 300 -Dangerous builds not checked - -Please see the log.cite file for references relevant to this simulation - -Total wall time: 0:00:08 diff --git a/examples/reaxff/FeOH3/log.5Oct16.FeOH3.g++.4 b/examples/reaxff/FeOH3/log.5Oct16.FeOH3.g++.4 deleted file mode 100644 index 
399c5dbe3c..0000000000 --- a/examples/reaxff/FeOH3/log.5Oct16.FeOH3.g++.4 +++ /dev/null @@ -1,70 +0,0 @@ -LAMMPS (5 Oct 2016) -# REAX potential for Fe/O/H system -# ..... - -units real - -atom_style charge -read_data data.FeOH3 - orthogonal box = (0 0 0) to (25 25 25) - 1 by 2 by 2 MPI processor grid - reading atoms ... - 105 atoms - -pair_style reax/c lmp_control -pair_coeff * * ffield.reax.Fe_O_C_H H O Fe -Reading potential file ffield.reax.Fe_O_C_H with DATE: 2011-02-18 - -neighbor 2 bin -neigh_modify every 10 delay 0 check no - -fix 1 all nve -fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq -fix 3 all temp/berendsen 500.0 500.0 100.0 - -timestep 0.25 - -#dump 1 all atom 30 dump.reax.feoh - -run 3000 -Neighbor list info ... - 2 neighbor list requests - update every 10 steps, delay 0 steps, check no - max neighbors/atom: 2000, page size: 100000 - master list distance cutoff = 12 - ghost atom cutoff = 12 - binsize = 6 -> bins = 5 5 5 -Memory usage per processor = 12.3695 Mbytes -Step Temp E_pair E_mol TotEng Press - 0 0 -9715.3326 0 -9715.3326 -139.61126 - 3000 534.48882 -9638.0405 0 -9472.3467 127.47989 -Loop time of 4.78344 on 4 procs for 3000 steps with 105 atoms - -Performance: 13.547 ns/day, 1.772 hours/ns, 627.164 timesteps/s -99.0% CPU use with 4 MPI tasks x no OpenMP threads - -MPI task timing breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 3.7061 | 3.7503 | 3.7853 | 1.5 | 78.40 -Neigh | 0.14361 | 0.16388 | 0.18297 | 3.4 | 3.43 -Comm | 0.062001 | 0.098492 | 0.14111 | 9.0 | 2.06 -Output | 2.0981e-05 | 2.2948e-05 | 2.7895e-05 | 0.1 | 0.00 -Modify | 0.75012 | 0.76764 | 0.78678 | 1.5 | 16.05 -Other | | 0.003105 | | | 0.06 - -Nlocal: 26.25 ave 35 max 12 min -Histogram: 1 0 0 0 0 0 0 2 0 1 -Nghost: 408 ave 462 max 348 min -Histogram: 1 0 0 0 1 0 1 0 0 1 -Neighs: 1107 ave 1428 max 453 min -Histogram: 1 0 0 0 0 0 0 1 0 2 - -Total # of neighbors = 4428 -Ave neighs/atom = 
42.1714 -Neighbor list builds = 300 -Dangerous builds not checked - -Please see the log.cite file for references relevant to this simulation - -Total wall time: 0:00:05 diff --git a/examples/reaxff/HNS/in.reaxc.hns b/examples/reaxff/HNS/in.reaxff.hns similarity index 82% rename from examples/reaxff/HNS/in.reaxc.hns rename to examples/reaxff/HNS/in.reaxff.hns index 5b83698917..0f40814bff 100644 --- a/examples/reaxff/HNS/in.reaxc.hns +++ b/examples/reaxff/HNS/in.reaxff.hns @@ -12,16 +12,15 @@ atom_style charge atom_modify sort 100 0.0 # optional dimension 3 boundary p p p -box tilt large read_data data.hns-equil replicate $x $y $z bbox -pair_style reax/c NULL +pair_style reaxff NULL pair_coeff * * ffield.reax.hns C H O N -compute reax all pair reax/c +compute reax all pair reaxff neighbor 1.0 bin neigh_modify every 20 delay 0 check no @@ -35,6 +34,6 @@ thermo 10 velocity all create 300.0 41279 loop geom fix 1 all nve -fix 2 all qeq/reax 1 0.0 10.0 1e-6 reax/c +fix 2 all qeq/reax 1 0.0 10.0 1e-6 reaxff run $t diff --git a/examples/reaxff/HNS/log.30Nov23.reaxff.hns.g++.1 b/examples/reaxff/HNS/log.30Nov23.reaxff.hns.g++.1 new file mode 100644 index 0000000000..b2c9778994 --- /dev/null +++ b/examples/reaxff/HNS/log.30Nov23.reaxff.hns.g++.1 @@ -0,0 +1,149 @@ +LAMMPS (21 Nov 2023) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +# Pure HNS crystal, ReaxFF tests for benchmarking LAMMPS +# See README for more info + +variable x index 2 +variable y index 2 +variable z index 2 +variable t index 100 + + +units real +atom_style charge +atom_modify sort 100 0.0 # optional +dimension 3 +boundary p p p + +read_data data.hns-equil +Reading data file ... + triclinic box = (0 0 0) to (22.326 11.1412 13.778966) with tilt (0 -5.02603 0) + 1 by 1 by 1 MPI processor grid + reading atoms ... + 304 atoms + reading velocities ... 
+ 304 velocities + read_data CPU = 0.005 seconds +replicate $x $y $z bbox +replicate 2 $y $z bbox +replicate 2 2 $z bbox +replicate 2 2 2 bbox +Replication is creating a 2x2x2 = 8 times larger system... + triclinic box = (0 0 0) to (44.652 22.2824 27.557932) with tilt (0 -10.05206 0) + 1 by 1 by 1 MPI processor grid + bounding box image = (0 -1 -1) to (0 1 1) + bounding box extra memory = 0.03 MB + average # of replicas added to proc = 8.00 out of 8 (100.00%) + 2432 atoms + replicate CPU = 0.001 seconds + + +pair_style reaxff NULL +pair_coeff * * ffield.reax.hns C H O N + +compute reax all pair reaxff + +neighbor 1.0 bin +neigh_modify every 20 delay 0 check no + +timestep 0.1 + +thermo_style custom step temp pe press evdwl ecoul vol +thermo_modify norm yes +thermo 10 + +velocity all create 300.0 41279 loop geom + +fix 1 all nve +fix 2 all qeq/reax 1 0.0 10.0 1e-6 reaxff + +run $t +run 100 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +- fix qeq/reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Neighbor list info ... 
+ update: every = 20 steps, delay = 0 steps, check = no + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 11 + ghost atom cutoff = 11 + binsize = 5.5, bins = 10 5 6 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/newtoff/ghost + stencil: full/ghost/bin/3d + bin: standard + (2) fix qeq/reax, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 215 | 215 | 215 Mbytes + Step Temp PotEng Press E_vdwl E_coul Volume + 0 300 -113.27833 437.52149 -111.57687 -1.7014647 27418.867 + 10 299.38517 -113.27631 1439.2564 -111.57492 -1.7013814 27418.867 + 20 300.27107 -113.27884 3764.4017 -111.57762 -1.7012246 27418.867 + 30 302.21064 -113.28428 7007.6558 -111.58335 -1.7009364 27418.867 + 40 303.52265 -113.28799 9844.8196 -111.58747 -1.7005186 27418.867 + 50 301.8706 -113.28324 9663.08 -111.58318 -1.7000523 27418.867 + 60 296.67808 -113.26777 7273.8875 -111.56815 -1.6996136 27418.867 + 70 292.19999 -113.25435 5533.625 -111.55514 -1.6992157 27418.867 + 80 293.58678 -113.25831 5993.4679 -111.55946 -1.6988532 27418.867 + 90 300.62637 -113.27925 7202.8453 -111.58069 -1.6985592 27418.867 + 100 305.38277 -113.29357 10085.747 -111.59518 -1.6983875 27418.867 +Loop time of 17.6114 on 1 procs for 100 steps with 2432 atoms + +Performance: 0.049 ns/day, 489.205 hours/ns, 5.678 timesteps/s, 13.809 katom-step/s +99.7% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 13.081 | 13.081 | 13.081 | 0.0 | 74.27 +Neigh | 0.25469 | 0.25469 | 0.25469 | 0.0 | 1.45 +Comm | 0.0061082 | 0.0061082 | 0.0061082 | 0.0 | 0.03 +Output | 0.00035315 | 0.00035315 | 0.00035315 | 0.0 | 0.00 +Modify | 4.2687 | 4.2687 | 4.2687 | 0.0 | 24.24 +Other | 
| 0.0007784 | | | 0.00 + +Nlocal: 2432 ave 2432 max 2432 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 10685 ave 10685 max 10685 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 823958 ave 823958 max 823958 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 823958 +Ave neighs/atom = 338.79852 +Neighbor list builds = 5 +Dangerous builds not checked +Total wall time: 0:00:17 diff --git a/examples/reaxff/HNS/log.30Nov23.reaxff.hns.g++.4 b/examples/reaxff/HNS/log.30Nov23.reaxff.hns.g++.4 new file mode 100644 index 0000000000..d7c3b76f7a --- /dev/null +++ b/examples/reaxff/HNS/log.30Nov23.reaxff.hns.g++.4 @@ -0,0 +1,149 @@ +LAMMPS (21 Nov 2023) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +# Pure HNS crystal, ReaxFF tests for benchmarking LAMMPS +# See README for more info + +variable x index 2 +variable y index 2 +variable z index 2 +variable t index 100 + + +units real +atom_style charge +atom_modify sort 100 0.0 # optional +dimension 3 +boundary p p p + +read_data data.hns-equil +Reading data file ... + triclinic box = (0 0 0) to (22.326 11.1412 13.778966) with tilt (0 -5.02603 0) + 2 by 1 by 2 MPI processor grid + reading atoms ... + 304 atoms + reading velocities ... + 304 velocities + read_data CPU = 0.003 seconds +replicate $x $y $z bbox +replicate 2 $y $z bbox +replicate 2 2 $z bbox +replicate 2 2 2 bbox +Replication is creating a 2x2x2 = 8 times larger system... 
+ triclinic box = (0 0 0) to (44.652 22.2824 27.557932) with tilt (0 -10.05206 0) + 2 by 1 by 2 MPI processor grid + bounding box image = (0 -1 -1) to (0 1 1) + bounding box extra memory = 0.03 MB + average # of replicas added to proc = 5.00 out of 8 (62.50%) + 2432 atoms + replicate CPU = 0.000 seconds + + +pair_style reaxff NULL +pair_coeff * * ffield.reax.hns C H O N + +compute reax all pair reaxff + +neighbor 1.0 bin +neigh_modify every 20 delay 0 check no + +timestep 0.1 + +thermo_style custom step temp pe press evdwl ecoul vol +thermo_modify norm yes +thermo 10 + +velocity all create 300.0 41279 loop geom + +fix 1 all nve +fix 2 all qeq/reax 1 0.0 10.0 1e-6 reaxff + +run $t +run 100 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +- fix qeq/reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Neighbor list info ... 
+ update: every = 20 steps, delay = 0 steps, check = no + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 11 + ghost atom cutoff = 11 + binsize = 5.5, bins = 10 5 6 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/newtoff/ghost + stencil: full/ghost/bin/3d + bin: standard + (2) fix qeq/reax, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 103.8 | 103.8 | 103.8 Mbytes + Step Temp PotEng Press E_vdwl E_coul Volume + 0 300 -113.27833 437.52125 -111.57687 -1.7014647 27418.867 + 10 299.38517 -113.27631 1439.2564 -111.57492 -1.7013814 27418.867 + 20 300.27106 -113.27884 3764.3691 -111.57762 -1.7012246 27418.867 + 30 302.21062 -113.28428 7007.6981 -111.58335 -1.7009363 27418.867 + 40 303.52264 -113.28799 9844.8446 -111.58747 -1.7005186 27418.867 + 50 301.87059 -113.28324 9663.0539 -111.58318 -1.7000523 27418.867 + 60 296.67807 -113.26777 7273.8306 -111.56815 -1.6996136 27418.867 + 70 292.19997 -113.25435 5533.612 -111.55514 -1.6992157 27418.867 + 80 293.58675 -113.25831 5993.4344 -111.55946 -1.6988533 27418.867 + 90 300.62636 -113.27925 7202.8636 -111.58069 -1.6985591 27418.867 + 100 305.38278 -113.29357 10085.719 -111.59518 -1.6983876 27418.867 +Loop time of 6.63333 on 4 procs for 100 steps with 2432 atoms + +Performance: 0.130 ns/day, 184.259 hours/ns, 15.075 timesteps/s, 36.663 katom-step/s +99.3% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 4.1006 | 4.5745 | 5.0624 | 17.4 | 68.96 +Neigh | 0.11589 | 0.11637 | 0.11669 | 0.1 | 1.75 +Comm | 0.0077297 | 0.49567 | 0.96958 | 52.7 | 7.47 +Output | 0.00027396 | 0.00031049 | 0.00038633 | 0.0 | 0.00 +Modify | 1.4458 | 1.4461 | 1.4465 | 0.0 | 21.80 
+Other | | 0.0004201 | | | 0.01 + +Nlocal: 608 ave 612 max 604 min +Histogram: 1 0 0 0 0 2 0 0 0 1 +Nghost: 5737.25 ave 5744 max 5732 min +Histogram: 1 0 1 0 0 1 0 0 0 1 +Neighs: 231539 ave 233090 max 229970 min +Histogram: 1 0 0 0 1 1 0 0 0 1 + +Total # of neighbors = 926155 +Ave neighs/atom = 380.82031 +Neighbor list builds = 5 +Dangerous builds not checked +Total wall time: 0:00:06 diff --git a/examples/reaxff/HNS/log.8Mar18.reaxc.hns.g++.1 b/examples/reaxff/HNS/log.8Mar18.reaxc.hns.g++.1 deleted file mode 100644 index d418d287b1..0000000000 --- a/examples/reaxff/HNS/log.8Mar18.reaxc.hns.g++.1 +++ /dev/null @@ -1,115 +0,0 @@ -LAMMPS (8 Mar 2018) - using 1 OpenMP thread(s) per MPI task -# Pure HNS crystal, ReaxFF tests for benchmarking LAMMPS -# See README for more info - -variable x index 2 -variable y index 2 -variable z index 2 -variable t index 100 - - -units real -atom_style charge -atom_modify sort 100 0.0 # optional -dimension 3 -boundary p p p -box tilt large - -read_data data.hns-equil - triclinic box = (0 0 0) to (22.326 11.1412 13.779) with tilt (0 -5.02603 0) - 1 by 1 by 1 MPI processor grid - reading atoms ... - 304 atoms - reading velocities ... - 304 velocities -replicate $x $y $z bbox -replicate 2 $y $z bbox -replicate 2 2 $z bbox -replicate 2 2 2 bbox - triclinic box = (0 0 0) to (44.652 22.2824 27.5579) with tilt (0 -10.0521 0) - 1 by 1 by 1 MPI processor grid - 2432 atoms - Time spent = 0.000789404 secs - - -pair_style reax/c NULL -pair_coeff * * ffield.reax.hns C H O N - -compute reax all pair reax/c - -neighbor 1.0 bin -neigh_modify every 20 delay 0 check no - -timestep 0.1 - -thermo_style custom step temp pe press evdwl ecoul vol -thermo_modify norm yes -thermo 10 - -velocity all create 300.0 41279 loop geom - -fix 1 all nve -fix 2 all qeq/reax 1 0.0 10.0 1e-6 reax/c - -run 100 -Neighbor list info ... 
- update every 20 steps, delay 0 steps, check no - max neighbors/atom: 2000, page size: 100000 - master list distance cutoff = 11 - ghost atom cutoff = 11 - binsize = 5.5, bins = 10 5 6 - 2 neighbor lists, perpetual/occasional/extra = 2 0 0 - (1) pair reax/c, perpetual - attributes: half, newton off, ghost - pair build: half/bin/newtoff/ghost - stencil: half/ghost/bin/3d/newtoff - bin: standard - (2) fix qeq/reax, perpetual, copy from (1) - attributes: half, newton off, ghost - pair build: copy - stencil: none - bin: none -Per MPI rank memory allocation (min/avg/max) = 262.4 | 262.4 | 262.4 Mbytes -Step Temp PotEng Press E_vdwl E_coul Volume - 0 300 -113.27833 437.52103 -111.57687 -1.7014647 27418.867 - 10 299.87174 -113.27778 2033.6337 -111.57645 -1.7013325 27418.867 - 20 300.81718 -113.28046 4817.5889 -111.57931 -1.7011463 27418.867 - 30 301.8622 -113.28323 8303.0039 -111.58237 -1.7008608 27418.867 - 40 302.4646 -113.28493 10519.459 -111.58446 -1.700467 27418.867 - 50 300.79064 -113.27989 10402.291 -111.57987 -1.7000218 27418.867 - 60 296.11534 -113.26599 7929.1348 -111.5664 -1.6995929 27418.867 - 70 291.73354 -113.25289 5071.5459 -111.5537 -1.6991916 27418.867 - 80 292.189 -113.25399 5667.0962 -111.55519 -1.6987993 27418.867 - 90 298.40792 -113.27253 7513.3806 -111.57409 -1.6984403 27418.867 - 100 303.58246 -113.28809 10017.879 -111.58991 -1.698177 27418.867 -Loop time of 59.5461 on 1 procs for 100 steps with 2432 atoms - -Performance: 0.015 ns/day, 1654.060 hours/ns, 1.679 timesteps/s -97.0% CPU use with 1 MPI tasks x 1 OpenMP threads - -MPI task timing breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 49.922 | 49.922 | 49.922 | 0.0 | 83.84 -Neigh | 0.53154 | 0.53154 | 0.53154 | 0.0 | 0.89 -Comm | 0.011399 | 0.011399 | 0.011399 | 0.0 | 0.02 -Output | 0.00064397 | 0.00064397 | 0.00064397 | 0.0 | 0.00 -Modify | 9.0782 | 9.0782 | 9.0782 | 0.0 | 15.25 -Other | | 0.002116 | 
| | 0.00 - -Nlocal: 2432 ave 2432 max 2432 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Nghost: 10687 ave 10687 max 10687 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Neighs: 823977 ave 823977 max 823977 min -Histogram: 1 0 0 0 0 0 0 0 0 0 - -Total # of neighbors = 823977 -Ave neighs/atom = 338.806 -Neighbor list builds = 5 -Dangerous builds not checked - -Please see the log.cite file for references relevant to this simulation - -Total wall time: 0:01:00 diff --git a/examples/reaxff/HNS/log.8Mar18.reaxc.hns.g++.4 b/examples/reaxff/HNS/log.8Mar18.reaxc.hns.g++.4 deleted file mode 100644 index aef07f80eb..0000000000 --- a/examples/reaxff/HNS/log.8Mar18.reaxc.hns.g++.4 +++ /dev/null @@ -1,115 +0,0 @@ -LAMMPS (8 Mar 2018) - using 1 OpenMP thread(s) per MPI task -# Pure HNS crystal, ReaxFF tests for benchmarking LAMMPS -# See README for more info - -variable x index 2 -variable y index 2 -variable z index 2 -variable t index 100 - - -units real -atom_style charge -atom_modify sort 100 0.0 # optional -dimension 3 -boundary p p p -box tilt large - -read_data data.hns-equil - triclinic box = (0 0 0) to (22.326 11.1412 13.779) with tilt (0 -5.02603 0) - 2 by 1 by 2 MPI processor grid - reading atoms ... - 304 atoms - reading velocities ... - 304 velocities -replicate $x $y $z bbox -replicate 2 $y $z bbox -replicate 2 2 $z bbox -replicate 2 2 2 bbox - triclinic box = (0 0 0) to (44.652 22.2824 27.5579) with tilt (0 -10.0521 0) - 2 by 1 by 2 MPI processor grid - 2432 atoms - Time spent = 0.000398397 secs - - -pair_style reax/c NULL -pair_coeff * * ffield.reax.hns C H O N - -compute reax all pair reax/c - -neighbor 1.0 bin -neigh_modify every 20 delay 0 check no - -timestep 0.1 - -thermo_style custom step temp pe press evdwl ecoul vol -thermo_modify norm yes -thermo 10 - -velocity all create 300.0 41279 loop geom - -fix 1 all nve -fix 2 all qeq/reax 1 0.0 10.0 1e-6 reax/c - -run 100 -Neighbor list info ... 
- update every 20 steps, delay 0 steps, check no - max neighbors/atom: 2000, page size: 100000 - master list distance cutoff = 11 - ghost atom cutoff = 11 - binsize = 5.5, bins = 10 5 6 - 2 neighbor lists, perpetual/occasional/extra = 2 0 0 - (1) pair reax/c, perpetual - attributes: half, newton off, ghost - pair build: half/bin/newtoff/ghost - stencil: half/ghost/bin/3d/newtoff - bin: standard - (2) fix qeq/reax, perpetual, copy from (1) - attributes: half, newton off, ghost - pair build: copy - stencil: none - bin: none -Per MPI rank memory allocation (min/avg/max) = 126.6 | 126.6 | 126.6 Mbytes -Step Temp PotEng Press E_vdwl E_coul Volume - 0 300 -113.27833 437.52112 -111.57687 -1.7014647 27418.867 - 10 299.87174 -113.27778 2033.632 -111.57645 -1.7013325 27418.867 - 20 300.81719 -113.28046 4817.5761 -111.57931 -1.7011463 27418.867 - 30 301.8622 -113.28323 8302.9767 -111.58237 -1.7008609 27418.867 - 40 302.4646 -113.28493 10519.481 -111.58446 -1.700467 27418.867 - 50 300.79064 -113.27989 10402.312 -111.57987 -1.7000217 27418.867 - 60 296.11534 -113.26599 7929.1393 -111.5664 -1.6995929 27418.867 - 70 291.73354 -113.25289 5071.5368 -111.5537 -1.6991916 27418.867 - 80 292.18901 -113.25399 5667.1118 -111.55519 -1.6987993 27418.867 - 90 298.40793 -113.27253 7513.4029 -111.57409 -1.6984403 27418.867 - 100 303.58247 -113.28809 10017.892 -111.58991 -1.698177 27418.867 -Loop time of 21.3933 on 4 procs for 100 steps with 2432 atoms - -Performance: 0.040 ns/day, 594.257 hours/ns, 4.674 timesteps/s -97.6% CPU use with 4 MPI tasks x 1 OpenMP threads - -MPI task timing breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 14.863 | 16.367 | 18.027 | 28.6 | 76.51 -Neigh | 0.23943 | 0.2422 | 0.24658 | 0.6 | 1.13 -Comm | 0.024331 | 1.6845 | 3.189 | 89.2 | 7.87 -Output | 0.00051165 | 0.00056899 | 0.00068665 | 0.0 | 0.00 -Modify | 3.0933 | 3.0969 | 3.0999 | 0.1 | 14.48 -Other | | 0.001784 | | | 
0.01 - -Nlocal: 608 ave 608 max 608 min -Histogram: 4 0 0 0 0 0 0 0 0 0 -Nghost: 5738.25 ave 5742 max 5734 min -Histogram: 1 1 0 0 0 0 0 0 0 2 -Neighs: 231544 ave 231625 max 231466 min -Histogram: 2 0 0 0 0 0 0 0 0 2 - -Total # of neighbors = 926176 -Ave neighs/atom = 380.829 -Neighbor list builds = 5 -Dangerous builds not checked - -Please see the log.cite file for references relevant to this simulation - -Total wall time: 0:00:21 diff --git a/examples/reaxff/RDX/in.RDX b/examples/reaxff/RDX/in.RDX index 67d6145787..a510390a03 100644 --- a/examples/reaxff/RDX/in.RDX +++ b/examples/reaxff/RDX/in.RDX @@ -1,23 +1,23 @@ # REAX potential for high energy CHON systems # ..... -units real +units real -atom_style charge -read_data data.RDX +atom_style charge +read_data data.RDX -pair_style reax/c lmp_control -pair_coeff * * ffield.reax.rdx H C O N +pair_style reaxff lmp_control +pair_coeff * * ffield.reax.rdx H C O N -neighbor 2 bin -neigh_modify every 10 delay 0 check no +neighbor 2 bin +neigh_modify every 10 delay 0 check no -fix 1 all nve +fix 1 all nve fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq fix 3 all temp/berendsen 500.0 500.0 100.0 -timestep 0.25 +timestep 0.25 +thermo 100 +#dump 1 all atom 30 dump.reax.rdx -#dump 1 all atom 30 dump.reax.rdx - -run 3000 +run 3000 diff --git a/examples/reaxff/RDX/lmp_control b/examples/reaxff/RDX/lmp_control index c729255b45..b97ad8a67f 100644 --- a/examples/reaxff/RDX/lmp_control +++ b/examples/reaxff/RDX/lmp_control @@ -1,17 +1,8 @@ -simulation_name RDX_example ! output files will carry this name + their specific ext -tabulate_long_range 10000 ! denotes the granularity of long range tabulation, 0 means no tabulation -energy_update_freq 1 +tabulate_long_range 10000 ! denotes the granularity of long range tabulation, 0 means no tabulation -nbrhood_cutoff 4.5 ! near neighbors cutoff for bond calculations in A -hbond_cutoff 6.0 ! cutoff distance for hydrogen bond interactions -bond_graph_cutoff 0.3 ! 
bond strength cutoff for bond graphs -thb_cutoff 0.001 ! cutoff value for three body interactions +nbrhood_cutoff 4.5 ! near neighbors cutoff for bond calculations in A +hbond_cutoff 6.0 ! cutoff distance for hydrogen bond interactions +bond_graph_cutoff 0.3 ! bond strength cutoff for bond graphs +thb_cutoff 0.001 ! cutoff value for three body interactions -write_freq 1 ! write trajectory after so many steps -traj_title RDX ! (no white spaces) -atom_info 1 ! 0: no atom info, 1: print basic atom info in the trajectory file -atom_forces 1 ! 0: basic atom format, 1: print force on each atom in the trajectory file -atom_velocities 0 ! 0: basic atom format, 1: print the velocity of each atom in the trajectory file -bond_info 1 ! 0: do not print bonds, 1: print bonds in the trajectory file -angle_info 1 ! 0: do not print angles, 1: print angles in the trajectory file diff --git a/examples/reaxff/RDX/log.30Nov23.RDX.g++.1 b/examples/reaxff/RDX/log.30Nov23.RDX.g++.1 new file mode 100644 index 0000000000..7d2214a6c8 --- /dev/null +++ b/examples/reaxff/RDX/log.30Nov23.RDX.g++.1 @@ -0,0 +1,140 @@ +LAMMPS (21 Nov 2023) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +# REAX potential for high energy CHON systems +# ..... + +units real + +atom_style charge +read_data data.RDX +Reading data file ... + orthogonal box = (0 0 0) to (25 25 25) + 1 by 1 by 1 MPI processor grid + reading atoms ... 
+ 105 atoms + read_data CPU = 0.001 seconds + +pair_style reaxff lmp_control +pair_coeff * * ffield.reax.rdx H C O N +Reading potential file ffield.reax.rdx with DATE: 2010-02-19 + +neighbor 2 bin +neigh_modify every 10 delay 0 check no + +fix 1 all nve +fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq +fix 3 all temp/berendsen 500.0 500.0 100.0 + +timestep 0.25 +thermo 100 +#dump 1 all atom 30 dump.reax.rdx + +run 3000 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +- fix qeq/reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Neighbor list info ... 
+ update: every = 10 steps, delay = 0 steps, check = no + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 12 + ghost atom cutoff = 12 + binsize = 6, bins = 5 5 5 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/newtoff/ghost + stencil: full/ghost/bin/3d + bin: standard + (2) fix qeq/reax, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 16.3 | 16.3 | 16.3 Mbytes + Step Temp E_pair E_mol TotEng Press + 0 0 -10197.932 0 -10197.932 38.347492 + 100 47.478574 -10176.425 0 -10161.706 632.99863 + 200 166.95277 -10181.513 0 -10129.757 -27.107717 + 300 142.53594 -10148.039 0 -10103.853 5120.6794 + 400 322.68495 -10178.868 0 -10078.834 2342.89 + 500 193.81476 -10117.984 0 -10057.901 8412.5289 + 600 300.27155 -10134.473 0 -10041.388 -2801.8661 + 700 272.63426 -10110.146 0 -10025.629 10749.023 + 800 339.99867 -10114.124 0 -10008.723 5122.9966 + 900 231.65547 -10068.587 0 -9996.7728 5306.059 + 1000 329.92918 -10088.776 0 -9986.4964 3190.1697 + 1100 376.60905 -10092.398 0 -9975.6476 2921.9605 + 1200 361.98746 -10076.599 0 -9964.3813 3612.0455 + 1300 358.65631 -10069.365 0 -9958.1802 4339.8435 + 1400 470.15262 -10098.553 0 -9952.8035 -146.0811 + 1500 509.62274 -10106.57 0 -9948.5844 2356.8592 + 1600 417.89364 -10075.274 0 -9945.7249 1760.5655 + 1700 453.21317 -10084.329 0 -9943.8306 -570.32375 + 1800 472.92112 -10087.83 0 -9941.2221 1550.3495 + 1900 507.18794 -10096.441 0 -9939.2102 -460.65809 + 2000 443.55347 -10076.832 0 -9939.3281 -145.14295 + 2100 485.44451 -10088.451 0 -9937.9611 -614.40787 + 2200 507.52411 -10095.157 0 -9937.8226 1308.3869 + 2300 496.44961 -10089.637 0 -9935.7354 206.44147 + 2400 457.99343 -10078.114 0 -9936.1344 810.80538 + 2500 507.88115 -10092.692 0 -9935.2463 -464.87873 + 2600 437.84198 -10069.789 0 -9934.0561 1323.4191 + 2700 
503.09017 -10088.945 0 -9932.9853 1133.3561 + 2800 481.73908 -10082.193 0 -9932.852 -27.838881 + 2900 487.56555 -10082.752 0 -9931.6045 1772.2131 + 3000 510.30601 -10091.368 0 -9933.1706 1273.0501 +Loop time of 5.35022 on 1 procs for 3000 steps with 105 atoms + +Performance: 12.112 ns/day, 1.982 hours/ns, 560.725 timesteps/s, 58.876 katom-step/s +99.7% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 4.7927 | 4.7927 | 4.7927 | 0.0 | 89.58 +Neigh | 0.15169 | 0.15169 | 0.15169 | 0.0 | 2.84 +Comm | 0.011036 | 0.011036 | 0.011036 | 0.0 | 0.21 +Output | 0.00080628 | 0.00080628 | 0.00080628 | 0.0 | 0.02 +Modify | 0.3906 | 0.3906 | 0.3906 | 0.0 | 7.30 +Other | | 0.003436 | | | 0.06 + +Nlocal: 105 ave 105 max 105 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 645 ave 645 max 645 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 3065 ave 3065 max 3065 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 3065 +Ave neighs/atom = 29.190476 +Neighbor list builds = 300 +Dangerous builds not checked +Total wall time: 0:00:05 diff --git a/examples/reaxff/RDX/log.30Nov23.RDX.g++.4 b/examples/reaxff/RDX/log.30Nov23.RDX.g++.4 new file mode 100644 index 0000000000..37481acd1b --- /dev/null +++ b/examples/reaxff/RDX/log.30Nov23.RDX.g++.4 @@ -0,0 +1,140 @@ +LAMMPS (21 Nov 2023) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +# REAX potential for high energy CHON systems +# ..... + +units real + +atom_style charge +read_data data.RDX +Reading data file ... + orthogonal box = (0 0 0) to (25 25 25) + 1 by 2 by 2 MPI processor grid + reading atoms ... 
+ 105 atoms + read_data CPU = 0.001 seconds + +pair_style reaxff lmp_control +pair_coeff * * ffield.reax.rdx H C O N +Reading potential file ffield.reax.rdx with DATE: 2010-02-19 + +neighbor 2 bin +neigh_modify every 10 delay 0 check no + +fix 1 all nve +fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq +fix 3 all temp/berendsen 500.0 500.0 100.0 + +timestep 0.25 +thermo 100 +#dump 1 all atom 30 dump.reax.rdx + +run 3000 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +- fix qeq/reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Neighbor list info ... 
+ update: every = 10 steps, delay = 0 steps, check = no + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 12 + ghost atom cutoff = 12 + binsize = 6, bins = 5 5 5 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/newtoff/ghost + stencil: full/ghost/bin/3d + bin: standard + (2) fix qeq/reax, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 10.78 | 11.56 | 12.26 Mbytes + Step Temp E_pair E_mol TotEng Press + 0 0 -10197.932 0 -10197.932 38.347492 + 100 47.47852 -10176.425 0 -10161.706 632.97359 + 200 166.95287 -10181.513 0 -10129.757 -27.146803 + 300 142.53582 -10148.039 0 -10103.852 5120.6397 + 400 322.68523 -10178.868 0 -10078.834 2342.7187 + 500 193.81484 -10117.984 0 -10057.901 8412.4559 + 600 300.27165 -10134.473 0 -10041.388 -2801.9143 + 700 272.63408 -10110.146 0 -10025.629 10749.2 + 800 339.99669 -10114.123 0 -10008.723 5123.2489 + 900 231.65632 -10068.587 0 -9996.7729 5306.0392 + 1000 329.93324 -10088.777 0 -9986.4967 3190.4707 + 1100 376.60924 -10092.398 0 -9975.6478 2920.8475 + 1200 361.98231 -10076.598 0 -9964.3816 3612.0573 + 1300 358.6599 -10069.366 0 -9958.1803 4341.9871 + 1400 470.14856 -10098.552 0 -9952.8036 -146.9069 + 1500 509.6454 -10106.577 0 -9948.5847 2355.4022 + 1600 417.9276 -10075.284 0 -9945.7249 1749.565 + 1700 453.25817 -10084.343 0 -9943.8306 -570.48011 + 1800 472.9517 -10087.84 0 -9941.2226 1532.6424 + 1900 507.14171 -10096.428 0 -9939.212 -404.84948 + 2000 443.62843 -10076.86 0 -9939.3329 -132.17302 + 2100 485.441 -10088.414 0 -9937.925 -609.75758 + 2200 507.23914 -10095.067 0 -9937.8209 1288.5372 + 2300 499.64956 -10090.665 0 -9935.7719 149.06622 + 2400 457.97848 -10078.107 0 -9936.1317 2065.2075 + 2500 510.58254 -10093.537 0 -9935.2543 -559.75965 + 2600 440.97503 -10070.865 0 -9934.1605 1164.1078 + 2700 500.4945 
-10088.165 0 -9933.0096 1051.9016 + 2800 485.77814 -10083.543 0 -9932.9498 294.64404 + 2900 487.73983 -10082.939 0 -9931.7373 2208.263 + 3000 504.69717 -10089.803 0 -9933.3447 1723.6386 +Loop time of 2.81192 on 4 procs for 3000 steps with 105 atoms + +Performance: 23.045 ns/day, 1.041 hours/ns, 1066.887 timesteps/s, 112.023 katom-step/s +99.3% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 2.0513 | 2.1567 | 2.2232 | 4.3 | 76.70 +Neigh | 0.072125 | 0.087048 | 0.10214 | 3.7 | 3.10 +Comm | 0.086792 | 0.15326 | 0.25749 | 16.2 | 5.45 +Output | 0.00058533 | 0.00064027 | 0.00080207 | 0.0 | 0.02 +Modify | 0.39587 | 0.41124 | 0.42647 | 1.7 | 14.62 +Other | | 0.003062 | | | 0.11 + +Nlocal: 26.25 ave 46 max 8 min +Histogram: 1 0 0 1 0 1 0 0 0 1 +Nghost: 399.5 ave 512 max 288 min +Histogram: 1 0 0 1 0 0 1 0 0 1 +Neighs: 1011.25 ave 1819 max 420 min +Histogram: 1 0 1 1 0 0 0 0 0 1 + +Total # of neighbors = 4045 +Ave neighs/atom = 38.52381 +Neighbor list builds = 300 +Dangerous builds not checked +Total wall time: 0:00:02 diff --git a/examples/reaxff/RDX/log.8Mar18.RDX.g++.1 b/examples/reaxff/RDX/log.8Mar18.RDX.g++.1 deleted file mode 100644 index d0765a97a5..0000000000 --- a/examples/reaxff/RDX/log.8Mar18.RDX.g++.1 +++ /dev/null @@ -1,81 +0,0 @@ -LAMMPS (8 Mar 2018) - using 1 OpenMP thread(s) per MPI task -# REAX potential for high energy CHON systems -# ..... - -units real - -atom_style charge -read_data data.RDX - orthogonal box = (0 0 0) to (25 25 25) - 1 by 1 by 1 MPI processor grid - reading atoms ... 
- 105 atoms - -pair_style reax/c lmp_control -pair_coeff * * ffield.reax.rdx H C O N -Reading potential file ffield.reax.rdx with DATE: 2010-02-19 - -neighbor 2 bin -neigh_modify every 10 delay 0 check no - -fix 1 all nve -fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq -fix 3 all temp/berendsen 500.0 500.0 100.0 - -timestep 0.25 - -#dump 1 all atom 30 dump.reax.rdx - -run 3000 -Neighbor list info ... - update every 10 steps, delay 0 steps, check no - max neighbors/atom: 2000, page size: 100000 - master list distance cutoff = 12 - ghost atom cutoff = 12 - binsize = 6, bins = 5 5 5 - 2 neighbor lists, perpetual/occasional/extra = 2 0 0 - (1) pair reax/c, perpetual - attributes: half, newton off, ghost - pair build: half/bin/newtoff/ghost - stencil: half/ghost/bin/3d/newtoff - bin: standard - (2) fix qeq/reax, perpetual, copy from (1) - attributes: half, newton off, ghost - pair build: copy - stencil: none - bin: none -Per MPI rank memory allocation (min/avg/max) = 19 | 19 | 19 Mbytes -Step Temp E_pair E_mol TotEng Press - 0 0 -10197.932 0 -10197.932 38.347492 - 3000 510.63767 -10091.537 0 -9933.2374 1144.545 -Loop time of 21.2931 on 1 procs for 3000 steps with 105 atoms - -Performance: 3.043 ns/day, 7.886 hours/ns, 140.891 timesteps/s -97.6% CPU use with 1 MPI tasks x 1 OpenMP threads - -MPI task timing breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 19.887 | 19.887 | 19.887 | 0.0 | 93.40 -Neigh | 0.33143 | 0.33143 | 0.33143 | 0.0 | 1.56 -Comm | 0.02079 | 0.02079 | 0.02079 | 0.0 | 0.10 -Output | 2.5272e-05 | 2.5272e-05 | 2.5272e-05 | 0.0 | 0.00 -Modify | 1.0478 | 1.0478 | 1.0478 | 0.0 | 4.92 -Other | | 0.006125 | | | 0.03 - -Nlocal: 105 ave 105 max 105 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Nghost: 645 ave 645 max 645 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Neighs: 3063 ave 3063 max 3063 min -Histogram: 1 0 0 0 0 0 0 0 0 0 - -Total # of neighbors = 3063 -Ave neighs/atom = 29.1714 
-Neighbor list builds = 300 -Dangerous builds not checked - -Please see the log.cite file for references relevant to this simulation - -Total wall time: 0:00:21 diff --git a/examples/reaxff/RDX/log.8Mar18.RDX.g++.4 b/examples/reaxff/RDX/log.8Mar18.RDX.g++.4 deleted file mode 100644 index 7082d30636..0000000000 --- a/examples/reaxff/RDX/log.8Mar18.RDX.g++.4 +++ /dev/null @@ -1,81 +0,0 @@ -LAMMPS (8 Mar 2018) - using 1 OpenMP thread(s) per MPI task -# REAX potential for high energy CHON systems -# ..... - -units real - -atom_style charge -read_data data.RDX - orthogonal box = (0 0 0) to (25 25 25) - 1 by 2 by 2 MPI processor grid - reading atoms ... - 105 atoms - -pair_style reax/c lmp_control -pair_coeff * * ffield.reax.rdx H C O N -Reading potential file ffield.reax.rdx with DATE: 2010-02-19 - -neighbor 2 bin -neigh_modify every 10 delay 0 check no - -fix 1 all nve -fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq -fix 3 all temp/berendsen 500.0 500.0 100.0 - -timestep 0.25 - -#dump 1 all atom 30 dump.reax.rdx - -run 3000 -Neighbor list info ... 
- update every 10 steps, delay 0 steps, check no - max neighbors/atom: 2000, page size: 100000 - master list distance cutoff = 12 - ghost atom cutoff = 12 - binsize = 6, bins = 5 5 5 - 2 neighbor lists, perpetual/occasional/extra = 2 0 0 - (1) pair reax/c, perpetual - attributes: half, newton off, ghost - pair build: half/bin/newtoff/ghost - stencil: half/ghost/bin/3d/newtoff - bin: standard - (2) fix qeq/reax, perpetual, copy from (1) - attributes: half, newton off, ghost - pair build: copy - stencil: none - bin: none -Per MPI rank memory allocation (min/avg/max) = 12.14 | 13.04 | 13.9 Mbytes -Step Temp E_pair E_mol TotEng Press - 0 0 -10197.932 0 -10197.932 38.347492 - 3000 509.89257 -10091.36 0 -9933.2916 1406.1215 -Loop time of 10.8858 on 4 procs for 3000 steps with 105 atoms - -Performance: 5.953 ns/day, 4.032 hours/ns, 275.588 timesteps/s -98.1% CPU use with 4 MPI tasks x 1 OpenMP threads - -MPI task timing breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 9.3081 | 9.4054 | 9.4994 | 2.6 | 86.40 -Neigh | 0.15541 | 0.18258 | 0.2099 | 4.7 | 1.68 -Comm | 0.070516 | 0.16621 | 0.26541 | 19.7 | 1.53 -Output | 2.2173e-05 | 2.5153e-05 | 3.3855e-05 | 0.0 | 0.00 -Modify | 1.0979 | 1.1272 | 1.1568 | 2.1 | 10.35 -Other | | 0.004379 | | | 0.04 - -Nlocal: 26.25 ave 46 max 8 min -Histogram: 1 0 0 1 0 1 0 0 0 1 -Nghost: 399.5 ave 512 max 288 min -Histogram: 1 0 0 1 0 0 1 0 0 1 -Neighs: 1011.25 ave 1819 max 420 min -Histogram: 1 0 1 1 0 0 0 0 0 1 - -Total # of neighbors = 4045 -Ave neighs/atom = 38.5238 -Neighbor list builds = 300 -Dangerous builds not checked - -Please see the log.cite file for references relevant to this simulation - -Total wall time: 0:00:11 diff --git a/examples/reaxff/VOH/in.VOH b/examples/reaxff/VOH/in.VOH index 82fa8d1811..3a1047f2c3 100644 --- a/examples/reaxff/VOH/in.VOH +++ b/examples/reaxff/VOH/in.VOH @@ -1,23 +1,23 @@ # REAX potential for VOH system # ..... 
-units real +units real -atom_style charge -read_data data.VOH +atom_style charge +read_data data.VOH -pair_style reax/c lmp_control -pair_coeff * * ffield.reax.V_O_C_H H C O V +pair_style reaxff lmp_control +pair_coeff * * ffield.reax.V_O_C_H H C O V -neighbor 2 bin -neigh_modify every 10 delay 0 check no +neighbor 2 bin +neigh_modify every 10 delay 0 check no -fix 1 all nve +fix 1 all nve fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq fix 3 all temp/berendsen 500.0 500.0 100.0 -timestep 0.25 +timestep 0.25 -#dump 1 all atom 30 dump.reax.voh - -run 3000 +#dump 1 all atom 30 dump.reax.voh +thermo 100 +run 2000 diff --git a/examples/reaxff/VOH/lmp_control b/examples/reaxff/VOH/lmp_control index 735540053f..b97ad8a67f 100644 --- a/examples/reaxff/VOH/lmp_control +++ b/examples/reaxff/VOH/lmp_control @@ -1,17 +1,8 @@ -simulation_name VOH_example ! output files will carry this name + their specific ext -tabulate_long_range 10000 ! denotes the granularity of long range tabulation, 0 means no tabulation -energy_update_freq 1 +tabulate_long_range 10000 ! denotes the granularity of long range tabulation, 0 means no tabulation -nbrhood_cutoff 4.5 ! near neighbors cutoff for bond calculations in A -hbond_cutoff 6.0 ! cutoff distance for hydrogen bond interactions -bond_graph_cutoff 0.3 ! bond strength cutoff for bond graphs -thb_cutoff 0.001 ! cutoff value for three body interactions +nbrhood_cutoff 4.5 ! near neighbors cutoff for bond calculations in A +hbond_cutoff 6.0 ! cutoff distance for hydrogen bond interactions +bond_graph_cutoff 0.3 ! bond strength cutoff for bond graphs +thb_cutoff 0.001 ! cutoff value for three body interactions -write_freq 1 ! write trajectory after so many steps -traj_title VOH ! (no white spaces) -atom_info 1 ! 0: no atom info, 1: print basic atom info in the trajectory file -atom_forces 1 ! 0: basic atom format, 1: print force on each atom in the trajectory file -atom_velocities 0 ! 
0: basic atom format, 1: print the velocity of each atom in the trajectory file -bond_info 1 ! 0: do not print bonds, 1: print bonds in the trajectory file -angle_info 1 ! 0: do not print angles, 1: print angles in the trajectory file diff --git a/examples/reaxff/VOH/log.30Nov23.VOH.g++.1 b/examples/reaxff/VOH/log.30Nov23.VOH.g++.1 new file mode 100644 index 0000000000..d649653444 --- /dev/null +++ b/examples/reaxff/VOH/log.30Nov23.VOH.g++.1 @@ -0,0 +1,131 @@ +LAMMPS (21 Nov 2023) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +# REAX potential for VOH system +# ..... + +units real + +atom_style charge +read_data data.VOH +Reading data file ... + orthogonal box = (0 0 0) to (25 25 25) + 1 by 1 by 1 MPI processor grid + reading atoms ... + 100 atoms + read_data CPU = 0.001 seconds + +pair_style reaxff lmp_control +pair_coeff * * ffield.reax.V_O_C_H H C O V +Reading potential file ffield.reax.V_O_C_H with DATE: 2011-02-18 +WARNING: Changed valency_val to valency_boc for X (src/REAXFF/reaxff_ffield.cpp:289) + +neighbor 2 bin +neigh_modify every 10 delay 0 check no + +fix 1 all nve +fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq +fix 3 all temp/berendsen 500.0 500.0 100.0 + +timestep 0.25 + +#dump 1 all atom 30 dump.reax.voh +thermo 100 +run 2000 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +- fix qeq/reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. 
Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Neighbor list info ... + update: every = 10 steps, delay = 0 steps, check = no + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 12 + ghost atom cutoff = 12 + binsize = 6, bins = 5 5 5 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/newtoff/ghost + stencil: full/ghost/bin/3d + bin: standard + (2) fix qeq/reax, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 15.34 | 15.34 | 15.34 Mbytes + Step Temp E_pair E_mol TotEng Press + 0 0 -10246.825 0 -10246.825 42.256089 + 100 83.813625 -10238.056 0 -10213.322 -246.37234 + 200 128.47312 -10221.424 0 -10183.511 -896.05588 + 300 199.45833 -10218.343 0 -10159.482 -66.676466 + 400 243.93496 -10211.648 0 -10139.663 -1073.274 + 500 314.81116 -10216.592 0 -10123.692 542.54772 + 600 361.45977 -10217.717 0 -10111.05 205.47425 + 700 392.16954 -10215.815 0 -10100.086 -283.06967 + 800 392.49036 -10206.909 0 -10091.085 953.23712 + 900 426.51015 -10209.352 0 -10083.489 473.9928 + 1000 398.23517 -10195.103 0 -10077.584 243.59494 + 1100 414.05403 -10192.081 0 -10069.893 1063.7609 + 1200 442.70037 -10196.631 0 -10065.99 -1189.773 + 1300 470.32545 -10201.252 0 -10062.459 -132.3016 + 1400 446.97236 -10189.078 0 -10057.176 9.8938187 + 1500 475.7665 -10195.598 0 -10055.199 -877.81691 + 1600 440.45154 -10181.071 0 -10051.093 579.85471 + 1700 517.45211 -10201.067 0 -10048.367 136.58133 + 1800 461.86671 -10182.818 0 -10046.521 -260.09694 + 1900 463.99242 -10181.136 0 -10044.211 1567.8398 + 2000 476.73786 -10184.032 0 -10043.346 -883.50859 +Loop time of 1.94324 on 
1 procs for 2000 steps with 100 atoms + +Performance: 22.231 ns/day, 1.080 hours/ns, 1029.207 timesteps/s, 102.921 katom-step/s +99.7% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 1.5707 | 1.5707 | 1.5707 | 0.0 | 80.83 +Neigh | 0.1054 | 0.1054 | 0.1054 | 0.0 | 5.42 +Comm | 0.005606 | 0.005606 | 0.005606 | 0.0 | 0.29 +Output | 0.00053398 | 0.00053398 | 0.00053398 | 0.0 | 0.03 +Modify | 0.2594 | 0.2594 | 0.2594 | 0.0 | 13.35 +Other | | 0.001603 | | | 0.08 + +Nlocal: 100 ave 100 max 100 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 638 ave 638 max 638 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 3434 ave 3434 max 3434 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 3434 +Ave neighs/atom = 34.34 +Neighbor list builds = 200 +Dangerous builds not checked +Total wall time: 0:00:01 diff --git a/examples/reaxff/VOH/log.30Nov23.VOH.g++.4 b/examples/reaxff/VOH/log.30Nov23.VOH.g++.4 new file mode 100644 index 0000000000..a7363ee766 --- /dev/null +++ b/examples/reaxff/VOH/log.30Nov23.VOH.g++.4 @@ -0,0 +1,131 @@ +LAMMPS (21 Nov 2023) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +# REAX potential for VOH system +# ..... + +units real + +atom_style charge +read_data data.VOH +Reading data file ... + orthogonal box = (0 0 0) to (25 25 25) + 1 by 2 by 2 MPI processor grid + reading atoms ... 
+ 100 atoms + read_data CPU = 0.001 seconds + +pair_style reaxff lmp_control +pair_coeff * * ffield.reax.V_O_C_H H C O V +Reading potential file ffield.reax.V_O_C_H with DATE: 2011-02-18 +WARNING: Changed valency_val to valency_boc for X (src/REAXFF/reaxff_ffield.cpp:289) + +neighbor 2 bin +neigh_modify every 10 delay 0 check no + +fix 1 all nve +fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq +fix 3 all temp/berendsen 500.0 500.0 100.0 + +timestep 0.25 + +#dump 1 all atom 30 dump.reax.voh +thermo 100 +run 2000 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +- fix qeq/reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Neighbor list info ... 
+ update: every = 10 steps, delay = 0 steps, check = no + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 12 + ghost atom cutoff = 12 + binsize = 6, bins = 5 5 5 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/newtoff/ghost + stencil: full/ghost/bin/3d + bin: standard + (2) fix qeq/reax, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 10.04 | 11.12 | 12.06 Mbytes + Step Temp E_pair E_mol TotEng Press + 0 0 -10246.825 0 -10246.825 42.256092 + 100 83.813732 -10238.056 0 -10213.322 -246.39794 + 200 128.4729 -10221.424 0 -10183.511 -896.07308 + 300 199.45765 -10218.342 0 -10159.482 -66.695871 + 400 243.93632 -10211.649 0 -10139.663 -1073.2779 + 500 314.81228 -10216.592 0 -10123.691 542.5312 + 600 361.46099 -10217.717 0 -10111.05 205.56032 + 700 392.16903 -10215.815 0 -10100.086 -283.00265 + 800 392.48962 -10206.909 0 -10091.085 953.23878 + 900 426.50866 -10209.352 0 -10083.489 474.04312 + 1000 398.23724 -10195.104 0 -10077.584 243.52194 + 1100 414.05514 -10192.081 0 -10069.893 1063.726 + 1200 442.70432 -10196.633 0 -10065.99 -1189.8309 + 1300 470.32067 -10201.251 0 -10062.459 -132.41831 + 1400 447.00366 -10189.087 0 -10057.177 10.168781 + 1500 475.77239 -10195.599 0 -10055.199 -877.85409 + 1600 440.43788 -10181.066 0 -10051.092 580.25473 + 1700 517.37824 -10201.044 0 -10048.366 136.09841 + 1800 461.75464 -10182.78 0 -10046.517 -259.88878 + 1900 464.0812 -10181.158 0 -10044.207 1566.4734 + 2000 476.55134 -10183.975 0 -10043.344 -884.37537 +Loop time of 1.16101 on 4 procs for 2000 steps with 100 atoms + +Performance: 37.209 ns/day, 0.645 hours/ns, 1722.640 timesteps/s, 172.264 katom-step/s +99.1% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total 
+--------------------------------------------------------------- +Pair | 0.65447 | 0.69002 | 0.77443 | 6.0 | 59.43 +Neigh | 0.050397 | 0.060259 | 0.06883 | 2.7 | 5.19 +Comm | 0.03061 | 0.1145 | 0.15016 | 14.5 | 9.86 +Output | 0.00039488 | 0.00043537 | 0.00055434 | 0.0 | 0.04 +Modify | 0.28585 | 0.29446 | 0.30467 | 1.2 | 25.36 +Other | | 0.00134 | | | 0.12 + +Nlocal: 25 ave 36 max 10 min +Histogram: 1 0 0 0 0 1 0 0 1 1 +Nghost: 385.75 ave 472 max 299 min +Histogram: 1 0 0 1 0 0 1 0 0 1 +Neighs: 1077 ave 1693 max 379 min +Histogram: 1 0 0 1 0 0 0 0 1 1 + +Total # of neighbors = 4308 +Ave neighs/atom = 43.08 +Neighbor list builds = 200 +Dangerous builds not checked +Total wall time: 0:00:01 diff --git a/examples/reaxff/VOH/log.8Mar18.VOH.g++.1 b/examples/reaxff/VOH/log.8Mar18.VOH.g++.1 deleted file mode 100644 index 924769e570..0000000000 --- a/examples/reaxff/VOH/log.8Mar18.VOH.g++.1 +++ /dev/null @@ -1,81 +0,0 @@ -LAMMPS (8 Mar 2018) - using 1 OpenMP thread(s) per MPI task -# REAX potential for VOH system -# ..... - -units real - -atom_style charge -read_data data.VOH - orthogonal box = (0 0 0) to (25 25 25) - 1 by 1 by 1 MPI processor grid - reading atoms ... - 100 atoms - -pair_style reax/c lmp_control -pair_coeff * * ffield.reax.V_O_C_H H C O V -Reading potential file ffield.reax.V_O_C_H with DATE: 2011-02-18 - -neighbor 2 bin -neigh_modify every 10 delay 0 check no - -fix 1 all nve -fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq -fix 3 all temp/berendsen 500.0 500.0 100.0 - -timestep 0.25 - -#dump 1 all atom 30 dump.reax.voh - -run 3000 -Neighbor list info ... 
- update every 10 steps, delay 0 steps, check no - max neighbors/atom: 2000, page size: 100000 - master list distance cutoff = 12 - ghost atom cutoff = 12 - binsize = 6, bins = 5 5 5 - 2 neighbor lists, perpetual/occasional/extra = 2 0 0 - (1) pair reax/c, perpetual - attributes: half, newton off, ghost - pair build: half/bin/newtoff/ghost - stencil: half/ghost/bin/3d/newtoff - bin: standard - (2) fix qeq/reax, perpetual, copy from (1) - attributes: half, newton off, ghost - pair build: copy - stencil: none - bin: none -Per MPI rank memory allocation (min/avg/max) = 17.79 | 17.79 | 17.79 Mbytes -Step Temp E_pair E_mol TotEng Press - 0 0 -10246.825 0 -10246.825 42.256089 - 3000 476.73301 -10185.256 0 -10044.572 -694.70737 -Loop time of 11.0577 on 1 procs for 3000 steps with 100 atoms - -Performance: 5.860 ns/day, 4.095 hours/ns, 271.304 timesteps/s -98.9% CPU use with 1 MPI tasks x 1 OpenMP threads - -MPI task timing breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 9.6785 | 9.6785 | 9.6785 | 0.0 | 87.53 -Neigh | 0.32599 | 0.32599 | 0.32599 | 0.0 | 2.95 -Comm | 0.017231 | 0.017231 | 0.017231 | 0.0 | 0.16 -Output | 2.5511e-05 | 2.5511e-05 | 2.5511e-05 | 0.0 | 0.00 -Modify | 1.0311 | 1.0311 | 1.0311 | 0.0 | 9.32 -Other | | 0.004857 | | | 0.04 - -Nlocal: 100 ave 100 max 100 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Nghost: 598 ave 598 max 598 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Neighs: 3390 ave 3390 max 3390 min -Histogram: 1 0 0 0 0 0 0 0 0 0 - -Total # of neighbors = 3390 -Ave neighs/atom = 33.9 -Neighbor list builds = 300 -Dangerous builds not checked - -Please see the log.cite file for references relevant to this simulation - -Total wall time: 0:00:11 diff --git a/examples/reaxff/VOH/log.8Mar18.VOH.g++.4 b/examples/reaxff/VOH/log.8Mar18.VOH.g++.4 deleted file mode 100644 index 0395af6671..0000000000 --- a/examples/reaxff/VOH/log.8Mar18.VOH.g++.4 +++ /dev/null @@ -1,81 +0,0 @@ 
-LAMMPS (8 Mar 2018) - using 1 OpenMP thread(s) per MPI task -# REAX potential for VOH system -# ..... - -units real - -atom_style charge -read_data data.VOH - orthogonal box = (0 0 0) to (25 25 25) - 1 by 2 by 2 MPI processor grid - reading atoms ... - 100 atoms - -pair_style reax/c lmp_control -pair_coeff * * ffield.reax.V_O_C_H H C O V -Reading potential file ffield.reax.V_O_C_H with DATE: 2011-02-18 - -neighbor 2 bin -neigh_modify every 10 delay 0 check no - -fix 1 all nve -fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq -fix 3 all temp/berendsen 500.0 500.0 100.0 - -timestep 0.25 - -#dump 1 all atom 30 dump.reax.voh - -run 3000 -Neighbor list info ... - update every 10 steps, delay 0 steps, check no - max neighbors/atom: 2000, page size: 100000 - master list distance cutoff = 12 - ghost atom cutoff = 12 - binsize = 6, bins = 5 5 5 - 2 neighbor lists, perpetual/occasional/extra = 2 0 0 - (1) pair reax/c, perpetual - attributes: half, newton off, ghost - pair build: half/bin/newtoff/ghost - stencil: half/ghost/bin/3d/newtoff - bin: standard - (2) fix qeq/reax, perpetual, copy from (1) - attributes: half, newton off, ghost - pair build: copy - stencil: none - bin: none -Per MPI rank memory allocation (min/avg/max) = 11.21 | 12.52 | 13.64 Mbytes -Step Temp E_pair E_mol TotEng Press - 0 0 -10246.825 0 -10246.825 42.256092 - 3000 489.67803 -10188.866 0 -10044.362 -553.7513 -Loop time of 6.49847 on 4 procs for 3000 steps with 100 atoms - -Performance: 9.972 ns/day, 2.407 hours/ns, 461.647 timesteps/s -97.7% CPU use with 4 MPI tasks x 1 OpenMP threads - -MPI task timing breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 4.7412 | 4.8453 | 4.9104 | 2.9 | 74.56 -Neigh | 0.1468 | 0.17834 | 0.20151 | 4.7 | 2.74 -Comm | 0.071841 | 0.14037 | 0.24502 | 17.2 | 2.16 -Output | 2.1219e-05 | 2.408e-05 | 3.1948e-05 | 0.0 | 0.00 -Modify | 1.3072 | 1.3308 | 1.3627 | 1.7 | 20.48 -Other | | 
0.003713 | | | 0.06 - -Nlocal: 25 ave 38 max 11 min -Histogram: 1 0 0 0 1 0 1 0 0 1 -Nghost: 369.75 ave 453 max 283 min -Histogram: 1 0 0 0 1 1 0 0 0 1 -Neighs: 1082.25 ave 1788 max 417 min -Histogram: 1 0 1 0 0 0 1 0 0 1 - -Total # of neighbors = 4329 -Ave neighs/atom = 43.29 -Neighbor list builds = 300 -Dangerous builds not checked - -Please see the log.cite file for references relevant to this simulation - -Total wall time: 0:00:06 diff --git a/examples/reaxff/ZnOH2/in.ZnOH2 b/examples/reaxff/ZnOH2/in.ZnOH2 index f39b1a29dd..75a2d05f4b 100644 --- a/examples/reaxff/ZnOH2/in.ZnOH2 +++ b/examples/reaxff/ZnOH2/in.ZnOH2 @@ -1,23 +1,24 @@ # REAX potential for ZnOH2 system # ..... -units real +units real -atom_style charge -read_data data.ZnOH2 +atom_style charge +read_data data.ZnOH2 -pair_style reax/c lmp_control -pair_coeff * * ffield.reax.ZnOH H O Zn +pair_style reaxff lmp_control +pair_coeff * * ffield.reax.ZnOH H O Zn -neighbor 2 bin -neigh_modify every 10 delay 0 check no +neighbor 2 bin +neigh_modify every 10 delay 0 check no -fix 1 all nve +fix 1 all nve fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq fix 3 all temp/berendsen 500.0 500.0 100.0 -timestep 0.25 +timestep 0.25 +thermo 100 -#dump 1 all atom 30 dump.reax.znoh +#dump 1 all atom 30 dump.reax.znoh -run 3000 +run 1000 diff --git a/examples/reaxff/ZnOH2/lmp_control b/examples/reaxff/ZnOH2/lmp_control index 0fb44fe862..10c27d025d 100644 --- a/examples/reaxff/ZnOH2/lmp_control +++ b/examples/reaxff/ZnOH2/lmp_control @@ -1,17 +1,7 @@ -simulation_name ZnOH2_example ! output files will carry this name + their specific ext tabulate_long_range 10000 ! denotes the granularity of long range tabulation, 0 means no tabulation -energy_update_freq 1 nbrhood_cutoff 4.5 ! near neighbors cutoff for bond calculations in A hbond_cutoff 6.0 ! cutoff distance for hydrogen bond interactions bond_graph_cutoff 0.3 ! bond strength cutoff for bond graphs thb_cutoff 0.001 ! cutoff value for three body interactions - -write_freq 1 ! 
write trajectory after so many steps -traj_title ZnOH2 ! (no white spaces) -atom_info 1 ! 0: no atom info, 1: print basic atom info in the trajectory file -atom_forces 1 ! 0: basic atom format, 1: print force on each atom in the trajectory file -atom_velocities 0 ! 0: basic atom format, 1: print the velocity of each atom in the trajectory file -bond_info 1 ! 0: do not print bonds, 1: print bonds in the trajectory file -angle_info 1 ! 0: do not print angles, 1: print angles in the trajectory file diff --git a/examples/reaxff/ZnOH2/log.30Nov23.ZnOH2.g++.1 b/examples/reaxff/ZnOH2/log.30Nov23.ZnOH2.g++.1 new file mode 100644 index 0000000000..850eeb3f3e --- /dev/null +++ b/examples/reaxff/ZnOH2/log.30Nov23.ZnOH2.g++.1 @@ -0,0 +1,122 @@ +LAMMPS (21 Nov 2023) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +# REAX potential for ZnOH2 system +# ..... + +units real + +atom_style charge +read_data data.ZnOH2 +Reading data file ... + orthogonal box = (0 0 0) to (25 25 25) + 1 by 1 by 1 MPI processor grid + reading atoms ... + 105 atoms + read_data CPU = 0.000 seconds + +pair_style reaxff lmp_control +pair_coeff * * ffield.reax.ZnOH H O Zn +Reading potential file ffield.reax.ZnOH with DATE: 2011-02-18 +WARNING: Changed valency_val to valency_boc for X (src/REAXFF/reaxff_ffield.cpp:289) + +neighbor 2 bin +neigh_modify every 10 delay 0 check no + +fix 1 all nve +fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq +fix 3 all temp/berendsen 500.0 500.0 100.0 + +timestep 0.25 +thermo 100 + +#dump 1 all atom 30 dump.reax.znoh + +run 1000 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. 
Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +- fix qeq/reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Neighbor list info ... + update: every = 10 steps, delay = 0 steps, check = no + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 12 + ghost atom cutoff = 12 + binsize = 6, bins = 5 5 5 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/newtoff/ghost + stencil: full/ghost/bin/3d + bin: standard + (2) fix qeq/reax, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 15.8 | 15.8 | 15.8 Mbytes + Step Temp E_pair E_mol TotEng Press + 0 0 -7900.2668 0 -7900.2668 60.076093 + 100 89.745108 -7892.7937 0 -7864.9724 -359.37879 + 200 151.73431 -7883.2823 0 -7836.244 118.04838 + 300 223.74392 -7881.6513 0 -7812.2898 -97.069674 + 400 293.70909 -7883.7754 0 -7792.7243 -384.10332 + 500 301.22843 -7869.313 0 -7775.9309 76.604433 + 600 317.45476 -7860.4665 0 -7762.0541 40.95095 + 700 335.70939 -7853.865 0 -7749.7937 -173.3119 + 800 380.48725 -7857.8679 0 -7739.9152 -139.88773 + 900 502.93129 -7891.7095 0 -7735.7987 488.40109 + 1000 510.36735 -7894.0653 0 -7735.8493 -222.85193 +Loop time of 0.583996 on 1 procs for 1000 steps with 105 atoms + +Performance: 36.987 ns/day, 0.649 hours/ns, 1712.342 timesteps/s, 179.796 katom-step/s +99.6% CPU use 
with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0.41526 | 0.41526 | 0.41526 | 0.0 | 71.11 +Neigh | 0.058908 | 0.058908 | 0.058908 | 0.0 | 10.09 +Comm | 0.0028308 | 0.0028308 | 0.0028308 | 0.0 | 0.48 +Output | 0.00021295 | 0.00021295 | 0.00021295 | 0.0 | 0.04 +Modify | 0.1061 | 0.1061 | 0.1061 | 0.0 | 18.17 +Other | | 0.0006844 | | | 0.12 + +Nlocal: 105 ave 105 max 105 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 640 ave 640 max 640 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 3934 ave 3934 max 3934 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 3934 +Ave neighs/atom = 37.466667 +Neighbor list builds = 100 +Dangerous builds not checked +Total wall time: 0:00:00 diff --git a/examples/reaxff/ZnOH2/log.30Nov23.ZnOH2.g++.4 b/examples/reaxff/ZnOH2/log.30Nov23.ZnOH2.g++.4 new file mode 100644 index 0000000000..d98fe1df74 --- /dev/null +++ b/examples/reaxff/ZnOH2/log.30Nov23.ZnOH2.g++.4 @@ -0,0 +1,122 @@ +LAMMPS (21 Nov 2023) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +# REAX potential for ZnOH2 system +# ..... + +units real + +atom_style charge +read_data data.ZnOH2 +Reading data file ... + orthogonal box = (0 0 0) to (25 25 25) + 1 by 2 by 2 MPI processor grid + reading atoms ... 
+ 105 atoms + read_data CPU = 0.001 seconds + +pair_style reaxff lmp_control +pair_coeff * * ffield.reax.ZnOH H O Zn +Reading potential file ffield.reax.ZnOH with DATE: 2011-02-18 +WARNING: Changed valency_val to valency_boc for X (src/REAXFF/reaxff_ffield.cpp:289) + +neighbor 2 bin +neigh_modify every 10 delay 0 check no + +fix 1 all nve +fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq +fix 3 all temp/berendsen 500.0 500.0 100.0 + +timestep 0.25 +thermo 100 + +#dump 1 all atom 30 dump.reax.znoh + +run 1000 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +- fix qeq/reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Neighbor list info ... 
+ update: every = 10 steps, delay = 0 steps, check = no + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 12 + ghost atom cutoff = 12 + binsize = 6, bins = 5 5 5 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/newtoff/ghost + stencil: full/ghost/bin/3d + bin: standard + (2) fix qeq/reax, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 10.1 | 11.34 | 12.51 Mbytes + Step Temp E_pair E_mol TotEng Press + 0 0 -7900.2668 0 -7900.2668 60.076093 + 100 89.745102 -7892.7938 0 -7864.9724 -359.39279 + 200 151.73402 -7883.2823 0 -7836.2441 118.03582 + 300 223.74416 -7881.6514 0 -7812.2897 -97.060088 + 400 293.70926 -7883.7754 0 -7792.7242 -384.10477 + 500 301.22851 -7869.3129 0 -7775.9308 76.601414 + 600 317.45436 -7860.4664 0 -7762.0542 40.946828 + 700 335.70975 -7853.8651 0 -7749.7936 -173.31084 + 800 380.48744 -7857.868 0 -7739.9153 -139.87915 + 900 502.93034 -7891.7094 0 -7735.7989 488.3973 + 1000 510.36634 -7894.0651 0 -7735.8493 -222.85474 +Loop time of 0.345754 on 4 procs for 1000 steps with 105 atoms + +Performance: 62.472 ns/day, 0.384 hours/ns, 2892.228 timesteps/s, 303.684 katom-step/s +99.1% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0.18043 | 0.18884 | 0.19544 | 1.3 | 54.62 +Neigh | 0.027479 | 0.033519 | 0.039456 | 2.3 | 9.69 +Comm | 0.01673 | 0.023479 | 0.031605 | 3.6 | 6.79 +Output | 0.00019732 | 0.0002106 | 0.00024979 | 0.0 | 0.06 +Modify | 0.093437 | 0.099224 | 0.10519 | 1.3 | 28.70 +Other | | 0.0004842 | | | 0.14 + +Nlocal: 26.25 ave 41 max 11 min +Histogram: 1 0 0 0 1 0 1 0 0 1 +Nghost: 390.25 ave 491 max 286 min +Histogram: 1 0 0 0 1 1 0 0 0 1 +Neighs: 1154 ave 1912 max 445 
min +Histogram: 1 0 0 1 0 1 0 0 0 1 + +Total # of neighbors = 4616 +Ave neighs/atom = 43.961905 +Neighbor list builds = 100 +Dangerous builds not checked +Total wall time: 0:00:00 diff --git a/examples/reaxff/ZnOH2/log.8Mar18.ZnOH2.g++.1 b/examples/reaxff/ZnOH2/log.8Mar18.ZnOH2.g++.1 deleted file mode 100644 index 58b1a36719..0000000000 --- a/examples/reaxff/ZnOH2/log.8Mar18.ZnOH2.g++.1 +++ /dev/null @@ -1,81 +0,0 @@ -LAMMPS (8 Mar 2018) - using 1 OpenMP thread(s) per MPI task -# REAX potential for ZnOH2 system -# ..... - -units real - -atom_style charge -read_data data.ZnOH2 - orthogonal box = (0 0 0) to (25 25 25) - 1 by 1 by 1 MPI processor grid - reading atoms ... - 105 atoms - -pair_style reax/c lmp_control -pair_coeff * * ffield.reax.ZnOH H O Zn -Reading potential file ffield.reax.ZnOH with DATE: 2011-02-18 - -neighbor 2 bin -neigh_modify every 10 delay 0 check no - -fix 1 all nve -fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq -fix 3 all temp/berendsen 500.0 500.0 100.0 - -timestep 0.25 - -#dump 1 all atom 30 dump.reax.znoh - -run 3000 -Neighbor list info ... 
- update every 10 steps, delay 0 steps, check no - max neighbors/atom: 2000, page size: 100000 - master list distance cutoff = 12 - ghost atom cutoff = 12 - binsize = 6, bins = 5 5 5 - 2 neighbor lists, perpetual/occasional/extra = 2 0 0 - (1) pair reax/c, perpetual - attributes: half, newton off, ghost - pair build: half/bin/newtoff/ghost - stencil: half/ghost/bin/3d/newtoff - bin: standard - (2) fix qeq/reax, perpetual, copy from (1) - attributes: half, newton off, ghost - pair build: copy - stencil: none - bin: none -Per MPI rank memory allocation (min/avg/max) = 18.36 | 18.36 | 18.36 Mbytes -Step Temp E_pair E_mol TotEng Press - 0 0 -7900.2668 0 -7900.2668 60.076093 - 3000 535.58577 -7934.7287 0 -7768.6948 -475.46237 -Loop time of 7.29784 on 1 procs for 3000 steps with 105 atoms - -Performance: 8.879 ns/day, 2.703 hours/ns, 411.081 timesteps/s -97.3% CPU use with 1 MPI tasks x 1 OpenMP threads - -MPI task timing breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 5.9988 | 5.9988 | 5.9988 | 0.0 | 82.20 -Neigh | 0.37455 | 0.37455 | 0.37455 | 0.0 | 5.13 -Comm | 0.019186 | 0.019186 | 0.019186 | 0.0 | 0.26 -Output | 2.4557e-05 | 2.4557e-05 | 2.4557e-05 | 0.0 | 0.00 -Modify | 0.89915 | 0.89915 | 0.89915 | 0.0 | 12.32 -Other | | 0.006108 | | | 0.08 - -Nlocal: 105 ave 105 max 105 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Nghost: 649 ave 649 max 649 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Neighs: 3971 ave 3971 max 3971 min -Histogram: 1 0 0 0 0 0 0 0 0 0 - -Total # of neighbors = 3971 -Ave neighs/atom = 37.819 -Neighbor list builds = 300 -Dangerous builds not checked - -Please see the log.cite file for references relevant to this simulation - -Total wall time: 0:00:07 diff --git a/examples/reaxff/ZnOH2/log.8Mar18.ZnOH2.g++.4 b/examples/reaxff/ZnOH2/log.8Mar18.ZnOH2.g++.4 deleted file mode 100644 index 77c5cbe1b7..0000000000 --- a/examples/reaxff/ZnOH2/log.8Mar18.ZnOH2.g++.4 +++ /dev/null 
@@ -1,81 +0,0 @@ -LAMMPS (8 Mar 2018) - using 1 OpenMP thread(s) per MPI task -# REAX potential for ZnOH2 system -# ..... - -units real - -atom_style charge -read_data data.ZnOH2 - orthogonal box = (0 0 0) to (25 25 25) - 1 by 2 by 2 MPI processor grid - reading atoms ... - 105 atoms - -pair_style reax/c lmp_control -pair_coeff * * ffield.reax.ZnOH H O Zn -Reading potential file ffield.reax.ZnOH with DATE: 2011-02-18 - -neighbor 2 bin -neigh_modify every 10 delay 0 check no - -fix 1 all nve -fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq -fix 3 all temp/berendsen 500.0 500.0 100.0 - -timestep 0.25 - -#dump 1 all atom 30 dump.reax.znoh - -run 3000 -Neighbor list info ... - update every 10 steps, delay 0 steps, check no - max neighbors/atom: 2000, page size: 100000 - master list distance cutoff = 12 - ghost atom cutoff = 12 - binsize = 6, bins = 5 5 5 - 2 neighbor lists, perpetual/occasional/extra = 2 0 0 - (1) pair reax/c, perpetual - attributes: half, newton off, ghost - pair build: half/bin/newtoff/ghost - stencil: half/ghost/bin/3d/newtoff - bin: standard - (2) fix qeq/reax, perpetual, copy from (1) - attributes: half, newton off, ghost - pair build: copy - stencil: none - bin: none -Per MPI rank memory allocation (min/avg/max) = 11.28 | 12.77 | 14.21 Mbytes -Step Temp E_pair E_mol TotEng Press - 0 0 -7900.2668 0 -7900.2668 60.076093 - 3000 538.25796 -7935.6159 0 -7768.7536 -525.47078 -Loop time of 4.48824 on 4 procs for 3000 steps with 105 atoms - -Performance: 14.438 ns/day, 1.662 hours/ns, 668.414 timesteps/s -97.2% CPU use with 4 MPI tasks x 1 OpenMP threads - -MPI task timing breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 3.1031 | 3.1698 | 3.2378 | 3.3 | 70.62 -Neigh | 0.16642 | 0.20502 | 0.25003 | 6.6 | 4.57 -Comm | 0.074932 | 0.14224 | 0.21025 | 15.6 | 3.17 -Output | 0.00011349 | 0.00011736 | 0.00012231 | 0.0 | 0.00 -Modify | 0.92089 | 0.96736 | 1.0083 | 3.2 | 
21.55 -Other | | 0.003731 | | | 0.08 - -Nlocal: 26.25 ave 45 max 15 min -Histogram: 1 0 2 0 0 0 0 0 0 1 -Nghost: 399 ave 509 max 295 min -Histogram: 1 0 0 0 2 0 0 0 0 1 -Neighs: 1151.5 ave 2066 max 701 min -Histogram: 1 2 0 0 0 0 0 0 0 1 - -Total # of neighbors = 4606 -Ave neighs/atom = 43.8667 -Neighbor list builds = 300 -Dangerous builds not checked - -Please see the log.cite file for references relevant to this simulation - -Total wall time: 0:00:04 diff --git a/examples/reaxff/ci-reaxFF/in.ci-reax.CH b/examples/reaxff/ci-reaxFF/in.ci-reax.CH index b3a2406a56..bee76b3eea 100644 --- a/examples/reaxff/ci-reaxFF/in.ci-reax.CH +++ b/examples/reaxff/ci-reaxFF/in.ci-reax.CH @@ -4,8 +4,8 @@ units real read_data CH4.dat -pair_style hybrid/overlay reax/c control checkqeq no table linear 11000 -pair_coeff * * reax/c ffield.ci-reax.CH C H +pair_style hybrid/overlay reaxff control checkqeq no table linear 11000 +pair_coeff * * reaxff ffield.ci-reax.CH C H pair_coeff 1 1 table ci-reaxFF_ZBL.dat CC_cireaxFF pair_coeff 1 2 table ci-reaxFF_ZBL.dat CH_cireaxFF pair_coeff 2 2 table ci-reaxFF_ZBL.dat HH_cireaxFF diff --git a/examples/reaxff/ci-reaxFF/log.30Nov23.ci-reax.CH.g++.1 b/examples/reaxff/ci-reaxFF/log.30Nov23.ci-reax.CH.g++.1 new file mode 100644 index 0000000000..08f2f4b47b --- /dev/null +++ b/examples/reaxff/ci-reaxFF/log.30Nov23.ci-reax.CH.g++.1 @@ -0,0 +1,105 @@ +LAMMPS (21 Nov 2023) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +#ci-reax potential for CH systems with tabulated ZBL correction +atom_style charge +units real + +read_data CH4.dat +Reading data file ... + orthogonal box = (0 0 0) to (20 20 20) + 1 by 1 by 1 MPI processor grid + reading atoms ... + 315 atoms + reading velocities ... 
+ 315 velocities + read_data CPU = 0.003 seconds + +pair_style hybrid/overlay reaxff control checkqeq no table linear 11000 +pair_coeff * * reaxff ffield.ci-reax.CH C H +Reading potential file ffield.ci-reax.CH with DATE: 2017-11-20 +pair_coeff 1 1 table ci-reaxFF_ZBL.dat CC_cireaxFF +WARNING: 2 of 10000 force values in table CC_cireaxFF are inconsistent with -dE/dr. +WARNING: Should only be flagged at inflection points (src/pair_table.cpp:466) +pair_coeff 1 2 table ci-reaxFF_ZBL.dat CH_cireaxFF +WARNING: 2 of 11000 force values in table CH_cireaxFF are inconsistent with -dE/dr. +WARNING: Should only be flagged at inflection points (src/pair_table.cpp:466) +pair_coeff 2 2 table ci-reaxFF_ZBL.dat HH_cireaxFF +WARNING: 2 of 6000 force values in table HH_cireaxFF are inconsistent with -dE/dr. +WARNING: Should only be flagged at inflection points (src/pair_table.cpp:466) + +timestep 0.25 +fix 1 all nve +fix 2 all temp/berendsen 500.0 500.0 100.0 + +#dump 1 all atom 30 dump.ci-reax.lammpstrj + +run 3000 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +WARNING: Total cutoff < 2*bond cutoff. May need to use an increased neighbor list skin. (src/REAXFF/pair_reaxff.cpp:365) +Neighbor list info ... 
+ update: every = 1 steps, delay = 0 steps, check = yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 9.5 + ghost atom cutoff = 9.5 + binsize = 4.75, bins = 5 5 5 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/newtoff/ghost + stencil: full/ghost/bin/3d + bin: standard + (2) pair table, perpetual + attributes: half, newton on, cut 3.1 + pair build: half/bin/atomonly/newton + stencil: half/bin/3d + bin: standard +Per MPI rank memory allocation (min/avg/max) = 35.64 | 35.64 | 35.64 Mbytes + Step Temp E_pair E_mol TotEng Press + 0 508.42043 -28736.654 0 -28260.785 1678.3276 + 3000 480.41333 -28707.835 0 -28258.181 -3150.0762 +Loop time of 13.2263 on 1 procs for 3000 steps with 315 atoms + +Performance: 4.899 ns/day, 4.899 hours/ns, 226.821 timesteps/s, 71.449 katom-step/s +99.6% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 13.046 | 13.046 | 13.046 | 0.0 | 98.64 +Neigh | 0.12783 | 0.12783 | 0.12783 | 0.0 | 0.97 +Comm | 0.025611 | 0.025611 | 0.025611 | 0.0 | 0.19 +Output | 2.2361e-05 | 2.2361e-05 | 2.2361e-05 | 0.0 | 0.00 +Modify | 0.017722 | 0.017722 | 0.017722 | 0.0 | 0.13 +Other | | 0.008824 | | | 0.07 + +Nlocal: 315 ave 315 max 315 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 2056 ave 2056 max 2056 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 32754 ave 32754 max 32754 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 32754 +Ave neighs/atom = 103.98095 +Neighbor list builds = 37 +Dangerous builds = 0 +Total wall time: 0:00:13 diff --git a/examples/reaxff/ci-reaxFF/log.30Nov23.ci-reax.CH.g++.4 b/examples/reaxff/ci-reaxFF/log.30Nov23.ci-reax.CH.g++.4 new file mode 100644 index 0000000000..cc6c386f03 --- /dev/null +++ b/examples/reaxff/ci-reaxFF/log.30Nov23.ci-reax.CH.g++.4 
@@ -0,0 +1,105 @@ +LAMMPS (21 Nov 2023) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +#ci-reax potential for CH systems with tabulated ZBL correction +atom_style charge +units real + +read_data CH4.dat +Reading data file ... + orthogonal box = (0 0 0) to (20 20 20) + 1 by 2 by 2 MPI processor grid + reading atoms ... + 315 atoms + reading velocities ... + 315 velocities + read_data CPU = 0.002 seconds + +pair_style hybrid/overlay reaxff control checkqeq no table linear 11000 +pair_coeff * * reaxff ffield.ci-reax.CH C H +Reading potential file ffield.ci-reax.CH with DATE: 2017-11-20 +pair_coeff 1 1 table ci-reaxFF_ZBL.dat CC_cireaxFF +WARNING: 2 of 10000 force values in table CC_cireaxFF are inconsistent with -dE/dr. +WARNING: Should only be flagged at inflection points (src/pair_table.cpp:466) +pair_coeff 1 2 table ci-reaxFF_ZBL.dat CH_cireaxFF +WARNING: 2 of 11000 force values in table CH_cireaxFF are inconsistent with -dE/dr. +WARNING: Should only be flagged at inflection points (src/pair_table.cpp:466) +pair_coeff 2 2 table ci-reaxFF_ZBL.dat HH_cireaxFF +WARNING: 2 of 6000 force values in table HH_cireaxFF are inconsistent with -dE/dr. +WARNING: Should only be flagged at inflection points (src/pair_table.cpp:466) + +timestep 0.25 +fix 1 all nve +fix 2 all temp/berendsen 500.0 500.0 100.0 + +#dump 1 all atom 30 dump.ci-reax.lammpstrj + +run 3000 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. 
Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +WARNING: Total cutoff < 2*bond cutoff. May need to use an increased neighbor list skin. (src/REAXFF/pair_reaxff.cpp:365) +Neighbor list info ... + update: every = 1 steps, delay = 0 steps, check = yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 9.5 + ghost atom cutoff = 9.5 + binsize = 4.75, bins = 5 5 5 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/newtoff/ghost + stencil: full/ghost/bin/3d + bin: standard + (2) pair table, perpetual + attributes: half, newton on, cut 3.1 + pair build: half/bin/atomonly/newton + stencil: half/bin/3d + bin: standard +Per MPI rank memory allocation (min/avg/max) = 20.4 | 21.35 | 22.73 Mbytes + Step Temp E_pair E_mol TotEng Press + 0 508.42043 -28736.654 0 -28260.785 1678.3276 + 3000 480.41333 -28707.835 0 -28258.181 -3150.0762 +Loop time of 8.18251 on 4 procs for 3000 steps with 315 atoms + +Performance: 7.919 ns/day, 3.031 hours/ns, 366.636 timesteps/s, 115.490 katom-step/s +99.1% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 6.7983 | 7.3808 | 7.9976 | 16.3 | 90.20 +Neigh | 0.062309 | 0.065996 | 0.07006 | 1.4 | 0.81 +Comm | 0.083586 | 0.70629 | 1.2934 | 53.1 | 8.63 +Output | 1.8365e-05 | 2.0853e-05 | 2.7615e-05 | 0.0 | 0.00 +Modify | 0.016829 | 0.019422 | 0.02157 | 1.4 | 0.24 +Other | | 0.01001 | | | 0.12 + +Nlocal: 78.75 ave 96 max 65 min +Histogram: 2 0 0 0 0 0 0 1 0 1 +Nghost: 1233 ave 1348 max 1116 min +Histogram: 1 0 1 0 0 0 0 1 0 1 +Neighs: 9467.25 ave 12150 
max 7160 min +Histogram: 1 1 0 0 0 0 0 1 0 1 + +Total # of neighbors = 37869 +Ave neighs/atom = 120.21905 +Neighbor list builds = 37 +Dangerous builds = 0 +Total wall time: 0:00:08 diff --git a/examples/reaxff/ci-reaxFF/log.8Mar18.ci-reax.CH.g++.1 b/examples/reaxff/ci-reaxFF/log.8Mar18.ci-reax.CH.g++.1 deleted file mode 100644 index e966fd26b6..0000000000 --- a/examples/reaxff/ci-reaxFF/log.8Mar18.ci-reax.CH.g++.1 +++ /dev/null @@ -1,86 +0,0 @@ -LAMMPS (8 Mar 2018) - using 1 OpenMP thread(s) per MPI task -#ci-reax potential for CH systems with tabulated ZBL correction -atom_style charge -units real - -read_data CH4.dat - orthogonal box = (0 0 0) to (20 20 20) - 1 by 1 by 1 MPI processor grid - reading atoms ... - 315 atoms - reading velocities ... - 315 velocities - -pair_style hybrid/overlay reax/c control checkqeq no table linear 11000 -pair_coeff * * reax/c ffield.ci-reax.CH C H -Reading potential file ffield.ci-reax.CH with DATE: 2017-11-20 -pair_coeff 1 1 table ci-reaxFF_ZBL.dat CC_cireaxFF -WARNING: 2 of 10000 force values in table are inconsistent with -dE/dr. - Should only be flagged at inflection points (../pair_table.cpp:481) -pair_coeff 1 2 table ci-reaxFF_ZBL.dat CH_cireaxFF -WARNING: 2 of 11000 force values in table are inconsistent with -dE/dr. - Should only be flagged at inflection points (../pair_table.cpp:481) -pair_coeff 2 2 table ci-reaxFF_ZBL.dat HH_cireaxFF -WARNING: 2 of 6000 force values in table are inconsistent with -dE/dr. - Should only be flagged at inflection points (../pair_table.cpp:481) - -timestep 0.25 -fix 1 all nve -fix 2 all temp/berendsen 500.0 500.0 100.0 - -#dump 1 all atom 30 dump.ci-reax.lammpstrj - -run 3000 -WARNING: Total cutoff < 2*bond cutoff. May need to use an increased neighbor list skin. (../pair_reaxc.cpp:392) -Neighbor list info ... 
- update every 1 steps, delay 10 steps, check yes - max neighbors/atom: 2000, page size: 100000 - master list distance cutoff = 9.5 - ghost atom cutoff = 9.5 - binsize = 4.75, bins = 5 5 5 - 2 neighbor lists, perpetual/occasional/extra = 2 0 0 - (1) pair reax/c, perpetual - attributes: half, newton off, ghost - pair build: half/bin/newtoff/ghost - stencil: half/ghost/bin/3d/newtoff - bin: standard - (2) pair table, perpetual - attributes: half, newton on - pair build: half/bin/atomonly/newton - stencil: half/bin/3d/newton - bin: standard -Per MPI rank memory allocation (min/avg/max) = 43.46 | 43.46 | 43.46 Mbytes -Step Temp E_pair E_mol TotEng Press - 0 508.42043 -28736.654 0 -28260.785 1678.3276 - 3000 480.41333 -28707.835 0 -28258.181 -3150.0762 -Loop time of 45.3959 on 1 procs for 3000 steps with 315 atoms - -Performance: 1.427 ns/day, 16.813 hours/ns, 66.085 timesteps/s -96.6% CPU use with 1 MPI tasks x 1 OpenMP threads - -MPI task timing breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 44.955 | 44.955 | 44.955 | 0.0 | 99.03 -Neigh | 0.29903 | 0.29903 | 0.29903 | 0.0 | 0.66 -Comm | 0.056547 | 0.056547 | 0.056547 | 0.0 | 0.12 -Output | 4.8399e-05 | 4.8399e-05 | 4.8399e-05 | 0.0 | 0.00 -Modify | 0.058722 | 0.058722 | 0.058722 | 0.0 | 0.13 -Other | | 0.02632 | | | 0.06 - -Nlocal: 315 ave 315 max 315 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Nghost: 2056 ave 2056 max 2056 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Neighs: 32754 ave 32754 max 32754 min -Histogram: 1 0 0 0 0 0 0 0 0 0 - -Total # of neighbors = 32754 -Ave neighs/atom = 103.981 -Neighbor list builds = 37 -Dangerous builds = 0 - -Please see the log.cite file for references relevant to this simulation - -Total wall time: 0:00:45 diff --git a/examples/reaxff/ci-reaxFF/log.8Mar18.ci-reax.CH.g++.4 b/examples/reaxff/ci-reaxFF/log.8Mar18.ci-reax.CH.g++.4 deleted file mode 100644 index ccc87b3536..0000000000 --- 
a/examples/reaxff/ci-reaxFF/log.8Mar18.ci-reax.CH.g++.4 +++ /dev/null @@ -1,86 +0,0 @@ -LAMMPS (8 Mar 2018) - using 1 OpenMP thread(s) per MPI task -#ci-reax potential for CH systems with tabulated ZBL correction -atom_style charge -units real - -read_data CH4.dat - orthogonal box = (0 0 0) to (20 20 20) - 1 by 2 by 2 MPI processor grid - reading atoms ... - 315 atoms - reading velocities ... - 315 velocities - -pair_style hybrid/overlay reax/c control checkqeq no table linear 11000 -pair_coeff * * reax/c ffield.ci-reax.CH C H -Reading potential file ffield.ci-reax.CH with DATE: 2017-11-20 -pair_coeff 1 1 table ci-reaxFF_ZBL.dat CC_cireaxFF -WARNING: 2 of 10000 force values in table are inconsistent with -dE/dr. - Should only be flagged at inflection points (../pair_table.cpp:481) -pair_coeff 1 2 table ci-reaxFF_ZBL.dat CH_cireaxFF -WARNING: 2 of 11000 force values in table are inconsistent with -dE/dr. - Should only be flagged at inflection points (../pair_table.cpp:481) -pair_coeff 2 2 table ci-reaxFF_ZBL.dat HH_cireaxFF -WARNING: 2 of 6000 force values in table are inconsistent with -dE/dr. - Should only be flagged at inflection points (../pair_table.cpp:481) - -timestep 0.25 -fix 1 all nve -fix 2 all temp/berendsen 500.0 500.0 100.0 - -#dump 1 all atom 30 dump.ci-reax.lammpstrj - -run 3000 -WARNING: Total cutoff < 2*bond cutoff. May need to use an increased neighbor list skin. (../pair_reaxc.cpp:392) -Neighbor list info ... 
- update every 1 steps, delay 10 steps, check yes - max neighbors/atom: 2000, page size: 100000 - master list distance cutoff = 9.5 - ghost atom cutoff = 9.5 - binsize = 4.75, bins = 5 5 5 - 2 neighbor lists, perpetual/occasional/extra = 2 0 0 - (1) pair reax/c, perpetual - attributes: half, newton off, ghost - pair build: half/bin/newtoff/ghost - stencil: half/ghost/bin/3d/newtoff - bin: standard - (2) pair table, perpetual - attributes: half, newton on - pair build: half/bin/atomonly/newton - stencil: half/bin/3d/newton - bin: standard -Per MPI rank memory allocation (min/avg/max) = 24.48 | 25.61 | 27.27 Mbytes -Step Temp E_pair E_mol TotEng Press - 0 508.42043 -28736.654 0 -28260.785 1678.3276 - 3000 480.41333 -28707.835 0 -28258.181 -3150.0762 -Loop time of 24.7034 on 4 procs for 3000 steps with 315 atoms - -Performance: 2.623 ns/day, 9.149 hours/ns, 121.441 timesteps/s -95.8% CPU use with 4 MPI tasks x 1 OpenMP threads - -MPI task timing breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 18.945 | 21.367 | 24.046 | 39.3 | 86.49 -Neigh | 0.1456 | 0.15254 | 0.16101 | 1.6 | 0.62 -Comm | 0.39168 | 3.0859 | 5.5185 | 103.9 | 12.49 -Output | 3.5763e-05 | 4.065e-05 | 5.2452e-05 | 0.0 | 0.00 -Modify | 0.05831 | 0.068811 | 0.077666 | 2.9 | 0.28 -Other | | 0.0292 | | | 0.12 - -Nlocal: 78.75 ave 96 max 65 min -Histogram: 2 0 0 0 0 0 0 1 0 1 -Nghost: 1233 ave 1348 max 1116 min -Histogram: 1 0 1 0 0 0 0 1 0 1 -Neighs: 9467.25 ave 12150 max 7160 min -Histogram: 1 1 0 0 0 0 0 1 0 1 - -Total # of neighbors = 37869 -Ave neighs/atom = 120.219 -Neighbor list builds = 37 -Dangerous builds = 0 - -Please see the log.cite file for references relevant to this simulation - -Total wall time: 0:00:24 diff --git a/lib/gpu/Makefile.linux_multi b/lib/gpu/Makefile.linux_multi index 3299bbec3a..005f659079 100644 --- a/lib/gpu/Makefile.linux_multi +++ b/lib/gpu/Makefile.linux_multi @@ -65,7 +65,7 @@ 
CUDA_PRECISION = -D_SINGLE_DOUBLE CUDA_INCLUDE = -I$(CUDA_HOME)/include CUDA_LIB = -L$(CUDA_HOME)/lib64 -L$(CUDA_HOME)/lib64/stubs -CUDA_OPTS = -DUNIX -O3 --use_fast_math $(LMP_INC) -Xcompiler -fPIC +CUDA_OPTS = -DUNIX -O3 --use_fast_math $(LMP_INC) -Xcompiler -fPIC -allow-unsupported-compiler CUDR_CPP = mpicxx -DMPI_GERYON -DUCL_NO_EXIT -DMPICH_IGNORE_CXX_SEEK -DOMPI_SKIP_MPICXX=1 -fPIC -std=c++11 CUDR_OPTS = -O2 $(LMP_INC) # -xHost -no-prec-div -ansi-alias diff --git a/lib/gpu/geryon/ocl_mat.h b/lib/gpu/geryon/ocl_mat.h index 3135594dc3..66ca6ab527 100644 --- a/lib/gpu/geryon/ocl_mat.h +++ b/lib/gpu/geryon/ocl_mat.h @@ -54,6 +54,6 @@ namespace ucl_opencl { #include "ucl_print.h" #undef UCL_PRINT_ALLOW -} // namespace ucl_cudart +} // namespace ucl_opencl #endif diff --git a/lib/gpu/lal_amoeba.cpp b/lib/gpu/lal_amoeba.cpp index 5e19997913..805c4c4b26 100644 --- a/lib/gpu/lal_amoeba.cpp +++ b/lib/gpu/lal_amoeba.cpp @@ -281,13 +281,7 @@ int AmoebaT::polar_real(const int eflag, const int vflag) { const int BX=this->block_size(); const int GX=static_cast(ceil(static_cast(ainum)/(BX/this->_threads_per_atom))); - /* - const int cus = this->device->gpu->cus(); - while (GX < cus && GX > 1) { - BX /= 2; - GX=static_cast(ceil(static_cast(ainum)/(BX/this->_threads_per_atom))); - } - */ + this->time_pair.start(); // Build the short neighbor list if not done yet diff --git a/lib/gpu/lal_base_dpd.cpp b/lib/gpu/lal_base_dpd.cpp index e103699d40..0ddd24d21e 100644 --- a/lib/gpu/lal_base_dpd.cpp +++ b/lib/gpu/lal_base_dpd.cpp @@ -56,7 +56,8 @@ int BaseDPDT::init_atomic(const int nlocal, const int nall, const int max_nbors, const int maxspecial, const double cell_size, const double gpu_split, FILE *_screen, const void *pair_program, - const char *k_name, const int onetype) { + const char *k_name, const int onetype, + const int extra_fields) { screen=_screen; int gpu_nbor=0; @@ -75,7 +76,8 @@ int BaseDPDT::init_atomic(const int nlocal, const int nall, bool charge = false; bool rot = 
false; bool vel = true; - int success=device->init(*ans,charge,rot,nlocal,nall,maxspecial,vel); + _extra_fields = extra_fields; + int success=device->init(*ans,charge,rot,nlocal,nall,maxspecial,vel,_extra_fields/4); if (success!=0) return success; diff --git a/lib/gpu/lal_base_dpd.h b/lib/gpu/lal_base_dpd.h index 9eb56993af..64ec725d95 100644 --- a/lib/gpu/lal_base_dpd.h +++ b/lib/gpu/lal_base_dpd.h @@ -53,7 +53,7 @@ class BaseDPD { const int maxspecial, const double cell_size, const double gpu_split, FILE *screen, const void *pair_program, const char *k_name, - const int onetype=0); + const int onetype=0, const int extra_fields=0); /// Estimate the overhead for GPU context changes and CPU driver void estimate_gpu_overhead(); @@ -167,7 +167,6 @@ class BaseDPD { /// Atom Data Atom *atom; - // ------------------------ FORCE/ENERGY DATA ----------------------- Answer *ans; @@ -199,7 +198,7 @@ class BaseDPD { protected: bool _compiled; - int _block_size, _threads_per_atom, _onetype; + int _block_size, _threads_per_atom, _onetype, _extra_fields; double _max_bytes, _max_an_bytes; double _gpu_overhead, _driver_overhead; UCL_D_Vec *_nbor_data; diff --git a/lib/gpu/lal_base_sph.cpp b/lib/gpu/lal_base_sph.cpp new file mode 100644 index 0000000000..f373c0ebb6 --- /dev/null +++ b/lib/gpu/lal_base_sph.cpp @@ -0,0 +1,362 @@ +/*************************************************************************** + base_sph.cpp + ------------------- + Trung Nguyen (U Chicago) + + Base class for SPH pair styles needing per-particle data for position, + velocity, and type. 
+ + __________________________________________________________________________ + This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) + __________________________________________________________________________ + + begin : December 2023 + email : ndactrung@gmail.com + ***************************************************************************/ + +#include "lal_base_sph.h" +namespace LAMMPS_AL { +#define BaseSPHT BaseSPH + +extern Device global_device; + +template +BaseSPHT::BaseSPH() : _compiled(false), _max_bytes(0) { + device=&global_device; + ans=new Answer(); + nbor=new Neighbor(); + pair_program=nullptr; + ucl_device=nullptr; + #if defined(LAL_OCL_EV_JIT) + pair_program_noev=nullptr; + #endif +} + +template +BaseSPHT::~BaseSPH() { + delete ans; + delete nbor; + k_pair_fast.clear(); + k_pair.clear(); + if (pair_program) delete pair_program; + #if defined(LAL_OCL_EV_JIT) + k_pair_noev.clear(); + if (pair_program_noev) delete pair_program_noev; + #endif +} + +template +int BaseSPHT::bytes_per_atom_atomic(const int max_nbors) const { + return device->atom.bytes_per_atom()+ans->bytes_per_atom()+ + nbor->bytes_per_atom(max_nbors); +} + +template +int BaseSPHT::init_atomic(const int nlocal, const int nall, + const int max_nbors, const int maxspecial, + const double cell_size, const double gpu_split, + FILE *_screen, const void *pair_program, + const char *k_name, const int onetype, + const int extra_fields) { + screen=_screen; + + int gpu_nbor=0; + if (device->gpu_mode()==Device::GPU_NEIGH) + gpu_nbor=1; + else if (device->gpu_mode()==Device::GPU_HYB_NEIGH) + gpu_nbor=2; + + int _gpu_host=0; + int host_nlocal=hd_balancer.first_host_count(nlocal,gpu_split,gpu_nbor); + if (host_nlocal>0) + _gpu_host=1; + + _threads_per_atom=device->threads_per_atom(); + + bool charge = false; + bool rot = false; + bool vel = true; + _extra_fields = extra_fields; + int success=device->init(*ans,charge,rot,nlocal,nall,maxspecial,vel,_extra_fields/4); + if (success!=0) + return 
success; + + if (ucl_device!=device->gpu) _compiled=false; + + ucl_device=device->gpu; + atom=&device->atom; + + _block_size=device->pair_block_size(); + compile_kernels(*ucl_device,pair_program,k_name,onetype); + + if (_threads_per_atom>1 && gpu_nbor==0) { + nbor->packing(true); + _nbor_data=&(nbor->dev_packed); + } else + _nbor_data=&(nbor->dev_nbor); + + success = device->init_nbor(nbor,nlocal,host_nlocal,nall,maxspecial,_gpu_host, + max_nbors,cell_size,false,_threads_per_atom); + if (success!=0) + return success; + + // Initialize host-device load balancer + hd_balancer.init(device,gpu_nbor,gpu_split); + + // Initialize timers for the selected GPU + time_pair.init(*ucl_device); + time_pair.zero(); + + pos_tex.bind_float(atom->x,4); + vel_tex.bind_float(atom->v,4); + + _max_an_bytes=ans->gpu_bytes()+nbor->gpu_bytes(); + + return success; +} + +template +void BaseSPHT::estimate_gpu_overhead() { + device->estimate_gpu_overhead(1,_gpu_overhead,_driver_overhead); +} + +template +void BaseSPHT::clear_atomic() { + // Output any timing information + acc_timers(); + double avg_split=hd_balancer.all_avg_split(); + _gpu_overhead*=hd_balancer.timestep(); + _driver_overhead*=hd_balancer.timestep(); + device->output_times(time_pair,*ans,*nbor,avg_split,_max_bytes+_max_an_bytes, + _gpu_overhead,_driver_overhead,_threads_per_atom,screen); + + time_pair.clear(); + hd_balancer.clear(); + + nbor->clear(); + ans->clear(); +} + +// --------------------------------------------------------------------------- +// Copy neighbor list from host +// --------------------------------------------------------------------------- +template +int * BaseSPHT::reset_nbors(const int nall, const int inum, int *ilist, + int *numj, int **firstneigh, bool &success) { + success=true; + + int mn=nbor->max_nbor_loop(inum,numj,ilist); + resize_atom(inum,nall,success); + resize_local(inum,mn,success); + if (!success) + return nullptr; + + nbor->get_host(inum,ilist,numj,firstneigh,block_size()); + + double 
bytes=ans->gpu_bytes()+nbor->gpu_bytes(); + if (bytes>_max_an_bytes) + _max_an_bytes=bytes; + + return ilist; +} + +// --------------------------------------------------------------------------- +// Build neighbor list on device +// --------------------------------------------------------------------------- +template +inline void BaseSPHT::build_nbor_list(const int inum, const int host_inum, + const int nall, double **host_x, + int *host_type, double *sublo, + double *subhi, tagint *tag, + int **nspecial, tagint **special, + bool &success) { + success=true; + resize_atom(inum,nall,success); + resize_local(inum,host_inum,nbor->max_nbors(),success); + if (!success) + return; + atom->cast_copy_x(host_x,host_type); + + int mn; + nbor->build_nbor_list(host_x, inum, host_inum, nall, *atom, sublo, subhi, + tag, nspecial, special, success, mn, ans->error_flag); + + double bytes=ans->gpu_bytes()+nbor->gpu_bytes(); + if (bytes>_max_an_bytes) + _max_an_bytes=bytes; +} + +// --------------------------------------------------------------------------- +// Copy nbor list from host if necessary and then calculate forces, virials,.. 
+// --------------------------------------------------------------------------- +template +void BaseSPHT::compute(const int f_ago, const int inum_full, const int nall, + double **host_x, int *host_type, int *ilist, int *numj, + int **firstneigh, const bool eflag_in, const bool vflag_in, + const bool eatom, const bool vatom, int &host_start, + const double cpu_time, bool &success, tagint *tag, + double **host_v, const int nlocal) { + acc_timers(); + int eflag, vflag; + if (eatom) eflag=2; + else if (eflag_in) eflag=1; + else eflag=0; + if (vatom) vflag=2; + else if (vflag_in) vflag=1; + else vflag=0; + + #ifdef LAL_NO_BLOCK_REDUCE + if (eflag) eflag=2; + if (vflag) vflag=2; + #endif + + set_kernel(eflag,vflag); + if (inum_full==0) { + host_start=0; + // Make sure textures are correct if realloc by a different hybrid style + resize_atom(0,nall,success); + zero_timers(); + return; + } + + int ago=hd_balancer.ago_first(f_ago); + int inum=hd_balancer.balance(ago,inum_full,cpu_time); + ans->inum(inum); + host_start=inum; + + if (ago==0) { + reset_nbors(nall, inum, ilist, numj, firstneigh, success); + if (!success) + return; + } + + atom->cast_x_data(host_x,host_type); + atom->cast_v_data(host_v,tag); + hd_balancer.start_timer(); + atom->add_x_data(host_x,host_type); + atom->add_v_data(host_v,tag); + + const int red_blocks=loop(eflag,vflag); + ans->copy_answers(eflag_in,vflag_in,eatom,vatom,ilist,red_blocks); + device->add_ans_object(ans); + hd_balancer.stop_timer(); +} + +// --------------------------------------------------------------------------- +// Reneighbor on GPU if necessary and then compute forces, virials, energies +// --------------------------------------------------------------------------- +template +int** BaseSPHT::compute(const int ago, const int inum_full, const int nall, + double **host_x, int *host_type, double *sublo, + double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag_in, const bool vflag_in, + const bool eatom, const 
bool vatom, int &host_start, + int **ilist, int **jnum, const double cpu_time, bool &success, + double **host_v) { + acc_timers(); + int eflag, vflag; + if (eatom) eflag=2; + else if (eflag_in) eflag=1; + else eflag=0; + if (vatom) vflag=2; + else if (vflag_in) vflag=1; + else vflag=0; + + #ifdef LAL_NO_BLOCK_REDUCE + if (eflag) eflag=2; + if (vflag) vflag=2; + #endif + + set_kernel(eflag,vflag); + if (inum_full==0) { + host_start=0; + // Make sure textures are correct if realloc by a different hybrid style + resize_atom(0,nall,success); + zero_timers(); + return nullptr; + } + + hd_balancer.balance(cpu_time); + int inum=hd_balancer.get_gpu_count(ago,inum_full); + ans->inum(inum); + host_start=inum; + + // Build neighbor list on GPU if necessary + if (ago==0) { + build_nbor_list(inum, inum_full-inum, nall, host_x, host_type, + sublo, subhi, tag, nspecial, special, success); + if (!success) + return nullptr; + atom->cast_v_data(host_v,tag); + hd_balancer.start_timer(); + } else { + atom->cast_x_data(host_x,host_type); + atom->cast_v_data(host_v,tag); + hd_balancer.start_timer(); + atom->add_x_data(host_x,host_type); + } + atom->add_v_data(host_v,tag); + *ilist=nbor->host_ilist.begin(); + *jnum=nbor->host_acc.begin(); + + const int red_blocks=loop(eflag,vflag); + ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks); + device->add_ans_object(ans); + hd_balancer.stop_timer(); + + return nbor->host_jlist.begin()-host_start; +} + +template +double BaseSPHT::host_memory_usage_atomic() const { + return device->atom.host_memory_usage()+nbor->host_memory_usage()+ + 4*sizeof(numtyp)+sizeof(BaseSPH); +} + +template +void BaseSPHT::compile_kernels(UCL_Device &dev, const void *pair_str, + const char *kname, const int onetype) { + if (_compiled && _onetype==onetype) + return; + + _onetype=onetype; + + std::string s_fast=std::string(kname)+"_fast"; + if (pair_program) delete pair_program; + pair_program=new UCL_Program(dev); + std::string oclstring = 
device->compile_string()+" -DEVFLAG=1"; + if (_onetype) oclstring+=" -DONETYPE="+device->toa(_onetype); + pair_program->load_string(pair_str,oclstring.c_str(),nullptr,screen); + k_pair_fast.set_function(*pair_program,s_fast.c_str()); + k_pair.set_function(*pair_program,kname); + pos_tex.get_texture(*pair_program,"pos_tex"); + vel_tex.get_texture(*pair_program,"vel_tex"); + + #if defined(LAL_OCL_EV_JIT) + oclstring = device->compile_string()+" -DEVFLAG=0"; + if (_onetype) oclstring+=" -DONETYPE="+device->toa(_onetype); + if (pair_program_noev) delete pair_program_noev; + pair_program_noev=new UCL_Program(dev); + pair_program_noev->load_string(pair_str,oclstring.c_str(),nullptr,screen); + k_pair_noev.set_function(*pair_program_noev,s_fast.c_str()); + #else + k_pair_sel = &k_pair_fast; + #endif + + _compiled=true; + + #if defined(USE_OPENCL) && (defined(CL_VERSION_2_1) || defined(CL_VERSION_3_0)) + if (dev.has_subgroup_support()) { + size_t mx_subgroup_sz = k_pair_fast.max_subgroup_size(_block_size); + #if defined(LAL_OCL_EV_JIT) + mx_subgroup_sz = std::min(mx_subgroup_sz, k_pair_noev.max_subgroup_size(_block_size)); + #endif + if (_threads_per_atom > (int)mx_subgroup_sz) _threads_per_atom = mx_subgroup_sz; + device->set_simd_size(mx_subgroup_sz); + } + #endif + +} + +template class BaseSPH; +} diff --git a/lib/gpu/lal_base_sph.h b/lib/gpu/lal_base_sph.h new file mode 100644 index 0000000000..e1e5731573 --- /dev/null +++ b/lib/gpu/lal_base_sph.h @@ -0,0 +1,209 @@ +/*************************************************************************** + base_sph.h + ------------------- + Trung Nguyen (U Chicago) + + Base class for SPH pair styles needing per-particle data for position, + velocity, and type. 
+ + __________________________________________________________________________ + This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) + __________________________________________________________________________ + + begin : December 2023 + email : ndactrung@gmail.com + ***************************************************************************/ + +#ifndef LAL_BASE_SPH_H +#define LAL_BASE_SPH_H + +#include "lal_device.h" +#include "lal_balance.h" +#include "mpi.h" + +#ifdef USE_OPENCL +#include "geryon/ocl_texture.h" +#elif defined(USE_HIP) +#include "geryon/hip_texture.h" +#else +#include "geryon/nvd_texture.h" +#endif + +namespace LAMMPS_AL { + +template +class BaseSPH { + public: + BaseSPH(); + virtual ~BaseSPH(); + + /// Clear any previous data and set up for a new LAMMPS run + /** \param max_nbors initial number of rows in the neighbor matrix + * \param cell_size cutoff + skin + * \param gpu_split fraction of particles handled by device + * \param k_name name for the kernel for force calculation + * + * Returns: + * - 0 if successful + * - -1 if fix gpu not found + * - -3 if there is an out of memory error + * - -4 if the GPU library was not compiled for GPU + * - -5 Double precision is not supported on card **/ + int init_atomic(const int nlocal, const int nall, const int max_nbors, + const int maxspecial, const double cell_size, + const double gpu_split, FILE *screen, + const void *pair_program, const char *k_name, + const int onetype=0, const int extra_fields=0); + + /// Estimate the overhead for GPU context changes and CPU driver + void estimate_gpu_overhead(); + + /// Check if there is enough storage for atom arrays and realloc if not + /** \param success set to false if insufficient memory **/ + inline void resize_atom(const int inum, const int nall, bool &success) { + if (atom->resize(nall, success)) { + pos_tex.bind_float(atom->x,4); + vel_tex.bind_float(atom->v,4); + } + ans->resize(inum,success); + } + + /// Check if there is enough storage 
for neighbors and realloc if not + /** \param nlocal number of particles whose nbors must be stored on device + * \param host_inum number of particles whose nbors need to be copied to host + * \param current maximum number of neighbors + * \note olist_size=total number of local particles **/ + inline void resize_local(const int inum, const int max_nbors, bool &success) { + nbor->resize(inum,max_nbors,success); + } + + /// Check if there is enough storage for neighbors and realloc if not + /** \param nlocal number of particles whose nbors must be stored on device + * \param host_inum number of particles whose nbors need to be copied to host + * \param current maximum number of neighbors + * \note host_inum is 0 if the host is performing neighboring + * \note nlocal+host_inum=total number of local particles + * \note olist_size=0 **/ + inline void resize_local(const int inum, const int host_inum, + const int max_nbors, bool &success) { + nbor->resize(inum,host_inum,max_nbors,success); + } + + /// Clear all host and device data + /** \note This is called at the beginning of the init() routine **/ + void clear_atomic(); + + /// Returns memory usage on device per atom + int bytes_per_atom_atomic(const int max_nbors) const; + + /// Total host memory used by library for pair style + double host_memory_usage_atomic() const; + + /// Accumulate timers + inline void acc_timers() { + if (device->time_device()) { + nbor->acc_timers(screen); + time_pair.add_to_total(); + atom->acc_timers(); + ans->acc_timers(); + } + } + + /// Zero timers + inline void zero_timers() { + time_pair.zero(); + atom->zero_timers(); + ans->zero_timers(); + } + + /// Copy neighbor list from host + int * reset_nbors(const int nall, const int inum, int *ilist, int *numj, + int **firstneigh, bool &success); + + /// Build neighbor list on device + void build_nbor_list(const int inum, const int host_inum, + const int nall, double **host_x, int *host_type, + double *sublo, double *subhi, tagint *tag, int **nspecial, 
+ tagint **special, bool &success); + + /// Pair loop with host neighboring + void compute(const int f_ago, const int inum_full, const int nall, + double **host_x, int *host_type, int *ilist, int *numj, + int **firstneigh, const bool eflag, const bool vflag, + const bool eatom, const bool vatom, int &host_start, + const double cpu_time, bool &success, tagint *tag, + double **v, const int nlocal); + + /// Pair loop with device neighboring + int** compute(const int ago, const int inum_full, const int nall, + double **host_x, int *host_type, double *sublo, + double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, + const bool eatom, const bool vatom, int &host_start, + int **ilist, int **numj, const double cpu_time, bool &success, + double **v); + + // -------------------------- DEVICE DATA ------------------------- + + /// Device Properties and Atom and Neighbor storage + Device *device; + + /// Geryon device + UCL_Device *ucl_device; + + /// Device Timers + UCL_Timer time_pair; + + /// Host device load balancer + Balance hd_balancer; + + /// LAMMPS pointer for screen output + FILE *screen; + + // --------------------------- ATOM DATA -------------------------- + + /// Atom Data + Atom *atom; + + // ------------------------ FORCE/ENERGY DATA ----------------------- + + Answer *ans; + + // --------------------------- NBOR DATA ---------------------------- + + /// Neighbor data + Neighbor *nbor; + + // ------------------------- DEVICE KERNELS ------------------------- + UCL_Program *pair_program, *pair_program_noev; + UCL_Kernel k_pair_fast, k_pair, k_pair_noev, *k_pair_sel; + inline int block_size() { return _block_size; } + inline void set_kernel(const int eflag, const int vflag) { + #if defined(LAL_OCL_EV_JIT) + if (eflag || vflag) k_pair_sel = &k_pair_fast; + else k_pair_sel = &k_pair_noev; + #endif + } + + + // --------------------------- TEXTURES ----------------------------- + UCL_Texture pos_tex; + UCL_Texture 
vel_tex; + + // ------------------------- COMMON VARS ---------------------------- + + protected: + bool _compiled; + int _block_size, _threads_per_atom, _onetype, _extra_fields; + double _max_bytes, _max_an_bytes; + double _gpu_overhead, _driver_overhead; + UCL_D_Vec *_nbor_data; + + void compile_kernels(UCL_Device &dev, const void *pair_string, + const char *k, const int onetype); + virtual int loop(const int eflag, const int vflag) = 0; +}; + +} + +#endif diff --git a/lib/gpu/lal_coul_slater_long.cpp b/lib/gpu/lal_coul_slater_long.cpp new file mode 100644 index 0000000000..42eb86e8ff --- /dev/null +++ b/lib/gpu/lal_coul_slater_long.cpp @@ -0,0 +1,150 @@ +/*************************************************************************** + coul_slater_long.cpp + ------------------------ + Trung Nguyen (U Chicago) + + Class for acceleration of the coul/slater/long pair style. + + __________________________________________________________________________ + This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) + __________________________________________________________________________ + + begin : September 2023 + email : ndactrung@gmail.com + ***************************************************************************/ + +#if defined(USE_OPENCL) +#include "coul_slater_long_cl.h" +#elif defined(USE_CUDART) +const char *coul_slater_long=0; +#else +#include "coul_slater_long_cubin.h" +#endif + +#include "lal_coul_slater_long.h" +#include +namespace LAMMPS_AL { +#define CoulSlaterLongT CoulSlaterLong + +extern Device pair_gpu_device; + +template +CoulSlaterLongT::CoulSlaterLong() : BaseCharge(), _allocated(false) { +} + +template +CoulSlaterLongT::~CoulSlaterLong() { + clear(); +} + +template +int CoulSlaterLongT::bytes_per_atom(const int max_nbors) const { + return this->bytes_per_atom_atomic(max_nbors); +} + +template +int CoulSlaterLongT::init(const int ntypes, double **host_scale, + const int nlocal, const int nall, const int max_nbors, + const int 
maxspecial, const double cell_size, + const double gpu_split, FILE *_screen, + const double host_cut_coulsq, double *host_special_coul, + const double qqrd2e, const double g_ewald, double lamda) { + int success; + success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size, + gpu_split,_screen,coul_slater_long,"k_coul_slater_long"); + if (success!=0) + return success; + + int lj_types=ntypes; + shared_types=false; + int max_shared_types=this->device->max_shared_types(); + if (lj_types<=max_shared_types && this->_block_size>=max_shared_types) { + lj_types=max_shared_types; + shared_types=true; + } + _lj_types=lj_types; + + // Allocate a host write buffer for data initialization + UCL_H_Vec host_write(lj_types*lj_types*32,*(this->ucl_device), + UCL_WRITE_ONLY); + + for (int i=0; iucl_device),UCL_READ_ONLY); + this->atom->type_pack1(ntypes,lj_types,scale,host_write,host_scale); + + sp_cl.alloc(4,*(this->ucl_device),UCL_READ_ONLY); + for (int i=0; i<4; i++) { + host_write[i]=host_special_coul[i]; + } + ucl_copy(sp_cl,host_write,4,false); + + _cut_coulsq=host_cut_coulsq; + _qqrd2e=qqrd2e; + _g_ewald=g_ewald; + _lamda=lamda; + + _allocated=true; + this->_max_bytes=scale.row_bytes()+sp_cl.row_bytes(); + return 0; +} + +template +void CoulSlaterLongT::reinit(const int ntypes, double **host_scale) { + UCL_H_Vec hscale(_lj_types*_lj_types,*(this->ucl_device), + UCL_WRITE_ONLY); + this->atom->type_pack1(ntypes,_lj_types,scale,hscale,host_scale); +} + +template +void CoulSlaterLongT::clear() { + if (!_allocated) + return; + _allocated=false; + + scale.clear(); + sp_cl.clear(); + this->clear_atomic(); +} + +template +double CoulSlaterLongT::host_memory_usage() const { + return this->host_memory_usage_atomic()+sizeof(CoulSlaterLong); +} + +// --------------------------------------------------------------------------- +// Calculate energies, forces, and torques +// --------------------------------------------------------------------------- +template +int 
CoulSlaterLongT::loop(const int eflag, const int vflag) { + // Compute the block size and grid size to keep all cores busy + const int BX=this->block_size(); + int GX=static_cast(ceil(static_cast(this->ans->inum())/ + (BX/this->_threads_per_atom))); + + int ainum=this->ans->inum(); + int nbor_pitch=this->nbor->nbor_pitch(); + this->time_pair.start(); + if (shared_types) { + this->k_pair_sel->set_size(GX,BX); + this->k_pair_sel->run(&this->atom->x, &scale, &sp_cl, + &this->nbor->dev_nbor, &this->_nbor_data->begin(), + &this->ans->force, &this->ans->engv, + &eflag, &vflag, &ainum, &nbor_pitch, + &this->atom->q, &_cut_coulsq, &_qqrd2e, &_g_ewald, + &_lamda, &this->_threads_per_atom); + } else { + this->k_pair.set_size(GX,BX); + this->k_pair.run(&this->atom->x, &scale, &_lj_types, &sp_cl, + &this->nbor->dev_nbor, &this->_nbor_data->begin(), + &this->ans->force, &this->ans->engv, &eflag, &vflag, + &ainum, &nbor_pitch, &this->atom->q, &_cut_coulsq, + &_qqrd2e, &_g_ewald, &_lamda, &this->_threads_per_atom); + } + this->time_pair.stop(); + return GX; +} + +template class CoulSlaterLong; +} diff --git a/lib/gpu/lal_coul_slater_long.cu b/lib/gpu/lal_coul_slater_long.cu new file mode 100644 index 0000000000..1fc8ab8be4 --- /dev/null +++ b/lib/gpu/lal_coul_slater_long.cu @@ -0,0 +1,250 @@ +// ************************************************************************** +// coul_slater_long.cu +// ------------------- +// Trung Nguyen (U Chicago) +// +// Device code for acceleration of the coul/slater/long pair style +// +// __________________________________________________________________________ +// This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) +// __________________________________________________________________________ +// +// begin : September 2023 +// email : ndactrung@gmail.com +// *************************************************************************** + +#if defined(NV_KERNEL) || defined(USE_HIP) + +#include "lal_aux_fun1.h" +#ifndef _DOUBLE_DOUBLE 
+_texture( pos_tex,float4); +_texture( q_tex,float); +#else +_texture_2d( pos_tex,int4); +_texture( q_tex,int2); +#endif + +#else +#define pos_tex x_ +#define q_tex q_ +#endif + +__kernel void k_coul_slater_long(const __global numtyp4 *restrict x_, + const __global numtyp *restrict scale, + const int lj_types, + const __global numtyp *restrict sp_cl_in, + const __global int *dev_nbor, + const __global int *dev_packed, + __global acctyp3 *restrict ans, + __global acctyp *restrict engv, + const int eflag, const int vflag, const int inum, + const int nbor_pitch, + const __global numtyp *restrict q_, + const numtyp cut_coulsq, const numtyp qqrd2e, + const numtyp g_ewald, const numtyp lamda, + const int t_per_atom) { + int tid, ii, offset; + atom_info(t_per_atom,ii,tid,offset); + + __local numtyp sp_cl[4]; + int n_stride; + local_allocate_store_charge(); + + sp_cl[0]=sp_cl_in[0]; + sp_cl[1]=sp_cl_in[1]; + sp_cl[2]=sp_cl_in[2]; + sp_cl[3]=sp_cl_in[3]; + + acctyp3 f; + f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0; + acctyp e_coul, virial[6]; + if (EVFLAG) { + e_coul=(acctyp)0; + for (int i=0; i<6; i++) virial[i]=(acctyp)0; + } + + if (ii (numtyp)0) force -= factor_coul*prefactor*((numtyp)1.0-slater_term); + force *= r2inv; + + f.x+=delx*force; + f.y+=dely*force; + f.z+=delz*force; + + if (EVFLAG && eflag) { + numtyp e_slater = ((numtyp)1.0 + rlamdainv)*exprlmdainv; + numtyp e = prefactor*(_erfc-e_slater); + if (factor_coul > (numtyp)0) e -= factor_coul*prefactor*((numtyp)1.0 - e_slater); + e_coul += e; + } + if (EVFLAG && vflag) { + virial[0] += delx*delx*force; + virial[1] += dely*dely*force; + virial[2] += delz*delz*force; + virial[3] += delx*dely*force; + virial[4] += delx*delz*force; + virial[5] += dely*delz*force; + } + } + + } // for nbor + } // if ii + acctyp energy; + if (EVFLAG) energy=(acctyp)0.0; + store_answers_q(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,offset,eflag, + vflag,ans,engv); +} + +__kernel void k_coul_slater_long_fast(const __global numtyp4 
*restrict x_, + const __global numtyp *restrict scale_in, + const __global numtyp *restrict sp_cl_in, + const __global int *dev_nbor, + const __global int *dev_packed, + __global acctyp3 *restrict ans, + __global acctyp *restrict engv, + const int eflag, const int vflag, const int inum, + const int nbor_pitch, + const __global numtyp *restrict q_, + const numtyp cut_coulsq, const numtyp qqrd2e, + const numtyp g_ewald, const numtyp lamda, + const int t_per_atom) { + int tid, ii, offset; + atom_info(t_per_atom,ii,tid,offset); + + __local numtyp scale[MAX_SHARED_TYPES*MAX_SHARED_TYPES]; + __local numtyp sp_cl[4]; + int n_stride; + local_allocate_store_charge(); + + if (tid<4) + sp_cl[tid]=sp_cl_in[tid]; + if (tid (numtyp)0) force -= factor_coul*prefactor*((numtyp)1.0-slater_term); + force *= r2inv; + + f.x+=delx*force; + f.y+=dely*force; + f.z+=delz*force; + + if (EVFLAG && eflag) { + numtyp e_slater = ((numtyp)1.0 + rlamdainv)*exprlmdainv; + numtyp e = prefactor*(_erfc-e_slater); + if (factor_coul > (numtyp)0) e -= factor_coul*prefactor*((numtyp)1.0 - e_slater); + e_coul += e; + } + if (EVFLAG && vflag) { + virial[0] += delx*delx*force; + virial[1] += dely*dely*force; + virial[2] += delz*delz*force; + virial[3] += delx*dely*force; + virial[4] += delx*delz*force; + virial[5] += dely*delz*force; + } + } + + } // for nbor + } // if ii + acctyp energy; + if (EVFLAG) energy=(acctyp)0.0; + store_answers_q(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,offset,eflag, + vflag,ans,engv); +} + diff --git a/lib/gpu/lal_coul_slater_long.h b/lib/gpu/lal_coul_slater_long.h new file mode 100644 index 0000000000..8950fd81ef --- /dev/null +++ b/lib/gpu/lal_coul_slater_long.h @@ -0,0 +1,82 @@ +/*************************************************************************** + coul_slater_long.h + ------------------- + Trung Nguyen (U Chicago) + + Class for acceleration of the coul/slater/long pair style. 
+ + __________________________________________________________________________ + This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) + __________________________________________________________________________ + + begin : September 2023 + email : ndactrung@gmail.com + ***************************************************************************/ + +#ifndef LAL_Coul_Slater_Long_H +#define LAL_Coul_Slater_Long_H + +#include "lal_base_charge.h" + +namespace LAMMPS_AL { + +template +class CoulSlaterLong : public BaseCharge { + public: + CoulSlaterLong(); + ~CoulSlaterLong(); + + /// Clear any previous data and set up for a new LAMMPS run + /** \param max_nbors initial number of rows in the neighbor matrix + * \param cell_size cutoff + skin + * \param gpu_split fraction of particles handled by device + * + * Returns: + * - 0 if successful + * - -1 if fix gpu not found + * - -3 if there is an out of memory error + * - -4 if the GPU library was not compiled for GPU + * - -5 Double precision is not supported on card **/ + int init(const int ntypes, double **scale, + const int nlocal, const int nall, const int max_nbors, + const int maxspecial, const double cell_size, + const double gpu_split, FILE *screen, + const double host_cut_coulsq, double *host_special_coul, + const double qqrd2e, const double g_ewald, const double lamda); + + /// Send updated coeffs from host to device (to be compatible with fix adapt) + void reinit(const int ntypes, double **scale); + + /// Clear all host and device data + /** \note This is called at the beginning of the init() routine **/ + void clear(); + + /// Returns memory usage on device per atom + int bytes_per_atom(const int max_nbors) const; + + /// Total host memory used by library for pair style + double host_memory_usage() const; + + // --------------------------- TYPE DATA -------------------------- + + /// scale + UCL_D_Vec scale; + /// Special Coul values [0-3] + UCL_D_Vec sp_cl; + + /// If atom type constants fit in shared 
memory, use fast kernels + bool shared_types; + + /// Number of atom types + int _lj_types; + + numtyp _cut_coulsq, _qqrd2e, _g_ewald, _lamda; + + protected: + bool _allocated; + int loop(const int eflag, const int vflag); +}; + +} + +#endif diff --git a/lib/gpu/lal_coul_slater_long_ext.cpp b/lib/gpu/lal_coul_slater_long_ext.cpp new file mode 100644 index 0000000000..8c34cc5552 --- /dev/null +++ b/lib/gpu/lal_coul_slater_long_ext.cpp @@ -0,0 +1,145 @@ +/*************************************************************************** + coul_slater_long_ext.cpp + ------------------------ + Trung Nguyen (U Chicago) + + Functions for LAMMPS access to coul/slater/long acceleration routines. + + __________________________________________________________________________ + This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) + __________________________________________________________________________ + + begin : September 2023 + email : ndactrung@gmail.com + ***************************************************************************/ + +#include +#include +#include + +#include "lal_coul_slater_long.h" + +using namespace std; +using namespace LAMMPS_AL; + +static CoulSlaterLong CSLMF; + +// --------------------------------------------------------------------------- +// Allocate memory on host and device and copy constants to device +// --------------------------------------------------------------------------- +int csl_gpu_init(const int ntypes, double **host_scale, + const int inum, const int nall, const int max_nbors, + const int maxspecial, const double cell_size, int &gpu_mode, + FILE *screen, double host_cut_coulsq, double *host_special_coul, + const double qqrd2e, const double g_ewald, const double lamda) { + CSLMF.clear(); + gpu_mode=CSLMF.device->gpu_mode(); + double gpu_split=CSLMF.device->particle_split(); + int first_gpu=CSLMF.device->first_device(); + int last_gpu=CSLMF.device->last_device(); + int world_me=CSLMF.device->world_me(); + int 
gpu_rank=CSLMF.device->gpu_rank(); + int procs_per_gpu=CSLMF.device->procs_per_gpu(); + + CSLMF.device->init_message(screen,"coul/slater/long",first_gpu,last_gpu); + + bool message=false; + if (CSLMF.device->replica_me()==0 && screen) + message=true; + + if (message) { + fprintf(screen,"Initializing Device and compiling on process 0..."); + fflush(screen); + } + + int init_ok=0; + if (world_me==0) + init_ok=CSLMF.init(ntypes, host_scale, inum, nall, max_nbors, maxspecial, + cell_size, gpu_split, screen, host_cut_coulsq, + host_special_coul, qqrd2e, g_ewald, lamda); + + CSLMF.device->world_barrier(); + if (message) + fprintf(screen,"Done.\n"); + + for (int i=0; iserialize_init(); + if (message) + fprintf(screen,"Done.\n"); + } + if (message) + fprintf(screen,"\n"); + + if (init_ok==0) + CSLMF.estimate_gpu_overhead(); + return init_ok; +} + +// --------------------------------------------------------------------------- +// Copy updated coeffs from host to device +// --------------------------------------------------------------------------- +void csl_gpu_reinit(const int ntypes, double **host_scale) { + int world_me=CSLMF.device->world_me(); + int gpu_rank=CSLMF.device->gpu_rank(); + int procs_per_gpu=CSLMF.device->procs_per_gpu(); + + if (world_me==0) + CSLMF.reinit(ntypes, host_scale); + + CSLMF.device->world_barrier(); + + for (int i=0; iserialize_init(); + } +} + +void csl_gpu_clear() { + CSLMF.clear(); +} + +int** csl_gpu_compute_n(const int ago, const int inum_full, + const int nall, double **host_x, int *host_type, + double *sublo, double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, + const bool eatom, const bool vatom, int &host_start, + int **ilist, int **jnum, const double cpu_time, + bool &success, double *host_q, double *boxlo, + double *prd) { + return CSLMF.compute(ago, inum_full, nall, host_x, host_type, sublo, + subhi, tag, nspecial, special, eflag, vflag, eatom, + vatom, host_start, ilist, jnum, 
cpu_time, success, + host_q, boxlo, prd); +} + +void csl_gpu_compute(const int ago, const int inum_full, const int nall, + double **host_x, int *host_type, int *ilist, int *numj, + int **firstneigh, const bool eflag, const bool vflag, + const bool eatom, const bool vatom, int &host_start, + const double cpu_time, bool &success, double *host_q, + const int nlocal, double *boxlo, double *prd) { + CSLMF.compute(ago,inum_full,nall,host_x,host_type,ilist,numj, + firstneigh,eflag,vflag,eatom,vatom,host_start,cpu_time,success, + host_q,nlocal,boxlo,prd); +} + +double csl_gpu_bytes() { + return CSLMF.host_memory_usage(); +} + + diff --git a/lib/gpu/lal_device.cpp b/lib/gpu/lal_device.cpp index 70ba373a65..e9ef2294b2 100644 --- a/lib/gpu/lal_device.cpp +++ b/lib/gpu/lal_device.cpp @@ -364,6 +364,12 @@ int DeviceT::init_device(MPI_Comm /*world*/, MPI_Comm replica, const int ngpu, } else _neighbor_shared.setup_auto_cell_size(false,_user_cell_size,_simd_size); + #ifndef LAL_USE_OLD_NEIGHBOR + _use_old_nbor_build = 0; + #else + _use_old_nbor_build = 1; + #endif + return flag; } @@ -510,9 +516,13 @@ int DeviceT::init(Answer &ans, const bool charge, gpu_nbor=1; else if (_gpu_mode==Device::GPU_HYB_NEIGH) gpu_nbor=2; + + // NOTE: enforce the hybrid mode (binning on the CPU) + // when not using sorting on the device #if !defined(USE_CUDPP) && !defined(USE_HIP_DEVICE_SORT) if (gpu_nbor==1) gpu_nbor=2; #endif + // or when the device supports subgroups #ifndef LAL_USE_OLD_NEIGHBOR if (gpu_nbor==1) gpu_nbor=2; #endif @@ -886,19 +896,31 @@ void DeviceT::output_times(UCL_Timer &time_pair, Answer &ans, } if (times[5] > 0.0) fprintf(screen,"Device Overhead: %.4f s.\n",times[5]/_replica_size); - fprintf(screen,"Average split: %.4f.\n",avg_split); - fprintf(screen,"Lanes / atom: %d.\n",threads_per_atom); - fprintf(screen,"Vector width: %d.\n", simd_size()); - fprintf(screen,"Prefetch mode: "); - if (_nbor_prefetch==2) fprintf(screen,"Intrinsics.\n"); - else if (_nbor_prefetch==1) 
fprintf(screen,"API.\n"); - else fprintf(screen,"None.\n"); - fprintf(screen,"Max Mem / Proc: %.2f MB.\n",max_mb); if (nbor.gpu_nbor()==2) fprintf(screen,"CPU Neighbor: %.4f s.\n",times[8]/_replica_size); fprintf(screen,"CPU Cast/Pack: %.4f s.\n",times[4]/_replica_size); fprintf(screen,"CPU Driver_Time: %.4f s.\n",times[6]/_replica_size); fprintf(screen,"CPU Idle_Time: %.4f s.\n",times[7]/_replica_size); + fprintf(screen,"Average split: %.4f.\n",avg_split); + fprintf(screen,"Max Mem / Proc: %.2f MB.\n",max_mb); + fprintf(screen,"Prefetch mode: "); + if (_nbor_prefetch==2) fprintf(screen,"Intrinsics.\n"); + else if (_nbor_prefetch==1) fprintf(screen,"API.\n"); + else fprintf(screen,"None.\n"); + fprintf(screen,"Vector width: %d.\n", simd_size()); + fprintf(screen,"Lanes / atom: %d.\n",threads_per_atom); + fprintf(screen,"Pair block: %d.\n",_block_pair); + fprintf(screen,"Neigh block: %d.\n",_block_nbor_build); + if (nbor.gpu_nbor()==2) { + fprintf(screen,"Neigh mode: Hybrid (binning on host)"); + if (_use_old_nbor_build == 1) fprintf(screen," - legacy\n"); + else fprintf(screen," with subgroup support\n"); + } else if (nbor.gpu_nbor()==1) { + fprintf(screen,"Neigh mode: Device"); + if (_use_old_nbor_build == 1) fprintf(screen," - legacy\n"); + else fprintf(screen," - with subgroup support\n"); + } else if (nbor.gpu_nbor()==0) + fprintf(screen,"Neigh mode: Host\n"); fprintf(screen,"-------------------------------------"); fprintf(screen,"--------------------------------\n\n"); diff --git a/lib/gpu/lal_device.h b/lib/gpu/lal_device.h index ba693e551a..d6b52484f1 100644 --- a/lib/gpu/lal_device.h +++ b/lib/gpu/lal_device.h @@ -347,6 +347,7 @@ class Device { int _pppm_block, _block_nbor_build, _block_cell_2d, _block_cell_id; int _max_shared_types, _max_bio_shared_types, _pppm_max_spline; int _nbor_prefetch; + int _use_old_nbor_build; UCL_Program *dev_program; UCL_Kernel k_zero, k_info; diff --git a/lib/gpu/lal_edpd.cpp b/lib/gpu/lal_edpd.cpp new file mode 100644 index 
0000000000..c03591b9ed --- /dev/null +++ b/lib/gpu/lal_edpd.cpp @@ -0,0 +1,285 @@ +/*************************************************************************** + edpd.cpp + ------------------- + Trung Dac Nguyen (U Chicago) + + Class for acceleration of the edpd pair style. + + __________________________________________________________________________ + This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) + __________________________________________________________________________ + + begin : September 2023 + email : ndactrung@gmail.com + ***************************************************************************/ + +#if defined(USE_OPENCL) +#include "edpd_cl.h" +#elif defined(USE_CUDART) +const char *edpd=0; +#else +#include "edpd_cubin.h" +#endif + +#include "lal_edpd.h" +#include +namespace LAMMPS_AL { +#define EDPDT EDPD + +extern Device device; + +template +EDPDT::EDPD() : BaseDPD(), _allocated(false) { + _max_q_size = 0; +} + +template +EDPDT::~EDPD() { + clear(); +} + +template +int EDPDT::bytes_per_atom(const int max_nbors) const { + return this->bytes_per_atom_atomic(max_nbors); +} + +template +int EDPDT::init(const int ntypes, + double **host_cutsq, double **host_a0, + double **host_gamma, double **host_cut, + double **host_power, double **host_kappa, + double **host_powerT, double **host_cutT, + double ***host_sc, double ***host_kc, double *host_mass, + double *host_special_lj, + const int power_flag, const int kappa_flag, + const int nlocal, const int nall, + const int max_nbors, const int maxspecial, + const double cell_size, + const double gpu_split, FILE *_screen) { + const int max_shared_types=this->device->max_shared_types(); + + int onetype=0; + #ifdef USE_OPENCL + if (maxspecial==0) + for (int i=1; i0) { + if (onetype>0) + onetype=-1; + else if (onetype==0) + onetype=i*max_shared_types+j; + } + if (onetype<0) onetype=0; + #endif + + int success; + int extra_fields = 4; // round up to accommodate quadruples of numtyp values + // T and cv 
+ success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size, + gpu_split,_screen,edpd,"k_edpd",onetype,extra_fields); + if (success!=0) + return success; + + // If atom type constants fit in shared memory use fast kernel + int lj_types=ntypes; + shared_types=false; + if (lj_types<=max_shared_types && this->_block_size>=max_shared_types) { + lj_types=max_shared_types; + shared_types=true; + } + _lj_types=lj_types; + + // Allocate a host write buffer for data initialization + UCL_H_Vec host_write(lj_types*lj_types*32,*(this->ucl_device), + UCL_WRITE_ONLY); + + for (int i=0; iucl_device),UCL_READ_ONLY); + this->atom->type_pack4(ntypes,lj_types,coeff,host_write,host_a0,host_gamma, + host_cut); + + coeff2.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY); + this->atom->type_pack4(ntypes,lj_types,coeff2,host_write,host_power,host_kappa, + host_powerT,host_cutT); + + UCL_H_Vec dview_mass(ntypes, *(this->ucl_device), UCL_WRITE_ONLY); + for (int i = 0; i < ntypes; i++) + dview_mass[i] = host_mass[i]; + mass.alloc(ntypes,*(this->ucl_device), UCL_READ_ONLY); + ucl_copy(mass,dview_mass,false); + + if (host_sc) { + UCL_H_Vec dview(lj_types*lj_types,*(this->ucl_device),UCL_WRITE_ONLY);; + sc.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY); + int n = 0; + for (int i = 1; i < ntypes; i++) + for (int j = 1; j < ntypes; j++) { + dview[n].x = host_sc[i][j][0]; + dview[n].y = host_sc[i][j][1]; + dview[n].z = host_sc[i][j][2]; + dview[n].w = host_sc[i][j][3]; + n++; + } + ucl_copy(sc,dview,false); + } + + if (host_kc) { + UCL_H_Vec dview(lj_types*lj_types,*(this->ucl_device),UCL_WRITE_ONLY);; + kc.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY); + int n = 0; + for (int i = 1; i < ntypes; i++) + for (int j = 1; j < ntypes; j++) { + dview[n].x = host_kc[i][j][0]; + dview[n].y = host_kc[i][j][1]; + dview[n].z = host_kc[i][j][2]; + dview[n].w = host_kc[i][j][3]; + n++; + } + ucl_copy(kc,dview,false); + } + + UCL_H_Vec 
host_rsq(lj_types*lj_types,*(this->ucl_device), + UCL_WRITE_ONLY); + cutsq.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY); + this->atom->type_pack1(ntypes,lj_types,cutsq,host_rsq,host_cutsq); + + double special_sqrt[4]; + special_sqrt[0] = sqrt(host_special_lj[0]); + special_sqrt[1] = sqrt(host_special_lj[1]); + special_sqrt[2] = sqrt(host_special_lj[2]); + special_sqrt[3] = sqrt(host_special_lj[3]); + + UCL_H_Vec dview; + sp_lj.alloc(4,*(this->ucl_device),UCL_READ_ONLY); + dview.view(host_special_lj,4,*(this->ucl_device)); + ucl_copy(sp_lj,dview,false); + sp_sqrt.alloc(4,*(this->ucl_device),UCL_READ_ONLY); + dview.view(special_sqrt,4,*(this->ucl_device)); + ucl_copy(sp_sqrt,dview,false); + + _power_flag = power_flag; + _kappa_flag = kappa_flag; + + // allocate per-atom array Q + + int ef_nall=nall; + if (ef_nall==0) + ef_nall=2000; + + _max_q_size=static_cast(static_cast(ef_nall)*1.10); + Q.alloc(_max_q_size,*(this->ucl_device),UCL_READ_WRITE,UCL_READ_WRITE); + + _allocated=true; + this->_max_bytes=coeff.row_bytes()+coeff2.row_bytes()+Q.row_bytes()+ + sc.row_bytes()+kc.row_bytes()+mass.row_bytes()+cutsq.row_bytes()+sp_lj.row_bytes()+sp_sqrt.row_bytes(); + return 0; +} + +template +void EDPDT::clear() { + if (!_allocated) + return; + _allocated=false; + + coeff.clear(); + coeff2.clear(); + sc.clear(); + kc.clear(); + Q.clear(); + mass.clear(); + cutsq.clear(); + sp_lj.clear(); + sp_sqrt.clear(); + this->clear_atomic(); +} + +template +double EDPDT::host_memory_usage() const { + return this->host_memory_usage_atomic()+sizeof(EDPD); +} + +template +void EDPDT::update_flux(void **flux_ptr) { + *flux_ptr=Q.host.begin(); + Q.update_host(_max_q_size,false); +} + +// --------------------------------------------------------------------------- +// Calculate energies, forces, and torques +// --------------------------------------------------------------------------- +template +int EDPDT::loop(const int eflag, const int vflag) { + + int nall = this->atom->nall(); 
+ + // Resize Q array if necessary + if (nall > _max_q_size) { + _max_q_size=static_cast(static_cast(nall)*1.10); + Q.resize(_max_q_size); + } + + // signal that we need to transfer extra data from the host + + this->atom->extra_data_unavail(); + + numtyp4 *pextra=reinterpret_cast(&(this->atom->extra[0])); + + int n = 0; + int nstride = 1; + for (int i = 0; i < nall; i++) { + int idx = n+i*nstride; + numtyp4 v; + v.x = edpd_temp[i]; + v.y = edpd_cv[i]; + v.z = 0; + v.w = 0; + pextra[idx] = v; + } + this->atom->add_extra_data(); + + // Compute the block size and grid size to keep all cores busy + const int BX=this->block_size(); + int GX=static_cast(ceil(static_cast(this->ans->inum())/ + (BX/this->_threads_per_atom))); + + + int ainum=this->ans->inum(); + int nbor_pitch=this->nbor->nbor_pitch(); + this->time_pair.start(); + if (shared_types) { + this->k_pair_sel->set_size(GX,BX); + this->k_pair_sel->run(&this->atom->x, &this->atom->extra, &coeff, &coeff2, &mass, + &sc, &kc, &sp_lj, &sp_sqrt, &this->nbor->dev_nbor, &this->_nbor_data->begin(), + &this->ans->force, &this->ans->engv, &Q, &eflag, &vflag, + &_power_flag, &_kappa_flag, &ainum, &nbor_pitch, + &this->atom->v, &cutsq, &this->_dtinvsqrt, &this->_seed, + &this->_timestep, &this->_threads_per_atom); + } else { + this->k_pair.set_size(GX,BX); + this->k_pair.run(&this->atom->x, &this->atom->extra, &coeff, &coeff2, &mass, + &sc, &kc, &_lj_types, &sp_lj, &sp_sqrt, + &this->nbor->dev_nbor, &this->_nbor_data->begin(), + &this->ans->force, &this->ans->engv, &Q, &eflag, &vflag, + &_power_flag, &_kappa_flag, &ainum, &nbor_pitch, + &this->atom->v, &cutsq, &this->_dtinvsqrt, &this->_seed, + &this->_timestep, &this->_threads_per_atom); + } + + this->time_pair.stop(); + return GX; +} + +// --------------------------------------------------------------------------- +// Get the extra data pointers from host +// --------------------------------------------------------------------------- + +template +void 
EDPDT::get_extra_data(double *host_T, double *host_cv) { + edpd_temp = host_T; + edpd_cv = host_cv; +} + +template class EDPD; +} diff --git a/lib/gpu/lal_edpd.cu b/lib/gpu/lal_edpd.cu new file mode 100644 index 0000000000..9662d15aea --- /dev/null +++ b/lib/gpu/lal_edpd.cu @@ -0,0 +1,619 @@ +// ************************************************************************** +// edpd.cu +// ------------------- +// Trung Dac Nguyen (U Chicago) +// +// Device code for acceleration of the edpd pair style +// +// __________________________________________________________________________ +// This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) +// __________________________________________________________________________ +// +// begin : September 2023 +// email : ndactrung@gmail.com +// *************************************************************************** + +#if defined(NV_KERNEL) || defined(USE_HIP) +#include "lal_aux_fun1.h" +#ifndef _DOUBLE_DOUBLE +_texture( pos_tex,float4); +_texture( vel_tex,float4); +#else +_texture_2d( pos_tex,int4); +_texture_2d( vel_tex,int4); +#endif +#else +#define pos_tex x_ +#define vel_tex v_ +#endif + +#define EPSILON (numtyp)1.0e-10 + +//#define _USE_UNIFORM_SARU_LCG +//#define _USE_UNIFORM_SARU_TEA8 +//#define _USE_GAUSSIAN_SARU_LCG + +#if !defined(_USE_UNIFORM_SARU_LCG) && !defined(_USE_UNIFORM_SARU_TEA8) && !defined(_USE_GAUSSIAN_SARU_LCG) +#define _USE_UNIFORM_SARU_LCG +#endif + +// References: +// 1. Y. Afshar, F. Schmid, A. Pishevar, S. Worley, Comput. Phys. Comm. 184 (2013), 1119–1128. +// 2. C. L. Phillips, J. A. Anderson, S. C. Glotzer, J. Comput. Phys. 230 (2011), 7191-7201.
+// PRNG period = 3666320093*2^32 ~ 2^64 ~ 10^19 + +#define LCGA 0x4beb5d59 /* Full period 32 bit LCG */ +#define LCGC 0x2600e1f7 +#define oWeylPeriod 0xda879add /* Prime period 3666320093 */ +#define oWeylOffset 0x8009d14b +#define TWO_N32 0.232830643653869628906250e-9f /* 2^-32 */ + +// specifically implemented for steps = 1; high = 1.0; low = -1.0 +// returns uniformly distributed random numbers u in [-1.0;1.0] +// using the inherent LCG, then multiply u with sqrt(3) to "match" +// with a normal random distribution. +// Afshar et al. multiply u in [-0.5;0.5] with sqrt(12) +// Curly brackets to make variables local to the scope. +#ifdef _USE_UNIFORM_SARU_LCG +#define SQRT3 (numtyp)1.7320508075688772935274463 +#define saru(seed1, seed2, seed, timestep, randnum) { \ + unsigned int seed3 = seed + timestep; \ + seed3^=(seed1<<7)^(seed2>>6); \ + seed2+=(seed1>>4)^(seed3>>15); \ + seed1^=(seed2<<9)+(seed3<<8); \ + seed3^=0xA5366B4D*((seed2>>11) ^ (seed1<<1)); \ + seed2+=0x72BE1579*((seed1<<4) ^ (seed3>>16)); \ + seed1^=0x3F38A6ED*((seed3>>5) ^ (((signed int)seed2)>>22)); \ + seed2+=seed1*seed3; \ + seed1+=seed3 ^ (seed2>>2); \ + seed2^=((signed int)seed2)>>17; \ + unsigned int state = 0x79dedea3*(seed1^(((signed int)seed1)>>14)); \ + unsigned int wstate = (state + seed2) ^ (((signed int)state)>>8); \ + state = state + (wstate*(wstate^0xdddf97f5)); \ + wstate = 0xABCB96F7 + (wstate>>1); \ + state = LCGA*state + LCGC; \ + wstate = wstate + oWeylOffset+((((signed int)wstate)>>31) & oWeylPeriod); \ + unsigned int v = (state ^ (state>>26)) + wstate; \ + unsigned int s = (signed int)((v^(v>>20))*0x6957f5a7); \ + randnum = SQRT3*(s*TWO_N32*(numtyp)2.0-(numtyp)1.0); \ +} +#endif + +// specifically implemented for steps = 1; high = 1.0; low = -1.0 +// returns uniformly distributed random numbers u in [-1.0;1.0] using TEA8 +// then multiply u with sqrt(3) to "match" with a normal random distribution +// Afshar et al. 
multiply u in [-0.5;0.5] with sqrt(12) +#ifdef _USE_UNIFORM_SARU_TEA8 +#define SQRT3 (numtyp)1.7320508075688772935274463 +#define k0 0xA341316C +#define k1 0xC8013EA4 +#define k2 0xAD90777D +#define k3 0x7E95761E +#define delta 0x9e3779b9 +#define rounds 8 +#define saru(seed1, seed2, seed, timestep, randnum) { \ + unsigned int seed3 = seed + timestep; \ + seed3^=(seed1<<7)^(seed2>>6); \ + seed2+=(seed1>>4)^(seed3>>15); \ + seed1^=(seed2<<9)+(seed3<<8); \ + seed3^=0xA5366B4D*((seed2>>11) ^ (seed1<<1)); \ + seed2+=0x72BE1579*((seed1<<4) ^ (seed3>>16)); \ + seed1^=0x3F38A6ED*((seed3>>5) ^ (((signed int)seed2)>>22)); \ + seed2+=seed1*seed3; \ + seed1+=seed3 ^ (seed2>>2); \ + seed2^=((signed int)seed2)>>17; \ + unsigned int state = 0x79dedea3*(seed1^(((signed int)seed1)>>14)); \ + unsigned int wstate = (state + seed2) ^ (((signed int)state)>>8); \ + state = state + (wstate*(wstate^0xdddf97f5)); \ + wstate = 0xABCB96F7 + (wstate>>1); \ + unsigned int sum = 0; \ + for (int i=0; i < rounds; i++) { \ + sum += delta; \ + state += ((wstate<<4) + k0)^(wstate + sum)^((wstate>>5) + k1); \ + wstate += ((state<<4) + k2)^(state + sum)^((state>>5) + k3); \ + } \ + unsigned int v = (state ^ (state>>26)) + wstate; \ + unsigned int s = (signed int)((v^(v>>20))*0x6957f5a7); \ + randnum = SQRT3*(s*TWO_N32*(numtyp)2.0-(numtyp)1.0); \ +} +#endif + +// specifically implemented for steps = 1; high = 1.0; low = -1.0 +// returns two uniformly distributed random numbers r1 and r2 in [-1.0;1.0], +// and uses the polar method (Marsaglia's) to transform to a normal random value +// This is used to compare with CPU DPD using RandMars::gaussian() +#ifdef _USE_GAUSSIAN_SARU_LCG +#define saru(seed1, seed2, seed, timestep, randnum) { \ + unsigned int seed3 = seed + timestep; \ + seed3^=(seed1<<7)^(seed2>>6); \ + seed2+=(seed1>>4)^(seed3>>15); \ + seed1^=(seed2<<9)+(seed3<<8); \ + seed3^=0xA5366B4D*((seed2>>11) ^ (seed1<<1)); \ + seed2+=0x72BE1579*((seed1<<4) ^ (seed3>>16)); \ + 
seed1^=0x3F38A6ED*((seed3>>5) ^ (((signed int)seed2)>>22)); \ + seed2+=seed1*seed3; \ + seed1+=seed3 ^ (seed2>>2); \ + seed2^=((signed int)seed2)>>17; \ + unsigned int state=0x12345678; \ + unsigned int wstate=12345678; \ + state = 0x79dedea3*(seed1^(((signed int)seed1)>>14)); \ + wstate = (state + seed2) ^ (((signed int)state)>>8); \ + state = state + (wstate*(wstate^0xdddf97f5)); \ + wstate = 0xABCB96F7 + (wstate>>1); \ + unsigned int v, s; \ + numtyp r1, r2, rsq; \ + while (1) { \ + state = LCGA*state + LCGC; \ + wstate = wstate + oWeylOffset+((((signed int)wstate)>>31) & oWeylPeriod); \ + v = (state ^ (state>>26)) + wstate; \ + s = (signed int)((v^(v>>20))*0x6957f5a7); \ + r1 = s*TWO_N32*(numtyp)2.0-(numtyp)1.0; \ + state = LCGA*state + LCGC; \ + wstate = wstate + oWeylOffset+((((signed int)wstate)>>31) & oWeylPeriod); \ + v = (state ^ (state>>26)) + wstate; \ + s = (signed int)((v^(v>>20))*0x6957f5a7); \ + r2 = s*TWO_N32*(numtyp)2.0-(numtyp)1.0; \ + rsq = r1 * r1 + r2 * r2; \ + if (rsq < (numtyp)1.0) break; \ + } \ + numtyp fac = ucl_sqrt((numtyp)-2.0*log(rsq)/rsq); \ + randnum = r2*fac; \ +} +#endif + +#if (SHUFFLE_AVAIL == 0) + +#define store_heatflux(Qi, ii, inum, tid, t_per_atom, offset, Q) \ + if (t_per_atom>1) { \ + simdsync(); \ + simd_reduce_add1(t_per_atom, red_acc, offset, tid, Qi); \ + } \ + if (offset==0 && ii1) { \ + simd_reduce_add1(t_per_atom,Qi); \ + } \ + if (offset==0 && ii tag2) { + tag1 = jtag; tag2 = itag; + } + + numtyp randnum = (numtyp)0.0; + saru(tag1, tag2, seed, timestep, randnum); + + numtyp T_ij=(numtyp)0.5*(Ti+Tj); + numtyp4 T_pow; + T_pow.x = T_ij - (numtyp)1.0; + T_pow.y = T_pow.x*T_pow.x; + T_pow.z = T_pow.x*T_pow.y; + T_pow.w = T_pow.x*T_pow.z; + + numtyp coeff2x = coeff2[mtype].x; //power[itype][jtype] + numtyp coeff2y = coeff2[mtype].y; //kappa[itype][jtype] + numtyp coeff2z = coeff2[mtype].z; //powerT[itype][jtype] + numtyp coeff2w = coeff2[mtype].w; //cutT[itype][jtype] + numtyp power_d = coeff2x; + if (power_flag) { + 
numtyp factor = (numtyp)1.0; + factor += sc[mtype].x*T_pow.x + sc[mtype].y*T_pow.y + + sc[mtype].z*T_pow.z + sc[mtype].w*T_pow.w; + power_d *= factor; + } + + power_d = MAX((numtyp)0.01,power_d); + numtyp wc = (numtyp)1.0 - r/coeffz; // cut[itype][jtype] + wc = MAX((numtyp)0.0,MIN((numtyp)1.0,wc)); + numtyp wr = ucl_pow(wc, (numtyp)0.5*power_d); + + numtyp kboltz = (numtyp)1.0; + numtyp GammaIJ = coeffy; // gamma[itype][jtype] + numtyp SigmaIJ = (numtyp)4.0*GammaIJ*kboltz*Ti*Tj/(Ti+Tj); + SigmaIJ = ucl_sqrt(SigmaIJ); + + numtyp force = coeffx*T_ij*wc; // a0[itype][jtype] + force -= GammaIJ *wr*wr *dot*rinv; + force += SigmaIJ * wr *randnum * dtinvsqrt; + force *= factor_dpd*rinv; + + f.x+=delx*force; + f.y+=dely*force; + f.z+=delz*force; + + // heat transfer + + if (r < coeff2w) { + numtyp wrT = (numtyp)1.0 - r/coeff2w; + wrT = MAX((numtyp)0.0,MIN((numtyp)1.0,wrT)); + wrT = ucl_pow(wrT, (numtyp)0.5*coeff2z); // powerT[itype][jtype] + numtyp randnumT = (numtyp)0; + saru(tag1, tag2, seed+tag1+tag2, timestep, randnumT); // randomT->gaussian(); + randnumT = MAX((numtyp)-5.0,MIN(randnum,(numtyp)5.0)); // NOTE(review): clamps randnum (the force noise), so the randnumT just drawn by saru() is overwritten; possibly kept for parity with the CPU pair style -- confirm whether MIN(randnumT,...) was intended + + numtyp kappaT = coeff2y; // kappa[itype][jtype] + if (kappa_flag) { + numtyp factor = (numtyp)1.0; + factor += kc[mtype].x*T_pow.x + kc[mtype].y*T_pow.y + + kc[mtype].z*T_pow.z + kc[mtype].w*T_pow.w; + kappaT *= factor; + } + + numtyp kij = cvi*cvj*kappaT * T_ij*T_ij; + numtyp alphaij = ucl_sqrt((numtyp)2.0*kboltz*kij); + + numtyp dQc = kij * wrT*wrT * (Tj - Ti)/(Ti*Tj); + numtyp dQd = wr*wr*( GammaIJ * vijeij*vijeij - SigmaIJ*SigmaIJ/mass_itype ) - SigmaIJ * wr *vijeij *randnum; + dQd /= (cvi+cvj); + numtyp dQr = alphaij * wrT * dtinvsqrt * randnumT; + Qi += (dQc + dQd + dQr ); + } + + if (EVFLAG && eflag) { + numtyp e = (numtyp)0.5*coeffx*T_ij*coeffz * wc*wc; + energy+=factor_dpd*e; + } + if (EVFLAG && vflag) { + virial[0] += delx*delx*force; + virial[1] += dely*dely*force; + virial[2] += delz*delz*force; + virial[3] += delx*dely*force; + virial[4] += 
delx*delz*force; + virial[5] += dely*delz*force; + } + } + } // for nbor + } // if ii + store_answers(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag, + ans,engv); + store_heatflux(Qi,ii,inum,tid,t_per_atom,offset,Q); +} + +__kernel void k_edpd_fast(const __global numtyp4 *restrict x_, + const __global numtyp4 *restrict extra, + const __global numtyp4 *restrict coeff_in, + const __global numtyp4 *restrict coeff2_in, + const __global numtyp *restrict mass, + const __global numtyp4 *restrict sc_in, + const __global numtyp4 *restrict kc_in, + const __global numtyp *restrict sp_lj_in, + const __global numtyp *restrict sp_sqrt_in, + const __global int * dev_nbor, + const __global int * dev_packed, + __global acctyp3 *restrict ans, + __global acctyp *restrict engv, + __global acctyp *restrict Q, + const int eflag, const int vflag, + const int power_flag, const int kappa_flag, + const int inum, const int nbor_pitch, + const __global numtyp4 *restrict v_, + const __global numtyp *restrict cutsq, + const numtyp dtinvsqrt, const int seed, + const int timestep, const int t_per_atom) { + int tid, ii, offset; + atom_info(t_per_atom,ii,tid,offset); + + #ifndef ONETYPE + __local numtyp4 coeff[MAX_SHARED_TYPES*MAX_SHARED_TYPES]; + __local numtyp4 coeff2[MAX_SHARED_TYPES*MAX_SHARED_TYPES]; + __local numtyp4 sc[MAX_SHARED_TYPES*MAX_SHARED_TYPES]; + __local numtyp4 kc[MAX_SHARED_TYPES*MAX_SHARED_TYPES]; + __local numtyp sp_lj[4]; + __local numtyp sp_sqrt[4]; + if (tid<4) { + sp_lj[tid]=sp_lj_in[tid]; + sp_sqrt[tid]=sp_sqrt_in[tid]; + } + if (tid tag2) { + tag1 = jtag; tag2 = itag; + } + numtyp randnum = (numtyp)0.0; + saru(tag1, tag2, seed, timestep, randnum); + + numtyp T_ij=(numtyp)0.5*(Ti+Tj); + numtyp4 T_pow; + T_pow.x = T_ij - (numtyp)1.0; + T_pow.y = T_pow.x*T_pow.x; + T_pow.z = T_pow.x*T_pow.y; + T_pow.w = T_pow.x*T_pow.z; + + numtyp power_d = coeff2x; // power[itype][jtype] + if (power_flag) { + numtyp factor = (numtyp)1.0; + factor += scx*T_pow.x + scy*T_pow.y + 
scz*T_pow.z + scw*T_pow.w; + power_d *= factor; + } + + power_d = MAX((numtyp)0.01,power_d); + numtyp wc = (numtyp)1.0 - r/coeffz; // cut[itype][jtype] + wc = MAX((numtyp)0.0,MIN((numtyp)1.0,wc)); + numtyp wr = ucl_pow((numtyp)wc, (numtyp)0.5*power_d); + + numtyp kboltz = (numtyp)1.0; + numtyp GammaIJ = coeffy; // gamma[itype][jtype] + numtyp SigmaIJ = (numtyp)4.0*GammaIJ*kboltz*Ti*Tj/(Ti+Tj); + SigmaIJ = ucl_sqrt(SigmaIJ); + + numtyp force = coeffx*T_ij*wc; // a0[itype][jtype] + force -= GammaIJ *wr*wr *dot*rinv; + force += SigmaIJ* wr *randnum * dtinvsqrt; + #ifndef ONETYPE + force *= factor_dpd*rinv; + #else + force *= rinv; + #endif + + f.x+=delx*force; + f.y+=dely*force; + f.z+=delz*force; + + // heat transfer + + if (r < coeff2w) { + numtyp wrT = (numtyp)1.0 - r/coeff2w; + wrT = MAX((numtyp)0.0,MIN((numtyp)1.0,wrT)); + wrT = ucl_pow(wrT, (numtyp)0.5*coeff2z); // powerT[itype][jtype] + numtyp randnumT = (numtyp)0; + saru(tag1, tag2, seed+tag1+tag2, timestep, randnumT); // randomT->gaussian(); + randnumT = MAX((numtyp)-5.0,MIN(randnum,(numtyp)5.0)); // NOTE(review): clamps randnum (the force noise), so the randnumT just drawn by saru() is overwritten; possibly kept for parity with the CPU pair style -- confirm whether MIN(randnumT,...) was intended + + numtyp kappaT = coeff2y; // kappa[itype][jtype] + if (kappa_flag) { + numtyp factor = (numtyp)1.0; + factor += kcx*T_pow.x + kcy*T_pow.y + kcz*T_pow.z + kcw*T_pow.w; + kappaT *= factor; + } + + numtyp kij = cvi*cvj*kappaT * T_ij*T_ij; + numtyp alphaij = ucl_sqrt((numtyp)2.0*kboltz*kij); + + numtyp dQc = kij * wrT*wrT * (Tj - Ti )/(Ti*Tj); + numtyp dQd = wr*wr*( GammaIJ * vijeij*vijeij - SigmaIJ*SigmaIJ/mass_itype ) - SigmaIJ * wr *vijeij *randnum; + dQd /= (cvi+cvj); + numtyp dQr = alphaij * wrT * dtinvsqrt * randnumT; + Qi += (dQc + dQd + dQr ); + } + + if (EVFLAG && eflag) { + numtyp e = (numtyp)0.5*coeffx*T_ij*coeffz * wc*wc; + #ifndef ONETYPE + energy+=factor_dpd*e; + #else + energy+=e; + #endif + } + if (EVFLAG && vflag) { + virial[0] += delx*delx*force; + virial[1] += dely*dely*force; + virial[2] += delz*delz*force; + virial[3] += delx*dely*force; + virial[4] += delx*delz*force; + virial[5] += 
dely*delz*force; + } + + } + } // for nbor + } // if ii + + store_answers(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag, ans,engv); + store_heatflux(Qi,ii,inum,tid,t_per_atom,offset,Q); +} + diff --git a/lib/gpu/lal_edpd.h b/lib/gpu/lal_edpd.h new file mode 100644 index 0000000000..e5f7b0633b --- /dev/null +++ b/lib/gpu/lal_edpd.h @@ -0,0 +1,102 @@ +/*************************************************************************** + edpd.h + ------------------- + Trung Dac Nguyen (U Chicago) + + Class for acceleration of the edpd pair style. + + __________________________________________________________________________ + This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) + __________________________________________________________________________ + + begin : September 2023 + email : ndactrung@gmail.com + ***************************************************************************/ + +#ifndef LAL_EDPD_H +#define LAL_EDPD_H + +#include "lal_base_dpd.h" + +namespace LAMMPS_AL { + +template +class EDPD : public BaseDPD { + public: + EDPD(); + ~EDPD(); + + /// Clear any previous data and set up for a new LAMMPS run + /** \param max_nbors initial number of rows in the neighbor matrix + * \param cell_size cutoff + skin + * \param gpu_split fraction of particles handled by device + * + * Returns: + * - 0 if successful + * - -1 if fix gpu not found + * - -3 if there is an out of memory error + * - -4 if the GPU library was not compiled for GPU + * - -5 Double precision is not supported on card **/ + int init(const int ntypes, double **host_cutsq, double **host_a0, + double **host_gamma, double **host_cut, double **host_power, + double **host_kappa, double **host_powerT, double **host_cutT, + double ***host_sc, double ***host_kc, double *host_mass, + double *host_special_lj, const int power_flag, const int kappa_flag, + const int nlocal, const int nall, const int max_nbors, + const int maxspecial, const double cell_size, const double gpu_split, + FILE 
*screen); + + /// Clear all host and device data + /** \note This is called at the beginning of the init() routine **/ + void clear(); + + /// Returns memory usage on device per atom + int bytes_per_atom(const int max_nbors) const; + + /// Total host memory used by library for pair style + double host_memory_usage() const; + + void get_extra_data(double *host_T, double *host_cv); + + /// copy Q (flux) from device to host + void update_flux(void **flux_ptr); + + // --------------------------- TYPE DATA -------------------------- + + /// coeff.x = a0, coeff.y = gamma, coeff.z = cut + UCL_D_Vec coeff; + /// coeff2.x = power, coeff2.y = kappa, coeff2.z = powerT, coeff2.w = cutT + UCL_D_Vec coeff2; + + UCL_D_Vec kc, sc; + UCL_D_Vec cutsq; + + /// per-type array + UCL_D_Vec mass; + + /// Special LJ values + UCL_D_Vec sp_lj, sp_sqrt; + + /// If atom type constants fit in shared memory, use fast kernels + bool shared_types; + + /// Number of atom types + int _lj_types; + + /// Per-atom arrays + UCL_Vector Q; + int _max_q_size; + + int _power_flag, _kappa_flag; + + /// pointer to host data + double *edpd_temp, *edpd_cv; + + private: + bool _allocated; + int loop(const int eflag, const int vflag); +}; + +} + +#endif diff --git a/lib/gpu/lal_edpd_ext.cpp b/lib/gpu/lal_edpd_ext.cpp new file mode 100644 index 0000000000..a9f60c3941 --- /dev/null +++ b/lib/gpu/lal_edpd_ext.cpp @@ -0,0 +1,142 @@ +/*************************************************************************** + edpd_ext.cpp + ------------------- + Trung Dac Nguyen (U Chicago) + + Functions for LAMMPS access to edpd acceleration routines. 
+ + __________________________________________________________________________ + This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) + __________________________________________________________________________ + + begin : September 2023 + email : ndactrung@gmail.com + ***************************************************************************/ + +#include +#include +#include + +#include "lal_edpd.h" + +using namespace std; +using namespace LAMMPS_AL; + +static EDPD EDPDMF; + +// --------------------------------------------------------------------------- +// Allocate memory on host and device and copy constants to device +// --------------------------------------------------------------------------- +int edpd_gpu_init(const int ntypes, double **cutsq, double **host_a0, + double **host_gamma, double **host_cut, double **host_power, + double **host_kappa, double **host_powerT, double **host_cutT, + double ***host_sc, double ***host_kc, double *host_mass, + double *special_lj, const int power_flag, const int kappa_flag, + const int inum, const int nall, + const int max_nbors, const int maxspecial, + const double cell_size, int &gpu_mode, FILE *screen) { + EDPDMF.clear(); + gpu_mode=EDPDMF.device->gpu_mode(); + double gpu_split=EDPDMF.device->particle_split(); + int first_gpu=EDPDMF.device->first_device(); + int last_gpu=EDPDMF.device->last_device(); + int world_me=EDPDMF.device->world_me(); + int gpu_rank=EDPDMF.device->gpu_rank(); + int procs_per_gpu=EDPDMF.device->procs_per_gpu(); + + EDPDMF.device->init_message(screen,"edpd",first_gpu,last_gpu); + + bool message=false; + if (EDPDMF.device->replica_me()==0 && screen) + message=true; + + if (message) { + fprintf(screen,"Initializing Device and compiling on process 0..."); + fflush(screen); + } + + int init_ok=0; + if (world_me==0) + init_ok=EDPDMF.init(ntypes, cutsq, host_a0, host_gamma, host_cut, + host_power, host_kappa, host_powerT, + host_cutT, host_sc, host_kc, host_mass, + special_lj, power_flag, 
kappa_flag, + inum, nall, max_nbors, maxspecial, + cell_size, gpu_split, screen); + + EDPDMF.device->world_barrier(); + if (message) + fprintf(screen,"Done.\n"); + + for (int i=0; iserialize_init(); + if (message) + fprintf(screen,"Done.\n"); + } + if (message) + fprintf(screen,"\n"); + + if (init_ok==0) + EDPDMF.estimate_gpu_overhead(); + return init_ok; +} + +void edpd_gpu_clear() { + EDPDMF.clear(); +} + +int ** edpd_gpu_compute_n(const int ago, const int inum_full, const int nall, + double **host_x, int *host_type, double *sublo, + double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, + const bool eatom, const bool vatom, int &host_start, + int **ilist, int **jnum, const double cpu_time, bool &success, + double **host_v, const double dtinvsqrt, + const int seed, const int timestep, + double *boxlo, double *prd) { + return EDPDMF.compute(ago, inum_full, nall, host_x, host_type, sublo, + subhi, tag, nspecial, special, eflag, vflag, eatom, + vatom, host_start, ilist, jnum, cpu_time, success, + host_v, dtinvsqrt, seed, timestep, boxlo, prd); +} + +void edpd_gpu_compute(const int ago, const int inum_full, const int nall, + double **host_x, int *host_type, int *ilist, int *numj, + int **firstneigh, const bool eflag, const bool vflag, + const bool eatom, const bool vatom, int &host_start, + const double cpu_time, bool &success, tagint *tag, + double **host_v, const double dtinvsqrt, + const int seed, const int timestep, + const int nlocal, double *boxlo, double *prd) { + EDPDMF.compute(ago, inum_full, nall, host_x, host_type, ilist, numj, + firstneigh, eflag, vflag, eatom, vatom, host_start, cpu_time, success, + tag, host_v, dtinvsqrt, seed, timestep, nlocal, boxlo, prd); +} + +void edpd_gpu_get_extra_data(double *host_T, double *host_cv) { + EDPDMF.get_extra_data(host_T, host_cv); +} + +void edpd_gpu_update_flux(void **flux_ptr) { + EDPDMF.update_flux(flux_ptr); +} + +double edpd_gpu_bytes() { + return 
EDPDMF.host_memory_usage(); +} diff --git a/lib/gpu/lal_hippo.cpp b/lib/gpu/lal_hippo.cpp index 8d6ad5dfb2..3511d82b00 100644 --- a/lib/gpu/lal_hippo.cpp +++ b/lib/gpu/lal_hippo.cpp @@ -603,13 +603,7 @@ int HippoT::polar_real(const int eflag, const int vflag) { const int BX=this->block_size(); const int GX=static_cast(ceil(static_cast(ainum)/(BX/this->_threads_per_atom))); - /* - const int cus = this->device->gpu->cus(); - while (GX < cus && GX > 1) { - BX /= 2; - GX=static_cast(ceil(static_cast(ainum)/(BX/this->_threads_per_atom))); - } - */ + this->time_pair.start(); // Build the short neighbor list if not done yet diff --git a/lib/gpu/lal_lj_coul_long.h b/lib/gpu/lal_lj_coul_long.h index bc4fce40a5..ace5a26339 100644 --- a/lib/gpu/lal_lj_coul_long.h +++ b/lib/gpu/lal_lj_coul_long.h @@ -78,7 +78,7 @@ class LJCoulLong : public BaseCharge { numtyp _cut_coulsq, _qqrd2e, _g_ewald; - private: +protected: bool _allocated; int loop(const int eflag, const int vflag); }; diff --git a/lib/gpu/lal_lj_coul_long_soft.cpp b/lib/gpu/lal_lj_coul_long_soft.cpp new file mode 100644 index 0000000000..80eaaca94a --- /dev/null +++ b/lib/gpu/lal_lj_coul_long_soft.cpp @@ -0,0 +1,174 @@ +/*************************************************************************** + lj_coul_long_soft.cpp + ------------------- + Trung Nguyen (U Chicago) + + Class for acceleration of the lj/cut/coul/long/soft pair style. 
+ + __________________________________________________________________________ + This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) + __________________________________________________________________________ + + begin : + email : ndactrung@gmail.com + ***************************************************************************/ + +#if defined(USE_OPENCL) +#include "lj_coul_long_soft_cl.h" +#elif defined(USE_CUDART) +const char *lj_coul_long_soft=0; +#else +#include "lj_coul_long_soft_cubin.h" +#endif + +#include "lal_lj_coul_long_soft.h" +#include +namespace LAMMPS_AL { +#define LJCoulLongSoftT LJCoulLongSoft + +extern Device device; + +template +LJCoulLongSoftT::LJCoulLongSoft() : BaseCharge(), + _allocated(false) { +} + +template +LJCoulLongSoftT::~LJCoulLongSoft() { + clear(); +} + +template +int LJCoulLongSoftT::bytes_per_atom(const int max_nbors) const { + return this->bytes_per_atom_atomic(max_nbors); +} + +template +int LJCoulLongSoftT::init(const int ntypes, + double **host_cutsq, double **host_lj1, + double **host_lj2, double **host_lj3, + double **host_lj4, double **host_offset, double **host_epsilon, + double *host_special_lj, const int nlocal, + const int nall, const int max_nbors, + const int maxspecial, const double cell_size, + const double gpu_split, FILE *_screen, + double **host_cut_ljsq, const double host_cut_coulsq, + double *host_special_coul, const double qqrd2e, + const double g_ewald) { + int success; + success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split, + _screen,lj_coul_long_soft,"k_lj_coul_long_soft"); + if (success!=0) + return success; + + // If atom type constants fit in shared memory use fast kernel + int lj_types=ntypes; + shared_types=false; + int max_shared_types=this->device->max_shared_types(); + if (lj_types<=max_shared_types && this->_block_size>=max_shared_types) { + lj_types=max_shared_types; + shared_types=true; + } + _lj_types=lj_types; + + // Allocate a host write buffer for data 
initialization + UCL_H_Vec host_write(lj_types*lj_types*32,*(this->ucl_device), + UCL_WRITE_ONLY); + + for (int i=0; iucl_device),UCL_READ_ONLY); + this->atom->type_pack4(ntypes,lj_types,lj1,host_write,host_lj1,host_lj2, + host_cutsq, host_cut_ljsq); + + lj3.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY); + this->atom->type_pack4(ntypes,lj_types,lj3,host_write,host_lj3,host_lj4, + host_offset, host_epsilon); + + sp_lj.alloc(8,*(this->ucl_device),UCL_READ_ONLY); + for (int i=0; i<4; i++) { + host_write[i]=host_special_lj[i]; + host_write[i+4]=host_special_coul[i]; + } + ucl_copy(sp_lj,host_write,8,false); + + _cut_coulsq=host_cut_coulsq; + _qqrd2e=qqrd2e; + _g_ewald=g_ewald; + + _allocated=true; + this->_max_bytes=lj1.row_bytes()+lj3.row_bytes()+sp_lj.row_bytes(); + return 0; +} + +template +void LJCoulLongSoftT::reinit(const int ntypes, double **host_cutsq, double **host_lj1, + double **host_lj2, double **host_lj3, double **host_lj4, + double **host_offset, double **host_epsilon, double **host_cut_ljsq) { + // Allocate a host write buffer for data initialization + UCL_H_Vec host_write(_lj_types*_lj_types*32,*(this->ucl_device), + UCL_WRITE_ONLY); + + for (int i=0; i<_lj_types*_lj_types; i++) + host_write[i]=0.0; + + this->atom->type_pack4(ntypes,_lj_types,lj1,host_write,host_lj1,host_lj2, + host_cutsq, host_cut_ljsq); + this->atom->type_pack4(ntypes,_lj_types,lj3,host_write,host_lj3,host_lj4, + host_offset, host_epsilon); +} + +template +void LJCoulLongSoftT::clear() { + if (!_allocated) + return; + _allocated=false; + + lj1.clear(); + lj3.clear(); + sp_lj.clear(); + this->clear_atomic(); +} + +template +double LJCoulLongSoftT::host_memory_usage() const { + return this->host_memory_usage_atomic()+sizeof(LJCoulLongSoft); +} + +// --------------------------------------------------------------------------- +// Calculate energies, forces, and torques +// --------------------------------------------------------------------------- +template +int 
LJCoulLongSoftT::loop(const int eflag, const int vflag) { + // Compute the block size and grid size to keep all cores busy + const int BX=this->block_size(); + int GX=static_cast(ceil(static_cast(this->ans->inum())/ + (BX/this->_threads_per_atom))); + + int ainum=this->ans->inum(); + int nbor_pitch=this->nbor->nbor_pitch(); + this->time_pair.start(); + if (shared_types) { + this->k_pair_sel->set_size(GX,BX); + this->k_pair_sel->run(&this->atom->x, &lj1, &lj3, &sp_lj, + &this->nbor->dev_nbor, &this->_nbor_data->begin(), + &this->ans->force, &this->ans->engv, &eflag, + &vflag, &ainum, &nbor_pitch, &this->atom->q, + &_cut_coulsq, &_qqrd2e, &_g_ewald, + &this->_threads_per_atom); + } else { + this->k_pair.set_size(GX,BX); + this->k_pair.run(&this->atom->x, &lj1, &lj3, + &_lj_types, &sp_lj, &this->nbor->dev_nbor, + &this->_nbor_data->begin(), &this->ans->force, + &this->ans->engv, &eflag, &vflag, &ainum, + &nbor_pitch, &this->atom->q, &_cut_coulsq, + &_qqrd2e, &_g_ewald, &this->_threads_per_atom); + } + this->time_pair.stop(); + return GX; +} + +template class LJCoulLongSoft; +} diff --git a/lib/gpu/lal_lj_coul_long_soft.cu b/lib/gpu/lal_lj_coul_long_soft.cu new file mode 100644 index 0000000000..e311bb5d3b --- /dev/null +++ b/lib/gpu/lal_lj_coul_long_soft.cu @@ -0,0 +1,290 @@ +// ************************************************************************** +// lj_coul_long_soft.cu +// ------------------- +// Trung Nguyen (U Chicago) +// +// Device code for acceleration of the lj/cut/coul/long/soft pair style +// +// __________________________________________________________________________ +// This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) +// __________________________________________________________________________ +// +// begin : +// email : ndactrung@gmail.com +// *************************************************************************** + +#if defined(NV_KERNEL) || defined(USE_HIP) + +#include "lal_aux_fun1.h" +#ifndef _DOUBLE_DOUBLE +_texture( 
pos_tex,float4); +_texture( q_tex,float); +#else +_texture_2d( pos_tex,int4); +_texture( q_tex,int2); +#endif + +#else +#define pos_tex x_ +#define q_tex q_ +#endif + +__kernel void k_lj_coul_long_soft(const __global numtyp4 *restrict x_, + const __global numtyp4 *restrict lj1, + const __global numtyp4 *restrict lj3, + const int lj_types, + const __global numtyp *restrict sp_lj_in, + const __global int *dev_nbor, + const __global int *dev_packed, + __global acctyp3 *restrict ans, + __global acctyp *restrict engv, + const int eflag, const int vflag, const int inum, + const int nbor_pitch, + const __global numtyp *restrict q_, + const numtyp cut_coulsq, const numtyp qqrd2e, + const numtyp g_ewald, const int t_per_atom) { + int tid, ii, offset; + atom_info(t_per_atom,ii,tid,offset); + + __local numtyp sp_lj[8]; + int n_stride; + local_allocate_store_charge(); + + sp_lj[0]=sp_lj_in[0]; + sp_lj[1]=sp_lj_in[1]; + sp_lj[2]=sp_lj_in[2]; + sp_lj[3]=sp_lj_in[3]; + sp_lj[4]=sp_lj_in[4]; + sp_lj[5]=sp_lj_in[5]; + sp_lj[6]=sp_lj_in[6]; + sp_lj[7]=sp_lj_in[7]; + + acctyp3 f; + f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0; + acctyp energy, e_coul, virial[6]; + if (EVFLAG) { + energy=(acctyp)0; + e_coul=(acctyp)0; + for (int i=0; i<6; i++) virial[i]=(acctyp)0; + } + + if (ii +class LJCoulLongSoft : public BaseCharge { + public: + LJCoulLongSoft(); + ~LJCoulLongSoft(); + + /// Clear any previous data and set up for a new LAMMPS run + /** \param max_nbors initial number of rows in the neighbor matrix + * \param cell_size cutoff + skin + * \param gpu_split fraction of particles handled by device + * + * Returns: + * - 0 if successful + * - -1 if fix gpu not found + * - -3 if there is an out of memory error + * - -4 if the GPU library was not compiled for GPU + * - -5 Double precision is not supported on card **/ + int init(const int ntypes, double **host_cutsq, + double **host_lj1, double **host_lj2, double **host_lj3, + double **host_lj4, double **host_offset, double **host_epsilon, 
double *host_special_lj, + const int nlocal, const int nall, const int max_nbors, + const int maxspecial, const double cell_size, + const double gpu_split, FILE *screen, double **host_cut_ljsq, + const double host_cut_coulsq, double *host_special_coul, + const double qqrd2e, const double g_ewald); + + /// Send updated coeffs from host to device (to be compatible with fix adapt) + void reinit(const int ntypes, double **host_cutsq, + double **host_lj1, double **host_lj2, double **host_lj3, + double **host_lj4, double **host_offset, double **host_epsilon, double **host_cut_ljsq); + + /// Clear all host and device data + /** \note This is called at the beginning of the init() routine **/ + void clear(); + + /// Returns memory usage on device per atom + int bytes_per_atom(const int max_nbors) const; + + /// Total host memory used by library for pair style + double host_memory_usage() const; + + // --------------------------- TYPE DATA -------------------------- + + /// lj1.x = lj1, lj1.y = lj2, lj1.z = cutsq, lj1.w = cutsq_vdw + UCL_D_Vec lj1; + /// lj3.x = lj3, lj3.y = lj4, lj3.z = offset, lj3.w = epsilon + UCL_D_Vec lj3; + /// Special LJ values [0-3] and Special Coul values [4-7] + UCL_D_Vec sp_lj; + + /// If atom type constants fit in shared memory, use fast kernels + bool shared_types; + + /// Number of atom types + int _lj_types; + + numtyp _cut_coulsq, _qqrd2e, _g_ewald; + +protected: + bool _allocated; + int loop(const int eflag, const int vflag); +}; + +} + +#endif diff --git a/lib/gpu/lal_lj_coul_long_soft_ext.cpp b/lib/gpu/lal_lj_coul_long_soft_ext.cpp new file mode 100644 index 0000000000..cb2657c03b --- /dev/null +++ b/lib/gpu/lal_lj_coul_long_soft_ext.cpp @@ -0,0 +1,151 @@ +/*************************************************************************** + lj_coul_long_soft_ext.cpp + ------------------------- + Trung Nguyen (U Chicago) + + Functions for LAMMPS access to lj/cut/coul/long/soft acceleration routines. 
+ + __________________________________________________________________________ + This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) + __________________________________________________________________________ + + begin : + email : ndactrung@gmail.com + ***************************************************************************/ + +#include +#include +#include + +#include "lal_lj_coul_long_soft.h" + +using namespace std; +using namespace LAMMPS_AL; + +static LJCoulLongSoft LJCLSMF; + +// --------------------------------------------------------------------------- +// Allocate memory on host and device and copy constants to device +// --------------------------------------------------------------------------- +int ljcls_gpu_init(const int ntypes, double **cutsq, double **host_lj1, + double **host_lj2, double **host_lj3, double **host_lj4, + double **offset, double **epsilon, double *special_lj, const int inum, + const int nall, const int max_nbors, const int maxspecial, + const double cell_size, int &gpu_mode, FILE *screen, + double **host_cut_ljsq, double host_cut_coulsq, + double *host_special_coul, const double qqrd2e, + const double g_ewald) { + LJCLSMF.clear(); + gpu_mode=LJCLSMF.device->gpu_mode(); + double gpu_split=LJCLSMF.device->particle_split(); + int first_gpu=LJCLSMF.device->first_device(); + int last_gpu=LJCLSMF.device->last_device(); + int world_me=LJCLSMF.device->world_me(); + int gpu_rank=LJCLSMF.device->gpu_rank(); + int procs_per_gpu=LJCLSMF.device->procs_per_gpu(); + + LJCLSMF.device->init_message(screen,"lj/cut/coul/long/soft",first_gpu,last_gpu); + + bool message=false; + if (LJCLSMF.device->replica_me()==0 && screen) + message=true; + + if (message) { + fprintf(screen,"Initializing Device and compiling on process 0..."); + fflush(screen); + } + + int init_ok=0; + if (world_me==0) + init_ok=LJCLSMF.init(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4, + offset, epsilon, special_lj, inum, nall, max_nbors, maxspecial, + 
cell_size, gpu_split, screen, host_cut_ljsq, + host_cut_coulsq, host_special_coul, qqrd2e, g_ewald); + + LJCLSMF.device->world_barrier(); + if (message) + fprintf(screen,"Done.\n"); + + for (int i=0; igpu_barrier(); + if (message) + fprintf(screen,"Done.\n"); + } + if (message) + fprintf(screen,"\n"); + + if (init_ok==0) + LJCLSMF.estimate_gpu_overhead(); + return init_ok; +} + +// --------------------------------------------------------------------------- +// Copy updated coeffs from host to device +// --------------------------------------------------------------------------- +void ljcls_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1, + double **host_lj2, double **host_lj3, double **host_lj4, + double **offset, double **epsilon, double **host_cut_ljsq) { + int world_me=LJCLSMF.device->world_me(); + int gpu_rank=LJCLSMF.device->gpu_rank(); + int procs_per_gpu=LJCLSMF.device->procs_per_gpu(); + + if (world_me==0) + LJCLSMF.reinit(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4, + offset, epsilon, host_cut_ljsq); + LJCLSMF.device->world_barrier(); + + for (int i=0; igpu_barrier(); + } +} + +void ljcls_gpu_clear() { + LJCLSMF.clear(); +} + +int** ljcls_gpu_compute_n(const int ago, const int inum_full, + const int nall, double **host_x, int *host_type, + double *sublo, double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, + const bool eatom, const bool vatom, int &host_start, + int **ilist, int **jnum, const double cpu_time, + bool &success, double *host_q, double *boxlo, + double *prd) { + return LJCLSMF.compute(ago, inum_full, nall, host_x, host_type, sublo, + subhi, tag, nspecial, special, eflag, vflag, eatom, + vatom, host_start, ilist, jnum, cpu_time, success, + host_q, boxlo, prd); +} + +void ljcls_gpu_compute(const int ago, const int inum_full, const int nall, + double **host_x, int *host_type, int *ilist, int *numj, + int **firstneigh, const bool eflag, const bool vflag, + const bool eatom, 
const bool vatom, int &host_start, + const double cpu_time, bool &success, double *host_q, + const int nlocal, double *boxlo, double *prd) { + LJCLSMF.compute(ago,inum_full,nall,host_x,host_type,ilist,numj, + firstneigh,eflag,vflag,eatom,vatom,host_start,cpu_time,success, + host_q,nlocal,boxlo,prd); +} + +double ljcls_gpu_bytes() { + return LJCLSMF.host_memory_usage(); +} + diff --git a/lib/gpu/lal_lj_coul_soft.cpp b/lib/gpu/lal_lj_coul_soft.cpp new file mode 100644 index 0000000000..9ee6486817 --- /dev/null +++ b/lib/gpu/lal_lj_coul_soft.cpp @@ -0,0 +1,157 @@ +/*************************************************************************** + lj_coul_soft.cpp + ------------------- + Trung Nguyen (U Chicago) + + Class for acceleration of the lj/cut/coul/cut/soft pair style. + + __________________________________________________________________________ + This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) + __________________________________________________________________________ + + begin : + email : ndtrung@uchicago.edu + ***************************************************************************/ + +#if defined(USE_OPENCL) +#include "lj_coul_soft_cl.h" +#elif defined(USE_CUDART) +const char *lj_coul_soft=0; +#else +#include "lj_coul_soft_cubin.h" +#endif + +#include "lal_lj_coul_soft.h" +#include +namespace LAMMPS_AL { +#define LJCoulSoftT LJCoulSoft + +extern Device device; + +template +LJCoulSoftT::LJCoulSoft() : BaseCharge(), + _allocated(false) { +} + +template +LJCoulSoftT::~LJCoulSoft() { + clear(); +} + +template +int LJCoulSoftT::bytes_per_atom(const int max_nbors) const { + return this->bytes_per_atom_atomic(max_nbors); +} + +template +int LJCoulSoftT::init(const int ntypes, + double **host_cutsq, double **host_lj1, + double **host_lj2, double **host_lj3, + double **host_lj4, double **host_offset, double **host_epsilon, + double *host_special_lj, const int nlocal, + const int nall, const int max_nbors, + const int maxspecial, const double 
cell_size, + const double gpu_split, FILE *_screen, + double **host_cut_ljsq, double **host_cut_coulsq, + double *host_special_coul, const double qqrd2e) { + int success; + success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split, + _screen,lj_coul_soft,"k_lj_coul_soft"); + if (success!=0) + return success; + + // If atom type constants fit in shared memory use fast kernel + int lj_types=ntypes; + shared_types=false; + int max_shared_types=this->device->max_shared_types(); + if (lj_types<=max_shared_types && this->_block_size>=max_shared_types) { + lj_types=max_shared_types; + shared_types=true; + } + _lj_types=lj_types; + + // Allocate a host write buffer for data initialization + UCL_H_Vec host_write(lj_types*lj_types*32,*(this->ucl_device), + UCL_WRITE_ONLY); + + for (int i=0; iucl_device),UCL_READ_ONLY); + this->atom->type_pack4(ntypes,lj_types,lj1,host_write,host_lj1,host_lj2, + host_cut_ljsq, host_cut_coulsq); + + lj3.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY); + this->atom->type_pack4(ntypes,lj_types,lj3,host_write,host_lj3,host_lj4, + host_offset, host_epsilon); + + cutsq.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY); + this->atom->type_pack1(ntypes,lj_types,cutsq,host_write,host_cutsq); + + sp_lj.alloc(8,*(this->ucl_device),UCL_READ_ONLY); + for (int i=0; i<4; i++) { + host_write[i]=host_special_lj[i]; + host_write[i+4]=host_special_coul[i]; + } + ucl_copy(sp_lj,host_write,8,false); + + _qqrd2e=qqrd2e; + + _allocated=true; + this->_max_bytes=lj1.row_bytes()+lj3.row_bytes()+cutsq.row_bytes()+ + sp_lj.row_bytes(); + return 0; +} + +template +void LJCoulSoftT::clear() { + if (!_allocated) + return; + _allocated=false; + + lj1.clear(); + lj3.clear(); + cutsq.clear(); + sp_lj.clear(); + this->clear_atomic(); +} + +template +double LJCoulSoftT::host_memory_usage() const { + return this->host_memory_usage_atomic()+sizeof(LJCoulSoft); +} + +// 
--------------------------------------------------------------------------- +// Calculate energies, forces, and torques +// --------------------------------------------------------------------------- +template +int LJCoulSoftT::loop(const int eflag, const int vflag) { + // Compute the block size and grid size to keep all cores busy + const int BX=this->block_size(); + int GX=static_cast(ceil(static_cast(this->ans->inum())/ + (BX/this->_threads_per_atom))); + + int ainum=this->ans->inum(); + int nbor_pitch=this->nbor->nbor_pitch(); + this->time_pair.start(); + if (shared_types) { + this->k_pair_sel->set_size(GX,BX); + this->k_pair_sel->run(&this->atom->x, &lj1, &lj3, &sp_lj, + &this->nbor->dev_nbor, &this->_nbor_data->begin(), + &this->ans->force, &this->ans->engv, &eflag, + &vflag, &ainum, &nbor_pitch, &this->atom->q, + &cutsq, &_qqrd2e, &this->_threads_per_atom); + } else { + this->k_pair.set_size(GX,BX); + this->k_pair.run(&this->atom->x, &lj1, &lj3, &_lj_types, &sp_lj, + &this->nbor->dev_nbor, &this->_nbor_data->begin(), + &this->ans->force, &this->ans->engv, + &eflag, &vflag, &ainum, &nbor_pitch, &this->atom->q, + &cutsq, &_qqrd2e, &this->_threads_per_atom); + } + this->time_pair.stop(); + return GX; +} + +template class LJCoulSoft; +} diff --git a/lib/gpu/lal_lj_coul_soft.cu b/lib/gpu/lal_lj_coul_soft.cu new file mode 100644 index 0000000000..1fc564bde6 --- /dev/null +++ b/lib/gpu/lal_lj_coul_soft.cu @@ -0,0 +1,276 @@ +// ************************************************************************** +// lj_coul_soft.cu +// ------------------- +// Trung Nguyen (U Chicago) +// +// Device code for acceleration of the lj/coul/cut/soft pair style +// +// __________________________________________________________________________ +// This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) +// __________________________________________________________________________ +// +// begin : +// email : ndtrung@uchicago.edu +// 
*************************************************************************** + +#if defined(NV_KERNEL) || defined(USE_HIP) + +#include "lal_aux_fun1.h" +#ifndef _DOUBLE_DOUBLE +_texture( pos_tex,float4); +_texture( q_tex,float); +#else +_texture_2d( pos_tex,int4); +_texture( q_tex,int2); +#endif + +#else +#define pos_tex x_ +#define q_tex q_ +#endif + +__kernel void k_lj_coul_soft(const __global numtyp4 *restrict x_, + const __global numtyp4 *restrict lj1, + const __global numtyp4 *restrict lj3, + const int lj_types, + const __global numtyp *restrict sp_lj_in, + const __global int *dev_nbor, + const __global int *dev_packed, + __global acctyp3 *restrict ans, + __global acctyp *restrict engv, + const int eflag, const int vflag, const int inum, + const int nbor_pitch, + const __global numtyp *restrict q_, + const __global numtyp *restrict cutsq, + const numtyp qqrd2e, const int t_per_atom) { + int tid, ii, offset; + atom_info(t_per_atom,ii,tid,offset); + + __local numtyp sp_lj[8]; + int n_stride; + local_allocate_store_charge(); + + sp_lj[0]=sp_lj_in[0]; + sp_lj[1]=sp_lj_in[1]; + sp_lj[2]=sp_lj_in[2]; + sp_lj[3]=sp_lj_in[3]; + sp_lj[4]=sp_lj_in[4]; + sp_lj[5]=sp_lj_in[5]; + sp_lj[6]=sp_lj_in[6]; + sp_lj[7]=sp_lj_in[7]; + + acctyp3 f; + f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0; + acctyp energy, e_coul, virial[6]; + if (EVFLAG) { + energy=(acctyp)0; + e_coul=(acctyp)0; + for (int i=0; i<6; i++) virial[i]=(acctyp)0; + } + + if (ii +class LJCoulSoft : public BaseCharge { + public: + LJCoulSoft(); + ~LJCoulSoft(); + + /// Clear any previous data and set up for a new LAMMPS run + /** \param max_nbors initial number of rows in the neighbor matrix + * \param cell_size cutoff + skin + * \param gpu_split fraction of particles handled by device + * + * Returns: + * - 0 if successful + * - -1 if fix gpu not found + * - -3 if there is an out of memory error + * - -4 if the GPU library was not compiled for GPU + * - -5 Double precision is not supported on card **/ + int 
init(const int ntypes, double **host_cutsq, double **host_lj1, + double **host_lj2, double **host_lj3, double **host_lj4, + double **host_offset, double **host_epsilon, double *host_special_lj, + const int nlocal, const int nall, const int max_nbors, + const int maxspecial, const double cell_size, + const double gpu_split, FILE *screen, double **host_cut_ljsq, + double **host_cut_coulsq, double *host_special_coul, + const double qqrd2e); + + /// Clear all host and device data + /** \note This is called at the beginning of the init() routine **/ + void clear(); + + /// Returns memory usage on device per atom + int bytes_per_atom(const int max_nbors) const; + + /// Total host memory used by library for pair style + double host_memory_usage() const; + + // --------------------------- TYPE DATA -------------------------- + + /// lj1.x = lj1, lj1.y = lj2, lj1.z = cutsq_vdw, lj1.w = cutsq_coul + UCL_D_Vec lj1; + /// lj3.x = lj3, lj3.y = lj4, lj3.z = offset, lj3.w = epsilon + UCL_D_Vec lj3; + /// cutsq + UCL_D_Vec cutsq; + /// Special LJ values [0-3] and Special Coul values [4-7] + UCL_D_Vec sp_lj; + + /// If atom type constants fit in shared memory, use fast kernels + bool shared_types; + + /// Number of atom types + int _lj_types; + + numtyp _qqrd2e; + + private: + bool _allocated; + int loop(const int eflag, const int vflag); +}; + +} + +#endif diff --git a/lib/gpu/lal_lj_coul_soft_ext.cpp b/lib/gpu/lal_lj_coul_soft_ext.cpp new file mode 100644 index 0000000000..02d367b3c7 --- /dev/null +++ b/lib/gpu/lal_lj_coul_soft_ext.cpp @@ -0,0 +1,128 @@ +/*************************************************************************** + lj_coul_soft_ext.cpp + ------------------- + Trung Nguyen (U Chicago) + + Functions for LAMMPS access to lj/cut/coul/cut/soft acceleration routines. 
+ + __________________________________________________________________________ + This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) + __________________________________________________________________________ + + begin : + email : ndtrung@uchicago.edu + ***************************************************************************/ + +#include +#include +#include + +#include "lal_lj_coul_soft.h" + +using namespace std; +using namespace LAMMPS_AL; + +static LJCoulSoft LJCSMF; + +// --------------------------------------------------------------------------- +// Allocate memory on host and device and copy constants to device +// --------------------------------------------------------------------------- +int ljcs_gpu_init(const int ntypes, double **cutsq, double **host_lj1, + double **host_lj2, double **host_lj3, double **host_lj4, + double **offset, double **epsilon, double *special_lj, const int inum, + const int nall, const int max_nbors, const int maxspecial, + const double cell_size, int &gpu_mode, FILE *screen, + double **host_cut_ljsq, double **host_cut_coulsq, + double *host_special_coul, const double qqrd2e) { + LJCSMF.clear(); + gpu_mode=LJCSMF.device->gpu_mode(); + double gpu_split=LJCSMF.device->particle_split(); + int first_gpu=LJCSMF.device->first_device(); + int last_gpu=LJCSMF.device->last_device(); + int world_me=LJCSMF.device->world_me(); + int gpu_rank=LJCSMF.device->gpu_rank(); + int procs_per_gpu=LJCSMF.device->procs_per_gpu(); + + LJCSMF.device->init_message(screen,"lj/cut/coul/cut/soft",first_gpu,last_gpu); + + bool message=false; + if (LJCSMF.device->replica_me()==0 && screen) + message=true; + + if (message) { + fprintf(screen,"Initializing Device and compiling on process 0..."); + fflush(screen); + } + + int init_ok=0; + if (world_me==0) + init_ok=LJCSMF.init(ntypes, cutsq, host_lj1, host_lj2, host_lj3, + host_lj4, offset, epsilon, special_lj, inum, nall, max_nbors, + maxspecial, cell_size, gpu_split, screen, host_cut_ljsq, + 
host_cut_coulsq, host_special_coul, qqrd2e); + + LJCSMF.device->world_barrier(); + if (message) + fprintf(screen,"Done.\n"); + + for (int i=0; igpu_barrier(); + if (message) + fprintf(screen,"Done.\n"); + } + if (message) + fprintf(screen,"\n"); + + if (init_ok==0) + LJCSMF.estimate_gpu_overhead(); + return init_ok; +} + +void ljcs_gpu_clear() { + LJCSMF.clear(); +} + +int** ljcs_gpu_compute_n(const int ago, const int inum_full, + const int nall, double **host_x, int *host_type, + double *sublo, double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, + const bool eatom, const bool vatom, int &host_start, + int **ilist, int **jnum, const double cpu_time, + bool &success, double *host_q, double *boxlo, + double *prd) { + return LJCSMF.compute(ago, inum_full, nall, host_x, host_type, sublo, + subhi, tag, nspecial, special, eflag, vflag, eatom, + vatom, host_start, ilist, jnum, cpu_time, success, + host_q, boxlo, prd); +} + +void ljcs_gpu_compute(const int ago, const int inum_full, const int nall, + double **host_x, int *host_type, int *ilist, int *numj, + int **firstneigh, const bool eflag, const bool vflag, + const bool eatom, const bool vatom, int &host_start, + const double cpu_time, bool &success, double *host_q, + const int nlocal, double *boxlo, double *prd) { + LJCSMF.compute(ago,inum_full,nall,host_x,host_type,ilist,numj,firstneigh,eflag, + vflag,eatom,vatom,host_start,cpu_time,success,host_q, + nlocal,boxlo,prd); +} + +double ljcs_gpu_bytes() { + return LJCSMF.host_memory_usage(); +} + + diff --git a/lib/gpu/lal_mdpd.cpp b/lib/gpu/lal_mdpd.cpp new file mode 100644 index 0000000000..16cf926df8 --- /dev/null +++ b/lib/gpu/lal_mdpd.cpp @@ -0,0 +1,218 @@ +/*************************************************************************** + mdpd.cpp + ------------------- + Trung Dac Nguyen (U Chicago) + + Class for acceleration of the mdpd pair style. 
+ + __________________________________________________________________________ + This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) + __________________________________________________________________________ + + begin : September 2023 + email : ndactrung@gmail.com + ***************************************************************************/ + +#if defined(USE_OPENCL) +#include "mdpd_cl.h" +#elif defined(USE_CUDART) +const char *mdpd=0; +#else +#include "mdpd_cubin.h" +#endif + +#include "lal_mdpd.h" +#include +namespace LAMMPS_AL { +#define MDPDT MDPD + +extern Device device; + +template +MDPDT::MDPD() : BaseDPD(), _allocated(false) { +} + +template +MDPDT::~MDPD() { + clear(); +} + +template +int MDPDT::bytes_per_atom(const int max_nbors) const { + return this->bytes_per_atom_atomic(max_nbors); +} + +template +int MDPDT::init(const int ntypes, + double **host_cutsq, double **host_A_att, double **host_B_rep, + double **host_gamma, double **host_sigma, + double **host_cut, double **host_cut_r, + double *host_special_lj, const int nlocal, const int nall, + const int max_nbors, const int maxspecial, + const double cell_size, + const double gpu_split, FILE *_screen) { + const int max_shared_types=this->device->max_shared_types(); + + int onetype=0; + #ifdef USE_OPENCL + if (maxspecial==0) + for (int i=1; i0) { + if (onetype>0) + onetype=-1; + else if (onetype==0) + onetype=i*max_shared_types+j; + } + if (onetype<0) onetype=0; + #endif + + int success; + int extra_fields = 4; // round up to accomodate quadruples of numtyp values + // rho + success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size, + gpu_split,_screen,mdpd,"k_mdpd",onetype,extra_fields); + if (success!=0) + return success; + + // If atom type constants fit in shared memory use fast kernel + int lj_types=ntypes; + shared_types=false; + if (lj_types<=max_shared_types && this->_block_size>=max_shared_types) { + lj_types=max_shared_types; + shared_types=true; + } + 
_lj_types=lj_types; + + // Allocate a host write buffer for data initialization + UCL_H_Vec host_write(lj_types*lj_types*32,*(this->ucl_device), + UCL_WRITE_ONLY); + + for (int i=0; iucl_device),UCL_READ_ONLY); + this->atom->type_pack4(ntypes,lj_types,coeff,host_write,host_A_att,host_B_rep, + host_gamma,host_sigma); + + coeff2.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY); + this->atom->type_pack4(ntypes,lj_types,coeff2,host_write,host_cut,host_cut_r, + host_cutsq); + + UCL_H_Vec host_rsq(lj_types*lj_types,*(this->ucl_device), + UCL_WRITE_ONLY); + cutsq.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY); + this->atom->type_pack1(ntypes,lj_types,cutsq,host_rsq,host_cutsq); + + double special_sqrt[4]; + special_sqrt[0] = sqrt(host_special_lj[0]); + special_sqrt[1] = sqrt(host_special_lj[1]); + special_sqrt[2] = sqrt(host_special_lj[2]); + special_sqrt[3] = sqrt(host_special_lj[3]); + + UCL_H_Vec dview; + sp_lj.alloc(4,*(this->ucl_device),UCL_READ_ONLY); + dview.view(host_special_lj,4,*(this->ucl_device)); + ucl_copy(sp_lj,dview,false); + sp_sqrt.alloc(4,*(this->ucl_device),UCL_READ_ONLY); + dview.view(special_sqrt,4,*(this->ucl_device)); + ucl_copy(sp_sqrt,dview,false); + + // allocate per-atom array Q + + int ef_nall=nall; + if (ef_nall==0) + ef_nall=2000; + + _allocated=true; + this->_max_bytes=coeff.row_bytes()+coeff2.row_bytes()+cutsq.row_bytes()+ + sp_lj.row_bytes()+sp_sqrt.row_bytes(); + return 0; +} + +template +void MDPDT::clear() { + if (!_allocated) + return; + _allocated=false; + + coeff.clear(); + coeff2.clear(); + cutsq.clear(); + sp_lj.clear(); + sp_sqrt.clear(); + this->clear_atomic(); +} + +template +double MDPDT::host_memory_usage() const { + return this->host_memory_usage_atomic()+sizeof(MDPD); +} + +// --------------------------------------------------------------------------- +// Calculate energies, forces, and torques +// --------------------------------------------------------------------------- +template +int 
MDPDT::loop(const int eflag, const int vflag) { + + int nall = this->atom->nall(); + + // signal that we need to transfer extra data from the host + + this->atom->extra_data_unavail(); + + numtyp4 *pextra=reinterpret_cast(&(this->atom->extra[0])); + + int n = 0; + int nstride = 1; + for (int i = 0; i < nall; i++) { + int idx = n+i*nstride; + numtyp4 v; + v.x = mdpd_rho[i]; + v.y = 0; + v.z = 0; + v.w = 0; + pextra[idx] = v; + } + this->atom->add_extra_data(); + + // Compute the block size and grid size to keep all cores busy + const int BX=this->block_size(); + int GX=static_cast(ceil(static_cast(this->ans->inum())/ + (BX/this->_threads_per_atom))); + + + int ainum=this->ans->inum(); + int nbor_pitch=this->nbor->nbor_pitch(); + this->time_pair.start(); + if (shared_types) { + this->k_pair_sel->set_size(GX,BX); + this->k_pair_sel->run(&this->atom->x, &this->atom->extra, &coeff, &coeff2, + &sp_lj, &sp_sqrt, &this->nbor->dev_nbor, &this->_nbor_data->begin(), + &this->ans->force, &this->ans->engv, &eflag, &vflag, + &ainum, &nbor_pitch, &this->atom->v, &cutsq, &this->_dtinvsqrt, &this->_seed, + &this->_timestep, &this->_threads_per_atom); + } else { + this->k_pair.set_size(GX,BX); + this->k_pair.run(&this->atom->x, &this->atom->extra, &coeff, &coeff2, + &_lj_types, &sp_lj, &sp_sqrt, &this->nbor->dev_nbor, &this->_nbor_data->begin(), + &this->ans->force, &this->ans->engv, &eflag, &vflag, + &ainum, &nbor_pitch, &this->atom->v, &cutsq, &this->_dtinvsqrt, &this->_seed, + &this->_timestep, &this->_threads_per_atom); + } + + this->time_pair.stop(); + return GX; +} + +// --------------------------------------------------------------------------- +// Get the extra data pointers from host +// --------------------------------------------------------------------------- + +template +void MDPDT::get_extra_data(double *host_rho) { + mdpd_rho = host_rho; +} + +template class MDPD; +} diff --git a/lib/gpu/lal_mdpd.cu b/lib/gpu/lal_mdpd.cu new file mode 100644 index 
0000000000..6230cb2496 --- /dev/null +++ b/lib/gpu/lal_mdpd.cu @@ -0,0 +1,475 @@ +// ************************************************************************** +// mdpd.cu +// ------------------- +// Trung Dac Nguyen (ORNL) +// +// Device code for acceleration of the mdpd pair style +// +// __________________________________________________________________________ +// This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) +// __________________________________________________________________________ +// +// begin : December 2023 +// email : ndactrung@gmail.com +// *************************************************************************** + +#if defined(NV_KERNEL) || defined(USE_HIP) +#include "lal_aux_fun1.h" +#ifndef _DOUBLE_DOUBLE +_texture( pos_tex,float4); +_texture( vel_tex,float4); +#else +_texture_2d( pos_tex,int4); +_texture_2d( vel_tex,int4); +#endif +#else +#define pos_tex x_ +#define vel_tex v_ +#endif + +#define EPSILON (numtyp)1.0e-10 + +//#define _USE_UNIFORM_SARU_LCG +//#define _USE_UNIFORM_SARU_TEA8 +//#define _USE_GAUSSIAN_SARU_LCG + +#if !defined(_USE_UNIFORM_SARU_LCG) && !defined(_USE_UNIFORM_SARU_TEA8) && !defined(_USE_GAUSSIAN_SARU_LCG) +#define _USE_UNIFORM_SARU_LCG +#endif + +// References: +// 1. Y. Afshar, F. Schmid, A. Pishevar, S. Worley, Comput. Phys. Comm. 184 (2013), 1119–1128. +// 2. C. L. Phillips, J. A. Anderson, S. C. Glotzer, J. Comput. Phys. 230 (2011), 7191-7201. +// PRNG period = 3666320093*2^32 ~ 2^64 ~ 10^19 + +#define LCGA 0x4beb5d59 /* Full period 32 bit LCG */ +#define LCGC 0x2600e1f7 +#define oWeylPeriod 0xda879add /* Prime period 3666320093 */ +#define oWeylOffset 0x8009d14b +#define TWO_N32 0.232830643653869628906250e-9f /* 2^-32 */ + +// specifically implemented for steps = 1; high = 1.0; low = -1.0 +// returns uniformly distributed random numbers u in [-1.0;1.0] +// using the inherent LCG, then multiply u with sqrt(3) to "match" +// with a normal random distribution. +// Afshar et al. 
multiplies u in [-0.5;0.5] with sqrt(12) +// Curly brackets to make variables local to the scope. +#ifdef _USE_UNIFORM_SARU_LCG +#define SQRT3 (numtyp)1.7320508075688772935274463 +#define saru(seed1, seed2, seed, timestep, randnum) { \ + unsigned int seed3 = seed + timestep; \ + seed3^=(seed1<<7)^(seed2>>6); \ + seed2+=(seed1>>4)^(seed3>>15); \ + seed1^=(seed2<<9)+(seed3<<8); \ + seed3^=0xA5366B4D*((seed2>>11) ^ (seed1<<1)); \ + seed2+=0x72BE1579*((seed1<<4) ^ (seed3>>16)); \ + seed1^=0x3F38A6ED*((seed3>>5) ^ (((signed int)seed2)>>22)); \ + seed2+=seed1*seed3; \ + seed1+=seed3 ^ (seed2>>2); \ + seed2^=((signed int)seed2)>>17; \ + unsigned int state = 0x79dedea3*(seed1^(((signed int)seed1)>>14)); \ + unsigned int wstate = (state + seed2) ^ (((signed int)state)>>8); \ + state = state + (wstate*(wstate^0xdddf97f5)); \ + wstate = 0xABCB96F7 + (wstate>>1); \ + state = LCGA*state + LCGC; \ + wstate = wstate + oWeylOffset+((((signed int)wstate)>>31) & oWeylPeriod); \ + unsigned int v = (state ^ (state>>26)) + wstate; \ + unsigned int s = (signed int)((v^(v>>20))*0x6957f5a7); \ + randnum = SQRT3*(s*TWO_N32*(numtyp)2.0-(numtyp)1.0); \ +} +#endif + +// specifically implemented for steps = 1; high = 1.0; low = -1.0 +// returns uniformly distributed random numbers u in [-1.0;1.0] using TEA8 +// then multiply u with sqrt(3) to "match" with a normal random distribution +// Afshar et al. 
multiplies u in [-0.5;0.5] with sqrt(12) +#ifdef _USE_UNIFORM_SARU_TEA8 +#define SQRT3 (numtyp)1.7320508075688772935274463 +#define k0 0xA341316C +#define k1 0xC8013EA4 +#define k2 0xAD90777D +#define k3 0x7E95761E +#define delta 0x9e3779b9 +#define rounds 8 +#define saru(seed1, seed2, seed, timestep, randnum) { \ + unsigned int seed3 = seed + timestep; \ + seed3^=(seed1<<7)^(seed2>>6); \ + seed2+=(seed1>>4)^(seed3>>15); \ + seed1^=(seed2<<9)+(seed3<<8); \ + seed3^=0xA5366B4D*((seed2>>11) ^ (seed1<<1)); \ + seed2+=0x72BE1579*((seed1<<4) ^ (seed3>>16)); \ + seed1^=0x3F38A6ED*((seed3>>5) ^ (((signed int)seed2)>>22)); \ + seed2+=seed1*seed3; \ + seed1+=seed3 ^ (seed2>>2); \ + seed2^=((signed int)seed2)>>17; \ + unsigned int state = 0x79dedea3*(seed1^(((signed int)seed1)>>14)); \ + unsigned int wstate = (state + seed2) ^ (((signed int)state)>>8); \ + state = state + (wstate*(wstate^0xdddf97f5)); \ + wstate = 0xABCB96F7 + (wstate>>1); \ + unsigned int sum = 0; \ + for (int i=0; i < rounds; i++) { \ + sum += delta; \ + state += ((wstate<<4) + k0)^(wstate + sum)^((wstate>>5) + k1); \ + wstate += ((state<<4) + k2)^(state + sum)^((state>>5) + k3); \ + } \ + unsigned int v = (state ^ (state>>26)) + wstate; \ + unsigned int s = (signed int)((v^(v>>20))*0x6957f5a7); \ + randnum = SQRT3*(s*TWO_N32*(numtyp)2.0-(numtyp)1.0); \ +} +#endif + +// specifically implemented for steps = 1; high = 1.0; low = -1.0 +// returns two uniformly distributed random numbers r1 and r2 in [-1.0;1.0], +// and uses the polar method (Marsaglia's) to transform to a normal random value +// This is used to compare with CPU DPD using RandMars::gaussian() +#ifdef _USE_GAUSSIAN_SARU_LCG +#define saru(seed1, seed2, seed, timestep, randnum) { \ + unsigned int seed3 = seed + timestep; \ + seed3^=(seed1<<7)^(seed2>>6); \ + seed2+=(seed1>>4)^(seed3>>15); \ + seed1^=(seed2<<9)+(seed3<<8); \ + seed3^=0xA5366B4D*((seed2>>11) ^ (seed1<<1)); \ + seed2+=0x72BE1579*((seed1<<4) ^ (seed3>>16)); \ + 
seed1^=0x3F38A6ED*((seed3>>5) ^ (((signed int)seed2)>>22)); \ + seed2+=seed1*seed3; \ + seed1+=seed3 ^ (seed2>>2); \ + seed2^=((signed int)seed2)>>17; \ + unsigned int state=0x12345678; \ + unsigned int wstate=12345678; \ + state = 0x79dedea3*(seed1^(((signed int)seed1)>>14)); \ + wstate = (state + seed2) ^ (((signed int)state)>>8); \ + state = state + (wstate*(wstate^0xdddf97f5)); \ + wstate = 0xABCB96F7 + (wstate>>1); \ + unsigned int v, s; \ + numtyp r1, r2, rsq; \ + while (1) { \ + state = LCGA*state + LCGC; \ + wstate = wstate + oWeylOffset+((((signed int)wstate)>>31) & oWeylPeriod); \ + v = (state ^ (state>>26)) + wstate; \ + s = (signed int)((v^(v>>20))*0x6957f5a7); \ + r1 = s*TWO_N32*(numtyp)2.0-(numtyp)1.0; \ + state = LCGA*state + LCGC; \ + wstate = wstate + oWeylOffset+((((signed int)wstate)>>31) & oWeylPeriod); \ + v = (state ^ (state>>26)) + wstate; \ + s = (signed int)((v^(v>>20))*0x6957f5a7); \ + r2 = s*TWO_N32*(numtyp)2.0-(numtyp)1.0; \ + rsq = r1 * r1 + r2 * r2; \ + if (rsq < (numtyp)1.0) break; \ + } \ + numtyp fac = ucl_sqrt((numtyp)-2.0*log(rsq)/rsq); \ + randnum = r2*fac; \ +} +#endif + +#define MIN(A,B) ((A) < (B) ? (A) : (B)) +#define MAX(A,B) ((A) < (B) ? 
(B) : (A)) + +// coeff.x = A_att, coeff.y = B_rep, coeff.z = gamma, coeff.w = sigma +// coeff2.x = cut, coeff2.y = cut_r, coeff2.z = cutsq + +__kernel void k_mdpd(const __global numtyp4 *restrict x_, + const __global numtyp4 *restrict extra, + const __global numtyp4 *restrict coeff, + const __global numtyp4 *restrict coeff2, + const int lj_types, + const __global numtyp *restrict sp_lj, + const __global numtyp *restrict sp_sqrt, + const __global int * dev_nbor, + const __global int * dev_packed, + __global acctyp3 *restrict ans, + __global acctyp *restrict engv, + const int eflag, const int vflag, const int inum, + const int nbor_pitch, + const __global numtyp4 *restrict v_, + const __global numtyp *restrict cutsq, + const numtyp dtinvsqrt, const int seed, + const int timestep, const int t_per_atom) { + int tid, ii, offset; + atom_info(t_per_atom,ii,tid,offset); + + int n_stride; + local_allocate_store_pair(); + + acctyp3 f; + f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0; + acctyp energy, virial[6]; + if (EVFLAG) { + energy=(acctyp)0; + for (int i=0; i<6; i++) virial[i]=(acctyp)0; + } + + if (ii tag2) { + tag1 = jtag; tag2 = itag; + } + + numtyp randnum = (numtyp)0.0; + saru(tag1, tag2, seed, timestep, randnum); + + // conservative force = A_att * wc + B_rep*(rhoi+rhoj)*wc_r + // drag force = -gamma * wr^2 * (delx dot delv) / r + // random force = sigma * wr * rnd * dtinvsqrt; + + numtyp force = A_attij*wc + B_repij*(rhoi+rhoj)*wc_r; + force -= gammaij*wr*wr*dot*rinv; + force += sigmaij*wr*randnum*dtinvsqrt; + force *= factor_dpd*rinv; + + f.x+=delx*force; + f.y+=dely*force; + f.z+=delz*force; + + if (EVFLAG && eflag) { + // unshifted eng of conservative term: + // eng shifted to 0.0 at cutoff + numtyp e = (numtyp)0.5*A_attij*cutij * wr*wr + (numtyp)0.5*B_repij*cut_rij*(rhoi+rhoj)*wc_r*wc_r; + energy+=factor_dpd*e; + } + if (EVFLAG && vflag) { + virial[0] += delx*delx*force; + virial[1] += dely*dely*force; + virial[2] += delz*delz*force; + virial[3] += 
delx*dely*force; + virial[4] += delx*delz*force; + virial[5] += dely*delz*force; + } + } + + } // for nbor + } // if ii + store_answers(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag, + ans,engv); +} + +__kernel void k_mdpd_fast(const __global numtyp4 *restrict x_, + const __global numtyp4 *restrict extra, + const __global numtyp4 *restrict coeff_in, + const __global numtyp4 *restrict coeff2_in, + const __global numtyp *restrict sp_lj_in, + const __global numtyp *restrict sp_sqrt_in, + const __global int * dev_nbor, + const __global int * dev_packed, + __global acctyp3 *restrict ans, + __global acctyp *restrict engv, + const int eflag, const int vflag, const int inum, + const int nbor_pitch, + const __global numtyp4 *restrict v_, + const __global numtyp *restrict cutsq, + const numtyp dtinvsqrt, const int seed, + const int timestep, const int t_per_atom) { + int tid, ii, offset; + atom_info(t_per_atom,ii,tid,offset); + + #ifndef ONETYPE + __local numtyp4 coeff[MAX_SHARED_TYPES*MAX_SHARED_TYPES]; + __local numtyp4 coeff2[MAX_SHARED_TYPES*MAX_SHARED_TYPES]; + __local numtyp sp_lj[4]; + __local numtyp sp_sqrt[4]; + if (tid<4) { + sp_lj[tid]=sp_lj_in[tid]; + sp_sqrt[tid]=sp_sqrt_in[tid]; + } + if (tid tag2) { + tag1 = jtag; tag2 = itag; + } + + numtyp randnum = (numtyp)0.0; + saru(tag1, tag2, seed, timestep, randnum); + + // conservative force = A_att * wc + B_rep*(rhoi+rhoj)*wc_r + // drag force = -gamma * wr^2 * (delx dot delv) / r + // random force = sigma * wr * rnd * dtinvsqrt; + + numtyp force = A_attij*wc + B_repij*(rhoi+rhoj)*wc_r; + force -= gammaij*wr*wr*dot*rinv; + force += sigmaij*wr*randnum*dtinvsqrt; + #ifndef ONETYPE + force *= factor_dpd*rinv; + #else + force*=rinv; + #endif + + f.x+=delx*force; + f.y+=dely*force; + f.z+=delz*force; + + if (EVFLAG && eflag) { + // unshifted eng of conservative term: + // eng shifted to 0.0 at cutoff + numtyp e = (numtyp)0.5*A_attij*cutij * wr*wr + (numtyp)0.5*B_repij*cut_rij*(rhoi+rhoj)*wc_r*wc_r; + #ifndef 
ONETYPE + energy+=factor_dpd*e; + #else + energy+=e; + #endif + } + if (EVFLAG && vflag) { + virial[0] += delx*delx*force; + virial[1] += dely*dely*force; + virial[2] += delz*delz*force; + virial[3] += delx*dely*force; + virial[4] += delx*delz*force; + virial[5] += dely*delz*force; + } + } + + } // for nbor + } // if ii + store_answers(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag, + ans,engv); +} + diff --git a/lib/gpu/lal_mdpd.h b/lib/gpu/lal_mdpd.h new file mode 100644 index 0000000000..0e95185714 --- /dev/null +++ b/lib/gpu/lal_mdpd.h @@ -0,0 +1,88 @@ +/*************************************************************************** + mdpd.h + ------------------- + Trung Dac Nguyen (U Chicago) + + Class for acceleration of the mdpd pair style. + + __________________________________________________________________________ + This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) + __________________________________________________________________________ + + begin : December 2023 + email : ndactrung@gmail.com + ***************************************************************************/ + +#ifndef LAL_MDPD_H +#define LAL_MDPD_H + +#include "lal_base_dpd.h" + +namespace LAMMPS_AL { + +template +class MDPD : public BaseDPD { + public: + MDPD(); + ~MDPD(); + + /// Clear any previous data and set up for a new LAMMPS run + /** \param max_nbors initial number of rows in the neighbor matrix + * \param cell_size cutoff + skin + * \param gpu_split fraction of particles handled by device + * + * Returns: + * - 0 if successful + * - -1 if fix gpu not found + * - -3 if there is an out of memory error + * - -4 if the GPU library was not compiled for GPU + * - -5 Double precision is not supported on card **/ + int init(const int ntypes, double **host_cutsq, + double **host_A_att, double **host_B_rep, + double **host_gamma, double **host_sigma, + double **host_cut, double **host_cut_r, double *host_special_lj, + const int nlocal, const int nall, const int 
max_nbors, + const int maxspecial, const double cell_size, const double gpu_split, + FILE *screen); + + /// Clear all host and device data + /** \note This is called at the beginning of the init() routine **/ + void clear(); + + /// Returns memory usage on device per atom + int bytes_per_atom(const int max_nbors) const; + + /// Total host memory used by library for pair style + double host_memory_usage() const; + + void get_extra_data(double *host_rho); + + // --------------------------- TYPE DATA -------------------------- + + /// coeff.x = A_att, coeff.y = B_rep, coeff.z = gamma, coeff.w = sigma + UCL_D_Vec coeff; + /// coeff2.x = cut, coeff2.y = cut_r, coeff2.z = cutsq + UCL_D_Vec coeff2; + + UCL_D_Vec cutsq; + + /// Special LJ values + UCL_D_Vec sp_lj, sp_sqrt; + + /// If atom type constants fit in shared memory, use fast kernels + bool shared_types; + + /// Number of atom types + int _lj_types; + + /// pointer to host data + double *mdpd_rho; + + private: + bool _allocated; + int loop(const int eflag, const int vflag); +}; + +} + +#endif diff --git a/lib/gpu/lal_mdpd_ext.cpp b/lib/gpu/lal_mdpd_ext.cpp new file mode 100644 index 0000000000..def6adb1f6 --- /dev/null +++ b/lib/gpu/lal_mdpd_ext.cpp @@ -0,0 +1,133 @@ +/*************************************************************************** + mdpd_ext.cpp + ------------------- + Trung Dac Nguyen (U Chicago) + + Functions for LAMMPS access to mdpd acceleration routines. 
+ + __________________________________________________________________________ + This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) + __________________________________________________________________________ + + begin : December 2023 + email : ndactrung@gmail.com + ***************************************************************************/ + +#include +#include +#include + +#include "lal_mdpd.h" + +using namespace std; +using namespace LAMMPS_AL; + +static MDPD MDPDMF; + +// --------------------------------------------------------------------------- +// Allocate memory on host and device and copy constants to device +// --------------------------------------------------------------------------- +int mdpd_gpu_init(const int ntypes, double **cutsq, + double **host_A_att, double **host_B_rep, + double **host_gamma, double **host_sigma, + double **host_cut, double **host_cut_r, + double *special_lj, const int inum, + const int nall, const int max_nbors, const int maxspecial, + const double cell_size, int &gpu_mode, FILE *screen) { + MDPDMF.clear(); + gpu_mode=MDPDMF.device->gpu_mode(); + double gpu_split=MDPDMF.device->particle_split(); + int first_gpu=MDPDMF.device->first_device(); + int last_gpu=MDPDMF.device->last_device(); + int world_me=MDPDMF.device->world_me(); + int gpu_rank=MDPDMF.device->gpu_rank(); + int procs_per_gpu=MDPDMF.device->procs_per_gpu(); + + MDPDMF.device->init_message(screen,"mdpd",first_gpu,last_gpu); + + bool message=false; + if (MDPDMF.device->replica_me()==0 && screen) + message=true; + + if (message) { + fprintf(screen,"Initializing Device and compiling on process 0..."); + fflush(screen); + } + + int init_ok=0; + if (world_me==0) + init_ok=MDPDMF.init(ntypes, cutsq, host_A_att, host_B_rep, host_gamma, host_sigma, + host_cut, host_cut_r, special_lj, inum, nall, max_nbors, + maxspecial, cell_size, gpu_split, screen); + + MDPDMF.device->world_barrier(); + if (message) + fprintf(screen,"Done.\n"); + + for (int i=0; 
iserialize_init(); + if (message) + fprintf(screen,"Done.\n"); + } + if (message) + fprintf(screen,"\n"); + + if (init_ok==0) + MDPDMF.estimate_gpu_overhead(); + return init_ok; +} + +void mdpd_gpu_clear() { + MDPDMF.clear(); +} + +int ** mdpd_gpu_compute_n(const int ago, const int inum_full, const int nall, + double **host_x, int *host_type, double *sublo, + double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, + const bool eatom, const bool vatom, int &host_start, + int **ilist, int **jnum, const double cpu_time, bool &success, + double **host_v, const double dtinvsqrt, + const int seed, const int timestep, + double *boxlo, double *prd) { + return MDPDMF.compute(ago, inum_full, nall, host_x, host_type, sublo, + subhi, tag, nspecial, special, eflag, vflag, eatom, + vatom, host_start, ilist, jnum, cpu_time, success, + host_v, dtinvsqrt, seed, timestep, boxlo, prd); +} + +void mdpd_gpu_compute(const int ago, const int inum_full, const int nall, + double **host_x, int *host_type, int *ilist, int *numj, + int **firstneigh, const bool eflag, const bool vflag, + const bool eatom, const bool vatom, int &host_start, + const double cpu_time, bool &success, tagint *tag, + double **host_v, const double dtinvsqrt, + const int seed, const int timestep, + const int nlocal, double *boxlo, double *prd) { + MDPDMF.compute(ago, inum_full, nall, host_x, host_type, ilist, numj, + firstneigh, eflag, vflag, eatom, vatom, host_start, cpu_time, success, + tag, host_v, dtinvsqrt, seed, timestep, nlocal, boxlo, prd); +} + +void mdpd_gpu_get_extra_data(double *host_rho) { + MDPDMF.get_extra_data(host_rho); +} + +double mdpd_gpu_bytes() { + return MDPDMF.host_memory_usage(); +} + + diff --git a/lib/gpu/lal_sph_heatconduction.cpp b/lib/gpu/lal_sph_heatconduction.cpp new file mode 100644 index 0000000000..e8e366e93a --- /dev/null +++ b/lib/gpu/lal_sph_heatconduction.cpp @@ -0,0 +1,222 @@ 
+/*************************************************************************** + sph_heatconduction.cpp + ------------------- + Trung Nguyen (U Chicago) + + Class for acceleration of the sph_heatconduction pair style. + + __________________________________________________________________________ + This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) + __________________________________________________________________________ + + begin : September 2023 + email : ndactrung@gmail.com + ***************************************************************************/ + +#if defined(USE_OPENCL) +#include "sph_heatconduction_cl.h" +#elif defined(USE_CUDART) +const char *sph_heatconduction=0; +#else +#include "sph_heatconduction_cubin.h" +#endif + +#include "lal_sph_heatconduction.h" +#include +namespace LAMMPS_AL { +#define SPHHeatConductionT SPHHeatConduction + +extern Device device; + +template +SPHHeatConductionT::SPHHeatConduction() : BaseSPH(), _allocated(false) { + _max_dE_size = 0; +} + +template +SPHHeatConductionT::~SPHHeatConduction() { + clear(); +} + +template +int SPHHeatConductionT::bytes_per_atom(const int max_nbors) const { + return this->bytes_per_atom_atomic(max_nbors); +} + +template +int SPHHeatConductionT::init(const int ntypes, + double **host_cutsq, double **host_cut, + double **host_alpha, double* host_mass, + const int dimension, double *host_special_lj, + const int nlocal, const int nall, + const int max_nbors, const int maxspecial, + const double cell_size, + const double gpu_split, FILE *_screen) { + const int max_shared_types=this->device->max_shared_types(); + + int onetype=0; + #ifdef USE_OPENCL + if (maxspecial==0) + for (int i=1; i0) { + if (onetype>0) + onetype=-1; + else if (onetype==0) + onetype=i*max_shared_types+j; + } + if (onetype<0) onetype=0; + #endif + + int success; + int extra_fields = 4; // round up to accomodate quadruples of numtyp values + // rho, esph + 
success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size, + gpu_split,_screen,sph_heatconduction,"k_sph_heatconduction", + onetype,extra_fields); + if (success!=0) + return success; + + // If atom type constants fit in shared memory use fast kernel + int lj_types=ntypes; + shared_types=false; + if (lj_types<=max_shared_types && this->_block_size>=max_shared_types) { + lj_types=max_shared_types; + shared_types=true; + } + _lj_types=lj_types; + + // Allocate a host write buffer for data initialization + UCL_H_Vec host_write(lj_types*lj_types*32,*(this->ucl_device), + UCL_WRITE_ONLY); + + for (int i=0; iucl_device),UCL_READ_ONLY); + this->atom->type_pack4(ntypes,lj_types,coeff,host_write,host_alpha, + host_cut, host_cutsq); + + UCL_H_Vec dview_mass(ntypes, *(this->ucl_device), UCL_WRITE_ONLY); + for (int i = 0; i < ntypes; i++) + dview_mass[i] = host_mass[i]; + mass.alloc(ntypes,*(this->ucl_device), UCL_READ_ONLY); + ucl_copy(mass,dview_mass,false); + + UCL_H_Vec dview; + sp_lj.alloc(4,*(this->ucl_device),UCL_READ_ONLY); + dview.view(host_special_lj,4,*(this->ucl_device)); + ucl_copy(sp_lj,dview,false); + + // allocate per-atom array Q + + int ef_nall=nall; + if (ef_nall==0) + ef_nall=2000; + + _max_dE_size=static_cast(static_cast(ef_nall)*1.10); + dE.alloc(_max_dE_size,*(this->ucl_device),UCL_READ_WRITE,UCL_READ_WRITE); + + _dimension = dimension; + + _allocated=true; + this->_max_bytes=coeff.row_bytes()+dE.row_bytes()+sp_lj.row_bytes(); + return 0; +} + +template +void SPHHeatConductionT::clear() { + if (!_allocated) + return; + _allocated=false; + + coeff.clear(); + mass.clear(); + dE.clear(); + sp_lj.clear(); + this->clear_atomic(); +} + +template +double SPHHeatConductionT::host_memory_usage() const { + return this->host_memory_usage_atomic()+sizeof(SPHHeatConduction); +} + +template +void SPHHeatConductionT::update_dE(void **dE_ptr) { + *dE_ptr=dE.host.begin(); + dE.update_host(_max_dE_size,false); +} + +// 
--------------------------------------------------------------------------- +// Calculate energies, forces, and torques +// --------------------------------------------------------------------------- +template +int SPHHeatConductionT::loop(const int eflag, const int vflag) { + + int nall = this->atom->nall(); + + // Resize dE array if necessary + if (nall > _max_dE_size) { + _max_dE_size=static_cast(static_cast(nall)*1.10); + dE.resize(_max_dE_size); + } + + // signal that we need to transfer extra data from the host + + this->atom->extra_data_unavail(); + + numtyp4 *pextra=reinterpret_cast(&(this->atom->extra[0])); + + int n = 0; + int nstride = 1; + for (int i = 0; i < nall; i++) { + int idx = n+i*nstride; + numtyp4 v; + v.x = rho[i]; + v.y = esph[i]; + v.z = 0; + v.w = 0; + pextra[idx] = v; + } + this->atom->add_extra_data(); + + // Compute the block size and grid size to keep all cores busy + const int BX=this->block_size(); + int GX=static_cast(ceil(static_cast(this->ans->inum())/ + (BX/this->_threads_per_atom))); + + + int ainum=this->ans->inum(); + int nbor_pitch=this->nbor->nbor_pitch(); + this->time_pair.start(); + if (shared_types) { + this->k_pair_sel->set_size(GX,BX); + this->k_pair_sel->run(&this->atom->x, &this->atom->extra, &coeff, &mass, &sp_lj, + &this->nbor->dev_nbor, &this->_nbor_data->begin(), + &this->ans->force, &this->ans->engv, &dE, &eflag, &vflag, + &ainum, &nbor_pitch, &this->atom->v, &_dimension, &this->_threads_per_atom); + } else { + this->k_pair.set_size(GX,BX); + this->k_pair.run(&this->atom->x, &this->atom->extra, &coeff, &mass, + &_lj_types, &sp_lj, &this->nbor->dev_nbor, &this->_nbor_data->begin(), + &this->ans->force, &this->ans->engv, &dE, &eflag, &vflag, + &ainum, &nbor_pitch, &this->atom->v, &_dimension, &this->_threads_per_atom); + } + + this->time_pair.stop(); + return GX; +} + +// --------------------------------------------------------------------------- +// Get the extra data pointers from host +// 
--------------------------------------------------------------------------- + +template +void SPHHeatConductionT::get_extra_data(double *host_rho, double *host_esph) { + rho = host_rho; + esph = host_esph; +} + +template class SPHHeatConduction; +} diff --git a/lib/gpu/lal_sph_heatconduction.cu b/lib/gpu/lal_sph_heatconduction.cu new file mode 100644 index 0000000000..21c936347a --- /dev/null +++ b/lib/gpu/lal_sph_heatconduction.cu @@ -0,0 +1,253 @@ +// ************************************************************************** +// sph_heatconduction.cu +// --------------------- +// Trung Dac Nguyen (U Chicago) +// +// Device code for acceleration of the sph/heatconduction pair style +// +// __________________________________________________________________________ +// This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) +// __________________________________________________________________________ +// +// begin : September 2023 +// email : ndactrung@gmail.com +// *************************************************************************** + +#if defined(NV_KERNEL) || defined(USE_HIP) +#include "lal_aux_fun1.h" +#ifndef _DOUBLE_DOUBLE +_texture( pos_tex,float4); +_texture( vel_tex,float4); +#else +_texture_2d( pos_tex,int4); +_texture_2d( vel_tex,int4); +#endif +#else +#define pos_tex x_ +#define vel_tex v_ +#endif + +#if (SHUFFLE_AVAIL == 0) + +#define store_dE(dEacc, ii, inum, tid, t_per_atom, offset, dE) \ + if (t_per_atom>1) { \ + simdsync(); \ + simd_reduce_add1(t_per_atom, red_acc, offset, tid, dEacc); \ + } \ + if (offset==0 && ii1) { \ + for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \ + dEacc += shfl_down(dEacc, s, t_per_atom); \ + } \ + } \ + if (offset==0 && ii +class SPHHeatConduction : public BaseSPH { + public: + SPHHeatConduction(); + ~SPHHeatConduction(); + + /// Clear any previous data and set up for a new LAMMPS run + /** \param max_nbors initial number of rows in the neighbor matrix + * \param cell_size cutoff + skin + * \param 
gpu_split fraction of particles handled by device + * + * Returns: + * - 0 if successful + * - -1 if fix gpu not found + * - -3 if there is an out of memory error + * - -4 if the GPU library was not compiled for GPU + * - -5 Double precision is not supported on card **/ + int init(const int ntypes, double **host_cutsq, + double** host_cut, double **host_alpha, double *host_mass, + const int dimension, double *host_special_lj, + const int nlocal, const int nall, const int max_nbors, + const int maxspecial, const double cell_size, + const double gpu_split, FILE *screen); + + /// Clear all host and device data + /** \note This is called at the beginning of the init() routine **/ + void clear(); + + /// Returns memory usage on device per atom + int bytes_per_atom(const int max_nbors) const; + + /// Total host memory used by library for pair style + double host_memory_usage() const; + + void get_extra_data(double *host_rho, double *host_esph); + + /// copy desph from device to host + void update_dE(void **dE_ptr); + + // --------------------------- TYPE DATA -------------------------- + + /// coeff.x = alpha, coeff.y = cut, coeff.z = cutsq + UCL_D_Vec coeff; + + /// per-type coeffs + UCL_D_Vec mass; + + /// Special LJ values + UCL_D_Vec sp_lj; + + /// If atom type constants fit in shared memory, use fast kernels + bool shared_types; + + /// Number of atom types + int _lj_types; + + /// Per-atom arrays + UCL_Vector dE; + int _max_dE_size; + + int _dimension; + + /// pointer to host data + double *rho, *esph, *cv; + + private: + bool _allocated; + int loop(const int eflag, const int vflag); +}; + +} + +#endif diff --git a/lib/gpu/lal_sph_heatconduction_ext.cpp b/lib/gpu/lal_sph_heatconduction_ext.cpp new file mode 100644 index 0000000000..92e0e342d2 --- /dev/null +++ b/lib/gpu/lal_sph_heatconduction_ext.cpp @@ -0,0 +1,129 @@ +/*************************************************************************** + sph_heatconduction_ext.cpp + -------------------------- + Trung Dac 
Nguyen (U Chicago) + + Functions for LAMMPS access to sph/heatconduction acceleration routines. + + __________________________________________________________________________ + This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) + __________________________________________________________________________ + + begin : December 2023 + email : ndactrung@gmail.com + ***************************************************************************/ + +#include +#include +#include + +#include "lal_sph_heatconduction.h" + +using namespace std; +using namespace LAMMPS_AL; + +static SPHHeatConduction SPHHeatConductionMF; + +// --------------------------------------------------------------------------- +// Allocate memory on host and device and copy constants to device +// --------------------------------------------------------------------------- +int sph_heatconduction_gpu_init(const int ntypes, double **cutsq, double** host_cut, + double **host_alpha, double* host_mass, const int dimension, + double *special_lj, const int inum, const int nall, + const int max_nbors, const int maxspecial, + const double cell_size, int &gpu_mode, FILE *screen) { + SPHHeatConductionMF.clear(); + gpu_mode=SPHHeatConductionMF.device->gpu_mode(); + double gpu_split=SPHHeatConductionMF.device->particle_split(); + int first_gpu=SPHHeatConductionMF.device->first_device(); + int last_gpu=SPHHeatConductionMF.device->last_device(); + int world_me=SPHHeatConductionMF.device->world_me(); + int gpu_rank=SPHHeatConductionMF.device->gpu_rank(); + int procs_per_gpu=SPHHeatConductionMF.device->procs_per_gpu(); + + SPHHeatConductionMF.device->init_message(screen,"sph_heatconduction",first_gpu,last_gpu); + + bool message=false; + if (SPHHeatConductionMF.device->replica_me()==0 && screen) + message=true; + + if (message) { + fprintf(screen,"Initializing Device and compiling on process 0..."); + fflush(screen); + } + + int init_ok=0; + if (world_me==0) + init_ok=SPHHeatConductionMF.init(ntypes, cutsq, 
host_cut, host_alpha, host_mass, + dimension, special_lj, inum, nall, max_nbors, maxspecial, + cell_size, gpu_split, screen); + + SPHHeatConductionMF.device->world_barrier(); + if (message) + fprintf(screen,"Done.\n"); + + for (int i=0; iserialize_init(); + if (message) + fprintf(screen,"Done.\n"); + } + if (message) + fprintf(screen,"\n"); + + if (init_ok==0) + SPHHeatConductionMF.estimate_gpu_overhead(); + return init_ok; +} + +void sph_heatconduction_gpu_clear() { + SPHHeatConductionMF.clear(); +} + +int ** sph_heatconduction_gpu_compute_n(const int ago, const int inum_full, const int nall, + double **host_x, int *host_type, double *sublo, + double *subhi, tagint *host_tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, + const bool eatom, const bool vatom, int &host_start, + int **ilist, int **jnum, const double cpu_time, bool &success, + double **host_v) { + return SPHHeatConductionMF.compute(ago, inum_full, nall, host_x, host_type, sublo, + subhi, host_tag, nspecial, special, eflag, vflag, + eatom, vatom, host_start, ilist, jnum, cpu_time, success, + host_v); +} + +void sph_heatconduction_gpu_compute(const int ago, const int inum_full, const int nall, + double **host_x, int *host_type, int *ilist, int *numj, + int **firstneigh, const bool eflag, const bool vflag, + const bool eatom, const bool vatom, int &host_start, + const double cpu_time, bool &success, tagint *host_tag, + double **host_v, const int nlocal) { + SPHHeatConductionMF.compute(ago, inum_full, nall, host_x, host_type, ilist, numj, + firstneigh, eflag, vflag, eatom, vatom, host_start, cpu_time, success, + host_tag, host_v, nlocal); +} + +void sph_heatconduction_gpu_get_extra_data(double *host_rho, double *host_esph) { + SPHHeatConductionMF.get_extra_data(host_rho, host_esph); +} + +void sph_heatconduction_gpu_update_dE(void **dE_ptr) { + SPHHeatConductionMF.update_dE(dE_ptr); +} + +double sph_heatconduction_gpu_bytes() { + return SPHHeatConductionMF.host_memory_usage(); 
+} diff --git a/lib/gpu/lal_sph_lj.cpp b/lib/gpu/lal_sph_lj.cpp new file mode 100644 index 0000000000..66c2a5c302 --- /dev/null +++ b/lib/gpu/lal_sph_lj.cpp @@ -0,0 +1,222 @@ +/*************************************************************************** + sph_lj.cpp + ------------------- + Trung Nguyen (U Chicago) + + Class for acceleration of the sph_lj pair style. + + __________________________________________________________________________ + This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) + __________________________________________________________________________ + + begin : September 2023 + email : ndactrung@gmail.com + ***************************************************************************/ + +#if defined(USE_OPENCL) +#include "sph_lj_cl.h" +#elif defined(USE_CUDART) +const char *sph_lj=0; +#else +#include "sph_lj_cubin.h" +#endif + +#include "lal_sph_lj.h" +#include +namespace LAMMPS_AL { +#define SPHLJT SPHLJ + +extern Device device; + +template +SPHLJT::SPHLJ() : BaseSPH(), _allocated(false) { + _max_drhoE_size = 0; +} + +template +SPHLJT::~SPHLJ() { + clear(); +} + +template +int SPHLJT::bytes_per_atom(const int max_nbors) const { + return this->bytes_per_atom_atomic(max_nbors); +} + +template +int SPHLJT::init(const int ntypes, + double **host_cutsq, double **host_cut, + double **host_viscosity, double* host_mass, + const int dimension, double *host_special_lj, + const int nlocal, const int nall, + const int max_nbors, const int maxspecial, + const double cell_size, + const double gpu_split, FILE *_screen) { + const int max_shared_types=this->device->max_shared_types(); + + int onetype=0; + #ifdef USE_OPENCL + if (maxspecial==0) + for (int i=1; i0) { + if (onetype>0) + onetype=-1; + else if (onetype==0) + onetype=i*max_shared_types+j; + } + if (onetype<0) onetype=0; + #endif + + int success; + int extra_fields = 4; // round up to accomodate quadruples of numtyp values + // rho, cv + 
success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size, + gpu_split,_screen,sph_lj,"k_sph_lj",onetype,extra_fields); + if (success!=0) + return success; + + // If atom type constants fit in shared memory use fast kernel + int lj_types=ntypes; + shared_types=false; + if (lj_types<=max_shared_types && this->_block_size>=max_shared_types) { + lj_types=max_shared_types; + shared_types=true; + } + _lj_types=lj_types; + + // Allocate a host write buffer for data initialization + UCL_H_Vec host_write(lj_types*lj_types*32,*(this->ucl_device), + UCL_WRITE_ONLY); + + for (int i=0; iucl_device),UCL_READ_ONLY); + this->atom->type_pack4(ntypes,lj_types,coeff,host_write,host_viscosity, + host_cut, host_cutsq); + + UCL_H_Vec dview_mass(ntypes, *(this->ucl_device), UCL_WRITE_ONLY); + for (int i = 0; i < ntypes; i++) + dview_mass[i] = host_mass[i]; + mass.alloc(ntypes,*(this->ucl_device), UCL_READ_ONLY); + ucl_copy(mass,dview_mass,false); + + UCL_H_Vec dview; + sp_lj.alloc(4,*(this->ucl_device),UCL_READ_ONLY); + dview.view(host_special_lj,4,*(this->ucl_device)); + ucl_copy(sp_lj,dview,false); + + // allocate per-atom array Q + + int ef_nall=nall; + if (ef_nall==0) + ef_nall=2000; + + _max_drhoE_size=static_cast(static_cast(ef_nall)*1.10); + drhoE.alloc(_max_drhoE_size*2,*(this->ucl_device),UCL_READ_WRITE,UCL_READ_WRITE); + + _dimension = dimension; + + _allocated=true; + this->_max_bytes=coeff.row_bytes()+drhoE.row_bytes()+sp_lj.row_bytes(); + return 0; +} + +template +void SPHLJT::clear() { + if (!_allocated) + return; + _allocated=false; + + coeff.clear(); + mass.clear(); + drhoE.clear(); + sp_lj.clear(); + this->clear_atomic(); +} + +template +double SPHLJT::host_memory_usage() const { + return this->host_memory_usage_atomic()+sizeof(SPHLJ); +} + +template +void SPHLJT::update_drhoE(void **drhoE_ptr) { + *drhoE_ptr=drhoE.host.begin(); + drhoE.update_host(_max_drhoE_size*2,false); +} + +// --------------------------------------------------------------------------- 
+// Calculate energies, forces, and torques +// --------------------------------------------------------------------------- +template +int SPHLJT::loop(const int eflag, const int vflag) { + + int nall = this->atom->nall(); + + // Resize drhoE array if necessary + if (nall > _max_drhoE_size) { + _max_drhoE_size=static_cast(static_cast(nall)*1.10); + drhoE.resize(_max_drhoE_size*2); + } + + // signal that we need to transfer extra data from the host + + this->atom->extra_data_unavail(); + + numtyp4 *pextra=reinterpret_cast(&(this->atom->extra[0])); + + int n = 0; + int nstride = 1; + for (int i = 0; i < nall; i++) { + int idx = n+i*nstride; + numtyp4 v; + v.x = rho[i]; + v.y = esph[i]; + v.z = cv[i]; + v.w = 0; + pextra[idx] = v; + } + this->atom->add_extra_data(); + + // Compute the block size and grid size to keep all cores busy + const int BX=this->block_size(); + int GX=static_cast(ceil(static_cast(this->ans->inum())/ + (BX/this->_threads_per_atom))); + + + int ainum=this->ans->inum(); + int nbor_pitch=this->nbor->nbor_pitch(); + this->time_pair.start(); + if (shared_types) { + this->k_pair_sel->set_size(GX,BX); + this->k_pair_sel->run(&this->atom->x, &this->atom->extra, &coeff, &mass, &sp_lj, + &this->nbor->dev_nbor, &this->_nbor_data->begin(), + &this->ans->force, &this->ans->engv, &drhoE, &eflag, &vflag, + &ainum, &nbor_pitch, &this->atom->v, &_dimension, &this->_threads_per_atom); + } else { + this->k_pair.set_size(GX,BX); + this->k_pair.run(&this->atom->x, &this->atom->extra, &coeff, &mass, + &_lj_types, &sp_lj, &this->nbor->dev_nbor, &this->_nbor_data->begin(), + &this->ans->force, &this->ans->engv, &drhoE, &eflag, &vflag, + &ainum, &nbor_pitch, &this->atom->v, &_dimension, &this->_threads_per_atom); + } + + this->time_pair.stop(); + return GX; +} + +// --------------------------------------------------------------------------- +// Get the extra data pointers from host +// --------------------------------------------------------------------------- + 
+template +void SPHLJT::get_extra_data(double *host_rho, double *host_esph, double *host_cv) { + rho = host_rho; + esph = host_esph; + cv = host_cv; +} + +template class SPHLJ; +} diff --git a/lib/gpu/lal_sph_lj.cu b/lib/gpu/lal_sph_lj.cu new file mode 100644 index 0000000000..23863b5e28 --- /dev/null +++ b/lib/gpu/lal_sph_lj.cu @@ -0,0 +1,426 @@ +// ************************************************************************** +// sph_lj.cu +// ------------------- +// Trung Dac Nguyen (U Chicago) +// +// Device code for acceleration of the sph/lj pair style +// +// __________________________________________________________________________ +// This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) +// __________________________________________________________________________ +// +// begin : September 2023 +// email : ndactrung@gmail.com +// *************************************************************************** + +#if defined(NV_KERNEL) || defined(USE_HIP) +#include "lal_aux_fun1.h" +#ifndef _DOUBLE_DOUBLE +_texture( pos_tex,float4); +_texture( vel_tex,float4); +#else +_texture_2d( pos_tex,int4); +_texture_2d( vel_tex,int4); +#endif +#else +#define pos_tex x_ +#define vel_tex v_ +#endif + +#if (SHUFFLE_AVAIL == 0) + +#define store_drhoE(drhoEacc, ii, inum, tid, t_per_atom, offset, drhoE) \ + if (t_per_atom>1) { \ + simdsync(); \ + simd_reduce_add2(t_per_atom, red_acc, offset, tid, \ + drhoEacc.x, drhoEacc.y); \ + } \ + if (offset==0 && ii1) { \ + for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \ + drhoEacc.x += shfl_down(drhoEacc.x, s, t_per_atom); \ + drhoEacc.y += shfl_down(drhoEacc.y, s, t_per_atom); \ + } \ + } \ + if (offset==0 && ii (numtyp)0.0) { + pc[1] = ucl_sqrt(csq); // soundspeed + } else { + pc[1] = (numtyp)0.0; + } +} + + +__kernel void k_sph_lj(const __global numtyp4 *restrict x_, + const __global numtyp4 *restrict extra, + const __global numtyp4 *restrict coeff, + const __global numtyp *restrict mass, + const int lj_types, + const __global 
numtyp *restrict sp_lj, + const __global int * dev_nbor, + const __global int * dev_packed, + __global acctyp3 *restrict ans, + __global acctyp *restrict engv, + __global acctyp2 *restrict drhoE, + const int eflag, const int vflag, + const int inum, const int nbor_pitch, + const __global numtyp4 *restrict v_, + const int dimension, const int t_per_atom) { + int tid, ii, offset; + atom_info(t_per_atom,ii,tid,offset); + + int n_stride; + local_allocate_store_pair(); + + acctyp3 f; + f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0; + acctyp energy, virial[6]; + if (EVFLAG) { + energy=(acctyp)0; + for (int i=0; i<6; i++) virial[i]=(acctyp)0; + } + acctyp2 drhoEacc; + drhoEacc.x = drhoEacc.x = (acctyp)0; + + if (ii +class SPHLJ : public BaseSPH { + public: + SPHLJ(); + ~SPHLJ(); + + /// Clear any previous data and set up for a new LAMMPS run + /** \param max_nbors initial number of rows in the neighbor matrix + * \param cell_size cutoff + skin + * \param gpu_split fraction of particles handled by device + * + * Returns: + * - 0 if successful + * - -1 if fix gpu not found + * - -3 if there is an out of memory error + * - -4 if the GPU library was not compiled for GPU + * - -5 Double precision is not supported on card **/ + int init(const int ntypes, double **host_cutsq, + double** host_cut, double **host_viscosity, double *host_mass, + const int dimension, + double *host_special_lj, const int nlocal, const int nall, const int max_nbors, + const int maxspecial, const double cell_size, const double gpu_split, + FILE *screen); + + /// Clear all host and device data + /** \note This is called at the beginning of the init() routine **/ + void clear(); + + /// Returns memory usage on device per atom + int bytes_per_atom(const int max_nbors) const; + + /// Total host memory used by library for pair style + double host_memory_usage() const; + + void get_extra_data(double *host_rho, double *host_esph, + double *host_cv); + + /// copy drho and desph from device to host + void 
update_drhoE(void **drhoE_ptr); + + // --------------------------- TYPE DATA -------------------------- + + /// coeff.x = viscosity, coeff.y = cut, coeff.z = cutsq + UCL_D_Vec coeff; + + /// per-type coeffs + UCL_D_Vec mass; + + /// Special LJ values + UCL_D_Vec sp_lj; + + /// If atom type constants fit in shared memory, use fast kernels + bool shared_types; + + /// Number of atom types + int _lj_types; + + /// Per-atom arrays + UCL_Vector drhoE; + int _max_drhoE_size; + + int _dimension; + + /// pointer to host data + double *rho, *esph, *cv; + + private: + bool _allocated; + int loop(const int eflag, const int vflag); +}; + +} + +#endif diff --git a/lib/gpu/lal_sph_lj_ext.cpp b/lib/gpu/lal_sph_lj_ext.cpp new file mode 100644 index 0000000000..55f85c030e --- /dev/null +++ b/lib/gpu/lal_sph_lj_ext.cpp @@ -0,0 +1,129 @@ +/*************************************************************************** + sph_lj_ext.cpp + ------------------- + Trung Dac Nguyen (U Chicago) + + Functions for LAMMPS access to sph/lj acceleration routines. 
+ + __________________________________________________________________________ + This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) + __________________________________________________________________________ + + begin : December 2023 + email : ndactrung@gmail.com + ***************************************************************************/ + +#include +#include +#include + +#include "lal_sph_lj.h" + +using namespace std; +using namespace LAMMPS_AL; + +static SPHLJ SPHLJMF; + +// --------------------------------------------------------------------------- +// Allocate memory on host and device and copy constants to device +// --------------------------------------------------------------------------- +int sph_lj_gpu_init(const int ntypes, double **cutsq, double** host_cut, + double **host_viscosity, double* host_mass, const int dimension, + double *special_lj, const int inum, const int nall, + const int max_nbors, const int maxspecial, + const double cell_size, int &gpu_mode, FILE *screen) { + SPHLJMF.clear(); + gpu_mode=SPHLJMF.device->gpu_mode(); + double gpu_split=SPHLJMF.device->particle_split(); + int first_gpu=SPHLJMF.device->first_device(); + int last_gpu=SPHLJMF.device->last_device(); + int world_me=SPHLJMF.device->world_me(); + int gpu_rank=SPHLJMF.device->gpu_rank(); + int procs_per_gpu=SPHLJMF.device->procs_per_gpu(); + + SPHLJMF.device->init_message(screen,"sph_lj",first_gpu,last_gpu); + + bool message=false; + if (SPHLJMF.device->replica_me()==0 && screen) + message=true; + + if (message) { + fprintf(screen,"Initializing Device and compiling on process 0..."); + fflush(screen); + } + + int init_ok=0; + if (world_me==0) + init_ok=SPHLJMF.init(ntypes, cutsq, host_cut, host_viscosity, host_mass, + dimension, special_lj, inum, nall, max_nbors, maxspecial, + cell_size, gpu_split, screen); + + SPHLJMF.device->world_barrier(); + if (message) + fprintf(screen,"Done.\n"); + + for (int i=0; iserialize_init(); + if (message) + 
fprintf(screen,"Done.\n"); + } + if (message) + fprintf(screen,"\n"); + + if (init_ok==0) + SPHLJMF.estimate_gpu_overhead(); + return init_ok; +} + +void sph_lj_gpu_clear() { + SPHLJMF.clear(); +} + +int ** sph_lj_gpu_compute_n(const int ago, const int inum_full, const int nall, + double **host_x, int *host_type, double *sublo, + double *subhi, tagint *host_tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, + const bool eatom, const bool vatom, int &host_start, + int **ilist, int **jnum, const double cpu_time, bool &success, + double **host_v) { + return SPHLJMF.compute(ago, inum_full, nall, host_x, host_type, sublo, + subhi, host_tag, nspecial, special, eflag, vflag, + eatom, vatom, host_start, ilist, jnum, cpu_time, success, + host_v); +} + +void sph_lj_gpu_compute(const int ago, const int inum_full, const int nall, + double **host_x, int *host_type, int *ilist, int *numj, + int **firstneigh, const bool eflag, const bool vflag, + const bool eatom, const bool vatom, int &host_start, + const double cpu_time, bool &success, tagint *host_tag, + double **host_v, const int nlocal) { + SPHLJMF.compute(ago, inum_full, nall, host_x, host_type, ilist, numj, + firstneigh, eflag, vflag, eatom, vatom, host_start, cpu_time, success, + host_tag, host_v, nlocal); +} + +void sph_lj_gpu_get_extra_data(double *host_rho, double *host_esph, double *host_cv) { + SPHLJMF.get_extra_data(host_rho, host_esph, host_cv); +} + +void sph_lj_gpu_update_drhoE(void **drhoE_ptr) { + SPHLJMF.update_drhoE(drhoE_ptr); +} + +double sph_lj_gpu_bytes() { + return SPHLJMF.host_memory_usage(); +} diff --git a/lib/gpu/lal_sph_taitwater.cpp b/lib/gpu/lal_sph_taitwater.cpp new file mode 100644 index 0000000000..7a584d435e --- /dev/null +++ b/lib/gpu/lal_sph_taitwater.cpp @@ -0,0 +1,225 @@ +/*************************************************************************** + sph_taitwater.cpp + ------------------- + Trung Dac Nguyen (U Chicago) + + Class for acceleration of the 
sph/taitwater pair style. + + __________________________________________________________________________ + This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) + __________________________________________________________________________ + + begin : December 2023 + email : ndactrung@gmail.com + ***************************************************************************/ + +#if defined(USE_OPENCL) +#include "sph_taitwater_cl.h" +#elif defined(USE_CUDART) +const char *sph_taitwater=0; +#else +#include "sph_taitwater_cubin.h" +#endif + +#include "lal_sph_taitwater.h" +#include +namespace LAMMPS_AL { +#define SPHTaitwaterT SPHTaitwater + +extern Device device; + +template +SPHTaitwaterT::SPHTaitwater() : BaseSPH(), _allocated(false) { + _max_drhoE_size = 0; +} + +template +SPHTaitwaterT::~SPHTaitwater() { + clear(); +} + +template +int SPHTaitwaterT::bytes_per_atom(const int max_nbors) const { + return this->bytes_per_atom_atomic(max_nbors); +} + +template +int SPHTaitwaterT::init(const int ntypes, double **host_cutsq, + double **host_cut, double **host_viscosity, + double* host_mass, double* host_rho0, + double* host_soundspeed, double* host_B, const int dimension, + double *host_special_lj, const int nlocal, const int nall, + const int max_nbors, const int maxspecial, + const double cell_size, + const double gpu_split, FILE *_screen) { + const int max_shared_types=this->device->max_shared_types(); + + int onetype=0; + #ifdef USE_OPENCL + if (maxspecial==0) + for (int i=1; i0) { + if (onetype>0) + onetype=-1; + else if (onetype==0) + onetype=i*max_shared_types+j; + } + if (onetype<0) onetype=0; + #endif + + int success; + int extra_fields = 4; // round up to accommodate quadruples of numtyp values + // rho + success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size, + gpu_split,_screen,sph_taitwater,"k_sph_taitwater", + onetype,extra_fields); + if (success!=0) + return success; + + // If atom type constants fit in shared memory use fast kernel + int 
int lj_types=ntypes; + shared_types=false; + if (lj_types<=max_shared_types && this->_block_size>=max_shared_types) { + lj_types=max_shared_types; + shared_types=true; + } + _lj_types=lj_types; + + // Allocate a host write buffer for data initialization + UCL_H_Vec host_write(lj_types*lj_types*32,*(this->ucl_device), + UCL_WRITE_ONLY); + + for (int i=0; iucl_device),UCL_READ_ONLY); + this->atom->type_pack4(ntypes,lj_types,coeff,host_write,host_viscosity, + host_cut, host_cutsq); + + UCL_H_Vec dview_coeff2(ntypes, *(this->ucl_device), UCL_WRITE_ONLY); + for (int i = 0; i < ntypes; i++) { + dview_coeff2[i].x = host_mass[i]; + dview_coeff2[i].y = host_rho0[i]; + dview_coeff2[i].z = host_soundspeed[i]; + dview_coeff2[i].w = host_B[i]; + } + coeff2.alloc(ntypes,*(this->ucl_device), UCL_READ_ONLY); + ucl_copy(coeff2,dview_coeff2,false); + + UCL_H_Vec dview; + sp_lj.alloc(4,*(this->ucl_device),UCL_READ_ONLY); + dview.view(host_special_lj,4,*(this->ucl_device)); + ucl_copy(sp_lj,dview,false); + + // allocate per-atom array drhoE (two values per atom, sized with 10% headroom) + + int ef_nall=nall; + if (ef_nall==0) + ef_nall=2000; + + _max_drhoE_size=static_cast(static_cast(ef_nall)*1.10); + drhoE.alloc(_max_drhoE_size*2,*(this->ucl_device),UCL_READ_WRITE,UCL_READ_WRITE); + + _dimension = dimension; + + _allocated=true; + this->_max_bytes=coeff.row_bytes()+coeff2.row_bytes()+drhoE.row_bytes()+sp_lj.row_bytes(); + return 0; +} + +template +void SPHTaitwaterT::clear() { + if (!_allocated) + return; + _allocated=false; + + coeff.clear(); + coeff2.clear(); + drhoE.clear(); + sp_lj.clear(); + this->clear_atomic(); +} + +template +double SPHTaitwaterT::host_memory_usage() const { + return this->host_memory_usage_atomic()+sizeof(SPHTaitwater); +} + +template +void SPHTaitwaterT::update_drhoE(void **drhoE_ptr) { + *drhoE_ptr=drhoE.host.begin(); + drhoE.update_host(_max_drhoE_size*2,false); +} + +// --------------------------------------------------------------------------- +// Calculate energies, forces, and per-atom drhoE +// 
--------------------------------------------------------------------------- +template +int SPHTaitwaterT::loop(const int eflag, const int vflag) { + + int nall = this->atom->nall(); + + // Resize drhoE array if necessary + if (nall > _max_drhoE_size) { + _max_drhoE_size=static_cast(static_cast(nall)*1.10); + drhoE.resize(_max_drhoE_size*2); + } + + // signal that we need to transfer extra data from the host + + this->atom->extra_data_unavail(); + + numtyp4 *pextra=reinterpret_cast(&(this->atom->extra[0])); + + int n = 0; + int nstride = 1; + for (int i = 0; i < nall; i++) { + int idx = n+i*nstride; + numtyp4 v; + v.x = rho[i]; + v.y = 0; + v.z = 0; + v.w = 0; + pextra[idx] = v; + } + this->atom->add_extra_data(); + + // Compute the block size and grid size to keep all cores busy + const int BX=this->block_size(); + int GX=static_cast(ceil(static_cast(this->ans->inum())/ + (BX/this->_threads_per_atom))); + + + int ainum=this->ans->inum(); + int nbor_pitch=this->nbor->nbor_pitch(); + this->time_pair.start(); + if (shared_types) { + this->k_pair_sel->set_size(GX,BX); + this->k_pair_sel->run(&this->atom->x, &this->atom->extra, &coeff, &coeff2, &sp_lj, + &this->nbor->dev_nbor, &this->_nbor_data->begin(), + &this->ans->force, &this->ans->engv, &drhoE, &eflag, &vflag, + &ainum, &nbor_pitch, &this->atom->v, &_dimension, &this->_threads_per_atom); + } else { + this->k_pair.set_size(GX,BX); + this->k_pair.run(&this->atom->x, &this->atom->extra, &coeff, &coeff2, + &_lj_types, &sp_lj, &this->nbor->dev_nbor, &this->_nbor_data->begin(), + &this->ans->force, &this->ans->engv, &drhoE, &eflag, &vflag, + &ainum, &nbor_pitch, &this->atom->v, &_dimension, &this->_threads_per_atom); + } + + this->time_pair.stop(); + return GX; +} + +// --------------------------------------------------------------------------- +// Get the extra data pointers from host +// --------------------------------------------------------------------------- + +template +void SPHTaitwaterT::get_extra_data(double 
*host_rho) { + rho = host_rho; +} + +template class SPHTaitwater; +} diff --git a/lib/gpu/lal_sph_taitwater.cu b/lib/gpu/lal_sph_taitwater.cu new file mode 100644 index 0000000000..708d3ae43b --- /dev/null +++ b/lib/gpu/lal_sph_taitwater.cu @@ -0,0 +1,377 @@ +// ************************************************************************** +// sph_taitwater.cu +// ------------------- +// Trung Dac Nguyen (U Chicago) +// +// Device code for acceleration of the sph/taitwater pair style +// +// __________________________________________________________________________ +// This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) +// __________________________________________________________________________ +// +// begin : September 2023 +// email : ndactrung@gmail.com +// *************************************************************************** + +#if defined(NV_KERNEL) || defined(USE_HIP) +#include "lal_aux_fun1.h" +#ifndef _DOUBLE_DOUBLE +_texture( pos_tex,float4); +_texture( vel_tex,float4); +#else +_texture_2d( pos_tex,int4); +_texture_2d( vel_tex,int4); +#endif +#else +#define pos_tex x_ +#define vel_tex v_ +#endif + +#if (SHUFFLE_AVAIL == 0) + +#define store_drhoE(drhoEacc, ii, inum, tid, t_per_atom, offset, drhoE) \ + if (t_per_atom>1) { \ + simdsync(); \ + simd_reduce_add2(t_per_atom, red_acc, offset, tid, \ + drhoEacc.x, drhoEacc.y); \ + } \ + if (offset==0 && ii1) { \ + for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \ + drhoEacc.x += shfl_down(drhoEacc.x, s, t_per_atom); \ + drhoEacc.y += shfl_down(drhoEacc.y, s, t_per_atom); \ + } \ + } \ + if (offset==0 && ii +class SPHTaitwater : public BaseSPH { + public: + SPHTaitwater(); + ~SPHTaitwater(); + + /// Clear any previous data and set up for a new LAMMPS run + /** \param max_nbors initial number of rows in the neighbor matrix + * \param cell_size cutoff + skin + * \param gpu_split fraction of particles handled by device + * + * Returns: + * - 0 if successful + * - -1 if fix gpu not found + * - -3 if 
there is an out of memory error + * - -4 if the GPU library was not compiled for GPU + * - -5 Double precision is not supported on card **/ + int init(const int ntypes, double **host_cutsq, + double** host_cut, double **host_viscosity, double *host_mass, + double* host_rho0, double* host_soundspeed, double* host_B, + const int dimension, double *host_special_lj, + const int nlocal, const int nall, const int max_nbors, + const int maxspecial, const double cell_size, + const double gpu_split, FILE *screen); + + /// Clear all host and device data + /** \note This is called at the beginning of the init() routine **/ + void clear(); + + /// Returns memory usage on device per atom + int bytes_per_atom(const int max_nbors) const; + + /// Total host memory used by library for pair style + double host_memory_usage() const; + + void get_extra_data(double *host_rho); + + /// copy drho and desph from device to host + void update_drhoE(void **drhoE_ptr); + + // --------------------------- TYPE DATA -------------------------- + + /// per-pair coeffs: coeff.x = viscosity, coeff.y = cut, coeff.z = cutsq + UCL_D_Vec coeff; + + /// per-type coeffs + UCL_D_Vec coeff2; + + /// Special LJ values + UCL_D_Vec sp_lj; + + /// If atom type constants fit in shared memory, use fast kernels + bool shared_types; + + /// Number of atom types + int _lj_types; + + /// Per-atom arrays + UCL_Vector drhoE; + int _max_drhoE_size; + + int _dimension; + + /// pointer to host data + double *rho; + + private: + bool _allocated; + int loop(const int eflag, const int vflag); +}; + +} + +#endif diff --git a/lib/gpu/lal_sph_taitwater_ext.cpp b/lib/gpu/lal_sph_taitwater_ext.cpp new file mode 100644 index 0000000000..9d125a6395 --- /dev/null +++ b/lib/gpu/lal_sph_taitwater_ext.cpp @@ -0,0 +1,133 @@ +/*************************************************************************** + sph_taitwater_ext.cpp + ------------------- + Trung Dac Nguyen (U Chicago) + + Functions for LAMMPS access to sph taitwater acceleration 
routines. + + __________________________________________________________________________ + This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) + __________________________________________________________________________ + + begin : December 2023 + email : ndactrung@gmail.com + ***************************************************************************/ + +#include +#include +#include + +#include "lal_sph_taitwater.h" + +using namespace std; +using namespace LAMMPS_AL; + +static SPHTaitwater SPHTaitwaterMF; + +// --------------------------------------------------------------------------- +// Allocate memory on host and device and copy constants to device +// --------------------------------------------------------------------------- +int sph_taitwater_gpu_init(const int ntypes, double **cutsq, double** host_cut, + double **host_viscosity, double* host_mass, + double* host_rho0, double* host_soundspeed, double* host_B, + const int dimension, double *special_lj, + const int inum, const int nall, + const int max_nbors, const int maxspecial, + const double cell_size, int &gpu_mode, FILE *screen) { + SPHTaitwaterMF.clear(); + gpu_mode=SPHTaitwaterMF.device->gpu_mode(); + double gpu_split=SPHTaitwaterMF.device->particle_split(); + int first_gpu=SPHTaitwaterMF.device->first_device(); + int last_gpu=SPHTaitwaterMF.device->last_device(); + int world_me=SPHTaitwaterMF.device->world_me(); + int gpu_rank=SPHTaitwaterMF.device->gpu_rank(); + int procs_per_gpu=SPHTaitwaterMF.device->procs_per_gpu(); + + SPHTaitwaterMF.device->init_message(screen,"sph_taitwater",first_gpu,last_gpu); + + bool message=false; + if (SPHTaitwaterMF.device->replica_me()==0 && screen) + message=true; + + if (message) { + fprintf(screen,"Initializing Device and compiling on process 0..."); + fflush(screen); + } + + int init_ok=0; + if (world_me==0) + init_ok=SPHTaitwaterMF.init(ntypes, cutsq, host_cut, host_viscosity, host_mass, + host_rho0, host_soundspeed, host_B, dimension, + special_lj, 
inum, nall, max_nbors, maxspecial, + cell_size, gpu_split, screen); + + SPHTaitwaterMF.device->world_barrier(); + if (message) + fprintf(screen,"Done.\n"); + + for (int i=0; iserialize_init(); + if (message) + fprintf(screen,"Done.\n"); + } + if (message) + fprintf(screen,"\n"); + + if (init_ok==0) + SPHTaitwaterMF.estimate_gpu_overhead(); + return init_ok; +} + +void sph_taitwater_gpu_clear() { + SPHTaitwaterMF.clear(); +} + +int ** sph_taitwater_gpu_compute_n(const int ago, const int inum_full, const int nall, + double **host_x, int *host_type, double *sublo, + double *subhi, tagint *host_tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, + const bool eatom, const bool vatom, int &host_start, + int **ilist, int **jnum, const double cpu_time, bool &success, + double **host_v) { + return SPHTaitwaterMF.compute(ago, inum_full, nall, host_x, host_type, sublo, + subhi, host_tag, nspecial, special, eflag, vflag, eatom, + vatom, host_start, ilist, jnum, cpu_time, success, + host_v); +} + +void sph_taitwater_gpu_compute(const int ago, const int inum_full, const int nall, + double **host_x, int *host_type, int *ilist, int *numj, + int **firstneigh, const bool eflag, const bool vflag, + const bool eatom, const bool vatom, int &host_start, + const double cpu_time, bool &success, tagint *host_tag, + double **host_v, const int nlocal) { + SPHTaitwaterMF.compute(ago, inum_full, nall, host_x, host_type, ilist, numj, + firstneigh, eflag, vflag, eatom, vatom, host_start, cpu_time, success, + host_tag, host_v, nlocal); +} + +void sph_taitwater_gpu_get_extra_data(double *host_rho) { + SPHTaitwaterMF.get_extra_data(host_rho); +} + +void sph_taitwater_gpu_update_drhoE(void **drhoE_ptr) { + SPHTaitwaterMF.update_drhoE(drhoE_ptr); +} + +double sph_taitwater_gpu_bytes() { + return SPHTaitwaterMF.host_memory_usage(); +} diff --git a/lib/kokkos/CHANGELOG.md b/lib/kokkos/CHANGELOG.md index 4c145c44b3..c6115f4b3d 100644 --- a/lib/kokkos/CHANGELOG.md +++ 
b/lib/kokkos/CHANGELOG.md @@ -1,6 +1,97 @@ # CHANGELOG -## [4.1.00](https://github.com/kokkos/kokkos/tree/4.0.01) (2023-06-16) +## [4.2.00](https://github.com/kokkos/kokkos/tree/4.2.00) (2023-11-06) +[Full Changelog](https://github.com/kokkos/kokkos/compare/4.1.00...4.2.00) + +### Features: +- SIMD: significant improvements to SIMD support and alignment with C++26 SIMD + - add `Kokkos::abs` overload for SIMD types [\#6069](https://github.com/kokkos/kokkos/pull/6069) + - add generator constructors [\#6347](https://github.com/kokkos/kokkos/pull/6347) + - convert binary operators to hidden friends [\#6320](https://github.com/kokkos/kokkos/pull/6320) + - add shift operators [\#6109](https://github.com/kokkos/kokkos/pull/6109) + - add `float` support [\#6177](https://github.com/kokkos/kokkos/pull/6177) + - add remaining `gather_from` and `scatter_to` overloads [\#6220](https://github.com/kokkos/kokkos/pull/6220) + - define simd math function overloads in the Kokkos namespace [\#6465](https://github.com/kokkos/kokkos/pull/6465), [\#6487](https://github.com/kokkos/kokkos/pull/6487) + - `Kokkos_ENABLE_NATIVE=ON` autodetects SIMD types supported [\#6188](https://github.com/kokkos/kokkos/pull/6188) + - fix AVX2 SIMD support for ZEN2 AMD CPU [\#6238](https://github.com/kokkos/kokkos/pull/6238) +- `Kokkos::printf` [\#6083](https://github.com/kokkos/kokkos/pull/6083) +- `Kokkos::sort`: support custom comparator [\#6253](https://github.com/kokkos/kokkos/pull/6253) +- `half_t` and `bhalf_t` numeric traits [\#5778](https://github.com/kokkos/kokkos/pull/5778) +- `half_t` and `bhalf_t` mixed comparisons [\#6407](https://github.com/kokkos/kokkos/pull/6407) +- `half_t` and `bhalf_t` mathematical functions [\#6124](https://github.com/kokkos/kokkos/pull/6124) +- `TeamThreadRange` `parallel_scan` with return value [\#6090](https://github.com/kokkos/kokkos/pull/6090), [\#6301](https://github.com/kokkos/kokkos/pull/6301), [\#6302](https://github.com/kokkos/kokkos/pull/6302), 
[\#6303](https://github.com/kokkos/kokkos/pull/6303), [\#6307](https://github.com/kokkos/kokkos/pull/6307) +- `ThreadVectorRange` `parallel_scan` with return value [\#6235](https://github.com/kokkos/kokkos/pull/6235), [\#6242](https://github.com/kokkos/kokkos/pull/6242), [\#6308](https://github.com/kokkos/kokkos/pull/6308), [\#6305](https://github.com/kokkos/kokkos/pull/6305), [\#6292](https://github.com/kokkos/kokkos/pull/6292) +- Add team-level std algorithms [\#6200](https://github.com/kokkos/kokkos/pull/6200), [\#6205](https://github.com/kokkos/kokkos/pull/6205), [\#6207](https://github.com/kokkos/kokkos/pull/6207), [\#6208](https://github.com/kokkos/kokkos/pull/6208), [\#6209](https://github.com/kokkos/kokkos/pull/6209), [\#6210](https://github.com/kokkos/kokkos/pull/6210), [\#6211](https://github.com/kokkos/kokkos/pull/6211), [\#6212](https://github.com/kokkos/kokkos/pull/6212), [\#6213](https://github.com/kokkos/kokkos/pull/6213), [\#6256](https://github.com/kokkos/kokkos/pull/6256), [\#6258](https://github.com/kokkos/kokkos/pull/6258), [\#6350](https://github.com/kokkos/kokkos/pull/6350), [\#6351](https://github.com/kokkos/kokkos/pull/6351) +- Serial: Allow for distinct execution space instances [\#6441](https://github.com/kokkos/kokkos/pull/6441) + +### Backend and Architecture Enhancements: + +#### CUDA: +- Fixed potential data race in Cuda `parallel_reduce` [\#6236](https://github.com/kokkos/kokkos/pull/6236) +- Use `cudaMallocAsync` by default [\#6402](https://github.com/kokkos/kokkos/pull/6402) +- Bugfix for using Kokkos from a thread of execution [\#6299](https://github.com/kokkos/kokkos/pull/6299) + +#### HIP: +- New naming convention for AMD GPU: VEGA906, VEGA908, VEGA90A, NAVI1030 to AMD_GFX906, AMD_GFX908, AMD_GFX90A, AMD_GFX1030 [\#6266](https://github.com/kokkos/kokkos/pull/6266) +- Add initial support for gfx942: [\#6358](https://github.com/kokkos/kokkos/pull/6358) +- Improve reduction performance 
[\#6229](https://github.com/kokkos/kokkos/pull/6229) +- Deprecate `HIP(hipStream_t,bool)` constructor [\#6401](https://github.com/kokkos/kokkos/pull/6401) +- Add support for Graph [\#6370](https://github.com/kokkos/kokkos/pull/6370) +- Improve reduction performance when using Teams [\#6284](https://github.com/kokkos/kokkos/pull/6284) +- Fix concurrency calculation [\#6479](https://github.com/kokkos/kokkos/pull/6479) +- Fix potential data race in HIP `parallel_reduce` [\#6429](https://github.com/kokkos/kokkos/pull/6429) + +#### SYCL: +- Enforce external `sycl::queues` to be in-order [\#6246](https://github.com/kokkos/kokkos/pull/6246) +- Improve reduction performance: [\#6272](https://github.com/kokkos/kokkos/pull/6272) [\#6271](https://github.com/kokkos/kokkos/pull/6271) [\#6270](https://github.com/kokkos/kokkos/pull/6270) [\#6264](https://github.com/kokkos/kokkos/pull/6264) +- Allow using the SYCL execution space on AMD GPUs [\#6321](https://github.com/kokkos/kokkos/pull/6321) +- Allow sorting via native oneDPL to support Views with stride=1 [\#6322](https://github.com/kokkos/kokkos/pull/6322) +- Make in-order queues the default via macro [\#6189](https://github.com/kokkos/kokkos/pull/6189) + +#### OpenACC: +- Support Clacc compiler [\#6250](https://github.com/kokkos/kokkos/pull/6250) + +### General Enhancements +- Add missing `is_*_view` traits and `is_*_view_v` helper variable templates for `DynRankView`, `DynamicView`, `OffsetView`, `ScatterView` containers [\#6195](https://github.com/kokkos/kokkos/pull/6195) +- Make `nvcc_wrapper` and `compiler_launcher` scripts more portable by switching to a `#!/usr/bin/env` shebang [\#6357](https://github.com/kokkos/kokkos/pull/6357) +- Add an improved `Kokkos::malloc` / `Kokkos::free` performance test [\#6377](https://github.com/kokkos/kokkos/pull/6377) +- Ensure `Views` with `size==0` can be used with `deep_copy` [\#6273](https://github.com/kokkos/kokkos/pull/6273) +- `Kokkos::abort` is moved to header `Kokkos_Abort.hpp` 
[\#6445](https://github.com/kokkos/kokkos/pull/6445) +- `KOKKOS_ASSERT`, `KOKKOS_EXPECTS`, `KOKKOS_ENSURES` are moved to header `Kokkos_Assert.hpp` [\#6445](https://github.com/kokkos/kokkos/pull/6445) +- Add a permuted-index mode to the gups benchmark [\#6378](https://github.com/kokkos/kokkos/pull/6378) +- Check for overflow during backend initialization [\#6159](https://github.com/kokkos/kokkos/pull/6159) +- Make constraints on `Kokkos::sort` more visible [\#6234](https://github.com/kokkos/kokkos/pull/6234) and cleanup API [\#6239](https://github.com/kokkos/kokkos/pull/6239) +- Add converting assignment to `DualView`: [\#6474](https://github.com/kokkos/kokkos/pull/6474) + + +### Build System Changes + +- Export `Kokkos_CXX_COMPILER_VERSION` [\#6282](https://github.com/kokkos/kokkos/pull/6282) +- Disable default oneDPL support in Trilinos [\#6342](https://github.com/kokkos/kokkos/pull/6342) + +### Incompatibilities (i.e. breaking changes) + - Ensure that `Kokkos::complex` only gets instantiated for cv-unqualified floating-point types [\#6251](https://github.com/kokkos/kokkos/pull/6251) + - Removed (deprecated-3) support for volatile join operators in reductions [\#6385](https://github.com/kokkos/kokkos/pull/6385) + - Enforce `ViewCtorArgs` restrictions for `create_mirror_view` [\#6304](https://github.com/kokkos/kokkos/pull/6304) + - SIMD types for ARM NEON are not autodetected anymore but need `Kokkos_ARCH_ARM_NEON` or `Kokkos_ARCH_NATIVE=ON` [\#6394](https://github.com/kokkos/kokkos/pull/6394) + - Remove `#include <iostream>` from headers where possible [\#6482](https://github.com/kokkos/kokkos/pull/6482) + +### Deprecations +- Deprecated `Kokkos::vector` [\#6252](https://github.com/kokkos/kokkos/pull/6252) +- All host allocation mechanisms except for `STD_MALLOC` have been deprecated [\#6341](https://github.com/kokkos/kokkos/pull/6341) + +### Bug Fixes + - Missing memory fence in `RandomPool::free_state` functions [\#6290](https://github.com/kokkos/kokkos/pull/6290) + - 
Fix for corner case in `Kokkos::Experimental::is_partitioned` algorithm [\#6257](https://github.com/kokkos/kokkos/pull/6257) + - Fix initialization of scratch lock variables in the `Cuda` backend [\#6433](https://github.com/kokkos/kokkos/pull/6433) + - Fixes for `Kokkos::Array` [\#6372](https://github.com/kokkos/kokkos/pull/6372) + - Fixed symlink configure issue for Windows [\#6241](https://github.com/kokkos/kokkos/pull/6241) + - OpenMPTarget init-join fix [\#6444](https://github.com/kokkos/kokkos/pull/6444) + - Fix atomic operations bug for Min and Max [\#6435](https://github.com/kokkos/kokkos/pull/6435) + - Fix implementation for `cyl_bessel_i0` [\#6484](https://github.com/kokkos/kokkos/pull/6484) + - Fix various NVCC warnings in `BinSort`, `Array`, and bit manipulation function templates [\#6483](https://github.com/kokkos/kokkos/pull/6483) + +## [4.1.00](https://github.com/kokkos/kokkos/tree/4.1.00) (2023-06-16) [Full Changelog](https://github.com/kokkos/kokkos/compare/4.0.01...4.1.00) ### Features: diff --git a/lib/kokkos/CMakeLists.txt b/lib/kokkos/CMakeLists.txt index 895cee6a08..f6bd81058e 100644 --- a/lib/kokkos/CMakeLists.txt +++ b/lib/kokkos/CMakeLists.txt @@ -150,8 +150,8 @@ ENDIF() set(Kokkos_VERSION_MAJOR 4) -set(Kokkos_VERSION_MINOR 1) -set(Kokkos_VERSION_PATCH 00) +set(Kokkos_VERSION_MINOR 2) +set(Kokkos_VERSION_PATCH 0) set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}") message(STATUS "Kokkos version: ${Kokkos_VERSION}") math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}") @@ -314,7 +314,6 @@ KOKKOS_PROCESS_SUBPACKAGES() # E) If Kokkos itself is enabled, process the Kokkos package # -KOKKOS_EXCLUDE_AUTOTOOLS_FILES() KOKKOS_PACKAGE_POSTPROCESS() KOKKOS_CONFIGURE_CORE() diff --git a/lib/kokkos/Makefile.kokkos b/lib/kokkos/Makefile.kokkos index 46998091fe..c970f72755 100644 --- a/lib/kokkos/Makefile.kokkos +++ b/lib/kokkos/Makefile.kokkos @@ 
-11,8 +11,8 @@ CXXFLAGS += $(SHFLAGS) endif KOKKOS_VERSION_MAJOR = 4 -KOKKOS_VERSION_MINOR = 1 -KOKKOS_VERSION_PATCH = 00 +KOKKOS_VERSION_MINOR = 2 +KOKKOS_VERSION_PATCH = 0 KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc) # Options: Cuda,HIP,SYCL,OpenMPTarget,OpenMP,Threads,Serial @@ -23,7 +23,7 @@ KOKKOS_DEVICES ?= "OpenMP" # NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75,Ampere80,Ampere86,Ada89,Hopper90 # ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX # IBM: BGQ,Power7,Power8,Power9 -# AMD-GPUS: Vega906,Vega908,Vega90A,Navi1030 +# AMD-GPUS: GFX906,GFX908,GFX90A,GFX942,GFX1030,GFX1100 # AMD-CPUS: AMDAVX,Zen,Zen2,Zen3 # Intel-GPUs: Gen9,Gen11,Gen12LP,DG1,XeHP,PVC KOKKOS_ARCH ?= "" @@ -40,7 +40,7 @@ KOKKOS_TRIBITS ?= "no" KOKKOS_STANDALONE_CMAKE ?= "no" # Default settings specific options. -# Options: force_uvm,use_ldg,rdc,enable_lambda,enable_constexpr +# Options: force_uvm,use_ldg,rdc,enable_lambda,enable_constexpr,disable_malloc_async KOKKOS_CUDA_OPTIONS ?= "enable_lambda" # Options: rdc @@ -92,6 +92,7 @@ KOKKOS_INTERNAL_CUDA_USE_UVM := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS), KOKKOS_INTERNAL_CUDA_USE_RELOC := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),rdc) KOKKOS_INTERNAL_CUDA_USE_LAMBDA := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),enable_lambda) KOKKOS_INTERNAL_CUDA_USE_CONSTEXPR := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),enable_constexpr) +KOKKOS_INTERNAL_CUDA_DISABLE_MALLOC_ASYNC := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),disable_malloc_async) KOKKOS_INTERNAL_HPX_ENABLE_ASYNC_DISPATCH := $(call kokkos_has_string,$(KOKKOS_HPX_OPTIONS),enable_async_dispatch) # deprecated KOKKOS_INTERNAL_ENABLE_DESUL_ATOMICS := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_desul_atomics) @@ -412,10 +413,11 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN3), 0) KOKKOS_INTERNAL_USE_ARCH_ZEN := 
$(call kokkos_has_string,$(KOKKOS_ARCH),Zen) endif endif -KOKKOS_INTERNAL_USE_ARCH_VEGA906 := $(call kokkos_has_string,$(KOKKOS_ARCH),Vega906) -KOKKOS_INTERNAL_USE_ARCH_VEGA908 := $(call kokkos_has_string,$(KOKKOS_ARCH),Vega908) -KOKKOS_INTERNAL_USE_ARCH_VEGA90A := $(call kokkos_has_string,$(KOKKOS_ARCH),Vega90A) -KOKKOS_INTERNAL_USE_ARCH_NAVI1030 := $(call kokkos_has_string,$(KOKKOS_ARCH),Navi1030) +KOKKOS_INTERNAL_USE_ARCH_AMD_GFX906 := $(or $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA906),$(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX906)) +KOKKOS_INTERNAL_USE_ARCH_AMD_GFX908 := $(or $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA908),$(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX908)) +KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A := $(or $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA90A),$(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX90A)) +KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030 := $(or $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1030),$(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1030)) +KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100 := $(or $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1100),$(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1100)) # Any AVX? 
KOKKOS_INTERNAL_USE_ARCH_SSE42 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM)) @@ -698,6 +700,12 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_IMPL_CUDA_CLANG_WORKAROUND") endif + + ifeq ($(KOKKOS_INTERNAL_CUDA_DISABLE_MALLOC_ASYNC), 0) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC") + else + tmp := $(call kokkos_append_header,"/* $H""undef KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC */") + endif endif ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) @@ -710,6 +718,7 @@ endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV80), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV80") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARM_NEON") ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) KOKKOS_CXXFLAGS += @@ -722,6 +731,7 @@ endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV81), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV81") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARM_NEON") ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) KOKKOS_CXXFLAGS += @@ -734,6 +744,7 @@ endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_A64FX), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_A64FX") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARM_NEON") KOKKOS_CXXFLAGS += -march=armv8.2-a+sve KOKKOS_LDFLAGS += -march=armv8.2-a+sve @@ -749,7 +760,7 @@ endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_ZEN") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_AVX2") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX2") ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) KOKKOS_CXXFLAGS += -mavx2 @@ -762,7 +773,7 @@ endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN2), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_ZEN2") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_AVX2") + tmp := $(call 
kokkos_append_header,"$H""define KOKKOS_ARCH_AVX2") ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) KOKKOS_CXXFLAGS += -mavx2 @@ -775,7 +786,7 @@ endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN3), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_ZEN3") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_AVX2") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX2") ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) KOKKOS_CXXFLAGS += -mavx2 @@ -789,6 +800,7 @@ endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV80") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV8_THUNDERX") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARM_NEON") ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) KOKKOS_CXXFLAGS += @@ -802,6 +814,7 @@ endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV81") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV8_THUNDERX2") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARM_NEON") ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) KOKKOS_CXXFLAGS += @@ -1085,29 +1098,34 @@ endif # Figure out the architecture flag for ROCm. 
-ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA906), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA906") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA") +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX906), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX906") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU") KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx906 endif -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA908), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA908") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA") +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX908), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX908") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU") KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx908 endif -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA90A), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA90A") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA") +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX90A") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU") KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx90a endif -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NAVI1030), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_NAVI1030") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_NAVI") +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX942") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU") + KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx942 +endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX1030") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU") KOKKOS_INTERNAL_HIP_ARCH_FLAG 
:= --offload-arch=gfx1030 endif -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NAVI1100), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_NAVI1100") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_NAVI") +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX1100") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU") KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx1100 endif diff --git a/lib/kokkos/Makefile.targets b/lib/kokkos/Makefile.targets index 4e08a46c69..ec8770dd7d 100644 --- a/lib/kokkos/Makefile.targets +++ b/lib/kokkos/Makefile.targets @@ -36,6 +36,8 @@ Kokkos_HostSpace_deepcopy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/ $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace_deepcopy.cpp Kokkos_NumericTraits.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_NumericTraits.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_NumericTraits.cpp +Kokkos_Abort.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Abort.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Abort.cpp ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) Kokkos_Serial.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Serial/Kokkos_Serial.cpp diff --git a/lib/kokkos/algorithms/CMakeLists.txt b/lib/kokkos/algorithms/CMakeLists.txt index ab557ab66a..368984647e 100644 --- a/lib/kokkos/algorithms/CMakeLists.txt +++ b/lib/kokkos/algorithms/CMakeLists.txt @@ -2,6 +2,6 @@ IF (NOT Kokkos_INSTALL_TESTING) ADD_SUBDIRECTORY(src) ENDIF() # FIXME_OPENACC: temporarily disabled due to unimplemented features -IF(NOT ((KOKKOS_ENABLE_OPENMPTARGET OR KOKKOS_ENABLE_OPENACC) AND KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC)) +IF(NOT ((KOKKOS_ENABLE_OPENMPTARGET AND KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC) OR KOKKOS_ENABLE_OPENACC)) 
KOKKOS_ADD_TEST_DIRECTORIES(unit_tests) ENDIF() diff --git a/lib/kokkos/algorithms/src/Kokkos_NestedSort.hpp b/lib/kokkos/algorithms/src/Kokkos_NestedSort.hpp index 4c8be792d8..18e0674efe 100644 --- a/lib/kokkos/algorithms/src/Kokkos_NestedSort.hpp +++ b/lib/kokkos/algorithms/src/Kokkos_NestedSort.hpp @@ -14,175 +14,17 @@ // //@HEADER -#ifndef KOKKOS_NESTEDSORT_HPP_ -#define KOKKOS_NESTEDSORT_HPP_ - -#include -#include -#include - -namespace Kokkos { -namespace Experimental { -namespace Impl { - -// true for TeamVectorRange, false for ThreadVectorRange -template -struct NestedRange {}; - -// Specialization for team-level -template <> -struct NestedRange { - template - KOKKOS_FUNCTION static auto create(const TeamMember& t, SizeType len) { - return Kokkos::TeamVectorRange(t, len); - } - template - KOKKOS_FUNCTION static void barrier(const TeamMember& t) { - t.team_barrier(); - } -}; - -// Specialization for thread-level -template <> -struct NestedRange { - template - KOKKOS_FUNCTION static auto create(const TeamMember& t, SizeType len) { - return Kokkos::ThreadVectorRange(t, len); - } - // Barrier is no-op, as vector lanes of a thread are implicitly synchronized - // after parallel region - template - KOKKOS_FUNCTION static void barrier(const TeamMember&) {} -}; - -// When just doing sort (not sort_by_key), use nullptr_t for ValueViewType. -// This only takes the NestedRange instance for template arg deduction. 
-template -KOKKOS_INLINE_FUNCTION void sort_nested_impl( - const TeamMember& t, const KeyViewType& keyView, - [[maybe_unused]] const ValueViewType& valueView, const Comparator& comp, - const NestedRange) { - using SizeType = typename KeyViewType::size_type; - using KeyType = typename KeyViewType::non_const_value_type; - using Range = NestedRange; - SizeType n = keyView.extent(0); - SizeType npot = 1; - SizeType levels = 0; - // FIXME: ceiling power-of-two is a common thing to need - make it a utility - while (npot < n) { - levels++; - npot <<= 1; - } - for (SizeType i = 0; i < levels; i++) { - for (SizeType j = 0; j <= i; j++) { - // n/2 pairs of items are compared in parallel - Kokkos::parallel_for(Range::create(t, npot / 2), [=](const SizeType k) { - // How big are the brown/pink boxes? - // (Terminology comes from Wikipedia diagram) - // https://commons.wikimedia.org/wiki/File:BitonicSort.svg#/media/File:BitonicSort.svg - SizeType boxSize = SizeType(2) << (i - j); - // Which box contains this thread? - SizeType boxID = k >> (i - j); // k * 2 / boxSize; - SizeType boxStart = boxID << (1 + i - j); // boxID * boxSize - SizeType boxOffset = k - (boxStart >> 1); // k - boxID * boxSize / 2; - SizeType elem1 = boxStart + boxOffset; - // In first phase (j == 0, brown box): within a box, compare with the - // opposite value in the box. - // In later phases (j > 0, pink box): within a box, compare with fixed - // distance (boxSize / 2) apart. - SizeType elem2 = (j == 0) ? 
(boxStart + boxSize - 1 - boxOffset) - : (elem1 + boxSize / 2); - if (elem2 < n) { - KeyType key1 = keyView(elem1); - KeyType key2 = keyView(elem2); - if (comp(key2, key1)) { - keyView(elem1) = key2; - keyView(elem2) = key1; - if constexpr (!std::is_same_v) { - Kokkos::Experimental::swap(valueView(elem1), valueView(elem2)); - } - } - } - }); - Range::barrier(t); - } - } -} - -} // namespace Impl - -template -KOKKOS_INLINE_FUNCTION void sort_team(const TeamMember& t, - const ViewType& view) { - Impl::sort_nested_impl(t, view, nullptr, - Experimental::Impl::StdAlgoLessThanBinaryPredicate< - typename ViewType::non_const_value_type>(), - Impl::NestedRange()); -} - -template -KOKKOS_INLINE_FUNCTION void sort_team(const TeamMember& t, const ViewType& view, - const Comparator& comp) { - Impl::sort_nested_impl(t, view, nullptr, comp, Impl::NestedRange()); -} - -template -KOKKOS_INLINE_FUNCTION void sort_by_key_team(const TeamMember& t, - const KeyViewType& keyView, - const ValueViewType& valueView) { - Impl::sort_nested_impl(t, keyView, valueView, - Experimental::Impl::StdAlgoLessThanBinaryPredicate< - typename KeyViewType::non_const_value_type>(), - Impl::NestedRange()); -} - -template -KOKKOS_INLINE_FUNCTION void sort_by_key_team(const TeamMember& t, - const KeyViewType& keyView, - const ValueViewType& valueView, - const Comparator& comp) { - Impl::sort_nested_impl(t, keyView, valueView, comp, - Impl::NestedRange()); -} - -template -KOKKOS_INLINE_FUNCTION void sort_thread(const TeamMember& t, - const ViewType& view) { - Impl::sort_nested_impl(t, view, nullptr, - Experimental::Impl::StdAlgoLessThanBinaryPredicate< - typename ViewType::non_const_value_type>(), - Impl::NestedRange()); -} - -template -KOKKOS_INLINE_FUNCTION void sort_thread(const TeamMember& t, - const ViewType& view, - const Comparator& comp) { - Impl::sort_nested_impl(t, view, nullptr, comp, Impl::NestedRange()); -} - -template -KOKKOS_INLINE_FUNCTION void sort_by_key_thread(const TeamMember& t, - const 
KeyViewType& keyView, - const ValueViewType& valueView) { - Impl::sort_nested_impl(t, keyView, valueView, - Experimental::Impl::StdAlgoLessThanBinaryPredicate< - typename KeyViewType::non_const_value_type>(), - Impl::NestedRange()); -} - -template -KOKKOS_INLINE_FUNCTION void sort_by_key_thread(const TeamMember& t, - const KeyViewType& keyView, - const ValueViewType& valueView, - const Comparator& comp) { - Impl::sort_nested_impl(t, keyView, valueView, comp, - Impl::NestedRange()); -} - -} // namespace Experimental -} // namespace Kokkos +#ifndef KOKKOS_NESTED_SORT_HPP_ +#define KOKKOS_NESTED_SORT_HPP_ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_NESTED_SORT +#endif + +#include "sorting/Kokkos_NestedSortPublicAPI.hpp" + +#ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_NESTED_SORT +#undef KOKKOS_IMPL_PUBLIC_INCLUDE +#undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_NESTED_SORT +#endif #endif diff --git a/lib/kokkos/algorithms/src/Kokkos_Random.hpp b/lib/kokkos/algorithms/src/Kokkos_Random.hpp index abb028d28e..2d7d236d2f 100644 --- a/lib/kokkos/algorithms/src/Kokkos_Random.hpp +++ b/lib/kokkos/algorithms/src/Kokkos_Random.hpp @@ -956,6 +956,8 @@ class Random_XorShift64_Pool { KOKKOS_INLINE_FUNCTION void free_state(const Random_XorShift64& state) const { state_(state.state_idx_, 0) = state.state_; + // Release the lock only after the state has been updated in memory + Kokkos::memory_fence(); locks_(state.state_idx_, 0) = 0; } }; @@ -1208,7 +1210,9 @@ class Random_XorShift1024_Pool { KOKKOS_INLINE_FUNCTION void free_state(const Random_XorShift1024& state) const { for (int i = 0; i < 16; i++) state_(state.state_idx_, i) = state.state_[i]; - p_(state.state_idx_, 0) = state.p_; + p_(state.state_idx_, 0) = state.p_; + // Release the lock only after the state has been updated in memory + Kokkos::memory_fence(); locks_(state.state_idx_, 0) = 0; } }; diff --git a/lib/kokkos/algorithms/src/Kokkos_Sort.hpp 
b/lib/kokkos/algorithms/src/Kokkos_Sort.hpp index 10f9ad6462..f77484cc55 100644 --- a/lib/kokkos/algorithms/src/Kokkos_Sort.hpp +++ b/lib/kokkos/algorithms/src/Kokkos_Sort.hpp @@ -21,762 +21,9 @@ #define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_SORT #endif -#include -#include -#include -#include - -#if defined(KOKKOS_ENABLE_CUDA) - -// Workaround for `Instruction 'shfl' without '.sync' is not supported on -// .target sm_70 and higher from PTX ISA version 6.4`. -// Also see https://github.com/NVIDIA/cub/pull/170. -#if !defined(CUB_USE_COOPERATIVE_GROUPS) -#define CUB_USE_COOPERATIVE_GROUPS -#endif - -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wshadow" - -#if defined(KOKKOS_COMPILER_CLANG) -// Some versions of Clang fail to compile Thrust, failing with errors like -// this: -// /thrust/system/cuda/detail/core/agent_launcher.h:557:11: -// error: use of undeclared identifier 'va_printf' -// The exact combination of versions for Clang and Thrust (or CUDA) for this -// failure was not investigated, however even very recent version combination -// (Clang 10.0.0 and Cuda 10.0) demonstrated failure. 
-// -// Defining _CubLog here locally allows us to avoid that code path, however -// disabling some debugging diagnostics -#pragma push_macro("_CubLog") -#ifdef _CubLog -#undef _CubLog -#endif -#define _CubLog -#include -#include -#pragma pop_macro("_CubLog") -#else -#include -#include -#endif - -#pragma GCC diagnostic pop - -#endif - -#if defined(KOKKOS_ENABLE_ONEDPL) -#include -#include -#endif - -namespace Kokkos { - -namespace Impl { - -template -struct CopyOp; - -template -struct CopyOp { - KOKKOS_INLINE_FUNCTION - static void copy(DstViewType const& dst, size_t i_dst, SrcViewType const& src, - size_t i_src) { - dst(i_dst) = src(i_src); - } -}; - -template -struct CopyOp { - KOKKOS_INLINE_FUNCTION - static void copy(DstViewType const& dst, size_t i_dst, SrcViewType const& src, - size_t i_src) { - for (int j = 0; j < (int)dst.extent(1); j++) dst(i_dst, j) = src(i_src, j); - } -}; - -template -struct CopyOp { - KOKKOS_INLINE_FUNCTION - static void copy(DstViewType const& dst, size_t i_dst, SrcViewType const& src, - size_t i_src) { - for (int j = 0; j < dst.extent(1); j++) - for (int k = 0; k < dst.extent(2); k++) - dst(i_dst, j, k) = src(i_src, j, k); - } -}; -} // namespace Impl - -//---------------------------------------------------------------------------- - -template -class BinSort { - public: - template - struct copy_functor { - using src_view_type = typename SrcViewType::const_type; - - using copy_op = Impl::CopyOp; - - DstViewType dst_values; - src_view_type src_values; - int dst_offset; - - copy_functor(DstViewType const& dst_values_, int const& dst_offset_, - SrcViewType const& src_values_) - : dst_values(dst_values_), - src_values(src_values_), - dst_offset(dst_offset_) {} - - KOKKOS_INLINE_FUNCTION - void operator()(const int& i) const { - copy_op::copy(dst_values, i + dst_offset, src_values, i); - } - }; - - template - struct copy_permute_functor { - // If a Kokkos::View then can generate constant random access - // otherwise can only use the 
constant type. - - using src_view_type = std::conditional_t< - Kokkos::is_view::value, - Kokkos::View -#endif - >, - typename SrcViewType::const_type>; - - using perm_view_type = typename PermuteViewType::const_type; - - using copy_op = Impl::CopyOp; - - DstViewType dst_values; - perm_view_type sort_order; - src_view_type src_values; - int src_offset; - - copy_permute_functor(DstViewType const& dst_values_, - PermuteViewType const& sort_order_, - SrcViewType const& src_values_, int const& src_offset_) - : dst_values(dst_values_), - sort_order(sort_order_), - src_values(src_values_), - src_offset(src_offset_) {} - - KOKKOS_INLINE_FUNCTION - void operator()(const int& i) const { - copy_op::copy(dst_values, i, src_values, src_offset + sort_order(i)); - } - }; - - // Naming this alias "execution_space" would be problematic since it would be - // considered as execution space for the various functors which might use - // another execution space through sort() or create_permute_vector(). - using exec_space = typename Space::execution_space; - using bin_op_type = BinSortOp; - - struct bin_count_tag {}; - struct bin_offset_tag {}; - struct bin_binning_tag {}; - struct bin_sort_bins_tag {}; - - public: - using size_type = SizeType; - using value_type = size_type; - - using offset_type = Kokkos::View; - using bin_count_type = Kokkos::View; - - using const_key_view_type = typename KeyViewType::const_type; - - // If a Kokkos::View then can generate constant random access - // otherwise can only use the constant type. 
- - using const_rnd_key_view_type = std::conditional_t< - Kokkos::is_view::value, - Kokkos::View >, - const_key_view_type>; - - using non_const_key_scalar = typename KeyViewType::non_const_value_type; - using const_key_scalar = typename KeyViewType::const_value_type; - - using bin_count_atomic_type = - Kokkos::View >; - - private: - const_key_view_type keys; - const_rnd_key_view_type keys_rnd; - - public: - BinSortOp bin_op; - offset_type bin_offsets; - bin_count_atomic_type bin_count_atomic; - bin_count_type bin_count_const; - offset_type sort_order; - - int range_begin; - int range_end; - bool sort_within_bins; - - public: -#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 - KOKKOS_DEPRECATED BinSort() = default; -#else - BinSort() = delete; -#endif - - //---------------------------------------- - // Constructor: takes the keys, the binning_operator and optionally whether to - // sort within bins (default false) - template - BinSort(const ExecutionSpace& exec, const_key_view_type keys_, - int range_begin_, int range_end_, BinSortOp bin_op_, - bool sort_within_bins_ = false) - : keys(keys_), - keys_rnd(keys_), - bin_op(bin_op_), - bin_offsets(), - bin_count_atomic(), - bin_count_const(), - sort_order(), - range_begin(range_begin_), - range_end(range_end_), - sort_within_bins(sort_within_bins_) { - static_assert( - Kokkos::SpaceAccessibility::accessible, - "The provided execution space must be able to access the memory space " - "BinSort was initialized with!"); - if (bin_op.max_bins() <= 0) - Kokkos::abort( - "The number of bins in the BinSortOp object must be greater than 0!"); - bin_count_atomic = Kokkos::View( - "Kokkos::SortImpl::BinSortFunctor::bin_count", bin_op.max_bins()); - bin_count_const = bin_count_atomic; - bin_offsets = - offset_type(view_alloc(exec, WithoutInitializing, - "Kokkos::SortImpl::BinSortFunctor::bin_offsets"), - bin_op.max_bins()); - sort_order = - offset_type(view_alloc(exec, WithoutInitializing, - "Kokkos::SortImpl::BinSortFunctor::sort_order"), - 
range_end - range_begin); - } - - BinSort(const_key_view_type keys_, int range_begin_, int range_end_, - BinSortOp bin_op_, bool sort_within_bins_ = false) - : BinSort(exec_space{}, keys_, range_begin_, range_end_, bin_op_, - sort_within_bins_) {} - - template - BinSort(const ExecutionSpace& exec, const_key_view_type keys_, - BinSortOp bin_op_, bool sort_within_bins_ = false) - : BinSort(exec, keys_, 0, keys_.extent(0), bin_op_, sort_within_bins_) {} - - BinSort(const_key_view_type keys_, BinSortOp bin_op_, - bool sort_within_bins_ = false) - : BinSort(exec_space{}, keys_, bin_op_, sort_within_bins_) {} - - //---------------------------------------- - // Create the permutation vector, the bin_offset array and the bin_count - // array. Can be called again if keys changed - template - void create_permute_vector(const ExecutionSpace& exec) { - static_assert( - Kokkos::SpaceAccessibility::accessible, - "The provided execution space must be able to access the memory space " - "BinSort was initialized with!"); - - const size_t len = range_end - range_begin; - Kokkos::parallel_for( - "Kokkos::Sort::BinCount", - Kokkos::RangePolicy(exec, 0, len), - *this); - Kokkos::parallel_scan("Kokkos::Sort::BinOffset", - Kokkos::RangePolicy( - exec, 0, bin_op.max_bins()), - *this); - - Kokkos::deep_copy(exec, bin_count_atomic, 0); - Kokkos::parallel_for( - "Kokkos::Sort::BinBinning", - Kokkos::RangePolicy(exec, 0, len), - *this); - - if (sort_within_bins) - Kokkos::parallel_for( - "Kokkos::Sort::BinSort", - Kokkos::RangePolicy( - exec, 0, bin_op.max_bins()), - *this); - } - - // Create the permutation vector, the bin_offset array and the bin_count - // array. 
Can be called again if keys changed - void create_permute_vector() { - Kokkos::fence("Kokkos::Binsort::create_permute_vector: before"); - exec_space e{}; - create_permute_vector(e); - e.fence("Kokkos::Binsort::create_permute_vector: after"); - } - - // Sort a subset of a view with respect to the first dimension using the - // permutation array - template - void sort(const ExecutionSpace& exec, ValuesViewType const& values, - int values_range_begin, int values_range_end) const { - if (values.extent(0) == 0) { - return; - } - - static_assert( - Kokkos::SpaceAccessibility::accessible, - "The provided execution space must be able to access the memory space " - "BinSort was initialized with!"); - static_assert( - Kokkos::SpaceAccessibility< - ExecutionSpace, typename ValuesViewType::memory_space>::accessible, - "The provided execution space must be able to access the memory space " - "of the View argument!"); - - const size_t len = range_end - range_begin; - const size_t values_len = values_range_end - values_range_begin; - if (len != values_len) { - Kokkos::abort( - "BinSort::sort: values range length != permutation vector length"); - } - - using scratch_view_type = - Kokkos::View; - scratch_view_type sorted_values( - view_alloc(exec, WithoutInitializing, - "Kokkos::SortImpl::BinSortFunctor::sorted_values"), - values.rank_dynamic > 0 ? len : KOKKOS_IMPL_CTOR_DEFAULT_ARG, - values.rank_dynamic > 1 ? values.extent(1) - : KOKKOS_IMPL_CTOR_DEFAULT_ARG, - values.rank_dynamic > 2 ? values.extent(2) - : KOKKOS_IMPL_CTOR_DEFAULT_ARG, - values.rank_dynamic > 3 ? values.extent(3) - : KOKKOS_IMPL_CTOR_DEFAULT_ARG, - values.rank_dynamic > 4 ? values.extent(4) - : KOKKOS_IMPL_CTOR_DEFAULT_ARG, - values.rank_dynamic > 5 ? values.extent(5) - : KOKKOS_IMPL_CTOR_DEFAULT_ARG, - values.rank_dynamic > 6 ? values.extent(6) - : KOKKOS_IMPL_CTOR_DEFAULT_ARG, - values.rank_dynamic > 7 ? 
values.extent(7) - : KOKKOS_IMPL_CTOR_DEFAULT_ARG); - - { - copy_permute_functor - functor(sorted_values, sort_order, values, - values_range_begin - range_begin); - - parallel_for("Kokkos::Sort::CopyPermute", - Kokkos::RangePolicy(exec, 0, len), functor); - } - - { - copy_functor functor( - values, range_begin, sorted_values); - - parallel_for("Kokkos::Sort::Copy", - Kokkos::RangePolicy(exec, 0, len), functor); - } - } - - // Sort a subset of a view with respect to the first dimension using the - // permutation array - template - void sort(ValuesViewType const& values, int values_range_begin, - int values_range_end) const { - Kokkos::fence("Kokkos::Binsort::sort: before"); - exec_space exec; - sort(exec, values, values_range_begin, values_range_end); - exec.fence("Kokkos::BinSort:sort: after"); - } - - template - void sort(ExecutionSpace const& exec, ValuesViewType const& values) const { - this->sort(exec, values, 0, /*values.extent(0)*/ range_end - range_begin); - } - - template - void sort(ValuesViewType const& values) const { - this->sort(values, 0, /*values.extent(0)*/ range_end - range_begin); - } - - // Get the permutation vector - KOKKOS_INLINE_FUNCTION - offset_type get_permute_vector() const { return sort_order; } - - // Get the start offsets for each bin - KOKKOS_INLINE_FUNCTION - offset_type get_bin_offsets() const { return bin_offsets; } - - // Get the count for each bin - KOKKOS_INLINE_FUNCTION - bin_count_type get_bin_count() const { return bin_count_const; } - - public: - KOKKOS_INLINE_FUNCTION - void operator()(const bin_count_tag& /*tag*/, const int i) const { - const int j = range_begin + i; - bin_count_atomic(bin_op.bin(keys, j))++; - } - - KOKKOS_INLINE_FUNCTION - void operator()(const bin_offset_tag& /*tag*/, const int i, - value_type& offset, const bool& final) const { - if (final) { - bin_offsets(i) = offset; - } - offset += bin_count_const(i); - } - - KOKKOS_INLINE_FUNCTION - void operator()(const bin_binning_tag& /*tag*/, const int i) const 
{ - const int j = range_begin + i; - const int bin = bin_op.bin(keys, j); - const int count = bin_count_atomic(bin)++; - - sort_order(bin_offsets(bin) + count) = j; - } - - KOKKOS_INLINE_FUNCTION - void operator()(const bin_sort_bins_tag& /*tag*/, const int i) const { - auto bin_size = bin_count_const(i); - if (bin_size <= 1) return; - constexpr bool use_std_sort = - std::is_same_v; - int lower_bound = bin_offsets(i); - int upper_bound = lower_bound + bin_size; - // Switching to std::sort for more than 10 elements has been found - // reasonable experimentally. - if (use_std_sort && bin_size > 10) { - if constexpr (use_std_sort) { - std::sort(&sort_order(lower_bound), &sort_order(upper_bound), - [this](int p, int q) { return bin_op(keys_rnd, p, q); }); - } - } else { - for (int k = lower_bound + 1; k < upper_bound; ++k) { - int old_idx = sort_order(k); - int j = k - 1; - while (j >= lower_bound) { - int new_idx = sort_order(j); - if (!bin_op(keys_rnd, old_idx, new_idx)) break; - sort_order(j + 1) = new_idx; - --j; - } - sort_order(j + 1) = old_idx; - } - } - } -}; - -//---------------------------------------------------------------------------- - -template -struct BinOp1D { - int max_bins_ = {}; - double mul_ = {}; - double min_ = {}; - -#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 - KOKKOS_DEPRECATED BinOp1D() = default; -#else - BinOp1D() = delete; -#endif - - // Construct BinOp with number of bins, minimum value and maximum value - BinOp1D(int max_bins__, typename KeyViewType::const_value_type min, - typename KeyViewType::const_value_type max) - : max_bins_(max_bins__ + 1), - // Cast to double to avoid possible overflow when using integer - mul_(static_cast(max_bins__) / - (static_cast(max) - static_cast(min))), - min_(static_cast(min)) { - // For integral types the number of bins may be larger than the range - // in which case we can exactly have one unique value per bin - // and then don't need to sort bins. 
- if (std::is_integral::value && - (static_cast(max) - static_cast(min)) <= - static_cast(max_bins__)) { - mul_ = 1.; - } - } - - // Determine bin index from key value - template - KOKKOS_INLINE_FUNCTION int bin(ViewType& keys, const int& i) const { - return static_cast(mul_ * (static_cast(keys(i)) - min_)); - } - - // Return maximum bin index + 1 - KOKKOS_INLINE_FUNCTION - int max_bins() const { return max_bins_; } - - // Compare to keys within a bin if true new_val will be put before old_val - template - KOKKOS_INLINE_FUNCTION bool operator()(ViewType& keys, iType1& i1, - iType2& i2) const { - return keys(i1) < keys(i2); - } -}; - -template -struct BinOp3D { - int max_bins_[3] = {}; - double mul_[3] = {}; - double min_[3] = {}; - -#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 - KOKKOS_DEPRECATED BinOp3D() = default; -#else - BinOp3D() = delete; -#endif - - BinOp3D(int max_bins__[], typename KeyViewType::const_value_type min[], - typename KeyViewType::const_value_type max[]) { - max_bins_[0] = max_bins__[0]; - max_bins_[1] = max_bins__[1]; - max_bins_[2] = max_bins__[2]; - mul_[0] = static_cast(max_bins__[0]) / - (static_cast(max[0]) - static_cast(min[0])); - mul_[1] = static_cast(max_bins__[1]) / - (static_cast(max[1]) - static_cast(min[1])); - mul_[2] = static_cast(max_bins__[2]) / - (static_cast(max[2]) - static_cast(min[2])); - min_[0] = static_cast(min[0]); - min_[1] = static_cast(min[1]); - min_[2] = static_cast(min[2]); - } - - template - KOKKOS_INLINE_FUNCTION int bin(ViewType& keys, const int& i) const { - return int((((int(mul_[0] * (keys(i, 0) - min_[0])) * max_bins_[1]) + - int(mul_[1] * (keys(i, 1) - min_[1]))) * - max_bins_[2]) + - int(mul_[2] * (keys(i, 2) - min_[2]))); - } - - KOKKOS_INLINE_FUNCTION - int max_bins() const { return max_bins_[0] * max_bins_[1] * max_bins_[2]; } - - template - KOKKOS_INLINE_FUNCTION bool operator()(ViewType& keys, iType1& i1, - iType2& i2) const { - if (keys(i1, 0) > keys(i2, 0)) - return true; - else if (keys(i1, 0) == 
keys(i2, 0)) { - if (keys(i1, 1) > keys(i2, 1)) - return true; - else if (keys(i1, 1) == keys(i2, 1)) { - if (keys(i1, 2) > keys(i2, 2)) return true; - } - } - return false; - } -}; - -namespace Impl { - -template -struct min_max_functor { - using minmax_scalar = - Kokkos::MinMaxScalar; - - ViewType view; - min_max_functor(const ViewType& view_) : view(view_) {} - - KOKKOS_INLINE_FUNCTION - void operator()(const size_t& i, minmax_scalar& minmax) const { - if (view(i) < minmax.min_val) minmax.min_val = view(i); - if (view(i) > minmax.max_val) minmax.max_val = view(i); - } -}; - -} // namespace Impl - -template -std::enable_if_t<(Kokkos::is_execution_space::value) && - (!SpaceAccessibility< - HostSpace, typename Kokkos::View:: - memory_space>::accessible)> -sort(const ExecutionSpace& exec, - const Kokkos::View& view) { - if (view.extent(0) == 0) { - return; - } - - using ViewType = Kokkos::View; - using CompType = BinOp1D; - - Kokkos::MinMaxScalar result; - Kokkos::MinMax reducer(result); - parallel_reduce("Kokkos::Sort::FindExtent", - Kokkos::RangePolicy( - exec, 0, view.extent(0)), - Impl::min_max_functor(view), reducer); - if (result.min_val == result.max_val) return; - // For integral types the number of bins may be larger than the range - // in which case we can exactly have one unique value per bin - // and then don't need to sort bins. - bool sort_in_bins = true; - // TODO: figure out better max_bins then this ... 
- int64_t max_bins = view.extent(0) / 2; - if (std::is_integral::value) { - // Cast to double to avoid possible overflow when using integer - auto const max_val = static_cast(result.max_val); - auto const min_val = static_cast(result.min_val); - // using 10M as the cutoff for special behavior (roughly 40MB for the count - // array) - if ((max_val - min_val) < 10000000) { - max_bins = max_val - min_val + 1; - sort_in_bins = false; - } - } - if (std::is_floating_point::value) { - KOKKOS_ASSERT(std::isfinite(static_cast(result.max_val) - - static_cast(result.min_val))); - } - - BinSort bin_sort( - view, CompType(max_bins, result.min_val, result.max_val), sort_in_bins); - bin_sort.create_permute_vector(exec); - bin_sort.sort(exec, view); -} - -#if defined(KOKKOS_ENABLE_ONEDPL) -template -void sort(const Experimental::SYCL& space, - const Kokkos::View& view) { - if (view.extent(0) == 0) { - return; - } - - using ViewType = Kokkos::View; - static_assert(SpaceAccessibility::accessible, - "SYCL execution space is not able to access the memory space " - "of the View argument!"); - - auto queue = space.sycl_queue(); - auto policy = oneapi::dpl::execution::make_device_policy(queue); - - // Can't use Experimental::begin/end here since the oneDPL then assumes that - // the data is on the host. 
- static_assert( - ViewType::rank == 1 && - (std::is_same::value || - std::is_same::value), - "SYCL sort only supports contiguous 1D Views."); - const int n = view.extent(0); - oneapi::dpl::sort(policy, view.data(), view.data() + n); -} -#endif - -template -std::enable_if_t<(Kokkos::is_execution_space::value) && - (SpaceAccessibility< - HostSpace, typename Kokkos::View:: - memory_space>::accessible)> -sort(const ExecutionSpace&, const Kokkos::View& view) { - if (view.extent(0) == 0) { - return; - } - auto first = Experimental::begin(view); - auto last = Experimental::end(view); - std::sort(first, last); -} - -#if defined(KOKKOS_ENABLE_CUDA) -template -void sort(const Cuda& space, - const Kokkos::View& view) { - if (view.extent(0) == 0) { - return; - } - const auto exec = thrust::cuda::par.on(space.cuda_stream()); - auto first = Experimental::begin(view); - auto last = Experimental::end(view); - thrust::sort(exec, first, last); -} -#endif - -template -void sort(ViewType const& view) { - Kokkos::fence("Kokkos::sort: before"); - - if (view.extent(0) == 0) { - return; - } - - typename ViewType::execution_space exec; - sort(exec, view); - exec.fence("Kokkos::sort: fence after sorting"); -} - -template -std::enable_if_t::value> sort( - const ExecutionSpace& exec, ViewType view, size_t const begin, - size_t const end) { - if (view.extent(0) == 0) { - return; - } - - using range_policy = Kokkos::RangePolicy; - using CompType = BinOp1D; - - Kokkos::MinMaxScalar result; - Kokkos::MinMax reducer(result); - - parallel_reduce("Kokkos::Sort::FindExtent", range_policy(exec, begin, end), - Impl::min_max_functor(view), reducer); - - if (result.min_val == result.max_val) return; - - BinSort bin_sort( - exec, view, begin, end, - CompType((end - begin) / 2, result.min_val, result.max_val), true); - - bin_sort.create_permute_vector(exec); - bin_sort.sort(exec, view, begin, end); -} - -template -void sort(ViewType view, size_t const begin, size_t const end) { - 
Kokkos::fence("Kokkos::sort: before"); - - if (view.extent(0) == 0) { - return; - } - - typename ViewType::execution_space exec; - sort(exec, view, begin, end); - exec.fence("Kokkos::Sort: fence after sorting"); -} - -} // namespace Kokkos +#include "sorting/Kokkos_BinSortPublicAPI.hpp" +#include "sorting/Kokkos_SortPublicAPI.hpp" +#include "sorting/Kokkos_NestedSortPublicAPI.hpp" #ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_SORT #undef KOKKOS_IMPL_PUBLIC_INCLUDE diff --git a/lib/kokkos/algorithms/src/sorting/Kokkos_BinOpsPublicAPI.hpp b/lib/kokkos/algorithms/src/sorting/Kokkos_BinOpsPublicAPI.hpp new file mode 100644 index 0000000000..73e751f572 --- /dev/null +++ b/lib/kokkos/algorithms/src/sorting/Kokkos_BinOpsPublicAPI.hpp @@ -0,0 +1,129 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOS_BIN_OPS_PUBLIC_API_HPP_ +#define KOKKOS_BIN_OPS_PUBLIC_API_HPP_ + +#include +#include + +namespace Kokkos { + +template +struct BinOp1D { + int max_bins_ = {}; + double mul_ = {}; + double min_ = {}; + +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 + KOKKOS_DEPRECATED BinOp1D() = default; +#else + BinOp1D() = delete; +#endif + + // Construct BinOp with number of bins, minimum value and maximum value + BinOp1D(int max_bins__, typename KeyViewType::const_value_type min, + typename KeyViewType::const_value_type max) + : max_bins_(max_bins__ + 1), + // Cast to double to avoid possible overflow when using integer + mul_(static_cast(max_bins__) / + (static_cast(max) - static_cast(min))), + min_(static_cast(min)) { + // For integral types the number of bins may be larger than the range + // in which case we can exactly have one unique value per bin + // and then don't need to sort bins. + if (std::is_integral::value && + (static_cast(max) - static_cast(min)) <= + static_cast(max_bins__)) { + mul_ = 1.; + } + } + + // Determine bin index from key value + template + KOKKOS_INLINE_FUNCTION int bin(ViewType& keys, const int& i) const { + return static_cast(mul_ * (static_cast(keys(i)) - min_)); + } + + // Return maximum bin index + 1 + KOKKOS_INLINE_FUNCTION + int max_bins() const { return max_bins_; } + + // Compare to keys within a bin if true new_val will be put before old_val + template + KOKKOS_INLINE_FUNCTION bool operator()(ViewType& keys, iType1& i1, + iType2& i2) const { + return keys(i1) < keys(i2); + } +}; + +template +struct BinOp3D { + int max_bins_[3] = {}; + double mul_[3] = {}; + double min_[3] = {}; + +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 + KOKKOS_DEPRECATED BinOp3D() = default; +#else + BinOp3D() = delete; +#endif + + BinOp3D(int max_bins__[], typename KeyViewType::const_value_type min[], + typename KeyViewType::const_value_type max[]) { + max_bins_[0] = max_bins__[0]; + 
max_bins_[1] = max_bins__[1]; + max_bins_[2] = max_bins__[2]; + mul_[0] = static_cast(max_bins__[0]) / + (static_cast(max[0]) - static_cast(min[0])); + mul_[1] = static_cast(max_bins__[1]) / + (static_cast(max[1]) - static_cast(min[1])); + mul_[2] = static_cast(max_bins__[2]) / + (static_cast(max[2]) - static_cast(min[2])); + min_[0] = static_cast(min[0]); + min_[1] = static_cast(min[1]); + min_[2] = static_cast(min[2]); + } + + template + KOKKOS_INLINE_FUNCTION int bin(ViewType& keys, const int& i) const { + return int((((int(mul_[0] * (keys(i, 0) - min_[0])) * max_bins_[1]) + + int(mul_[1] * (keys(i, 1) - min_[1]))) * + max_bins_[2]) + + int(mul_[2] * (keys(i, 2) - min_[2]))); + } + + KOKKOS_INLINE_FUNCTION + int max_bins() const { return max_bins_[0] * max_bins_[1] * max_bins_[2]; } + + template + KOKKOS_INLINE_FUNCTION bool operator()(ViewType& keys, iType1& i1, + iType2& i2) const { + if (keys(i1, 0) > keys(i2, 0)) + return true; + else if (keys(i1, 0) == keys(i2, 0)) { + if (keys(i1, 1) > keys(i2, 1)) + return true; + else if (keys(i1, 1) == keys(i2, 1)) { + if (keys(i1, 2) > keys(i2, 2)) return true; + } + } + return false; + } +}; + +} // namespace Kokkos +#endif diff --git a/lib/kokkos/algorithms/src/sorting/Kokkos_BinSortPublicAPI.hpp b/lib/kokkos/algorithms/src/sorting/Kokkos_BinSortPublicAPI.hpp new file mode 100644 index 0000000000..c399279fe4 --- /dev/null +++ b/lib/kokkos/algorithms/src/sorting/Kokkos_BinSortPublicAPI.hpp @@ -0,0 +1,410 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOS_BIN_SORT_PUBLIC_API_HPP_ +#define KOKKOS_BIN_SORT_PUBLIC_API_HPP_ + +#include "Kokkos_BinOpsPublicAPI.hpp" +#include "impl/Kokkos_CopyOpsForBinSortImpl.hpp" +#include +#include + +namespace Kokkos { + +template +class BinSort { + public: + template + struct copy_functor { + using src_view_type = typename SrcViewType::const_type; + + using copy_op = Impl::CopyOp; + + DstViewType dst_values; + src_view_type src_values; + int dst_offset; + + copy_functor(DstViewType const& dst_values_, int const& dst_offset_, + SrcViewType const& src_values_) + : dst_values(dst_values_), + src_values(src_values_), + dst_offset(dst_offset_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const int& i) const { + copy_op::copy(dst_values, i + dst_offset, src_values, i); + } + }; + + template + struct copy_permute_functor { + // If a Kokkos::View then can generate constant random access + // otherwise can only use the constant type. 
+ + using src_view_type = std::conditional_t< + Kokkos::is_view::value, + Kokkos::View= 230700) + , + Kokkos::MemoryTraits +#endif + >, + typename SrcViewType::const_type>; + + using perm_view_type = typename PermuteViewType::const_type; + + using copy_op = Impl::CopyOp; + + DstViewType dst_values; + perm_view_type sort_order; + src_view_type src_values; + int src_offset; + + copy_permute_functor(DstViewType const& dst_values_, + PermuteViewType const& sort_order_, + SrcViewType const& src_values_, int const& src_offset_) + : dst_values(dst_values_), + sort_order(sort_order_), + src_values(src_values_), + src_offset(src_offset_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const int& i) const { + copy_op::copy(dst_values, i, src_values, src_offset + sort_order(i)); + } + }; + + // Naming this alias "execution_space" would be problematic since it would be + // considered as execution space for the various functors which might use + // another execution space through sort() or create_permute_vector(). + using exec_space = typename Space::execution_space; + using bin_op_type = BinSortOp; + + struct bin_count_tag {}; + struct bin_offset_tag {}; + struct bin_binning_tag {}; + struct bin_sort_bins_tag {}; + + public: + using size_type = SizeType; + using value_type = size_type; + + using offset_type = Kokkos::View; + using bin_count_type = Kokkos::View; + + using const_key_view_type = typename KeyViewType::const_type; + + // If a Kokkos::View then can generate constant random access + // otherwise can only use the constant type. 
+ + using const_rnd_key_view_type = std::conditional_t< + Kokkos::is_view::value, + Kokkos::View >, + const_key_view_type>; + + using non_const_key_scalar = typename KeyViewType::non_const_value_type; + using const_key_scalar = typename KeyViewType::const_value_type; + + using bin_count_atomic_type = + Kokkos::View >; + + private: + const_key_view_type keys; + const_rnd_key_view_type keys_rnd; + + public: + BinSortOp bin_op; + offset_type bin_offsets; + bin_count_atomic_type bin_count_atomic; + bin_count_type bin_count_const; + offset_type sort_order; + + int range_begin; + int range_end; + bool sort_within_bins; + + public: +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 + KOKKOS_DEPRECATED BinSort() = default; +#else + BinSort() = delete; +#endif + + //---------------------------------------- + // Constructor: takes the keys, the binning_operator and optionally whether to + // sort within bins (default false) + template + BinSort(const ExecutionSpace& exec, const_key_view_type keys_, + int range_begin_, int range_end_, BinSortOp bin_op_, + bool sort_within_bins_ = false) + : keys(keys_), + keys_rnd(keys_), + bin_op(bin_op_), + bin_offsets(), + bin_count_atomic(), + bin_count_const(), + sort_order(), + range_begin(range_begin_), + range_end(range_end_), + sort_within_bins(sort_within_bins_) { + static_assert( + Kokkos::SpaceAccessibility::accessible, + "The provided execution space must be able to access the memory space " + "BinSort was initialized with!"); + if (bin_op.max_bins() <= 0) + Kokkos::abort( + "The number of bins in the BinSortOp object must be greater than 0!"); + bin_count_atomic = Kokkos::View( + "Kokkos::SortImpl::BinSortFunctor::bin_count", bin_op.max_bins()); + bin_count_const = bin_count_atomic; + bin_offsets = + offset_type(view_alloc(exec, WithoutInitializing, + "Kokkos::SortImpl::BinSortFunctor::bin_offsets"), + bin_op.max_bins()); + sort_order = + offset_type(view_alloc(exec, WithoutInitializing, + "Kokkos::SortImpl::BinSortFunctor::sort_order"), + 
range_end - range_begin); + } + + BinSort(const_key_view_type keys_, int range_begin_, int range_end_, + BinSortOp bin_op_, bool sort_within_bins_ = false) + : BinSort(exec_space{}, keys_, range_begin_, range_end_, bin_op_, + sort_within_bins_) {} + + template + BinSort(const ExecutionSpace& exec, const_key_view_type keys_, + BinSortOp bin_op_, bool sort_within_bins_ = false) + : BinSort(exec, keys_, 0, keys_.extent(0), bin_op_, sort_within_bins_) {} + + BinSort(const_key_view_type keys_, BinSortOp bin_op_, + bool sort_within_bins_ = false) + : BinSort(exec_space{}, keys_, bin_op_, sort_within_bins_) {} + + //---------------------------------------- + // Create the permutation vector, the bin_offset array and the bin_count + // array. Can be called again if keys changed + template + void create_permute_vector(const ExecutionSpace& exec) { + static_assert( + Kokkos::SpaceAccessibility::accessible, + "The provided execution space must be able to access the memory space " + "BinSort was initialized with!"); + + const size_t len = range_end - range_begin; + Kokkos::parallel_for( + "Kokkos::Sort::BinCount", + Kokkos::RangePolicy(exec, 0, len), + *this); + Kokkos::parallel_scan("Kokkos::Sort::BinOffset", + Kokkos::RangePolicy( + exec, 0, bin_op.max_bins()), + *this); + + Kokkos::deep_copy(exec, bin_count_atomic, 0); + Kokkos::parallel_for( + "Kokkos::Sort::BinBinning", + Kokkos::RangePolicy(exec, 0, len), + *this); + + if (sort_within_bins) + Kokkos::parallel_for( + "Kokkos::Sort::BinSort", + Kokkos::RangePolicy( + exec, 0, bin_op.max_bins()), + *this); + } + + // Create the permutation vector, the bin_offset array and the bin_count + // array. 
Can be called again if keys changed + void create_permute_vector() { + Kokkos::fence("Kokkos::Binsort::create_permute_vector: before"); + exec_space e{}; + create_permute_vector(e); + e.fence("Kokkos::Binsort::create_permute_vector: after"); + } + + // Sort a subset of a view with respect to the first dimension using the + // permutation array + template + void sort(const ExecutionSpace& exec, ValuesViewType const& values, + int values_range_begin, int values_range_end) const { + if (values.extent(0) == 0) { + return; + } + + static_assert( + Kokkos::SpaceAccessibility::accessible, + "The provided execution space must be able to access the memory space " + "BinSort was initialized with!"); + static_assert( + Kokkos::SpaceAccessibility< + ExecutionSpace, typename ValuesViewType::memory_space>::accessible, + "The provided execution space must be able to access the memory space " + "of the View argument!"); + + const size_t len = range_end - range_begin; + const size_t values_len = values_range_end - values_range_begin; + if (len != values_len) { + Kokkos::abort( + "BinSort::sort: values range length != permutation vector length"); + } + + using scratch_view_type = + Kokkos::View; + scratch_view_type sorted_values( + view_alloc(exec, WithoutInitializing, + "Kokkos::SortImpl::BinSortFunctor::sorted_values"), + values.rank_dynamic > 0 ? len : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + values.rank_dynamic > 1 ? values.extent(1) + : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + values.rank_dynamic > 2 ? values.extent(2) + : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + values.rank_dynamic > 3 ? values.extent(3) + : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + values.rank_dynamic > 4 ? values.extent(4) + : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + values.rank_dynamic > 5 ? values.extent(5) + : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + values.rank_dynamic > 6 ? values.extent(6) + : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + values.rank_dynamic > 7 ? 
values.extent(7) + : KOKKOS_IMPL_CTOR_DEFAULT_ARG); + + { + copy_permute_functor + functor(sorted_values, sort_order, values, + values_range_begin - range_begin); + + parallel_for("Kokkos::Sort::CopyPermute", + Kokkos::RangePolicy(exec, 0, len), functor); + } + + { + copy_functor functor( + values, range_begin, sorted_values); + + parallel_for("Kokkos::Sort::Copy", + Kokkos::RangePolicy(exec, 0, len), functor); + } + } + + // Sort a subset of a view with respect to the first dimension using the + // permutation array + template + void sort(ValuesViewType const& values, int values_range_begin, + int values_range_end) const { + Kokkos::fence("Kokkos::Binsort::sort: before"); + exec_space exec; + sort(exec, values, values_range_begin, values_range_end); + exec.fence("Kokkos::BinSort:sort: after"); + } + + template + void sort(ExecutionSpace const& exec, ValuesViewType const& values) const { + this->sort(exec, values, 0, /*values.extent(0)*/ range_end - range_begin); + } + + template + void sort(ValuesViewType const& values) const { + this->sort(values, 0, /*values.extent(0)*/ range_end - range_begin); + } + + // Get the permutation vector + KOKKOS_INLINE_FUNCTION + offset_type get_permute_vector() const { return sort_order; } + + // Get the start offsets for each bin + KOKKOS_INLINE_FUNCTION + offset_type get_bin_offsets() const { return bin_offsets; } + + // Get the count for each bin + KOKKOS_INLINE_FUNCTION + bin_count_type get_bin_count() const { return bin_count_const; } + + public: + KOKKOS_INLINE_FUNCTION + void operator()(const bin_count_tag& /*tag*/, const int i) const { + const int j = range_begin + i; + bin_count_atomic(bin_op.bin(keys, j))++; + } + + KOKKOS_INLINE_FUNCTION + void operator()(const bin_offset_tag& /*tag*/, const int i, + value_type& offset, const bool& final) const { + if (final) { + bin_offsets(i) = offset; + } + offset += bin_count_const(i); + } + + KOKKOS_INLINE_FUNCTION + void operator()(const bin_binning_tag& /*tag*/, const int i) const 
{ + const int j = range_begin + i; + const int bin = bin_op.bin(keys, j); + const int count = bin_count_atomic(bin)++; + + sort_order(bin_offsets(bin) + count) = j; + } + + KOKKOS_INLINE_FUNCTION + void operator()(const bin_sort_bins_tag& /*tag*/, const int i) const { + auto bin_size = bin_count_const(i); + if (bin_size <= 1) return; + constexpr bool use_std_sort = + std::is_same_v; + int lower_bound = bin_offsets(i); + int upper_bound = lower_bound + bin_size; + // Switching to std::sort for more than 10 elements has been found + // reasonable experimentally. + if (use_std_sort && bin_size > 10) { + KOKKOS_IF_ON_HOST( + (std::sort(&sort_order(lower_bound), &sort_order(upper_bound), + [this](int p, int q) { return bin_op(keys_rnd, p, q); });)) + } else { + for (int k = lower_bound + 1; k < upper_bound; ++k) { + int old_idx = sort_order(k); + int j = k - 1; + while (j >= lower_bound) { + int new_idx = sort_order(j); + if (!bin_op(keys_rnd, old_idx, new_idx)) break; + sort_order(j + 1) = new_idx; + --j; + } + sort_order(j + 1) = old_idx; + } + } + } +}; + +} // namespace Kokkos +#endif diff --git a/lib/kokkos/algorithms/src/sorting/Kokkos_NestedSortPublicAPI.hpp b/lib/kokkos/algorithms/src/sorting/Kokkos_NestedSortPublicAPI.hpp new file mode 100644 index 0000000000..dd468e0734 --- /dev/null +++ b/lib/kokkos/algorithms/src/sorting/Kokkos_NestedSortPublicAPI.hpp @@ -0,0 +1,100 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOS_NESTED_SORT_PUBLIC_API_HPP_ +#define KOKKOS_NESTED_SORT_PUBLIC_API_HPP_ + +#include "impl/Kokkos_NestedSortImpl.hpp" +#include +#include + +namespace Kokkos { +namespace Experimental { + +template +KOKKOS_INLINE_FUNCTION void sort_team(const TeamMember& t, + const ViewType& view) { + Impl::sort_nested_impl(t, view, nullptr, + Experimental::Impl::StdAlgoLessThanBinaryPredicate< + typename ViewType::non_const_value_type>(), + Impl::NestedRange()); +} + +template +KOKKOS_INLINE_FUNCTION void sort_team(const TeamMember& t, const ViewType& view, + const Comparator& comp) { + Impl::sort_nested_impl(t, view, nullptr, comp, Impl::NestedRange()); +} + +template +KOKKOS_INLINE_FUNCTION void sort_by_key_team(const TeamMember& t, + const KeyViewType& keyView, + const ValueViewType& valueView) { + Impl::sort_nested_impl(t, keyView, valueView, + Experimental::Impl::StdAlgoLessThanBinaryPredicate< + typename KeyViewType::non_const_value_type>(), + Impl::NestedRange()); +} + +template +KOKKOS_INLINE_FUNCTION void sort_by_key_team(const TeamMember& t, + const KeyViewType& keyView, + const ValueViewType& valueView, + const Comparator& comp) { + Impl::sort_nested_impl(t, keyView, valueView, comp, + Impl::NestedRange()); +} + +template +KOKKOS_INLINE_FUNCTION void sort_thread(const TeamMember& t, + const ViewType& view) { + Impl::sort_nested_impl(t, view, nullptr, + Experimental::Impl::StdAlgoLessThanBinaryPredicate< + typename ViewType::non_const_value_type>(), + Impl::NestedRange()); +} + +template +KOKKOS_INLINE_FUNCTION void sort_thread(const TeamMember& t, + const ViewType& view, + const Comparator& comp) { + Impl::sort_nested_impl(t, view, nullptr, comp, Impl::NestedRange()); +} + +template +KOKKOS_INLINE_FUNCTION void sort_by_key_thread(const TeamMember& t, + const KeyViewType& keyView, + const ValueViewType& valueView) { + Impl::sort_nested_impl(t, keyView, valueView, + 
Experimental::Impl::StdAlgoLessThanBinaryPredicate< + typename KeyViewType::non_const_value_type>(), + Impl::NestedRange()); +} + +template +KOKKOS_INLINE_FUNCTION void sort_by_key_thread(const TeamMember& t, + const KeyViewType& keyView, + const ValueViewType& valueView, + const Comparator& comp) { + Impl::sort_nested_impl(t, keyView, valueView, comp, + Impl::NestedRange()); +} + +} // namespace Experimental +} // namespace Kokkos +#endif diff --git a/lib/kokkos/algorithms/src/sorting/Kokkos_SortPublicAPI.hpp b/lib/kokkos/algorithms/src/sorting/Kokkos_SortPublicAPI.hpp new file mode 100644 index 0000000000..a763c41e58 --- /dev/null +++ b/lib/kokkos/algorithms/src/sorting/Kokkos_SortPublicAPI.hpp @@ -0,0 +1,194 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOS_SORT_PUBLIC_API_HPP_ +#define KOKKOS_SORT_PUBLIC_API_HPP_ + +#include "./impl/Kokkos_SortImpl.hpp" +#include +#include +#include + +namespace Kokkos { + +// --------------------------------------------------------------- +// basic overloads +// --------------------------------------------------------------- + +template +void sort([[maybe_unused]] const ExecutionSpace& exec, + const Kokkos::View& view) { + // constraints + using ViewType = Kokkos::View; + using MemSpace = typename ViewType::memory_space; + static_assert( + ViewType::rank == 1 && + (std::is_same_v || + std::is_same_v || + std::is_same_v), + "Kokkos::sort without comparator: supports 1D Views with LayoutRight, " + "LayoutLeft or LayoutStride."); + + static_assert(SpaceAccessibility::accessible, + "Kokkos::sort: execution space instance is not able to access " + "the memory space of the " + "View argument!"); + + if (view.extent(0) <= 1) { + return; + } + + if constexpr (Impl::better_off_calling_std_sort_v) { + auto first = ::Kokkos::Experimental::begin(view); + auto last = ::Kokkos::Experimental::end(view); + std::sort(first, last); + } else { + Impl::sort_device_view_without_comparator(exec, view); + } +} + +template +void sort(const Kokkos::View& view) { + using ViewType = Kokkos::View; + static_assert(ViewType::rank == 1, + "Kokkos::sort: currently only supports rank-1 Views."); + + Kokkos::fence("Kokkos::sort: before"); + + if (view.extent(0) <= 1) { + return; + } + + typename ViewType::execution_space exec; + sort(exec, view); + exec.fence("Kokkos::sort: fence after sorting"); +} + +// --------------------------------------------------------------- +// overloads supporting a custom comparator +// --------------------------------------------------------------- +template +void sort([[maybe_unused]] const ExecutionSpace& exec, + const Kokkos::View& view, + const ComparatorType& comparator) { + // constraints 
+ using ViewType = Kokkos::View; + using MemSpace = typename ViewType::memory_space; + static_assert( + ViewType::rank == 1 && + (std::is_same_v || + std::is_same_v || + std::is_same_v), + "Kokkos::sort with comparator: supports 1D Views with LayoutRight, " + "LayoutLeft or LayoutStride."); + + static_assert(SpaceAccessibility::accessible, + "Kokkos::sort: execution space instance is not able to access " + "the memory space of the View argument!"); + + if (view.extent(0) <= 1) { + return; + } + + if constexpr (Impl::better_off_calling_std_sort_v) { + auto first = ::Kokkos::Experimental::begin(view); + auto last = ::Kokkos::Experimental::end(view); + std::sort(first, last, comparator); + } else { + Impl::sort_device_view_with_comparator(exec, view, comparator); + } +} + +template +void sort(const Kokkos::View& view, + const ComparatorType& comparator) { + using ViewType = Kokkos::View; + static_assert( + ViewType::rank == 1 && + (std::is_same_v || + std::is_same_v || + std::is_same_v), + "Kokkos::sort with comparator: supports 1D Views with LayoutRight, " + "LayoutLeft or LayoutStride."); + + Kokkos::fence("Kokkos::sort with comparator: before"); + + if (view.extent(0) <= 1) { + return; + } + + typename ViewType::execution_space exec; + sort(exec, view, comparator); + exec.fence("Kokkos::sort with comparator: fence after sorting"); +} + +// --------------------------------------------------------------- +// overloads for sorting a view with a subrange +// specified via integers begin, end +// --------------------------------------------------------------- + +template +std::enable_if_t::value> sort( + const ExecutionSpace& exec, ViewType view, size_t const begin, + size_t const end) { + // view must be rank-1 because the Impl::min_max_functor + // used below only works for rank-1 views for now + static_assert(ViewType::rank == 1, + "Kokkos::sort: currently only supports rank-1 Views."); + + if (view.extent(0) <= 1) { + return; + } + + using range_policy = 
Kokkos::RangePolicy; + using CompType = BinOp1D; + + Kokkos::MinMaxScalar result; + Kokkos::MinMax reducer(result); + + parallel_reduce("Kokkos::Sort::FindExtent", range_policy(exec, begin, end), + Impl::min_max_functor(view), reducer); + + if (result.min_val == result.max_val) return; + + BinSort bin_sort( + exec, view, begin, end, + CompType((end - begin) / 2, result.min_val, result.max_val), true); + + bin_sort.create_permute_vector(exec); + bin_sort.sort(exec, view, begin, end); +} + +template +void sort(ViewType view, size_t const begin, size_t const end) { + // same constraints as the overload above which this gets dispatched to + static_assert(ViewType::rank == 1, + "Kokkos::sort: currently only supports rank-1 Views."); + + Kokkos::fence("Kokkos::sort: before"); + + if (view.extent(0) <= 1) { + return; + } + + typename ViewType::execution_space exec; + sort(exec, view, begin, end); + exec.fence("Kokkos::Sort: fence after sorting"); +} + +} // namespace Kokkos +#endif diff --git a/lib/kokkos/algorithms/src/sorting/impl/Kokkos_CopyOpsForBinSortImpl.hpp b/lib/kokkos/algorithms/src/sorting/impl/Kokkos_CopyOpsForBinSortImpl.hpp new file mode 100644 index 0000000000..07f5926d82 --- /dev/null +++ b/lib/kokkos/algorithms/src/sorting/impl/Kokkos_CopyOpsForBinSortImpl.hpp @@ -0,0 +1,61 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOS_COPY_OPS_FOR_BINSORT_IMPL_HPP_ +#define KOKKOS_COPY_OPS_FOR_BINSORT_IMPL_HPP_ + +#include +#include + +namespace Kokkos { +namespace Impl { + +template +struct CopyOp; + +template +struct CopyOp { + KOKKOS_INLINE_FUNCTION + static void copy(DstViewType const& dst, size_t i_dst, SrcViewType const& src, + size_t i_src) { + dst(i_dst) = src(i_src); + } +}; + +template +struct CopyOp { + KOKKOS_INLINE_FUNCTION + static void copy(DstViewType const& dst, size_t i_dst, SrcViewType const& src, + size_t i_src) { + for (int j = 0; j < (int)dst.extent(1); j++) dst(i_dst, j) = src(i_src, j); + } +}; + +template +struct CopyOp { + KOKKOS_INLINE_FUNCTION + static void copy(DstViewType const& dst, size_t i_dst, SrcViewType const& src, + size_t i_src) { + for (int j = 0; j < dst.extent(1); j++) + for (int k = 0; k < dst.extent(2); k++) + dst(i_dst, j, k) = src(i_src, j, k); + } +}; + +} // namespace Impl +} // namespace Kokkos + +#endif diff --git a/lib/kokkos/algorithms/src/sorting/impl/Kokkos_NestedSortImpl.hpp b/lib/kokkos/algorithms/src/sorting/impl/Kokkos_NestedSortImpl.hpp new file mode 100644 index 0000000000..50ac823319 --- /dev/null +++ b/lib/kokkos/algorithms/src/sorting/impl/Kokkos_NestedSortImpl.hpp @@ -0,0 +1,115 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOS_NESTED_SORT_IMPL_HPP_ +#define KOKKOS_NESTED_SORT_IMPL_HPP_ + +#include +#include + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +// true for TeamVectorRange, false for ThreadVectorRange +template +struct NestedRange {}; + +// Specialization for team-level +template <> +struct NestedRange { + template + KOKKOS_FUNCTION static auto create(const TeamMember& t, SizeType len) { + return Kokkos::TeamVectorRange(t, len); + } + template + KOKKOS_FUNCTION static void barrier(const TeamMember& t) { + t.team_barrier(); + } +}; + +// Specialization for thread-level +template <> +struct NestedRange { + template + KOKKOS_FUNCTION static auto create(const TeamMember& t, SizeType len) { + return Kokkos::ThreadVectorRange(t, len); + } + // Barrier is no-op, as vector lanes of a thread are implicitly synchronized + // after parallel region + template + KOKKOS_FUNCTION static void barrier(const TeamMember&) {} +}; + +// When just doing sort (not sort_by_key), use nullptr_t for ValueViewType. +// This only takes the NestedRange instance for template arg deduction. +template +KOKKOS_INLINE_FUNCTION void sort_nested_impl( + const TeamMember& t, const KeyViewType& keyView, + [[maybe_unused]] const ValueViewType& valueView, const Comparator& comp, + const NestedRange) { + using SizeType = typename KeyViewType::size_type; + using KeyType = typename KeyViewType::non_const_value_type; + using Range = NestedRange; + SizeType n = keyView.extent(0); + SizeType npot = 1; + SizeType levels = 0; + // FIXME: ceiling power-of-two is a common thing to need - make it a utility + while (npot < n) { + levels++; + npot <<= 1; + } + for (SizeType i = 0; i < levels; i++) { + for (SizeType j = 0; j <= i; j++) { + // n/2 pairs of items are compared in parallel + Kokkos::parallel_for(Range::create(t, npot / 2), [=](const SizeType k) { + // How big are the brown/pink boxes? 
+ // (Terminology comes from Wikipedia diagram) + // https://commons.wikimedia.org/wiki/File:BitonicSort.svg#/media/File:BitonicSort.svg + SizeType boxSize = SizeType(2) << (i - j); + // Which box contains this thread? + SizeType boxID = k >> (i - j); // k * 2 / boxSize; + SizeType boxStart = boxID << (1 + i - j); // boxID * boxSize + SizeType boxOffset = k - (boxStart >> 1); // k - boxID * boxSize / 2; + SizeType elem1 = boxStart + boxOffset; + // In first phase (j == 0, brown box): within a box, compare with the + // opposite value in the box. + // In later phases (j > 0, pink box): within a box, compare with fixed + // distance (boxSize / 2) apart. + SizeType elem2 = (j == 0) ? (boxStart + boxSize - 1 - boxOffset) + : (elem1 + boxSize / 2); + if (elem2 < n) { + KeyType key1 = keyView(elem1); + KeyType key2 = keyView(elem2); + if (comp(key2, key1)) { + keyView(elem1) = key2; + keyView(elem2) = key1; + if constexpr (!std::is_same_v) { + Kokkos::Experimental::swap(valueView(elem1), valueView(elem2)); + } + } + } + }); + Range::barrier(t); + } + } +} + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos +#endif diff --git a/lib/kokkos/algorithms/src/sorting/impl/Kokkos_SortImpl.hpp b/lib/kokkos/algorithms/src/sorting/impl/Kokkos_SortImpl.hpp new file mode 100644 index 0000000000..d87ab09e77 --- /dev/null +++ b/lib/kokkos/algorithms/src/sorting/impl/Kokkos_SortImpl.hpp @@ -0,0 +1,369 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOS_SORT_FREE_FUNCS_IMPL_HPP_ +#define KOKKOS_SORT_FREE_FUNCS_IMPL_HPP_ + +#include "../Kokkos_BinOpsPublicAPI.hpp" +#include "../Kokkos_BinSortPublicAPI.hpp" +#include +#include +#include + +#if defined(KOKKOS_ENABLE_CUDA) + +// Workaround for `Instruction 'shfl' without '.sync' is not supported on +// .target sm_70 and higher from PTX ISA version 6.4`. +// Also see https://github.com/NVIDIA/cub/pull/170. +#if !defined(CUB_USE_COOPERATIVE_GROUPS) +#define CUB_USE_COOPERATIVE_GROUPS +#endif + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wshadow" + +#if defined(KOKKOS_COMPILER_CLANG) +// Some versions of Clang fail to compile Thrust, failing with errors like +// this: +// /thrust/system/cuda/detail/core/agent_launcher.h:557:11: +// error: use of undeclared identifier 'va_printf' +// The exact combination of versions for Clang and Thrust (or CUDA) for this +// failure was not investigated, however even very recent version combination +// (Clang 10.0.0 and Cuda 10.0) demonstrated failure. 
+// +// Defining _CubLog here locally allows us to avoid that code path, however +// disabling some debugging diagnostics +#pragma push_macro("_CubLog") +#ifdef _CubLog +#undef _CubLog +#endif +#define _CubLog +#include +#include +#pragma pop_macro("_CubLog") +#else +#include +#include +#endif + +#pragma GCC diagnostic pop + +#endif + +#if defined(KOKKOS_ENABLE_ONEDPL) +#include +#include +#endif + +namespace Kokkos { +namespace Impl { + +template +struct better_off_calling_std_sort : std::false_type {}; + +#if defined KOKKOS_ENABLE_SERIAL +template <> +struct better_off_calling_std_sort : std::true_type {}; +#endif + +#if defined KOKKOS_ENABLE_OPENMP +template <> +struct better_off_calling_std_sort : std::true_type {}; +#endif + +#if defined KOKKOS_ENABLE_THREADS +template <> +struct better_off_calling_std_sort : std::true_type {}; +#endif + +#if defined KOKKOS_ENABLE_HPX +template <> +struct better_off_calling_std_sort : std::true_type { +}; +#endif + +template +inline constexpr bool better_off_calling_std_sort_v = + better_off_calling_std_sort::value; + +template +struct min_max_functor { + using minmax_scalar = + Kokkos::MinMaxScalar; + + ViewType view; + min_max_functor(const ViewType& view_) : view(view_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const size_t& i, minmax_scalar& minmax) const { + if (view(i) < minmax.min_val) minmax.min_val = view(i); + if (view(i) > minmax.max_val) minmax.max_val = view(i); + } +}; + +template +void sort_via_binsort(const ExecutionSpace& exec, + const Kokkos::View& view) { + // Although we are using BinSort below, which could work on rank-2 views, + // for now view must be rank-1 because the min_max_functor + // used below only works for rank-1 views + using ViewType = Kokkos::View; + static_assert(ViewType::rank == 1, + "Kokkos::sort: currently only supports rank-1 Views."); + + if (view.extent(0) <= 1) { + return; + } + + Kokkos::MinMaxScalar result; + Kokkos::MinMax reducer(result); + 
parallel_reduce("Kokkos::Sort::FindExtent", + Kokkos::RangePolicy( + exec, 0, view.extent(0)), + min_max_functor(view), reducer); + if (result.min_val == result.max_val) return; + // For integral types the number of bins may be larger than the range + // in which case we can exactly have one unique value per bin + // and then don't need to sort bins. + bool sort_in_bins = true; + // TODO: figure out better max_bins then this ... + int64_t max_bins = view.extent(0) / 2; + if (std::is_integral::value) { + // Cast to double to avoid possible overflow when using integer + auto const max_val = static_cast(result.max_val); + auto const min_val = static_cast(result.min_val); + // using 10M as the cutoff for special behavior (roughly 40MB for the count + // array) + if ((max_val - min_val) < 10000000) { + max_bins = max_val - min_val + 1; + sort_in_bins = false; + } + } + if (std::is_floating_point::value) { + KOKKOS_ASSERT(std::isfinite(static_cast(result.max_val) - + static_cast(result.min_val))); + } + + using CompType = BinOp1D; + BinSort bin_sort( + view, CompType(max_bins, result.min_val, result.max_val), sort_in_bins); + bin_sort.create_permute_vector(exec); + bin_sort.sort(exec, view); +} + +#if defined(KOKKOS_ENABLE_CUDA) +template +void sort_cudathrust(const Cuda& space, + const Kokkos::View& view, + MaybeComparator&&... maybeComparator) { + using ViewType = Kokkos::View; + static_assert(ViewType::rank == 1, + "Kokkos::sort: currently only supports rank-1 Views."); + + if (view.extent(0) <= 1) { + return; + } + const auto exec = thrust::cuda::par.on(space.cuda_stream()); + auto first = ::Kokkos::Experimental::begin(view); + auto last = ::Kokkos::Experimental::end(view); + thrust::sort(exec, first, last, + std::forward(maybeComparator)...); +} +#endif + +#if defined(KOKKOS_ENABLE_ONEDPL) +template +void sort_onedpl(const Kokkos::Experimental::SYCL& space, + const Kokkos::View& view, + MaybeComparator&&... 
maybeComparator) { + using ViewType = Kokkos::View; + static_assert(SpaceAccessibility::accessible, + "SYCL execution space is not able to access the memory space " + "of the View argument!"); + + static_assert( + (ViewType::rank == 1) && + (std::is_same_v || + std::is_same_v || + std::is_same_v), + "SYCL sort only supports contiguous rank-1 Views with LayoutLeft, " + "LayoutRight or LayoutStride" + "For the latter, this means the View must have stride(0) = 1, enforced " + "at runtime."); + + if (view.stride(0) != 1) { + Kokkos::abort("SYCL sort only supports rank-1 Views with stride(0) = 1."); + } + + if (view.extent(0) <= 1) { + return; + } + + // Can't use Experimental::begin/end here since the oneDPL then assumes that + // the data is on the host. + auto queue = space.sycl_queue(); + auto policy = oneapi::dpl::execution::make_device_policy(queue); + const int n = view.extent(0); + oneapi::dpl::sort(policy, view.data(), view.data() + n, + std::forward(maybeComparator)...); +} +#endif + +template +void copy_to_host_run_stdsort_copy_back( + const ExecutionSpace& exec, + const Kokkos::View& view, + MaybeComparator&&... 
maybeComparator) { + namespace KE = ::Kokkos::Experimental; + + using ViewType = Kokkos::View; + using layout = typename ViewType::array_layout; + if constexpr (std::is_same_v) { + // for strided views we cannot just deep_copy from device to host, + // so we need to do a few more jumps + using view_value_type = typename ViewType::non_const_value_type; + using view_exespace = typename ViewType::execution_space; + using view_deep_copyable_t = Kokkos::View; + view_deep_copyable_t view_dc("view_dc", view.extent(0)); + KE::copy(exec, view, view_dc); + + // run sort on the mirror of view_dc + auto mv_h = create_mirror_view_and_copy(Kokkos::HostSpace(), view_dc); + auto first = KE::begin(mv_h); + auto last = KE::end(mv_h); + std::sort(first, last, std::forward(maybeComparator)...); + Kokkos::deep_copy(exec, view_dc, mv_h); + + // copy back to argument view + KE::copy(exec, KE::cbegin(view_dc), KE::cend(view_dc), KE::begin(view)); + } else { + auto view_h = create_mirror_view_and_copy(Kokkos::HostSpace(), view); + auto first = KE::begin(view_h); + auto last = KE::end(view_h); + std::sort(first, last, std::forward(maybeComparator)...); + Kokkos::deep_copy(exec, view, view_h); + } +} + +// -------------------------------------------------- +// +// specialize cases for sorting without comparator +// +// -------------------------------------------------- + +#if defined(KOKKOS_ENABLE_CUDA) +template +void sort_device_view_without_comparator( + const Cuda& exec, const Kokkos::View& view) { + sort_cudathrust(exec, view); +} +#endif + +#if defined(KOKKOS_ENABLE_ONEDPL) +template +void sort_device_view_without_comparator( + const Kokkos::Experimental::SYCL& exec, + const Kokkos::View& view) { + using ViewType = Kokkos::View; + static_assert( + (ViewType::rank == 1) && + (std::is_same_v || + std::is_same_v || + std::is_same_v), + "sort_device_view_without_comparator: supports rank-1 Views " + "with LayoutLeft, LayoutRight or LayoutStride"); + + if (view.stride(0) == 1) { + 
sort_onedpl(exec, view); + } else { + copy_to_host_run_stdsort_copy_back(exec, view); + } +} +#endif + +// fallback case +template +std::enable_if_t::value> +sort_device_view_without_comparator( + const ExecutionSpace& exec, + const Kokkos::View& view) { + sort_via_binsort(exec, view); +} + +// -------------------------------------------------- +// +// specialize cases for sorting with comparator +// +// -------------------------------------------------- + +#if defined(KOKKOS_ENABLE_CUDA) +template +void sort_device_view_with_comparator( + const Cuda& exec, const Kokkos::View& view, + const ComparatorType& comparator) { + sort_cudathrust(exec, view, comparator); +} +#endif + +#if defined(KOKKOS_ENABLE_ONEDPL) +template +void sort_device_view_with_comparator( + const Kokkos::Experimental::SYCL& exec, + const Kokkos::View& view, + const ComparatorType& comparator) { + using ViewType = Kokkos::View; + static_assert( + (ViewType::rank == 1) && + (std::is_same_v || + std::is_same_v || + std::is_same_v), + "sort_device_view_with_comparator: supports rank-1 Views " + "with LayoutLeft, LayoutRight or LayoutStride"); + + if (view.stride(0) == 1) { + sort_onedpl(exec, view, comparator); + } else { + copy_to_host_run_stdsort_copy_back(exec, view, comparator); + } +} +#endif + +template +std::enable_if_t::value> +sort_device_view_with_comparator( + const ExecutionSpace& exec, + const Kokkos::View& view, + const ComparatorType& comparator) { + // This is a fallback case if a more specialized overload does not exist: + // for now, this fallback copies data to host, runs std::sort + // and then copies data back. Potentially, this can later be changed + // with a better solution like our own quicksort on device or similar. 
+ + using ViewType = Kokkos::View; + using MemSpace = typename ViewType::memory_space; + static_assert(!SpaceAccessibility::accessible, + "Impl::sort_device_view_with_comparator: should not be called " + "on a view that is already accessible on the host"); + + copy_to_host_run_stdsort_copy_back(exec, view, comparator); +} + +} // namespace Impl +} // namespace Kokkos +#endif diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AdjacentDifference.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AdjacentDifference.hpp index 38dcd1a674..f254686dba 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AdjacentDifference.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AdjacentDifference.hpp @@ -23,64 +23,85 @@ namespace Kokkos { namespace Experimental { -template -std::enable_if_t::value, - OutputIteratorType> -adjacent_difference(const ExecutionSpace& ex, InputIteratorType first_from, - InputIteratorType last_from, - OutputIteratorType first_dest) { +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIteratorType, + typename OutputIteratorType, + std::enable_if_t::value && + ::Kokkos::is_execution_space::value, + int> = 0> +OutputIteratorType adjacent_difference(const ExecutionSpace& ex, + InputIteratorType first_from, + InputIteratorType last_from, + OutputIteratorType first_dest) { using value_type1 = typename InputIteratorType::value_type; using value_type2 = typename OutputIteratorType::value_type; using binary_op = Impl::StdAdjacentDifferenceDefaultBinaryOpFunctor; - return Impl::adjacent_difference_impl( + return Impl::adjacent_difference_exespace_impl( "Kokkos::adjacent_difference_iterator_api", ex, first_from, last_from, first_dest, binary_op()); } -template -std::enable_if_t::value, - OutputIteratorType> -adjacent_difference(const ExecutionSpace& ex, InputIteratorType first_from, - InputIteratorType last_from, OutputIteratorType first_dest, - BinaryOp bin_op) { - return 
Impl::adjacent_difference_impl( +template < + typename ExecutionSpace, typename InputIteratorType, + typename OutputIteratorType, typename BinaryOp, + std::enable_if_t::value && + ::Kokkos::is_execution_space::value, + int> = 0> +OutputIteratorType adjacent_difference(const ExecutionSpace& ex, + InputIteratorType first_from, + InputIteratorType last_from, + OutputIteratorType first_dest, + BinaryOp bin_op) { + return Impl::adjacent_difference_exespace_impl( "Kokkos::adjacent_difference_iterator_api", ex, first_from, last_from, first_dest, bin_op); } -template -std::enable_if_t::value, - OutputIteratorType> -adjacent_difference(const std::string& label, const ExecutionSpace& ex, - InputIteratorType first_from, InputIteratorType last_from, - OutputIteratorType first_dest) { +template < + typename ExecutionSpace, typename InputIteratorType, + typename OutputIteratorType, + std::enable_if_t::value && + ::Kokkos::is_execution_space::value, + int> = 0> +OutputIteratorType adjacent_difference(const std::string& label, + const ExecutionSpace& ex, + InputIteratorType first_from, + InputIteratorType last_from, + OutputIteratorType first_dest) { using value_type1 = typename InputIteratorType::value_type; using value_type2 = typename OutputIteratorType::value_type; using binary_op = Impl::StdAdjacentDifferenceDefaultBinaryOpFunctor; - return Impl::adjacent_difference_impl(label, ex, first_from, last_from, - first_dest, binary_op()); + return Impl::adjacent_difference_exespace_impl( + label, ex, first_from, last_from, first_dest, binary_op()); } -template -std::enable_if_t::value, - OutputIteratorType> -adjacent_difference(const std::string& label, const ExecutionSpace& ex, - InputIteratorType first_from, InputIteratorType last_from, - OutputIteratorType first_dest, BinaryOp bin_op) { - return Impl::adjacent_difference_impl(label, ex, first_from, last_from, - first_dest, bin_op); +template < + typename ExecutionSpace, typename InputIteratorType, + typename OutputIteratorType, 
typename BinaryOp, + std::enable_if_t::value && + ::Kokkos::is_execution_space::value, + int> = 0> +OutputIteratorType adjacent_difference(const std::string& label, + const ExecutionSpace& ex, + InputIteratorType first_from, + InputIteratorType last_from, + OutputIteratorType first_dest, + BinaryOp bin_op) { + return Impl::adjacent_difference_exespace_impl(label, ex, first_from, + last_from, first_dest, bin_op); } -template +template ::value, + int> = 0> auto adjacent_difference( const ExecutionSpace& ex, const ::Kokkos::View& view_from, @@ -96,13 +117,15 @@ auto adjacent_difference( using binary_op = Impl::StdAdjacentDifferenceDefaultBinaryOpFunctor; - return Impl::adjacent_difference_impl( + return Impl::adjacent_difference_exespace_impl( "Kokkos::adjacent_difference_view_api", ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), binary_op()); } -template +template ::value, + int> = 0> auto adjacent_difference( const ExecutionSpace& ex, const ::Kokkos::View& view_from, @@ -111,13 +134,15 @@ auto adjacent_difference( namespace KE = ::Kokkos::Experimental; Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - return Impl::adjacent_difference_impl( + return Impl::adjacent_difference_exespace_impl( "Kokkos::adjacent_difference_view_api", ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), bin_op); } -template +template ::value, + int> = 0> auto adjacent_difference( const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, @@ -134,13 +159,15 @@ auto adjacent_difference( Impl::StdAdjacentDifferenceDefaultBinaryOpFunctor; - return Impl::adjacent_difference_impl(label, ex, KE::cbegin(view_from), - KE::cend(view_from), - KE::begin(view_dest), binary_op()); + return Impl::adjacent_difference_exespace_impl( + label, ex, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest), binary_op()); } -template +template 
::value, + int> = 0> auto adjacent_difference( const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, @@ -149,9 +176,85 @@ auto adjacent_difference( namespace KE = ::Kokkos::Experimental; Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - return Impl::adjacent_difference_impl(label, ex, KE::cbegin(view_from), - KE::cend(view_from), - KE::begin(view_dest), bin_op); + return Impl::adjacent_difference_exespace_impl( + label, ex, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest), bin_op); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template ::value && + ::Kokkos::is_team_handle::value, + int> = 0> +KOKKOS_FUNCTION OutputIteratorType adjacent_difference( + const TeamHandleType& teamHandle, InputIteratorType first_from, + InputIteratorType last_from, OutputIteratorType first_dest) { + using value_type1 = typename InputIteratorType::value_type; + using value_type2 = typename OutputIteratorType::value_type; + using binary_op = + Impl::StdAdjacentDifferenceDefaultBinaryOpFunctor; + return Impl::adjacent_difference_team_impl(teamHandle, first_from, last_from, + first_dest, binary_op()); +} + +template ::value && + ::Kokkos::is_team_handle::value, + int> = 0> +KOKKOS_FUNCTION OutputIteratorType +adjacent_difference(const TeamHandleType& teamHandle, + InputIteratorType first_from, InputIteratorType last_from, + OutputIteratorType first_dest, BinaryOp bin_op) { + return Impl::adjacent_difference_team_impl(teamHandle, first_from, last_from, + first_dest, bin_op); +} + +template < + typename TeamHandleType, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION auto adjacent_difference( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest) { + namespace KE = ::Kokkos::Experimental; + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + + using view_type1 = ::Kokkos::View; + using view_type2 = ::Kokkos::View; + using value_type1 = typename view_type1::value_type; + using value_type2 = typename view_type2::value_type; + using binary_op = + Impl::StdAdjacentDifferenceDefaultBinaryOpFunctor; + return Impl::adjacent_difference_team_impl(teamHandle, KE::cbegin(view_from), + KE::cend(view_from), + KE::begin(view_dest), binary_op()); +} + +template < + typename TeamHandleType, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename BinaryOp, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION auto adjacent_difference( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest, + BinaryOp bin_op) { + namespace KE = ::Kokkos::Experimental; + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + return Impl::adjacent_difference_team_impl(teamHandle, KE::cbegin(view_from), + KE::cend(view_from), + KE::begin(view_dest), bin_op); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AdjacentFind.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AdjacentFind.hpp index 43c2b66010..ac476ca5bf 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AdjacentFind.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AdjacentFind.hpp @@ -23,71 +23,144 @@ namespace Kokkos { namespace Experimental { +// +// overload set accepting execution space +// + // 
overload set1 -template +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t, int> = 0> IteratorType adjacent_find(const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::adjacent_find_impl("Kokkos::adjacent_find_iterator_api_default", - ex, first, last); + return Impl::adjacent_find_exespace_impl( + "Kokkos::adjacent_find_iterator_api_default", ex, first, last); } -template +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t, int> = 0> IteratorType adjacent_find(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::adjacent_find_impl(label, ex, first, last); + return Impl::adjacent_find_exespace_impl(label, ex, first, last); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + std::enable_if_t, int> = 0> auto adjacent_find(const ExecutionSpace& ex, const ::Kokkos::View& v) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::adjacent_find_impl("Kokkos::adjacent_find_view_api_default", ex, - KE::begin(v), KE::end(v)); + return Impl::adjacent_find_exespace_impl( + "Kokkos::adjacent_find_view_api_default", ex, KE::begin(v), KE::end(v)); } -template +template < + typename ExecutionSpace, typename DataType, typename... 
Properties, + std::enable_if_t, int> = 0> auto adjacent_find(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::adjacent_find_impl(label, ex, KE::begin(v), KE::end(v)); + return Impl::adjacent_find_exespace_impl(label, ex, KE::begin(v), KE::end(v)); } // overload set2 -template +template < + typename ExecutionSpace, typename IteratorType, + typename BinaryPredicateType, + std::enable_if_t, int> = 0> IteratorType adjacent_find(const ExecutionSpace& ex, IteratorType first, IteratorType last, BinaryPredicateType pred) { - return Impl::adjacent_find_impl("Kokkos::adjacent_find_iterator_api_default", - ex, first, last, pred); + return Impl::adjacent_find_exespace_impl( + "Kokkos::adjacent_find_iterator_api_default", ex, first, last, pred); } -template +template < + typename ExecutionSpace, typename IteratorType, + typename BinaryPredicateType, + std::enable_if_t, int> = 0> IteratorType adjacent_find(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, BinaryPredicateType pred) { - return Impl::adjacent_find_impl(label, ex, first, last, pred); + return Impl::adjacent_find_exespace_impl(label, ex, first, last, pred); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename BinaryPredicateType, + std::enable_if_t, int> = 0> auto adjacent_find(const ExecutionSpace& ex, const ::Kokkos::View& v, BinaryPredicateType pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::adjacent_find_impl("Kokkos::adjacent_find_view_api_default", ex, - KE::begin(v), KE::end(v), pred); + return Impl::adjacent_find_exespace_impl( + "Kokkos::adjacent_find_view_api_default", ex, KE::begin(v), KE::end(v), + pred); } -template +template < + typename ExecutionSpace, typename DataType, typename... 
Properties, + typename BinaryPredicateType, + std::enable_if_t, int> = 0> auto adjacent_find(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, BinaryPredicateType pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::adjacent_find_impl(label, ex, KE::begin(v), KE::end(v), pred); + return Impl::adjacent_find_exespace_impl(label, ex, KE::begin(v), KE::end(v), + pred); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// + +// overload set1 +template , int> = 0> +KOKKOS_FUNCTION IteratorType adjacent_find(const TeamHandleType& teamHandle, + IteratorType first, + IteratorType last) { + return Impl::adjacent_find_team_impl(teamHandle, first, last); +} + +template , int> = 0> +KOKKOS_FUNCTION auto adjacent_find( + const TeamHandleType& teamHandle, + const ::Kokkos::View& v) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + namespace KE = ::Kokkos::Experimental; + return Impl::adjacent_find_team_impl(teamHandle, KE::begin(v), KE::end(v)); +} + +// overload set2 +template , int> = 0> +KOKKOS_FUNCTION IteratorType adjacent_find(const TeamHandleType& teamHandle, + IteratorType first, + IteratorType last, + BinaryPredicateType pred) { + return Impl::adjacent_find_team_impl(teamHandle, first, last, pred); +} + +template , int> = 0> +KOKKOS_FUNCTION auto adjacent_find( + const TeamHandleType& teamHandle, + const ::Kokkos::View& v, + BinaryPredicateType pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + namespace KE = ::Kokkos::Experimental; + return Impl::adjacent_find_team_impl(teamHandle, KE::begin(v), KE::end(v), + pred); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AllOf.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AllOf.hpp index 2ffec7e144..d6ed4c4a7e 
100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AllOf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AllOf.hpp @@ -23,41 +23,79 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool all_of(const ExecutionSpace& ex, InputIterator first, InputIterator last, Predicate predicate) { - return Impl::all_of_impl("Kokkos::all_of_iterator_api_default", ex, first, - last, predicate); + return Impl::all_of_exespace_impl("Kokkos::all_of_iterator_api_default", ex, + first, last, predicate); } -template +template < + typename ExecutionSpace, typename InputIterator, typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool all_of(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator last, Predicate predicate) { - return Impl::all_of_impl(label, ex, first, last, predicate); + return Impl::all_of_exespace_impl(label, ex, first, last, predicate); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool all_of(const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::all_of_impl("Kokkos::all_of_view_api_default", ex, KE::cbegin(v), - KE::cend(v), std::move(predicate)); + return Impl::all_of_exespace_impl("Kokkos::all_of_view_api_default", ex, + KE::cbegin(v), KE::cend(v), + std::move(predicate)); } -template +template < + typename ExecutionSpace, typename DataType, typename... 
Properties, + typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool all_of(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::all_of_impl(label, ex, KE::cbegin(v), KE::cend(v), - std::move(predicate)); + return Impl::all_of_exespace_impl(label, ex, KE::cbegin(v), KE::cend(v), + std::move(predicate)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION bool all_of(const TeamHandleType& teamHandle, + InputIterator first, InputIterator last, + Predicate predicate) { + return Impl::all_of_team_impl(teamHandle, first, last, predicate); +} + +template , int> = 0> +KOKKOS_FUNCTION bool all_of(const TeamHandleType& teamHandle, + const ::Kokkos::View& v, + Predicate predicate) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + namespace KE = ::Kokkos::Experimental; + return Impl::all_of_team_impl(teamHandle, KE::cbegin(v), KE::cend(v), + std::move(predicate)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AnyOf.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AnyOf.hpp index 019c466c6d..82356e6598 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AnyOf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AnyOf.hpp @@ -23,41 +23,79 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool any_of(const ExecutionSpace& ex, InputIterator first, InputIterator last, Predicate predicate) { - return 
Impl::any_of_impl("Kokkos::any_of_view_api_default", ex, first, last, - predicate); + return Impl::any_of_exespace_impl("Kokkos::any_of_view_api_default", ex, + first, last, predicate); } -template +template < + typename ExecutionSpace, typename InputIterator, typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool any_of(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator last, Predicate predicate) { - return Impl::any_of_impl(label, ex, first, last, predicate); + return Impl::any_of_exespace_impl(label, ex, first, last, predicate); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool any_of(const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::any_of_impl("Kokkos::any_of_view_api_default", ex, KE::cbegin(v), - KE::cend(v), std::move(predicate)); + return Impl::any_of_exespace_impl("Kokkos::any_of_view_api_default", ex, + KE::cbegin(v), KE::cend(v), + std::move(predicate)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool any_of(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::any_of_impl(label, ex, KE::cbegin(v), KE::cend(v), - std::move(predicate)); + return Impl::any_of_exespace_impl(label, ex, KE::cbegin(v), KE::cend(v), + std::move(predicate)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION bool any_of(const TeamHandleType& teamHandle, + InputIterator first, InputIterator last, + Predicate predicate) { + return Impl::any_of_team_impl(teamHandle, first, last, predicate); +} + +template , int> = 0> +KOKKOS_FUNCTION bool any_of(const TeamHandleType& teamHandle, + const ::Kokkos::View& v, + Predicate predicate) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + namespace KE = ::Kokkos::Experimental; + return Impl::any_of_team_impl(teamHandle, KE::cbegin(v), KE::cend(v), + std::move(predicate)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Copy.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Copy.hpp index 028f3b66b2..b7ce1ba5ed 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Copy.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Copy.hpp @@ -23,22 +23,31 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator copy(const ExecutionSpace& ex, InputIterator first, InputIterator last, OutputIterator d_first) { - return Impl::copy_impl("Kokkos::copy_iterator_api_default", ex, first, last, - d_first); + return Impl::copy_exespace_impl("Kokkos::copy_iterator_api_default", ex, + first, last, d_first); } -template +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator copy(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator last, OutputIterator d_first) { - return Impl::copy_impl(label, ex, first, last, d_first); + return Impl::copy_exespace_impl(label, ex, first, last, d_first); } -template +template < + typename ExecutionSpace, typename DataType1, typename... 
Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto copy(const ExecutionSpace& ex, const ::Kokkos::View& source, ::Kokkos::View& dest) { @@ -46,12 +55,15 @@ auto copy(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); namespace KE = ::Kokkos::Experimental; - return Impl::copy_impl("Kokkos::copy_view_api_default", ex, - KE::cbegin(source), KE::cend(source), KE::begin(dest)); + return Impl::copy_exespace_impl("Kokkos::copy_view_api_default", ex, + KE::cbegin(source), KE::cend(source), + KE::begin(dest)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto copy(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, ::Kokkos::View& dest) { @@ -59,8 +71,35 @@ auto copy(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); namespace KE = ::Kokkos::Experimental; - return Impl::copy_impl(label, ex, KE::cbegin(source), KE::cend(source), - KE::begin(dest)); + return Impl::copy_exespace_impl(label, ex, KE::cbegin(source), + KE::cend(source), KE::begin(dest)); +} + +// +// overload set accepting team handle +// +template , int> = 0> +KOKKOS_FUNCTION OutputIterator copy(const TeamHandleType& teamHandle, + InputIterator first, InputIterator last, + OutputIterator d_first) { + return Impl::copy_team_impl(teamHandle, first, last, d_first); +} + +template , int> = 0> +KOKKOS_FUNCTION auto copy( + const TeamHandleType& teamHandle, + const ::Kokkos::View& source, + ::Kokkos::View& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + namespace KE = ::Kokkos::Experimental; + return Impl::copy_team_impl(teamHandle, KE::cbegin(source), 
KE::cend(source), + KE::begin(dest)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CopyBackward.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CopyBackward.hpp index deff6baf9a..8f9e0f19b8 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CopyBackward.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CopyBackward.hpp @@ -23,42 +23,81 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType2 copy_backward(const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 d_last) { - return Impl::copy_backward_impl("Kokkos::copy_backward_iterator_api_default", - ex, first, last, d_last); + return Impl::copy_backward_exespace_impl( + "Kokkos::copy_backward_iterator_api_default", ex, first, last, d_last); } -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType2 copy_backward(const std::string& label, const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 d_last) { - return Impl::copy_backward_impl(label, ex, first, last, d_last); + return Impl::copy_backward_exespace_impl(label, ex, first, last, d_last); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto copy_backward(const ExecutionSpace& ex, const ::Kokkos::View& source, ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::copy_backward_impl("Kokkos::copy_backward_view_api_default", ex, - cbegin(source), cend(source), end(dest)); + return Impl::copy_backward_exespace_impl( + "Kokkos::copy_backward_view_api_default", ex, cbegin(source), + cend(source), end(dest)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto copy_backward(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::copy_backward_impl(label, ex, cbegin(source), cend(source), - end(dest)); + return Impl::copy_backward_exespace_impl(label, ex, cbegin(source), + cend(source), end(dest)); +} + +// +// overload set accepting team handle +// +template , int> = 0> +KOKKOS_FUNCTION IteratorType2 copy_backward(const TeamHandleType& teamHandle, + IteratorType1 first, + IteratorType1 last, + IteratorType2 d_last) { + return Impl::copy_backward_team_impl(teamHandle, first, last, d_last); +} + +template , int> = 0> +KOKKOS_FUNCTION auto copy_backward( + const TeamHandleType& teamHandle, + const ::Kokkos::View& source, + ::Kokkos::View& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::copy_backward_team_impl(teamHandle, cbegin(source), cend(source), + end(dest)); } } // namespace Experimental diff --git 
a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CopyIf.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CopyIf.hpp index 3db2fc074f..ba18bc76b9 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CopyIf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CopyIf.hpp @@ -23,46 +23,85 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator copy_if(const ExecutionSpace& ex, InputIterator first, InputIterator last, OutputIterator d_first, Predicate pred) { - return Impl::copy_if_impl("Kokkos::copy_if_iterator_api_default", ex, first, - last, d_first, std::move(pred)); + return Impl::copy_if_exespace_impl("Kokkos::copy_if_iterator_api_default", ex, + first, last, d_first, std::move(pred)); } -template +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator copy_if(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator last, OutputIterator d_first, Predicate pred) { - return Impl::copy_if_impl(label, ex, first, last, d_first, std::move(pred)); + return Impl::copy_if_exespace_impl(label, ex, first, last, d_first, + std::move(pred)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto copy_if(const ExecutionSpace& ex, const ::Kokkos::View& source, ::Kokkos::View& dest, Predicate pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::copy_if_impl("Kokkos::copy_if_view_api_default", ex, - cbegin(source), cend(source), begin(dest), - std::move(pred)); + return Impl::copy_if_exespace_impl("Kokkos::copy_if_view_api_default", ex, + cbegin(source), cend(source), begin(dest), + std::move(pred)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto copy_if(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, ::Kokkos::View& dest, Predicate pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::copy_if_impl(label, ex, cbegin(source), cend(source), - begin(dest), std::move(pred)); + return Impl::copy_if_exespace_impl(label, ex, cbegin(source), cend(source), + begin(dest), std::move(pred)); +} + +// +// overload set accepting team handle +// +template , int> = 0> +KOKKOS_FUNCTION OutputIterator copy_if(const TeamHandleType& teamHandle, + InputIterator first, InputIterator last, + OutputIterator d_first, Predicate pred) { + return Impl::copy_if_team_impl(teamHandle, first, last, d_first, + std::move(pred)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto copy_if( + const TeamHandleType& teamHandle, + const ::Kokkos::View& source, + ::Kokkos::View& dest, Predicate pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::copy_if_team_impl(teamHandle, 
cbegin(source), cend(source), + begin(dest), std::move(pred)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CopyN.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CopyN.hpp index a64f99b5c0..43c9120483 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CopyN.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CopyN.hpp @@ -23,23 +23,32 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename Size, + typename OutputIterator, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator copy_n(const ExecutionSpace& ex, InputIterator first, Size count, OutputIterator result) { - return Impl::copy_n_impl("Kokkos::copy_n_iterator_api_default", ex, first, - count, result); + return Impl::copy_n_exespace_impl("Kokkos::copy_n_iterator_api_default", ex, + first, count, result); } -template +template < + typename ExecutionSpace, typename InputIterator, typename Size, + typename OutputIterator, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator copy_n(const std::string& label, const ExecutionSpace& ex, InputIterator first, Size count, OutputIterator result) { - return Impl::copy_n_impl(label, ex, first, count, result); + return Impl::copy_n_exespace_impl(label, ex, first, count, result); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename Size, typename DataType2, typename... 
Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto copy_n(const ExecutionSpace& ex, const ::Kokkos::View& source, Size count, ::Kokkos::View& dest) { @@ -47,12 +56,14 @@ auto copy_n(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); namespace KE = ::Kokkos::Experimental; - return Impl::copy_n_impl("Kokkos::copy_n_view_api_default", ex, - KE::cbegin(source), count, KE::begin(dest)); + return Impl::copy_n_exespace_impl("Kokkos::copy_n_view_api_default", ex, + KE::cbegin(source), count, KE::begin(dest)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename Size, typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto copy_n(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, Size count, ::Kokkos::View& dest) { @@ -60,8 +71,35 @@ auto copy_n(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); namespace KE = ::Kokkos::Experimental; - return Impl::copy_n_impl(label, ex, KE::cbegin(source), count, - KE::begin(dest)); + return Impl::copy_n_exespace_impl(label, ex, KE::cbegin(source), count, + KE::begin(dest)); +} + +// +// overload set accepting team handle +// +template , int> = 0> +KOKKOS_FUNCTION OutputIterator copy_n(const TeamHandleType& teamHandle, + InputIterator first, Size count, + OutputIterator result) { + return Impl::copy_n_team_impl(teamHandle, first, count, result); +} + +template , int> = 0> +KOKKOS_FUNCTION auto copy_n( + const TeamHandleType& teamHandle, + const ::Kokkos::View& source, Size count, + ::Kokkos::View& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + namespace KE = ::Kokkos::Experimental; + return Impl::copy_n_team_impl(teamHandle, KE::cbegin(source), count, + 
KE::begin(dest)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Count.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Count.hpp index 3ac63467ec..f179e88bab 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Count.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Count.hpp @@ -23,41 +23,81 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> typename IteratorType::difference_type count(const ExecutionSpace& ex, IteratorType first, IteratorType last, const T& value) { - return Impl::count_impl("Kokkos::count_iterator_api_default", ex, first, last, - value); + return Impl::count_exespace_impl("Kokkos::count_iterator_api_default", ex, + first, last, value); } -template +template < + typename ExecutionSpace, typename IteratorType, typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> typename IteratorType::difference_type count(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, const T& value) { - return Impl::count_impl(label, ex, first, last, value); + return Impl::count_exespace_impl(label, ex, first, last, value); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto count(const ExecutionSpace& ex, const ::Kokkos::View& v, const T& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::count_impl("Kokkos::count_view_api_default", ex, KE::cbegin(v), - KE::cend(v), value); + return Impl::count_exespace_impl("Kokkos::count_view_api_default", ex, + KE::cbegin(v), KE::cend(v), value); } -template +template < + typename ExecutionSpace, typename DataType, typename... 
Properties, + typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto count(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, const T& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::count_impl(label, ex, KE::cbegin(v), KE::cend(v), value); + return Impl::count_exespace_impl(label, ex, KE::cbegin(v), KE::cend(v), + value); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// + +template , int> = 0> +KOKKOS_FUNCTION typename IteratorType::difference_type count( + const TeamHandleType& teamHandle, IteratorType first, IteratorType last, + const T& value) { + return Impl::count_team_impl(teamHandle, first, last, value); +} + +template , int> = 0> +KOKKOS_FUNCTION auto count(const TeamHandleType& teamHandle, + const ::Kokkos::View& v, + const T& value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + namespace KE = ::Kokkos::Experimental; + return Impl::count_team_impl(teamHandle, KE::cbegin(v), KE::cend(v), value); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CountIf.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CountIf.hpp index b9731d378a..967cf75e7a 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CountIf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CountIf.hpp @@ -23,46 +23,84 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> typename IteratorType::difference_type count_if(const ExecutionSpace& ex, IteratorType first, IteratorType last, Predicate predicate) { - return 
Impl::count_if_impl("Kokkos::count_if_iterator_api_default", ex, first, - last, std::move(predicate)); + return Impl::count_if_exespace_impl("Kokkos::count_if_iterator_api_default", + ex, first, last, std::move(predicate)); } -template +template < + typename ExecutionSpace, typename IteratorType, typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> typename IteratorType::difference_type count_if(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, Predicate predicate) { - return Impl::count_if_impl(label, ex, first, last, std::move(predicate)); + return Impl::count_if_exespace_impl(label, ex, first, last, + std::move(predicate)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto count_if(const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::count_if_impl("Kokkos::count_if_view_api_default", ex, - KE::cbegin(v), KE::cend(v), std::move(predicate)); + return Impl::count_if_exespace_impl("Kokkos::count_if_view_api_default", ex, + KE::cbegin(v), KE::cend(v), + std::move(predicate)); } -template +template < + typename ExecutionSpace, typename DataType, typename... 
Properties, + typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto count_if(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::count_if_impl(label, ex, KE::cbegin(v), KE::cend(v), - std::move(predicate)); + return Impl::count_if_exespace_impl(label, ex, KE::cbegin(v), KE::cend(v), + std::move(predicate)); +} + +// +// overload set accepting team handle +// +template , int> = 0> +KOKKOS_FUNCTION typename IteratorType::difference_type count_if( + const TeamHandleType& teamHandle, IteratorType first, IteratorType last, + Predicate predicate) { + return Impl::count_if_team_impl(teamHandle, first, last, + std::move(predicate)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto count_if(const TeamHandleType& teamHandle, + const ::Kokkos::View& v, + Predicate predicate) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + namespace KE = ::Kokkos::Experimental; + return Impl::count_if_team_impl(teamHandle, KE::cbegin(v), KE::cend(v), + std::move(predicate)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Equal.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Equal.hpp index 37c0d75ef5..a72a49cc22 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Equal.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Equal.hpp @@ -23,50 +23,61 @@ namespace Kokkos { namespace Experimental { -template -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - IteratorType1, IteratorType2>::value, - bool> -equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, - IteratorType2 first2) { - return Impl::equal_impl("Kokkos::equal_iterator_api_default", ex, first1, - last1, first2); +// +// overload set accepting execution space +// +template && + Kokkos::is_execution_space_v, + int> = 0> 
+bool equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2) { + return Impl::equal_exespace_impl("Kokkos::equal_iterator_api_default", ex, + first1, last1, first2); } -template -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - IteratorType1, IteratorType2>::value, - bool> -equal(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, - IteratorType1 last1, IteratorType2 first2) { - return Impl::equal_impl(label, ex, first1, last1, first2); +template && ::Kokkos:: + is_execution_space_v, + int> = 0> +bool equal(const std::string& label, const ExecutionSpace& ex, + IteratorType1 first1, IteratorType1 last1, IteratorType2 first2) { + return Impl::equal_exespace_impl(label, ex, first1, last1, first2); } -template -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - IteratorType1, IteratorType2>::value, - bool> -equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, - IteratorType2 first2, BinaryPredicateType predicate) { - return Impl::equal_impl("Kokkos::equal_iterator_api_default", ex, first1, - last1, first2, std::move(predicate)); +template && ::Kokkos:: + is_execution_space_v, + int> = 0> +bool equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, BinaryPredicateType predicate) { + return Impl::equal_exespace_impl("Kokkos::equal_iterator_api_default", ex, + first1, last1, first2, std::move(predicate)); } -template -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - IteratorType1, IteratorType2>::value, - bool> -equal(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, - IteratorType1 last1, IteratorType2 first2, - BinaryPredicateType predicate) { - return Impl::equal_impl(label, ex, first1, last1, first2, - std::move(predicate)); +template && ::Kokkos:: + is_execution_space_v, + int> = 0> +bool equal(const std::string& label, const ExecutionSpace& ex, + IteratorType1 first1, 
IteratorType1 last1, IteratorType2 first2, + BinaryPredicateType predicate) { + return Impl::equal_exespace_impl(label, ex, first1, last1, first2, + std::move(predicate)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool equal(const ExecutionSpace& ex, const ::Kokkos::View& view1, ::Kokkos::View& view2) { @@ -74,13 +85,15 @@ bool equal(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; - return Impl::equal_impl("Kokkos::equal_view_api_default", ex, - KE::cbegin(view1), KE::cend(view1), - KE::cbegin(view2)); + return Impl::equal_exespace_impl("Kokkos::equal_view_api_default", ex, + KE::cbegin(view1), KE::cend(view1), + KE::cbegin(view2)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool equal(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view1, ::Kokkos::View& view2) { @@ -88,12 +101,14 @@ bool equal(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; - return Impl::equal_impl(label, ex, KE::cbegin(view1), KE::cend(view1), - KE::cbegin(view2)); + return Impl::equal_exespace_impl(label, ex, KE::cbegin(view1), + KE::cend(view1), KE::cbegin(view2)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool equal(const ExecutionSpace& ex, const ::Kokkos::View& view1, ::Kokkos::View& view2, @@ -102,13 +117,15 @@ bool equal(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; - return Impl::equal_impl("Kokkos::equal_view_api_default", ex, - KE::cbegin(view1), KE::cend(view1), KE::cbegin(view2), - std::move(predicate)); + return Impl::equal_exespace_impl("Kokkos::equal_view_api_default", ex, + KE::cbegin(view1), KE::cend(view1), + KE::cbegin(view2), std::move(predicate)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool equal(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view1, ::Kokkos::View& view2, @@ -117,51 +134,149 @@ bool equal(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; - return Impl::equal_impl(label, ex, KE::cbegin(view1), KE::cend(view1), - KE::cbegin(view2), std::move(predicate)); + return Impl::equal_exespace_impl(label, ex, KE::cbegin(view1), + KE::cend(view1), KE::cbegin(view2), + std::move(predicate)); } -template -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - IteratorType1, IteratorType2>::value, - bool> -equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, - IteratorType2 first2, IteratorType2 last2) { - return Impl::equal_impl("Kokkos::equal_iterator_api_default", ex, first1, - last1, first2, last2); +template && ::Kokkos:: + is_execution_space_v, + int> = 0> +bool equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, IteratorType2 last2) { + return 
Impl::equal_exespace_impl("Kokkos::equal_iterator_api_default", ex, + first1, last1, first2, last2); } -template -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - IteratorType1, IteratorType2>::value, - bool> -equal(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, - IteratorType1 last1, IteratorType2 first2, IteratorType2 last2) { - return Impl::equal_impl(label, ex, first1, last1, first2, last2); +template && ::Kokkos:: + is_execution_space_v, + int> = 0> +bool equal(const std::string& label, const ExecutionSpace& ex, + IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, + IteratorType2 last2) { + return Impl::equal_exespace_impl(label, ex, first1, last1, first2, last2); } -template -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - IteratorType1, IteratorType2>::value, - bool> -equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, - IteratorType2 first2, IteratorType2 last2, - BinaryPredicateType predicate) { - return Impl::equal_impl("Kokkos::equal_iterator_api_default", ex, first1, - last1, first2, last2, std::move(predicate)); +template && ::Kokkos:: + is_execution_space_v, + int> = 0> +bool equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, IteratorType2 last2, + BinaryPredicateType predicate) { + return Impl::equal_exespace_impl("Kokkos::equal_iterator_api_default", ex, + first1, last1, first2, last2, + std::move(predicate)); } -template -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - IteratorType1, IteratorType2>::value, - bool> -equal(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, - IteratorType1 last1, IteratorType2 first2, IteratorType2 last2, - BinaryPredicateType predicate) { - return Impl::equal_impl(label, ex, first1, last1, first2, last2, - std::move(predicate)); +template && ::Kokkos:: + is_execution_space_v, + int> = 0> +bool equal(const std::string& label, const 
ExecutionSpace& ex, + IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, + IteratorType2 last2, BinaryPredicateType predicate) { + return Impl::equal_exespace_impl(label, ex, first1, last1, first2, last2, + std::move(predicate)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template && ::Kokkos:: + is_team_handle_v, + int> = 0> +KOKKOS_FUNCTION bool equal(const TeamHandleType& teamHandle, + IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2) { + return Impl::equal_team_impl(teamHandle, first1, last1, first2); +} + +template && ::Kokkos:: + is_team_handle_v, + int> = 0> +KOKKOS_FUNCTION bool equal(const TeamHandleType& teamHandle, + IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, + BinaryPredicateType predicate) { + return Impl::equal_team_impl(teamHandle, first1, last1, first2, + std::move(predicate)); +} + +template , int> = 0> +KOKKOS_FUNCTION bool equal( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view1, + ::Kokkos::View& view2) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); + + namespace KE = ::Kokkos::Experimental; + return Impl::equal_team_impl(teamHandle, KE::cbegin(view1), KE::cend(view1), + KE::cbegin(view2)); +} + +template , int> = 0> +KOKKOS_FUNCTION bool equal( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view1, + ::Kokkos::View& view2, + BinaryPredicateType predicate) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); + + namespace KE = ::Kokkos::Experimental; + return Impl::equal_team_impl(teamHandle, KE::cbegin(view1), KE::cend(view1), + KE::cbegin(view2), std::move(predicate)); +} + +template && ::Kokkos:: + is_team_handle_v, + int> = 0> +KOKKOS_FUNCTION 
bool equal(const TeamHandleType& teamHandle, + IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, IteratorType2 last2) { + return Impl::equal_team_impl(teamHandle, first1, last1, first2, last2); +} + +template && ::Kokkos:: + is_team_handle_v, + int> = 0> +KOKKOS_FUNCTION bool equal(const TeamHandleType& teamHandle, + IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, IteratorType2 last2, + BinaryPredicateType predicate) { + return Impl::equal_team_impl(teamHandle, first1, last1, first2, last2, + std::move(predicate)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ExclusiveScan.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ExclusiveScan.hpp index 4e05676c2c..ee3a105126 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ExclusiveScan.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ExclusiveScan.hpp @@ -23,105 +23,130 @@ namespace Kokkos { namespace Experimental { +// +// overload set accepting execution space +// + // overload set 1 -template -std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -exclusive_scan(const ExecutionSpace& ex, InputIteratorType first, - InputIteratorType last, OutputIteratorType first_dest, - ValueType init_value) { - static_assert(std::is_move_constructible::value, +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType exclusive_scan(const ExecutionSpace& ex, + InputIteratorType first, + InputIteratorType last, + OutputIteratorType first_dest, + ValueType init_value) { + static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); - return Impl::exclusive_scan_default_op_impl( + return Impl::exclusive_scan_default_op_exespace_impl( "Kokkos::exclusive_scan_default_functors_iterator_api", ex, first, last, - first_dest, init_value); + first_dest, std::move(init_value)); } -template 
-std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -exclusive_scan(const std::string& label, const ExecutionSpace& ex, - InputIteratorType first, InputIteratorType last, - OutputIteratorType first_dest, ValueType init_value) { - static_assert(std::is_move_constructible::value, +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType exclusive_scan(const std::string& label, + const ExecutionSpace& ex, + InputIteratorType first, + InputIteratorType last, + OutputIteratorType first_dest, + ValueType init_value) { + static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); - return Impl::exclusive_scan_default_op_impl(label, ex, first, last, - first_dest, init_value); + return Impl::exclusive_scan_default_op_exespace_impl( + label, ex, first, last, first_dest, std::move(init_value)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto exclusive_scan(const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, ValueType init_value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - static_assert(std::is_move_constructible::value, + static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); namespace KE = ::Kokkos::Experimental; - return Impl::exclusive_scan_default_op_impl( + return Impl::exclusive_scan_default_op_exespace_impl( "Kokkos::exclusive_scan_default_functors_view_api", ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), - init_value); + std::move(init_value)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... 
Properties1, + typename DataType2, typename... Properties2, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto exclusive_scan(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, ValueType init_value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - static_assert(std::is_move_constructible::value, + static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); namespace KE = ::Kokkos::Experimental; - return Impl::exclusive_scan_default_op_impl(label, ex, KE::cbegin(view_from), - KE::cend(view_from), - KE::begin(view_dest), init_value); + return Impl::exclusive_scan_default_op_exespace_impl( + label, ex, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest), std::move(init_value)); } // overload set 2 -template -std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -exclusive_scan(const ExecutionSpace& ex, InputIteratorType first, - InputIteratorType last, OutputIteratorType first_dest, - ValueType init_value, BinaryOpType bop) { +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType exclusive_scan(const ExecutionSpace& ex, + InputIteratorType first, + InputIteratorType last, + OutputIteratorType first_dest, + ValueType init_value, BinaryOpType bop) { Impl::static_assert_is_not_openmptarget(ex); - static_assert(std::is_move_constructible::value, + static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); - return Impl::exclusive_scan_custom_op_impl( + return Impl::exclusive_scan_custom_op_exespace_impl( "Kokkos::exclusive_scan_custom_functors_iterator_api", ex, first, last, - first_dest, init_value, bop); + first_dest, std::move(init_value), bop); } -template 
-std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -exclusive_scan(const std::string& label, const ExecutionSpace& ex, - InputIteratorType first, InputIteratorType last, - OutputIteratorType first_dest, ValueType init_value, - BinaryOpType bop) { +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType exclusive_scan(const std::string& label, + const ExecutionSpace& ex, + InputIteratorType first, + InputIteratorType last, + OutputIteratorType first_dest, + ValueType init_value, BinaryOpType bop) { Impl::static_assert_is_not_openmptarget(ex); - static_assert(std::is_move_constructible::value, + static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); - return Impl::exclusive_scan_custom_op_impl(label, ex, first, last, first_dest, - init_value, bop); + return Impl::exclusive_scan_custom_op_exespace_impl( + label, ex, first, last, first_dest, std::move(init_value), bop); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename ValueType, + typename BinaryOpType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto exclusive_scan(const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, @@ -129,18 +154,20 @@ auto exclusive_scan(const ExecutionSpace& ex, Impl::static_assert_is_not_openmptarget(ex); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - static_assert(std::is_move_constructible::value, + static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); namespace KE = ::Kokkos::Experimental; - return Impl::exclusive_scan_custom_op_impl( + return Impl::exclusive_scan_custom_op_exespace_impl( "Kokkos::exclusive_scan_custom_functors_view_api", ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), - init_value, bop); + std::move(init_value), bop); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename ValueType, + typename BinaryOpType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto exclusive_scan(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, @@ -148,12 +175,92 @@ auto exclusive_scan(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_not_openmptarget(ex); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - static_assert(std::is_move_constructible::value, + static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); namespace KE = ::Kokkos::Experimental; - return Impl::exclusive_scan_custom_op_impl( + return Impl::exclusive_scan_custom_op_exespace_impl( label, ex, KE::cbegin(view_from), KE::cend(view_from), - KE::begin(view_dest), init_value, bop); + KE::begin(view_dest), std::move(init_value), bop); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// + +// overload set 1 +template && + Kokkos::is_team_handle_v, + int> = 0> +KOKKOS_FUNCTION OutputIteratorType +exclusive_scan(const TeamHandleType& teamHandle, InputIteratorType first, + InputIteratorType last, OutputIteratorType first_dest, + ValueType init_value) { + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + return Impl::exclusive_scan_default_op_team_impl( + teamHandle, first, last, first_dest, std::move(init_value)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto exclusive_scan( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest, + ValueType init_value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + namespace KE = ::Kokkos::Experimental; + return Impl::exclusive_scan_default_op_team_impl( + teamHandle, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest), std::move(init_value)); +} + +// overload set 2 +template && + Kokkos::is_team_handle_v, + int> = 0> +KOKKOS_FUNCTION OutputIteratorType +exclusive_scan(const TeamHandleType& teamHandle, InputIteratorType first, + InputIteratorType last, OutputIteratorType first_dest, + ValueType init_value, BinaryOpType bop) { + Impl::static_assert_is_not_openmptarget(teamHandle); + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + return Impl::exclusive_scan_custom_op_team_impl( + teamHandle, first, last, first_dest, std::move(init_value), bop); +} + +template , int> = 0> +KOKKOS_FUNCTION auto exclusive_scan( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest, + ValueType init_value, BinaryOpType bop) { + Impl::static_assert_is_not_openmptarget(teamHandle); + 
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + namespace KE = ::Kokkos::Experimental; + return Impl::exclusive_scan_custom_op_team_impl( + teamHandle, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest), std::move(init_value), bop); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Fill.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Fill.hpp index 1e300a4c20..6d805ba1be 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Fill.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Fill.hpp @@ -23,33 +23,67 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void fill(const ExecutionSpace& ex, IteratorType first, IteratorType last, const T& value) { - Impl::fill_impl("Kokkos::fill_iterator_api_default", ex, first, last, value); + Impl::fill_exespace_impl("Kokkos::fill_iterator_api_default", ex, first, last, + value); } -template +template < + typename ExecutionSpace, typename IteratorType, typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void fill(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, const T& value) { - Impl::fill_impl(label, ex, first, last, value); + Impl::fill_exespace_impl(label, ex, first, last, value); } -template +template < + typename ExecutionSpace, typename DataType, typename... 
Properties, + typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void fill(const ExecutionSpace& ex, const ::Kokkos::View& view, const T& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - - Impl::fill_impl("Kokkos::fill_view_api_default", ex, begin(view), end(view), - value); + Impl::fill_exespace_impl("Kokkos::fill_view_api_default", ex, begin(view), + end(view), value); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void fill(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, const T& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::fill_exespace_impl(label, ex, begin(view), end(view), value); +} - Impl::fill_impl(label, ex, begin(view), end(view), value); +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION void fill(const TeamHandleType& th, IteratorType first, + IteratorType last, const T& value) { + Impl::fill_team_impl(th, first, last, value); +} + +template , int> = 0> +KOKKOS_FUNCTION void fill(const TeamHandleType& th, + const ::Kokkos::View& view, + const T& value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::fill_team_impl(th, begin(view), end(view), value); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FillN.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FillN.hpp index 02503dfd14..66b8cd66cc 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FillN.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FillN.hpp @@ -23,38 +23,72 @@ namespace Kokkos { namespace Experimental { -template +template < + typename ExecutionSpace, typename IteratorType, typename SizeType, + typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType fill_n(const ExecutionSpace& ex, IteratorType first, SizeType n, const T& value) { - return Impl::fill_n_impl("Kokkos::fill_n_iterator_api_default", ex, first, n, - value); + return Impl::fill_n_exespace_impl("Kokkos::fill_n_iterator_api_default", ex, + first, n, value); } -template +template < + typename ExecutionSpace, typename IteratorType, typename SizeType, + typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType fill_n(const std::string& label, const ExecutionSpace& ex, IteratorType first, SizeType n, const T& value) { - return Impl::fill_n_impl(label, ex, first, n, value); + return Impl::fill_n_exespace_impl(label, ex, first, n, value); } -template +template < + typename ExecutionSpace, typename DataType, typename... 
Properties, + typename SizeType, typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto fill_n(const ExecutionSpace& ex, const ::Kokkos::View& view, SizeType n, const T& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::fill_n_impl("Kokkos::fill_n_view_api_default", ex, begin(view), - n, value); + return Impl::fill_n_exespace_impl("Kokkos::fill_n_view_api_default", ex, + begin(view), n, value); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename SizeType, typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto fill_n(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, SizeType n, const T& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::fill_n_impl(label, ex, begin(view), n, value); + return Impl::fill_n_exespace_impl(label, ex, begin(view), n, value); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION IteratorType fill_n(const TeamHandleType& th, + IteratorType first, SizeType n, + const T& value) { + return Impl::fill_n_team_impl(th, first, n, value); +} + +template , int> = 0> +KOKKOS_FUNCTION auto fill_n(const TeamHandleType& th, + const ::Kokkos::View& view, + SizeType n, const T& value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + return Impl::fill_n_team_impl(th, begin(view), n, value); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Find.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Find.hpp index 65b68cf931..e5e2b0e2b0 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Find.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Find.hpp @@ -23,36 +23,76 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> InputIterator find(const ExecutionSpace& ex, InputIterator first, InputIterator last, const T& value) { - return Impl::find_impl("Kokkos::find_iterator_api_default", ex, first, last, - value); + return Impl::find_exespace_impl("Kokkos::find_iterator_api_default", ex, + first, last, value); } -template +template < + typename ExecutionSpace, typename InputIterator, typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> InputIterator find(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator last, const T& value) { - return Impl::find_impl(label, ex, first, last, value); + return Impl::find_exespace_impl(label, ex, first, last, value); } -template +template < + typename ExecutionSpace, typename DataType, typename... 
Properties, + typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto find(const ExecutionSpace& ex, const ::Kokkos::View& view, const T& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::find_impl("Kokkos::find_view_api_default", ex, KE::begin(view), - KE::end(view), value); + return Impl::find_exespace_impl("Kokkos::find_view_api_default", ex, + KE::begin(view), KE::end(view), value); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto find(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, const T& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::find_impl(label, ex, KE::begin(view), KE::end(view), value); + return Impl::find_exespace_impl(label, ex, KE::begin(view), KE::end(view), + value); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION InputIterator find(const TeamHandleType& teamHandle, + InputIterator first, InputIterator last, + const T& value) { + return Impl::find_team_impl(teamHandle, first, last, value); +} + +template , int> = 0> +KOKKOS_FUNCTION auto find(const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + const T& value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + namespace KE = ::Kokkos::Experimental; + return Impl::find_team_impl(teamHandle, KE::begin(view), KE::end(view), + value); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindEnd.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindEnd.hpp index f6a38855eb..a4ec735fd5 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindEnd.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindEnd.hpp @@ -24,24 +24,34 @@ namespace Kokkos { namespace Experimental { +// +// overload set accepting execution space +// + // overload set 1: no binary predicate passed -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 find_end(const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last) { - return Impl::find_end_impl("Kokkos::find_end_iterator_api_default", ex, first, - last, s_first, s_last); + return Impl::find_end_exespace_impl("Kokkos::find_end_iterator_api_default", + ex, first, last, s_first, s_last); } -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 find_end(const std::string& label, const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last) { - return Impl::find_end_impl(label, ex, first, last, s_first, s_last); + return 
Impl::find_end_exespace_impl(label, ex, first, last, s_first, s_last); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto find_end(const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view) { @@ -49,13 +59,15 @@ auto find_end(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; - return Impl::find_end_impl("Kokkos::find_end_view_api_default", ex, - KE::begin(view), KE::end(view), KE::begin(s_view), - KE::end(s_view)); + return Impl::find_end_exespace_impl("Kokkos::find_end_view_api_default", ex, + KE::begin(view), KE::end(view), + KE::begin(s_view), KE::end(s_view)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto find_end(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view) { @@ -63,31 +75,38 @@ auto find_end(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; - return Impl::find_end_impl(label, ex, KE::begin(view), KE::end(view), - KE::begin(s_view), KE::end(s_view)); + return Impl::find_end_exespace_impl(label, ex, KE::begin(view), KE::end(view), + KE::begin(s_view), KE::end(s_view)); } // overload set 2: binary predicate passed -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + typename BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 find_end(const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last, const BinaryPredicateType& pred) { - 
return Impl::find_end_impl("Kokkos::find_end_iterator_api_default", ex, first, - last, s_first, s_last, pred); + return Impl::find_end_exespace_impl("Kokkos::find_end_iterator_api_default", + ex, first, last, s_first, s_last, pred); } -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + typename BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 find_end(const std::string& label, const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last, const BinaryPredicateType& pred) { - return Impl::find_end_impl(label, ex, first, last, s_first, s_last, pred); + return Impl::find_end_exespace_impl(label, ex, first, last, s_first, s_last, + pred); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto find_end(const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view, @@ -96,13 +115,15 @@ auto find_end(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; - return Impl::find_end_impl("Kokkos::find_end_view_api_default", ex, - KE::begin(view), KE::end(view), KE::begin(s_view), - KE::end(s_view), pred); + return Impl::find_end_exespace_impl("Kokkos::find_end_view_api_default", ex, + KE::begin(view), KE::end(view), + KE::begin(s_view), KE::end(s_view), pred); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto find_end(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view, @@ -111,8 +132,71 @@ auto find_end(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; - return Impl::find_end_impl(label, ex, KE::begin(view), KE::end(view), - KE::begin(s_view), KE::end(s_view), pred); + return Impl::find_end_exespace_impl(label, ex, KE::begin(view), KE::end(view), + KE::begin(s_view), KE::end(s_view), pred); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// + +// overload set 1: no binary predicate passed +template , int> = 0> +KOKKOS_FUNCTION IteratorType1 find_end(const TeamHandleType& teamHandle, + IteratorType1 first, IteratorType1 last, + IteratorType2 s_first, + IteratorType2 s_last) { + return Impl::find_end_team_impl(teamHandle, first, last, s_first, s_last); +} + +template , int> = 0> +KOKKOS_FUNCTION auto find_end( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + const ::Kokkos::View& s_view) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); + + namespace KE = ::Kokkos::Experimental; + return Impl::find_end_team_impl(teamHandle, KE::begin(view), KE::end(view), + KE::begin(s_view), KE::end(s_view)); +} + +// overload set 2: binary predicate passed +template , int> = 0> + +KOKKOS_FUNCTION IteratorType1 find_end(const TeamHandleType& teamHandle, + IteratorType1 first, IteratorType1 last, + IteratorType2 s_first, + IteratorType2 s_last, + const BinaryPredicateType& pred) { + return Impl::find_end_team_impl(teamHandle, first, last, s_first, s_last, + pred); +} + +template , 
int> = 0> +KOKKOS_FUNCTION auto find_end( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + const ::Kokkos::View& s_view, + const BinaryPredicateType& pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); + + namespace KE = ::Kokkos::Experimental; + return Impl::find_end_team_impl(teamHandle, KE::begin(view), KE::end(view), + KE::begin(s_view), KE::end(s_view), pred); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindFirstOf.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindFirstOf.hpp index 6b0e4993ee..341a70e2f2 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindFirstOf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindFirstOf.hpp @@ -23,24 +23,36 @@ namespace Kokkos { namespace Experimental { +// +// overload set accepting execution space +// + // overload set 1: no binary predicate passed -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 find_first_of(const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last) { - return Impl::find_first_of_impl("Kokkos::find_first_of_iterator_api_default", - ex, first, last, s_first, s_last); + return Impl::find_first_of_exespace_impl( + "Kokkos::find_first_of_iterator_api_default", ex, first, last, s_first, + s_last); } -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 find_first_of(const std::string& label, const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last) { - return Impl::find_first_of_impl(label, ex, first, last, s_first, s_last); + return Impl::find_first_of_exespace_impl(label, ex, 
first, last, s_first, + s_last); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto find_first_of(const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view) { @@ -48,13 +60,15 @@ auto find_first_of(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; - return Impl::find_first_of_impl("Kokkos::find_first_of_view_api_default", ex, - KE::begin(view), KE::end(view), - KE::begin(s_view), KE::end(s_view)); + return Impl::find_first_of_exespace_impl( + "Kokkos::find_first_of_view_api_default", ex, KE::begin(view), + KE::end(view), KE::begin(s_view), KE::end(s_view)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto find_first_of(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view) { @@ -62,33 +76,41 @@ auto find_first_of(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; - return Impl::find_first_of_impl(label, ex, KE::begin(view), KE::end(view), - KE::begin(s_view), KE::end(s_view)); + return Impl::find_first_of_exespace_impl(label, ex, KE::begin(view), + KE::end(view), KE::begin(s_view), + KE::end(s_view)); } // overload set 2: binary predicate passed -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + typename BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 find_first_of(const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last, const 
BinaryPredicateType& pred) { - return Impl::find_first_of_impl("Kokkos::find_first_of_iterator_api_default", - ex, first, last, s_first, s_last, pred); + return Impl::find_first_of_exespace_impl( + "Kokkos::find_first_of_iterator_api_default", ex, first, last, s_first, + s_last, pred); } -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + typename BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 find_first_of(const std::string& label, const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last, const BinaryPredicateType& pred) { - return Impl::find_first_of_impl(label, ex, first, last, s_first, s_last, - pred); + return Impl::find_first_of_exespace_impl(label, ex, first, last, s_first, + s_last, pred); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto find_first_of(const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view, @@ -97,13 +119,15 @@ auto find_first_of(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; - return Impl::find_first_of_impl("Kokkos::find_first_of_view_api_default", ex, - KE::begin(view), KE::end(view), - KE::begin(s_view), KE::end(s_view), pred); + return Impl::find_first_of_exespace_impl( + "Kokkos::find_first_of_view_api_default", ex, KE::begin(view), + KE::end(view), KE::begin(s_view), KE::end(s_view), pred); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto find_first_of(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view, @@ -112,8 +136,77 @@ auto find_first_of(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; - return Impl::find_first_of_impl(label, ex, KE::begin(view), KE::end(view), - KE::begin(s_view), KE::end(s_view), pred); + return Impl::find_first_of_exespace_impl(label, ex, KE::begin(view), + KE::end(view), KE::begin(s_view), + KE::end(s_view), pred); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// + +// overload set 1: no binary predicate passed +template , int> = 0> +KOKKOS_FUNCTION IteratorType1 find_first_of(const TeamHandleType& teamHandle, + IteratorType1 first, + IteratorType1 last, + IteratorType2 s_first, + IteratorType2 s_last) { + return Impl::find_first_of_team_impl(teamHandle, first, last, s_first, + s_last); +} + +template , int> = 0> +KOKKOS_FUNCTION auto find_first_of( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + const ::Kokkos::View& s_view) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); + + namespace KE = ::Kokkos::Experimental; + return Impl::find_first_of_team_impl(teamHandle, KE::begin(view), + KE::end(view), KE::begin(s_view), + KE::end(s_view)); +} + +// overload set 2: binary predicate passed +template , int> = 0> + +KOKKOS_FUNCTION IteratorType1 find_first_of(const TeamHandleType& teamHandle, + IteratorType1 first, + IteratorType1 last, + IteratorType2 s_first, + IteratorType2 s_last, + const BinaryPredicateType& pred) { + return 
Impl::find_first_of_team_impl(teamHandle, first, last, s_first, s_last, + pred); +} + +template , int> = 0> +KOKKOS_FUNCTION auto find_first_of( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + const ::Kokkos::View& s_view, + const BinaryPredicateType& pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); + + namespace KE = ::Kokkos::Experimental; + return Impl::find_first_of_team_impl(teamHandle, KE::begin(view), + KE::end(view), KE::begin(s_view), + KE::end(s_view), pred); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindIf.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindIf.hpp index 911316a668..283fab7617 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindIf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindIf.hpp @@ -23,42 +23,82 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, typename PredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType find_if(const ExecutionSpace& ex, IteratorType first, IteratorType last, PredicateType predicate) { - return Impl::find_if_or_not_impl("Kokkos::find_if_iterator_api_default", - ex, first, last, std::move(predicate)); + return Impl::find_if_or_not_exespace_impl( + "Kokkos::find_if_iterator_api_default", ex, first, last, + std::move(predicate)); } -template +template < + typename ExecutionSpace, typename IteratorType, typename PredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType find_if(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, PredicateType predicate) { - return Impl::find_if_or_not_impl(label, ex, first, last, - std::move(predicate)); + return Impl::find_if_or_not_exespace_impl(label, ex, first, 
last, + std::move(predicate)); } -template +template ::value, + int> = 0> auto find_if(const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::find_if_or_not_impl("Kokkos::find_if_view_api_default", ex, - KE::begin(v), KE::end(v), - std::move(predicate)); + return Impl::find_if_or_not_exespace_impl( + "Kokkos::find_if_view_api_default", ex, KE::begin(v), KE::end(v), + std::move(predicate)); } -template +template ::value, + int> = 0> auto find_if(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::find_if_or_not_impl(label, ex, KE::begin(v), KE::end(v), - std::move(predicate)); + return Impl::find_if_or_not_exespace_impl( + label, ex, KE::begin(v), KE::end(v), std::move(predicate)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION IteratorType find_if(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + PredicateType predicate) { + return Impl::find_if_or_not_team_impl(teamHandle, first, last, + std::move(predicate)); +} + +template < + typename TeamHandleType, typename DataType, typename... 
Properties, + typename Predicate, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION auto find_if(const TeamHandleType& teamHandle, + const ::Kokkos::View& v, + Predicate predicate) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + namespace KE = ::Kokkos::Experimental; + return Impl::find_if_or_not_team_impl(teamHandle, KE::begin(v), + KE::end(v), std::move(predicate)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindIfNot.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindIfNot.hpp index 18294d7b7d..5e17a6f539 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindIfNot.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindIfNot.hpp @@ -23,45 +23,84 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType find_if_not(const ExecutionSpace& ex, IteratorType first, IteratorType last, Predicate predicate) { - return Impl::find_if_or_not_impl( + return Impl::find_if_or_not_exespace_impl( "Kokkos::find_if_not_iterator_api_default", ex, first, last, std::move(predicate)); } -template +template < + typename ExecutionSpace, typename IteratorType, typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType find_if_not(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, Predicate predicate) { - return Impl::find_if_or_not_impl(label, ex, first, last, - std::move(predicate)); + return Impl::find_if_or_not_exespace_impl(label, ex, first, last, + std::move(predicate)); } -template +template ::value, + int> = 0> auto find_if_not(const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = 
::Kokkos::Experimental; - return Impl::find_if_or_not_impl( + return Impl::find_if_or_not_exespace_impl( "Kokkos::find_if_not_view_api_default", ex, KE::begin(v), KE::end(v), std::move(predicate)); } -template +template ::value, + int> = 0> auto find_if_not(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::find_if_or_not_impl(label, ex, KE::begin(v), KE::end(v), - std::move(predicate)); + return Impl::find_if_or_not_exespace_impl( + label, ex, KE::begin(v), KE::end(v), std::move(predicate)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION IteratorType find_if_not(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + Predicate predicate) { + return Impl::find_if_or_not_team_impl(teamHandle, first, last, + std::move(predicate)); +} + +template < + typename TeamHandleType, typename DataType, typename... 
Properties, + typename Predicate, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION auto find_if_not( + const TeamHandleType& teamHandle, + const ::Kokkos::View& v, Predicate predicate) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + namespace KE = ::Kokkos::Experimental; + return Impl::find_if_or_not_team_impl( + teamHandle, KE::begin(v), KE::end(v), std::move(predicate)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ForEach.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ForEach.hpp index d7b08e4842..6215b325af 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ForEach.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ForEach.hpp @@ -23,42 +23,83 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + class ExecutionSpace, class IteratorType, class UnaryFunctorType, + std::enable_if_t, int> = 0> UnaryFunctorType for_each(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, UnaryFunctorType functor) { - return Impl::for_each_impl(label, ex, first, last, std::move(functor)); + return Impl::for_each_exespace_impl(label, ex, first, last, + std::move(functor)); } -template +template < + class ExecutionSpace, class IteratorType, class UnaryFunctorType, + std::enable_if_t, int> = 0> UnaryFunctorType for_each(const ExecutionSpace& ex, IteratorType first, IteratorType last, UnaryFunctorType functor) { - return Impl::for_each_impl("Kokkos::for_each_iterator_api_default", ex, first, - last, std::move(functor)); + return Impl::for_each_exespace_impl("Kokkos::for_each_iterator_api_default", + ex, first, last, std::move(functor)); } -template +template < + class ExecutionSpace, class DataType, class... 
Properties, + class UnaryFunctorType, + std::enable_if_t, int> = 0> UnaryFunctorType for_each(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, UnaryFunctorType functor) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::for_each_impl(label, ex, KE::begin(v), KE::end(v), - std::move(functor)); + return Impl::for_each_exespace_impl(label, ex, KE::begin(v), KE::end(v), + std::move(functor)); } -template +template < + class ExecutionSpace, class DataType, class... Properties, + class UnaryFunctorType, + std::enable_if_t, int> = 0> UnaryFunctorType for_each(const ExecutionSpace& ex, const ::Kokkos::View& v, UnaryFunctorType functor) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::for_each_impl("Kokkos::for_each_view_api_default", ex, - KE::begin(v), KE::end(v), std::move(functor)); + return Impl::for_each_exespace_impl("Kokkos::for_each_view_api_default", ex, + KE::begin(v), KE::end(v), + std::move(functor)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// + +template , int> = 0> +KOKKOS_FUNCTION UnaryFunctorType for_each(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + UnaryFunctorType functor) { + return Impl::for_each_team_impl(teamHandle, first, last, std::move(functor)); +} + +template , int> = 0> +KOKKOS_FUNCTION UnaryFunctorType +for_each(const TeamHandleType& teamHandle, + const ::Kokkos::View& v, + UnaryFunctorType functor) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + namespace KE = ::Kokkos::Experimental; + return Impl::for_each_team_impl(teamHandle, KE::begin(v), KE::end(v), + std::move(functor)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ForEachN.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ForEachN.hpp index f1769da05b..e6fbcad891 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ForEachN.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ForEachN.hpp @@ -23,43 +23,87 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + class ExecutionSpace, class IteratorType, class SizeType, + class UnaryFunctorType, + std::enable_if_t, int> = 0> IteratorType for_each_n(const std::string& label, const ExecutionSpace& ex, IteratorType first, SizeType n, UnaryFunctorType functor) { - return Impl::for_each_n_impl(label, ex, first, n, std::move(functor)); + return Impl::for_each_n_exespace_impl(label, ex, first, n, + std::move(functor)); } -template +template < + class ExecutionSpace, class IteratorType, class SizeType, + class UnaryFunctorType, + std::enable_if_t, int> = 0> IteratorType for_each_n(const ExecutionSpace& ex, IteratorType first, SizeType n, UnaryFunctorType functor) { - return Impl::for_each_n_impl("Kokkos::for_each_n_iterator_api_default", ex, - first, n, std::move(functor)); + return Impl::for_each_n_exespace_impl( + "Kokkos::for_each_n_iterator_api_default", ex, first, n, + std::move(functor)); } -template 
+template < + class ExecutionSpace, class DataType, class... Properties, class SizeType, + class UnaryFunctorType, + std::enable_if_t, int> = 0> auto for_each_n(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, SizeType n, UnaryFunctorType functor) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::for_each_n_impl(label, ex, KE::begin(v), n, std::move(functor)); + return Impl::for_each_n_exespace_impl(label, ex, KE::begin(v), n, + std::move(functor)); } -template +template < + class ExecutionSpace, class DataType, class... Properties, class SizeType, + class UnaryFunctorType, + std::enable_if_t, int> = 0> auto for_each_n(const ExecutionSpace& ex, const ::Kokkos::View& v, SizeType n, UnaryFunctorType functor) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::for_each_n_impl("Kokkos::for_each_n_view_api_default", ex, - KE::begin(v), n, std::move(functor)); + return Impl::for_each_n_exespace_impl("Kokkos::for_each_n_view_api_default", + ex, KE::begin(v), n, + std::move(functor)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// + +template , int> = 0> +KOKKOS_FUNCTION IteratorType for_each_n(const TeamHandleType& teamHandle, + IteratorType first, SizeType n, + UnaryFunctorType functor) { + return Impl::for_each_n_team_impl(teamHandle, first, n, std::move(functor)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto for_each_n( + const TeamHandleType& teamHandle, + const ::Kokkos::View& v, SizeType n, + UnaryFunctorType functor) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + namespace KE = ::Kokkos::Experimental; + return Impl::for_each_n_team_impl(teamHandle, KE::begin(v), n, + std::move(functor)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Generate.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Generate.hpp index 13e12783e0..a3295084ee 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Generate.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Generate.hpp @@ -23,38 +23,68 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template , int> = 0> void generate(const ExecutionSpace& ex, IteratorType first, IteratorType last, Generator g) { - Impl::generate_impl("Kokkos::generate_iterator_api_default", ex, first, last, - std::move(g)); + Impl::generate_exespace_impl("Kokkos::generate_iterator_api_default", ex, + first, last, std::move(g)); } -template +template , int> = 0> void generate(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, Generator g) { - Impl::generate_impl(label, ex, first, last, std::move(g)); + Impl::generate_exespace_impl(label, ex, first, last, std::move(g)); } -template +template , int> = 0> void generate(const ExecutionSpace& ex, const ::Kokkos::View& view, Generator g) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - Impl::generate_impl("Kokkos::generate_view_api_default", ex, begin(view), - end(view), std::move(g)); + 
Impl::generate_exespace_impl("Kokkos::generate_view_api_default", ex, + begin(view), end(view), std::move(g)); } -template +template , int> = 0> void generate(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, Generator g) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - Impl::generate_impl(label, ex, begin(view), end(view), std::move(g)); + Impl::generate_exespace_impl(label, ex, begin(view), end(view), std::move(g)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION void generate(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + Generator g) { + Impl::generate_team_impl(teamHandle, first, last, std::move(g)); +} + +template , int> = 0> +KOKKOS_FUNCTION void generate( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, Generator g) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::generate_team_impl(teamHandle, begin(view), end(view), std::move(g)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_GenerateN.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_GenerateN.hpp index 4d17512228..e480062c23 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_GenerateN.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_GenerateN.hpp @@ -23,40 +23,75 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template , int> = 0> IteratorType generate_n(const ExecutionSpace& ex, IteratorType first, Size count, Generator g) { - Impl::generate_n_impl("Kokkos::generate_n_iterator_api_default", ex, first, - count, std::move(g)); - return first + count; + return Impl::generate_n_exespace_impl( + "Kokkos::generate_n_iterator_api_default", ex, first, count, + std::move(g)); } -template 
+template , int> = 0> IteratorType generate_n(const std::string& label, const ExecutionSpace& ex, IteratorType first, Size count, Generator g) { - Impl::generate_n_impl(label, ex, first, count, std::move(g)); - return first + count; + return Impl::generate_n_exespace_impl(label, ex, first, count, std::move(g)); } -template +template , int> = 0> auto generate_n(const ExecutionSpace& ex, const ::Kokkos::View& view, Size count, Generator g) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::generate_n_impl("Kokkos::generate_n_view_api_default", ex, - begin(view), count, std::move(g)); + return Impl::generate_n_exespace_impl("Kokkos::generate_n_view_api_default", + ex, begin(view), count, std::move(g)); } -template +template , int> = 0> auto generate_n(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, Size count, Generator g) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::generate_n_impl(label, ex, begin(view), count, std::move(g)); + return Impl::generate_n_exespace_impl(label, ex, begin(view), count, + std::move(g)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION IteratorType generate_n(const TeamHandleType& teamHandle, + IteratorType first, Size count, + Generator g) { + return Impl::generate_n_team_impl(teamHandle, first, count, std::move(g)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto generate_n( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, Size count, + Generator g) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + return Impl::generate_n_team_impl(teamHandle, begin(view), count, + std::move(g)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_InclusiveScan.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_InclusiveScan.hpp index bcd731b850..a0e540b5e7 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_InclusiveScan.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_InclusiveScan.hpp @@ -23,33 +23,45 @@ namespace Kokkos { namespace Experimental { +// +// overload set accepting execution space +// + // overload set 1 -template -std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -inclusive_scan(const ExecutionSpace& ex, InputIteratorType first, - InputIteratorType last, OutputIteratorType first_dest) { - return Impl::inclusive_scan_default_op_impl( +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType inclusive_scan(const ExecutionSpace& ex, + InputIteratorType first, + InputIteratorType last, + OutputIteratorType first_dest) { + return Impl::inclusive_scan_default_op_exespace_impl( "Kokkos::inclusive_scan_default_functors_iterator_api", ex, first, last, first_dest); } -template -std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -inclusive_scan(const std::string& label, const ExecutionSpace& ex, - InputIteratorType first, InputIteratorType last, - OutputIteratorType first_dest) { - 
return Impl::inclusive_scan_default_op_impl(label, ex, first, last, - first_dest); +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType inclusive_scan(const std::string& label, + const ExecutionSpace& ex, + InputIteratorType first, + InputIteratorType last, + OutputIteratorType first_dest) { + return Impl::inclusive_scan_default_op_exespace_impl(label, ex, first, last, + first_dest); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto inclusive_scan( const ExecutionSpace& ex, const ::Kokkos::View& view_from, @@ -57,13 +69,15 @@ auto inclusive_scan( Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); namespace KE = ::Kokkos::Experimental; - return Impl::inclusive_scan_default_op_impl( + return Impl::inclusive_scan_default_op_exespace_impl( "Kokkos::inclusive_scan_default_functors_view_api", ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto inclusive_scan( const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, @@ -71,39 +85,45 @@ auto inclusive_scan( Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); namespace KE = ::Kokkos::Experimental; - return Impl::inclusive_scan_default_op_impl(label, ex, KE::cbegin(view_from), - KE::cend(view_from), - KE::begin(view_dest)); + return Impl::inclusive_scan_default_op_exespace_impl( + label, ex, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest)); } // overload set 2 (accepting custom binary op) -template -std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -inclusive_scan(const ExecutionSpace& ex, InputIteratorType first, - InputIteratorType last, OutputIteratorType first_dest, - BinaryOp binary_op) { - return Impl::inclusive_scan_custom_binary_op_impl( +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType inclusive_scan(const ExecutionSpace& ex, + InputIteratorType first, + InputIteratorType last, + OutputIteratorType first_dest, + BinaryOp binary_op) { + return Impl::inclusive_scan_custom_binary_op_exespace_impl( "Kokkos::inclusive_scan_custom_functors_iterator_api", ex, first, last, first_dest, binary_op); } -template -std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -inclusive_scan(const std::string& label, const ExecutionSpace& ex, - InputIteratorType first, InputIteratorType last, - OutputIteratorType first_dest, BinaryOp binary_op) { - return Impl::inclusive_scan_custom_binary_op_impl(label, ex, first, last, - first_dest, binary_op); +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType inclusive_scan( + const std::string& label, 
const ExecutionSpace& ex, InputIteratorType first, + InputIteratorType last, OutputIteratorType first_dest, BinaryOp binary_op) { + return Impl::inclusive_scan_custom_binary_op_exespace_impl( + label, ex, first, last, first_dest, binary_op); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename BinaryOp, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto inclusive_scan(const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, @@ -111,14 +131,16 @@ auto inclusive_scan(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); namespace KE = ::Kokkos::Experimental; - return Impl::inclusive_scan_custom_binary_op_impl( + return Impl::inclusive_scan_custom_binary_op_exespace_impl( "Kokkos::inclusive_scan_custom_functors_view_api", ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), binary_op); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename BinaryOp, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto inclusive_scan(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, @@ -126,67 +148,192 @@ auto inclusive_scan(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); namespace KE = ::Kokkos::Experimental; - return Impl::inclusive_scan_custom_binary_op_impl( + return Impl::inclusive_scan_custom_binary_op_exespace_impl( label, ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), binary_op); } // overload set 3 -template -std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -inclusive_scan(const ExecutionSpace& ex, InputIteratorType first, - InputIteratorType last, OutputIteratorType first_dest, - BinaryOp binary_op, ValueType init_value) { - return Impl::inclusive_scan_custom_binary_op_impl( +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType inclusive_scan(const ExecutionSpace& ex, + InputIteratorType first, + InputIteratorType last, + OutputIteratorType first_dest, + BinaryOp binary_op, ValueType init_value) { + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + + return Impl::inclusive_scan_custom_binary_op_exespace_impl( "Kokkos::inclusive_scan_custom_functors_iterator_api", ex, first, last, - first_dest, binary_op, init_value); + first_dest, binary_op, std::move(init_value)); } -template -std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -inclusive_scan(const std::string& label, const ExecutionSpace& ex, - InputIteratorType first, InputIteratorType last, - OutputIteratorType first_dest, BinaryOp binary_op, - ValueType 
init_value) { - return Impl::inclusive_scan_custom_binary_op_impl( - label, ex, first, last, first_dest, binary_op, init_value); +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType inclusive_scan(const std::string& label, + const ExecutionSpace& ex, + InputIteratorType first, + InputIteratorType last, + OutputIteratorType first_dest, + BinaryOp binary_op, ValueType init_value) { + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + + return Impl::inclusive_scan_custom_binary_op_exespace_impl( + label, ex, first, last, first_dest, binary_op, std::move(init_value)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename BinaryOp, + typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto inclusive_scan(const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, BinaryOp binary_op, ValueType init_value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + namespace KE = ::Kokkos::Experimental; - return Impl::inclusive_scan_custom_binary_op_impl( + return Impl::inclusive_scan_custom_binary_op_exespace_impl( "Kokkos::inclusive_scan_custom_functors_view_api", ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), - binary_op, init_value); + binary_op, std::move(init_value)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename BinaryOp, + typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto inclusive_scan(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, BinaryOp binary_op, ValueType init_value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + namespace KE = ::Kokkos::Experimental; - return Impl::inclusive_scan_custom_binary_op_impl( + return Impl::inclusive_scan_custom_binary_op_exespace_impl( label, ex, KE::cbegin(view_from), KE::cend(view_from), - KE::begin(view_dest), binary_op, init_value); + KE::begin(view_dest), binary_op, std::move(init_value)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// + +// overload set 1 +template && :: + Kokkos::is_team_handle_v, + int> = 0> +KOKKOS_FUNCTION OutputIteratorType +inclusive_scan(const TeamHandleType& teamHandle, InputIteratorType first, + InputIteratorType last, OutputIteratorType first_dest) { + return Impl::inclusive_scan_default_op_team_impl(teamHandle, first, last, + first_dest); +} + +template , int> = 0> +KOKKOS_FUNCTION auto inclusive_scan( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + namespace KE = ::Kokkos::Experimental; + return Impl::inclusive_scan_default_op_team_impl( + teamHandle, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest)); +} + +// overload set 2 (accepting custom binary op) +template && :: + Kokkos::is_team_handle_v, + int> = 0> +KOKKOS_FUNCTION OutputIteratorType inclusive_scan( + const TeamHandleType& teamHandle, InputIteratorType first, + InputIteratorType last, OutputIteratorType first_dest, BinaryOp binary_op) { + return Impl::inclusive_scan_custom_binary_op_team_impl( + teamHandle, first, last, first_dest, binary_op); +} + +template , int> = 0> +KOKKOS_FUNCTION auto inclusive_scan( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest, + BinaryOp binary_op) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + namespace KE = ::Kokkos::Experimental; + return Impl::inclusive_scan_custom_binary_op_team_impl( + teamHandle, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest), binary_op); +} + +// overload set 3 +template && :: + Kokkos::is_team_handle_v, + int> = 0> + +KOKKOS_FUNCTION OutputIteratorType +inclusive_scan(const TeamHandleType& teamHandle, InputIteratorType first, + InputIteratorType last, 
OutputIteratorType first_dest, + BinaryOp binary_op, ValueType init_value) { + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + return Impl::inclusive_scan_custom_binary_op_team_impl( + teamHandle, first, last, first_dest, binary_op, std::move(init_value)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto inclusive_scan( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest, + BinaryOp binary_op, ValueType init_value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + + namespace KE = ::Kokkos::Experimental; + return Impl::inclusive_scan_custom_binary_op_team_impl( + teamHandle, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest), binary_op, std::move(init_value)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IsPartitioned.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IsPartitioned.hpp index 29d6be9e8b..42f20bc4ec 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IsPartitioned.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IsPartitioned.hpp @@ -23,39 +23,78 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, typename PredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_partitioned(const ExecutionSpace& ex, IteratorType first, IteratorType last, PredicateType p) { - return Impl::is_partitioned_impl( + return Impl::is_partitioned_exespace_impl( "Kokkos::is_partitioned_iterator_api_default", ex, first, last, std::move(p)); } -template +template < + typename ExecutionSpace, typename IteratorType, typename PredicateType, + 
std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_partitioned(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, PredicateType p) { - return Impl::is_partitioned_impl(label, ex, first, last, std::move(p)); + return Impl::is_partitioned_exespace_impl(label, ex, first, last, + std::move(p)); } -template +template < + typename ExecutionSpace, typename PredicateType, typename DataType, + typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_partitioned(const ExecutionSpace& ex, const ::Kokkos::View& v, PredicateType p) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - return Impl::is_partitioned_impl("Kokkos::is_partitioned_view_api_default", - ex, cbegin(v), cend(v), std::move(p)); + return Impl::is_partitioned_exespace_impl( + "Kokkos::is_partitioned_view_api_default", ex, cbegin(v), cend(v), + std::move(p)); } -template +template < + typename ExecutionSpace, typename PredicateType, typename DataType, + typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_partitioned(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, PredicateType p) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - return Impl::is_partitioned_impl(label, ex, cbegin(v), cend(v), std::move(p)); + return Impl::is_partitioned_exespace_impl(label, ex, cbegin(v), cend(v), + std::move(p)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION bool is_partitioned(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + PredicateType p) { + return Impl::is_partitioned_team_impl(teamHandle, first, last, std::move(p)); +} + +template , int> = 0> +KOKKOS_FUNCTION bool is_partitioned( + const TeamHandleType& teamHandle, + const ::Kokkos::View& v, PredicateType p) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + return Impl::is_partitioned_team_impl(teamHandle, cbegin(v), cend(v), + std::move(p)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IsSorted.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IsSorted.hpp index f036254a02..2c676c3ff3 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IsSorted.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IsSorted.hpp @@ -23,55 +23,73 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_sorted(const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::is_sorted_impl("Kokkos::is_sorted_iterator_api_default", ex, - first, last); + return Impl::is_sorted_exespace_impl("Kokkos::is_sorted_iterator_api_default", + ex, first, last); } -template +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_sorted(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::is_sorted_impl(label, ex, first, last); + return Impl::is_sorted_exespace_impl(label, ex, first, last); } -template +template < + typename ExecutionSpace, typename DataType, typename... 
Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_sorted(const ExecutionSpace& ex, const ::Kokkos::View& view) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::is_sorted_impl("Kokkos::is_sorted_view_api_default", ex, - KE::cbegin(view), KE::cend(view)); + return Impl::is_sorted_exespace_impl("Kokkos::is_sorted_view_api_default", ex, + KE::cbegin(view), KE::cend(view)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_sorted(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::is_sorted_impl(label, ex, KE::cbegin(view), KE::cend(view)); + return Impl::is_sorted_exespace_impl(label, ex, KE::cbegin(view), + KE::cend(view)); } -template +template < + typename ExecutionSpace, typename IteratorType, typename ComparatorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_sorted(const ExecutionSpace& ex, IteratorType first, IteratorType last, ComparatorType comp) { Impl::static_assert_is_not_openmptarget(ex); - return Impl::is_sorted_impl("Kokkos::is_sorted_iterator_api_default", ex, - first, last, std::move(comp)); + return Impl::is_sorted_exespace_impl("Kokkos::is_sorted_iterator_api_default", + ex, first, last, std::move(comp)); } -template +template < + typename ExecutionSpace, typename IteratorType, typename ComparatorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_sorted(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, ComparatorType comp) { Impl::static_assert_is_not_openmptarget(ex); - return Impl::is_sorted_impl(label, ex, first, last, std::move(comp)); + return Impl::is_sorted_exespace_impl(label, ex, first, last, 
std::move(comp)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename ComparatorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_sorted(const ExecutionSpace& ex, const ::Kokkos::View& view, ComparatorType comp) { @@ -79,13 +97,15 @@ bool is_sorted(const ExecutionSpace& ex, Impl::static_assert_is_not_openmptarget(ex); namespace KE = ::Kokkos::Experimental; - return Impl::is_sorted_impl("Kokkos::is_sorted_view_api_default", ex, - KE::cbegin(view), KE::cend(view), - std::move(comp)); + return Impl::is_sorted_exespace_impl("Kokkos::is_sorted_view_api_default", ex, + KE::cbegin(view), KE::cend(view), + std::move(comp)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename ComparatorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_sorted(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, ComparatorType comp) { @@ -93,8 +113,56 @@ bool is_sorted(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_not_openmptarget(ex); namespace KE = ::Kokkos::Experimental; - return Impl::is_sorted_impl(label, ex, KE::cbegin(view), KE::cend(view), - std::move(comp)); + return Impl::is_sorted_exespace_impl(label, ex, KE::cbegin(view), + KE::cend(view), std::move(comp)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION bool is_sorted(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last) { + return Impl::is_sorted_team_impl(teamHandle, first, last); +} + +template , int> = 0> +KOKKOS_FUNCTION bool is_sorted( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + namespace KE = ::Kokkos::Experimental; + return Impl::is_sorted_team_impl(teamHandle, KE::cbegin(view), + KE::cend(view)); +} + +template , int> = 0> +KOKKOS_FUNCTION bool is_sorted(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + ComparatorType comp) { + Impl::static_assert_is_not_openmptarget(teamHandle); + return Impl::is_sorted_team_impl(teamHandle, first, last, std::move(comp)); +} + +template , int> = 0> +KOKKOS_FUNCTION bool is_sorted( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, ComparatorType comp) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::static_assert_is_not_openmptarget(teamHandle); + + namespace KE = ::Kokkos::Experimental; + return Impl::is_sorted_team_impl(teamHandle, KE::cbegin(view), KE::cend(view), + std::move(comp)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IsSortedUntil.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IsSortedUntil.hpp index 276b3bb884..96a17b6785 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IsSortedUntil.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IsSortedUntil.hpp @@ -23,58 +23,78 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType is_sorted_until(const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::is_sorted_until_impl( + return 
Impl::is_sorted_until_exespace_impl( "Kokkos::is_sorted_until_iterator_api_default", ex, first, last); } -template +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType is_sorted_until(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::is_sorted_until_impl(label, ex, first, last); + return Impl::is_sorted_until_exespace_impl(label, ex, first, last); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto is_sorted_until(const ExecutionSpace& ex, const ::Kokkos::View& view) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::is_sorted_until_impl("Kokkos::is_sorted_until_view_api_default", - ex, KE::begin(view), KE::end(view)); + return Impl::is_sorted_until_exespace_impl( + "Kokkos::is_sorted_until_view_api_default", ex, KE::begin(view), + KE::end(view)); } -template +template < + typename ExecutionSpace, typename DataType, typename... 
Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto is_sorted_until(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::is_sorted_until_impl(label, ex, KE::begin(view), KE::end(view)); + return Impl::is_sorted_until_exespace_impl(label, ex, KE::begin(view), + KE::end(view)); } -template +template < + typename ExecutionSpace, typename IteratorType, typename ComparatorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType is_sorted_until(const ExecutionSpace& ex, IteratorType first, IteratorType last, ComparatorType comp) { Impl::static_assert_is_not_openmptarget(ex); - return Impl::is_sorted_until_impl( + return Impl::is_sorted_until_exespace_impl( "Kokkos::is_sorted_until_iterator_api_default", ex, first, last, std::move(comp)); } -template +template < + typename ExecutionSpace, typename IteratorType, typename ComparatorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType is_sorted_until(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, ComparatorType comp) { Impl::static_assert_is_not_openmptarget(ex); - return Impl::is_sorted_until_impl(label, ex, first, last, std::move(comp)); + return Impl::is_sorted_until_exespace_impl(label, ex, first, last, + std::move(comp)); } -template +template < + typename ExecutionSpace, typename DataType, typename... 
Properties, + typename ComparatorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto is_sorted_until(const ExecutionSpace& ex, const ::Kokkos::View& view, ComparatorType comp) { @@ -82,13 +102,15 @@ auto is_sorted_until(const ExecutionSpace& ex, Impl::static_assert_is_not_openmptarget(ex); namespace KE = ::Kokkos::Experimental; - return Impl::is_sorted_until_impl("Kokkos::is_sorted_until_view_api_default", - ex, KE::begin(view), KE::end(view), - std::move(comp)); + return Impl::is_sorted_until_exespace_impl( + "Kokkos::is_sorted_until_view_api_default", ex, KE::begin(view), + KE::end(view), std::move(comp)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename ComparatorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto is_sorted_until(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, ComparatorType comp) { @@ -96,8 +118,57 @@ auto is_sorted_until(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_not_openmptarget(ex); namespace KE = ::Kokkos::Experimental; - return Impl::is_sorted_until_impl(label, ex, KE::begin(view), KE::end(view), - std::move(comp)); + return Impl::is_sorted_until_exespace_impl(label, ex, KE::begin(view), + KE::end(view), std::move(comp)); +} + +// +// overload set accepting team handle +// +template , int> = 0> +KOKKOS_FUNCTION IteratorType is_sorted_until(const TeamHandleType& teamHandle, + IteratorType first, + IteratorType last) { + return Impl::is_sorted_until_team_impl(teamHandle, first, last); +} + +template , int> = 0> +KOKKOS_FUNCTION auto is_sorted_until( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + namespace KE = ::Kokkos::Experimental; + return Impl::is_sorted_until_team_impl(teamHandle, KE::begin(view), + KE::end(view)); +} + +template , int> = 0> +KOKKOS_FUNCTION IteratorType 
is_sorted_until(const TeamHandleType& teamHandle, + IteratorType first, + IteratorType last, + ComparatorType comp) { + Impl::static_assert_is_not_openmptarget(teamHandle); + return Impl::is_sorted_until_team_impl(teamHandle, first, last, + std::move(comp)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto is_sorted_until( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, ComparatorType comp) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::static_assert_is_not_openmptarget(teamHandle); + + namespace KE = ::Kokkos::Experimental; + return Impl::is_sorted_until_team_impl(teamHandle, KE::begin(view), + KE::end(view), std::move(comp)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_LexicographicalCompare.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_LexicographicalCompare.hpp index 0a77ef629f..4b5c69df45 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_LexicographicalCompare.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_LexicographicalCompare.hpp @@ -23,25 +23,34 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + class ExecutionSpace, class IteratorType1, class IteratorType2, + std::enable_if_t, int> = 0> bool lexicographical_compare(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, IteratorType2 last2) { - return Impl::lexicographical_compare_impl( + return Impl::lexicographical_compare_exespace_impl( "Kokkos::lexicographical_compare_iterator_api_default", ex, first1, last1, first2, last2); } -template +template < + class ExecutionSpace, class IteratorType1, class IteratorType2, + std::enable_if_t, int> = 0> bool lexicographical_compare(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, IteratorType2 last2) { - return Impl::lexicographical_compare_impl(label, ex, first1, 
last1, first2, - last2); + return Impl::lexicographical_compare_exespace_impl(label, ex, first1, last1, + first2, last2); } -template +template < + class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2, + std::enable_if_t, int> = 0> bool lexicographical_compare( const ExecutionSpace& ex, const ::Kokkos::View& view1, @@ -50,13 +59,15 @@ bool lexicographical_compare( Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; - return Impl::lexicographical_compare_impl( + return Impl::lexicographical_compare_exespace_impl( "Kokkos::lexicographical_compare_view_api_default", ex, KE::cbegin(view1), KE::cend(view1), KE::cbegin(view2), KE::cend(view2)); } -template +template < + class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2, + std::enable_if_t, int> = 0> bool lexicographical_compare( const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view1, @@ -65,33 +76,39 @@ bool lexicographical_compare( Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; - return Impl::lexicographical_compare_impl(label, ex, KE::cbegin(view1), - KE::cend(view1), KE::cbegin(view2), - KE::cend(view2)); + return Impl::lexicographical_compare_exespace_impl( + label, ex, KE::cbegin(view1), KE::cend(view1), KE::cbegin(view2), + KE::cend(view2)); } -template +template < + class ExecutionSpace, class IteratorType1, class IteratorType2, + class ComparatorType, + std::enable_if_t, int> = 0> bool lexicographical_compare(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, IteratorType2 last2, ComparatorType comp) { - return Impl::lexicographical_compare_impl( + return Impl::lexicographical_compare_exespace_impl( "Kokkos::lexicographical_compare_iterator_api_default", ex, first1, last1, first2, last2, comp); } -template +template < + class ExecutionSpace, 
class IteratorType1, class IteratorType2, + class ComparatorType, + std::enable_if_t, int> = 0> bool lexicographical_compare(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, IteratorType2 last2, ComparatorType comp) { - return Impl::lexicographical_compare_impl(label, ex, first1, last1, first2, - last2, comp); + return Impl::lexicographical_compare_exespace_impl(label, ex, first1, last1, + first2, last2, comp); } -template +template < + class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2, class ComparatorType, + std::enable_if_t, int> = 0> bool lexicographical_compare( const ExecutionSpace& ex, const ::Kokkos::View& view1, @@ -100,13 +117,15 @@ bool lexicographical_compare( Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; - return Impl::lexicographical_compare_impl( + return Impl::lexicographical_compare_exespace_impl( "Kokkos::lexicographical_compare_view_api_default", ex, KE::cbegin(view1), KE::cend(view1), KE::cbegin(view2), KE::cend(view2), comp); } -template +template < + class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... 
Properties2, class ComparatorType, + std::enable_if_t, int> = 0> bool lexicographical_compare( const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view1, @@ -115,9 +134,67 @@ bool lexicographical_compare( Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; - return Impl::lexicographical_compare_impl(label, ex, KE::cbegin(view1), - KE::cend(view1), KE::cbegin(view2), - KE::cend(view2), comp); + return Impl::lexicographical_compare_exespace_impl( + label, ex, KE::cbegin(view1), KE::cend(view1), KE::cbegin(view2), + KE::cend(view2), comp); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION bool lexicographical_compare(const TeamHandleType& teamHandle, + IteratorType1 first1, + IteratorType1 last1, + IteratorType2 first2, + IteratorType2 last2) { + return Impl::lexicographical_compare_team_impl(teamHandle, first1, last1, + first2, last2); +} + +template , int> = 0> +KOKKOS_FUNCTION bool lexicographical_compare( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view1, + ::Kokkos::View& view2) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); + + namespace KE = ::Kokkos::Experimental; + return Impl::lexicographical_compare_team_impl( + teamHandle, KE::cbegin(view1), KE::cend(view1), KE::cbegin(view2), + KE::cend(view2)); +} + +template , int> = 0> +KOKKOS_FUNCTION bool lexicographical_compare( + const TeamHandleType& teamHandle, IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, IteratorType2 last2, ComparatorType comp) { + return Impl::lexicographical_compare_team_impl(teamHandle, first1, last1, + first2, last2, comp); +} + +template , int> = 0> +KOKKOS_FUNCTION bool lexicographical_compare( + const TeamHandleType& 
teamHandle, + const ::Kokkos::View& view1, + ::Kokkos::View& view2, ComparatorType comp) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); + + namespace KE = ::Kokkos::Experimental; + return Impl::lexicographical_compare_team_impl( + teamHandle, KE::cbegin(view1), KE::cend(view1), KE::cbegin(view2), + KE::cend(view2), comp); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MaxElement.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MaxElement.hpp index 2c1374f700..d16bac5bfc 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MaxElement.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MaxElement.hpp @@ -23,81 +23,148 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto max_element(const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::min_or_max_element_impl( + return Impl::min_or_max_element_exespace_impl( "Kokkos::max_element_iterator_api_default", ex, first, last); } -template +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto max_element(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::min_or_max_element_impl(label, ex, first, last); + return Impl::min_or_max_element_exespace_impl(label, ex, first, + last); } -template +template < + typename ExecutionSpace, typename IteratorType, typename ComparatorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto max_element(const ExecutionSpace& ex, IteratorType first, IteratorType last, ComparatorType comp) { Impl::static_assert_is_not_openmptarget(ex); - return Impl::min_or_max_element_impl( + return 
Impl::min_or_max_element_exespace_impl( "Kokkos::max_element_iterator_api_default", ex, first, last, std::move(comp)); } -template +template < + typename ExecutionSpace, typename IteratorType, typename ComparatorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto max_element(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, ComparatorType comp) { Impl::static_assert_is_not_openmptarget(ex); - return Impl::min_or_max_element_impl( + return Impl::min_or_max_element_exespace_impl( label, ex, first, last, std::move(comp)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto max_element(const ExecutionSpace& ex, const ::Kokkos::View& v) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - return Impl::min_or_max_element_impl( + return Impl::min_or_max_element_exespace_impl( "Kokkos::max_element_view_api_default", ex, begin(v), end(v)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto max_element(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - return Impl::min_or_max_element_impl(label, ex, begin(v), - end(v)); + return Impl::min_or_max_element_exespace_impl(label, ex, + begin(v), end(v)); } -template +template < + typename ExecutionSpace, typename DataType, typename ComparatorType, + typename... 
Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto max_element(const ExecutionSpace& ex, const ::Kokkos::View& v, ComparatorType comp) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); Impl::static_assert_is_not_openmptarget(ex); - return Impl::min_or_max_element_impl( + return Impl::min_or_max_element_exespace_impl( "Kokkos::max_element_view_api_default", ex, begin(v), end(v), std::move(comp)); } -template +template < + typename ExecutionSpace, typename DataType, typename ComparatorType, + typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto max_element(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, ComparatorType comp) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); Impl::static_assert_is_not_openmptarget(ex); - return Impl::min_or_max_element_impl( + return Impl::min_or_max_element_exespace_impl( label, ex, begin(v), end(v), std::move(comp)); } +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION auto max_element(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last) { + return Impl::min_or_max_element_team_impl(teamHandle, first, + last); +} + +template , int> = 0> +KOKKOS_FUNCTION auto max_element( + const TeamHandleType& teamHandle, + const ::Kokkos::View& v) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + return Impl::min_or_max_element_team_impl(teamHandle, begin(v), + end(v)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto max_element(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + ComparatorType comp) { + Impl::static_assert_is_not_openmptarget(teamHandle); + return Impl::min_or_max_element_team_impl( + teamHandle, first, last, std::move(comp)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto max_element( + const TeamHandleType& teamHandle, + const ::Kokkos::View& v, ComparatorType comp) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + Impl::static_assert_is_not_openmptarget(teamHandle); + return Impl::min_or_max_element_team_impl( + teamHandle, begin(v), end(v), std::move(comp)); +} + } // namespace Experimental } // namespace Kokkos diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MinElement.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MinElement.hpp index 1d03b7c962..2a53fce3e2 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MinElement.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MinElement.hpp @@ -23,81 +23,148 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto min_element(const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::min_or_max_element_impl( + return Impl::min_or_max_element_exespace_impl( "Kokkos::min_element_iterator_api_default", ex, first, 
last); } -template +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto min_element(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::min_or_max_element_impl(label, ex, first, last); + return Impl::min_or_max_element_exespace_impl(label, ex, first, + last); } -template +template < + typename ExecutionSpace, typename IteratorType, typename ComparatorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto min_element(const ExecutionSpace& ex, IteratorType first, IteratorType last, ComparatorType comp) { Impl::static_assert_is_not_openmptarget(ex); - return Impl::min_or_max_element_impl( + return Impl::min_or_max_element_exespace_impl( "Kokkos::min_element_iterator_api_default", ex, first, last, std::move(comp)); } -template +template < + typename ExecutionSpace, typename IteratorType, typename ComparatorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto min_element(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, ComparatorType comp) { Impl::static_assert_is_not_openmptarget(ex); - return Impl::min_or_max_element_impl( + return Impl::min_or_max_element_exespace_impl( label, ex, first, last, std::move(comp)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto min_element(const ExecutionSpace& ex, const ::Kokkos::View& v) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - return Impl::min_or_max_element_impl( + return Impl::min_or_max_element_exespace_impl( "Kokkos::min_element_view_api_default", ex, begin(v), end(v)); } -template +template < + typename ExecutionSpace, typename DataType, typename ComparatorType, + typename... 
Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto min_element(const ExecutionSpace& ex, const ::Kokkos::View& v, ComparatorType comp) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); Impl::static_assert_is_not_openmptarget(ex); - return Impl::min_or_max_element_impl( + return Impl::min_or_max_element_exespace_impl( "Kokkos::min_element_view_api_default", ex, begin(v), end(v), std::move(comp)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto min_element(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - return Impl::min_or_max_element_impl(label, ex, begin(v), - end(v)); + return Impl::min_or_max_element_exespace_impl(label, ex, + begin(v), end(v)); } -template +template < + typename ExecutionSpace, typename DataType, typename ComparatorType, + typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto min_element(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, ComparatorType comp) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); Impl::static_assert_is_not_openmptarget(ex); - return Impl::min_or_max_element_impl( + return Impl::min_or_max_element_exespace_impl( label, ex, begin(v), end(v), std::move(comp)); } +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION auto min_element(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last) { + return Impl::min_or_max_element_team_impl(teamHandle, first, + last); +} + +template , int> = 0> +KOKKOS_FUNCTION auto min_element( + const TeamHandleType& teamHandle, + const ::Kokkos::View& v) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + return Impl::min_or_max_element_team_impl(teamHandle, begin(v), + end(v)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto min_element(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + ComparatorType comp) { + Impl::static_assert_is_not_openmptarget(teamHandle); + return Impl::min_or_max_element_team_impl( + teamHandle, first, last, std::move(comp)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto min_element( + const TeamHandleType& teamHandle, + const ::Kokkos::View& v, ComparatorType comp) { + Impl::static_assert_is_not_openmptarget(teamHandle); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + return Impl::min_or_max_element_team_impl( + teamHandle, begin(v), end(v), std::move(comp)); +} + } // namespace Experimental } // namespace Kokkos diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MinMaxElement.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MinMaxElement.hpp index d481b499cc..c3a1f73ef6 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MinMaxElement.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MinMaxElement.hpp @@ -23,82 +23,151 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto minmax_element(const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::minmax_element_impl( + return Impl::minmax_element_exespace_impl( "Kokkos::minmax_element_iterator_api_default", ex, 
first, last); } -template +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto minmax_element(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::minmax_element_impl(label, ex, first, last); + return Impl::minmax_element_exespace_impl(label, ex, + first, last); } -template +template < + typename ExecutionSpace, typename IteratorType, typename ComparatorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto minmax_element(const ExecutionSpace& ex, IteratorType first, IteratorType last, ComparatorType comp) { Impl::static_assert_is_not_openmptarget(ex); - return Impl::minmax_element_impl( + return Impl::minmax_element_exespace_impl( "Kokkos::minmax_element_iterator_api_default", ex, first, last, std::move(comp)); } -template +template < + typename ExecutionSpace, typename IteratorType, typename ComparatorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto minmax_element(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, ComparatorType comp) { Impl::static_assert_is_not_openmptarget(ex); - return Impl::minmax_element_impl( + return Impl::minmax_element_exespace_impl( label, ex, first, last, std::move(comp)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto minmax_element(const ExecutionSpace& ex, const ::Kokkos::View& v) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - return Impl::minmax_element_impl( + return Impl::minmax_element_exespace_impl( "Kokkos::minmax_element_view_api_default", ex, begin(v), end(v)); } -template +template < + typename ExecutionSpace, typename DataType, typename... 
Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto minmax_element(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - return Impl::minmax_element_impl(label, ex, begin(v), - end(v)); + return Impl::minmax_element_exespace_impl( + label, ex, begin(v), end(v)); } -template +template < + typename ExecutionSpace, typename DataType, typename ComparatorType, + typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto minmax_element(const ExecutionSpace& ex, const ::Kokkos::View& v, ComparatorType comp) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); Impl::static_assert_is_not_openmptarget(ex); - return Impl::minmax_element_impl( + return Impl::minmax_element_exespace_impl( "Kokkos::minmax_element_view_api_default", ex, begin(v), end(v), std::move(comp)); } -template +template < + typename ExecutionSpace, typename DataType, typename ComparatorType, + typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto minmax_element(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, ComparatorType comp) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); Impl::static_assert_is_not_openmptarget(ex); - return Impl::minmax_element_impl( + return Impl::minmax_element_exespace_impl( label, ex, begin(v), end(v), std::move(comp)); } +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION auto minmax_element(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last) { + return Impl::minmax_element_team_impl(teamHandle, first, + last); +} + +template , int> = 0> +KOKKOS_FUNCTION auto minmax_element(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + ComparatorType comp) { + Impl::static_assert_is_not_openmptarget(teamHandle); + + return Impl::minmax_element_team_impl( + teamHandle, first, last, std::move(comp)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto minmax_element( + const TeamHandleType& teamHandle, + const ::Kokkos::View& v) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + return Impl::minmax_element_team_impl(teamHandle, + begin(v), end(v)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto minmax_element( + const TeamHandleType& teamHandle, + const ::Kokkos::View& v, ComparatorType comp) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + Impl::static_assert_is_not_openmptarget(teamHandle); + + return Impl::minmax_element_team_impl( + teamHandle, begin(v), end(v), std::move(comp)); +} + } // namespace Experimental } // namespace Kokkos diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Mismatch.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Mismatch.hpp index 13c994ca90..090afe69e3 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Mismatch.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Mismatch.hpp @@ -30,46 +30,60 @@ namespace Experimental { // // makes API ambiguous (with the overload accepting views). 
-template +// +// overload set accepting execution space +// +template < + class ExecutionSpace, class IteratorType1, class IteratorType2, + std::enable_if_t, int> = 0> ::Kokkos::pair mismatch(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, IteratorType2 last2) { - return Impl::mismatch_impl("Kokkos::mismatch_iterator_api_default", ex, - first1, last1, first2, last2); + return Impl::mismatch_exespace_impl("Kokkos::mismatch_iterator_api_default", + ex, first1, last1, first2, last2); } -template +template < + class ExecutionSpace, class IteratorType1, class IteratorType2, + class BinaryPredicateType, + std::enable_if_t, int> = 0> ::Kokkos::pair mismatch( const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, IteratorType2 last2, BinaryPredicateType&& predicate) { - return Impl::mismatch_impl("Kokkos::mismatch_iterator_api_default", ex, - first1, last1, first2, last2, - std::forward(predicate)); + return Impl::mismatch_exespace_impl( + "Kokkos::mismatch_iterator_api_default", ex, first1, last1, first2, last2, + std::forward(predicate)); } -template +template < + class ExecutionSpace, class IteratorType1, class IteratorType2, + std::enable_if_t, int> = 0> ::Kokkos::pair mismatch( const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, IteratorType2 last2) { - return Impl::mismatch_impl(label, ex, first1, last1, first2, last2); + return Impl::mismatch_exespace_impl(label, ex, first1, last1, first2, last2); } -template +template < + class ExecutionSpace, class IteratorType1, class IteratorType2, + class BinaryPredicateType, + std::enable_if_t, int> = 0> ::Kokkos::pair mismatch( const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, IteratorType2 last2, BinaryPredicateType&& predicate) { - return Impl::mismatch_impl(label, ex, first1, last1, first2, last2, - 
std::forward(predicate)); + return Impl::mismatch_exespace_impl( + label, ex, first1, last1, first2, last2, + std::forward(predicate)); } -template +template < + class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2, + std::enable_if_t, int> = 0> auto mismatch(const ExecutionSpace& ex, const ::Kokkos::View& view1, const ::Kokkos::View& view2) { @@ -77,13 +91,15 @@ auto mismatch(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; - return Impl::mismatch_impl("Kokkos::mismatch_view_api_default", ex, - KE::begin(view1), KE::end(view1), KE::begin(view2), - KE::end(view2)); + return Impl::mismatch_exespace_impl("Kokkos::mismatch_view_api_default", ex, + KE::begin(view1), KE::end(view1), + KE::begin(view2), KE::end(view2)); } -template +template < + class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2, class BinaryPredicateType, + std::enable_if_t, int> = 0> auto mismatch(const ExecutionSpace& ex, const ::Kokkos::View& view1, const ::Kokkos::View& view2, @@ -92,14 +108,16 @@ auto mismatch(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; - return Impl::mismatch_impl("Kokkos::mismatch_view_api_default", ex, - KE::begin(view1), KE::end(view1), KE::begin(view2), - KE::end(view2), - std::forward(predicate)); + return Impl::mismatch_exespace_impl( + "Kokkos::mismatch_view_api_default", ex, KE::begin(view1), KE::end(view1), + KE::begin(view2), KE::end(view2), + std::forward(predicate)); } -template +template < + class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... 
Properties2, + std::enable_if_t, int> = 0> auto mismatch(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view1, const ::Kokkos::View& view2) { @@ -107,12 +125,15 @@ auto mismatch(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; - return Impl::mismatch_impl(label, ex, KE::begin(view1), KE::end(view1), - KE::begin(view2), KE::end(view2)); + return Impl::mismatch_exespace_impl(label, ex, KE::begin(view1), + KE::end(view1), KE::begin(view2), + KE::end(view2)); } -template +template < + class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2, class BinaryPredicateType, + std::enable_if_t, int> = 0> auto mismatch(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view1, const ::Kokkos::View& view2, @@ -121,9 +142,65 @@ auto mismatch(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; - return Impl::mismatch_impl(label, ex, KE::begin(view1), KE::end(view1), - KE::begin(view2), KE::end(view2), - std::forward(predicate)); + return Impl::mismatch_exespace_impl( + label, ex, KE::begin(view1), KE::end(view1), KE::begin(view2), + KE::end(view2), std::forward(predicate)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION ::Kokkos::pair mismatch( + const TeamHandleType& teamHandle, IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, IteratorType2 last2) { + return Impl::mismatch_team_impl(teamHandle, first1, last1, first2, last2); +} + +template , int> = 0> +KOKKOS_FUNCTION ::Kokkos::pair mismatch( + const TeamHandleType& teamHandle, IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, IteratorType2 last2, + BinaryPredicateType&& predicate) { + return Impl::mismatch_team_impl(teamHandle, first1, last1, first2, last2, + std::forward(predicate)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto mismatch( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view1, + const ::Kokkos::View& view2) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); + + namespace KE = ::Kokkos::Experimental; + return Impl::mismatch_team_impl(teamHandle, KE::begin(view1), KE::end(view1), + KE::begin(view2), KE::end(view2)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto mismatch( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view1, + const ::Kokkos::View& view2, + BinaryPredicateType&& predicate) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); + + namespace KE = ::Kokkos::Experimental; + return Impl::mismatch_team_impl(teamHandle, KE::begin(view1), KE::end(view1), + KE::begin(view2), KE::end(view2), + std::forward(predicate)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Move.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Move.hpp index d49acd9f70..f04ea12ba8 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Move.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Move.hpp @@ -23,41 +23,81 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set 
accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator move(const ExecutionSpace& ex, InputIterator first, InputIterator last, OutputIterator d_first) { - return Impl::move_impl("Kokkos::move_iterator_api_default", ex, first, last, - d_first); + return Impl::move_exespace_impl("Kokkos::move_iterator_api_default", ex, + first, last, d_first); } -template +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator move(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator last, OutputIterator d_first) { - return Impl::move_impl(label, ex, first, last, d_first); + return Impl::move_exespace_impl(label, ex, first, last, d_first); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto move(const ExecutionSpace& ex, const ::Kokkos::View& source, ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::move_impl("Kokkos::move_view_api_default", ex, begin(source), - end(source), begin(dest)); + return Impl::move_exespace_impl("Kokkos::move_view_api_default", ex, + begin(source), end(source), begin(dest)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto move(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::move_impl(label, ex, begin(source), end(source), begin(dest)); + return Impl::move_exespace_impl(label, ex, begin(source), end(source), + begin(dest)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION OutputIterator move(const TeamHandleType& teamHandle, + InputIterator first, InputIterator last, + OutputIterator d_first) { + return Impl::move_team_impl(teamHandle, first, last, d_first); +} + +template , int> = 0> +KOKKOS_FUNCTION auto move( + const TeamHandleType& teamHandle, + const ::Kokkos::View& source, + ::Kokkos::View& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::move_team_impl(teamHandle, begin(source), end(source), + begin(dest)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MoveBackward.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MoveBackward.hpp index 60d50fa881..375474ca57 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MoveBackward.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MoveBackward.hpp @@ -23,42 +23,83 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType2 move_backward(const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, 
IteratorType2 d_last) { - return Impl::move_backward_impl("Kokkos::move_backward_iterator_api_default", - ex, first, last, d_last); + return Impl::move_backward_exespace_impl( + "Kokkos::move_backward_iterator_api_default", ex, first, last, d_last); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto move_backward(const ExecutionSpace& ex, const ::Kokkos::View& source, ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::move_backward_impl("Kokkos::move_backward_view_api_default", ex, - begin(source), end(source), end(dest)); + return Impl::move_backward_exespace_impl( + "Kokkos::move_backward_view_api_default", ex, begin(source), end(source), + end(dest)); } -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType2 move_backward(const std::string& label, const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 d_last) { - return Impl::move_backward_impl(label, ex, first, last, d_last); + return Impl::move_backward_exespace_impl(label, ex, first, last, d_last); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto move_backward(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::move_backward_impl(label, ex, begin(source), end(source), - end(dest)); + return Impl::move_backward_exespace_impl(label, ex, begin(source), + end(source), end(dest)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION IteratorType2 move_backward(const TeamHandleType& teamHandle, + IteratorType1 first, + IteratorType1 last, + IteratorType2 d_last) { + return Impl::move_backward_team_impl(teamHandle, first, last, d_last); +} + +template , int> = 0> +KOKKOS_FUNCTION auto move_backward( + const TeamHandleType& teamHandle, + const ::Kokkos::View& source, + ::Kokkos::View& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::move_backward_team_impl(teamHandle, begin(source), end(source), + end(dest)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_NoneOf.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_NoneOf.hpp index cf5de3b72b..f7baab3fc0 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_NoneOf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_NoneOf.hpp @@ -23,41 +23,80 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool none_of(const ExecutionSpace& ex, IteratorType first, 
IteratorType last, Predicate predicate) { - return Impl::none_of_impl("Kokkos::none_of_iterator_api_default", ex, first, - last, predicate); + return Impl::none_of_exespace_impl("Kokkos::none_of_iterator_api_default", ex, + first, last, predicate); } -template +template < + typename ExecutionSpace, typename IteratorType, typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool none_of(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, Predicate predicate) { - return Impl::none_of_impl(label, ex, first, last, predicate); + return Impl::none_of_exespace_impl(label, ex, first, last, predicate); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool none_of(const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::none_of_impl("Kokkos::none_of_view_api_default", ex, - KE::cbegin(v), KE::cend(v), std::move(predicate)); + return Impl::none_of_exespace_impl("Kokkos::none_of_view_api_default", ex, + KE::cbegin(v), KE::cend(v), + std::move(predicate)); } -template +template < + typename ExecutionSpace, typename DataType, typename... 
Properties, + typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool none_of(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::none_of_impl(label, ex, KE::cbegin(v), KE::cend(v), - std::move(predicate)); + return Impl::none_of_exespace_impl(label, ex, KE::cbegin(v), KE::cend(v), + std::move(predicate)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template +KOKKOS_FUNCTION + std::enable_if_t<::Kokkos::is_team_handle::value, bool> + none_of(const TeamHandleType& teamHandle, IteratorType first, + IteratorType last, Predicate predicate) { + return Impl::none_of_team_impl(teamHandle, first, last, predicate); +} + +template +KOKKOS_FUNCTION + std::enable_if_t<::Kokkos::is_team_handle::value, bool> + none_of(const TeamHandleType& teamHandle, + const ::Kokkos::View& v, + Predicate predicate) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + namespace KE = ::Kokkos::Experimental; + return Impl::none_of_team_impl(teamHandle, KE::cbegin(v), KE::cend(v), + std::move(predicate)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_PartitionCopy.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_PartitionCopy.hpp index 38c0a35b62..a1feee8d6d 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_PartitionCopy.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_PartitionCopy.hpp @@ -23,57 +23,103 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIteratorType, + typename OutputIteratorTrueType, typename OutputIteratorFalseType, + typename PredicateType, + 
std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> ::Kokkos::pair partition_copy( const ExecutionSpace& ex, InputIteratorType from_first, InputIteratorType from_last, OutputIteratorTrueType to_first_true, OutputIteratorFalseType to_first_false, PredicateType p) { - return Impl::partition_copy_impl( + return Impl::partition_copy_exespace_impl( "Kokkos::partition_copy_iterator_api_default", ex, from_first, from_last, to_first_true, to_first_false, std::move(p)); } -template +template < + typename ExecutionSpace, typename InputIteratorType, + typename OutputIteratorTrueType, typename OutputIteratorFalseType, + typename PredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> ::Kokkos::pair partition_copy( const std::string& label, const ExecutionSpace& ex, InputIteratorType from_first, InputIteratorType from_last, OutputIteratorTrueType to_first_true, OutputIteratorFalseType to_first_false, PredicateType p) { - return Impl::partition_copy_impl(label, ex, from_first, from_last, - to_first_true, to_first_false, std::move(p)); + return Impl::partition_copy_exespace_impl(label, ex, from_first, from_last, + to_first_true, to_first_false, + std::move(p)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename DataType3, + typename... 
Properties3, typename PredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto partition_copy( const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest_true, const ::Kokkos::View& view_dest_false, PredicateType p) { - return Impl::partition_copy_impl("Kokkos::partition_copy_view_api_default", - ex, cbegin(view_from), cend(view_from), - begin(view_dest_true), - begin(view_dest_false), std::move(p)); + return Impl::partition_copy_exespace_impl( + "Kokkos::partition_copy_view_api_default", ex, cbegin(view_from), + cend(view_from), begin(view_dest_true), begin(view_dest_false), + std::move(p)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename DataType3, + typename... Properties3, typename PredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto partition_copy( const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest_true, const ::Kokkos::View& view_dest_false, PredicateType p) { - return Impl::partition_copy_impl(label, ex, cbegin(view_from), - cend(view_from), begin(view_dest_true), - begin(view_dest_false), std::move(p)); + return Impl::partition_copy_exespace_impl( + label, ex, cbegin(view_from), cend(view_from), begin(view_dest_true), + begin(view_dest_false), std::move(p)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION ::Kokkos::pair +partition_copy(const TeamHandleType& teamHandle, InputIteratorType from_first, + InputIteratorType from_last, + OutputIteratorTrueType to_first_true, + OutputIteratorFalseType to_first_false, PredicateType p) { + return Impl::partition_copy_team_impl(teamHandle, from_first, from_last, + to_first_true, to_first_false, + std::move(p)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto partition_copy( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest_true, + const ::Kokkos::View& view_dest_false, + PredicateType p) { + return Impl::partition_copy_team_impl(teamHandle, cbegin(view_from), + cend(view_from), begin(view_dest_true), + begin(view_dest_false), std::move(p)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_PartitionPoint.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_PartitionPoint.hpp index 24798e377e..60cbeeda87 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_PartitionPoint.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_PartitionPoint.hpp @@ -23,38 +23,78 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, typename UnaryPredicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType partition_point(const ExecutionSpace& ex, IteratorType first, IteratorType last, UnaryPredicate p) { - return Impl::partition_point_impl( + return Impl::partition_point_exespace_impl( "Kokkos::partitioned_point_iterator_api_default", ex, first, last, std::move(p)); } -template +template < + typename ExecutionSpace, typename IteratorType, typename UnaryPredicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType partition_point(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, UnaryPredicate p) { 
- return Impl::partition_point_impl(label, ex, first, last, std::move(p)); + return Impl::partition_point_exespace_impl(label, ex, first, last, + std::move(p)); } -template +template < + typename ExecutionSpace, typename UnaryPredicate, typename DataType, + typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto partition_point(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, UnaryPredicate p) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - return Impl::partition_point_impl(label, ex, begin(v), end(v), std::move(p)); + return Impl::partition_point_exespace_impl(label, ex, begin(v), end(v), + std::move(p)); } -template +template < + typename ExecutionSpace, typename UnaryPredicate, typename DataType, + typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto partition_point(const ExecutionSpace& ex, const ::Kokkos::View& v, UnaryPredicate p) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - return Impl::partition_point_impl("Kokkos::partition_point_view_api_default", - ex, begin(v), end(v), std::move(p)); + return Impl::partition_point_exespace_impl( + "Kokkos::partition_point_view_api_default", ex, begin(v), end(v), + std::move(p)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION IteratorType partition_point(const TeamHandleType& teamHandle, + IteratorType first, + IteratorType last, + UnaryPredicate p) { + return Impl::partition_point_team_impl(teamHandle, first, last, std::move(p)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto partition_point( + const TeamHandleType& teamHandle, + const ::Kokkos::View& v, UnaryPredicate p) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + return Impl::partition_point_team_impl(teamHandle, begin(v), end(v), + std::move(p)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Reduce.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Reduce.hpp index a31fa1497a..b84f00f8bb 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Reduce.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Reduce.hpp @@ -23,28 +23,38 @@ namespace Kokkos { namespace Experimental { +// +// overload set accepting execution space +// + // // overload set 1 // -template +template ::value, + int> = 0> typename IteratorType::value_type reduce(const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::reduce_default_functors_impl( + return Impl::reduce_default_functors_exespace_impl( "Kokkos::reduce_default_functors_iterator_api", ex, first, last, typename IteratorType::value_type()); } -template +template ::value, + int> = 0> typename IteratorType::value_type reduce(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::reduce_default_functors_impl( + return Impl::reduce_default_functors_exespace_impl( label, ex, first, last, typename IteratorType::value_type()); } -template +template ::value, + int> = 0> auto reduce(const ExecutionSpace& ex, const ::Kokkos::View& view) { namespace KE = ::Kokkos::Experimental; @@ -53,12 +63,14 @@ auto reduce(const ExecutionSpace& ex, using view_type = ::Kokkos::View; using value_type = typename 
view_type::value_type; - return Impl::reduce_default_functors_impl( + return Impl::reduce_default_functors_exespace_impl( "Kokkos::reduce_default_functors_view_api", ex, KE::cbegin(view), KE::cend(view), value_type()); } -template +template ::value, + int> = 0> auto reduce(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view) { namespace KE = ::Kokkos::Experimental; @@ -67,37 +79,43 @@ auto reduce(const std::string& label, const ExecutionSpace& ex, using view_type = ::Kokkos::View; using value_type = typename view_type::value_type; - return Impl::reduce_default_functors_impl(label, ex, KE::cbegin(view), - KE::cend(view), value_type()); + return Impl::reduce_default_functors_exespace_impl( + label, ex, KE::cbegin(view), KE::cend(view), value_type()); } // // overload set2: // -template +template ::value, + int> = 0> ValueType reduce(const ExecutionSpace& ex, IteratorType first, IteratorType last, ValueType init_reduction_value) { static_assert(std::is_move_constructible::value, "ValueType must be move constructible."); - return Impl::reduce_default_functors_impl( + return Impl::reduce_default_functors_exespace_impl( "Kokkos::reduce_default_functors_iterator_api", ex, first, last, init_reduction_value); } -template +template ::value, + int> = 0> ValueType reduce(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, ValueType init_reduction_value) { static_assert(std::is_move_constructible::value, "ValueType must be move constructible."); - return Impl::reduce_default_functors_impl(label, ex, first, last, - init_reduction_value); + return Impl::reduce_default_functors_exespace_impl(label, ex, first, last, + init_reduction_value); } -template +template ::value, + int> = 0> ValueType reduce(const ExecutionSpace& ex, const ::Kokkos::View& view, ValueType init_reduction_value) { @@ -107,13 +125,15 @@ ValueType reduce(const ExecutionSpace& ex, namespace KE = ::Kokkos::Experimental; 
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::reduce_default_functors_impl( + return Impl::reduce_default_functors_exespace_impl( "Kokkos::reduce_default_functors_view_api", ex, KE::cbegin(view), KE::cend(view), init_reduction_value); } -template +template ::value, + int> = 0> ValueType reduce(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, ValueType init_reduction_value) { @@ -123,40 +143,46 @@ ValueType reduce(const std::string& label, const ExecutionSpace& ex, namespace KE = ::Kokkos::Experimental; Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::reduce_default_functors_impl( + return Impl::reduce_default_functors_exespace_impl( label, ex, KE::cbegin(view), KE::cend(view), init_reduction_value); } // // overload set 3 // -template +template ::value, + int> = 0> ValueType reduce(const ExecutionSpace& ex, IteratorType first, IteratorType last, ValueType init_reduction_value, BinaryOp joiner) { static_assert(std::is_move_constructible::value, "ValueType must be move constructible."); - return Impl::reduce_custom_functors_impl( + return Impl::reduce_custom_functors_exespace_impl( "Kokkos::reduce_default_functors_iterator_api", ex, first, last, init_reduction_value, joiner); } -template +template ::value, + int> = 0> ValueType reduce(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, ValueType init_reduction_value, BinaryOp joiner) { static_assert(std::is_move_constructible::value, "ValueType must be move constructible."); - return Impl::reduce_custom_functors_impl(label, ex, first, last, - init_reduction_value, joiner); + return Impl::reduce_custom_functors_exespace_impl( + label, ex, first, last, init_reduction_value, joiner); } -template +template ::value, + int> = 0> ValueType reduce(const ExecutionSpace& ex, const ::Kokkos::View& view, ValueType init_reduction_value, BinaryOp joiner) { @@ -166,13 +192,15 @@ ValueType 
reduce(const ExecutionSpace& ex, namespace KE = ::Kokkos::Experimental; Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::reduce_custom_functors_impl( + return Impl::reduce_custom_functors_exespace_impl( "Kokkos::reduce_custom_functors_view_api", ex, KE::cbegin(view), KE::cend(view), init_reduction_value, joiner); } -template +template ::value, + int> = 0> ValueType reduce(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, ValueType init_reduction_value, BinaryOp joiner) { @@ -182,9 +210,114 @@ ValueType reduce(const std::string& label, const ExecutionSpace& ex, namespace KE = ::Kokkos::Experimental; Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::reduce_custom_functors_impl(label, ex, KE::cbegin(view), - KE::cend(view), init_reduction_value, - joiner); + return Impl::reduce_custom_functors_exespace_impl( + label, ex, KE::cbegin(view), KE::cend(view), init_reduction_value, + joiner); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// + +// +// overload set 1 +// +template < + typename TeamHandleType, typename IteratorType, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION typename IteratorType::value_type reduce( + const TeamHandleType& teamHandle, IteratorType first, IteratorType last) { + return Impl::reduce_default_functors_team_impl( + teamHandle, first, last, typename IteratorType::value_type()); +} + +template < + typename TeamHandleType, typename DataType, typename... 
Properties, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION auto reduce( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view) { + namespace KE = ::Kokkos::Experimental; + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + using view_type = ::Kokkos::View; + using value_type = typename view_type::value_type; + + return Impl::reduce_default_functors_team_impl(teamHandle, KE::cbegin(view), + KE::cend(view), value_type()); +} + +// +// overload set2: +// +template < + typename TeamHandleType, typename IteratorType, typename ValueType, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION ValueType reduce(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + ValueType init_reduction_value) { + static_assert(std::is_move_constructible::value, + "ValueType must be move constructible."); + + return Impl::reduce_default_functors_team_impl(teamHandle, first, last, + init_reduction_value); +} + +template < + typename TeamHandleType, typename DataType, typename... 
Properties, + typename ValueType, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION ValueType +reduce(const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + ValueType init_reduction_value) { + static_assert(std::is_move_constructible::value, + "ValueType must be move constructible."); + + namespace KE = ::Kokkos::Experimental; + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + return Impl::reduce_default_functors_team_impl( + teamHandle, KE::cbegin(view), KE::cend(view), init_reduction_value); +} + +// +// overload set 3 +// +template < + typename TeamHandleType, typename IteratorType, typename ValueType, + typename BinaryOp, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION ValueType reduce(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + ValueType init_reduction_value, + BinaryOp joiner) { + static_assert(std::is_move_constructible::value, + "ValueType must be move constructible."); + + return Impl::reduce_custom_functors_team_impl(teamHandle, first, last, + init_reduction_value, joiner); +} + +template < + typename TeamHandleType, typename DataType, typename... 
Properties, + typename ValueType, typename BinaryOp, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION ValueType +reduce(const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + ValueType init_reduction_value, BinaryOp joiner) { + static_assert(std::is_move_constructible::value, + "ValueType must be move constructible."); + + namespace KE = ::Kokkos::Experimental; + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + return Impl::reduce_custom_functors_team_impl(teamHandle, KE::cbegin(view), + KE::cend(view), + init_reduction_value, joiner); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Remove.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Remove.hpp index c8602d2f53..8a429d8d51 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Remove.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Remove.hpp @@ -23,38 +23,74 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename Iterator, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> Iterator remove(const ExecutionSpace& ex, Iterator first, Iterator last, const ValueType& value) { - return Impl::remove_impl("Kokkos::remove_iterator_api_default", ex, first, - last, value); + return Impl::remove_exespace_impl("Kokkos::remove_iterator_api_default", ex, + first, last, value); } -template +template < + typename ExecutionSpace, typename Iterator, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> Iterator remove(const std::string& label, const ExecutionSpace& ex, Iterator first, Iterator last, const ValueType& value) { - return Impl::remove_impl(label, ex, first, last, value); + return Impl::remove_exespace_impl(label, ex, first, last, value); } -template +template < + typename ExecutionSpace, typename DataType, typename... 
Properties, + typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto remove(const ExecutionSpace& ex, const ::Kokkos::View& view, const ValueType& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::remove_impl("Kokkos::remove_iterator_api_default", ex, - ::Kokkos::Experimental::begin(view), - ::Kokkos::Experimental::end(view), value); + return Impl::remove_exespace_impl("Kokkos::remove_iterator_api_default", ex, + ::Kokkos::Experimental::begin(view), + ::Kokkos::Experimental::end(view), value); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto remove(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, const ValueType& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::remove_impl(label, ex, ::Kokkos::Experimental::begin(view), - ::Kokkos::Experimental::end(view), value); + return Impl::remove_exespace_impl(label, ex, + ::Kokkos::Experimental::begin(view), + ::Kokkos::Experimental::end(view), value); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION Iterator remove(const TeamHandleType& teamHandle, + Iterator first, Iterator last, + const ValueType& value) { + return Impl::remove_team_impl(teamHandle, first, last, value); +} + +template , int> = 0> +KOKKOS_FUNCTION auto remove(const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + const ValueType& value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + return Impl::remove_team_impl(teamHandle, ::Kokkos::Experimental::begin(view), + ::Kokkos::Experimental::end(view), value); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RemoveCopy.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RemoveCopy.hpp index c2c06f6202..4b8fa9fe07 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RemoveCopy.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RemoveCopy.hpp @@ -23,26 +23,36 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator remove_copy(const ExecutionSpace& ex, InputIterator first_from, InputIterator last_from, OutputIterator first_dest, const ValueType& value) { - return Impl::remove_copy_impl("Kokkos::remove_copy_iterator_api_default", ex, - first_from, last_from, first_dest, value); + return Impl::remove_copy_exespace_impl( + "Kokkos::remove_copy_iterator_api_default", ex, first_from, last_from, + first_dest, value); } -template +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator remove_copy(const std::string& label, const ExecutionSpace& ex, InputIterator first_from, InputIterator last_from, OutputIterator first_dest, const ValueType& value) { - return 
Impl::remove_copy_impl(label, ex, first_from, last_from, first_dest, - value); + return Impl::remove_copy_exespace_impl(label, ex, first_from, last_from, + first_dest, value); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto remove_copy(const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, @@ -50,15 +60,17 @@ auto remove_copy(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - return Impl::remove_copy_impl("Kokkos::remove_copy_iterator_api_default", ex, - ::Kokkos::Experimental::cbegin(view_from), - ::Kokkos::Experimental::cend(view_from), - ::Kokkos::Experimental::begin(view_dest), - value); + return Impl::remove_copy_exespace_impl( + "Kokkos::remove_copy_iterator_api_default", ex, + ::Kokkos::Experimental::cbegin(view_from), + ::Kokkos::Experimental::cend(view_from), + ::Kokkos::Experimental::begin(view_dest), value); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto remove_copy(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, @@ -66,12 +78,46 @@ auto remove_copy(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - return Impl::remove_copy_impl( + return Impl::remove_copy_exespace_impl( label, ex, ::Kokkos::Experimental::cbegin(view_from), ::Kokkos::Experimental::cend(view_from), ::Kokkos::Experimental::begin(view_dest), value); } +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION OutputIterator remove_copy(const TeamHandleType& teamHandle, + InputIterator first_from, + InputIterator last_from, + OutputIterator first_dest, + const ValueType& value) { + return Impl::remove_copy_team_impl(teamHandle, first_from, last_from, + first_dest, value); +} + +template , int> = 0> +KOKKOS_FUNCTION auto remove_copy( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest, + const ValueType& value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + + return Impl::remove_copy_team_impl( + teamHandle, ::Kokkos::Experimental::cbegin(view_from), + ::Kokkos::Experimental::cend(view_from), + ::Kokkos::Experimental::begin(view_dest), value); +} + } // namespace Experimental } // namespace Kokkos diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RemoveCopyIf.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RemoveCopyIf.hpp index 6d642ed6f0..45e2b54bb6 100644 --- 
a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RemoveCopyIf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RemoveCopyIf.hpp @@ -23,30 +23,39 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + typename UnaryPredicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator remove_copy_if(const ExecutionSpace& ex, InputIterator first_from, InputIterator last_from, OutputIterator first_dest, const UnaryPredicate& pred) { - return Impl::remove_copy_if_impl( + return Impl::remove_copy_if_exespace_impl( "Kokkos::remove_copy_if_iterator_api_default", ex, first_from, last_from, first_dest, pred); } -template +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + typename UnaryPredicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator remove_copy_if(const std::string& label, const ExecutionSpace& ex, InputIterator first_from, InputIterator last_from, OutputIterator first_dest, const UnaryPredicate& pred) { - return Impl::remove_copy_if_impl(label, ex, first_from, last_from, first_dest, - pred); + return Impl::remove_copy_if_exespace_impl(label, ex, first_from, last_from, + first_dest, pred); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename UnaryPredicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto remove_copy_if(const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, @@ -54,15 +63,17 @@ auto remove_copy_if(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - return Impl::remove_copy_if_impl( + return Impl::remove_copy_if_exespace_impl( "Kokkos::remove_copy_if_iterator_api_default", ex, ::Kokkos::Experimental::cbegin(view_from), ::Kokkos::Experimental::cend(view_from), ::Kokkos::Experimental::begin(view_dest), pred); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename UnaryPredicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto remove_copy_if(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, @@ -70,12 +81,46 @@ auto remove_copy_if(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - return Impl::remove_copy_if_impl( + return Impl::remove_copy_if_exespace_impl( label, ex, ::Kokkos::Experimental::cbegin(view_from), ::Kokkos::Experimental::cend(view_from), ::Kokkos::Experimental::begin(view_dest), pred); } +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION OutputIterator remove_copy_if(const TeamHandleType& teamHandle, + InputIterator first_from, + InputIterator last_from, + OutputIterator first_dest, + const UnaryPredicate& pred) { + return Impl::remove_copy_if_team_impl(teamHandle, first_from, last_from, + first_dest, pred); +} + +template , int> = 0> +KOKKOS_FUNCTION auto remove_copy_if( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest, + const UnaryPredicate& pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + + return Impl::remove_copy_if_team_impl( + teamHandle, ::Kokkos::Experimental::cbegin(view_from), + ::Kokkos::Experimental::cend(view_from), + ::Kokkos::Experimental::begin(view_dest), pred); +} + } // namespace Experimental } // namespace Kokkos diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RemoveIf.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RemoveIf.hpp index 4062e8d373..38461a37f2 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RemoveIf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RemoveIf.hpp @@ -23,39 +23,77 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename Iterator, typename UnaryPredicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> Iterator remove_if(const ExecutionSpace& ex, Iterator first, Iterator last, UnaryPredicate pred) { - return Impl::remove_if_impl("Kokkos::remove_if_iterator_api_default", ex, - first, last, pred); + return Impl::remove_if_exespace_impl("Kokkos::remove_if_iterator_api_default", + ex, first, last, pred); } -template +template < + typename ExecutionSpace, typename Iterator, typename UnaryPredicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> Iterator remove_if(const std::string& label, 
const ExecutionSpace& ex, Iterator first, Iterator last, UnaryPredicate pred) { - return Impl::remove_if_impl(label, ex, first, last, pred); + return Impl::remove_if_exespace_impl(label, ex, first, last, pred); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename UnaryPredicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto remove_if(const ExecutionSpace& ex, const ::Kokkos::View& view, UnaryPredicate pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::remove_if_impl("Kokkos::remove_if_iterator_api_default", ex, - ::Kokkos::Experimental::begin(view), - ::Kokkos::Experimental::end(view), pred); + return Impl::remove_if_exespace_impl("Kokkos::remove_if_iterator_api_default", + ex, ::Kokkos::Experimental::begin(view), + ::Kokkos::Experimental::end(view), pred); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename UnaryPredicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto remove_if(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, UnaryPredicate pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::remove_if_impl(label, ex, ::Kokkos::Experimental::begin(view), - ::Kokkos::Experimental::end(view), pred); + return Impl::remove_if_exespace_impl(label, ex, + ::Kokkos::Experimental::begin(view), + ::Kokkos::Experimental::end(view), pred); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION Iterator remove_if(const TeamHandleType& teamHandle, + Iterator first, Iterator last, + UnaryPredicate pred) { + return Impl::remove_if_team_impl(teamHandle, first, last, pred); +} + +template , int> = 0> +KOKKOS_FUNCTION auto remove_if( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, UnaryPredicate pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + return Impl::remove_if_team_impl(teamHandle, + ::Kokkos::Experimental::begin(view), + ::Kokkos::Experimental::end(view), pred); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Replace.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Replace.hpp index 4d1490ded0..29afc4f0c2 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Replace.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Replace.hpp @@ -23,40 +23,77 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename Iterator, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void replace(const ExecutionSpace& ex, Iterator first, Iterator last, const ValueType& old_value, const ValueType& new_value) { - return Impl::replace_impl("Kokkos::replace_iterator_api", ex, first, last, - old_value, new_value); + Impl::replace_exespace_impl("Kokkos::replace_iterator_api", ex, first, last, + old_value, new_value); } -template +template < + typename ExecutionSpace, typename Iterator, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void replace(const std::string& label, const ExecutionSpace& ex, Iterator first, Iterator last, const ValueType& old_value, const ValueType& new_value) { - return Impl::replace_impl(label, ex, first, last, old_value, new_value); + Impl::replace_exespace_impl(label, ex, first, last, old_value, new_value); } -template +template < + typename 
ExecutionSpace, typename DataType1, typename... Properties1, + typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void replace(const ExecutionSpace& ex, const ::Kokkos::View& view, const ValueType& old_value, const ValueType& new_value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::replace_impl("Kokkos::replace_view_api", ex, KE::begin(view), - KE::end(view), old_value, new_value); + Impl::replace_exespace_impl("Kokkos::replace_view_api", ex, KE::begin(view), + KE::end(view), old_value, new_value); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void replace(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, const ValueType& old_value, const ValueType& new_value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::replace_impl(label, ex, KE::begin(view), KE::end(view), - old_value, new_value); + Impl::replace_exespace_impl(label, ex, KE::begin(view), KE::end(view), + old_value, new_value); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION void replace(const TeamHandleType& teamHandle, Iterator first, + Iterator last, const ValueType& old_value, + const ValueType& new_value) { + Impl::replace_team_impl(teamHandle, first, last, old_value, new_value); +} + +template , int> = 0> +KOKKOS_FUNCTION void replace( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + const ValueType& old_value, const ValueType& new_value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + namespace KE = ::Kokkos::Experimental; + Impl::replace_team_impl(teamHandle, KE::begin(view), KE::end(view), old_value, + new_value); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReplaceCopy.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReplaceCopy.hpp index e7f464e4bd..04d5767e89 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReplaceCopy.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReplaceCopy.hpp @@ -23,30 +23,39 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator replace_copy(const ExecutionSpace& ex, InputIterator first_from, InputIterator last_from, OutputIterator first_dest, const ValueType& old_value, const ValueType& new_value) { - return Impl::replace_copy_impl("Kokkos::replace_copy_iterator_api", ex, - first_from, last_from, first_dest, old_value, - new_value); + return Impl::replace_copy_exespace_impl("Kokkos::replace_copy_iterator_api", + ex, first_from, last_from, first_dest, + old_value, new_value); } -template +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator replace_copy(const std::string& label, 
const ExecutionSpace& ex, InputIterator first_from, InputIterator last_from, OutputIterator first_dest, const ValueType& old_value, const ValueType& new_value) { - return Impl::replace_copy_impl(label, ex, first_from, last_from, first_dest, - old_value, new_value); + return Impl::replace_copy_exespace_impl(label, ex, first_from, last_from, + first_dest, old_value, new_value); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto replace_copy(const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, @@ -54,13 +63,15 @@ auto replace_copy(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); namespace KE = ::Kokkos::Experimental; - return Impl::replace_copy_impl("Kokkos::replace_copy_view_api", ex, - KE::cbegin(view_from), KE::cend(view_from), - KE::begin(view_dest), old_value, new_value); + return Impl::replace_copy_exespace_impl( + "Kokkos::replace_copy_view_api", ex, KE::cbegin(view_from), + KE::cend(view_from), KE::begin(view_dest), old_value, new_value); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto replace_copy(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, @@ -68,9 +79,43 @@ auto replace_copy(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); namespace KE = ::Kokkos::Experimental; - return Impl::replace_copy_impl(label, ex, KE::cbegin(view_from), - KE::cend(view_from), KE::begin(view_dest), - old_value, new_value); + return Impl::replace_copy_exespace_impl( + label, ex, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest), old_value, new_value); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION OutputIterator replace_copy(const TeamHandleType& teamHandle, + InputIterator first_from, + InputIterator last_from, + OutputIterator first_dest, + const ValueType& old_value, + const ValueType& new_value) { + return Impl::replace_copy_team_impl(teamHandle, first_from, last_from, + first_dest, old_value, new_value); +} + +template , int> = 0> +KOKKOS_FUNCTION auto replace_copy( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest, + const ValueType& old_value, const ValueType& new_value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + namespace KE = ::Kokkos::Experimental; + return Impl::replace_copy_team_impl(teamHandle, KE::cbegin(view_from), + KE::cend(view_from), KE::begin(view_dest), + old_value, new_value); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReplaceCopyIf.hpp 
b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReplaceCopyIf.hpp index 71ae8f8452..b87163f194 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReplaceCopyIf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReplaceCopyIf.hpp @@ -23,33 +23,42 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + typename PredicateType, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator replace_copy_if(const ExecutionSpace& ex, InputIterator first_from, InputIterator last_from, OutputIterator first_dest, PredicateType pred, const ValueType& new_value) { - return Impl::replace_copy_if_impl("Kokkos::replace_copy_if_iterator_api", ex, - first_from, last_from, first_dest, pred, - new_value); + return Impl::replace_copy_if_exespace_impl( + "Kokkos::replace_copy_if_iterator_api", ex, first_from, last_from, + first_dest, pred, new_value); } -template +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + typename PredicateType, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator replace_copy_if(const std::string& label, const ExecutionSpace& ex, InputIterator first_from, InputIterator last_from, OutputIterator first_dest, PredicateType pred, const ValueType& new_value) { - return Impl::replace_copy_if_impl(label, ex, first_from, last_from, - first_dest, pred, new_value); + return Impl::replace_copy_if_exespace_impl(label, ex, first_from, last_from, + first_dest, pred, new_value); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename PredicateType, + typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto replace_copy_if(const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, @@ -57,14 +66,16 @@ auto replace_copy_if(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); namespace KE = ::Kokkos::Experimental; - return Impl::replace_copy_if_impl("Kokkos::replace_copy_if_view_api", ex, - KE::cbegin(view_from), KE::cend(view_from), - KE::begin(view_dest), pred, new_value); + return Impl::replace_copy_if_exespace_impl( + "Kokkos::replace_copy_if_view_api", ex, KE::cbegin(view_from), + KE::cend(view_from), KE::begin(view_dest), pred, new_value); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename PredicateType, + typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto replace_copy_if(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, @@ -72,9 +83,44 @@ auto replace_copy_if(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); namespace KE = ::Kokkos::Experimental; - return Impl::replace_copy_if_impl(label, ex, KE::cbegin(view_from), - KE::cend(view_from), KE::begin(view_dest), - pred, new_value); + return Impl::replace_copy_if_exespace_impl( + label, ex, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest), pred, new_value); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION OutputIterator replace_copy_if(const TeamHandleType& teamHandle, + InputIterator first_from, + InputIterator last_from, + OutputIterator first_dest, + PredicateType pred, + const ValueType& new_value) { + return Impl::replace_copy_if_team_impl(teamHandle, first_from, last_from, + first_dest, pred, new_value); +} + +template , int> = 0> +KOKKOS_FUNCTION auto replace_copy_if( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest, + PredicateType pred, const ValueType& new_value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + namespace KE = ::Kokkos::Experimental; + return Impl::replace_copy_if_team_impl(teamHandle, KE::cbegin(view_from), + KE::cend(view_from), + KE::begin(view_dest), pred, new_value); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReplaceIf.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReplaceIf.hpp index 7f06540e06..73af1f16f0 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReplaceIf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReplaceIf.hpp @@ -23,43 +23,82 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename Predicate, + typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void replace_if(const ExecutionSpace& ex, InputIterator first, InputIterator last, Predicate pred, const ValueType& new_value) { - return Impl::replace_if_impl("Kokkos::replace_if_iterator_api", ex, first, - last, pred, new_value); + Impl::replace_if_exespace_impl("Kokkos::replace_if_iterator_api", ex, first, + last, pred, new_value); } -template +template < + typename ExecutionSpace, typename InputIterator, typename Predicate, + typename ValueType, + 
std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void replace_if(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator last, Predicate pred, const ValueType& new_value) { - return Impl::replace_if_impl(label, ex, first, last, pred, new_value); + Impl::replace_if_exespace_impl(label, ex, first, last, pred, new_value); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename Predicate, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void replace_if(const ExecutionSpace& ex, const ::Kokkos::View& view, Predicate pred, const ValueType& new_value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::replace_if_impl("Kokkos::replace_if_view_api", ex, - KE::begin(view), KE::end(view), pred, new_value); + Impl::replace_if_exespace_impl("Kokkos::replace_if_view_api", ex, + KE::begin(view), KE::end(view), pred, + new_value); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename Predicate, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void replace_if(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, Predicate pred, const ValueType& new_value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::replace_if_impl(label, ex, KE::begin(view), KE::end(view), pred, - new_value); + Impl::replace_if_exespace_impl(label, ex, KE::begin(view), KE::end(view), + pred, new_value); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION void replace_if(const TeamHandleType& teamHandle, + InputIterator first, InputIterator last, + Predicate pred, const ValueType& new_value) { + Impl::replace_if_team_impl(teamHandle, first, last, pred, new_value); +} + +template , int> = 0> +KOKKOS_FUNCTION void replace_if( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, Predicate pred, + const ValueType& new_value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + namespace KE = ::Kokkos::Experimental; + Impl::replace_if_team_impl(teamHandle, KE::begin(view), KE::end(view), pred, + new_value); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Reverse.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Reverse.hpp index 9f2fc5f3cc..a0786d3a2e 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Reverse.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Reverse.hpp @@ -23,34 +23,67 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void reverse(const ExecutionSpace& ex, InputIterator first, InputIterator last) { - return Impl::reverse_impl("Kokkos::reverse_iterator_api_default", ex, first, - last); + return Impl::reverse_exespace_impl("Kokkos::reverse_iterator_api_default", ex, + first, last); } -template +template < + typename ExecutionSpace, typename InputIterator, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void reverse(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator last) { - return Impl::reverse_impl(label, ex, first, last); + return Impl::reverse_exespace_impl(label, ex, first, last); } -template +template < + typename ExecutionSpace, typename DataType, typename... 
Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void reverse(const ExecutionSpace& ex, const ::Kokkos::View& view) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::reverse_impl("Kokkos::reverse_view_api_default", ex, - KE::begin(view), KE::end(view)); + return Impl::reverse_exespace_impl("Kokkos::reverse_view_api_default", ex, + KE::begin(view), KE::end(view)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void reverse(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::reverse_impl(label, ex, KE::begin(view), KE::end(view)); + return Impl::reverse_exespace_impl(label, ex, KE::begin(view), KE::end(view)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION void reverse(const TeamHandleType& teamHandle, + InputIterator first, InputIterator last) { + return Impl::reverse_team_impl(teamHandle, first, last); +} + +template , int> = 0> +KOKKOS_FUNCTION void reverse( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + namespace KE = ::Kokkos::Experimental; + return Impl::reverse_team_impl(teamHandle, KE::begin(view), KE::end(view)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReverseCopy.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReverseCopy.hpp index 279bb22086..37336c983a 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReverseCopy.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReverseCopy.hpp @@ -23,42 +23,83 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator reverse_copy(const ExecutionSpace& ex, InputIterator first, InputIterator last, OutputIterator d_first) { - return Impl::reverse_copy_impl("Kokkos::reverse_copy_iterator_api_default", - ex, first, last, d_first); + return Impl::reverse_copy_exespace_impl( + "Kokkos::reverse_copy_iterator_api_default", ex, first, last, d_first); } -template +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator reverse_copy(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator last, OutputIterator d_first) { - return Impl::reverse_copy_impl(label, ex, first, last, d_first); + return Impl::reverse_copy_exespace_impl(label, ex, first, last, d_first); } -template +template < + typename ExecutionSpace, typename DataType1, 
typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto reverse_copy(const ExecutionSpace& ex, const ::Kokkos::View& source, ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::reverse_copy_impl("Kokkos::reverse_copy_view_api_default", ex, - cbegin(source), cend(source), begin(dest)); + return Impl::reverse_copy_exespace_impl( + "Kokkos::reverse_copy_view_api_default", ex, cbegin(source), cend(source), + begin(dest)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto reverse_copy(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::reverse_copy_impl(label, ex, cbegin(source), cend(source), - begin(dest)); + return Impl::reverse_copy_exespace_impl(label, ex, cbegin(source), + cend(source), begin(dest)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION OutputIterator reverse_copy(const TeamHandleType& teamHandle, + InputIterator first, + InputIterator last, + OutputIterator d_first) { + return Impl::reverse_copy_team_impl(teamHandle, first, last, d_first); +} + +template , int> = 0> +KOKKOS_FUNCTION auto reverse_copy( + const TeamHandleType& teamHandle, + const ::Kokkos::View& source, + ::Kokkos::View& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::reverse_copy_team_impl(teamHandle, cbegin(source), cend(source), + begin(dest)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Rotate.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Rotate.hpp index 738e9bf137..aff04b47d6 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Rotate.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Rotate.hpp @@ -23,36 +23,71 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType rotate(const ExecutionSpace& ex, IteratorType first, IteratorType n_first, IteratorType last) { - return Impl::rotate_impl("Kokkos::rotate_iterator_api_default", ex, first, - n_first, last); + return Impl::rotate_exespace_impl("Kokkos::rotate_iterator_api_default", ex, + first, n_first, last); } -template +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType rotate(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType n_first, IteratorType last) { - return Impl::rotate_impl(label, ex, first, n_first, last); + return Impl::rotate_exespace_impl(label, ex, first, n_first, last); } -template +template < + typename ExecutionSpace, typename DataType, 
typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto rotate(const ExecutionSpace& ex, const ::Kokkos::View& view, std::size_t n_location) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::rotate_impl("Kokkos::rotate_view_api_default", ex, begin(view), - begin(view) + n_location, end(view)); + return Impl::rotate_exespace_impl("Kokkos::rotate_view_api_default", ex, + begin(view), begin(view) + n_location, + end(view)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto rotate(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, std::size_t n_location) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::rotate_impl(label, ex, begin(view), begin(view) + n_location, - end(view)); + return Impl::rotate_exespace_impl(label, ex, begin(view), + begin(view) + n_location, end(view)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION IteratorType rotate(const TeamHandleType& teamHandle, + IteratorType first, IteratorType n_first, + IteratorType last) { + return Impl::rotate_team_impl(teamHandle, first, n_first, last); +} + +template , int> = 0> +KOKKOS_FUNCTION auto rotate(const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + std::size_t n_location) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + return Impl::rotate_team_impl(teamHandle, begin(view), + begin(view) + n_location, end(view)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RotateCopy.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RotateCopy.hpp index f5d826c4bb..cce37fccfa 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RotateCopy.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RotateCopy.hpp @@ -23,23 +23,34 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator rotate_copy(const ExecutionSpace& ex, InputIterator first, InputIterator n_first, InputIterator last, OutputIterator d_first) { - return Impl::rotate_copy_impl("Kokkos::rotate_copy_iterator_api_default", ex, - first, n_first, last, d_first); + return Impl::rotate_copy_exespace_impl( + "Kokkos::rotate_copy_iterator_api_default", ex, first, n_first, last, + d_first); } -template +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator rotate_copy(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator n_first, InputIterator last, OutputIterator d_first) { - return Impl::rotate_copy_impl(label, ex, first, n_first, last, d_first); + return Impl::rotate_copy_exespace_impl(label, 
ex, first, n_first, last, + d_first); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto rotate_copy(const ExecutionSpace& ex, const ::Kokkos::View& source, std::size_t n_location, @@ -47,13 +58,15 @@ auto rotate_copy(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::rotate_copy_impl("Kokkos::rotate_copy_view_api_default", ex, - cbegin(source), cbegin(source) + n_location, - cend(source), begin(dest)); + return Impl::rotate_copy_exespace_impl( + "Kokkos::rotate_copy_view_api_default", ex, cbegin(source), + cbegin(source) + n_location, cend(source), begin(dest)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto rotate_copy(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, std::size_t n_location, @@ -61,9 +74,41 @@ auto rotate_copy(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::rotate_copy_impl(label, ex, cbegin(source), - cbegin(source) + n_location, cend(source), - begin(dest)); + return Impl::rotate_copy_exespace_impl(label, ex, cbegin(source), + cbegin(source) + n_location, + cend(source), begin(dest)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION OutputIterator rotate_copy(const TeamHandleType& teamHandle, + InputIterator first, + InputIterator n_first, + InputIterator last, + OutputIterator d_first) { + return Impl::rotate_copy_team_impl(teamHandle, first, n_first, last, d_first); +} + +template , int> = 0> +KOKKOS_FUNCTION auto rotate_copy( + const TeamHandleType& teamHandle, + const ::Kokkos::View& source, + std::size_t n_location, + const ::Kokkos::View& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::rotate_copy_team_impl(teamHandle, cbegin(source), + cbegin(source) + n_location, cend(source), + begin(dest)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Search.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Search.hpp index b1154b297e..43258a484e 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Search.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Search.hpp @@ -23,24 +23,34 @@ namespace Kokkos { namespace Experimental { +// +// overload set accepting execution space +// + // overload set 1: no binary predicate passed -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 search(const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last) { - return Impl::search_impl("Kokkos::search_iterator_api_default", ex, first, - last, s_first, s_last); + return Impl::search_exespace_impl("Kokkos::search_iterator_api_default", ex, + first, last, s_first, s_last); } -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 search(const std::string& label, const ExecutionSpace& ex, IteratorType1 first, 
IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last) { - return Impl::search_impl(label, ex, first, last, s_first, s_last); + return Impl::search_exespace_impl(label, ex, first, last, s_first, s_last); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto search(const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view) { @@ -48,13 +58,15 @@ auto search(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; - return Impl::search_impl("Kokkos::search_view_api_default", ex, - KE::begin(view), KE::end(view), KE::begin(s_view), - KE::end(s_view)); + return Impl::search_exespace_impl("Kokkos::search_view_api_default", ex, + KE::begin(view), KE::end(view), + KE::begin(s_view), KE::end(s_view)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto search(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view) { @@ -62,31 +74,38 @@ auto search(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; - return Impl::search_impl(label, ex, KE::begin(view), KE::end(view), - KE::begin(s_view), KE::end(s_view)); + return Impl::search_exespace_impl(label, ex, KE::begin(view), KE::end(view), + KE::begin(s_view), KE::end(s_view)); } // overload set 2: binary predicate passed -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + typename BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 search(const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last, const BinaryPredicateType& pred) { - return Impl::search_impl("Kokkos::search_iterator_api_default", ex, first, - last, s_first, s_last, pred); + return Impl::search_exespace_impl("Kokkos::search_iterator_api_default", ex, + first, last, s_first, s_last, pred); } -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + typename BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 search(const std::string& label, const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last, const BinaryPredicateType& pred) { - return Impl::search_impl(label, ex, first, last, s_first, s_last, pred); + return Impl::search_exespace_impl(label, ex, first, last, s_first, s_last, + pred); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto search(const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view, @@ -95,13 +114,15 @@ auto search(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; - return Impl::search_impl("Kokkos::search_view_api_default", ex, - KE::begin(view), KE::end(view), KE::begin(s_view), - KE::end(s_view), pred); + return Impl::search_exespace_impl("Kokkos::search_view_api_default", ex, + KE::begin(view), KE::end(view), + KE::begin(s_view), KE::end(s_view), pred); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto search(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view, @@ -110,8 +131,70 @@ auto search(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; - return Impl::search_impl(label, ex, KE::begin(view), KE::end(view), - KE::begin(s_view), KE::end(s_view), pred); + return Impl::search_exespace_impl(label, ex, KE::begin(view), KE::end(view), + KE::begin(s_view), KE::end(s_view), pred); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// + +// overload set 1: no binary predicate passed +template , int> = 0> +KOKKOS_FUNCTION IteratorType1 search(const TeamHandleType& teamHandle, + IteratorType1 first, IteratorType1 last, + IteratorType2 s_first, + IteratorType2 s_last) { + return Impl::search_team_impl(teamHandle, first, last, s_first, s_last); +} + +template , int> = 0> +KOKKOS_FUNCTION auto search( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + const ::Kokkos::View& s_view) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); + + namespace KE = ::Kokkos::Experimental; + return Impl::search_team_impl(teamHandle, KE::begin(view), KE::end(view), + KE::begin(s_view), KE::end(s_view)); +} + +// overload set 2: binary predicate passed +template , int> = 0> + +KOKKOS_FUNCTION IteratorType1 search(const TeamHandleType& teamHandle, + IteratorType1 first, IteratorType1 last, + IteratorType2 s_first, + IteratorType2 s_last, + const BinaryPredicateType& pred) { + return Impl::search_team_impl(teamHandle, first, last, s_first, s_last, pred); +} + +template , int> = 0> +KOKKOS_FUNCTION auto search( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + const ::Kokkos::View& s_view, + const BinaryPredicateType& pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); + + namespace KE = ::Kokkos::Experimental; + return Impl::search_team_impl(teamHandle, KE::begin(view), KE::end(view), + KE::begin(s_view), KE::end(s_view), pred); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_SearchN.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_SearchN.hpp index a649c8f205..0f8aa5f1c1 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_SearchN.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_SearchN.hpp @@ -23,68 +23,86 @@ namespace Kokkos { namespace 
Experimental { +// +// overload set accepting execution space +// + // overload set 1: no binary predicate passed -template +template < + class ExecutionSpace, class IteratorType, class SizeType, class ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType search_n(const ExecutionSpace& ex, IteratorType first, IteratorType last, SizeType count, const ValueType& value) { - return Impl::search_n_impl("Kokkos::search_n_iterator_api_default", ex, first, - last, count, value); + return Impl::search_n_exespace_impl("Kokkos::search_n_iterator_api_default", + ex, first, last, count, value); } -template +template < + class ExecutionSpace, class IteratorType, class SizeType, class ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType search_n(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, SizeType count, const ValueType& value) { - return Impl::search_n_impl(label, ex, first, last, count, value); + return Impl::search_n_exespace_impl(label, ex, first, last, count, value); } template + class SizeType, class ValueType, + std::enable_if_t<::Kokkos::is_execution_space::value, + int> = 0> auto search_n(const ExecutionSpace& ex, const ::Kokkos::View& view, SizeType count, const ValueType& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::search_n_impl("Kokkos::search_n_view_api_default", ex, - KE::begin(view), KE::end(view), count, value); + return Impl::search_n_exespace_impl("Kokkos::search_n_view_api_default", ex, + KE::begin(view), KE::end(view), count, + value); } template + class SizeType, class ValueType, + std::enable_if_t<::Kokkos::is_execution_space::value, + int> = 0> auto search_n(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, SizeType count, const ValueType& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = 
::Kokkos::Experimental; - return Impl::search_n_impl(label, ex, KE::begin(view), KE::end(view), count, - value); + return Impl::search_n_exespace_impl(label, ex, KE::begin(view), KE::end(view), + count, value); } // overload set 2: binary predicate passed -template +template < + class ExecutionSpace, class IteratorType, class SizeType, class ValueType, + class BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType search_n(const ExecutionSpace& ex, IteratorType first, IteratorType last, SizeType count, const ValueType& value, const BinaryPredicateType& pred) { - return Impl::search_n_impl("Kokkos::search_n_iterator_api_default", ex, first, - last, count, value, pred); + return Impl::search_n_exespace_impl("Kokkos::search_n_iterator_api_default", + ex, first, last, count, value, pred); } -template +template < + class ExecutionSpace, class IteratorType, class SizeType, class ValueType, + class BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType search_n(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, SizeType count, const ValueType& value, const BinaryPredicateType& pred) { - return Impl::search_n_impl(label, ex, first, last, count, value, pred); + return Impl::search_n_exespace_impl(label, ex, first, last, count, value, + pred); } template + class SizeType, class ValueType, class BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space::value, + int> = 0> auto search_n(const ExecutionSpace& ex, const ::Kokkos::View& view, SizeType count, const ValueType& value, @@ -92,13 +110,15 @@ auto search_n(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::search_n_impl("Kokkos::search_n_view_api_default", ex, - KE::begin(view), KE::end(view), count, value, - pred); + return Impl::search_n_exespace_impl("Kokkos::search_n_view_api_default", ex, + 
KE::begin(view), KE::end(view), count, + value, pred); } template + class SizeType, class ValueType, class BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space::value, + int> = 0> auto search_n(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, SizeType count, const ValueType& value, @@ -106,8 +126,65 @@ auto search_n(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::search_n_impl(label, ex, KE::begin(view), KE::end(view), count, - value, pred); + return Impl::search_n_exespace_impl(label, ex, KE::begin(view), KE::end(view), + count, value, pred); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// + +// overload set 1: no binary predicate passed +template , int> = 0> +KOKKOS_FUNCTION IteratorType search_n(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + SizeType count, const ValueType& value) { + return Impl::search_n_team_impl(teamHandle, first, last, count, value); +} + +template < + class TeamHandleType, class DataType, class... 
Properties, class SizeType, + class ValueType, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION auto search_n( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, SizeType count, + const ValueType& value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + namespace KE = ::Kokkos::Experimental; + return Impl::search_n_team_impl(teamHandle, KE::begin(view), KE::end(view), + count, value); +} + +// overload set 2: binary predicate passed +template , int> = 0> +KOKKOS_FUNCTION IteratorType search_n(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + SizeType count, const ValueType& value, + const BinaryPredicateType& pred) { + return Impl::search_n_team_impl(teamHandle, first, last, count, value, pred); +} + +template < + class TeamHandleType, class DataType, class... Properties, class SizeType, + class ValueType, class BinaryPredicateType, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION auto search_n( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, SizeType count, + const ValueType& value, const BinaryPredicateType& pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + namespace KE = ::Kokkos::Experimental; + return Impl::search_n_team_impl(teamHandle, KE::begin(view), KE::end(view), + count, value, pred); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ShiftLeft.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ShiftLeft.hpp index 4b91a17ab8..b3e04a3b97 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ShiftLeft.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ShiftLeft.hpp @@ -23,36 +23,70 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType 
shift_left(const ExecutionSpace& ex, IteratorType first, IteratorType last, typename IteratorType::difference_type n) { - return Impl::shift_left_impl("Kokkos::shift_left_iterator_api_default", ex, - first, last, n); + return Impl::shift_left_exespace_impl( + "Kokkos::shift_left_iterator_api_default", ex, first, last, n); } -template +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType shift_left(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, typename IteratorType::difference_type n) { - return Impl::shift_left_impl(label, ex, first, last, n); + return Impl::shift_left_exespace_impl(label, ex, first, last, n); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto shift_left(const ExecutionSpace& ex, const ::Kokkos::View& view, typename decltype(begin(view))::difference_type n) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::shift_left_impl("Kokkos::shift_left_view_api_default", ex, - begin(view), end(view), n); + return Impl::shift_left_exespace_impl("Kokkos::shift_left_view_api_default", + ex, begin(view), end(view), n); } -template +template < + typename ExecutionSpace, typename DataType, typename... 
Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto shift_left(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, typename decltype(begin(view))::difference_type n) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::shift_left_impl(label, ex, begin(view), end(view), n); + return Impl::shift_left_exespace_impl(label, ex, begin(view), end(view), n); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION IteratorType +shift_left(const TeamHandleType& teamHandle, IteratorType first, + IteratorType last, typename IteratorType::difference_type n) { + return Impl::shift_left_team_impl(teamHandle, first, last, n); +} + +template , int> = 0> +KOKKOS_FUNCTION auto shift_left( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + typename decltype(begin(view))::difference_type n) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + return Impl::shift_left_team_impl(teamHandle, begin(view), end(view), n); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ShiftRight.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ShiftRight.hpp index 2ea50fd74e..0f7ed53948 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ShiftRight.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ShiftRight.hpp @@ -23,36 +23,70 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType shift_right(const ExecutionSpace& ex, IteratorType first, IteratorType last, typename IteratorType::difference_type n) { - return Impl::shift_right_impl("Kokkos::shift_right_iterator_api_default", ex, 
- first, last, n); + return Impl::shift_right_exespace_impl( + "Kokkos::shift_right_iterator_api_default", ex, first, last, n); } -template +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType shift_right(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, typename IteratorType::difference_type n) { - return Impl::shift_right_impl(label, ex, first, last, n); + return Impl::shift_right_exespace_impl(label, ex, first, last, n); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto shift_right(const ExecutionSpace& ex, const ::Kokkos::View& view, typename decltype(begin(view))::difference_type n) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::shift_right_impl("Kokkos::shift_right_view_api_default", ex, - begin(view), end(view), n); + return Impl::shift_right_exespace_impl("Kokkos::shift_right_view_api_default", + ex, begin(view), end(view), n); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto shift_right(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, typename decltype(begin(view))::difference_type n) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::shift_right_impl(label, ex, begin(view), end(view), n); + return Impl::shift_right_exespace_impl(label, ex, begin(view), end(view), n); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION IteratorType +shift_right(const TeamHandleType& teamHandle, IteratorType first, + IteratorType last, typename IteratorType::difference_type n) { + return Impl::shift_right_team_impl(teamHandle, first, last, n); +} + +template , int> = 0> +KOKKOS_FUNCTION auto shift_right( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + typename decltype(begin(view))::difference_type n) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + return Impl::shift_right_team_impl(teamHandle, begin(view), end(view), n); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_SwapRanges.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_SwapRanges.hpp index 5fbf045318..39f33b6487 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_SwapRanges.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_SwapRanges.hpp @@ -23,15 +23,21 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template , int> = 0> IteratorType2 swap_ranges(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2) { - return Impl::swap_ranges_impl("Kokkos::swap_ranges_iterator_api_default", ex, - first1, last1, first2); + return Impl::swap_ranges_exespace_impl( + "Kokkos::swap_ranges_iterator_api_default", ex, first1, last1, first2); } -template +template , int> = 0> auto swap_ranges(const ExecutionSpace& ex, const ::Kokkos::View& source, ::Kokkos::View& dest) { @@ -39,19 +45,23 @@ auto swap_ranges(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); assert(source.extent(0) == dest.extent(0)); - return Impl::swap_ranges_impl("Kokkos::swap_ranges_view_api_default", ex, - begin(source), end(source), begin(dest)); + return Impl::swap_ranges_exespace_impl("Kokkos::swap_ranges_view_api_default", + ex, begin(source), end(source), + begin(dest)); } -template +template , int> 
= 0> IteratorType2 swap_ranges(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2) { - return Impl::swap_ranges_impl(label, ex, first1, last1, first2); + return Impl::swap_ranges_exespace_impl(label, ex, first1, last1, first2); } -template +template , int> = 0> auto swap_ranges(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, ::Kokkos::View& dest) { @@ -59,8 +69,38 @@ auto swap_ranges(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); assert(source.extent(0) == dest.extent(0)); - return Impl::swap_ranges_impl(label, ex, begin(source), end(source), - begin(dest)); + return Impl::swap_ranges_exespace_impl(label, ex, begin(source), end(source), + begin(dest)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION IteratorType2 swap_ranges(const TeamHandleType& teamHandle, + IteratorType1 first1, + IteratorType1 last1, + IteratorType2 first2) { + return Impl::swap_ranges_team_impl(teamHandle, first1, last1, first2); +} + +template , int> = 0> +KOKKOS_FUNCTION auto swap_ranges( + const TeamHandleType& teamHandle, + const ::Kokkos::View& source, + ::Kokkos::View& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + assert(source.extent(0) == dest.extent(0)); + return Impl::swap_ranges_team_impl(teamHandle, begin(source), end(source), + begin(dest)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Transform.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Transform.hpp index 27dee30426..838c9169e2 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Transform.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Transform.hpp @@ -23,31 +23,39 @@ namespace Kokkos { namespace Experimental { -template -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - InputIterator, OutputIterator>::value, - OutputIterator> -transform(const ExecutionSpace& ex, InputIterator first1, InputIterator last1, - OutputIterator d_first, UnaryOperation unary_op) { - return Impl::transform_impl("Kokkos::transform_iterator_api_default", ex, - first1, last1, d_first, std::move(unary_op)); +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + typename UnaryOperation, + std::enable_if_t && + is_execution_space_v, + int> = 0> +OutputIterator transform(const ExecutionSpace& ex, InputIterator first1, + InputIterator last1, OutputIterator d_first, + UnaryOperation unary_op) { + return Impl::transform_exespace_impl("Kokkos::transform_iterator_api_default", + ex, first1, last1, d_first, + std::move(unary_op)); } -template 
-std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - InputIterator, OutputIterator>::value, - OutputIterator> -transform(const std::string& label, const ExecutionSpace& ex, - InputIterator first1, InputIterator last1, OutputIterator d_first, - UnaryOperation unary_op) { - return Impl::transform_impl(label, ex, first1, last1, d_first, - std::move(unary_op)); +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + typename UnaryOperation, + std::enable_if_t && + is_execution_space_v, + int> = 0> +OutputIterator transform(const std::string& label, const ExecutionSpace& ex, + InputIterator first1, InputIterator last1, + OutputIterator d_first, UnaryOperation unary_op) { + return Impl::transform_exespace_impl(label, ex, first1, last1, d_first, + std::move(unary_op)); } -template +template , int> = 0> auto transform(const ExecutionSpace& ex, const ::Kokkos::View& source, ::Kokkos::View& dest, @@ -55,13 +63,14 @@ auto transform(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::transform_impl("Kokkos::transform_view_api_default", ex, - begin(source), end(source), begin(dest), - std::move(unary_op)); + return Impl::transform_exespace_impl("Kokkos::transform_view_api_default", ex, + begin(source), end(source), begin(dest), + std::move(unary_op)); } -template +template , int> = 0> auto transform(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, ::Kokkos::View& dest, @@ -69,38 +78,44 @@ auto transform(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::transform_impl(label, ex, begin(source), end(source), - begin(dest), std::move(unary_op)); + return Impl::transform_exespace_impl(label, ex, begin(source), end(source), + 
begin(dest), std::move(unary_op)); } -template -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - InputIterator1, InputIterator2, OutputIterator>::value, - OutputIterator> -transform(const ExecutionSpace& ex, InputIterator1 first1, InputIterator1 last1, - InputIterator2 first2, OutputIterator d_first, - BinaryOperation binary_op) { - return Impl::transform_impl("Kokkos::transform_iterator_api_default", ex, - first1, last1, first2, d_first, - std::move(binary_op)); +template < + typename ExecutionSpace, typename InputIterator1, typename InputIterator2, + typename OutputIterator, typename BinaryOperation, + std::enable_if_t< + Impl::are_iterators_v && + is_execution_space_v, + int> = 0> +OutputIterator transform(const ExecutionSpace& ex, InputIterator1 first1, + InputIterator1 last1, InputIterator2 first2, + OutputIterator d_first, BinaryOperation binary_op) { + return Impl::transform_exespace_impl("Kokkos::transform_iterator_api_default", + ex, first1, last1, first2, d_first, + std::move(binary_op)); } -template -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - InputIterator1, InputIterator2, OutputIterator>::value, - OutputIterator> -transform(const std::string& label, const ExecutionSpace& ex, - InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, - OutputIterator d_first, BinaryOperation binary_op) { - return Impl::transform_impl(label, ex, first1, last1, first2, d_first, - std::move(binary_op)); +template < + typename ExecutionSpace, typename InputIterator1, typename InputIterator2, + typename OutputIterator, typename BinaryOperation, + std::enable_if_t< + Impl::are_iterators_v && + is_execution_space_v, + int> = 0> +OutputIterator transform(const std::string& label, const ExecutionSpace& ex, + InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, OutputIterator d_first, + BinaryOperation binary_op) { + return Impl::transform_exespace_impl(label, ex, first1, last1, first2, + d_first, 
std::move(binary_op)); } -template +template , int> = 0> auto transform(const ExecutionSpace& ex, const ::Kokkos::View& source1, const ::Kokkos::View& source2, @@ -110,14 +125,15 @@ auto transform(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source2); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::transform_impl("Kokkos::transform_view_api_default", ex, - begin(source1), end(source1), begin(source2), - begin(dest), std::move(binary_op)); + return Impl::transform_exespace_impl( + "Kokkos::transform_view_api_default", ex, begin(source1), end(source1), + begin(source2), begin(dest), std::move(binary_op)); } -template +template , int> = 0> auto transform(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source1, const ::Kokkos::View& source2, @@ -127,9 +143,79 @@ auto transform(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source2); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::transform_impl(label, ex, begin(source1), end(source1), - begin(source2), begin(dest), - std::move(binary_op)); + return Impl::transform_exespace_impl(label, ex, begin(source1), end(source1), + begin(source2), begin(dest), + std::move(binary_op)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template < + typename TeamHandleType, typename InputIterator, typename OutputIterator, + typename UnaryOperation, + std::enable_if_t && + is_team_handle_v, + int> = 0> +KOKKOS_FUNCTION OutputIterator transform(const TeamHandleType& teamHandle, + InputIterator first1, + InputIterator last1, + OutputIterator d_first, + UnaryOperation unary_op) { + return Impl::transform_team_impl(teamHandle, first1, last1, d_first, + std::move(unary_op)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto transform( + const TeamHandleType& teamHandle, + const ::Kokkos::View& source, + ::Kokkos::View& dest, UnaryOperation unary_op) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::transform_team_impl(teamHandle, begin(source), end(source), + begin(dest), std::move(unary_op)); +} + +template < + typename TeamHandleType, typename InputIterator1, typename InputIterator2, + typename OutputIterator, typename BinaryOperation, + std::enable_if_t< + Impl::are_iterators_v && + is_team_handle_v, + int> = 0> +KOKKOS_FUNCTION OutputIterator transform(const TeamHandleType& teamHandle, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputIterator d_first, + BinaryOperation binary_op) { + return Impl::transform_team_impl(teamHandle, first1, last1, first2, d_first, + std::move(binary_op)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto transform( + const TeamHandleType& teamHandle, + const ::Kokkos::View& source1, + const ::Kokkos::View& source2, + ::Kokkos::View& dest, + BinaryOperation binary_op) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source1); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source2); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::transform_team_impl(teamHandle, begin(source1), end(source1), + begin(source2), begin(dest), + std::move(binary_op)); } } // namespace 
Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_TransformExclusiveScan.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_TransformExclusiveScan.hpp index 9d85aee06f..37fc0f860e 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_TransformExclusiveScan.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_TransformExclusiveScan.hpp @@ -23,44 +23,52 @@ namespace Kokkos { namespace Experimental { -template -std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -transform_exclusive_scan(const ExecutionSpace& ex, InputIteratorType first, - InputIteratorType last, OutputIteratorType first_dest, - ValueType init_value, BinaryOpType binary_op, - UnaryOpType unary_op) { +// +// overload set accepting execution space +// +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType transform_exclusive_scan( + const ExecutionSpace& ex, InputIteratorType first, InputIteratorType last, + OutputIteratorType first_dest, ValueType init_value, BinaryOpType binary_op, + UnaryOpType unary_op) { Impl::static_assert_is_not_openmptarget(ex); - static_assert(std::is_move_constructible::value, + static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); - return Impl::transform_exclusive_scan_impl( + return Impl::transform_exclusive_scan_exespace_impl( "Kokkos::transform_exclusive_scan_custom_functors_iterator_api", ex, - first, last, first_dest, init_value, binary_op, unary_op); + first, last, first_dest, std::move(init_value), binary_op, unary_op); } -template -std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -transform_exclusive_scan(const std::string& label, const ExecutionSpace& ex, - InputIteratorType first, InputIteratorType last, - OutputIteratorType first_dest, ValueType init_value, - BinaryOpType binary_op, UnaryOpType unary_op) { 
+template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType transform_exclusive_scan( + const std::string& label, const ExecutionSpace& ex, InputIteratorType first, + InputIteratorType last, OutputIteratorType first_dest, ValueType init_value, + BinaryOpType binary_op, UnaryOpType unary_op) { Impl::static_assert_is_not_openmptarget(ex); - static_assert(std::is_move_constructible::value, + static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); - return Impl::transform_exclusive_scan_impl(label, ex, first, last, first_dest, - init_value, binary_op, unary_op); + return Impl::transform_exclusive_scan_exespace_impl( + label, ex, first, last, first_dest, std::move(init_value), binary_op, + unary_op); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename ValueType, + typename BinaryOpType, typename UnaryOpType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto transform_exclusive_scan( const ExecutionSpace& ex, const ::Kokkos::View& view_from, @@ -69,18 +77,20 @@ auto transform_exclusive_scan( Impl::static_assert_is_not_openmptarget(ex); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - static_assert(std::is_move_constructible::value, + static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); namespace KE = ::Kokkos::Experimental; - return Impl::transform_exclusive_scan_impl( + return Impl::transform_exclusive_scan_exespace_impl( "Kokkos::transform_exclusive_scan_custom_functors_view_api", ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), - init_value, binary_op, unary_op); + std::move(init_value), binary_op, unary_op); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename ValueType, + typename BinaryOpType, typename UnaryOpType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto transform_exclusive_scan( const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, @@ -89,12 +99,56 @@ auto transform_exclusive_scan( Impl::static_assert_is_not_openmptarget(ex); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - static_assert(std::is_move_constructible::value, + static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); namespace KE = ::Kokkos::Experimental; - return Impl::transform_exclusive_scan_impl( + return Impl::transform_exclusive_scan_exespace_impl( label, ex, KE::cbegin(view_from), KE::cend(view_from), - KE::begin(view_dest), init_value, binary_op, unary_op); + KE::begin(view_dest), std::move(init_value), binary_op, unary_op); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template && :: + Kokkos::is_team_handle_v, + int> = 0> +KOKKOS_FUNCTION OutputIteratorType transform_exclusive_scan( + const TeamHandleType& teamHandle, InputIteratorType first, + InputIteratorType last, OutputIteratorType first_dest, ValueType init_value, + BinaryOpType binary_op, UnaryOpType unary_op) { + Impl::static_assert_is_not_openmptarget(teamHandle); + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + return Impl::transform_exclusive_scan_team_impl( + teamHandle, first, last, first_dest, std::move(init_value), binary_op, + unary_op); +} + +template , int> = 0> +KOKKOS_FUNCTION auto transform_exclusive_scan( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest, + ValueType init_value, BinaryOpType binary_op, UnaryOpType unary_op) { + Impl::static_assert_is_not_openmptarget(teamHandle); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + namespace KE = ::Kokkos::Experimental; + return Impl::transform_exclusive_scan_team_impl( + teamHandle, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest), std::move(init_value), binary_op, unary_op); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_TransformInclusiveScan.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_TransformInclusiveScan.hpp index 7489af7e37..5f694dbfd9 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_TransformInclusiveScan.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_TransformInclusiveScan.hpp @@ -23,40 +23,53 @@ namespace Kokkos { namespace Experimental { +// +// overload set accepting execution space +// + // overload set 1 (no init value) -template -std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, 
OutputIteratorType>::value, - OutputIteratorType> -transform_inclusive_scan(const ExecutionSpace& ex, InputIteratorType first, - InputIteratorType last, OutputIteratorType first_dest, - BinaryOpType binary_op, UnaryOpType unary_op) { +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType transform_inclusive_scan(const ExecutionSpace& ex, + InputIteratorType first, + InputIteratorType last, + OutputIteratorType first_dest, + BinaryOpType binary_op, + UnaryOpType unary_op) { Impl::static_assert_is_not_openmptarget(ex); - return Impl::transform_inclusive_scan_impl( + return Impl::transform_inclusive_scan_exespace_impl( "Kokkos::transform_inclusive_scan_custom_functors_iterator_api", ex, first, last, first_dest, binary_op, unary_op); } -template -std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -transform_inclusive_scan(const std::string& label, const ExecutionSpace& ex, - InputIteratorType first, InputIteratorType last, - OutputIteratorType first_dest, BinaryOpType binary_op, - UnaryOpType unary_op) { +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType transform_inclusive_scan( + const std::string& label, const ExecutionSpace& ex, InputIteratorType first, + InputIteratorType last, OutputIteratorType first_dest, + BinaryOpType binary_op, UnaryOpType unary_op) { Impl::static_assert_is_not_openmptarget(ex); - return Impl::transform_inclusive_scan_impl(label, ex, first, last, first_dest, - binary_op, unary_op); + return Impl::transform_inclusive_scan_exespace_impl( + label, ex, first, last, first_dest, binary_op, unary_op); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename BinaryOpType, + typename UnaryOpType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto transform_inclusive_scan( const ExecutionSpace& ex, const ::Kokkos::View& view_from, @@ -66,15 +79,17 @@ auto transform_inclusive_scan( Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); namespace KE = ::Kokkos::Experimental; - return Impl::transform_inclusive_scan_impl( + return Impl::transform_inclusive_scan_exespace_impl( "Kokkos::transform_inclusive_scan_custom_functors_view_api", ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), binary_op, unary_op); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename BinaryOpType, + typename UnaryOpType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto transform_inclusive_scan( const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, @@ -84,46 +99,59 @@ auto transform_inclusive_scan( Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); namespace KE = ::Kokkos::Experimental; - return Impl::transform_inclusive_scan_impl( + return Impl::transform_inclusive_scan_exespace_impl( label, ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), binary_op, unary_op); } // overload set 2 (init value) -template -std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -transform_inclusive_scan(const ExecutionSpace& ex, InputIteratorType first, - InputIteratorType last, OutputIteratorType first_dest, - BinaryOpType binary_op, UnaryOpType unary_op, - ValueType init_value) { +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType transform_inclusive_scan( + const 
ExecutionSpace& ex, InputIteratorType first, InputIteratorType last, + OutputIteratorType first_dest, BinaryOpType binary_op, UnaryOpType unary_op, + ValueType init_value) { Impl::static_assert_is_not_openmptarget(ex); - return Impl::transform_inclusive_scan_impl( + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + + return Impl::transform_inclusive_scan_exespace_impl( "Kokkos::transform_inclusive_scan_custom_functors_iterator_api", ex, - first, last, first_dest, binary_op, unary_op, init_value); + first, last, first_dest, binary_op, unary_op, std::move(init_value)); } -template -std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -transform_inclusive_scan(const std::string& label, const ExecutionSpace& ex, - InputIteratorType first, InputIteratorType last, - OutputIteratorType first_dest, BinaryOpType binary_op, - UnaryOpType unary_op, ValueType init_value) { +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType transform_inclusive_scan( + const std::string& label, const ExecutionSpace& ex, InputIteratorType first, + InputIteratorType last, OutputIteratorType first_dest, + BinaryOpType binary_op, UnaryOpType unary_op, ValueType init_value) { Impl::static_assert_is_not_openmptarget(ex); - return Impl::transform_inclusive_scan_impl(label, ex, first, last, first_dest, - binary_op, unary_op, init_value); + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + + return Impl::transform_inclusive_scan_exespace_impl( + label, ex, first, last, first_dest, binary_op, unary_op, + std::move(init_value)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename BinaryOpType, + typename UnaryOpType, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto transform_inclusive_scan( const ExecutionSpace& ex, const ::Kokkos::View& view_from, @@ -132,16 +160,21 @@ auto transform_inclusive_scan( Impl::static_assert_is_not_openmptarget(ex); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + namespace KE = ::Kokkos::Experimental; - return Impl::transform_inclusive_scan_impl( + return Impl::transform_inclusive_scan_exespace_impl( "Kokkos::transform_inclusive_scan_custom_functors_view_api", ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), - binary_op, unary_op, init_value); + binary_op, unary_op, std::move(init_value)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename BinaryOpType, + typename UnaryOpType, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto transform_inclusive_scan( const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, @@ -150,10 +183,97 @@ auto transform_inclusive_scan( Impl::static_assert_is_not_openmptarget(ex); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + namespace KE = ::Kokkos::Experimental; - return Impl::transform_inclusive_scan_impl( + return Impl::transform_inclusive_scan_exespace_impl( label, ex, KE::cbegin(view_from), KE::cend(view_from), - KE::begin(view_dest), binary_op, unary_op, init_value); + KE::begin(view_dest), binary_op, unary_op, std::move(init_value)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// + +// overload set 1 (no init value) +template && + Kokkos::is_team_handle_v, + int> = 0> +KOKKOS_FUNCTION OutputIteratorType transform_inclusive_scan( + const TeamHandleType& teamHandle, InputIteratorType first, + InputIteratorType last, OutputIteratorType first_dest, + BinaryOpType binary_op, UnaryOpType unary_op) { + Impl::static_assert_is_not_openmptarget(teamHandle); + + return Impl::transform_inclusive_scan_team_impl( + teamHandle, first, last, first_dest, binary_op, unary_op); +} + +template , int> = 0> +KOKKOS_FUNCTION auto transform_inclusive_scan( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest, + BinaryOpType binary_op, UnaryOpType unary_op) { + Impl::static_assert_is_not_openmptarget(teamHandle); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + namespace KE = ::Kokkos::Experimental; + return Impl::transform_inclusive_scan_team_impl( + teamHandle, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest), binary_op, unary_op); +} + +// overload set 2 (init value) +template && + Kokkos::is_team_handle_v, + int> = 0> +KOKKOS_FUNCTION OutputIteratorType transform_inclusive_scan( + const TeamHandleType& teamHandle, InputIteratorType first, + InputIteratorType last, OutputIteratorType first_dest, + BinaryOpType binary_op, UnaryOpType unary_op, ValueType init_value) { + Impl::static_assert_is_not_openmptarget(teamHandle); + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + + return Impl::transform_inclusive_scan_team_impl( + teamHandle, first, last, first_dest, binary_op, unary_op, + std::move(init_value)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto transform_inclusive_scan( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest, + BinaryOpType binary_op, UnaryOpType unary_op, ValueType init_value) { 
+ Impl::static_assert_is_not_openmptarget(teamHandle); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + + namespace KE = ::Kokkos::Experimental; + return Impl::transform_inclusive_scan_team_impl( + teamHandle, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest), binary_op, unary_op, std::move(init_value)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_TransformReduce.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_TransformReduce.hpp index b5ec9066d2..101f5113f6 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_TransformReduce.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_TransformReduce.hpp @@ -23,34 +23,44 @@ namespace Kokkos { namespace Experimental { +// +// overload set accepting execution space +// + // ---------------------------- // overload set1: // no custom functors passed, so equivalent to // transform_reduce(first1, last1, first2, init, plus<>(), multiplies<>()); // ---------------------------- -template +template ::value, + int> = 0> ValueType transform_reduce(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, ValueType init_reduction_value) { - return Impl::transform_reduce_default_functors_impl( + return Impl::transform_reduce_default_functors_exespace_impl( "Kokkos::transform_reduce_default_functors_iterator_api", ex, first1, last1, first2, std::move(init_reduction_value)); } -template +template ::value, + int> = 0> ValueType transform_reduce(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, ValueType init_reduction_value) { - return Impl::transform_reduce_default_functors_impl( + return Impl::transform_reduce_default_functors_exespace_impl( label, ex, first1, last1, 
first2, std::move(init_reduction_value)); } // overload1 accepting views -template +template ::value, + int> = 0> ValueType transform_reduce( const ExecutionSpace& ex, const ::Kokkos::View& first_view, @@ -60,14 +70,16 @@ ValueType transform_reduce( Impl::static_assert_is_admissible_to_kokkos_std_algorithms(first_view); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(second_view); - return Impl::transform_reduce_default_functors_impl( + return Impl::transform_reduce_default_functors_exespace_impl( "Kokkos::transform_reduce_default_functors_iterator_api", ex, KE::cbegin(first_view), KE::cend(first_view), KE::cbegin(second_view), std::move(init_reduction_value)); } -template +template ::value, + int> = 0> ValueType transform_reduce( const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& first_view, @@ -77,7 +89,7 @@ ValueType transform_reduce( Impl::static_assert_is_admissible_to_kokkos_std_algorithms(first_view); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(second_view); - return Impl::transform_reduce_default_functors_impl( + return Impl::transform_reduce_default_functors_exespace_impl( label, ex, KE::cbegin(first_view), KE::cend(first_view), KE::cbegin(second_view), std::move(init_reduction_value)); } @@ -95,8 +107,11 @@ ValueType transform_reduce( // https://en.cppreference.com/w/cpp/algorithm/transform_reduce // api accepting iterators -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + typename ValueType, typename BinaryJoinerType, typename BinaryTransform, + std::enable_if_t<::Kokkos::is_execution_space::value, int> = + 0> ValueType transform_reduce(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, ValueType init_reduction_value, @@ -105,14 +120,17 @@ ValueType transform_reduce(const ExecutionSpace& ex, IteratorType1 first1, static_assert(std::is_move_constructible::value, "ValueType must be move constructible."); - return 
Impl::transform_reduce_custom_functors_impl( + return Impl::transform_reduce_custom_functors_exespace_impl( "Kokkos::transform_reduce_custom_functors_iterator_api", ex, first1, last1, first2, std::move(init_reduction_value), std::move(joiner), std::move(transformer)); } -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + typename ValueType, typename BinaryJoinerType, typename BinaryTransform, + std::enable_if_t<::Kokkos::is_execution_space::value, int> = + 0> ValueType transform_reduce(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, ValueType init_reduction_value, @@ -121,15 +139,17 @@ ValueType transform_reduce(const std::string& label, const ExecutionSpace& ex, static_assert(std::is_move_constructible::value, "ValueType must be move constructible."); - return Impl::transform_reduce_custom_functors_impl( + return Impl::transform_reduce_custom_functors_exespace_impl( label, ex, first1, last1, first2, std::move(init_reduction_value), std::move(joiner), std::move(transformer)); } // accepting views -template +template ::value, + int> = 0> ValueType transform_reduce( const ExecutionSpace& ex, const ::Kokkos::View& first_view, @@ -143,16 +163,18 @@ ValueType transform_reduce( Impl::static_assert_is_admissible_to_kokkos_std_algorithms(first_view); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(second_view); - return Impl::transform_reduce_custom_functors_impl( + return Impl::transform_reduce_custom_functors_exespace_impl( "Kokkos::transform_reduce_custom_functors_view_api", ex, KE::cbegin(first_view), KE::cend(first_view), KE::cbegin(second_view), std::move(init_reduction_value), std::move(joiner), std::move(transformer)); } -template +template ::value, + int> = 0> ValueType transform_reduce( const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& first_view, @@ -166,7 +188,7 @@ ValueType transform_reduce( 
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(first_view); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(second_view); - return Impl::transform_reduce_custom_functors_impl( + return Impl::transform_reduce_custom_functors_exespace_impl( label, ex, KE::cbegin(first_view), KE::cend(first_view), KE::cbegin(second_view), std::move(init_reduction_value), std::move(joiner), std::move(transformer)); @@ -176,43 +198,50 @@ ValueType transform_reduce( // overload set3: // // accepting iterators -template -// need this to avoid ambiguous call -std::enable_if_t< - ::Kokkos::Experimental::Impl::are_iterators::value, ValueType> -transform_reduce(const ExecutionSpace& ex, IteratorType first1, - IteratorType last1, ValueType init_reduction_value, - BinaryJoinerType joiner, UnaryTransform transformer) { +template ::value && + is_execution_space::value, + int> = 0> +ValueType transform_reduce(const ExecutionSpace& ex, IteratorType first1, + IteratorType last1, ValueType init_reduction_value, + BinaryJoinerType joiner, + UnaryTransform transformer) { static_assert(std::is_move_constructible::value, "ValueType must be move constructible."); - return Impl::transform_reduce_custom_functors_impl( + return Impl::transform_reduce_custom_functors_exespace_impl( "Kokkos::transform_reduce_custom_functors_iterator_api", ex, first1, last1, std::move(init_reduction_value), std::move(joiner), std::move(transformer)); } -template -// need this to avoid ambiguous call -std::enable_if_t< - ::Kokkos::Experimental::Impl::are_iterators::value, ValueType> -transform_reduce(const std::string& label, const ExecutionSpace& ex, - IteratorType first1, IteratorType last1, - ValueType init_reduction_value, BinaryJoinerType joiner, - UnaryTransform transformer) { +template ::value && + is_execution_space::value, + int> = 0> +ValueType transform_reduce(const std::string& label, const ExecutionSpace& ex, + IteratorType first1, IteratorType last1, + ValueType init_reduction_value, + 
BinaryJoinerType joiner, + UnaryTransform transformer) { static_assert(std::is_move_constructible::value, "ValueType must be move constructible."); - return Impl::transform_reduce_custom_functors_impl( + return Impl::transform_reduce_custom_functors_exespace_impl( label, ex, first1, last1, std::move(init_reduction_value), std::move(joiner), std::move(transformer)); } // accepting views -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename ValueType, typename BinaryJoinerType, typename UnaryTransform, + std::enable_if_t<::Kokkos::is_execution_space::value, int> = + 0> ValueType transform_reduce(const ExecutionSpace& ex, const ::Kokkos::View& view, ValueType init_reduction_value, @@ -224,14 +253,17 @@ ValueType transform_reduce(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::transform_reduce_custom_functors_impl( + return Impl::transform_reduce_custom_functors_exespace_impl( "Kokkos::transform_reduce_custom_functors_view_api", ex, KE::cbegin(view), KE::cend(view), std::move(init_reduction_value), std::move(joiner), std::move(transformer)); } -template +template < + typename ExecutionSpace, typename DataType, typename... 
Properties, + typename ValueType, typename BinaryJoinerType, typename UnaryTransform, + std::enable_if_t<::Kokkos::is_execution_space::value, int> = + 0> ValueType transform_reduce(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, ValueType init_reduction_value, @@ -243,12 +275,154 @@ ValueType transform_reduce(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::transform_reduce_custom_functors_impl( + return Impl::transform_reduce_custom_functors_exespace_impl( label, ex, KE::cbegin(view), KE::cend(view), std::move(init_reduction_value), std::move(joiner), std::move(transformer)); } +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// + +// ---------------------------- +// overload set1: +// no custom functors passed, so equivalent to +// transform_reduce(first1, last1, first2, init, plus<>(), multiplies<>()); +// ---------------------------- +template < + typename TeamHandleType, typename IteratorType1, typename IteratorType2, + typename ValueType, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION ValueType transform_reduce(const TeamHandleType& teamHandle, + IteratorType1 first1, + IteratorType1 last1, + IteratorType2 first2, + ValueType init_reduction_value) { + return Impl::transform_reduce_default_functors_team_impl( + teamHandle, first1, last1, first2, std::move(init_reduction_value)); +} + +// overload1 accepting views +template < + typename TeamHandleType, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename ValueType, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION ValueType +transform_reduce(const TeamHandleType& teamHandle, + const ::Kokkos::View& first_view, + const ::Kokkos::View& second_view, + ValueType init_reduction_value) { + namespace KE = ::Kokkos::Experimental; + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(first_view); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(second_view); + + return Impl::transform_reduce_default_functors_team_impl( + teamHandle, KE::cbegin(first_view), KE::cend(first_view), + KE::cbegin(second_view), std::move(init_reduction_value)); +} + +// +// overload set2: +// accepts a custom transform and joiner functor +// + +// Note the std refers to the arg BinaryReductionOp +// but in the Kokkos naming convention, it corresponds +// to a "joiner" that knows how to join two values +// NOTE: "joiner/transformer" need to be commutative. + +// https://en.cppreference.com/w/cpp/algorithm/transform_reduce + +// api accepting iterators +template < + typename TeamHandleType, typename IteratorType1, typename IteratorType2, + typename ValueType, typename BinaryJoinerType, typename BinaryTransform, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION ValueType transform_reduce( + const TeamHandleType& teamHandle, IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, ValueType init_reduction_value, + BinaryJoinerType joiner, BinaryTransform transformer) { + static_assert(std::is_move_constructible::value, + "ValueType must be move constructible."); + + return Impl::transform_reduce_custom_functors_team_impl( + teamHandle, first1, last1, first2, std::move(init_reduction_value), + std::move(joiner), std::move(transformer)); +} + +// accepting views +template < + typename TeamHandleType, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename ValueType, + typename BinaryJoinerType, typename BinaryTransform, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION ValueType +transform_reduce(const TeamHandleType& teamHandle, + const ::Kokkos::View& first_view, + const ::Kokkos::View& second_view, + ValueType init_reduction_value, BinaryJoinerType joiner, + BinaryTransform transformer) { + namespace KE = ::Kokkos::Experimental; + static_assert(std::is_move_constructible::value, + "ValueType must be move constructible."); + + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(first_view); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(second_view); + + return Impl::transform_reduce_custom_functors_team_impl( + teamHandle, KE::cbegin(first_view), KE::cend(first_view), + KE::cbegin(second_view), std::move(init_reduction_value), + std::move(joiner), std::move(transformer)); +} + +// +// overload set3: +// +// accepting iterators +template ::value && + is_team_handle::value, + int> = 0> +KOKKOS_FUNCTION ValueType transform_reduce(const TeamHandleType& teamHandle, + IteratorType first1, + IteratorType last1, + ValueType init_reduction_value, + BinaryJoinerType joiner, + UnaryTransform transformer) { + static_assert(std::is_move_constructible::value, + "ValueType must be move constructible."); + + return Impl::transform_reduce_custom_functors_team_impl( + teamHandle, first1, last1, std::move(init_reduction_value), + std::move(joiner), std::move(transformer)); +} + +// accepting views +template < + typename TeamHandleType, typename DataType, typename... 
Properties, + typename ValueType, typename BinaryJoinerType, typename UnaryTransform, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION ValueType +transform_reduce(const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + ValueType init_reduction_value, BinaryJoinerType joiner, + UnaryTransform transformer) { + namespace KE = ::Kokkos::Experimental; + static_assert(std::is_move_constructible::value, + "ValueType must be move constructible."); + + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + return Impl::transform_reduce_custom_functors_team_impl( + teamHandle, KE::cbegin(view), KE::cend(view), + std::move(init_reduction_value), std::move(joiner), + std::move(transformer)); +} + } // namespace Experimental } // namespace Kokkos diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Unique.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Unique.hpp index b47ecffb20..2d56315f61 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Unique.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Unique.hpp @@ -23,71 +23,132 @@ namespace Kokkos { namespace Experimental { -// note: the enable_if below is to avoid "call to ... 
is ambiguous" -// for example in the unit test when using a variadic function - -// overload set1 -template -std::enable_if_t::value, IteratorType> unique( - const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::unique_impl("Kokkos::unique_iterator_api_default", ex, first, - last); +// +// overload set1: default predicate, accepting execution space +// +template && + is_execution_space::value, + int> = 0> +IteratorType unique(const ExecutionSpace& ex, IteratorType first, + IteratorType last) { + return Impl::unique_exespace_impl("Kokkos::unique_iterator_api_default", ex, + first, last); } -template -std::enable_if_t::value, IteratorType> unique( - const std::string& label, const ExecutionSpace& ex, IteratorType first, - IteratorType last) { - return Impl::unique_impl(label, ex, first, last); +template && + is_execution_space::value, + int> = 0> +IteratorType unique(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last) { + return Impl::unique_exespace_impl(label, ex, first, last); } -template +template ::value, int> = 0> auto unique(const ExecutionSpace& ex, const ::Kokkos::View& view) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return ::Kokkos::Experimental::unique("Kokkos::unique_view_api_default", ex, - begin(view), end(view)); + return Impl::unique_exespace_impl("Kokkos::unique_view_api_default", ex, + begin(view), end(view)); } -template +template ::value, int> = 0> auto unique(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return ::Kokkos::Experimental::unique(label, ex, begin(view), end(view)); + return Impl::unique_exespace_impl(label, ex, begin(view), end(view)); } -// overload set2 -template +// +// overload set2: custom predicate, accepting execution space +// +template ::value, int> = 0> IteratorType unique(const ExecutionSpace& ex, IteratorType first, 
IteratorType last, BinaryPredicate pred) { - return Impl::unique_impl("Kokkos::unique_iterator_api_default", ex, first, - last, pred); + return Impl::unique_exespace_impl("Kokkos::unique_iterator_api_default", ex, + first, last, pred); } -template +template ::value, int> = 0> IteratorType unique(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, BinaryPredicate pred) { - return Impl::unique_impl(label, ex, first, last, pred); + return Impl::unique_exespace_impl(label, ex, first, last, pred); } -template +template ::value, int> = 0> auto unique(const ExecutionSpace& ex, const ::Kokkos::View& view, BinaryPredicate pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::unique_impl("Kokkos::unique_view_api_default", ex, begin(view), - end(view), std::move(pred)); + return Impl::unique_exespace_impl("Kokkos::unique_view_api_default", ex, + begin(view), end(view), std::move(pred)); } -template +template ::value, int> = 0> auto unique(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, BinaryPredicate pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::unique_impl(label, ex, begin(view), end(view), std::move(pred)); + return Impl::unique_exespace_impl(label, ex, begin(view), end(view), + std::move(pred)); +} + +// +// overload set3: default predicate, accepting team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template && + is_team_handle::value, + int> = 0> +KOKKOS_FUNCTION IteratorType unique(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last) { + return Impl::unique_team_impl(teamHandle, first, last); +} + +template ::value, int> = 0> +KOKKOS_FUNCTION auto unique( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view) { + return Impl::unique_team_impl(teamHandle, begin(view), end(view)); +} + +// +// overload set4: custom predicate, accepting team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template ::value, int> = 0> +KOKKOS_FUNCTION IteratorType unique(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + BinaryPredicate pred) { + return Impl::unique_team_impl(teamHandle, first, last, std::move(pred)); +} + +template ::value, int> = 0> +KOKKOS_FUNCTION auto unique(const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + BinaryPredicate pred) { + return Impl::unique_team_impl(teamHandle, begin(view), end(view), + std::move(pred)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_UniqueCopy.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_UniqueCopy.hpp index bd2451c220..4a32d7e095 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_UniqueCopy.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_UniqueCopy.hpp @@ -23,67 +23,90 @@ namespace Kokkos { namespace Experimental { -// overload set1 -template -std::enable_if_t::value, OutputIterator> -unique_copy(const ExecutionSpace& ex, InputIterator first, InputIterator last, - OutputIterator d_first) { - return Impl::unique_copy_impl("Kokkos::unique_copy_iterator_api_default", ex, - first, last, d_first); +// +// overload set1: default predicate, accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + std::enable_if_t && + 
is_execution_space_v, + int> = 0> +OutputIterator unique_copy(const ExecutionSpace& ex, InputIterator first, + InputIterator last, OutputIterator d_first) { + return Impl::unique_copy_exespace_impl( + "Kokkos::unique_copy_iterator_api_default", ex, first, last, d_first); } -template -std::enable_if_t::value, OutputIterator> -unique_copy(const std::string& label, const ExecutionSpace& ex, - InputIterator first, InputIterator last, OutputIterator d_first) { - return Impl::unique_copy_impl(label, ex, first, last, d_first); +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + std::enable_if_t && + is_execution_space_v, + int> = 0> +OutputIterator unique_copy(const std::string& label, const ExecutionSpace& ex, + InputIterator first, InputIterator last, + OutputIterator d_first) { + return Impl::unique_copy_exespace_impl(label, ex, first, last, d_first); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto unique_copy(const ExecutionSpace& ex, const ::Kokkos::View& source, const ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return ::Kokkos::Experimental::unique_copy( - "Kokkos::unique_copy_view_api_default", ex, cbegin(source), cend(source), - begin(dest)); + return Impl::unique_copy_exespace_impl("Kokkos::unique_copy_view_api_default", + ex, cbegin(source), cend(source), + begin(dest)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto unique_copy(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, const ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return ::Kokkos::Experimental::unique_copy(label, ex, cbegin(source), - cend(source), begin(dest)); + return Impl::unique_copy_exespace_impl(label, ex, cbegin(source), + cend(source), begin(dest)); } -// overload set2 -template +// +// overload set2: custom predicate, accepting execution space +// + +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + typename BinaryPredicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator unique_copy(const ExecutionSpace& ex, InputIterator first, InputIterator last, OutputIterator d_first, BinaryPredicate pred) { - return Impl::unique_copy_impl("Kokkos::unique_copy_iterator_api_default", ex, - first, last, d_first, pred); + return Impl::unique_copy_exespace_impl( + "Kokkos::unique_copy_iterator_api_default", ex, first, last, d_first, + pred); } -template +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + typename BinaryPredicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator unique_copy(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator last, OutputIterator d_first, BinaryPredicate pred) { - return Impl::unique_copy_impl(label, ex, first, last, d_first, pred); + return Impl::unique_copy_exespace_impl(label, ex, first, last, d_first, pred); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename BinaryPredicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto unique_copy(const ExecutionSpace& ex, const ::Kokkos::View& source, const ::Kokkos::View& dest, @@ -91,13 +114,15 @@ auto unique_copy(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::unique_copy_impl("Kokkos::unique_copy_view_api_default", ex, - cbegin(source), cend(source), begin(dest), - std::move(pred)); + return Impl::unique_copy_exespace_impl("Kokkos::unique_copy_view_api_default", + ex, cbegin(source), cend(source), + begin(dest), std::move(pred)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename BinaryPredicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto unique_copy(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, const ::Kokkos::View& dest, @@ -105,8 +130,70 @@ auto unique_copy(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::unique_copy_impl(label, ex, cbegin(source), cend(source), - begin(dest), std::move(pred)); + return Impl::unique_copy_exespace_impl( + label, ex, cbegin(source), cend(source), begin(dest), std::move(pred)); +} + +// +// overload set3: default predicate, accepting team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template < + typename TeamHandleType, typename InputIterator, typename OutputIterator, + std::enable_if_t && + Kokkos::is_team_handle_v, + int> = 0> +KOKKOS_FUNCTION OutputIterator unique_copy(const TeamHandleType& teamHandle, + InputIterator first, + InputIterator last, + OutputIterator d_first) { + return Impl::unique_copy_team_impl(teamHandle, first, last, d_first); +} + +template , int> = 0> +KOKKOS_FUNCTION auto unique_copy( + const TeamHandleType& teamHandle, + const ::Kokkos::View& source, + const ::Kokkos::View& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::unique_copy_team_impl(teamHandle, cbegin(source), cend(source), + begin(dest)); +} + +// +// overload set4: custom predicate, accepting team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION OutputIterator unique_copy(const TeamHandleType& teamHandle, + InputIterator first, + InputIterator last, + OutputIterator d_first, + BinaryPredicate pred) { + return Impl::unique_copy_team_impl(teamHandle, first, last, d_first, pred); +} + +template , int> = 0> +KOKKOS_FUNCTION auto unique_copy( + const TeamHandleType& teamHandle, + const ::Kokkos::View& source, + const ::Kokkos::View& dest, + BinaryPredicate pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::unique_copy_team_impl(teamHandle, cbegin(source), cend(source), + begin(dest), std::move(pred)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AdjacentDifference.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AdjacentDifference.hpp index 8a474508d7..a8171fa068 100644 --- 
a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AdjacentDifference.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AdjacentDifference.hpp @@ -63,14 +63,15 @@ struct StdAdjacentDiffFunctor { m_op(std::move(op)) {} }; +// +// exespace impl +// template -OutputIteratorType adjacent_difference_impl(const std::string& label, - const ExecutionSpace& ex, - InputIteratorType first_from, - InputIteratorType last_from, - OutputIteratorType first_dest, - BinaryOp bin_op) { +OutputIteratorType adjacent_difference_exespace_impl( + const std::string& label, const ExecutionSpace& ex, + InputIteratorType first_from, InputIteratorType last_from, + OutputIteratorType first_dest, BinaryOp bin_op) { // checks Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest); Impl::static_assert_iterators_have_matching_difference_type(first_from, @@ -81,20 +82,45 @@ OutputIteratorType adjacent_difference_impl(const std::string& label, return first_dest; } - // aliases - using value_type = typename OutputIteratorType::value_type; - using aux_view_type = ::Kokkos::View; - using functor_t = - StdAdjacentDiffFunctor; + // run + const auto num_elements = + Kokkos::Experimental::distance(first_from, last_from); + ::Kokkos::parallel_for( + label, RangePolicy(ex, 0, num_elements), + StdAdjacentDiffFunctor(first_from, first_dest, bin_op)); + ex.fence("Kokkos::adjacent_difference: fence after operation"); + + // return + return first_dest + num_elements; +} + +// +// team impl +// +template +KOKKOS_FUNCTION OutputIteratorType adjacent_difference_team_impl( + const TeamHandleType& teamHandle, InputIteratorType first_from, + InputIteratorType last_from, OutputIteratorType first_dest, + BinaryOp bin_op) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first_from, + first_dest); + Impl::static_assert_iterators_have_matching_difference_type(first_from, + first_dest); + Impl::expect_valid_range(first_from, last_from); + + if (first_from == 
last_from) { + return first_dest; + } // run const auto num_elements = Kokkos::Experimental::distance(first_from, last_from); - aux_view_type aux_view("aux_view", num_elements); - ::Kokkos::parallel_for(label, - RangePolicy(ex, 0, num_elements), - functor_t(first_from, first_dest, bin_op)); - ex.fence("Kokkos::adjacent_difference: fence after operation"); + ::Kokkos::parallel_for( + TeamThreadRange(teamHandle, 0, num_elements), + StdAdjacentDiffFunctor(first_from, first_dest, bin_op)); + teamHandle.team_barrier(); // return return first_dest + num_elements; diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AdjacentFind.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AdjacentFind.hpp index dd785e603b..f30b7be06a 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AdjacentFind.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AdjacentFind.hpp @@ -27,9 +27,9 @@ namespace Kokkos { namespace Experimental { namespace Impl { -template +template struct StdAdjacentFindFunctor { + using index_type = typename IteratorType::difference_type; using red_value_type = typename ReducerType::value_type; IteratorType m_first; @@ -37,13 +37,13 @@ struct StdAdjacentFindFunctor { PredicateType m_p; KOKKOS_FUNCTION - void operator()(const IndexType i, red_value_type& red_value) const { + void operator()(const index_type i, red_value_type& red_value) const { const auto& my_value = m_first[i]; const auto& next_value = m_first[i + 1]; const bool are_equal = m_p(my_value, next_value); // FIXME_NVHPC using a ternary operator causes problems - red_value_type value = {::Kokkos::reduction_identity::min()}; + red_value_type value = {::Kokkos::reduction_identity::min()}; if (are_equal) { value.min_loc_true = i; } @@ -59,10 +59,14 @@ struct StdAdjacentFindFunctor { m_p(std::move(p)) {} }; +// +// exespace impl +// template -IteratorType adjacent_find_impl(const std::string& label, - const ExecutionSpace& ex, IteratorType first, - IteratorType 
last, PredicateType pred) { +IteratorType adjacent_find_exespace_impl(const std::string& label, + const ExecutionSpace& ex, + IteratorType first, IteratorType last, + PredicateType pred) { // checks Impl::static_assert_random_access_and_accessible(ex, first); Impl::expect_valid_range(first, last); @@ -76,8 +80,6 @@ IteratorType adjacent_find_impl(const std::string& label, using index_type = typename IteratorType::difference_type; using reducer_type = FirstLoc; using reduction_value_type = typename reducer_type::value_type; - using func_t = StdAdjacentFindFunctor; reduction_value_type red_result; reducer_type reducer(red_result); @@ -86,7 +88,8 @@ IteratorType adjacent_find_impl(const std::string& label, // each index i in the reduction checks i and (i+1). ::Kokkos::parallel_reduce( label, RangePolicy(ex, 0, num_elements - 1), - func_t(first, reducer, pred), reducer); + // use CTAD + StdAdjacentFindFunctor(first, reducer, pred), reducer); // fence not needed because reducing into scalar if (red_result.min_loc_true == @@ -98,12 +101,62 @@ IteratorType adjacent_find_impl(const std::string& label, } template -IteratorType adjacent_find_impl(const std::string& label, - const ExecutionSpace& ex, IteratorType first, - IteratorType last) { +IteratorType adjacent_find_exespace_impl(const std::string& label, + const ExecutionSpace& ex, + IteratorType first, + IteratorType last) { using value_type = typename IteratorType::value_type; using default_pred_t = StdAlgoEqualBinaryPredicate; - return adjacent_find_impl(label, ex, first, last, default_pred_t()); + return adjacent_find_exespace_impl(label, ex, first, last, default_pred_t()); +} + +// +// team impl +// +template +KOKKOS_FUNCTION IteratorType +adjacent_find_team_impl(const TeamHandleType& teamHandle, IteratorType first, + IteratorType last, PredicateType pred) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first); + Impl::expect_valid_range(first, last); + + const auto num_elements = 
Kokkos::Experimental::distance(first, last); + + if (num_elements <= 1) { + return last; + } + + using index_type = typename IteratorType::difference_type; + using reducer_type = FirstLoc; + using reduction_value_type = typename reducer_type::value_type; + + reduction_value_type red_result; + reducer_type reducer(red_result); + + // note that we use below num_elements-1 because + // each index i in the reduction checks i and (i+1). + ::Kokkos::parallel_reduce(TeamThreadRange(teamHandle, 0, num_elements - 1), + // use CTAD + StdAdjacentFindFunctor(first, reducer, pred), + reducer); + + teamHandle.team_barrier(); + + if (red_result.min_loc_true == + ::Kokkos::reduction_identity::min()) { + return last; + } else { + return first + red_result.min_loc_true; + } +} + +template +KOKKOS_FUNCTION IteratorType adjacent_find_team_impl( + const TeamHandleType& teamHandle, IteratorType first, IteratorType last) { + using value_type = typename IteratorType::value_type; + using default_pred_t = StdAlgoEqualBinaryPredicate; + return adjacent_find_team_impl(teamHandle, first, last, default_pred_t()); } } // namespace Impl diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AllOfAnyOfNoneOf.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AllOfAnyOfNoneOf.hpp index ad562070a0..bdc050f9c1 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AllOfAnyOfNoneOf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AllOfAnyOfNoneOf.hpp @@ -23,23 +23,58 @@ namespace Kokkos { namespace Experimental { namespace Impl { +// +// exespace impl +// template -bool all_of_impl(const std::string& label, const ExecutionSpace& ex, - InputIterator first, InputIterator last, Predicate predicate) { - return (find_if_or_not_impl(label, ex, first, last, predicate) == - last); +bool all_of_exespace_impl(const std::string& label, const ExecutionSpace& ex, + InputIterator first, InputIterator last, + Predicate predicate) { + return 
(find_if_or_not_exespace_impl(label, ex, first, last, + predicate) == last); } template -bool any_of_impl(const std::string& label, const ExecutionSpace& ex, - InputIterator first, InputIterator last, Predicate predicate) { - return (find_if_or_not_impl(label, ex, first, last, predicate) != last); +bool any_of_exespace_impl(const std::string& label, const ExecutionSpace& ex, + InputIterator first, InputIterator last, + Predicate predicate) { + return (find_if_or_not_exespace_impl(label, ex, first, last, + predicate) != last); } template -bool none_of_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, Predicate predicate) { - return (find_if_or_not_impl(label, ex, first, last, predicate) == last); +bool none_of_exespace_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, + Predicate predicate) { + return (find_if_or_not_exespace_impl(label, ex, first, last, + predicate) == last); +} + +// +// team impl +// +template +KOKKOS_FUNCTION bool all_of_team_impl(const TeamHandleType& teamHandle, + InputIterator first, InputIterator last, + Predicate predicate) { + return (find_if_or_not_team_impl(teamHandle, first, last, predicate) == + last); +} + +template +KOKKOS_FUNCTION bool any_of_team_impl(const TeamHandleType& teamHandle, + InputIterator first, InputIterator last, + Predicate predicate) { + return (find_if_or_not_team_impl(teamHandle, first, last, predicate) != + last); +} + +template +KOKKOS_FUNCTION bool none_of_team_impl(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + Predicate predicate) { + return (find_if_or_not_team_impl(teamHandle, first, last, predicate) == + last); } } // namespace Impl diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Constraints.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Constraints.hpp index 0376100410..27ce5a6fad 100644 --- 
a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Constraints.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Constraints.hpp @@ -55,6 +55,9 @@ using iterator_category_t = typename T::iterator_category; template using is_iterator = Kokkos::is_detected; +template +inline constexpr bool is_iterator_v = is_iterator::value; + // // are_iterators // @@ -63,15 +66,18 @@ struct are_iterators; template struct are_iterators { - static constexpr bool value = is_iterator::value; + static constexpr bool value = is_iterator_v; }; template struct are_iterators { static constexpr bool value = - are_iterators::value && are_iterators::value; + are_iterators::value && (are_iterators::value && ... && true); }; +template +inline constexpr bool are_iterators_v = are_iterators::value; + // // are_random_access_iterators // @@ -81,17 +87,21 @@ struct are_random_access_iterators; template struct are_random_access_iterators { static constexpr bool value = - is_iterator::value && - std::is_base_of::value; + is_iterator_v && std::is_base_of::value; }; template struct are_random_access_iterators { - static constexpr bool value = are_random_access_iterators::value && - are_random_access_iterators::value; + static constexpr bool value = + are_random_access_iterators::value && + (are_random_access_iterators::value && ... && true); }; +template +inline constexpr bool are_random_access_iterators_v = + are_random_access_iterators::value; + // // iterators_are_accessible_from // @@ -113,16 +123,18 @@ struct iterators_are_accessible_from { iterators_are_accessible_from::value; }; -template +template KOKKOS_INLINE_FUNCTION constexpr void -static_assert_random_access_and_accessible(const ExecutionSpace& /* ex */, - IteratorTypes... /* iterators */) { +static_assert_random_access_and_accessible( + const ExecutionSpaceOrTeamHandleType& /* ex_or_th*/, + IteratorTypes... 
/* iterators */) { static_assert( are_random_access_iterators::value, "Currently, Kokkos standard algorithms require random access iterators."); - static_assert( - iterators_are_accessible_from::value, - "Incompatible view/iterator and execution space"); + static_assert(iterators_are_accessible_from< + typename ExecutionSpaceOrTeamHandleType::execution_space, + IteratorTypes...>::value, + "Incompatible view/iterator and execution space"); } // @@ -182,10 +194,10 @@ struct not_openmptarget { #endif }; -template +template KOKKOS_INLINE_FUNCTION constexpr void static_assert_is_not_openmptarget( - const ExecutionSpace&) { - static_assert(not_openmptarget::value, + const ExecutionSpaceOrTeamHandleType& /*ex_or_th*/) { + static_assert(not_openmptarget::value, "Currently, Kokkos standard algorithms do not support custom " "comparators in OpenMPTarget"); } @@ -194,7 +206,8 @@ KOKKOS_INLINE_FUNCTION constexpr void static_assert_is_not_openmptarget( // valid range // template -void expect_valid_range(IteratorType first, IteratorType last) { +KOKKOS_INLINE_FUNCTION void expect_valid_range(IteratorType first, + IteratorType last) { // this is a no-op for release KOKKOS_EXPECTS(last >= first); // avoid compiler complaining when KOKKOS_EXPECTS is no-op diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyBackward.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyBackward.hpp index b3adbc5e2d..0f68c9e978 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyBackward.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyBackward.hpp @@ -27,16 +27,18 @@ namespace Kokkos { namespace Experimental { namespace Impl { -template +template struct StdCopyBackwardFunctor { - static_assert(std::is_signed::value, - "Kokkos: StdCopyBackwardFunctor requires signed index type"); + // we can use difference type from IteratorType1 since + // the calling functions below already static assert that + // the iterators have matching difference 
type + using index_type = typename IteratorType1::difference_type; IteratorType1 m_last; IteratorType2 m_dest_last; KOKKOS_FUNCTION - void operator()(IndexType i) const { m_dest_last[-i - 1] = m_last[-i - 1]; } + void operator()(index_type i) const { m_dest_last[-i - 1] = m_last[-i - 1]; } KOKKOS_FUNCTION StdCopyBackwardFunctor(IteratorType1 _last, IteratorType2 _dest_last) @@ -44,30 +46,51 @@ struct StdCopyBackwardFunctor { }; template -IteratorType2 copy_backward_impl(const std::string& label, - const ExecutionSpace& ex, IteratorType1 first, - IteratorType1 last, IteratorType2 d_last) { +IteratorType2 copy_backward_exespace_impl(const std::string& label, + const ExecutionSpace& ex, + IteratorType1 first, + IteratorType1 last, + IteratorType2 d_last) { // checks Impl::static_assert_random_access_and_accessible(ex, first, d_last); Impl::static_assert_iterators_have_matching_difference_type(first, d_last); Impl::expect_valid_range(first, last); - // aliases - using index_type = typename IteratorType1::difference_type; - using func_t = - StdCopyBackwardFunctor; - // run const auto num_elements = Kokkos::Experimental::distance(first, last); ::Kokkos::parallel_for(label, RangePolicy(ex, 0, num_elements), - func_t(last, d_last)); + // use CTAD + StdCopyBackwardFunctor(last, d_last)); ex.fence("Kokkos::copy_backward: fence after operation"); // return return d_last - num_elements; } +// +// team-level impl +// +template +KOKKOS_FUNCTION IteratorType2 +copy_backward_team_impl(const TeamHandleType& teamHandle, IteratorType1 first, + IteratorType1 last, IteratorType2 d_last) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first, d_last); + Impl::static_assert_iterators_have_matching_difference_type(first, d_last); + Impl::expect_valid_range(first, last); + + // run + const auto num_elements = Kokkos::Experimental::distance(first, last); + ::Kokkos::parallel_for(TeamThreadRange(teamHandle, 0, num_elements), + // use CTAD + 
StdCopyBackwardFunctor(last, d_last)); + teamHandle.team_barrier(); + + // return + return d_last - num_elements; +} + } // namespace Impl } // namespace Experimental } // namespace Kokkos diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyCopyN.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyCopyN.hpp index 1b120c46d0..86e99ecbd0 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyCopyN.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyCopyN.hpp @@ -27,13 +27,18 @@ namespace Kokkos { namespace Experimental { namespace Impl { -template +template struct StdCopyFunctor { + // we can use difference type from InputIterator since + // the calling functions below already static assert that + // the iterators have matching difference type + using index_type = typename InputIterator::difference_type; + InputIterator m_first; OutputIterator m_dest_first; KOKKOS_FUNCTION - void operator()(IndexType i) const { m_dest_first[i] = m_first[i]; } + void operator()(index_type i) const { m_dest_first[i] = m_first[i]; } KOKKOS_FUNCTION StdCopyFunctor(InputIterator _first, OutputIterator _dest_first) @@ -41,23 +46,20 @@ struct StdCopyFunctor { }; template -OutputIterator copy_impl(const std::string& label, const ExecutionSpace& ex, - InputIterator first, InputIterator last, - OutputIterator d_first) { +OutputIterator copy_exespace_impl(const std::string& label, + const ExecutionSpace& ex, InputIterator first, + InputIterator last, OutputIterator d_first) { // checks Impl::static_assert_random_access_and_accessible(ex, first, d_first); Impl::static_assert_iterators_have_matching_difference_type(first, d_first); Impl::expect_valid_range(first, last); - // aliases - using index_type = typename InputIterator::difference_type; - using func_t = StdCopyFunctor; - // run const auto num_elements = Kokkos::Experimental::distance(first, last); ::Kokkos::parallel_for(label, RangePolicy(ex, 0, num_elements), - func_t(first, 
d_first)); + // use CTAD + StdCopyFunctor(first, d_first)); ex.fence("Kokkos::copy: fence after operation"); // return @@ -66,16 +68,61 @@ OutputIterator copy_impl(const std::string& label, const ExecutionSpace& ex, template -OutputIterator copy_n_impl(const std::string& label, const ExecutionSpace& ex, - InputIterator first_from, Size count, - OutputIterator first_dest) { +OutputIterator copy_n_exespace_impl(const std::string& label, + const ExecutionSpace& ex, + InputIterator first_from, Size count, + OutputIterator first_dest) { // checks Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest); Impl::static_assert_iterators_have_matching_difference_type(first_from, first_dest); if (count > 0) { - return copy_impl(label, ex, first_from, first_from + count, first_dest); + return copy_exespace_impl(label, ex, first_from, first_from + count, + first_dest); + } else { + return first_dest; + } +} + +// +// team-level impl +// +template +KOKKOS_FUNCTION OutputIterator copy_team_impl(const TeamHandleType& teamHandle, + InputIterator first, + InputIterator last, + OutputIterator d_first) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first, d_first); + Impl::static_assert_iterators_have_matching_difference_type(first, d_first); + Impl::expect_valid_range(first, last); + + // run + const auto num_elements = Kokkos::Experimental::distance(first, last); + ::Kokkos::parallel_for(TeamThreadRange(teamHandle, 0, num_elements), + // use CTAD + StdCopyFunctor(first, d_first)); + teamHandle.team_barrier(); + + // return + return d_first + num_elements; +} + +template +KOKKOS_FUNCTION OutputIterator +copy_n_team_impl(const TeamHandleType& teamHandle, InputIterator first_from, + Size count, OutputIterator first_dest) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first_from, + first_dest); + Impl::static_assert_iterators_have_matching_difference_type(first_from, + first_dest); + + if (count > 0) { + 
return copy_team_impl(teamHandle, first_from, first_from + count, + first_dest); } else { return first_dest; } diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyIf.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyIf.hpp index 3c0c4f7e9b..3c1e2474bc 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyIf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyIf.hpp @@ -20,6 +20,7 @@ #include #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" +#include "Kokkos_MustUseKokkosSingleInTeam.hpp" #include #include @@ -27,8 +28,10 @@ namespace Kokkos { namespace Experimental { namespace Impl { -template +template struct StdCopyIfFunctor { + using index_type = typename FirstFrom::difference_type; + FirstFrom m_first_from; FirstDest m_first_dest; PredType m_pred; @@ -40,7 +43,7 @@ struct StdCopyIfFunctor { m_pred(std::move(pred)) {} KOKKOS_FUNCTION - void operator()(const IndexType i, IndexType& update, + void operator()(const index_type i, index_type& update, const bool final_pass) const { const auto& myval = m_first_from[i]; if (final_pass) { @@ -57,9 +60,11 @@ struct StdCopyIfFunctor { template -OutputIterator copy_if_impl(const std::string& label, const ExecutionSpace& ex, - InputIterator first, InputIterator last, - OutputIterator d_first, PredicateType pred) { +OutputIterator copy_if_exespace_impl(const std::string& label, + const ExecutionSpace& ex, + InputIterator first, InputIterator last, + OutputIterator d_first, + PredicateType pred) { /* To explain the impl, suppose that our data is: @@ -90,23 +95,67 @@ OutputIterator copy_if_impl(const std::string& label, const ExecutionSpace& ex, if (first == last) { return d_first; } else { - // aliases - using index_type = typename InputIterator::difference_type; - using func_type = StdCopyIfFunctor; - // run const auto num_elements = Kokkos::Experimental::distance(first, last); - index_type count = 0; + + typename 
InputIterator::difference_type count = 0; ::Kokkos::parallel_scan(label, RangePolicy(ex, 0, num_elements), - func_type(first, d_first, pred), count); + // use CTAD + StdCopyIfFunctor(first, d_first, pred), count); // fence not needed because of the scan accumulating into count return d_first + count; } } +template +KOKKOS_FUNCTION OutputIterator copy_if_team_impl( + const TeamHandleType& teamHandle, InputIterator first, InputIterator last, + OutputIterator d_first, PredicateType pred) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first, d_first); + Impl::static_assert_iterators_have_matching_difference_type(first, d_first); + Impl::expect_valid_range(first, last); + + if (first == last) { + return d_first; + } + + const std::size_t num_elements = Kokkos::Experimental::distance(first, last); + if constexpr (stdalgo_must_use_kokkos_single_for_team_scan_v< + typename TeamHandleType::execution_space>) { + std::size_t count = 0; + Kokkos::single( + Kokkos::PerTeam(teamHandle), + [=](std::size_t& lcount) { + lcount = 0; + for (std::size_t i = 0; i < num_elements; ++i) { + const auto& myval = first[i]; + if (pred(myval)) { + d_first[lcount++] = myval; + } + } + }, + count); + // no barrier needed since single above broadcasts to all members + return d_first + count; + + } else { + typename InputIterator::difference_type count = 0; + ::Kokkos::parallel_scan(TeamThreadRange(teamHandle, 0, num_elements), + StdCopyIfFunctor(first, d_first, pred), count); + // no barrier needed because of the scan accumulating into count + return d_first + count; + } + +#if defined KOKKOS_COMPILER_INTEL || \ + (defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130) + __builtin_unreachable(); +#endif +} + } // namespace Impl } // namespace Experimental } // namespace Kokkos diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CountCountIf.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CountCountIf.hpp index 18b8c46359..9b6b403aa4 
100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CountCountIf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CountCountIf.hpp @@ -46,37 +46,65 @@ struct StdCountIfFunctor { }; template -typename IteratorType::difference_type count_if_impl(const std::string& label, - const ExecutionSpace& ex, - IteratorType first, - IteratorType last, - Predicate predicate) { +typename IteratorType::difference_type count_if_exespace_impl( + const std::string& label, const ExecutionSpace& ex, IteratorType first, + IteratorType last, Predicate predicate) { // checks Impl::static_assert_random_access_and_accessible(ex, first); Impl::expect_valid_range(first, last); - // aliases - using func_t = StdCountIfFunctor; - // run const auto num_elements = Kokkos::Experimental::distance(first, last); typename IteratorType::difference_type count = 0; ::Kokkos::parallel_reduce(label, RangePolicy(ex, 0, num_elements), - func_t(first, predicate), count); + // use CTAD + StdCountIfFunctor(first, predicate), count); ex.fence("Kokkos::count_if: fence after operation"); return count; } template -auto count_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, const T& value) { - return count_if_impl( +auto count_exespace_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, + const T& value) { + return count_if_exespace_impl( label, ex, first, last, ::Kokkos::Experimental::Impl::StdAlgoEqualsValUnaryPredicate(value)); } +// +// team-level impl +// +template +KOKKOS_FUNCTION typename IteratorType::difference_type count_if_team_impl( + const TeamHandleType& teamHandle, IteratorType first, IteratorType last, + Predicate predicate) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first); + Impl::expect_valid_range(first, last); + + // run + const auto num_elements = Kokkos::Experimental::distance(first, last); + typename IteratorType::difference_type count = 
0; + ::Kokkos::parallel_reduce(TeamThreadRange(teamHandle, 0, num_elements), + // use CTAD + StdCountIfFunctor(first, predicate), count); + teamHandle.team_barrier(); + + return count; +} + +template +KOKKOS_FUNCTION auto count_team_impl(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + const T& value) { + return count_if_team_impl( + teamHandle, first, last, + ::Kokkos::Experimental::Impl::StdAlgoEqualsValUnaryPredicate(value)); +} + } // namespace Impl } // namespace Experimental } // namespace Kokkos diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Equal.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Equal.hpp index e045080d4a..62b7d226f6 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Equal.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Equal.hpp @@ -27,15 +27,16 @@ namespace Kokkos { namespace Experimental { namespace Impl { -template +template struct StdEqualFunctor { + using index_type = typename IteratorType1::difference_type; + IteratorType1 m_first1; IteratorType2 m_first2; BinaryPredicateType m_predicate; KOKKOS_FUNCTION - void operator()(IndexType i, std::size_t& lsum) const { + void operator()(index_type i, std::size_t& lsum) const { if (!m_predicate(m_first1[i], m_first2[i])) { lsum = 1; } @@ -49,67 +50,130 @@ struct StdEqualFunctor { m_predicate(std::move(_predicate)) {} }; +// +// exespace impl +// template -bool equal_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, - BinaryPredicateType predicate) { +bool equal_exespace_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, BinaryPredicateType predicate) { // checks Impl::static_assert_random_access_and_accessible(ex, first1, first2); Impl::static_assert_iterators_have_matching_difference_type(first1, first2); Impl::expect_valid_range(first1, last1); - // aliases - 
using index_type = typename IteratorType1::difference_type; - using func_t = StdEqualFunctor; - // run const auto num_elements = Kokkos::Experimental::distance(first1, last1); std::size_t different = 0; - ::Kokkos::parallel_reduce(label, - RangePolicy(ex, 0, num_elements), - func_t(first1, first2, predicate), different); + ::Kokkos::parallel_reduce( + label, RangePolicy(ex, 0, num_elements), + StdEqualFunctor(first1, first2, predicate), different); ex.fence("Kokkos::equal: fence after operation"); return !different; } template -bool equal_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType1 first1, IteratorType1 last1, - IteratorType2 first2) { +bool equal_exespace_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2) { using value_type1 = typename IteratorType1::value_type; using value_type2 = typename IteratorType2::value_type; using pred_t = StdAlgoEqualBinaryPredicate; - return equal_impl(label, ex, first1, last1, first2, pred_t()); + return equal_exespace_impl(label, ex, first1, last1, first2, pred_t()); } template -bool equal_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, - IteratorType2 last2, BinaryPredicateType predicate) { +bool equal_exespace_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, IteratorType2 last2, + BinaryPredicateType predicate) { const auto d1 = ::Kokkos::Experimental::distance(first1, last1); const auto d2 = ::Kokkos::Experimental::distance(first2, last2); if (d1 != d2) { return false; } - return equal_impl(label, ex, first1, last1, first2, predicate); + return equal_exespace_impl(label, ex, first1, last1, first2, predicate); } template -bool equal_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, - IteratorType2 last2) { 
+bool equal_exespace_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, IteratorType2 last2) { Impl::expect_valid_range(first1, last1); Impl::expect_valid_range(first2, last2); using value_type1 = typename IteratorType1::value_type; using value_type2 = typename IteratorType2::value_type; using pred_t = StdAlgoEqualBinaryPredicate; - return equal_impl(label, ex, first1, last1, first2, last2, pred_t()); + return equal_exespace_impl(label, ex, first1, last1, first2, last2, pred_t()); +} + +// +// team impl +// +template +KOKKOS_FUNCTION bool equal_team_impl(const TeamHandleType& teamHandle, + IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, + BinaryPredicateType predicate) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first1, first2); + Impl::static_assert_iterators_have_matching_difference_type(first1, first2); + Impl::expect_valid_range(first1, last1); + + // run + const auto num_elements = Kokkos::Experimental::distance(first1, last1); + std::size_t different = 0; + ::Kokkos::parallel_reduce(TeamThreadRange(teamHandle, 0, num_elements), + StdEqualFunctor(first1, first2, predicate), + different); + teamHandle.team_barrier(); + + return !different; +} + +template +KOKKOS_FUNCTION bool equal_team_impl(const TeamHandleType& teamHandle, + IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2) { + using value_type1 = typename IteratorType1::value_type; + using value_type2 = typename IteratorType2::value_type; + using pred_t = StdAlgoEqualBinaryPredicate; + return equal_team_impl(teamHandle, first1, last1, first2, pred_t()); +} + +template +KOKKOS_FUNCTION bool equal_team_impl(const TeamHandleType& teamHandle, + IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, IteratorType2 last2, + BinaryPredicateType predicate) { + const auto d1 = ::Kokkos::Experimental::distance(first1, last1); + const auto d2 = 
::Kokkos::Experimental::distance(first2, last2); + if (d1 != d2) { + return false; + } + + return equal_team_impl(teamHandle, first1, last1, first2, predicate); +} + +template +KOKKOS_FUNCTION bool equal_team_impl(const TeamHandleType& teamHandle, + IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, + IteratorType2 last2) { + Impl::expect_valid_range(first1, last1); + Impl::expect_valid_range(first2, last2); + + using value_type1 = typename IteratorType1::value_type; + using value_type2 = typename IteratorType2::value_type; + using pred_t = StdAlgoEqualBinaryPredicate; + return equal_team_impl(teamHandle, first1, last1, first2, last2, pred_t()); } } // namespace Impl diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ExclusiveScan.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ExclusiveScan.hpp index 71f13e490a..6da992b4bb 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ExclusiveScan.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ExclusiveScan.hpp @@ -22,6 +22,7 @@ #include "Kokkos_HelperPredicates.hpp" #include "Kokkos_ValueWrapperForNoNeutralElement.hpp" #include "Kokkos_IdentityReferenceUnaryFunctor.hpp" +#include "Kokkos_FunctorsForExclusiveScan.hpp" #include #include #include @@ -30,127 +31,15 @@ namespace Kokkos { namespace Experimental { namespace Impl { -template -struct ExclusiveScanDefaultFunctorForKnownNeutralElement { - using execution_space = ExeSpace; - - ValueType m_init_value; - FirstFrom m_first_from; - FirstDest m_first_dest; - - KOKKOS_FUNCTION - ExclusiveScanDefaultFunctorForKnownNeutralElement(ValueType init, - FirstFrom first_from, - FirstDest first_dest) - : m_init_value(std::move(init)), - m_first_from(std::move(first_from)), - m_first_dest(std::move(first_dest)) {} - - KOKKOS_FUNCTION - void operator()(const IndexType i, ValueType& update, - const bool final_pass) const { - if (final_pass) m_first_dest[i] = update + m_init_value; - update += m_first_from[i]; - } 
-}; - -template -struct ExclusiveScanDefaultFunctor { - using execution_space = ExeSpace; - using value_type = - ::Kokkos::Experimental::Impl::ValueWrapperForNoNeutralElement; - - ValueType m_init_value; - FirstFrom m_first_from; - FirstDest m_first_dest; - - KOKKOS_FUNCTION - ExclusiveScanDefaultFunctor(ValueType init, FirstFrom first_from, - FirstDest first_dest) - : m_init_value(std::move(init)), - m_first_from(std::move(first_from)), - m_first_dest(std::move(first_dest)) {} - - KOKKOS_FUNCTION - void operator()(const IndexType i, value_type& update, - const bool final_pass) const { - if (final_pass) { - if (i == 0) { - m_first_dest[i] = m_init_value; - } else { - m_first_dest[i] = update.val + m_init_value; - } - } - - const auto tmp = value_type{m_first_from[i], false}; - this->join(update, tmp); - } - - KOKKOS_FUNCTION - void init(value_type& update) const { - update.val = {}; - update.is_initial = true; - } - - KOKKOS_FUNCTION - void join(value_type& update, const value_type& input) const { - if (input.is_initial) return; - - if (update.is_initial) { - update.val = input.val; - update.is_initial = false; - } else { - update.val = update.val + input.val; - } - } -}; - -template -OutputIteratorType exclusive_scan_custom_op_impl( - const std::string& label, const ExecutionSpace& ex, - InputIteratorType first_from, InputIteratorType last_from, - OutputIteratorType first_dest, ValueType init_value, BinaryOpType bop) { - // checks - Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest); - Impl::static_assert_iterators_have_matching_difference_type(first_from, - first_dest); - Impl::expect_valid_range(first_from, last_from); - - // aliases - using index_type = typename InputIteratorType::difference_type; - using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor; - using func_type = - TransformExclusiveScanFunctor; - - // run - const auto num_elements = - Kokkos::Experimental::distance(first_from, last_from); - 
::Kokkos::parallel_scan( - label, RangePolicy(ex, 0, num_elements), - func_type(init_value, first_from, first_dest, bop, unary_op_type())); - ex.fence("Kokkos::exclusive_scan_custom_op: fence after operation"); - - // return - return first_dest + num_elements; -} - -template -using ex_scan_has_reduction_identity_sum_t = - decltype(Kokkos::reduction_identity::sum()); - +// +// exespace impl +// template -OutputIteratorType exclusive_scan_default_op_impl(const std::string& label, - const ExecutionSpace& ex, - InputIteratorType first_from, - InputIteratorType last_from, - OutputIteratorType first_dest, - ValueType init_value) { +OutputIteratorType exclusive_scan_default_op_exespace_impl( + const std::string& label, const ExecutionSpace& ex, + InputIteratorType first_from, InputIteratorType last_from, + OutputIteratorType first_dest, ValueType init_value) { // checks Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest); Impl::static_assert_iterators_have_matching_difference_type(first_from, @@ -184,17 +73,122 @@ OutputIteratorType exclusive_scan_default_op_impl(const std::string& label, ExclusiveScanDefaultFunctorForKnownNeutralElement< ExecutionSpace, index_type, ValueType, InputIteratorType, OutputIteratorType>, - ExclusiveScanDefaultFunctor>; + ExclusiveScanDefaultFunctorWithValueWrapper>; + + // run + const auto num_elements = + Kokkos::Experimental::distance(first_from, last_from); + ::Kokkos::parallel_scan( + label, RangePolicy(ex, 0, num_elements), + func_type(std::move(init_value), first_from, first_dest)); + + ex.fence("Kokkos::exclusive_scan_default_op: fence after operation"); + + return first_dest + num_elements; +} + +template +OutputIteratorType exclusive_scan_custom_op_exespace_impl( + const std::string& label, const ExecutionSpace& ex, + InputIteratorType first_from, InputIteratorType last_from, + OutputIteratorType first_dest, ValueType init_value, BinaryOpType bop) { + // checks + 
Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest); + Impl::static_assert_iterators_have_matching_difference_type(first_from, + first_dest); + Impl::expect_valid_range(first_from, last_from); + + // aliases + using index_type = typename InputIteratorType::difference_type; + using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor; + using func_type = TransformExclusiveScanFunctorWithValueWrapper< + ExecutionSpace, index_type, ValueType, InputIteratorType, + OutputIteratorType, BinaryOpType, unary_op_type>; // run const auto num_elements = Kokkos::Experimental::distance(first_from, last_from); ::Kokkos::parallel_scan(label, RangePolicy(ex, 0, num_elements), - func_type(init_value, first_from, first_dest)); + func_type(std::move(init_value), first_from, + first_dest, bop, unary_op_type())); + ex.fence("Kokkos::exclusive_scan_custom_op: fence after operation"); - ex.fence("Kokkos::exclusive_scan_default_op: fence after operation"); + // return + return first_dest + num_elements; +} + +// +// team impl +// +template +KOKKOS_FUNCTION OutputIteratorType exclusive_scan_default_op_team_impl( + const TeamHandleType& teamHandle, InputIteratorType first_from, + InputIteratorType last_from, OutputIteratorType first_dest, + ValueType init_value) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first_from, + first_dest); + Impl::static_assert_iterators_have_matching_difference_type(first_from, + first_dest); + Impl::expect_valid_range(first_from, last_from); + + static_assert( + ::Kokkos::is_detected_v, + "The team-level impl of Kokkos::Experimental::exclusive_scan currently " + "does not support types without reduction identity"); + + // aliases + using exe_space = typename TeamHandleType::execution_space; + using index_type = typename InputIteratorType::difference_type; + using func_type = ExclusiveScanDefaultFunctorForKnownNeutralElement< + exe_space, index_type, ValueType, InputIteratorType, OutputIteratorType>; 
+ + const auto num_elements = + Kokkos::Experimental::distance(first_from, last_from); + ::Kokkos::parallel_scan( + TeamThreadRange(teamHandle, 0, num_elements), + func_type(std::move(init_value), first_from, first_dest)); + teamHandle.team_barrier(); + return first_dest + num_elements; +} + +template +KOKKOS_FUNCTION OutputIteratorType exclusive_scan_custom_op_team_impl( + const TeamHandleType& teamHandle, InputIteratorType first_from, + InputIteratorType last_from, OutputIteratorType first_dest, + ValueType init_value, BinaryOpType bop) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first_from, + first_dest); + Impl::static_assert_iterators_have_matching_difference_type(first_from, + first_dest); + Impl::expect_valid_range(first_from, last_from); + + static_assert( + ::Kokkos::is_detected_v, + "The team-level impl of Kokkos::Experimental::exclusive_scan currently " + "does not support types without reduction identity"); + + // aliases + using exe_space = typename TeamHandleType::execution_space; + using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor; + using index_type = typename InputIteratorType::difference_type; + using func_type = TransformExclusiveScanFunctorWithoutValueWrapper< + exe_space, index_type, ValueType, InputIteratorType, OutputIteratorType, + BinaryOpType, unary_op_type>; + + const auto num_elements = + Kokkos::Experimental::distance(first_from, last_from); + ::Kokkos::parallel_scan(TeamThreadRange(teamHandle, 0, num_elements), + func_type(std::move(init_value), first_from, + first_dest, bop, unary_op_type())); + teamHandle.team_barrier(); return first_dest + num_elements; } diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FillFillN.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FillFillN.hpp index 316d865f31..972e57f2cc 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FillFillN.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FillFillN.hpp @@ 
-41,9 +41,12 @@ struct StdFillFunctor { : m_first(std::move(_first)), m_value(std::move(_value)) {} }; +// +// exespace impl +// template -void fill_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, const T& value) { +void fill_exespace_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, const T& value) { // checks Impl::static_assert_random_access_and_accessible(ex, first); Impl::expect_valid_range(first, last); @@ -52,13 +55,14 @@ void fill_impl(const std::string& label, const ExecutionSpace& ex, const auto num_elements = Kokkos::Experimental::distance(first, last); ::Kokkos::parallel_for(label, RangePolicy(ex, 0, num_elements), - StdFillFunctor(first, value)); + StdFillFunctor(first, value)); ex.fence("Kokkos::fill: fence after operation"); } template -IteratorType fill_n_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType first, SizeType n, const T& value) { +IteratorType fill_n_exespace_impl(const std::string& label, + const ExecutionSpace& ex, IteratorType first, + SizeType n, const T& value) { auto last = first + n; Impl::static_assert_random_access_and_accessible(ex, first); Impl::expect_valid_range(first, last); @@ -67,7 +71,40 @@ IteratorType fill_n_impl(const std::string& label, const ExecutionSpace& ex, return first; } - fill_impl(label, ex, first, last, value); + fill_exespace_impl(label, ex, first, last, value); + return last; +} + +// +// team-level impl +// +template +KOKKOS_FUNCTION void fill_team_impl(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + const T& value) { + Impl::static_assert_random_access_and_accessible(teamHandle, first); + Impl::expect_valid_range(first, last); + + const auto num_elements = Kokkos::Experimental::distance(first, last); + ::Kokkos::parallel_for(TeamThreadRange(teamHandle, 0, num_elements), + StdFillFunctor(first, value)); + + teamHandle.team_barrier(); +} + +template 
+KOKKOS_FUNCTION IteratorType fill_n_team_impl(const TeamHandleType& teamHandle, + IteratorType first, SizeType n, + const T& value) { + auto last = first + n; + Impl::static_assert_random_access_and_accessible(teamHandle, first); + Impl::expect_valid_range(first, last); + + if (n <= 0) { + return first; + } + + fill_team_impl(teamHandle, first, last, value); return last; } diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FindEnd.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FindEnd.hpp index 3ec64fa43d..1f1ec5e54f 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FindEnd.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FindEnd.hpp @@ -80,12 +80,17 @@ struct StdFindEndFunctor { m_p(std::move(p)) {} }; +// +// exespace impl +// template -IteratorType1 find_end_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType1 first, IteratorType1 last, - IteratorType2 s_first, IteratorType2 s_last, - const BinaryPredicateType& pred) { +IteratorType1 find_end_exespace_impl(const std::string& label, + const ExecutionSpace& ex, + IteratorType1 first, IteratorType1 last, + IteratorType2 s_first, + IteratorType2 s_last, + const BinaryPredicateType& pred) { // checks Impl::static_assert_random_access_and_accessible(ex, first, s_first); Impl::static_assert_iterators_have_matching_difference_type(first, s_first); @@ -97,7 +102,6 @@ IteratorType1 find_end_impl(const std::string& label, const ExecutionSpace& ex, const auto num_elements = KE::distance(first, last); const auto s_count = KE::distance(s_first, s_last); KOKKOS_EXPECTS(num_elements >= s_count); - (void)s_count; // needed when macro above is a no-op if (s_first == s_last) { return last; @@ -109,7 +113,8 @@ IteratorType1 find_end_impl(const std::string& label, const ExecutionSpace& ex, // special case where the two ranges have equal size if (num_elements == s_count) { - const auto equal_result = equal_impl(label, ex, first, last, s_first, pred); + 
const auto equal_result = + equal_exespace_impl(label, ex, first, last, s_first, pred); return (equal_result) ? first : last; } else { using index_type = typename IteratorType1::difference_type; @@ -148,14 +153,97 @@ IteratorType1 find_end_impl(const std::string& label, const ExecutionSpace& ex, } template -IteratorType1 find_end_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType1 first, IteratorType1 last, - IteratorType2 s_first, IteratorType2 s_last) { +IteratorType1 find_end_exespace_impl(const std::string& label, + const ExecutionSpace& ex, + IteratorType1 first, IteratorType1 last, + IteratorType2 s_first, + IteratorType2 s_last) { using value_type1 = typename IteratorType1::value_type; using value_type2 = typename IteratorType2::value_type; using predicate_type = StdAlgoEqualBinaryPredicate; - return find_end_impl(label, ex, first, last, s_first, s_last, - predicate_type()); + return find_end_exespace_impl(label, ex, first, last, s_first, s_last, + predicate_type()); +} + +// +// team impl +// +template +KOKKOS_FUNCTION IteratorType1 +find_end_team_impl(const TeamHandleType& teamHandle, IteratorType1 first, + IteratorType1 last, IteratorType2 s_first, + IteratorType2 s_last, const BinaryPredicateType& pred) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first, s_first); + Impl::static_assert_iterators_have_matching_difference_type(first, s_first); + Impl::expect_valid_range(first, last); + Impl::expect_valid_range(s_first, s_last); + + // the target sequence should not be larger than the range [first, last) + namespace KE = ::Kokkos::Experimental; + const auto num_elements = KE::distance(first, last); + const auto s_count = KE::distance(s_first, s_last); + KOKKOS_EXPECTS(num_elements >= s_count); + + if (s_first == s_last) { + return last; + } + + if (first == last) { + return last; + } + + // special case where the two ranges have equal size + if (num_elements == s_count) { + const auto equal_result = + 
equal_team_impl(teamHandle, first, last, s_first, pred); + return (equal_result) ? first : last; + } else { + using index_type = typename IteratorType1::difference_type; + using reducer_type = LastLoc; + using reduction_value_type = typename reducer_type::value_type; + using func_t = StdFindEndFunctor; + + // run + reduction_value_type red_result; + reducer_type reducer(red_result); + + // decide the size of the range policy of the par_red: + // note that the last feasible index to start looking is the index + // whose distance from the "last" is equal to the sequence count. + // the +1 is because we need to include that location too. + const auto range_size = num_elements - s_count + 1; + + // run par reduce + ::Kokkos::parallel_reduce( + TeamThreadRange(teamHandle, 0, range_size), + func_t(first, last, s_first, s_last, reducer, pred), reducer); + + teamHandle.team_barrier(); + + // decide and return + if (red_result.max_loc_true == + ::Kokkos::reduction_identity::max()) { + // if here, a subrange has not been found + return last; + } else { + // a location has been found + return first + red_result.max_loc_true; + } + } +} + +template +KOKKOS_FUNCTION IteratorType1 find_end_team_impl( + const TeamHandleType& teamHandle, IteratorType1 first, IteratorType1 last, + IteratorType2 s_first, IteratorType2 s_last) { + using value_type1 = typename IteratorType1::value_type; + using value_type2 = typename IteratorType2::value_type; + using predicate_type = StdAlgoEqualBinaryPredicate; + return find_end_team_impl(teamHandle, first, last, s_first, s_last, + predicate_type()); } } // namespace Impl diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FindFirstOf.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FindFirstOf.hpp index 5f22d2ad13..145e235b9d 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FindFirstOf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FindFirstOf.hpp @@ -71,13 +71,15 @@ struct 
StdFindFirstOfFunctor { m_p(std::move(p)) {} }; +// +// exespace impl +// template -IteratorType1 find_first_of_impl(const std::string& label, - const ExecutionSpace& ex, IteratorType1 first, - IteratorType1 last, IteratorType2 s_first, - IteratorType2 s_last, - const BinaryPredicateType& pred) { +IteratorType1 find_first_of_exespace_impl( + const std::string& label, const ExecutionSpace& ex, IteratorType1 first, + IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last, + const BinaryPredicateType& pred) { // checks Impl::static_assert_random_access_and_accessible(ex, first, s_first); Impl::static_assert_iterators_have_matching_difference_type(first, s_first); @@ -116,15 +118,71 @@ IteratorType1 find_first_of_impl(const std::string& label, } template -IteratorType1 find_first_of_impl(const std::string& label, - const ExecutionSpace& ex, IteratorType1 first, - IteratorType1 last, IteratorType2 s_first, - IteratorType2 s_last) { +IteratorType1 find_first_of_exespace_impl( + const std::string& label, const ExecutionSpace& ex, IteratorType1 first, + IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last) { using value_type1 = typename IteratorType1::value_type; using value_type2 = typename IteratorType2::value_type; using predicate_type = StdAlgoEqualBinaryPredicate; - return find_first_of_impl(label, ex, first, last, s_first, s_last, - predicate_type()); + return find_first_of_exespace_impl(label, ex, first, last, s_first, s_last, + predicate_type()); +} + +// +// team impl +// +template +KOKKOS_FUNCTION IteratorType1 +find_first_of_team_impl(const TeamHandleType& teamHandle, IteratorType1 first, + IteratorType1 last, IteratorType2 s_first, + IteratorType2 s_last, const BinaryPredicateType& pred) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first, s_first); + Impl::static_assert_iterators_have_matching_difference_type(first, s_first); + Impl::expect_valid_range(first, last); + Impl::expect_valid_range(s_first, s_last); 
+ + if ((s_first == s_last) || (first == last)) { + return last; + } + + using index_type = typename IteratorType1::difference_type; + using reducer_type = FirstLoc; + using reduction_value_type = typename reducer_type::value_type; + using func_t = StdFindFirstOfFunctor; + + // run + reduction_value_type red_result; + reducer_type reducer(red_result); + const auto num_elements = Kokkos::Experimental::distance(first, last); + ::Kokkos::parallel_reduce(TeamThreadRange(teamHandle, 0, num_elements), + func_t(first, s_first, s_last, reducer, pred), + reducer); + + teamHandle.team_barrier(); + + // decide and return + if (red_result.min_loc_true == + ::Kokkos::reduction_identity::min()) { + // if here, nothing found + return last; + } else { + // a location has been found + return first + red_result.min_loc_true; + } +} + +template +KOKKOS_FUNCTION IteratorType1 find_first_of_team_impl( + const TeamHandleType& teamHandle, IteratorType1 first, IteratorType1 last, + IteratorType2 s_first, IteratorType2 s_last) { + using value_type1 = typename IteratorType1::value_type; + using value_type2 = typename IteratorType2::value_type; + using predicate_type = StdAlgoEqualBinaryPredicate; + return find_first_of_team_impl(teamHandle, first, last, s_first, s_last, + predicate_type()); } } // namespace Impl diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FindIfOrNot.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FindIfOrNot.hpp index 9c0b0c0ccd..8fffb59094 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FindIfOrNot.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FindIfOrNot.hpp @@ -61,11 +61,15 @@ struct StdFindIfOrNotFunctor { m_p(std::move(p)) {} }; +// +// exespace impl +// template -IteratorType find_if_or_not_impl(const std::string& label, - const ExecutionSpace& ex, IteratorType first, - IteratorType last, PredicateType pred) { +IteratorType find_if_or_not_exespace_impl(const std::string& label, + const 
ExecutionSpace& ex, + IteratorType first, IteratorType last, + PredicateType pred) { // checks Impl::static_assert_random_access_and_accessible( ex, first); // only need one It per type @@ -104,14 +108,68 @@ IteratorType find_if_or_not_impl(const std::string& label, } template -InputIterator find_impl(const std::string& label, ExecutionSpace ex, - InputIterator first, InputIterator last, - const T& value) { - return find_if_or_not_impl( +InputIterator find_exespace_impl(const std::string& label, ExecutionSpace ex, + InputIterator first, InputIterator last, + const T& value) { + return find_if_or_not_exespace_impl( label, ex, first, last, ::Kokkos::Experimental::Impl::StdAlgoEqualsValUnaryPredicate(value)); } +// +// team impl +// +template +KOKKOS_FUNCTION IteratorType +find_if_or_not_team_impl(const TeamHandleType& teamHandle, IteratorType first, + IteratorType last, PredicateType pred) { + // checks + Impl::static_assert_random_access_and_accessible( + teamHandle, first); // only need one It per type + Impl::expect_valid_range(first, last); + + if (first == last) { + return last; + } + + // aliases + using index_type = typename IteratorType::difference_type; + using reducer_type = FirstLoc; + using reduction_value_type = typename reducer_type::value_type; + using func_t = StdFindIfOrNotFunctor; + + // run + reduction_value_type red_result; + reducer_type reducer(red_result); + const auto num_elements = Kokkos::Experimental::distance(first, last); + ::Kokkos::parallel_reduce(TeamThreadRange(teamHandle, 0, num_elements), + func_t(first, reducer, pred), reducer); + + teamHandle.team_barrier(); + + // decide and return + if (red_result.min_loc_true == + ::Kokkos::reduction_identity::min()) { + // here, it means a valid loc has not been found, + return last; + } else { + // a location has been found + return first + red_result.min_loc_true; + } +} + +template +KOKKOS_FUNCTION InputIterator find_team_impl(const TeamHandleType& teamHandle, + InputIterator first, + 
InputIterator last, + const T& value) { + return find_if_or_not_team_impl( + teamHandle, first, last, + ::Kokkos::Experimental::Impl::StdAlgoEqualsValUnaryPredicate(value)); +} + } // namespace Impl } // namespace Experimental } // namespace Kokkos diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ForEachForEachN.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ForEachForEachN.hpp index f9a6ff2e99..d3be3b7f66 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ForEachForEachN.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ForEachForEachN.hpp @@ -41,29 +41,31 @@ struct StdForEachFunctor { : m_first(std::move(_first)), m_functor(std::move(_functor)) {} }; -template -UnaryFunctorType for_each_impl(const std::string& label, - const ExecutionSpace& ex, IteratorType first, - IteratorType last, UnaryFunctorType functor) { +template +UnaryFunctorType for_each_exespace_impl(const std::string& label, + const HandleType& handle, + IteratorType first, IteratorType last, + UnaryFunctorType functor) { // checks - Impl::static_assert_random_access_and_accessible(ex, first); + Impl::static_assert_random_access_and_accessible(handle, first); Impl::expect_valid_range(first, last); // run const auto num_elements = Kokkos::Experimental::distance(first, last); ::Kokkos::parallel_for( - label, RangePolicy(ex, 0, num_elements), + label, RangePolicy(handle, 0, num_elements), StdForEachFunctor(first, functor)); - ex.fence("Kokkos::for_each: fence after operation"); + handle.fence("Kokkos::for_each: fence after operation"); return functor; } template -IteratorType for_each_n_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType first, SizeType n, - UnaryFunctorType functor) { +IteratorType for_each_n_exespace_impl(const std::string& label, + const ExecutionSpace& ex, + IteratorType first, SizeType n, + UnaryFunctorType functor) { auto last = first + n; Impl::static_assert_random_access_and_accessible(ex, first, last); 
Impl::expect_valid_range(first, last); @@ -72,8 +74,46 @@ IteratorType for_each_n_impl(const std::string& label, const ExecutionSpace& ex, return first; } - for_each_impl(label, ex, first, last, std::move(functor)); - // no neeed to fence since for_each_impl fences already + for_each_exespace_impl(label, ex, first, last, std::move(functor)); + // no neeed to fence since for_each_exespace_impl fences already + + return last; +} + +// +// team impl +// +template +KOKKOS_FUNCTION UnaryFunctorType +for_each_team_impl(const TeamHandleType& teamHandle, IteratorType first, + IteratorType last, UnaryFunctorType functor) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first); + Impl::expect_valid_range(first, last); + // run + const auto num_elements = Kokkos::Experimental::distance(first, last); + ::Kokkos::parallel_for( + TeamThreadRange(teamHandle, 0, num_elements), + StdForEachFunctor(first, functor)); + teamHandle.team_barrier(); + return functor; +} + +template +KOKKOS_FUNCTION IteratorType +for_each_n_team_impl(const TeamHandleType& teamHandle, IteratorType first, + SizeType n, UnaryFunctorType functor) { + auto last = first + n; + Impl::static_assert_random_access_and_accessible(teamHandle, first, last); + Impl::expect_valid_range(first, last); + + if (n == 0) { + return first; + } + + for_each_team_impl(teamHandle, first, last, std::move(functor)); + // no neeed to fence since for_each_team_impl fences already return last; } diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FunctorsForExclusiveScan.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FunctorsForExclusiveScan.hpp new file mode 100644 index 0000000000..8151ee3495 --- /dev/null +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FunctorsForExclusiveScan.hpp @@ -0,0 +1,220 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOS_STD_ALGORITHMS_FUNCTORS_FOR_EXCLUSIVE_SCAN_IMPL_HPP +#define KOKKOS_STD_ALGORITHMS_FUNCTORS_FOR_EXCLUSIVE_SCAN_IMPL_HPP + +#include +#include "Kokkos_ValueWrapperForNoNeutralElement.hpp" + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +template +using ex_scan_has_reduction_identity_sum_t = + decltype(Kokkos::reduction_identity::sum()); + +template +struct ExclusiveScanDefaultFunctorForKnownNeutralElement { + using execution_space = ExeSpace; + ValueType m_init_value; + FirstFrom m_first_from; + FirstDest m_first_dest; + + KOKKOS_FUNCTION + ExclusiveScanDefaultFunctorForKnownNeutralElement(ValueType init, + FirstFrom first_from, + FirstDest first_dest) + : m_init_value(std::move(init)), + m_first_from(std::move(first_from)), + m_first_dest(std::move(first_dest)) {} + + KOKKOS_FUNCTION + void operator()(const IndexType i, ValueType& update, + const bool final_pass) const { + if (final_pass) m_first_dest[i] = update + m_init_value; + update += m_first_from[i]; + } +}; + +template +struct ExclusiveScanDefaultFunctorWithValueWrapper { + using execution_space = ExeSpace; + using value_type = + ::Kokkos::Experimental::Impl::ValueWrapperForNoNeutralElement; + ValueType m_init_value; + FirstFrom m_first_from; + FirstDest m_first_dest; + + KOKKOS_FUNCTION + ExclusiveScanDefaultFunctorWithValueWrapper(ValueType init, + FirstFrom first_from, + FirstDest first_dest) + : m_init_value(std::move(init)), + m_first_from(std::move(first_from)), + m_first_dest(std::move(first_dest)) {} + + KOKKOS_FUNCTION + void 
operator()(const IndexType i, value_type& update, + const bool final_pass) const { + if (final_pass) { + if (i == 0) { + m_first_dest[i] = m_init_value; + } else { + m_first_dest[i] = update.val + m_init_value; + } + } + + const auto tmp = value_type{m_first_from[i], false}; + this->join(update, tmp); + } + + KOKKOS_FUNCTION + void init(value_type& update) const { + update.val = {}; + update.is_initial = true; + } + + KOKKOS_FUNCTION + void join(value_type& update, const value_type& input) const { + if (input.is_initial) return; + + if (update.is_initial) { + update.val = input.val; + update.is_initial = false; + } else { + update.val = update.val + input.val; + } + } +}; + +template +struct TransformExclusiveScanFunctorWithValueWrapper { + using execution_space = ExeSpace; + using value_type = + ::Kokkos::Experimental::Impl::ValueWrapperForNoNeutralElement; + + ValueType m_init_value; + FirstFrom m_first_from; + FirstDest m_first_dest; + BinaryOpType m_binary_op; + UnaryOpType m_unary_op; + + KOKKOS_FUNCTION + TransformExclusiveScanFunctorWithValueWrapper(ValueType init, + FirstFrom first_from, + FirstDest first_dest, + BinaryOpType bop, + UnaryOpType uop) + : m_init_value(std::move(init)), + m_first_from(std::move(first_from)), + m_first_dest(std::move(first_dest)), + m_binary_op(std::move(bop)), + m_unary_op(std::move(uop)) {} + + KOKKOS_FUNCTION + void operator()(const IndexType i, value_type& update, + const bool final_pass) const { + if (final_pass) { + if (i == 0) { + // for both ExclusiveScan and TransformExclusiveScan, + // init is unmodified + m_first_dest[i] = m_init_value; + } else { + m_first_dest[i] = m_binary_op(update.val, m_init_value); + } + } + + const auto tmp = value_type{m_unary_op(m_first_from[i]), false}; + this->join(update, tmp); + } + + KOKKOS_FUNCTION void init(value_type& value) const { + value.val = {}; + value.is_initial = true; + } + + KOKKOS_FUNCTION + void join(value_type& update, const value_type& input) const { + if 
(input.is_initial) return; + + if (update.is_initial) { + update.val = input.val; + } else { + update.val = m_binary_op(update.val, input.val); + } + update.is_initial = false; + } +}; + +template +struct TransformExclusiveScanFunctorWithoutValueWrapper { + using execution_space = ExeSpace; + + ValueType m_init_value; + FirstFrom m_first_from; + FirstDest m_first_dest; + BinaryOpType m_binary_op; + UnaryOpType m_unary_op; + + KOKKOS_FUNCTION + TransformExclusiveScanFunctorWithoutValueWrapper(ValueType init, + FirstFrom first_from, + FirstDest first_dest, + BinaryOpType bop, + UnaryOpType uop) + : m_init_value(std::move(init)), + m_first_from(std::move(first_from)), + m_first_dest(std::move(first_dest)), + m_binary_op(std::move(bop)), + m_unary_op(std::move(uop)) {} + + KOKKOS_FUNCTION + void operator()(const IndexType i, ValueType& update, + const bool final_pass) const { + if (final_pass) { + if (i == 0) { + // for both ExclusiveScan and TransformExclusiveScan, + // init is unmodified + m_first_dest[i] = m_init_value; + } else { + m_first_dest[i] = m_binary_op(update, m_init_value); + } + } + + const auto tmp = ValueType{m_unary_op(m_first_from[i])}; + this->join(update, tmp); + } + + KOKKOS_FUNCTION + void init(ValueType& update) const { update = {}; } + + KOKKOS_FUNCTION + void join(ValueType& update, const ValueType& input) const { + update = m_binary_op(update, input); + } +}; + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_GenerateGenerateN.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_GenerateGenerateN.hpp index 228390bdff..157de1125e 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_GenerateGenerateN.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_GenerateGenerateN.hpp @@ -41,32 +41,65 @@ struct StdGenerateFunctor { : m_first(std::move(_first)), m_generator(std::move(_g)) {} }; +// +// generate impl +// template 
-void generate_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, Generator g) { +void generate_exespace_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, + Generator g) { // checks Impl::static_assert_random_access_and_accessible(ex, first); Impl::expect_valid_range(first, last); - // aliases - using func_t = StdGenerateFunctor; - // run const auto num_elements = Kokkos::Experimental::distance(first, last); ::Kokkos::parallel_for(label, RangePolicy(ex, 0, num_elements), - func_t(first, g)); + StdGenerateFunctor(first, g)); ex.fence("Kokkos::generate: fence after operation"); } +template +KOKKOS_FUNCTION void generate_team_impl(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + Generator g) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first); + Impl::expect_valid_range(first, last); + + // run + const auto num_elements = Kokkos::Experimental::distance(first, last); + ::Kokkos::parallel_for(TeamThreadRange(teamHandle, 0, num_elements), + StdGenerateFunctor(first, g)); + teamHandle.team_barrier(); +} + +// +// generate_n impl +// template -IteratorType generate_n_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType first, Size count, Generator g) { +IteratorType generate_n_exespace_impl(const std::string& label, + const ExecutionSpace& ex, + IteratorType first, Size count, + Generator g) { if (count <= 0) { return first; } - generate_impl(label, ex, first, first + count, g); + generate_exespace_impl(label, ex, first, first + count, g); + return first + count; +} + +template +KOKKOS_FUNCTION IteratorType +generate_n_team_impl(const TeamHandleType& teamHandle, IteratorType first, + Size count, Generator g) { + if (count <= 0) { + return first; + } + + generate_team_impl(teamHandle, first, first + count, g); return first + count; } diff --git 
a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_InclusiveScan.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_InclusiveScan.hpp index ecd6ff39cd..0b4acec0fe 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_InclusiveScan.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_InclusiveScan.hpp @@ -101,9 +101,12 @@ struct InclusiveScanDefaultFunctor { } }; +// +// exespace impl +// template -OutputIteratorType inclusive_scan_default_op_impl( +OutputIteratorType inclusive_scan_default_op_exespace_impl( const std::string& label, const ExecutionSpace& ex, InputIteratorType first_from, InputIteratorType last_from, OutputIteratorType first_dest) { @@ -143,7 +146,7 @@ OutputIteratorType inclusive_scan_default_op_impl( // ------------------------------------------------------------- template -OutputIteratorType inclusive_scan_custom_binary_op_impl( +OutputIteratorType inclusive_scan_custom_binary_op_exespace_impl( const std::string& label, const ExecutionSpace& ex, InputIteratorType first_from, InputIteratorType last_from, OutputIteratorType first_dest, BinaryOpType binary_op) { @@ -158,7 +161,7 @@ OutputIteratorType inclusive_scan_custom_binary_op_impl( using value_type = std::remove_const_t; using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor; - using func_type = TransformInclusiveScanNoInitValueFunctor< + using func_type = ExeSpaceTransformInclusiveScanNoInitValueFunctor< ExecutionSpace, index_type, value_type, InputIteratorType, OutputIteratorType, BinaryOpType, unary_op_type>; @@ -179,7 +182,7 @@ OutputIteratorType inclusive_scan_custom_binary_op_impl( // ------------------------------------------------------------- template -OutputIteratorType inclusive_scan_custom_binary_op_impl( +OutputIteratorType inclusive_scan_custom_binary_op_exespace_impl( const std::string& label, const ExecutionSpace& ex, InputIteratorType first_from, InputIteratorType last_from, OutputIteratorType first_dest, BinaryOpType binary_op, @@ 
-193,7 +196,7 @@ OutputIteratorType inclusive_scan_custom_binary_op_impl( // aliases using index_type = typename InputIteratorType::difference_type; using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor; - using func_type = TransformInclusiveScanWithInitValueFunctor< + using func_type = ExeSpaceTransformInclusiveScanWithInitValueFunctor< ExecutionSpace, index_type, ValueType, InputIteratorType, OutputIteratorType, BinaryOpType, unary_op_type>; @@ -203,13 +206,142 @@ OutputIteratorType inclusive_scan_custom_binary_op_impl( ::Kokkos::parallel_scan(label, RangePolicy(ex, 0, num_elements), func_type(first_from, first_dest, binary_op, - unary_op_type(), init_value)); + unary_op_type(), std::move(init_value))); ex.fence("Kokkos::inclusive_scan_custom_binary_op: fence after operation"); // return return first_dest + num_elements; } +// +// team impl +// +template +KOKKOS_FUNCTION OutputIteratorType inclusive_scan_default_op_team_impl( + const TeamHandleType& teamHandle, InputIteratorType first_from, + InputIteratorType last_from, OutputIteratorType first_dest) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first_from, + first_dest); + Impl::static_assert_iterators_have_matching_difference_type(first_from, + first_dest); + Impl::expect_valid_range(first_from, last_from); + + using value_type = + std::remove_const_t; + + // #if defined(KOKKOS_ENABLE_CUDA) + + using exe_space = typename TeamHandleType::execution_space; + using index_type = typename InputIteratorType::difference_type; + using func_type = std::conditional_t< + ::Kokkos::is_detected::value, + InclusiveScanDefaultFunctorForKnownIdentityElement< + exe_space, index_type, value_type, InputIteratorType, + OutputIteratorType>, + InclusiveScanDefaultFunctor>; + + // run + const auto num_elements = + Kokkos::Experimental::distance(first_from, last_from); + ::Kokkos::parallel_scan(TeamThreadRange(teamHandle, 0, num_elements), + func_type(first_from, first_dest)); + 
teamHandle.team_barrier(); + + // return + return first_dest + num_elements; +} + +// ------------------------------------------------------------- +// inclusive_scan_custom_binary_op_impl +// ------------------------------------------------------------- +template +KOKKOS_FUNCTION OutputIteratorType inclusive_scan_custom_binary_op_team_impl( + const TeamHandleType& teamHandle, InputIteratorType first_from, + InputIteratorType last_from, OutputIteratorType first_dest, + BinaryOpType binary_op) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first_from, + first_dest); + Impl::static_assert_iterators_have_matching_difference_type(first_from, + first_dest); + Impl::expect_valid_range(first_from, last_from); + + using value_type = + std::remove_const_t; + + static_assert( + ::Kokkos::is_detected_v, + "At the moment inclusive_scan doesn't support types without reduction " + "identity"); + + // #if defined(KOKKOS_ENABLE_CUDA) + + // aliases + using exe_space = typename TeamHandleType::execution_space; + using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor; + using func_type = TeamTransformInclusiveScanNoInitValueFunctor< + exe_space, value_type, InputIteratorType, OutputIteratorType, + BinaryOpType, unary_op_type>; + + // run + const auto num_elements = + Kokkos::Experimental::distance(first_from, last_from); + + ::Kokkos::parallel_scan( + TeamThreadRange(teamHandle, 0, num_elements), + func_type(first_from, first_dest, binary_op, unary_op_type())); + teamHandle.team_barrier(); + + return first_dest + num_elements; +} + +// ------------------------------------------------------------- +// inclusive_scan_custom_binary_op_impl with init_value +// ------------------------------------------------------------- +template +KOKKOS_FUNCTION OutputIteratorType inclusive_scan_custom_binary_op_team_impl( + const TeamHandleType& teamHandle, InputIteratorType first_from, + InputIteratorType last_from, OutputIteratorType first_dest, + 
BinaryOpType binary_op, ValueType init_value) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first_from, + first_dest); + Impl::static_assert_iterators_have_matching_difference_type(first_from, + first_dest); + Impl::expect_valid_range(first_from, last_from); + + static_assert( + ::Kokkos::is_detected_v, + "At the moment inclusive_scan doesn't support types without reduction " + "identity"); + + // #if defined(KOKKOS_ENABLE_CUDA) + + // aliases + using exe_space = typename TeamHandleType::execution_space; + using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor; + using func_type = TeamTransformInclusiveScanWithInitValueFunctor< + exe_space, ValueType, InputIteratorType, OutputIteratorType, BinaryOpType, + unary_op_type>; + + // run + const auto num_elements = + Kokkos::Experimental::distance(first_from, last_from); + ::Kokkos::parallel_scan(TeamThreadRange(teamHandle, 0, num_elements), + func_type(first_from, first_dest, binary_op, + unary_op_type(), std::move(init_value))); + teamHandle.team_barrier(); + + // return + return first_dest + num_elements; +} + } // namespace Impl } // namespace Experimental } // namespace Kokkos diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IsPartitioned.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IsPartitioned.hpp index 0fe2d246ff..281efca36b 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IsPartitioned.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IsPartitioned.hpp @@ -62,9 +62,9 @@ struct StdIsPartitionedFunctor { }; template -bool is_partitioned_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, - PredicateType pred) { +bool is_partitioned_exespace_impl(const std::string& label, + const ExecutionSpace& ex, IteratorType first, + IteratorType last, PredicateType pred) { // true if all elements in the range [first, last) that satisfy // the predicate "pred" appear before all 
elements that don't. // Also returns true if [first, last) is empty. @@ -97,6 +97,7 @@ bool is_partitioned_impl(const std::string& label, const ExecutionSpace& ex, const auto num_elements = Kokkos::Experimental::distance(first, last); ::Kokkos::parallel_reduce(label, RangePolicy(ex, 0, num_elements), + func_t(first, reducer, pred), reducer); // fence not needed because reducing into scalar @@ -109,8 +110,72 @@ bool is_partitioned_impl(const std::string& label, const ExecutionSpace& ex, if (red_result.max_loc_true != red_id_max && red_result.min_loc_false != red_id_min) { + // this occurs when the reduction yields nontrivial values return red_result.max_loc_true < red_result.min_loc_false; + } else if (red_result.max_loc_true == red_id_max && + red_result.min_loc_false == 0) { + // this occurs when all values do NOT satisfy + // the predicate, and this corner case should also be true + return true; } else if (first + red_result.max_loc_true == --last) { + // this occurs when all values satisfy the predicate, + // this corner case should also be true + return true; + } else { + return false; + } +} + +template +KOKKOS_FUNCTION bool is_partitioned_team_impl(const TeamHandleType& teamHandle, + IteratorType first, + IteratorType last, + PredicateType pred) { + /* see exespace impl for the description of the impl */ + + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first); + Impl::expect_valid_range(first, last); + + // trivial case + if (first == last) { + return true; + } + + // aliases + using index_type = typename IteratorType::difference_type; + using reducer_type = StdIsPartitioned; + using reduction_value_type = typename reducer_type::value_type; + using func_t = + StdIsPartitionedFunctor; + + // run + reduction_value_type red_result; + reducer_type reducer(red_result); + const auto num_elements = Kokkos::Experimental::distance(first, last); + ::Kokkos::parallel_reduce(TeamThreadRange(teamHandle, 0, num_elements), + func_t(first, 
reducer, pred), reducer); + + // fence not needed because reducing into scalar + + // decide and return + constexpr index_type red_id_min = + ::Kokkos::reduction_identity::min(); + constexpr index_type red_id_max = + ::Kokkos::reduction_identity::max(); + + if (red_result.max_loc_true != red_id_max && + red_result.min_loc_false != red_id_min) { + // this occurs when the reduction yields nontrivial values + return red_result.max_loc_true < red_result.min_loc_false; + } else if (red_result.max_loc_true == red_id_max && + red_result.min_loc_false == 0) { + // this occurs when all values do NOT satisfy + // the predicate, and this corner case should also be true + return true; + } else if (first + red_result.max_loc_true == --last) { + // this occurs when all values satisfy the predicate, + // this corner case should also be true return true; } else { return false; diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IsSorted.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IsSorted.hpp index 4696821586..b2c912848a 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IsSorted.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IsSorted.hpp @@ -48,10 +48,13 @@ struct StdIsSortedFunctor { : m_first(std::move(_first1)), m_comparator(std::move(comparator)) {} }; +// +// exespace impl +// template -bool is_sorted_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, - ComparatorType comp) { +bool is_sorted_exespace_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, + ComparatorType comp) { // checks Impl::static_assert_random_access_and_accessible(ex, first); Impl::expect_valid_range(first, last); @@ -75,11 +78,49 @@ bool is_sorted_impl(const std::string& label, const ExecutionSpace& ex, } template -bool is_sorted_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last) { +bool 
is_sorted_exespace_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last) { using value_type = typename IteratorType::value_type; using pred_t = Impl::StdAlgoLessThanBinaryPredicate; - return is_sorted_impl(label, ex, first, last, pred_t()); + return is_sorted_exespace_impl(label, ex, first, last, pred_t()); +} + +// +// team impl +// +template +KOKKOS_FUNCTION bool is_sorted_team_impl(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + ComparatorType comp) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first); + Impl::expect_valid_range(first, last); + + const auto num_elements = Kokkos::Experimental::distance(first, last); + if (num_elements <= 1) { + return true; + } + + // use num_elements-1 because each index handles i and i+1 + const auto num_elements_minus_one = num_elements - 1; + + // result is incremented by one if sorting breaks at index i + std::size_t result = 0; + ::Kokkos::parallel_reduce( + TeamThreadRange(teamHandle, 0, num_elements_minus_one), + // use CTAD here + StdIsSortedFunctor(first, std::move(comp)), result); + + return result == 0; +} + +template +KOKKOS_FUNCTION bool is_sorted_team_impl(const TeamHandleType& teamHandle, + IteratorType first, + IteratorType last) { + using value_type = typename IteratorType::value_type; + using pred_t = Impl::StdAlgoLessThanBinaryPredicate; + return is_sorted_team_impl(teamHandle, first, last, pred_t()); } } // namespace Impl diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IsSortedUntil.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IsSortedUntil.hpp index 2a0c112bf5..d33580ca53 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IsSortedUntil.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IsSortedUntil.hpp @@ -54,10 +54,15 @@ struct StdIsSortedUntilFunctor { m_reducer(std::move(reducer)) {} }; +// +// overloads accepting exespace +// template 
-IteratorType is_sorted_until_impl(const std::string& label, - const ExecutionSpace& ex, IteratorType first, - IteratorType last, ComparatorType comp) { +IteratorType is_sorted_until_exespace_impl(const std::string& label, + const ExecutionSpace& ex, + IteratorType first, + IteratorType last, + ComparatorType comp) { // checks Impl::static_assert_random_access_and_accessible(ex, first); Impl::expect_valid_range(first, last); @@ -81,7 +86,6 @@ IteratorType is_sorted_until_impl(const std::string& label, label, // use num_elements-1 because each index handles i and i+1 RangePolicy(ex, 0, num_elements - 1), - // use CTAD StdIsSortedUntilFunctor(first, comp, reducer), reducer); /* If the reduction result is equal to the initial value, @@ -98,12 +102,66 @@ IteratorType is_sorted_until_impl(const std::string& label, } template -IteratorType is_sorted_until_impl(const std::string& label, - const ExecutionSpace& ex, IteratorType first, - IteratorType last) { +IteratorType is_sorted_until_exespace_impl(const std::string& label, + const ExecutionSpace& ex, + IteratorType first, + IteratorType last) { using value_type = typename IteratorType::value_type; using pred_t = Impl::StdAlgoLessThanBinaryPredicate; - return is_sorted_until_impl(label, ex, first, last, pred_t()); + return is_sorted_until_exespace_impl(label, ex, first, last, pred_t()); +} + +// +// overloads accepting team handle +// +template +KOKKOS_FUNCTION IteratorType +is_sorted_until_team_impl(const ExecutionSpace& teamHandle, IteratorType first, + IteratorType last, ComparatorType comp) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first); + Impl::expect_valid_range(first, last); + + const auto num_elements = Kokkos::Experimental::distance(first, last); + + // trivial case + if (num_elements <= 1) { + return last; + } + + /* + Do a par_reduce computing the *min* index that breaks the sorting. 
+ If one such index is found, then the range is sorted until that element, + if no such index is found, then it means the range is sorted until the end. + */ + using index_type = typename IteratorType::difference_type; + index_type red_result; + index_type red_result_init; + ::Kokkos::Min reducer(red_result); + reducer.init(red_result_init); + ::Kokkos::parallel_reduce( // use num_elements-1 because each index handles i + // and i+1 + TeamThreadRange(teamHandle, 0, num_elements - 1), + StdIsSortedUntilFunctor(first, comp, reducer), reducer); + teamHandle.team_barrier(); + + /* If the reduction result is equal to the initial value, + and it means the range is sorted until the end */ + if (red_result == red_result_init) { + return last; + } else { + /* If such index is found, then the range is sorted until there and + we need to return an iterator past the element found so do +1 */ + return first + (red_result + 1); + } +} + +template +KOKKOS_FUNCTION IteratorType is_sorted_until_team_impl( + const ExecutionSpace& teamHandle, IteratorType first, IteratorType last) { + using value_type = typename IteratorType::value_type; + using pred_t = Impl::StdAlgoLessThanBinaryPredicate; + return is_sorted_until_team_impl(teamHandle, first, last, pred_t()); } } // namespace Impl diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_LexicographicalCompare.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_LexicographicalCompare.hpp index ad7f59232e..b95a66c3bd 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_LexicographicalCompare.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_LexicographicalCompare.hpp @@ -84,13 +84,15 @@ struct StdLexicographicalCompareFunctor { m_comparator(std::move(_comp)) {} }; +// +// exespace impl +// template -bool lexicographical_compare_impl(const std::string& label, - const ExecutionSpace& ex, - IteratorType1 first1, IteratorType1 last1, - IteratorType2 first2, IteratorType2 last2, - ComparatorType 
comp) { +bool lexicographical_compare_exespace_impl( + const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, + IteratorType1 last1, IteratorType2 first2, IteratorType2 last2, + ComparatorType comp) { // checks Impl::static_assert_random_access_and_accessible(ex, first1, first2); Impl::static_assert_iterators_have_matching_difference_type(first1, first2); @@ -139,16 +141,84 @@ bool lexicographical_compare_impl(const std::string& label, } template -bool lexicographical_compare_impl(const std::string& label, - const ExecutionSpace& ex, - IteratorType1 first1, IteratorType1 last1, - IteratorType2 first2, IteratorType2 last2) { +bool lexicographical_compare_exespace_impl( + const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, + IteratorType1 last1, IteratorType2 first2, IteratorType2 last2) { using value_type_1 = typename IteratorType1::value_type; using value_type_2 = typename IteratorType2::value_type; using predicate_t = Impl::StdAlgoLessThanBinaryPredicate; - return lexicographical_compare_impl(label, ex, first1, last1, first2, last2, - predicate_t()); + return lexicographical_compare_exespace_impl(label, ex, first1, last1, first2, + last2, predicate_t()); +} + +// +// team impl +// +template +KOKKOS_FUNCTION bool lexicographical_compare_team_impl( + const TeamHandleType& teamHandle, IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, IteratorType2 last2, ComparatorType comp) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first1, first2); + Impl::static_assert_iterators_have_matching_difference_type(first1, first2); + Impl::expect_valid_range(first1, last1); + Impl::expect_valid_range(first2, last2); + + // aliases + using index_type = typename IteratorType1::difference_type; + using reducer_type = FirstLoc; + using reduction_value_type = typename reducer_type::value_type; + + // run + const auto d1 = Kokkos::Experimental::distance(first1, last1); + const auto d2 = 
Kokkos::Experimental::distance(first2, last2); + const auto range = Kokkos::min(d1, d2); + reduction_value_type red_result; + reducer_type reducer(red_result); + using func1_t = + StdLexicographicalCompareFunctor; + + ::Kokkos::parallel_reduce(TeamThreadRange(teamHandle, 0, range), + func1_t(first1, first2, reducer, comp), reducer); + + teamHandle.team_barrier(); + + // no mismatch + if (red_result.min_loc_true == + ::Kokkos::reduction_identity::min()) { + auto new_last1 = first1 + range; + auto new_last2 = first2 + range; + bool is_prefix = (new_last1 == last1) && (new_last2 != last2); + return is_prefix; + } + + // check mismatched + int less = 0; + auto it1 = first1 + red_result.min_loc_true; + auto it2 = first2 + red_result.min_loc_true; + using func2_t = StdCompareFunctor; + ::Kokkos::parallel_reduce(TeamThreadRange(teamHandle, 0, 1), + func2_t(it1, it2, comp), less); + + teamHandle.team_barrier(); + + return static_cast(less); +} + +template +KOKKOS_FUNCTION bool lexicographical_compare_team_impl( + const TeamHandleType& teamHandle, IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, IteratorType2 last2) { + using value_type_1 = typename IteratorType1::value_type; + using value_type_2 = typename IteratorType2::value_type; + using predicate_t = + Impl::StdAlgoLessThanBinaryPredicate; + return lexicographical_compare_team_impl(teamHandle, first1, last1, first2, + last2, predicate_t()); } } // namespace Impl diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_MinMaxMinmaxElement.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_MinMaxMinmaxElement.hpp index 048420f7a8..2f51db03b4 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_MinMaxMinmaxElement.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_MinMaxMinmaxElement.hpp @@ -63,12 +63,16 @@ struct StdMinMaxElemFunctor { : m_first(std::move(first)), m_reducer(std::move(reducer)) {} }; +// +// exespace impl +// template