diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index b6892aa4ee..1b4cae3aaa 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -153,12 +153,12 @@ tools/vim/* @hammondkd unittest/* @akohlmey # cmake -cmake/* @rbberger +cmake/* @akohlmey cmake/Modules/LAMMPSInterfacePlugin.cmake @akohlmey cmake/Modules/MPI4WIN.cmake @akohlmey cmake/Modules/OpenCLLoader.cmake @akohlmey -cmake/Modules/Packages/COLVARS.cmake @rbberger @giacomofiorin -cmake/Modules/Packages/KIM.cmake @rbberger @ellio167 +cmake/Modules/Packages/COLVARS.cmake @giacomofiorin +cmake/Modules/Packages/KIM.cmake @ellio167 cmake/presets/*.cmake @akohlmey # python diff --git a/.github/workflows/coverity.yml b/.github/workflows/coverity.yml index b1c23e1f6a..00a4596cc8 100644 --- a/.github/workflows/coverity.yml +++ b/.github/workflows/coverity.yml @@ -59,16 +59,13 @@ jobs: -D BUILD_SHARED_LIBS=on \ -D LAMMPS_SIZES=SMALLBIG \ -D LAMMPS_EXCEPTIONS=off \ - -D PKG_MESSAGE=on \ - -D PKG_MPIIO=on \ -D PKG_ATC=on \ -D PKG_AWPMD=on \ - -D PKG_BOCS=on \ - -D PKG_EFF=on \ -D PKG_H5MD=on \ -D PKG_INTEL=on \ -D PKG_LATBOLTZ=on \ -D PKG_MANIFOLD=on \ + -D PKG_MDI=on \ -D PKG_MGPT=on \ -D PKG_ML-PACE=on \ -D PKG_ML-RANN=on \ @@ -77,7 +74,6 @@ jobs: -D PKG_PTM=on \ -D PKG_QTB=on \ -D PKG_SMTBQ=on \ - -D PKG_TALLY=on \ ../cmake - name: Run Coverity Scan diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 95d738d279..28e02bbee7 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -2,7 +2,6 @@ ######################################## # CMake build system # This file is part of LAMMPS -# Created by Christoph Junghans and Richard Berger cmake_minimum_required(VERSION 3.16) ######################################## # set policy to silence warnings about ignoring _ROOT but use it @@ -106,7 +105,7 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel") if(CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 17.3 OR CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 17.4) set(CMAKE_TUNE_DEFAULT "-xCOMMON-AVX512") else() - 
set(CMAKE_TUNE_DEFAULT "-xHost -fp-model fast=2 -no-prec-div -qoverride-limits -diag-disable=10441 -diag-disable=2196") + set(CMAKE_TUNE_DEFAULT "-xHost -fp-model fast=2 -no-prec-div -qoverride-limits -diag-disable=10441 -diag-disable=11074 -diag-disable=11076 -diag-disable=2196") endif() endif() endif() @@ -428,6 +427,18 @@ if(BUILD_OMP) target_link_libraries(lmp PRIVATE OpenMP::OpenMP_CXX) endif() +# lower C++ standard for fmtlib sources when using Intel classic compiler +if((CMAKE_CXX_COMPILER_ID STREQUAL "Intel") AND (CMAKE_CXX_STANDARD GREATER_EQUAL 17) + AND (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 2021.10)) + message(STATUS "Lowering C++ standard for compiling fmtlib sources with Intel Classic compiler") + get_filename_component(LMP_UTILS_SRC "${LAMMPS_SOURCE_DIR}/utils.cpp" ABSOLUTE) + get_filename_component(LMP_VARIABLE_SRC "${LAMMPS_SOURCE_DIR}/variable.cpp" ABSOLUTE) + get_filename_component(FMT_FORMAT_SRC "${LAMMPS_SOURCE_DIR}/fmtlib_format.cpp" ABSOLUTE) + get_filename_component(FMT_OS_SRC "${LAMMPS_SOURCE_DIR}/fmtlib_os.cpp" ABSOLUTE) + set_source_files_properties("${FMT_FORMAT_SRC}" "${FMT_OS_SRC}" "${LMP_VARIABLE_SRC}" "${LMP_UTILS_SRC}" + PROPERTIES COMPILE_OPTIONS "-std=c++14") +endif() + if(PKG_ATC OR PKG_AWPMD OR PKG_ML-QUIP OR PKG_ML-POD OR PKG_ELECTRODE OR BUILD_TOOLS) enable_language(C) if (NOT USE_INTERNAL_LINALG) diff --git a/cmake/Modules/LAMMPSUtils.cmake b/cmake/Modules/LAMMPSUtils.cmake index bb5ea07609..2ec9d1b706 100644 --- a/cmake/Modules/LAMMPSUtils.cmake +++ b/cmake/Modules/LAMMPSUtils.cmake @@ -83,17 +83,17 @@ function(check_for_autogen_files source_dir) file(GLOB SRC_AUTOGEN_FILES CONFIGURE_DEPENDS ${source_dir}/style_*.h) file(GLOB SRC_AUTOGEN_PACKAGES CONFIGURE_DEPENDS ${source_dir}/packages_*.h) list(APPEND SRC_AUTOGEN_FILES ${SRC_AUTOGEN_PACKAGES} ${source_dir}/lmpinstalledpkgs.h ${source_dir}/lmpgitversion.h) - list(APPEND SRC_AUTOGEN_FILES ${SRC_AUTOGEN_PACKAGES} ${source_dir}/mliap_model_python_couple.h 
${source_dir}/mliap_model_python_couple.cpp) + list(APPEND SRC_AUTOGEN_FILES ${source_dir}/mliap_model_python_couple.h ${source_dir}/mliap_model_python_couple.cpp) foreach(_SRC ${SRC_AUTOGEN_FILES}) get_filename_component(FILENAME "${_SRC}" NAME) if(EXISTS ${source_dir}/${FILENAME}) message(FATAL_ERROR "\n########################################################################\n" - "Found header file(s) generated by the make-based build system\n" - "\n" - "Please run\n" - "make -C ${source_dir} purge\n" - "to remove\n" - "########################################################################") + "Found header file ${source_dir}/${FILENAME} generated by the make-based build system\n" + "\n" + "Please run\n" + "make -C ${source_dir} purge\n" + "to remove\n" + "########################################################################") endif() endforeach() endfunction() diff --git a/cmake/Modules/Packages/KOKKOS.cmake b/cmake/Modules/Packages/KOKKOS.cmake index 6359d9e615..0edd9a3baa 100644 --- a/cmake/Modules/Packages/KOKKOS.cmake +++ b/cmake/Modules/Packages/KOKKOS.cmake @@ -50,8 +50,8 @@ if(DOWNLOAD_KOKKOS) list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_CXX_EXTENSIONS=${CMAKE_CXX_EXTENSIONS}") list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}") include(ExternalProject) - set(KOKKOS_URL "https://github.com/kokkos/kokkos/archive/4.1.00.tar.gz" CACHE STRING "URL for KOKKOS tarball") - set(KOKKOS_MD5 "a5f096bd8ad01b97fdc7a32583b17a33" CACHE STRING "MD5 checksum of KOKKOS tarball") + set(KOKKOS_URL "https://github.com/kokkos/kokkos/archive/4.2.00.tar.gz" CACHE STRING "URL for KOKKOS tarball") + set(KOKKOS_MD5 "731647b61a4233f568d583702e9cd6d1" CACHE STRING "MD5 checksum of KOKKOS tarball") mark_as_advanced(KOKKOS_URL) mark_as_advanced(KOKKOS_MD5) GetFallbackURL(KOKKOS_URL KOKKOS_FALLBACK) @@ -76,7 +76,7 @@ if(DOWNLOAD_KOKKOS) add_dependencies(LAMMPS::KOKKOSCORE kokkos_build) add_dependencies(LAMMPS::KOKKOSCONTAINERS kokkos_build) 
elseif(EXTERNAL_KOKKOS) - find_package(Kokkos 4.1.00 REQUIRED CONFIG) + find_package(Kokkos 4.2.00 REQUIRED CONFIG) target_link_libraries(lammps PRIVATE Kokkos::kokkos) else() set(LAMMPS_LIB_KOKKOS_SRC_DIR ${LAMMPS_LIB_SOURCE_DIR}/kokkos) diff --git a/cmake/presets/kokkos-cuda.cmake b/cmake/presets/kokkos-cuda.cmake index ace8ff0879..c3ee081898 100644 --- a/cmake/presets/kokkos-cuda.cmake +++ b/cmake/presets/kokkos-cuda.cmake @@ -6,6 +6,8 @@ set(Kokkos_ENABLE_SERIAL ON CACHE BOOL "" FORCE) set(Kokkos_ENABLE_CUDA ON CACHE BOOL "" FORCE) set(Kokkos_ARCH_PASCAL60 ON CACHE BOOL "" FORCE) set(BUILD_OMP ON CACHE BOOL "" FORCE) +get_filename_component(NVCC_WRAPPER_CMD ${CMAKE_CURRENT_SOURCE_DIR}/../lib/kokkos/bin/nvcc_wrapper ABSOLUTE) +set(CMAKE_CXX_COMPILER ${NVCC_WRAPPER_CMD} CACHE FILEPATH "" FORCE) # hide deprecation warnings temporarily for stable release set(Kokkos_ENABLE_DEPRECATION_WARNINGS OFF CACHE BOOL "" FORCE) diff --git a/doc/lammps.1 b/doc/lammps.1 index 79964d1680..100ea9b663 100644 --- a/doc/lammps.1 +++ b/doc/lammps.1 @@ -1,7 +1,7 @@ -.TH LAMMPS "1" "2 August 2023" "2023-08-2" +.TH LAMMPS "1" "21 November 2023" "2023-11-21" .SH NAME .B LAMMPS -\- Molecular Dynamics Simulator. Version 2 August 2023 +\- Molecular Dynamics Simulator. Version 21 November 2023 .SH SYNOPSIS .B lmp diff --git a/doc/src/Build_extras.rst b/doc/src/Build_extras.rst index 7a7b5cf0d5..1f643a9d14 100644 --- a/doc/src/Build_extras.rst +++ b/doc/src/Build_extras.rst @@ -626,22 +626,22 @@ They must be specified in uppercase. 
* - HOPPER90 - GPU - NVIDIA Hopper generation CC 9.0 GPU - * - VEGA900 + * - AMD_GFX906 - GPU - - AMD GPU MI25 GFX900 - * - VEGA906 + - AMD GPU MI50/MI60 + * - AMD_GFX908 - GPU - - AMD GPU MI50/MI60 GFX906 - * - VEGA908 + - AMD GPU MI100 + * - AMD_GFX90A - GPU - - AMD GPU MI100 GFX908 - * - VEGA90A + - AMD GPU MI200 + * - AMD_GFX942 - GPU - - AMD GPU MI200 GFX90A - * - NAVI1030 + - AMD GPU MI300 + * - AMD_GFX1030 - GPU - AMD GPU V620/W6800 - * - NAVI1100 + * - AMD_GFX1100 - GPU - AMD GPU RX7900XTX * - INTEL_GEN @@ -666,7 +666,7 @@ They must be specified in uppercase. - GPU - Intel GPU Ponte Vecchio -This list was last updated for version 4.0.1 of the Kokkos library. +This list was last updated for version 4.2 of the Kokkos library. .. tabs:: diff --git a/doc/src/Commands_removed.rst b/doc/src/Commands_removed.rst index 84cc534304..98a52fc2d7 100644 --- a/doc/src/Commands_removed.rst +++ b/doc/src/Commands_removed.rst @@ -88,7 +88,7 @@ The same functionality is available through MPIIO package ------------- -.. deprecated:: TBD +.. deprecated:: 21Nov2023 The MPIIO package has been removed from LAMMPS since it was unmaintained for many years and thus not updated to incorporate required changes that @@ -107,7 +107,7 @@ see :doc:`restart `, :doc:`read_restart `, MSCG package ------------ -.. deprecated:: TBD +.. deprecated:: 21Nov2023 The MSCG package has been removed from LAMMPS since it was unmaintained for many years and instead superseded by the `OpenMSCG software @@ -126,6 +126,17 @@ syntax compatible with the removed reax pair style, so input files will have to be adapted. The REAXFF package was originally called USER-REAXC. +USER-REAXC package +------------------ + +.. deprecated:: TBD + +The USER-REAXC package has been renamed to :ref:`REAXFF `. +In the process also the pair style and related fixes were renamed to use +the "reaxff" string instead of "reax/c". For a while LAMMPS was maintaining +backward compatibility by providing aliases for the styles. 
These have +been removed, so using "reaxff" is now *required*. + USER-CUDA package ----------------- diff --git a/doc/src/Developer_unittest.rst b/doc/src/Developer_unittest.rst index 9886e9e4b4..67c5ce365a 100644 --- a/doc/src/Developer_unittest.rst +++ b/doc/src/Developer_unittest.rst @@ -180,19 +180,11 @@ discarded but by setting the verbose flag (via setting the ``TEST_ARGS`` environment variable, ``TEST_ARGS=-v``) it can be printed and used to understand why tests fail unexpectedly. -Another complexity of these tests stems from the need to capture -situations where LAMMPS will stop with an error, i.e. handle so-called -"death tests". Here the LAMMPS code will operate differently depending -on whether it was configured to throw C++ exceptions on errors or call -either ``exit()`` or ``MPI_Abort()``. In the latter case, the test code -also needs to detect whether LAMMPS was compiled with the OpenMPI -library, as OpenMPI is **only** compatible the death test options of the -GoogleTest library when C++ exceptions are enabled; otherwise those -"death tests" must be skipped to avoid reporting bogus failures. The -specifics of this step are implemented in the ``TEST_FAILURE()`` -macro. These tests operate by capturing the screen output when executing -the failing command and then comparing that with a provided regular -expression string pattern. Example: +The specifics of so-called "death tests", i.e. conditions where LAMMPS +should fail and throw an exception, are implemented in the +``TEST_FAILURE()`` macro. These tests operate by capturing the screen +output when executing the failing command and then comparing that with a +provided regular expression string pattern. Example: .. 
code-block:: c++ diff --git a/doc/src/Fortran.rst b/doc/src/Fortran.rst index 913c31842e..76fdff753a 100644 --- a/doc/src/Fortran.rst +++ b/doc/src/Fortran.rst @@ -3038,14 +3038,6 @@ Procedures Bound to the :f:type:`lammps` Derived Type This function can be used to query if an error inside of LAMMPS has thrown a :ref:`C++ exception `. - .. note:: - - This function will always report "no error" when the LAMMPS library - has been compiled without ``-DLAMMPS_EXCEPTIONS``, which turns fatal - errors aborting LAMMPS into C++ exceptions. You can use the library - function :cpp:func:`lammps_config_has_exceptions` to check if this is - the case. - :to: :cpp:func:`lammps_has_error` :r has_error: ``.TRUE.`` if there is an error. :rtype has_error: logical @@ -3068,13 +3060,6 @@ Procedures Bound to the :f:type:`lammps` Derived Type would happen only in a single MPI rank and thus may not be recoverable, as other MPI ranks may be waiting on the failing MPI rank(s) to send messages. - .. note:: - - This function will do nothing when the LAMMPS library has been - compiled without ``-DLAMMPS_EXCEPTIONS``, which turns errors aborting - LAMMPS into C++ exceptions. You can use the function - :f:func:`config_has_exceptions` to check whether this is the case. - :p character(len=\*) buffer: string buffer to copy the error message into :o integer(c_int) status [optional]: 1 when all ranks had the error, 2 on a single-rank error. diff --git a/doc/src/Howto.rst b/doc/src/Howto.rst index b1f5da8abc..85c98bd6de 100644 --- a/doc/src/Howto.rst +++ b/doc/src/Howto.rst @@ -101,6 +101,7 @@ Tutorials howto Howto_cmake Howto_github Howto_lammps_gui + Howto_moltemplate Howto_pylammps Howto_wsl diff --git a/doc/src/Howto_body.rst b/doc/src/Howto_body.rst index 88fa2d9c97..115b7797c8 100644 --- a/doc/src/Howto_body.rst +++ b/doc/src/Howto_body.rst @@ -170,9 +170,9 @@ with this body style to compute body/body and body/non-body interactions. 
The *rounded/polygon* body style represents body particles as a 2d polygon with a variable number of N vertices. This style can only be used for 2d models; see the :doc:`boundary ` command. See the -"pair_style body/rounded/polygon" page for a diagram of two -squares with rounded circles at the vertices. Special cases for N = 1 -(circle) and N = 2 (rod with rounded ends) can also be specified. +:doc:`pair_style body/rounded/polygon ` page for +a diagram of two squares with rounded circles at the vertices. Special cases +for N = 1 (circle) and N = 2 (rod with rounded ends) can also be specified. One use of this body style is for 2d discrete element models, as described in :ref:`Fraige `. diff --git a/doc/src/Howto_moltemplate.rst b/doc/src/Howto_moltemplate.rst new file mode 100644 index 0000000000..bb068a2e93 --- /dev/null +++ b/doc/src/Howto_moltemplate.rst @@ -0,0 +1,371 @@ +Moltemplate Tutorial +==================== + +In this tutorial, we are going to use the tool :ref:`Moltemplate +` to set up a classical molecular dynamics simulation using +the :ref:`OPLS-AA force field <OPLSAA96>`. The first +task is to describe an organic compound and create a complete input deck +for LAMMPS. The second task is to map the OPLS-AA force field to a +molecular sample created with an external tool, e.g. PACKMOL, and +exported as a PDB file. The files used in this tutorial can be found +in the ``tools/moltemplate/tutorial-files`` folder of the LAMMPS +source code distribution. + +Simulating an organic solvent +""""""""""""""""""""""""""""" + +This example aims to create a cubic box of the organic solvent +formamide. + +The first step is to create a molecular topology in the +LAMMPS-template (LT) file format representing a single molecule, which +will be stored in a Moltemplate object called ``_FAM inherits OPLSAA {}``. 
+This command states that the object ``_FAM`` is based on an existing +object called ``OPLSAA``, which contains OPLS-AA parameters, atom type +definitions, partial charges, masses and bond-angle rules for many organic +and biological compounds. + +The atomic structure is the starting point to populate the command +``write('Data Atoms') {}``, which will write the ``Atoms`` section in the +LAMMPS data file. The OPLS-AA force field uses the ``atom_style full``, +therefore, this column format is used: +``# atomID molID atomType charge coordX coordY coordZ``. +The ``atomID``\ s are replaced with Moltemplate ``$``-type variables, which +are then substituted with unique numerical IDs. The same logic is applied +to the ``molID``, except that the same variable is used for the whole +molecule. The atom types are assigned using ``@``-type variables. The +assignment of atom types (e.g. ``@atom:177``, ``@atom:178``) is done using +the OPLS-AA atom types defined in the "In Charges" section of the file +``oplsaa.lt``, looking for a reasonable match with the description of the atom. +The resulting file (``formamide.lt``) follows: + +.. code-block:: bash + + _FAM inherits OPLSAA { + + # atomID molID atomType charge coordX coordY coordZ + write('Data Atoms') { + $atom:C00 $mol @atom:177 0.00 0.100 0.490 0.0 + $atom:O01 $mol @atom:178 0.00 1.091 -0.250 0.0 + $atom:N02 $mol @atom:179 0.00 -1.121 -0.181 0.0 + $atom:H03 $mol @atom:182 0.00 -2.013 0.272 0.0 + $atom:H04 $mol @atom:182 0.00 -1.056 -1.190 0.0 + $atom:H05 $mol @atom:221 0.00 0.144 1.570 0.0 + } + + # A list of the bonds in the molecule: + # BondID AtomID1 AtomID2 + write('Data Bond List') { + $bond:C1 $atom:C00 $atom:O01 + $bond:C2 $atom:C00 $atom:H05 + $bond:C3 $atom:C00 $atom:N02 + $bond:C4 $atom:N02 $atom:H03 + $bond:C5 $atom:N02 $atom:H04 + } + } + +You don't have to specify the charge in this example because they will +be assigned according to the atom type. 
Analogously, only a +"Data Bond List" section is needed as the atom type will determine the +bond type. The other bonded interactions (e.g. angles, +dihedrals, and impropers) will be automatically generated by +Moltemplate. + +If the simulation is non-neutral, or Moltemplate complains that you have +missing bond, angle, or dihedral types, this means at least one of your +atom types is incorrect. + +The second step is to create a master file with instructions to build a +starting structure and the LAMMPS commands to run an NPT simulation. The +master file (``solv_01.lt``) follows: + +.. code-block:: bash + + # Import the force field. + import /usr/local/moltemplate/moltemplate/force_fields/oplsaa.lt + import formamide.lt # after oplsaa.lt, as it depends on it. + + # Create the input sample. + solv = new _FAM [5].move( 4.6, 0, 0) + [5].move( 0, 4.6, 0) + [5].move( 0, 0, 4.6) + solv[*][*][*].move(-11.5, -11.5, -11.5) + + # Set the simulation box. + write_once("Data Boundary") { + -11.5 11.5 xlo xhi + -11.5 11.5 ylo yhi + -11.5 11.5 zlo zhi + } + + # Create an input deck for LAMMPS. + write_once("In Init"){ + # Input variables. + variable run string solv_01 # output name + variable ts equal 1 # timestep + variable temp equal 300 # equilibrium temperature + variable p equal 1. # equilibrium pressure + variable d equal 1000 # output frequency + variable equi equal 5000 # Equilibration steps + variable prod equal 30000 # Production steps + + # PBC (set them before the creation of the box). + boundary p p p + } + + # Run an NPT simulation. + write_once("In Run"){ + # Derived variables. + variable tcouple equal \$\{ts\}*100 + variable pcouple equal \$\{ts\}*1000 + + # Output. + thermo \$d + thermo_style custom step etotal evdwl ecoul elong ebond eangle & + edihed eimp ke pe temp press vol density cpu + thermo_modify flush yes + + # Trajectory. + dump TRJ all dcd \$d \$\{run\}.dcd + dump_modify TRJ unwrap yes + + # Thermalisation and relaxation, NPT ensemble. 
+ timestep \$\{ts\} + fix NPT all npt temp \$\{temp\} \$\{temp\} \$\{tcouple\} iso \$p \$p \$\{pcouple\} + velocity all create \$\{temp\} 858096 dist gaussian + # Short runs to update the PPPM settings as the box shrinks. + run \$\{equi\} post no + run \$\{equi\} post no + run \$\{equi\} post no + run \$\{equi\} + # From now on, the density shouldn't change too much. + run \$\{prod\} + unfix NPT + } + +The first two commands insert the content of files ``oplsaa.lt`` and +``formamide.lt`` into the master file. At this point, we can use the +command ``solv = new _FAM [N]`` to create N copies of a molecule of type +``_FAM``. In this case, we create an array of 5*5*5 molecules on a cubic +grid using the coordinate transformation command ``.move( 4.6, 0, 0)``. +See the Moltemplate documentation to learn more about the syntax. As +the sample was created from scratch, we also specify the simulation box +size in the "Data Boundary" section. + +The LAMMPS settings for the force field are specified in the file +``oplsaa.lt`` and are written automatically in the input deck. We also +specify the boundary conditions and a set of variables in +the "In Init" section. The remaining commands to run an NPT simulation +are written in the "In Run" section. Note that in this script, LAMMPS +variables are protected with the escape character ``\`` to distinguish +them from Moltemplate variables, e.g. ``\$\{run\}`` is a LAMMPS +variable that is written in the input deck as ``${run}``. + +Compile the master file with: + +.. code-block:: bash + + moltemplate.sh -overlay-all solv_01.lt + +And execute the simulation with the following: + +.. code-block:: bash + + mpirun -np 4 lmp -in solv_01.in -l solv_01.log + +.. figure:: JPG/solv_01.png + :figwidth: 80% + :figclass: align-center + + Snapshot of the sample at the beginning and end of the simulation. + Rendered with Ovito. 
+Mapping an existing structure +""""""""""""""""""""""""""""" + +Another helpful way to use Moltemplate is mapping an existing molecular +sample to a force field. This is useful when a complex sample is +assembled from different simulations or created with specialized +software (e.g. PACKMOL). As in the previous example, all molecular +species in the sample must be defined using single-molecule Moltemplate +objects. For this example, we use a short polymer in a box containing +water molecules and ions in the PDB file ``model.pdb``. + +It is essential to understand that the order of atoms in the PDB file +and in the Moltemplate master script must match, as we are using the +coordinates from the PDB file in the order they appear. The order of +atoms and molecules in the PDB file provided is as follows: + +- 500 water molecules, with atoms ordered in this sequence: + + .. parsed-literal:: + + ATOM 1 O MOL D 1 5.901 7.384 1.103 0.00 0.00 DUM + ATOM 2 H MOL D 1 6.047 8.238 0.581 0.00 0.00 DUM + ATOM 3 H MOL D 1 6.188 7.533 2.057 0.00 0.00 DUM + +- 1 polymer molecule. +- 1 Ca\ :sup:`2+` ion. +- 2 Cl\ :sup:`-` ions. + +In the master LT file, this sequence of molecules is matched with the +following commands: + +.. code-block:: bash + + # Create the sample. + wat=new SPC[500] + pol=new PolyNIPAM[1] + cat=new Ca[1] + ani=new Cl[2] + +Note that the first command would create 500 water molecules in the +same position in space, and the other commands will use the coordinates +specified in the corresponding molecular topology block. However, the +coordinates will be overwritten by rendering an external atomic +structure file. Note that if the same molecule species are scattered in +the input structure, it is recommended to reorder and group them together +by molecule type to facilitate the creation of the input sample. + +The molecular topology for the polymer is created as in the previous +example, with the atom types assigned as in the following schema: + +.. 
figure:: JPG/PolyNIPAM.jpg + :scale: 30% + :align: center + + Atom types assigned to the polymer's repeating unit. + +The molecular topology of the water and ions is stated directly into +the master file for the sake of space, but they could also be written +in a separate file(s) and imported before the sample is created. + +The resulting master LT file defining short annealing at a fixed volume +(NVT) follows: + +.. code-block:: bash + + # Use the OPLS-AA force field for all species. + import /usr/local/moltemplate/moltemplate/force_fields/oplsaa.lt + import PolyNIPAM.lt + + # Define the SPC water and ions as in the OPLS-AA + Ca inherits OPLSAA { + write("Data Atoms"){ + $atom:a1 $mol:. @atom:354 0.0 0.00000 0.00000 0.000000 + } + } + Cl inherits OPLSAA { + write("Data Atoms"){ + $atom:a1 $mol:. @atom:344 0.0 0.00000 0.00000 0.000000 + } + } + SPC inherits OPLSAA { + write("Data Atoms"){ + $atom:O $mol:. @atom:76 0. 0.0000000 0.00000 0.000000 + $atom:H1 $mol:. @atom:77 0. 0.8164904 0.00000 0.5773590 + $atom:H2 $mol:. @atom:77 0. -0.8164904 0.00000 0.5773590 + } + write("Data Bond List") { + $bond:OH1 $atom:O $atom:H1 + $bond:OH2 $atom:O $atom:H2 + } + } + + # Create the sample. + wat=new SPC[500] + pol=new PolyNIPAM[1] + cat=new Ca[1] + ani=new Cl[2] + + # Periodic boundary conditions: + write_once("Data Boundary"){ + 0 26 xlo xhi + 0 26 ylo yhi + 0 26 zlo zhi + } + + # Define the input variables. + write_once("In Init"){ + # Input variables. + variable run string sample01 # output name + variable ts equal 2 # timestep + variable temp equal 298.15 # equilibrium temperature + variable p equal 1. # equilibrium pressure + variable equi equal 30000 # equilibration steps + + # PBC (set them before the creation of the box). + boundary p p p + neighbor 3 bin + } + + # Run an NVT simulation. + write_once("In Run"){ + # Set the output. 
+ thermo 1000 + thermo_style custom step etotal evdwl ecoul elong ebond eangle & + edihed eimp pe ke temp press atoms vol density cpu + thermo_modify flush yes + compute pe1 all pe/atom pair + dump TRJ all custom 100 \$\{run\}.dump id xu yu zu c_pe1 + + # Minimise the input structure, just in case. + minimize .01 .001 1000 100000 + write_data \$\{run\}.min + + # Set the constraints. + group watergroup type @atom:76 @atom:77 + fix 0 watergroup shake 0.0001 10 0 b @bond:042_043 a @angle:043_042_043 + + # Short annealing. + timestep \$\{ts\} + fix 1 all nvt temp \$\{temp\} \$\{temp\} \$(100*dt) + velocity all create \$\{temp\} 315443 + run \$\{equi\} + unfix 1 + } + + +In this example, the water model is SPC and it is defined in the +``oplsaa.lt`` file with atom types ``@atom:76`` and ``@atom:77``. For +water we also use the ``group`` and ``fix shake`` commands with +Moltemplate ``@``-type variables, to ensure consistency with the +numerical values assigned during compilation. To identify the bond and +angle types, look for the extended ``@atom`` IDs, which in this case +are: + +.. code-block:: bash + + replace{ @atom:76 @atom:76_b042_a042_d042_i042 } + replace{ @atom:77 @atom:77_b043_a043_d043_i043 } + +From which we can identify the following "Data Bonds By Type": +``@bond:042_043 @atom:*_b042*_a*_d*_i* @atom:*_b043*_a*_d*_i*`` and +"Data Angles By Type": ``@angle:043_042_043 @atom:*_b*_a043*_d*_i* +@atom:*_b*_a042*_d*_i* @atom:*_b*_a043*_d*_i*`` + +Compile the master file with: + +.. code-block:: bash + + moltemplate.sh -overlay-all -pdb model.pdb sample01.lt + +And execute the simulation with the following: + +.. code-block:: bash + + mpirun -np 4 lmp -in sample01.in -l sample01.log + +.. figure:: JPG/sample01.png + :figwidth: 50% + :figclass: align-center + + Sample visualized with Ovito loading the trajectory into the DATA + file written after minimization. + +------------ + +.. 
_OPLSAA96: + +**(OPLS-AA)** Jorgensen, Maxwell, Tirado-Rives, J Am Chem Soc, +118(45), 11225-11236 (1996). diff --git a/doc/src/JPG/PolyNIPAM.jpg b/doc/src/JPG/PolyNIPAM.jpg new file mode 100644 index 0000000000..4ad3ce8274 Binary files /dev/null and b/doc/src/JPG/PolyNIPAM.jpg differ diff --git a/doc/src/JPG/sample01.png b/doc/src/JPG/sample01.png new file mode 100644 index 0000000000..3a00176edd Binary files /dev/null and b/doc/src/JPG/sample01.png differ diff --git a/doc/src/JPG/solv_01.png b/doc/src/JPG/solv_01.png new file mode 100644 index 0000000000..fc52d44928 Binary files /dev/null and b/doc/src/JPG/solv_01.png differ diff --git a/doc/src/Library.rst b/doc/src/Library.rst index 09561cda82..50c28b7fcd 100644 --- a/doc/src/Library.rst +++ b/doc/src/Library.rst @@ -80,13 +80,15 @@ run LAMMPS in serial mode. :class: note If the LAMMPS executable encounters an error condition, it will abort - after printing an error message. For a library interface this is - usually not desirable. Thus LAMMPS can be compiled to to :ref:`throw - a C++ exception ` instead. If enabled, the library - functions will catch those exceptions and return. The error status - :cpp:func:`can be queried ` and an :cpp:func:`error - message retrieved `. We thus - recommend enabling C++ exceptions when using the library interface, + after printing an error message. It does so by catching the + exceptions that LAMMPS could throw. For a C library interface this + is usually not desirable since the calling code might lack the + ability to catch such exceptions. Thus, the library functions will + catch those exceptions and return from the affected functions. The + error status :cpp:func:`can be queried ` and an + :cpp:func:`error message retrieved `. + This is, for example, used by the :doc:`LAMMPS python module + ` and then a suitable Python exception is thrown. .. 
admonition:: Using the C library interface as a plugin :class: note diff --git a/doc/src/Python_error.rst b/doc/src/Python_error.rst index f6a94c0e82..6aec8df391 100644 --- a/doc/src/Python_error.rst +++ b/doc/src/Python_error.rst @@ -15,9 +15,7 @@ Python exception handling mechanism. try: # LAMMPS will normally terminate itself and the running process if an error - # occurs. This would kill the Python interpreter. To avoid this, make sure to - # compile with LAMMPS_EXCEPTIONS enabled. This ensures the library API calls - # will not terminate the parent process. Instead, the library wrapper will + # occurs. This would kill the Python interpreter. The library wrapper will # detect that an error has occured and throw a Python exception lmp.command('unknown') diff --git a/doc/src/Python_install.rst b/doc/src/Python_install.rst index c4fbec0be4..01610b84f0 100644 --- a/doc/src/Python_install.rst +++ b/doc/src/Python_install.rst @@ -5,8 +5,7 @@ The LAMMPS Python module enables calling the :ref:`LAMMPS C library API ` from Python by dynamically loading functions in the LAMMPS shared library through the Python `ctypes `_ module. Because of the dynamic loading, it is required that LAMMPS is -compiled in :ref:`"shared" mode `. It is also recommended to -compile LAMMPS with :ref:`C++ exceptions ` enabled. +compiled in :ref:`"shared" mode `. Two components are necessary for Python to be able to invoke LAMMPS code: diff --git a/doc/src/compute_composition_atom.rst b/doc/src/compute_composition_atom.rst index b7890fff8b..e973eaa234 100644 --- a/doc/src/compute_composition_atom.rst +++ b/doc/src/compute_composition_atom.rst @@ -36,7 +36,7 @@ Examples Description """"""""""" -.. versionadded:: TBD +.. versionadded:: 21Nov2023 Define a computation that calculates a local composition vector for each atom. 
For a central atom with :math:`M` neighbors within the neighbor cutoff sphere, diff --git a/doc/src/compute_property_atom.rst b/doc/src/compute_property_atom.rst index 5dbf600c36..b03d6eb74e 100644 --- a/doc/src/compute_property_atom.rst +++ b/doc/src/compute_property_atom.rst @@ -128,9 +128,9 @@ Attributes *i_name*, *d_name*, *i2_name*, *d2_name* refer to custom per-atom integer and floating-point vectors or arrays that have been added via the :doc:`fix property/atom ` command. When that command is used specific names are given to each attribute -which are the "name" portion of these attributes. For arrays *i2_name* -and *d2_name*, the column of the array must also be included following -the name in brackets (e.g., d2_xyz[2] or i2_mySpin[3]). +which are the "name" portion of these attributes. For arrays +*i2_name* and *d2_name*, the column of the array must also be included +following the name in brackets (e.g., d2_xyz[2] or i2_mySpin[3]). The additional quantities only accessible via this command, and not directly via the :doc:`dump custom ` command, are as follows. diff --git a/doc/src/compute_property_grid.rst b/doc/src/compute_property_grid.rst index 20a4f19605..dfdce220c6 100644 --- a/doc/src/compute_property_grid.rst +++ b/doc/src/compute_property_grid.rst @@ -61,7 +61,7 @@ varying fastest, then Y, then Z slowest. For 2d grids (in 2d simulations), the grid IDs range from 1 to Nx*Ny, with X varying fastest and Y slowest. -.. versionadded:: TBD +.. versionadded:: 21Nov2023 The *proc* attribute is the ID of the processor which owns the grid cell. Processor IDs range from 0 to Nprocs - 1, where Nprocs is the diff --git a/doc/src/compute_reduce.rst b/doc/src/compute_reduce.rst index 6820d2ee04..604b1c1571 100644 --- a/doc/src/compute_reduce.rst +++ b/doc/src/compute_reduce.rst @@ -201,7 +201,7 @@ information in this context, the *replace* keywords will extract the atom IDs for the two atoms in the bond of maximum stretch. 
These atom IDs and the bond stretch will be printed with thermodynamic output. -.. versionadded:: TBD +.. versionadded:: 21Nov2023 The *inputs* keyword allows selection of whether all the inputs are per-atom or local quantities. As noted above, all the inputs must be diff --git a/doc/src/compute_voronoi_atom.rst b/doc/src/compute_voronoi_atom.rst index 9607401ccd..3bada09518 100644 --- a/doc/src/compute_voronoi_atom.rst +++ b/doc/src/compute_voronoi_atom.rst @@ -190,7 +190,7 @@ Voro++ software in the src/VORONOI/README file. Output info """"""""""" -.. deprecated:: TBD +.. deprecated:: 21Nov2023 The *peratom* keyword was removed as it is no longer required. diff --git a/doc/src/compute_xrd.rst b/doc/src/compute_xrd.rst index 8673ce9199..18bb4c886f 100644 --- a/doc/src/compute_xrd.rst +++ b/doc/src/compute_xrd.rst @@ -62,28 +62,29 @@ equations: \frac{\sin(\theta)}{\lambda} &= \frac{\left\lVert\mathbf{k}\right\rVert}{2} Here, :math:`\mathbf{k}` is the location of the reciprocal lattice node, -:math:`r_j` is the position of each atom, :math:`f_j` are atomic scattering -factors, *Lp* is the Lorentz-polarization factor, and :math:`\theta` is the -scattering angle of diffraction. The Lorentz-polarization factor can be turned -off using the optional *LP* keyword. +:math:`r_j` is the position of each atom, :math:`f_j` are atomic +scattering factors, *Lp* is the Lorentz-polarization factor, and +:math:`\theta` is the scattering angle of diffraction. The +Lorentz-polarization factor can be turned off using the optional *LP* +keyword. Diffraction intensities are calculated on a three-dimensional mesh of -reciprocal lattice nodes. The mesh spacing is defined either (a) by the entire -simulation domain or (b) manually using selected values as -shown in the 2D diagram below. +reciprocal lattice nodes. The mesh spacing is defined either (a) by the +entire simulation domain or (b) manually using selected values as shown +in the 2D diagram below. -.. image:: img/xrd_mesh.jpg +.. 
image:: img/xrd_mesh.png :scale: 75% :align: center For a mesh defined by the simulation domain, a rectilinear grid is constructed with spacing :math:`c A^{-1}` along each reciprocal lattice -axis, where :math:`A` is a matrix containing the vectors corresponding to the -edges of the simulation cell. If one or two directions has non-periodic -boundary conditions, then the spacing in these directions is defined from the -average of the (inversed) box lengths with periodic boundary conditions. -Meshes defined by the simulation domain must contain at least one periodic -boundary. +axis, where :math:`A` is a matrix containing the vectors corresponding +to the edges of the simulation cell. If one or two directions have +non-periodic boundary conditions, then the spacing in these directions +is defined from the average of the (inverse) box lengths with periodic +boundary conditions. Meshes defined by the simulation domain must +contain at least one periodic boundary. If the *manual* flag is included, the mesh of reciprocal lattice nodes will be defined using the *c* values for the spacing along each diff --git a/doc/src/dump.rst b/doc/src/dump.rst index e5885dc25d..2d1598e493 100644 --- a/doc/src/dump.rst +++ b/doc/src/dump.rst @@ -613,7 +613,7 @@ when running on large numbers of processors. Note that using the "\*" and "%" characters together can produce a large number of small dump files! -.. deprecated:: TBD +.. deprecated:: 21Nov2023 The MPIIO package and the the corresponding "/mpiio" dump styles, except for the unrelated "netcdf/mpiio" style were removed from LAMMPS. @@ -805,16 +805,16 @@ computes, fixes, or variables when they are evaluated, so this is a very general means of creating quantities to output to a dump file. The *i_name*, *d_name*, *i2_name*, *d2_name* attributes refer to -per-atom integer and floating-point vectors or arrays that have been -added via the :doc:`fix property/atom ` command.
-When that command is used specific names are given to each attribute -which are the "name" portion of these keywords. For arrays *i2_name* -and *d2_name*, the column of the array must also be included following -the name in brackets (e.g., d2_xyz[i], i2_mySpin[i], where :math:`i` is -in the range from 1 to :math:`M`, where :math:`M` is the number of -columns in the custom array). See the discussion above for how :math:`i` -can be specified with a wildcard asterisk to effectively specify -multiple values. +custom per-atom integer and floating-point vectors or arrays that have +been added via the :doc:`fix property/atom ` +command. When that command is used specific names are given to each +attribute which are the "name" portion of these keywords. For arrays +*i2_name* and *d2_name*, the column of the array must also be included +following the name in brackets (e.g., d2_xyz[i], i2_mySpin[i], where +:math:`i` is in the range from 1 to :math:`M`, where :math:`M` is the +number of columns in the custom array). See the discussion above for +how :math:`i` can be specified with a wildcard asterisk to effectively +specify multiple values. See the :doc:`Modify ` page for information on how to add new compute and fix styles to LAMMPS to calculate per-atom quantities diff --git a/doc/src/dump_image.rst b/doc/src/dump_image.rst index 4e227d2f72..3102caaa97 100644 --- a/doc/src/dump_image.rst +++ b/doc/src/dump_image.rst @@ -599,7 +599,7 @@ image will appear. The *sfactor* value must be a value 0.0 <= *sfactor* <= 1.0, where *sfactor* = 1 is a highly reflective surface and *sfactor* = 0 is a rough non-shiny surface. -.. versionadded:: TBD +.. versionadded:: 21Nov2023 The *fsaa* keyword can be used with the dump image command to improve the image quality by enabling full scene anti-aliasing. 
Internally the diff --git a/doc/src/fix_adapt.rst b/doc/src/fix_adapt.rst index 86eec3eadb..9cfbef7a11 100644 --- a/doc/src/fix_adapt.rst +++ b/doc/src/fix_adapt.rst @@ -205,6 +205,8 @@ formulas for the meaning of these parameters: +------------------------------------------------------------------------------+--------------------------------------------------+-------------+ | :doc:`pace, pace/extrapolation ` | scale | type pairs | +------------------------------------------------------------------------------+--------------------------------------------------+-------------+ +| :doc:`quip ` | scale | type global | ++------------------------------------------------------------------------------+--------------------------------------------------+-------------+ | :doc:`snap ` | scale | type pairs | +------------------------------------------------------------------------------+--------------------------------------------------+-------------+ | :doc:`spin/dmi ` | coulombic_cutoff | type global | @@ -315,21 +317,21 @@ Currently *bond* does not support bond_style hybrid nor bond_style hybrid/overlay as bond styles. 
The bond styles that currently work with fix_adapt are -+------------------------------------+-------+-----------------+ ++------------------------------------+------------+------------+ | :doc:`class2 ` | r0 | type bonds | -+------------------------------------+-------+-----------------+ ++------------------------------------+------------+------------+ | :doc:`fene ` | k,r0 | type bonds | -+------------------------------------+-------+-----------------+ ++------------------------------------+------------+------------+ | :doc:`fene/nm ` | k,r0 | type bonds | -+------------------------------------+-------+-----------------+ ++------------------------------------+------------+------------+ | :doc:`gromos ` | k,r0 | type bonds | -+------------------------------------+-------+-----------------+ ++------------------------------------+------------+------------+ | :doc:`harmonic ` | k,r0 | type bonds | -+------------------------------------+-------+-----------------+ ++------------------------------------+------------+------------+ | :doc:`morse ` | r0 | type bonds | -+------------------------------------+-------+-----------------+ ++------------------------------------+------------+------------+ | :doc:`nonlinear ` | epsilon,r0 | type bonds | -+------------------------------------+-------+-----------------+ ++------------------------------------+------------+------------+ ---------- @@ -353,11 +355,11 @@ Currently *angle* does not support angle_style hybrid nor angle_style hybrid/overlay as angle styles. 
The angle styles that currently work with fix_adapt are -+------------------------------------+-------+-----------------+ -| :doc:`harmonic ` | k,theta0 | type angles | -+------------------------------------+-------+-----------------+ -| :doc:`cosine ` | k | type angles | -+------------------------------------+-------+-----------------+ ++------------------------------------+----------+-------------+ +| :doc:`harmonic ` | k,theta0 | type angles | ++------------------------------------+----------+-------------+ +| :doc:`cosine ` | k | type angles | ++------------------------------------+----------+-------------+ Note that internally, theta0 is stored in radians, so the variable this fix uses to reset theta0 needs to generate values in radians. @@ -482,7 +484,7 @@ Restrictions Related commands """""""""""""""" -:doc:`compute ti ` +:doc:`compute ti `, :doc:`fix adapt/fep ` Default """"""" diff --git a/doc/src/fix_adapt_fep.rst b/doc/src/fix_adapt_fep.rst index c35986de49..1b2298cd96 100644 --- a/doc/src/fix_adapt_fep.rst +++ b/doc/src/fix_adapt_fep.rst @@ -307,7 +307,9 @@ the :doc:`run ` command. This fix is not invoked during Restrictions """""""""""" - none + +The keyword "scale yes" is not supported for scaling per-atom parameters +diameter and charge. You can use :doc:`fix adapt ` for those. Related commands """""""""""""""" diff --git a/doc/src/fix_atom_swap.rst b/doc/src/fix_atom_swap.rst index ffd14ebb0f..aa8127561c 100644 --- a/doc/src/fix_atom_swap.rst +++ b/doc/src/fix_atom_swap.rst @@ -181,6 +181,12 @@ This fix is part of the MC package. It is only enabled if LAMMPS was built with that package. See the :doc:`Build package ` doc page for more info. +This fix cannot be used with systems that do not have per-type masses +(e.g. atom style sphere) since the implemented algorithm pre-computes +velocity rescaling factors from per-type masses and ignores any per-atom +masses, if present. In case both per-type and per-atom masses are +present, a warning is printed.
+ Related commands """""""""""""""" diff --git a/doc/src/fix_deposit.rst b/doc/src/fix_deposit.rst index d7c78c5d15..5264999839 100644 --- a/doc/src/fix_deposit.rst +++ b/doc/src/fix_deposit.rst @@ -220,6 +220,8 @@ rotated configuration of the molecule. existing particle. LAMMPS will issue a warning if R is smaller than this value, based on the radii of existing and inserted particles. +.. versionadded:: 21Nov2023 + The *var* and *set* keywords can be used together to provide a criterion for accepting or rejecting the addition of an individual atom, based on its coordinates. The *name* specified for the *var* keyword is the name of an @@ -236,7 +238,7 @@ created atom, one for *y*, and one for *z*. When an atom is created, its is defined. The *var* variable is then evaluated. If the returned value is 0.0, the atom is not created. If it is non-zero, the atom is created. For an example of how to use these keywords, see the -:doc:`create_atoms `command. +:doc:`create_atoms ` command. The *rate* option moves the insertion volume in the z direction (3d) or y direction (2d). This enables particles to be inserted from a diff --git a/doc/src/fix_pimd.rst b/doc/src/fix_pimd.rst index 5b51b97c52..a2e137da25 100644 --- a/doc/src/fix_pimd.rst +++ b/doc/src/fix_pimd.rst @@ -31,7 +31,7 @@ Syntax .. parsed-literal:: *keywords* = *method* or *integrator* or *ensemble* or *fmmode* or *fmass* or *scale* or *temp* or *thermostat* or *tau* or *iso* or *aniso* or *barostat* or *taup* or *fixcom* or *lj* - *method* value = *nmpimd* + *method* value = *nmpimd* (default) or *pimd* *integrator* value = *obabo* or *baoab* *fmmode* value = *physical* or *normal* *fmass* value = scaling factor on mass @@ -137,8 +137,6 @@ normal-mode PIMD. A value of *cmd* is for centroid molecular dynamics the real particle. .. note:: - Fix pimd/langevin only supports *method* value *nmpimd*. This should be enough - for most PIMD applications for quantum thermodynamics purpose. 
Motion of the centroid can be effectively uncoupled from the other normal modes by scaling the fictitious masses to achieve a partial @@ -151,6 +149,16 @@ normal-mode PIMD. A value of *cmd* is for centroid molecular dynamics only the k > 0 modes are thermostatted, not the centroid degrees of freedom. +.. versionadded:: 21Nov2023 + + Mode *pimd* added to fix pimd/langevin. + +Fix pimd/langevin supports the *method* values *nmpimd* and *pimd*. The default value is *nmpimd*. +If *method* is *nmpimd*, the normal mode representation is used to integrate the equations of motion. +The exact solution of the harmonic oscillator is used to propagate the free ring polymer part of the Hamiltonian. +If *method* is *pimd*, the Cartesian representation is used to integrate the equations of motion. +The harmonic force is added to the total force of the system, and a numerical integrator is used to propagate the Hamiltonian. + The keyword *integrator* specifies the Trotter splitting method used by *fix pimd/langevin*. See :ref:`(Liu) ` for a discussion on the OBABO and BAOAB splitting schemes. Typically either of the two should work fine. @@ -207,6 +215,7 @@ The keyword *thermostat* reads *style* and *seed* of thermostat for fix style *p be *PILE_L* (path integral Langevin equation local thermostat, as described in :ref:`Ceriotti `), and *seed* should a positive integer number, which serves as the seed of the pseudo random number generator. .. note:: + The fix style *pimd/langevin* uses the stochastic PILE_L thermostat to control temperature. This thermostat works on the normal modes of the ring polymer. The *tau* parameter controls the centroid mode, and the *scale* parameter controls the non-centroid modes. @@ -269,6 +278,7 @@ related tasks for each of the partitions, e.g. read_restart system_${ibead}.restart2 .. note:: + Fix *pimd/langevin* dumps the Cartesian coordinates, but dumps the velocities and forces in the normal mode representation.
If the Cartesian velocities and forces are needed, it is easy to perform the transformation when doing post-processing. diff --git a/doc/src/img/xrd_mesh.jpg b/doc/src/img/xrd_mesh.jpg deleted file mode 100644 index 677234caab..0000000000 Binary files a/doc/src/img/xrd_mesh.jpg and /dev/null differ diff --git a/doc/src/img/xrd_mesh.png b/doc/src/img/xrd_mesh.png new file mode 100644 index 0000000000..e58c7322a5 Binary files /dev/null and b/doc/src/img/xrd_mesh.png differ diff --git a/doc/src/pair_beck.rst b/doc/src/pair_beck.rst index 2bf027515c..6bb4afdc80 100644 --- a/doc/src/pair_beck.rst +++ b/doc/src/pair_beck.rst @@ -30,11 +30,11 @@ Description Style *beck* computes interactions based on the potential by :ref:`(Beck) `, originally designed for simulation of Helium. It -includes truncation at a cutoff distance Rc. +includes truncation at a cutoff distance :math:`r_c`. .. math:: - E(r) &= A \exp\left[-\alpha r - \beta r^6\right] - \frac{B}{\left(r^2+a^2\right)^3} \left(1+\frac{2.709+3a^2}{r^2+a^2}\right) \qquad r < R_c \\ + E(r) &= A \exp\left[-\alpha r - \beta r^6\right] - \frac{B}{\left(r^2+a^2\right)^3} \left(1+\frac{2.709+3a^2}{r^2+a^2}\right) \qquad r < r_c \\ The following coefficients must be defined for each pair of atoms types via the :doc:`pair_coeff ` command as in the examples @@ -50,7 +50,7 @@ commands. * cutoff (distance units) The last coefficient is optional. If not specified, the global cutoff -:math:`R_c` is used. +:math:`r_c` is used. ---------- diff --git a/doc/src/pair_body_rounded_polyhedron.rst b/doc/src/pair_body_rounded_polyhedron.rst index f2f7c1676a..b3eaf72321 100644 --- a/doc/src/pair_body_rounded_polyhedron.rst +++ b/doc/src/pair_body_rounded_polyhedron.rst @@ -40,7 +40,7 @@ rounded/polyhedron particles. 
This pairwise interaction between the rounded polyhedra is described in :ref:`Wang `, where a polyhedron does not have sharp corners and edges, but is rounded at its vertices and edges by spheres -centered on each vertex with a specified diameter. The edges if the +centered on each vertex with a specified diameter. The edges of the polyhedron are defined between pairs of adjacent vertices. Its faces are defined by a loop of edges. The sphere diameter for each polygon is specified in the data file read by the :doc:`read data ` diff --git a/doc/src/pair_lj_cut_tip4p.rst b/doc/src/pair_lj_cut_tip4p.rst index 7198b60159..d95f356ab7 100644 --- a/doc/src/pair_lj_cut_tip4p.rst +++ b/doc/src/pair_lj_cut_tip4p.rst @@ -58,6 +58,40 @@ Examples Description """"""""""" +The *lj/cut/tip4p* styles implement the TIP4P water model of +:ref:`(Jorgensen) ` and similar models, which introduce a +massless site M located a short distance away from the oxygen atom along +the bisector of the HOH angle. The atomic types of the oxygen and +hydrogen atoms, the bond and angle types for OH and HOH interactions, +and the distance to the massless charge site are specified as pair_style +arguments and are used to identify the TIP4P-like molecules and +determine the position of the M site from the positions of the hydrogen +and oxygen atoms of the water molecules. The M site location is used +for all Coulomb interactions instead of the oxygen atom location, also +with all other atom types, while the location of the oxygen atom is used +for the Lennard-Jones interactions. Style *lj/cut/tip4p/cut* uses a +cutoff for Coulomb interactions; style *lj/cut/tip4p/long* is for use +with a long-range Coulombic solver (Ewald or PPPM). + +.. note:: + + For each TIP4P water molecule in your system, the atom IDs for + the O and 2 H atoms must be consecutive, with the O atom first. This + is to enable LAMMPS to "find" the 2 H atoms associated with each O + atom. 
For example, if the atom ID of an O atom in a TIP4P water + molecule is 500, then its 2 H atoms must have IDs 501 and 502. + +See the :doc:`Howto tip4p ` page for more information +on how to use the TIP4P pair styles and lists of parameters to set. +Note that the neighbor list cutoff for Coulomb interactions is +effectively extended by a distance 2\*qdist when using the TIP4P pair +style, to account for the offset distance of the fictitious charges on +O atoms in water molecules. Thus it is typically best in an +efficiency sense to use a LJ cutoff >= Coulombic cutoff + 2\*qdist, to +shrink the size of the neighbor list. This leads to slightly larger +cost for the long-range calculation, so you can test the trade-off for +your model. + The *lj/cut/tip4p* styles compute the standard 12/6 Lennard-Jones potential, given by @@ -91,34 +125,6 @@ specified for this style means that pairwise interactions within this distance are computed directly; interactions outside that distance are computed in reciprocal space. -The *lj/cut/tip4p* styles implement the TIP4P -water model of :ref:`(Jorgensen) `, which introduces a massless -site located a short distance away from the oxygen atom along the -bisector of the HOH angle. The atomic types of the oxygen and -hydrogen atoms, the bond and angle types for OH and HOH interactions, -and the distance to the massless charge site are specified as -pair_style arguments. Style *lj/cut/tip4p/cut* uses a cutoff for -Coulomb interactions; style *lj/cut/tip4p/long* is for use with a -long-range Coulombic solver (Ewald or PPPM). - -.. note:: - - For each TIP4P water molecule in your system, the atom IDs for - the O and 2 H atoms must be consecutive, with the O atom first. This - is to enable LAMMPS to "find" the 2 H atoms associated with each O - atom. For example, if the atom ID of an O atom in a TIP4P water - molecule is 500, then its 2 H atoms must have IDs 501 and 502. 
- -See the :doc:`Howto tip4p ` page for more information -on how to use the TIP4P pair styles and lists of parameters to set. -Note that the neighbor list cutoff for Coulomb interactions is -effectively extended by a distance 2\*qdist when using the TIP4P pair -style, to account for the offset distance of the fictitious charges on -O atoms in water molecules. Thus it is typically best in an -efficiency sense to use a LJ cutoff >= Coulombic cutoff + 2\*qdist, to -shrink the size of the neighbor list. This leads to slightly larger -cost for the long-range calculation, so you can test the trade-off for -your model. Coefficients """""""""""" diff --git a/doc/src/pair_lj_smooth_linear.rst b/doc/src/pair_lj_smooth_linear.rst index 7a3ba7a3d5..20b5e6cbda 100644 --- a/doc/src/pair_lj_smooth_linear.rst +++ b/doc/src/pair_lj_smooth_linear.rst @@ -31,13 +31,13 @@ Style *lj/smooth/linear* computes a truncated and force-shifted LJ interaction (aka Shifted Force Lennard-Jones) that combines the standard 12/6 Lennard-Jones function and subtracts a linear term based on the cutoff distance, so that both, the potential and the force, go -continuously to zero at the cutoff Rc :ref:`(Toxvaerd) `: +continuously to zero at the cutoff :math:`r_c` :ref:`(Toxvaerd) `: .. math:: \phi\left(r\right) & = 4 \epsilon \left[ \left(\frac{\sigma}{r}\right)^{12} - \left(\frac{\sigma}{r}\right)^6 \right] \\ - E\left(r\right) & = \phi\left(r\right) - \phi\left(R_c\right) - \left(r - R_c\right) \left.\frac{d\phi}{d r} \right|_{r=R_c} \qquad r < R_c + E\left(r\right) & = \phi\left(r\right) - \phi\left(r_c\right) - \left(r - r_c\right) \left.\frac{d\phi}{d r} \right|_{r=r_c} \qquad r < r_c The following coefficients must be defined for each pair of atoms types via the :doc:`pair_coeff ` command as in the examples @@ -77,8 +77,9 @@ tail option for adding long-range tail corrections to energy and pressure, since the energy of the pair interaction is smoothed to 0.0 at the cutoff. 
-This pair style writes its information to :doc:`binary restart files `, so pair_style and pair_coeff commands do not need -to be specified in an input script that reads a restart file. +This pair style writes its information to :doc:`binary restart files `, +so pair_style and pair_coeff commands do not need to be specified +in an input script that reads a restart file. This pair style can only be used via the *pair* keyword of the :doc:`run_style respa ` command. It does not support the diff --git a/doc/src/pair_mie.rst b/doc/src/pair_mie.rst index 089f8d3d29..6e9eec1f5c 100644 --- a/doc/src/pair_mie.rst +++ b/doc/src/pair_mie.rst @@ -35,7 +35,7 @@ The *mie/cut* style computes the Mie potential, given by E = C \epsilon \left[ \left(\frac{\sigma}{r}\right)^{\gamma_{rep}} - \left(\frac{\sigma}{r}\right)^{\gamma_{att}} \right] \qquad r < r_c -Rc is the cutoff and C is a function that depends on the repulsive and +:math:`r_c` is the cutoff and C is a function that depends on the repulsive and attractive exponents, given by: .. math:: diff --git a/doc/src/pair_morse.rst b/doc/src/pair_morse.rst index 807882980d..4b93d182bb 100644 --- a/doc/src/pair_morse.rst +++ b/doc/src/pair_morse.rst @@ -53,7 +53,7 @@ Style *morse* computes pairwise interactions with the formula E = D_0 \left[ e^{- 2 \alpha (r - r_0)} - 2 e^{- \alpha (r - r_0)} \right] \qquad r < r_c -Rc is the cutoff. +:math:`r_c` is the cutoff. The following coefficients must be defined for each pair of atoms types via the :doc:`pair_coeff ` command as in the examples @@ -78,7 +78,7 @@ so that both, potential energy and force, go to zero at the cut-off: .. 
math:: \phi\left(r\right) & = D_0 \left[ e^{- 2 \alpha (r - r_0)} - 2 e^{- \alpha (r - r_0)} \right] \qquad r < r_c \\ - E\left(r\right) & = \phi\left(r\right) - \phi\left(R_c\right) - \left(r - R_c\right) \left.\frac{d\phi}{d r} \right|_{r=R_c} \qquad r < R_c + E\left(r\right) & = \phi\left(r\right) - \phi\left(r_c\right) - \left(r - r_c\right) \left.\frac{d\phi}{d r} \right|_{r=r_c} \qquad r < r_c The syntax of the pair_style and pair_coeff commands are the same for the *morse* and *morse/smooth/linear* styles. diff --git a/doc/src/pair_soft.rst b/doc/src/pair_soft.rst index 1702811ed9..e21ae28432 100644 --- a/doc/src/pair_soft.rst +++ b/doc/src/pair_soft.rst @@ -44,8 +44,9 @@ It is useful for pushing apart overlapping atoms, since it does not blow up as r goes to 0. A is a prefactor that can be made to vary in time from the start to the end of the run (see discussion below), e.g. to start with a very soft potential and slowly harden the -interactions over time. Rc is the cutoff. See the :doc:`fix nve/limit ` command for another way to push apart -overlapping atoms. +interactions over time. :math:`r_c` is the cutoff. +See the :doc:`fix nve/limit ` command for another way +to push apart overlapping atoms. The following coefficients must be defined for each pair of atom types via the :doc:`pair_coeff ` command as in the examples above, diff --git a/doc/src/pair_spica.rst b/doc/src/pair_spica.rst index 74a069d8a2..859506593f 100644 --- a/doc/src/pair_spica.rst +++ b/doc/src/pair_spica.rst @@ -81,7 +81,7 @@ given by as required for the SPICA (formerly called SDK) and the pSPICA Coarse-grained MD parameterization discussed in :ref:`(Shinoda) `, :ref:`(DeVane) `, :ref:`(Seo) `, and :ref:`(Miyazaki) `. -Rc is the cutoff. +:math:`r_c` is the cutoff. 
Summary information on these force fields can be found at https://www.spica-ff.org Style *lj/spica/coul/long* computes the adds Coulombic interactions diff --git a/doc/src/variable.rst b/doc/src/variable.rst index 92a78ee3c1..a70ac25836 100644 --- a/doc/src/variable.rst +++ b/doc/src/variable.rst @@ -53,7 +53,7 @@ Syntax x == y, x != y, x < y, x <= y, x > y, x >= y, x && y, x \|\| y, x \|\^ y, !x math functions = sqrt(x), exp(x), ln(x), log(x), abs(x), sin(x), cos(x), tan(x), asin(x), acos(x), atan(x), atan2(y,x), - random(x,y,z), normal(x,y,z), ceil(x), floor(x), round(x) + random(x,y,z), normal(x,y,z), ceil(x), floor(x), round(x), ternary(x,y,z), ramp(x,y), stagger(x,y), logfreq(x,y,z), logfreq2(x,y,z), logfreq3(x,y,z), stride(x,y,z), stride2(x,y,z,a,b,c), vdisplace(x,y), swiggle(x,y,z), cwiggle(x,y,z) @@ -71,6 +71,7 @@ Syntax feature functions = is_available(category,feature), is_active(category,feature), is_defined(category,id) atom value = id[i], mass[i], type[i], mol[i], x[i], y[i], z[i], vx[i], vy[i], vz[i], fx[i], fy[i], fz[i], q[i] atom vector = id, mass, type, mol, radius, q, x, y, z, vx, vy, vz, fx, fy, fz + custom atom property = i_name, d_name, i_name[i], d_name[i], i2_name[i], d2_name[i], i2_name[i][j], d2_name[i][j] compute references = c_ID, c_ID[i], c_ID[i][j], C_ID, C_ID[i] fix references = f_ID, f_ID[i], f_ID[i][j], F_ID, F_ID[i] variable references = v_name, v_name[i] @@ -514,38 +515,40 @@ is a valid (though strange) variable formula: Specifically, a formula can contain numbers, constants, thermo keywords, math operators, math functions, group functions, region functions, special functions, feature functions, atom values, atom -vectors, compute references, fix references, and references to other +vectors, custom atom properties, compute references, fix references, and references to other variables.
-+--------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Number | 0.2, 100, 1.0e20, -15.4, etc | -+--------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Constant | PI, version, on, off, true, false, yes, no | -+--------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Thermo keywords | vol, pe, ebond, etc | -+--------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Math operators | (), -x, x+y, x-y, x\*y, x/y, x\^y, x%y, x == y, x != y, x < y, x <= y, x > y, x >= y, x && y, x \|\| y, x \|\^ y, !x | 
-+--------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Math functions | sqrt(x), exp(x), ln(x), log(x), abs(x), sin(x), cos(x), tan(x), asin(x), acos(x), atan(x), atan2(y,x), random(x,y,z), normal(x,y,z), ceil(x), floor(x), round(x), ramp(x,y), stagger(x,y), logfreq(x,y,z), logfreq2(x,y,z), logfreq3(x,y,z), stride(x,y,z), stride2(x,y,z,a,b,c), vdisplace(x,y), swiggle(x,y,z), cwiggle(x,y,z) | -+--------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Group functions | count(ID), mass(ID), charge(ID), xcm(ID,dim), vcm(ID,dim), fcm(ID,dim), bound(ID,dir), gyration(ID), ke(ID), angmom(ID,dim), torque(ID,dim), inertia(ID,dimdim), omega(ID,dim) | -+--------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Region functions | count(ID,IDR), mass(ID,IDR), charge(ID,IDR), xcm(ID,dim,IDR), vcm(ID,dim,IDR), fcm(ID,dim,IDR), bound(ID,dir,IDR), gyration(ID,IDR), ke(ID,IDR), angmom(ID,dim,IDR), torque(ID,dim,IDR), inertia(ID,dimdim,IDR), omega(ID,dim,IDR) | 
-+--------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Special functions | sum(x), min(x), max(x), ave(x), trap(x), slope(x), gmask(x), rmask(x), grmask(x,y), next(x), is_file(name), is_os(name), extract_setting(name), label2type(kind,label), is_typelabel(kind,label) | -+--------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Feature functions | is_available(category,feature), is_active(category,feature), is_defined(category,id) | -+--------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Atom values | id[i], mass[i], type[i], mol[i], x[i], y[i], z[i], vx[i], vy[i], vz[i], fx[i], fy[i], fz[i], q[i] | -+--------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Atom vectors | id, mass, type, mol, x, y, z, vx, vy, vz, fx, fy, fz, q | 
-+--------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Compute references | c_ID, c_ID[i], c_ID[i][j], C_ID, C_ID[i] | -+--------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Fix references | f_ID, f_ID[i], f_ID[i][j], F_ID, F_ID[i] | -+--------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Other variables | v_name, v_name[i] | -+--------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Number | 0.2, 100, 1.0e20, -15.4, etc | 
++------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Constant | PI, version, on, off, true, false, yes, no | ++------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Thermo keywords | vol, pe, ebond, etc | ++------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Math operators | (), -x, x+y, x-y, x\*y, x/y, x\^y, x%y, x == y, x != y, x < y, x <= y, x > y, x >= y, x && y, x \|\| y, x \|\^ y, !x | ++------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Math functions | sqrt(x), exp(x), ln(x), log(x), abs(x), sin(x), cos(x), tan(x), asin(x), acos(x), atan(x), atan2(y,x), random(x,y,z), normal(x,y,z), ceil(x), floor(x), round(x), ternary(x,y,z), ramp(x,y), stagger(x,y), logfreq(x,y,z), logfreq2(x,y,z), logfreq3(x,y,z), 
stride(x,y,z), stride2(x,y,z,a,b,c), vdisplace(x,y), swiggle(x,y,z), cwiggle(x,y,z) | ++------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Group functions | count(ID), mass(ID), charge(ID), xcm(ID,dim), vcm(ID,dim), fcm(ID,dim), bound(ID,dir), gyration(ID), ke(ID), angmom(ID,dim), torque(ID,dim), inertia(ID,dimdim), omega(ID,dim) | ++------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Region functions | count(ID,IDR), mass(ID,IDR), charge(ID,IDR), xcm(ID,dim,IDR), vcm(ID,dim,IDR), fcm(ID,dim,IDR), bound(ID,dir,IDR), gyration(ID,IDR), ke(ID,IDR), angmom(ID,dim,IDR), torque(ID,dim,IDR), inertia(ID,dimdim,IDR), omega(ID,dim,IDR) | ++------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Special functions | sum(x), min(x), max(x), ave(x), trap(x), slope(x), gmask(x), rmask(x), grmask(x,y), next(x), is_file(name), is_os(name), extract_setting(name), label2type(kind,label), is_typelabel(kind,label) | 
++------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Feature functions | is_available(category,feature), is_active(category,feature), is_defined(category,id) | ++------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Atom values | id[i], mass[i], type[i], mol[i], x[i], y[i], z[i], vx[i], vy[i], vz[i], fx[i], fy[i], fz[i], q[i] | ++------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Atom vectors | id, mass, type, mol, x, y, z, vx, vy, vz, fx, fy, fz, q | ++------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Custom atom properties | i_name, d_name, i_name[i], d_name[i], i2_name[i], d2_name[i], i2_name[i][j], d_name[i][j] | 
++------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Compute references | c_ID, c_ID[i], c_ID[i][j], C_ID, C_ID[i] | ++------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Fix references | f_ID, f_ID[i], f_ID[i][j], F_ID, F_ID[i] | ++------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Other variables | v_name, v_name[i] | ++------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ Most of the formula elements produce a scalar value. Some produce a global or per-atom vector of values. Global vectors can be produced @@ -703,6 +706,13 @@ library. Ceil() is the smallest integer not less than its argument. Floor() if the largest integer not greater than its argument. Round() is the nearest integer to its argument. +.. 
versionadded:: TBD + +The ternary(x,y,z) function is the equivalent of the ternary operator +(? and :) in C or C++. It takes 3 arguments. The first argument is a +conditional. The result of the function is y if x evaluates to true +(non-zero). The result is z if x evaluates to false (zero). + The ramp(x,y) function uses the current timestep to generate a value linearly interpolated between the specified x,y values over the course of a run, according to this formula: @@ -1034,10 +1044,9 @@ to built-in commands. For all of these styles except *command*, appending of active suffixes is also tried before reporting failure. The *feature* category checks the availability of the following -compile-time enabled features: GZIP support, PNG support, JPEG -support, FFMPEG support, and C++ exceptions for error -handling. Corresponding names are *gzip*, *png*, *jpeg*, *ffmpeg* and -*exceptions*\ . +compile-time enabled features: GZIP support, PNG support, JPEG support, +FFMPEG support, and C++ exceptions for error handling. Corresponding +names are *gzip*, *png*, *jpeg*, *ffmpeg* and *exceptions*\ . Example: Only dump in a given format if the compiled binary supports it. @@ -1139,8 +1148,45 @@ defines molecule IDs. Note that many other atom attributes can be used as inputs to a variable by using the :doc:`compute property/atom -<compute_property_atom>` command and then specifying a quantity from -that compute. +<compute_property_atom>` command and then referencing that compute. + +---------- + +Custom atom properties +---------------------- + +.. versionadded:: TBD + +Custom atom properties refer to per-atom integer and floating point +vectors or arrays that have been added via the :doc:`fix property/atom +<fix_property_atom>` command. When that command is used, specific +names are given to each attribute which are the "name" portion of +these references. References beginning with *i* and *d* refer to +integer and floating point properties respectively.
Per-atom vectors +are referenced by *i_name* and *d_name*; per-atom arrays are +referenced by *i2_name* and *d2_name*. + +The various allowed references to integer custom atom properties in +the variable formulas for equal-, vector-, and atom-style variables +are listed in the following table. References to floating point +custom atom properties are the same; just replace the leading "i" with +"d". + ++--------+---------------+------------------------------------------+ +| equal | i_name[I] | element of per-atom vector (I = atom ID) | +| equal | i2_name[I][J] | element of per-atom array (I = atom ID) | ++--------+---------------+------------------------------------------+ +| vector | i_name[I] | element of per-atom vector (I = atom ID) | +| vector | i2_name[I][J] | element of per-atom array (I = atom ID) | ++--------+---------------+------------------------------------------+ +| atom | i_name | per-atom vector | +| atom | i2_name[I] | column of per-atom array | ++--------+---------------+------------------------------------------+ + +The I and J indices in these custom atom property references can be +integers or can be a variable name, specified as v_name, where name is +the name of the variable. The rules for this syntax are the same as +for indices in the "Atom Values and Vectors" discussion above. 
---------- diff --git a/doc/utils/sphinx-config/false_positives.txt b/doc/utils/sphinx-config/false_positives.txt index 75589e3115..10ef06f89f 100644 --- a/doc/utils/sphinx-config/false_positives.txt +++ b/doc/utils/sphinx-config/false_positives.txt @@ -1173,6 +1173,7 @@ Foiles fopenmp forceclear forestgreen +formamide formatarg formulae Forschungszentrum @@ -1969,6 +1970,7 @@ lps lpsapi lrt lsfftw +lt ltbbmalloc Lua lubricateU @@ -3373,6 +3375,7 @@ Sodani Soderlind Solaris Solida +solv solvated solvation someuser @@ -3619,6 +3622,7 @@ timestepping timesteps TiN TiO +Tirado Tirrell Titer Tji diff --git a/examples/PACKAGES/phonon/2-1D-diatomic/in.Ana b/examples/PACKAGES/phonon/2-1D-diatomic/in.Ana index 6312466e58..ca49cd50c6 100644 --- a/examples/PACKAGES/phonon/2-1D-diatomic/in.Ana +++ b/examples/PACKAGES/phonon/2-1D-diatomic/in.Ana @@ -2,15 +2,15 @@ dimension 2 boundary p f p -units lj -atom_style bond +units lj +atom_style bond atom_modify sort 0 1. bond_style harmonic pair_style none -communicate single cutoff 2.0 +comm_modify cutoff 2.0 # geometry -read_data data.pos +read_data data.pos # neighbor 1.0 nsq @@ -43,4 +43,4 @@ thermo_modify temp MyTemp thermo 100 # -run 2000000 +run 2000000 diff --git a/examples/PACKAGES/pimd/langevin_metal_units/in.langevin.metal b/examples/PACKAGES/pimd/langevin_metal_units/in.langevin.metal new file mode 100644 index 0000000000..071b4ad722 --- /dev/null +++ b/examples/PACKAGES/pimd/langevin_metal_units/in.langevin.metal @@ -0,0 +1,28 @@ +variable ibead uloop 99 pad + +units metal +atom_style atomic +atom_modify map yes +boundary p p p +pair_style lj/cut 9.5251 +read_data data.metalnpt${ibead} + +pair_coeff * * 0.00965188 3.4 +pair_modify shift yes + +mass 1 39.948 + +timestep 0.001 + +velocity all create 0.0 ${ibead} + +fix 1 all pimd/langevin method nmpimd ensemble npt integrator obabo thermostat PILE_L 1234 tau 1.0 temp 113.15 iso 1.0 barostat BZP taup 1.0 fixcom no + +thermo_style custom step temp f_1[*] vol press +thermo 
100 +thermo_modify norm no + +# dump dcd all custom 100 ${ibead}.dcd id type xu yu zu vx vy vz ix iy iz fx fy fz +# dump_modify dcd sort id format line "%d %d %.16f %.16f %.16f %.16f %.16f %.16f %d %d %d %.16f %.16f %.16f" + +run 1000 diff --git a/examples/PACKAGES/pimd/langevin_metal_units/in.lmp b/examples/PACKAGES/pimd/langevin_metal_units/in.pimd-langevin.metal similarity index 80% rename from examples/PACKAGES/pimd/langevin_metal_units/in.lmp rename to examples/PACKAGES/pimd/langevin_metal_units/in.pimd-langevin.metal index 124063df99..4d1416063c 100644 --- a/examples/PACKAGES/pimd/langevin_metal_units/in.lmp +++ b/examples/PACKAGES/pimd/langevin_metal_units/in.pimd-langevin.metal @@ -16,7 +16,7 @@ timestep 0.001 velocity all create 0.0 ${ibead} -fix 1 all pimd/langevin ensemble npt integrator obabo thermostat PILE_L 1234 tau 1.0 temp 113.15 iso 1.0 barostat BZP taup 1.0 fixcom no +fix 1 all pimd/langevin method pimd ensemble nvt integrator obabo thermostat PILE_L 1234 tau 1.0 temp 113.15 taup 1.0 fixcom no thermo_style custom step temp f_1[*] vol press thermo 100 diff --git a/examples/PACKAGES/pimd/langevin_metal_units/log.14Jun23.langevin.metal.g++ b/examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.langevin.metal.g++ similarity index 64% rename from examples/PACKAGES/pimd/langevin_metal_units/log.14Jun23.langevin.metal.g++ rename to examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.langevin.metal.g++ index fa22106766..a05d2d9aa8 100644 --- a/examples/PACKAGES/pimd/langevin_metal_units/log.14Jun23.langevin.metal.g++ +++ b/examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.langevin.metal.g++ @@ -1,2 +1,2 @@ -LAMMPS (28 Mar 2023) +LAMMPS (3 Aug 2023) Running on 4 partitions of processors diff --git a/examples/PACKAGES/pimd/langevin_metal_units/log.14Jun23.langevin.metal.g++.0 b/examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.langevin.metal.g++.0 similarity index 70% rename from 
examples/PACKAGES/pimd/langevin_metal_units/log.14Jun23.langevin.metal.g++.0 rename to examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.langevin.metal.g++.0 index 00787df8ba..c34bf8e7dd 100644 --- a/examples/PACKAGES/pimd/langevin_metal_units/log.14Jun23.langevin.metal.g++.0 +++ b/examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.langevin.metal.g++.0 @@ -1,5 +1,6 @@ -LAMMPS (28 Mar 2023) +LAMMPS (3 Aug 2023) Processor partition = 0 +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) using 1 OpenMP thread(s) per MPI task variable ibead uloop 99 pad @@ -29,10 +30,10 @@ timestep 0.001 velocity all create 0.0 ${ibead} velocity all create 0.0 01 -fix 1 all pimd/langevin ensemble npt integrator obabo thermostat PILE_L 1234 tau 1.0 temp 113.15 iso 1.0 barostat BZP taup 1.0 fixcom no +fix 1 all pimd/langevin method nmpimd ensemble npt integrator obabo thermostat PILE_L 1234 tau 1.0 temp 113.15 iso 1.0 barostat BZP taup 1.0 fixcom no Initializing PIMD BZP barostat... -The barostat mass is W = 2.3401256650800001e+01 + The barostat mass is W = 2.3401256650800001e+01 thermo_style custom step temp f_1[*] vol press thermo 100 @@ -43,14 +44,13 @@ thermo_modify norm no run 1000 Generated 0 of 0 mixed pair_coeff terms from geometric mixing rule - Initializing PI Langevin equation thermostat... -Bead ID | omega | tau | c1 | c2 - 0 0.00000000e+00 1.00000000e+00 9.99500125e-01 3.16148726e-02 - 1 8.37986825e+01 5.96668092e-03 9.19616372e-01 3.92817678e-01 - 2 1.18509233e+02 4.21908054e-03 8.88243614e-01 4.59372705e-01 - 3 8.37986825e+01 5.96668092e-03 9.19616372e-01 3.92817678e-01 -PILE_L thermostat successfully initialized! 
+ Bead ID | omega | tau | c1 | c2 + 0 0.00000000e+00 1.00000000e+00 9.99500125e-01 3.16148726e-02 + 1 8.37986825e+01 5.96668092e-03 9.19616372e-01 3.92817678e-01 + 2 1.18509233e+02 4.21908054e-03 8.88243614e-01 4.59372705e-01 + 3 8.37986825e+01 5.96668092e-03 9.19616372e-01 3.92817678e-01 + PILE_L thermostat successfully initialized! Neighbor list info ... update: every = 1 steps, delay = 0 steps, check = yes @@ -66,31 +66,31 @@ Neighbor list info ... bin: standard Per MPI rank memory allocation (min/avg/max) = 3.121 | 3.121 | 3.121 Mbytes Step Temp f_1[1] f_1[2] f_1[3] f_1[4] f_1[5] f_1[6] f_1[7] f_1[8] f_1[9] f_1[10] f_1[11] f_1[12] f_1[13] f_1[14] f_1[15] Volume Press - 0 0 0 0 -7.3046601 4.3005229 -21.877018 -8.7249482 2.9571502 -1743.5332 -698.49808 -172.07477 0 0 0.033460054 -0.37064378 4.216227 13402.228 -164.47373 - 100 149.95804 3.8573359 0 -7.7921375 42.886648 -23.396327 -1.980193 2.954003 -1888.0547 -1648.7118 -332.0298 -0.099139345 0.11500091 0.033044702 -0.3701566 42.83112 13235.861 -101.30374 - 200 245.00113 6.3021074 0 -8.2639651 41.690123 -22.521598 -4.273021 2.9600599 -1906.2904 -1609.02 -265.94404 -0.20527926 0.49305948 0.031504957 -0.36829556 41.729191 12619.125 112.22426 - 300 300.57486 7.7316177 0 -8.2986331 43.180131 -21.755813 -7.7032433 2.9714114 -1968.7685 290.49656 251.72564 -0.21935745 0.56300721 0.029467915 -0.36568855 43.236828 11803.2 814.45889 - 400 368.08438 9.4681493 0 -8.4800193 49.109699 -24.824142 2.9744597 2.9794185 -2335.993 1368.7398 570.03286 -0.028366234 0.0094148316 0.028338146 -0.36416383 49.028096 11350.678 1202.0398 - 500 419.32066 10.786088 0 -8.640773 45.427771 -22.825143 16.22356 2.9684828 -2113.91 -272.84753 185.53392 0.091614289 0.098205455 0.028793585 -0.36478567 45.368325 11533.101 952.59748 - 600 385.4127 9.9138817 0 -8.4356035 47.783726 -22.456104 6.837575 2.967236 -2023.8117 -918.27943 -2.4106994 0.093360761 0.10198539 0.029589188 -0.36584873 47.725157 11851.775 676.62913 - 700 360.14242 9.2638601 0 -8.2900275 
42.626187 -20.571698 -5.7252564 2.9560528 -1806.9448 -1418.2247 -148.41657 0.075011202 0.065835696 0.030359455 -0.36685105 42.558523 12160.301 456.91446 - 800 346.92167 8.923786 0 -8.0694169 45.160336 -21.885719 -6.7745694 2.9575472 -1894.3641 -1329.3179 -136.42193 0.011114896 0.0014455064 0.030808183 -0.3674233 45.076543 12340.037 454.60123 - 900 364.39442 9.3732334 0 -8.0415668 45.604542 -21.816625 5.586068 2.9578604 -1890.4653 -1271.1107 -111.89061 -0.020285587 0.0048148677 0.030774258 -0.36738033 45.521594 12326.448 499.75868 - 1000 390.77042 10.051697 0 -8.1948009 45.264242 -22.833545 6.9260573 2.960122 -2007.6188 -1179.7125 -70.907567 -0.062733519 0.046047757 0.030329191 -0.36681215 45.191633 12148.179 572.98799 -Loop time of 0.248186 on 1 procs for 1000 steps with 200 atoms + 0 0 0 0 -7.3046601 4.3005229 3.3062167 -8.7249482 2.9571502 -1743.5332 -698.49808 -172.07477 0 0 0.033460054 -0.37064378 4.216227 13402.228 -164.47373 + 100 149.95804 3.8573359 0 -7.7921375 42.886648 2.9263894 -1.980193 2.954003 -1888.0547 -1648.7118 -332.0298 -0.099139345 0.11500091 0.033044702 -0.3701566 42.83112 13235.861 -101.30374 + 200 245.00113 6.3021074 0 -8.2639651 41.690123 3.1450717 -4.273021 2.9600599 -1906.2904 -1609.02 -265.94404 -0.20527926 0.49305948 0.031504957 -0.36829556 41.729191 12619.125 112.22426 + 300 300.57486 7.7316177 0 -8.2986331 43.180131 3.336518 -7.7032433 2.9714114 -1968.7685 290.49656 251.72564 -0.21935745 0.56300721 0.029467915 -0.36568855 43.236828 11803.2 814.45889 + 400 368.08438 9.4681493 0 -8.4800193 49.109699 2.5694358 2.9744597 2.9794185 -2335.993 1368.7398 570.03286 -0.028366234 0.0094148316 0.028338146 -0.36416383 49.028096 11350.678 1202.0398 + 500 419.32066 10.786088 0 -8.640773 45.427771 3.0691855 16.22356 2.9684828 -2113.91 -272.84753 185.53392 0.091614289 0.098205455 0.028793585 -0.36478567 45.368325 11533.101 952.59748 + 600 385.4127 9.9138817 0 -8.4356035 47.783726 3.1614452 6.837575 2.967236 -2023.8117 -918.27943 -2.4106994 0.093360761 
0.10198539 0.029589188 -0.36584873 47.725157 11851.775 676.62913 + 700 360.14242 9.2638601 0 -8.2900275 42.626187 3.6325468 -5.7252564 2.9560528 -1806.9448 -1418.2247 -148.41657 0.075011202 0.065835696 0.030359455 -0.36685105 42.558523 12160.301 456.91446 + 800 346.92167 8.923786 0 -8.0694169 45.160336 3.3040415 -6.7745694 2.9575472 -1894.3641 -1329.3179 -136.42193 0.011114896 0.0014455064 0.030808183 -0.3674233 45.076543 12340.037 454.60123 + 900 364.39442 9.3732334 0 -8.0415668 45.604542 3.321315 5.586068 2.9578604 -1890.4653 -1271.1107 -111.89061 -0.020285587 0.0048148677 0.030774258 -0.36738033 45.521594 12326.448 499.75868 + 1000 390.77042 10.051697 0 -8.1948009 45.264242 3.0670849 6.9260573 2.960122 -2007.6188 -1179.7125 -70.907567 -0.062733519 0.046047757 0.030329191 -0.36681215 45.191633 12148.179 572.98799 +Loop time of 0.218442 on 1 procs for 1000 steps with 200 atoms -Performance: 348.126 ns/day, 0.069 hours/ns, 4029.238 timesteps/s, 805.848 katom-step/s -99.6% CPU use with 1 MPI tasks x 1 OpenMP threads +Performance: 395.527 ns/day, 0.061 hours/ns, 4577.865 timesteps/s, 915.573 katom-step/s +98.9% CPU use with 1 MPI tasks x 1 OpenMP threads MPI task timing breakdown: Section | min time | avg time | max time |%varavg| %total --------------------------------------------------------------- -Pair | 0.14541 | 0.14541 | 0.14541 | 0.0 | 58.59 -Neigh | 0.00099082 | 0.00099082 | 0.00099082 | 0.0 | 0.40 -Comm | 0.0039966 | 0.0039966 | 0.0039966 | 0.0 | 1.61 -Output | 0.00016346 | 0.00016346 | 0.00016346 | 0.0 | 0.07 -Modify | 0.096205 | 0.096205 | 0.096205 | 0.0 | 38.76 -Other | | 0.001425 | | | 0.57 +Pair | 0.11918 | 0.11918 | 0.11918 | 0.0 | 54.56 +Neigh | 0.0010314 | 0.0010314 | 0.0010314 | 0.0 | 0.47 +Comm | 0.0046197 | 0.0046197 | 0.0046197 | 0.0 | 2.11 +Output | 0.0001329 | 0.0001329 | 0.0001329 | 0.0 | 0.06 +Modify | 0.092616 | 0.092616 | 0.092616 | 0.0 | 42.40 +Other | | 0.0008616 | | | 0.39 Nlocal: 200 ave 200 max 200 min Histogram: 1 0 0 0 0 0 0 0 0 0 
diff --git a/examples/PACKAGES/pimd/langevin_metal_units/log.14Jun23.langevin.metal.g++.1 b/examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.langevin.metal.g++.1 similarity index 70% rename from examples/PACKAGES/pimd/langevin_metal_units/log.14Jun23.langevin.metal.g++.1 rename to examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.langevin.metal.g++.1 index 83821cafb7..58f4695d46 100644 --- a/examples/PACKAGES/pimd/langevin_metal_units/log.14Jun23.langevin.metal.g++.1 +++ b/examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.langevin.metal.g++.1 @@ -1,5 +1,6 @@ -LAMMPS (28 Mar 2023) +LAMMPS (3 Aug 2023) Processor partition = 1 +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) using 1 OpenMP thread(s) per MPI task variable ibead uloop 99 pad @@ -17,7 +18,7 @@ Reading data file ... 200 atoms reading velocities ... 200 velocities - read_data CPU = 0.001 seconds + read_data CPU = 0.002 seconds pair_coeff * * 0.00965188 3.4 pair_modify shift yes @@ -29,10 +30,7 @@ timestep 0.001 velocity all create 0.0 ${ibead} velocity all create 0.0 02 -fix 1 all pimd/langevin ensemble npt integrator obabo thermostat PILE_L 1234 tau 1.0 temp 113.15 iso 1.0 barostat BZP taup 1.0 fixcom no - -Initializing PIMD BZP barostat... -The barostat mass is W = 2.3401256650800001e+01 +fix 1 all pimd/langevin method nmpimd ensemble npt integrator obabo thermostat PILE_L 1234 tau 1.0 temp 113.15 iso 1.0 barostat BZP taup 1.0 fixcom no thermo_style custom step temp f_1[*] vol press thermo 100 @@ -43,15 +41,6 @@ thermo_modify norm no run 1000 Generated 0 of 0 mixed pair_coeff terms from geometric mixing rule - -Initializing PI Langevin equation thermostat... 
-Bead ID | omega | tau | c1 | c2 - 0 0.00000000e+00 1.00000000e+00 9.99500125e-01 3.16148726e-02 - 1 8.37986825e+01 5.96668092e-03 9.19616372e-01 3.92817678e-01 - 2 1.18509233e+02 4.21908054e-03 8.88243614e-01 4.59372705e-01 - 3 8.37986825e+01 5.96668092e-03 9.19616372e-01 3.92817678e-01 -PILE_L thermostat successfully initialized! - Neighbor list info ... update: every = 1 steps, delay = 0 steps, check = yes max neighbors/atom: 2000, page size: 100000 @@ -66,31 +55,31 @@ Neighbor list info ... bin: standard Per MPI rank memory allocation (min/avg/max) = 3.121 | 3.121 | 3.121 Mbytes Step Temp f_1[1] f_1[2] f_1[3] f_1[4] f_1[5] f_1[6] f_1[7] f_1[8] f_1[9] f_1[10] f_1[11] f_1[12] f_1[13] f_1[14] f_1[15] Volume Press - 0 0 0 11.920908 -7.3063682 4.3005229 -21.877018 -8.7249482 2.9571502 -1743.5332 -698.49808 -172.07477 0 0 0.033460054 -0.37064378 4.216227 13402.228 -167.65544 - 100 483.61933 12.440028 11.405863 -7.7749671 42.886648 -23.396327 -1.980193 2.954003 -1888.0547 -1648.7118 -332.0298 -0.099139345 0.11500091 0.033044702 -0.3701566 42.83112 13235.861 606.14668 - 200 452.03836 11.627678 11.47094 -8.2534927 41.690123 -22.521598 -4.273021 2.9600599 -1906.2904 -1609.02 -265.94404 -0.20527926 0.49305948 0.031504957 -0.36829556 41.729191 12619.125 583.5476 - 300 470.25997 12.096389 11.739306 -8.3750153 43.180131 -21.755813 -7.7032433 2.9714114 -1968.7685 290.49656 251.72564 -0.21935745 0.56300721 0.029467915 -0.36568855 43.236828 11803.2 1152.6851 - 400 459.46597 11.818737 12.502421 -8.5240576 49.109699 -24.824142 2.9744597 2.9794185 -2335.993 1368.7398 570.03286 -0.028366234 0.0094148316 0.028338146 -0.36416383 49.028096 11350.678 1381.0251 - 500 442.73121 11.388273 11.19396 -8.6488583 45.427771 -22.825143 16.22356 2.9684828 -2113.91 -272.84753 185.53392 0.091614289 0.098205455 0.028793585 -0.36478567 45.368325 11533.101 1000.6119 - 600 493.47034 12.693424 11.91335 -8.4625706 47.783726 -22.456104 6.837575 2.967236 -2023.8117 -918.27943 -2.4106994 0.093360761 
0.10198539 0.029589188 -0.36584873 47.725157 11851.775 904.52944 - 700 470.04548 12.090871 10.348757 -8.278182 42.626187 -20.571698 -5.7252564 2.9560528 -1806.9448 -1418.2247 -148.41657 0.075011202 0.065835696 0.030359455 -0.36685105 42.558523 12160.301 715.22796 - 800 458.04928 11.782296 11.152029 -8.0926613 45.160336 -21.885719 -6.7745694 2.9575472 -1894.3641 -1329.3179 -136.42193 0.011114896 0.0014455064 0.030808183 -0.3674233 45.076543 12340.037 678.21261 - 900 468.60547 12.05383 10.937315 -8.0319335 45.604542 -21.816625 5.586068 2.9578604 -1890.4653 -1271.1107 -111.89061 -0.020285587 0.0048148677 0.030774258 -0.36738033 45.521594 12326.448 735.24377 - 1000 427.44192 10.99499 11.916587 -8.2229199 45.264242 -22.833545 6.9260573 2.960122 -2007.6188 -1179.7125 -70.907567 -0.062733519 0.046047757 0.030329191 -0.36681215 45.191633 12148.179 637.98311 -Loop time of 0.248186 on 1 procs for 1000 steps with 200 atoms + 0 0 0 11.920908 -7.3063682 4.3005229 3.3062167 -8.7249482 2.9571502 -1743.5332 -698.49808 -172.07477 0 0 0.033460054 -0.37064378 4.216227 13402.228 -167.65544 + 100 483.61933 12.440028 11.405863 -7.7749671 42.886648 2.9263894 -1.980193 2.954003 -1888.0547 -1648.7118 -332.0298 -0.099139345 0.11500091 0.033044702 -0.3701566 42.83112 13235.861 606.14668 + 200 452.03836 11.627678 11.47094 -8.2534927 41.690123 3.1450717 -4.273021 2.9600599 -1906.2904 -1609.02 -265.94404 -0.20527926 0.49305948 0.031504957 -0.36829556 41.729191 12619.125 583.5476 + 300 470.25997 12.096389 11.739306 -8.3750153 43.180131 3.336518 -7.7032433 2.9714114 -1968.7685 290.49656 251.72564 -0.21935745 0.56300721 0.029467915 -0.36568855 43.236828 11803.2 1152.6851 + 400 459.46597 11.818737 12.502421 -8.5240576 49.109699 2.5694358 2.9744597 2.9794185 -2335.993 1368.7398 570.03286 -0.028366234 0.0094148316 0.028338146 -0.36416383 49.028096 11350.678 1381.0251 + 500 442.73121 11.388273 11.19396 -8.6488583 45.427771 3.0691855 16.22356 2.9684828 -2113.91 -272.84753 185.53392 0.091614289 
0.098205455 0.028793585 -0.36478567 45.368325 11533.101 1000.6119 + 600 493.47034 12.693424 11.91335 -8.4625706 47.783726 3.1614452 6.837575 2.967236 -2023.8117 -918.27943 -2.4106994 0.093360761 0.10198539 0.029589188 -0.36584873 47.725157 11851.775 904.52944 + 700 470.04548 12.090871 10.348757 -8.278182 42.626187 3.6325468 -5.7252564 2.9560528 -1806.9448 -1418.2247 -148.41657 0.075011202 0.065835696 0.030359455 -0.36685105 42.558523 12160.301 715.22796 + 800 458.04928 11.782296 11.152029 -8.0926613 45.160336 3.3040415 -6.7745694 2.9575472 -1894.3641 -1329.3179 -136.42193 0.011114896 0.0014455064 0.030808183 -0.3674233 45.076543 12340.037 678.21261 + 900 468.60547 12.05383 10.937315 -8.0319335 45.604542 3.321315 5.586068 2.9578604 -1890.4653 -1271.1107 -111.89061 -0.020285587 0.0048148677 0.030774258 -0.36738033 45.521594 12326.448 735.24377 + 1000 427.44192 10.99499 11.916587 -8.2229199 45.264242 3.0670849 6.9260573 2.960122 -2007.6188 -1179.7125 -70.907567 -0.062733519 0.046047757 0.030329191 -0.36681215 45.191633 12148.179 637.98311 +Loop time of 0.218435 on 1 procs for 1000 steps with 200 atoms -Performance: 348.126 ns/day, 0.069 hours/ns, 4029.238 timesteps/s, 805.848 katom-step/s -99.5% CPU use with 1 MPI tasks x 1 OpenMP threads +Performance: 395.541 ns/day, 0.061 hours/ns, 4578.021 timesteps/s, 915.604 katom-step/s +99.1% CPU use with 1 MPI tasks x 1 OpenMP threads MPI task timing breakdown: Section | min time | avg time | max time |%varavg| %total --------------------------------------------------------------- -Pair | 0.14654 | 0.14654 | 0.14654 | 0.0 | 59.04 -Neigh | 0.00099986 | 0.00099986 | 0.00099986 | 0.0 | 0.40 -Comm | 0.0041628 | 0.0041628 | 0.0041628 | 0.0 | 1.68 -Output | 0.00018019 | 0.00018019 | 0.00018019 | 0.0 | 0.07 -Modify | 0.094878 | 0.094878 | 0.094878 | 0.0 | 38.23 -Other | | 0.001424 | | | 0.57 +Pair | 0.11791 | 0.11791 | 0.11791 | 0.0 | 53.98 +Neigh | 0.0010247 | 0.0010247 | 0.0010247 | 0.0 | 0.47 +Comm | 0.0035577 | 0.0035577 | 
0.0035577 | 0.0 | 1.63 +Output | 0.00011003 | 0.00011003 | 0.00011003 | 0.0 | 0.05 +Modify | 0.09496 | 0.09496 | 0.09496 | 0.0 | 43.47 +Other | | 0.0008711 | | | 0.40 Nlocal: 200 ave 200 max 200 min Histogram: 1 0 0 0 0 0 0 0 0 0 diff --git a/examples/PACKAGES/pimd/langevin_metal_units/log.14Jun23.langevin.metal.g++.2 b/examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.langevin.metal.g++.2 similarity index 70% rename from examples/PACKAGES/pimd/langevin_metal_units/log.14Jun23.langevin.metal.g++.2 rename to examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.langevin.metal.g++.2 index fd8dd409ae..0b76ce5bbc 100644 --- a/examples/PACKAGES/pimd/langevin_metal_units/log.14Jun23.langevin.metal.g++.2 +++ b/examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.langevin.metal.g++.2 @@ -1,5 +1,6 @@ -LAMMPS (28 Mar 2023) +LAMMPS (3 Aug 2023) Processor partition = 2 +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) using 1 OpenMP thread(s) per MPI task variable ibead uloop 99 pad @@ -17,7 +18,7 @@ Reading data file ... 200 atoms reading velocities ... 200 velocities - read_data CPU = 0.001 seconds + read_data CPU = 0.002 seconds pair_coeff * * 0.00965188 3.4 pair_modify shift yes @@ -29,10 +30,7 @@ timestep 0.001 velocity all create 0.0 ${ibead} velocity all create 0.0 03 -fix 1 all pimd/langevin ensemble npt integrator obabo thermostat PILE_L 1234 tau 1.0 temp 113.15 iso 1.0 barostat BZP taup 1.0 fixcom no - -Initializing PIMD BZP barostat... -The barostat mass is W = 2.3401256650800001e+01 +fix 1 all pimd/langevin method nmpimd ensemble npt integrator obabo thermostat PILE_L 1234 tau 1.0 temp 113.15 iso 1.0 barostat BZP taup 1.0 fixcom no thermo_style custom step temp f_1[*] vol press thermo 100 @@ -43,15 +41,6 @@ thermo_modify norm no run 1000 Generated 0 of 0 mixed pair_coeff terms from geometric mixing rule - -Initializing PI Langevin equation thermostat... 
-Bead ID | omega | tau | c1 | c2 - 0 0.00000000e+00 1.00000000e+00 9.99500125e-01 3.16148726e-02 - 1 8.37986825e+01 5.96668092e-03 9.19616372e-01 3.92817678e-01 - 2 1.18509233e+02 4.21908054e-03 8.88243614e-01 4.59372705e-01 - 3 8.37986825e+01 5.96668092e-03 9.19616372e-01 3.92817678e-01 -PILE_L thermostat successfully initialized! - Neighbor list info ... update: every = 1 steps, delay = 0 steps, check = yes max neighbors/atom: 2000, page size: 100000 @@ -66,31 +55,31 @@ Neighbor list info ... bin: standard Per MPI rank memory allocation (min/avg/max) = 3.121 | 3.121 | 3.121 Mbytes Step Temp f_1[1] f_1[2] f_1[3] f_1[4] f_1[5] f_1[6] f_1[7] f_1[8] f_1[9] f_1[10] f_1[11] f_1[12] f_1[13] f_1[14] f_1[15] Volume Press - 0 0 0 10.862314 -7.320388 4.3005229 -21.877018 -8.7249482 2.9571502 -1743.5332 -698.49808 -172.07477 0 0 0.033460054 -0.37064378 4.216227 13402.228 -175.34503 - 100 455.18121 11.708521 11.48472 -7.8033686 42.886648 -23.396327 -1.980193 2.954003 -1888.0547 -1648.7118 -332.0298 -0.099139345 0.11500091 0.033044702 -0.3701566 42.83112 13235.861 526.41632 - 200 460.81997 11.853566 10.817157 -8.2276485 41.690123 -22.521598 -4.273021 2.9600599 -1906.2904 -1609.02 -265.94404 -0.20527926 0.49305948 0.031504957 -0.36829556 41.729191 12619.125 615.80924 - 300 481.48652 12.385166 10.035423 -8.3866916 43.180131 -21.755813 -7.7032433 2.9714114 -1968.7685 290.49656 251.72564 -0.21935745 0.56300721 0.029467915 -0.36568855 43.236828 11803.2 1169.2917 - 400 487.3584 12.536208 11.766522 -8.3643382 49.109699 -24.824142 2.9744597 2.9794185 -2335.993 1368.7398 570.03286 -0.028366234 0.0094148316 0.028338146 -0.36416383 49.028096 11350.678 1574.1427 - 500 446.36019 11.48162 12.144202 -8.680266 45.427771 -22.825143 16.22356 2.9684828 -2113.91 -272.84753 185.53392 0.091614289 0.098205455 0.028793585 -0.36478567 45.368325 11533.101 979.68395 - 600 500.3783 12.871115 11.075008 -8.47833 47.783726 -22.456104 6.837575 2.967236 -2023.8117 -918.27943 -2.4106994 0.093360761 0.10198539 
0.029589188 -0.36584873 47.725157 11851.775 912.39361 - 700 435.40634 11.199857 10.923558 -8.3090105 42.626187 -20.571698 -5.7252564 2.9560528 -1806.9448 -1418.2247 -148.41657 0.075011202 0.065835696 0.030359455 -0.36685105 42.558523 12160.301 617.20857 - 800 446.82793 11.493652 11.599712 -8.0900498 45.160336 -21.885719 -6.7745694 2.9575472 -1894.3641 -1329.3179 -136.42193 0.011114896 0.0014455064 0.030808183 -0.3674233 45.076543 12340.037 652.13243 - 900 448.28506 11.531133 12.130739 -8.0810557 45.604542 -21.816625 5.586068 2.9578604 -1890.4653 -1271.1107 -111.89061 -0.020285587 0.0048148677 0.030774258 -0.36738033 45.521594 12326.448 674.68073 - 1000 440.94913 11.342433 10.765654 -8.1419484 45.264242 -22.833545 6.9260573 2.960122 -2007.6188 -1179.7125 -70.907567 -0.062733519 0.046047757 0.030329191 -0.36681215 45.191633 12148.179 730.67128 -Loop time of 0.248185 on 1 procs for 1000 steps with 200 atoms + 0 0 0 10.862314 -7.320388 4.3005229 3.3062167 -8.7249482 2.9571502 -1743.5332 -698.49808 -172.07477 0 0 0.033460054 -0.37064378 4.216227 13402.228 -175.34503 + 100 455.18121 11.708521 11.48472 -7.8033686 42.886648 2.9263894 -1.980193 2.954003 -1888.0547 -1648.7118 -332.0298 -0.099139345 0.11500091 0.033044702 -0.3701566 42.83112 13235.861 526.41632 + 200 460.81997 11.853566 10.817157 -8.2276485 41.690123 3.1450717 -4.273021 2.9600599 -1906.2904 -1609.02 -265.94404 -0.20527926 0.49305948 0.031504957 -0.36829556 41.729191 12619.125 615.80924 + 300 481.48652 12.385166 10.035423 -8.3866916 43.180131 3.336518 -7.7032433 2.9714114 -1968.7685 290.49656 251.72564 -0.21935745 0.56300721 0.029467915 -0.36568855 43.236828 11803.2 1169.2917 + 400 487.3584 12.536208 11.766522 -8.3643382 49.109699 2.5694358 2.9744597 2.9794185 -2335.993 1368.7398 570.03286 -0.028366234 0.0094148316 0.028338146 -0.36416383 49.028096 11350.678 1574.1427 + 500 446.36019 11.48162 12.144202 -8.680266 45.427771 3.0691855 16.22356 2.9684828 -2113.91 -272.84753 185.53392 0.091614289 0.098205455 
0.028793585 -0.36478567 45.368325 11533.101 979.68395 + 600 500.3783 12.871115 11.075008 -8.47833 47.783726 3.1614452 6.837575 2.967236 -2023.8117 -918.27943 -2.4106994 0.093360761 0.10198539 0.029589188 -0.36584873 47.725157 11851.775 912.39361 + 700 435.40634 11.199857 10.923558 -8.3090105 42.626187 3.6325468 -5.7252564 2.9560528 -1806.9448 -1418.2247 -148.41657 0.075011202 0.065835696 0.030359455 -0.36685105 42.558523 12160.301 617.20857 + 800 446.82793 11.493652 11.599712 -8.0900498 45.160336 3.3040415 -6.7745694 2.9575472 -1894.3641 -1329.3179 -136.42193 0.011114896 0.0014455064 0.030808183 -0.3674233 45.076543 12340.037 652.13243 + 900 448.28506 11.531133 12.130739 -8.0810557 45.604542 3.321315 5.586068 2.9578604 -1890.4653 -1271.1107 -111.89061 -0.020285587 0.0048148677 0.030774258 -0.36738033 45.521594 12326.448 674.68073 + 1000 440.94913 11.342433 10.765654 -8.1419484 45.264242 3.0670849 6.9260573 2.960122 -2007.6188 -1179.7125 -70.907567 -0.062733519 0.046047757 0.030329191 -0.36681215 45.191633 12148.179 730.67128 +Loop time of 0.218435 on 1 procs for 1000 steps with 200 atoms -Performance: 348.128 ns/day, 0.069 hours/ns, 4029.259 timesteps/s, 805.852 katom-step/s -97.8% CPU use with 1 MPI tasks x 1 OpenMP threads +Performance: 395.541 ns/day, 0.061 hours/ns, 4578.019 timesteps/s, 915.604 katom-step/s +99.1% CPU use with 1 MPI tasks x 1 OpenMP threads MPI task timing breakdown: Section | min time | avg time | max time |%varavg| %total --------------------------------------------------------------- -Pair | 0.14702 | 0.14702 | 0.14702 | 0.0 | 59.24 -Neigh | 0.0010003 | 0.0010003 | 0.0010003 | 0.0 | 0.40 -Comm | 0.0039821 | 0.0039821 | 0.0039821 | 0.0 | 1.60 -Output | 0.00023527 | 0.00023527 | 0.00023527 | 0.0 | 0.09 -Modify | 0.094519 | 0.094519 | 0.094519 | 0.0 | 38.08 -Other | | 0.001427 | | | 0.58 +Pair | 0.12079 | 0.12079 | 0.12079 | 0.0 | 55.30 +Neigh | 0.0010224 | 0.0010224 | 0.0010224 | 0.0 | 0.47 +Comm | 0.0035478 | 0.0035478 | 0.0035478 | 0.0 | 
1.62 +Output | 0.00010889 | 0.00010889 | 0.00010889 | 0.0 | 0.05 +Modify | 0.092098 | 0.092098 | 0.092098 | 0.0 | 42.16 +Other | | 0.0008684 | | | 0.40 Nlocal: 200 ave 200 max 200 min Histogram: 1 0 0 0 0 0 0 0 0 0 diff --git a/examples/PACKAGES/pimd/langevin_metal_units/log.14Jun23.langevin.metal.g++.3 b/examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.langevin.metal.g++.3 similarity index 70% rename from examples/PACKAGES/pimd/langevin_metal_units/log.14Jun23.langevin.metal.g++.3 rename to examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.langevin.metal.g++.3 index 423ebb7d63..25578a068c 100644 --- a/examples/PACKAGES/pimd/langevin_metal_units/log.14Jun23.langevin.metal.g++.3 +++ b/examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.langevin.metal.g++.3 @@ -1,5 +1,6 @@ -LAMMPS (28 Mar 2023) +LAMMPS (3 Aug 2023) Processor partition = 3 +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) using 1 OpenMP thread(s) per MPI task variable ibead uloop 99 pad @@ -29,10 +30,7 @@ timestep 0.001 velocity all create 0.0 ${ibead} velocity all create 0.0 04 -fix 1 all pimd/langevin ensemble npt integrator obabo thermostat PILE_L 1234 tau 1.0 temp 113.15 iso 1.0 barostat BZP taup 1.0 fixcom no - -Initializing PIMD BZP barostat... -The barostat mass is W = 2.3401256650800001e+01 +fix 1 all pimd/langevin method nmpimd ensemble npt integrator obabo thermostat PILE_L 1234 tau 1.0 temp 113.15 iso 1.0 barostat BZP taup 1.0 fixcom no thermo_style custom step temp f_1[*] vol press thermo 100 @@ -43,15 +41,6 @@ thermo_modify norm no run 1000 Generated 0 of 0 mixed pair_coeff terms from geometric mixing rule - -Initializing PI Langevin equation thermostat... 
-Bead ID | omega | tau | c1 | c2 - 0 0.00000000e+00 1.00000000e+00 9.99500125e-01 3.16148726e-02 - 1 8.37986825e+01 5.96668092e-03 9.19616372e-01 3.92817678e-01 - 2 1.18509233e+02 4.21908054e-03 8.88243614e-01 4.59372705e-01 - 3 8.37986825e+01 5.96668092e-03 9.19616372e-01 3.92817678e-01 -PILE_L thermostat successfully initialized! - Neighbor list info ... update: every = 1 steps, delay = 0 steps, check = yes max neighbors/atom: 2000, page size: 100000 @@ -66,31 +55,31 @@ Neighbor list info ... bin: standard Per MPI rank memory allocation (min/avg/max) = 3.121 | 3.121 | 3.121 Mbytes Step Temp f_1[1] f_1[2] f_1[3] f_1[4] f_1[5] f_1[6] f_1[7] f_1[8] f_1[9] f_1[10] f_1[11] f_1[12] f_1[13] f_1[14] f_1[15] Volume Press - 0 0 0 10.794425 -7.3457072 4.3005229 -21.877018 -8.7249482 2.9571502 -1743.5332 -698.49808 -172.07477 0 0 0.033460054 -0.37064378 4.216227 13402.228 -191.02389 - 100 426.01705 10.958338 12.206372 -7.8040582 42.886648 -23.396327 -1.980193 2.954003 -1888.0547 -1648.7118 -332.0298 -0.099139345 0.11500091 0.033044702 -0.3701566 42.83112 13235.861 464.39271 - 200 414.52703 10.662783 11.934129 -8.2331312 41.690123 -22.521598 -4.273021 2.9600599 -1906.2904 -1609.02 -265.94404 -0.20527926 0.49305948 0.031504957 -0.36829556 41.729191 12619.125 502.87052 - 300 424.85622 10.928478 11.681713 -8.357621 43.180131 -21.755813 -7.7032433 2.9714114 -1968.7685 290.49656 251.72564 -0.21935745 0.56300721 0.029467915 -0.36568855 43.236828 11803.2 1058.1162 - 400 485.80103 12.496148 12.255827 -8.3658975 49.109699 -24.824142 2.9744597 2.9794185 -2335.993 1368.7398 570.03286 -0.028366234 0.0094148316 0.028338146 -0.36416383 49.028096 11350.678 1570.2486 - 500 462.99006 11.909386 11.187609 -8.6934698 45.427771 -22.825143 16.22356 2.9684828 -2113.91 -272.84753 185.53392 0.091614289 0.098205455 0.028793585 -0.36478567 45.368325 11533.101 1014.2134 - 600 465.24407 11.967366 11.168375 -8.4422887 47.783726 -22.456104 6.837575 2.967236 -2023.8117 -918.27943 -2.4106994 0.093360761 
0.10198539 0.029589188 -0.36584873 47.725157 11851.775 864.12413 - 700 426.16111 10.962044 11.000011 -8.2855512 42.626187 -20.571698 -5.7252564 2.9560528 -1806.9448 -1418.2247 -148.41657 0.075011202 0.065835696 0.030359455 -0.36685105 42.558523 12160.301 614.76939 - 800 454.53159 11.691811 10.834606 -8.0654281 45.160336 -21.885719 -6.7745694 2.9575472 -1894.3641 -1329.3179 -136.42193 0.011114896 0.0014455064 0.030808183 -0.3674233 45.076543 12340.037 684.85907 - 900 441.72064 11.362278 10.4492 -8.0786302 45.604542 -21.816625 5.586068 2.9578604 -1890.4653 -1271.1107 -111.89061 -0.020285587 0.0048148677 0.030774258 -0.36738033 45.521594 12326.448 659.68525 - 1000 429.90929 11.058457 11.851933 -8.1578394 45.264242 -22.833545 6.9260573 2.960122 -2007.6188 -1179.7125 -70.907567 -0.062733519 0.046047757 0.030329191 -0.36681215 45.191633 12148.179 698.73278 -Loop time of 0.248175 on 1 procs for 1000 steps with 200 atoms + 0 0 0 10.794425 -7.3457072 4.3005229 3.3062167 -8.7249482 2.9571502 -1743.5332 -698.49808 -172.07477 0 0 0.033460054 -0.37064378 4.216227 13402.228 -191.02389 + 100 426.01705 10.958338 12.206372 -7.8040582 42.886648 2.9263894 -1.980193 2.954003 -1888.0547 -1648.7118 -332.0298 -0.099139345 0.11500091 0.033044702 -0.3701566 42.83112 13235.861 464.39271 + 200 414.52703 10.662783 11.934129 -8.2331312 41.690123 3.1450717 -4.273021 2.9600599 -1906.2904 -1609.02 -265.94404 -0.20527926 0.49305948 0.031504957 -0.36829556 41.729191 12619.125 502.87052 + 300 424.85622 10.928478 11.681713 -8.357621 43.180131 3.336518 -7.7032433 2.9714114 -1968.7685 290.49656 251.72564 -0.21935745 0.56300721 0.029467915 -0.36568855 43.236828 11803.2 1058.1162 + 400 485.80103 12.496148 12.255827 -8.3658975 49.109699 2.5694358 2.9744597 2.9794185 -2335.993 1368.7398 570.03286 -0.028366234 0.0094148316 0.028338146 -0.36416383 49.028096 11350.678 1570.2486 + 500 462.99006 11.909386 11.187609 -8.6934698 45.427771 3.0691855 16.22356 2.9684828 -2113.91 -272.84753 185.53392 0.091614289 
0.098205455 0.028793585 -0.36478567 45.368325 11533.101 1014.2134 + 600 465.24407 11.967366 11.168375 -8.4422887 47.783726 3.1614452 6.837575 2.967236 -2023.8117 -918.27943 -2.4106994 0.093360761 0.10198539 0.029589188 -0.36584873 47.725157 11851.775 864.12413 + 700 426.16111 10.962044 11.000011 -8.2855512 42.626187 3.6325468 -5.7252564 2.9560528 -1806.9448 -1418.2247 -148.41657 0.075011202 0.065835696 0.030359455 -0.36685105 42.558523 12160.301 614.76939 + 800 454.53159 11.691811 10.834606 -8.0654281 45.160336 3.3040415 -6.7745694 2.9575472 -1894.3641 -1329.3179 -136.42193 0.011114896 0.0014455064 0.030808183 -0.3674233 45.076543 12340.037 684.85907 + 900 441.72064 11.362278 10.4492 -8.0786302 45.604542 3.321315 5.586068 2.9578604 -1890.4653 -1271.1107 -111.89061 -0.020285587 0.0048148677 0.030774258 -0.36738033 45.521594 12326.448 659.68525 + 1000 429.90929 11.058457 11.851933 -8.1578394 45.264242 3.0670849 6.9260573 2.960122 -2007.6188 -1179.7125 -70.907567 -0.062733519 0.046047757 0.030329191 -0.36681215 45.191633 12148.179 698.73278 +Loop time of 0.218441 on 1 procs for 1000 steps with 200 atoms -Performance: 348.141 ns/day, 0.069 hours/ns, 4029.409 timesteps/s, 805.882 katom-step/s -98.1% CPU use with 1 MPI tasks x 1 OpenMP threads +Performance: 395.530 ns/day, 0.061 hours/ns, 4577.899 timesteps/s, 915.580 katom-step/s +99.8% CPU use with 1 MPI tasks x 1 OpenMP threads MPI task timing breakdown: Section | min time | avg time | max time |%varavg| %total --------------------------------------------------------------- -Pair | 0.14919 | 0.14919 | 0.14919 | 0.0 | 60.12 -Neigh | 0.00099112 | 0.00099112 | 0.00099112 | 0.0 | 0.40 -Comm | 0.0040992 | 0.0040992 | 0.0040992 | 0.0 | 1.65 -Output | 0.0001723 | 0.0001723 | 0.0001723 | 0.0 | 0.07 -Modify | 0.092299 | 0.092299 | 0.092299 | 0.0 | 37.19 -Other | | 0.00142 | | | 0.57 +Pair | 0.11655 | 0.11655 | 0.11655 | 0.0 | 53.35 +Neigh | 0.0010236 | 0.0010236 | 0.0010236 | 0.0 | 0.47 +Comm | 0.0035622 | 0.0035622 | 
0.0035622 | 0.0 | 1.63 +Output | 0.0001071 | 0.0001071 | 0.0001071 | 0.0 | 0.05 +Modify | 0.096348 | 0.096348 | 0.096348 | 0.0 | 44.11 +Other | | 0.0008537 | | | 0.39 Nlocal: 200 ave 200 max 200 min Histogram: 1 0 0 0 0 0 0 0 0 0 diff --git a/examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.pimd-langevin.metal.g++ b/examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.pimd-langevin.metal.g++ new file mode 100644 index 0000000000..a05d2d9aa8 --- /dev/null +++ b/examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.pimd-langevin.metal.g++ @@ -0,0 +1,2 @@ +LAMMPS (3 Aug 2023) +Running on 4 partitions of processors diff --git a/examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.pimd-langevin.metal.g++.0 b/examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.pimd-langevin.metal.g++.0 new file mode 100644 index 0000000000..2d9d049a49 --- /dev/null +++ b/examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.pimd-langevin.metal.g++.0 @@ -0,0 +1,103 @@ +LAMMPS (3 Aug 2023) +Processor partition = 0 +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +variable ibead uloop 99 pad + +units metal +atom_style atomic +atom_modify map yes +boundary p p p +pair_style lj/cut 9.5251 +read_data data.metalnpt${ibead} +read_data data.metalnpt01 +Reading data file ... + orthogonal box = (-11.876697 -11.876697 -11.876697) to (11.876697 11.876697 11.876697) + 1 by 1 by 1 MPI processor grid + reading atoms ... + 200 atoms + reading velocities ... 
+ 200 velocities + read_data CPU = 0.001 seconds + +pair_coeff * * 0.00965188 3.4 +pair_modify shift yes + +mass 1 39.948 + +timestep 0.001 + +velocity all create 0.0 ${ibead} +velocity all create 0.0 01 + +fix 1 all pimd/langevin method pimd ensemble nvt integrator obabo thermostat PILE_L 1234 tau 1.0 temp 113.15 taup 1.0 fixcom no + +thermo_style custom step temp f_1[*] vol press +thermo 100 +thermo_modify norm no + +# dump dcd all custom 100 ${ibead}.dcd id type xu yu zu vx vy vz ix iy iz fx fy fz +# dump_modify dcd sort id format line "%d %d %.16f %.16f %.16f %.16f %.16f %.16f %d %d %d %.16f %.16f %.16f" + +run 1000 +Generated 0 of 0 mixed pair_coeff terms from geometric mixing rule +Initializing PI Langevin equation thermostat... + Bead ID | omega | tau | c1 | c2 + 0 5.92546167e+01 1.00000000e+00 9.99500125e-01 3.16148726e-02 + 1 5.92546167e+01 1.00000000e+00 9.99500125e-01 3.16148726e-02 + 2 5.92546167e+01 1.00000000e+00 9.99500125e-01 3.16148726e-02 + 3 5.92546167e+01 1.00000000e+00 9.99500125e-01 3.16148726e-02 + PILE_L thermostat successfully initialized! + +Neighbor list info ... 
+ update: every = 1 steps, delay = 0 steps, check = yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 11.5251 + ghost atom cutoff = 11.5251 + binsize = 5.76255, bins = 5 5 5 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair lj/cut, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/3d + bin: standard +Per MPI rank memory allocation (min/avg/max) = 3.121 | 3.121 | 3.121 Mbytes + Step Temp f_1[1] f_1[2] f_1[3] f_1[4] f_1[5] f_1[6] f_1[7] f_1[8] f_1[9] f_1[10] Volume Press + 0 0 0 8.8893303 -7.3046601 4.3005229 3.3062167 0 0 -1743.5332 0 0 13402.228 -164.47373 + 100 248.24141 6.3854564 4.1458616 -7.7546467 9.6829291 3.3062167 0 0 -1743.5332 0.00061164884 127.22547 13402.228 26.894568 + 200 346.2569 8.9066861 2.6427185 -7.8943744 14.016847 3.3062167 0 0 -1743.5332 0.00088274242 177.45909 13402.228 91.225638 + 300 217.65314 5.5986414 7.0223362 -7.788449 18.162833 3.3062167 0 0 -1743.5332 0.00054181173 111.54876 13402.228 -125.00786 + 400 266.83825 6.8638187 6.2507813 -7.7241546 21.628032 3.3062167 0 0 -1743.5332 0.0007209203 136.75648 13402.228 21.104834 + 500 342.40379 8.8075736 5.1959052 -7.7020799 25.668758 3.3062167 0 0 -1743.5332 0.00087137898 175.48435 13402.228 208.60351 + 600 280.37754 7.2120867 8.0025846 -7.5954127 28.570788 3.3062167 0 0 -1743.5332 0.00072432598 143.69546 13402.228 162.09838 + 700 377.11625 9.700474 6.0049074 -7.5861377 30.034627 3.3062167 0 0 -1743.5332 0.00092377441 193.27472 13402.228 389.35575 + 800 378.36221 9.7325237 6.1704761 -7.6170017 31.447502 3.3062167 0 0 -1743.5332 0.00097254954 193.91329 13402.228 387.71781 + 900 271.99864 6.9965581 9.037081 -7.4781664 32.906719 3.3062167 0 0 -1743.5332 0.00074024142 139.40121 13402.228 266.3664 + 1000 362.4753 9.3238683 8.0266514 -7.4835536 34.914285 3.3062167 0 0 -1743.5332 0.00088800624 185.77114 13402.228 433.67079 +Loop time of 0.166656 on 1 procs for 1000 steps with 200 atoms + +Performance: 
518.435 ns/day, 0.046 hours/ns, 6000.401 timesteps/s, 1.200 Matom-step/s +99.5% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0.10478 | 0.10478 | 0.10478 | 0.0 | 62.87 +Neigh | 0.00096007 | 0.00096007 | 0.00096007 | 0.0 | 0.58 +Comm | 0.0035065 | 0.0035065 | 0.0035065 | 0.0 | 2.10 +Output | 0.0001037 | 0.0001037 | 0.0001037 | 0.0 | 0.06 +Modify | 0.056454 | 0.056454 | 0.056454 | 0.0 | 33.87 +Other | | 0.0008515 | | | 0.51 + +Nlocal: 200 ave 200 max 200 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 1360 ave 1360 max 1360 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 9544 ave 9544 max 9544 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 9544 +Ave neighs/atom = 47.72 +Neighbor list builds = 4 +Dangerous builds = 0 +Total wall time: 0:00:00 diff --git a/examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.pimd-langevin.metal.g++.1 b/examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.pimd-langevin.metal.g++.1 new file mode 100644 index 0000000000..9be069b960 --- /dev/null +++ b/examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.pimd-langevin.metal.g++.1 @@ -0,0 +1,95 @@ +LAMMPS (3 Aug 2023) +Processor partition = 1 +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +variable ibead uloop 99 pad + +units metal +atom_style atomic +atom_modify map yes +boundary p p p +pair_style lj/cut 9.5251 +read_data data.metalnpt${ibead} +read_data data.metalnpt02 +Reading data file ... + orthogonal box = (-11.876697 -11.876697 -11.876697) to (11.876697 11.876697 11.876697) + 1 by 1 by 1 MPI processor grid + reading atoms ... + 200 atoms + reading velocities ... 
+ 200 velocities + read_data CPU = 0.001 seconds + +pair_coeff * * 0.00965188 3.4 +pair_modify shift yes + +mass 1 39.948 + +timestep 0.001 + +velocity all create 0.0 ${ibead} +velocity all create 0.0 02 + +fix 1 all pimd/langevin method pimd ensemble nvt integrator obabo thermostat PILE_L 1234 tau 1.0 temp 113.15 taup 1.0 fixcom no + +thermo_style custom step temp f_1[*] vol press +thermo 100 +thermo_modify norm no + +# dump dcd all custom 100 ${ibead}.dcd id type xu yu zu vx vy vz ix iy iz fx fy fz +# dump_modify dcd sort id format line "%d %d %.16f %.16f %.16f %.16f %.16f %.16f %d %d %d %.16f %.16f %.16f" + +run 1000 +Generated 0 of 0 mixed pair_coeff terms from geometric mixing rule +Neighbor list info ... + update: every = 1 steps, delay = 0 steps, check = yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 11.5251 + ghost atom cutoff = 11.5251 + binsize = 5.76255, bins = 5 5 5 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair lj/cut, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/3d + bin: standard +Per MPI rank memory allocation (min/avg/max) = 3.121 | 3.121 | 3.121 Mbytes + Step Temp f_1[1] f_1[2] f_1[3] f_1[4] f_1[5] f_1[6] f_1[7] f_1[8] f_1[9] f_1[10] Volume Press + 0 0 0 8.4854554 -7.3063682 4.3005229 3.3062167 0 0 -1743.5332 0 0 13402.228 -167.65544 + 100 231.55472 5.9562285 3.9188988 -7.7552569 9.6829291 3.3062167 0 0 -1743.5332 0.00061164884 127.22547 13402.228 -9.7693407 + 200 366.33366 9.423116 2.3606144 -7.8893287 14.016847 3.3062167 0 0 -1743.5332 0.00088274242 177.45909 13402.228 131.05061 + 300 213.74457 5.4981021 6.4391043 -7.7947526 18.162833 3.3062167 0 0 -1743.5332 0.00054181173 111.54876 13402.228 -136.92734 + 400 273.60832 7.0379636 5.6777233 -7.7709858 21.628032 3.3062167 0 0 -1743.5332 0.0007209203 136.75648 13402.228 -0.14681392 + 500 338.99655 8.7199299 5.4335645 -7.7194465 25.668758 3.3062167 0 0 -1743.5332 0.00087137898 175.48435 
13402.228 190.1705 + 600 298.58126 7.6803369 7.2512164 -7.5741948 28.570788 3.3062167 0 0 -1743.5332 0.00072432598 143.69546 13402.228 226.78095 + 700 352.53291 9.0681226 5.4845895 -7.5875298 30.034627 3.3062167 0 0 -1743.5332 0.00092377441 193.27472 13402.228 335.39327 + 800 389.70585 10.024313 5.143907 -7.6218106 31.447502 3.3062167 0 0 -1743.5332 0.00097254954 193.91329 13402.228 409.36108 + 900 285.3019 7.3387547 7.6228894 -7.5140003 32.906719 3.3062167 0 0 -1743.5332 0.00074024142 139.40121 13402.228 266.93105 + 1000 345.35667 8.8835299 6.9652602 -7.5180013 34.914285 3.3062167 0 0 -1743.5332 0.00088800624 185.77114 13402.228 368.83819 +Loop time of 0.16666 on 1 procs for 1000 steps with 200 atoms + +Performance: 518.420 ns/day, 0.046 hours/ns, 6000.230 timesteps/s, 1.200 Matom-step/s +99.8% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0.10798 | 0.10798 | 0.10798 | 0.0 | 64.79 +Neigh | 0.00097784 | 0.00097784 | 0.00097784 | 0.0 | 0.59 +Comm | 0.0035304 | 0.0035304 | 0.0035304 | 0.0 | 2.12 +Output | 8.5625e-05 | 8.5625e-05 | 8.5625e-05 | 0.0 | 0.05 +Modify | 0.05322 | 0.05322 | 0.05322 | 0.0 | 31.93 +Other | | 0.0008694 | | | 0.52 + +Nlocal: 200 ave 200 max 200 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 1360 ave 1360 max 1360 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 9552 ave 9552 max 9552 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 9552 +Ave neighs/atom = 47.76 +Neighbor list builds = 4 +Dangerous builds = 0 +Total wall time: 0:00:00 diff --git a/examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.pimd-langevin.metal.g++.2 b/examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.pimd-langevin.metal.g++.2 new file mode 100644 index 0000000000..f5869bcb03 --- /dev/null +++ b/examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.pimd-langevin.metal.g++.2 @@ -0,0 +1,95 @@ 
+LAMMPS (3 Aug 2023) +Processor partition = 2 +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +variable ibead uloop 99 pad + +units metal +atom_style atomic +atom_modify map yes +boundary p p p +pair_style lj/cut 9.5251 +read_data data.metalnpt${ibead} +read_data data.metalnpt03 +Reading data file ... + orthogonal box = (-11.876697 -11.876697 -11.876697) to (11.876697 11.876697 11.876697) + 1 by 1 by 1 MPI processor grid + reading atoms ... + 200 atoms + reading velocities ... + 200 velocities + read_data CPU = 0.001 seconds + +pair_coeff * * 0.00965188 3.4 +pair_modify shift yes + +mass 1 39.948 + +timestep 0.001 + +velocity all create 0.0 ${ibead} +velocity all create 0.0 03 + +fix 1 all pimd/langevin method pimd ensemble nvt integrator obabo thermostat PILE_L 1234 tau 1.0 temp 113.15 taup 1.0 fixcom no + +thermo_style custom step temp f_1[*] vol press +thermo 100 +thermo_modify norm no + +# dump dcd all custom 100 ${ibead}.dcd id type xu yu zu vx vy vz ix iy iz fx fy fz +# dump_modify dcd sort id format line "%d %d %.16f %.16f %.16f %.16f %.16f %.16f %d %d %d %.16f %.16f %.16f" + +run 1000 +Generated 0 of 0 mixed pair_coeff terms from geometric mixing rule +Neighbor list info ... 
+ update: every = 1 steps, delay = 0 steps, check = yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 11.5251 + ghost atom cutoff = 11.5251 + binsize = 5.76255, bins = 5 5 5 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair lj/cut, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/3d + bin: standard +Per MPI rank memory allocation (min/avg/max) = 3.121 | 3.121 | 3.121 Mbytes + Step Temp f_1[1] f_1[2] f_1[3] f_1[4] f_1[5] f_1[6] f_1[7] f_1[8] f_1[9] f_1[10] Volume Press + 0 0 0 8.4016332 -7.320388 4.3005229 3.3062167 0 0 -1743.5332 0 0 13402.228 -175.34503 + 100 235.06814 6.0466034 4.1185166 -7.7660023 9.6829291 3.3062167 0 0 -1743.5332 0.00061164884 127.22547 13402.228 -7.6578222 + 200 341.9927 8.7969992 2.7767151 -7.9109058 14.016847 3.3062167 0 0 -1743.5332 0.00088274242 177.45909 13402.228 69.587081 + 300 206.29873 5.3065745 7.3388955 -7.7440046 18.162833 3.3062167 0 0 -1743.5332 0.00054181173 111.54876 13402.228 -118.3143 + 400 305.56268 7.8599181 5.7681208 -7.7110516 21.628032 3.3062167 0 0 -1743.5332 0.0007209203 136.75648 13402.228 107.63706 + 500 313.47536 8.0634543 5.5086382 -7.7030371 25.668758 3.3062167 0 0 -1743.5332 0.00087137898 175.48435 13402.228 145.14899 + 600 258.53638 6.6502715 8.1299001 -7.6530176 28.570788 3.3062167 0 0 -1743.5332 0.00072432598 143.69546 13402.228 92.36234 + 700 357.63679 9.1994085 6.539048 -7.6186515 30.034627 3.3062167 0 0 -1743.5332 0.00092377441 193.27472 13402.228 329.17147 + 800 391.32883 10.066061 5.7809035 -7.6148923 31.447502 3.3062167 0 0 -1743.5332 0.00097254954 193.91329 13402.228 415.13205 + 900 308.61185 7.9383512 8.9544585 -7.4803275 32.906719 3.3062167 0 0 -1743.5332 0.00074024142 139.40121 13402.228 341.46691 + 1000 317.70376 8.1722204 7.3013798 -7.4667312 34.914285 3.3062167 0 0 -1743.5332 0.00088800624 185.77114 13402.228 352.92253 +Loop time of 0.16666 on 1 procs for 1000 steps with 200 atoms + +Performance: 
518.420 ns/day, 0.046 hours/ns, 6000.235 timesteps/s, 1.200 Matom-step/s +98.6% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0.10509 | 0.10509 | 0.10509 | 0.0 | 63.06 +Neigh | 0.00096379 | 0.00096379 | 0.00096379 | 0.0 | 0.58 +Comm | 0.0035557 | 0.0035557 | 0.0035557 | 0.0 | 2.13 +Output | 7.8072e-05 | 7.8072e-05 | 7.8072e-05 | 0.0 | 0.05 +Modify | 0.05611 | 0.05611 | 0.05611 | 0.0 | 33.67 +Other | | 0.0008601 | | | 0.52 + +Nlocal: 200 ave 200 max 200 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 1364 ave 1364 max 1364 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 9545 ave 9545 max 9545 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 9545 +Ave neighs/atom = 47.725 +Neighbor list builds = 4 +Dangerous builds = 0 +Total wall time: 0:00:00 diff --git a/examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.pimd-langevin.metal.g++.3 b/examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.pimd-langevin.metal.g++.3 new file mode 100644 index 0000000000..da0767fe0d --- /dev/null +++ b/examples/PACKAGES/pimd/langevin_metal_units/log.16Nov23.pimd-langevin.metal.g++.3 @@ -0,0 +1,95 @@ +LAMMPS (3 Aug 2023) +Processor partition = 3 +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +variable ibead uloop 99 pad + +units metal +atom_style atomic +atom_modify map yes +boundary p p p +pair_style lj/cut 9.5251 +read_data data.metalnpt${ibead} +read_data data.metalnpt04 +Reading data file ... + orthogonal box = (-11.876697 -11.876697 -11.876697) to (11.876697 11.876697 11.876697) + 1 by 1 by 1 MPI processor grid + reading atoms ... + 200 atoms + reading velocities ... 
+ 200 velocities + read_data CPU = 0.001 seconds + +pair_coeff * * 0.00965188 3.4 +pair_modify shift yes + +mass 1 39.948 + +timestep 0.001 + +velocity all create 0.0 ${ibead} +velocity all create 0.0 04 + +fix 1 all pimd/langevin method pimd ensemble nvt integrator obabo thermostat PILE_L 1234 tau 1.0 temp 113.15 taup 1.0 fixcom no + +thermo_style custom step temp f_1[*] vol press +thermo 100 +thermo_modify norm no + +# dump dcd all custom 100 ${ibead}.dcd id type xu yu zu vx vy vz ix iy iz fx fy fz +# dump_modify dcd sort id format line "%d %d %.16f %.16f %.16f %.16f %.16f %.16f %d %d %d %.16f %.16f %.16f" + +run 1000 +Generated 0 of 0 mixed pair_coeff terms from geometric mixing rule +Neighbor list info ... + update: every = 1 steps, delay = 0 steps, check = yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 11.5251 + ghost atom cutoff = 11.5251 + binsize = 5.76255, bins = 5 5 5 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair lj/cut, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/3d + bin: standard +Per MPI rank memory allocation (min/avg/max) = 3.121 | 3.121 | 3.121 Mbytes + Step Temp f_1[1] f_1[2] f_1[3] f_1[4] f_1[5] f_1[6] f_1[7] f_1[8] f_1[9] f_1[10] Volume Press + 0 0 0 7.8012276 -7.3457072 4.3005229 3.3062167 0 0 -1743.5332 0 0 13402.228 -191.02389 + 100 241.19035 6.2040835 3.9473764 -7.7641902 9.6829291 3.3062167 0 0 -1743.5332 0.00061164884 127.22547 13402.228 3.5720518 + 200 325.21166 8.3653443 2.3529831 -7.9137212 14.016847 3.3062167 0 0 -1743.5332 0.00088274242 177.45909 13402.228 33.184125 + 300 209.19735 5.381135 6.7063061 -7.801056 18.162833 3.3062167 0 0 -1743.5332 0.00054181173 111.54876 13402.228 -154.10632 + 400 280.84513 7.2241142 5.8838331 -7.7320495 21.628032 3.3062167 0 0 -1743.5332 0.0007209203 136.75648 13402.228 45.624285 + 500 367.15726 9.4443014 5.2842629 -7.6643085 25.668758 3.3062167 0 0 -1743.5332 0.00087137898 175.48435 13402.228 
283.59979 + 600 294.68254 7.5800508 6.5104311 -7.6234652 28.570788 3.3062167 0 0 -1743.5332 0.00072432598 143.69546 13402.228 183.09906 + 700 356.64514 9.1739005 5.2769462 -7.6204507 30.034627 3.3062167 0 0 -1743.5332 0.00092377441 193.27472 13402.228 334.31754 + 800 360.77353 9.2800941 5.7976264 -7.6946985 31.447502 3.3062167 0 0 -1743.5332 0.00097254954 193.91329 13402.228 299.62001 + 900 291.14241 7.4889889 7.5124196 -7.5102882 32.906719 3.3062167 0 0 -1743.5332 0.00074024142 139.40121 13402.228 277.94834 + 1000 362.48694 9.3241677 6.8711151 -7.4856221 34.914285 3.3062167 0 0 -1743.5332 0.00088800624 185.77114 13402.228 428.98682 +Loop time of 0.166662 on 1 procs for 1000 steps with 200 atoms + +Performance: 518.414 ns/day, 0.046 hours/ns, 6000.167 timesteps/s, 1.200 Matom-step/s +98.5% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0.10817 | 0.10817 | 0.10817 | 0.0 | 64.90 +Neigh | 0.00096402 | 0.00096402 | 0.00096402 | 0.0 | 0.58 +Comm | 0.0044991 | 0.0044991 | 0.0044991 | 0.0 | 2.70 +Output | 8.5449e-05 | 8.5449e-05 | 8.5449e-05 | 0.0 | 0.05 +Modify | 0.052066 | 0.052066 | 0.052066 | 0.0 | 31.24 +Other | | 0.00088 | | | 0.53 + +Nlocal: 200 ave 200 max 200 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 1368 ave 1368 max 1368 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 9541 ave 9541 max 9541 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 9541 +Ave neighs/atom = 47.705 +Neighbor list builds = 4 +Dangerous builds = 0 +Total wall time: 0:00:00 diff --git a/examples/PACKAGES/pimd/langevin_metal_units/run.sh b/examples/PACKAGES/pimd/langevin_metal_units/run.sh index 2580ef1a41..8bac9231a3 100644 --- a/examples/PACKAGES/pimd/langevin_metal_units/run.sh +++ b/examples/PACKAGES/pimd/langevin_metal_units/run.sh @@ -1 +1,3 @@ -mpirun -np 4 $LMP -in in.lmp -p 4x1 -log log -screen screen +mpirun -np 4 
$LMP -in in.langevin.metal -p 4x1 -log log.langevin.metal -screen screen +mpirun -np 4 $LMP -in in.pimd-langevin.metal -p 4x1 -log log.pimd-langevin.metal -screen screen + diff --git a/examples/PACKAGES/qtb/methane_qbmsst/in.methane_qbmsst b/examples/PACKAGES/qtb/methane_qbmsst/in.methane_qbmsst new file mode 100644 index 0000000000..99d60e52d7 --- /dev/null +++ b/examples/PACKAGES/qtb/methane_qbmsst/in.methane_qbmsst @@ -0,0 +1,33 @@ +## This script first uses fix qtb to equilibrate liquid methane to an initial state with quantum nuclear correction and then simulate shock induced chemical reactions through the quantum thermal bath multi-scale shock technique +#The default system size may take a while to run you can change to a smaller size +variable x_rep equal 5 #x-direction replication number +variable y_rep equal 5 #y-direction replication number +variable z_rep equal 10 #z-direction replication number +variable temperature equal 110.0 #Target quantum temperature (K in real units) +variable delta_t equal 0.25 #MD timestep length (fs in real units) +variable damp_qtb equal 200 #1/gamma where gamma is the friction coefficient in quantum thermal bath (fs in real units) +variable v_msst equal 0.122 #Shock velocity (Angstrom/fs in metal units) +variable q_msst equal 25.0 #Box mass-like parameter in the MSST (mass^2/length^4, where mass=grams/mole and length=Angstrom in real units) +variable mu_msst equal 0.9 #Artificial viscosity in the MSST (mass/length/time, where mass=grams/mole, length=Angstrom and time=fs in real units) +variable tscale_msst equal 0.01 #Temperature reduction parameter in the MSST (unitless) +variable eta_qbmsst equal 1.0 #Coupling constant between the shock and the quantum thermal bath (unitless constant) + + +##The included part first constructs a liquid methane structure of a given size. It then uses fix qtb to equilibrate the computational cell to the specified temperature and pressure. 
+include methane_qtb.mod + + +##Shock compression with quantum nuclear corrections +reset_timestep 0 +fix shock all qbmsst z ${v_msst} q ${q_msst} mu ${mu_msst} tscale ${tscale_msst} damp ${damp_qtb} f_max 0.3 N_f 50 seed 35082 eta ${eta_qbmsst} beta 400 T_init ${temperature} +fix_modify shock energy yes +variable dhug equal f_shock[1] +variable dray equal f_shock[2] +variable lgr_vel equal f_shock[3] +variable lgr_pos equal f_shock[4] +variable T_qm equal f_shock[5] #Temperature with quantum nuclear correction +thermo_style custom step v_T_qm press etotal vol lx ly lz pzz v_dhug v_dray v_lgr_vel v_lgr_pos +thermo 20 +timestep ${delta_t} +#restart 1000 restart +run 500 diff --git a/examples/PACKAGES/qtb/methane_qbmsst/log.30Nov23.methane_qbmsst.g++.1 b/examples/PACKAGES/qtb/methane_qbmsst/log.30Nov23.methane_qbmsst.g++.1 new file mode 100644 index 0000000000..d46e62cfd5 --- /dev/null +++ b/examples/PACKAGES/qtb/methane_qbmsst/log.30Nov23.methane_qbmsst.g++.1 @@ -0,0 +1,280 @@ +LAMMPS (21 Nov 2023) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. 
(src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +## This script first uses fix qtb to equilibrate liquid methane to an initial state with quantum nuclear correction and then simulate shock induced chemical reactions through the quantum thermal bath multi-scale shock technique +#The default system size may take a while to run you can change to a smaller size +variable x_rep equal 5 #x-direction replication number +variable y_rep equal 5 #y-direction replication number +variable z_rep equal 10 #z-direction replication number +variable temperature equal 110.0 #Target quantum temperature (K in real units) +variable delta_t equal 0.25 #MD timestep length (fs in real units) +variable damp_qtb equal 200 #1/gamma where gamma is the friction coefficient in quantum thermal bath (fs in real units) +variable v_msst equal 0.122 #Shock velocity (Angstrom/fs in metal units) +variable q_msst equal 25.0 #Box mass-like parameter in the MSST (mass^2/length^4, where mass=grams/mole and length=Angstrom in real units) +variable mu_msst equal 0.9 #Artificial viscosity in the MSST (mass/length/time, where mass=grams/mole, length=Angstrom and time=fs in real units) +variable tscale_msst equal 0.01 #Temperature reduction parameter in the MSST (unitless) +variable eta_qbmsst equal 1.0 #Coupling constant between the shock and the quantum thermal bath (unitless constant) + + +##The included part first constructs a liquid methane structure of a given size. It then uses fix qtb to equilibrate the computational cell to the specified temperature and pressure. +include methane_qtb.mod +## This script first constructs a liquid methane structure of a given size. It then uses fix qtb to equilibrate the computational cell to the specified temperature and pressure. 
+ + +## This part defines units, methane structure, and atomic information +#General +units real +dimension 3 +boundary p p p +atom_style charge + +#Lattice +lattice custom 1.0 a1 3.9783624 0 0 a2 0 3.9783624 0 a3 0 0 3.9783624 basis 0.5 0.5 0.5 basis 0.663 0.663 0.663 basis 0.337 0.337 0.663 basis 0.663 0.337 0.337 basis 0.337 0.663 0.337 +Lattice spacing in x,y,z = 3.9783624 3.9783624 3.9783624 + +#Computational Cell +region simbox block 0 3.9783624 0 3.9783624 0 3.9783624 units box +create_box 2 simbox +Created orthogonal box = (0 0 0) to (3.9783624 3.9783624 3.9783624) + 1 by 1 by 1 MPI processor grid +create_atoms 1 box basis 1 1 basis 2 2 basis 3 2 basis 4 2 basis 5 2 +Created 5 atoms + using lattice units in orthogonal box = (0 0 0) to (3.9783624 3.9783624 3.9783624) + create_atoms CPU = 0.000 seconds +replicate ${x_rep} ${y_rep} ${z_rep} +replicate 5 ${y_rep} ${z_rep} +replicate 5 5 ${z_rep} +replicate 5 5 10 +Replication is creating a 5x5x10 = 250 times larger system... + orthogonal box = (0 0 0) to (19.891812 19.891812 39.783624) + 1 by 1 by 1 MPI processor grid + 1250 atoms + replicate CPU = 0.000 seconds + +#Atomic Information +mass 1 12.011150 +mass 2 1.007970 + + +## This part defines the reax pair potential in methane, force field coefficients are specified in "ffield.reax" +#Pair Potentials +pair_style reaxff NULL +pair_coeff * * ffield.reax C H +fix 0 all qeq/reax 1 0.0 10.0 1.0e-6 reaxff + +#Neighbor Style +neighbor 2.5 bin +neigh_modify every 10 delay 0 check no + + +## This part equilibrates liquid methane to a temperature of ${temperature}(unit temperatureture) with quantum nuclear effects +#Initialization +velocity all create ${temperature} 93 dist gaussian sum no mom yes rot yes loop all +velocity all create 110 93 dist gaussian sum no mom yes rot yes loop all + +#Setup output +thermo_style custom step temp press etotal vol +thermo 20 + +#Colored thermal bath +fix scapegoat_qtb all nve #NVE does the time integration +fix methane_qtb all qtb 
temp ${temperature} damp ${damp_qtb} seed 35082 f_max 0.3 N_f 50 #Change f_max if your Debye frequency is higher +fix methane_qtb all qtb temp 110 damp ${damp_qtb} seed 35082 f_max 0.3 N_f 50 +fix methane_qtb all qtb temp 110 damp 200 seed 35082 f_max 0.3 N_f 50 +timestep ${delta_t} +timestep 0.25 +run 500 #500 fs + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +- fix qeq/reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Neighbor list info ... 
+ update: every = 10 steps, delay = 0 steps, check = no + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 12.5 + ghost atom cutoff = 12.5 + binsize = 6.25, bins = 4 4 7 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/newtoff/ghost + stencil: full/ghost/bin/3d + bin: standard + (2) fix qeq/reax, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 201.3 | 201.3 | 201.3 Mbytes + Step Temp Press TotEng Volume + 0 110 -15717.706 -110869.31 15741.751 + 20 133.92166 8773.5364 -110569.51 15741.751 + 40 184.43244 -12136.835 -110378.92 15741.751 + 60 203.58164 6527.2188 -110190.9 15741.751 + 80 183.0518 -9667.6163 -110095.24 15741.751 + 100 236.07378 4393.5089 -109905.8 15741.751 + 120 226.94599 -5612.6845 -109708.46 15741.751 + 140 249.34156 988.50573 -109631.88 15741.751 + 160 255.08331 -1397.98 -109469.09 15741.751 + 180 281.64743 -1682.598 -109285.53 15741.751 + 200 303.76929 2594.8345 -109206.84 15741.751 + 220 311.6547 -4566.4307 -109053.21 15741.751 + 240 350.68316 5132.0272 -108918.26 15741.751 + 260 347.11102 -6078.5078 -108828.31 15741.751 + 280 366.56298 6373.2426 -108694.64 15741.751 + 300 393.62524 -6438.9321 -108521.5 15741.751 + 320 403.64821 5946.6873 -108487.83 15741.751 + 340 406.12883 -5053.5592 -108331.25 15741.751 + 360 450.60139 4323.0942 -108185.06 15741.751 + 380 429.46056 -3317.8604 -108146.84 15741.751 + 400 448.11876 3264.6165 -108048.01 15741.751 + 420 485.98657 -3047.3542 -107882.88 15741.751 + 440 463.23761 3088.3325 -107853.09 15741.751 + 460 504.27223 -1966.5888 -107689.56 15741.751 + 480 515.66783 2915.6322 -107550.83 15741.751 + 500 516.26369 -1733.2701 -107498.06 15741.751 +Loop time of 41.4818 on 1 procs for 500 steps with 1250 atoms + +Performance: 0.260 ns/day, 92.182 hours/ns, 12.053 timesteps/s, 15.067 katom-step/s 
+99.8% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 30.707 | 30.707 | 30.707 | 0.0 | 74.03 +Neigh | 2.2815 | 2.2815 | 2.2815 | 0.0 | 5.50 +Comm | 0.023963 | 0.023963 | 0.023963 | 0.0 | 0.06 +Output | 0.00073327 | 0.00073327 | 0.00073327 | 0.0 | 0.00 +Modify | 8.4653 | 8.4653 | 8.4653 | 0.0 | 20.41 +Other | | 0.00334 | | | 0.01 + +Nlocal: 1250 ave 1250 max 1250 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 8444 ave 8444 max 8444 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 601915 ave 601915 max 601915 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 601915 +Ave neighs/atom = 481.532 +Neighbor list builds = 50 +Dangerous builds not checked +unfix methane_qtb +unfix scapegoat_qtb + + +##Shock compression with quantum nuclear corrections +reset_timestep 0 +fix shock all qbmsst z ${v_msst} q ${q_msst} mu ${mu_msst} tscale ${tscale_msst} damp ${damp_qtb} f_max 0.3 N_f 50 seed 35082 eta ${eta_qbmsst} beta 400 T_init ${temperature} +fix shock all qbmsst z 0.122 q ${q_msst} mu ${mu_msst} tscale ${tscale_msst} damp ${damp_qtb} f_max 0.3 N_f 50 seed 35082 eta ${eta_qbmsst} beta 400 T_init ${temperature} +fix shock all qbmsst z 0.122 q 25 mu ${mu_msst} tscale ${tscale_msst} damp ${damp_qtb} f_max 0.3 N_f 50 seed 35082 eta ${eta_qbmsst} beta 400 T_init ${temperature} +fix shock all qbmsst z 0.122 q 25 mu 0.9 tscale ${tscale_msst} damp ${damp_qtb} f_max 0.3 N_f 50 seed 35082 eta ${eta_qbmsst} beta 400 T_init ${temperature} +fix shock all qbmsst z 0.122 q 25 mu 0.9 tscale 0.01 damp ${damp_qtb} f_max 0.3 N_f 50 seed 35082 eta ${eta_qbmsst} beta 400 T_init ${temperature} +fix shock all qbmsst z 0.122 q 25 mu 0.9 tscale 0.01 damp 200 f_max 0.3 N_f 50 seed 35082 eta ${eta_qbmsst} beta 400 T_init ${temperature} +fix shock all qbmsst z 0.122 q 25 mu 0.9 tscale 0.01 damp 200 f_max 0.3 N_f 50 seed 35082 eta 1 beta 400 
T_init ${temperature} +fix shock all qbmsst z 0.122 q 25 mu 0.9 tscale 0.01 damp 200 f_max 0.3 N_f 50 seed 35082 eta 1 beta 400 T_init 110 +QBMSST parameters: + Shock in z direction + Cell mass-like parameter qmass (units of mass^2/length^4) = 2.50000e+01 + Shock velocity = 1.22000e-01 + Artificial viscosity (units of mass/length/time) = 9.00000e-01 + Initial pressure calculated on first step + Initial volume calculated on first step + Initial energy calculated on first step +fix_modify shock energy yes +variable dhug equal f_shock[1] +variable dray equal f_shock[2] +variable lgr_vel equal f_shock[3] +variable lgr_pos equal f_shock[4] +variable T_qm equal f_shock[5] #Temperature with quantum nuclear correction +thermo_style custom step v_T_qm press etotal vol lx ly lz pzz v_dhug v_dray v_lgr_vel v_lgr_pos +thermo 20 +timestep ${delta_t} +timestep 0.25 +#restart 1000 restart +run 500 +Fix QBMSST v0 = 1.57418e+04 +Fix QBMSST p0 = -3.03801e+03 +Fix QBMSST e0 = to be -1.07498e+05 +Fix QBMSST initial strain rate of -1.02043e-04 established by reducing temperature by factor of 1.00000e-02 +Per MPI rank memory allocation (min/avg/max) = 201.4 | 201.4 | 201.4 Mbytes + Step v_T_qm Press TotEng Volume Lx Ly Lz Pzz v_dhug v_dray v_lgr_vel v_lgr_pos + 0 110 -1789.091 -107498.06 15741.751 19.891812 19.891812 39.783624 -3095.1546 1.9543098e-12 -57.148468 0 0 + 20 110 313.41128 -107231.57 15733.908 19.891812 19.891812 39.763803 1026.815 -35.805172 3755.1834 6.0783853e-05 -0.60983919 + 40 110 1248.5771 -107106.23 15726.494 19.891812 19.891812 39.745066 -277.53233 -52.672766 2158.1479 0.00011824041 -1.219383 + 60 110 -944.55947 -107017.75 15719.482 19.891812 19.891812 39.727345 1006.8843 -64.550247 3165.7346 0.00017258388 -1.8286479 + 80 110 2164.646 -107053.82 15712.848 19.891812 19.891812 39.710579 686.99949 -59.728513 2583.9345 0.00022399951 -2.4376489 + 100 110 -332.40946 -106996.04 15706.579 19.891812 19.891812 39.694734 1555.274 -67.472889 3204.6947 0.00027258815 -3.0464001 + 
120 110 2556.8172 -106828.33 15700.655 19.891812 19.891812 39.679765 -1406.2492 -90.123866 9.330762 0.00031849257 -3.6549157 + 140 110 -649.1633 -106851.95 15695.029 19.891812 19.891812 39.665545 3704.8784 -86.742267 4898.3193 0.00036209988 -4.2632077 + 160 110 2301.4774 -106787.04 15689.738 19.891812 19.891812 39.652174 -893.31294 -95.690383 91.247096 0.00040310452 -4.8712886 + 180 110 -701.59672 -106639.61 15684.711 19.891812 19.891812 39.63947 3211.2065 -115.27944 3997.3199 0.00044206086 -5.47917 + 200 110 3857.6228 -106696.51 15679.975 19.891812 19.891812 39.627501 -1722.9124 -107.93584 -1123.778 0.00047876602 -6.0868625 + 220 110 -1057.1346 -106590.95 15675.462 19.891812 19.891812 39.616094 3285.0876 -121.80821 3706.0326 0.00051374575 -6.6943761 + 240 110 2748.5299 -106428.9 15671.216 19.891812 19.891812 39.605364 172.15717 -143.78629 425.48974 0.00054664912 -7.3017201 + 260 110 64.99143 -106442.23 15667.188 19.891812 19.891812 39.595183 981.21139 -141.94851 1075.4979 0.00057787086 -7.9089043 + 280 110 1612.9607 -106412.77 15663.362 19.891812 19.891812 39.585514 662.48897 -145.93658 605.73218 0.00060752164 -8.5159364 + 300 110 1435.9566 -106307.06 15659.725 19.891812 19.891812 39.576323 759.46794 -160.13403 559.12791 0.00063570794 -9.1228243 + 320 110 -890.72712 -106332.6 15656.258 19.891812 19.891812 39.56756 234.14376 -156.75496 -103.07714 0.00066257852 -9.7295747 + 340 110 4270.0983 -106252.72 15652.976 19.891812 19.891812 39.559265 5411.2268 -167.0427 4944.423 0.00068801647 -10.336194 + 360 110 -2801.0763 -106105.96 15649.905 19.891812 19.891812 39.551504 -3276.3824 -187.5258 -3864.4213 0.00071181569 -10.942691 + 380 110 5566.9116 -106139.88 15646.926 19.891812 19.891812 39.543977 2737.1121 -182.43141 2031.4929 0.00073489745 -11.549071 + 400 110 -4432.9416 -106074.79 15644.09 19.891812 19.891812 39.536808 -4946.1908 -191.90759 -5763.8068 0.00075688314 -12.155339 + 420 52.599535 5582.8126 -105959.96 15641.311 19.891812 19.891812 39.529786 7869.5301 
-206.09135 6942.2136 0.00077841805 -12.761497 + 440 52.599535 -2861.6332 -106017.66 15638.758 19.891812 19.891812 39.523335 -1820.4742 -199.30721 -2848.5648 0.00079820063 -13.367553 + 460 52.599535 3942.7505 -105984.45 15636.294 19.891812 19.891812 39.517106 3327.0393 -203.24794 2201.6559 0.00081729985 -13.973511 + 480 52.599535 419.18442 -105827.32 15633.955 19.891812 19.891812 39.511194 -1910.6109 -224.9021 -3128.3482 0.00083542949 -14.579377 + 500 52.599535 117.60016 -105904.83 15631.655 19.891812 19.891812 39.505383 -603.40365 -214.36236 -1911.9203 0.00085325005 -15.185153 +Loop time of 41.8312 on 1 procs for 500 steps with 1250 atoms + +Performance: 0.258 ns/day, 92.958 hours/ns, 11.953 timesteps/s, 14.941 katom-step/s +99.8% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 31.016 | 31.016 | 31.016 | 0.0 | 74.15 +Neigh | 2.2849 | 2.2849 | 2.2849 | 0.0 | 5.46 +Comm | 0.020391 | 0.020391 | 0.020391 | 0.0 | 0.05 +Output | 0.0019403 | 0.0019403 | 0.0019403 | 0.0 | 0.00 +Modify | 8.505 | 8.505 | 8.505 | 0.0 | 20.33 +Other | | 0.003238 | | | 0.01 + +Nlocal: 1250 ave 1250 max 1250 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 8489 ave 8489 max 8489 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 606382 ave 606382 max 606382 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 606382 +Ave neighs/atom = 485.1056 +Neighbor list builds = 50 +Dangerous builds not checked +Total wall time: 0:01:23 diff --git a/examples/PACKAGES/qtb/methane_qbmsst/log.30Nov23.methane_qbmsst.g++.4 b/examples/PACKAGES/qtb/methane_qbmsst/log.30Nov23.methane_qbmsst.g++.4 new file mode 100644 index 0000000000..357f31a300 --- /dev/null +++ b/examples/PACKAGES/qtb/methane_qbmsst/log.30Nov23.methane_qbmsst.g++.4 @@ -0,0 +1,280 @@ +LAMMPS (21 Nov 2023) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. 
(src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +## This script first uses fix qtb to equilibrate liquid methane to an initial state with quantum nuclear correction and then simulate shock induced chemical reactions through the quantum thermal bath multi-scale shock technique +#The default system size may take a while to run you can change to a smaller size +variable x_rep equal 5 #x-direction replication number +variable y_rep equal 5 #y-direction replication number +variable z_rep equal 10 #z-direction replication number +variable temperature equal 110.0 #Target quantum temperature (K in real units) +variable delta_t equal 0.25 #MD timestep length (fs in real units) +variable damp_qtb equal 200 #1/gamma where gamma is the friction coefficient in quantum thermal bath (fs in real units) +variable v_msst equal 0.122 #Shock velocity (Angstrom/fs in metal units) +variable q_msst equal 25.0 #Box mass-like parameter in the MSST (mass^2/length^4, where mass=grams/mole and length=Angstrom in real units) +variable mu_msst equal 0.9 #Artificial viscosity in the MSST (mass/length/time, where mass=grams/mole, length=Angstrom and time=fs in real units) +variable tscale_msst equal 0.01 #Temperature reduction parameter in the MSST (unitless) +variable eta_qbmsst equal 1.0 #Coupling constant between the shock and the quantum thermal bath (unitless constant) + + +##The included part first constructs a liquid methane structure of a given size. It then uses fix qtb to equilibrate the computational cell to the specified temperature and pressure. +include methane_qtb.mod +## This script first constructs a liquid methane structure of a given size. It then uses fix qtb to equilibrate the computational cell to the specified temperature and pressure. 
+ + +## This part defines units, methane structure, and atomic information +#General +units real +dimension 3 +boundary p p p +atom_style charge + +#Lattice +lattice custom 1.0 a1 3.9783624 0 0 a2 0 3.9783624 0 a3 0 0 3.9783624 basis 0.5 0.5 0.5 basis 0.663 0.663 0.663 basis 0.337 0.337 0.663 basis 0.663 0.337 0.337 basis 0.337 0.663 0.337 +Lattice spacing in x,y,z = 3.9783624 3.9783624 3.9783624 + +#Computational Cell +region simbox block 0 3.9783624 0 3.9783624 0 3.9783624 units box +create_box 2 simbox +Created orthogonal box = (0 0 0) to (3.9783624 3.9783624 3.9783624) + 1 by 2 by 2 MPI processor grid +create_atoms 1 box basis 1 1 basis 2 2 basis 3 2 basis 4 2 basis 5 2 +Created 5 atoms + using lattice units in orthogonal box = (0 0 0) to (3.9783624 3.9783624 3.9783624) + create_atoms CPU = 0.000 seconds +replicate ${x_rep} ${y_rep} ${z_rep} +replicate 5 ${y_rep} ${z_rep} +replicate 5 5 ${z_rep} +replicate 5 5 10 +Replication is creating a 5x5x10 = 250 times larger system... + orthogonal box = (0 0 0) to (19.891812 19.891812 39.783624) + 1 by 1 by 4 MPI processor grid + 1250 atoms + replicate CPU = 0.000 seconds + +#Atomic Information +mass 1 12.011150 +mass 2 1.007970 + + +## This part defines the reax pair potential in methane, force field coefficients are specified in "ffield.reax" +#Pair Potentials +pair_style reaxff NULL +pair_coeff * * ffield.reax C H +fix 0 all qeq/reax 1 0.0 10.0 1.0e-6 reaxff + +#Neighbor Style +neighbor 2.5 bin +neigh_modify every 10 delay 0 check no + + +## This part equilibrates liquid methane to a temperature of ${temperature}(unit temperatureture) with quantum nuclear effects +#Initialization +velocity all create ${temperature} 93 dist gaussian sum no mom yes rot yes loop all +velocity all create 110 93 dist gaussian sum no mom yes rot yes loop all + +#Setup output +thermo_style custom step temp press etotal vol +thermo 20 + +#Colored thermal bath +fix scapegoat_qtb all nve #NVE does the time integration +fix methane_qtb all qtb 
temp ${temperature} damp ${damp_qtb} seed 35082 f_max 0.3 N_f 50 #Change f_max if your Debye frequency is higher +fix methane_qtb all qtb temp 110 damp ${damp_qtb} seed 35082 f_max 0.3 N_f 50 +fix methane_qtb all qtb temp 110 damp 200 seed 35082 f_max 0.3 N_f 50 +timestep ${delta_t} +timestep 0.25 +run 500 #500 fs + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +- fix qeq/reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Neighbor list info ... 
+ update: every = 10 steps, delay = 0 steps, check = no + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 12.5 + ghost atom cutoff = 12.5 + binsize = 6.25, bins = 4 4 7 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/newtoff/ghost + stencil: full/ghost/bin/3d + bin: standard + (2) fix qeq/reax, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 125.2 | 125.3 | 125.4 Mbytes + Step Temp Press TotEng Volume + 0 110 -15717.706 -110869.31 15741.751 + 20 133.92621 9503.0083 -110548.47 15741.751 + 40 188.1524 -13687.131 -110344.93 15741.751 + 60 205.85747 8421.3906 -110165.58 15741.751 + 80 185.08989 -11337.006 -110026.24 15741.751 + 100 245.36524 5805.0694 -109841.66 15741.751 + 120 218.83661 -7740.8838 -109674.15 15741.751 + 140 254.6075 3396.3936 -109589.89 15741.751 + 160 262.20963 -3574.2575 -109413.81 15741.751 + 180 297.89271 917.40867 -109204.79 15741.751 + 200 315.54026 -371.17448 -109129.45 15741.751 + 220 323.90745 -2811.4367 -108988.12 15741.751 + 240 358.28478 3972.8358 -108848.95 15741.751 + 260 359.12673 -6289.689 -108788.08 15741.751 + 280 376.47656 6851.3186 -108664.07 15741.751 + 300 404.30975 -7805.7238 -108482.75 15741.751 + 320 410.9097 7696.2518 -108421.87 15741.751 + 340 406.19092 -8175.1703 -108311.84 15741.751 + 360 460.37085 7630.6182 -108139.6 15741.751 + 380 413.96355 -7515.2307 -108150.73 15741.751 + 400 452.17428 7148.0954 -108027.39 15741.751 + 420 467.1725 -6662.4113 -107842.71 15741.751 + 440 481.03775 6117.6862 -107759.03 15741.751 + 460 509.03937 -4095.0215 -107648.46 15741.751 + 480 533.22373 2211.9169 -107481.89 15741.751 + 500 517.71195 -214.23969 -107489.48 15741.751 +Loop time of 22.2711 on 4 procs for 500 steps with 1250 atoms + +Performance: 0.485 ns/day, 49.491 hours/ns, 22.451 timesteps/s, 28.063 
katom-step/s +99.3% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 13.689 | 15.195 | 16.732 | 27.7 | 68.23 +Neigh | 1.5325 | 1.5496 | 1.5658 | 1.0 | 6.96 +Comm | 0.073366 | 1.6105 | 3.116 | 85.1 | 7.23 +Output | 0.00052192 | 0.00057642 | 0.00073657 | 0.0 | 0.00 +Modify | 3.896 | 3.9129 | 3.9306 | 0.6 | 17.57 +Other | | 0.00241 | | | 0.01 + +Nlocal: 312.5 ave 317 max 308 min +Histogram: 1 0 0 1 0 0 1 0 0 1 +Nghost: 4982 ave 4995 max 4967 min +Histogram: 1 0 0 0 0 1 1 0 0 1 +Neighs: 172509 ave 174182 max 170676 min +Histogram: 1 0 0 1 0 0 0 1 0 1 + +Total # of neighbors = 690037 +Ave neighs/atom = 552.0296 +Neighbor list builds = 50 +Dangerous builds not checked +unfix methane_qtb +unfix scapegoat_qtb + + +##Shock compression with quantum nuclear corrections +reset_timestep 0 +fix shock all qbmsst z ${v_msst} q ${q_msst} mu ${mu_msst} tscale ${tscale_msst} damp ${damp_qtb} f_max 0.3 N_f 50 seed 35082 eta ${eta_qbmsst} beta 400 T_init ${temperature} +fix shock all qbmsst z 0.122 q ${q_msst} mu ${mu_msst} tscale ${tscale_msst} damp ${damp_qtb} f_max 0.3 N_f 50 seed 35082 eta ${eta_qbmsst} beta 400 T_init ${temperature} +fix shock all qbmsst z 0.122 q 25 mu ${mu_msst} tscale ${tscale_msst} damp ${damp_qtb} f_max 0.3 N_f 50 seed 35082 eta ${eta_qbmsst} beta 400 T_init ${temperature} +fix shock all qbmsst z 0.122 q 25 mu 0.9 tscale ${tscale_msst} damp ${damp_qtb} f_max 0.3 N_f 50 seed 35082 eta ${eta_qbmsst} beta 400 T_init ${temperature} +fix shock all qbmsst z 0.122 q 25 mu 0.9 tscale 0.01 damp ${damp_qtb} f_max 0.3 N_f 50 seed 35082 eta ${eta_qbmsst} beta 400 T_init ${temperature} +fix shock all qbmsst z 0.122 q 25 mu 0.9 tscale 0.01 damp 200 f_max 0.3 N_f 50 seed 35082 eta ${eta_qbmsst} beta 400 T_init ${temperature} +fix shock all qbmsst z 0.122 q 25 mu 0.9 tscale 0.01 damp 200 f_max 0.3 N_f 50 seed 35082 eta 1 
beta 400 T_init ${temperature} +fix shock all qbmsst z 0.122 q 25 mu 0.9 tscale 0.01 damp 200 f_max 0.3 N_f 50 seed 35082 eta 1 beta 400 T_init 110 +QBMSST parameters: + Shock in z direction + Cell mass-like parameter qmass (units of mass^2/length^4) = 2.50000e+01 + Shock velocity = 1.22000e-01 + Artificial viscosity (units of mass/length/time) = 9.00000e-01 + Initial pressure calculated on first step + Initial volume calculated on first step + Initial energy calculated on first step +fix_modify shock energy yes +variable dhug equal f_shock[1] +variable dray equal f_shock[2] +variable lgr_vel equal f_shock[3] +variable lgr_pos equal f_shock[4] +variable T_qm equal f_shock[5] #Temperature with quantum nuclear correction +thermo_style custom step v_T_qm press etotal vol lx ly lz pzz v_dhug v_dray v_lgr_vel v_lgr_pos +thermo 20 +timestep ${delta_t} +timestep 0.25 +#restart 1000 restart +run 500 +Fix QBMSST v0 = 1.57418e+04 +Fix QBMSST p0 = -5.88788e+01 +Fix QBMSST e0 = to be -1.07489e+05 +Fix QBMSST initial strain rate of -1.02186e-04 established by reducing temperature by factor of 1.00000e-02 +Per MPI rank memory allocation (min/avg/max) = 126.1 | 126.1 | 126.1 Mbytes + Step v_T_qm Press TotEng Volume Lx Ly Lz Pzz v_dhug v_dray v_lgr_vel v_lgr_pos + 0 110 -270.21489 -107489.48 15741.751 19.891812 19.891812 39.783624 -118.93551 0 -60.056661 0 0 + 20 110 -2180.5877 -107208.2 15733.847 19.891812 19.891812 39.763648 223.47326 -37.773571 -29.703539 6.1258507e-05 -0.60983836 + 40 110 5004.864 -107109.95 15726.306 19.891812 19.891812 39.744592 3335.6341 -50.920246 2784.77 0.00011969641 -1.2193771 + 60 110 -4549.1199 -106949.22 15719.136 19.891812 19.891812 39.72647 -4461.4212 -72.656651 -5295.3675 0.00017526726 -1.8286321 + 80 110 6695.6833 -106942.12 15712.214 19.891812 19.891812 39.708976 5333.2741 -73.356417 4226.043 0.00022891479 -2.4376137 + 100 110 -5337.7671 -106930.78 15705.644 19.891812 19.891812 39.692373 -2682.4224 -75.129348 -4049.0157 0.00027982924 -3.0463347 
+ 120 110 6526.5587 -106736.15 15699.334 19.891812 19.891812 39.676424 7038.2375 -100.8809 5422.5046 0.00032873694 -3.6548061 + 140 110 -3284.0472 -106761.36 15693.36 19.891812 19.891812 39.661329 -3999.8116 -97.977739 -5851.3636 0.00037502973 -4.2630401 + 160 110 4792.0537 -106662.24 15687.56 19.891812 19.891812 39.64667 4484.6905 -110.86184 2404.1579 0.00041998006 -4.8710464 + 180 110 -1253.5849 -106532.38 15682.037 19.891812 19.891812 39.632711 -723.78287 -128.58314 -3022.3825 0.00046278801 -5.4788331 + 200 110 3276.2225 -106488.13 15676.725 19.891812 19.891812 39.619286 5117.4749 -134.15782 2609.1518 0.00050395806 -6.0864105 + 220 110 -553.17982 -106421.17 15671.675 19.891812 19.891812 39.606524 -1360.8796 -143.56979 -4068.5641 0.00054309397 -6.6937871 + 240 110 1329.8793 -106309.56 15666.794 19.891812 19.891812 39.594187 775.35326 -158.40869 -2125.0508 0.00058092605 -7.300972 + 260 110 1809.8974 -106360.42 15662.075 19.891812 19.891812 39.582262 3075.2725 -151.39659 -11.4097 0.00061749364 -7.9079706 + 280 110 24.534819 -106310.46 15657.56 19.891812 19.891812 39.570852 1043.8352 -158.25965 -2221.0935 0.00065248454 -8.5147908 + 300 110 2854.2862 -106150.2 15653.217 19.891812 19.891812 39.559874 3727.6844 -179.54521 291.27132 0.00068614803 -9.1214393 + 320 110 -776.61228 -106199.04 15649.041 19.891812 19.891812 39.549322 -1285.3999 -173.42703 -4886.655 0.00071850756 -9.7279234 + 340 110 3778.2238 -106201.03 15644.958 19.891812 19.891812 39.539001 3694.462 -172.6926 -68.017561 0.00075015694 -10.334247 + 360 110 -1505.9413 -106025.15 15641.031 19.891812 19.891812 39.529078 -1491.3768 -196.81063 -5408.8787 0.00078058882 -10.940416 + 380 110 3414.9599 -106071.49 15637.176 19.891812 19.891812 39.519335 4956.6752 -189.93327 886.98409 0.00081046454 -11.546435 + 400 110 -947.2273 -106003.34 15633.49 19.891812 19.891812 39.510021 726.91825 -199.51619 -3488.2795 0.0008390284 -12.152307 + 420 46.681884 1610.2414 -105884.37 15629.905 19.891812 19.891812 39.500961 -1377.8364 
-215.72223 -5734.5653 0.00086681188 -12.758039 + 440 46.681884 2290.4653 -105923.83 15626.371 19.891812 19.891812 39.492029 6296.7177 -209.55961 1800.4591 0.00089420243 -13.363632 + 460 46.681884 -2068.0472 -105879.44 15622.969 19.891812 19.891812 39.483432 -5629.8405 -216.88862 -10260.4 0.00092056659 -13.969092 + 480 46.681884 5011.06 -105748.92 15619.556 19.891812 19.891812 39.474805 8649.5097 -232.72756 3884.1859 0.00094702163 -14.574419 + 500 46.681884 -3314.8335 -105829.23 15616.305 19.891812 19.891812 39.46659 -5120.4784 -223.60669 -10014.132 0.00097221364 -15.179618 +Loop time of 26.5748 on 4 procs for 500 steps with 1250 atoms + +Performance: 0.406 ns/day, 59.055 hours/ns, 18.815 timesteps/s, 23.519 katom-step/s +99.3% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 16.259 | 18.109 | 19.999 | 31.1 | 68.14 +Neigh | 1.8265 | 1.8477 | 1.8638 | 1.0 | 6.95 +Comm | 0.045073 | 1.9349 | 3.7845 | 95.1 | 7.28 +Output | 0.0019058 | 0.0019666 | 0.0021202 | 0.2 | 0.01 +Modify | 4.6619 | 4.6782 | 4.699 | 0.6 | 17.60 +Other | | 0.002774 | | | 0.01 + +Nlocal: 312.5 ave 318 max 307 min +Histogram: 1 0 0 0 1 1 0 0 0 1 +Nghost: 5059 ave 5080 max 5039 min +Histogram: 1 0 1 0 0 0 1 0 0 1 +Neighs: 173854 ave 176807 max 170839 min +Histogram: 1 0 0 1 0 0 1 0 0 1 + +Total # of neighbors = 695414 +Ave neighs/atom = 556.3312 +Neighbor list builds = 50 +Dangerous builds not checked +Total wall time: 0:00:49 diff --git a/examples/PACKAGES/qtb/methane_qbmsst/methane_qbmsst.in b/examples/PACKAGES/qtb/methane_qbmsst/methane_qbmsst.in deleted file mode 100644 index 507164a732..0000000000 --- a/examples/PACKAGES/qtb/methane_qbmsst/methane_qbmsst.in +++ /dev/null @@ -1,33 +0,0 @@ -## This script first uses fix qtb to equilibrate liquid methane to an initial state with quantum nuclear correction and then simulate shock induced chemical 
reactions through the quantum thermal bath multi-scale shock technique -#The default system size may take a while to run you can change to a smaller size -variable x_rep equal 5 #x-direction replication number -variable y_rep equal 5 #y-direction replication number -variable z_rep equal 10 #z-direction replication number -variable temperature equal 110.0 #Target quantum temperature (K in real units) -variable delta_t equal 0.25 #MD timestep length (fs in real units) -variable damp_qtb equal 200 #1/gamma where gamma is the friction coefficient in quantum thermal bath (fs in real units) -variable v_msst equal 0.122 #Shock velocity (Angstrom/fs in metal units) -variable q_msst equal 25.0 #Box mass-like parameter in the MSST (mass^2/length^4, where mass=grams/mole and length=Angstrom in real units) -variable mu_msst equal 0.9 #Artificial viscosity in the MSST (mass/length/time, where mass=grams/mole, length=Angstrom and time=fs in real units) -variable tscale_msst equal 0.01 #Temperature reduction parameter in the MSST (unitless) -variable eta_qbmsst equal 1.0 #Coupling constant between the shock and the quantum thermal bath (unitless constant) - - -##The included part first constructs a liquid methane structure of a given size. It then uses fix qtb to equilibrate the computational cell to the specified temperature and pressure. 
-include methane_qtb.mod - - -##Shock compression with quantum nuclear corrections -reset_timestep 0 -fix shock all qbmsst z ${v_msst} q ${q_msst} mu ${mu_msst} tscale ${tscale_msst} damp ${damp_qtb} f_max 0.3 N_f 50 seed 35082 eta ${eta_qbmsst} beta 400 T_init ${temperature} -fix_modify shock energy yes -variable dhug equal f_shock[1] -variable dray equal f_shock[2] -variable lgr_vel equal f_shock[3] -variable lgr_pos equal f_shock[4] -variable T_qm equal f_shock[5] #Temperature with quantum nuclear correction -thermo_style custom step v_T_qm press etotal vol lx ly lz pzz v_dhug v_dray v_lgr_vel v_lgr_pos -thermo 100 -timestep ${delta_t} -restart 1000 restart -run 5000 diff --git a/examples/PACKAGES/qtb/methane_qbmsst/methane_qtb.mod b/examples/PACKAGES/qtb/methane_qbmsst/methane_qtb.mod index 65bfc5d7f7..ac1735ca7b 100644 --- a/examples/PACKAGES/qtb/methane_qbmsst/methane_qtb.mod +++ b/examples/PACKAGES/qtb/methane_qbmsst/methane_qtb.mod @@ -3,62 +3,62 @@ ## This part defines units, methane structure, and atomic information #General -units real -dimension 3 -boundary p p p -atom_style charge +units real +dimension 3 +boundary p p p +atom_style charge #Lattice -lattice custom 1.0 & - a1 3.9783624 0 0 & - a2 0 3.9783624 0 & - a3 0 0 3.9783624 & - & - basis 0.5 0.5 0.5 & - basis 0.663 0.663 0.663 & - basis 0.337 0.337 0.663 & - basis 0.663 0.337 0.337 & - basis 0.337 0.663 0.337 +lattice custom 1.0 & + a1 3.9783624 0 0 & + a2 0 3.9783624 0 & + a3 0 0 3.9783624 & + & + basis 0.5 0.5 0.5 & + basis 0.663 0.663 0.663 & + basis 0.337 0.337 0.663 & + basis 0.663 0.337 0.337 & + basis 0.337 0.663 0.337 #Computational Cell -region simbox block 0 3.9783624 0 3.9783624 0 3.9783624 units box -create_box 2 simbox -create_atoms 1 box & - basis 1 1 & - basis 2 2 & - basis 3 2 & - basis 4 2 & - basis 5 2 -replicate ${x_rep} ${y_rep} ${z_rep} +region simbox block 0 3.9783624 0 3.9783624 0 3.9783624 units box +create_box 2 simbox +create_atoms 1 box & + basis 1 1 & + basis 2 2 & + 
basis 3 2 & + basis 4 2 & + basis 5 2 +replicate ${x_rep} ${y_rep} ${z_rep} #Atomic Information -mass 1 12.011150 -mass 2 1.007970 +mass 1 12.011150 +mass 2 1.007970 ## This part defines the reax pair potential in methane, force field coefficients are specified in "ffield.reax" #Pair Potentials -pair_style reax/c NULL -pair_coeff * * ffield.reax C H -fix 0 all qeq/reax 1 0.0 10.0 1.0e-6 reax/c +pair_style reaxff NULL +pair_coeff * * ffield.reax C H +fix 0 all qeq/reax 1 0.0 10.0 1.0e-6 reaxff #Neighbor Style -neighbor 2.5 bin -neigh_modify every 10 delay 0 check no +neighbor 2.5 bin +neigh_modify every 10 delay 0 check no ## This part equilibrates liquid methane to a temperature of ${temperature}(unit temperatureture) with quantum nuclear effects #Initialization -velocity all create ${temperature} 93 dist gaussian sum no mom yes rot yes loop all +velocity all create ${temperature} 93 dist gaussian sum no mom yes rot yes loop all #Setup output -thermo_style custom step temp press etotal vol -thermo 100 +thermo_style custom step temp press etotal vol +thermo 20 #Colored thermal bath -fix scapegoat_qtb all nve #NVE does the time integration -fix methane_qtb all qtb temp ${temperature} damp ${damp_qtb} seed 35082 f_max 0.3 N_f 50 #Change f_max if your Debye frequency is higher -timestep ${delta_t} -run 2000 #500 fs -unfix methane_qtb -unfix scapegoat_qtb +fix scapegoat_qtb all nve #NVE does the time integration +fix methane_qtb all qtb temp ${temperature} damp ${damp_qtb} seed 35082 f_max 0.3 N_f 50 #Change f_max if your Debye frequency is higher +timestep ${delta_t} +run 500 #500 fs +unfix methane_qtb +unfix scapegoat_qtb diff --git a/examples/PACKAGES/qtb/methane_qtb/in.methane_qtb b/examples/PACKAGES/qtb/methane_qtb/in.methane_qtb new file mode 100644 index 0000000000..bfaa8706c4 --- /dev/null +++ b/examples/PACKAGES/qtb/methane_qtb/in.methane_qtb @@ -0,0 +1,70 @@ +## This script first constructs a liquid methane structure of a given size. 
It then uses fix qtb to equilibrate the computational cell to the specified temperature and pressure. +variable x_rep equal 2 #x-direction replication number +variable y_rep equal 2 #y-direction replication number +variable z_rep equal 2 #z-direction replication number +variable temperature equal 110.0 #Target quantum temperature (K in real units) +variable delta_t equal 0.25 #MD timestep length (fs in real units) +variable damp_qtb equal 200 #1/gamma where gamma is the friction coefficient in quantum thermal bath (fs in real units) + + +## This part defines units, methane structure, and atomic information +#General +units real +dimension 3 +boundary p p p +atom_style charge + +#Lattice +lattice custom 1.0 & + a1 3.9783624 0 0 & + a2 0 3.9783624 0 & + a3 0 0 3.9783624 & + & + basis 0.5 0.5 0.5 & + basis 0.663 0.663 0.663 & + basis 0.337 0.337 0.663 & + basis 0.663 0.337 0.337 & + basis 0.337 0.663 0.337 + +#Computational Cell +region simbox block 0 3.9783624 0 3.9783624 0 3.9783624 units box +create_box 2 simbox +create_atoms 1 box & + basis 1 1 & + basis 2 2 & + basis 3 2 & + basis 4 2 & + basis 5 2 +replicate ${x_rep} ${y_rep} ${z_rep} + +#Atomic Information +mass 1 12.011150 +mass 2 1.007970 + + +## This part defines the reax pair potential in methane, force field coefficients are specified in "ffield.reax" +#Pair Potentials +pair_style reaxff NULL +pair_coeff * * ffield.reax C H +fix 0 all qeq/reax 1 0.0 10.0 1.0e-6 reaxff + +#Neighbor Style +neighbor 2.5 bin +neigh_modify every 10 delay 0 check no + + +## This part equilibrates liquid methane to a temperature of ${temperature}(unit temperatureture) with quantum nuclear effects +#Initialization +velocity all create ${temperature} 93 dist gaussian sum no mom yes rot yes loop all + +#Setup output +thermo_style custom step temp press etotal vol +thermo 50 + +#Colored thermal bath +fix scapegoat_qtb all nve #NVE does the time integration +fix methane_qtb all qtb temp ${temperature} damp ${damp_qtb} seed 35082 f_max 
0.3 N_f 50 #Change f_max if your Debye frequency is higher +timestep ${delta_t} +run 1000 +unfix methane_qtb +unfix scapegoat_qtb diff --git a/examples/PACKAGES/qtb/methane_qtb/log.30Nov23.methane_qtb.g++.1 b/examples/PACKAGES/qtb/methane_qtb/log.30Nov23.methane_qtb.g++.1 new file mode 100644 index 0000000000..b6efdb6360 --- /dev/null +++ b/examples/PACKAGES/qtb/methane_qtb/log.30Nov23.methane_qtb.g++.1 @@ -0,0 +1,174 @@ +LAMMPS (21 Nov 2023) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +## This script first constructs a liquid methane structure of a given size. It then uses fix qtb to equilibrate the computational cell to the specified temperature and pressure. +variable x_rep equal 2 #x-direction replication number +variable y_rep equal 2 #y-direction replication number +variable z_rep equal 2 #z-direction replication number +variable temperature equal 110.0 #Target quantum temperature (K in real units) +variable delta_t equal 0.25 #MD timestep length (fs in real units) +variable damp_qtb equal 200 #1/gamma where gamma is the friction coefficient in quantum thermal bath (fs in real units) + + +## This part defines units, methane structure, and atomic information +#General +units real +dimension 3 +boundary p p p +atom_style charge + +#Lattice +lattice custom 1.0 a1 3.9783624 0 0 a2 0 3.9783624 0 a3 0 0 3.9783624 basis 0.5 0.5 0.5 basis 0.663 0.663 0.663 basis 0.337 0.337 0.663 basis 0.663 0.337 0.337 basis 0.337 0.663 0.337 +Lattice spacing in x,y,z = 3.9783624 3.9783624 3.9783624 + +#Computational Cell +region simbox block 0 3.9783624 0 3.9783624 0 3.9783624 units box +create_box 2 simbox +Created orthogonal box = (0 0 0) to (3.9783624 3.9783624 3.9783624) + 1 by 1 by 1 MPI processor grid +create_atoms 1 box basis 1 1 basis 2 2 basis 3 2 basis 4 2 basis 5 2 +Created 5 atoms + using lattice units in orthogonal box = (0 0 0) to (3.9783624 3.9783624 3.9783624) + create_atoms CPU = 0.000 
seconds +replicate ${x_rep} ${y_rep} ${z_rep} +replicate 2 ${y_rep} ${z_rep} +replicate 2 2 ${z_rep} +replicate 2 2 2 +Replication is creating a 2x2x2 = 8 times larger system... + orthogonal box = (0 0 0) to (7.9567248 7.9567248 7.9567248) + 1 by 1 by 1 MPI processor grid + 40 atoms + replicate CPU = 0.001 seconds + +#Atomic Information +mass 1 12.011150 +mass 2 1.007970 + + +## This part defines the reax pair potential in methane, force field coefficients are specified in "ffield.reax" +#Pair Potentials +pair_style reaxff NULL +pair_coeff * * ffield.reax C H +fix 0 all qeq/reax 1 0.0 10.0 1.0e-6 reaxff + +#Neighbor Style +neighbor 2.5 bin +neigh_modify every 10 delay 0 check no + + +## This part equilibrates liquid methane to a temperature of ${temperature}(unit temperatureture) with quantum nuclear effects +#Initialization +velocity all create ${temperature} 93 dist gaussian sum no mom yes rot yes loop all +velocity all create 110 93 dist gaussian sum no mom yes rot yes loop all + +#Setup output +thermo_style custom step temp press etotal vol +thermo 50 + +#Colored thermal bath +fix scapegoat_qtb all nve #NVE does the time integration +fix methane_qtb all qtb temp ${temperature} damp ${damp_qtb} seed 35082 f_max 0.3 N_f 50 #Change f_max if your Debye frequency is higher +fix methane_qtb all qtb temp 110 damp ${damp_qtb} seed 35082 f_max 0.3 N_f 50 +fix methane_qtb all qtb temp 110 damp 200 seed 35082 f_max 0.3 N_f 50 +timestep ${delta_t} +timestep 0.25 +run 1000 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. 
Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +- fix qeq/reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Neighbor list info ... + update: every = 10 steps, delay = 0 steps, check = no + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 12.5 + ghost atom cutoff = 12.5 + binsize = 6.25, bins = 2 2 2 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/newtoff/ghost + stencil: full/ghost/bin/3d + bin: standard + (2) fix qeq/reax, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 82.45 | 82.45 | 82.45 Mbytes + Step Temp Press TotEng Volume + 0 110 -15746.508 -3548.1354 503.73603 + 50 191.27715 -7523.7503 -3530.4179 503.73603 + 100 214.09982 12016.892 -3517.4544 503.73603 + 150 317.38272 3098.2254 -3499.5793 503.73603 + 200 338.76362 -4484.9241 -3490.3649 503.73603 + 250 402.05826 3973.0488 -3474.81 503.73603 + 300 340.80076 11193.4 -3470.8029 503.73603 + 350 556.19747 8086.3266 -3451.5937 503.73603 + 400 566.8737 5499.5505 -3439.2335 503.73603 + 450 643.2883 -8270.5736 -3426.0767 503.73603 + 500 613.09742 -12406.229 -3419.8547 503.73603 + 550 669.28891 -9757.601 -3410.7281 503.73603 + 600 600.66922 10407.403 -3408.3776 503.73603 + 650 573.1485 30971.977 -3405.0744 503.73603 + 700 726.22146 29573.798 -3386.3167 503.73603 + 750 777.22659 
13265.88 -3378.8462 503.73603 + 800 652.46476 -9231.9331 -3388.7229 503.73603 + 850 679.18414 -19802.254 -3384.6321 503.73603 + 900 711.60594 -18792.396 -3373.2944 503.73603 + 950 865.79013 -2837.6042 -3363.2971 503.73603 + 1000 884.14995 6160.4875 -3360.6295 503.73603 +Loop time of 7.87 on 1 procs for 1000 steps with 40 atoms + +Performance: 2.745 ns/day, 8.744 hours/ns, 127.065 timesteps/s, 5.083 katom-step/s +99.7% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 6.3662 | 6.3662 | 6.3662 | 0.0 | 80.89 +Neigh | 0.97757 | 0.97757 | 0.97757 | 0.0 | 12.42 +Comm | 0.013653 | 0.013653 | 0.013653 | 0.0 | 0.17 +Output | 0.00042319 | 0.00042319 | 0.00042319 | 0.0 | 0.01 +Modify | 0.50971 | 0.50971 | 0.50971 | 0.0 | 6.48 +Other | | 0.00248 | | | 0.03 + +Nlocal: 40 ave 40 max 40 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 2552 ave 2552 max 2552 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 25110 ave 25110 max 25110 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 25110 +Ave neighs/atom = 627.75 +Neighbor list builds = 100 +Dangerous builds not checked +unfix methane_qtb +unfix scapegoat_qtb +Total wall time: 0:00:07 diff --git a/examples/PACKAGES/qtb/methane_qtb/log.30Nov23.methane_qtb.g++.4 b/examples/PACKAGES/qtb/methane_qtb/log.30Nov23.methane_qtb.g++.4 new file mode 100644 index 0000000000..c68935df87 --- /dev/null +++ b/examples/PACKAGES/qtb/methane_qtb/log.30Nov23.methane_qtb.g++.4 @@ -0,0 +1,174 @@ +LAMMPS (21 Nov 2023) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +## This script first constructs a liquid methane structure of a given size. It then uses fix qtb to equilibrate the computational cell to the specified temperature and pressure. 
+variable x_rep equal 2 #x-direction replication number +variable y_rep equal 2 #y-direction replication number +variable z_rep equal 2 #z-direction replication number +variable temperature equal 110.0 #Target quantum temperature (K in real units) +variable delta_t equal 0.25 #MD timestep length (fs in real units) +variable damp_qtb equal 200 #1/gamma where gamma is the friction coefficient in quantum thermal bath (fs in real units) + + +## This part defines units, methane structure, and atomic information +#General +units real +dimension 3 +boundary p p p +atom_style charge + +#Lattice +lattice custom 1.0 a1 3.9783624 0 0 a2 0 3.9783624 0 a3 0 0 3.9783624 basis 0.5 0.5 0.5 basis 0.663 0.663 0.663 basis 0.337 0.337 0.663 basis 0.663 0.337 0.337 basis 0.337 0.663 0.337 +Lattice spacing in x,y,z = 3.9783624 3.9783624 3.9783624 + +#Computational Cell +region simbox block 0 3.9783624 0 3.9783624 0 3.9783624 units box +create_box 2 simbox +Created orthogonal box = (0 0 0) to (3.9783624 3.9783624 3.9783624) + 1 by 2 by 2 MPI processor grid +create_atoms 1 box basis 1 1 basis 2 2 basis 3 2 basis 4 2 basis 5 2 +Created 5 atoms + using lattice units in orthogonal box = (0 0 0) to (3.9783624 3.9783624 3.9783624) + create_atoms CPU = 0.000 seconds +replicate ${x_rep} ${y_rep} ${z_rep} +replicate 2 ${y_rep} ${z_rep} +replicate 2 2 ${z_rep} +replicate 2 2 2 +Replication is creating a 2x2x2 = 8 times larger system... 
+ orthogonal box = (0 0 0) to (7.9567248 7.9567248 7.9567248) + 1 by 2 by 2 MPI processor grid + 40 atoms + replicate CPU = 0.000 seconds + +#Atomic Information +mass 1 12.011150 +mass 2 1.007970 + + +## This part defines the reax pair potential in methane, force field coefficients are specified in "ffield.reax" +#Pair Potentials +pair_style reaxff NULL +pair_coeff * * ffield.reax C H +fix 0 all qeq/reax 1 0.0 10.0 1.0e-6 reaxff + +#Neighbor Style +neighbor 2.5 bin +neigh_modify every 10 delay 0 check no + + +## This part equilibrates liquid methane to a temperature of ${temperature}(unit temperatureture) with quantum nuclear effects +#Initialization +velocity all create ${temperature} 93 dist gaussian sum no mom yes rot yes loop all +velocity all create 110 93 dist gaussian sum no mom yes rot yes loop all + +#Setup output +thermo_style custom step temp press etotal vol +thermo 50 + +#Colored thermal bath +fix scapegoat_qtb all nve #NVE does the time integration +fix methane_qtb all qtb temp ${temperature} damp ${damp_qtb} seed 35082 f_max 0.3 N_f 50 #Change f_max if your Debye frequency is higher +fix methane_qtb all qtb temp 110 damp ${damp_qtb} seed 35082 f_max 0.3 N_f 50 +fix methane_qtb all qtb temp 110 damp 200 seed 35082 f_max 0.3 N_f 50 +timestep ${delta_t} +timestep 0.25 +run 1000 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +- fix qeq/reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. 
Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Neighbor list info ... + update: every = 10 steps, delay = 0 steps, check = no + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 12.5 + ghost atom cutoff = 12.5 + binsize = 6.25, bins = 2 2 2 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/newtoff/ghost + stencil: full/ghost/bin/3d + bin: standard + (2) fix qeq/reax, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 72.47 | 72.47 | 72.47 Mbytes + Step Temp Press TotEng Volume + 0 110 -15746.508 -3548.1354 503.73603 + 50 183.74482 -917.59204 -3534.8518 503.73603 + 100 200.96363 16464.403 -3517.1456 503.73603 + 150 255.33305 14801.963 -3507.7299 503.73603 + 200 328.11626 5119.3618 -3498.0388 503.73603 + 250 356.88626 -11306.151 -3485.1746 503.73603 + 300 284.7363 -25276.091 -3479.4732 503.73603 + 350 434.79382 -23326.29 -3471.7491 503.73603 + 400 414.69602 2800.9047 -3465.7225 503.73603 + 450 464.61242 20775.398 -3449.1675 503.73603 + 500 671.43369 15272.581 -3433.9453 503.73603 + 550 534.01157 -8545.4173 -3427.6672 503.73603 + 600 512.69648 -15904.052 -3417.8071 503.73603 + 650 604.62051 -1777.9242 -3419.4324 503.73603 + 700 650.2196 20108.199 -3415.8902 503.73603 + 750 677.45644 21721.335 -3409.1253 503.73603 + 800 707.98295 171.53756 -3413.4048 503.73603 + 850 740.68522 -23846.627 -3384.7024 503.73603 + 900 739.55514 -22742.841 -3377.091 503.73603 + 950 769.44821 -7060.9388 -3389.817 503.73603 + 1000 987.6246 -0.47618437 -3373.9263 503.73603 +Loop time of 6.80367 on 4 procs for 1000 steps with 40 atoms + +Performance: 3.175 ns/day, 
7.560 hours/ns, 146.980 timesteps/s, 5.879 katom-step/s +99.3% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 5.1801 | 5.3491 | 5.5417 | 6.1 | 78.62 +Neigh | 0.76934 | 0.77281 | 0.77567 | 0.3 | 11.36 +Comm | 0.072213 | 0.26492 | 0.4339 | 27.2 | 3.89 +Output | 0.00032365 | 0.00035547 | 0.00044739 | 0.0 | 0.01 +Modify | 0.41139 | 0.41424 | 0.4179 | 0.4 | 6.09 +Other | | 0.00226 | | | 0.03 + +Nlocal: 10 ave 10 max 10 min +Histogram: 4 0 0 0 0 0 0 0 0 0 +Nghost: 1950 ave 1950 max 1950 min +Histogram: 4 0 0 0 0 0 0 0 0 0 +Neighs: 6434.5 ave 6447 max 6427 min +Histogram: 2 0 0 0 0 1 0 0 0 1 + +Total # of neighbors = 25738 +Ave neighs/atom = 643.45 +Neighbor list builds = 100 +Dangerous builds not checked +unfix methane_qtb +unfix scapegoat_qtb +Total wall time: 0:00:06 diff --git a/examples/PACKAGES/qtb/methane_qtb/methane_qtb.in b/examples/PACKAGES/qtb/methane_qtb/methane_qtb.in deleted file mode 100644 index e31f0695b9..0000000000 --- a/examples/PACKAGES/qtb/methane_qtb/methane_qtb.in +++ /dev/null @@ -1,70 +0,0 @@ -## This script first constructs a liquid methane structure of a given size. It then uses fix qtb to equilibrate the computational cell to the specified temperature and pressure. 
-variable x_rep equal 2 #x-direction replication number -variable y_rep equal 2 #y-direction replication number -variable z_rep equal 2 #z-direction replication number -variable temperature equal 110.0 #Target quantum temperature (K in real units) -variable delta_t equal 0.25 #MD timestep length (fs in real units) -variable damp_qtb equal 200 #1/gamma where gamma is the friction coefficient in quantum thermal bath (fs in real units) - - -## This part defines units, methane structure, and atomic information -#General -units real -dimension 3 -boundary p p p -atom_style charge - -#Lattice -lattice custom 1.0 & - a1 3.9783624 0 0 & - a2 0 3.9783624 0 & - a3 0 0 3.9783624 & - & - basis 0.5 0.5 0.5 & - basis 0.663 0.663 0.663 & - basis 0.337 0.337 0.663 & - basis 0.663 0.337 0.337 & - basis 0.337 0.663 0.337 - -#Computational Cell -region simbox block 0 3.9783624 0 3.9783624 0 3.9783624 units box -create_box 2 simbox -create_atoms 1 box & - basis 1 1 & - basis 2 2 & - basis 3 2 & - basis 4 2 & - basis 5 2 -replicate ${x_rep} ${y_rep} ${z_rep} - -#Atomic Information -mass 1 12.011150 -mass 2 1.007970 - - -## This part defines the reax pair potential in methane, force field coefficients are specified in "ffield.reax" -#Pair Potentials -pair_style reax/c NULL -pair_coeff * * ffield.reax C H -fix 0 all qeq/reax 1 0.0 10.0 1.0e-6 reax/c - -#Neighbor Style -neighbor 2.5 bin -neigh_modify every 10 delay 0 check no - - -## This part equilibrates liquid methane to a temperature of ${temperature}(unit temperatureture) with quantum nuclear effects -#Initialization -velocity all create ${temperature} 93 dist gaussian sum no mom yes rot yes loop all - -#Setup output -thermo_style custom step temp press etotal vol -thermo 100 - -#Colored thermal bath -fix scapegoat_qtb all nve #NVE does the time integration -fix methane_qtb all qtb temp ${temperature} damp ${damp_qtb} seed 35082 f_max 0.3 N_f 50 #Change f_max if your Debye frequency is higher -timestep ${delta_t} -run 3000 #750 fs 
-unfix methane_qtb -unfix scapegoat_qtb diff --git a/examples/amoeba/amoeba_ubiquitin.key b/examples/amoeba/amoeba_ubiquitin.key index 2870d071d4..3d63525258 100644 --- a/examples/amoeba/amoeba_ubiquitin.key +++ b/examples/amoeba/amoeba_ubiquitin.key @@ -12,7 +12,8 @@ ewald ewald-alpha 0.4 pewald-alpha 0.5 ewald-cutoff 7.0 -#pme-grid 60 45 45 pme-grid 60 48 48 -pme-order 5 polar-eps 0.00001 +#pme-grid 15 12 12 +#polar-eps 0.0002 +pme-order 5 diff --git a/examples/qeq/in.qeq.reaxc b/examples/qeq/in.qeq.reaxff similarity index 90% rename from examples/qeq/in.qeq.reaxc rename to examples/qeq/in.qeq.reaxff index 5271c74671..a60cca269e 100644 --- a/examples/qeq/in.qeq.reaxc +++ b/examples/qeq/in.qeq.reaxff @@ -1,19 +1,19 @@ -# This example demonstrates the use of various fix qeq variants with pair reax/c +# This example demonstrates the use of various fix qeq variants with pair reaxff # You can comment in/out various versions below # # 1) Fix qeq/shielded generates the same results compared to fix qeq/reax when -# used with pair_style reax/c, provided that the QEq parameters are the same. +# used with pair_style reaxff, provided that the QEq parameters are the same. # # 2) Fix qeq/point and fix qeq/dynamic generate comparable results provided that # the QEq parameters are the same. These two styles can also be used with -# pair_style reax/c. +# pair_style reaxff. 
units real atom_style charge read_data data.CHO -pair_style reax/c NULL checkqeq no +pair_style reaxff NULL checkqeq no pair_coeff * * ffield.reax.cho H C O neighbor 1 bin diff --git a/examples/qeq/log.27Nov18.qeq.reaxc.g++.1 b/examples/qeq/log.27Nov18.qeq.reaxc.g++.1 deleted file mode 100644 index c88acc39f2..0000000000 --- a/examples/qeq/log.27Nov18.qeq.reaxc.g++.1 +++ /dev/null @@ -1,116 +0,0 @@ -LAMMPS (27 Nov 2018) - using 1 OpenMP thread(s) per MPI task -# This example demonstrates the use of various fix qeq variants with pair reax/c -# You can comment in/out various versions below -# -# 1) Fix qeq/shielded generates the same results compared to fix qeq/reax when -# used with pair_style reax/c, provided that the QEq parameters are the same. -# -# 2) Fix qeq/point and fix qeq/dynamic generate comparable results provided that -# the QEq parameters are the same. These two styles can also be used with -# pair_style reax/c. - -units real -atom_style charge - -read_data data.CHO - orthogonal box = (0 0 0) to (25 25 25) - 1 by 1 by 1 MPI processor grid - reading atoms ... 
- 105 atoms - -pair_style reax/c NULL checkqeq no -pair_coeff * * ffield.reax.cho H C O -Reading potential file ffield.reax.cho with DATE: 2011-02-18 - -neighbor 1 bin -neigh_modify every 1 delay 0 check yes - -group type1 type 1 -60 atoms in group type1 -compute charge1 type1 property/atom q -compute q1 type1 reduce ave c_charge1 -group type2 type 2 -25 atoms in group type2 -compute charge2 type2 property/atom q -compute q2 type2 reduce ave c_charge2 -group type3 type 3 -20 atoms in group type3 -compute charge3 type3 property/atom q -compute q3 type3 reduce ave c_charge3 -variable qtot equal count(type1)*c_q1+count(type2)*c_q2+count(type3)*c_q3 - -thermo_style custom step pe c_q1 c_q2 c_q3 v_qtot -thermo 1 - -velocity all create 300.0 1281937 -fix 1 all nve - -fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq.reax -#fix 2 all qeq/shielded 1 10.0 1e-6 400 param.qeq1 -#fix 2 all qeq/point 1 10.0 1e-6 400 param.qeq1 -#fix 2 all qeq/dynamic 1 10.0 1e-3 100 param.qeq1 - -timestep 0.25 - -run 10 -Neighbor list info ... 
- update every 1 steps, delay 0 steps, check yes - max neighbors/atom: 2000, page size: 100000 - master list distance cutoff = 11 - ghost atom cutoff = 11 - binsize = 5.5, bins = 5 5 5 - 2 neighbor lists, perpetual/occasional/extra = 2 0 0 - (1) pair reax/c, perpetual - attributes: half, newton off, ghost - pair build: half/bin/newtoff/ghost - stencil: half/ghost/bin/3d/newtoff - bin: standard - (2) fix qeq/reax, perpetual, copy from (1) - attributes: half, newton off, ghost - pair build: copy - stencil: none - bin: none -Per MPI rank memory allocation (min/avg/max) = 16.65 | 16.65 | 16.65 Mbytes -Step PotEng c_q1 c_q2 c_q3 v_qtot - 0 -10226.557 0.095634063 -0.15658793 -0.091167279 4.4408921e-16 - 1 -10225.799 0.095649584 -0.1566219 -0.091171371 7.1054274e-15 - 2 -10223.656 0.095669731 -0.15666714 -0.091175264 -6.4392935e-15 - 3 -10220.276 0.095691257 -0.15671597 -0.091178813 2.6645353e-15 - 4 -10215.894 0.095714363 -0.15676887 -0.091182006 -3.1086245e-15 - 5 -10210.804 0.095733863 -0.15681398 -0.09118412 6.6613381e-16 - 6 -10205.342 0.095751253 -0.15685427 -0.091185918 -1.110223e-15 - 7 -10199.848 0.095762028 -0.1568795 -0.091186707 8.8817842e-15 - 8 -10194.646 0.095767243 -0.15689184 -0.091186932 -2.4424907e-15 - 9 -10190.016 0.095760528 -0.15687664 -0.091185782 -4.4408921e-16 - 10 -10186.168 0.095748006 -0.15684815 -0.09118383 1.110223e-15 -Loop time of 0.0322483 on 1 procs for 10 steps with 105 atoms - -Performance: 6.698 ns/day, 3.583 hours/ns, 310.094 timesteps/s -99.8% CPU use with 1 MPI tasks x 1 OpenMP threads - -MPI task timing breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 0.026229 | 0.026229 | 0.026229 | 0.0 | 81.34 -Neigh | 0 | 0 | 0 | 0.0 | 0.00 -Comm | 5.2214e-05 | 5.2214e-05 | 5.2214e-05 | 0.0 | 0.16 -Output | 0.00027299 | 0.00027299 | 0.00027299 | 0.0 | 0.85 -Modify | 0.0056667 | 0.0056667 | 0.0056667 | 0.0 | 17.57 -Other | | 2.694e-05 | | | 0.08 - 
-Nlocal: 105 ave 105 max 105 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Nghost: 512 ave 512 max 512 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Neighs: 3417 ave 3417 max 3417 min -Histogram: 1 0 0 0 0 0 0 0 0 0 - -Total # of neighbors = 3417 -Ave neighs/atom = 32.5429 -Neighbor list builds = 0 -Dangerous builds = 0 - -Please see the log.cite file for references relevant to this simulation - -Total wall time: 0:00:00 diff --git a/examples/qeq/log.27Nov18.qeq.reaxc.g++.4 b/examples/qeq/log.27Nov18.qeq.reaxc.g++.4 deleted file mode 100644 index c54a99577e..0000000000 --- a/examples/qeq/log.27Nov18.qeq.reaxc.g++.4 +++ /dev/null @@ -1,116 +0,0 @@ -LAMMPS (27 Nov 2018) - using 1 OpenMP thread(s) per MPI task -# This example demonstrates the use of various fix qeq variants with pair reax/c -# You can comment in/out various versions below -# -# 1) Fix qeq/shielded generates the same results compared to fix qeq/reax when -# used with pair_style reax/c, provided that the QEq parameters are the same. -# -# 2) Fix qeq/point and fix qeq/dynamic generate comparable results provided that -# the QEq parameters are the same. These two styles can also be used with -# pair_style reax/c. - -units real -atom_style charge - -read_data data.CHO - orthogonal box = (0 0 0) to (25 25 25) - 1 by 2 by 2 MPI processor grid - reading atoms ... 
- 105 atoms - -pair_style reax/c NULL checkqeq no -pair_coeff * * ffield.reax.cho H C O -Reading potential file ffield.reax.cho with DATE: 2011-02-18 - -neighbor 1 bin -neigh_modify every 1 delay 0 check yes - -group type1 type 1 -60 atoms in group type1 -compute charge1 type1 property/atom q -compute q1 type1 reduce ave c_charge1 -group type2 type 2 -25 atoms in group type2 -compute charge2 type2 property/atom q -compute q2 type2 reduce ave c_charge2 -group type3 type 3 -20 atoms in group type3 -compute charge3 type3 property/atom q -compute q3 type3 reduce ave c_charge3 -variable qtot equal count(type1)*c_q1+count(type2)*c_q2+count(type3)*c_q3 - -thermo_style custom step pe c_q1 c_q2 c_q3 v_qtot -thermo 1 - -velocity all create 300.0 1281937 -fix 1 all nve - -fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq.reax -#fix 2 all qeq/shielded 1 10.0 1e-6 400 param.qeq1 -#fix 2 all qeq/point 1 10.0 1e-6 400 param.qeq1 -#fix 2 all qeq/dynamic 1 10.0 1e-3 100 param.qeq1 - -timestep 0.25 - -run 10 -Neighbor list info ... 
- update every 1 steps, delay 0 steps, check yes - max neighbors/atom: 2000, page size: 100000 - master list distance cutoff = 11 - ghost atom cutoff = 11 - binsize = 5.5, bins = 5 5 5 - 2 neighbor lists, perpetual/occasional/extra = 2 0 0 - (1) pair reax/c, perpetual - attributes: half, newton off, ghost - pair build: half/bin/newtoff/ghost - stencil: half/ghost/bin/3d/newtoff - bin: standard - (2) fix qeq/reax, perpetual, copy from (1) - attributes: half, newton off, ghost - pair build: copy - stencil: none - bin: none -Per MPI rank memory allocation (min/avg/max) = 10.83 | 11.69 | 12.52 Mbytes -Step PotEng c_q1 c_q2 c_q3 v_qtot - 0 -10226.557 0.095633919 -0.15658765 -0.091167194 1.7763568e-15 - 1 -10225.799 0.0956503 -0.15662357 -0.09117143 2.8865799e-15 - 2 -10223.656 0.095669684 -0.15666698 -0.091175327 1.110223e-15 - 3 -10220.276 0.095691296 -0.15671615 -0.091178696 0 - 4 -10215.894 0.09571384 -0.15676787 -0.091181678 8.8817842e-16 - 5 -10210.804 0.095734178 -0.15681468 -0.09118418 1.3322676e-15 - 6 -10205.342 0.095751126 -0.15685409 -0.091185769 4.4408921e-16 - 7 -10199.848 0.095762403 -0.15688037 -0.091186751 0 - 8 -10194.646 0.095766449 -0.15689014 -0.091186673 -4.4408921e-16 - 9 -10190.016 0.095761078 -0.15687818 -0.09118551 -4.4408921e-16 - 10 -10186.168 0.095747223 -0.15684634 -0.091183742 0 -Loop time of 0.0185181 on 4 procs for 10 steps with 105 atoms - -Performance: 11.664 ns/day, 2.058 hours/ns, 540.011 timesteps/s -92.5% CPU use with 4 MPI tasks x 1 OpenMP threads - -MPI task timing breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 0.0097179 | 0.01078 | 0.012052 | 0.8 | 58.21 -Neigh | 0 | 0 | 0 | 0.0 | 0.00 -Comm | 0.00041604 | 0.0017492 | 0.0028496 | 2.1 | 9.45 -Output | 0.00041103 | 0.00046283 | 0.00051498 | 0.0 | 2.50 -Modify | 0.0051849 | 0.0052357 | 0.0052917 | 0.1 | 28.27 -Other | | 0.0002902 | | | 1.57 - -Nlocal: 26.25 ave 35 max 15 min -Histogram: 1 
0 0 1 0 0 0 0 0 2 -Nghost: 300 ave 357 max 239 min -Histogram: 2 0 0 0 0 0 0 0 0 2 -Neighs: 1025.25 ave 1468 max 405 min -Histogram: 1 0 0 0 1 0 0 0 0 2 - -Total # of neighbors = 4101 -Ave neighs/atom = 39.0571 -Neighbor list builds = 0 -Dangerous builds = 0 - -Please see the log.cite file for references relevant to this simulation - -Total wall time: 0:00:00 diff --git a/examples/qeq/log.30Nov23.reaxff.g++.1 b/examples/qeq/log.30Nov23.reaxff.g++.1 new file mode 100644 index 0000000000..c79d9fa662 --- /dev/null +++ b/examples/qeq/log.30Nov23.reaxff.g++.1 @@ -0,0 +1,146 @@ +LAMMPS (21 Nov 2023) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +# This example demonstrates the use of various fix qeq variants with pair reaxff +# You can comment in/out various versions below +# +# 1) Fix qeq/shielded generates the same results compared to fix qeq/reax when +# used with pair_style reaxff, provided that the QEq parameters are the same. +# +# 2) Fix qeq/point and fix qeq/dynamic generate comparable results provided that +# the QEq parameters are the same. These two styles can also be used with +# pair_style reaxff. + +units real +atom_style charge + +read_data data.CHO +Reading data file ... + orthogonal box = (0 0 0) to (25 25 25) + 1 by 1 by 1 MPI processor grid + reading atoms ... 
+ 105 atoms + read_data CPU = 0.001 seconds + +pair_style reaxff NULL checkqeq no +pair_coeff * * ffield.reax.cho H C O +Reading potential file ffield.reax.cho with DATE: 2011-02-18 + +neighbor 1 bin +neigh_modify every 1 delay 0 check yes + +group type1 type 1 +60 atoms in group type1 +compute charge1 type1 property/atom q +compute q1 type1 reduce ave c_charge1 +group type2 type 2 +25 atoms in group type2 +compute charge2 type2 property/atom q +compute q2 type2 reduce ave c_charge2 +group type3 type 3 +20 atoms in group type3 +compute charge3 type3 property/atom q +compute q3 type3 reduce ave c_charge3 +variable qtot equal count(type1)*c_q1+count(type2)*c_q2+count(type3)*c_q3 + +thermo_style custom step pe c_q1 c_q2 c_q3 v_qtot +thermo 1 + +velocity all create 300.0 1281937 +fix 1 all nve + +fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq.reax +#fix 2 all qeq/shielded 1 10.0 1e-6 400 param.qeq1 +#fix 2 all qeq/point 1 10.0 1e-6 400 param.qeq1 +#fix 2 all qeq/dynamic 1 10.0 1e-3 100 param.qeq1 + +timestep 0.25 + +run 10 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +- fix qeq/reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Neighbor list info ... 
+ update: every = 1 steps, delay = 0 steps, check = yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 11 + ghost atom cutoff = 11 + binsize = 5.5, bins = 5 5 5 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/newtoff/ghost + stencil: full/ghost/bin/3d + bin: standard + (2) fix qeq/reax, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 14.54 | 14.54 | 14.54 Mbytes + Step PotEng c_q1 c_q2 c_q3 v_qtot + 0 -10226.557 0.095633909 -0.15658753 -0.091167311 -2.8865799e-15 + 1 -10225.799 0.095650157 -0.15662321 -0.091171465 5.3290705e-15 + 2 -10223.656 0.095669727 -0.15666713 -0.091175264 -2.8865799e-15 + 3 -10220.276 0.095691262 -0.15671593 -0.09117887 4.6629367e-15 + 4 -10215.894 0.095714037 -0.15676816 -0.091181914 4.4408921e-16 + 5 -10210.804 0.095733939 -0.15681378 -0.091184589 -3.9968029e-15 + 6 -10205.342 0.09575102 -0.15685378 -0.091185835 -3.5527137e-15 + 7 -10199.848 0.095762356 -0.1568802 -0.091186815 2.220446e-15 + 8 -10194.646 0.095766731 -0.15689071 -0.091186805 -3.9968029e-15 + 9 -10190.016 0.095761083 -0.15687817 -0.091185537 -2.6645353e-15 + 10 -10186.168 0.095747444 -0.15684695 -0.091183644 -1.5543122e-15 +Loop time of 0.013327 on 1 procs for 10 steps with 105 atoms + +Performance: 16.208 ns/day, 1.481 hours/ns, 750.359 timesteps/s, 78.788 katom-step/s +97.3% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0.010565 | 0.010565 | 0.010565 | 0.0 | 79.28 +Neigh | 0 | 0 | 0 | 0.0 | 0.00 +Comm | 2.3272e-05 | 2.3272e-05 | 2.3272e-05 | 0.0 | 0.17 +Output | 0.00023198 | 0.00023198 | 0.00023198 | 0.0 | 1.74 +Modify | 0.0024913 | 0.0024913 | 0.0024913 | 0.0 | 18.69 +Other | | 1.529e-05 
| | | 0.11 + +Nlocal: 105 ave 105 max 105 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 512 ave 512 max 512 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 3417 ave 3417 max 3417 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 3417 +Ave neighs/atom = 32.542857 +Neighbor list builds = 0 +Dangerous builds = 0 +Total wall time: 0:00:00 diff --git a/examples/qeq/log.30Nov23.reaxff.g++.4 b/examples/qeq/log.30Nov23.reaxff.g++.4 new file mode 100644 index 0000000000..6f58a13a7f --- /dev/null +++ b/examples/qeq/log.30Nov23.reaxff.g++.4 @@ -0,0 +1,146 @@ +LAMMPS (21 Nov 2023) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +# This example demonstrates the use of various fix qeq variants with pair reaxff +# You can comment in/out various versions below +# +# 1) Fix qeq/shielded generates the same results compared to fix qeq/reax when +# used with pair_style reaxff, provided that the QEq parameters are the same. +# +# 2) Fix qeq/point and fix qeq/dynamic generate comparable results provided that +# the QEq parameters are the same. These two styles can also be used with +# pair_style reaxff. + +units real +atom_style charge + +read_data data.CHO +Reading data file ... + orthogonal box = (0 0 0) to (25 25 25) + 1 by 2 by 2 MPI processor grid + reading atoms ... 
+ 105 atoms + read_data CPU = 0.000 seconds + +pair_style reaxff NULL checkqeq no +pair_coeff * * ffield.reax.cho H C O +Reading potential file ffield.reax.cho with DATE: 2011-02-18 + +neighbor 1 bin +neigh_modify every 1 delay 0 check yes + +group type1 type 1 +60 atoms in group type1 +compute charge1 type1 property/atom q +compute q1 type1 reduce ave c_charge1 +group type2 type 2 +25 atoms in group type2 +compute charge2 type2 property/atom q +compute q2 type2 reduce ave c_charge2 +group type3 type 3 +20 atoms in group type3 +compute charge3 type3 property/atom q +compute q3 type3 reduce ave c_charge3 +variable qtot equal count(type1)*c_q1+count(type2)*c_q2+count(type3)*c_q3 + +thermo_style custom step pe c_q1 c_q2 c_q3 v_qtot +thermo 1 + +velocity all create 300.0 1281937 +fix 1 all nve + +fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq.reax +#fix 2 all qeq/shielded 1 10.0 1e-6 400 param.qeq1 +#fix 2 all qeq/point 1 10.0 1e-6 400 param.qeq1 +#fix 2 all qeq/dynamic 1 10.0 1e-3 100 param.qeq1 + +timestep 0.25 + +run 10 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +- fix qeq/reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Neighbor list info ... 
+ update: every = 1 steps, delay = 0 steps, check = yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 11 + ghost atom cutoff = 11 + binsize = 5.5, bins = 5 5 5 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/newtoff/ghost + stencil: full/ghost/bin/3d + bin: standard + (2) fix qeq/reax, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 9.845 | 10.57 | 11.28 Mbytes + Step PotEng c_q1 c_q2 c_q3 v_qtot + 0 -10226.557 0.095633904 -0.15658758 -0.091167237 -8.8817842e-16 + 1 -10225.799 0.095650278 -0.1566235 -0.091171458 -1.7763568e-15 + 2 -10223.656 0.095669806 -0.15666728 -0.091175321 0 + 3 -10220.276 0.095691215 -0.15671588 -0.091178792 1.7763568e-15 + 4 -10215.894 0.09571392 -0.15676795 -0.091181826 1.7763568e-15 + 5 -10210.804 0.095734058 -0.15681436 -0.091184227 1.3322676e-15 + 6 -10205.342 0.095751113 -0.15685409 -0.091185731 -4.4408921e-16 + 7 -10199.848 0.095762524 -0.15688062 -0.091186803 -1.3322676e-15 + 8 -10194.646 0.095766647 -0.15689045 -0.091186875 2.8865799e-15 + 9 -10190.016 0.095760978 -0.15687772 -0.09118579 -4.4408921e-16 + 10 -10186.168 0.095747037 -0.15684594 -0.091183687 -1.5543122e-15 +Loop time of 0.00732332 on 4 procs for 10 steps with 105 atoms + +Performance: 29.495 ns/day, 0.814 hours/ns, 1365.500 timesteps/s, 143.378 katom-step/s +99.2% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0.0033186 | 0.0038166 | 0.0041063 | 0.5 | 52.12 +Neigh | 0 | 0 | 0 | 0.0 | 0.00 +Comm | 0.0002671 | 0.00057126 | 0.0010608 | 0.0 | 7.80 +Output | 0.00019157 | 0.0002237 | 0.00028058 | 0.0 | 3.05 +Modify | 0.0026446 | 0.0026528 | 0.0026604 | 0.0 | 36.22 +Other | | 5.9e-05 | | | 0.81 
+ +Nlocal: 26.25 ave 35 max 15 min +Histogram: 1 0 0 1 0 0 0 0 0 2 +Nghost: 300 ave 357 max 239 min +Histogram: 2 0 0 0 0 0 0 0 0 2 +Neighs: 1025.25 ave 1468 max 405 min +Histogram: 1 0 0 0 1 0 0 0 0 2 + +Total # of neighbors = 4101 +Ave neighs/atom = 39.057143 +Neighbor list builds = 0 +Dangerous builds = 0 +Total wall time: 0:00:00 diff --git a/examples/reaxff/AB/in.AB b/examples/reaxff/AB/in.AB index 5f19b30b6c..30d96f2622 100644 --- a/examples/reaxff/AB/in.AB +++ b/examples/reaxff/AB/in.AB @@ -1,23 +1,23 @@ # REAX potential for Nitroamines system # ..... -units real +units real -atom_style charge -read_data data.AB +atom_style charge +read_data data.AB -pair_style reax/c lmp_control -pair_coeff * * ffield.reax.AB H B N +pair_style reaxff lmp_control +pair_coeff * * ffield.reax.AB H B N -neighbor 2 bin -neigh_modify every 10 delay 0 check no +neighbor 2 bin +neigh_modify every 10 delay 0 check no -fix 1 all nve +fix 1 all nve fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq fix 3 all temp/berendsen 500.0 500.0 100.0 -timestep 0.25 +timestep 0.25 +thermo 100 +#dump 1 all atom 30 dump.reax.ab -#dump 1 all atom 30 dump.reax.ab - -run 3000 +run 2000 diff --git a/examples/reaxff/AB/lmp_control b/examples/reaxff/AB/lmp_control index c5d98968a5..b97ad8a67f 100644 --- a/examples/reaxff/AB/lmp_control +++ b/examples/reaxff/AB/lmp_control @@ -1,17 +1,8 @@ -simulation_name AB_example ! output files will carry this name + their specific ext -tabulate_long_range 10000 ! denotes the granularity of long range tabulation, 0 means no tabulation -energy_update_freq 1 +tabulate_long_range 10000 ! denotes the granularity of long range tabulation, 0 means no tabulation -nbrhood_cutoff 4.5 ! near neighbors cutoff for bond calculations in A -hbond_cutoff 6.0 ! cutoff distance for hydrogen bond interactions -bond_graph_cutoff 0.3 ! bond strength cutoff for bond graphs -thb_cutoff 0.001 ! cutoff value for three body interactions +nbrhood_cutoff 4.5 ! 
near neighbors cutoff for bond calculations in A +hbond_cutoff 6.0 ! cutoff distance for hydrogen bond interactions +bond_graph_cutoff 0.3 ! bond strength cutoff for bond graphs +thb_cutoff 0.001 ! cutoff value for three body interactions -write_freq 1 ! write trajectory after so many steps -traj_title AB ! (no white spaces) -atom_info 1 ! 0: no atom info, 1: print basic atom info in the trajectory file -atom_forces 1 ! 0: basic atom format, 1: print force on each atom in the trajectory file -atom_velocities 0 ! 0: basic atom format, 1: print the velocity of each atom in the trajectory file -bond_info 1 ! 0: do not print bonds, 1: print bonds in the trajectory file -angle_info 1 ! 0: do not print angles, 1: print angles in the trajectory file diff --git a/examples/reaxff/AB/log.30Nov23.AB.g++.1 b/examples/reaxff/AB/log.30Nov23.AB.g++.1 new file mode 100644 index 0000000000..e966977f54 --- /dev/null +++ b/examples/reaxff/AB/log.30Nov23.AB.g++.1 @@ -0,0 +1,131 @@ +LAMMPS (21 Nov 2023) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +# REAX potential for Nitroamines system +# ..... + +units real + +atom_style charge +read_data data.AB +Reading data file ... + orthogonal box = (0 0 0) to (25 25 25) + 1 by 1 by 1 MPI processor grid + reading atoms ... 
+ 104 atoms + read_data CPU = 0.001 seconds + +pair_style reaxff lmp_control +pair_coeff * * ffield.reax.AB H B N +Reading potential file ffield.reax.AB with DATE: 2011-02-18 +WARNING: Changed valency_val to valency_boc for X (src/REAXFF/reaxff_ffield.cpp:289) + +neighbor 2 bin +neigh_modify every 10 delay 0 check no + +fix 1 all nve +fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq +fix 3 all temp/berendsen 500.0 500.0 100.0 + +timestep 0.25 +thermo 100 +#dump 1 all atom 30 dump.reax.ab + +run 2000 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +- fix qeq/reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Neighbor list info ... 
+ update: every = 10 steps, delay = 0 steps, check = no + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 12 + ghost atom cutoff = 12 + binsize = 6, bins = 5 5 5 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/newtoff/ghost + stencil: full/ghost/bin/3d + bin: standard + (2) fix qeq/reax, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 16.54 | 16.54 | 16.54 Mbytes + Step Temp E_pair E_mol TotEng Press + 0 0 -8505.1816 0 -8505.1816 -673.36566 + 100 83.873108 -8497.003 0 -8471.252 -609.71138 + 200 125.22992 -8479.8879 0 -8441.4394 -1069.4072 + 300 202.34273 -8479.1321 0 -8417.0081 -707.7946 + 400 260.53055 -8476.7914 0 -8396.8025 221.10403 + 500 282.47043 -8466.8576 0 -8380.1326 -223.61988 + 600 288.72043 -8452.9503 0 -8364.3064 681.87761 + 700 379.03381 -8467.4869 0 -8351.1146 921.82426 + 800 382.0856 -8458.717 0 -8341.4078 253.69164 + 900 380.10802 -8449.5745 0 -8332.8725 1199.5539 + 1000 377.60669 -8440.3419 0 -8324.4078 -365.02585 + 1100 372.89451 -8428.8743 0 -8314.387 -1401.9593 + 1200 392.77958 -8426.3492 0 -8305.7567 -572.78319 + 1300 429.04209 -8430.6839 0 -8298.958 -409.55236 + 1400 471.52489 -8438.2785 0 -8293.5093 -16.649651 + 1500 404.49399 -8411.1192 0 -8286.93 338.99191 + 1600 443.77567 -8418.1237 0 -8281.8741 -774.22575 + 1700 479.8234 -8424.6901 0 -8277.3731 65.260334 + 1800 386.73299 -8390.8969 0 -8272.1608 70.076616 + 1900 431.57275 -8401.0671 0 -8268.5641 30.882406 + 2000 454.96043 -8406.0467 0 -8266.3632 728.1499 +Loop time of 2.35094 on 1 procs for 2000 steps with 104 atoms + +Performance: 18.376 ns/day, 1.306 hours/ns, 850.725 timesteps/s, 88.475 katom-step/s +99.7% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total 
+--------------------------------------------------------------- +Pair | 1.9254 | 1.9254 | 1.9254 | 0.0 | 81.90 +Neigh | 0.10479 | 0.10479 | 0.10479 | 0.0 | 4.46 +Comm | 0.0067523 | 0.0067523 | 0.0067523 | 0.0 | 0.29 +Output | 0.0005375 | 0.0005375 | 0.0005375 | 0.0 | 0.02 +Modify | 0.31152 | 0.31152 | 0.31152 | 0.0 | 13.25 +Other | | 0.001934 | | | 0.08 + +Nlocal: 104 ave 104 max 104 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 710 ave 710 max 710 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 3076 ave 3076 max 3076 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 3076 +Ave neighs/atom = 29.576923 +Neighbor list builds = 200 +Dangerous builds not checked +Total wall time: 0:00:02 diff --git a/examples/reaxff/AB/log.30Nov23.AB.g++.4 b/examples/reaxff/AB/log.30Nov23.AB.g++.4 new file mode 100644 index 0000000000..5b3ae33fbe --- /dev/null +++ b/examples/reaxff/AB/log.30Nov23.AB.g++.4 @@ -0,0 +1,131 @@ +LAMMPS (21 Nov 2023) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +# REAX potential for Nitroamines system +# ..... + +units real + +atom_style charge +read_data data.AB +Reading data file ... + orthogonal box = (0 0 0) to (25 25 25) + 1 by 2 by 2 MPI processor grid + reading atoms ... 
+ 104 atoms + read_data CPU = 0.001 seconds + +pair_style reaxff lmp_control +pair_coeff * * ffield.reax.AB H B N +Reading potential file ffield.reax.AB with DATE: 2011-02-18 +WARNING: Changed valency_val to valency_boc for X (src/REAXFF/reaxff_ffield.cpp:289) + +neighbor 2 bin +neigh_modify every 10 delay 0 check no + +fix 1 all nve +fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq +fix 3 all temp/berendsen 500.0 500.0 100.0 + +timestep 0.25 +thermo 100 +#dump 1 all atom 30 dump.reax.ab + +run 2000 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +- fix qeq/reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Neighbor list info ... 
+ update: every = 10 steps, delay = 0 steps, check = no + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 12 + ghost atom cutoff = 12 + binsize = 6, bins = 5 5 5 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/newtoff/ghost + stencil: full/ghost/bin/3d + bin: standard + (2) fix qeq/reax, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 11.06 | 11.68 | 11.96 Mbytes + Step Temp E_pair E_mol TotEng Press + 0 0 -8505.1816 0 -8505.1816 -673.36566 + 100 83.873123 -8497.0031 0 -8471.252 -609.71119 + 200 125.23001 -8479.8879 0 -8441.4394 -1069.4122 + 300 202.34219 -8479.1319 0 -8417.0081 -707.82246 + 400 260.52726 -8476.7906 0 -8396.8026 221.14446 + 500 282.4624 -8466.8556 0 -8380.133 -223.17501 + 600 288.8059 -8452.9729 0 -8364.3028 679.38441 + 700 378.87007 -8467.429 0 -8351.107 920.99401 + 800 382.10004 -8458.7194 0 -8341.4058 256.06383 + 900 379.69698 -8449.4416 0 -8332.8657 1266.1715 + 1000 379.63496 -8440.9584 0 -8324.4015 -604.987 + 1100 372.82256 -8428.7507 0 -8314.2854 -1236.8451 + 1200 397.12809 -8427.4286 0 -8305.501 -356.42394 + 1300 413.36951 -8425.3861 0 -8298.472 -47.619729 + 1400 428.68835 -8424.4328 0 -8292.8154 -812.52975 + 1500 403.59408 -8411.0829 0 -8287.1701 71.054401 + 1600 448.76276 -8419.8186 0 -8282.0379 -339.19148 + 1700 450.87444 -8416.1981 0 -8277.769 -44.043208 + 1800 485.33509 -8421.3776 0 -8272.3684 -848.94941 + 1900 481.36374 -8416.1719 0 -8268.382 -282.62675 + 2000 437.25967 -8398.9233 0 -8264.6743 -217.40762 +Loop time of 1.44368 on 4 procs for 2000 steps with 104 atoms + +Performance: 29.924 ns/day, 0.802 hours/ns, 1385.350 timesteps/s, 144.076 katom-step/s +99.1% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total 
+--------------------------------------------------------------- +Pair | 0.77999 | 0.87212 | 0.96576 | 8.1 | 60.41 +Neigh | 0.054058 | 0.059726 | 0.06287 | 1.4 | 4.14 +Comm | 0.031767 | 0.12609 | 0.21802 | 21.3 | 8.73 +Output | 0.00041377 | 0.00045661 | 0.00058001 | 0.0 | 0.03 +Modify | 0.3805 | 0.38348 | 0.3894 | 0.6 | 26.56 +Other | | 0.001808 | | | 0.13 + +Nlocal: 26 ave 34 max 14 min +Histogram: 1 0 0 0 0 1 0 0 0 2 +Nghost: 429.25 ave 457 max 386 min +Histogram: 1 0 0 0 0 1 0 0 0 2 +Neighs: 922.5 ave 1238 max 496 min +Histogram: 1 0 0 0 1 0 0 0 1 1 + +Total # of neighbors = 3690 +Ave neighs/atom = 35.480769 +Neighbor list builds = 200 +Dangerous builds not checked +Total wall time: 0:00:01 diff --git a/examples/reaxff/AB/log.8Mar18.AB.g++.1 b/examples/reaxff/AB/log.8Mar18.AB.g++.1 deleted file mode 100644 index 065b1a1e67..0000000000 --- a/examples/reaxff/AB/log.8Mar18.AB.g++.1 +++ /dev/null @@ -1,81 +0,0 @@ -LAMMPS (8 Mar 2018) - using 1 OpenMP thread(s) per MPI task -# REAX potential for Nitroamines system -# ..... - -units real - -atom_style charge -read_data data.AB - orthogonal box = (0 0 0) to (25 25 25) - 1 by 1 by 1 MPI processor grid - reading atoms ... - 104 atoms - -pair_style reax/c lmp_control -pair_coeff * * ffield.reax.AB H B N -Reading potential file ffield.reax.AB with DATE: 2011-02-18 - -neighbor 2 bin -neigh_modify every 10 delay 0 check no - -fix 1 all nve -fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq -fix 3 all temp/berendsen 500.0 500.0 100.0 - -timestep 0.25 - -#dump 1 all atom 30 dump.reax.ab - -run 3000 -Neighbor list info ... 
- update every 10 steps, delay 0 steps, check no - max neighbors/atom: 2000, page size: 100000 - master list distance cutoff = 12 - ghost atom cutoff = 12 - binsize = 6, bins = 5 5 5 - 2 neighbor lists, perpetual/occasional/extra = 2 0 0 - (1) pair reax/c, perpetual - attributes: half, newton off, ghost - pair build: half/bin/newtoff/ghost - stencil: half/ghost/bin/3d/newtoff - bin: standard - (2) fix qeq/reax, perpetual, copy from (1) - attributes: half, newton off, ghost - pair build: copy - stencil: none - bin: none -Per MPI rank memory allocation (min/avg/max) = 19.3 | 19.3 | 19.3 Mbytes -Step Temp E_pair E_mol TotEng Press - 0 0 -8505.1816 0 -8505.1816 -673.36566 - 3000 478.18595 -8398.4168 0 -8251.6025 1452.6935 -Loop time of 14.3573 on 1 procs for 3000 steps with 104 atoms - -Performance: 4.513 ns/day, 5.318 hours/ns, 208.952 timesteps/s -96.6% CPU use with 1 MPI tasks x 1 OpenMP threads - -MPI task timing breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 12.709 | 12.709 | 12.709 | 0.0 | 88.52 -Neigh | 0.36804 | 0.36804 | 0.36804 | 0.0 | 2.56 -Comm | 0.022419 | 0.022419 | 0.022419 | 0.0 | 0.16 -Output | 2.8133e-05 | 2.8133e-05 | 2.8133e-05 | 0.0 | 0.00 -Modify | 1.2513 | 1.2513 | 1.2513 | 0.0 | 8.72 -Other | | 0.006263 | | | 0.04 - -Nlocal: 104 ave 104 max 104 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Nghost: 694 ave 694 max 694 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Neighs: 2866 ave 2866 max 2866 min -Histogram: 1 0 0 0 0 0 0 0 0 0 - -Total # of neighbors = 2866 -Ave neighs/atom = 27.5577 -Neighbor list builds = 300 -Dangerous builds not checked - -Please see the log.cite file for references relevant to this simulation - -Total wall time: 0:00:14 diff --git a/examples/reaxff/AB/log.8Mar18.AB.g++.4 b/examples/reaxff/AB/log.8Mar18.AB.g++.4 deleted file mode 100644 index 1e02ec5725..0000000000 --- a/examples/reaxff/AB/log.8Mar18.AB.g++.4 +++ /dev/null @@ -1,81 +0,0 @@ -LAMMPS 
(8 Mar 2018) - using 1 OpenMP thread(s) per MPI task -# REAX potential for Nitroamines system -# ..... - -units real - -atom_style charge -read_data data.AB - orthogonal box = (0 0 0) to (25 25 25) - 1 by 2 by 2 MPI processor grid - reading atoms ... - 104 atoms - -pair_style reax/c lmp_control -pair_coeff * * ffield.reax.AB H B N -Reading potential file ffield.reax.AB with DATE: 2011-02-18 - -neighbor 2 bin -neigh_modify every 10 delay 0 check no - -fix 1 all nve -fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq -fix 3 all temp/berendsen 500.0 500.0 100.0 - -timestep 0.25 - -#dump 1 all atom 30 dump.reax.ab - -run 3000 -Neighbor list info ... - update every 10 steps, delay 0 steps, check no - max neighbors/atom: 2000, page size: 100000 - master list distance cutoff = 12 - ghost atom cutoff = 12 - binsize = 6, bins = 5 5 5 - 2 neighbor lists, perpetual/occasional/extra = 2 0 0 - (1) pair reax/c, perpetual - attributes: half, newton off, ghost - pair build: half/bin/newtoff/ghost - stencil: half/ghost/bin/3d/newtoff - bin: standard - (2) fix qeq/reax, perpetual, copy from (1) - attributes: half, newton off, ghost - pair build: copy - stencil: none - bin: none -Per MPI rank memory allocation (min/avg/max) = 12.38 | 13.22 | 13.64 Mbytes -Step Temp E_pair E_mol TotEng Press - 0 0 -8505.1816 0 -8505.1816 -673.36566 - 3000 555.17702 -8426.5541 0 -8256.1017 219.26856 -Loop time of 9.03521 on 4 procs for 3000 steps with 104 atoms - -Performance: 7.172 ns/day, 3.346 hours/ns, 332.034 timesteps/s -94.6% CPU use with 4 MPI tasks x 1 OpenMP threads - -MPI task timing breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 7.0347 | 7.0652 | 7.1049 | 1.0 | 78.20 -Neigh | 0.18481 | 0.20727 | 0.22108 | 3.0 | 2.29 -Comm | 0.075175 | 0.11496 | 0.14517 | 7.4 | 1.27 -Output | 2.2888e-05 | 2.569e-05 | 3.1948e-05 | 0.0 | 0.00 -Modify | 1.6286 | 1.6421 | 1.6649 | 1.1 | 18.17 -Other | | 0.005646 | | | 0.06 
- -Nlocal: 26 ave 35 max 13 min -Histogram: 1 0 0 0 0 1 0 0 1 1 -Nghost: 420.25 ave 454 max 370 min -Histogram: 1 0 0 0 0 1 0 0 1 1 -Neighs: 862.5 ave 1178 max 444 min -Histogram: 1 0 0 0 1 0 0 0 1 1 - -Total # of neighbors = 3450 -Ave neighs/atom = 33.1731 -Neighbor list builds = 300 -Dangerous builds not checked - -Please see the log.cite file for references relevant to this simulation - -Total wall time: 0:00:09 diff --git a/examples/reaxff/AuO/in.AuO b/examples/reaxff/AuO/in.AuO index 90ae812f7d..4e5162d620 100644 --- a/examples/reaxff/AuO/in.AuO +++ b/examples/reaxff/AuO/in.AuO @@ -1,23 +1,25 @@ # REAX potential for AuO system # ..... -units real +units real -atom_style charge -read_data data.AuO +atom_style charge +read_data data.AuO -pair_style reax/c lmp_control -pair_coeff * * ffield.reax.AuO O Au +pair_style reaxff lmp_control +pair_coeff * * ffield.reax.AuO O Au -neighbor 2 bin -neigh_modify every 10 delay 0 check no +neighbor 2 bin +neigh_modify every 10 delay 0 check no -fix 1 all nve +fix 1 all nve fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq fix 3 all temp/berendsen 500.0 500.0 100.0 -timestep 0.25 +timestep 0.25 -#dump 1 all atom 30 dump.reax.auo +thermo 5 -run 100 +#dump 1 all atom 30 dump.reax.auo + +run 100 diff --git a/examples/reaxff/AuO/lmp_control b/examples/reaxff/AuO/lmp_control index d24ae0a055..ecf22940b7 100644 --- a/examples/reaxff/AuO/lmp_control +++ b/examples/reaxff/AuO/lmp_control @@ -1,17 +1,7 @@ -simulation_name AuO_example ! output files will carry this name + their specific ext +tabulate_long_range 10000 ! denotes the granularity of long range tabulation, 0 means no tabulation -tabulate_long_range 10000 ! denotes the granularity of long range tabulation, 0 means no tabulation -energy_update_freq 1 +nbrhood_cutoff 4.5 ! near neighbors cutoff for bond calculations in A +hbond_cutoff 6.0 ! cutoff distance for hydrogen bond interactions +bond_graph_cutoff 0.3 ! bond strength cutoff for bond graphs +thb_cutoff 0.001 ! 
cutoff value for three body interactions -nbrhood_cutoff 4.5 ! near neighbors cutoff for bond calculations in A -hbond_cutoff 6.0 ! cutoff distance for hydrogen bond interactions -bond_graph_cutoff 0.3 ! bond strength cutoff for bond graphs -thb_cutoff 0.001 ! cutoff value for three body interactions - -write_freq 1 ! write trajectory after so many steps -traj_title AuO ! (no white spaces) -atom_info 1 ! 0: no atom info, 1: print basic atom info in the trajectory file -atom_forces 1 ! 0: basic atom format, 1: print force on each atom in the trajectory file -atom_velocities 0 ! 0: basic atom format, 1: print the velocity of each atom in the trajectory file -bond_info 1 ! 0: do not print bonds, 1: print bonds in the trajectory file -angle_info 1 ! 0: do not print angles, 1: print angles in the trajectory file diff --git a/examples/reaxff/AuO/log.30Nov23.AuO.g++.1 b/examples/reaxff/AuO/log.30Nov23.AuO.g++.1 new file mode 100644 index 0000000000..8896ef1015 --- /dev/null +++ b/examples/reaxff/AuO/log.30Nov23.AuO.g++.1 @@ -0,0 +1,132 @@ +LAMMPS (21 Nov 2023) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +# REAX potential for AuO system +# ..... + +units real + +atom_style charge +read_data data.AuO +Reading data file ... + orthogonal box = (0 0 0) to (26.15618 21.54252 24.00246) + 1 by 1 by 1 MPI processor grid + reading atoms ... 
+ 960 atoms + read_data CPU = 0.004 seconds + +pair_style reaxff lmp_control +pair_coeff * * ffield.reax.AuO O Au +Reading potential file ffield.reax.AuO with DATE: 2011-02-18 + +neighbor 2 bin +neigh_modify every 10 delay 0 check no + +fix 1 all nve +fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq +fix 3 all temp/berendsen 500.0 500.0 100.0 + +timestep 0.25 + +thermo 5 + +#dump 1 all atom 30 dump.reax.auo + +run 100 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +- fix qeq/reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Neighbor list info ... 
+ update: every = 10 steps, delay = 0 steps, check = no + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 12 + ghost atom cutoff = 12 + binsize = 6, bins = 5 4 5 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/newtoff/ghost + stencil: full/ghost/bin/3d + bin: standard + (2) fix qeq/reax, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 129.2 | 129.2 | 129.2 Mbytes + Step Temp E_pair E_mol TotEng Press + 0 0 -72201.743 0 -72201.743 -166.19508 + 5 6.5398577 -72202.679 0 -72183.984 71.658901 + 10 13.280881 -72204.445 0 -72166.481 515.28836 + 15 19.951637 -72206.24 0 -72149.206 886.438 + 20 26.441301 -72207.78 0 -72132.195 1549.914 + 25 32.580167 -72208.5 0 -72115.367 2309.8004 + 30 38.264935 -72208.14 0 -72098.756 3148.7379 + 35 43.433009 -72206.523 0 -72082.366 3853.4389 + 40 48.028176 -72203.472 0 -72066.178 4830.1846 + 45 52.019459 -72198.85 0 -72050.147 5881.5166 + 50 55.407353 -72192.638 0 -72034.251 6996.89 + 55 58.218407 -72184.89 0 -72018.467 8191.8057 + 60 60.499102 -72175.717 0 -72002.774 9470.0601 + 65 62.309031 -72165.271 0 -71987.155 10831.309 + 70 63.72857 -72153.749 0 -71971.575 12270.345 + 75 64.847533 -72141.43 0 -71956.057 13791.775 + 80 65.755809 -72128.548 0 -71940.579 15397.406 + 85 66.547696 -72115.362 0 -71925.129 17100.883 + 90 67.309412 -72102.119 0 -71909.708 18888.699 + 95 68.120206 -72089.043 0 -71894.315 20757.038 + 100 69.043359 -72076.31 0 -71878.942 22702.463 +Loop time of 5.72003 on 1 procs for 100 steps with 960 atoms + +Performance: 0.378 ns/day, 63.556 hours/ns, 17.482 timesteps/s, 16.783 katom-step/s +99.8% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 
4.2903 | 4.2903 | 4.2903 | 0.0 | 75.00 +Neigh | 0.31194 | 0.31194 | 0.31194 | 0.0 | 5.45 +Comm | 0.0034139 | 0.0034139 | 0.0034139 | 0.0 | 0.06 +Output | 0.0005041 | 0.0005041 | 0.0005041 | 0.0 | 0.01 +Modify | 1.1134 | 1.1134 | 1.1134 | 0.0 | 19.46 +Other | | 0.0005147 | | | 0.01 + +Nlocal: 960 ave 960 max 960 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 6708 ave 6708 max 6708 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 369128 ave 369128 max 369128 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 369128 +Ave neighs/atom = 384.50833 +Neighbor list builds = 10 +Dangerous builds not checked +Total wall time: 0:00:05 diff --git a/examples/reaxff/AuO/log.30Nov23.AuO.g++.4 b/examples/reaxff/AuO/log.30Nov23.AuO.g++.4 new file mode 100644 index 0000000000..ba3b81ea1b --- /dev/null +++ b/examples/reaxff/AuO/log.30Nov23.AuO.g++.4 @@ -0,0 +1,132 @@ +LAMMPS (21 Nov 2023) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +# REAX potential for AuO system +# ..... + +units real + +atom_style charge +read_data data.AuO +Reading data file ... + orthogonal box = (0 0 0) to (26.15618 21.54252 24.00246) + 2 by 1 by 2 MPI processor grid + reading atoms ... + 960 atoms + read_data CPU = 0.002 seconds + +pair_style reaxff lmp_control +pair_coeff * * ffield.reax.AuO O Au +Reading potential file ffield.reax.AuO with DATE: 2011-02-18 + +neighbor 2 bin +neigh_modify every 10 delay 0 check no + +fix 1 all nve +fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq +fix 3 all temp/berendsen 500.0 500.0 100.0 + +timestep 0.25 + +thermo 5 + +#dump 1 all atom 30 dump.reax.auo + +run 100 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. 
Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +- fix qeq/reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Neighbor list info ... + update: every = 10 steps, delay = 0 steps, check = no + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 12 + ghost atom cutoff = 12 + binsize = 6, bins = 5 4 5 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/newtoff/ghost + stencil: full/ghost/bin/3d + bin: standard + (2) fix qeq/reax, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 71.65 | 71.65 | 71.65 Mbytes + Step Temp E_pair E_mol TotEng Press + 0 0 -72201.743 0 -72201.743 -166.19214 + 5 6.5398578 -72202.679 0 -72183.984 71.651708 + 10 13.280883 -72204.445 0 -72166.481 515.29601 + 15 19.951639 -72206.24 0 -72149.206 886.53083 + 20 26.441291 -72207.78 0 -72132.195 1550.0745 + 25 32.580153 -72208.5 0 -72115.366 2309.9393 + 30 38.264928 -72208.14 0 -72098.756 3148.6036 + 35 43.432999 -72206.523 0 -72082.365 3853.6963 + 40 48.028158 -72203.472 0 -72066.179 4830.1407 + 45 52.019436 -72198.85 0 -72050.147 5881.1916 + 50 55.407331 -72192.638 0 -72034.251 6996.6661 + 55 58.218406 -72184.89 0 -72018.467 8191.9075 + 60 60.499115 -72175.716 0 -72002.774 9470.4845 + 65 62.309058 -72165.271 0 -71987.154 10831.926 + 70 63.728581 -72153.75 0 -71971.575 
12269.823 + 75 64.847544 -72141.431 0 -71956.058 13791.586 + 80 65.755816 -72128.549 0 -71940.579 15396.822 + 85 66.547694 -72115.363 0 -71925.13 17100.27 + 90 67.309401 -72102.119 0 -71909.708 18888.633 + 95 68.120175 -72089.042 0 -71894.314 20757.565 + 100 69.043333 -72076.31 0 -71878.943 22701.953 +Loop time of 2.52972 on 4 procs for 100 steps with 960 atoms + +Performance: 0.854 ns/day, 28.108 hours/ns, 39.530 timesteps/s, 37.949 katom-step/s +99.5% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 1.9911 | 2.0015 | 2.0084 | 0.5 | 79.12 +Neigh | 0.16044 | 0.16105 | 0.16228 | 0.2 | 6.37 +Comm | 0.018211 | 0.025417 | 0.03612 | 4.2 | 1.00 +Output | 0.00039837 | 0.00043613 | 0.00054664 | 0.0 | 0.02 +Modify | 0.34008 | 0.34101 | 0.34154 | 0.1 | 13.48 +Other | | 0.0003489 | | | 0.01 + +Nlocal: 240 ave 240 max 240 min +Histogram: 4 0 0 0 0 0 0 0 0 0 +Nghost: 3981 ave 3981 max 3981 min +Histogram: 4 0 0 0 0 0 0 0 0 0 +Neighs: 105979 ave 105979 max 105979 min +Histogram: 4 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 423916 +Ave neighs/atom = 441.57917 +Neighbor list builds = 10 +Dangerous builds not checked +Total wall time: 0:00:02 diff --git a/examples/reaxff/AuO/log.8Mar18.AuO.g++.1 b/examples/reaxff/AuO/log.8Mar18.AuO.g++.1 deleted file mode 100644 index 3c609ac023..0000000000 --- a/examples/reaxff/AuO/log.8Mar18.AuO.g++.1 +++ /dev/null @@ -1,81 +0,0 @@ -LAMMPS (8 Mar 2018) - using 1 OpenMP thread(s) per MPI task -# REAX potential for AuO system -# ..... - -units real - -atom_style charge -read_data data.AuO - orthogonal box = (0 0 0) to (26.1562 21.5425 24.0025) - 1 by 1 by 1 MPI processor grid - reading atoms ... 
- 960 atoms - -pair_style reax/c lmp_control -pair_coeff * * ffield.reax.AuO O Au -Reading potential file ffield.reax.AuO with DATE: 2011-02-18 - -neighbor 2 bin -neigh_modify every 10 delay 0 check no - -fix 1 all nve -fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq -fix 3 all temp/berendsen 500.0 500.0 100.0 - -timestep 0.25 - -#dump 1 all atom 30 dump.reax.auo - -run 100 -Neighbor list info ... - update every 10 steps, delay 0 steps, check no - max neighbors/atom: 2000, page size: 100000 - master list distance cutoff = 12 - ghost atom cutoff = 12 - binsize = 6, bins = 5 4 5 - 2 neighbor lists, perpetual/occasional/extra = 2 0 0 - (1) pair reax/c, perpetual - attributes: half, newton off, ghost - pair build: half/bin/newtoff/ghost - stencil: half/ghost/bin/3d/newtoff - bin: standard - (2) fix qeq/reax, perpetual, copy from (1) - attributes: half, newton off, ghost - pair build: copy - stencil: none - bin: none -Per MPI rank memory allocation (min/avg/max) = 157.6 | 157.6 | 157.6 Mbytes -Step Temp E_pair E_mol TotEng Press - 0 0 -72201.743 0 -72201.743 -166.19482 - 100 69.043331 -72076.309 0 -71878.942 22702.89 -Loop time of 18.4369 on 1 procs for 100 steps with 960 atoms - -Performance: 0.117 ns/day, 204.854 hours/ns, 5.424 timesteps/s -98.7% CPU use with 1 MPI tasks x 1 OpenMP threads - -MPI task timing breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 15.373 | 15.373 | 15.373 | 0.0 | 83.38 -Neigh | 0.58774 | 0.58774 | 0.58774 | 0.0 | 3.19 -Comm | 0.0079026 | 0.0079026 | 0.0079026 | 0.0 | 0.04 -Output | 3.171e-05 | 3.171e-05 | 3.171e-05 | 0.0 | 0.00 -Modify | 2.4665 | 2.4665 | 2.4665 | 0.0 | 13.38 -Other | | 0.001366 | | | 0.01 - -Nlocal: 960 ave 960 max 960 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Nghost: 6708 ave 6708 max 6708 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Neighs: 369128 ave 369128 max 369128 min -Histogram: 1 0 0 0 0 0 0 0 0 0 - -Total # of neighbors = 369128 -Ave 
neighs/atom = 384.508 -Neighbor list builds = 10 -Dangerous builds not checked - -Please see the log.cite file for references relevant to this simulation - -Total wall time: 0:00:18 diff --git a/examples/reaxff/AuO/log.8Mar18.AuO.g++.4 b/examples/reaxff/AuO/log.8Mar18.AuO.g++.4 deleted file mode 100644 index ed98e1f2f4..0000000000 --- a/examples/reaxff/AuO/log.8Mar18.AuO.g++.4 +++ /dev/null @@ -1,81 +0,0 @@ -LAMMPS (8 Mar 2018) - using 1 OpenMP thread(s) per MPI task -# REAX potential for AuO system -# ..... - -units real - -atom_style charge -read_data data.AuO - orthogonal box = (0 0 0) to (26.1562 21.5425 24.0025) - 2 by 1 by 2 MPI processor grid - reading atoms ... - 960 atoms - -pair_style reax/c lmp_control -pair_coeff * * ffield.reax.AuO O Au -Reading potential file ffield.reax.AuO with DATE: 2011-02-18 - -neighbor 2 bin -neigh_modify every 10 delay 0 check no - -fix 1 all nve -fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq -fix 3 all temp/berendsen 500.0 500.0 100.0 - -timestep 0.25 - -#dump 1 all atom 30 dump.reax.auo - -run 100 -Neighbor list info ... 
- update every 10 steps, delay 0 steps, check no - max neighbors/atom: 2000, page size: 100000 - master list distance cutoff = 12 - ghost atom cutoff = 12 - binsize = 6, bins = 5 4 5 - 2 neighbor lists, perpetual/occasional/extra = 2 0 0 - (1) pair reax/c, perpetual - attributes: half, newton off, ghost - pair build: half/bin/newtoff/ghost - stencil: half/ghost/bin/3d/newtoff - bin: standard - (2) fix qeq/reax, perpetual, copy from (1) - attributes: half, newton off, ghost - pair build: copy - stencil: none - bin: none -Per MPI rank memory allocation (min/avg/max) = 87.17 | 87.17 | 87.17 Mbytes -Step Temp E_pair E_mol TotEng Press - 0 0 -72201.743 0 -72201.743 -166.2027 - 100 69.043379 -72076.31 0 -71878.943 22701.771 -Loop time of 8.44797 on 4 procs for 100 steps with 960 atoms - -Performance: 0.256 ns/day, 93.866 hours/ns, 11.837 timesteps/s -96.5% CPU use with 4 MPI tasks x 1 OpenMP threads - -MPI task timing breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 7.3702 | 7.3757 | 7.3879 | 0.3 | 87.31 -Neigh | 0.28875 | 0.29449 | 0.29747 | 0.6 | 3.49 -Comm | 0.015008 | 0.027055 | 0.032681 | 4.3 | 0.32 -Output | 2.4319e-05 | 2.8551e-05 | 3.8624e-05 | 0.0 | 0.00 -Modify | 0.74721 | 0.74985 | 0.75539 | 0.4 | 8.88 -Other | | 0.0008975 | | | 0.01 - -Nlocal: 240 ave 240 max 240 min -Histogram: 4 0 0 0 0 0 0 0 0 0 -Nghost: 3981 ave 3981 max 3981 min -Histogram: 4 0 0 0 0 0 0 0 0 0 -Neighs: 105979 ave 105979 max 105979 min -Histogram: 4 0 0 0 0 0 0 0 0 0 - -Total # of neighbors = 423916 -Ave neighs/atom = 441.579 -Neighbor list builds = 10 -Dangerous builds not checked - -Please see the log.cite file for references relevant to this simulation - -Total wall time: 0:00:08 diff --git a/examples/reaxff/CHO/in.CHO b/examples/reaxff/CHO/in.CHO index 668be5eee0..3baa885ae5 100644 --- a/examples/reaxff/CHO/in.CHO +++ b/examples/reaxff/CHO/in.CHO @@ -1,23 +1,24 @@ # REAX potential for CHO 
system # ..... -units real +units real -atom_style charge -read_data data.CHO +atom_style charge +read_data data.CHO -pair_style reax/c lmp_control -pair_coeff * * ffield.reax.cho H C O +pair_style reaxff lmp_control +pair_coeff * * ffield.reax.cho H C O -neighbor 2 bin -neigh_modify every 10 delay 0 check no +neighbor 2 bin +neigh_modify every 10 delay 0 check no -fix 1 all nve +fix 1 all nve fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq fix 3 all temp/berendsen 500.0 500.0 100.0 -timestep 0.25 +timestep 0.25 -#dump 1 all atom 30 dump.reax.cho +thermo 100 +#dump 1 all atom 30 dump.reax.cho -run 3000 +run 3000 diff --git a/examples/reaxff/CHO/lmp_control b/examples/reaxff/CHO/lmp_control index 6db169bf70..b97ad8a67f 100644 --- a/examples/reaxff/CHO/lmp_control +++ b/examples/reaxff/CHO/lmp_control @@ -1,17 +1,8 @@ -simulation_name CHO_example ! output files will carry this name + their specific ext -tabulate_long_range 10000 ! denotes the granularity of long range tabulation, 0 means no tabulation -energy_update_freq 1 +tabulate_long_range 10000 ! denotes the granularity of long range tabulation, 0 means no tabulation -nbrhood_cutoff 4.5 ! near neighbors cutoff for bond calculations in A -hbond_cutoff 6.0 ! cutoff distance for hydrogen bond interactions -bond_graph_cutoff 0.3 ! bond strength cutoff for bond graphs -thb_cutoff 0.001 ! cutoff value for three body interactions +nbrhood_cutoff 4.5 ! near neighbors cutoff for bond calculations in A +hbond_cutoff 6.0 ! cutoff distance for hydrogen bond interactions +bond_graph_cutoff 0.3 ! bond strength cutoff for bond graphs +thb_cutoff 0.001 ! cutoff value for three body interactions -write_freq 1 ! write trajectory after so many steps -traj_title CHO ! (no white spaces) -atom_info 1 ! 0: no atom info, 1: print basic atom info in the trajectory file -atom_forces 1 ! 0: basic atom format, 1: print force on each atom in the trajectory file -atom_velocities 0 ! 
0: basic atom format, 1: print the velocity of each atom in the trajectory file -bond_info 1 ! 0: do not print bonds, 1: print bonds in the trajectory file -angle_info 1 ! 0: do not print angles, 1: print angles in the trajectory file diff --git a/examples/reaxff/CHO/log.30Nov23.CHO.g++.1 b/examples/reaxff/CHO/log.30Nov23.CHO.g++.1 new file mode 100644 index 0000000000..de8da4edad --- /dev/null +++ b/examples/reaxff/CHO/log.30Nov23.CHO.g++.1 @@ -0,0 +1,141 @@ +LAMMPS (21 Nov 2023) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +# REAX potential for CHO system +# ..... + +units real + +atom_style charge +read_data data.CHO +Reading data file ... + orthogonal box = (0 0 0) to (25 25 25) + 1 by 1 by 1 MPI processor grid + reading atoms ... + 105 atoms + read_data CPU = 0.001 seconds + +pair_style reaxff lmp_control +pair_coeff * * ffield.reax.cho H C O +Reading potential file ffield.reax.cho with DATE: 2011-02-18 + +neighbor 2 bin +neigh_modify every 10 delay 0 check no + +fix 1 all nve +fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq +fix 3 all temp/berendsen 500.0 500.0 100.0 + +timestep 0.25 + +thermo 100 +#dump 1 all atom 30 dump.reax.cho + +run 3000 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +- fix qeq/reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. 
Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Neighbor list info ... + update: every = 10 steps, delay = 0 steps, check = no + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 12 + ghost atom cutoff = 12 + binsize = 6, bins = 5 5 5 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/newtoff/ghost + stencil: full/ghost/bin/3d + bin: standard + (2) fix qeq/reax, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 16.04 | 16.04 | 16.04 Mbytes + Step Temp E_pair E_mol TotEng Press + 0 0 -10226.557 0 -10226.557 -106.09742 + 100 54.051992 -10207.393 0 -10190.636 -291.38729 + 200 134.81151 -10200.411 0 -10158.619 -1637.1719 + 300 140.9118 -10177.136 0 -10133.452 -1668.5701 + 400 254.70109 -10189.927 0 -10110.969 -2522.3829 + 500 228.22383 -10162.396 0 -10091.646 404.00518 + 600 393.48635 -10197.284 0 -10075.301 394.0729 + 700 305.82675 -10156.708 0 -10061.9 362.69731 + 800 375.9566 -10170.288 0 -10053.74 -664.01093 + 900 361.59639 -10155.849 0 -10043.752 458.54613 + 1000 445.46183 -10176.602 0 -10038.507 251.38181 + 1100 475.46673 -10180.119 0 -10032.723 839.6649 + 1200 406.78262 -10155.498 0 -10029.394 62.559824 + 1300 461.0773 -10167.129 0 -10024.193 266.27742 + 1400 408.15446 -10148.62 0 -10022.091 -1187.1776 + 1500 514.43707 -10178.34 0 -10018.863 -616.2329 + 1600 432.19202 -10151.16 0 -10017.179 -677.67834 + 1700 521.01474 -10175.583 0 -10014.066 97.420991 + 1800 409.79407 -10138.825 0 -10011.787 1883.8131 + 1900 481.84667 -10160.146 0 -10010.772 1059.6448 + 2000 423.61284 -10138.538 0 -10007.216 -434.24008 + 2100 521.01756 -10169.192 0 
-10007.674 376.95207 + 2200 477.03314 -10153.033 0 -10005.151 -114.09514 + 2300 477.80526 -10153.294 0 -10005.172 869.97281 + 2400 471.49741 -10149.165 0 -10002.999 689.65295 + 2500 482.38958 -10152.956 0 -10003.413 352.08649 + 2600 505.57503 -10159.507 0 -10002.777 -812.75272 + 2700 498.41415 -10156.448 0 -10001.937 -458.03311 + 2800 534.65278 -10166.893 0 -10001.149 169.20767 + 2900 432.93717 -10134.759 0 -10000.546 -184.75627 + 3000 548.46832 -10170.375 0 -10000.347 41.765546 +Loop time of 3.49376 on 1 procs for 3000 steps with 105 atoms + +Performance: 18.547 ns/day, 1.294 hours/ns, 858.673 timesteps/s, 90.161 katom-step/s +99.7% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 2.8082 | 2.8082 | 2.8082 | 0.0 | 80.38 +Neigh | 0.15477 | 0.15477 | 0.15477 | 0.0 | 4.43 +Comm | 0.0097478 | 0.0097478 | 0.0097478 | 0.0 | 0.28 +Output | 0.00081006 | 0.00081006 | 0.00081006 | 0.0 | 0.02 +Modify | 0.51773 | 0.51773 | 0.51773 | 0.0 | 14.82 +Other | | 0.002538 | | | 0.07 + +Nlocal: 105 ave 105 max 105 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 643 ave 643 max 643 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 4237 ave 4237 max 4237 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 4237 +Ave neighs/atom = 40.352381 +Neighbor list builds = 300 +Dangerous builds not checked +Total wall time: 0:00:03 diff --git a/examples/reaxff/CHO/log.30Nov23.CHO.g++.4 b/examples/reaxff/CHO/log.30Nov23.CHO.g++.4 new file mode 100644 index 0000000000..158b1aa657 --- /dev/null +++ b/examples/reaxff/CHO/log.30Nov23.CHO.g++.4 @@ -0,0 +1,141 @@ +LAMMPS (21 Nov 2023) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +# REAX potential for CHO system +# ..... + +units real + +atom_style charge +read_data data.CHO +Reading data file ... 
+ orthogonal box = (0 0 0) to (25 25 25) + 1 by 2 by 2 MPI processor grid + reading atoms ... + 105 atoms + read_data CPU = 0.001 seconds + +pair_style reaxff lmp_control +pair_coeff * * ffield.reax.cho H C O +Reading potential file ffield.reax.cho with DATE: 2011-02-18 + +neighbor 2 bin +neigh_modify every 10 delay 0 check no + +fix 1 all nve +fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq +fix 3 all temp/berendsen 500.0 500.0 100.0 + +timestep 0.25 + +thermo 100 +#dump 1 all atom 30 dump.reax.cho + +run 3000 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +- fix qeq/reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Neighbor list info ... 
+ update: every = 10 steps, delay = 0 steps, check = no + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 12 + ghost atom cutoff = 12 + binsize = 6, bins = 5 5 5 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/newtoff/ghost + stencil: full/ghost/bin/3d + bin: standard + (2) fix qeq/reax, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 10.47 | 11.39 | 12.19 Mbytes + Step Temp E_pair E_mol TotEng Press + 0 0 -10226.557 0 -10226.557 -106.09736 + 100 54.051902 -10207.393 0 -10190.636 -291.39467 + 200 134.81172 -10200.411 0 -10158.619 -1637.1599 + 300 140.91215 -10177.136 0 -10133.452 -1668.5676 + 400 254.70123 -10189.927 0 -10110.968 -2522.3655 + 500 228.22204 -10162.396 0 -10091.646 403.98879 + 600 393.48756 -10197.284 0 -10075.301 394.11243 + 700 305.82625 -10156.707 0 -10061.9 362.73212 + 800 375.95634 -10170.288 0 -10053.74 -664.10079 + 900 361.59143 -10155.847 0 -10043.752 458.52018 + 1000 445.4582 -10176.601 0 -10038.507 251.4509 + 1100 475.47 -10180.12 0 -10032.722 840.09331 + 1200 406.77476 -10155.496 0 -10029.394 62.656622 + 1300 461.06079 -10167.123 0 -10024.192 265.91062 + 1400 408.15869 -10148.621 0 -10022.09 -1187.4869 + 1500 514.43021 -10178.337 0 -10018.862 -616.07216 + 1600 432.22013 -10151.168 0 -10017.178 -678.01121 + 1700 521.0846 -10175.605 0 -10014.067 98.591699 + 1800 409.72383 -10138.803 0 -10011.787 1884.7989 + 1900 481.86369 -10160.152 0 -10010.773 1058.5554 + 2000 423.60058 -10138.532 0 -10007.214 -437.22408 + 2100 520.96555 -10169.169 0 -10007.668 376.18619 + 2200 477.21351 -10153.089 0 -10005.15 -113.43512 + 2300 477.86263 -10153.309 0 -10005.17 868.89369 + 2400 471.46466 -10149.152 0 -10002.996 688.76379 + 2500 482.61616 -10153.025 0 -10003.412 350.03715 + 2600 505.68439 -10159.544 0 -10002.78 -810.94974 + 2700 
498.37307 -10156.441 0 -10001.944 -460.12105 + 2800 535.06218 -10167.029 0 -10001.157 152.85379 + 2900 432.98591 -10134.778 0 -10000.55 -170.46638 + 3000 547.92956 -10170.199 0 -10000.339 60.201766 +Loop time of 2.03179 on 4 procs for 3000 steps with 105 atoms + +Performance: 31.893 ns/day, 0.753 hours/ns, 1476.533 timesteps/s, 155.036 katom-step/s +99.0% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 1.213 | 1.2396 | 1.2505 | 1.4 | 61.01 +Neigh | 0.074765 | 0.08966 | 0.10323 | 4.3 | 4.41 +Comm | 0.12894 | 0.14116 | 0.16833 | 4.2 | 6.95 +Output | 0.0006079 | 0.00066664 | 0.00083802 | 0.0 | 0.03 +Modify | 0.54589 | 0.55836 | 0.57217 | 1.6 | 27.48 +Other | | 0.002368 | | | 0.12 + +Nlocal: 26.25 ave 45 max 6 min +Histogram: 1 0 1 0 0 0 0 0 1 1 +Nghost: 380.75 ave 495 max 261 min +Histogram: 1 0 1 0 0 0 0 0 1 1 +Neighs: 1269.5 ave 2197 max 179 min +Histogram: 1 0 1 0 0 0 0 0 1 1 + +Total # of neighbors = 5078 +Ave neighs/atom = 48.361905 +Neighbor list builds = 300 +Dangerous builds not checked +Total wall time: 0:00:02 diff --git a/examples/reaxff/CHO/log.8Mar18.CHO.g++.1 b/examples/reaxff/CHO/log.8Mar18.CHO.g++.1 deleted file mode 100644 index 305ccbf3a0..0000000000 --- a/examples/reaxff/CHO/log.8Mar18.CHO.g++.1 +++ /dev/null @@ -1,81 +0,0 @@ -LAMMPS (8 Mar 2018) - using 1 OpenMP thread(s) per MPI task -# REAX potential for CHO system -# ..... - -units real - -atom_style charge -read_data data.CHO - orthogonal box = (0 0 0) to (25 25 25) - 1 by 1 by 1 MPI processor grid - reading atoms ... 
- 105 atoms - -pair_style reax/c lmp_control -pair_coeff * * ffield.reax.cho H C O -Reading potential file ffield.reax.cho with DATE: 2011-02-18 - -neighbor 2 bin -neigh_modify every 10 delay 0 check no - -fix 1 all nve -fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq -fix 3 all temp/berendsen 500.0 500.0 100.0 - -timestep 0.25 - -#dump 1 all atom 30 dump.reax.cho - -run 3000 -Neighbor list info ... - update every 10 steps, delay 0 steps, check no - max neighbors/atom: 2000, page size: 100000 - master list distance cutoff = 12 - ghost atom cutoff = 12 - binsize = 6, bins = 5 5 5 - 2 neighbor lists, perpetual/occasional/extra = 2 0 0 - (1) pair reax/c, perpetual - attributes: half, newton off, ghost - pair build: half/bin/newtoff/ghost - stencil: half/ghost/bin/3d/newtoff - bin: standard - (2) fix qeq/reax, perpetual, copy from (1) - attributes: half, newton off, ghost - pair build: copy - stencil: none - bin: none -Per MPI rank memory allocation (min/avg/max) = 18.68 | 18.68 | 18.68 Mbytes -Step Temp E_pair E_mol TotEng Press - 0 0 -10226.557 0 -10226.557 -106.09755 - 3000 548.5116 -10170.389 0 -10000.348 40.372297 -Loop time of 12.6046 on 1 procs for 3000 steps with 105 atoms - -Performance: 5.141 ns/day, 4.668 hours/ns, 238.008 timesteps/s -98.9% CPU use with 1 MPI tasks x 1 OpenMP threads - -MPI task timing breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 10.931 | 10.931 | 10.931 | 0.0 | 86.72 -Neigh | 0.33107 | 0.33107 | 0.33107 | 0.0 | 2.63 -Comm | 0.017975 | 0.017975 | 0.017975 | 0.0 | 0.14 -Output | 2.0742e-05 | 2.0742e-05 | 2.0742e-05 | 0.0 | 0.00 -Modify | 1.3197 | 1.3197 | 1.3197 | 0.0 | 10.47 -Other | | 0.005059 | | | 0.04 - -Nlocal: 105 ave 105 max 105 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Nghost: 643 ave 643 max 643 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Neighs: 4237 ave 4237 max 4237 min -Histogram: 1 0 0 0 0 0 0 0 0 0 - -Total # of neighbors = 4237 -Ave 
neighs/atom = 40.3524 -Neighbor list builds = 300 -Dangerous builds not checked - -Please see the log.cite file for references relevant to this simulation - -Total wall time: 0:00:12 diff --git a/examples/reaxff/CHO/log.8Mar18.CHO.g++.4 b/examples/reaxff/CHO/log.8Mar18.CHO.g++.4 deleted file mode 100644 index 2bc19dc789..0000000000 --- a/examples/reaxff/CHO/log.8Mar18.CHO.g++.4 +++ /dev/null @@ -1,81 +0,0 @@ -LAMMPS (8 Mar 2018) - using 1 OpenMP thread(s) per MPI task -# REAX potential for CHO system -# ..... - -units real - -atom_style charge -read_data data.CHO - orthogonal box = (0 0 0) to (25 25 25) - 1 by 2 by 2 MPI processor grid - reading atoms ... - 105 atoms - -pair_style reax/c lmp_control -pair_coeff * * ffield.reax.cho H C O -Reading potential file ffield.reax.cho with DATE: 2011-02-18 - -neighbor 2 bin -neigh_modify every 10 delay 0 check no - -fix 1 all nve -fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq -fix 3 all temp/berendsen 500.0 500.0 100.0 - -timestep 0.25 - -#dump 1 all atom 30 dump.reax.cho - -run 3000 -Neighbor list info ... 
- update every 10 steps, delay 0 steps, check no - max neighbors/atom: 2000, page size: 100000 - master list distance cutoff = 12 - ghost atom cutoff = 12 - binsize = 6, bins = 5 5 5 - 2 neighbor lists, perpetual/occasional/extra = 2 0 0 - (1) pair reax/c, perpetual - attributes: half, newton off, ghost - pair build: half/bin/newtoff/ghost - stencil: half/ghost/bin/3d/newtoff - bin: standard - (2) fix qeq/reax, perpetual, copy from (1) - attributes: half, newton off, ghost - pair build: copy - stencil: none - bin: none -Per MPI rank memory allocation (min/avg/max) = 11.75 | 12.85 | 13.81 Mbytes -Step Temp E_pair E_mol TotEng Press - 0 0 -10226.557 0 -10226.557 -106.09745 - 3000 548.30567 -10170.323 0 -10000.346 47.794514 -Loop time of 7.42367 on 4 procs for 3000 steps with 105 atoms - -Performance: 8.729 ns/day, 2.750 hours/ns, 404.113 timesteps/s -97.7% CPU use with 4 MPI tasks x 1 OpenMP threads - -MPI task timing breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 5.3058 | 5.4086 | 5.4922 | 3.1 | 72.86 -Neigh | 0.14791 | 0.17866 | 0.2106 | 6.5 | 2.41 -Comm | 0.080185 | 0.16666 | 0.26933 | 17.7 | 2.24 -Output | 2.5988e-05 | 2.8491e-05 | 3.4571e-05 | 0.0 | 0.00 -Modify | 1.6364 | 1.6658 | 1.6941 | 2.0 | 22.44 -Other | | 0.003964 | | | 0.05 - -Nlocal: 26.25 ave 45 max 6 min -Histogram: 1 0 1 0 0 0 0 0 1 1 -Nghost: 380.75 ave 495 max 261 min -Histogram: 1 0 1 0 0 0 0 0 1 1 -Neighs: 1269.5 ave 2197 max 179 min -Histogram: 1 0 1 0 0 0 0 0 1 1 - -Total # of neighbors = 5078 -Ave neighs/atom = 48.3619 -Neighbor list builds = 300 -Dangerous builds not checked - -Please see the log.cite file for references relevant to this simulation - -Total wall time: 0:00:07 diff --git a/examples/reaxff/FC/in.FC b/examples/reaxff/FC/in.FC index 3679a9bc19..eaa2b3c444 100644 --- a/examples/reaxff/FC/in.FC +++ b/examples/reaxff/FC/in.FC @@ -3,33 +3,33 @@ dimension 3 boundary p p p -units real 
+units real -atom_style charge -read_data data.FC +atom_style charge +read_data data.FC -pair_style reax/c NULL -pair_coeff * * ffield.reax.FC C F -neighbor 2. bin -neigh_modify every 10 delay 0 check no -fix 2 all qeq/reax 1 0.0 10.0 1e-6 reax/c +pair_style reaxff NULL +pair_coeff * * ffield.reax.FC C F +neighbor 2. bin +neigh_modify every 10 delay 0 check no +fix 2 all qeq/reax 1 0.0 10.0 1e-6 reaxff # should equilibrate much longer in practice -fix 1 all npt temp 100.0 100.0 10.0 iso 1.0 1. 2000.0 +fix 1 all npt temp 100.0 100.0 10.0 iso 1.0 1. 2000.0 timestep 0.2 thermo_style custom step temp epair etotal press thermo 1 dump 4 all xyz 5000 dumpnpt.xyz -run 10 +run 10 unfix 1 fix 1 all nvt temp 100.0 100.0 100.0 thermo_style custom step temp epair etotal press -timestep 0.2 +timestep 0.2 -#dump 5 all xyz 5000 dumpnvt.xyz +#dump 5 all xyz 5000 dumpnvt.xyz #dump 6 all custom 5000 dumpidtype.dat id type x y z -run 10 +run 10 diff --git a/examples/reaxff/FC/log.30Nov23.FC.g++.1 b/examples/reaxff/FC/log.30Nov23.FC.g++.1 new file mode 100644 index 0000000000..a8f8c0ff83 --- /dev/null +++ b/examples/reaxff/FC/log.30Nov23.FC.g++.1 @@ -0,0 +1,172 @@ +LAMMPS (21 Nov 2023) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +# REAX potential for Nitroamines system +# ..... + +dimension 3 +boundary p p p +units real + +atom_style charge +read_data data.FC +Reading data file ... + orthogonal box = (-82.62 -79.5011 -50) to (82.62 79.5011 50) + 1 by 1 by 1 MPI processor grid + reading atoms ... + 17280 atoms + read_data CPU = 0.025 seconds + +pair_style reaxff NULL +pair_coeff * * ffield.reax.FC C F +Reading potential file ffield.reax.FC with DATE: 2013-06-28 +WARNING: Changed valency_val to valency_boc for X (src/REAXFF/reaxff_ffield.cpp:289) +neighbor 2. 
bin +neigh_modify every 10 delay 0 check no +fix 2 all qeq/reax 1 0.0 10.0 1e-6 reaxff + +# should equilibrate much longer in practice + +fix 1 all npt temp 100.0 100.0 10.0 iso 1.0 1. 2000.0 +timestep 0.2 +thermo_style custom step temp epair etotal press +thermo 1 +dump 4 all xyz 5000 dumpnpt.xyz +run 10 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +- fix qeq/reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Neighbor list info ... 
+ update: every = 10 steps, delay = 0 steps, check = no + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 12 + ghost atom cutoff = 12 + binsize = 6, bins = 28 27 17 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/newtoff/ghost + stencil: full/ghost/bin/3d + bin: standard + (2) fix qeq/reax, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 384.3 | 384.3 | 384.3 Mbytes + Step Temp E_pair TotEng Press + 0 0 -808525.04 -808525.04 58194.694 + 1 4.9935726 -808803.88 -808546.69 58205.825 + 2 19.98696 -809640.53 -808611.1 58239.155 + 3 45.012616 -811035.3 -808716.9 58294.499 + 4 80.103613 -812988.58 -808862.81 58371.548 + 5 125.26228 -815500.68 -809049 58469.872 + 6 180.4316 -818571.56 -809278.36 58588.936 + 7 245.47913 -822200.73 -809557.22 58728.144 + 8 320.17692 -826387.19 -809896.34 58886.879 + 9 404.17073 -831129.38 -810312.4 59064.554 + 10 497.02486 -836425.06 -810825.59 59260.717 +Loop time of 6.13793 on 1 procs for 10 steps with 17280 atoms + +Performance: 0.028 ns/day, 852.491 hours/ns, 1.629 timesteps/s, 28.153 katom-step/s +99.8% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 5.1752 | 5.1752 | 5.1752 | 0.0 | 84.31 +Neigh | 0.039453 | 0.039453 | 0.039453 | 0.0 | 0.64 +Comm | 0.00042596 | 0.00042596 | 0.00042596 | 0.0 | 0.01 +Output | 0.00064013 | 0.00064013 | 0.00064013 | 0.0 | 0.01 +Modify | 0.92205 | 0.92205 | 0.92205 | 0.0 | 15.02 +Other | | 0.0002045 | | | 0.00 + +Nlocal: 17280 ave 17280 max 17280 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 5352 ave 5352 max 5352 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 2.62136e+06 ave 2.62136e+06 max 2.62136e+06 min +Histogram: 1 0 0 0 0 0 0 
0 0 0 + +Total # of neighbors = 2621360 +Ave neighs/atom = 151.69907 +Neighbor list builds = 1 +Dangerous builds not checked + +unfix 1 + +fix 1 all nvt temp 100.0 100.0 100.0 +thermo_style custom step temp epair etotal press +timestep 0.2 + +#dump 5 all xyz 5000 dumpnvt.xyz +#dump 6 all custom 5000 dumpidtype.dat id type x y z + +run 10 +Per MPI rank memory allocation (min/avg/max) = 386.9 | 386.9 | 386.9 Mbytes + Step Temp E_pair TotEng Press + 10 497.02486 -836425.06 -810825.59 59260.717 + 11 601.6514 -841814.09 -810825.78 59489.425 + 12 716.37597 -847724.6 -810827.35 59738.298 + 13 841.27959 -854161.62 -810831.16 60008.164 + 14 976.4666 -861131.68 -810838.36 60300.364 + 15 1122.0668 -868642.96 -810850.45 60616.793 + 16 1278.2373 -876705.43 -810869.28 60959.942 + 17 1445.1655 -885331.03 -810897.18 61332.932 + 18 1623.072 -894533.91 -810936.92 61739.541 + 19 1812.1864 -904337.86 -811000.45 62200.561 + 20 2011.5898 -915379.05 -811771.28 63361.15 +Loop time of 6.11372 on 1 procs for 10 steps with 17280 atoms + +Performance: 0.028 ns/day, 849.127 hours/ns, 1.636 timesteps/s, 28.264 katom-step/s +99.8% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 5.0783 | 5.0783 | 5.0783 | 0.0 | 83.06 +Neigh | 0.03596 | 0.03596 | 0.03596 | 0.0 | 0.59 +Comm | 0.00041578 | 0.00041578 | 0.00041578 | 0.0 | 0.01 +Output | 0.00062133 | 0.00062133 | 0.00062133 | 0.0 | 0.01 +Modify | 0.99825 | 0.99825 | 0.99825 | 0.0 | 16.33 +Other | | 0.0002171 | | | 0.00 + +Nlocal: 17280 ave 17280 max 17280 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 5352 ave 5352 max 5352 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 2.62136e+06 ave 2.62136e+06 max 2.62136e+06 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 2621360 +Ave neighs/atom = 151.69907 +Neighbor list builds = 1 +Dangerous builds not checked +Total wall time: 0:00:13 diff 
--git a/examples/reaxff/FC/log.30Nov23.FC.g++.4 b/examples/reaxff/FC/log.30Nov23.FC.g++.4 new file mode 100644 index 0000000000..1a53bb8c03 --- /dev/null +++ b/examples/reaxff/FC/log.30Nov23.FC.g++.4 @@ -0,0 +1,172 @@ +LAMMPS (21 Nov 2023) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +# REAX potential for Nitroamines system +# ..... + +dimension 3 +boundary p p p +units real + +atom_style charge +read_data data.FC +Reading data file ... + orthogonal box = (-82.62 -79.5011 -50) to (82.62 79.5011 50) + 2 by 2 by 1 MPI processor grid + reading atoms ... + 17280 atoms + read_data CPU = 0.030 seconds + +pair_style reaxff NULL +pair_coeff * * ffield.reax.FC C F +Reading potential file ffield.reax.FC with DATE: 2013-06-28 +WARNING: Changed valency_val to valency_boc for X (src/REAXFF/reaxff_ffield.cpp:289) +neighbor 2. bin +neigh_modify every 10 delay 0 check no +fix 2 all qeq/reax 1 0.0 10.0 1e-6 reaxff + +# should equilibrate much longer in practice + +fix 1 all npt temp 100.0 100.0 10.0 iso 1.0 1. 2000.0 +timestep 0.2 +thermo_style custom step temp epair etotal press +thermo 1 +dump 4 all xyz 5000 dumpnpt.xyz +run 10 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +- fix qeq/reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. 
Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Neighbor list info ... + update: every = 10 steps, delay = 0 steps, check = no + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 12 + ghost atom cutoff = 12 + binsize = 6, bins = 28 27 17 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/newtoff/ghost + stencil: full/ghost/bin/3d + bin: standard + (2) fix qeq/reax, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 121.6 | 121.6 | 121.6 Mbytes + Step Temp E_pair TotEng Press + 0 0 -808525.04 -808525.04 58194.694 + 1 4.9935726 -808803.88 -808546.69 58205.825 + 2 19.98696 -809640.53 -808611.1 58239.155 + 3 45.012616 -811035.3 -808716.9 58294.499 + 4 80.103613 -812988.58 -808862.81 58371.548 + 5 125.26228 -815500.68 -809049 58469.872 + 6 180.4316 -818571.56 -809278.36 58588.936 + 7 245.47913 -822200.73 -809557.22 58728.144 + 8 320.17692 -826387.19 -809896.34 58886.879 + 9 404.17073 -831129.38 -810312.4 59064.554 + 10 497.02486 -836425.06 -810825.59 59260.717 +Loop time of 1.75962 on 4 procs for 10 steps with 17280 atoms + +Performance: 0.098 ns/day, 244.392 hours/ns, 5.683 timesteps/s, 98.203 katom-step/s +99.6% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 1.402 | 1.4417 | 1.4815 | 3.0 | 81.93 +Neigh | 0.012815 | 0.013047 | 0.01323 | 0.2 | 0.74 +Comm | 0.0006609 | 0.040482 | 0.080149 | 17.9 | 2.30 +Output | 0.00028041 | 0.00029538 | 0.00033093 | 0.0 | 0.02 +Modify | 0.26389 | 
0.26407 | 0.26425 | 0.0 | 15.01 +Other | | 7.451e-05 | | | 0.00 + +Nlocal: 4320 ave 4320 max 4320 min +Histogram: 4 0 0 0 0 0 0 0 0 0 +Nghost: 2856 ave 2856 max 2856 min +Histogram: 4 0 0 0 0 0 0 0 0 0 +Neighs: 691892 ave 691892 max 691892 min +Histogram: 4 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 2767568 +Ave neighs/atom = 160.16019 +Neighbor list builds = 1 +Dangerous builds not checked + +unfix 1 + +fix 1 all nvt temp 100.0 100.0 100.0 +thermo_style custom step temp epair etotal press +timestep 0.2 + +#dump 5 all xyz 5000 dumpnvt.xyz +#dump 6 all custom 5000 dumpidtype.dat id type x y z + +run 10 +Per MPI rank memory allocation (min/avg/max) = 123 | 123 | 123 Mbytes + Step Temp E_pair TotEng Press + 10 497.02486 -836425.06 -810825.59 59260.717 + 11 601.6514 -841814.09 -810825.78 59489.425 + 12 716.37597 -847724.6 -810827.35 59738.298 + 13 841.27959 -854161.62 -810831.16 60008.164 + 14 976.4666 -861131.68 -810838.36 60300.364 + 15 1122.0668 -868642.96 -810850.45 60616.793 + 16 1278.2373 -876705.43 -810869.28 60959.942 + 17 1445.1655 -885331.03 -810897.18 61332.932 + 18 1623.072 -894533.91 -810936.92 61739.541 + 19 1812.1864 -904337.86 -811000.45 62200.561 + 20 2011.5898 -915379.05 -811771.28 63361.15 +Loop time of 1.8322 on 4 procs for 10 steps with 17280 atoms + +Performance: 0.094 ns/day, 254.473 hours/ns, 5.458 timesteps/s, 94.313 katom-step/s +99.5% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 1.3846 | 1.4468 | 1.524 | 4.2 | 78.96 +Neigh | 0.012048 | 0.012239 | 0.012522 | 0.2 | 0.67 +Comm | 0.00082283 | 0.07804 | 0.14024 | 17.9 | 4.26 +Output | 0.00029695 | 0.00031243 | 0.00035323 | 0.0 | 0.02 +Modify | 0.29449 | 0.29478 | 0.29497 | 0.0 | 16.09 +Other | | 7.342e-05 | | | 0.00 + +Nlocal: 4320 ave 4320 max 4320 min +Histogram: 4 0 0 0 0 0 0 0 0 0 +Nghost: 2856 ave 2856 max 2856 min +Histogram: 4 0 0 
0 0 0 0 0 0 0 +Neighs: 691892 ave 691892 max 691892 min +Histogram: 4 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 2767568 +Ave neighs/atom = 160.16019 +Neighbor list builds = 1 +Dangerous builds not checked +Total wall time: 0:00:04 diff --git a/examples/reaxff/FC/log.8Mar18.FC.g++.1 b/examples/reaxff/FC/log.8Mar18.FC.g++.1 deleted file mode 100644 index 1e2f723966..0000000000 --- a/examples/reaxff/FC/log.8Mar18.FC.g++.1 +++ /dev/null @@ -1,141 +0,0 @@ -LAMMPS (8 Mar 2018) - using 1 OpenMP thread(s) per MPI task -# REAX potential for Nitroamines system -# ..... - -dimension 3 -boundary p p p -units real - -atom_style charge -read_data data.FC - orthogonal box = (-82.62 -79.5011 -50) to (82.62 79.5011 50) - 1 by 1 by 1 MPI processor grid - reading atoms ... - 17280 atoms - -pair_style reax/c NULL -pair_coeff * * ffield.reax.FC C F -Reading potential file ffield.reax.FC with DATE: 2013-06-28 -neighbor 2. bin -neigh_modify every 10 delay 0 check no -fix 2 all qeq/reax 1 0.0 10.0 1e-6 reax/c - -# should equilibrate much longer in practice - -fix 1 all npt temp 100.0 100.0 10.0 iso 1.0 1. 2000.0 -timestep 0.2 -thermo_style custom step temp epair etotal press -thermo 1 -dump 4 all xyz 5000 dumpnpt.xyz -run 10 -Neighbor list info ... 
- update every 10 steps, delay 0 steps, check no - max neighbors/atom: 2000, page size: 100000 - master list distance cutoff = 12 - ghost atom cutoff = 12 - binsize = 6, bins = 28 27 17 - 2 neighbor lists, perpetual/occasional/extra = 2 0 0 - (1) pair reax/c, perpetual - attributes: half, newton off, ghost - pair build: half/bin/newtoff/ghost - stencil: half/ghost/bin/3d/newtoff - bin: standard - (2) fix qeq/reax, perpetual, copy from (1) - attributes: half, newton off, ghost - pair build: copy - stencil: none - bin: none -Per MPI rank memory allocation (min/avg/max) = 470 | 470 | 470 Mbytes -Step Temp E_pair TotEng Press - 0 0 -808525.04 -808525.04 58194.694 - 1 4.9935726 -808803.89 -808546.69 58205.825 - 2 19.98696 -809640.54 -808611.1 58239.155 - 3 45.012616 -811035.31 -808716.91 58294.499 - 4 80.103613 -812988.6 -808862.83 58371.547 - 5 125.26228 -815500.71 -809049.03 58469.871 - 6 180.4316 -818571.61 -809278.4 58588.935 - 7 245.47913 -822200.79 -809557.28 58728.142 - 8 320.17692 -826387.27 -809896.43 58886.877 - 9 404.17073 -831129.48 -810312.5 59064.551 - 10 497.02486 -836425.19 -810825.72 59260.714 -Loop time of 21.5054 on 1 procs for 10 steps with 17280 atoms - -Performance: 0.008 ns/day, 2986.857 hours/ns, 0.465 timesteps/s -98.8% CPU use with 1 MPI tasks x 1 OpenMP threads - -MPI task timing breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 19.008 | 19.008 | 19.008 | 0.0 | 88.39 -Neigh | 0.084401 | 0.084401 | 0.084401 | 0.0 | 0.39 -Comm | 0.00080419 | 0.00080419 | 0.00080419 | 0.0 | 0.00 -Output | 0.00095367 | 0.00095367 | 0.00095367 | 0.0 | 0.00 -Modify | 2.4109 | 2.4109 | 2.4109 | 0.0 | 11.21 -Other | | 0.0004592 | | | 0.00 - -Nlocal: 17280 ave 17280 max 17280 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Nghost: 5352 ave 5352 max 5352 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Neighs: 2.62136e+06 ave 2.62136e+06 max 2.62136e+06 min -Histogram: 1 0 0 0 0 0 0 0 0 0 - -Total # 
of neighbors = 2621360 -Ave neighs/atom = 151.699 -Neighbor list builds = 1 -Dangerous builds not checked - -unfix 1 - -fix 1 all nvt temp 100.0 100.0 100.0 -thermo_style custom step temp epair etotal press -timestep 0.2 - -#dump 5 all xyz 5000 dumpnvt.xyz -#dump 6 all custom 5000 dumpidtype.dat id type x y z - -run 10 -Per MPI rank memory allocation (min/avg/max) = 470 | 470 | 470 Mbytes -Step Temp E_pair TotEng Press - 10 497.02486 -836425.19 -810825.72 59260.714 - 11 601.65141 -841814.22 -810825.91 59489.422 - 12 716.37599 -847724.72 -810827.48 59738.295 - 13 841.27961 -854161.75 -810831.29 60008.162 - 14 976.46663 -861131.81 -810838.49 60300.362 - 15 1122.0668 -868643.09 -810850.57 60616.791 - 16 1278.2373 -876705.56 -810869.41 60959.94 - 17 1445.1655 -885331.16 -810897.31 61332.931 - 18 1623.072 -894534.04 -810937.04 61739.541 - 19 1812.1865 -904337.99 -811000.57 62200.561 - 20 2011.5899 -915379.19 -811771.41 63361.151 -Loop time of 21.362 on 1 procs for 10 steps with 17280 atoms - -Performance: 0.008 ns/day, 2966.945 hours/ns, 0.468 timesteps/s -98.9% CPU use with 1 MPI tasks x 1 OpenMP threads - -MPI task timing breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 18.793 | 18.793 | 18.793 | 0.0 | 87.97 -Neigh | 0.077047 | 0.077047 | 0.077047 | 0.0 | 0.36 -Comm | 0.00080276 | 0.00080276 | 0.00080276 | 0.0 | 0.00 -Output | 0.0010097 | 0.0010097 | 0.0010097 | 0.0 | 0.00 -Modify | 2.4897 | 2.4897 | 2.4897 | 0.0 | 11.65 -Other | | 0.0004568 | | | 0.00 - -Nlocal: 17280 ave 17280 max 17280 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Nghost: 5352 ave 5352 max 5352 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Neighs: 2.62136e+06 ave 2.62136e+06 max 2.62136e+06 min -Histogram: 1 0 0 0 0 0 0 0 0 0 - -Total # of neighbors = 2621360 -Ave neighs/atom = 151.699 -Neighbor list builds = 1 -Dangerous builds not checked - -Please see the log.cite file for references relevant to this simulation - -Total 
wall time: 0:00:47 diff --git a/examples/reaxff/FC/log.8Mar18.FC.g++.4 b/examples/reaxff/FC/log.8Mar18.FC.g++.4 deleted file mode 100644 index 76dcadfb0f..0000000000 --- a/examples/reaxff/FC/log.8Mar18.FC.g++.4 +++ /dev/null @@ -1,141 +0,0 @@ -LAMMPS (8 Mar 2018) - using 1 OpenMP thread(s) per MPI task -# REAX potential for Nitroamines system -# ..... - -dimension 3 -boundary p p p -units real - -atom_style charge -read_data data.FC - orthogonal box = (-82.62 -79.5011 -50) to (82.62 79.5011 50) - 2 by 2 by 1 MPI processor grid - reading atoms ... - 17280 atoms - -pair_style reax/c NULL -pair_coeff * * ffield.reax.FC C F -Reading potential file ffield.reax.FC with DATE: 2013-06-28 -neighbor 2. bin -neigh_modify every 10 delay 0 check no -fix 2 all qeq/reax 1 0.0 10.0 1e-6 reax/c - -# should equilibrate much longer in practice - -fix 1 all npt temp 100.0 100.0 10.0 iso 1.0 1. 2000.0 -timestep 0.2 -thermo_style custom step temp epair etotal press -thermo 1 -dump 4 all xyz 5000 dumpnpt.xyz -run 10 -Neighbor list info ... 
- update every 10 steps, delay 0 steps, check no - max neighbors/atom: 2000, page size: 100000 - master list distance cutoff = 12 - ghost atom cutoff = 12 - binsize = 6, bins = 28 27 17 - 2 neighbor lists, perpetual/occasional/extra = 2 0 0 - (1) pair reax/c, perpetual - attributes: half, newton off, ghost - pair build: half/bin/newtoff/ghost - stencil: half/ghost/bin/3d/newtoff - bin: standard - (2) fix qeq/reax, perpetual, copy from (1) - attributes: half, newton off, ghost - pair build: copy - stencil: none - bin: none -Per MPI rank memory allocation (min/avg/max) = 149.3 | 149.3 | 149.3 Mbytes -Step Temp E_pair TotEng Press - 0 0 -808525.04 -808525.04 58194.694 - 1 4.9935726 -808803.89 -808546.69 58205.825 - 2 19.98696 -809640.54 -808611.1 58239.155 - 3 45.012616 -811035.31 -808716.91 58294.499 - 4 80.103613 -812988.6 -808862.83 58371.547 - 5 125.26228 -815500.71 -809049.03 58469.871 - 6 180.4316 -818571.61 -809278.4 58588.935 - 7 245.47913 -822200.79 -809557.28 58728.142 - 8 320.17692 -826387.27 -809896.43 58886.877 - 9 404.17073 -831129.48 -810312.5 59064.551 - 10 497.02486 -836425.19 -810825.72 59260.714 -Loop time of 6.02109 on 4 procs for 10 steps with 17280 atoms - -Performance: 0.029 ns/day, 836.262 hours/ns, 1.661 timesteps/s -99.0% CPU use with 4 MPI tasks x 1 OpenMP threads - -MPI task timing breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 4.9482 | 5.1186 | 5.3113 | 7.4 | 85.01 -Neigh | 0.024811 | 0.025702 | 0.027556 | 0.7 | 0.43 -Comm | 0.0027421 | 0.19541 | 0.36565 | 38.1 | 3.25 -Output | 0.00053239 | 0.00057119 | 0.00067186 | 0.0 | 0.01 -Modify | 0.67876 | 0.68059 | 0.68165 | 0.1 | 11.30 -Other | | 0.0001779 | | | 0.00 - -Nlocal: 4320 ave 4320 max 4320 min -Histogram: 4 0 0 0 0 0 0 0 0 0 -Nghost: 2856 ave 2856 max 2856 min -Histogram: 4 0 0 0 0 0 0 0 0 0 -Neighs: 691892 ave 691892 max 691892 min -Histogram: 4 0 0 0 0 0 0 0 0 0 - -Total # of neighbors = 
2767568 -Ave neighs/atom = 160.16 -Neighbor list builds = 1 -Dangerous builds not checked - -unfix 1 - -fix 1 all nvt temp 100.0 100.0 100.0 -thermo_style custom step temp epair etotal press -timestep 0.2 - -#dump 5 all xyz 5000 dumpnvt.xyz -#dump 6 all custom 5000 dumpidtype.dat id type x y z - -run 10 -Per MPI rank memory allocation (min/avg/max) = 149.3 | 149.3 | 149.3 Mbytes -Step Temp E_pair TotEng Press - 10 497.02486 -836425.19 -810825.72 59260.714 - 11 601.65141 -841814.22 -810825.91 59489.422 - 12 716.37599 -847724.72 -810827.48 59738.295 - 13 841.27961 -854161.75 -810831.29 60008.162 - 14 976.46663 -861131.81 -810838.49 60300.362 - 15 1122.0668 -868643.09 -810850.57 60616.791 - 16 1278.2373 -876705.56 -810869.41 60959.94 - 17 1445.1655 -885331.16 -810897.31 61332.931 - 18 1623.072 -894534.04 -810937.04 61739.541 - 19 1812.1865 -904337.99 -811000.57 62200.561 - 20 2011.5899 -915379.19 -811771.41 63361.151 -Loop time of 6.08805 on 4 procs for 10 steps with 17280 atoms - -Performance: 0.028 ns/day, 845.563 hours/ns, 1.643 timesteps/s -99.2% CPU use with 4 MPI tasks x 1 OpenMP threads - -MPI task timing breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 4.9124 | 5.1008 | 5.3405 | 8.3 | 83.78 -Neigh | 0.023652 | 0.024473 | 0.025996 | 0.6 | 0.40 -Comm | 0.0020971 | 0.24171 | 0.43023 | 38.0 | 3.97 -Output | 0.00056076 | 0.00060701 | 0.00072312 | 0.0 | 0.01 -Modify | 0.71869 | 0.72023 | 0.72107 | 0.1 | 11.83 -Other | | 0.0001827 | | | 0.00 - -Nlocal: 4320 ave 4320 max 4320 min -Histogram: 4 0 0 0 0 0 0 0 0 0 -Nghost: 2856 ave 2856 max 2856 min -Histogram: 4 0 0 0 0 0 0 0 0 0 -Neighs: 691892 ave 691892 max 691892 min -Histogram: 4 0 0 0 0 0 0 0 0 0 - -Total # of neighbors = 2767568 -Ave neighs/atom = 160.16 -Neighbor list builds = 1 -Dangerous builds not checked - -Please see the log.cite file for references relevant to this simulation - -Total wall time: 0:00:13 diff --git 
a/examples/reaxff/FeOH3/in.FeOH3 b/examples/reaxff/FeOH3/in.FeOH3 index 8b56f2a7d2..72afbe5416 100644 --- a/examples/reaxff/FeOH3/in.FeOH3 +++ b/examples/reaxff/FeOH3/in.FeOH3 @@ -1,23 +1,24 @@ # REAX potential for Fe/O/H system # ..... -units real +units real -atom_style charge -read_data data.FeOH3 +atom_style charge +read_data data.FeOH3 -pair_style reax/c lmp_control -pair_coeff * * ffield.reax.Fe_O_C_H H O Fe +pair_style reaxff lmp_control +pair_coeff * * ffield.reax.Fe_O_C_H H O Fe -neighbor 2 bin -neigh_modify every 10 delay 0 check no +neighbor 2 bin +neigh_modify every 10 delay 0 check no -fix 1 all nve +fix 1 all nve fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq fix 3 all temp/berendsen 500.0 500.0 100.0 -timestep 0.25 +timestep 0.25 +thermo 100 -#dump 1 all atom 30 dump.reax.feoh +#dump 1 all atom 30 dump.reax.feoh -run 3000 +run 3000 diff --git a/examples/reaxff/FeOH3/lmp_control b/examples/reaxff/FeOH3/lmp_control index 779c7da7ec..6b26c6427a 100644 --- a/examples/reaxff/FeOH3/lmp_control +++ b/examples/reaxff/FeOH3/lmp_control @@ -1,17 +1,7 @@ -simulation_name FeOH3_example ! output files will carry this name + their specific ext -tabulate_long_range 10000 ! denotes the granularity of long range tabulation, 0 means no tabulation -energy_update_freq 1 +tabulate_long_range 10000 ! denotes the granularity of long range tabulation, 0 means no tabulation -nbrhood_cutoff 4.5 ! near neighbors cutoff for bond calculations in A -hbond_cutoff 6.0 ! cutoff distance for hydrogen bond interactions -bond_graph_cutoff 0.3 ! bond strength cutoff for bond graphs -thb_cutoff 0.001 ! cutoff value for three body interactions - -write_freq 1 ! write trajectory after so many steps -traj_title Fe_OH3 ! (no white spaces) -atom_info 1 ! 0: no atom info, 1: print basic atom info in the trajectory file -atom_forces 1 ! 0: basic atom format, 1: print force on each atom in the trajectory file -atom_velocities 0 ! 
0: basic atom format, 1: print the velocity of each atom in the trajectory file -bond_info 1 ! 0: do not print bonds, 1: print bonds in the trajectory file -angle_info 1 ! 0: do not print angles, 1: print angles in the trajectory file +nbrhood_cutoff 4.5 ! near neighbors cutoff for bond calculations in A +hbond_cutoff 6.0 ! cutoff distance for hydrogen bond interactions +bond_graph_cutoff 0.3 ! bond strength cutoff for bond graphs +thb_cutoff 0.001 ! cutoff value for three body interactions diff --git a/examples/reaxff/FeOH3/log.30Nov23.FeOH3.g++.1 b/examples/reaxff/FeOH3/log.30Nov23.FeOH3.g++.1 new file mode 100644 index 0000000000..0acb09bdc0 --- /dev/null +++ b/examples/reaxff/FeOH3/log.30Nov23.FeOH3.g++.1 @@ -0,0 +1,141 @@ +LAMMPS (21 Nov 2023) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +# REAX potential for Fe/O/H system +# ..... + +units real + +atom_style charge +read_data data.FeOH3 +Reading data file ... + orthogonal box = (0 0 0) to (25 25 25) + 1 by 1 by 1 MPI processor grid + reading atoms ... + 105 atoms + read_data CPU = 0.000 seconds + +pair_style reaxff lmp_control +pair_coeff * * ffield.reax.Fe_O_C_H H O Fe +Reading potential file ffield.reax.Fe_O_C_H with DATE: 2011-02-18 + +neighbor 2 bin +neigh_modify every 10 delay 0 check no + +fix 1 all nve +fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq +fix 3 all temp/berendsen 500.0 500.0 100.0 + +timestep 0.25 +thermo 100 + +#dump 1 all atom 30 dump.reax.feoh + +run 3000 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. 
Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +- fix qeq/reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Neighbor list info ... + update: every = 10 steps, delay = 0 steps, check = no + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 12 + ghost atom cutoff = 12 + binsize = 6, bins = 5 5 5 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/newtoff/ghost + stencil: full/ghost/bin/3d + bin: standard + (2) fix qeq/reax, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 15.99 | 15.99 | 15.99 Mbytes + Step Temp E_pair E_mol TotEng Press + 0 0 -9715.3326 0 -9715.3326 -139.61126 + 100 127.38829 -9720.5854 0 -9681.0945 -933.74373 + 200 141.21008 -9696.3143 0 -9652.5386 -831.74241 + 300 176.81083 -9681.3376 0 -9626.5255 -520.30966 + 400 220.75236 -9672.6196 0 -9604.1854 -388.85436 + 500 301.29415 -9678.8463 0 -9585.4438 -545.22735 + 600 320.36877 -9670.3054 0 -9570.9897 -609.44044 + 700 414.53699 -9688.649 0 -9560.1408 -259.51791 + 800 391.93073 -9675.1212 0 -9553.621 77.352757 + 900 413.52476 -9673.7372 0 -9545.5428 369.71918 + 1000 382.03337 -9656.3848 0 -9537.9528 236.61186 + 1100 381.68223 -9647.4372 0 -9529.1141 -432.67374 + 1200 470.68889 -9671.5116 0 -9525.596 448.90781 + 1300 436.34973 -9659.2277 0 -9523.9574 188.12079 + 1400 
422.25034 -9651.2639 0 -9520.3645 48.988693 + 1500 363.49223 -9625.6588 0 -9512.9746 -977.83513 + 1600 450.39155 -9646.4742 0 -9506.8509 44.80204 + 1700 461.44884 -9648.1215 0 -9505.0704 -29.381385 + 1800 457.01538 -9644.6842 0 -9503.0075 -29.157643 + 1900 461.56497 -9642.8457 0 -9499.7586 -608.58801 + 2000 491.20199 -9648.6637 0 -9496.389 -99.409356 + 2100 461.60295 -9636.4878 0 -9493.3889 753.00956 + 2200 480.92601 -9640.304 0 -9491.2149 -176.4371 + 2300 450.00958 -9627.8875 0 -9488.3826 -210.21397 + 2400 475.97134 -9634.1577 0 -9486.6046 -364.46797 + 2500 478.0174 -9631.5069 0 -9483.3194 557.79107 + 2600 500.26141 -9636.8606 0 -9481.7774 115.84535 + 2700 455.06433 -9620.0151 0 -9478.9433 -963.22 + 2800 441.50799 -9612.6546 0 -9475.7852 -177.60856 + 2900 471.67031 -9618.9817 0 -9472.7619 -294.38595 + 3000 526.94336 -9635.8664 0 -9472.5117 119.05777 +Loop time of 2.33387 on 1 procs for 3000 steps with 105 atoms + +Performance: 27.765 ns/day, 0.864 hours/ns, 1285.420 timesteps/s, 134.969 katom-step/s +99.7% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 1.8409 | 1.8409 | 1.8409 | 0.0 | 78.88 +Neigh | 0.15998 | 0.15998 | 0.15998 | 0.0 | 6.85 +Comm | 0.0090909 | 0.0090909 | 0.0090909 | 0.0 | 0.39 +Output | 0.00069968 | 0.00069968 | 0.00069968 | 0.0 | 0.03 +Modify | 0.32099 | 0.32099 | 0.32099 | 0.0 | 13.75 +Other | | 0.002244 | | | 0.10 + +Nlocal: 105 ave 105 max 105 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 651 ave 651 max 651 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 3388 ave 3388 max 3388 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 3388 +Ave neighs/atom = 32.266667 +Neighbor list builds = 300 +Dangerous builds not checked +Total wall time: 0:00:02 diff --git a/examples/reaxff/FeOH3/log.30Nov23.FeOH3.g++.4 b/examples/reaxff/FeOH3/log.30Nov23.FeOH3.g++.4 new file mode 100644 index 
0000000000..3357947749 --- /dev/null +++ b/examples/reaxff/FeOH3/log.30Nov23.FeOH3.g++.4 @@ -0,0 +1,141 @@ +LAMMPS (21 Nov 2023) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +# REAX potential for Fe/O/H system +# ..... + +units real + +atom_style charge +read_data data.FeOH3 +Reading data file ... + orthogonal box = (0 0 0) to (25 25 25) + 1 by 2 by 2 MPI processor grid + reading atoms ... + 105 atoms + read_data CPU = 0.001 seconds + +pair_style reaxff lmp_control +pair_coeff * * ffield.reax.Fe_O_C_H H O Fe +Reading potential file ffield.reax.Fe_O_C_H with DATE: 2011-02-18 + +neighbor 2 bin +neigh_modify every 10 delay 0 check no + +fix 1 all nve +fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq +fix 3 all temp/berendsen 500.0 500.0 100.0 + +timestep 0.25 +thermo 100 + +#dump 1 all atom 30 dump.reax.feoh + +run 3000 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +- fix qeq/reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Neighbor list info ... 
+ update: every = 10 steps, delay = 0 steps, check = no + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 12 + ghost atom cutoff = 12 + binsize = 6, bins = 5 5 5 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/newtoff/ghost + stencil: full/ghost/bin/3d + bin: standard + (2) fix qeq/reax, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 10.56 | 11.55 | 12.17 Mbytes + Step Temp E_pair E_mol TotEng Press + 0 0 -9715.3326 0 -9715.3326 -139.61126 + 100 127.3884 -9720.5854 0 -9681.0945 -933.74975 + 200 141.21023 -9696.3143 0 -9652.5385 -831.74859 + 300 176.81092 -9681.3376 0 -9626.5254 -520.29734 + 400 220.75237 -9672.6195 0 -9604.1853 -388.89122 + 500 301.29434 -9678.8461 0 -9585.4436 -545.24883 + 600 320.36921 -9670.3055 0 -9570.9897 -609.45071 + 700 414.5366 -9688.649 0 -9560.1409 -259.54271 + 800 391.93079 -9675.1212 0 -9553.621 77.314405 + 900 413.52641 -9673.738 0 -9545.5431 369.67477 + 1000 382.02987 -9656.384 0 -9537.9531 236.57634 + 1100 381.6811 -9647.4372 0 -9529.1144 -432.72725 + 1200 470.68578 -9671.511 0 -9525.5964 448.88885 + 1300 436.3616 -9659.2312 0 -9523.9573 188.07625 + 1400 422.26867 -9651.2709 0 -9520.3658 48.829055 + 1500 363.49419 -9625.6611 0 -9512.9764 -977.70396 + 1600 450.39497 -9646.4762 0 -9506.8518 45.000339 + 1700 461.44368 -9648.121 0 -9505.0715 -29.332359 + 1800 457.02327 -9644.687 0 -9503.0078 -29.48478 + 1900 461.60004 -9642.8571 0 -9499.7592 -608.0786 + 2000 491.19069 -9648.6599 0 -9496.3887 -99.479995 + 2100 461.50901 -9636.4581 0 -9493.3884 752.86874 + 2200 480.7646 -9640.2431 0 -9491.204 -175.99562 + 2300 450.00669 -9627.875 0 -9488.3711 -209.83065 + 2400 475.84946 -9634.1191 0 -9486.6038 -366.65233 + 2500 477.75601 -9631.4196 0 -9483.3132 558.18557 + 2600 500.64305 -9636.9676 0 -9481.7661 111.76394 + 2700 
455.78826 -9620.2513 0 -9478.955 -962.65771 + 2800 438.72349 -9611.8395 0 -9475.8334 -180.94976 + 2900 471.33135 -9618.8641 0 -9472.7494 -291.14764 + 3000 528.64651 -9636.4232 0 -9472.5405 111.18605 +Loop time of 1.42723 on 4 procs for 3000 steps with 105 atoms + +Performance: 45.403 ns/day, 0.529 hours/ns, 2101.973 timesteps/s, 220.707 katom-step/s +99.2% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0.79715 | 0.88663 | 0.96735 | 7.5 | 62.12 +Neigh | 0.083068 | 0.096787 | 0.10679 | 2.8 | 6.78 +Comm | 0.058539 | 0.13831 | 0.22776 | 19.0 | 9.69 +Output | 0.0006518 | 0.00071197 | 0.00088964 | 0.0 | 0.05 +Modify | 0.29308 | 0.30291 | 0.31706 | 1.6 | 21.22 +Other | | 0.001886 | | | 0.13 + +Nlocal: 26.25 ave 34 max 12 min +Histogram: 1 0 0 0 0 0 0 1 1 1 +Nghost: 408 ave 462 max 347 min +Histogram: 1 0 0 0 1 0 1 0 0 1 +Neighs: 1109 ave 1419 max 453 min +Histogram: 1 0 0 0 0 0 0 1 0 2 + +Total # of neighbors = 4436 +Ave neighs/atom = 42.247619 +Neighbor list builds = 300 +Dangerous builds not checked +Total wall time: 0:00:01 diff --git a/examples/reaxff/FeOH3/log.5Oct16.FeOH3.g++.1 b/examples/reaxff/FeOH3/log.5Oct16.FeOH3.g++.1 deleted file mode 100644 index fd9b310bb2..0000000000 --- a/examples/reaxff/FeOH3/log.5Oct16.FeOH3.g++.1 +++ /dev/null @@ -1,70 +0,0 @@ -LAMMPS (5 Oct 2016) -# REAX potential for Fe/O/H system -# ..... - -units real - -atom_style charge -read_data data.FeOH3 - orthogonal box = (0 0 0) to (25 25 25) - 1 by 1 by 1 MPI processor grid - reading atoms ... 
- 105 atoms - -pair_style reax/c lmp_control -pair_coeff * * ffield.reax.Fe_O_C_H H O Fe -Reading potential file ffield.reax.Fe_O_C_H with DATE: 2011-02-18 - -neighbor 2 bin -neigh_modify every 10 delay 0 check no - -fix 1 all nve -fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq -fix 3 all temp/berendsen 500.0 500.0 100.0 - -timestep 0.25 - -#dump 1 all atom 30 dump.reax.feoh - -run 3000 -Neighbor list info ... - 2 neighbor list requests - update every 10 steps, delay 0 steps, check no - max neighbors/atom: 2000, page size: 100000 - master list distance cutoff = 12 - ghost atom cutoff = 12 - binsize = 6 -> bins = 5 5 5 -Memory usage per processor = 17.7294 Mbytes -Step Temp E_pair E_mol TotEng Press - 0 0 -9715.3326 0 -9715.3326 -139.61126 - 3000 529.72301 -9636.7144 0 -9472.498 127.52152 -Loop time of 8.40814 on 1 procs for 3000 steps with 105 atoms - -Performance: 7.707 ns/day, 3.114 hours/ns, 356.797 timesteps/s -99.2% CPU use with 1 MPI tasks x no OpenMP threads - -MPI task timing breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 7.3193 | 7.3193 | 7.3193 | 0.0 | 87.05 -Neigh | 0.29032 | 0.29032 | 0.29032 | 0.0 | 3.45 -Comm | 0.016032 | 0.016032 | 0.016032 | 0.0 | 0.19 -Output | 1.2159e-05 | 1.2159e-05 | 1.2159e-05 | 0.0 | 0.00 -Modify | 0.77846 | 0.77846 | 0.77846 | 0.0 | 9.26 -Other | | 0.004053 | | | 0.05 - -Nlocal: 105 ave 105 max 105 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Nghost: 651 ave 651 max 651 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Neighs: 3389 ave 3389 max 3389 min -Histogram: 1 0 0 0 0 0 0 0 0 0 - -Total # of neighbors = 3389 -Ave neighs/atom = 32.2762 -Neighbor list builds = 300 -Dangerous builds not checked - -Please see the log.cite file for references relevant to this simulation - -Total wall time: 0:00:08 diff --git a/examples/reaxff/FeOH3/log.5Oct16.FeOH3.g++.4 b/examples/reaxff/FeOH3/log.5Oct16.FeOH3.g++.4 deleted file mode 100644 index 
399c5dbe3c..0000000000 --- a/examples/reaxff/FeOH3/log.5Oct16.FeOH3.g++.4 +++ /dev/null @@ -1,70 +0,0 @@ -LAMMPS (5 Oct 2016) -# REAX potential for Fe/O/H system -# ..... - -units real - -atom_style charge -read_data data.FeOH3 - orthogonal box = (0 0 0) to (25 25 25) - 1 by 2 by 2 MPI processor grid - reading atoms ... - 105 atoms - -pair_style reax/c lmp_control -pair_coeff * * ffield.reax.Fe_O_C_H H O Fe -Reading potential file ffield.reax.Fe_O_C_H with DATE: 2011-02-18 - -neighbor 2 bin -neigh_modify every 10 delay 0 check no - -fix 1 all nve -fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq -fix 3 all temp/berendsen 500.0 500.0 100.0 - -timestep 0.25 - -#dump 1 all atom 30 dump.reax.feoh - -run 3000 -Neighbor list info ... - 2 neighbor list requests - update every 10 steps, delay 0 steps, check no - max neighbors/atom: 2000, page size: 100000 - master list distance cutoff = 12 - ghost atom cutoff = 12 - binsize = 6 -> bins = 5 5 5 -Memory usage per processor = 12.3695 Mbytes -Step Temp E_pair E_mol TotEng Press - 0 0 -9715.3326 0 -9715.3326 -139.61126 - 3000 534.48882 -9638.0405 0 -9472.3467 127.47989 -Loop time of 4.78344 on 4 procs for 3000 steps with 105 atoms - -Performance: 13.547 ns/day, 1.772 hours/ns, 627.164 timesteps/s -99.0% CPU use with 4 MPI tasks x no OpenMP threads - -MPI task timing breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 3.7061 | 3.7503 | 3.7853 | 1.5 | 78.40 -Neigh | 0.14361 | 0.16388 | 0.18297 | 3.4 | 3.43 -Comm | 0.062001 | 0.098492 | 0.14111 | 9.0 | 2.06 -Output | 2.0981e-05 | 2.2948e-05 | 2.7895e-05 | 0.1 | 0.00 -Modify | 0.75012 | 0.76764 | 0.78678 | 1.5 | 16.05 -Other | | 0.003105 | | | 0.06 - -Nlocal: 26.25 ave 35 max 12 min -Histogram: 1 0 0 0 0 0 0 2 0 1 -Nghost: 408 ave 462 max 348 min -Histogram: 1 0 0 0 1 0 1 0 0 1 -Neighs: 1107 ave 1428 max 453 min -Histogram: 1 0 0 0 0 0 0 1 0 2 - -Total # of neighbors = 4428 -Ave neighs/atom = 
42.1714 -Neighbor list builds = 300 -Dangerous builds not checked - -Please see the log.cite file for references relevant to this simulation - -Total wall time: 0:00:05 diff --git a/examples/reaxff/HNS/in.reaxc.hns b/examples/reaxff/HNS/in.reaxff.hns similarity index 82% rename from examples/reaxff/HNS/in.reaxc.hns rename to examples/reaxff/HNS/in.reaxff.hns index 5b83698917..0f40814bff 100644 --- a/examples/reaxff/HNS/in.reaxc.hns +++ b/examples/reaxff/HNS/in.reaxff.hns @@ -12,16 +12,15 @@ atom_style charge atom_modify sort 100 0.0 # optional dimension 3 boundary p p p -box tilt large read_data data.hns-equil replicate $x $y $z bbox -pair_style reax/c NULL +pair_style reaxff NULL pair_coeff * * ffield.reax.hns C H O N -compute reax all pair reax/c +compute reax all pair reaxff neighbor 1.0 bin neigh_modify every 20 delay 0 check no @@ -35,6 +34,6 @@ thermo 10 velocity all create 300.0 41279 loop geom fix 1 all nve -fix 2 all qeq/reax 1 0.0 10.0 1e-6 reax/c +fix 2 all qeq/reax 1 0.0 10.0 1e-6 reaxff run $t diff --git a/examples/reaxff/HNS/log.30Nov23.reaxff.hns.g++.1 b/examples/reaxff/HNS/log.30Nov23.reaxff.hns.g++.1 new file mode 100644 index 0000000000..b2c9778994 --- /dev/null +++ b/examples/reaxff/HNS/log.30Nov23.reaxff.hns.g++.1 @@ -0,0 +1,149 @@ +LAMMPS (21 Nov 2023) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +# Pure HNS crystal, ReaxFF tests for benchmarking LAMMPS +# See README for more info + +variable x index 2 +variable y index 2 +variable z index 2 +variable t index 100 + + +units real +atom_style charge +atom_modify sort 100 0.0 # optional +dimension 3 +boundary p p p + +read_data data.hns-equil +Reading data file ... + triclinic box = (0 0 0) to (22.326 11.1412 13.778966) with tilt (0 -5.02603 0) + 1 by 1 by 1 MPI processor grid + reading atoms ... + 304 atoms + reading velocities ... 
+ 304 velocities + read_data CPU = 0.005 seconds +replicate $x $y $z bbox +replicate 2 $y $z bbox +replicate 2 2 $z bbox +replicate 2 2 2 bbox +Replication is creating a 2x2x2 = 8 times larger system... + triclinic box = (0 0 0) to (44.652 22.2824 27.557932) with tilt (0 -10.05206 0) + 1 by 1 by 1 MPI processor grid + bounding box image = (0 -1 -1) to (0 1 1) + bounding box extra memory = 0.03 MB + average # of replicas added to proc = 8.00 out of 8 (100.00%) + 2432 atoms + replicate CPU = 0.001 seconds + + +pair_style reaxff NULL +pair_coeff * * ffield.reax.hns C H O N + +compute reax all pair reaxff + +neighbor 1.0 bin +neigh_modify every 20 delay 0 check no + +timestep 0.1 + +thermo_style custom step temp pe press evdwl ecoul vol +thermo_modify norm yes +thermo 10 + +velocity all create 300.0 41279 loop geom + +fix 1 all nve +fix 2 all qeq/reax 1 0.0 10.0 1e-6 reaxff + +run $t +run 100 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +- fix qeq/reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Neighbor list info ... 
+ update: every = 20 steps, delay = 0 steps, check = no + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 11 + ghost atom cutoff = 11 + binsize = 5.5, bins = 10 5 6 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/newtoff/ghost + stencil: full/ghost/bin/3d + bin: standard + (2) fix qeq/reax, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 215 | 215 | 215 Mbytes + Step Temp PotEng Press E_vdwl E_coul Volume + 0 300 -113.27833 437.52149 -111.57687 -1.7014647 27418.867 + 10 299.38517 -113.27631 1439.2564 -111.57492 -1.7013814 27418.867 + 20 300.27107 -113.27884 3764.4017 -111.57762 -1.7012246 27418.867 + 30 302.21064 -113.28428 7007.6558 -111.58335 -1.7009364 27418.867 + 40 303.52265 -113.28799 9844.8196 -111.58747 -1.7005186 27418.867 + 50 301.8706 -113.28324 9663.08 -111.58318 -1.7000523 27418.867 + 60 296.67808 -113.26777 7273.8875 -111.56815 -1.6996136 27418.867 + 70 292.19999 -113.25435 5533.625 -111.55514 -1.6992157 27418.867 + 80 293.58678 -113.25831 5993.4679 -111.55946 -1.6988532 27418.867 + 90 300.62637 -113.27925 7202.8453 -111.58069 -1.6985592 27418.867 + 100 305.38277 -113.29357 10085.747 -111.59518 -1.6983875 27418.867 +Loop time of 17.6114 on 1 procs for 100 steps with 2432 atoms + +Performance: 0.049 ns/day, 489.205 hours/ns, 5.678 timesteps/s, 13.809 katom-step/s +99.7% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 13.081 | 13.081 | 13.081 | 0.0 | 74.27 +Neigh | 0.25469 | 0.25469 | 0.25469 | 0.0 | 1.45 +Comm | 0.0061082 | 0.0061082 | 0.0061082 | 0.0 | 0.03 +Output | 0.00035315 | 0.00035315 | 0.00035315 | 0.0 | 0.00 +Modify | 4.2687 | 4.2687 | 4.2687 | 0.0 | 24.24 +Other | 
| 0.0007784 | | | 0.00 + +Nlocal: 2432 ave 2432 max 2432 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 10685 ave 10685 max 10685 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 823958 ave 823958 max 823958 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 823958 +Ave neighs/atom = 338.79852 +Neighbor list builds = 5 +Dangerous builds not checked +Total wall time: 0:00:17 diff --git a/examples/reaxff/HNS/log.30Nov23.reaxff.hns.g++.4 b/examples/reaxff/HNS/log.30Nov23.reaxff.hns.g++.4 new file mode 100644 index 0000000000..d7c3b76f7a --- /dev/null +++ b/examples/reaxff/HNS/log.30Nov23.reaxff.hns.g++.4 @@ -0,0 +1,149 @@ +LAMMPS (21 Nov 2023) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +# Pure HNS crystal, ReaxFF tests for benchmarking LAMMPS +# See README for more info + +variable x index 2 +variable y index 2 +variable z index 2 +variable t index 100 + + +units real +atom_style charge +atom_modify sort 100 0.0 # optional +dimension 3 +boundary p p p + +read_data data.hns-equil +Reading data file ... + triclinic box = (0 0 0) to (22.326 11.1412 13.778966) with tilt (0 -5.02603 0) + 2 by 1 by 2 MPI processor grid + reading atoms ... + 304 atoms + reading velocities ... + 304 velocities + read_data CPU = 0.003 seconds +replicate $x $y $z bbox +replicate 2 $y $z bbox +replicate 2 2 $z bbox +replicate 2 2 2 bbox +Replication is creating a 2x2x2 = 8 times larger system... 
+ triclinic box = (0 0 0) to (44.652 22.2824 27.557932) with tilt (0 -10.05206 0) + 2 by 1 by 2 MPI processor grid + bounding box image = (0 -1 -1) to (0 1 1) + bounding box extra memory = 0.03 MB + average # of replicas added to proc = 5.00 out of 8 (62.50%) + 2432 atoms + replicate CPU = 0.000 seconds + + +pair_style reaxff NULL +pair_coeff * * ffield.reax.hns C H O N + +compute reax all pair reaxff + +neighbor 1.0 bin +neigh_modify every 20 delay 0 check no + +timestep 0.1 + +thermo_style custom step temp pe press evdwl ecoul vol +thermo_modify norm yes +thermo 10 + +velocity all create 300.0 41279 loop geom + +fix 1 all nve +fix 2 all qeq/reax 1 0.0 10.0 1e-6 reaxff + +run $t +run 100 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +- fix qeq/reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Neighbor list info ... 
+ update: every = 20 steps, delay = 0 steps, check = no + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 11 + ghost atom cutoff = 11 + binsize = 5.5, bins = 10 5 6 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/newtoff/ghost + stencil: full/ghost/bin/3d + bin: standard + (2) fix qeq/reax, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 103.8 | 103.8 | 103.8 Mbytes + Step Temp PotEng Press E_vdwl E_coul Volume + 0 300 -113.27833 437.52125 -111.57687 -1.7014647 27418.867 + 10 299.38517 -113.27631 1439.2564 -111.57492 -1.7013814 27418.867 + 20 300.27106 -113.27884 3764.3691 -111.57762 -1.7012246 27418.867 + 30 302.21062 -113.28428 7007.6981 -111.58335 -1.7009363 27418.867 + 40 303.52264 -113.28799 9844.8446 -111.58747 -1.7005186 27418.867 + 50 301.87059 -113.28324 9663.0539 -111.58318 -1.7000523 27418.867 + 60 296.67807 -113.26777 7273.8306 -111.56815 -1.6996136 27418.867 + 70 292.19997 -113.25435 5533.612 -111.55514 -1.6992157 27418.867 + 80 293.58675 -113.25831 5993.4344 -111.55946 -1.6988533 27418.867 + 90 300.62636 -113.27925 7202.8636 -111.58069 -1.6985591 27418.867 + 100 305.38278 -113.29357 10085.719 -111.59518 -1.6983876 27418.867 +Loop time of 6.63333 on 4 procs for 100 steps with 2432 atoms + +Performance: 0.130 ns/day, 184.259 hours/ns, 15.075 timesteps/s, 36.663 katom-step/s +99.3% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 4.1006 | 4.5745 | 5.0624 | 17.4 | 68.96 +Neigh | 0.11589 | 0.11637 | 0.11669 | 0.1 | 1.75 +Comm | 0.0077297 | 0.49567 | 0.96958 | 52.7 | 7.47 +Output | 0.00027396 | 0.00031049 | 0.00038633 | 0.0 | 0.00 +Modify | 1.4458 | 1.4461 | 1.4465 | 0.0 | 21.80 
+Other | | 0.0004201 | | | 0.01 + +Nlocal: 608 ave 612 max 604 min +Histogram: 1 0 0 0 0 2 0 0 0 1 +Nghost: 5737.25 ave 5744 max 5732 min +Histogram: 1 0 1 0 0 1 0 0 0 1 +Neighs: 231539 ave 233090 max 229970 min +Histogram: 1 0 0 0 1 1 0 0 0 1 + +Total # of neighbors = 926155 +Ave neighs/atom = 380.82031 +Neighbor list builds = 5 +Dangerous builds not checked +Total wall time: 0:00:06 diff --git a/examples/reaxff/HNS/log.8Mar18.reaxc.hns.g++.1 b/examples/reaxff/HNS/log.8Mar18.reaxc.hns.g++.1 deleted file mode 100644 index d418d287b1..0000000000 --- a/examples/reaxff/HNS/log.8Mar18.reaxc.hns.g++.1 +++ /dev/null @@ -1,115 +0,0 @@ -LAMMPS (8 Mar 2018) - using 1 OpenMP thread(s) per MPI task -# Pure HNS crystal, ReaxFF tests for benchmarking LAMMPS -# See README for more info - -variable x index 2 -variable y index 2 -variable z index 2 -variable t index 100 - - -units real -atom_style charge -atom_modify sort 100 0.0 # optional -dimension 3 -boundary p p p -box tilt large - -read_data data.hns-equil - triclinic box = (0 0 0) to (22.326 11.1412 13.779) with tilt (0 -5.02603 0) - 1 by 1 by 1 MPI processor grid - reading atoms ... - 304 atoms - reading velocities ... - 304 velocities -replicate $x $y $z bbox -replicate 2 $y $z bbox -replicate 2 2 $z bbox -replicate 2 2 2 bbox - triclinic box = (0 0 0) to (44.652 22.2824 27.5579) with tilt (0 -10.0521 0) - 1 by 1 by 1 MPI processor grid - 2432 atoms - Time spent = 0.000789404 secs - - -pair_style reax/c NULL -pair_coeff * * ffield.reax.hns C H O N - -compute reax all pair reax/c - -neighbor 1.0 bin -neigh_modify every 20 delay 0 check no - -timestep 0.1 - -thermo_style custom step temp pe press evdwl ecoul vol -thermo_modify norm yes -thermo 10 - -velocity all create 300.0 41279 loop geom - -fix 1 all nve -fix 2 all qeq/reax 1 0.0 10.0 1e-6 reax/c - -run 100 -Neighbor list info ... 
- update every 20 steps, delay 0 steps, check no - max neighbors/atom: 2000, page size: 100000 - master list distance cutoff = 11 - ghost atom cutoff = 11 - binsize = 5.5, bins = 10 5 6 - 2 neighbor lists, perpetual/occasional/extra = 2 0 0 - (1) pair reax/c, perpetual - attributes: half, newton off, ghost - pair build: half/bin/newtoff/ghost - stencil: half/ghost/bin/3d/newtoff - bin: standard - (2) fix qeq/reax, perpetual, copy from (1) - attributes: half, newton off, ghost - pair build: copy - stencil: none - bin: none -Per MPI rank memory allocation (min/avg/max) = 262.4 | 262.4 | 262.4 Mbytes -Step Temp PotEng Press E_vdwl E_coul Volume - 0 300 -113.27833 437.52103 -111.57687 -1.7014647 27418.867 - 10 299.87174 -113.27778 2033.6337 -111.57645 -1.7013325 27418.867 - 20 300.81718 -113.28046 4817.5889 -111.57931 -1.7011463 27418.867 - 30 301.8622 -113.28323 8303.0039 -111.58237 -1.7008608 27418.867 - 40 302.4646 -113.28493 10519.459 -111.58446 -1.700467 27418.867 - 50 300.79064 -113.27989 10402.291 -111.57987 -1.7000218 27418.867 - 60 296.11534 -113.26599 7929.1348 -111.5664 -1.6995929 27418.867 - 70 291.73354 -113.25289 5071.5459 -111.5537 -1.6991916 27418.867 - 80 292.189 -113.25399 5667.0962 -111.55519 -1.6987993 27418.867 - 90 298.40792 -113.27253 7513.3806 -111.57409 -1.6984403 27418.867 - 100 303.58246 -113.28809 10017.879 -111.58991 -1.698177 27418.867 -Loop time of 59.5461 on 1 procs for 100 steps with 2432 atoms - -Performance: 0.015 ns/day, 1654.060 hours/ns, 1.679 timesteps/s -97.0% CPU use with 1 MPI tasks x 1 OpenMP threads - -MPI task timing breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 49.922 | 49.922 | 49.922 | 0.0 | 83.84 -Neigh | 0.53154 | 0.53154 | 0.53154 | 0.0 | 0.89 -Comm | 0.011399 | 0.011399 | 0.011399 | 0.0 | 0.02 -Output | 0.00064397 | 0.00064397 | 0.00064397 | 0.0 | 0.00 -Modify | 9.0782 | 9.0782 | 9.0782 | 0.0 | 15.25 -Other | | 0.002116 | 
| | 0.00 - -Nlocal: 2432 ave 2432 max 2432 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Nghost: 10687 ave 10687 max 10687 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Neighs: 823977 ave 823977 max 823977 min -Histogram: 1 0 0 0 0 0 0 0 0 0 - -Total # of neighbors = 823977 -Ave neighs/atom = 338.806 -Neighbor list builds = 5 -Dangerous builds not checked - -Please see the log.cite file for references relevant to this simulation - -Total wall time: 0:01:00 diff --git a/examples/reaxff/HNS/log.8Mar18.reaxc.hns.g++.4 b/examples/reaxff/HNS/log.8Mar18.reaxc.hns.g++.4 deleted file mode 100644 index aef07f80eb..0000000000 --- a/examples/reaxff/HNS/log.8Mar18.reaxc.hns.g++.4 +++ /dev/null @@ -1,115 +0,0 @@ -LAMMPS (8 Mar 2018) - using 1 OpenMP thread(s) per MPI task -# Pure HNS crystal, ReaxFF tests for benchmarking LAMMPS -# See README for more info - -variable x index 2 -variable y index 2 -variable z index 2 -variable t index 100 - - -units real -atom_style charge -atom_modify sort 100 0.0 # optional -dimension 3 -boundary p p p -box tilt large - -read_data data.hns-equil - triclinic box = (0 0 0) to (22.326 11.1412 13.779) with tilt (0 -5.02603 0) - 2 by 1 by 2 MPI processor grid - reading atoms ... - 304 atoms - reading velocities ... - 304 velocities -replicate $x $y $z bbox -replicate 2 $y $z bbox -replicate 2 2 $z bbox -replicate 2 2 2 bbox - triclinic box = (0 0 0) to (44.652 22.2824 27.5579) with tilt (0 -10.0521 0) - 2 by 1 by 2 MPI processor grid - 2432 atoms - Time spent = 0.000398397 secs - - -pair_style reax/c NULL -pair_coeff * * ffield.reax.hns C H O N - -compute reax all pair reax/c - -neighbor 1.0 bin -neigh_modify every 20 delay 0 check no - -timestep 0.1 - -thermo_style custom step temp pe press evdwl ecoul vol -thermo_modify norm yes -thermo 10 - -velocity all create 300.0 41279 loop geom - -fix 1 all nve -fix 2 all qeq/reax 1 0.0 10.0 1e-6 reax/c - -run 100 -Neighbor list info ... 
- update every 20 steps, delay 0 steps, check no - max neighbors/atom: 2000, page size: 100000 - master list distance cutoff = 11 - ghost atom cutoff = 11 - binsize = 5.5, bins = 10 5 6 - 2 neighbor lists, perpetual/occasional/extra = 2 0 0 - (1) pair reax/c, perpetual - attributes: half, newton off, ghost - pair build: half/bin/newtoff/ghost - stencil: half/ghost/bin/3d/newtoff - bin: standard - (2) fix qeq/reax, perpetual, copy from (1) - attributes: half, newton off, ghost - pair build: copy - stencil: none - bin: none -Per MPI rank memory allocation (min/avg/max) = 126.6 | 126.6 | 126.6 Mbytes -Step Temp PotEng Press E_vdwl E_coul Volume - 0 300 -113.27833 437.52112 -111.57687 -1.7014647 27418.867 - 10 299.87174 -113.27778 2033.632 -111.57645 -1.7013325 27418.867 - 20 300.81719 -113.28046 4817.5761 -111.57931 -1.7011463 27418.867 - 30 301.8622 -113.28323 8302.9767 -111.58237 -1.7008609 27418.867 - 40 302.4646 -113.28493 10519.481 -111.58446 -1.700467 27418.867 - 50 300.79064 -113.27989 10402.312 -111.57987 -1.7000217 27418.867 - 60 296.11534 -113.26599 7929.1393 -111.5664 -1.6995929 27418.867 - 70 291.73354 -113.25289 5071.5368 -111.5537 -1.6991916 27418.867 - 80 292.18901 -113.25399 5667.1118 -111.55519 -1.6987993 27418.867 - 90 298.40793 -113.27253 7513.4029 -111.57409 -1.6984403 27418.867 - 100 303.58247 -113.28809 10017.892 -111.58991 -1.698177 27418.867 -Loop time of 21.3933 on 4 procs for 100 steps with 2432 atoms - -Performance: 0.040 ns/day, 594.257 hours/ns, 4.674 timesteps/s -97.6% CPU use with 4 MPI tasks x 1 OpenMP threads - -MPI task timing breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 14.863 | 16.367 | 18.027 | 28.6 | 76.51 -Neigh | 0.23943 | 0.2422 | 0.24658 | 0.6 | 1.13 -Comm | 0.024331 | 1.6845 | 3.189 | 89.2 | 7.87 -Output | 0.00051165 | 0.00056899 | 0.00068665 | 0.0 | 0.00 -Modify | 3.0933 | 3.0969 | 3.0999 | 0.1 | 14.48 -Other | | 0.001784 | | | 
0.01 - -Nlocal: 608 ave 608 max 608 min -Histogram: 4 0 0 0 0 0 0 0 0 0 -Nghost: 5738.25 ave 5742 max 5734 min -Histogram: 1 1 0 0 0 0 0 0 0 2 -Neighs: 231544 ave 231625 max 231466 min -Histogram: 2 0 0 0 0 0 0 0 0 2 - -Total # of neighbors = 926176 -Ave neighs/atom = 380.829 -Neighbor list builds = 5 -Dangerous builds not checked - -Please see the log.cite file for references relevant to this simulation - -Total wall time: 0:00:21 diff --git a/examples/reaxff/RDX/in.RDX b/examples/reaxff/RDX/in.RDX index 67d6145787..a510390a03 100644 --- a/examples/reaxff/RDX/in.RDX +++ b/examples/reaxff/RDX/in.RDX @@ -1,23 +1,23 @@ # REAX potential for high energy CHON systems # ..... -units real +units real -atom_style charge -read_data data.RDX +atom_style charge +read_data data.RDX -pair_style reax/c lmp_control -pair_coeff * * ffield.reax.rdx H C O N +pair_style reaxff lmp_control +pair_coeff * * ffield.reax.rdx H C O N -neighbor 2 bin -neigh_modify every 10 delay 0 check no +neighbor 2 bin +neigh_modify every 10 delay 0 check no -fix 1 all nve +fix 1 all nve fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq fix 3 all temp/berendsen 500.0 500.0 100.0 -timestep 0.25 +timestep 0.25 +thermo 100 +#dump 1 all atom 30 dump.reax.rdx -#dump 1 all atom 30 dump.reax.rdx - -run 3000 +run 3000 diff --git a/examples/reaxff/RDX/lmp_control b/examples/reaxff/RDX/lmp_control index c729255b45..b97ad8a67f 100644 --- a/examples/reaxff/RDX/lmp_control +++ b/examples/reaxff/RDX/lmp_control @@ -1,17 +1,8 @@ -simulation_name RDX_example ! output files will carry this name + their specific ext -tabulate_long_range 10000 ! denotes the granularity of long range tabulation, 0 means no tabulation -energy_update_freq 1 +tabulate_long_range 10000 ! denotes the granularity of long range tabulation, 0 means no tabulation -nbrhood_cutoff 4.5 ! near neighbors cutoff for bond calculations in A -hbond_cutoff 6.0 ! cutoff distance for hydrogen bond interactions -bond_graph_cutoff 0.3 ! 
bond strength cutoff for bond graphs -thb_cutoff 0.001 ! cutoff value for three body interactions +nbrhood_cutoff 4.5 ! near neighbors cutoff for bond calculations in A +hbond_cutoff 6.0 ! cutoff distance for hydrogen bond interactions +bond_graph_cutoff 0.3 ! bond strength cutoff for bond graphs +thb_cutoff 0.001 ! cutoff value for three body interactions -write_freq 1 ! write trajectory after so many steps -traj_title RDX ! (no white spaces) -atom_info 1 ! 0: no atom info, 1: print basic atom info in the trajectory file -atom_forces 1 ! 0: basic atom format, 1: print force on each atom in the trajectory file -atom_velocities 0 ! 0: basic atom format, 1: print the velocity of each atom in the trajectory file -bond_info 1 ! 0: do not print bonds, 1: print bonds in the trajectory file -angle_info 1 ! 0: do not print angles, 1: print angles in the trajectory file diff --git a/examples/reaxff/RDX/log.30Nov23.RDX.g++.1 b/examples/reaxff/RDX/log.30Nov23.RDX.g++.1 new file mode 100644 index 0000000000..7d2214a6c8 --- /dev/null +++ b/examples/reaxff/RDX/log.30Nov23.RDX.g++.1 @@ -0,0 +1,140 @@ +LAMMPS (21 Nov 2023) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +# REAX potential for high energy CHON systems +# ..... + +units real + +atom_style charge +read_data data.RDX +Reading data file ... + orthogonal box = (0 0 0) to (25 25 25) + 1 by 1 by 1 MPI processor grid + reading atoms ... 
+ 105 atoms + read_data CPU = 0.001 seconds + +pair_style reaxff lmp_control +pair_coeff * * ffield.reax.rdx H C O N +Reading potential file ffield.reax.rdx with DATE: 2010-02-19 + +neighbor 2 bin +neigh_modify every 10 delay 0 check no + +fix 1 all nve +fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq +fix 3 all temp/berendsen 500.0 500.0 100.0 + +timestep 0.25 +thermo 100 +#dump 1 all atom 30 dump.reax.rdx + +run 3000 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +- fix qeq/reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Neighbor list info ... 
+ update: every = 10 steps, delay = 0 steps, check = no + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 12 + ghost atom cutoff = 12 + binsize = 6, bins = 5 5 5 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/newtoff/ghost + stencil: full/ghost/bin/3d + bin: standard + (2) fix qeq/reax, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 16.3 | 16.3 | 16.3 Mbytes + Step Temp E_pair E_mol TotEng Press + 0 0 -10197.932 0 -10197.932 38.347492 + 100 47.478574 -10176.425 0 -10161.706 632.99863 + 200 166.95277 -10181.513 0 -10129.757 -27.107717 + 300 142.53594 -10148.039 0 -10103.853 5120.6794 + 400 322.68495 -10178.868 0 -10078.834 2342.89 + 500 193.81476 -10117.984 0 -10057.901 8412.5289 + 600 300.27155 -10134.473 0 -10041.388 -2801.8661 + 700 272.63426 -10110.146 0 -10025.629 10749.023 + 800 339.99867 -10114.124 0 -10008.723 5122.9966 + 900 231.65547 -10068.587 0 -9996.7728 5306.059 + 1000 329.92918 -10088.776 0 -9986.4964 3190.1697 + 1100 376.60905 -10092.398 0 -9975.6476 2921.9605 + 1200 361.98746 -10076.599 0 -9964.3813 3612.0455 + 1300 358.65631 -10069.365 0 -9958.1802 4339.8435 + 1400 470.15262 -10098.553 0 -9952.8035 -146.0811 + 1500 509.62274 -10106.57 0 -9948.5844 2356.8592 + 1600 417.89364 -10075.274 0 -9945.7249 1760.5655 + 1700 453.21317 -10084.329 0 -9943.8306 -570.32375 + 1800 472.92112 -10087.83 0 -9941.2221 1550.3495 + 1900 507.18794 -10096.441 0 -9939.2102 -460.65809 + 2000 443.55347 -10076.832 0 -9939.3281 -145.14295 + 2100 485.44451 -10088.451 0 -9937.9611 -614.40787 + 2200 507.52411 -10095.157 0 -9937.8226 1308.3869 + 2300 496.44961 -10089.637 0 -9935.7354 206.44147 + 2400 457.99343 -10078.114 0 -9936.1344 810.80538 + 2500 507.88115 -10092.692 0 -9935.2463 -464.87873 + 2600 437.84198 -10069.789 0 -9934.0561 1323.4191 + 2700 
503.09017 -10088.945 0 -9932.9853 1133.3561 + 2800 481.73908 -10082.193 0 -9932.852 -27.838881 + 2900 487.56555 -10082.752 0 -9931.6045 1772.2131 + 3000 510.30601 -10091.368 0 -9933.1706 1273.0501 +Loop time of 5.35022 on 1 procs for 3000 steps with 105 atoms + +Performance: 12.112 ns/day, 1.982 hours/ns, 560.725 timesteps/s, 58.876 katom-step/s +99.7% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 4.7927 | 4.7927 | 4.7927 | 0.0 | 89.58 +Neigh | 0.15169 | 0.15169 | 0.15169 | 0.0 | 2.84 +Comm | 0.011036 | 0.011036 | 0.011036 | 0.0 | 0.21 +Output | 0.00080628 | 0.00080628 | 0.00080628 | 0.0 | 0.02 +Modify | 0.3906 | 0.3906 | 0.3906 | 0.0 | 7.30 +Other | | 0.003436 | | | 0.06 + +Nlocal: 105 ave 105 max 105 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 645 ave 645 max 645 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 3065 ave 3065 max 3065 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 3065 +Ave neighs/atom = 29.190476 +Neighbor list builds = 300 +Dangerous builds not checked +Total wall time: 0:00:05 diff --git a/examples/reaxff/RDX/log.30Nov23.RDX.g++.4 b/examples/reaxff/RDX/log.30Nov23.RDX.g++.4 new file mode 100644 index 0000000000..37481acd1b --- /dev/null +++ b/examples/reaxff/RDX/log.30Nov23.RDX.g++.4 @@ -0,0 +1,140 @@ +LAMMPS (21 Nov 2023) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +# REAX potential for high energy CHON systems +# ..... + +units real + +atom_style charge +read_data data.RDX +Reading data file ... + orthogonal box = (0 0 0) to (25 25 25) + 1 by 2 by 2 MPI processor grid + reading atoms ... 
+ 105 atoms + read_data CPU = 0.001 seconds + +pair_style reaxff lmp_control +pair_coeff * * ffield.reax.rdx H C O N +Reading potential file ffield.reax.rdx with DATE: 2010-02-19 + +neighbor 2 bin +neigh_modify every 10 delay 0 check no + +fix 1 all nve +fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq +fix 3 all temp/berendsen 500.0 500.0 100.0 + +timestep 0.25 +thermo 100 +#dump 1 all atom 30 dump.reax.rdx + +run 3000 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +- fix qeq/reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Neighbor list info ... 
+ update: every = 10 steps, delay = 0 steps, check = no + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 12 + ghost atom cutoff = 12 + binsize = 6, bins = 5 5 5 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/newtoff/ghost + stencil: full/ghost/bin/3d + bin: standard + (2) fix qeq/reax, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 10.78 | 11.56 | 12.26 Mbytes + Step Temp E_pair E_mol TotEng Press + 0 0 -10197.932 0 -10197.932 38.347492 + 100 47.47852 -10176.425 0 -10161.706 632.97359 + 200 166.95287 -10181.513 0 -10129.757 -27.146803 + 300 142.53582 -10148.039 0 -10103.852 5120.6397 + 400 322.68523 -10178.868 0 -10078.834 2342.7187 + 500 193.81484 -10117.984 0 -10057.901 8412.4559 + 600 300.27165 -10134.473 0 -10041.388 -2801.9143 + 700 272.63408 -10110.146 0 -10025.629 10749.2 + 800 339.99669 -10114.123 0 -10008.723 5123.2489 + 900 231.65632 -10068.587 0 -9996.7729 5306.0392 + 1000 329.93324 -10088.777 0 -9986.4967 3190.4707 + 1100 376.60924 -10092.398 0 -9975.6478 2920.8475 + 1200 361.98231 -10076.598 0 -9964.3816 3612.0573 + 1300 358.6599 -10069.366 0 -9958.1803 4341.9871 + 1400 470.14856 -10098.552 0 -9952.8036 -146.9069 + 1500 509.6454 -10106.577 0 -9948.5847 2355.4022 + 1600 417.9276 -10075.284 0 -9945.7249 1749.565 + 1700 453.25817 -10084.343 0 -9943.8306 -570.48011 + 1800 472.9517 -10087.84 0 -9941.2226 1532.6424 + 1900 507.14171 -10096.428 0 -9939.212 -404.84948 + 2000 443.62843 -10076.86 0 -9939.3329 -132.17302 + 2100 485.441 -10088.414 0 -9937.925 -609.75758 + 2200 507.23914 -10095.067 0 -9937.8209 1288.5372 + 2300 499.64956 -10090.665 0 -9935.7719 149.06622 + 2400 457.97848 -10078.107 0 -9936.1317 2065.2075 + 2500 510.58254 -10093.537 0 -9935.2543 -559.75965 + 2600 440.97503 -10070.865 0 -9934.1605 1164.1078 + 2700 500.4945 
-10088.165 0 -9933.0096 1051.9016 + 2800 485.77814 -10083.543 0 -9932.9498 294.64404 + 2900 487.73983 -10082.939 0 -9931.7373 2208.263 + 3000 504.69717 -10089.803 0 -9933.3447 1723.6386 +Loop time of 2.81192 on 4 procs for 3000 steps with 105 atoms + +Performance: 23.045 ns/day, 1.041 hours/ns, 1066.887 timesteps/s, 112.023 katom-step/s +99.3% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 2.0513 | 2.1567 | 2.2232 | 4.3 | 76.70 +Neigh | 0.072125 | 0.087048 | 0.10214 | 3.7 | 3.10 +Comm | 0.086792 | 0.15326 | 0.25749 | 16.2 | 5.45 +Output | 0.00058533 | 0.00064027 | 0.00080207 | 0.0 | 0.02 +Modify | 0.39587 | 0.41124 | 0.42647 | 1.7 | 14.62 +Other | | 0.003062 | | | 0.11 + +Nlocal: 26.25 ave 46 max 8 min +Histogram: 1 0 0 1 0 1 0 0 0 1 +Nghost: 399.5 ave 512 max 288 min +Histogram: 1 0 0 1 0 0 1 0 0 1 +Neighs: 1011.25 ave 1819 max 420 min +Histogram: 1 0 1 1 0 0 0 0 0 1 + +Total # of neighbors = 4045 +Ave neighs/atom = 38.52381 +Neighbor list builds = 300 +Dangerous builds not checked +Total wall time: 0:00:02 diff --git a/examples/reaxff/RDX/log.8Mar18.RDX.g++.1 b/examples/reaxff/RDX/log.8Mar18.RDX.g++.1 deleted file mode 100644 index d0765a97a5..0000000000 --- a/examples/reaxff/RDX/log.8Mar18.RDX.g++.1 +++ /dev/null @@ -1,81 +0,0 @@ -LAMMPS (8 Mar 2018) - using 1 OpenMP thread(s) per MPI task -# REAX potential for high energy CHON systems -# ..... - -units real - -atom_style charge -read_data data.RDX - orthogonal box = (0 0 0) to (25 25 25) - 1 by 1 by 1 MPI processor grid - reading atoms ... 
- 105 atoms - -pair_style reax/c lmp_control -pair_coeff * * ffield.reax.rdx H C O N -Reading potential file ffield.reax.rdx with DATE: 2010-02-19 - -neighbor 2 bin -neigh_modify every 10 delay 0 check no - -fix 1 all nve -fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq -fix 3 all temp/berendsen 500.0 500.0 100.0 - -timestep 0.25 - -#dump 1 all atom 30 dump.reax.rdx - -run 3000 -Neighbor list info ... - update every 10 steps, delay 0 steps, check no - max neighbors/atom: 2000, page size: 100000 - master list distance cutoff = 12 - ghost atom cutoff = 12 - binsize = 6, bins = 5 5 5 - 2 neighbor lists, perpetual/occasional/extra = 2 0 0 - (1) pair reax/c, perpetual - attributes: half, newton off, ghost - pair build: half/bin/newtoff/ghost - stencil: half/ghost/bin/3d/newtoff - bin: standard - (2) fix qeq/reax, perpetual, copy from (1) - attributes: half, newton off, ghost - pair build: copy - stencil: none - bin: none -Per MPI rank memory allocation (min/avg/max) = 19 | 19 | 19 Mbytes -Step Temp E_pair E_mol TotEng Press - 0 0 -10197.932 0 -10197.932 38.347492 - 3000 510.63767 -10091.537 0 -9933.2374 1144.545 -Loop time of 21.2931 on 1 procs for 3000 steps with 105 atoms - -Performance: 3.043 ns/day, 7.886 hours/ns, 140.891 timesteps/s -97.6% CPU use with 1 MPI tasks x 1 OpenMP threads - -MPI task timing breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 19.887 | 19.887 | 19.887 | 0.0 | 93.40 -Neigh | 0.33143 | 0.33143 | 0.33143 | 0.0 | 1.56 -Comm | 0.02079 | 0.02079 | 0.02079 | 0.0 | 0.10 -Output | 2.5272e-05 | 2.5272e-05 | 2.5272e-05 | 0.0 | 0.00 -Modify | 1.0478 | 1.0478 | 1.0478 | 0.0 | 4.92 -Other | | 0.006125 | | | 0.03 - -Nlocal: 105 ave 105 max 105 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Nghost: 645 ave 645 max 645 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Neighs: 3063 ave 3063 max 3063 min -Histogram: 1 0 0 0 0 0 0 0 0 0 - -Total # of neighbors = 3063 -Ave neighs/atom = 29.1714 
-Neighbor list builds = 300 -Dangerous builds not checked - -Please see the log.cite file for references relevant to this simulation - -Total wall time: 0:00:21 diff --git a/examples/reaxff/RDX/log.8Mar18.RDX.g++.4 b/examples/reaxff/RDX/log.8Mar18.RDX.g++.4 deleted file mode 100644 index 7082d30636..0000000000 --- a/examples/reaxff/RDX/log.8Mar18.RDX.g++.4 +++ /dev/null @@ -1,81 +0,0 @@ -LAMMPS (8 Mar 2018) - using 1 OpenMP thread(s) per MPI task -# REAX potential for high energy CHON systems -# ..... - -units real - -atom_style charge -read_data data.RDX - orthogonal box = (0 0 0) to (25 25 25) - 1 by 2 by 2 MPI processor grid - reading atoms ... - 105 atoms - -pair_style reax/c lmp_control -pair_coeff * * ffield.reax.rdx H C O N -Reading potential file ffield.reax.rdx with DATE: 2010-02-19 - -neighbor 2 bin -neigh_modify every 10 delay 0 check no - -fix 1 all nve -fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq -fix 3 all temp/berendsen 500.0 500.0 100.0 - -timestep 0.25 - -#dump 1 all atom 30 dump.reax.rdx - -run 3000 -Neighbor list info ... 
- update every 10 steps, delay 0 steps, check no - max neighbors/atom: 2000, page size: 100000 - master list distance cutoff = 12 - ghost atom cutoff = 12 - binsize = 6, bins = 5 5 5 - 2 neighbor lists, perpetual/occasional/extra = 2 0 0 - (1) pair reax/c, perpetual - attributes: half, newton off, ghost - pair build: half/bin/newtoff/ghost - stencil: half/ghost/bin/3d/newtoff - bin: standard - (2) fix qeq/reax, perpetual, copy from (1) - attributes: half, newton off, ghost - pair build: copy - stencil: none - bin: none -Per MPI rank memory allocation (min/avg/max) = 12.14 | 13.04 | 13.9 Mbytes -Step Temp E_pair E_mol TotEng Press - 0 0 -10197.932 0 -10197.932 38.347492 - 3000 509.89257 -10091.36 0 -9933.2916 1406.1215 -Loop time of 10.8858 on 4 procs for 3000 steps with 105 atoms - -Performance: 5.953 ns/day, 4.032 hours/ns, 275.588 timesteps/s -98.1% CPU use with 4 MPI tasks x 1 OpenMP threads - -MPI task timing breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 9.3081 | 9.4054 | 9.4994 | 2.6 | 86.40 -Neigh | 0.15541 | 0.18258 | 0.2099 | 4.7 | 1.68 -Comm | 0.070516 | 0.16621 | 0.26541 | 19.7 | 1.53 -Output | 2.2173e-05 | 2.5153e-05 | 3.3855e-05 | 0.0 | 0.00 -Modify | 1.0979 | 1.1272 | 1.1568 | 2.1 | 10.35 -Other | | 0.004379 | | | 0.04 - -Nlocal: 26.25 ave 46 max 8 min -Histogram: 1 0 0 1 0 1 0 0 0 1 -Nghost: 399.5 ave 512 max 288 min -Histogram: 1 0 0 1 0 0 1 0 0 1 -Neighs: 1011.25 ave 1819 max 420 min -Histogram: 1 0 1 1 0 0 0 0 0 1 - -Total # of neighbors = 4045 -Ave neighs/atom = 38.5238 -Neighbor list builds = 300 -Dangerous builds not checked - -Please see the log.cite file for references relevant to this simulation - -Total wall time: 0:00:11 diff --git a/examples/reaxff/VOH/in.VOH b/examples/reaxff/VOH/in.VOH index 82fa8d1811..3a1047f2c3 100644 --- a/examples/reaxff/VOH/in.VOH +++ b/examples/reaxff/VOH/in.VOH @@ -1,23 +1,23 @@ # REAX potential for VOH system # ..... 
-units real +units real -atom_style charge -read_data data.VOH +atom_style charge +read_data data.VOH -pair_style reax/c lmp_control -pair_coeff * * ffield.reax.V_O_C_H H C O V +pair_style reaxff lmp_control +pair_coeff * * ffield.reax.V_O_C_H H C O V -neighbor 2 bin -neigh_modify every 10 delay 0 check no +neighbor 2 bin +neigh_modify every 10 delay 0 check no -fix 1 all nve +fix 1 all nve fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq fix 3 all temp/berendsen 500.0 500.0 100.0 -timestep 0.25 +timestep 0.25 -#dump 1 all atom 30 dump.reax.voh - -run 3000 +#dump 1 all atom 30 dump.reax.voh +thermo 100 +run 2000 diff --git a/examples/reaxff/VOH/lmp_control b/examples/reaxff/VOH/lmp_control index 735540053f..b97ad8a67f 100644 --- a/examples/reaxff/VOH/lmp_control +++ b/examples/reaxff/VOH/lmp_control @@ -1,17 +1,8 @@ -simulation_name VOH_example ! output files will carry this name + their specific ext -tabulate_long_range 10000 ! denotes the granularity of long range tabulation, 0 means no tabulation -energy_update_freq 1 +tabulate_long_range 10000 ! denotes the granularity of long range tabulation, 0 means no tabulation -nbrhood_cutoff 4.5 ! near neighbors cutoff for bond calculations in A -hbond_cutoff 6.0 ! cutoff distance for hydrogen bond interactions -bond_graph_cutoff 0.3 ! bond strength cutoff for bond graphs -thb_cutoff 0.001 ! cutoff value for three body interactions +nbrhood_cutoff 4.5 ! near neighbors cutoff for bond calculations in A +hbond_cutoff 6.0 ! cutoff distance for hydrogen bond interactions +bond_graph_cutoff 0.3 ! bond strength cutoff for bond graphs +thb_cutoff 0.001 ! cutoff value for three body interactions -write_freq 1 ! write trajectory after so many steps -traj_title VOH ! (no white spaces) -atom_info 1 ! 0: no atom info, 1: print basic atom info in the trajectory file -atom_forces 1 ! 0: basic atom format, 1: print force on each atom in the trajectory file -atom_velocities 0 ! 
0: basic atom format, 1: print the velocity of each atom in the trajectory file -bond_info 1 ! 0: do not print bonds, 1: print bonds in the trajectory file -angle_info 1 ! 0: do not print angles, 1: print angles in the trajectory file diff --git a/examples/reaxff/VOH/log.30Nov23.VOH.g++.1 b/examples/reaxff/VOH/log.30Nov23.VOH.g++.1 new file mode 100644 index 0000000000..d649653444 --- /dev/null +++ b/examples/reaxff/VOH/log.30Nov23.VOH.g++.1 @@ -0,0 +1,131 @@ +LAMMPS (21 Nov 2023) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +# REAX potential for VOH system +# ..... + +units real + +atom_style charge +read_data data.VOH +Reading data file ... + orthogonal box = (0 0 0) to (25 25 25) + 1 by 1 by 1 MPI processor grid + reading atoms ... + 100 atoms + read_data CPU = 0.001 seconds + +pair_style reaxff lmp_control +pair_coeff * * ffield.reax.V_O_C_H H C O V +Reading potential file ffield.reax.V_O_C_H with DATE: 2011-02-18 +WARNING: Changed valency_val to valency_boc for X (src/REAXFF/reaxff_ffield.cpp:289) + +neighbor 2 bin +neigh_modify every 10 delay 0 check no + +fix 1 all nve +fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq +fix 3 all temp/berendsen 500.0 500.0 100.0 + +timestep 0.25 + +#dump 1 all atom 30 dump.reax.voh +thermo 100 +run 2000 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +- fix qeq/reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. 
Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Neighbor list info ... + update: every = 10 steps, delay = 0 steps, check = no + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 12 + ghost atom cutoff = 12 + binsize = 6, bins = 5 5 5 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/newtoff/ghost + stencil: full/ghost/bin/3d + bin: standard + (2) fix qeq/reax, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 15.34 | 15.34 | 15.34 Mbytes + Step Temp E_pair E_mol TotEng Press + 0 0 -10246.825 0 -10246.825 42.256089 + 100 83.813625 -10238.056 0 -10213.322 -246.37234 + 200 128.47312 -10221.424 0 -10183.511 -896.05588 + 300 199.45833 -10218.343 0 -10159.482 -66.676466 + 400 243.93496 -10211.648 0 -10139.663 -1073.274 + 500 314.81116 -10216.592 0 -10123.692 542.54772 + 600 361.45977 -10217.717 0 -10111.05 205.47425 + 700 392.16954 -10215.815 0 -10100.086 -283.06967 + 800 392.49036 -10206.909 0 -10091.085 953.23712 + 900 426.51015 -10209.352 0 -10083.489 473.9928 + 1000 398.23517 -10195.103 0 -10077.584 243.59494 + 1100 414.05403 -10192.081 0 -10069.893 1063.7609 + 1200 442.70037 -10196.631 0 -10065.99 -1189.773 + 1300 470.32545 -10201.252 0 -10062.459 -132.3016 + 1400 446.97236 -10189.078 0 -10057.176 9.8938187 + 1500 475.7665 -10195.598 0 -10055.199 -877.81691 + 1600 440.45154 -10181.071 0 -10051.093 579.85471 + 1700 517.45211 -10201.067 0 -10048.367 136.58133 + 1800 461.86671 -10182.818 0 -10046.521 -260.09694 + 1900 463.99242 -10181.136 0 -10044.211 1567.8398 + 2000 476.73786 -10184.032 0 -10043.346 -883.50859 +Loop time of 1.94324 on 
1 procs for 2000 steps with 100 atoms + +Performance: 22.231 ns/day, 1.080 hours/ns, 1029.207 timesteps/s, 102.921 katom-step/s +99.7% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 1.5707 | 1.5707 | 1.5707 | 0.0 | 80.83 +Neigh | 0.1054 | 0.1054 | 0.1054 | 0.0 | 5.42 +Comm | 0.005606 | 0.005606 | 0.005606 | 0.0 | 0.29 +Output | 0.00053398 | 0.00053398 | 0.00053398 | 0.0 | 0.03 +Modify | 0.2594 | 0.2594 | 0.2594 | 0.0 | 13.35 +Other | | 0.001603 | | | 0.08 + +Nlocal: 100 ave 100 max 100 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 638 ave 638 max 638 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 3434 ave 3434 max 3434 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 3434 +Ave neighs/atom = 34.34 +Neighbor list builds = 200 +Dangerous builds not checked +Total wall time: 0:00:01 diff --git a/examples/reaxff/VOH/log.30Nov23.VOH.g++.4 b/examples/reaxff/VOH/log.30Nov23.VOH.g++.4 new file mode 100644 index 0000000000..a7363ee766 --- /dev/null +++ b/examples/reaxff/VOH/log.30Nov23.VOH.g++.4 @@ -0,0 +1,131 @@ +LAMMPS (21 Nov 2023) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +# REAX potential for VOH system +# ..... + +units real + +atom_style charge +read_data data.VOH +Reading data file ... + orthogonal box = (0 0 0) to (25 25 25) + 1 by 2 by 2 MPI processor grid + reading atoms ... 
+ 100 atoms + read_data CPU = 0.001 seconds + +pair_style reaxff lmp_control +pair_coeff * * ffield.reax.V_O_C_H H C O V +Reading potential file ffield.reax.V_O_C_H with DATE: 2011-02-18 +WARNING: Changed valency_val to valency_boc for X (src/REAXFF/reaxff_ffield.cpp:289) + +neighbor 2 bin +neigh_modify every 10 delay 0 check no + +fix 1 all nve +fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq +fix 3 all temp/berendsen 500.0 500.0 100.0 + +timestep 0.25 + +#dump 1 all atom 30 dump.reax.voh +thermo 100 +run 2000 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +- fix qeq/reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Neighbor list info ... 
+ update: every = 10 steps, delay = 0 steps, check = no + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 12 + ghost atom cutoff = 12 + binsize = 6, bins = 5 5 5 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/newtoff/ghost + stencil: full/ghost/bin/3d + bin: standard + (2) fix qeq/reax, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 10.04 | 11.12 | 12.06 Mbytes + Step Temp E_pair E_mol TotEng Press + 0 0 -10246.825 0 -10246.825 42.256092 + 100 83.813732 -10238.056 0 -10213.322 -246.39794 + 200 128.4729 -10221.424 0 -10183.511 -896.07308 + 300 199.45765 -10218.342 0 -10159.482 -66.695871 + 400 243.93632 -10211.649 0 -10139.663 -1073.2779 + 500 314.81228 -10216.592 0 -10123.691 542.5312 + 600 361.46099 -10217.717 0 -10111.05 205.56032 + 700 392.16903 -10215.815 0 -10100.086 -283.00265 + 800 392.48962 -10206.909 0 -10091.085 953.23878 + 900 426.50866 -10209.352 0 -10083.489 474.04312 + 1000 398.23724 -10195.104 0 -10077.584 243.52194 + 1100 414.05514 -10192.081 0 -10069.893 1063.726 + 1200 442.70432 -10196.633 0 -10065.99 -1189.8309 + 1300 470.32067 -10201.251 0 -10062.459 -132.41831 + 1400 447.00366 -10189.087 0 -10057.177 10.168781 + 1500 475.77239 -10195.599 0 -10055.199 -877.85409 + 1600 440.43788 -10181.066 0 -10051.092 580.25473 + 1700 517.37824 -10201.044 0 -10048.366 136.09841 + 1800 461.75464 -10182.78 0 -10046.517 -259.88878 + 1900 464.0812 -10181.158 0 -10044.207 1566.4734 + 2000 476.55134 -10183.975 0 -10043.344 -884.37537 +Loop time of 1.16101 on 4 procs for 2000 steps with 100 atoms + +Performance: 37.209 ns/day, 0.645 hours/ns, 1722.640 timesteps/s, 172.264 katom-step/s +99.1% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total 
+--------------------------------------------------------------- +Pair | 0.65447 | 0.69002 | 0.77443 | 6.0 | 59.43 +Neigh | 0.050397 | 0.060259 | 0.06883 | 2.7 | 5.19 +Comm | 0.03061 | 0.1145 | 0.15016 | 14.5 | 9.86 +Output | 0.00039488 | 0.00043537 | 0.00055434 | 0.0 | 0.04 +Modify | 0.28585 | 0.29446 | 0.30467 | 1.2 | 25.36 +Other | | 0.00134 | | | 0.12 + +Nlocal: 25 ave 36 max 10 min +Histogram: 1 0 0 0 0 1 0 0 1 1 +Nghost: 385.75 ave 472 max 299 min +Histogram: 1 0 0 1 0 0 1 0 0 1 +Neighs: 1077 ave 1693 max 379 min +Histogram: 1 0 0 1 0 0 0 0 1 1 + +Total # of neighbors = 4308 +Ave neighs/atom = 43.08 +Neighbor list builds = 200 +Dangerous builds not checked +Total wall time: 0:00:01 diff --git a/examples/reaxff/VOH/log.8Mar18.VOH.g++.1 b/examples/reaxff/VOH/log.8Mar18.VOH.g++.1 deleted file mode 100644 index 924769e570..0000000000 --- a/examples/reaxff/VOH/log.8Mar18.VOH.g++.1 +++ /dev/null @@ -1,81 +0,0 @@ -LAMMPS (8 Mar 2018) - using 1 OpenMP thread(s) per MPI task -# REAX potential for VOH system -# ..... - -units real - -atom_style charge -read_data data.VOH - orthogonal box = (0 0 0) to (25 25 25) - 1 by 1 by 1 MPI processor grid - reading atoms ... - 100 atoms - -pair_style reax/c lmp_control -pair_coeff * * ffield.reax.V_O_C_H H C O V -Reading potential file ffield.reax.V_O_C_H with DATE: 2011-02-18 - -neighbor 2 bin -neigh_modify every 10 delay 0 check no - -fix 1 all nve -fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq -fix 3 all temp/berendsen 500.0 500.0 100.0 - -timestep 0.25 - -#dump 1 all atom 30 dump.reax.voh - -run 3000 -Neighbor list info ... 
- update every 10 steps, delay 0 steps, check no - max neighbors/atom: 2000, page size: 100000 - master list distance cutoff = 12 - ghost atom cutoff = 12 - binsize = 6, bins = 5 5 5 - 2 neighbor lists, perpetual/occasional/extra = 2 0 0 - (1) pair reax/c, perpetual - attributes: half, newton off, ghost - pair build: half/bin/newtoff/ghost - stencil: half/ghost/bin/3d/newtoff - bin: standard - (2) fix qeq/reax, perpetual, copy from (1) - attributes: half, newton off, ghost - pair build: copy - stencil: none - bin: none -Per MPI rank memory allocation (min/avg/max) = 17.79 | 17.79 | 17.79 Mbytes -Step Temp E_pair E_mol TotEng Press - 0 0 -10246.825 0 -10246.825 42.256089 - 3000 476.73301 -10185.256 0 -10044.572 -694.70737 -Loop time of 11.0577 on 1 procs for 3000 steps with 100 atoms - -Performance: 5.860 ns/day, 4.095 hours/ns, 271.304 timesteps/s -98.9% CPU use with 1 MPI tasks x 1 OpenMP threads - -MPI task timing breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 9.6785 | 9.6785 | 9.6785 | 0.0 | 87.53 -Neigh | 0.32599 | 0.32599 | 0.32599 | 0.0 | 2.95 -Comm | 0.017231 | 0.017231 | 0.017231 | 0.0 | 0.16 -Output | 2.5511e-05 | 2.5511e-05 | 2.5511e-05 | 0.0 | 0.00 -Modify | 1.0311 | 1.0311 | 1.0311 | 0.0 | 9.32 -Other | | 0.004857 | | | 0.04 - -Nlocal: 100 ave 100 max 100 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Nghost: 598 ave 598 max 598 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Neighs: 3390 ave 3390 max 3390 min -Histogram: 1 0 0 0 0 0 0 0 0 0 - -Total # of neighbors = 3390 -Ave neighs/atom = 33.9 -Neighbor list builds = 300 -Dangerous builds not checked - -Please see the log.cite file for references relevant to this simulation - -Total wall time: 0:00:11 diff --git a/examples/reaxff/VOH/log.8Mar18.VOH.g++.4 b/examples/reaxff/VOH/log.8Mar18.VOH.g++.4 deleted file mode 100644 index 0395af6671..0000000000 --- a/examples/reaxff/VOH/log.8Mar18.VOH.g++.4 +++ /dev/null @@ -1,81 +0,0 @@ 
-LAMMPS (8 Mar 2018) - using 1 OpenMP thread(s) per MPI task -# REAX potential for VOH system -# ..... - -units real - -atom_style charge -read_data data.VOH - orthogonal box = (0 0 0) to (25 25 25) - 1 by 2 by 2 MPI processor grid - reading atoms ... - 100 atoms - -pair_style reax/c lmp_control -pair_coeff * * ffield.reax.V_O_C_H H C O V -Reading potential file ffield.reax.V_O_C_H with DATE: 2011-02-18 - -neighbor 2 bin -neigh_modify every 10 delay 0 check no - -fix 1 all nve -fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq -fix 3 all temp/berendsen 500.0 500.0 100.0 - -timestep 0.25 - -#dump 1 all atom 30 dump.reax.voh - -run 3000 -Neighbor list info ... - update every 10 steps, delay 0 steps, check no - max neighbors/atom: 2000, page size: 100000 - master list distance cutoff = 12 - ghost atom cutoff = 12 - binsize = 6, bins = 5 5 5 - 2 neighbor lists, perpetual/occasional/extra = 2 0 0 - (1) pair reax/c, perpetual - attributes: half, newton off, ghost - pair build: half/bin/newtoff/ghost - stencil: half/ghost/bin/3d/newtoff - bin: standard - (2) fix qeq/reax, perpetual, copy from (1) - attributes: half, newton off, ghost - pair build: copy - stencil: none - bin: none -Per MPI rank memory allocation (min/avg/max) = 11.21 | 12.52 | 13.64 Mbytes -Step Temp E_pair E_mol TotEng Press - 0 0 -10246.825 0 -10246.825 42.256092 - 3000 489.67803 -10188.866 0 -10044.362 -553.7513 -Loop time of 6.49847 on 4 procs for 3000 steps with 100 atoms - -Performance: 9.972 ns/day, 2.407 hours/ns, 461.647 timesteps/s -97.7% CPU use with 4 MPI tasks x 1 OpenMP threads - -MPI task timing breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 4.7412 | 4.8453 | 4.9104 | 2.9 | 74.56 -Neigh | 0.1468 | 0.17834 | 0.20151 | 4.7 | 2.74 -Comm | 0.071841 | 0.14037 | 0.24502 | 17.2 | 2.16 -Output | 2.1219e-05 | 2.408e-05 | 3.1948e-05 | 0.0 | 0.00 -Modify | 1.3072 | 1.3308 | 1.3627 | 1.7 | 20.48 -Other | | 
0.003713 | | | 0.06 - -Nlocal: 25 ave 38 max 11 min -Histogram: 1 0 0 0 1 0 1 0 0 1 -Nghost: 369.75 ave 453 max 283 min -Histogram: 1 0 0 0 1 1 0 0 0 1 -Neighs: 1082.25 ave 1788 max 417 min -Histogram: 1 0 1 0 0 0 1 0 0 1 - -Total # of neighbors = 4329 -Ave neighs/atom = 43.29 -Neighbor list builds = 300 -Dangerous builds not checked - -Please see the log.cite file for references relevant to this simulation - -Total wall time: 0:00:06 diff --git a/examples/reaxff/ZnOH2/in.ZnOH2 b/examples/reaxff/ZnOH2/in.ZnOH2 index f39b1a29dd..75a2d05f4b 100644 --- a/examples/reaxff/ZnOH2/in.ZnOH2 +++ b/examples/reaxff/ZnOH2/in.ZnOH2 @@ -1,23 +1,24 @@ # REAX potential for ZnOH2 system # ..... -units real +units real -atom_style charge -read_data data.ZnOH2 +atom_style charge +read_data data.ZnOH2 -pair_style reax/c lmp_control -pair_coeff * * ffield.reax.ZnOH H O Zn +pair_style reaxff lmp_control +pair_coeff * * ffield.reax.ZnOH H O Zn -neighbor 2 bin -neigh_modify every 10 delay 0 check no +neighbor 2 bin +neigh_modify every 10 delay 0 check no -fix 1 all nve +fix 1 all nve fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq fix 3 all temp/berendsen 500.0 500.0 100.0 -timestep 0.25 +timestep 0.25 +thermo 100 -#dump 1 all atom 30 dump.reax.znoh +#dump 1 all atom 30 dump.reax.znoh -run 3000 +run 1000 diff --git a/examples/reaxff/ZnOH2/lmp_control b/examples/reaxff/ZnOH2/lmp_control index 0fb44fe862..10c27d025d 100644 --- a/examples/reaxff/ZnOH2/lmp_control +++ b/examples/reaxff/ZnOH2/lmp_control @@ -1,17 +1,7 @@ -simulation_name ZnOH2_example ! output files will carry this name + their specific ext tabulate_long_range 10000 ! denotes the granularity of long range tabulation, 0 means no tabulation -energy_update_freq 1 nbrhood_cutoff 4.5 ! near neighbors cutoff for bond calculations in A hbond_cutoff 6.0 ! cutoff distance for hydrogen bond interactions bond_graph_cutoff 0.3 ! bond strength cutoff for bond graphs thb_cutoff 0.001 ! cutoff value for three body interactions - -write_freq 1 ! 
write trajectory after so many steps -traj_title ZnOH2 ! (no white spaces) -atom_info 1 ! 0: no atom info, 1: print basic atom info in the trajectory file -atom_forces 1 ! 0: basic atom format, 1: print force on each atom in the trajectory file -atom_velocities 0 ! 0: basic atom format, 1: print the velocity of each atom in the trajectory file -bond_info 1 ! 0: do not print bonds, 1: print bonds in the trajectory file -angle_info 1 ! 0: do not print angles, 1: print angles in the trajectory file diff --git a/examples/reaxff/ZnOH2/log.30Nov23.ZnOH2.g++.1 b/examples/reaxff/ZnOH2/log.30Nov23.ZnOH2.g++.1 new file mode 100644 index 0000000000..850eeb3f3e --- /dev/null +++ b/examples/reaxff/ZnOH2/log.30Nov23.ZnOH2.g++.1 @@ -0,0 +1,122 @@ +LAMMPS (21 Nov 2023) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +# REAX potential for ZnOH2 system +# ..... + +units real + +atom_style charge +read_data data.ZnOH2 +Reading data file ... + orthogonal box = (0 0 0) to (25 25 25) + 1 by 1 by 1 MPI processor grid + reading atoms ... + 105 atoms + read_data CPU = 0.000 seconds + +pair_style reaxff lmp_control +pair_coeff * * ffield.reax.ZnOH H O Zn +Reading potential file ffield.reax.ZnOH with DATE: 2011-02-18 +WARNING: Changed valency_val to valency_boc for X (src/REAXFF/reaxff_ffield.cpp:289) + +neighbor 2 bin +neigh_modify every 10 delay 0 check no + +fix 1 all nve +fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq +fix 3 all temp/berendsen 500.0 500.0 100.0 + +timestep 0.25 +thermo 100 + +#dump 1 all atom 30 dump.reax.znoh + +run 1000 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. 
Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +- fix qeq/reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Neighbor list info ... + update: every = 10 steps, delay = 0 steps, check = no + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 12 + ghost atom cutoff = 12 + binsize = 6, bins = 5 5 5 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/newtoff/ghost + stencil: full/ghost/bin/3d + bin: standard + (2) fix qeq/reax, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 15.8 | 15.8 | 15.8 Mbytes + Step Temp E_pair E_mol TotEng Press + 0 0 -7900.2668 0 -7900.2668 60.076093 + 100 89.745108 -7892.7937 0 -7864.9724 -359.37879 + 200 151.73431 -7883.2823 0 -7836.244 118.04838 + 300 223.74392 -7881.6513 0 -7812.2898 -97.069674 + 400 293.70909 -7883.7754 0 -7792.7243 -384.10332 + 500 301.22843 -7869.313 0 -7775.9309 76.604433 + 600 317.45476 -7860.4665 0 -7762.0541 40.95095 + 700 335.70939 -7853.865 0 -7749.7937 -173.3119 + 800 380.48725 -7857.8679 0 -7739.9152 -139.88773 + 900 502.93129 -7891.7095 0 -7735.7987 488.40109 + 1000 510.36735 -7894.0653 0 -7735.8493 -222.85193 +Loop time of 0.583996 on 1 procs for 1000 steps with 105 atoms + +Performance: 36.987 ns/day, 0.649 hours/ns, 1712.342 timesteps/s, 179.796 katom-step/s +99.6% CPU use 
with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0.41526 | 0.41526 | 0.41526 | 0.0 | 71.11 +Neigh | 0.058908 | 0.058908 | 0.058908 | 0.0 | 10.09 +Comm | 0.0028308 | 0.0028308 | 0.0028308 | 0.0 | 0.48 +Output | 0.00021295 | 0.00021295 | 0.00021295 | 0.0 | 0.04 +Modify | 0.1061 | 0.1061 | 0.1061 | 0.0 | 18.17 +Other | | 0.0006844 | | | 0.12 + +Nlocal: 105 ave 105 max 105 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 640 ave 640 max 640 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 3934 ave 3934 max 3934 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 3934 +Ave neighs/atom = 37.466667 +Neighbor list builds = 100 +Dangerous builds not checked +Total wall time: 0:00:00 diff --git a/examples/reaxff/ZnOH2/log.30Nov23.ZnOH2.g++.4 b/examples/reaxff/ZnOH2/log.30Nov23.ZnOH2.g++.4 new file mode 100644 index 0000000000..d98fe1df74 --- /dev/null +++ b/examples/reaxff/ZnOH2/log.30Nov23.ZnOH2.g++.4 @@ -0,0 +1,122 @@ +LAMMPS (21 Nov 2023) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +# REAX potential for ZnOH2 system +# ..... + +units real + +atom_style charge +read_data data.ZnOH2 +Reading data file ... + orthogonal box = (0 0 0) to (25 25 25) + 1 by 2 by 2 MPI processor grid + reading atoms ... 
+ 105 atoms + read_data CPU = 0.001 seconds + +pair_style reaxff lmp_control +pair_coeff * * ffield.reax.ZnOH H O Zn +Reading potential file ffield.reax.ZnOH with DATE: 2011-02-18 +WARNING: Changed valency_val to valency_boc for X (src/REAXFF/reaxff_ffield.cpp:289) + +neighbor 2 bin +neigh_modify every 10 delay 0 check no + +fix 1 all nve +fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq +fix 3 all temp/berendsen 500.0 500.0 100.0 + +timestep 0.25 +thermo 100 + +#dump 1 all atom 30 dump.reax.znoh + +run 1000 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +- fix qeq/reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Neighbor list info ... 
+ update: every = 10 steps, delay = 0 steps, check = no + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 12 + ghost atom cutoff = 12 + binsize = 6, bins = 5 5 5 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/newtoff/ghost + stencil: full/ghost/bin/3d + bin: standard + (2) fix qeq/reax, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 10.1 | 11.34 | 12.51 Mbytes + Step Temp E_pair E_mol TotEng Press + 0 0 -7900.2668 0 -7900.2668 60.076093 + 100 89.745102 -7892.7938 0 -7864.9724 -359.39279 + 200 151.73402 -7883.2823 0 -7836.2441 118.03582 + 300 223.74416 -7881.6514 0 -7812.2897 -97.060088 + 400 293.70926 -7883.7754 0 -7792.7242 -384.10477 + 500 301.22851 -7869.3129 0 -7775.9308 76.601414 + 600 317.45436 -7860.4664 0 -7762.0542 40.946828 + 700 335.70975 -7853.8651 0 -7749.7936 -173.31084 + 800 380.48744 -7857.868 0 -7739.9153 -139.87915 + 900 502.93034 -7891.7094 0 -7735.7989 488.3973 + 1000 510.36634 -7894.0651 0 -7735.8493 -222.85474 +Loop time of 0.345754 on 4 procs for 1000 steps with 105 atoms + +Performance: 62.472 ns/day, 0.384 hours/ns, 2892.228 timesteps/s, 303.684 katom-step/s +99.1% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0.18043 | 0.18884 | 0.19544 | 1.3 | 54.62 +Neigh | 0.027479 | 0.033519 | 0.039456 | 2.3 | 9.69 +Comm | 0.01673 | 0.023479 | 0.031605 | 3.6 | 6.79 +Output | 0.00019732 | 0.0002106 | 0.00024979 | 0.0 | 0.06 +Modify | 0.093437 | 0.099224 | 0.10519 | 1.3 | 28.70 +Other | | 0.0004842 | | | 0.14 + +Nlocal: 26.25 ave 41 max 11 min +Histogram: 1 0 0 0 1 0 1 0 0 1 +Nghost: 390.25 ave 491 max 286 min +Histogram: 1 0 0 0 1 1 0 0 0 1 +Neighs: 1154 ave 1912 max 445 
min +Histogram: 1 0 0 1 0 1 0 0 0 1 + +Total # of neighbors = 4616 +Ave neighs/atom = 43.961905 +Neighbor list builds = 100 +Dangerous builds not checked +Total wall time: 0:00:00 diff --git a/examples/reaxff/ZnOH2/log.8Mar18.ZnOH2.g++.1 b/examples/reaxff/ZnOH2/log.8Mar18.ZnOH2.g++.1 deleted file mode 100644 index 58b1a36719..0000000000 --- a/examples/reaxff/ZnOH2/log.8Mar18.ZnOH2.g++.1 +++ /dev/null @@ -1,81 +0,0 @@ -LAMMPS (8 Mar 2018) - using 1 OpenMP thread(s) per MPI task -# REAX potential for ZnOH2 system -# ..... - -units real - -atom_style charge -read_data data.ZnOH2 - orthogonal box = (0 0 0) to (25 25 25) - 1 by 1 by 1 MPI processor grid - reading atoms ... - 105 atoms - -pair_style reax/c lmp_control -pair_coeff * * ffield.reax.ZnOH H O Zn -Reading potential file ffield.reax.ZnOH with DATE: 2011-02-18 - -neighbor 2 bin -neigh_modify every 10 delay 0 check no - -fix 1 all nve -fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq -fix 3 all temp/berendsen 500.0 500.0 100.0 - -timestep 0.25 - -#dump 1 all atom 30 dump.reax.znoh - -run 3000 -Neighbor list info ... 
- update every 10 steps, delay 0 steps, check no - max neighbors/atom: 2000, page size: 100000 - master list distance cutoff = 12 - ghost atom cutoff = 12 - binsize = 6, bins = 5 5 5 - 2 neighbor lists, perpetual/occasional/extra = 2 0 0 - (1) pair reax/c, perpetual - attributes: half, newton off, ghost - pair build: half/bin/newtoff/ghost - stencil: half/ghost/bin/3d/newtoff - bin: standard - (2) fix qeq/reax, perpetual, copy from (1) - attributes: half, newton off, ghost - pair build: copy - stencil: none - bin: none -Per MPI rank memory allocation (min/avg/max) = 18.36 | 18.36 | 18.36 Mbytes -Step Temp E_pair E_mol TotEng Press - 0 0 -7900.2668 0 -7900.2668 60.076093 - 3000 535.58577 -7934.7287 0 -7768.6948 -475.46237 -Loop time of 7.29784 on 1 procs for 3000 steps with 105 atoms - -Performance: 8.879 ns/day, 2.703 hours/ns, 411.081 timesteps/s -97.3% CPU use with 1 MPI tasks x 1 OpenMP threads - -MPI task timing breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 5.9988 | 5.9988 | 5.9988 | 0.0 | 82.20 -Neigh | 0.37455 | 0.37455 | 0.37455 | 0.0 | 5.13 -Comm | 0.019186 | 0.019186 | 0.019186 | 0.0 | 0.26 -Output | 2.4557e-05 | 2.4557e-05 | 2.4557e-05 | 0.0 | 0.00 -Modify | 0.89915 | 0.89915 | 0.89915 | 0.0 | 12.32 -Other | | 0.006108 | | | 0.08 - -Nlocal: 105 ave 105 max 105 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Nghost: 649 ave 649 max 649 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Neighs: 3971 ave 3971 max 3971 min -Histogram: 1 0 0 0 0 0 0 0 0 0 - -Total # of neighbors = 3971 -Ave neighs/atom = 37.819 -Neighbor list builds = 300 -Dangerous builds not checked - -Please see the log.cite file for references relevant to this simulation - -Total wall time: 0:00:07 diff --git a/examples/reaxff/ZnOH2/log.8Mar18.ZnOH2.g++.4 b/examples/reaxff/ZnOH2/log.8Mar18.ZnOH2.g++.4 deleted file mode 100644 index 77c5cbe1b7..0000000000 --- a/examples/reaxff/ZnOH2/log.8Mar18.ZnOH2.g++.4 +++ /dev/null 
@@ -1,81 +0,0 @@ -LAMMPS (8 Mar 2018) - using 1 OpenMP thread(s) per MPI task -# REAX potential for ZnOH2 system -# ..... - -units real - -atom_style charge -read_data data.ZnOH2 - orthogonal box = (0 0 0) to (25 25 25) - 1 by 2 by 2 MPI processor grid - reading atoms ... - 105 atoms - -pair_style reax/c lmp_control -pair_coeff * * ffield.reax.ZnOH H O Zn -Reading potential file ffield.reax.ZnOH with DATE: 2011-02-18 - -neighbor 2 bin -neigh_modify every 10 delay 0 check no - -fix 1 all nve -fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq -fix 3 all temp/berendsen 500.0 500.0 100.0 - -timestep 0.25 - -#dump 1 all atom 30 dump.reax.znoh - -run 3000 -Neighbor list info ... - update every 10 steps, delay 0 steps, check no - max neighbors/atom: 2000, page size: 100000 - master list distance cutoff = 12 - ghost atom cutoff = 12 - binsize = 6, bins = 5 5 5 - 2 neighbor lists, perpetual/occasional/extra = 2 0 0 - (1) pair reax/c, perpetual - attributes: half, newton off, ghost - pair build: half/bin/newtoff/ghost - stencil: half/ghost/bin/3d/newtoff - bin: standard - (2) fix qeq/reax, perpetual, copy from (1) - attributes: half, newton off, ghost - pair build: copy - stencil: none - bin: none -Per MPI rank memory allocation (min/avg/max) = 11.28 | 12.77 | 14.21 Mbytes -Step Temp E_pair E_mol TotEng Press - 0 0 -7900.2668 0 -7900.2668 60.076093 - 3000 538.25796 -7935.6159 0 -7768.7536 -525.47078 -Loop time of 4.48824 on 4 procs for 3000 steps with 105 atoms - -Performance: 14.438 ns/day, 1.662 hours/ns, 668.414 timesteps/s -97.2% CPU use with 4 MPI tasks x 1 OpenMP threads - -MPI task timing breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 3.1031 | 3.1698 | 3.2378 | 3.3 | 70.62 -Neigh | 0.16642 | 0.20502 | 0.25003 | 6.6 | 4.57 -Comm | 0.074932 | 0.14224 | 0.21025 | 15.6 | 3.17 -Output | 0.00011349 | 0.00011736 | 0.00012231 | 0.0 | 0.00 -Modify | 0.92089 | 0.96736 | 1.0083 | 3.2 | 
21.55 -Other | | 0.003731 | | | 0.08 - -Nlocal: 26.25 ave 45 max 15 min -Histogram: 1 0 2 0 0 0 0 0 0 1 -Nghost: 399 ave 509 max 295 min -Histogram: 1 0 0 0 2 0 0 0 0 1 -Neighs: 1151.5 ave 2066 max 701 min -Histogram: 1 2 0 0 0 0 0 0 0 1 - -Total # of neighbors = 4606 -Ave neighs/atom = 43.8667 -Neighbor list builds = 300 -Dangerous builds not checked - -Please see the log.cite file for references relevant to this simulation - -Total wall time: 0:00:04 diff --git a/examples/reaxff/ci-reaxFF/in.ci-reax.CH b/examples/reaxff/ci-reaxFF/in.ci-reax.CH index b3a2406a56..bee76b3eea 100644 --- a/examples/reaxff/ci-reaxFF/in.ci-reax.CH +++ b/examples/reaxff/ci-reaxFF/in.ci-reax.CH @@ -4,8 +4,8 @@ units real read_data CH4.dat -pair_style hybrid/overlay reax/c control checkqeq no table linear 11000 -pair_coeff * * reax/c ffield.ci-reax.CH C H +pair_style hybrid/overlay reaxff control checkqeq no table linear 11000 +pair_coeff * * reaxff ffield.ci-reax.CH C H pair_coeff 1 1 table ci-reaxFF_ZBL.dat CC_cireaxFF pair_coeff 1 2 table ci-reaxFF_ZBL.dat CH_cireaxFF pair_coeff 2 2 table ci-reaxFF_ZBL.dat HH_cireaxFF diff --git a/examples/reaxff/ci-reaxFF/log.30Nov23.ci-reax.CH.g++.1 b/examples/reaxff/ci-reaxFF/log.30Nov23.ci-reax.CH.g++.1 new file mode 100644 index 0000000000..08f2f4b47b --- /dev/null +++ b/examples/reaxff/ci-reaxFF/log.30Nov23.ci-reax.CH.g++.1 @@ -0,0 +1,105 @@ +LAMMPS (21 Nov 2023) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +#ci-reax potential for CH systems with tabulated ZBL correction +atom_style charge +units real + +read_data CH4.dat +Reading data file ... + orthogonal box = (0 0 0) to (20 20 20) + 1 by 1 by 1 MPI processor grid + reading atoms ... + 315 atoms + reading velocities ... 
+ 315 velocities + read_data CPU = 0.003 seconds + +pair_style hybrid/overlay reaxff control checkqeq no table linear 11000 +pair_coeff * * reaxff ffield.ci-reax.CH C H +Reading potential file ffield.ci-reax.CH with DATE: 2017-11-20 +pair_coeff 1 1 table ci-reaxFF_ZBL.dat CC_cireaxFF +WARNING: 2 of 10000 force values in table CC_cireaxFF are inconsistent with -dE/dr. +WARNING: Should only be flagged at inflection points (src/pair_table.cpp:466) +pair_coeff 1 2 table ci-reaxFF_ZBL.dat CH_cireaxFF +WARNING: 2 of 11000 force values in table CH_cireaxFF are inconsistent with -dE/dr. +WARNING: Should only be flagged at inflection points (src/pair_table.cpp:466) +pair_coeff 2 2 table ci-reaxFF_ZBL.dat HH_cireaxFF +WARNING: 2 of 6000 force values in table HH_cireaxFF are inconsistent with -dE/dr. +WARNING: Should only be flagged at inflection points (src/pair_table.cpp:466) + +timestep 0.25 +fix 1 all nve +fix 2 all temp/berendsen 500.0 500.0 100.0 + +#dump 1 all atom 30 dump.ci-reax.lammpstrj + +run 3000 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +WARNING: Total cutoff < 2*bond cutoff. May need to use an increased neighbor list skin. (src/REAXFF/pair_reaxff.cpp:365) +Neighbor list info ... 
+ update: every = 1 steps, delay = 0 steps, check = yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 9.5 + ghost atom cutoff = 9.5 + binsize = 4.75, bins = 5 5 5 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/newtoff/ghost + stencil: full/ghost/bin/3d + bin: standard + (2) pair table, perpetual + attributes: half, newton on, cut 3.1 + pair build: half/bin/atomonly/newton + stencil: half/bin/3d + bin: standard +Per MPI rank memory allocation (min/avg/max) = 35.64 | 35.64 | 35.64 Mbytes + Step Temp E_pair E_mol TotEng Press + 0 508.42043 -28736.654 0 -28260.785 1678.3276 + 3000 480.41333 -28707.835 0 -28258.181 -3150.0762 +Loop time of 13.2263 on 1 procs for 3000 steps with 315 atoms + +Performance: 4.899 ns/day, 4.899 hours/ns, 226.821 timesteps/s, 71.449 katom-step/s +99.6% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 13.046 | 13.046 | 13.046 | 0.0 | 98.64 +Neigh | 0.12783 | 0.12783 | 0.12783 | 0.0 | 0.97 +Comm | 0.025611 | 0.025611 | 0.025611 | 0.0 | 0.19 +Output | 2.2361e-05 | 2.2361e-05 | 2.2361e-05 | 0.0 | 0.00 +Modify | 0.017722 | 0.017722 | 0.017722 | 0.0 | 0.13 +Other | | 0.008824 | | | 0.07 + +Nlocal: 315 ave 315 max 315 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 2056 ave 2056 max 2056 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 32754 ave 32754 max 32754 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 32754 +Ave neighs/atom = 103.98095 +Neighbor list builds = 37 +Dangerous builds = 0 +Total wall time: 0:00:13 diff --git a/examples/reaxff/ci-reaxFF/log.30Nov23.ci-reax.CH.g++.4 b/examples/reaxff/ci-reaxFF/log.30Nov23.ci-reax.CH.g++.4 new file mode 100644 index 0000000000..cc6c386f03 --- /dev/null +++ b/examples/reaxff/ci-reaxFF/log.30Nov23.ci-reax.CH.g++.4 
@@ -0,0 +1,105 @@ +LAMMPS (21 Nov 2023) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98) + using 1 OpenMP thread(s) per MPI task +#ci-reax potential for CH systems with tabulated ZBL correction +atom_style charge +units real + +read_data CH4.dat +Reading data file ... + orthogonal box = (0 0 0) to (20 20 20) + 1 by 2 by 2 MPI processor grid + reading atoms ... + 315 atoms + reading velocities ... + 315 velocities + read_data CPU = 0.002 seconds + +pair_style hybrid/overlay reaxff control checkqeq no table linear 11000 +pair_coeff * * reaxff ffield.ci-reax.CH C H +Reading potential file ffield.ci-reax.CH with DATE: 2017-11-20 +pair_coeff 1 1 table ci-reaxFF_ZBL.dat CC_cireaxFF +WARNING: 2 of 10000 force values in table CC_cireaxFF are inconsistent with -dE/dr. +WARNING: Should only be flagged at inflection points (src/pair_table.cpp:466) +pair_coeff 1 2 table ci-reaxFF_ZBL.dat CH_cireaxFF +WARNING: 2 of 11000 force values in table CH_cireaxFF are inconsistent with -dE/dr. +WARNING: Should only be flagged at inflection points (src/pair_table.cpp:466) +pair_coeff 2 2 table ci-reaxFF_ZBL.dat HH_cireaxFF +WARNING: 2 of 6000 force values in table HH_cireaxFF are inconsistent with -dE/dr. +WARNING: Should only be flagged at inflection points (src/pair_table.cpp:466) + +timestep 0.25 +fix 1 all nve +fix 2 all temp/berendsen 500.0 500.0 100.0 + +#dump 1 all atom 30 dump.ci-reax.lammpstrj + +run 3000 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. 
Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +WARNING: Total cutoff < 2*bond cutoff. May need to use an increased neighbor list skin. (src/REAXFF/pair_reaxff.cpp:365) +Neighbor list info ... + update: every = 1 steps, delay = 0 steps, check = yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 9.5 + ghost atom cutoff = 9.5 + binsize = 4.75, bins = 5 5 5 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/newtoff/ghost + stencil: full/ghost/bin/3d + bin: standard + (2) pair table, perpetual + attributes: half, newton on, cut 3.1 + pair build: half/bin/atomonly/newton + stencil: half/bin/3d + bin: standard +Per MPI rank memory allocation (min/avg/max) = 20.4 | 21.35 | 22.73 Mbytes + Step Temp E_pair E_mol TotEng Press + 0 508.42043 -28736.654 0 -28260.785 1678.3276 + 3000 480.41333 -28707.835 0 -28258.181 -3150.0762 +Loop time of 8.18251 on 4 procs for 3000 steps with 315 atoms + +Performance: 7.919 ns/day, 3.031 hours/ns, 366.636 timesteps/s, 115.490 katom-step/s +99.1% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 6.7983 | 7.3808 | 7.9976 | 16.3 | 90.20 +Neigh | 0.062309 | 0.065996 | 0.07006 | 1.4 | 0.81 +Comm | 0.083586 | 0.70629 | 1.2934 | 53.1 | 8.63 +Output | 1.8365e-05 | 2.0853e-05 | 2.7615e-05 | 0.0 | 0.00 +Modify | 0.016829 | 0.019422 | 0.02157 | 1.4 | 0.24 +Other | | 0.01001 | | | 0.12 + +Nlocal: 78.75 ave 96 max 65 min +Histogram: 2 0 0 0 0 0 0 1 0 1 +Nghost: 1233 ave 1348 max 1116 min +Histogram: 1 0 1 0 0 0 0 1 0 1 +Neighs: 9467.25 ave 12150 
max 7160 min +Histogram: 1 1 0 0 0 0 0 1 0 1 + +Total # of neighbors = 37869 +Ave neighs/atom = 120.21905 +Neighbor list builds = 37 +Dangerous builds = 0 +Total wall time: 0:00:08 diff --git a/examples/reaxff/ci-reaxFF/log.8Mar18.ci-reax.CH.g++.1 b/examples/reaxff/ci-reaxFF/log.8Mar18.ci-reax.CH.g++.1 deleted file mode 100644 index e966fd26b6..0000000000 --- a/examples/reaxff/ci-reaxFF/log.8Mar18.ci-reax.CH.g++.1 +++ /dev/null @@ -1,86 +0,0 @@ -LAMMPS (8 Mar 2018) - using 1 OpenMP thread(s) per MPI task -#ci-reax potential for CH systems with tabulated ZBL correction -atom_style charge -units real - -read_data CH4.dat - orthogonal box = (0 0 0) to (20 20 20) - 1 by 1 by 1 MPI processor grid - reading atoms ... - 315 atoms - reading velocities ... - 315 velocities - -pair_style hybrid/overlay reax/c control checkqeq no table linear 11000 -pair_coeff * * reax/c ffield.ci-reax.CH C H -Reading potential file ffield.ci-reax.CH with DATE: 2017-11-20 -pair_coeff 1 1 table ci-reaxFF_ZBL.dat CC_cireaxFF -WARNING: 2 of 10000 force values in table are inconsistent with -dE/dr. - Should only be flagged at inflection points (../pair_table.cpp:481) -pair_coeff 1 2 table ci-reaxFF_ZBL.dat CH_cireaxFF -WARNING: 2 of 11000 force values in table are inconsistent with -dE/dr. - Should only be flagged at inflection points (../pair_table.cpp:481) -pair_coeff 2 2 table ci-reaxFF_ZBL.dat HH_cireaxFF -WARNING: 2 of 6000 force values in table are inconsistent with -dE/dr. - Should only be flagged at inflection points (../pair_table.cpp:481) - -timestep 0.25 -fix 1 all nve -fix 2 all temp/berendsen 500.0 500.0 100.0 - -#dump 1 all atom 30 dump.ci-reax.lammpstrj - -run 3000 -WARNING: Total cutoff < 2*bond cutoff. May need to use an increased neighbor list skin. (../pair_reaxc.cpp:392) -Neighbor list info ... 
- update every 1 steps, delay 10 steps, check yes - max neighbors/atom: 2000, page size: 100000 - master list distance cutoff = 9.5 - ghost atom cutoff = 9.5 - binsize = 4.75, bins = 5 5 5 - 2 neighbor lists, perpetual/occasional/extra = 2 0 0 - (1) pair reax/c, perpetual - attributes: half, newton off, ghost - pair build: half/bin/newtoff/ghost - stencil: half/ghost/bin/3d/newtoff - bin: standard - (2) pair table, perpetual - attributes: half, newton on - pair build: half/bin/atomonly/newton - stencil: half/bin/3d/newton - bin: standard -Per MPI rank memory allocation (min/avg/max) = 43.46 | 43.46 | 43.46 Mbytes -Step Temp E_pair E_mol TotEng Press - 0 508.42043 -28736.654 0 -28260.785 1678.3276 - 3000 480.41333 -28707.835 0 -28258.181 -3150.0762 -Loop time of 45.3959 on 1 procs for 3000 steps with 315 atoms - -Performance: 1.427 ns/day, 16.813 hours/ns, 66.085 timesteps/s -96.6% CPU use with 1 MPI tasks x 1 OpenMP threads - -MPI task timing breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 44.955 | 44.955 | 44.955 | 0.0 | 99.03 -Neigh | 0.29903 | 0.29903 | 0.29903 | 0.0 | 0.66 -Comm | 0.056547 | 0.056547 | 0.056547 | 0.0 | 0.12 -Output | 4.8399e-05 | 4.8399e-05 | 4.8399e-05 | 0.0 | 0.00 -Modify | 0.058722 | 0.058722 | 0.058722 | 0.0 | 0.13 -Other | | 0.02632 | | | 0.06 - -Nlocal: 315 ave 315 max 315 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Nghost: 2056 ave 2056 max 2056 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Neighs: 32754 ave 32754 max 32754 min -Histogram: 1 0 0 0 0 0 0 0 0 0 - -Total # of neighbors = 32754 -Ave neighs/atom = 103.981 -Neighbor list builds = 37 -Dangerous builds = 0 - -Please see the log.cite file for references relevant to this simulation - -Total wall time: 0:00:45 diff --git a/examples/reaxff/ci-reaxFF/log.8Mar18.ci-reax.CH.g++.4 b/examples/reaxff/ci-reaxFF/log.8Mar18.ci-reax.CH.g++.4 deleted file mode 100644 index ccc87b3536..0000000000 --- 
a/examples/reaxff/ci-reaxFF/log.8Mar18.ci-reax.CH.g++.4 +++ /dev/null @@ -1,86 +0,0 @@ -LAMMPS (8 Mar 2018) - using 1 OpenMP thread(s) per MPI task -#ci-reax potential for CH systems with tabulated ZBL correction -atom_style charge -units real - -read_data CH4.dat - orthogonal box = (0 0 0) to (20 20 20) - 1 by 2 by 2 MPI processor grid - reading atoms ... - 315 atoms - reading velocities ... - 315 velocities - -pair_style hybrid/overlay reax/c control checkqeq no table linear 11000 -pair_coeff * * reax/c ffield.ci-reax.CH C H -Reading potential file ffield.ci-reax.CH with DATE: 2017-11-20 -pair_coeff 1 1 table ci-reaxFF_ZBL.dat CC_cireaxFF -WARNING: 2 of 10000 force values in table are inconsistent with -dE/dr. - Should only be flagged at inflection points (../pair_table.cpp:481) -pair_coeff 1 2 table ci-reaxFF_ZBL.dat CH_cireaxFF -WARNING: 2 of 11000 force values in table are inconsistent with -dE/dr. - Should only be flagged at inflection points (../pair_table.cpp:481) -pair_coeff 2 2 table ci-reaxFF_ZBL.dat HH_cireaxFF -WARNING: 2 of 6000 force values in table are inconsistent with -dE/dr. - Should only be flagged at inflection points (../pair_table.cpp:481) - -timestep 0.25 -fix 1 all nve -fix 2 all temp/berendsen 500.0 500.0 100.0 - -#dump 1 all atom 30 dump.ci-reax.lammpstrj - -run 3000 -WARNING: Total cutoff < 2*bond cutoff. May need to use an increased neighbor list skin. (../pair_reaxc.cpp:392) -Neighbor list info ... 
- update every 1 steps, delay 10 steps, check yes - max neighbors/atom: 2000, page size: 100000 - master list distance cutoff = 9.5 - ghost atom cutoff = 9.5 - binsize = 4.75, bins = 5 5 5 - 2 neighbor lists, perpetual/occasional/extra = 2 0 0 - (1) pair reax/c, perpetual - attributes: half, newton off, ghost - pair build: half/bin/newtoff/ghost - stencil: half/ghost/bin/3d/newtoff - bin: standard - (2) pair table, perpetual - attributes: half, newton on - pair build: half/bin/atomonly/newton - stencil: half/bin/3d/newton - bin: standard -Per MPI rank memory allocation (min/avg/max) = 24.48 | 25.61 | 27.27 Mbytes -Step Temp E_pair E_mol TotEng Press - 0 508.42043 -28736.654 0 -28260.785 1678.3276 - 3000 480.41333 -28707.835 0 -28258.181 -3150.0762 -Loop time of 24.7034 on 4 procs for 3000 steps with 315 atoms - -Performance: 2.623 ns/day, 9.149 hours/ns, 121.441 timesteps/s -95.8% CPU use with 4 MPI tasks x 1 OpenMP threads - -MPI task timing breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 18.945 | 21.367 | 24.046 | 39.3 | 86.49 -Neigh | 0.1456 | 0.15254 | 0.16101 | 1.6 | 0.62 -Comm | 0.39168 | 3.0859 | 5.5185 | 103.9 | 12.49 -Output | 3.5763e-05 | 4.065e-05 | 5.2452e-05 | 0.0 | 0.00 -Modify | 0.05831 | 0.068811 | 0.077666 | 2.9 | 0.28 -Other | | 0.0292 | | | 0.12 - -Nlocal: 78.75 ave 96 max 65 min -Histogram: 2 0 0 0 0 0 0 1 0 1 -Nghost: 1233 ave 1348 max 1116 min -Histogram: 1 0 1 0 0 0 0 1 0 1 -Neighs: 9467.25 ave 12150 max 7160 min -Histogram: 1 1 0 0 0 0 0 1 0 1 - -Total # of neighbors = 37869 -Ave neighs/atom = 120.219 -Neighbor list builds = 37 -Dangerous builds = 0 - -Please see the log.cite file for references relevant to this simulation - -Total wall time: 0:00:24 diff --git a/lib/kokkos/CHANGELOG.md b/lib/kokkos/CHANGELOG.md index 4c145c44b3..c6115f4b3d 100644 --- a/lib/kokkos/CHANGELOG.md +++ b/lib/kokkos/CHANGELOG.md @@ -1,6 +1,97 @@ # CHANGELOG -## 
[4.1.00](https://github.com/kokkos/kokkos/tree/4.0.01) (2023-06-16) +## [4.2.00](https://github.com/kokkos/kokkos/tree/4.2.00) (2023-11-06) +[Full Changelog](https://github.com/kokkos/kokkos/compare/4.1.00...4.2.00) + +### Features: +- SIMD: significant improvements to SIMD support and alignment with C++26 SIMD + - add `Kokkos::abs` overload for SIMD types [\#6069](https://github.com/kokkos/kokkos/pull/6069) + - add generator constructors [\#6347](https://github.com/kokkos/kokkos/pull/6347) + - convert binary operators to hidden friends [\#6320](https://github.com/kokkos/kokkos/pull/6320) + - add shift operators [\#6109](https://github.com/kokkos/kokkos/pull/6109) + - add `float` support [\#6177](https://github.com/kokkos/kokkos/pull/6177) + - add remaining `gather_from` and `scatter_to` overloads [\#6220](https://github.com/kokkos/kokkos/pull/6220) + - define simd math function overloads in the Kokkos namespace [\#6465](https://github.com/kokkos/kokkos/pull/6465), [\#6487](https://github.com/kokkos/kokkos/pull/6487) + - `Kokkos_ENABLE_NATIVE=ON` autodetects SIMD types supported [\#6188](https://github.com/kokkos/kokkos/pull/6188) + - fix AVX2 SIMD support for ZEN2 AMD CPU [\#6238](https://github.com/kokkos/kokkos/pull/6238) +- `Kokkos::printf` [\#6083](https://github.com/kokkos/kokkos/pull/6083) +- `Kokkos::sort`: support custom comparator [\#6253](https://github.com/kokkos/kokkos/pull/6253) +- `half_t` and `bhalf_t` numeric traits [\#5778](https://github.com/kokkos/kokkos/pull/5778) +- `half_t` and `bhalf_t` mixed comparisons [\#6407](https://github.com/kokkos/kokkos/pull/6407) +- `half_t` and `bhalf_t` mathematical functions [\#6124](https://github.com/kokkos/kokkos/pull/6124) +- `TeamThreadRange` `parallel_scan` with return value [\#6090](https://github.com/kokkos/kokkos/pull/6090), [\#6301](https://github.com/kokkos/kokkos/pull/6301), [\#6302](https://github.com/kokkos/kokkos/pull/6302), [\#6303](https://github.com/kokkos/kokkos/pull/6303), 
[\#6307](https://github.com/kokkos/kokkos/pull/6307) +- `ThreadVectorRange` `parallel_scan` with return value [\#6235](https://github.com/kokkos/kokkos/pull/6235), [\#6242](https://github.com/kokkos/kokkos/pull/6242), [\#6308](https://github.com/kokkos/kokkos/pull/6308), [\#6305](https://github.com/kokkos/kokkos/pull/6305), [\#6292](https://github.com/kokkos/kokkos/pull/6292) +- Add team-level std algorithms [\#6200](https://github.com/kokkos/kokkos/pull/6200), [\#6205](https://github.com/kokkos/kokkos/pull/6205), [\#6207](https://github.com/kokkos/kokkos/pull/6207), [\#6208](https://github.com/kokkos/kokkos/pull/6208), [\#6209](https://github.com/kokkos/kokkos/pull/6209), [\#6210](https://github.com/kokkos/kokkos/pull/6210), [\#6211](https://github.com/kokkos/kokkos/pull/6211), [\#6212](https://github.com/kokkos/kokkos/pull/6212), [\#6213](https://github.com/kokkos/kokkos/pull/6213), [\#6256](https://github.com/kokkos/kokkos/pull/6256), [\#6258](https://github.com/kokkos/kokkos/pull/6258), [\#6350](https://github.com/kokkos/kokkos/pull/6350), [\#6351](https://github.com/kokkos/kokkos/pull/6351) +- Serial: Allow for distinct execution space instances [\#6441](https://github.com/kokkos/kokkos/pull/6441) + +### Backend and Architecture Enhancements: + +#### CUDA: +- Fixed potential data race in Cuda `parallel_reduce` [\#6236](https://github.com/kokkos/kokkos/pull/6236) +- Use `cudaMallocAsync` by default [\#6402](https://github.com/kokkos/kokkos/pull/6402) +- Bugfix for using Kokkos from a thread of execution [\#6299](https://github.com/kokkos/kokkos/pull/6299) + +#### HIP: +- New naming convention for AMD GPU: VEGA906, VEGA908, VEGA90A, NAVI1030 to AMD_GFX906, AMD_GFX908, AMD_GFX90A, AMD_GFX1030 [\#6266](https://github.com/kokkos/kokkos/pull/6266) +- Add initial support for gfx942: [\#6358](https://github.com/kokkos/kokkos/pull/6358) +- Improve reduction performance [\#6229](https://github.com/kokkos/kokkos/pull/6229) +- Deprecate `HIP(hipStream_t,bool)` constructor 
[\#6401](https://github.com/kokkos/kokkos/pull/6401) +- Add support for Graph [\#6370](https://github.com/kokkos/kokkos/pull/6370) +- Improve reduction performance when using Teams [\#6284](https://github.com/kokkos/kokkos/pull/6284) +- Fix concurrency calculation [\#6479](https://github.com/kokkos/kokkos/pull/6479) +- Fix potential data race in HIP `parallel_reduce` [\#6429](https://github.com/kokkos/kokkos/pull/6429) + +#### SYCL: +- Enforce external `sycl::queues` to be in-order [\#6246](https://github.com/kokkos/kokkos/pull/6246) +- Improve reduction performance: [\#6272](https://github.com/kokkos/kokkos/pull/6272) [\#6271](https://github.com/kokkos/kokkos/pull/6271) [\#6270](https://github.com/kokkos/kokkos/pull/6270) [\#6264](https://github.com/kokkos/kokkos/pull/6264) +- Allow using the SYCL execution space on AMD GPUs [\#6321](https://github.com/kokkos/kokkos/pull/6321) +- Allow sorting via native oneDPL to support Views with stride=1 [\#6322](https://github.com/kokkos/kokkos/pull/6322) +- Make in-order queues the default via macro [\#6189](https://github.com/kokkos/kokkos/pull/6189) + +#### OpenACC: +- Support Clacc compiler [\#6250](https://github.com/kokkos/kokkos/pull/6250) + +### General Enhancements +- Add missing `is_*_view` traits and `is_*_view_v` helper variable templates for `DynRankView`, `DynamicView`, `OffsetView`, `ScatterView` containers [\#6195](https://github.com/kokkos/kokkos/pull/6195) +- Make `nvcc_wrapper` and `compiler_launcher` scripts more portable by switching to a `#!/usr/bin/env` shebang [\#6357](https://github.com/kokkos/kokkos/pull/6357) +- Add an improved `Kokkos::malloc` / `Kokkos::free` performance test [\#6377](https://github.com/kokkos/kokkos/pull/6377) +- Ensure `Views` with `size==0` can be used with `deep_copy` [\#6273](https://github.com/kokkos/kokkos/pull/6273) +- `Kokkos::abort` is moved to header `Kokkos_Abort.hpp` [\#6445](https://github.com/kokkos/kokkos/pull/6445) +- `KOKKOS_ASSERT`, `KOKKOS_EXPECTS`, 
`KOKKOS_ENSURES` are moved to header `Kokkos_Assert.hpp` [\#6445](https://github.com/kokkos/kokkos/pull/6445) +- Add a permuted-index mode to the gups benchmark [\#6378](https://github.com/kokkos/kokkos/pull/6378) +- Check for overflow during backend initialization [\#6159](https://github.com/kokkos/kokkos/pull/6159) +- Make constraints on `Kokkos::sort` more visible [\#6234](https://github.com/kokkos/kokkos/pull/6234) and cleanup API [\#6239](https://github.com/kokkos/kokkos/pull/6239) +- Add converting assignment to `DualView`: [\#6474](https://github.com/kokkos/kokkos/pull/6474) + + +### Build System Changes + +- Export `Kokkos_CXX_COMPILER_VERSION` [\#6282](https://github.com/kokkos/kokkos/pull/6282) +- Disable default oneDPL support in Trilinos [\#6342](https://github.com/kokkos/kokkos/pull/6342) + +### Incompatibilities (i.e. breaking changes) + - Ensure that `Kokkos::complex` only gets instantiated for cv-unqualified floating-point types [\#6251](https://github.com/kokkos/kokkos/pull/6251) + - Removed (deprecated-3) support for volatile join operators in reductions [\#6385](https://github.com/kokkos/kokkos/pull/6385) + - Enforce `ViewCtorArgs` restrictions for `create_mirror_view` [\#6304](https://github.com/kokkos/kokkos/pull/6304) + - SIMD types for ARM NEON are not autodetected anymore but need `Kokkos_ARCH_ARM_NEON` or `Kokkos_ARCH_NATIVE=ON` [\#6394](https://github.com/kokkos/kokkos/pull/6394) + - Remove `#include ` from headers where possible [\#6482](https://github.com/kokkos/kokkos/pull/6482) + +### Deprecations +- Deprecated `Kokkos::vector` [\#6252](https://github.com/kokkos/kokkos/pull/6252) +- All host allocation mechanisms except for `STD_MALLOC` have been deprecated [\#6341](https://github.com/kokkos/kokkos/pull/6341) + +### Bug Fixes + - Missing memory fence in `RandomPool::free_state` functions [\#6290](https://github.com/kokkos/kokkos/pull/6290) + - Fix for corner case in `Kokkos::Experimental::is_partitioned` algorithm 
[\#6257](https://github.com/kokkos/kokkos/pull/6257) + - Fix initialization of scratch lock variables in the `Cuda` backend [\#6433](https://github.com/kokkos/kokkos/pull/6433) + - Fixes for `Kokkos::Array` [\#6372](https://github.com/kokkos/kokkos/pull/6372) + - Fixed symlink configure issue for Windows [\#6241](https://github.com/kokkos/kokkos/pull/6241) + - OpenMPTarget init-join fix [\#6444](https://github.com/kokkos/kokkos/pull/6444) + - Fix atomic operations bug for Min and Max [\#6435](https://github.com/kokkos/kokkos/pull/6435) + - Fix implementation for `cyl_bessel_i0` [\#6484](https://github.com/kokkos/kokkos/pull/6484) + - Fix various NVCC warnings in `BinSort`, `Array`, and bit manipulation function templates [\#6483](https://github.com/kokkos/kokkos/pull/6483) + +## [4.1.00](https://github.com/kokkos/kokkos/tree/4.1.00) (2023-06-16) [Full Changelog](https://github.com/kokkos/kokkos/compare/4.0.01...4.1.00) ### Features: diff --git a/lib/kokkos/CMakeLists.txt b/lib/kokkos/CMakeLists.txt index 895cee6a08..f6bd81058e 100644 --- a/lib/kokkos/CMakeLists.txt +++ b/lib/kokkos/CMakeLists.txt @@ -150,8 +150,8 @@ ENDIF() set(Kokkos_VERSION_MAJOR 4) -set(Kokkos_VERSION_MINOR 1) -set(Kokkos_VERSION_PATCH 00) +set(Kokkos_VERSION_MINOR 2) +set(Kokkos_VERSION_PATCH 0) set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}") message(STATUS "Kokkos version: ${Kokkos_VERSION}") math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}") @@ -314,7 +314,6 @@ KOKKOS_PROCESS_SUBPACKAGES() # E) If Kokkos itself is enabled, process the Kokkos package # -KOKKOS_EXCLUDE_AUTOTOOLS_FILES() KOKKOS_PACKAGE_POSTPROCESS() KOKKOS_CONFIGURE_CORE() diff --git a/lib/kokkos/Makefile.kokkos b/lib/kokkos/Makefile.kokkos index 46998091fe..c970f72755 100644 --- a/lib/kokkos/Makefile.kokkos +++ b/lib/kokkos/Makefile.kokkos @@ -11,8 +11,8 @@ CXXFLAGS += $(SHFLAGS) endif KOKKOS_VERSION_MAJOR = 4 
-KOKKOS_VERSION_MINOR = 1 -KOKKOS_VERSION_PATCH = 00 +KOKKOS_VERSION_MINOR = 2 +KOKKOS_VERSION_PATCH = 0 KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc) # Options: Cuda,HIP,SYCL,OpenMPTarget,OpenMP,Threads,Serial @@ -23,7 +23,7 @@ KOKKOS_DEVICES ?= "OpenMP" # NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75,Ampere80,Ampere86,Ada89,Hopper90 # ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX # IBM: BGQ,Power7,Power8,Power9 -# AMD-GPUS: Vega906,Vega908,Vega90A,Navi1030 +# AMD-GPUS: GFX906,GFX908,GFX90A,GFX942,GFX1030,GFX1100 # AMD-CPUS: AMDAVX,Zen,Zen2,Zen3 # Intel-GPUs: Gen9,Gen11,Gen12LP,DG1,XeHP,PVC KOKKOS_ARCH ?= "" @@ -40,7 +40,7 @@ KOKKOS_TRIBITS ?= "no" KOKKOS_STANDALONE_CMAKE ?= "no" # Default settings specific options. -# Options: force_uvm,use_ldg,rdc,enable_lambda,enable_constexpr +# Options: force_uvm,use_ldg,rdc,enable_lambda,enable_constexpr,disable_malloc_async KOKKOS_CUDA_OPTIONS ?= "enable_lambda" # Options: rdc @@ -92,6 +92,7 @@ KOKKOS_INTERNAL_CUDA_USE_UVM := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS), KOKKOS_INTERNAL_CUDA_USE_RELOC := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),rdc) KOKKOS_INTERNAL_CUDA_USE_LAMBDA := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),enable_lambda) KOKKOS_INTERNAL_CUDA_USE_CONSTEXPR := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),enable_constexpr) +KOKKOS_INTERNAL_CUDA_DISABLE_MALLOC_ASYNC := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),disable_malloc_async) KOKKOS_INTERNAL_HPX_ENABLE_ASYNC_DISPATCH := $(call kokkos_has_string,$(KOKKOS_HPX_OPTIONS),enable_async_dispatch) # deprecated KOKKOS_INTERNAL_ENABLE_DESUL_ATOMICS := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_desul_atomics) @@ -412,10 +413,11 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN3), 0) KOKKOS_INTERNAL_USE_ARCH_ZEN := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen) endif endif 
-KOKKOS_INTERNAL_USE_ARCH_VEGA906 := $(call kokkos_has_string,$(KOKKOS_ARCH),Vega906) -KOKKOS_INTERNAL_USE_ARCH_VEGA908 := $(call kokkos_has_string,$(KOKKOS_ARCH),Vega908) -KOKKOS_INTERNAL_USE_ARCH_VEGA90A := $(call kokkos_has_string,$(KOKKOS_ARCH),Vega90A) -KOKKOS_INTERNAL_USE_ARCH_NAVI1030 := $(call kokkos_has_string,$(KOKKOS_ARCH),Navi1030) +KOKKOS_INTERNAL_USE_ARCH_AMD_GFX906 := $(or $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA906),$(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX906)) +KOKKOS_INTERNAL_USE_ARCH_AMD_GFX908 := $(or $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA908),$(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX908)) +KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A := $(or $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA90A),$(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX90A)) +KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030 := $(or $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1030),$(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1030)) +KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100 := $(or $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1100),$(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1100)) # Any AVX? 
KOKKOS_INTERNAL_USE_ARCH_SSE42 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM)) @@ -698,6 +700,12 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_IMPL_CUDA_CLANG_WORKAROUND") endif + + ifeq ($(KOKKOS_INTERNAL_CUDA_DISABLE_MALLOC_ASYNC), 0) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC") + else + tmp := $(call kokkos_append_header,"/* $H""undef KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC */") + endif endif ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) @@ -710,6 +718,7 @@ endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV80), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV80") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARM_NEON") ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) KOKKOS_CXXFLAGS += @@ -722,6 +731,7 @@ endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV81), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV81") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARM_NEON") ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) KOKKOS_CXXFLAGS += @@ -734,6 +744,7 @@ endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_A64FX), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_A64FX") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARM_NEON") KOKKOS_CXXFLAGS += -march=armv8.2-a+sve KOKKOS_LDFLAGS += -march=armv8.2-a+sve @@ -749,7 +760,7 @@ endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_ZEN") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_AVX2") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX2") ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) KOKKOS_CXXFLAGS += -mavx2 @@ -762,7 +773,7 @@ endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN2), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_ZEN2") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_AVX2") + tmp := $(call 
kokkos_append_header,"$H""define KOKKOS_ARCH_AVX2") ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) KOKKOS_CXXFLAGS += -mavx2 @@ -775,7 +786,7 @@ endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN3), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_ZEN3") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_AVX2") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX2") ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) KOKKOS_CXXFLAGS += -mavx2 @@ -789,6 +800,7 @@ endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV80") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV8_THUNDERX") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARM_NEON") ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) KOKKOS_CXXFLAGS += @@ -802,6 +814,7 @@ endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV81") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV8_THUNDERX2") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARM_NEON") ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) KOKKOS_CXXFLAGS += @@ -1085,29 +1098,34 @@ endif # Figure out the architecture flag for ROCm. 
-ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA906), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA906") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA") +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX906), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX906") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU") KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx906 endif -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA908), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA908") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA") +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX908), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX908") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU") KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx908 endif -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA90A), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA90A") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA") +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX90A") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU") KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx90a endif -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NAVI1030), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_NAVI1030") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_NAVI") +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX942") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU") + KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx942 +endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX1030") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU") KOKKOS_INTERNAL_HIP_ARCH_FLAG 
:= --offload-arch=gfx1030 endif -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NAVI1100), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_NAVI1100") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_NAVI") +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX1100") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU") KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx1100 endif diff --git a/lib/kokkos/Makefile.targets b/lib/kokkos/Makefile.targets index 4e08a46c69..ec8770dd7d 100644 --- a/lib/kokkos/Makefile.targets +++ b/lib/kokkos/Makefile.targets @@ -36,6 +36,8 @@ Kokkos_HostSpace_deepcopy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/ $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace_deepcopy.cpp Kokkos_NumericTraits.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_NumericTraits.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_NumericTraits.cpp +Kokkos_Abort.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Abort.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Abort.cpp ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) Kokkos_Serial.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Serial/Kokkos_Serial.cpp diff --git a/lib/kokkos/algorithms/CMakeLists.txt b/lib/kokkos/algorithms/CMakeLists.txt index ab557ab66a..368984647e 100644 --- a/lib/kokkos/algorithms/CMakeLists.txt +++ b/lib/kokkos/algorithms/CMakeLists.txt @@ -2,6 +2,6 @@ IF (NOT Kokkos_INSTALL_TESTING) ADD_SUBDIRECTORY(src) ENDIF() # FIXME_OPENACC: temporarily disabled due to unimplemented features -IF(NOT ((KOKKOS_ENABLE_OPENMPTARGET OR KOKKOS_ENABLE_OPENACC) AND KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC)) +IF(NOT ((KOKKOS_ENABLE_OPENMPTARGET AND KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC) OR KOKKOS_ENABLE_OPENACC)) 
KOKKOS_ADD_TEST_DIRECTORIES(unit_tests) ENDIF() diff --git a/lib/kokkos/algorithms/src/Kokkos_NestedSort.hpp b/lib/kokkos/algorithms/src/Kokkos_NestedSort.hpp index 4c8be792d8..18e0674efe 100644 --- a/lib/kokkos/algorithms/src/Kokkos_NestedSort.hpp +++ b/lib/kokkos/algorithms/src/Kokkos_NestedSort.hpp @@ -14,175 +14,17 @@ // //@HEADER -#ifndef KOKKOS_NESTEDSORT_HPP_ -#define KOKKOS_NESTEDSORT_HPP_ - -#include -#include -#include - -namespace Kokkos { -namespace Experimental { -namespace Impl { - -// true for TeamVectorRange, false for ThreadVectorRange -template -struct NestedRange {}; - -// Specialization for team-level -template <> -struct NestedRange { - template - KOKKOS_FUNCTION static auto create(const TeamMember& t, SizeType len) { - return Kokkos::TeamVectorRange(t, len); - } - template - KOKKOS_FUNCTION static void barrier(const TeamMember& t) { - t.team_barrier(); - } -}; - -// Specialization for thread-level -template <> -struct NestedRange { - template - KOKKOS_FUNCTION static auto create(const TeamMember& t, SizeType len) { - return Kokkos::ThreadVectorRange(t, len); - } - // Barrier is no-op, as vector lanes of a thread are implicitly synchronized - // after parallel region - template - KOKKOS_FUNCTION static void barrier(const TeamMember&) {} -}; - -// When just doing sort (not sort_by_key), use nullptr_t for ValueViewType. -// This only takes the NestedRange instance for template arg deduction. 
-template -KOKKOS_INLINE_FUNCTION void sort_nested_impl( - const TeamMember& t, const KeyViewType& keyView, - [[maybe_unused]] const ValueViewType& valueView, const Comparator& comp, - const NestedRange) { - using SizeType = typename KeyViewType::size_type; - using KeyType = typename KeyViewType::non_const_value_type; - using Range = NestedRange; - SizeType n = keyView.extent(0); - SizeType npot = 1; - SizeType levels = 0; - // FIXME: ceiling power-of-two is a common thing to need - make it a utility - while (npot < n) { - levels++; - npot <<= 1; - } - for (SizeType i = 0; i < levels; i++) { - for (SizeType j = 0; j <= i; j++) { - // n/2 pairs of items are compared in parallel - Kokkos::parallel_for(Range::create(t, npot / 2), [=](const SizeType k) { - // How big are the brown/pink boxes? - // (Terminology comes from Wikipedia diagram) - // https://commons.wikimedia.org/wiki/File:BitonicSort.svg#/media/File:BitonicSort.svg - SizeType boxSize = SizeType(2) << (i - j); - // Which box contains this thread? - SizeType boxID = k >> (i - j); // k * 2 / boxSize; - SizeType boxStart = boxID << (1 + i - j); // boxID * boxSize - SizeType boxOffset = k - (boxStart >> 1); // k - boxID * boxSize / 2; - SizeType elem1 = boxStart + boxOffset; - // In first phase (j == 0, brown box): within a box, compare with the - // opposite value in the box. - // In later phases (j > 0, pink box): within a box, compare with fixed - // distance (boxSize / 2) apart. - SizeType elem2 = (j == 0) ? 
(boxStart + boxSize - 1 - boxOffset) - : (elem1 + boxSize / 2); - if (elem2 < n) { - KeyType key1 = keyView(elem1); - KeyType key2 = keyView(elem2); - if (comp(key2, key1)) { - keyView(elem1) = key2; - keyView(elem2) = key1; - if constexpr (!std::is_same_v) { - Kokkos::Experimental::swap(valueView(elem1), valueView(elem2)); - } - } - } - }); - Range::barrier(t); - } - } -} - -} // namespace Impl - -template -KOKKOS_INLINE_FUNCTION void sort_team(const TeamMember& t, - const ViewType& view) { - Impl::sort_nested_impl(t, view, nullptr, - Experimental::Impl::StdAlgoLessThanBinaryPredicate< - typename ViewType::non_const_value_type>(), - Impl::NestedRange()); -} - -template -KOKKOS_INLINE_FUNCTION void sort_team(const TeamMember& t, const ViewType& view, - const Comparator& comp) { - Impl::sort_nested_impl(t, view, nullptr, comp, Impl::NestedRange()); -} - -template -KOKKOS_INLINE_FUNCTION void sort_by_key_team(const TeamMember& t, - const KeyViewType& keyView, - const ValueViewType& valueView) { - Impl::sort_nested_impl(t, keyView, valueView, - Experimental::Impl::StdAlgoLessThanBinaryPredicate< - typename KeyViewType::non_const_value_type>(), - Impl::NestedRange()); -} - -template -KOKKOS_INLINE_FUNCTION void sort_by_key_team(const TeamMember& t, - const KeyViewType& keyView, - const ValueViewType& valueView, - const Comparator& comp) { - Impl::sort_nested_impl(t, keyView, valueView, comp, - Impl::NestedRange()); -} - -template -KOKKOS_INLINE_FUNCTION void sort_thread(const TeamMember& t, - const ViewType& view) { - Impl::sort_nested_impl(t, view, nullptr, - Experimental::Impl::StdAlgoLessThanBinaryPredicate< - typename ViewType::non_const_value_type>(), - Impl::NestedRange()); -} - -template -KOKKOS_INLINE_FUNCTION void sort_thread(const TeamMember& t, - const ViewType& view, - const Comparator& comp) { - Impl::sort_nested_impl(t, view, nullptr, comp, Impl::NestedRange()); -} - -template -KOKKOS_INLINE_FUNCTION void sort_by_key_thread(const TeamMember& t, - const 
KeyViewType& keyView, - const ValueViewType& valueView) { - Impl::sort_nested_impl(t, keyView, valueView, - Experimental::Impl::StdAlgoLessThanBinaryPredicate< - typename KeyViewType::non_const_value_type>(), - Impl::NestedRange()); -} - -template -KOKKOS_INLINE_FUNCTION void sort_by_key_thread(const TeamMember& t, - const KeyViewType& keyView, - const ValueViewType& valueView, - const Comparator& comp) { - Impl::sort_nested_impl(t, keyView, valueView, comp, - Impl::NestedRange()); -} - -} // namespace Experimental -} // namespace Kokkos +#ifndef KOKKOS_NESTED_SORT_HPP_ +#define KOKKOS_NESTED_SORT_HPP_ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_NESTED_SORT +#endif + +#include "sorting/Kokkos_NestedSortPublicAPI.hpp" + +#ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_NESTED_SORT +#undef KOKKOS_IMPL_PUBLIC_INCLUDE +#undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_NESTED_SORT +#endif #endif diff --git a/lib/kokkos/algorithms/src/Kokkos_Random.hpp b/lib/kokkos/algorithms/src/Kokkos_Random.hpp index abb028d28e..2d7d236d2f 100644 --- a/lib/kokkos/algorithms/src/Kokkos_Random.hpp +++ b/lib/kokkos/algorithms/src/Kokkos_Random.hpp @@ -956,6 +956,8 @@ class Random_XorShift64_Pool { KOKKOS_INLINE_FUNCTION void free_state(const Random_XorShift64& state) const { state_(state.state_idx_, 0) = state.state_; + // Release the lock only after the state has been updated in memory + Kokkos::memory_fence(); locks_(state.state_idx_, 0) = 0; } }; @@ -1208,7 +1210,9 @@ class Random_XorShift1024_Pool { KOKKOS_INLINE_FUNCTION void free_state(const Random_XorShift1024& state) const { for (int i = 0; i < 16; i++) state_(state.state_idx_, i) = state.state_[i]; - p_(state.state_idx_, 0) = state.p_; + p_(state.state_idx_, 0) = state.p_; + // Release the lock only after the state has been updated in memory + Kokkos::memory_fence(); locks_(state.state_idx_, 0) = 0; } }; diff --git a/lib/kokkos/algorithms/src/Kokkos_Sort.hpp 
b/lib/kokkos/algorithms/src/Kokkos_Sort.hpp index 10f9ad6462..f77484cc55 100644 --- a/lib/kokkos/algorithms/src/Kokkos_Sort.hpp +++ b/lib/kokkos/algorithms/src/Kokkos_Sort.hpp @@ -21,762 +21,9 @@ #define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_SORT #endif -#include -#include -#include -#include - -#if defined(KOKKOS_ENABLE_CUDA) - -// Workaround for `Instruction 'shfl' without '.sync' is not supported on -// .target sm_70 and higher from PTX ISA version 6.4`. -// Also see https://github.com/NVIDIA/cub/pull/170. -#if !defined(CUB_USE_COOPERATIVE_GROUPS) -#define CUB_USE_COOPERATIVE_GROUPS -#endif - -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wshadow" - -#if defined(KOKKOS_COMPILER_CLANG) -// Some versions of Clang fail to compile Thrust, failing with errors like -// this: -// /thrust/system/cuda/detail/core/agent_launcher.h:557:11: -// error: use of undeclared identifier 'va_printf' -// The exact combination of versions for Clang and Thrust (or CUDA) for this -// failure was not investigated, however even very recent version combination -// (Clang 10.0.0 and Cuda 10.0) demonstrated failure. 
-// -// Defining _CubLog here locally allows us to avoid that code path, however -// disabling some debugging diagnostics -#pragma push_macro("_CubLog") -#ifdef _CubLog -#undef _CubLog -#endif -#define _CubLog -#include -#include -#pragma pop_macro("_CubLog") -#else -#include -#include -#endif - -#pragma GCC diagnostic pop - -#endif - -#if defined(KOKKOS_ENABLE_ONEDPL) -#include -#include -#endif - -namespace Kokkos { - -namespace Impl { - -template -struct CopyOp; - -template -struct CopyOp { - KOKKOS_INLINE_FUNCTION - static void copy(DstViewType const& dst, size_t i_dst, SrcViewType const& src, - size_t i_src) { - dst(i_dst) = src(i_src); - } -}; - -template -struct CopyOp { - KOKKOS_INLINE_FUNCTION - static void copy(DstViewType const& dst, size_t i_dst, SrcViewType const& src, - size_t i_src) { - for (int j = 0; j < (int)dst.extent(1); j++) dst(i_dst, j) = src(i_src, j); - } -}; - -template -struct CopyOp { - KOKKOS_INLINE_FUNCTION - static void copy(DstViewType const& dst, size_t i_dst, SrcViewType const& src, - size_t i_src) { - for (int j = 0; j < dst.extent(1); j++) - for (int k = 0; k < dst.extent(2); k++) - dst(i_dst, j, k) = src(i_src, j, k); - } -}; -} // namespace Impl - -//---------------------------------------------------------------------------- - -template -class BinSort { - public: - template - struct copy_functor { - using src_view_type = typename SrcViewType::const_type; - - using copy_op = Impl::CopyOp; - - DstViewType dst_values; - src_view_type src_values; - int dst_offset; - - copy_functor(DstViewType const& dst_values_, int const& dst_offset_, - SrcViewType const& src_values_) - : dst_values(dst_values_), - src_values(src_values_), - dst_offset(dst_offset_) {} - - KOKKOS_INLINE_FUNCTION - void operator()(const int& i) const { - copy_op::copy(dst_values, i + dst_offset, src_values, i); - } - }; - - template - struct copy_permute_functor { - // If a Kokkos::View then can generate constant random access - // otherwise can only use the 
constant type. - - using src_view_type = std::conditional_t< - Kokkos::is_view::value, - Kokkos::View -#endif - >, - typename SrcViewType::const_type>; - - using perm_view_type = typename PermuteViewType::const_type; - - using copy_op = Impl::CopyOp; - - DstViewType dst_values; - perm_view_type sort_order; - src_view_type src_values; - int src_offset; - - copy_permute_functor(DstViewType const& dst_values_, - PermuteViewType const& sort_order_, - SrcViewType const& src_values_, int const& src_offset_) - : dst_values(dst_values_), - sort_order(sort_order_), - src_values(src_values_), - src_offset(src_offset_) {} - - KOKKOS_INLINE_FUNCTION - void operator()(const int& i) const { - copy_op::copy(dst_values, i, src_values, src_offset + sort_order(i)); - } - }; - - // Naming this alias "execution_space" would be problematic since it would be - // considered as execution space for the various functors which might use - // another execution space through sort() or create_permute_vector(). - using exec_space = typename Space::execution_space; - using bin_op_type = BinSortOp; - - struct bin_count_tag {}; - struct bin_offset_tag {}; - struct bin_binning_tag {}; - struct bin_sort_bins_tag {}; - - public: - using size_type = SizeType; - using value_type = size_type; - - using offset_type = Kokkos::View; - using bin_count_type = Kokkos::View; - - using const_key_view_type = typename KeyViewType::const_type; - - // If a Kokkos::View then can generate constant random access - // otherwise can only use the constant type. 
- - using const_rnd_key_view_type = std::conditional_t< - Kokkos::is_view::value, - Kokkos::View >, - const_key_view_type>; - - using non_const_key_scalar = typename KeyViewType::non_const_value_type; - using const_key_scalar = typename KeyViewType::const_value_type; - - using bin_count_atomic_type = - Kokkos::View >; - - private: - const_key_view_type keys; - const_rnd_key_view_type keys_rnd; - - public: - BinSortOp bin_op; - offset_type bin_offsets; - bin_count_atomic_type bin_count_atomic; - bin_count_type bin_count_const; - offset_type sort_order; - - int range_begin; - int range_end; - bool sort_within_bins; - - public: -#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 - KOKKOS_DEPRECATED BinSort() = default; -#else - BinSort() = delete; -#endif - - //---------------------------------------- - // Constructor: takes the keys, the binning_operator and optionally whether to - // sort within bins (default false) - template - BinSort(const ExecutionSpace& exec, const_key_view_type keys_, - int range_begin_, int range_end_, BinSortOp bin_op_, - bool sort_within_bins_ = false) - : keys(keys_), - keys_rnd(keys_), - bin_op(bin_op_), - bin_offsets(), - bin_count_atomic(), - bin_count_const(), - sort_order(), - range_begin(range_begin_), - range_end(range_end_), - sort_within_bins(sort_within_bins_) { - static_assert( - Kokkos::SpaceAccessibility::accessible, - "The provided execution space must be able to access the memory space " - "BinSort was initialized with!"); - if (bin_op.max_bins() <= 0) - Kokkos::abort( - "The number of bins in the BinSortOp object must be greater than 0!"); - bin_count_atomic = Kokkos::View( - "Kokkos::SortImpl::BinSortFunctor::bin_count", bin_op.max_bins()); - bin_count_const = bin_count_atomic; - bin_offsets = - offset_type(view_alloc(exec, WithoutInitializing, - "Kokkos::SortImpl::BinSortFunctor::bin_offsets"), - bin_op.max_bins()); - sort_order = - offset_type(view_alloc(exec, WithoutInitializing, - "Kokkos::SortImpl::BinSortFunctor::sort_order"), - 
range_end - range_begin); - } - - BinSort(const_key_view_type keys_, int range_begin_, int range_end_, - BinSortOp bin_op_, bool sort_within_bins_ = false) - : BinSort(exec_space{}, keys_, range_begin_, range_end_, bin_op_, - sort_within_bins_) {} - - template - BinSort(const ExecutionSpace& exec, const_key_view_type keys_, - BinSortOp bin_op_, bool sort_within_bins_ = false) - : BinSort(exec, keys_, 0, keys_.extent(0), bin_op_, sort_within_bins_) {} - - BinSort(const_key_view_type keys_, BinSortOp bin_op_, - bool sort_within_bins_ = false) - : BinSort(exec_space{}, keys_, bin_op_, sort_within_bins_) {} - - //---------------------------------------- - // Create the permutation vector, the bin_offset array and the bin_count - // array. Can be called again if keys changed - template - void create_permute_vector(const ExecutionSpace& exec) { - static_assert( - Kokkos::SpaceAccessibility::accessible, - "The provided execution space must be able to access the memory space " - "BinSort was initialized with!"); - - const size_t len = range_end - range_begin; - Kokkos::parallel_for( - "Kokkos::Sort::BinCount", - Kokkos::RangePolicy(exec, 0, len), - *this); - Kokkos::parallel_scan("Kokkos::Sort::BinOffset", - Kokkos::RangePolicy( - exec, 0, bin_op.max_bins()), - *this); - - Kokkos::deep_copy(exec, bin_count_atomic, 0); - Kokkos::parallel_for( - "Kokkos::Sort::BinBinning", - Kokkos::RangePolicy(exec, 0, len), - *this); - - if (sort_within_bins) - Kokkos::parallel_for( - "Kokkos::Sort::BinSort", - Kokkos::RangePolicy( - exec, 0, bin_op.max_bins()), - *this); - } - - // Create the permutation vector, the bin_offset array and the bin_count - // array. 
Can be called again if keys changed - void create_permute_vector() { - Kokkos::fence("Kokkos::Binsort::create_permute_vector: before"); - exec_space e{}; - create_permute_vector(e); - e.fence("Kokkos::Binsort::create_permute_vector: after"); - } - - // Sort a subset of a view with respect to the first dimension using the - // permutation array - template - void sort(const ExecutionSpace& exec, ValuesViewType const& values, - int values_range_begin, int values_range_end) const { - if (values.extent(0) == 0) { - return; - } - - static_assert( - Kokkos::SpaceAccessibility::accessible, - "The provided execution space must be able to access the memory space " - "BinSort was initialized with!"); - static_assert( - Kokkos::SpaceAccessibility< - ExecutionSpace, typename ValuesViewType::memory_space>::accessible, - "The provided execution space must be able to access the memory space " - "of the View argument!"); - - const size_t len = range_end - range_begin; - const size_t values_len = values_range_end - values_range_begin; - if (len != values_len) { - Kokkos::abort( - "BinSort::sort: values range length != permutation vector length"); - } - - using scratch_view_type = - Kokkos::View; - scratch_view_type sorted_values( - view_alloc(exec, WithoutInitializing, - "Kokkos::SortImpl::BinSortFunctor::sorted_values"), - values.rank_dynamic > 0 ? len : KOKKOS_IMPL_CTOR_DEFAULT_ARG, - values.rank_dynamic > 1 ? values.extent(1) - : KOKKOS_IMPL_CTOR_DEFAULT_ARG, - values.rank_dynamic > 2 ? values.extent(2) - : KOKKOS_IMPL_CTOR_DEFAULT_ARG, - values.rank_dynamic > 3 ? values.extent(3) - : KOKKOS_IMPL_CTOR_DEFAULT_ARG, - values.rank_dynamic > 4 ? values.extent(4) - : KOKKOS_IMPL_CTOR_DEFAULT_ARG, - values.rank_dynamic > 5 ? values.extent(5) - : KOKKOS_IMPL_CTOR_DEFAULT_ARG, - values.rank_dynamic > 6 ? values.extent(6) - : KOKKOS_IMPL_CTOR_DEFAULT_ARG, - values.rank_dynamic > 7 ? 
values.extent(7) - : KOKKOS_IMPL_CTOR_DEFAULT_ARG); - - { - copy_permute_functor - functor(sorted_values, sort_order, values, - values_range_begin - range_begin); - - parallel_for("Kokkos::Sort::CopyPermute", - Kokkos::RangePolicy(exec, 0, len), functor); - } - - { - copy_functor functor( - values, range_begin, sorted_values); - - parallel_for("Kokkos::Sort::Copy", - Kokkos::RangePolicy(exec, 0, len), functor); - } - } - - // Sort a subset of a view with respect to the first dimension using the - // permutation array - template - void sort(ValuesViewType const& values, int values_range_begin, - int values_range_end) const { - Kokkos::fence("Kokkos::Binsort::sort: before"); - exec_space exec; - sort(exec, values, values_range_begin, values_range_end); - exec.fence("Kokkos::BinSort:sort: after"); - } - - template - void sort(ExecutionSpace const& exec, ValuesViewType const& values) const { - this->sort(exec, values, 0, /*values.extent(0)*/ range_end - range_begin); - } - - template - void sort(ValuesViewType const& values) const { - this->sort(values, 0, /*values.extent(0)*/ range_end - range_begin); - } - - // Get the permutation vector - KOKKOS_INLINE_FUNCTION - offset_type get_permute_vector() const { return sort_order; } - - // Get the start offsets for each bin - KOKKOS_INLINE_FUNCTION - offset_type get_bin_offsets() const { return bin_offsets; } - - // Get the count for each bin - KOKKOS_INLINE_FUNCTION - bin_count_type get_bin_count() const { return bin_count_const; } - - public: - KOKKOS_INLINE_FUNCTION - void operator()(const bin_count_tag& /*tag*/, const int i) const { - const int j = range_begin + i; - bin_count_atomic(bin_op.bin(keys, j))++; - } - - KOKKOS_INLINE_FUNCTION - void operator()(const bin_offset_tag& /*tag*/, const int i, - value_type& offset, const bool& final) const { - if (final) { - bin_offsets(i) = offset; - } - offset += bin_count_const(i); - } - - KOKKOS_INLINE_FUNCTION - void operator()(const bin_binning_tag& /*tag*/, const int i) const 
{ - const int j = range_begin + i; - const int bin = bin_op.bin(keys, j); - const int count = bin_count_atomic(bin)++; - - sort_order(bin_offsets(bin) + count) = j; - } - - KOKKOS_INLINE_FUNCTION - void operator()(const bin_sort_bins_tag& /*tag*/, const int i) const { - auto bin_size = bin_count_const(i); - if (bin_size <= 1) return; - constexpr bool use_std_sort = - std::is_same_v; - int lower_bound = bin_offsets(i); - int upper_bound = lower_bound + bin_size; - // Switching to std::sort for more than 10 elements has been found - // reasonable experimentally. - if (use_std_sort && bin_size > 10) { - if constexpr (use_std_sort) { - std::sort(&sort_order(lower_bound), &sort_order(upper_bound), - [this](int p, int q) { return bin_op(keys_rnd, p, q); }); - } - } else { - for (int k = lower_bound + 1; k < upper_bound; ++k) { - int old_idx = sort_order(k); - int j = k - 1; - while (j >= lower_bound) { - int new_idx = sort_order(j); - if (!bin_op(keys_rnd, old_idx, new_idx)) break; - sort_order(j + 1) = new_idx; - --j; - } - sort_order(j + 1) = old_idx; - } - } - } -}; - -//---------------------------------------------------------------------------- - -template -struct BinOp1D { - int max_bins_ = {}; - double mul_ = {}; - double min_ = {}; - -#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 - KOKKOS_DEPRECATED BinOp1D() = default; -#else - BinOp1D() = delete; -#endif - - // Construct BinOp with number of bins, minimum value and maximum value - BinOp1D(int max_bins__, typename KeyViewType::const_value_type min, - typename KeyViewType::const_value_type max) - : max_bins_(max_bins__ + 1), - // Cast to double to avoid possible overflow when using integer - mul_(static_cast(max_bins__) / - (static_cast(max) - static_cast(min))), - min_(static_cast(min)) { - // For integral types the number of bins may be larger than the range - // in which case we can exactly have one unique value per bin - // and then don't need to sort bins. 
- if (std::is_integral::value && - (static_cast(max) - static_cast(min)) <= - static_cast(max_bins__)) { - mul_ = 1.; - } - } - - // Determine bin index from key value - template - KOKKOS_INLINE_FUNCTION int bin(ViewType& keys, const int& i) const { - return static_cast(mul_ * (static_cast(keys(i)) - min_)); - } - - // Return maximum bin index + 1 - KOKKOS_INLINE_FUNCTION - int max_bins() const { return max_bins_; } - - // Compare to keys within a bin if true new_val will be put before old_val - template - KOKKOS_INLINE_FUNCTION bool operator()(ViewType& keys, iType1& i1, - iType2& i2) const { - return keys(i1) < keys(i2); - } -}; - -template -struct BinOp3D { - int max_bins_[3] = {}; - double mul_[3] = {}; - double min_[3] = {}; - -#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 - KOKKOS_DEPRECATED BinOp3D() = default; -#else - BinOp3D() = delete; -#endif - - BinOp3D(int max_bins__[], typename KeyViewType::const_value_type min[], - typename KeyViewType::const_value_type max[]) { - max_bins_[0] = max_bins__[0]; - max_bins_[1] = max_bins__[1]; - max_bins_[2] = max_bins__[2]; - mul_[0] = static_cast(max_bins__[0]) / - (static_cast(max[0]) - static_cast(min[0])); - mul_[1] = static_cast(max_bins__[1]) / - (static_cast(max[1]) - static_cast(min[1])); - mul_[2] = static_cast(max_bins__[2]) / - (static_cast(max[2]) - static_cast(min[2])); - min_[0] = static_cast(min[0]); - min_[1] = static_cast(min[1]); - min_[2] = static_cast(min[2]); - } - - template - KOKKOS_INLINE_FUNCTION int bin(ViewType& keys, const int& i) const { - return int((((int(mul_[0] * (keys(i, 0) - min_[0])) * max_bins_[1]) + - int(mul_[1] * (keys(i, 1) - min_[1]))) * - max_bins_[2]) + - int(mul_[2] * (keys(i, 2) - min_[2]))); - } - - KOKKOS_INLINE_FUNCTION - int max_bins() const { return max_bins_[0] * max_bins_[1] * max_bins_[2]; } - - template - KOKKOS_INLINE_FUNCTION bool operator()(ViewType& keys, iType1& i1, - iType2& i2) const { - if (keys(i1, 0) > keys(i2, 0)) - return true; - else if (keys(i1, 0) == 
keys(i2, 0)) { - if (keys(i1, 1) > keys(i2, 1)) - return true; - else if (keys(i1, 1) == keys(i2, 1)) { - if (keys(i1, 2) > keys(i2, 2)) return true; - } - } - return false; - } -}; - -namespace Impl { - -template -struct min_max_functor { - using minmax_scalar = - Kokkos::MinMaxScalar; - - ViewType view; - min_max_functor(const ViewType& view_) : view(view_) {} - - KOKKOS_INLINE_FUNCTION - void operator()(const size_t& i, minmax_scalar& minmax) const { - if (view(i) < minmax.min_val) minmax.min_val = view(i); - if (view(i) > minmax.max_val) minmax.max_val = view(i); - } -}; - -} // namespace Impl - -template -std::enable_if_t<(Kokkos::is_execution_space::value) && - (!SpaceAccessibility< - HostSpace, typename Kokkos::View:: - memory_space>::accessible)> -sort(const ExecutionSpace& exec, - const Kokkos::View& view) { - if (view.extent(0) == 0) { - return; - } - - using ViewType = Kokkos::View; - using CompType = BinOp1D; - - Kokkos::MinMaxScalar result; - Kokkos::MinMax reducer(result); - parallel_reduce("Kokkos::Sort::FindExtent", - Kokkos::RangePolicy( - exec, 0, view.extent(0)), - Impl::min_max_functor(view), reducer); - if (result.min_val == result.max_val) return; - // For integral types the number of bins may be larger than the range - // in which case we can exactly have one unique value per bin - // and then don't need to sort bins. - bool sort_in_bins = true; - // TODO: figure out better max_bins then this ... 
- int64_t max_bins = view.extent(0) / 2; - if (std::is_integral::value) { - // Cast to double to avoid possible overflow when using integer - auto const max_val = static_cast(result.max_val); - auto const min_val = static_cast(result.min_val); - // using 10M as the cutoff for special behavior (roughly 40MB for the count - // array) - if ((max_val - min_val) < 10000000) { - max_bins = max_val - min_val + 1; - sort_in_bins = false; - } - } - if (std::is_floating_point::value) { - KOKKOS_ASSERT(std::isfinite(static_cast(result.max_val) - - static_cast(result.min_val))); - } - - BinSort bin_sort( - view, CompType(max_bins, result.min_val, result.max_val), sort_in_bins); - bin_sort.create_permute_vector(exec); - bin_sort.sort(exec, view); -} - -#if defined(KOKKOS_ENABLE_ONEDPL) -template -void sort(const Experimental::SYCL& space, - const Kokkos::View& view) { - if (view.extent(0) == 0) { - return; - } - - using ViewType = Kokkos::View; - static_assert(SpaceAccessibility::accessible, - "SYCL execution space is not able to access the memory space " - "of the View argument!"); - - auto queue = space.sycl_queue(); - auto policy = oneapi::dpl::execution::make_device_policy(queue); - - // Can't use Experimental::begin/end here since the oneDPL then assumes that - // the data is on the host. 
- static_assert( - ViewType::rank == 1 && - (std::is_same::value || - std::is_same::value), - "SYCL sort only supports contiguous 1D Views."); - const int n = view.extent(0); - oneapi::dpl::sort(policy, view.data(), view.data() + n); -} -#endif - -template -std::enable_if_t<(Kokkos::is_execution_space::value) && - (SpaceAccessibility< - HostSpace, typename Kokkos::View:: - memory_space>::accessible)> -sort(const ExecutionSpace&, const Kokkos::View& view) { - if (view.extent(0) == 0) { - return; - } - auto first = Experimental::begin(view); - auto last = Experimental::end(view); - std::sort(first, last); -} - -#if defined(KOKKOS_ENABLE_CUDA) -template -void sort(const Cuda& space, - const Kokkos::View& view) { - if (view.extent(0) == 0) { - return; - } - const auto exec = thrust::cuda::par.on(space.cuda_stream()); - auto first = Experimental::begin(view); - auto last = Experimental::end(view); - thrust::sort(exec, first, last); -} -#endif - -template -void sort(ViewType const& view) { - Kokkos::fence("Kokkos::sort: before"); - - if (view.extent(0) == 0) { - return; - } - - typename ViewType::execution_space exec; - sort(exec, view); - exec.fence("Kokkos::sort: fence after sorting"); -} - -template -std::enable_if_t::value> sort( - const ExecutionSpace& exec, ViewType view, size_t const begin, - size_t const end) { - if (view.extent(0) == 0) { - return; - } - - using range_policy = Kokkos::RangePolicy; - using CompType = BinOp1D; - - Kokkos::MinMaxScalar result; - Kokkos::MinMax reducer(result); - - parallel_reduce("Kokkos::Sort::FindExtent", range_policy(exec, begin, end), - Impl::min_max_functor(view), reducer); - - if (result.min_val == result.max_val) return; - - BinSort bin_sort( - exec, view, begin, end, - CompType((end - begin) / 2, result.min_val, result.max_val), true); - - bin_sort.create_permute_vector(exec); - bin_sort.sort(exec, view, begin, end); -} - -template -void sort(ViewType view, size_t const begin, size_t const end) { - 
Kokkos::fence("Kokkos::sort: before"); - - if (view.extent(0) == 0) { - return; - } - - typename ViewType::execution_space exec; - sort(exec, view, begin, end); - exec.fence("Kokkos::Sort: fence after sorting"); -} - -} // namespace Kokkos +#include "sorting/Kokkos_BinSortPublicAPI.hpp" +#include "sorting/Kokkos_SortPublicAPI.hpp" +#include "sorting/Kokkos_NestedSortPublicAPI.hpp" #ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_SORT #undef KOKKOS_IMPL_PUBLIC_INCLUDE diff --git a/lib/kokkos/algorithms/src/sorting/Kokkos_BinOpsPublicAPI.hpp b/lib/kokkos/algorithms/src/sorting/Kokkos_BinOpsPublicAPI.hpp new file mode 100644 index 0000000000..73e751f572 --- /dev/null +++ b/lib/kokkos/algorithms/src/sorting/Kokkos_BinOpsPublicAPI.hpp @@ -0,0 +1,129 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOS_BIN_OPS_PUBLIC_API_HPP_ +#define KOKKOS_BIN_OPS_PUBLIC_API_HPP_ + +#include +#include + +namespace Kokkos { + +template +struct BinOp1D { + int max_bins_ = {}; + double mul_ = {}; + double min_ = {}; + +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 + KOKKOS_DEPRECATED BinOp1D() = default; +#else + BinOp1D() = delete; +#endif + + // Construct BinOp with number of bins, minimum value and maximum value + BinOp1D(int max_bins__, typename KeyViewType::const_value_type min, + typename KeyViewType::const_value_type max) + : max_bins_(max_bins__ + 1), + // Cast to double to avoid possible overflow when using integer + mul_(static_cast(max_bins__) / + (static_cast(max) - static_cast(min))), + min_(static_cast(min)) { + // For integral types the number of bins may be larger than the range + // in which case we can exactly have one unique value per bin + // and then don't need to sort bins. + if (std::is_integral::value && + (static_cast(max) - static_cast(min)) <= + static_cast(max_bins__)) { + mul_ = 1.; + } + } + + // Determine bin index from key value + template + KOKKOS_INLINE_FUNCTION int bin(ViewType& keys, const int& i) const { + return static_cast(mul_ * (static_cast(keys(i)) - min_)); + } + + // Return maximum bin index + 1 + KOKKOS_INLINE_FUNCTION + int max_bins() const { return max_bins_; } + + // Compare to keys within a bin if true new_val will be put before old_val + template + KOKKOS_INLINE_FUNCTION bool operator()(ViewType& keys, iType1& i1, + iType2& i2) const { + return keys(i1) < keys(i2); + } +}; + +template +struct BinOp3D { + int max_bins_[3] = {}; + double mul_[3] = {}; + double min_[3] = {}; + +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 + KOKKOS_DEPRECATED BinOp3D() = default; +#else + BinOp3D() = delete; +#endif + + BinOp3D(int max_bins__[], typename KeyViewType::const_value_type min[], + typename KeyViewType::const_value_type max[]) { + max_bins_[0] = max_bins__[0]; + 
max_bins_[1] = max_bins__[1]; + max_bins_[2] = max_bins__[2]; + mul_[0] = static_cast(max_bins__[0]) / + (static_cast(max[0]) - static_cast(min[0])); + mul_[1] = static_cast(max_bins__[1]) / + (static_cast(max[1]) - static_cast(min[1])); + mul_[2] = static_cast(max_bins__[2]) / + (static_cast(max[2]) - static_cast(min[2])); + min_[0] = static_cast(min[0]); + min_[1] = static_cast(min[1]); + min_[2] = static_cast(min[2]); + } + + template + KOKKOS_INLINE_FUNCTION int bin(ViewType& keys, const int& i) const { + return int((((int(mul_[0] * (keys(i, 0) - min_[0])) * max_bins_[1]) + + int(mul_[1] * (keys(i, 1) - min_[1]))) * + max_bins_[2]) + + int(mul_[2] * (keys(i, 2) - min_[2]))); + } + + KOKKOS_INLINE_FUNCTION + int max_bins() const { return max_bins_[0] * max_bins_[1] * max_bins_[2]; } + + template + KOKKOS_INLINE_FUNCTION bool operator()(ViewType& keys, iType1& i1, + iType2& i2) const { + if (keys(i1, 0) > keys(i2, 0)) + return true; + else if (keys(i1, 0) == keys(i2, 0)) { + if (keys(i1, 1) > keys(i2, 1)) + return true; + else if (keys(i1, 1) == keys(i2, 1)) { + if (keys(i1, 2) > keys(i2, 2)) return true; + } + } + return false; + } +}; + +} // namespace Kokkos +#endif diff --git a/lib/kokkos/algorithms/src/sorting/Kokkos_BinSortPublicAPI.hpp b/lib/kokkos/algorithms/src/sorting/Kokkos_BinSortPublicAPI.hpp new file mode 100644 index 0000000000..c399279fe4 --- /dev/null +++ b/lib/kokkos/algorithms/src/sorting/Kokkos_BinSortPublicAPI.hpp @@ -0,0 +1,410 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOS_BIN_SORT_PUBLIC_API_HPP_ +#define KOKKOS_BIN_SORT_PUBLIC_API_HPP_ + +#include "Kokkos_BinOpsPublicAPI.hpp" +#include "impl/Kokkos_CopyOpsForBinSortImpl.hpp" +#include +#include + +namespace Kokkos { + +template +class BinSort { + public: + template + struct copy_functor { + using src_view_type = typename SrcViewType::const_type; + + using copy_op = Impl::CopyOp; + + DstViewType dst_values; + src_view_type src_values; + int dst_offset; + + copy_functor(DstViewType const& dst_values_, int const& dst_offset_, + SrcViewType const& src_values_) + : dst_values(dst_values_), + src_values(src_values_), + dst_offset(dst_offset_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const int& i) const { + copy_op::copy(dst_values, i + dst_offset, src_values, i); + } + }; + + template + struct copy_permute_functor { + // If a Kokkos::View then can generate constant random access + // otherwise can only use the constant type. 
+ + using src_view_type = std::conditional_t< + Kokkos::is_view::value, + Kokkos::View= 230700) + , + Kokkos::MemoryTraits +#endif + >, + typename SrcViewType::const_type>; + + using perm_view_type = typename PermuteViewType::const_type; + + using copy_op = Impl::CopyOp; + + DstViewType dst_values; + perm_view_type sort_order; + src_view_type src_values; + int src_offset; + + copy_permute_functor(DstViewType const& dst_values_, + PermuteViewType const& sort_order_, + SrcViewType const& src_values_, int const& src_offset_) + : dst_values(dst_values_), + sort_order(sort_order_), + src_values(src_values_), + src_offset(src_offset_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const int& i) const { + copy_op::copy(dst_values, i, src_values, src_offset + sort_order(i)); + } + }; + + // Naming this alias "execution_space" would be problematic since it would be + // considered as execution space for the various functors which might use + // another execution space through sort() or create_permute_vector(). + using exec_space = typename Space::execution_space; + using bin_op_type = BinSortOp; + + struct bin_count_tag {}; + struct bin_offset_tag {}; + struct bin_binning_tag {}; + struct bin_sort_bins_tag {}; + + public: + using size_type = SizeType; + using value_type = size_type; + + using offset_type = Kokkos::View; + using bin_count_type = Kokkos::View; + + using const_key_view_type = typename KeyViewType::const_type; + + // If a Kokkos::View then can generate constant random access + // otherwise can only use the constant type. 
+ + using const_rnd_key_view_type = std::conditional_t< + Kokkos::is_view::value, + Kokkos::View >, + const_key_view_type>; + + using non_const_key_scalar = typename KeyViewType::non_const_value_type; + using const_key_scalar = typename KeyViewType::const_value_type; + + using bin_count_atomic_type = + Kokkos::View >; + + private: + const_key_view_type keys; + const_rnd_key_view_type keys_rnd; + + public: + BinSortOp bin_op; + offset_type bin_offsets; + bin_count_atomic_type bin_count_atomic; + bin_count_type bin_count_const; + offset_type sort_order; + + int range_begin; + int range_end; + bool sort_within_bins; + + public: +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 + KOKKOS_DEPRECATED BinSort() = default; +#else + BinSort() = delete; +#endif + + //---------------------------------------- + // Constructor: takes the keys, the binning_operator and optionally whether to + // sort within bins (default false) + template + BinSort(const ExecutionSpace& exec, const_key_view_type keys_, + int range_begin_, int range_end_, BinSortOp bin_op_, + bool sort_within_bins_ = false) + : keys(keys_), + keys_rnd(keys_), + bin_op(bin_op_), + bin_offsets(), + bin_count_atomic(), + bin_count_const(), + sort_order(), + range_begin(range_begin_), + range_end(range_end_), + sort_within_bins(sort_within_bins_) { + static_assert( + Kokkos::SpaceAccessibility::accessible, + "The provided execution space must be able to access the memory space " + "BinSort was initialized with!"); + if (bin_op.max_bins() <= 0) + Kokkos::abort( + "The number of bins in the BinSortOp object must be greater than 0!"); + bin_count_atomic = Kokkos::View( + "Kokkos::SortImpl::BinSortFunctor::bin_count", bin_op.max_bins()); + bin_count_const = bin_count_atomic; + bin_offsets = + offset_type(view_alloc(exec, WithoutInitializing, + "Kokkos::SortImpl::BinSortFunctor::bin_offsets"), + bin_op.max_bins()); + sort_order = + offset_type(view_alloc(exec, WithoutInitializing, + "Kokkos::SortImpl::BinSortFunctor::sort_order"), + 
range_end - range_begin); + } + + BinSort(const_key_view_type keys_, int range_begin_, int range_end_, + BinSortOp bin_op_, bool sort_within_bins_ = false) + : BinSort(exec_space{}, keys_, range_begin_, range_end_, bin_op_, + sort_within_bins_) {} + + template + BinSort(const ExecutionSpace& exec, const_key_view_type keys_, + BinSortOp bin_op_, bool sort_within_bins_ = false) + : BinSort(exec, keys_, 0, keys_.extent(0), bin_op_, sort_within_bins_) {} + + BinSort(const_key_view_type keys_, BinSortOp bin_op_, + bool sort_within_bins_ = false) + : BinSort(exec_space{}, keys_, bin_op_, sort_within_bins_) {} + + //---------------------------------------- + // Create the permutation vector, the bin_offset array and the bin_count + // array. Can be called again if keys changed + template + void create_permute_vector(const ExecutionSpace& exec) { + static_assert( + Kokkos::SpaceAccessibility::accessible, + "The provided execution space must be able to access the memory space " + "BinSort was initialized with!"); + + const size_t len = range_end - range_begin; + Kokkos::parallel_for( + "Kokkos::Sort::BinCount", + Kokkos::RangePolicy(exec, 0, len), + *this); + Kokkos::parallel_scan("Kokkos::Sort::BinOffset", + Kokkos::RangePolicy( + exec, 0, bin_op.max_bins()), + *this); + + Kokkos::deep_copy(exec, bin_count_atomic, 0); + Kokkos::parallel_for( + "Kokkos::Sort::BinBinning", + Kokkos::RangePolicy(exec, 0, len), + *this); + + if (sort_within_bins) + Kokkos::parallel_for( + "Kokkos::Sort::BinSort", + Kokkos::RangePolicy( + exec, 0, bin_op.max_bins()), + *this); + } + + // Create the permutation vector, the bin_offset array and the bin_count + // array. 
Can be called again if keys changed + void create_permute_vector() { + Kokkos::fence("Kokkos::Binsort::create_permute_vector: before"); + exec_space e{}; + create_permute_vector(e); + e.fence("Kokkos::Binsort::create_permute_vector: after"); + } + + // Sort a subset of a view with respect to the first dimension using the + // permutation array + template + void sort(const ExecutionSpace& exec, ValuesViewType const& values, + int values_range_begin, int values_range_end) const { + if (values.extent(0) == 0) { + return; + } + + static_assert( + Kokkos::SpaceAccessibility::accessible, + "The provided execution space must be able to access the memory space " + "BinSort was initialized with!"); + static_assert( + Kokkos::SpaceAccessibility< + ExecutionSpace, typename ValuesViewType::memory_space>::accessible, + "The provided execution space must be able to access the memory space " + "of the View argument!"); + + const size_t len = range_end - range_begin; + const size_t values_len = values_range_end - values_range_begin; + if (len != values_len) { + Kokkos::abort( + "BinSort::sort: values range length != permutation vector length"); + } + + using scratch_view_type = + Kokkos::View; + scratch_view_type sorted_values( + view_alloc(exec, WithoutInitializing, + "Kokkos::SortImpl::BinSortFunctor::sorted_values"), + values.rank_dynamic > 0 ? len : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + values.rank_dynamic > 1 ? values.extent(1) + : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + values.rank_dynamic > 2 ? values.extent(2) + : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + values.rank_dynamic > 3 ? values.extent(3) + : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + values.rank_dynamic > 4 ? values.extent(4) + : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + values.rank_dynamic > 5 ? values.extent(5) + : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + values.rank_dynamic > 6 ? values.extent(6) + : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + values.rank_dynamic > 7 ? 
values.extent(7) + : KOKKOS_IMPL_CTOR_DEFAULT_ARG); + + { + copy_permute_functor + functor(sorted_values, sort_order, values, + values_range_begin - range_begin); + + parallel_for("Kokkos::Sort::CopyPermute", + Kokkos::RangePolicy(exec, 0, len), functor); + } + + { + copy_functor functor( + values, range_begin, sorted_values); + + parallel_for("Kokkos::Sort::Copy", + Kokkos::RangePolicy(exec, 0, len), functor); + } + } + + // Sort a subset of a view with respect to the first dimension using the + // permutation array + template + void sort(ValuesViewType const& values, int values_range_begin, + int values_range_end) const { + Kokkos::fence("Kokkos::Binsort::sort: before"); + exec_space exec; + sort(exec, values, values_range_begin, values_range_end); + exec.fence("Kokkos::BinSort:sort: after"); + } + + template + void sort(ExecutionSpace const& exec, ValuesViewType const& values) const { + this->sort(exec, values, 0, /*values.extent(0)*/ range_end - range_begin); + } + + template + void sort(ValuesViewType const& values) const { + this->sort(values, 0, /*values.extent(0)*/ range_end - range_begin); + } + + // Get the permutation vector + KOKKOS_INLINE_FUNCTION + offset_type get_permute_vector() const { return sort_order; } + + // Get the start offsets for each bin + KOKKOS_INLINE_FUNCTION + offset_type get_bin_offsets() const { return bin_offsets; } + + // Get the count for each bin + KOKKOS_INLINE_FUNCTION + bin_count_type get_bin_count() const { return bin_count_const; } + + public: + KOKKOS_INLINE_FUNCTION + void operator()(const bin_count_tag& /*tag*/, const int i) const { + const int j = range_begin + i; + bin_count_atomic(bin_op.bin(keys, j))++; + } + + KOKKOS_INLINE_FUNCTION + void operator()(const bin_offset_tag& /*tag*/, const int i, + value_type& offset, const bool& final) const { + if (final) { + bin_offsets(i) = offset; + } + offset += bin_count_const(i); + } + + KOKKOS_INLINE_FUNCTION + void operator()(const bin_binning_tag& /*tag*/, const int i) const 
{ + const int j = range_begin + i; + const int bin = bin_op.bin(keys, j); + const int count = bin_count_atomic(bin)++; + + sort_order(bin_offsets(bin) + count) = j; + } + + KOKKOS_INLINE_FUNCTION + void operator()(const bin_sort_bins_tag& /*tag*/, const int i) const { + auto bin_size = bin_count_const(i); + if (bin_size <= 1) return; + constexpr bool use_std_sort = + std::is_same_v; + int lower_bound = bin_offsets(i); + int upper_bound = lower_bound + bin_size; + // Switching to std::sort for more than 10 elements has been found + // reasonable experimentally. + if (use_std_sort && bin_size > 10) { + KOKKOS_IF_ON_HOST( + (std::sort(&sort_order(lower_bound), &sort_order(upper_bound), + [this](int p, int q) { return bin_op(keys_rnd, p, q); });)) + } else { + for (int k = lower_bound + 1; k < upper_bound; ++k) { + int old_idx = sort_order(k); + int j = k - 1; + while (j >= lower_bound) { + int new_idx = sort_order(j); + if (!bin_op(keys_rnd, old_idx, new_idx)) break; + sort_order(j + 1) = new_idx; + --j; + } + sort_order(j + 1) = old_idx; + } + } + } +}; + +} // namespace Kokkos +#endif diff --git a/lib/kokkos/algorithms/src/sorting/Kokkos_NestedSortPublicAPI.hpp b/lib/kokkos/algorithms/src/sorting/Kokkos_NestedSortPublicAPI.hpp new file mode 100644 index 0000000000..dd468e0734 --- /dev/null +++ b/lib/kokkos/algorithms/src/sorting/Kokkos_NestedSortPublicAPI.hpp @@ -0,0 +1,100 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOS_NESTED_SORT_PUBLIC_API_HPP_ +#define KOKKOS_NESTED_SORT_PUBLIC_API_HPP_ + +#include "impl/Kokkos_NestedSortImpl.hpp" +#include +#include + +namespace Kokkos { +namespace Experimental { + +template +KOKKOS_INLINE_FUNCTION void sort_team(const TeamMember& t, + const ViewType& view) { + Impl::sort_nested_impl(t, view, nullptr, + Experimental::Impl::StdAlgoLessThanBinaryPredicate< + typename ViewType::non_const_value_type>(), + Impl::NestedRange()); +} + +template +KOKKOS_INLINE_FUNCTION void sort_team(const TeamMember& t, const ViewType& view, + const Comparator& comp) { + Impl::sort_nested_impl(t, view, nullptr, comp, Impl::NestedRange()); +} + +template +KOKKOS_INLINE_FUNCTION void sort_by_key_team(const TeamMember& t, + const KeyViewType& keyView, + const ValueViewType& valueView) { + Impl::sort_nested_impl(t, keyView, valueView, + Experimental::Impl::StdAlgoLessThanBinaryPredicate< + typename KeyViewType::non_const_value_type>(), + Impl::NestedRange()); +} + +template +KOKKOS_INLINE_FUNCTION void sort_by_key_team(const TeamMember& t, + const KeyViewType& keyView, + const ValueViewType& valueView, + const Comparator& comp) { + Impl::sort_nested_impl(t, keyView, valueView, comp, + Impl::NestedRange()); +} + +template +KOKKOS_INLINE_FUNCTION void sort_thread(const TeamMember& t, + const ViewType& view) { + Impl::sort_nested_impl(t, view, nullptr, + Experimental::Impl::StdAlgoLessThanBinaryPredicate< + typename ViewType::non_const_value_type>(), + Impl::NestedRange()); +} + +template +KOKKOS_INLINE_FUNCTION void sort_thread(const TeamMember& t, + const ViewType& view, + const Comparator& comp) { + Impl::sort_nested_impl(t, view, nullptr, comp, Impl::NestedRange()); +} + +template +KOKKOS_INLINE_FUNCTION void sort_by_key_thread(const TeamMember& t, + const KeyViewType& keyView, + const ValueViewType& valueView) { + Impl::sort_nested_impl(t, keyView, valueView, + 
Experimental::Impl::StdAlgoLessThanBinaryPredicate< + typename KeyViewType::non_const_value_type>(), + Impl::NestedRange()); +} + +template +KOKKOS_INLINE_FUNCTION void sort_by_key_thread(const TeamMember& t, + const KeyViewType& keyView, + const ValueViewType& valueView, + const Comparator& comp) { + Impl::sort_nested_impl(t, keyView, valueView, comp, + Impl::NestedRange()); +} + +} // namespace Experimental +} // namespace Kokkos +#endif diff --git a/lib/kokkos/algorithms/src/sorting/Kokkos_SortPublicAPI.hpp b/lib/kokkos/algorithms/src/sorting/Kokkos_SortPublicAPI.hpp new file mode 100644 index 0000000000..a763c41e58 --- /dev/null +++ b/lib/kokkos/algorithms/src/sorting/Kokkos_SortPublicAPI.hpp @@ -0,0 +1,194 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOS_SORT_PUBLIC_API_HPP_ +#define KOKKOS_SORT_PUBLIC_API_HPP_ + +#include "./impl/Kokkos_SortImpl.hpp" +#include +#include +#include + +namespace Kokkos { + +// --------------------------------------------------------------- +// basic overloads +// --------------------------------------------------------------- + +template +void sort([[maybe_unused]] const ExecutionSpace& exec, + const Kokkos::View& view) { + // constraints + using ViewType = Kokkos::View; + using MemSpace = typename ViewType::memory_space; + static_assert( + ViewType::rank == 1 && + (std::is_same_v || + std::is_same_v || + std::is_same_v), + "Kokkos::sort without comparator: supports 1D Views with LayoutRight, " + "LayoutLeft or LayoutStride."); + + static_assert(SpaceAccessibility::accessible, + "Kokkos::sort: execution space instance is not able to access " + "the memory space of the " + "View argument!"); + + if (view.extent(0) <= 1) { + return; + } + + if constexpr (Impl::better_off_calling_std_sort_v) { + auto first = ::Kokkos::Experimental::begin(view); + auto last = ::Kokkos::Experimental::end(view); + std::sort(first, last); + } else { + Impl::sort_device_view_without_comparator(exec, view); + } +} + +template +void sort(const Kokkos::View& view) { + using ViewType = Kokkos::View; + static_assert(ViewType::rank == 1, + "Kokkos::sort: currently only supports rank-1 Views."); + + Kokkos::fence("Kokkos::sort: before"); + + if (view.extent(0) <= 1) { + return; + } + + typename ViewType::execution_space exec; + sort(exec, view); + exec.fence("Kokkos::sort: fence after sorting"); +} + +// --------------------------------------------------------------- +// overloads supporting a custom comparator +// --------------------------------------------------------------- +template +void sort([[maybe_unused]] const ExecutionSpace& exec, + const Kokkos::View& view, + const ComparatorType& comparator) { + // constraints 
+ using ViewType = Kokkos::View; + using MemSpace = typename ViewType::memory_space; + static_assert( + ViewType::rank == 1 && + (std::is_same_v || + std::is_same_v || + std::is_same_v), + "Kokkos::sort with comparator: supports 1D Views with LayoutRight, " + "LayoutLeft or LayoutStride."); + + static_assert(SpaceAccessibility::accessible, + "Kokkos::sort: execution space instance is not able to access " + "the memory space of the View argument!"); + + if (view.extent(0) <= 1) { + return; + } + + if constexpr (Impl::better_off_calling_std_sort_v) { + auto first = ::Kokkos::Experimental::begin(view); + auto last = ::Kokkos::Experimental::end(view); + std::sort(first, last, comparator); + } else { + Impl::sort_device_view_with_comparator(exec, view, comparator); + } +} + +template +void sort(const Kokkos::View& view, + const ComparatorType& comparator) { + using ViewType = Kokkos::View; + static_assert( + ViewType::rank == 1 && + (std::is_same_v || + std::is_same_v || + std::is_same_v), + "Kokkos::sort with comparator: supports 1D Views with LayoutRight, " + "LayoutLeft or LayoutStride."); + + Kokkos::fence("Kokkos::sort with comparator: before"); + + if (view.extent(0) <= 1) { + return; + } + + typename ViewType::execution_space exec; + sort(exec, view, comparator); + exec.fence("Kokkos::sort with comparator: fence after sorting"); +} + +// --------------------------------------------------------------- +// overloads for sorting a view with a subrange +// specified via integers begin, end +// --------------------------------------------------------------- + +template +std::enable_if_t::value> sort( + const ExecutionSpace& exec, ViewType view, size_t const begin, + size_t const end) { + // view must be rank-1 because the Impl::min_max_functor + // used below only works for rank-1 views for now + static_assert(ViewType::rank == 1, + "Kokkos::sort: currently only supports rank-1 Views."); + + if (view.extent(0) <= 1) { + return; + } + + using range_policy = 
Kokkos::RangePolicy; + using CompType = BinOp1D; + + Kokkos::MinMaxScalar result; + Kokkos::MinMax reducer(result); + + parallel_reduce("Kokkos::Sort::FindExtent", range_policy(exec, begin, end), + Impl::min_max_functor(view), reducer); + + if (result.min_val == result.max_val) return; + + BinSort bin_sort( + exec, view, begin, end, + CompType((end - begin) / 2, result.min_val, result.max_val), true); + + bin_sort.create_permute_vector(exec); + bin_sort.sort(exec, view, begin, end); +} + +template +void sort(ViewType view, size_t const begin, size_t const end) { + // same constraints as the overload above which this gets dispatched to + static_assert(ViewType::rank == 1, + "Kokkos::sort: currently only supports rank-1 Views."); + + Kokkos::fence("Kokkos::sort: before"); + + if (view.extent(0) <= 1) { + return; + } + + typename ViewType::execution_space exec; + sort(exec, view, begin, end); + exec.fence("Kokkos::Sort: fence after sorting"); +} + +} // namespace Kokkos +#endif diff --git a/lib/kokkos/algorithms/src/sorting/impl/Kokkos_CopyOpsForBinSortImpl.hpp b/lib/kokkos/algorithms/src/sorting/impl/Kokkos_CopyOpsForBinSortImpl.hpp new file mode 100644 index 0000000000..07f5926d82 --- /dev/null +++ b/lib/kokkos/algorithms/src/sorting/impl/Kokkos_CopyOpsForBinSortImpl.hpp @@ -0,0 +1,61 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOS_COPY_OPS_FOR_BINSORT_IMPL_HPP_ +#define KOKKOS_COPY_OPS_FOR_BINSORT_IMPL_HPP_ + +#include +#include + +namespace Kokkos { +namespace Impl { + +template +struct CopyOp; + +template +struct CopyOp { + KOKKOS_INLINE_FUNCTION + static void copy(DstViewType const& dst, size_t i_dst, SrcViewType const& src, + size_t i_src) { + dst(i_dst) = src(i_src); + } +}; + +template +struct CopyOp { + KOKKOS_INLINE_FUNCTION + static void copy(DstViewType const& dst, size_t i_dst, SrcViewType const& src, + size_t i_src) { + for (int j = 0; j < (int)dst.extent(1); j++) dst(i_dst, j) = src(i_src, j); + } +}; + +template +struct CopyOp { + KOKKOS_INLINE_FUNCTION + static void copy(DstViewType const& dst, size_t i_dst, SrcViewType const& src, + size_t i_src) { + for (int j = 0; j < dst.extent(1); j++) + for (int k = 0; k < dst.extent(2); k++) + dst(i_dst, j, k) = src(i_src, j, k); + } +}; + +} // namespace Impl +} // namespace Kokkos + +#endif diff --git a/lib/kokkos/algorithms/src/sorting/impl/Kokkos_NestedSortImpl.hpp b/lib/kokkos/algorithms/src/sorting/impl/Kokkos_NestedSortImpl.hpp new file mode 100644 index 0000000000..50ac823319 --- /dev/null +++ b/lib/kokkos/algorithms/src/sorting/impl/Kokkos_NestedSortImpl.hpp @@ -0,0 +1,115 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOS_NESTED_SORT_IMPL_HPP_ +#define KOKKOS_NESTED_SORT_IMPL_HPP_ + +#include +#include + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +// true for TeamVectorRange, false for ThreadVectorRange +template +struct NestedRange {}; + +// Specialization for team-level +template <> +struct NestedRange { + template + KOKKOS_FUNCTION static auto create(const TeamMember& t, SizeType len) { + return Kokkos::TeamVectorRange(t, len); + } + template + KOKKOS_FUNCTION static void barrier(const TeamMember& t) { + t.team_barrier(); + } +}; + +// Specialization for thread-level +template <> +struct NestedRange { + template + KOKKOS_FUNCTION static auto create(const TeamMember& t, SizeType len) { + return Kokkos::ThreadVectorRange(t, len); + } + // Barrier is no-op, as vector lanes of a thread are implicitly synchronized + // after parallel region + template + KOKKOS_FUNCTION static void barrier(const TeamMember&) {} +}; + +// When just doing sort (not sort_by_key), use nullptr_t for ValueViewType. +// This only takes the NestedRange instance for template arg deduction. +template +KOKKOS_INLINE_FUNCTION void sort_nested_impl( + const TeamMember& t, const KeyViewType& keyView, + [[maybe_unused]] const ValueViewType& valueView, const Comparator& comp, + const NestedRange) { + using SizeType = typename KeyViewType::size_type; + using KeyType = typename KeyViewType::non_const_value_type; + using Range = NestedRange; + SizeType n = keyView.extent(0); + SizeType npot = 1; + SizeType levels = 0; + // FIXME: ceiling power-of-two is a common thing to need - make it a utility + while (npot < n) { + levels++; + npot <<= 1; + } + for (SizeType i = 0; i < levels; i++) { + for (SizeType j = 0; j <= i; j++) { + // n/2 pairs of items are compared in parallel + Kokkos::parallel_for(Range::create(t, npot / 2), [=](const SizeType k) { + // How big are the brown/pink boxes? 
+ // (Terminology comes from Wikipedia diagram) + // https://commons.wikimedia.org/wiki/File:BitonicSort.svg#/media/File:BitonicSort.svg + SizeType boxSize = SizeType(2) << (i - j); + // Which box contains this thread? + SizeType boxID = k >> (i - j); // k * 2 / boxSize; + SizeType boxStart = boxID << (1 + i - j); // boxID * boxSize + SizeType boxOffset = k - (boxStart >> 1); // k - boxID * boxSize / 2; + SizeType elem1 = boxStart + boxOffset; + // In first phase (j == 0, brown box): within a box, compare with the + // opposite value in the box. + // In later phases (j > 0, pink box): within a box, compare with fixed + // distance (boxSize / 2) apart. + SizeType elem2 = (j == 0) ? (boxStart + boxSize - 1 - boxOffset) + : (elem1 + boxSize / 2); + if (elem2 < n) { + KeyType key1 = keyView(elem1); + KeyType key2 = keyView(elem2); + if (comp(key2, key1)) { + keyView(elem1) = key2; + keyView(elem2) = key1; + if constexpr (!std::is_same_v) { + Kokkos::Experimental::swap(valueView(elem1), valueView(elem2)); + } + } + } + }); + Range::barrier(t); + } + } +} + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos +#endif diff --git a/lib/kokkos/algorithms/src/sorting/impl/Kokkos_SortImpl.hpp b/lib/kokkos/algorithms/src/sorting/impl/Kokkos_SortImpl.hpp new file mode 100644 index 0000000000..d87ab09e77 --- /dev/null +++ b/lib/kokkos/algorithms/src/sorting/impl/Kokkos_SortImpl.hpp @@ -0,0 +1,369 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOS_SORT_FREE_FUNCS_IMPL_HPP_ +#define KOKKOS_SORT_FREE_FUNCS_IMPL_HPP_ + +#include "../Kokkos_BinOpsPublicAPI.hpp" +#include "../Kokkos_BinSortPublicAPI.hpp" +#include +#include +#include + +#if defined(KOKKOS_ENABLE_CUDA) + +// Workaround for `Instruction 'shfl' without '.sync' is not supported on +// .target sm_70 and higher from PTX ISA version 6.4`. +// Also see https://github.com/NVIDIA/cub/pull/170. +#if !defined(CUB_USE_COOPERATIVE_GROUPS) +#define CUB_USE_COOPERATIVE_GROUPS +#endif + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wshadow" + +#if defined(KOKKOS_COMPILER_CLANG) +// Some versions of Clang fail to compile Thrust, failing with errors like +// this: +// /thrust/system/cuda/detail/core/agent_launcher.h:557:11: +// error: use of undeclared identifier 'va_printf' +// The exact combination of versions for Clang and Thrust (or CUDA) for this +// failure was not investigated, however even very recent version combination +// (Clang 10.0.0 and Cuda 10.0) demonstrated failure. 
+// +// Defining _CubLog here locally allows us to avoid that code path, however +// disabling some debugging diagnostics +#pragma push_macro("_CubLog") +#ifdef _CubLog +#undef _CubLog +#endif +#define _CubLog +#include +#include +#pragma pop_macro("_CubLog") +#else +#include +#include +#endif + +#pragma GCC diagnostic pop + +#endif + +#if defined(KOKKOS_ENABLE_ONEDPL) +#include +#include +#endif + +namespace Kokkos { +namespace Impl { + +template +struct better_off_calling_std_sort : std::false_type {}; + +#if defined KOKKOS_ENABLE_SERIAL +template <> +struct better_off_calling_std_sort : std::true_type {}; +#endif + +#if defined KOKKOS_ENABLE_OPENMP +template <> +struct better_off_calling_std_sort : std::true_type {}; +#endif + +#if defined KOKKOS_ENABLE_THREADS +template <> +struct better_off_calling_std_sort : std::true_type {}; +#endif + +#if defined KOKKOS_ENABLE_HPX +template <> +struct better_off_calling_std_sort : std::true_type { +}; +#endif + +template +inline constexpr bool better_off_calling_std_sort_v = + better_off_calling_std_sort::value; + +template +struct min_max_functor { + using minmax_scalar = + Kokkos::MinMaxScalar; + + ViewType view; + min_max_functor(const ViewType& view_) : view(view_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const size_t& i, minmax_scalar& minmax) const { + if (view(i) < minmax.min_val) minmax.min_val = view(i); + if (view(i) > minmax.max_val) minmax.max_val = view(i); + } +}; + +template +void sort_via_binsort(const ExecutionSpace& exec, + const Kokkos::View& view) { + // Although we are using BinSort below, which could work on rank-2 views, + // for now view must be rank-1 because the min_max_functor + // used below only works for rank-1 views + using ViewType = Kokkos::View; + static_assert(ViewType::rank == 1, + "Kokkos::sort: currently only supports rank-1 Views."); + + if (view.extent(0) <= 1) { + return; + } + + Kokkos::MinMaxScalar result; + Kokkos::MinMax reducer(result); + 
parallel_reduce("Kokkos::Sort::FindExtent", + Kokkos::RangePolicy( + exec, 0, view.extent(0)), + min_max_functor(view), reducer); + if (result.min_val == result.max_val) return; + // For integral types the number of bins may be larger than the range + // in which case we can exactly have one unique value per bin + // and then don't need to sort bins. + bool sort_in_bins = true; + // TODO: figure out better max_bins then this ... + int64_t max_bins = view.extent(0) / 2; + if (std::is_integral::value) { + // Cast to double to avoid possible overflow when using integer + auto const max_val = static_cast(result.max_val); + auto const min_val = static_cast(result.min_val); + // using 10M as the cutoff for special behavior (roughly 40MB for the count + // array) + if ((max_val - min_val) < 10000000) { + max_bins = max_val - min_val + 1; + sort_in_bins = false; + } + } + if (std::is_floating_point::value) { + KOKKOS_ASSERT(std::isfinite(static_cast(result.max_val) - + static_cast(result.min_val))); + } + + using CompType = BinOp1D; + BinSort bin_sort( + view, CompType(max_bins, result.min_val, result.max_val), sort_in_bins); + bin_sort.create_permute_vector(exec); + bin_sort.sort(exec, view); +} + +#if defined(KOKKOS_ENABLE_CUDA) +template +void sort_cudathrust(const Cuda& space, + const Kokkos::View& view, + MaybeComparator&&... maybeComparator) { + using ViewType = Kokkos::View; + static_assert(ViewType::rank == 1, + "Kokkos::sort: currently only supports rank-1 Views."); + + if (view.extent(0) <= 1) { + return; + } + const auto exec = thrust::cuda::par.on(space.cuda_stream()); + auto first = ::Kokkos::Experimental::begin(view); + auto last = ::Kokkos::Experimental::end(view); + thrust::sort(exec, first, last, + std::forward(maybeComparator)...); +} +#endif + +#if defined(KOKKOS_ENABLE_ONEDPL) +template +void sort_onedpl(const Kokkos::Experimental::SYCL& space, + const Kokkos::View& view, + MaybeComparator&&... 
maybeComparator) { + using ViewType = Kokkos::View; + static_assert(SpaceAccessibility::accessible, + "SYCL execution space is not able to access the memory space " + "of the View argument!"); + + static_assert( + (ViewType::rank == 1) && + (std::is_same_v || + std::is_same_v || + std::is_same_v), + "SYCL sort only supports contiguous rank-1 Views with LayoutLeft, " + "LayoutRight or LayoutStride" + "For the latter, this means the View must have stride(0) = 1, enforced " + "at runtime."); + + if (view.stride(0) != 1) { + Kokkos::abort("SYCL sort only supports rank-1 Views with stride(0) = 1."); + } + + if (view.extent(0) <= 1) { + return; + } + + // Can't use Experimental::begin/end here since the oneDPL then assumes that + // the data is on the host. + auto queue = space.sycl_queue(); + auto policy = oneapi::dpl::execution::make_device_policy(queue); + const int n = view.extent(0); + oneapi::dpl::sort(policy, view.data(), view.data() + n, + std::forward(maybeComparator)...); +} +#endif + +template +void copy_to_host_run_stdsort_copy_back( + const ExecutionSpace& exec, + const Kokkos::View& view, + MaybeComparator&&... 
maybeComparator) { + namespace KE = ::Kokkos::Experimental; + + using ViewType = Kokkos::View; + using layout = typename ViewType::array_layout; + if constexpr (std::is_same_v) { + // for strided views we cannot just deep_copy from device to host, + // so we need to do a few more jumps + using view_value_type = typename ViewType::non_const_value_type; + using view_exespace = typename ViewType::execution_space; + using view_deep_copyable_t = Kokkos::View; + view_deep_copyable_t view_dc("view_dc", view.extent(0)); + KE::copy(exec, view, view_dc); + + // run sort on the mirror of view_dc + auto mv_h = create_mirror_view_and_copy(Kokkos::HostSpace(), view_dc); + auto first = KE::begin(mv_h); + auto last = KE::end(mv_h); + std::sort(first, last, std::forward(maybeComparator)...); + Kokkos::deep_copy(exec, view_dc, mv_h); + + // copy back to argument view + KE::copy(exec, KE::cbegin(view_dc), KE::cend(view_dc), KE::begin(view)); + } else { + auto view_h = create_mirror_view_and_copy(Kokkos::HostSpace(), view); + auto first = KE::begin(view_h); + auto last = KE::end(view_h); + std::sort(first, last, std::forward(maybeComparator)...); + Kokkos::deep_copy(exec, view, view_h); + } +} + +// -------------------------------------------------- +// +// specialize cases for sorting without comparator +// +// -------------------------------------------------- + +#if defined(KOKKOS_ENABLE_CUDA) +template +void sort_device_view_without_comparator( + const Cuda& exec, const Kokkos::View& view) { + sort_cudathrust(exec, view); +} +#endif + +#if defined(KOKKOS_ENABLE_ONEDPL) +template +void sort_device_view_without_comparator( + const Kokkos::Experimental::SYCL& exec, + const Kokkos::View& view) { + using ViewType = Kokkos::View; + static_assert( + (ViewType::rank == 1) && + (std::is_same_v || + std::is_same_v || + std::is_same_v), + "sort_device_view_without_comparator: supports rank-1 Views " + "with LayoutLeft, LayoutRight or LayoutStride"); + + if (view.stride(0) == 1) { + 
sort_onedpl(exec, view); + } else { + copy_to_host_run_stdsort_copy_back(exec, view); + } +} +#endif + +// fallback case +template +std::enable_if_t::value> +sort_device_view_without_comparator( + const ExecutionSpace& exec, + const Kokkos::View& view) { + sort_via_binsort(exec, view); +} + +// -------------------------------------------------- +// +// specialize cases for sorting with comparator +// +// -------------------------------------------------- + +#if defined(KOKKOS_ENABLE_CUDA) +template +void sort_device_view_with_comparator( + const Cuda& exec, const Kokkos::View& view, + const ComparatorType& comparator) { + sort_cudathrust(exec, view, comparator); +} +#endif + +#if defined(KOKKOS_ENABLE_ONEDPL) +template +void sort_device_view_with_comparator( + const Kokkos::Experimental::SYCL& exec, + const Kokkos::View& view, + const ComparatorType& comparator) { + using ViewType = Kokkos::View; + static_assert( + (ViewType::rank == 1) && + (std::is_same_v || + std::is_same_v || + std::is_same_v), + "sort_device_view_with_comparator: supports rank-1 Views " + "with LayoutLeft, LayoutRight or LayoutStride"); + + if (view.stride(0) == 1) { + sort_onedpl(exec, view, comparator); + } else { + copy_to_host_run_stdsort_copy_back(exec, view, comparator); + } +} +#endif + +template +std::enable_if_t::value> +sort_device_view_with_comparator( + const ExecutionSpace& exec, + const Kokkos::View& view, + const ComparatorType& comparator) { + // This is a fallback case if a more specialized overload does not exist: + // for now, this fallback copies data to host, runs std::sort + // and then copies data back. Potentially, this can later be changed + // with a better solution like our own quicksort on device or similar. 
+ + using ViewType = Kokkos::View; + using MemSpace = typename ViewType::memory_space; + static_assert(!SpaceAccessibility::accessible, + "Impl::sort_device_view_with_comparator: should not be called " + "on a view that is already accessible on the host"); + + copy_to_host_run_stdsort_copy_back(exec, view, comparator); +} + +} // namespace Impl +} // namespace Kokkos +#endif diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AdjacentDifference.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AdjacentDifference.hpp index 38dcd1a674..f254686dba 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AdjacentDifference.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AdjacentDifference.hpp @@ -23,64 +23,85 @@ namespace Kokkos { namespace Experimental { -template -std::enable_if_t::value, - OutputIteratorType> -adjacent_difference(const ExecutionSpace& ex, InputIteratorType first_from, - InputIteratorType last_from, - OutputIteratorType first_dest) { +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIteratorType, + typename OutputIteratorType, + std::enable_if_t::value && + ::Kokkos::is_execution_space::value, + int> = 0> +OutputIteratorType adjacent_difference(const ExecutionSpace& ex, + InputIteratorType first_from, + InputIteratorType last_from, + OutputIteratorType first_dest) { using value_type1 = typename InputIteratorType::value_type; using value_type2 = typename OutputIteratorType::value_type; using binary_op = Impl::StdAdjacentDifferenceDefaultBinaryOpFunctor; - return Impl::adjacent_difference_impl( + return Impl::adjacent_difference_exespace_impl( "Kokkos::adjacent_difference_iterator_api", ex, first_from, last_from, first_dest, binary_op()); } -template -std::enable_if_t::value, - OutputIteratorType> -adjacent_difference(const ExecutionSpace& ex, InputIteratorType first_from, - InputIteratorType last_from, OutputIteratorType first_dest, - BinaryOp bin_op) { - return 
Impl::adjacent_difference_impl( +template < + typename ExecutionSpace, typename InputIteratorType, + typename OutputIteratorType, typename BinaryOp, + std::enable_if_t::value && + ::Kokkos::is_execution_space::value, + int> = 0> +OutputIteratorType adjacent_difference(const ExecutionSpace& ex, + InputIteratorType first_from, + InputIteratorType last_from, + OutputIteratorType first_dest, + BinaryOp bin_op) { + return Impl::adjacent_difference_exespace_impl( "Kokkos::adjacent_difference_iterator_api", ex, first_from, last_from, first_dest, bin_op); } -template -std::enable_if_t::value, - OutputIteratorType> -adjacent_difference(const std::string& label, const ExecutionSpace& ex, - InputIteratorType first_from, InputIteratorType last_from, - OutputIteratorType first_dest) { +template < + typename ExecutionSpace, typename InputIteratorType, + typename OutputIteratorType, + std::enable_if_t::value && + ::Kokkos::is_execution_space::value, + int> = 0> +OutputIteratorType adjacent_difference(const std::string& label, + const ExecutionSpace& ex, + InputIteratorType first_from, + InputIteratorType last_from, + OutputIteratorType first_dest) { using value_type1 = typename InputIteratorType::value_type; using value_type2 = typename OutputIteratorType::value_type; using binary_op = Impl::StdAdjacentDifferenceDefaultBinaryOpFunctor; - return Impl::adjacent_difference_impl(label, ex, first_from, last_from, - first_dest, binary_op()); + return Impl::adjacent_difference_exespace_impl( + label, ex, first_from, last_from, first_dest, binary_op()); } -template -std::enable_if_t::value, - OutputIteratorType> -adjacent_difference(const std::string& label, const ExecutionSpace& ex, - InputIteratorType first_from, InputIteratorType last_from, - OutputIteratorType first_dest, BinaryOp bin_op) { - return Impl::adjacent_difference_impl(label, ex, first_from, last_from, - first_dest, bin_op); +template < + typename ExecutionSpace, typename InputIteratorType, + typename OutputIteratorType, 
typename BinaryOp, + std::enable_if_t::value && + ::Kokkos::is_execution_space::value, + int> = 0> +OutputIteratorType adjacent_difference(const std::string& label, + const ExecutionSpace& ex, + InputIteratorType first_from, + InputIteratorType last_from, + OutputIteratorType first_dest, + BinaryOp bin_op) { + return Impl::adjacent_difference_exespace_impl(label, ex, first_from, + last_from, first_dest, bin_op); } -template +template ::value, + int> = 0> auto adjacent_difference( const ExecutionSpace& ex, const ::Kokkos::View& view_from, @@ -96,13 +117,15 @@ auto adjacent_difference( using binary_op = Impl::StdAdjacentDifferenceDefaultBinaryOpFunctor; - return Impl::adjacent_difference_impl( + return Impl::adjacent_difference_exespace_impl( "Kokkos::adjacent_difference_view_api", ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), binary_op()); } -template +template ::value, + int> = 0> auto adjacent_difference( const ExecutionSpace& ex, const ::Kokkos::View& view_from, @@ -111,13 +134,15 @@ auto adjacent_difference( namespace KE = ::Kokkos::Experimental; Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - return Impl::adjacent_difference_impl( + return Impl::adjacent_difference_exespace_impl( "Kokkos::adjacent_difference_view_api", ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), bin_op); } -template +template ::value, + int> = 0> auto adjacent_difference( const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, @@ -134,13 +159,15 @@ auto adjacent_difference( Impl::StdAdjacentDifferenceDefaultBinaryOpFunctor; - return Impl::adjacent_difference_impl(label, ex, KE::cbegin(view_from), - KE::cend(view_from), - KE::begin(view_dest), binary_op()); + return Impl::adjacent_difference_exespace_impl( + label, ex, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest), binary_op()); } -template +template 
::value, + int> = 0> auto adjacent_difference( const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, @@ -149,9 +176,85 @@ auto adjacent_difference( namespace KE = ::Kokkos::Experimental; Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - return Impl::adjacent_difference_impl(label, ex, KE::cbegin(view_from), - KE::cend(view_from), - KE::begin(view_dest), bin_op); + return Impl::adjacent_difference_exespace_impl( + label, ex, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest), bin_op); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template ::value && + ::Kokkos::is_team_handle::value, + int> = 0> +KOKKOS_FUNCTION OutputIteratorType adjacent_difference( + const TeamHandleType& teamHandle, InputIteratorType first_from, + InputIteratorType last_from, OutputIteratorType first_dest) { + using value_type1 = typename InputIteratorType::value_type; + using value_type2 = typename OutputIteratorType::value_type; + using binary_op = + Impl::StdAdjacentDifferenceDefaultBinaryOpFunctor; + return Impl::adjacent_difference_team_impl(teamHandle, first_from, last_from, + first_dest, binary_op()); +} + +template ::value && + ::Kokkos::is_team_handle::value, + int> = 0> +KOKKOS_FUNCTION OutputIteratorType +adjacent_difference(const TeamHandleType& teamHandle, + InputIteratorType first_from, InputIteratorType last_from, + OutputIteratorType first_dest, BinaryOp bin_op) { + return Impl::adjacent_difference_team_impl(teamHandle, first_from, last_from, + first_dest, bin_op); +} + +template < + typename TeamHandleType, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION auto adjacent_difference( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest) { + namespace KE = ::Kokkos::Experimental; + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + + using view_type1 = ::Kokkos::View; + using view_type2 = ::Kokkos::View; + using value_type1 = typename view_type1::value_type; + using value_type2 = typename view_type2::value_type; + using binary_op = + Impl::StdAdjacentDifferenceDefaultBinaryOpFunctor; + return Impl::adjacent_difference_team_impl(teamHandle, KE::cbegin(view_from), + KE::cend(view_from), + KE::begin(view_dest), binary_op()); +} + +template < + typename TeamHandleType, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename BinaryOp, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION auto adjacent_difference( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest, + BinaryOp bin_op) { + namespace KE = ::Kokkos::Experimental; + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + return Impl::adjacent_difference_team_impl(teamHandle, KE::cbegin(view_from), + KE::cend(view_from), + KE::begin(view_dest), bin_op); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AdjacentFind.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AdjacentFind.hpp index 43c2b66010..ac476ca5bf 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AdjacentFind.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AdjacentFind.hpp @@ -23,71 +23,144 @@ namespace Kokkos { namespace Experimental { +// +// overload set accepting execution space +// + // 
overload set1 -template +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t, int> = 0> IteratorType adjacent_find(const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::adjacent_find_impl("Kokkos::adjacent_find_iterator_api_default", - ex, first, last); + return Impl::adjacent_find_exespace_impl( + "Kokkos::adjacent_find_iterator_api_default", ex, first, last); } -template +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t, int> = 0> IteratorType adjacent_find(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::adjacent_find_impl(label, ex, first, last); + return Impl::adjacent_find_exespace_impl(label, ex, first, last); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + std::enable_if_t, int> = 0> auto adjacent_find(const ExecutionSpace& ex, const ::Kokkos::View& v) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::adjacent_find_impl("Kokkos::adjacent_find_view_api_default", ex, - KE::begin(v), KE::end(v)); + return Impl::adjacent_find_exespace_impl( + "Kokkos::adjacent_find_view_api_default", ex, KE::begin(v), KE::end(v)); } -template +template < + typename ExecutionSpace, typename DataType, typename... 
Properties, + std::enable_if_t, int> = 0> auto adjacent_find(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::adjacent_find_impl(label, ex, KE::begin(v), KE::end(v)); + return Impl::adjacent_find_exespace_impl(label, ex, KE::begin(v), KE::end(v)); } // overload set2 -template +template < + typename ExecutionSpace, typename IteratorType, + typename BinaryPredicateType, + std::enable_if_t, int> = 0> IteratorType adjacent_find(const ExecutionSpace& ex, IteratorType first, IteratorType last, BinaryPredicateType pred) { - return Impl::adjacent_find_impl("Kokkos::adjacent_find_iterator_api_default", - ex, first, last, pred); + return Impl::adjacent_find_exespace_impl( + "Kokkos::adjacent_find_iterator_api_default", ex, first, last, pred); } -template +template < + typename ExecutionSpace, typename IteratorType, + typename BinaryPredicateType, + std::enable_if_t, int> = 0> IteratorType adjacent_find(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, BinaryPredicateType pred) { - return Impl::adjacent_find_impl(label, ex, first, last, pred); + return Impl::adjacent_find_exespace_impl(label, ex, first, last, pred); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename BinaryPredicateType, + std::enable_if_t, int> = 0> auto adjacent_find(const ExecutionSpace& ex, const ::Kokkos::View& v, BinaryPredicateType pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::adjacent_find_impl("Kokkos::adjacent_find_view_api_default", ex, - KE::begin(v), KE::end(v), pred); + return Impl::adjacent_find_exespace_impl( + "Kokkos::adjacent_find_view_api_default", ex, KE::begin(v), KE::end(v), + pred); } -template +template < + typename ExecutionSpace, typename DataType, typename... 
Properties, + typename BinaryPredicateType, + std::enable_if_t, int> = 0> auto adjacent_find(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, BinaryPredicateType pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::adjacent_find_impl(label, ex, KE::begin(v), KE::end(v), pred); + return Impl::adjacent_find_exespace_impl(label, ex, KE::begin(v), KE::end(v), + pred); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// + +// overload set1 +template , int> = 0> +KOKKOS_FUNCTION IteratorType adjacent_find(const TeamHandleType& teamHandle, + IteratorType first, + IteratorType last) { + return Impl::adjacent_find_team_impl(teamHandle, first, last); +} + +template , int> = 0> +KOKKOS_FUNCTION auto adjacent_find( + const TeamHandleType& teamHandle, + const ::Kokkos::View& v) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + namespace KE = ::Kokkos::Experimental; + return Impl::adjacent_find_team_impl(teamHandle, KE::begin(v), KE::end(v)); +} + +// overload set2 +template , int> = 0> +KOKKOS_FUNCTION IteratorType adjacent_find(const TeamHandleType& teamHandle, + IteratorType first, + IteratorType last, + BinaryPredicateType pred) { + return Impl::adjacent_find_team_impl(teamHandle, first, last, pred); +} + +template , int> = 0> +KOKKOS_FUNCTION auto adjacent_find( + const TeamHandleType& teamHandle, + const ::Kokkos::View& v, + BinaryPredicateType pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + namespace KE = ::Kokkos::Experimental; + return Impl::adjacent_find_team_impl(teamHandle, KE::begin(v), KE::end(v), + pred); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AllOf.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AllOf.hpp index 2ffec7e144..d6ed4c4a7e 
100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AllOf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AllOf.hpp @@ -23,41 +23,79 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool all_of(const ExecutionSpace& ex, InputIterator first, InputIterator last, Predicate predicate) { - return Impl::all_of_impl("Kokkos::all_of_iterator_api_default", ex, first, - last, predicate); + return Impl::all_of_exespace_impl("Kokkos::all_of_iterator_api_default", ex, + first, last, predicate); } -template +template < + typename ExecutionSpace, typename InputIterator, typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool all_of(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator last, Predicate predicate) { - return Impl::all_of_impl(label, ex, first, last, predicate); + return Impl::all_of_exespace_impl(label, ex, first, last, predicate); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool all_of(const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::all_of_impl("Kokkos::all_of_view_api_default", ex, KE::cbegin(v), - KE::cend(v), std::move(predicate)); + return Impl::all_of_exespace_impl("Kokkos::all_of_view_api_default", ex, + KE::cbegin(v), KE::cend(v), + std::move(predicate)); } -template +template < + typename ExecutionSpace, typename DataType, typename... 
Properties, + typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool all_of(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::all_of_impl(label, ex, KE::cbegin(v), KE::cend(v), - std::move(predicate)); + return Impl::all_of_exespace_impl(label, ex, KE::cbegin(v), KE::cend(v), + std::move(predicate)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION bool all_of(const TeamHandleType& teamHandle, + InputIterator first, InputIterator last, + Predicate predicate) { + return Impl::all_of_team_impl(teamHandle, first, last, predicate); +} + +template , int> = 0> +KOKKOS_FUNCTION bool all_of(const TeamHandleType& teamHandle, + const ::Kokkos::View& v, + Predicate predicate) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + namespace KE = ::Kokkos::Experimental; + return Impl::all_of_team_impl(teamHandle, KE::cbegin(v), KE::cend(v), + std::move(predicate)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AnyOf.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AnyOf.hpp index 019c466c6d..82356e6598 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AnyOf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AnyOf.hpp @@ -23,41 +23,79 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool any_of(const ExecutionSpace& ex, InputIterator first, InputIterator last, Predicate predicate) { - return 
Impl::any_of_impl("Kokkos::any_of_view_api_default", ex, first, last, - predicate); + return Impl::any_of_exespace_impl("Kokkos::any_of_view_api_default", ex, + first, last, predicate); } -template +template < + typename ExecutionSpace, typename InputIterator, typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool any_of(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator last, Predicate predicate) { - return Impl::any_of_impl(label, ex, first, last, predicate); + return Impl::any_of_exespace_impl(label, ex, first, last, predicate); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool any_of(const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::any_of_impl("Kokkos::any_of_view_api_default", ex, KE::cbegin(v), - KE::cend(v), std::move(predicate)); + return Impl::any_of_exespace_impl("Kokkos::any_of_view_api_default", ex, + KE::cbegin(v), KE::cend(v), + std::move(predicate)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool any_of(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::any_of_impl(label, ex, KE::cbegin(v), KE::cend(v), - std::move(predicate)); + return Impl::any_of_exespace_impl(label, ex, KE::cbegin(v), KE::cend(v), + std::move(predicate)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION bool any_of(const TeamHandleType& teamHandle, + InputIterator first, InputIterator last, + Predicate predicate) { + return Impl::any_of_team_impl(teamHandle, first, last, predicate); +} + +template , int> = 0> +KOKKOS_FUNCTION bool any_of(const TeamHandleType& teamHandle, + const ::Kokkos::View& v, + Predicate predicate) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + namespace KE = ::Kokkos::Experimental; + return Impl::any_of_team_impl(teamHandle, KE::cbegin(v), KE::cend(v), + std::move(predicate)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Copy.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Copy.hpp index 028f3b66b2..b7ce1ba5ed 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Copy.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Copy.hpp @@ -23,22 +23,31 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator copy(const ExecutionSpace& ex, InputIterator first, InputIterator last, OutputIterator d_first) { - return Impl::copy_impl("Kokkos::copy_iterator_api_default", ex, first, last, - d_first); + return Impl::copy_exespace_impl("Kokkos::copy_iterator_api_default", ex, + first, last, d_first); } -template +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator copy(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator last, OutputIterator d_first) { - return Impl::copy_impl(label, ex, first, last, d_first); + return Impl::copy_exespace_impl(label, ex, first, last, d_first); } -template +template < + typename ExecutionSpace, typename DataType1, typename... 
Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto copy(const ExecutionSpace& ex, const ::Kokkos::View& source, ::Kokkos::View& dest) { @@ -46,12 +55,15 @@ auto copy(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); namespace KE = ::Kokkos::Experimental; - return Impl::copy_impl("Kokkos::copy_view_api_default", ex, - KE::cbegin(source), KE::cend(source), KE::begin(dest)); + return Impl::copy_exespace_impl("Kokkos::copy_view_api_default", ex, + KE::cbegin(source), KE::cend(source), + KE::begin(dest)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto copy(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, ::Kokkos::View& dest) { @@ -59,8 +71,35 @@ auto copy(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); namespace KE = ::Kokkos::Experimental; - return Impl::copy_impl(label, ex, KE::cbegin(source), KE::cend(source), - KE::begin(dest)); + return Impl::copy_exespace_impl(label, ex, KE::cbegin(source), + KE::cend(source), KE::begin(dest)); +} + +// +// overload set accepting team handle +// +template , int> = 0> +KOKKOS_FUNCTION OutputIterator copy(const TeamHandleType& teamHandle, + InputIterator first, InputIterator last, + OutputIterator d_first) { + return Impl::copy_team_impl(teamHandle, first, last, d_first); +} + +template , int> = 0> +KOKKOS_FUNCTION auto copy( + const TeamHandleType& teamHandle, + const ::Kokkos::View& source, + ::Kokkos::View& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + namespace KE = ::Kokkos::Experimental; + return Impl::copy_team_impl(teamHandle, KE::cbegin(source), 
KE::cend(source), + KE::begin(dest)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CopyBackward.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CopyBackward.hpp index deff6baf9a..8f9e0f19b8 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CopyBackward.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CopyBackward.hpp @@ -23,42 +23,81 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType2 copy_backward(const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 d_last) { - return Impl::copy_backward_impl("Kokkos::copy_backward_iterator_api_default", - ex, first, last, d_last); + return Impl::copy_backward_exespace_impl( + "Kokkos::copy_backward_iterator_api_default", ex, first, last, d_last); } -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType2 copy_backward(const std::string& label, const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 d_last) { - return Impl::copy_backward_impl(label, ex, first, last, d_last); + return Impl::copy_backward_exespace_impl(label, ex, first, last, d_last); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto copy_backward(const ExecutionSpace& ex, const ::Kokkos::View& source, ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::copy_backward_impl("Kokkos::copy_backward_view_api_default", ex, - cbegin(source), cend(source), end(dest)); + return Impl::copy_backward_exespace_impl( + "Kokkos::copy_backward_view_api_default", ex, cbegin(source), + cend(source), end(dest)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto copy_backward(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::copy_backward_impl(label, ex, cbegin(source), cend(source), - end(dest)); + return Impl::copy_backward_exespace_impl(label, ex, cbegin(source), + cend(source), end(dest)); +} + +// +// overload set accepting team handle +// +template , int> = 0> +KOKKOS_FUNCTION IteratorType2 copy_backward(const TeamHandleType& teamHandle, + IteratorType1 first, + IteratorType1 last, + IteratorType2 d_last) { + return Impl::copy_backward_team_impl(teamHandle, first, last, d_last); +} + +template , int> = 0> +KOKKOS_FUNCTION auto copy_backward( + const TeamHandleType& teamHandle, + const ::Kokkos::View& source, + ::Kokkos::View& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::copy_backward_team_impl(teamHandle, cbegin(source), cend(source), + end(dest)); } } // namespace Experimental diff --git 
a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CopyIf.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CopyIf.hpp index 3db2fc074f..ba18bc76b9 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CopyIf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CopyIf.hpp @@ -23,46 +23,85 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator copy_if(const ExecutionSpace& ex, InputIterator first, InputIterator last, OutputIterator d_first, Predicate pred) { - return Impl::copy_if_impl("Kokkos::copy_if_iterator_api_default", ex, first, - last, d_first, std::move(pred)); + return Impl::copy_if_exespace_impl("Kokkos::copy_if_iterator_api_default", ex, + first, last, d_first, std::move(pred)); } -template +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator copy_if(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator last, OutputIterator d_first, Predicate pred) { - return Impl::copy_if_impl(label, ex, first, last, d_first, std::move(pred)); + return Impl::copy_if_exespace_impl(label, ex, first, last, d_first, + std::move(pred)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto copy_if(const ExecutionSpace& ex, const ::Kokkos::View& source, ::Kokkos::View& dest, Predicate pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::copy_if_impl("Kokkos::copy_if_view_api_default", ex, - cbegin(source), cend(source), begin(dest), - std::move(pred)); + return Impl::copy_if_exespace_impl("Kokkos::copy_if_view_api_default", ex, + cbegin(source), cend(source), begin(dest), + std::move(pred)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto copy_if(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, ::Kokkos::View& dest, Predicate pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::copy_if_impl(label, ex, cbegin(source), cend(source), - begin(dest), std::move(pred)); + return Impl::copy_if_exespace_impl(label, ex, cbegin(source), cend(source), + begin(dest), std::move(pred)); +} + +// +// overload set accepting team handle +// +template , int> = 0> +KOKKOS_FUNCTION OutputIterator copy_if(const TeamHandleType& teamHandle, + InputIterator first, InputIterator last, + OutputIterator d_first, Predicate pred) { + return Impl::copy_if_team_impl(teamHandle, first, last, d_first, + std::move(pred)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto copy_if( + const TeamHandleType& teamHandle, + const ::Kokkos::View& source, + ::Kokkos::View& dest, Predicate pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::copy_if_team_impl(teamHandle, 
cbegin(source), cend(source), + begin(dest), std::move(pred)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CopyN.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CopyN.hpp index a64f99b5c0..43c9120483 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CopyN.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CopyN.hpp @@ -23,23 +23,32 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename Size, + typename OutputIterator, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator copy_n(const ExecutionSpace& ex, InputIterator first, Size count, OutputIterator result) { - return Impl::copy_n_impl("Kokkos::copy_n_iterator_api_default", ex, first, - count, result); + return Impl::copy_n_exespace_impl("Kokkos::copy_n_iterator_api_default", ex, + first, count, result); } -template +template < + typename ExecutionSpace, typename InputIterator, typename Size, + typename OutputIterator, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator copy_n(const std::string& label, const ExecutionSpace& ex, InputIterator first, Size count, OutputIterator result) { - return Impl::copy_n_impl(label, ex, first, count, result); + return Impl::copy_n_exespace_impl(label, ex, first, count, result); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename Size, typename DataType2, typename... 
Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto copy_n(const ExecutionSpace& ex, const ::Kokkos::View& source, Size count, ::Kokkos::View& dest) { @@ -47,12 +56,14 @@ auto copy_n(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); namespace KE = ::Kokkos::Experimental; - return Impl::copy_n_impl("Kokkos::copy_n_view_api_default", ex, - KE::cbegin(source), count, KE::begin(dest)); + return Impl::copy_n_exespace_impl("Kokkos::copy_n_view_api_default", ex, + KE::cbegin(source), count, KE::begin(dest)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename Size, typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto copy_n(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, Size count, ::Kokkos::View& dest) { @@ -60,8 +71,35 @@ auto copy_n(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); namespace KE = ::Kokkos::Experimental; - return Impl::copy_n_impl(label, ex, KE::cbegin(source), count, - KE::begin(dest)); + return Impl::copy_n_exespace_impl(label, ex, KE::cbegin(source), count, + KE::begin(dest)); +} + +// +// overload set accepting team handle +// +template , int> = 0> +KOKKOS_FUNCTION OutputIterator copy_n(const TeamHandleType& teamHandle, + InputIterator first, Size count, + OutputIterator result) { + return Impl::copy_n_team_impl(teamHandle, first, count, result); +} + +template , int> = 0> +KOKKOS_FUNCTION auto copy_n( + const TeamHandleType& teamHandle, + const ::Kokkos::View& source, Size count, + ::Kokkos::View& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + namespace KE = ::Kokkos::Experimental; + return Impl::copy_n_team_impl(teamHandle, KE::cbegin(source), count, + 
KE::begin(dest)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Count.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Count.hpp index 3ac63467ec..f179e88bab 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Count.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Count.hpp @@ -23,41 +23,81 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> typename IteratorType::difference_type count(const ExecutionSpace& ex, IteratorType first, IteratorType last, const T& value) { - return Impl::count_impl("Kokkos::count_iterator_api_default", ex, first, last, - value); + return Impl::count_exespace_impl("Kokkos::count_iterator_api_default", ex, + first, last, value); } -template +template < + typename ExecutionSpace, typename IteratorType, typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> typename IteratorType::difference_type count(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, const T& value) { - return Impl::count_impl(label, ex, first, last, value); + return Impl::count_exespace_impl(label, ex, first, last, value); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto count(const ExecutionSpace& ex, const ::Kokkos::View& v, const T& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::count_impl("Kokkos::count_view_api_default", ex, KE::cbegin(v), - KE::cend(v), value); + return Impl::count_exespace_impl("Kokkos::count_view_api_default", ex, + KE::cbegin(v), KE::cend(v), value); } -template +template < + typename ExecutionSpace, typename DataType, typename... 
Properties, + typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto count(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, const T& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::count_impl(label, ex, KE::cbegin(v), KE::cend(v), value); + return Impl::count_exespace_impl(label, ex, KE::cbegin(v), KE::cend(v), + value); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// + +template , int> = 0> +KOKKOS_FUNCTION typename IteratorType::difference_type count( + const TeamHandleType& teamHandle, IteratorType first, IteratorType last, + const T& value) { + return Impl::count_team_impl(teamHandle, first, last, value); +} + +template , int> = 0> +KOKKOS_FUNCTION auto count(const TeamHandleType& teamHandle, + const ::Kokkos::View& v, + const T& value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + namespace KE = ::Kokkos::Experimental; + return Impl::count_team_impl(teamHandle, KE::cbegin(v), KE::cend(v), value); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CountIf.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CountIf.hpp index b9731d378a..967cf75e7a 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CountIf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CountIf.hpp @@ -23,46 +23,84 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> typename IteratorType::difference_type count_if(const ExecutionSpace& ex, IteratorType first, IteratorType last, Predicate predicate) { - return 
Impl::count_if_impl("Kokkos::count_if_iterator_api_default", ex, first, - last, std::move(predicate)); + return Impl::count_if_exespace_impl("Kokkos::count_if_iterator_api_default", + ex, first, last, std::move(predicate)); } -template +template < + typename ExecutionSpace, typename IteratorType, typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> typename IteratorType::difference_type count_if(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, Predicate predicate) { - return Impl::count_if_impl(label, ex, first, last, std::move(predicate)); + return Impl::count_if_exespace_impl(label, ex, first, last, + std::move(predicate)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto count_if(const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::count_if_impl("Kokkos::count_if_view_api_default", ex, - KE::cbegin(v), KE::cend(v), std::move(predicate)); + return Impl::count_if_exespace_impl("Kokkos::count_if_view_api_default", ex, + KE::cbegin(v), KE::cend(v), + std::move(predicate)); } -template +template < + typename ExecutionSpace, typename DataType, typename... 
Properties, + typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto count_if(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::count_if_impl(label, ex, KE::cbegin(v), KE::cend(v), - std::move(predicate)); + return Impl::count_if_exespace_impl(label, ex, KE::cbegin(v), KE::cend(v), + std::move(predicate)); +} + +// +// overload set accepting team handle +// +template , int> = 0> +KOKKOS_FUNCTION typename IteratorType::difference_type count_if( + const TeamHandleType& teamHandle, IteratorType first, IteratorType last, + Predicate predicate) { + return Impl::count_if_team_impl(teamHandle, first, last, + std::move(predicate)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto count_if(const TeamHandleType& teamHandle, + const ::Kokkos::View& v, + Predicate predicate) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + namespace KE = ::Kokkos::Experimental; + return Impl::count_if_team_impl(teamHandle, KE::cbegin(v), KE::cend(v), + std::move(predicate)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Equal.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Equal.hpp index 37c0d75ef5..a72a49cc22 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Equal.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Equal.hpp @@ -23,50 +23,61 @@ namespace Kokkos { namespace Experimental { -template -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - IteratorType1, IteratorType2>::value, - bool> -equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, - IteratorType2 first2) { - return Impl::equal_impl("Kokkos::equal_iterator_api_default", ex, first1, - last1, first2); +// +// overload set accepting execution space +// +template && + Kokkos::is_execution_space_v, + int> = 0> 
+bool equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2) { + return Impl::equal_exespace_impl("Kokkos::equal_iterator_api_default", ex, + first1, last1, first2); } -template -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - IteratorType1, IteratorType2>::value, - bool> -equal(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, - IteratorType1 last1, IteratorType2 first2) { - return Impl::equal_impl(label, ex, first1, last1, first2); +template && ::Kokkos:: + is_execution_space_v, + int> = 0> +bool equal(const std::string& label, const ExecutionSpace& ex, + IteratorType1 first1, IteratorType1 last1, IteratorType2 first2) { + return Impl::equal_exespace_impl(label, ex, first1, last1, first2); } -template -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - IteratorType1, IteratorType2>::value, - bool> -equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, - IteratorType2 first2, BinaryPredicateType predicate) { - return Impl::equal_impl("Kokkos::equal_iterator_api_default", ex, first1, - last1, first2, std::move(predicate)); +template && ::Kokkos:: + is_execution_space_v, + int> = 0> +bool equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, BinaryPredicateType predicate) { + return Impl::equal_exespace_impl("Kokkos::equal_iterator_api_default", ex, + first1, last1, first2, std::move(predicate)); } -template -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - IteratorType1, IteratorType2>::value, - bool> -equal(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, - IteratorType1 last1, IteratorType2 first2, - BinaryPredicateType predicate) { - return Impl::equal_impl(label, ex, first1, last1, first2, - std::move(predicate)); +template && ::Kokkos:: + is_execution_space_v, + int> = 0> +bool equal(const std::string& label, const ExecutionSpace& ex, + IteratorType1 first1, 
IteratorType1 last1, IteratorType2 first2, + BinaryPredicateType predicate) { + return Impl::equal_exespace_impl(label, ex, first1, last1, first2, + std::move(predicate)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool equal(const ExecutionSpace& ex, const ::Kokkos::View& view1, ::Kokkos::View& view2) { @@ -74,13 +85,15 @@ bool equal(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; - return Impl::equal_impl("Kokkos::equal_view_api_default", ex, - KE::cbegin(view1), KE::cend(view1), - KE::cbegin(view2)); + return Impl::equal_exespace_impl("Kokkos::equal_view_api_default", ex, + KE::cbegin(view1), KE::cend(view1), + KE::cbegin(view2)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool equal(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view1, ::Kokkos::View& view2) { @@ -88,12 +101,14 @@ bool equal(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; - return Impl::equal_impl(label, ex, KE::cbegin(view1), KE::cend(view1), - KE::cbegin(view2)); + return Impl::equal_exespace_impl(label, ex, KE::cbegin(view1), + KE::cend(view1), KE::cbegin(view2)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool equal(const ExecutionSpace& ex, const ::Kokkos::View& view1, ::Kokkos::View& view2, @@ -102,13 +117,15 @@ bool equal(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; - return Impl::equal_impl("Kokkos::equal_view_api_default", ex, - KE::cbegin(view1), KE::cend(view1), KE::cbegin(view2), - std::move(predicate)); + return Impl::equal_exespace_impl("Kokkos::equal_view_api_default", ex, + KE::cbegin(view1), KE::cend(view1), + KE::cbegin(view2), std::move(predicate)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool equal(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view1, ::Kokkos::View& view2, @@ -117,51 +134,149 @@ bool equal(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; - return Impl::equal_impl(label, ex, KE::cbegin(view1), KE::cend(view1), - KE::cbegin(view2), std::move(predicate)); + return Impl::equal_exespace_impl(label, ex, KE::cbegin(view1), + KE::cend(view1), KE::cbegin(view2), + std::move(predicate)); } -template -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - IteratorType1, IteratorType2>::value, - bool> -equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, - IteratorType2 first2, IteratorType2 last2) { - return Impl::equal_impl("Kokkos::equal_iterator_api_default", ex, first1, - last1, first2, last2); +template && ::Kokkos:: + is_execution_space_v, + int> = 0> +bool equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, IteratorType2 last2) { + return 
Impl::equal_exespace_impl("Kokkos::equal_iterator_api_default", ex, + first1, last1, first2, last2); } -template -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - IteratorType1, IteratorType2>::value, - bool> -equal(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, - IteratorType1 last1, IteratorType2 first2, IteratorType2 last2) { - return Impl::equal_impl(label, ex, first1, last1, first2, last2); +template && ::Kokkos:: + is_execution_space_v, + int> = 0> +bool equal(const std::string& label, const ExecutionSpace& ex, + IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, + IteratorType2 last2) { + return Impl::equal_exespace_impl(label, ex, first1, last1, first2, last2); } -template -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - IteratorType1, IteratorType2>::value, - bool> -equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, - IteratorType2 first2, IteratorType2 last2, - BinaryPredicateType predicate) { - return Impl::equal_impl("Kokkos::equal_iterator_api_default", ex, first1, - last1, first2, last2, std::move(predicate)); +template && ::Kokkos:: + is_execution_space_v, + int> = 0> +bool equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, IteratorType2 last2, + BinaryPredicateType predicate) { + return Impl::equal_exespace_impl("Kokkos::equal_iterator_api_default", ex, + first1, last1, first2, last2, + std::move(predicate)); } -template -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - IteratorType1, IteratorType2>::value, - bool> -equal(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, - IteratorType1 last1, IteratorType2 first2, IteratorType2 last2, - BinaryPredicateType predicate) { - return Impl::equal_impl(label, ex, first1, last1, first2, last2, - std::move(predicate)); +template && ::Kokkos:: + is_execution_space_v, + int> = 0> +bool equal(const std::string& label, const 
ExecutionSpace& ex, + IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, + IteratorType2 last2, BinaryPredicateType predicate) { + return Impl::equal_exespace_impl(label, ex, first1, last1, first2, last2, + std::move(predicate)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template && ::Kokkos:: + is_team_handle_v, + int> = 0> +KOKKOS_FUNCTION bool equal(const TeamHandleType& teamHandle, + IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2) { + return Impl::equal_team_impl(teamHandle, first1, last1, first2); +} + +template && ::Kokkos:: + is_team_handle_v, + int> = 0> +KOKKOS_FUNCTION bool equal(const TeamHandleType& teamHandle, + IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, + BinaryPredicateType predicate) { + return Impl::equal_team_impl(teamHandle, first1, last1, first2, + std::move(predicate)); +} + +template , int> = 0> +KOKKOS_FUNCTION bool equal( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view1, + ::Kokkos::View& view2) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); + + namespace KE = ::Kokkos::Experimental; + return Impl::equal_team_impl(teamHandle, KE::cbegin(view1), KE::cend(view1), + KE::cbegin(view2)); +} + +template , int> = 0> +KOKKOS_FUNCTION bool equal( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view1, + ::Kokkos::View& view2, + BinaryPredicateType predicate) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); + + namespace KE = ::Kokkos::Experimental; + return Impl::equal_team_impl(teamHandle, KE::cbegin(view1), KE::cend(view1), + KE::cbegin(view2), std::move(predicate)); +} + +template && ::Kokkos:: + is_team_handle_v, + int> = 0> +KOKKOS_FUNCTION 
bool equal(const TeamHandleType& teamHandle, + IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, IteratorType2 last2) { + return Impl::equal_team_impl(teamHandle, first1, last1, first2, last2); +} + +template && ::Kokkos:: + is_team_handle_v, + int> = 0> +KOKKOS_FUNCTION bool equal(const TeamHandleType& teamHandle, + IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, IteratorType2 last2, + BinaryPredicateType predicate) { + return Impl::equal_team_impl(teamHandle, first1, last1, first2, last2, + std::move(predicate)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ExclusiveScan.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ExclusiveScan.hpp index 4e05676c2c..ee3a105126 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ExclusiveScan.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ExclusiveScan.hpp @@ -23,105 +23,130 @@ namespace Kokkos { namespace Experimental { +// +// overload set accepting execution space +// + // overload set 1 -template -std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -exclusive_scan(const ExecutionSpace& ex, InputIteratorType first, - InputIteratorType last, OutputIteratorType first_dest, - ValueType init_value) { - static_assert(std::is_move_constructible::value, +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType exclusive_scan(const ExecutionSpace& ex, + InputIteratorType first, + InputIteratorType last, + OutputIteratorType first_dest, + ValueType init_value) { + static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); - return Impl::exclusive_scan_default_op_impl( + return Impl::exclusive_scan_default_op_exespace_impl( "Kokkos::exclusive_scan_default_functors_iterator_api", ex, first, last, - first_dest, init_value); + first_dest, std::move(init_value)); } -template 
-std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -exclusive_scan(const std::string& label, const ExecutionSpace& ex, - InputIteratorType first, InputIteratorType last, - OutputIteratorType first_dest, ValueType init_value) { - static_assert(std::is_move_constructible::value, +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType exclusive_scan(const std::string& label, + const ExecutionSpace& ex, + InputIteratorType first, + InputIteratorType last, + OutputIteratorType first_dest, + ValueType init_value) { + static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); - return Impl::exclusive_scan_default_op_impl(label, ex, first, last, - first_dest, init_value); + return Impl::exclusive_scan_default_op_exespace_impl( + label, ex, first, last, first_dest, std::move(init_value)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto exclusive_scan(const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, ValueType init_value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - static_assert(std::is_move_constructible::value, + static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); namespace KE = ::Kokkos::Experimental; - return Impl::exclusive_scan_default_op_impl( + return Impl::exclusive_scan_default_op_exespace_impl( "Kokkos::exclusive_scan_default_functors_view_api", ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), - init_value); + std::move(init_value)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... 
Properties1, + typename DataType2, typename... Properties2, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto exclusive_scan(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, ValueType init_value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - static_assert(std::is_move_constructible::value, + static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); namespace KE = ::Kokkos::Experimental; - return Impl::exclusive_scan_default_op_impl(label, ex, KE::cbegin(view_from), - KE::cend(view_from), - KE::begin(view_dest), init_value); + return Impl::exclusive_scan_default_op_exespace_impl( + label, ex, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest), std::move(init_value)); } // overload set 2 -template -std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -exclusive_scan(const ExecutionSpace& ex, InputIteratorType first, - InputIteratorType last, OutputIteratorType first_dest, - ValueType init_value, BinaryOpType bop) { +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType exclusive_scan(const ExecutionSpace& ex, + InputIteratorType first, + InputIteratorType last, + OutputIteratorType first_dest, + ValueType init_value, BinaryOpType bop) { Impl::static_assert_is_not_openmptarget(ex); - static_assert(std::is_move_constructible::value, + static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); - return Impl::exclusive_scan_custom_op_impl( + return Impl::exclusive_scan_custom_op_exespace_impl( "Kokkos::exclusive_scan_custom_functors_iterator_api", ex, first, last, - first_dest, init_value, bop); + first_dest, std::move(init_value), bop); } -template 
-std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -exclusive_scan(const std::string& label, const ExecutionSpace& ex, - InputIteratorType first, InputIteratorType last, - OutputIteratorType first_dest, ValueType init_value, - BinaryOpType bop) { +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType exclusive_scan(const std::string& label, + const ExecutionSpace& ex, + InputIteratorType first, + InputIteratorType last, + OutputIteratorType first_dest, + ValueType init_value, BinaryOpType bop) { Impl::static_assert_is_not_openmptarget(ex); - static_assert(std::is_move_constructible::value, + static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); - return Impl::exclusive_scan_custom_op_impl(label, ex, first, last, first_dest, - init_value, bop); + return Impl::exclusive_scan_custom_op_exespace_impl( + label, ex, first, last, first_dest, std::move(init_value), bop); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename ValueType, + typename BinaryOpType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto exclusive_scan(const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, @@ -129,18 +154,20 @@ auto exclusive_scan(const ExecutionSpace& ex, Impl::static_assert_is_not_openmptarget(ex); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - static_assert(std::is_move_constructible::value, + static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); namespace KE = ::Kokkos::Experimental; - return Impl::exclusive_scan_custom_op_impl( + return Impl::exclusive_scan_custom_op_exespace_impl( "Kokkos::exclusive_scan_custom_functors_view_api", ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), - init_value, bop); + std::move(init_value), bop); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename ValueType, + typename BinaryOpType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto exclusive_scan(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, @@ -148,12 +175,92 @@ auto exclusive_scan(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_not_openmptarget(ex); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - static_assert(std::is_move_constructible::value, + static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); namespace KE = ::Kokkos::Experimental; - return Impl::exclusive_scan_custom_op_impl( + return Impl::exclusive_scan_custom_op_exespace_impl( label, ex, KE::cbegin(view_from), KE::cend(view_from), - KE::begin(view_dest), init_value, bop); + KE::begin(view_dest), std::move(init_value), bop); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// + +// overload set 1 +template && + Kokkos::is_team_handle_v, + int> = 0> +KOKKOS_FUNCTION OutputIteratorType +exclusive_scan(const TeamHandleType& teamHandle, InputIteratorType first, + InputIteratorType last, OutputIteratorType first_dest, + ValueType init_value) { + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + return Impl::exclusive_scan_default_op_team_impl( + teamHandle, first, last, first_dest, std::move(init_value)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto exclusive_scan( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest, + ValueType init_value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + namespace KE = ::Kokkos::Experimental; + return Impl::exclusive_scan_default_op_team_impl( + teamHandle, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest), std::move(init_value)); +} + +// overload set 2 +template && + Kokkos::is_team_handle_v, + int> = 0> +KOKKOS_FUNCTION OutputIteratorType +exclusive_scan(const TeamHandleType& teamHandle, InputIteratorType first, + InputIteratorType last, OutputIteratorType first_dest, + ValueType init_value, BinaryOpType bop) { + Impl::static_assert_is_not_openmptarget(teamHandle); + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + return Impl::exclusive_scan_custom_op_team_impl( + teamHandle, first, last, first_dest, std::move(init_value), bop); +} + +template , int> = 0> +KOKKOS_FUNCTION auto exclusive_scan( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest, + ValueType init_value, BinaryOpType bop) { + Impl::static_assert_is_not_openmptarget(teamHandle); + 
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + namespace KE = ::Kokkos::Experimental; + return Impl::exclusive_scan_custom_op_team_impl( + teamHandle, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest), std::move(init_value), bop); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Fill.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Fill.hpp index 1e300a4c20..6d805ba1be 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Fill.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Fill.hpp @@ -23,33 +23,67 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void fill(const ExecutionSpace& ex, IteratorType first, IteratorType last, const T& value) { - Impl::fill_impl("Kokkos::fill_iterator_api_default", ex, first, last, value); + Impl::fill_exespace_impl("Kokkos::fill_iterator_api_default", ex, first, last, + value); } -template +template < + typename ExecutionSpace, typename IteratorType, typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void fill(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, const T& value) { - Impl::fill_impl(label, ex, first, last, value); + Impl::fill_exespace_impl(label, ex, first, last, value); } -template +template < + typename ExecutionSpace, typename DataType, typename... 
Properties, + typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void fill(const ExecutionSpace& ex, const ::Kokkos::View& view, const T& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - - Impl::fill_impl("Kokkos::fill_view_api_default", ex, begin(view), end(view), - value); + Impl::fill_exespace_impl("Kokkos::fill_view_api_default", ex, begin(view), + end(view), value); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void fill(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, const T& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::fill_exespace_impl(label, ex, begin(view), end(view), value); +} - Impl::fill_impl(label, ex, begin(view), end(view), value); +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION void fill(const TeamHandleType& th, IteratorType first, + IteratorType last, const T& value) { + Impl::fill_team_impl(th, first, last, value); +} + +template , int> = 0> +KOKKOS_FUNCTION void fill(const TeamHandleType& th, + const ::Kokkos::View& view, + const T& value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::fill_team_impl(th, begin(view), end(view), value); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FillN.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FillN.hpp index 02503dfd14..66b8cd66cc 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FillN.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FillN.hpp @@ -23,38 +23,72 @@ namespace Kokkos { namespace Experimental { -template +template < + typename ExecutionSpace, typename IteratorType, typename SizeType, + typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType fill_n(const ExecutionSpace& ex, IteratorType first, SizeType n, const T& value) { - return Impl::fill_n_impl("Kokkos::fill_n_iterator_api_default", ex, first, n, - value); + return Impl::fill_n_exespace_impl("Kokkos::fill_n_iterator_api_default", ex, + first, n, value); } -template +template < + typename ExecutionSpace, typename IteratorType, typename SizeType, + typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType fill_n(const std::string& label, const ExecutionSpace& ex, IteratorType first, SizeType n, const T& value) { - return Impl::fill_n_impl(label, ex, first, n, value); + return Impl::fill_n_exespace_impl(label, ex, first, n, value); } -template +template < + typename ExecutionSpace, typename DataType, typename... 
Properties, + typename SizeType, typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto fill_n(const ExecutionSpace& ex, const ::Kokkos::View& view, SizeType n, const T& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::fill_n_impl("Kokkos::fill_n_view_api_default", ex, begin(view), - n, value); + return Impl::fill_n_exespace_impl("Kokkos::fill_n_view_api_default", ex, + begin(view), n, value); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename SizeType, typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto fill_n(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, SizeType n, const T& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::fill_n_impl(label, ex, begin(view), n, value); + return Impl::fill_n_exespace_impl(label, ex, begin(view), n, value); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION IteratorType fill_n(const TeamHandleType& th, + IteratorType first, SizeType n, + const T& value) { + return Impl::fill_n_team_impl(th, first, n, value); +} + +template , int> = 0> +KOKKOS_FUNCTION auto fill_n(const TeamHandleType& th, + const ::Kokkos::View& view, + SizeType n, const T& value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + return Impl::fill_n_team_impl(th, begin(view), n, value); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Find.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Find.hpp index 65b68cf931..e5e2b0e2b0 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Find.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Find.hpp @@ -23,36 +23,76 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> InputIterator find(const ExecutionSpace& ex, InputIterator first, InputIterator last, const T& value) { - return Impl::find_impl("Kokkos::find_iterator_api_default", ex, first, last, - value); + return Impl::find_exespace_impl("Kokkos::find_iterator_api_default", ex, + first, last, value); } -template +template < + typename ExecutionSpace, typename InputIterator, typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> InputIterator find(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator last, const T& value) { - return Impl::find_impl(label, ex, first, last, value); + return Impl::find_exespace_impl(label, ex, first, last, value); } -template +template < + typename ExecutionSpace, typename DataType, typename... 
Properties, + typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto find(const ExecutionSpace& ex, const ::Kokkos::View& view, const T& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::find_impl("Kokkos::find_view_api_default", ex, KE::begin(view), - KE::end(view), value); + return Impl::find_exespace_impl("Kokkos::find_view_api_default", ex, + KE::begin(view), KE::end(view), value); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename T, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto find(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, const T& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::find_impl(label, ex, KE::begin(view), KE::end(view), value); + return Impl::find_exespace_impl(label, ex, KE::begin(view), KE::end(view), + value); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION InputIterator find(const TeamHandleType& teamHandle, + InputIterator first, InputIterator last, + const T& value) { + return Impl::find_team_impl(teamHandle, first, last, value); +} + +template , int> = 0> +KOKKOS_FUNCTION auto find(const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + const T& value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + namespace KE = ::Kokkos::Experimental; + return Impl::find_team_impl(teamHandle, KE::begin(view), KE::end(view), + value); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindEnd.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindEnd.hpp index f6a38855eb..a4ec735fd5 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindEnd.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindEnd.hpp @@ -24,24 +24,34 @@ namespace Kokkos { namespace Experimental { +// +// overload set accepting execution space +// + // overload set 1: no binary predicate passed -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 find_end(const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last) { - return Impl::find_end_impl("Kokkos::find_end_iterator_api_default", ex, first, - last, s_first, s_last); + return Impl::find_end_exespace_impl("Kokkos::find_end_iterator_api_default", + ex, first, last, s_first, s_last); } -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 find_end(const std::string& label, const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last) { - return Impl::find_end_impl(label, ex, first, last, s_first, s_last); + return 
Impl::find_end_exespace_impl(label, ex, first, last, s_first, s_last); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto find_end(const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view) { @@ -49,13 +59,15 @@ auto find_end(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; - return Impl::find_end_impl("Kokkos::find_end_view_api_default", ex, - KE::begin(view), KE::end(view), KE::begin(s_view), - KE::end(s_view)); + return Impl::find_end_exespace_impl("Kokkos::find_end_view_api_default", ex, + KE::begin(view), KE::end(view), + KE::begin(s_view), KE::end(s_view)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto find_end(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view) { @@ -63,31 +75,38 @@ auto find_end(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; - return Impl::find_end_impl(label, ex, KE::begin(view), KE::end(view), - KE::begin(s_view), KE::end(s_view)); + return Impl::find_end_exespace_impl(label, ex, KE::begin(view), KE::end(view), + KE::begin(s_view), KE::end(s_view)); } // overload set 2: binary predicate passed -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + typename BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 find_end(const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last, const BinaryPredicateType& pred) { - 
return Impl::find_end_impl("Kokkos::find_end_iterator_api_default", ex, first, - last, s_first, s_last, pred); + return Impl::find_end_exespace_impl("Kokkos::find_end_iterator_api_default", + ex, first, last, s_first, s_last, pred); } -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + typename BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 find_end(const std::string& label, const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last, const BinaryPredicateType& pred) { - return Impl::find_end_impl(label, ex, first, last, s_first, s_last, pred); + return Impl::find_end_exespace_impl(label, ex, first, last, s_first, s_last, + pred); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto find_end(const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view, @@ -96,13 +115,15 @@ auto find_end(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; - return Impl::find_end_impl("Kokkos::find_end_view_api_default", ex, - KE::begin(view), KE::end(view), KE::begin(s_view), - KE::end(s_view), pred); + return Impl::find_end_exespace_impl("Kokkos::find_end_view_api_default", ex, + KE::begin(view), KE::end(view), + KE::begin(s_view), KE::end(s_view), pred); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto find_end(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view, @@ -111,8 +132,71 @@ auto find_end(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; - return Impl::find_end_impl(label, ex, KE::begin(view), KE::end(view), - KE::begin(s_view), KE::end(s_view), pred); + return Impl::find_end_exespace_impl(label, ex, KE::begin(view), KE::end(view), + KE::begin(s_view), KE::end(s_view), pred); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// + +// overload set 1: no binary predicate passed +template , int> = 0> +KOKKOS_FUNCTION IteratorType1 find_end(const TeamHandleType& teamHandle, + IteratorType1 first, IteratorType1 last, + IteratorType2 s_first, + IteratorType2 s_last) { + return Impl::find_end_team_impl(teamHandle, first, last, s_first, s_last); +} + +template , int> = 0> +KOKKOS_FUNCTION auto find_end( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + const ::Kokkos::View& s_view) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); + + namespace KE = ::Kokkos::Experimental; + return Impl::find_end_team_impl(teamHandle, KE::begin(view), KE::end(view), + KE::begin(s_view), KE::end(s_view)); +} + +// overload set 2: binary predicate passed +template , int> = 0> + +KOKKOS_FUNCTION IteratorType1 find_end(const TeamHandleType& teamHandle, + IteratorType1 first, IteratorType1 last, + IteratorType2 s_first, + IteratorType2 s_last, + const BinaryPredicateType& pred) { + return Impl::find_end_team_impl(teamHandle, first, last, s_first, s_last, + pred); +} + +template , 
int> = 0> +KOKKOS_FUNCTION auto find_end( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + const ::Kokkos::View& s_view, + const BinaryPredicateType& pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); + + namespace KE = ::Kokkos::Experimental; + return Impl::find_end_team_impl(teamHandle, KE::begin(view), KE::end(view), + KE::begin(s_view), KE::end(s_view), pred); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindFirstOf.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindFirstOf.hpp index 6b0e4993ee..341a70e2f2 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindFirstOf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindFirstOf.hpp @@ -23,24 +23,36 @@ namespace Kokkos { namespace Experimental { +// +// overload set accepting execution space +// + // overload set 1: no binary predicate passed -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 find_first_of(const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last) { - return Impl::find_first_of_impl("Kokkos::find_first_of_iterator_api_default", - ex, first, last, s_first, s_last); + return Impl::find_first_of_exespace_impl( + "Kokkos::find_first_of_iterator_api_default", ex, first, last, s_first, + s_last); } -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 find_first_of(const std::string& label, const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last) { - return Impl::find_first_of_impl(label, ex, first, last, s_first, s_last); + return Impl::find_first_of_exespace_impl(label, ex, 
first, last, s_first, + s_last); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto find_first_of(const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view) { @@ -48,13 +60,15 @@ auto find_first_of(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; - return Impl::find_first_of_impl("Kokkos::find_first_of_view_api_default", ex, - KE::begin(view), KE::end(view), - KE::begin(s_view), KE::end(s_view)); + return Impl::find_first_of_exespace_impl( + "Kokkos::find_first_of_view_api_default", ex, KE::begin(view), + KE::end(view), KE::begin(s_view), KE::end(s_view)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto find_first_of(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view) { @@ -62,33 +76,41 @@ auto find_first_of(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; - return Impl::find_first_of_impl(label, ex, KE::begin(view), KE::end(view), - KE::begin(s_view), KE::end(s_view)); + return Impl::find_first_of_exespace_impl(label, ex, KE::begin(view), + KE::end(view), KE::begin(s_view), + KE::end(s_view)); } // overload set 2: binary predicate passed -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + typename BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 find_first_of(const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last, const 
BinaryPredicateType& pred) { - return Impl::find_first_of_impl("Kokkos::find_first_of_iterator_api_default", - ex, first, last, s_first, s_last, pred); + return Impl::find_first_of_exespace_impl( + "Kokkos::find_first_of_iterator_api_default", ex, first, last, s_first, + s_last, pred); } -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + typename BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 find_first_of(const std::string& label, const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last, const BinaryPredicateType& pred) { - return Impl::find_first_of_impl(label, ex, first, last, s_first, s_last, - pred); + return Impl::find_first_of_exespace_impl(label, ex, first, last, s_first, + s_last, pred); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto find_first_of(const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view, @@ -97,13 +119,15 @@ auto find_first_of(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; - return Impl::find_first_of_impl("Kokkos::find_first_of_view_api_default", ex, - KE::begin(view), KE::end(view), - KE::begin(s_view), KE::end(s_view), pred); + return Impl::find_first_of_exespace_impl( + "Kokkos::find_first_of_view_api_default", ex, KE::begin(view), + KE::end(view), KE::begin(s_view), KE::end(s_view), pred); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto find_first_of(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view, @@ -112,8 +136,77 @@ auto find_first_of(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; - return Impl::find_first_of_impl(label, ex, KE::begin(view), KE::end(view), - KE::begin(s_view), KE::end(s_view), pred); + return Impl::find_first_of_exespace_impl(label, ex, KE::begin(view), + KE::end(view), KE::begin(s_view), + KE::end(s_view), pred); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// + +// overload set 1: no binary predicate passed +template , int> = 0> +KOKKOS_FUNCTION IteratorType1 find_first_of(const TeamHandleType& teamHandle, + IteratorType1 first, + IteratorType1 last, + IteratorType2 s_first, + IteratorType2 s_last) { + return Impl::find_first_of_team_impl(teamHandle, first, last, s_first, + s_last); +} + +template , int> = 0> +KOKKOS_FUNCTION auto find_first_of( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + const ::Kokkos::View& s_view) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); + + namespace KE = ::Kokkos::Experimental; + return Impl::find_first_of_team_impl(teamHandle, KE::begin(view), + KE::end(view), KE::begin(s_view), + KE::end(s_view)); +} + +// overload set 2: binary predicate passed +template , int> = 0> + +KOKKOS_FUNCTION IteratorType1 find_first_of(const TeamHandleType& teamHandle, + IteratorType1 first, + IteratorType1 last, + IteratorType2 s_first, + IteratorType2 s_last, + const BinaryPredicateType& pred) { + return 
Impl::find_first_of_team_impl(teamHandle, first, last, s_first, s_last, + pred); +} + +template , int> = 0> +KOKKOS_FUNCTION auto find_first_of( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + const ::Kokkos::View& s_view, + const BinaryPredicateType& pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); + + namespace KE = ::Kokkos::Experimental; + return Impl::find_first_of_team_impl(teamHandle, KE::begin(view), + KE::end(view), KE::begin(s_view), + KE::end(s_view), pred); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindIf.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindIf.hpp index 911316a668..283fab7617 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindIf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindIf.hpp @@ -23,42 +23,82 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, typename PredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType find_if(const ExecutionSpace& ex, IteratorType first, IteratorType last, PredicateType predicate) { - return Impl::find_if_or_not_impl("Kokkos::find_if_iterator_api_default", - ex, first, last, std::move(predicate)); + return Impl::find_if_or_not_exespace_impl( + "Kokkos::find_if_iterator_api_default", ex, first, last, + std::move(predicate)); } -template +template < + typename ExecutionSpace, typename IteratorType, typename PredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType find_if(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, PredicateType predicate) { - return Impl::find_if_or_not_impl(label, ex, first, last, - std::move(predicate)); + return Impl::find_if_or_not_exespace_impl(label, ex, first, 
last, + std::move(predicate)); } -template +template ::value, + int> = 0> auto find_if(const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::find_if_or_not_impl("Kokkos::find_if_view_api_default", ex, - KE::begin(v), KE::end(v), - std::move(predicate)); + return Impl::find_if_or_not_exespace_impl( + "Kokkos::find_if_view_api_default", ex, KE::begin(v), KE::end(v), + std::move(predicate)); } -template +template ::value, + int> = 0> auto find_if(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::find_if_or_not_impl(label, ex, KE::begin(v), KE::end(v), - std::move(predicate)); + return Impl::find_if_or_not_exespace_impl( + label, ex, KE::begin(v), KE::end(v), std::move(predicate)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION IteratorType find_if(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + PredicateType predicate) { + return Impl::find_if_or_not_team_impl(teamHandle, first, last, + std::move(predicate)); +} + +template < + typename TeamHandleType, typename DataType, typename... 
Properties, + typename Predicate, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION auto find_if(const TeamHandleType& teamHandle, + const ::Kokkos::View& v, + Predicate predicate) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + namespace KE = ::Kokkos::Experimental; + return Impl::find_if_or_not_team_impl(teamHandle, KE::begin(v), + KE::end(v), std::move(predicate)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindIfNot.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindIfNot.hpp index 18294d7b7d..5e17a6f539 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindIfNot.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindIfNot.hpp @@ -23,45 +23,84 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType find_if_not(const ExecutionSpace& ex, IteratorType first, IteratorType last, Predicate predicate) { - return Impl::find_if_or_not_impl( + return Impl::find_if_or_not_exespace_impl( "Kokkos::find_if_not_iterator_api_default", ex, first, last, std::move(predicate)); } -template +template < + typename ExecutionSpace, typename IteratorType, typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType find_if_not(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, Predicate predicate) { - return Impl::find_if_or_not_impl(label, ex, first, last, - std::move(predicate)); + return Impl::find_if_or_not_exespace_impl(label, ex, first, last, + std::move(predicate)); } -template +template ::value, + int> = 0> auto find_if_not(const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = 
::Kokkos::Experimental; - return Impl::find_if_or_not_impl( + return Impl::find_if_or_not_exespace_impl( "Kokkos::find_if_not_view_api_default", ex, KE::begin(v), KE::end(v), std::move(predicate)); } -template +template ::value, + int> = 0> auto find_if_not(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::find_if_or_not_impl(label, ex, KE::begin(v), KE::end(v), - std::move(predicate)); + return Impl::find_if_or_not_exespace_impl( + label, ex, KE::begin(v), KE::end(v), std::move(predicate)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION IteratorType find_if_not(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + Predicate predicate) { + return Impl::find_if_or_not_team_impl(teamHandle, first, last, + std::move(predicate)); +} + +template < + typename TeamHandleType, typename DataType, typename... 
Properties, + typename Predicate, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION auto find_if_not( + const TeamHandleType& teamHandle, + const ::Kokkos::View& v, Predicate predicate) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + namespace KE = ::Kokkos::Experimental; + return Impl::find_if_or_not_team_impl( + teamHandle, KE::begin(v), KE::end(v), std::move(predicate)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ForEach.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ForEach.hpp index d7b08e4842..6215b325af 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ForEach.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ForEach.hpp @@ -23,42 +23,83 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + class ExecutionSpace, class IteratorType, class UnaryFunctorType, + std::enable_if_t, int> = 0> UnaryFunctorType for_each(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, UnaryFunctorType functor) { - return Impl::for_each_impl(label, ex, first, last, std::move(functor)); + return Impl::for_each_exespace_impl(label, ex, first, last, + std::move(functor)); } -template +template < + class ExecutionSpace, class IteratorType, class UnaryFunctorType, + std::enable_if_t, int> = 0> UnaryFunctorType for_each(const ExecutionSpace& ex, IteratorType first, IteratorType last, UnaryFunctorType functor) { - return Impl::for_each_impl("Kokkos::for_each_iterator_api_default", ex, first, - last, std::move(functor)); + return Impl::for_each_exespace_impl("Kokkos::for_each_iterator_api_default", + ex, first, last, std::move(functor)); } -template +template < + class ExecutionSpace, class DataType, class... 
Properties, + class UnaryFunctorType, + std::enable_if_t, int> = 0> UnaryFunctorType for_each(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, UnaryFunctorType functor) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::for_each_impl(label, ex, KE::begin(v), KE::end(v), - std::move(functor)); + return Impl::for_each_exespace_impl(label, ex, KE::begin(v), KE::end(v), + std::move(functor)); } -template +template < + class ExecutionSpace, class DataType, class... Properties, + class UnaryFunctorType, + std::enable_if_t, int> = 0> UnaryFunctorType for_each(const ExecutionSpace& ex, const ::Kokkos::View& v, UnaryFunctorType functor) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::for_each_impl("Kokkos::for_each_view_api_default", ex, - KE::begin(v), KE::end(v), std::move(functor)); + return Impl::for_each_exespace_impl("Kokkos::for_each_view_api_default", ex, + KE::begin(v), KE::end(v), + std::move(functor)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// + +template , int> = 0> +KOKKOS_FUNCTION UnaryFunctorType for_each(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + UnaryFunctorType functor) { + return Impl::for_each_team_impl(teamHandle, first, last, std::move(functor)); +} + +template , int> = 0> +KOKKOS_FUNCTION UnaryFunctorType +for_each(const TeamHandleType& teamHandle, + const ::Kokkos::View& v, + UnaryFunctorType functor) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + namespace KE = ::Kokkos::Experimental; + return Impl::for_each_team_impl(teamHandle, KE::begin(v), KE::end(v), + std::move(functor)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ForEachN.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ForEachN.hpp index f1769da05b..e6fbcad891 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ForEachN.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ForEachN.hpp @@ -23,43 +23,87 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + class ExecutionSpace, class IteratorType, class SizeType, + class UnaryFunctorType, + std::enable_if_t, int> = 0> IteratorType for_each_n(const std::string& label, const ExecutionSpace& ex, IteratorType first, SizeType n, UnaryFunctorType functor) { - return Impl::for_each_n_impl(label, ex, first, n, std::move(functor)); + return Impl::for_each_n_exespace_impl(label, ex, first, n, + std::move(functor)); } -template +template < + class ExecutionSpace, class IteratorType, class SizeType, + class UnaryFunctorType, + std::enable_if_t, int> = 0> IteratorType for_each_n(const ExecutionSpace& ex, IteratorType first, SizeType n, UnaryFunctorType functor) { - return Impl::for_each_n_impl("Kokkos::for_each_n_iterator_api_default", ex, - first, n, std::move(functor)); + return Impl::for_each_n_exespace_impl( + "Kokkos::for_each_n_iterator_api_default", ex, first, n, + std::move(functor)); } -template 
+template < + class ExecutionSpace, class DataType, class... Properties, class SizeType, + class UnaryFunctorType, + std::enable_if_t, int> = 0> auto for_each_n(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, SizeType n, UnaryFunctorType functor) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::for_each_n_impl(label, ex, KE::begin(v), n, std::move(functor)); + return Impl::for_each_n_exespace_impl(label, ex, KE::begin(v), n, + std::move(functor)); } -template +template < + class ExecutionSpace, class DataType, class... Properties, class SizeType, + class UnaryFunctorType, + std::enable_if_t, int> = 0> auto for_each_n(const ExecutionSpace& ex, const ::Kokkos::View& v, SizeType n, UnaryFunctorType functor) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::for_each_n_impl("Kokkos::for_each_n_view_api_default", ex, - KE::begin(v), n, std::move(functor)); + return Impl::for_each_n_exespace_impl("Kokkos::for_each_n_view_api_default", + ex, KE::begin(v), n, + std::move(functor)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// + +template , int> = 0> +KOKKOS_FUNCTION IteratorType for_each_n(const TeamHandleType& teamHandle, + IteratorType first, SizeType n, + UnaryFunctorType functor) { + return Impl::for_each_n_team_impl(teamHandle, first, n, std::move(functor)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto for_each_n( + const TeamHandleType& teamHandle, + const ::Kokkos::View& v, SizeType n, + UnaryFunctorType functor) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + namespace KE = ::Kokkos::Experimental; + return Impl::for_each_n_team_impl(teamHandle, KE::begin(v), n, + std::move(functor)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Generate.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Generate.hpp index 13e12783e0..a3295084ee 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Generate.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Generate.hpp @@ -23,38 +23,68 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template , int> = 0> void generate(const ExecutionSpace& ex, IteratorType first, IteratorType last, Generator g) { - Impl::generate_impl("Kokkos::generate_iterator_api_default", ex, first, last, - std::move(g)); + Impl::generate_exespace_impl("Kokkos::generate_iterator_api_default", ex, + first, last, std::move(g)); } -template +template , int> = 0> void generate(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, Generator g) { - Impl::generate_impl(label, ex, first, last, std::move(g)); + Impl::generate_exespace_impl(label, ex, first, last, std::move(g)); } -template +template , int> = 0> void generate(const ExecutionSpace& ex, const ::Kokkos::View& view, Generator g) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - Impl::generate_impl("Kokkos::generate_view_api_default", ex, begin(view), - end(view), std::move(g)); + 
Impl::generate_exespace_impl("Kokkos::generate_view_api_default", ex, + begin(view), end(view), std::move(g)); } -template +template , int> = 0> void generate(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, Generator g) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - Impl::generate_impl(label, ex, begin(view), end(view), std::move(g)); + Impl::generate_exespace_impl(label, ex, begin(view), end(view), std::move(g)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION void generate(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + Generator g) { + Impl::generate_team_impl(teamHandle, first, last, std::move(g)); +} + +template , int> = 0> +KOKKOS_FUNCTION void generate( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, Generator g) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::generate_team_impl(teamHandle, begin(view), end(view), std::move(g)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_GenerateN.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_GenerateN.hpp index 4d17512228..e480062c23 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_GenerateN.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_GenerateN.hpp @@ -23,40 +23,75 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template , int> = 0> IteratorType generate_n(const ExecutionSpace& ex, IteratorType first, Size count, Generator g) { - Impl::generate_n_impl("Kokkos::generate_n_iterator_api_default", ex, first, - count, std::move(g)); - return first + count; + return Impl::generate_n_exespace_impl( + "Kokkos::generate_n_iterator_api_default", ex, first, count, + std::move(g)); } -template 
+template , int> = 0> IteratorType generate_n(const std::string& label, const ExecutionSpace& ex, IteratorType first, Size count, Generator g) { - Impl::generate_n_impl(label, ex, first, count, std::move(g)); - return first + count; + return Impl::generate_n_exespace_impl(label, ex, first, count, std::move(g)); } -template +template , int> = 0> auto generate_n(const ExecutionSpace& ex, const ::Kokkos::View& view, Size count, Generator g) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::generate_n_impl("Kokkos::generate_n_view_api_default", ex, - begin(view), count, std::move(g)); + return Impl::generate_n_exespace_impl("Kokkos::generate_n_view_api_default", + ex, begin(view), count, std::move(g)); } -template +template , int> = 0> auto generate_n(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, Size count, Generator g) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::generate_n_impl(label, ex, begin(view), count, std::move(g)); + return Impl::generate_n_exespace_impl(label, ex, begin(view), count, + std::move(g)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION IteratorType generate_n(const TeamHandleType& teamHandle, + IteratorType first, Size count, + Generator g) { + return Impl::generate_n_team_impl(teamHandle, first, count, std::move(g)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto generate_n( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, Size count, + Generator g) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + return Impl::generate_n_team_impl(teamHandle, begin(view), count, + std::move(g)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_InclusiveScan.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_InclusiveScan.hpp index bcd731b850..a0e540b5e7 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_InclusiveScan.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_InclusiveScan.hpp @@ -23,33 +23,45 @@ namespace Kokkos { namespace Experimental { +// +// overload set accepting execution space +// + // overload set 1 -template -std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -inclusive_scan(const ExecutionSpace& ex, InputIteratorType first, - InputIteratorType last, OutputIteratorType first_dest) { - return Impl::inclusive_scan_default_op_impl( +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType inclusive_scan(const ExecutionSpace& ex, + InputIteratorType first, + InputIteratorType last, + OutputIteratorType first_dest) { + return Impl::inclusive_scan_default_op_exespace_impl( "Kokkos::inclusive_scan_default_functors_iterator_api", ex, first, last, first_dest); } -template -std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -inclusive_scan(const std::string& label, const ExecutionSpace& ex, - InputIteratorType first, InputIteratorType last, - OutputIteratorType first_dest) { - 
return Impl::inclusive_scan_default_op_impl(label, ex, first, last, - first_dest); +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType inclusive_scan(const std::string& label, + const ExecutionSpace& ex, + InputIteratorType first, + InputIteratorType last, + OutputIteratorType first_dest) { + return Impl::inclusive_scan_default_op_exespace_impl(label, ex, first, last, + first_dest); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto inclusive_scan( const ExecutionSpace& ex, const ::Kokkos::View& view_from, @@ -57,13 +69,15 @@ auto inclusive_scan( Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); namespace KE = ::Kokkos::Experimental; - return Impl::inclusive_scan_default_op_impl( + return Impl::inclusive_scan_default_op_exespace_impl( "Kokkos::inclusive_scan_default_functors_view_api", ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto inclusive_scan( const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, @@ -71,39 +85,45 @@ auto inclusive_scan( Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); namespace KE = ::Kokkos::Experimental; - return Impl::inclusive_scan_default_op_impl(label, ex, KE::cbegin(view_from), - KE::cend(view_from), - KE::begin(view_dest)); + return Impl::inclusive_scan_default_op_exespace_impl( + label, ex, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest)); } // overload set 2 (accepting custom binary op) -template -std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -inclusive_scan(const ExecutionSpace& ex, InputIteratorType first, - InputIteratorType last, OutputIteratorType first_dest, - BinaryOp binary_op) { - return Impl::inclusive_scan_custom_binary_op_impl( +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType inclusive_scan(const ExecutionSpace& ex, + InputIteratorType first, + InputIteratorType last, + OutputIteratorType first_dest, + BinaryOp binary_op) { + return Impl::inclusive_scan_custom_binary_op_exespace_impl( "Kokkos::inclusive_scan_custom_functors_iterator_api", ex, first, last, first_dest, binary_op); } -template -std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -inclusive_scan(const std::string& label, const ExecutionSpace& ex, - InputIteratorType first, InputIteratorType last, - OutputIteratorType first_dest, BinaryOp binary_op) { - return Impl::inclusive_scan_custom_binary_op_impl(label, ex, first, last, - first_dest, binary_op); +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType inclusive_scan( + const std::string& label, 
const ExecutionSpace& ex, InputIteratorType first, + InputIteratorType last, OutputIteratorType first_dest, BinaryOp binary_op) { + return Impl::inclusive_scan_custom_binary_op_exespace_impl( + label, ex, first, last, first_dest, binary_op); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename BinaryOp, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto inclusive_scan(const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, @@ -111,14 +131,16 @@ auto inclusive_scan(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); namespace KE = ::Kokkos::Experimental; - return Impl::inclusive_scan_custom_binary_op_impl( + return Impl::inclusive_scan_custom_binary_op_exespace_impl( "Kokkos::inclusive_scan_custom_functors_view_api", ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), binary_op); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename BinaryOp, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto inclusive_scan(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, @@ -126,67 +148,192 @@ auto inclusive_scan(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); namespace KE = ::Kokkos::Experimental; - return Impl::inclusive_scan_custom_binary_op_impl( + return Impl::inclusive_scan_custom_binary_op_exespace_impl( label, ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), binary_op); } // overload set 3 -template -std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -inclusive_scan(const ExecutionSpace& ex, InputIteratorType first, - InputIteratorType last, OutputIteratorType first_dest, - BinaryOp binary_op, ValueType init_value) { - return Impl::inclusive_scan_custom_binary_op_impl( +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType inclusive_scan(const ExecutionSpace& ex, + InputIteratorType first, + InputIteratorType last, + OutputIteratorType first_dest, + BinaryOp binary_op, ValueType init_value) { + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + + return Impl::inclusive_scan_custom_binary_op_exespace_impl( "Kokkos::inclusive_scan_custom_functors_iterator_api", ex, first, last, - first_dest, binary_op, init_value); + first_dest, binary_op, std::move(init_value)); } -template -std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -inclusive_scan(const std::string& label, const ExecutionSpace& ex, - InputIteratorType first, InputIteratorType last, - OutputIteratorType first_dest, BinaryOp binary_op, - ValueType 
init_value) { - return Impl::inclusive_scan_custom_binary_op_impl( - label, ex, first, last, first_dest, binary_op, init_value); +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType inclusive_scan(const std::string& label, + const ExecutionSpace& ex, + InputIteratorType first, + InputIteratorType last, + OutputIteratorType first_dest, + BinaryOp binary_op, ValueType init_value) { + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + + return Impl::inclusive_scan_custom_binary_op_exespace_impl( + label, ex, first, last, first_dest, binary_op, std::move(init_value)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename BinaryOp, + typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto inclusive_scan(const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, BinaryOp binary_op, ValueType init_value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + namespace KE = ::Kokkos::Experimental; - return Impl::inclusive_scan_custom_binary_op_impl( + return Impl::inclusive_scan_custom_binary_op_exespace_impl( "Kokkos::inclusive_scan_custom_functors_view_api", ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), - binary_op, init_value); + binary_op, std::move(init_value)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename BinaryOp, + typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto inclusive_scan(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, BinaryOp binary_op, ValueType init_value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + namespace KE = ::Kokkos::Experimental; - return Impl::inclusive_scan_custom_binary_op_impl( + return Impl::inclusive_scan_custom_binary_op_exespace_impl( label, ex, KE::cbegin(view_from), KE::cend(view_from), - KE::begin(view_dest), binary_op, init_value); + KE::begin(view_dest), binary_op, std::move(init_value)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// + +// overload set 1 +template && :: + Kokkos::is_team_handle_v, + int> = 0> +KOKKOS_FUNCTION OutputIteratorType +inclusive_scan(const TeamHandleType& teamHandle, InputIteratorType first, + InputIteratorType last, OutputIteratorType first_dest) { + return Impl::inclusive_scan_default_op_team_impl(teamHandle, first, last, + first_dest); +} + +template , int> = 0> +KOKKOS_FUNCTION auto inclusive_scan( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + namespace KE = ::Kokkos::Experimental; + return Impl::inclusive_scan_default_op_team_impl( + teamHandle, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest)); +} + +// overload set 2 (accepting custom binary op) +template && :: + Kokkos::is_team_handle_v, + int> = 0> +KOKKOS_FUNCTION OutputIteratorType inclusive_scan( + const TeamHandleType& teamHandle, InputIteratorType first, + InputIteratorType last, OutputIteratorType first_dest, BinaryOp binary_op) { + return Impl::inclusive_scan_custom_binary_op_team_impl( + teamHandle, first, last, first_dest, binary_op); +} + +template , int> = 0> +KOKKOS_FUNCTION auto inclusive_scan( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest, + BinaryOp binary_op) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + namespace KE = ::Kokkos::Experimental; + return Impl::inclusive_scan_custom_binary_op_team_impl( + teamHandle, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest), binary_op); +} + +// overload set 3 +template && :: + Kokkos::is_team_handle_v, + int> = 0> + +KOKKOS_FUNCTION OutputIteratorType +inclusive_scan(const TeamHandleType& teamHandle, InputIteratorType first, + InputIteratorType last, 
OutputIteratorType first_dest, + BinaryOp binary_op, ValueType init_value) { + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + return Impl::inclusive_scan_custom_binary_op_team_impl( + teamHandle, first, last, first_dest, binary_op, std::move(init_value)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto inclusive_scan( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest, + BinaryOp binary_op, ValueType init_value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + + namespace KE = ::Kokkos::Experimental; + return Impl::inclusive_scan_custom_binary_op_team_impl( + teamHandle, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest), binary_op, std::move(init_value)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IsPartitioned.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IsPartitioned.hpp index 29d6be9e8b..42f20bc4ec 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IsPartitioned.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IsPartitioned.hpp @@ -23,39 +23,78 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, typename PredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_partitioned(const ExecutionSpace& ex, IteratorType first, IteratorType last, PredicateType p) { - return Impl::is_partitioned_impl( + return Impl::is_partitioned_exespace_impl( "Kokkos::is_partitioned_iterator_api_default", ex, first, last, std::move(p)); } -template +template < + typename ExecutionSpace, typename IteratorType, typename PredicateType, + 
std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_partitioned(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, PredicateType p) { - return Impl::is_partitioned_impl(label, ex, first, last, std::move(p)); + return Impl::is_partitioned_exespace_impl(label, ex, first, last, + std::move(p)); } -template +template < + typename ExecutionSpace, typename PredicateType, typename DataType, + typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_partitioned(const ExecutionSpace& ex, const ::Kokkos::View& v, PredicateType p) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - return Impl::is_partitioned_impl("Kokkos::is_partitioned_view_api_default", - ex, cbegin(v), cend(v), std::move(p)); + return Impl::is_partitioned_exespace_impl( + "Kokkos::is_partitioned_view_api_default", ex, cbegin(v), cend(v), + std::move(p)); } -template +template < + typename ExecutionSpace, typename PredicateType, typename DataType, + typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_partitioned(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, PredicateType p) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - return Impl::is_partitioned_impl(label, ex, cbegin(v), cend(v), std::move(p)); + return Impl::is_partitioned_exespace_impl(label, ex, cbegin(v), cend(v), + std::move(p)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION bool is_partitioned(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + PredicateType p) { + return Impl::is_partitioned_team_impl(teamHandle, first, last, std::move(p)); +} + +template , int> = 0> +KOKKOS_FUNCTION bool is_partitioned( + const TeamHandleType& teamHandle, + const ::Kokkos::View& v, PredicateType p) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + return Impl::is_partitioned_team_impl(teamHandle, cbegin(v), cend(v), + std::move(p)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IsSorted.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IsSorted.hpp index f036254a02..2c676c3ff3 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IsSorted.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IsSorted.hpp @@ -23,55 +23,73 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_sorted(const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::is_sorted_impl("Kokkos::is_sorted_iterator_api_default", ex, - first, last); + return Impl::is_sorted_exespace_impl("Kokkos::is_sorted_iterator_api_default", + ex, first, last); } -template +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_sorted(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::is_sorted_impl(label, ex, first, last); + return Impl::is_sorted_exespace_impl(label, ex, first, last); } -template +template < + typename ExecutionSpace, typename DataType, typename... 
Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_sorted(const ExecutionSpace& ex, const ::Kokkos::View& view) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::is_sorted_impl("Kokkos::is_sorted_view_api_default", ex, - KE::cbegin(view), KE::cend(view)); + return Impl::is_sorted_exespace_impl("Kokkos::is_sorted_view_api_default", ex, + KE::cbegin(view), KE::cend(view)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_sorted(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::is_sorted_impl(label, ex, KE::cbegin(view), KE::cend(view)); + return Impl::is_sorted_exespace_impl(label, ex, KE::cbegin(view), + KE::cend(view)); } -template +template < + typename ExecutionSpace, typename IteratorType, typename ComparatorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_sorted(const ExecutionSpace& ex, IteratorType first, IteratorType last, ComparatorType comp) { Impl::static_assert_is_not_openmptarget(ex); - return Impl::is_sorted_impl("Kokkos::is_sorted_iterator_api_default", ex, - first, last, std::move(comp)); + return Impl::is_sorted_exespace_impl("Kokkos::is_sorted_iterator_api_default", + ex, first, last, std::move(comp)); } -template +template < + typename ExecutionSpace, typename IteratorType, typename ComparatorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_sorted(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, ComparatorType comp) { Impl::static_assert_is_not_openmptarget(ex); - return Impl::is_sorted_impl(label, ex, first, last, std::move(comp)); + return Impl::is_sorted_exespace_impl(label, ex, first, last, 
std::move(comp)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename ComparatorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_sorted(const ExecutionSpace& ex, const ::Kokkos::View& view, ComparatorType comp) { @@ -79,13 +97,15 @@ bool is_sorted(const ExecutionSpace& ex, Impl::static_assert_is_not_openmptarget(ex); namespace KE = ::Kokkos::Experimental; - return Impl::is_sorted_impl("Kokkos::is_sorted_view_api_default", ex, - KE::cbegin(view), KE::cend(view), - std::move(comp)); + return Impl::is_sorted_exespace_impl("Kokkos::is_sorted_view_api_default", ex, + KE::cbegin(view), KE::cend(view), + std::move(comp)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename ComparatorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool is_sorted(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, ComparatorType comp) { @@ -93,8 +113,56 @@ bool is_sorted(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_not_openmptarget(ex); namespace KE = ::Kokkos::Experimental; - return Impl::is_sorted_impl(label, ex, KE::cbegin(view), KE::cend(view), - std::move(comp)); + return Impl::is_sorted_exespace_impl(label, ex, KE::cbegin(view), + KE::cend(view), std::move(comp)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION bool is_sorted(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last) { + return Impl::is_sorted_team_impl(teamHandle, first, last); +} + +template , int> = 0> +KOKKOS_FUNCTION bool is_sorted( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + namespace KE = ::Kokkos::Experimental; + return Impl::is_sorted_team_impl(teamHandle, KE::cbegin(view), + KE::cend(view)); +} + +template , int> = 0> +KOKKOS_FUNCTION bool is_sorted(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + ComparatorType comp) { + Impl::static_assert_is_not_openmptarget(teamHandle); + return Impl::is_sorted_team_impl(teamHandle, first, last, std::move(comp)); +} + +template , int> = 0> +KOKKOS_FUNCTION bool is_sorted( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, ComparatorType comp) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::static_assert_is_not_openmptarget(teamHandle); + + namespace KE = ::Kokkos::Experimental; + return Impl::is_sorted_team_impl(teamHandle, KE::cbegin(view), KE::cend(view), + std::move(comp)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IsSortedUntil.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IsSortedUntil.hpp index 276b3bb884..96a17b6785 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IsSortedUntil.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IsSortedUntil.hpp @@ -23,58 +23,78 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType is_sorted_until(const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::is_sorted_until_impl( + return 
Impl::is_sorted_until_exespace_impl( "Kokkos::is_sorted_until_iterator_api_default", ex, first, last); } -template +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType is_sorted_until(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::is_sorted_until_impl(label, ex, first, last); + return Impl::is_sorted_until_exespace_impl(label, ex, first, last); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto is_sorted_until(const ExecutionSpace& ex, const ::Kokkos::View& view) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::is_sorted_until_impl("Kokkos::is_sorted_until_view_api_default", - ex, KE::begin(view), KE::end(view)); + return Impl::is_sorted_until_exespace_impl( + "Kokkos::is_sorted_until_view_api_default", ex, KE::begin(view), + KE::end(view)); } -template +template < + typename ExecutionSpace, typename DataType, typename... 
Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto is_sorted_until(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::is_sorted_until_impl(label, ex, KE::begin(view), KE::end(view)); + return Impl::is_sorted_until_exespace_impl(label, ex, KE::begin(view), + KE::end(view)); } -template +template < + typename ExecutionSpace, typename IteratorType, typename ComparatorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType is_sorted_until(const ExecutionSpace& ex, IteratorType first, IteratorType last, ComparatorType comp) { Impl::static_assert_is_not_openmptarget(ex); - return Impl::is_sorted_until_impl( + return Impl::is_sorted_until_exespace_impl( "Kokkos::is_sorted_until_iterator_api_default", ex, first, last, std::move(comp)); } -template +template < + typename ExecutionSpace, typename IteratorType, typename ComparatorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType is_sorted_until(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, ComparatorType comp) { Impl::static_assert_is_not_openmptarget(ex); - return Impl::is_sorted_until_impl(label, ex, first, last, std::move(comp)); + return Impl::is_sorted_until_exespace_impl(label, ex, first, last, + std::move(comp)); } -template +template < + typename ExecutionSpace, typename DataType, typename... 
Properties, + typename ComparatorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto is_sorted_until(const ExecutionSpace& ex, const ::Kokkos::View& view, ComparatorType comp) { @@ -82,13 +102,15 @@ auto is_sorted_until(const ExecutionSpace& ex, Impl::static_assert_is_not_openmptarget(ex); namespace KE = ::Kokkos::Experimental; - return Impl::is_sorted_until_impl("Kokkos::is_sorted_until_view_api_default", - ex, KE::begin(view), KE::end(view), - std::move(comp)); + return Impl::is_sorted_until_exespace_impl( + "Kokkos::is_sorted_until_view_api_default", ex, KE::begin(view), + KE::end(view), std::move(comp)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename ComparatorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto is_sorted_until(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, ComparatorType comp) { @@ -96,8 +118,57 @@ auto is_sorted_until(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_not_openmptarget(ex); namespace KE = ::Kokkos::Experimental; - return Impl::is_sorted_until_impl(label, ex, KE::begin(view), KE::end(view), - std::move(comp)); + return Impl::is_sorted_until_exespace_impl(label, ex, KE::begin(view), + KE::end(view), std::move(comp)); +} + +// +// overload set accepting team handle +// +template , int> = 0> +KOKKOS_FUNCTION IteratorType is_sorted_until(const TeamHandleType& teamHandle, + IteratorType first, + IteratorType last) { + return Impl::is_sorted_until_team_impl(teamHandle, first, last); +} + +template , int> = 0> +KOKKOS_FUNCTION auto is_sorted_until( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + namespace KE = ::Kokkos::Experimental; + return Impl::is_sorted_until_team_impl(teamHandle, KE::begin(view), + KE::end(view)); +} + +template , int> = 0> +KOKKOS_FUNCTION IteratorType 
is_sorted_until(const TeamHandleType& teamHandle, + IteratorType first, + IteratorType last, + ComparatorType comp) { + Impl::static_assert_is_not_openmptarget(teamHandle); + return Impl::is_sorted_until_team_impl(teamHandle, first, last, + std::move(comp)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto is_sorted_until( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, ComparatorType comp) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::static_assert_is_not_openmptarget(teamHandle); + + namespace KE = ::Kokkos::Experimental; + return Impl::is_sorted_until_team_impl(teamHandle, KE::begin(view), + KE::end(view), std::move(comp)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_LexicographicalCompare.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_LexicographicalCompare.hpp index 0a77ef629f..4b5c69df45 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_LexicographicalCompare.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_LexicographicalCompare.hpp @@ -23,25 +23,34 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + class ExecutionSpace, class IteratorType1, class IteratorType2, + std::enable_if_t, int> = 0> bool lexicographical_compare(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, IteratorType2 last2) { - return Impl::lexicographical_compare_impl( + return Impl::lexicographical_compare_exespace_impl( "Kokkos::lexicographical_compare_iterator_api_default", ex, first1, last1, first2, last2); } -template +template < + class ExecutionSpace, class IteratorType1, class IteratorType2, + std::enable_if_t, int> = 0> bool lexicographical_compare(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, IteratorType2 last2) { - return Impl::lexicographical_compare_impl(label, ex, first1, 
last1, first2, - last2); + return Impl::lexicographical_compare_exespace_impl(label, ex, first1, last1, + first2, last2); } -template +template < + class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2, + std::enable_if_t, int> = 0> bool lexicographical_compare( const ExecutionSpace& ex, const ::Kokkos::View& view1, @@ -50,13 +59,15 @@ bool lexicographical_compare( Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; - return Impl::lexicographical_compare_impl( + return Impl::lexicographical_compare_exespace_impl( "Kokkos::lexicographical_compare_view_api_default", ex, KE::cbegin(view1), KE::cend(view1), KE::cbegin(view2), KE::cend(view2)); } -template +template < + class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2, + std::enable_if_t, int> = 0> bool lexicographical_compare( const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view1, @@ -65,33 +76,39 @@ bool lexicographical_compare( Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; - return Impl::lexicographical_compare_impl(label, ex, KE::cbegin(view1), - KE::cend(view1), KE::cbegin(view2), - KE::cend(view2)); + return Impl::lexicographical_compare_exespace_impl( + label, ex, KE::cbegin(view1), KE::cend(view1), KE::cbegin(view2), + KE::cend(view2)); } -template +template < + class ExecutionSpace, class IteratorType1, class IteratorType2, + class ComparatorType, + std::enable_if_t, int> = 0> bool lexicographical_compare(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, IteratorType2 last2, ComparatorType comp) { - return Impl::lexicographical_compare_impl( + return Impl::lexicographical_compare_exespace_impl( "Kokkos::lexicographical_compare_iterator_api_default", ex, first1, last1, first2, last2, comp); } -template +template < + class ExecutionSpace, 
class IteratorType1, class IteratorType2, + class ComparatorType, + std::enable_if_t, int> = 0> bool lexicographical_compare(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, IteratorType2 last2, ComparatorType comp) { - return Impl::lexicographical_compare_impl(label, ex, first1, last1, first2, - last2, comp); + return Impl::lexicographical_compare_exespace_impl(label, ex, first1, last1, + first2, last2, comp); } -template +template < + class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2, class ComparatorType, + std::enable_if_t, int> = 0> bool lexicographical_compare( const ExecutionSpace& ex, const ::Kokkos::View& view1, @@ -100,13 +117,15 @@ bool lexicographical_compare( Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; - return Impl::lexicographical_compare_impl( + return Impl::lexicographical_compare_exespace_impl( "Kokkos::lexicographical_compare_view_api_default", ex, KE::cbegin(view1), KE::cend(view1), KE::cbegin(view2), KE::cend(view2), comp); } -template +template < + class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... 
Properties2, class ComparatorType, + std::enable_if_t, int> = 0> bool lexicographical_compare( const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view1, @@ -115,9 +134,67 @@ bool lexicographical_compare( Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; - return Impl::lexicographical_compare_impl(label, ex, KE::cbegin(view1), - KE::cend(view1), KE::cbegin(view2), - KE::cend(view2), comp); + return Impl::lexicographical_compare_exespace_impl( + label, ex, KE::cbegin(view1), KE::cend(view1), KE::cbegin(view2), + KE::cend(view2), comp); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION bool lexicographical_compare(const TeamHandleType& teamHandle, + IteratorType1 first1, + IteratorType1 last1, + IteratorType2 first2, + IteratorType2 last2) { + return Impl::lexicographical_compare_team_impl(teamHandle, first1, last1, + first2, last2); +} + +template , int> = 0> +KOKKOS_FUNCTION bool lexicographical_compare( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view1, + ::Kokkos::View& view2) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); + + namespace KE = ::Kokkos::Experimental; + return Impl::lexicographical_compare_team_impl( + teamHandle, KE::cbegin(view1), KE::cend(view1), KE::cbegin(view2), + KE::cend(view2)); +} + +template , int> = 0> +KOKKOS_FUNCTION bool lexicographical_compare( + const TeamHandleType& teamHandle, IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, IteratorType2 last2, ComparatorType comp) { + return Impl::lexicographical_compare_team_impl(teamHandle, first1, last1, + first2, last2, comp); +} + +template , int> = 0> +KOKKOS_FUNCTION bool lexicographical_compare( + const TeamHandleType& 
teamHandle, + const ::Kokkos::View& view1, + ::Kokkos::View& view2, ComparatorType comp) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); + + namespace KE = ::Kokkos::Experimental; + return Impl::lexicographical_compare_team_impl( + teamHandle, KE::cbegin(view1), KE::cend(view1), KE::cbegin(view2), + KE::cend(view2), comp); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MaxElement.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MaxElement.hpp index 2c1374f700..d16bac5bfc 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MaxElement.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MaxElement.hpp @@ -23,81 +23,148 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto max_element(const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::min_or_max_element_impl( + return Impl::min_or_max_element_exespace_impl( "Kokkos::max_element_iterator_api_default", ex, first, last); } -template +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto max_element(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::min_or_max_element_impl(label, ex, first, last); + return Impl::min_or_max_element_exespace_impl(label, ex, first, + last); } -template +template < + typename ExecutionSpace, typename IteratorType, typename ComparatorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto max_element(const ExecutionSpace& ex, IteratorType first, IteratorType last, ComparatorType comp) { Impl::static_assert_is_not_openmptarget(ex); - return Impl::min_or_max_element_impl( + return 
Impl::min_or_max_element_exespace_impl( "Kokkos::max_element_iterator_api_default", ex, first, last, std::move(comp)); } -template +template < + typename ExecutionSpace, typename IteratorType, typename ComparatorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto max_element(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, ComparatorType comp) { Impl::static_assert_is_not_openmptarget(ex); - return Impl::min_or_max_element_impl( + return Impl::min_or_max_element_exespace_impl( label, ex, first, last, std::move(comp)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto max_element(const ExecutionSpace& ex, const ::Kokkos::View& v) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - return Impl::min_or_max_element_impl( + return Impl::min_or_max_element_exespace_impl( "Kokkos::max_element_view_api_default", ex, begin(v), end(v)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto max_element(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - return Impl::min_or_max_element_impl(label, ex, begin(v), - end(v)); + return Impl::min_or_max_element_exespace_impl(label, ex, + begin(v), end(v)); } -template +template < + typename ExecutionSpace, typename DataType, typename ComparatorType, + typename... 
Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto max_element(const ExecutionSpace& ex, const ::Kokkos::View& v, ComparatorType comp) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); Impl::static_assert_is_not_openmptarget(ex); - return Impl::min_or_max_element_impl( + return Impl::min_or_max_element_exespace_impl( "Kokkos::max_element_view_api_default", ex, begin(v), end(v), std::move(comp)); } -template +template < + typename ExecutionSpace, typename DataType, typename ComparatorType, + typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto max_element(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, ComparatorType comp) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); Impl::static_assert_is_not_openmptarget(ex); - return Impl::min_or_max_element_impl( + return Impl::min_or_max_element_exespace_impl( label, ex, begin(v), end(v), std::move(comp)); } +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION auto max_element(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last) { + return Impl::min_or_max_element_team_impl(teamHandle, first, + last); +} + +template , int> = 0> +KOKKOS_FUNCTION auto max_element( + const TeamHandleType& teamHandle, + const ::Kokkos::View& v) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + return Impl::min_or_max_element_team_impl(teamHandle, begin(v), + end(v)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto max_element(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + ComparatorType comp) { + Impl::static_assert_is_not_openmptarget(teamHandle); + return Impl::min_or_max_element_team_impl( + teamHandle, first, last, std::move(comp)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto max_element( + const TeamHandleType& teamHandle, + const ::Kokkos::View& v, ComparatorType comp) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + Impl::static_assert_is_not_openmptarget(teamHandle); + return Impl::min_or_max_element_team_impl( + teamHandle, begin(v), end(v), std::move(comp)); +} + } // namespace Experimental } // namespace Kokkos diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MinElement.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MinElement.hpp index 1d03b7c962..2a53fce3e2 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MinElement.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MinElement.hpp @@ -23,81 +23,148 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto min_element(const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::min_or_max_element_impl( + return Impl::min_or_max_element_exespace_impl( "Kokkos::min_element_iterator_api_default", ex, first, 
last); } -template +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto min_element(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::min_or_max_element_impl(label, ex, first, last); + return Impl::min_or_max_element_exespace_impl(label, ex, first, + last); } -template +template < + typename ExecutionSpace, typename IteratorType, typename ComparatorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto min_element(const ExecutionSpace& ex, IteratorType first, IteratorType last, ComparatorType comp) { Impl::static_assert_is_not_openmptarget(ex); - return Impl::min_or_max_element_impl( + return Impl::min_or_max_element_exespace_impl( "Kokkos::min_element_iterator_api_default", ex, first, last, std::move(comp)); } -template +template < + typename ExecutionSpace, typename IteratorType, typename ComparatorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto min_element(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, ComparatorType comp) { Impl::static_assert_is_not_openmptarget(ex); - return Impl::min_or_max_element_impl( + return Impl::min_or_max_element_exespace_impl( label, ex, first, last, std::move(comp)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto min_element(const ExecutionSpace& ex, const ::Kokkos::View& v) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - return Impl::min_or_max_element_impl( + return Impl::min_or_max_element_exespace_impl( "Kokkos::min_element_view_api_default", ex, begin(v), end(v)); } -template +template < + typename ExecutionSpace, typename DataType, typename ComparatorType, + typename... 
Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto min_element(const ExecutionSpace& ex, const ::Kokkos::View& v, ComparatorType comp) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); Impl::static_assert_is_not_openmptarget(ex); - return Impl::min_or_max_element_impl( + return Impl::min_or_max_element_exespace_impl( "Kokkos::min_element_view_api_default", ex, begin(v), end(v), std::move(comp)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto min_element(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - return Impl::min_or_max_element_impl(label, ex, begin(v), - end(v)); + return Impl::min_or_max_element_exespace_impl(label, ex, + begin(v), end(v)); } -template +template < + typename ExecutionSpace, typename DataType, typename ComparatorType, + typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto min_element(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, ComparatorType comp) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); Impl::static_assert_is_not_openmptarget(ex); - return Impl::min_or_max_element_impl( + return Impl::min_or_max_element_exespace_impl( label, ex, begin(v), end(v), std::move(comp)); } +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION auto min_element(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last) { + return Impl::min_or_max_element_team_impl(teamHandle, first, + last); +} + +template , int> = 0> +KOKKOS_FUNCTION auto min_element( + const TeamHandleType& teamHandle, + const ::Kokkos::View& v) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + return Impl::min_or_max_element_team_impl(teamHandle, begin(v), + end(v)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto min_element(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + ComparatorType comp) { + Impl::static_assert_is_not_openmptarget(teamHandle); + return Impl::min_or_max_element_team_impl( + teamHandle, first, last, std::move(comp)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto min_element( + const TeamHandleType& teamHandle, + const ::Kokkos::View& v, ComparatorType comp) { + Impl::static_assert_is_not_openmptarget(teamHandle); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + return Impl::min_or_max_element_team_impl( + teamHandle, begin(v), end(v), std::move(comp)); +} + } // namespace Experimental } // namespace Kokkos diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MinMaxElement.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MinMaxElement.hpp index d481b499cc..c3a1f73ef6 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MinMaxElement.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MinMaxElement.hpp @@ -23,82 +23,151 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto minmax_element(const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::minmax_element_impl( + return Impl::minmax_element_exespace_impl( "Kokkos::minmax_element_iterator_api_default", ex, 
first, last); } -template +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto minmax_element(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::minmax_element_impl(label, ex, first, last); + return Impl::minmax_element_exespace_impl(label, ex, + first, last); } -template +template < + typename ExecutionSpace, typename IteratorType, typename ComparatorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto minmax_element(const ExecutionSpace& ex, IteratorType first, IteratorType last, ComparatorType comp) { Impl::static_assert_is_not_openmptarget(ex); - return Impl::minmax_element_impl( + return Impl::minmax_element_exespace_impl( "Kokkos::minmax_element_iterator_api_default", ex, first, last, std::move(comp)); } -template +template < + typename ExecutionSpace, typename IteratorType, typename ComparatorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto minmax_element(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, ComparatorType comp) { Impl::static_assert_is_not_openmptarget(ex); - return Impl::minmax_element_impl( + return Impl::minmax_element_exespace_impl( label, ex, first, last, std::move(comp)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto minmax_element(const ExecutionSpace& ex, const ::Kokkos::View& v) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - return Impl::minmax_element_impl( + return Impl::minmax_element_exespace_impl( "Kokkos::minmax_element_view_api_default", ex, begin(v), end(v)); } -template +template < + typename ExecutionSpace, typename DataType, typename... 
Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto minmax_element(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - return Impl::minmax_element_impl(label, ex, begin(v), - end(v)); + return Impl::minmax_element_exespace_impl( + label, ex, begin(v), end(v)); } -template +template < + typename ExecutionSpace, typename DataType, typename ComparatorType, + typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto minmax_element(const ExecutionSpace& ex, const ::Kokkos::View& v, ComparatorType comp) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); Impl::static_assert_is_not_openmptarget(ex); - return Impl::minmax_element_impl( + return Impl::minmax_element_exespace_impl( "Kokkos::minmax_element_view_api_default", ex, begin(v), end(v), std::move(comp)); } -template +template < + typename ExecutionSpace, typename DataType, typename ComparatorType, + typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto minmax_element(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, ComparatorType comp) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); Impl::static_assert_is_not_openmptarget(ex); - return Impl::minmax_element_impl( + return Impl::minmax_element_exespace_impl( label, ex, begin(v), end(v), std::move(comp)); } +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION auto minmax_element(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last) { + return Impl::minmax_element_team_impl(teamHandle, first, + last); +} + +template , int> = 0> +KOKKOS_FUNCTION auto minmax_element(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + ComparatorType comp) { + Impl::static_assert_is_not_openmptarget(teamHandle); + + return Impl::minmax_element_team_impl( + teamHandle, first, last, std::move(comp)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto minmax_element( + const TeamHandleType& teamHandle, + const ::Kokkos::View& v) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + return Impl::minmax_element_team_impl(teamHandle, + begin(v), end(v)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto minmax_element( + const TeamHandleType& teamHandle, + const ::Kokkos::View& v, ComparatorType comp) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + Impl::static_assert_is_not_openmptarget(teamHandle); + + return Impl::minmax_element_team_impl( + teamHandle, begin(v), end(v), std::move(comp)); +} + } // namespace Experimental } // namespace Kokkos diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Mismatch.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Mismatch.hpp index 13c994ca90..090afe69e3 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Mismatch.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Mismatch.hpp @@ -30,46 +30,60 @@ namespace Experimental { // // makes API ambiguous (with the overload accepting views). 
-template +// +// overload set accepting execution space +// +template < + class ExecutionSpace, class IteratorType1, class IteratorType2, + std::enable_if_t, int> = 0> ::Kokkos::pair mismatch(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, IteratorType2 last2) { - return Impl::mismatch_impl("Kokkos::mismatch_iterator_api_default", ex, - first1, last1, first2, last2); + return Impl::mismatch_exespace_impl("Kokkos::mismatch_iterator_api_default", + ex, first1, last1, first2, last2); } -template +template < + class ExecutionSpace, class IteratorType1, class IteratorType2, + class BinaryPredicateType, + std::enable_if_t, int> = 0> ::Kokkos::pair mismatch( const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, IteratorType2 last2, BinaryPredicateType&& predicate) { - return Impl::mismatch_impl("Kokkos::mismatch_iterator_api_default", ex, - first1, last1, first2, last2, - std::forward(predicate)); + return Impl::mismatch_exespace_impl( + "Kokkos::mismatch_iterator_api_default", ex, first1, last1, first2, last2, + std::forward(predicate)); } -template +template < + class ExecutionSpace, class IteratorType1, class IteratorType2, + std::enable_if_t, int> = 0> ::Kokkos::pair mismatch( const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, IteratorType2 last2) { - return Impl::mismatch_impl(label, ex, first1, last1, first2, last2); + return Impl::mismatch_exespace_impl(label, ex, first1, last1, first2, last2); } -template +template < + class ExecutionSpace, class IteratorType1, class IteratorType2, + class BinaryPredicateType, + std::enable_if_t, int> = 0> ::Kokkos::pair mismatch( const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, IteratorType2 last2, BinaryPredicateType&& predicate) { - return Impl::mismatch_impl(label, ex, first1, last1, first2, last2, - 
std::forward(predicate)); + return Impl::mismatch_exespace_impl( + label, ex, first1, last1, first2, last2, + std::forward(predicate)); } -template +template < + class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2, + std::enable_if_t, int> = 0> auto mismatch(const ExecutionSpace& ex, const ::Kokkos::View& view1, const ::Kokkos::View& view2) { @@ -77,13 +91,15 @@ auto mismatch(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; - return Impl::mismatch_impl("Kokkos::mismatch_view_api_default", ex, - KE::begin(view1), KE::end(view1), KE::begin(view2), - KE::end(view2)); + return Impl::mismatch_exespace_impl("Kokkos::mismatch_view_api_default", ex, + KE::begin(view1), KE::end(view1), + KE::begin(view2), KE::end(view2)); } -template +template < + class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2, class BinaryPredicateType, + std::enable_if_t, int> = 0> auto mismatch(const ExecutionSpace& ex, const ::Kokkos::View& view1, const ::Kokkos::View& view2, @@ -92,14 +108,16 @@ auto mismatch(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; - return Impl::mismatch_impl("Kokkos::mismatch_view_api_default", ex, - KE::begin(view1), KE::end(view1), KE::begin(view2), - KE::end(view2), - std::forward(predicate)); + return Impl::mismatch_exespace_impl( + "Kokkos::mismatch_view_api_default", ex, KE::begin(view1), KE::end(view1), + KE::begin(view2), KE::end(view2), + std::forward(predicate)); } -template +template < + class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... 
Properties2, + std::enable_if_t, int> = 0> auto mismatch(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view1, const ::Kokkos::View& view2) { @@ -107,12 +125,15 @@ auto mismatch(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; - return Impl::mismatch_impl(label, ex, KE::begin(view1), KE::end(view1), - KE::begin(view2), KE::end(view2)); + return Impl::mismatch_exespace_impl(label, ex, KE::begin(view1), + KE::end(view1), KE::begin(view2), + KE::end(view2)); } -template +template < + class ExecutionSpace, class DataType1, class... Properties1, + class DataType2, class... Properties2, class BinaryPredicateType, + std::enable_if_t, int> = 0> auto mismatch(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view1, const ::Kokkos::View& view2, @@ -121,9 +142,65 @@ auto mismatch(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); namespace KE = ::Kokkos::Experimental; - return Impl::mismatch_impl(label, ex, KE::begin(view1), KE::end(view1), - KE::begin(view2), KE::end(view2), - std::forward(predicate)); + return Impl::mismatch_exespace_impl( + label, ex, KE::begin(view1), KE::end(view1), KE::begin(view2), + KE::end(view2), std::forward(predicate)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION ::Kokkos::pair mismatch( + const TeamHandleType& teamHandle, IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, IteratorType2 last2) { + return Impl::mismatch_team_impl(teamHandle, first1, last1, first2, last2); +} + +template , int> = 0> +KOKKOS_FUNCTION ::Kokkos::pair mismatch( + const TeamHandleType& teamHandle, IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, IteratorType2 last2, + BinaryPredicateType&& predicate) { + return Impl::mismatch_team_impl(teamHandle, first1, last1, first2, last2, + std::forward(predicate)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto mismatch( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view1, + const ::Kokkos::View& view2) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); + + namespace KE = ::Kokkos::Experimental; + return Impl::mismatch_team_impl(teamHandle, KE::begin(view1), KE::end(view1), + KE::begin(view2), KE::end(view2)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto mismatch( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view1, + const ::Kokkos::View& view2, + BinaryPredicateType&& predicate) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); + + namespace KE = ::Kokkos::Experimental; + return Impl::mismatch_team_impl(teamHandle, KE::begin(view1), KE::end(view1), + KE::begin(view2), KE::end(view2), + std::forward(predicate)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Move.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Move.hpp index d49acd9f70..f04ea12ba8 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Move.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Move.hpp @@ -23,41 +23,81 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set 
accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator move(const ExecutionSpace& ex, InputIterator first, InputIterator last, OutputIterator d_first) { - return Impl::move_impl("Kokkos::move_iterator_api_default", ex, first, last, - d_first); + return Impl::move_exespace_impl("Kokkos::move_iterator_api_default", ex, + first, last, d_first); } -template +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator move(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator last, OutputIterator d_first) { - return Impl::move_impl(label, ex, first, last, d_first); + return Impl::move_exespace_impl(label, ex, first, last, d_first); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto move(const ExecutionSpace& ex, const ::Kokkos::View& source, ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::move_impl("Kokkos::move_view_api_default", ex, begin(source), - end(source), begin(dest)); + return Impl::move_exespace_impl("Kokkos::move_view_api_default", ex, + begin(source), end(source), begin(dest)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto move(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::move_impl(label, ex, begin(source), end(source), begin(dest)); + return Impl::move_exespace_impl(label, ex, begin(source), end(source), + begin(dest)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION OutputIterator move(const TeamHandleType& teamHandle, + InputIterator first, InputIterator last, + OutputIterator d_first) { + return Impl::move_team_impl(teamHandle, first, last, d_first); +} + +template , int> = 0> +KOKKOS_FUNCTION auto move( + const TeamHandleType& teamHandle, + const ::Kokkos::View& source, + ::Kokkos::View& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::move_team_impl(teamHandle, begin(source), end(source), + begin(dest)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MoveBackward.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MoveBackward.hpp index 60d50fa881..375474ca57 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MoveBackward.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MoveBackward.hpp @@ -23,42 +23,83 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType2 move_backward(const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, 
IteratorType2 d_last) { - return Impl::move_backward_impl("Kokkos::move_backward_iterator_api_default", - ex, first, last, d_last); + return Impl::move_backward_exespace_impl( + "Kokkos::move_backward_iterator_api_default", ex, first, last, d_last); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto move_backward(const ExecutionSpace& ex, const ::Kokkos::View& source, ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::move_backward_impl("Kokkos::move_backward_view_api_default", ex, - begin(source), end(source), end(dest)); + return Impl::move_backward_exespace_impl( + "Kokkos::move_backward_view_api_default", ex, begin(source), end(source), + end(dest)); } -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType2 move_backward(const std::string& label, const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 d_last) { - return Impl::move_backward_impl(label, ex, first, last, d_last); + return Impl::move_backward_exespace_impl(label, ex, first, last, d_last); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto move_backward(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::move_backward_impl(label, ex, begin(source), end(source), - end(dest)); + return Impl::move_backward_exespace_impl(label, ex, begin(source), + end(source), end(dest)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION IteratorType2 move_backward(const TeamHandleType& teamHandle, + IteratorType1 first, + IteratorType1 last, + IteratorType2 d_last) { + return Impl::move_backward_team_impl(teamHandle, first, last, d_last); +} + +template , int> = 0> +KOKKOS_FUNCTION auto move_backward( + const TeamHandleType& teamHandle, + const ::Kokkos::View& source, + ::Kokkos::View& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::move_backward_team_impl(teamHandle, begin(source), end(source), + end(dest)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_NoneOf.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_NoneOf.hpp index cf5de3b72b..f7baab3fc0 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_NoneOf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_NoneOf.hpp @@ -23,41 +23,80 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool none_of(const ExecutionSpace& ex, IteratorType first, 
IteratorType last, Predicate predicate) { - return Impl::none_of_impl("Kokkos::none_of_iterator_api_default", ex, first, - last, predicate); + return Impl::none_of_exespace_impl("Kokkos::none_of_iterator_api_default", ex, + first, last, predicate); } -template +template < + typename ExecutionSpace, typename IteratorType, typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool none_of(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, Predicate predicate) { - return Impl::none_of_impl(label, ex, first, last, predicate); + return Impl::none_of_exespace_impl(label, ex, first, last, predicate); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool none_of(const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::none_of_impl("Kokkos::none_of_view_api_default", ex, - KE::cbegin(v), KE::cend(v), std::move(predicate)); + return Impl::none_of_exespace_impl("Kokkos::none_of_view_api_default", ex, + KE::cbegin(v), KE::cend(v), + std::move(predicate)); } -template +template < + typename ExecutionSpace, typename DataType, typename... 
Properties, + typename Predicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> bool none_of(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, Predicate predicate) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); namespace KE = ::Kokkos::Experimental; - return Impl::none_of_impl(label, ex, KE::cbegin(v), KE::cend(v), - std::move(predicate)); + return Impl::none_of_exespace_impl(label, ex, KE::cbegin(v), KE::cend(v), + std::move(predicate)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template +KOKKOS_FUNCTION + std::enable_if_t<::Kokkos::is_team_handle::value, bool> + none_of(const TeamHandleType& teamHandle, IteratorType first, + IteratorType last, Predicate predicate) { + return Impl::none_of_team_impl(teamHandle, first, last, predicate); +} + +template +KOKKOS_FUNCTION + std::enable_if_t<::Kokkos::is_team_handle::value, bool> + none_of(const TeamHandleType& teamHandle, + const ::Kokkos::View& v, + Predicate predicate) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + namespace KE = ::Kokkos::Experimental; + return Impl::none_of_team_impl(teamHandle, KE::cbegin(v), KE::cend(v), + std::move(predicate)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_PartitionCopy.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_PartitionCopy.hpp index 38c0a35b62..a1feee8d6d 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_PartitionCopy.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_PartitionCopy.hpp @@ -23,57 +23,103 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIteratorType, + typename OutputIteratorTrueType, typename OutputIteratorFalseType, + typename PredicateType, + 
std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> ::Kokkos::pair partition_copy( const ExecutionSpace& ex, InputIteratorType from_first, InputIteratorType from_last, OutputIteratorTrueType to_first_true, OutputIteratorFalseType to_first_false, PredicateType p) { - return Impl::partition_copy_impl( + return Impl::partition_copy_exespace_impl( "Kokkos::partition_copy_iterator_api_default", ex, from_first, from_last, to_first_true, to_first_false, std::move(p)); } -template +template < + typename ExecutionSpace, typename InputIteratorType, + typename OutputIteratorTrueType, typename OutputIteratorFalseType, + typename PredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> ::Kokkos::pair partition_copy( const std::string& label, const ExecutionSpace& ex, InputIteratorType from_first, InputIteratorType from_last, OutputIteratorTrueType to_first_true, OutputIteratorFalseType to_first_false, PredicateType p) { - return Impl::partition_copy_impl(label, ex, from_first, from_last, - to_first_true, to_first_false, std::move(p)); + return Impl::partition_copy_exespace_impl(label, ex, from_first, from_last, + to_first_true, to_first_false, + std::move(p)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename DataType3, + typename... 
Properties3, typename PredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto partition_copy( const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest_true, const ::Kokkos::View& view_dest_false, PredicateType p) { - return Impl::partition_copy_impl("Kokkos::partition_copy_view_api_default", - ex, cbegin(view_from), cend(view_from), - begin(view_dest_true), - begin(view_dest_false), std::move(p)); + return Impl::partition_copy_exespace_impl( + "Kokkos::partition_copy_view_api_default", ex, cbegin(view_from), + cend(view_from), begin(view_dest_true), begin(view_dest_false), + std::move(p)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename DataType3, + typename... Properties3, typename PredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto partition_copy( const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest_true, const ::Kokkos::View& view_dest_false, PredicateType p) { - return Impl::partition_copy_impl(label, ex, cbegin(view_from), - cend(view_from), begin(view_dest_true), - begin(view_dest_false), std::move(p)); + return Impl::partition_copy_exespace_impl( + label, ex, cbegin(view_from), cend(view_from), begin(view_dest_true), + begin(view_dest_false), std::move(p)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION ::Kokkos::pair +partition_copy(const TeamHandleType& teamHandle, InputIteratorType from_first, + InputIteratorType from_last, + OutputIteratorTrueType to_first_true, + OutputIteratorFalseType to_first_false, PredicateType p) { + return Impl::partition_copy_team_impl(teamHandle, from_first, from_last, + to_first_true, to_first_false, + std::move(p)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto partition_copy( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest_true, + const ::Kokkos::View& view_dest_false, + PredicateType p) { + return Impl::partition_copy_team_impl(teamHandle, cbegin(view_from), + cend(view_from), begin(view_dest_true), + begin(view_dest_false), std::move(p)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_PartitionPoint.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_PartitionPoint.hpp index 24798e377e..60cbeeda87 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_PartitionPoint.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_PartitionPoint.hpp @@ -23,38 +23,78 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, typename UnaryPredicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType partition_point(const ExecutionSpace& ex, IteratorType first, IteratorType last, UnaryPredicate p) { - return Impl::partition_point_impl( + return Impl::partition_point_exespace_impl( "Kokkos::partitioned_point_iterator_api_default", ex, first, last, std::move(p)); } -template +template < + typename ExecutionSpace, typename IteratorType, typename UnaryPredicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType partition_point(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, UnaryPredicate p) { 
- return Impl::partition_point_impl(label, ex, first, last, std::move(p)); + return Impl::partition_point_exespace_impl(label, ex, first, last, + std::move(p)); } -template +template < + typename ExecutionSpace, typename UnaryPredicate, typename DataType, + typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto partition_point(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& v, UnaryPredicate p) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - return Impl::partition_point_impl(label, ex, begin(v), end(v), std::move(p)); + return Impl::partition_point_exespace_impl(label, ex, begin(v), end(v), + std::move(p)); } -template +template < + typename ExecutionSpace, typename UnaryPredicate, typename DataType, + typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto partition_point(const ExecutionSpace& ex, const ::Kokkos::View& v, UnaryPredicate p) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); - return Impl::partition_point_impl("Kokkos::partition_point_view_api_default", - ex, begin(v), end(v), std::move(p)); + return Impl::partition_point_exespace_impl( + "Kokkos::partition_point_view_api_default", ex, begin(v), end(v), + std::move(p)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION IteratorType partition_point(const TeamHandleType& teamHandle, + IteratorType first, + IteratorType last, + UnaryPredicate p) { + return Impl::partition_point_team_impl(teamHandle, first, last, std::move(p)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto partition_point( + const TeamHandleType& teamHandle, + const ::Kokkos::View& v, UnaryPredicate p) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + return Impl::partition_point_team_impl(teamHandle, begin(v), end(v), + std::move(p)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Reduce.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Reduce.hpp index a31fa1497a..b84f00f8bb 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Reduce.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Reduce.hpp @@ -23,28 +23,38 @@ namespace Kokkos { namespace Experimental { +// +// overload set accepting execution space +// + // // overload set 1 // -template +template ::value, + int> = 0> typename IteratorType::value_type reduce(const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::reduce_default_functors_impl( + return Impl::reduce_default_functors_exespace_impl( "Kokkos::reduce_default_functors_iterator_api", ex, first, last, typename IteratorType::value_type()); } -template +template ::value, + int> = 0> typename IteratorType::value_type reduce(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::reduce_default_functors_impl( + return Impl::reduce_default_functors_exespace_impl( label, ex, first, last, typename IteratorType::value_type()); } -template +template ::value, + int> = 0> auto reduce(const ExecutionSpace& ex, const ::Kokkos::View& view) { namespace KE = ::Kokkos::Experimental; @@ -53,12 +63,14 @@ auto reduce(const ExecutionSpace& ex, using view_type = ::Kokkos::View; using value_type = typename 
view_type::value_type; - return Impl::reduce_default_functors_impl( + return Impl::reduce_default_functors_exespace_impl( "Kokkos::reduce_default_functors_view_api", ex, KE::cbegin(view), KE::cend(view), value_type()); } -template +template ::value, + int> = 0> auto reduce(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view) { namespace KE = ::Kokkos::Experimental; @@ -67,37 +79,43 @@ auto reduce(const std::string& label, const ExecutionSpace& ex, using view_type = ::Kokkos::View; using value_type = typename view_type::value_type; - return Impl::reduce_default_functors_impl(label, ex, KE::cbegin(view), - KE::cend(view), value_type()); + return Impl::reduce_default_functors_exespace_impl( + label, ex, KE::cbegin(view), KE::cend(view), value_type()); } // // overload set2: // -template +template ::value, + int> = 0> ValueType reduce(const ExecutionSpace& ex, IteratorType first, IteratorType last, ValueType init_reduction_value) { static_assert(std::is_move_constructible::value, "ValueType must be move constructible."); - return Impl::reduce_default_functors_impl( + return Impl::reduce_default_functors_exespace_impl( "Kokkos::reduce_default_functors_iterator_api", ex, first, last, init_reduction_value); } -template +template ::value, + int> = 0> ValueType reduce(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, ValueType init_reduction_value) { static_assert(std::is_move_constructible::value, "ValueType must be move constructible."); - return Impl::reduce_default_functors_impl(label, ex, first, last, - init_reduction_value); + return Impl::reduce_default_functors_exespace_impl(label, ex, first, last, + init_reduction_value); } -template +template ::value, + int> = 0> ValueType reduce(const ExecutionSpace& ex, const ::Kokkos::View& view, ValueType init_reduction_value) { @@ -107,13 +125,15 @@ ValueType reduce(const ExecutionSpace& ex, namespace KE = ::Kokkos::Experimental; 
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::reduce_default_functors_impl( + return Impl::reduce_default_functors_exespace_impl( "Kokkos::reduce_default_functors_view_api", ex, KE::cbegin(view), KE::cend(view), init_reduction_value); } -template +template ::value, + int> = 0> ValueType reduce(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, ValueType init_reduction_value) { @@ -123,40 +143,46 @@ ValueType reduce(const std::string& label, const ExecutionSpace& ex, namespace KE = ::Kokkos::Experimental; Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::reduce_default_functors_impl( + return Impl::reduce_default_functors_exespace_impl( label, ex, KE::cbegin(view), KE::cend(view), init_reduction_value); } // // overload set 3 // -template +template ::value, + int> = 0> ValueType reduce(const ExecutionSpace& ex, IteratorType first, IteratorType last, ValueType init_reduction_value, BinaryOp joiner) { static_assert(std::is_move_constructible::value, "ValueType must be move constructible."); - return Impl::reduce_custom_functors_impl( + return Impl::reduce_custom_functors_exespace_impl( "Kokkos::reduce_default_functors_iterator_api", ex, first, last, init_reduction_value, joiner); } -template +template ::value, + int> = 0> ValueType reduce(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, ValueType init_reduction_value, BinaryOp joiner) { static_assert(std::is_move_constructible::value, "ValueType must be move constructible."); - return Impl::reduce_custom_functors_impl(label, ex, first, last, - init_reduction_value, joiner); + return Impl::reduce_custom_functors_exespace_impl( + label, ex, first, last, init_reduction_value, joiner); } -template +template ::value, + int> = 0> ValueType reduce(const ExecutionSpace& ex, const ::Kokkos::View& view, ValueType init_reduction_value, BinaryOp joiner) { @@ -166,13 +192,15 @@ ValueType 
reduce(const ExecutionSpace& ex, namespace KE = ::Kokkos::Experimental; Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::reduce_custom_functors_impl( + return Impl::reduce_custom_functors_exespace_impl( "Kokkos::reduce_custom_functors_view_api", ex, KE::cbegin(view), KE::cend(view), init_reduction_value, joiner); } -template +template ::value, + int> = 0> ValueType reduce(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, ValueType init_reduction_value, BinaryOp joiner) { @@ -182,9 +210,114 @@ ValueType reduce(const std::string& label, const ExecutionSpace& ex, namespace KE = ::Kokkos::Experimental; Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::reduce_custom_functors_impl(label, ex, KE::cbegin(view), - KE::cend(view), init_reduction_value, - joiner); + return Impl::reduce_custom_functors_exespace_impl( + label, ex, KE::cbegin(view), KE::cend(view), init_reduction_value, + joiner); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// + +// +// overload set 1 +// +template < + typename TeamHandleType, typename IteratorType, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION typename IteratorType::value_type reduce( + const TeamHandleType& teamHandle, IteratorType first, IteratorType last) { + return Impl::reduce_default_functors_team_impl( + teamHandle, first, last, typename IteratorType::value_type()); +} + +template < + typename TeamHandleType, typename DataType, typename... 
Properties, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION auto reduce( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view) { + namespace KE = ::Kokkos::Experimental; + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + using view_type = ::Kokkos::View; + using value_type = typename view_type::value_type; + + return Impl::reduce_default_functors_team_impl(teamHandle, KE::cbegin(view), + KE::cend(view), value_type()); +} + +// +// overload set2: +// +template < + typename TeamHandleType, typename IteratorType, typename ValueType, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION ValueType reduce(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + ValueType init_reduction_value) { + static_assert(std::is_move_constructible::value, + "ValueType must be move constructible."); + + return Impl::reduce_default_functors_team_impl(teamHandle, first, last, + init_reduction_value); +} + +template < + typename TeamHandleType, typename DataType, typename... 
Properties, + typename ValueType, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION ValueType +reduce(const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + ValueType init_reduction_value) { + static_assert(std::is_move_constructible::value, + "ValueType must be move constructible."); + + namespace KE = ::Kokkos::Experimental; + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + return Impl::reduce_default_functors_team_impl( + teamHandle, KE::cbegin(view), KE::cend(view), init_reduction_value); +} + +// +// overload set 3 +// +template < + typename TeamHandleType, typename IteratorType, typename ValueType, + typename BinaryOp, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION ValueType reduce(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + ValueType init_reduction_value, + BinaryOp joiner) { + static_assert(std::is_move_constructible::value, + "ValueType must be move constructible."); + + return Impl::reduce_custom_functors_team_impl(teamHandle, first, last, + init_reduction_value, joiner); +} + +template < + typename TeamHandleType, typename DataType, typename... 
Properties, + typename ValueType, typename BinaryOp, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION ValueType +reduce(const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + ValueType init_reduction_value, BinaryOp joiner) { + static_assert(std::is_move_constructible::value, + "ValueType must be move constructible."); + + namespace KE = ::Kokkos::Experimental; + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + return Impl::reduce_custom_functors_team_impl(teamHandle, KE::cbegin(view), + KE::cend(view), + init_reduction_value, joiner); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Remove.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Remove.hpp index c8602d2f53..8a429d8d51 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Remove.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Remove.hpp @@ -23,38 +23,74 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename Iterator, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> Iterator remove(const ExecutionSpace& ex, Iterator first, Iterator last, const ValueType& value) { - return Impl::remove_impl("Kokkos::remove_iterator_api_default", ex, first, - last, value); + return Impl::remove_exespace_impl("Kokkos::remove_iterator_api_default", ex, + first, last, value); } -template +template < + typename ExecutionSpace, typename Iterator, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> Iterator remove(const std::string& label, const ExecutionSpace& ex, Iterator first, Iterator last, const ValueType& value) { - return Impl::remove_impl(label, ex, first, last, value); + return Impl::remove_exespace_impl(label, ex, first, last, value); } -template +template < + typename ExecutionSpace, typename DataType, typename... 
Properties, + typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto remove(const ExecutionSpace& ex, const ::Kokkos::View& view, const ValueType& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::remove_impl("Kokkos::remove_iterator_api_default", ex, - ::Kokkos::Experimental::begin(view), - ::Kokkos::Experimental::end(view), value); + return Impl::remove_exespace_impl("Kokkos::remove_iterator_api_default", ex, + ::Kokkos::Experimental::begin(view), + ::Kokkos::Experimental::end(view), value); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto remove(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, const ValueType& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::remove_impl(label, ex, ::Kokkos::Experimental::begin(view), - ::Kokkos::Experimental::end(view), value); + return Impl::remove_exespace_impl(label, ex, + ::Kokkos::Experimental::begin(view), + ::Kokkos::Experimental::end(view), value); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION Iterator remove(const TeamHandleType& teamHandle, + Iterator first, Iterator last, + const ValueType& value) { + return Impl::remove_team_impl(teamHandle, first, last, value); +} + +template , int> = 0> +KOKKOS_FUNCTION auto remove(const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + const ValueType& value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + return Impl::remove_team_impl(teamHandle, ::Kokkos::Experimental::begin(view), + ::Kokkos::Experimental::end(view), value); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RemoveCopy.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RemoveCopy.hpp index c2c06f6202..4b8fa9fe07 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RemoveCopy.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RemoveCopy.hpp @@ -23,26 +23,36 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator remove_copy(const ExecutionSpace& ex, InputIterator first_from, InputIterator last_from, OutputIterator first_dest, const ValueType& value) { - return Impl::remove_copy_impl("Kokkos::remove_copy_iterator_api_default", ex, - first_from, last_from, first_dest, value); + return Impl::remove_copy_exespace_impl( + "Kokkos::remove_copy_iterator_api_default", ex, first_from, last_from, + first_dest, value); } -template +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator remove_copy(const std::string& label, const ExecutionSpace& ex, InputIterator first_from, InputIterator last_from, OutputIterator first_dest, const ValueType& value) { - return 
Impl::remove_copy_impl(label, ex, first_from, last_from, first_dest, - value); + return Impl::remove_copy_exespace_impl(label, ex, first_from, last_from, + first_dest, value); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto remove_copy(const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, @@ -50,15 +60,17 @@ auto remove_copy(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - return Impl::remove_copy_impl("Kokkos::remove_copy_iterator_api_default", ex, - ::Kokkos::Experimental::cbegin(view_from), - ::Kokkos::Experimental::cend(view_from), - ::Kokkos::Experimental::begin(view_dest), - value); + return Impl::remove_copy_exespace_impl( + "Kokkos::remove_copy_iterator_api_default", ex, + ::Kokkos::Experimental::cbegin(view_from), + ::Kokkos::Experimental::cend(view_from), + ::Kokkos::Experimental::begin(view_dest), value); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto remove_copy(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, @@ -66,12 +78,46 @@ auto remove_copy(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - return Impl::remove_copy_impl( + return Impl::remove_copy_exespace_impl( label, ex, ::Kokkos::Experimental::cbegin(view_from), ::Kokkos::Experimental::cend(view_from), ::Kokkos::Experimental::begin(view_dest), value); } +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION OutputIterator remove_copy(const TeamHandleType& teamHandle, + InputIterator first_from, + InputIterator last_from, + OutputIterator first_dest, + const ValueType& value) { + return Impl::remove_copy_team_impl(teamHandle, first_from, last_from, + first_dest, value); +} + +template , int> = 0> +KOKKOS_FUNCTION auto remove_copy( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest, + const ValueType& value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + + return Impl::remove_copy_team_impl( + teamHandle, ::Kokkos::Experimental::cbegin(view_from), + ::Kokkos::Experimental::cend(view_from), + ::Kokkos::Experimental::begin(view_dest), value); +} + } // namespace Experimental } // namespace Kokkos diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RemoveCopyIf.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RemoveCopyIf.hpp index 6d642ed6f0..45e2b54bb6 100644 --- 
a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RemoveCopyIf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RemoveCopyIf.hpp @@ -23,30 +23,39 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + typename UnaryPredicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator remove_copy_if(const ExecutionSpace& ex, InputIterator first_from, InputIterator last_from, OutputIterator first_dest, const UnaryPredicate& pred) { - return Impl::remove_copy_if_impl( + return Impl::remove_copy_if_exespace_impl( "Kokkos::remove_copy_if_iterator_api_default", ex, first_from, last_from, first_dest, pred); } -template +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + typename UnaryPredicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator remove_copy_if(const std::string& label, const ExecutionSpace& ex, InputIterator first_from, InputIterator last_from, OutputIterator first_dest, const UnaryPredicate& pred) { - return Impl::remove_copy_if_impl(label, ex, first_from, last_from, first_dest, - pred); + return Impl::remove_copy_if_exespace_impl(label, ex, first_from, last_from, + first_dest, pred); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename UnaryPredicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto remove_copy_if(const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, @@ -54,15 +63,17 @@ auto remove_copy_if(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - return Impl::remove_copy_if_impl( + return Impl::remove_copy_if_exespace_impl( "Kokkos::remove_copy_if_iterator_api_default", ex, ::Kokkos::Experimental::cbegin(view_from), ::Kokkos::Experimental::cend(view_from), ::Kokkos::Experimental::begin(view_dest), pred); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename UnaryPredicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto remove_copy_if(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, @@ -70,12 +81,46 @@ auto remove_copy_if(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - return Impl::remove_copy_if_impl( + return Impl::remove_copy_if_exespace_impl( label, ex, ::Kokkos::Experimental::cbegin(view_from), ::Kokkos::Experimental::cend(view_from), ::Kokkos::Experimental::begin(view_dest), pred); } +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION OutputIterator remove_copy_if(const TeamHandleType& teamHandle, + InputIterator first_from, + InputIterator last_from, + OutputIterator first_dest, + const UnaryPredicate& pred) { + return Impl::remove_copy_if_team_impl(teamHandle, first_from, last_from, + first_dest, pred); +} + +template , int> = 0> +KOKKOS_FUNCTION auto remove_copy_if( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest, + const UnaryPredicate& pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + + return Impl::remove_copy_if_team_impl( + teamHandle, ::Kokkos::Experimental::cbegin(view_from), + ::Kokkos::Experimental::cend(view_from), + ::Kokkos::Experimental::begin(view_dest), pred); +} + } // namespace Experimental } // namespace Kokkos diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RemoveIf.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RemoveIf.hpp index 4062e8d373..38461a37f2 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RemoveIf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RemoveIf.hpp @@ -23,39 +23,77 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename Iterator, typename UnaryPredicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> Iterator remove_if(const ExecutionSpace& ex, Iterator first, Iterator last, UnaryPredicate pred) { - return Impl::remove_if_impl("Kokkos::remove_if_iterator_api_default", ex, - first, last, pred); + return Impl::remove_if_exespace_impl("Kokkos::remove_if_iterator_api_default", + ex, first, last, pred); } -template +template < + typename ExecutionSpace, typename Iterator, typename UnaryPredicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> Iterator remove_if(const std::string& label, 
const ExecutionSpace& ex, Iterator first, Iterator last, UnaryPredicate pred) { - return Impl::remove_if_impl(label, ex, first, last, pred); + return Impl::remove_if_exespace_impl(label, ex, first, last, pred); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename UnaryPredicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto remove_if(const ExecutionSpace& ex, const ::Kokkos::View& view, UnaryPredicate pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::remove_if_impl("Kokkos::remove_if_iterator_api_default", ex, - ::Kokkos::Experimental::begin(view), - ::Kokkos::Experimental::end(view), pred); + return Impl::remove_if_exespace_impl("Kokkos::remove_if_iterator_api_default", + ex, ::Kokkos::Experimental::begin(view), + ::Kokkos::Experimental::end(view), pred); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename UnaryPredicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto remove_if(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, UnaryPredicate pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::remove_if_impl(label, ex, ::Kokkos::Experimental::begin(view), - ::Kokkos::Experimental::end(view), pred); + return Impl::remove_if_exespace_impl(label, ex, + ::Kokkos::Experimental::begin(view), + ::Kokkos::Experimental::end(view), pred); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION Iterator remove_if(const TeamHandleType& teamHandle, + Iterator first, Iterator last, + UnaryPredicate pred) { + return Impl::remove_if_team_impl(teamHandle, first, last, pred); +} + +template , int> = 0> +KOKKOS_FUNCTION auto remove_if( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, UnaryPredicate pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + return Impl::remove_if_team_impl(teamHandle, + ::Kokkos::Experimental::begin(view), + ::Kokkos::Experimental::end(view), pred); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Replace.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Replace.hpp index 4d1490ded0..29afc4f0c2 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Replace.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Replace.hpp @@ -23,40 +23,77 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename Iterator, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void replace(const ExecutionSpace& ex, Iterator first, Iterator last, const ValueType& old_value, const ValueType& new_value) { - return Impl::replace_impl("Kokkos::replace_iterator_api", ex, first, last, - old_value, new_value); + Impl::replace_exespace_impl("Kokkos::replace_iterator_api", ex, first, last, + old_value, new_value); } -template +template < + typename ExecutionSpace, typename Iterator, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void replace(const std::string& label, const ExecutionSpace& ex, Iterator first, Iterator last, const ValueType& old_value, const ValueType& new_value) { - return Impl::replace_impl(label, ex, first, last, old_value, new_value); + Impl::replace_exespace_impl(label, ex, first, last, old_value, new_value); } -template +template < + typename 
ExecutionSpace, typename DataType1, typename... Properties1, + typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void replace(const ExecutionSpace& ex, const ::Kokkos::View& view, const ValueType& old_value, const ValueType& new_value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::replace_impl("Kokkos::replace_view_api", ex, KE::begin(view), - KE::end(view), old_value, new_value); + Impl::replace_exespace_impl("Kokkos::replace_view_api", ex, KE::begin(view), + KE::end(view), old_value, new_value); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void replace(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, const ValueType& old_value, const ValueType& new_value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::replace_impl(label, ex, KE::begin(view), KE::end(view), - old_value, new_value); + Impl::replace_exespace_impl(label, ex, KE::begin(view), KE::end(view), + old_value, new_value); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION void replace(const TeamHandleType& teamHandle, Iterator first, + Iterator last, const ValueType& old_value, + const ValueType& new_value) { + Impl::replace_team_impl(teamHandle, first, last, old_value, new_value); +} + +template , int> = 0> +KOKKOS_FUNCTION void replace( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + const ValueType& old_value, const ValueType& new_value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + namespace KE = ::Kokkos::Experimental; + Impl::replace_team_impl(teamHandle, KE::begin(view), KE::end(view), old_value, + new_value); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReplaceCopy.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReplaceCopy.hpp index e7f464e4bd..04d5767e89 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReplaceCopy.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReplaceCopy.hpp @@ -23,30 +23,39 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator replace_copy(const ExecutionSpace& ex, InputIterator first_from, InputIterator last_from, OutputIterator first_dest, const ValueType& old_value, const ValueType& new_value) { - return Impl::replace_copy_impl("Kokkos::replace_copy_iterator_api", ex, - first_from, last_from, first_dest, old_value, - new_value); + return Impl::replace_copy_exespace_impl("Kokkos::replace_copy_iterator_api", + ex, first_from, last_from, first_dest, + old_value, new_value); } -template +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator replace_copy(const std::string& label, 
const ExecutionSpace& ex, InputIterator first_from, InputIterator last_from, OutputIterator first_dest, const ValueType& old_value, const ValueType& new_value) { - return Impl::replace_copy_impl(label, ex, first_from, last_from, first_dest, - old_value, new_value); + return Impl::replace_copy_exespace_impl(label, ex, first_from, last_from, + first_dest, old_value, new_value); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto replace_copy(const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, @@ -54,13 +63,15 @@ auto replace_copy(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); namespace KE = ::Kokkos::Experimental; - return Impl::replace_copy_impl("Kokkos::replace_copy_view_api", ex, - KE::cbegin(view_from), KE::cend(view_from), - KE::begin(view_dest), old_value, new_value); + return Impl::replace_copy_exespace_impl( + "Kokkos::replace_copy_view_api", ex, KE::cbegin(view_from), + KE::cend(view_from), KE::begin(view_dest), old_value, new_value); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto replace_copy(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, @@ -68,9 +79,43 @@ auto replace_copy(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); namespace KE = ::Kokkos::Experimental; - return Impl::replace_copy_impl(label, ex, KE::cbegin(view_from), - KE::cend(view_from), KE::begin(view_dest), - old_value, new_value); + return Impl::replace_copy_exespace_impl( + label, ex, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest), old_value, new_value); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION OutputIterator replace_copy(const TeamHandleType& teamHandle, + InputIterator first_from, + InputIterator last_from, + OutputIterator first_dest, + const ValueType& old_value, + const ValueType& new_value) { + return Impl::replace_copy_team_impl(teamHandle, first_from, last_from, + first_dest, old_value, new_value); +} + +template , int> = 0> +KOKKOS_FUNCTION auto replace_copy( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest, + const ValueType& old_value, const ValueType& new_value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + namespace KE = ::Kokkos::Experimental; + return Impl::replace_copy_team_impl(teamHandle, KE::cbegin(view_from), + KE::cend(view_from), KE::begin(view_dest), + old_value, new_value); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReplaceCopyIf.hpp 
b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReplaceCopyIf.hpp index 71ae8f8452..b87163f194 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReplaceCopyIf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReplaceCopyIf.hpp @@ -23,33 +23,42 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + typename PredicateType, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator replace_copy_if(const ExecutionSpace& ex, InputIterator first_from, InputIterator last_from, OutputIterator first_dest, PredicateType pred, const ValueType& new_value) { - return Impl::replace_copy_if_impl("Kokkos::replace_copy_if_iterator_api", ex, - first_from, last_from, first_dest, pred, - new_value); + return Impl::replace_copy_if_exespace_impl( + "Kokkos::replace_copy_if_iterator_api", ex, first_from, last_from, + first_dest, pred, new_value); } -template +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + typename PredicateType, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator replace_copy_if(const std::string& label, const ExecutionSpace& ex, InputIterator first_from, InputIterator last_from, OutputIterator first_dest, PredicateType pred, const ValueType& new_value) { - return Impl::replace_copy_if_impl(label, ex, first_from, last_from, - first_dest, pred, new_value); + return Impl::replace_copy_if_exespace_impl(label, ex, first_from, last_from, + first_dest, pred, new_value); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename PredicateType, + typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto replace_copy_if(const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, @@ -57,14 +66,16 @@ auto replace_copy_if(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); namespace KE = ::Kokkos::Experimental; - return Impl::replace_copy_if_impl("Kokkos::replace_copy_if_view_api", ex, - KE::cbegin(view_from), KE::cend(view_from), - KE::begin(view_dest), pred, new_value); + return Impl::replace_copy_if_exespace_impl( + "Kokkos::replace_copy_if_view_api", ex, KE::cbegin(view_from), + KE::cend(view_from), KE::begin(view_dest), pred, new_value); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename PredicateType, + typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto replace_copy_if(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, const ::Kokkos::View& view_dest, @@ -72,9 +83,44 @@ auto replace_copy_if(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); namespace KE = ::Kokkos::Experimental; - return Impl::replace_copy_if_impl(label, ex, KE::cbegin(view_from), - KE::cend(view_from), KE::begin(view_dest), - pred, new_value); + return Impl::replace_copy_if_exespace_impl( + label, ex, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest), pred, new_value); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION OutputIterator replace_copy_if(const TeamHandleType& teamHandle, + InputIterator first_from, + InputIterator last_from, + OutputIterator first_dest, + PredicateType pred, + const ValueType& new_value) { + return Impl::replace_copy_if_team_impl(teamHandle, first_from, last_from, + first_dest, pred, new_value); +} + +template , int> = 0> +KOKKOS_FUNCTION auto replace_copy_if( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest, + PredicateType pred, const ValueType& new_value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + namespace KE = ::Kokkos::Experimental; + return Impl::replace_copy_if_team_impl(teamHandle, KE::cbegin(view_from), + KE::cend(view_from), + KE::begin(view_dest), pred, new_value); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReplaceIf.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReplaceIf.hpp index 7f06540e06..73af1f16f0 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReplaceIf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReplaceIf.hpp @@ -23,43 +23,82 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename Predicate, + typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void replace_if(const ExecutionSpace& ex, InputIterator first, InputIterator last, Predicate pred, const ValueType& new_value) { - return Impl::replace_if_impl("Kokkos::replace_if_iterator_api", ex, first, - last, pred, new_value); + Impl::replace_if_exespace_impl("Kokkos::replace_if_iterator_api", ex, first, + last, pred, new_value); } -template +template < + typename ExecutionSpace, typename InputIterator, typename Predicate, + typename ValueType, + 
std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void replace_if(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator last, Predicate pred, const ValueType& new_value) { - return Impl::replace_if_impl(label, ex, first, last, pred, new_value); + Impl::replace_if_exespace_impl(label, ex, first, last, pred, new_value); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename Predicate, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void replace_if(const ExecutionSpace& ex, const ::Kokkos::View& view, Predicate pred, const ValueType& new_value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::replace_if_impl("Kokkos::replace_if_view_api", ex, - KE::begin(view), KE::end(view), pred, new_value); + Impl::replace_if_exespace_impl("Kokkos::replace_if_view_api", ex, + KE::begin(view), KE::end(view), pred, + new_value); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename Predicate, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void replace_if(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, Predicate pred, const ValueType& new_value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::replace_if_impl(label, ex, KE::begin(view), KE::end(view), pred, - new_value); + Impl::replace_if_exespace_impl(label, ex, KE::begin(view), KE::end(view), + pred, new_value); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION void replace_if(const TeamHandleType& teamHandle, + InputIterator first, InputIterator last, + Predicate pred, const ValueType& new_value) { + Impl::replace_if_team_impl(teamHandle, first, last, pred, new_value); +} + +template , int> = 0> +KOKKOS_FUNCTION void replace_if( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, Predicate pred, + const ValueType& new_value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + namespace KE = ::Kokkos::Experimental; + Impl::replace_if_team_impl(teamHandle, KE::begin(view), KE::end(view), pred, + new_value); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Reverse.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Reverse.hpp index 9f2fc5f3cc..a0786d3a2e 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Reverse.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Reverse.hpp @@ -23,34 +23,67 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void reverse(const ExecutionSpace& ex, InputIterator first, InputIterator last) { - return Impl::reverse_impl("Kokkos::reverse_iterator_api_default", ex, first, - last); + return Impl::reverse_exespace_impl("Kokkos::reverse_iterator_api_default", ex, + first, last); } -template +template < + typename ExecutionSpace, typename InputIterator, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void reverse(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator last) { - return Impl::reverse_impl(label, ex, first, last); + return Impl::reverse_exespace_impl(label, ex, first, last); } -template +template < + typename ExecutionSpace, typename DataType, typename... 
Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void reverse(const ExecutionSpace& ex, const ::Kokkos::View& view) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::reverse_impl("Kokkos::reverse_view_api_default", ex, - KE::begin(view), KE::end(view)); + return Impl::reverse_exespace_impl("Kokkos::reverse_view_api_default", ex, + KE::begin(view), KE::end(view)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> void reverse(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::reverse_impl(label, ex, KE::begin(view), KE::end(view)); + return Impl::reverse_exespace_impl(label, ex, KE::begin(view), KE::end(view)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION void reverse(const TeamHandleType& teamHandle, + InputIterator first, InputIterator last) { + return Impl::reverse_team_impl(teamHandle, first, last); +} + +template , int> = 0> +KOKKOS_FUNCTION void reverse( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + namespace KE = ::Kokkos::Experimental; + return Impl::reverse_team_impl(teamHandle, KE::begin(view), KE::end(view)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReverseCopy.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReverseCopy.hpp index 279bb22086..37336c983a 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReverseCopy.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ReverseCopy.hpp @@ -23,42 +23,83 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator reverse_copy(const ExecutionSpace& ex, InputIterator first, InputIterator last, OutputIterator d_first) { - return Impl::reverse_copy_impl("Kokkos::reverse_copy_iterator_api_default", - ex, first, last, d_first); + return Impl::reverse_copy_exespace_impl( + "Kokkos::reverse_copy_iterator_api_default", ex, first, last, d_first); } -template +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator reverse_copy(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator last, OutputIterator d_first) { - return Impl::reverse_copy_impl(label, ex, first, last, d_first); + return Impl::reverse_copy_exespace_impl(label, ex, first, last, d_first); } -template +template < + typename ExecutionSpace, typename DataType1, 
typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto reverse_copy(const ExecutionSpace& ex, const ::Kokkos::View& source, ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::reverse_copy_impl("Kokkos::reverse_copy_view_api_default", ex, - cbegin(source), cend(source), begin(dest)); + return Impl::reverse_copy_exespace_impl( + "Kokkos::reverse_copy_view_api_default", ex, cbegin(source), cend(source), + begin(dest)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto reverse_copy(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::reverse_copy_impl(label, ex, cbegin(source), cend(source), - begin(dest)); + return Impl::reverse_copy_exespace_impl(label, ex, cbegin(source), + cend(source), begin(dest)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION OutputIterator reverse_copy(const TeamHandleType& teamHandle, + InputIterator first, + InputIterator last, + OutputIterator d_first) { + return Impl::reverse_copy_team_impl(teamHandle, first, last, d_first); +} + +template , int> = 0> +KOKKOS_FUNCTION auto reverse_copy( + const TeamHandleType& teamHandle, + const ::Kokkos::View& source, + ::Kokkos::View& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::reverse_copy_team_impl(teamHandle, cbegin(source), cend(source), + begin(dest)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Rotate.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Rotate.hpp index 738e9bf137..aff04b47d6 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Rotate.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Rotate.hpp @@ -23,36 +23,71 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType rotate(const ExecutionSpace& ex, IteratorType first, IteratorType n_first, IteratorType last) { - return Impl::rotate_impl("Kokkos::rotate_iterator_api_default", ex, first, - n_first, last); + return Impl::rotate_exespace_impl("Kokkos::rotate_iterator_api_default", ex, + first, n_first, last); } -template +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType rotate(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType n_first, IteratorType last) { - return Impl::rotate_impl(label, ex, first, n_first, last); + return Impl::rotate_exespace_impl(label, ex, first, n_first, last); } -template +template < + typename ExecutionSpace, typename DataType, 
typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto rotate(const ExecutionSpace& ex, const ::Kokkos::View& view, std::size_t n_location) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::rotate_impl("Kokkos::rotate_view_api_default", ex, begin(view), - begin(view) + n_location, end(view)); + return Impl::rotate_exespace_impl("Kokkos::rotate_view_api_default", ex, + begin(view), begin(view) + n_location, + end(view)); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto rotate(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, std::size_t n_location) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::rotate_impl(label, ex, begin(view), begin(view) + n_location, - end(view)); + return Impl::rotate_exespace_impl(label, ex, begin(view), + begin(view) + n_location, end(view)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION IteratorType rotate(const TeamHandleType& teamHandle, + IteratorType first, IteratorType n_first, + IteratorType last) { + return Impl::rotate_team_impl(teamHandle, first, n_first, last); +} + +template , int> = 0> +KOKKOS_FUNCTION auto rotate(const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + std::size_t n_location) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + return Impl::rotate_team_impl(teamHandle, begin(view), + begin(view) + n_location, end(view)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RotateCopy.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RotateCopy.hpp index f5d826c4bb..cce37fccfa 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RotateCopy.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_RotateCopy.hpp @@ -23,23 +23,34 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator rotate_copy(const ExecutionSpace& ex, InputIterator first, InputIterator n_first, InputIterator last, OutputIterator d_first) { - return Impl::rotate_copy_impl("Kokkos::rotate_copy_iterator_api_default", ex, - first, n_first, last, d_first); + return Impl::rotate_copy_exespace_impl( + "Kokkos::rotate_copy_iterator_api_default", ex, first, n_first, last, + d_first); } -template +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator rotate_copy(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator n_first, InputIterator last, OutputIterator d_first) { - return Impl::rotate_copy_impl(label, ex, first, n_first, last, d_first); + return Impl::rotate_copy_exespace_impl(label, 
ex, first, n_first, last, + d_first); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto rotate_copy(const ExecutionSpace& ex, const ::Kokkos::View& source, std::size_t n_location, @@ -47,13 +58,15 @@ auto rotate_copy(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::rotate_copy_impl("Kokkos::rotate_copy_view_api_default", ex, - cbegin(source), cbegin(source) + n_location, - cend(source), begin(dest)); + return Impl::rotate_copy_exespace_impl( + "Kokkos::rotate_copy_view_api_default", ex, cbegin(source), + cbegin(source) + n_location, cend(source), begin(dest)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto rotate_copy(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, std::size_t n_location, @@ -61,9 +74,41 @@ auto rotate_copy(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::rotate_copy_impl(label, ex, cbegin(source), - cbegin(source) + n_location, cend(source), - begin(dest)); + return Impl::rotate_copy_exespace_impl(label, ex, cbegin(source), + cbegin(source) + n_location, + cend(source), begin(dest)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION OutputIterator rotate_copy(const TeamHandleType& teamHandle, + InputIterator first, + InputIterator n_first, + InputIterator last, + OutputIterator d_first) { + return Impl::rotate_copy_team_impl(teamHandle, first, n_first, last, d_first); +} + +template , int> = 0> +KOKKOS_FUNCTION auto rotate_copy( + const TeamHandleType& teamHandle, + const ::Kokkos::View& source, + std::size_t n_location, + const ::Kokkos::View& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::rotate_copy_team_impl(teamHandle, cbegin(source), + cbegin(source) + n_location, cend(source), + begin(dest)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Search.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Search.hpp index b1154b297e..43258a484e 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Search.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Search.hpp @@ -23,24 +23,34 @@ namespace Kokkos { namespace Experimental { +// +// overload set accepting execution space +// + // overload set 1: no binary predicate passed -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 search(const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last) { - return Impl::search_impl("Kokkos::search_iterator_api_default", ex, first, - last, s_first, s_last); + return Impl::search_exespace_impl("Kokkos::search_iterator_api_default", ex, + first, last, s_first, s_last); } -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 search(const std::string& label, const ExecutionSpace& ex, IteratorType1 first, 
IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last) { - return Impl::search_impl(label, ex, first, last, s_first, s_last); + return Impl::search_exespace_impl(label, ex, first, last, s_first, s_last); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto search(const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view) { @@ -48,13 +58,15 @@ auto search(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; - return Impl::search_impl("Kokkos::search_view_api_default", ex, - KE::begin(view), KE::end(view), KE::begin(s_view), - KE::end(s_view)); + return Impl::search_exespace_impl("Kokkos::search_view_api_default", ex, + KE::begin(view), KE::end(view), + KE::begin(s_view), KE::end(s_view)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto search(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view) { @@ -62,31 +74,38 @@ auto search(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; - return Impl::search_impl(label, ex, KE::begin(view), KE::end(view), - KE::begin(s_view), KE::end(s_view)); + return Impl::search_exespace_impl(label, ex, KE::begin(view), KE::end(view), + KE::begin(s_view), KE::end(s_view)); } // overload set 2: binary predicate passed -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + typename BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 search(const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last, const BinaryPredicateType& pred) { - return Impl::search_impl("Kokkos::search_iterator_api_default", ex, first, - last, s_first, s_last, pred); + return Impl::search_exespace_impl("Kokkos::search_iterator_api_default", ex, + first, last, s_first, s_last, pred); } -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + typename BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType1 search(const std::string& label, const ExecutionSpace& ex, IteratorType1 first, IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last, const BinaryPredicateType& pred) { - return Impl::search_impl(label, ex, first, last, s_first, s_last, pred); + return Impl::search_exespace_impl(label, ex, first, last, s_first, s_last, + pred); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto search(const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view, @@ -95,13 +114,15 @@ auto search(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; - return Impl::search_impl("Kokkos::search_view_api_default", ex, - KE::begin(view), KE::end(view), KE::begin(s_view), - KE::end(s_view), pred); + return Impl::search_exespace_impl("Kokkos::search_view_api_default", ex, + KE::begin(view), KE::end(view), + KE::begin(s_view), KE::end(s_view), pred); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto search(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, const ::Kokkos::View& s_view, @@ -110,8 +131,70 @@ auto search(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); namespace KE = ::Kokkos::Experimental; - return Impl::search_impl(label, ex, KE::begin(view), KE::end(view), - KE::begin(s_view), KE::end(s_view), pred); + return Impl::search_exespace_impl(label, ex, KE::begin(view), KE::end(view), + KE::begin(s_view), KE::end(s_view), pred); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// + +// overload set 1: no binary predicate passed +template , int> = 0> +KOKKOS_FUNCTION IteratorType1 search(const TeamHandleType& teamHandle, + IteratorType1 first, IteratorType1 last, + IteratorType2 s_first, + IteratorType2 s_last) { + return Impl::search_team_impl(teamHandle, first, last, s_first, s_last); +} + +template , int> = 0> +KOKKOS_FUNCTION auto search( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + const ::Kokkos::View& s_view) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); + + namespace KE = ::Kokkos::Experimental; + return Impl::search_team_impl(teamHandle, KE::begin(view), KE::end(view), + KE::begin(s_view), KE::end(s_view)); +} + +// overload set 2: binary predicate passed +template , int> = 0> + +KOKKOS_FUNCTION IteratorType1 search(const TeamHandleType& teamHandle, + IteratorType1 first, IteratorType1 last, + IteratorType2 s_first, + IteratorType2 s_last, + const BinaryPredicateType& pred) { + return Impl::search_team_impl(teamHandle, first, last, s_first, s_last, pred); +} + +template , int> = 0> +KOKKOS_FUNCTION auto search( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + const ::Kokkos::View& s_view, + const BinaryPredicateType& pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); + + namespace KE = ::Kokkos::Experimental; + return Impl::search_team_impl(teamHandle, KE::begin(view), KE::end(view), + KE::begin(s_view), KE::end(s_view), pred); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_SearchN.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_SearchN.hpp index a649c8f205..0f8aa5f1c1 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_SearchN.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_SearchN.hpp @@ -23,68 +23,86 @@ namespace Kokkos { namespace 
Experimental { +// +// overload set accepting execution space +// + // overload set 1: no binary predicate passed -template +template < + class ExecutionSpace, class IteratorType, class SizeType, class ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType search_n(const ExecutionSpace& ex, IteratorType first, IteratorType last, SizeType count, const ValueType& value) { - return Impl::search_n_impl("Kokkos::search_n_iterator_api_default", ex, first, - last, count, value); + return Impl::search_n_exespace_impl("Kokkos::search_n_iterator_api_default", + ex, first, last, count, value); } -template +template < + class ExecutionSpace, class IteratorType, class SizeType, class ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType search_n(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, SizeType count, const ValueType& value) { - return Impl::search_n_impl(label, ex, first, last, count, value); + return Impl::search_n_exespace_impl(label, ex, first, last, count, value); } template + class SizeType, class ValueType, + std::enable_if_t<::Kokkos::is_execution_space::value, + int> = 0> auto search_n(const ExecutionSpace& ex, const ::Kokkos::View& view, SizeType count, const ValueType& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::search_n_impl("Kokkos::search_n_view_api_default", ex, - KE::begin(view), KE::end(view), count, value); + return Impl::search_n_exespace_impl("Kokkos::search_n_view_api_default", ex, + KE::begin(view), KE::end(view), count, + value); } template + class SizeType, class ValueType, + std::enable_if_t<::Kokkos::is_execution_space::value, + int> = 0> auto search_n(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, SizeType count, const ValueType& value) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = 
::Kokkos::Experimental; - return Impl::search_n_impl(label, ex, KE::begin(view), KE::end(view), count, - value); + return Impl::search_n_exespace_impl(label, ex, KE::begin(view), KE::end(view), + count, value); } // overload set 2: binary predicate passed -template +template < + class ExecutionSpace, class IteratorType, class SizeType, class ValueType, + class BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType search_n(const ExecutionSpace& ex, IteratorType first, IteratorType last, SizeType count, const ValueType& value, const BinaryPredicateType& pred) { - return Impl::search_n_impl("Kokkos::search_n_iterator_api_default", ex, first, - last, count, value, pred); + return Impl::search_n_exespace_impl("Kokkos::search_n_iterator_api_default", + ex, first, last, count, value, pred); } -template +template < + class ExecutionSpace, class IteratorType, class SizeType, class ValueType, + class BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType search_n(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, SizeType count, const ValueType& value, const BinaryPredicateType& pred) { - return Impl::search_n_impl(label, ex, first, last, count, value, pred); + return Impl::search_n_exespace_impl(label, ex, first, last, count, value, + pred); } template + class SizeType, class ValueType, class BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space::value, + int> = 0> auto search_n(const ExecutionSpace& ex, const ::Kokkos::View& view, SizeType count, const ValueType& value, @@ -92,13 +110,15 @@ auto search_n(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::search_n_impl("Kokkos::search_n_view_api_default", ex, - KE::begin(view), KE::end(view), count, value, - pred); + return Impl::search_n_exespace_impl("Kokkos::search_n_view_api_default", ex, + 
KE::begin(view), KE::end(view), count, + value, pred); } template + class SizeType, class ValueType, class BinaryPredicateType, + std::enable_if_t<::Kokkos::is_execution_space::value, + int> = 0> auto search_n(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, SizeType count, const ValueType& value, @@ -106,8 +126,65 @@ auto search_n(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); namespace KE = ::Kokkos::Experimental; - return Impl::search_n_impl(label, ex, KE::begin(view), KE::end(view), count, - value, pred); + return Impl::search_n_exespace_impl(label, ex, KE::begin(view), KE::end(view), + count, value, pred); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// + +// overload set 1: no binary predicate passed +template , int> = 0> +KOKKOS_FUNCTION IteratorType search_n(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + SizeType count, const ValueType& value) { + return Impl::search_n_team_impl(teamHandle, first, last, count, value); +} + +template < + class TeamHandleType, class DataType, class... 
Properties, class SizeType, + class ValueType, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION auto search_n( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, SizeType count, + const ValueType& value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + namespace KE = ::Kokkos::Experimental; + return Impl::search_n_team_impl(teamHandle, KE::begin(view), KE::end(view), + count, value); +} + +// overload set 2: binary predicate passed +template , int> = 0> +KOKKOS_FUNCTION IteratorType search_n(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + SizeType count, const ValueType& value, + const BinaryPredicateType& pred) { + return Impl::search_n_team_impl(teamHandle, first, last, count, value, pred); +} + +template < + class TeamHandleType, class DataType, class... Properties, class SizeType, + class ValueType, class BinaryPredicateType, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION auto search_n( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, SizeType count, + const ValueType& value, const BinaryPredicateType& pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + namespace KE = ::Kokkos::Experimental; + return Impl::search_n_team_impl(teamHandle, KE::begin(view), KE::end(view), + count, value, pred); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ShiftLeft.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ShiftLeft.hpp index 4b91a17ab8..b3e04a3b97 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ShiftLeft.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ShiftLeft.hpp @@ -23,36 +23,70 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType 
shift_left(const ExecutionSpace& ex, IteratorType first, IteratorType last, typename IteratorType::difference_type n) { - return Impl::shift_left_impl("Kokkos::shift_left_iterator_api_default", ex, - first, last, n); + return Impl::shift_left_exespace_impl( + "Kokkos::shift_left_iterator_api_default", ex, first, last, n); } -template +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType shift_left(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, typename IteratorType::difference_type n) { - return Impl::shift_left_impl(label, ex, first, last, n); + return Impl::shift_left_exespace_impl(label, ex, first, last, n); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto shift_left(const ExecutionSpace& ex, const ::Kokkos::View& view, typename decltype(begin(view))::difference_type n) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::shift_left_impl("Kokkos::shift_left_view_api_default", ex, - begin(view), end(view), n); + return Impl::shift_left_exespace_impl("Kokkos::shift_left_view_api_default", + ex, begin(view), end(view), n); } -template +template < + typename ExecutionSpace, typename DataType, typename... 
Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto shift_left(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, typename decltype(begin(view))::difference_type n) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::shift_left_impl(label, ex, begin(view), end(view), n); + return Impl::shift_left_exespace_impl(label, ex, begin(view), end(view), n); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION IteratorType +shift_left(const TeamHandleType& teamHandle, IteratorType first, + IteratorType last, typename IteratorType::difference_type n) { + return Impl::shift_left_team_impl(teamHandle, first, last, n); +} + +template , int> = 0> +KOKKOS_FUNCTION auto shift_left( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + typename decltype(begin(view))::difference_type n) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + return Impl::shift_left_team_impl(teamHandle, begin(view), end(view), n); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ShiftRight.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ShiftRight.hpp index 2ea50fd74e..0f7ed53948 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ShiftRight.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ShiftRight.hpp @@ -23,36 +23,70 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType shift_right(const ExecutionSpace& ex, IteratorType first, IteratorType last, typename IteratorType::difference_type n) { - return Impl::shift_right_impl("Kokkos::shift_right_iterator_api_default", ex, 
- first, last, n); + return Impl::shift_right_exespace_impl( + "Kokkos::shift_right_iterator_api_default", ex, first, last, n); } -template +template < + typename ExecutionSpace, typename IteratorType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> IteratorType shift_right(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, typename IteratorType::difference_type n) { - return Impl::shift_right_impl(label, ex, first, last, n); + return Impl::shift_right_exespace_impl(label, ex, first, last, n); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto shift_right(const ExecutionSpace& ex, const ::Kokkos::View& view, typename decltype(begin(view))::difference_type n) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::shift_right_impl("Kokkos::shift_right_view_api_default", ex, - begin(view), end(view), n); + return Impl::shift_right_exespace_impl("Kokkos::shift_right_view_api_default", + ex, begin(view), end(view), n); } -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto shift_right(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, typename decltype(begin(view))::difference_type n) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::shift_right_impl(label, ex, begin(view), end(view), n); + return Impl::shift_right_exespace_impl(label, ex, begin(view), end(view), n); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION IteratorType +shift_right(const TeamHandleType& teamHandle, IteratorType first, + IteratorType last, typename IteratorType::difference_type n) { + return Impl::shift_right_team_impl(teamHandle, first, last, n); +} + +template , int> = 0> +KOKKOS_FUNCTION auto shift_right( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + typename decltype(begin(view))::difference_type n) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + return Impl::shift_right_team_impl(teamHandle, begin(view), end(view), n); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_SwapRanges.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_SwapRanges.hpp index 5fbf045318..39f33b6487 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_SwapRanges.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_SwapRanges.hpp @@ -23,15 +23,21 @@ namespace Kokkos { namespace Experimental { -template +// +// overload set accepting execution space +// +template , int> = 0> IteratorType2 swap_ranges(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2) { - return Impl::swap_ranges_impl("Kokkos::swap_ranges_iterator_api_default", ex, - first1, last1, first2); + return Impl::swap_ranges_exespace_impl( + "Kokkos::swap_ranges_iterator_api_default", ex, first1, last1, first2); } -template +template , int> = 0> auto swap_ranges(const ExecutionSpace& ex, const ::Kokkos::View& source, ::Kokkos::View& dest) { @@ -39,19 +45,23 @@ auto swap_ranges(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); assert(source.extent(0) == dest.extent(0)); - return Impl::swap_ranges_impl("Kokkos::swap_ranges_view_api_default", ex, - begin(source), end(source), begin(dest)); + return Impl::swap_ranges_exespace_impl("Kokkos::swap_ranges_view_api_default", + ex, begin(source), end(source), + begin(dest)); } -template +template , int> 
= 0> IteratorType2 swap_ranges(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2) { - return Impl::swap_ranges_impl(label, ex, first1, last1, first2); + return Impl::swap_ranges_exespace_impl(label, ex, first1, last1, first2); } -template +template , int> = 0> auto swap_ranges(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, ::Kokkos::View& dest) { @@ -59,8 +69,38 @@ auto swap_ranges(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); assert(source.extent(0) == dest.extent(0)); - return Impl::swap_ranges_impl(label, ex, begin(source), end(source), - begin(dest)); + return Impl::swap_ranges_exespace_impl(label, ex, begin(source), end(source), + begin(dest)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template , int> = 0> +KOKKOS_FUNCTION IteratorType2 swap_ranges(const TeamHandleType& teamHandle, + IteratorType1 first1, + IteratorType1 last1, + IteratorType2 first2) { + return Impl::swap_ranges_team_impl(teamHandle, first1, last1, first2); +} + +template , int> = 0> +KOKKOS_FUNCTION auto swap_ranges( + const TeamHandleType& teamHandle, + const ::Kokkos::View& source, + ::Kokkos::View& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + assert(source.extent(0) == dest.extent(0)); + return Impl::swap_ranges_team_impl(teamHandle, begin(source), end(source), + begin(dest)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Transform.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Transform.hpp index 27dee30426..838c9169e2 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Transform.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Transform.hpp @@ -23,31 +23,39 @@ namespace Kokkos { namespace Experimental { -template -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - InputIterator, OutputIterator>::value, - OutputIterator> -transform(const ExecutionSpace& ex, InputIterator first1, InputIterator last1, - OutputIterator d_first, UnaryOperation unary_op) { - return Impl::transform_impl("Kokkos::transform_iterator_api_default", ex, - first1, last1, d_first, std::move(unary_op)); +// +// overload set accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + typename UnaryOperation, + std::enable_if_t && + is_execution_space_v, + int> = 0> +OutputIterator transform(const ExecutionSpace& ex, InputIterator first1, + InputIterator last1, OutputIterator d_first, + UnaryOperation unary_op) { + return Impl::transform_exespace_impl("Kokkos::transform_iterator_api_default", + ex, first1, last1, d_first, + std::move(unary_op)); } -template 
-std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - InputIterator, OutputIterator>::value, - OutputIterator> -transform(const std::string& label, const ExecutionSpace& ex, - InputIterator first1, InputIterator last1, OutputIterator d_first, - UnaryOperation unary_op) { - return Impl::transform_impl(label, ex, first1, last1, d_first, - std::move(unary_op)); +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + typename UnaryOperation, + std::enable_if_t && + is_execution_space_v, + int> = 0> +OutputIterator transform(const std::string& label, const ExecutionSpace& ex, + InputIterator first1, InputIterator last1, + OutputIterator d_first, UnaryOperation unary_op) { + return Impl::transform_exespace_impl(label, ex, first1, last1, d_first, + std::move(unary_op)); } -template +template , int> = 0> auto transform(const ExecutionSpace& ex, const ::Kokkos::View& source, ::Kokkos::View& dest, @@ -55,13 +63,14 @@ auto transform(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::transform_impl("Kokkos::transform_view_api_default", ex, - begin(source), end(source), begin(dest), - std::move(unary_op)); + return Impl::transform_exespace_impl("Kokkos::transform_view_api_default", ex, + begin(source), end(source), begin(dest), + std::move(unary_op)); } -template +template , int> = 0> auto transform(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, ::Kokkos::View& dest, @@ -69,38 +78,44 @@ auto transform(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::transform_impl(label, ex, begin(source), end(source), - begin(dest), std::move(unary_op)); + return Impl::transform_exespace_impl(label, ex, begin(source), end(source), + 
begin(dest), std::move(unary_op)); } -template -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - InputIterator1, InputIterator2, OutputIterator>::value, - OutputIterator> -transform(const ExecutionSpace& ex, InputIterator1 first1, InputIterator1 last1, - InputIterator2 first2, OutputIterator d_first, - BinaryOperation binary_op) { - return Impl::transform_impl("Kokkos::transform_iterator_api_default", ex, - first1, last1, first2, d_first, - std::move(binary_op)); +template < + typename ExecutionSpace, typename InputIterator1, typename InputIterator2, + typename OutputIterator, typename BinaryOperation, + std::enable_if_t< + Impl::are_iterators_v && + is_execution_space_v, + int> = 0> +OutputIterator transform(const ExecutionSpace& ex, InputIterator1 first1, + InputIterator1 last1, InputIterator2 first2, + OutputIterator d_first, BinaryOperation binary_op) { + return Impl::transform_exespace_impl("Kokkos::transform_iterator_api_default", + ex, first1, last1, first2, d_first, + std::move(binary_op)); } -template -std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< - InputIterator1, InputIterator2, OutputIterator>::value, - OutputIterator> -transform(const std::string& label, const ExecutionSpace& ex, - InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, - OutputIterator d_first, BinaryOperation binary_op) { - return Impl::transform_impl(label, ex, first1, last1, first2, d_first, - std::move(binary_op)); +template < + typename ExecutionSpace, typename InputIterator1, typename InputIterator2, + typename OutputIterator, typename BinaryOperation, + std::enable_if_t< + Impl::are_iterators_v && + is_execution_space_v, + int> = 0> +OutputIterator transform(const std::string& label, const ExecutionSpace& ex, + InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, OutputIterator d_first, + BinaryOperation binary_op) { + return Impl::transform_exespace_impl(label, ex, first1, last1, first2, + d_first, 
std::move(binary_op)); } -template +template , int> = 0> auto transform(const ExecutionSpace& ex, const ::Kokkos::View& source1, const ::Kokkos::View& source2, @@ -110,14 +125,15 @@ auto transform(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source2); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::transform_impl("Kokkos::transform_view_api_default", ex, - begin(source1), end(source1), begin(source2), - begin(dest), std::move(binary_op)); + return Impl::transform_exespace_impl( + "Kokkos::transform_view_api_default", ex, begin(source1), end(source1), + begin(source2), begin(dest), std::move(binary_op)); } -template +template , int> = 0> auto transform(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source1, const ::Kokkos::View& source2, @@ -127,9 +143,79 @@ auto transform(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source2); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::transform_impl(label, ex, begin(source1), end(source1), - begin(source2), begin(dest), - std::move(binary_op)); + return Impl::transform_exespace_impl(label, ex, begin(source1), end(source1), + begin(source2), begin(dest), + std::move(binary_op)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template < + typename TeamHandleType, typename InputIterator, typename OutputIterator, + typename UnaryOperation, + std::enable_if_t && + is_team_handle_v, + int> = 0> +KOKKOS_FUNCTION OutputIterator transform(const TeamHandleType& teamHandle, + InputIterator first1, + InputIterator last1, + OutputIterator d_first, + UnaryOperation unary_op) { + return Impl::transform_team_impl(teamHandle, first1, last1, d_first, + std::move(unary_op)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto transform( + const TeamHandleType& teamHandle, + const ::Kokkos::View& source, + ::Kokkos::View& dest, UnaryOperation unary_op) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::transform_team_impl(teamHandle, begin(source), end(source), + begin(dest), std::move(unary_op)); +} + +template < + typename TeamHandleType, typename InputIterator1, typename InputIterator2, + typename OutputIterator, typename BinaryOperation, + std::enable_if_t< + Impl::are_iterators_v && + is_team_handle_v, + int> = 0> +KOKKOS_FUNCTION OutputIterator transform(const TeamHandleType& teamHandle, + InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputIterator d_first, + BinaryOperation binary_op) { + return Impl::transform_team_impl(teamHandle, first1, last1, first2, d_first, + std::move(binary_op)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto transform( + const TeamHandleType& teamHandle, + const ::Kokkos::View& source1, + const ::Kokkos::View& source2, + ::Kokkos::View& dest, + BinaryOperation binary_op) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source1); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source2); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::transform_team_impl(teamHandle, begin(source1), end(source1), + begin(source2), begin(dest), + std::move(binary_op)); } } // namespace 
Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_TransformExclusiveScan.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_TransformExclusiveScan.hpp index 9d85aee06f..37fc0f860e 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_TransformExclusiveScan.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_TransformExclusiveScan.hpp @@ -23,44 +23,52 @@ namespace Kokkos { namespace Experimental { -template -std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -transform_exclusive_scan(const ExecutionSpace& ex, InputIteratorType first, - InputIteratorType last, OutputIteratorType first_dest, - ValueType init_value, BinaryOpType binary_op, - UnaryOpType unary_op) { +// +// overload set accepting execution space +// +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType transform_exclusive_scan( + const ExecutionSpace& ex, InputIteratorType first, InputIteratorType last, + OutputIteratorType first_dest, ValueType init_value, BinaryOpType binary_op, + UnaryOpType unary_op) { Impl::static_assert_is_not_openmptarget(ex); - static_assert(std::is_move_constructible::value, + static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); - return Impl::transform_exclusive_scan_impl( + return Impl::transform_exclusive_scan_exespace_impl( "Kokkos::transform_exclusive_scan_custom_functors_iterator_api", ex, - first, last, first_dest, init_value, binary_op, unary_op); + first, last, first_dest, std::move(init_value), binary_op, unary_op); } -template -std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -transform_exclusive_scan(const std::string& label, const ExecutionSpace& ex, - InputIteratorType first, InputIteratorType last, - OutputIteratorType first_dest, ValueType init_value, - BinaryOpType binary_op, UnaryOpType unary_op) { 
+template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType transform_exclusive_scan( + const std::string& label, const ExecutionSpace& ex, InputIteratorType first, + InputIteratorType last, OutputIteratorType first_dest, ValueType init_value, + BinaryOpType binary_op, UnaryOpType unary_op) { Impl::static_assert_is_not_openmptarget(ex); - static_assert(std::is_move_constructible::value, + static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); - return Impl::transform_exclusive_scan_impl(label, ex, first, last, first_dest, - init_value, binary_op, unary_op); + return Impl::transform_exclusive_scan_exespace_impl( + label, ex, first, last, first_dest, std::move(init_value), binary_op, + unary_op); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename ValueType, + typename BinaryOpType, typename UnaryOpType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto transform_exclusive_scan( const ExecutionSpace& ex, const ::Kokkos::View& view_from, @@ -69,18 +77,20 @@ auto transform_exclusive_scan( Impl::static_assert_is_not_openmptarget(ex); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - static_assert(std::is_move_constructible::value, + static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); namespace KE = ::Kokkos::Experimental; - return Impl::transform_exclusive_scan_impl( + return Impl::transform_exclusive_scan_exespace_impl( "Kokkos::transform_exclusive_scan_custom_functors_view_api", ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), - init_value, binary_op, unary_op); + std::move(init_value), binary_op, unary_op); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename ValueType, + typename BinaryOpType, typename UnaryOpType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto transform_exclusive_scan( const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, @@ -89,12 +99,56 @@ auto transform_exclusive_scan( Impl::static_assert_is_not_openmptarget(ex); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); - static_assert(std::is_move_constructible::value, + static_assert(std::is_move_constructible_v, "ValueType must be move constructible."); namespace KE = ::Kokkos::Experimental; - return Impl::transform_exclusive_scan_impl( + return Impl::transform_exclusive_scan_exespace_impl( label, ex, KE::cbegin(view_from), KE::cend(view_from), - KE::begin(view_dest), init_value, binary_op, unary_op); + KE::begin(view_dest), std::move(init_value), binary_op, unary_op); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template && :: + Kokkos::is_team_handle_v, + int> = 0> +KOKKOS_FUNCTION OutputIteratorType transform_exclusive_scan( + const TeamHandleType& teamHandle, InputIteratorType first, + InputIteratorType last, OutputIteratorType first_dest, ValueType init_value, + BinaryOpType binary_op, UnaryOpType unary_op) { + Impl::static_assert_is_not_openmptarget(teamHandle); + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + return Impl::transform_exclusive_scan_team_impl( + teamHandle, first, last, first_dest, std::move(init_value), binary_op, + unary_op); +} + +template , int> = 0> +KOKKOS_FUNCTION auto transform_exclusive_scan( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest, + ValueType init_value, BinaryOpType binary_op, UnaryOpType unary_op) { + Impl::static_assert_is_not_openmptarget(teamHandle); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + namespace KE = ::Kokkos::Experimental; + return Impl::transform_exclusive_scan_team_impl( + teamHandle, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest), std::move(init_value), binary_op, unary_op); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_TransformInclusiveScan.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_TransformInclusiveScan.hpp index 7489af7e37..5f694dbfd9 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_TransformInclusiveScan.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_TransformInclusiveScan.hpp @@ -23,40 +23,53 @@ namespace Kokkos { namespace Experimental { +// +// overload set accepting execution space +// + // overload set 1 (no init value) -template -std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, 
OutputIteratorType>::value, - OutputIteratorType> -transform_inclusive_scan(const ExecutionSpace& ex, InputIteratorType first, - InputIteratorType last, OutputIteratorType first_dest, - BinaryOpType binary_op, UnaryOpType unary_op) { +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType transform_inclusive_scan(const ExecutionSpace& ex, + InputIteratorType first, + InputIteratorType last, + OutputIteratorType first_dest, + BinaryOpType binary_op, + UnaryOpType unary_op) { Impl::static_assert_is_not_openmptarget(ex); - return Impl::transform_inclusive_scan_impl( + return Impl::transform_inclusive_scan_exespace_impl( "Kokkos::transform_inclusive_scan_custom_functors_iterator_api", ex, first, last, first_dest, binary_op, unary_op); } -template -std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -transform_inclusive_scan(const std::string& label, const ExecutionSpace& ex, - InputIteratorType first, InputIteratorType last, - OutputIteratorType first_dest, BinaryOpType binary_op, - UnaryOpType unary_op) { +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType transform_inclusive_scan( + const std::string& label, const ExecutionSpace& ex, InputIteratorType first, + InputIteratorType last, OutputIteratorType first_dest, + BinaryOpType binary_op, UnaryOpType unary_op) { Impl::static_assert_is_not_openmptarget(ex); - return Impl::transform_inclusive_scan_impl(label, ex, first, last, first_dest, - binary_op, unary_op); + return Impl::transform_inclusive_scan_exespace_impl( + label, ex, first, last, first_dest, binary_op, unary_op); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename BinaryOpType, + typename UnaryOpType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto transform_inclusive_scan( const ExecutionSpace& ex, const ::Kokkos::View& view_from, @@ -66,15 +79,17 @@ auto transform_inclusive_scan( Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); namespace KE = ::Kokkos::Experimental; - return Impl::transform_inclusive_scan_impl( + return Impl::transform_inclusive_scan_exespace_impl( "Kokkos::transform_inclusive_scan_custom_functors_view_api", ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), binary_op, unary_op); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename BinaryOpType, + typename UnaryOpType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto transform_inclusive_scan( const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, @@ -84,46 +99,59 @@ auto transform_inclusive_scan( Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); namespace KE = ::Kokkos::Experimental; - return Impl::transform_inclusive_scan_impl( + return Impl::transform_inclusive_scan_exespace_impl( label, ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), binary_op, unary_op); } // overload set 2 (init value) -template -std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -transform_inclusive_scan(const ExecutionSpace& ex, InputIteratorType first, - InputIteratorType last, OutputIteratorType first_dest, - BinaryOpType binary_op, UnaryOpType unary_op, - ValueType init_value) { +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType transform_inclusive_scan( + const 
ExecutionSpace& ex, InputIteratorType first, InputIteratorType last, + OutputIteratorType first_dest, BinaryOpType binary_op, UnaryOpType unary_op, + ValueType init_value) { Impl::static_assert_is_not_openmptarget(ex); - return Impl::transform_inclusive_scan_impl( + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + + return Impl::transform_inclusive_scan_exespace_impl( "Kokkos::transform_inclusive_scan_custom_functors_iterator_api", ex, - first, last, first_dest, binary_op, unary_op, init_value); + first, last, first_dest, binary_op, unary_op, std::move(init_value)); } -template -std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< - InputIteratorType, OutputIteratorType>::value, - OutputIteratorType> -transform_inclusive_scan(const std::string& label, const ExecutionSpace& ex, - InputIteratorType first, InputIteratorType last, - OutputIteratorType first_dest, BinaryOpType binary_op, - UnaryOpType unary_op, ValueType init_value) { +template && :: + Kokkos::is_execution_space_v, + int> = 0> +OutputIteratorType transform_inclusive_scan( + const std::string& label, const ExecutionSpace& ex, InputIteratorType first, + InputIteratorType last, OutputIteratorType first_dest, + BinaryOpType binary_op, UnaryOpType unary_op, ValueType init_value) { Impl::static_assert_is_not_openmptarget(ex); - return Impl::transform_inclusive_scan_impl(label, ex, first, last, first_dest, - binary_op, unary_op, init_value); + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + + return Impl::transform_inclusive_scan_exespace_impl( + label, ex, first, last, first_dest, binary_op, unary_op, + std::move(init_value)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename BinaryOpType, + typename UnaryOpType, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto transform_inclusive_scan( const ExecutionSpace& ex, const ::Kokkos::View& view_from, @@ -132,16 +160,21 @@ auto transform_inclusive_scan( Impl::static_assert_is_not_openmptarget(ex); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + namespace KE = ::Kokkos::Experimental; - return Impl::transform_inclusive_scan_impl( + return Impl::transform_inclusive_scan_exespace_impl( "Kokkos::transform_inclusive_scan_custom_functors_view_api", ex, KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), - binary_op, unary_op, init_value); + binary_op, unary_op, std::move(init_value)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename BinaryOpType, + typename UnaryOpType, typename ValueType, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto transform_inclusive_scan( const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view_from, @@ -150,10 +183,97 @@ auto transform_inclusive_scan( Impl::static_assert_is_not_openmptarget(ex); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + namespace KE = ::Kokkos::Experimental; - return Impl::transform_inclusive_scan_impl( + return Impl::transform_inclusive_scan_exespace_impl( label, ex, KE::cbegin(view_from), KE::cend(view_from), - KE::begin(view_dest), binary_op, unary_op, init_value); + KE::begin(view_dest), binary_op, unary_op, std::move(init_value)); +} + +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// + +// overload set 1 (no init value) +template && + Kokkos::is_team_handle_v, + int> = 0> +KOKKOS_FUNCTION OutputIteratorType transform_inclusive_scan( + const TeamHandleType& teamHandle, InputIteratorType first, + InputIteratorType last, OutputIteratorType first_dest, + BinaryOpType binary_op, UnaryOpType unary_op) { + Impl::static_assert_is_not_openmptarget(teamHandle); + + return Impl::transform_inclusive_scan_team_impl( + teamHandle, first, last, first_dest, binary_op, unary_op); +} + +template , int> = 0> +KOKKOS_FUNCTION auto transform_inclusive_scan( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest, + BinaryOpType binary_op, UnaryOpType unary_op) { + Impl::static_assert_is_not_openmptarget(teamHandle); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + namespace KE = ::Kokkos::Experimental; + return Impl::transform_inclusive_scan_team_impl( + teamHandle, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest), binary_op, unary_op); +} + +// overload set 2 (init value) +template && + Kokkos::is_team_handle_v, + int> = 0> +KOKKOS_FUNCTION OutputIteratorType transform_inclusive_scan( + const TeamHandleType& teamHandle, InputIteratorType first, + InputIteratorType last, OutputIteratorType first_dest, + BinaryOpType binary_op, UnaryOpType unary_op, ValueType init_value) { + Impl::static_assert_is_not_openmptarget(teamHandle); + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + + return Impl::transform_inclusive_scan_team_impl( + teamHandle, first, last, first_dest, binary_op, unary_op, + std::move(init_value)); +} + +template , int> = 0> +KOKKOS_FUNCTION auto transform_inclusive_scan( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest, + BinaryOpType binary_op, UnaryOpType unary_op, ValueType init_value) { 
+ Impl::static_assert_is_not_openmptarget(teamHandle); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + static_assert(std::is_move_constructible_v, + "ValueType must be move constructible."); + + namespace KE = ::Kokkos::Experimental; + return Impl::transform_inclusive_scan_team_impl( + teamHandle, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest), binary_op, unary_op, std::move(init_value)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_TransformReduce.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_TransformReduce.hpp index b5ec9066d2..101f5113f6 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_TransformReduce.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_TransformReduce.hpp @@ -23,34 +23,44 @@ namespace Kokkos { namespace Experimental { +// +// overload set accepting execution space +// + // ---------------------------- // overload set1: // no custom functors passed, so equivalent to // transform_reduce(first1, last1, first2, init, plus<>(), multiplies<>()); // ---------------------------- -template +template ::value, + int> = 0> ValueType transform_reduce(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, ValueType init_reduction_value) { - return Impl::transform_reduce_default_functors_impl( + return Impl::transform_reduce_default_functors_exespace_impl( "Kokkos::transform_reduce_default_functors_iterator_api", ex, first1, last1, first2, std::move(init_reduction_value)); } -template +template ::value, + int> = 0> ValueType transform_reduce(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, ValueType init_reduction_value) { - return Impl::transform_reduce_default_functors_impl( + return Impl::transform_reduce_default_functors_exespace_impl( label, ex, first1, last1, 
first2, std::move(init_reduction_value)); } // overload1 accepting views -template +template ::value, + int> = 0> ValueType transform_reduce( const ExecutionSpace& ex, const ::Kokkos::View& first_view, @@ -60,14 +70,16 @@ ValueType transform_reduce( Impl::static_assert_is_admissible_to_kokkos_std_algorithms(first_view); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(second_view); - return Impl::transform_reduce_default_functors_impl( + return Impl::transform_reduce_default_functors_exespace_impl( "Kokkos::transform_reduce_default_functors_iterator_api", ex, KE::cbegin(first_view), KE::cend(first_view), KE::cbegin(second_view), std::move(init_reduction_value)); } -template +template ::value, + int> = 0> ValueType transform_reduce( const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& first_view, @@ -77,7 +89,7 @@ ValueType transform_reduce( Impl::static_assert_is_admissible_to_kokkos_std_algorithms(first_view); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(second_view); - return Impl::transform_reduce_default_functors_impl( + return Impl::transform_reduce_default_functors_exespace_impl( label, ex, KE::cbegin(first_view), KE::cend(first_view), KE::cbegin(second_view), std::move(init_reduction_value)); } @@ -95,8 +107,11 @@ ValueType transform_reduce( // https://en.cppreference.com/w/cpp/algorithm/transform_reduce // api accepting iterators -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + typename ValueType, typename BinaryJoinerType, typename BinaryTransform, + std::enable_if_t<::Kokkos::is_execution_space::value, int> = + 0> ValueType transform_reduce(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, ValueType init_reduction_value, @@ -105,14 +120,17 @@ ValueType transform_reduce(const ExecutionSpace& ex, IteratorType1 first1, static_assert(std::is_move_constructible::value, "ValueType must be move constructible."); - return 
Impl::transform_reduce_custom_functors_impl( + return Impl::transform_reduce_custom_functors_exespace_impl( "Kokkos::transform_reduce_custom_functors_iterator_api", ex, first1, last1, first2, std::move(init_reduction_value), std::move(joiner), std::move(transformer)); } -template +template < + typename ExecutionSpace, typename IteratorType1, typename IteratorType2, + typename ValueType, typename BinaryJoinerType, typename BinaryTransform, + std::enable_if_t<::Kokkos::is_execution_space::value, int> = + 0> ValueType transform_reduce(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, ValueType init_reduction_value, @@ -121,15 +139,17 @@ ValueType transform_reduce(const std::string& label, const ExecutionSpace& ex, static_assert(std::is_move_constructible::value, "ValueType must be move constructible."); - return Impl::transform_reduce_custom_functors_impl( + return Impl::transform_reduce_custom_functors_exespace_impl( label, ex, first1, last1, first2, std::move(init_reduction_value), std::move(joiner), std::move(transformer)); } // accepting views -template +template ::value, + int> = 0> ValueType transform_reduce( const ExecutionSpace& ex, const ::Kokkos::View& first_view, @@ -143,16 +163,18 @@ ValueType transform_reduce( Impl::static_assert_is_admissible_to_kokkos_std_algorithms(first_view); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(second_view); - return Impl::transform_reduce_custom_functors_impl( + return Impl::transform_reduce_custom_functors_exespace_impl( "Kokkos::transform_reduce_custom_functors_view_api", ex, KE::cbegin(first_view), KE::cend(first_view), KE::cbegin(second_view), std::move(init_reduction_value), std::move(joiner), std::move(transformer)); } -template +template ::value, + int> = 0> ValueType transform_reduce( const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& first_view, @@ -166,7 +188,7 @@ ValueType transform_reduce( 
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(first_view); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(second_view); - return Impl::transform_reduce_custom_functors_impl( + return Impl::transform_reduce_custom_functors_exespace_impl( label, ex, KE::cbegin(first_view), KE::cend(first_view), KE::cbegin(second_view), std::move(init_reduction_value), std::move(joiner), std::move(transformer)); @@ -176,43 +198,50 @@ ValueType transform_reduce( // overload set3: // // accepting iterators -template -// need this to avoid ambiguous call -std::enable_if_t< - ::Kokkos::Experimental::Impl::are_iterators::value, ValueType> -transform_reduce(const ExecutionSpace& ex, IteratorType first1, - IteratorType last1, ValueType init_reduction_value, - BinaryJoinerType joiner, UnaryTransform transformer) { +template ::value && + is_execution_space::value, + int> = 0> +ValueType transform_reduce(const ExecutionSpace& ex, IteratorType first1, + IteratorType last1, ValueType init_reduction_value, + BinaryJoinerType joiner, + UnaryTransform transformer) { static_assert(std::is_move_constructible::value, "ValueType must be move constructible."); - return Impl::transform_reduce_custom_functors_impl( + return Impl::transform_reduce_custom_functors_exespace_impl( "Kokkos::transform_reduce_custom_functors_iterator_api", ex, first1, last1, std::move(init_reduction_value), std::move(joiner), std::move(transformer)); } -template -// need this to avoid ambiguous call -std::enable_if_t< - ::Kokkos::Experimental::Impl::are_iterators::value, ValueType> -transform_reduce(const std::string& label, const ExecutionSpace& ex, - IteratorType first1, IteratorType last1, - ValueType init_reduction_value, BinaryJoinerType joiner, - UnaryTransform transformer) { +template ::value && + is_execution_space::value, + int> = 0> +ValueType transform_reduce(const std::string& label, const ExecutionSpace& ex, + IteratorType first1, IteratorType last1, + ValueType init_reduction_value, + 
BinaryJoinerType joiner, + UnaryTransform transformer) { static_assert(std::is_move_constructible::value, "ValueType must be move constructible."); - return Impl::transform_reduce_custom_functors_impl( + return Impl::transform_reduce_custom_functors_exespace_impl( label, ex, first1, last1, std::move(init_reduction_value), std::move(joiner), std::move(transformer)); } // accepting views -template +template < + typename ExecutionSpace, typename DataType, typename... Properties, + typename ValueType, typename BinaryJoinerType, typename UnaryTransform, + std::enable_if_t<::Kokkos::is_execution_space::value, int> = + 0> ValueType transform_reduce(const ExecutionSpace& ex, const ::Kokkos::View& view, ValueType init_reduction_value, @@ -224,14 +253,17 @@ ValueType transform_reduce(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::transform_reduce_custom_functors_impl( + return Impl::transform_reduce_custom_functors_exespace_impl( "Kokkos::transform_reduce_custom_functors_view_api", ex, KE::cbegin(view), KE::cend(view), std::move(init_reduction_value), std::move(joiner), std::move(transformer)); } -template +template < + typename ExecutionSpace, typename DataType, typename... 
Properties, + typename ValueType, typename BinaryJoinerType, typename UnaryTransform, + std::enable_if_t<::Kokkos::is_execution_space::value, int> = + 0> ValueType transform_reduce(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, ValueType init_reduction_value, @@ -243,12 +275,154 @@ ValueType transform_reduce(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::transform_reduce_custom_functors_impl( + return Impl::transform_reduce_custom_functors_exespace_impl( label, ex, KE::cbegin(view), KE::cend(view), std::move(init_reduction_value), std::move(joiner), std::move(transformer)); } +// +// overload set accepting a team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// + +// ---------------------------- +// overload set1: +// no custom functors passed, so equivalent to +// transform_reduce(first1, last1, first2, init, plus<>(), multiplies<>()); +// ---------------------------- +template < + typename TeamHandleType, typename IteratorType1, typename IteratorType2, + typename ValueType, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION ValueType transform_reduce(const TeamHandleType& teamHandle, + IteratorType1 first1, + IteratorType1 last1, + IteratorType2 first2, + ValueType init_reduction_value) { + return Impl::transform_reduce_default_functors_team_impl( + teamHandle, first1, last1, first2, std::move(init_reduction_value)); +} + +// overload1 accepting views +template < + typename TeamHandleType, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename ValueType, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION ValueType +transform_reduce(const TeamHandleType& teamHandle, + const ::Kokkos::View& first_view, + const ::Kokkos::View& second_view, + ValueType init_reduction_value) { + namespace KE = ::Kokkos::Experimental; + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(first_view); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(second_view); + + return Impl::transform_reduce_default_functors_team_impl( + teamHandle, KE::cbegin(first_view), KE::cend(first_view), + KE::cbegin(second_view), std::move(init_reduction_value)); +} + +// +// overload set2: +// accepts a custom transform and joiner functor +// + +// Note the std refers to the arg BinaryReductionOp +// but in the Kokkos naming convention, it corresponds +// to a "joiner" that knows how to join two values +// NOTE: "joiner/transformer" need to be commutative. + +// https://en.cppreference.com/w/cpp/algorithm/transform_reduce + +// api accepting iterators +template < + typename TeamHandleType, typename IteratorType1, typename IteratorType2, + typename ValueType, typename BinaryJoinerType, typename BinaryTransform, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION ValueType transform_reduce( + const TeamHandleType& teamHandle, IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, ValueType init_reduction_value, + BinaryJoinerType joiner, BinaryTransform transformer) { + static_assert(std::is_move_constructible::value, + "ValueType must be move constructible."); + + return Impl::transform_reduce_custom_functors_team_impl( + teamHandle, first1, last1, first2, std::move(init_reduction_value), + std::move(joiner), std::move(transformer)); +} + +// accepting views +template < + typename TeamHandleType, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename ValueType, + typename BinaryJoinerType, typename BinaryTransform, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION ValueType +transform_reduce(const TeamHandleType& teamHandle, + const ::Kokkos::View& first_view, + const ::Kokkos::View& second_view, + ValueType init_reduction_value, BinaryJoinerType joiner, + BinaryTransform transformer) { + namespace KE = ::Kokkos::Experimental; + static_assert(std::is_move_constructible::value, + "ValueType must be move constructible."); + + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(first_view); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(second_view); + + return Impl::transform_reduce_custom_functors_team_impl( + teamHandle, KE::cbegin(first_view), KE::cend(first_view), + KE::cbegin(second_view), std::move(init_reduction_value), + std::move(joiner), std::move(transformer)); +} + +// +// overload set3: +// +// accepting iterators +template ::value && + is_team_handle::value, + int> = 0> +KOKKOS_FUNCTION ValueType transform_reduce(const TeamHandleType& teamHandle, + IteratorType first1, + IteratorType last1, + ValueType init_reduction_value, + BinaryJoinerType joiner, + UnaryTransform transformer) { + static_assert(std::is_move_constructible::value, + "ValueType must be move constructible."); + + return Impl::transform_reduce_custom_functors_team_impl( + teamHandle, first1, last1, std::move(init_reduction_value), + std::move(joiner), std::move(transformer)); +} + +// accepting views +template < + typename TeamHandleType, typename DataType, typename... 
Properties, + typename ValueType, typename BinaryJoinerType, typename UnaryTransform, + std::enable_if_t<::Kokkos::is_team_handle::value, int> = 0> +KOKKOS_FUNCTION ValueType +transform_reduce(const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + ValueType init_reduction_value, BinaryJoinerType joiner, + UnaryTransform transformer) { + namespace KE = ::Kokkos::Experimental; + static_assert(std::is_move_constructible::value, + "ValueType must be move constructible."); + + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + return Impl::transform_reduce_custom_functors_team_impl( + teamHandle, KE::cbegin(view), KE::cend(view), + std::move(init_reduction_value), std::move(joiner), + std::move(transformer)); +} + } // namespace Experimental } // namespace Kokkos diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Unique.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Unique.hpp index b47ecffb20..2d56315f61 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Unique.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Unique.hpp @@ -23,71 +23,132 @@ namespace Kokkos { namespace Experimental { -// note: the enable_if below is to avoid "call to ... 
is ambiguous" -// for example in the unit test when using a variadic function - -// overload set1 -template -std::enable_if_t::value, IteratorType> unique( - const ExecutionSpace& ex, IteratorType first, IteratorType last) { - return Impl::unique_impl("Kokkos::unique_iterator_api_default", ex, first, - last); +// +// overload set1: default predicate, accepting execution space +// +template && + is_execution_space::value, + int> = 0> +IteratorType unique(const ExecutionSpace& ex, IteratorType first, + IteratorType last) { + return Impl::unique_exespace_impl("Kokkos::unique_iterator_api_default", ex, + first, last); } -template -std::enable_if_t::value, IteratorType> unique( - const std::string& label, const ExecutionSpace& ex, IteratorType first, - IteratorType last) { - return Impl::unique_impl(label, ex, first, last); +template && + is_execution_space::value, + int> = 0> +IteratorType unique(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last) { + return Impl::unique_exespace_impl(label, ex, first, last); } -template +template ::value, int> = 0> auto unique(const ExecutionSpace& ex, const ::Kokkos::View& view) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return ::Kokkos::Experimental::unique("Kokkos::unique_view_api_default", ex, - begin(view), end(view)); + return Impl::unique_exespace_impl("Kokkos::unique_view_api_default", ex, + begin(view), end(view)); } -template +template ::value, int> = 0> auto unique(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return ::Kokkos::Experimental::unique(label, ex, begin(view), end(view)); + return Impl::unique_exespace_impl(label, ex, begin(view), end(view)); } -// overload set2 -template +// +// overload set2: custom predicate, accepting execution space +// +template ::value, int> = 0> IteratorType unique(const ExecutionSpace& ex, IteratorType first, 
IteratorType last, BinaryPredicate pred) { - return Impl::unique_impl("Kokkos::unique_iterator_api_default", ex, first, - last, pred); + return Impl::unique_exespace_impl("Kokkos::unique_iterator_api_default", ex, + first, last, pred); } -template +template ::value, int> = 0> IteratorType unique(const std::string& label, const ExecutionSpace& ex, IteratorType first, IteratorType last, BinaryPredicate pred) { - return Impl::unique_impl(label, ex, first, last, pred); + return Impl::unique_exespace_impl(label, ex, first, last, pred); } -template +template ::value, int> = 0> auto unique(const ExecutionSpace& ex, const ::Kokkos::View& view, BinaryPredicate pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::unique_impl("Kokkos::unique_view_api_default", ex, begin(view), - end(view), std::move(pred)); + return Impl::unique_exespace_impl("Kokkos::unique_view_api_default", ex, + begin(view), end(view), std::move(pred)); } -template +template ::value, int> = 0> auto unique(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& view, BinaryPredicate pred) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); - return Impl::unique_impl(label, ex, begin(view), end(view), std::move(pred)); + return Impl::unique_exespace_impl(label, ex, begin(view), end(view), + std::move(pred)); +} + +// +// overload set3: default predicate, accepting team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template && + is_team_handle::value, + int> = 0> +KOKKOS_FUNCTION IteratorType unique(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last) { + return Impl::unique_team_impl(teamHandle, first, last); +} + +template ::value, int> = 0> +KOKKOS_FUNCTION auto unique( + const TeamHandleType& teamHandle, + const ::Kokkos::View& view) { + return Impl::unique_team_impl(teamHandle, begin(view), end(view)); +} + +// +// overload set4: custom predicate, accepting team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template ::value, int> = 0> +KOKKOS_FUNCTION IteratorType unique(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + BinaryPredicate pred) { + return Impl::unique_team_impl(teamHandle, first, last, std::move(pred)); +} + +template ::value, int> = 0> +KOKKOS_FUNCTION auto unique(const TeamHandleType& teamHandle, + const ::Kokkos::View& view, + BinaryPredicate pred) { + return Impl::unique_team_impl(teamHandle, begin(view), end(view), + std::move(pred)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_UniqueCopy.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_UniqueCopy.hpp index bd2451c220..4a32d7e095 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_UniqueCopy.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_UniqueCopy.hpp @@ -23,67 +23,90 @@ namespace Kokkos { namespace Experimental { -// overload set1 -template -std::enable_if_t::value, OutputIterator> -unique_copy(const ExecutionSpace& ex, InputIterator first, InputIterator last, - OutputIterator d_first) { - return Impl::unique_copy_impl("Kokkos::unique_copy_iterator_api_default", ex, - first, last, d_first); +// +// overload set1: default predicate, accepting execution space +// +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + std::enable_if_t && + 
is_execution_space_v, + int> = 0> +OutputIterator unique_copy(const ExecutionSpace& ex, InputIterator first, + InputIterator last, OutputIterator d_first) { + return Impl::unique_copy_exespace_impl( + "Kokkos::unique_copy_iterator_api_default", ex, first, last, d_first); } -template -std::enable_if_t::value, OutputIterator> -unique_copy(const std::string& label, const ExecutionSpace& ex, - InputIterator first, InputIterator last, OutputIterator d_first) { - return Impl::unique_copy_impl(label, ex, first, last, d_first); +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + std::enable_if_t && + is_execution_space_v, + int> = 0> +OutputIterator unique_copy(const std::string& label, const ExecutionSpace& ex, + InputIterator first, InputIterator last, + OutputIterator d_first) { + return Impl::unique_copy_exespace_impl(label, ex, first, last, d_first); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto unique_copy(const ExecutionSpace& ex, const ::Kokkos::View& source, const ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return ::Kokkos::Experimental::unique_copy( - "Kokkos::unique_copy_view_api_default", ex, cbegin(source), cend(source), - begin(dest)); + return Impl::unique_copy_exespace_impl("Kokkos::unique_copy_view_api_default", + ex, cbegin(source), cend(source), + begin(dest)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto unique_copy(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, const ::Kokkos::View& dest) { Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return ::Kokkos::Experimental::unique_copy(label, ex, cbegin(source), - cend(source), begin(dest)); + return Impl::unique_copy_exespace_impl(label, ex, cbegin(source), + cend(source), begin(dest)); } -// overload set2 -template +// +// overload set2: custom predicate, accepting execution space +// + +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + typename BinaryPredicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator unique_copy(const ExecutionSpace& ex, InputIterator first, InputIterator last, OutputIterator d_first, BinaryPredicate pred) { - return Impl::unique_copy_impl("Kokkos::unique_copy_iterator_api_default", ex, - first, last, d_first, pred); + return Impl::unique_copy_exespace_impl( + "Kokkos::unique_copy_iterator_api_default", ex, first, last, d_first, + pred); } -template +template < + typename ExecutionSpace, typename InputIterator, typename OutputIterator, + typename BinaryPredicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> OutputIterator unique_copy(const std::string& label, const ExecutionSpace& ex, InputIterator first, InputIterator last, OutputIterator d_first, BinaryPredicate pred) { - return Impl::unique_copy_impl(label, ex, first, last, d_first, pred); + return Impl::unique_copy_exespace_impl(label, ex, first, last, d_first, pred); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... 
Properties2, typename BinaryPredicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto unique_copy(const ExecutionSpace& ex, const ::Kokkos::View& source, const ::Kokkos::View& dest, @@ -91,13 +114,15 @@ auto unique_copy(const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::unique_copy_impl("Kokkos::unique_copy_view_api_default", ex, - cbegin(source), cend(source), begin(dest), - std::move(pred)); + return Impl::unique_copy_exespace_impl("Kokkos::unique_copy_view_api_default", + ex, cbegin(source), cend(source), + begin(dest), std::move(pred)); } -template +template < + typename ExecutionSpace, typename DataType1, typename... Properties1, + typename DataType2, typename... Properties2, typename BinaryPredicate, + std::enable_if_t<::Kokkos::is_execution_space_v, int> = 0> auto unique_copy(const std::string& label, const ExecutionSpace& ex, const ::Kokkos::View& source, const ::Kokkos::View& dest, @@ -105,8 +130,70 @@ auto unique_copy(const std::string& label, const ExecutionSpace& ex, Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); - return Impl::unique_copy_impl(label, ex, cbegin(source), cend(source), - begin(dest), std::move(pred)); + return Impl::unique_copy_exespace_impl( + label, ex, cbegin(source), cend(source), begin(dest), std::move(pred)); +} + +// +// overload set3: default predicate, accepting team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. 
+// +template < + typename TeamHandleType, typename InputIterator, typename OutputIterator, + std::enable_if_t && + Kokkos::is_team_handle_v, + int> = 0> +KOKKOS_FUNCTION OutputIterator unique_copy(const TeamHandleType& teamHandle, + InputIterator first, + InputIterator last, + OutputIterator d_first) { + return Impl::unique_copy_team_impl(teamHandle, first, last, d_first); +} + +template , int> = 0> +KOKKOS_FUNCTION auto unique_copy( + const TeamHandleType& teamHandle, + const ::Kokkos::View& source, + const ::Kokkos::View& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::unique_copy_team_impl(teamHandle, cbegin(source), cend(source), + begin(dest)); +} + +// +// overload set4: custom predicate, accepting team handle +// Note: for now omit the overloads accepting a label +// since they cause issues on device because of the string allocation. +// +template , int> = 0> +KOKKOS_FUNCTION OutputIterator unique_copy(const TeamHandleType& teamHandle, + InputIterator first, + InputIterator last, + OutputIterator d_first, + BinaryPredicate pred) { + return Impl::unique_copy_team_impl(teamHandle, first, last, d_first, pred); +} + +template , int> = 0> +KOKKOS_FUNCTION auto unique_copy( + const TeamHandleType& teamHandle, + const ::Kokkos::View& source, + const ::Kokkos::View& dest, + BinaryPredicate pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::unique_copy_team_impl(teamHandle, cbegin(source), cend(source), + begin(dest), std::move(pred)); } } // namespace Experimental diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AdjacentDifference.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AdjacentDifference.hpp index 8a474508d7..a8171fa068 100644 --- 
a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AdjacentDifference.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AdjacentDifference.hpp @@ -63,14 +63,15 @@ struct StdAdjacentDiffFunctor { m_op(std::move(op)) {} }; +// +// exespace impl +// template -OutputIteratorType adjacent_difference_impl(const std::string& label, - const ExecutionSpace& ex, - InputIteratorType first_from, - InputIteratorType last_from, - OutputIteratorType first_dest, - BinaryOp bin_op) { +OutputIteratorType adjacent_difference_exespace_impl( + const std::string& label, const ExecutionSpace& ex, + InputIteratorType first_from, InputIteratorType last_from, + OutputIteratorType first_dest, BinaryOp bin_op) { // checks Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest); Impl::static_assert_iterators_have_matching_difference_type(first_from, @@ -81,20 +82,45 @@ OutputIteratorType adjacent_difference_impl(const std::string& label, return first_dest; } - // aliases - using value_type = typename OutputIteratorType::value_type; - using aux_view_type = ::Kokkos::View; - using functor_t = - StdAdjacentDiffFunctor; + // run + const auto num_elements = + Kokkos::Experimental::distance(first_from, last_from); + ::Kokkos::parallel_for( + label, RangePolicy(ex, 0, num_elements), + StdAdjacentDiffFunctor(first_from, first_dest, bin_op)); + ex.fence("Kokkos::adjacent_difference: fence after operation"); + + // return + return first_dest + num_elements; +} + +// +// team impl +// +template +KOKKOS_FUNCTION OutputIteratorType adjacent_difference_team_impl( + const TeamHandleType& teamHandle, InputIteratorType first_from, + InputIteratorType last_from, OutputIteratorType first_dest, + BinaryOp bin_op) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first_from, + first_dest); + Impl::static_assert_iterators_have_matching_difference_type(first_from, + first_dest); + Impl::expect_valid_range(first_from, last_from); + + if (first_from == 
last_from) { + return first_dest; + } // run const auto num_elements = Kokkos::Experimental::distance(first_from, last_from); - aux_view_type aux_view("aux_view", num_elements); - ::Kokkos::parallel_for(label, - RangePolicy(ex, 0, num_elements), - functor_t(first_from, first_dest, bin_op)); - ex.fence("Kokkos::adjacent_difference: fence after operation"); + ::Kokkos::parallel_for( + TeamThreadRange(teamHandle, 0, num_elements), + StdAdjacentDiffFunctor(first_from, first_dest, bin_op)); + teamHandle.team_barrier(); // return return first_dest + num_elements; diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AdjacentFind.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AdjacentFind.hpp index dd785e603b..f30b7be06a 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AdjacentFind.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AdjacentFind.hpp @@ -27,9 +27,9 @@ namespace Kokkos { namespace Experimental { namespace Impl { -template +template struct StdAdjacentFindFunctor { + using index_type = typename IteratorType::difference_type; using red_value_type = typename ReducerType::value_type; IteratorType m_first; @@ -37,13 +37,13 @@ struct StdAdjacentFindFunctor { PredicateType m_p; KOKKOS_FUNCTION - void operator()(const IndexType i, red_value_type& red_value) const { + void operator()(const index_type i, red_value_type& red_value) const { const auto& my_value = m_first[i]; const auto& next_value = m_first[i + 1]; const bool are_equal = m_p(my_value, next_value); // FIXME_NVHPC using a ternary operator causes problems - red_value_type value = {::Kokkos::reduction_identity::min()}; + red_value_type value = {::Kokkos::reduction_identity::min()}; if (are_equal) { value.min_loc_true = i; } @@ -59,10 +59,14 @@ struct StdAdjacentFindFunctor { m_p(std::move(p)) {} }; +// +// exespace impl +// template -IteratorType adjacent_find_impl(const std::string& label, - const ExecutionSpace& ex, IteratorType first, - IteratorType 
last, PredicateType pred) { +IteratorType adjacent_find_exespace_impl(const std::string& label, + const ExecutionSpace& ex, + IteratorType first, IteratorType last, + PredicateType pred) { // checks Impl::static_assert_random_access_and_accessible(ex, first); Impl::expect_valid_range(first, last); @@ -76,8 +80,6 @@ IteratorType adjacent_find_impl(const std::string& label, using index_type = typename IteratorType::difference_type; using reducer_type = FirstLoc; using reduction_value_type = typename reducer_type::value_type; - using func_t = StdAdjacentFindFunctor; reduction_value_type red_result; reducer_type reducer(red_result); @@ -86,7 +88,8 @@ IteratorType adjacent_find_impl(const std::string& label, // each index i in the reduction checks i and (i+1). ::Kokkos::parallel_reduce( label, RangePolicy(ex, 0, num_elements - 1), - func_t(first, reducer, pred), reducer); + // use CTAD + StdAdjacentFindFunctor(first, reducer, pred), reducer); // fence not needed because reducing into scalar if (red_result.min_loc_true == @@ -98,12 +101,62 @@ IteratorType adjacent_find_impl(const std::string& label, } template -IteratorType adjacent_find_impl(const std::string& label, - const ExecutionSpace& ex, IteratorType first, - IteratorType last) { +IteratorType adjacent_find_exespace_impl(const std::string& label, + const ExecutionSpace& ex, + IteratorType first, + IteratorType last) { using value_type = typename IteratorType::value_type; using default_pred_t = StdAlgoEqualBinaryPredicate; - return adjacent_find_impl(label, ex, first, last, default_pred_t()); + return adjacent_find_exespace_impl(label, ex, first, last, default_pred_t()); +} + +// +// team impl +// +template +KOKKOS_FUNCTION IteratorType +adjacent_find_team_impl(const TeamHandleType& teamHandle, IteratorType first, + IteratorType last, PredicateType pred) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first); + Impl::expect_valid_range(first, last); + + const auto num_elements = 
Kokkos::Experimental::distance(first, last); + + if (num_elements <= 1) { + return last; + } + + using index_type = typename IteratorType::difference_type; + using reducer_type = FirstLoc; + using reduction_value_type = typename reducer_type::value_type; + + reduction_value_type red_result; + reducer_type reducer(red_result); + + // note that we use below num_elements-1 because + // each index i in the reduction checks i and (i+1). + ::Kokkos::parallel_reduce(TeamThreadRange(teamHandle, 0, num_elements - 1), + // use CTAD + StdAdjacentFindFunctor(first, reducer, pred), + reducer); + + teamHandle.team_barrier(); + + if (red_result.min_loc_true == + ::Kokkos::reduction_identity::min()) { + return last; + } else { + return first + red_result.min_loc_true; + } +} + +template +KOKKOS_FUNCTION IteratorType adjacent_find_team_impl( + const TeamHandleType& teamHandle, IteratorType first, IteratorType last) { + using value_type = typename IteratorType::value_type; + using default_pred_t = StdAlgoEqualBinaryPredicate; + return adjacent_find_team_impl(teamHandle, first, last, default_pred_t()); } } // namespace Impl diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AllOfAnyOfNoneOf.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AllOfAnyOfNoneOf.hpp index ad562070a0..bdc050f9c1 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AllOfAnyOfNoneOf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_AllOfAnyOfNoneOf.hpp @@ -23,23 +23,58 @@ namespace Kokkos { namespace Experimental { namespace Impl { +// +// exespace impl +// template -bool all_of_impl(const std::string& label, const ExecutionSpace& ex, - InputIterator first, InputIterator last, Predicate predicate) { - return (find_if_or_not_impl(label, ex, first, last, predicate) == - last); +bool all_of_exespace_impl(const std::string& label, const ExecutionSpace& ex, + InputIterator first, InputIterator last, + Predicate predicate) { + return 
(find_if_or_not_exespace_impl(label, ex, first, last, + predicate) == last); } template -bool any_of_impl(const std::string& label, const ExecutionSpace& ex, - InputIterator first, InputIterator last, Predicate predicate) { - return (find_if_or_not_impl(label, ex, first, last, predicate) != last); +bool any_of_exespace_impl(const std::string& label, const ExecutionSpace& ex, + InputIterator first, InputIterator last, + Predicate predicate) { + return (find_if_or_not_exespace_impl(label, ex, first, last, + predicate) != last); } template -bool none_of_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, Predicate predicate) { - return (find_if_or_not_impl(label, ex, first, last, predicate) == last); +bool none_of_exespace_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, + Predicate predicate) { + return (find_if_or_not_exespace_impl(label, ex, first, last, + predicate) == last); +} + +// +// team impl +// +template +KOKKOS_FUNCTION bool all_of_team_impl(const TeamHandleType& teamHandle, + InputIterator first, InputIterator last, + Predicate predicate) { + return (find_if_or_not_team_impl(teamHandle, first, last, predicate) == + last); +} + +template +KOKKOS_FUNCTION bool any_of_team_impl(const TeamHandleType& teamHandle, + InputIterator first, InputIterator last, + Predicate predicate) { + return (find_if_or_not_team_impl(teamHandle, first, last, predicate) != + last); +} + +template +KOKKOS_FUNCTION bool none_of_team_impl(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + Predicate predicate) { + return (find_if_or_not_team_impl(teamHandle, first, last, predicate) == + last); } } // namespace Impl diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Constraints.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Constraints.hpp index 0376100410..27ce5a6fad 100644 --- 
a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Constraints.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Constraints.hpp @@ -55,6 +55,9 @@ using iterator_category_t = typename T::iterator_category; template using is_iterator = Kokkos::is_detected; +template +inline constexpr bool is_iterator_v = is_iterator::value; + // // are_iterators // @@ -63,15 +66,18 @@ struct are_iterators; template struct are_iterators { - static constexpr bool value = is_iterator::value; + static constexpr bool value = is_iterator_v; }; template struct are_iterators { static constexpr bool value = - are_iterators::value && are_iterators::value; + are_iterators::value && (are_iterators::value && ... && true); }; +template +inline constexpr bool are_iterators_v = are_iterators::value; + // // are_random_access_iterators // @@ -81,17 +87,21 @@ struct are_random_access_iterators; template struct are_random_access_iterators { static constexpr bool value = - is_iterator::value && - std::is_base_of::value; + is_iterator_v && std::is_base_of::value; }; template struct are_random_access_iterators { - static constexpr bool value = are_random_access_iterators::value && - are_random_access_iterators::value; + static constexpr bool value = + are_random_access_iterators::value && + (are_random_access_iterators::value && ... && true); }; +template +inline constexpr bool are_random_access_iterators_v = + are_random_access_iterators::value; + // // iterators_are_accessible_from // @@ -113,16 +123,18 @@ struct iterators_are_accessible_from { iterators_are_accessible_from::value; }; -template +template KOKKOS_INLINE_FUNCTION constexpr void -static_assert_random_access_and_accessible(const ExecutionSpace& /* ex */, - IteratorTypes... /* iterators */) { +static_assert_random_access_and_accessible( + const ExecutionSpaceOrTeamHandleType& /* ex_or_th*/, + IteratorTypes... 
/* iterators */) { static_assert( are_random_access_iterators::value, "Currently, Kokkos standard algorithms require random access iterators."); - static_assert( - iterators_are_accessible_from::value, - "Incompatible view/iterator and execution space"); + static_assert(iterators_are_accessible_from< + typename ExecutionSpaceOrTeamHandleType::execution_space, + IteratorTypes...>::value, + "Incompatible view/iterator and execution space"); } // @@ -182,10 +194,10 @@ struct not_openmptarget { #endif }; -template +template KOKKOS_INLINE_FUNCTION constexpr void static_assert_is_not_openmptarget( - const ExecutionSpace&) { - static_assert(not_openmptarget::value, + const ExecutionSpaceOrTeamHandleType& /*ex_or_th*/) { + static_assert(not_openmptarget::value, "Currently, Kokkos standard algorithms do not support custom " "comparators in OpenMPTarget"); } @@ -194,7 +206,8 @@ KOKKOS_INLINE_FUNCTION constexpr void static_assert_is_not_openmptarget( // valid range // template -void expect_valid_range(IteratorType first, IteratorType last) { +KOKKOS_INLINE_FUNCTION void expect_valid_range(IteratorType first, + IteratorType last) { // this is a no-op for release KOKKOS_EXPECTS(last >= first); // avoid compiler complaining when KOKKOS_EXPECTS is no-op diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyBackward.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyBackward.hpp index b3adbc5e2d..0f68c9e978 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyBackward.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyBackward.hpp @@ -27,16 +27,18 @@ namespace Kokkos { namespace Experimental { namespace Impl { -template +template struct StdCopyBackwardFunctor { - static_assert(std::is_signed::value, - "Kokkos: StdCopyBackwardFunctor requires signed index type"); + // we can use difference type from IteratorType1 since + // the calling functions below already static assert that + // the iterators have matching difference 
type + using index_type = typename IteratorType1::difference_type; IteratorType1 m_last; IteratorType2 m_dest_last; KOKKOS_FUNCTION - void operator()(IndexType i) const { m_dest_last[-i - 1] = m_last[-i - 1]; } + void operator()(index_type i) const { m_dest_last[-i - 1] = m_last[-i - 1]; } KOKKOS_FUNCTION StdCopyBackwardFunctor(IteratorType1 _last, IteratorType2 _dest_last) @@ -44,30 +46,51 @@ struct StdCopyBackwardFunctor { }; template -IteratorType2 copy_backward_impl(const std::string& label, - const ExecutionSpace& ex, IteratorType1 first, - IteratorType1 last, IteratorType2 d_last) { +IteratorType2 copy_backward_exespace_impl(const std::string& label, + const ExecutionSpace& ex, + IteratorType1 first, + IteratorType1 last, + IteratorType2 d_last) { // checks Impl::static_assert_random_access_and_accessible(ex, first, d_last); Impl::static_assert_iterators_have_matching_difference_type(first, d_last); Impl::expect_valid_range(first, last); - // aliases - using index_type = typename IteratorType1::difference_type; - using func_t = - StdCopyBackwardFunctor; - // run const auto num_elements = Kokkos::Experimental::distance(first, last); ::Kokkos::parallel_for(label, RangePolicy(ex, 0, num_elements), - func_t(last, d_last)); + // use CTAD + StdCopyBackwardFunctor(last, d_last)); ex.fence("Kokkos::copy_backward: fence after operation"); // return return d_last - num_elements; } +// +// team-level impl +// +template +KOKKOS_FUNCTION IteratorType2 +copy_backward_team_impl(const TeamHandleType& teamHandle, IteratorType1 first, + IteratorType1 last, IteratorType2 d_last) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first, d_last); + Impl::static_assert_iterators_have_matching_difference_type(first, d_last); + Impl::expect_valid_range(first, last); + + // run + const auto num_elements = Kokkos::Experimental::distance(first, last); + ::Kokkos::parallel_for(TeamThreadRange(teamHandle, 0, num_elements), + // use CTAD + 
StdCopyBackwardFunctor(last, d_last)); + teamHandle.team_barrier(); + + // return + return d_last - num_elements; +} + } // namespace Impl } // namespace Experimental } // namespace Kokkos diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyCopyN.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyCopyN.hpp index 1b120c46d0..86e99ecbd0 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyCopyN.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyCopyN.hpp @@ -27,13 +27,18 @@ namespace Kokkos { namespace Experimental { namespace Impl { -template +template struct StdCopyFunctor { + // we can use difference type from InputIterator since + // the calling functions below already static assert that + // the iterators have matching difference type + using index_type = typename InputIterator::difference_type; + InputIterator m_first; OutputIterator m_dest_first; KOKKOS_FUNCTION - void operator()(IndexType i) const { m_dest_first[i] = m_first[i]; } + void operator()(index_type i) const { m_dest_first[i] = m_first[i]; } KOKKOS_FUNCTION StdCopyFunctor(InputIterator _first, OutputIterator _dest_first) @@ -41,23 +46,20 @@ struct StdCopyFunctor { }; template -OutputIterator copy_impl(const std::string& label, const ExecutionSpace& ex, - InputIterator first, InputIterator last, - OutputIterator d_first) { +OutputIterator copy_exespace_impl(const std::string& label, + const ExecutionSpace& ex, InputIterator first, + InputIterator last, OutputIterator d_first) { // checks Impl::static_assert_random_access_and_accessible(ex, first, d_first); Impl::static_assert_iterators_have_matching_difference_type(first, d_first); Impl::expect_valid_range(first, last); - // aliases - using index_type = typename InputIterator::difference_type; - using func_t = StdCopyFunctor; - // run const auto num_elements = Kokkos::Experimental::distance(first, last); ::Kokkos::parallel_for(label, RangePolicy(ex, 0, num_elements), - func_t(first, 
d_first)); + // use CTAD + StdCopyFunctor(first, d_first)); ex.fence("Kokkos::copy: fence after operation"); // return @@ -66,16 +68,61 @@ OutputIterator copy_impl(const std::string& label, const ExecutionSpace& ex, template -OutputIterator copy_n_impl(const std::string& label, const ExecutionSpace& ex, - InputIterator first_from, Size count, - OutputIterator first_dest) { +OutputIterator copy_n_exespace_impl(const std::string& label, + const ExecutionSpace& ex, + InputIterator first_from, Size count, + OutputIterator first_dest) { // checks Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest); Impl::static_assert_iterators_have_matching_difference_type(first_from, first_dest); if (count > 0) { - return copy_impl(label, ex, first_from, first_from + count, first_dest); + return copy_exespace_impl(label, ex, first_from, first_from + count, + first_dest); + } else { + return first_dest; + } +} + +// +// team-level impl +// +template +KOKKOS_FUNCTION OutputIterator copy_team_impl(const TeamHandleType& teamHandle, + InputIterator first, + InputIterator last, + OutputIterator d_first) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first, d_first); + Impl::static_assert_iterators_have_matching_difference_type(first, d_first); + Impl::expect_valid_range(first, last); + + // run + const auto num_elements = Kokkos::Experimental::distance(first, last); + ::Kokkos::parallel_for(TeamThreadRange(teamHandle, 0, num_elements), + // use CTAD + StdCopyFunctor(first, d_first)); + teamHandle.team_barrier(); + + // return + return d_first + num_elements; +} + +template +KOKKOS_FUNCTION OutputIterator +copy_n_team_impl(const TeamHandleType& teamHandle, InputIterator first_from, + Size count, OutputIterator first_dest) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first_from, + first_dest); + Impl::static_assert_iterators_have_matching_difference_type(first_from, + first_dest); + + if (count > 0) { + 
return copy_team_impl(teamHandle, first_from, first_from + count, + first_dest); } else { return first_dest; } diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyIf.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyIf.hpp index 3c0c4f7e9b..3c1e2474bc 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyIf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyIf.hpp @@ -20,6 +20,7 @@ #include #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" +#include "Kokkos_MustUseKokkosSingleInTeam.hpp" #include #include @@ -27,8 +28,10 @@ namespace Kokkos { namespace Experimental { namespace Impl { -template +template struct StdCopyIfFunctor { + using index_type = typename FirstFrom::difference_type; + FirstFrom m_first_from; FirstDest m_first_dest; PredType m_pred; @@ -40,7 +43,7 @@ struct StdCopyIfFunctor { m_pred(std::move(pred)) {} KOKKOS_FUNCTION - void operator()(const IndexType i, IndexType& update, + void operator()(const index_type i, index_type& update, const bool final_pass) const { const auto& myval = m_first_from[i]; if (final_pass) { @@ -57,9 +60,11 @@ struct StdCopyIfFunctor { template -OutputIterator copy_if_impl(const std::string& label, const ExecutionSpace& ex, - InputIterator first, InputIterator last, - OutputIterator d_first, PredicateType pred) { +OutputIterator copy_if_exespace_impl(const std::string& label, + const ExecutionSpace& ex, + InputIterator first, InputIterator last, + OutputIterator d_first, + PredicateType pred) { /* To explain the impl, suppose that our data is: @@ -90,23 +95,67 @@ OutputIterator copy_if_impl(const std::string& label, const ExecutionSpace& ex, if (first == last) { return d_first; } else { - // aliases - using index_type = typename InputIterator::difference_type; - using func_type = StdCopyIfFunctor; - // run const auto num_elements = Kokkos::Experimental::distance(first, last); - index_type count = 0; + + typename 
InputIterator::difference_type count = 0; ::Kokkos::parallel_scan(label, RangePolicy(ex, 0, num_elements), - func_type(first, d_first, pred), count); + // use CTAD + StdCopyIfFunctor(first, d_first, pred), count); // fence not needed because of the scan accumulating into count return d_first + count; } } +template +KOKKOS_FUNCTION OutputIterator copy_if_team_impl( + const TeamHandleType& teamHandle, InputIterator first, InputIterator last, + OutputIterator d_first, PredicateType pred) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first, d_first); + Impl::static_assert_iterators_have_matching_difference_type(first, d_first); + Impl::expect_valid_range(first, last); + + if (first == last) { + return d_first; + } + + const std::size_t num_elements = Kokkos::Experimental::distance(first, last); + if constexpr (stdalgo_must_use_kokkos_single_for_team_scan_v< + typename TeamHandleType::execution_space>) { + std::size_t count = 0; + Kokkos::single( + Kokkos::PerTeam(teamHandle), + [=](std::size_t& lcount) { + lcount = 0; + for (std::size_t i = 0; i < num_elements; ++i) { + const auto& myval = first[i]; + if (pred(myval)) { + d_first[lcount++] = myval; + } + } + }, + count); + // no barrier needed since single above broadcasts to all members + return d_first + count; + + } else { + typename InputIterator::difference_type count = 0; + ::Kokkos::parallel_scan(TeamThreadRange(teamHandle, 0, num_elements), + StdCopyIfFunctor(first, d_first, pred), count); + // no barrier needed because of the scan accumulating into count + return d_first + count; + } + +#if defined KOKKOS_COMPILER_INTEL || \ + (defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130) + __builtin_unreachable(); +#endif +} + } // namespace Impl } // namespace Experimental } // namespace Kokkos diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CountCountIf.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CountCountIf.hpp index 18b8c46359..9b6b403aa4 
100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CountCountIf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CountCountIf.hpp @@ -46,37 +46,65 @@ struct StdCountIfFunctor { }; template -typename IteratorType::difference_type count_if_impl(const std::string& label, - const ExecutionSpace& ex, - IteratorType first, - IteratorType last, - Predicate predicate) { +typename IteratorType::difference_type count_if_exespace_impl( + const std::string& label, const ExecutionSpace& ex, IteratorType first, + IteratorType last, Predicate predicate) { // checks Impl::static_assert_random_access_and_accessible(ex, first); Impl::expect_valid_range(first, last); - // aliases - using func_t = StdCountIfFunctor; - // run const auto num_elements = Kokkos::Experimental::distance(first, last); typename IteratorType::difference_type count = 0; ::Kokkos::parallel_reduce(label, RangePolicy(ex, 0, num_elements), - func_t(first, predicate), count); + // use CTAD + StdCountIfFunctor(first, predicate), count); ex.fence("Kokkos::count_if: fence after operation"); return count; } template -auto count_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, const T& value) { - return count_if_impl( +auto count_exespace_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, + const T& value) { + return count_if_exespace_impl( label, ex, first, last, ::Kokkos::Experimental::Impl::StdAlgoEqualsValUnaryPredicate(value)); } +// +// team-level impl +// +template +KOKKOS_FUNCTION typename IteratorType::difference_type count_if_team_impl( + const TeamHandleType& teamHandle, IteratorType first, IteratorType last, + Predicate predicate) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first); + Impl::expect_valid_range(first, last); + + // run + const auto num_elements = Kokkos::Experimental::distance(first, last); + typename IteratorType::difference_type count = 
0; + ::Kokkos::parallel_reduce(TeamThreadRange(teamHandle, 0, num_elements), + // use CTAD + StdCountIfFunctor(first, predicate), count); + teamHandle.team_barrier(); + + return count; +} + +template +KOKKOS_FUNCTION auto count_team_impl(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + const T& value) { + return count_if_team_impl( + teamHandle, first, last, + ::Kokkos::Experimental::Impl::StdAlgoEqualsValUnaryPredicate(value)); +} + } // namespace Impl } // namespace Experimental } // namespace Kokkos diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Equal.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Equal.hpp index e045080d4a..62b7d226f6 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Equal.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Equal.hpp @@ -27,15 +27,16 @@ namespace Kokkos { namespace Experimental { namespace Impl { -template +template struct StdEqualFunctor { + using index_type = typename IteratorType1::difference_type; + IteratorType1 m_first1; IteratorType2 m_first2; BinaryPredicateType m_predicate; KOKKOS_FUNCTION - void operator()(IndexType i, std::size_t& lsum) const { + void operator()(index_type i, std::size_t& lsum) const { if (!m_predicate(m_first1[i], m_first2[i])) { lsum = 1; } @@ -49,67 +50,130 @@ struct StdEqualFunctor { m_predicate(std::move(_predicate)) {} }; +// +// exespace impl +// template -bool equal_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, - BinaryPredicateType predicate) { +bool equal_exespace_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, BinaryPredicateType predicate) { // checks Impl::static_assert_random_access_and_accessible(ex, first1, first2); Impl::static_assert_iterators_have_matching_difference_type(first1, first2); Impl::expect_valid_range(first1, last1); - // aliases - 
using index_type = typename IteratorType1::difference_type; - using func_t = StdEqualFunctor; - // run const auto num_elements = Kokkos::Experimental::distance(first1, last1); std::size_t different = 0; - ::Kokkos::parallel_reduce(label, - RangePolicy(ex, 0, num_elements), - func_t(first1, first2, predicate), different); + ::Kokkos::parallel_reduce( + label, RangePolicy(ex, 0, num_elements), + StdEqualFunctor(first1, first2, predicate), different); ex.fence("Kokkos::equal: fence after operation"); return !different; } template -bool equal_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType1 first1, IteratorType1 last1, - IteratorType2 first2) { +bool equal_exespace_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2) { using value_type1 = typename IteratorType1::value_type; using value_type2 = typename IteratorType2::value_type; using pred_t = StdAlgoEqualBinaryPredicate; - return equal_impl(label, ex, first1, last1, first2, pred_t()); + return equal_exespace_impl(label, ex, first1, last1, first2, pred_t()); } template -bool equal_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, - IteratorType2 last2, BinaryPredicateType predicate) { +bool equal_exespace_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, IteratorType2 last2, + BinaryPredicateType predicate) { const auto d1 = ::Kokkos::Experimental::distance(first1, last1); const auto d2 = ::Kokkos::Experimental::distance(first2, last2); if (d1 != d2) { return false; } - return equal_impl(label, ex, first1, last1, first2, predicate); + return equal_exespace_impl(label, ex, first1, last1, first2, predicate); } template -bool equal_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType1 first1, IteratorType1 last1, IteratorType2 first2, - IteratorType2 last2) { 
+bool equal_exespace_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, IteratorType2 last2) { Impl::expect_valid_range(first1, last1); Impl::expect_valid_range(first2, last2); using value_type1 = typename IteratorType1::value_type; using value_type2 = typename IteratorType2::value_type; using pred_t = StdAlgoEqualBinaryPredicate; - return equal_impl(label, ex, first1, last1, first2, last2, pred_t()); + return equal_exespace_impl(label, ex, first1, last1, first2, last2, pred_t()); +} + +// +// team impl +// +template +KOKKOS_FUNCTION bool equal_team_impl(const TeamHandleType& teamHandle, + IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, + BinaryPredicateType predicate) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first1, first2); + Impl::static_assert_iterators_have_matching_difference_type(first1, first2); + Impl::expect_valid_range(first1, last1); + + // run + const auto num_elements = Kokkos::Experimental::distance(first1, last1); + std::size_t different = 0; + ::Kokkos::parallel_reduce(TeamThreadRange(teamHandle, 0, num_elements), + StdEqualFunctor(first1, first2, predicate), + different); + teamHandle.team_barrier(); + + return !different; +} + +template +KOKKOS_FUNCTION bool equal_team_impl(const TeamHandleType& teamHandle, + IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2) { + using value_type1 = typename IteratorType1::value_type; + using value_type2 = typename IteratorType2::value_type; + using pred_t = StdAlgoEqualBinaryPredicate; + return equal_team_impl(teamHandle, first1, last1, first2, pred_t()); +} + +template +KOKKOS_FUNCTION bool equal_team_impl(const TeamHandleType& teamHandle, + IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, IteratorType2 last2, + BinaryPredicateType predicate) { + const auto d1 = ::Kokkos::Experimental::distance(first1, last1); + const auto d2 = 
::Kokkos::Experimental::distance(first2, last2); + if (d1 != d2) { + return false; + } + + return equal_team_impl(teamHandle, first1, last1, first2, predicate); +} + +template +KOKKOS_FUNCTION bool equal_team_impl(const TeamHandleType& teamHandle, + IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, + IteratorType2 last2) { + Impl::expect_valid_range(first1, last1); + Impl::expect_valid_range(first2, last2); + + using value_type1 = typename IteratorType1::value_type; + using value_type2 = typename IteratorType2::value_type; + using pred_t = StdAlgoEqualBinaryPredicate; + return equal_team_impl(teamHandle, first1, last1, first2, last2, pred_t()); } } // namespace Impl diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ExclusiveScan.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ExclusiveScan.hpp index 71f13e490a..6da992b4bb 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ExclusiveScan.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ExclusiveScan.hpp @@ -22,6 +22,7 @@ #include "Kokkos_HelperPredicates.hpp" #include "Kokkos_ValueWrapperForNoNeutralElement.hpp" #include "Kokkos_IdentityReferenceUnaryFunctor.hpp" +#include "Kokkos_FunctorsForExclusiveScan.hpp" #include #include #include @@ -30,127 +31,15 @@ namespace Kokkos { namespace Experimental { namespace Impl { -template -struct ExclusiveScanDefaultFunctorForKnownNeutralElement { - using execution_space = ExeSpace; - - ValueType m_init_value; - FirstFrom m_first_from; - FirstDest m_first_dest; - - KOKKOS_FUNCTION - ExclusiveScanDefaultFunctorForKnownNeutralElement(ValueType init, - FirstFrom first_from, - FirstDest first_dest) - : m_init_value(std::move(init)), - m_first_from(std::move(first_from)), - m_first_dest(std::move(first_dest)) {} - - KOKKOS_FUNCTION - void operator()(const IndexType i, ValueType& update, - const bool final_pass) const { - if (final_pass) m_first_dest[i] = update + m_init_value; - update += m_first_from[i]; - } 
-}; - -template -struct ExclusiveScanDefaultFunctor { - using execution_space = ExeSpace; - using value_type = - ::Kokkos::Experimental::Impl::ValueWrapperForNoNeutralElement; - - ValueType m_init_value; - FirstFrom m_first_from; - FirstDest m_first_dest; - - KOKKOS_FUNCTION - ExclusiveScanDefaultFunctor(ValueType init, FirstFrom first_from, - FirstDest first_dest) - : m_init_value(std::move(init)), - m_first_from(std::move(first_from)), - m_first_dest(std::move(first_dest)) {} - - KOKKOS_FUNCTION - void operator()(const IndexType i, value_type& update, - const bool final_pass) const { - if (final_pass) { - if (i == 0) { - m_first_dest[i] = m_init_value; - } else { - m_first_dest[i] = update.val + m_init_value; - } - } - - const auto tmp = value_type{m_first_from[i], false}; - this->join(update, tmp); - } - - KOKKOS_FUNCTION - void init(value_type& update) const { - update.val = {}; - update.is_initial = true; - } - - KOKKOS_FUNCTION - void join(value_type& update, const value_type& input) const { - if (input.is_initial) return; - - if (update.is_initial) { - update.val = input.val; - update.is_initial = false; - } else { - update.val = update.val + input.val; - } - } -}; - -template -OutputIteratorType exclusive_scan_custom_op_impl( - const std::string& label, const ExecutionSpace& ex, - InputIteratorType first_from, InputIteratorType last_from, - OutputIteratorType first_dest, ValueType init_value, BinaryOpType bop) { - // checks - Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest); - Impl::static_assert_iterators_have_matching_difference_type(first_from, - first_dest); - Impl::expect_valid_range(first_from, last_from); - - // aliases - using index_type = typename InputIteratorType::difference_type; - using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor; - using func_type = - TransformExclusiveScanFunctor; - - // run - const auto num_elements = - Kokkos::Experimental::distance(first_from, last_from); - 
::Kokkos::parallel_scan( - label, RangePolicy(ex, 0, num_elements), - func_type(init_value, first_from, first_dest, bop, unary_op_type())); - ex.fence("Kokkos::exclusive_scan_custom_op: fence after operation"); - - // return - return first_dest + num_elements; -} - -template -using ex_scan_has_reduction_identity_sum_t = - decltype(Kokkos::reduction_identity::sum()); - +// +// exespace impl +// template -OutputIteratorType exclusive_scan_default_op_impl(const std::string& label, - const ExecutionSpace& ex, - InputIteratorType first_from, - InputIteratorType last_from, - OutputIteratorType first_dest, - ValueType init_value) { +OutputIteratorType exclusive_scan_default_op_exespace_impl( + const std::string& label, const ExecutionSpace& ex, + InputIteratorType first_from, InputIteratorType last_from, + OutputIteratorType first_dest, ValueType init_value) { // checks Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest); Impl::static_assert_iterators_have_matching_difference_type(first_from, @@ -184,17 +73,122 @@ OutputIteratorType exclusive_scan_default_op_impl(const std::string& label, ExclusiveScanDefaultFunctorForKnownNeutralElement< ExecutionSpace, index_type, ValueType, InputIteratorType, OutputIteratorType>, - ExclusiveScanDefaultFunctor>; + ExclusiveScanDefaultFunctorWithValueWrapper>; + + // run + const auto num_elements = + Kokkos::Experimental::distance(first_from, last_from); + ::Kokkos::parallel_scan( + label, RangePolicy(ex, 0, num_elements), + func_type(std::move(init_value), first_from, first_dest)); + + ex.fence("Kokkos::exclusive_scan_default_op: fence after operation"); + + return first_dest + num_elements; +} + +template +OutputIteratorType exclusive_scan_custom_op_exespace_impl( + const std::string& label, const ExecutionSpace& ex, + InputIteratorType first_from, InputIteratorType last_from, + OutputIteratorType first_dest, ValueType init_value, BinaryOpType bop) { + // checks + 
Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest); + Impl::static_assert_iterators_have_matching_difference_type(first_from, + first_dest); + Impl::expect_valid_range(first_from, last_from); + + // aliases + using index_type = typename InputIteratorType::difference_type; + using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor; + using func_type = TransformExclusiveScanFunctorWithValueWrapper< + ExecutionSpace, index_type, ValueType, InputIteratorType, + OutputIteratorType, BinaryOpType, unary_op_type>; // run const auto num_elements = Kokkos::Experimental::distance(first_from, last_from); ::Kokkos::parallel_scan(label, RangePolicy(ex, 0, num_elements), - func_type(init_value, first_from, first_dest)); + func_type(std::move(init_value), first_from, + first_dest, bop, unary_op_type())); + ex.fence("Kokkos::exclusive_scan_custom_op: fence after operation"); - ex.fence("Kokkos::exclusive_scan_default_op: fence after operation"); + // return + return first_dest + num_elements; +} + +// +// team impl +// +template +KOKKOS_FUNCTION OutputIteratorType exclusive_scan_default_op_team_impl( + const TeamHandleType& teamHandle, InputIteratorType first_from, + InputIteratorType last_from, OutputIteratorType first_dest, + ValueType init_value) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first_from, + first_dest); + Impl::static_assert_iterators_have_matching_difference_type(first_from, + first_dest); + Impl::expect_valid_range(first_from, last_from); + + static_assert( + ::Kokkos::is_detected_v, + "The team-level impl of Kokkos::Experimental::exclusive_scan currently " + "does not support types without reduction identity"); + + // aliases + using exe_space = typename TeamHandleType::execution_space; + using index_type = typename InputIteratorType::difference_type; + using func_type = ExclusiveScanDefaultFunctorForKnownNeutralElement< + exe_space, index_type, ValueType, InputIteratorType, OutputIteratorType>; 
+ + const auto num_elements = + Kokkos::Experimental::distance(first_from, last_from); + ::Kokkos::parallel_scan( + TeamThreadRange(teamHandle, 0, num_elements), + func_type(std::move(init_value), first_from, first_dest)); + teamHandle.team_barrier(); + return first_dest + num_elements; +} + +template +KOKKOS_FUNCTION OutputIteratorType exclusive_scan_custom_op_team_impl( + const TeamHandleType& teamHandle, InputIteratorType first_from, + InputIteratorType last_from, OutputIteratorType first_dest, + ValueType init_value, BinaryOpType bop) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first_from, + first_dest); + Impl::static_assert_iterators_have_matching_difference_type(first_from, + first_dest); + Impl::expect_valid_range(first_from, last_from); + + static_assert( + ::Kokkos::is_detected_v, + "The team-level impl of Kokkos::Experimental::exclusive_scan currently " + "does not support types without reduction identity"); + + // aliases + using exe_space = typename TeamHandleType::execution_space; + using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor; + using index_type = typename InputIteratorType::difference_type; + using func_type = TransformExclusiveScanFunctorWithoutValueWrapper< + exe_space, index_type, ValueType, InputIteratorType, OutputIteratorType, + BinaryOpType, unary_op_type>; + + const auto num_elements = + Kokkos::Experimental::distance(first_from, last_from); + ::Kokkos::parallel_scan(TeamThreadRange(teamHandle, 0, num_elements), + func_type(std::move(init_value), first_from, + first_dest, bop, unary_op_type())); + teamHandle.team_barrier(); return first_dest + num_elements; } diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FillFillN.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FillFillN.hpp index 316d865f31..972e57f2cc 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FillFillN.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FillFillN.hpp @@ 
-41,9 +41,12 @@ struct StdFillFunctor { : m_first(std::move(_first)), m_value(std::move(_value)) {} }; +// +// exespace impl +// template -void fill_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, const T& value) { +void fill_exespace_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, const T& value) { // checks Impl::static_assert_random_access_and_accessible(ex, first); Impl::expect_valid_range(first, last); @@ -52,13 +55,14 @@ void fill_impl(const std::string& label, const ExecutionSpace& ex, const auto num_elements = Kokkos::Experimental::distance(first, last); ::Kokkos::parallel_for(label, RangePolicy(ex, 0, num_elements), - StdFillFunctor(first, value)); + StdFillFunctor(first, value)); ex.fence("Kokkos::fill: fence after operation"); } template -IteratorType fill_n_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType first, SizeType n, const T& value) { +IteratorType fill_n_exespace_impl(const std::string& label, + const ExecutionSpace& ex, IteratorType first, + SizeType n, const T& value) { auto last = first + n; Impl::static_assert_random_access_and_accessible(ex, first); Impl::expect_valid_range(first, last); @@ -67,7 +71,40 @@ IteratorType fill_n_impl(const std::string& label, const ExecutionSpace& ex, return first; } - fill_impl(label, ex, first, last, value); + fill_exespace_impl(label, ex, first, last, value); + return last; +} + +// +// team-level impl +// +template +KOKKOS_FUNCTION void fill_team_impl(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + const T& value) { + Impl::static_assert_random_access_and_accessible(teamHandle, first); + Impl::expect_valid_range(first, last); + + const auto num_elements = Kokkos::Experimental::distance(first, last); + ::Kokkos::parallel_for(TeamThreadRange(teamHandle, 0, num_elements), + StdFillFunctor(first, value)); + + teamHandle.team_barrier(); +} + +template 
+KOKKOS_FUNCTION IteratorType fill_n_team_impl(const TeamHandleType& teamHandle, + IteratorType first, SizeType n, + const T& value) { + auto last = first + n; + Impl::static_assert_random_access_and_accessible(teamHandle, first); + Impl::expect_valid_range(first, last); + + if (n <= 0) { + return first; + } + + fill_team_impl(teamHandle, first, last, value); return last; } diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FindEnd.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FindEnd.hpp index 3ec64fa43d..1f1ec5e54f 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FindEnd.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FindEnd.hpp @@ -80,12 +80,17 @@ struct StdFindEndFunctor { m_p(std::move(p)) {} }; +// +// exespace impl +// template -IteratorType1 find_end_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType1 first, IteratorType1 last, - IteratorType2 s_first, IteratorType2 s_last, - const BinaryPredicateType& pred) { +IteratorType1 find_end_exespace_impl(const std::string& label, + const ExecutionSpace& ex, + IteratorType1 first, IteratorType1 last, + IteratorType2 s_first, + IteratorType2 s_last, + const BinaryPredicateType& pred) { // checks Impl::static_assert_random_access_and_accessible(ex, first, s_first); Impl::static_assert_iterators_have_matching_difference_type(first, s_first); @@ -97,7 +102,6 @@ IteratorType1 find_end_impl(const std::string& label, const ExecutionSpace& ex, const auto num_elements = KE::distance(first, last); const auto s_count = KE::distance(s_first, s_last); KOKKOS_EXPECTS(num_elements >= s_count); - (void)s_count; // needed when macro above is a no-op if (s_first == s_last) { return last; @@ -109,7 +113,8 @@ IteratorType1 find_end_impl(const std::string& label, const ExecutionSpace& ex, // special case where the two ranges have equal size if (num_elements == s_count) { - const auto equal_result = equal_impl(label, ex, first, last, s_first, pred); + 
const auto equal_result = + equal_exespace_impl(label, ex, first, last, s_first, pred); return (equal_result) ? first : last; } else { using index_type = typename IteratorType1::difference_type; @@ -148,14 +153,97 @@ IteratorType1 find_end_impl(const std::string& label, const ExecutionSpace& ex, } template -IteratorType1 find_end_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType1 first, IteratorType1 last, - IteratorType2 s_first, IteratorType2 s_last) { +IteratorType1 find_end_exespace_impl(const std::string& label, + const ExecutionSpace& ex, + IteratorType1 first, IteratorType1 last, + IteratorType2 s_first, + IteratorType2 s_last) { using value_type1 = typename IteratorType1::value_type; using value_type2 = typename IteratorType2::value_type; using predicate_type = StdAlgoEqualBinaryPredicate; - return find_end_impl(label, ex, first, last, s_first, s_last, - predicate_type()); + return find_end_exespace_impl(label, ex, first, last, s_first, s_last, + predicate_type()); +} + +// +// team impl +// +template +KOKKOS_FUNCTION IteratorType1 +find_end_team_impl(const TeamHandleType& teamHandle, IteratorType1 first, + IteratorType1 last, IteratorType2 s_first, + IteratorType2 s_last, const BinaryPredicateType& pred) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first, s_first); + Impl::static_assert_iterators_have_matching_difference_type(first, s_first); + Impl::expect_valid_range(first, last); + Impl::expect_valid_range(s_first, s_last); + + // the target sequence should not be larger than the range [first, last) + namespace KE = ::Kokkos::Experimental; + const auto num_elements = KE::distance(first, last); + const auto s_count = KE::distance(s_first, s_last); + KOKKOS_EXPECTS(num_elements >= s_count); + + if (s_first == s_last) { + return last; + } + + if (first == last) { + return last; + } + + // special case where the two ranges have equal size + if (num_elements == s_count) { + const auto equal_result = + 
equal_team_impl(teamHandle, first, last, s_first, pred); + return (equal_result) ? first : last; + } else { + using index_type = typename IteratorType1::difference_type; + using reducer_type = LastLoc; + using reduction_value_type = typename reducer_type::value_type; + using func_t = StdFindEndFunctor; + + // run + reduction_value_type red_result; + reducer_type reducer(red_result); + + // decide the size of the range policy of the par_red: + // note that the last feasible index to start looking is the index + // whose distance from the "last" is equal to the sequence count. + // the +1 is because we need to include that location too. + const auto range_size = num_elements - s_count + 1; + + // run par reduce + ::Kokkos::parallel_reduce( + TeamThreadRange(teamHandle, 0, range_size), + func_t(first, last, s_first, s_last, reducer, pred), reducer); + + teamHandle.team_barrier(); + + // decide and return + if (red_result.max_loc_true == + ::Kokkos::reduction_identity::max()) { + // if here, a subrange has not been found + return last; + } else { + // a location has been found + return first + red_result.max_loc_true; + } + } +} + +template +KOKKOS_FUNCTION IteratorType1 find_end_team_impl( + const TeamHandleType& teamHandle, IteratorType1 first, IteratorType1 last, + IteratorType2 s_first, IteratorType2 s_last) { + using value_type1 = typename IteratorType1::value_type; + using value_type2 = typename IteratorType2::value_type; + using predicate_type = StdAlgoEqualBinaryPredicate; + return find_end_team_impl(teamHandle, first, last, s_first, s_last, + predicate_type()); } } // namespace Impl diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FindFirstOf.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FindFirstOf.hpp index 5f22d2ad13..145e235b9d 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FindFirstOf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FindFirstOf.hpp @@ -71,13 +71,15 @@ struct 
StdFindFirstOfFunctor { m_p(std::move(p)) {} }; +// +// exespace impl +// template -IteratorType1 find_first_of_impl(const std::string& label, - const ExecutionSpace& ex, IteratorType1 first, - IteratorType1 last, IteratorType2 s_first, - IteratorType2 s_last, - const BinaryPredicateType& pred) { +IteratorType1 find_first_of_exespace_impl( + const std::string& label, const ExecutionSpace& ex, IteratorType1 first, + IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last, + const BinaryPredicateType& pred) { // checks Impl::static_assert_random_access_and_accessible(ex, first, s_first); Impl::static_assert_iterators_have_matching_difference_type(first, s_first); @@ -116,15 +118,71 @@ IteratorType1 find_first_of_impl(const std::string& label, } template -IteratorType1 find_first_of_impl(const std::string& label, - const ExecutionSpace& ex, IteratorType1 first, - IteratorType1 last, IteratorType2 s_first, - IteratorType2 s_last) { +IteratorType1 find_first_of_exespace_impl( + const std::string& label, const ExecutionSpace& ex, IteratorType1 first, + IteratorType1 last, IteratorType2 s_first, IteratorType2 s_last) { using value_type1 = typename IteratorType1::value_type; using value_type2 = typename IteratorType2::value_type; using predicate_type = StdAlgoEqualBinaryPredicate; - return find_first_of_impl(label, ex, first, last, s_first, s_last, - predicate_type()); + return find_first_of_exespace_impl(label, ex, first, last, s_first, s_last, + predicate_type()); +} + +// +// team impl +// +template +KOKKOS_FUNCTION IteratorType1 +find_first_of_team_impl(const TeamHandleType& teamHandle, IteratorType1 first, + IteratorType1 last, IteratorType2 s_first, + IteratorType2 s_last, const BinaryPredicateType& pred) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first, s_first); + Impl::static_assert_iterators_have_matching_difference_type(first, s_first); + Impl::expect_valid_range(first, last); + Impl::expect_valid_range(s_first, s_last); 
+ + if ((s_first == s_last) || (first == last)) { + return last; + } + + using index_type = typename IteratorType1::difference_type; + using reducer_type = FirstLoc; + using reduction_value_type = typename reducer_type::value_type; + using func_t = StdFindFirstOfFunctor; + + // run + reduction_value_type red_result; + reducer_type reducer(red_result); + const auto num_elements = Kokkos::Experimental::distance(first, last); + ::Kokkos::parallel_reduce(TeamThreadRange(teamHandle, 0, num_elements), + func_t(first, s_first, s_last, reducer, pred), + reducer); + + teamHandle.team_barrier(); + + // decide and return + if (red_result.min_loc_true == + ::Kokkos::reduction_identity::min()) { + // if here, nothing found + return last; + } else { + // a location has been found + return first + red_result.min_loc_true; + } +} + +template +KOKKOS_FUNCTION IteratorType1 find_first_of_team_impl( + const TeamHandleType& teamHandle, IteratorType1 first, IteratorType1 last, + IteratorType2 s_first, IteratorType2 s_last) { + using value_type1 = typename IteratorType1::value_type; + using value_type2 = typename IteratorType2::value_type; + using predicate_type = StdAlgoEqualBinaryPredicate; + return find_first_of_team_impl(teamHandle, first, last, s_first, s_last, + predicate_type()); } } // namespace Impl diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FindIfOrNot.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FindIfOrNot.hpp index 9c0b0c0ccd..8fffb59094 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FindIfOrNot.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FindIfOrNot.hpp @@ -61,11 +61,15 @@ struct StdFindIfOrNotFunctor { m_p(std::move(p)) {} }; +// +// exespace impl +// template -IteratorType find_if_or_not_impl(const std::string& label, - const ExecutionSpace& ex, IteratorType first, - IteratorType last, PredicateType pred) { +IteratorType find_if_or_not_exespace_impl(const std::string& label, + const 
ExecutionSpace& ex, + IteratorType first, IteratorType last, + PredicateType pred) { // checks Impl::static_assert_random_access_and_accessible( ex, first); // only need one It per type @@ -104,14 +108,68 @@ IteratorType find_if_or_not_impl(const std::string& label, } template -InputIterator find_impl(const std::string& label, ExecutionSpace ex, - InputIterator first, InputIterator last, - const T& value) { - return find_if_or_not_impl( +InputIterator find_exespace_impl(const std::string& label, ExecutionSpace ex, + InputIterator first, InputIterator last, + const T& value) { + return find_if_or_not_exespace_impl( label, ex, first, last, ::Kokkos::Experimental::Impl::StdAlgoEqualsValUnaryPredicate(value)); } +// +// team impl +// +template +KOKKOS_FUNCTION IteratorType +find_if_or_not_team_impl(const TeamHandleType& teamHandle, IteratorType first, + IteratorType last, PredicateType pred) { + // checks + Impl::static_assert_random_access_and_accessible( + teamHandle, first); // only need one It per type + Impl::expect_valid_range(first, last); + + if (first == last) { + return last; + } + + // aliases + using index_type = typename IteratorType::difference_type; + using reducer_type = FirstLoc; + using reduction_value_type = typename reducer_type::value_type; + using func_t = StdFindIfOrNotFunctor; + + // run + reduction_value_type red_result; + reducer_type reducer(red_result); + const auto num_elements = Kokkos::Experimental::distance(first, last); + ::Kokkos::parallel_reduce(TeamThreadRange(teamHandle, 0, num_elements), + func_t(first, reducer, pred), reducer); + + teamHandle.team_barrier(); + + // decide and return + if (red_result.min_loc_true == + ::Kokkos::reduction_identity::min()) { + // here, it means a valid loc has not been found, + return last; + } else { + // a location has been found + return first + red_result.min_loc_true; + } +} + +template +KOKKOS_FUNCTION InputIterator find_team_impl(const TeamHandleType& teamHandle, + InputIterator first, + 
InputIterator last, + const T& value) { + return find_if_or_not_team_impl( + teamHandle, first, last, + ::Kokkos::Experimental::Impl::StdAlgoEqualsValUnaryPredicate(value)); +} + } // namespace Impl } // namespace Experimental } // namespace Kokkos diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ForEachForEachN.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ForEachForEachN.hpp index f9a6ff2e99..d3be3b7f66 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ForEachForEachN.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ForEachForEachN.hpp @@ -41,29 +41,31 @@ struct StdForEachFunctor { : m_first(std::move(_first)), m_functor(std::move(_functor)) {} }; -template -UnaryFunctorType for_each_impl(const std::string& label, - const ExecutionSpace& ex, IteratorType first, - IteratorType last, UnaryFunctorType functor) { +template +UnaryFunctorType for_each_exespace_impl(const std::string& label, + const HandleType& handle, + IteratorType first, IteratorType last, + UnaryFunctorType functor) { // checks - Impl::static_assert_random_access_and_accessible(ex, first); + Impl::static_assert_random_access_and_accessible(handle, first); Impl::expect_valid_range(first, last); // run const auto num_elements = Kokkos::Experimental::distance(first, last); ::Kokkos::parallel_for( - label, RangePolicy(ex, 0, num_elements), + label, RangePolicy(handle, 0, num_elements), StdForEachFunctor(first, functor)); - ex.fence("Kokkos::for_each: fence after operation"); + handle.fence("Kokkos::for_each: fence after operation"); return functor; } template -IteratorType for_each_n_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType first, SizeType n, - UnaryFunctorType functor) { +IteratorType for_each_n_exespace_impl(const std::string& label, + const ExecutionSpace& ex, + IteratorType first, SizeType n, + UnaryFunctorType functor) { auto last = first + n; Impl::static_assert_random_access_and_accessible(ex, first, last); 
Impl::expect_valid_range(first, last); @@ -72,8 +74,46 @@ IteratorType for_each_n_impl(const std::string& label, const ExecutionSpace& ex, return first; } - for_each_impl(label, ex, first, last, std::move(functor)); - // no neeed to fence since for_each_impl fences already + for_each_exespace_impl(label, ex, first, last, std::move(functor)); + // no neeed to fence since for_each_exespace_impl fences already + + return last; +} + +// +// team impl +// +template +KOKKOS_FUNCTION UnaryFunctorType +for_each_team_impl(const TeamHandleType& teamHandle, IteratorType first, + IteratorType last, UnaryFunctorType functor) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first); + Impl::expect_valid_range(first, last); + // run + const auto num_elements = Kokkos::Experimental::distance(first, last); + ::Kokkos::parallel_for( + TeamThreadRange(teamHandle, 0, num_elements), + StdForEachFunctor(first, functor)); + teamHandle.team_barrier(); + return functor; +} + +template +KOKKOS_FUNCTION IteratorType +for_each_n_team_impl(const TeamHandleType& teamHandle, IteratorType first, + SizeType n, UnaryFunctorType functor) { + auto last = first + n; + Impl::static_assert_random_access_and_accessible(teamHandle, first, last); + Impl::expect_valid_range(first, last); + + if (n == 0) { + return first; + } + + for_each_team_impl(teamHandle, first, last, std::move(functor)); + // no neeed to fence since for_each_team_impl fences already return last; } diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FunctorsForExclusiveScan.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FunctorsForExclusiveScan.hpp new file mode 100644 index 0000000000..8151ee3495 --- /dev/null +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_FunctorsForExclusiveScan.hpp @@ -0,0 +1,220 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOS_STD_ALGORITHMS_FUNCTORS_FOR_EXCLUSIVE_SCAN_IMPL_HPP +#define KOKKOS_STD_ALGORITHMS_FUNCTORS_FOR_EXCLUSIVE_SCAN_IMPL_HPP + +#include +#include "Kokkos_ValueWrapperForNoNeutralElement.hpp" + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +template +using ex_scan_has_reduction_identity_sum_t = + decltype(Kokkos::reduction_identity::sum()); + +template +struct ExclusiveScanDefaultFunctorForKnownNeutralElement { + using execution_space = ExeSpace; + ValueType m_init_value; + FirstFrom m_first_from; + FirstDest m_first_dest; + + KOKKOS_FUNCTION + ExclusiveScanDefaultFunctorForKnownNeutralElement(ValueType init, + FirstFrom first_from, + FirstDest first_dest) + : m_init_value(std::move(init)), + m_first_from(std::move(first_from)), + m_first_dest(std::move(first_dest)) {} + + KOKKOS_FUNCTION + void operator()(const IndexType i, ValueType& update, + const bool final_pass) const { + if (final_pass) m_first_dest[i] = update + m_init_value; + update += m_first_from[i]; + } +}; + +template +struct ExclusiveScanDefaultFunctorWithValueWrapper { + using execution_space = ExeSpace; + using value_type = + ::Kokkos::Experimental::Impl::ValueWrapperForNoNeutralElement; + ValueType m_init_value; + FirstFrom m_first_from; + FirstDest m_first_dest; + + KOKKOS_FUNCTION + ExclusiveScanDefaultFunctorWithValueWrapper(ValueType init, + FirstFrom first_from, + FirstDest first_dest) + : m_init_value(std::move(init)), + m_first_from(std::move(first_from)), + m_first_dest(std::move(first_dest)) {} + + KOKKOS_FUNCTION + void 
operator()(const IndexType i, value_type& update, + const bool final_pass) const { + if (final_pass) { + if (i == 0) { + m_first_dest[i] = m_init_value; + } else { + m_first_dest[i] = update.val + m_init_value; + } + } + + const auto tmp = value_type{m_first_from[i], false}; + this->join(update, tmp); + } + + KOKKOS_FUNCTION + void init(value_type& update) const { + update.val = {}; + update.is_initial = true; + } + + KOKKOS_FUNCTION + void join(value_type& update, const value_type& input) const { + if (input.is_initial) return; + + if (update.is_initial) { + update.val = input.val; + update.is_initial = false; + } else { + update.val = update.val + input.val; + } + } +}; + +template +struct TransformExclusiveScanFunctorWithValueWrapper { + using execution_space = ExeSpace; + using value_type = + ::Kokkos::Experimental::Impl::ValueWrapperForNoNeutralElement; + + ValueType m_init_value; + FirstFrom m_first_from; + FirstDest m_first_dest; + BinaryOpType m_binary_op; + UnaryOpType m_unary_op; + + KOKKOS_FUNCTION + TransformExclusiveScanFunctorWithValueWrapper(ValueType init, + FirstFrom first_from, + FirstDest first_dest, + BinaryOpType bop, + UnaryOpType uop) + : m_init_value(std::move(init)), + m_first_from(std::move(first_from)), + m_first_dest(std::move(first_dest)), + m_binary_op(std::move(bop)), + m_unary_op(std::move(uop)) {} + + KOKKOS_FUNCTION + void operator()(const IndexType i, value_type& update, + const bool final_pass) const { + if (final_pass) { + if (i == 0) { + // for both ExclusiveScan and TransformExclusiveScan, + // init is unmodified + m_first_dest[i] = m_init_value; + } else { + m_first_dest[i] = m_binary_op(update.val, m_init_value); + } + } + + const auto tmp = value_type{m_unary_op(m_first_from[i]), false}; + this->join(update, tmp); + } + + KOKKOS_FUNCTION void init(value_type& value) const { + value.val = {}; + value.is_initial = true; + } + + KOKKOS_FUNCTION + void join(value_type& update, const value_type& input) const { + if 
(input.is_initial) return; + + if (update.is_initial) { + update.val = input.val; + } else { + update.val = m_binary_op(update.val, input.val); + } + update.is_initial = false; + } +}; + +template +struct TransformExclusiveScanFunctorWithoutValueWrapper { + using execution_space = ExeSpace; + + ValueType m_init_value; + FirstFrom m_first_from; + FirstDest m_first_dest; + BinaryOpType m_binary_op; + UnaryOpType m_unary_op; + + KOKKOS_FUNCTION + TransformExclusiveScanFunctorWithoutValueWrapper(ValueType init, + FirstFrom first_from, + FirstDest first_dest, + BinaryOpType bop, + UnaryOpType uop) + : m_init_value(std::move(init)), + m_first_from(std::move(first_from)), + m_first_dest(std::move(first_dest)), + m_binary_op(std::move(bop)), + m_unary_op(std::move(uop)) {} + + KOKKOS_FUNCTION + void operator()(const IndexType i, ValueType& update, + const bool final_pass) const { + if (final_pass) { + if (i == 0) { + // for both ExclusiveScan and TransformExclusiveScan, + // init is unmodified + m_first_dest[i] = m_init_value; + } else { + m_first_dest[i] = m_binary_op(update, m_init_value); + } + } + + const auto tmp = ValueType{m_unary_op(m_first_from[i])}; + this->join(update, tmp); + } + + KOKKOS_FUNCTION + void init(ValueType& update) const { update = {}; } + + KOKKOS_FUNCTION + void join(ValueType& update, const ValueType& input) const { + update = m_binary_op(update, input); + } +}; + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_GenerateGenerateN.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_GenerateGenerateN.hpp index 228390bdff..157de1125e 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_GenerateGenerateN.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_GenerateGenerateN.hpp @@ -41,32 +41,65 @@ struct StdGenerateFunctor { : m_first(std::move(_first)), m_generator(std::move(_g)) {} }; +// +// generate impl +// template 
-void generate_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, Generator g) { +void generate_exespace_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, + Generator g) { // checks Impl::static_assert_random_access_and_accessible(ex, first); Impl::expect_valid_range(first, last); - // aliases - using func_t = StdGenerateFunctor; - // run const auto num_elements = Kokkos::Experimental::distance(first, last); ::Kokkos::parallel_for(label, RangePolicy(ex, 0, num_elements), - func_t(first, g)); + StdGenerateFunctor(first, g)); ex.fence("Kokkos::generate: fence after operation"); } +template +KOKKOS_FUNCTION void generate_team_impl(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + Generator g) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first); + Impl::expect_valid_range(first, last); + + // run + const auto num_elements = Kokkos::Experimental::distance(first, last); + ::Kokkos::parallel_for(TeamThreadRange(teamHandle, 0, num_elements), + StdGenerateFunctor(first, g)); + teamHandle.team_barrier(); +} + +// +// generate_n impl +// template -IteratorType generate_n_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType first, Size count, Generator g) { +IteratorType generate_n_exespace_impl(const std::string& label, + const ExecutionSpace& ex, + IteratorType first, Size count, + Generator g) { if (count <= 0) { return first; } - generate_impl(label, ex, first, first + count, g); + generate_exespace_impl(label, ex, first, first + count, g); + return first + count; +} + +template +KOKKOS_FUNCTION IteratorType +generate_n_team_impl(const TeamHandleType& teamHandle, IteratorType first, + Size count, Generator g) { + if (count <= 0) { + return first; + } + + generate_team_impl(teamHandle, first, first + count, g); return first + count; } diff --git 
a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_InclusiveScan.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_InclusiveScan.hpp index ecd6ff39cd..0b4acec0fe 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_InclusiveScan.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_InclusiveScan.hpp @@ -101,9 +101,12 @@ struct InclusiveScanDefaultFunctor { } }; +// +// exespace impl +// template -OutputIteratorType inclusive_scan_default_op_impl( +OutputIteratorType inclusive_scan_default_op_exespace_impl( const std::string& label, const ExecutionSpace& ex, InputIteratorType first_from, InputIteratorType last_from, OutputIteratorType first_dest) { @@ -143,7 +146,7 @@ OutputIteratorType inclusive_scan_default_op_impl( // ------------------------------------------------------------- template -OutputIteratorType inclusive_scan_custom_binary_op_impl( +OutputIteratorType inclusive_scan_custom_binary_op_exespace_impl( const std::string& label, const ExecutionSpace& ex, InputIteratorType first_from, InputIteratorType last_from, OutputIteratorType first_dest, BinaryOpType binary_op) { @@ -158,7 +161,7 @@ OutputIteratorType inclusive_scan_custom_binary_op_impl( using value_type = std::remove_const_t; using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor; - using func_type = TransformInclusiveScanNoInitValueFunctor< + using func_type = ExeSpaceTransformInclusiveScanNoInitValueFunctor< ExecutionSpace, index_type, value_type, InputIteratorType, OutputIteratorType, BinaryOpType, unary_op_type>; @@ -179,7 +182,7 @@ OutputIteratorType inclusive_scan_custom_binary_op_impl( // ------------------------------------------------------------- template -OutputIteratorType inclusive_scan_custom_binary_op_impl( +OutputIteratorType inclusive_scan_custom_binary_op_exespace_impl( const std::string& label, const ExecutionSpace& ex, InputIteratorType first_from, InputIteratorType last_from, OutputIteratorType first_dest, BinaryOpType binary_op, @@ 
-193,7 +196,7 @@ OutputIteratorType inclusive_scan_custom_binary_op_impl( // aliases using index_type = typename InputIteratorType::difference_type; using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor; - using func_type = TransformInclusiveScanWithInitValueFunctor< + using func_type = ExeSpaceTransformInclusiveScanWithInitValueFunctor< ExecutionSpace, index_type, ValueType, InputIteratorType, OutputIteratorType, BinaryOpType, unary_op_type>; @@ -203,13 +206,142 @@ OutputIteratorType inclusive_scan_custom_binary_op_impl( ::Kokkos::parallel_scan(label, RangePolicy(ex, 0, num_elements), func_type(first_from, first_dest, binary_op, - unary_op_type(), init_value)); + unary_op_type(), std::move(init_value))); ex.fence("Kokkos::inclusive_scan_custom_binary_op: fence after operation"); // return return first_dest + num_elements; } +// +// team impl +// +template +KOKKOS_FUNCTION OutputIteratorType inclusive_scan_default_op_team_impl( + const TeamHandleType& teamHandle, InputIteratorType first_from, + InputIteratorType last_from, OutputIteratorType first_dest) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first_from, + first_dest); + Impl::static_assert_iterators_have_matching_difference_type(first_from, + first_dest); + Impl::expect_valid_range(first_from, last_from); + + using value_type = + std::remove_const_t; + + // #if defined(KOKKOS_ENABLE_CUDA) + + using exe_space = typename TeamHandleType::execution_space; + using index_type = typename InputIteratorType::difference_type; + using func_type = std::conditional_t< + ::Kokkos::is_detected::value, + InclusiveScanDefaultFunctorForKnownIdentityElement< + exe_space, index_type, value_type, InputIteratorType, + OutputIteratorType>, + InclusiveScanDefaultFunctor>; + + // run + const auto num_elements = + Kokkos::Experimental::distance(first_from, last_from); + ::Kokkos::parallel_scan(TeamThreadRange(teamHandle, 0, num_elements), + func_type(first_from, first_dest)); + 
teamHandle.team_barrier(); + + // return + return first_dest + num_elements; +} + +// ------------------------------------------------------------- +// inclusive_scan_custom_binary_op_impl +// ------------------------------------------------------------- +template +KOKKOS_FUNCTION OutputIteratorType inclusive_scan_custom_binary_op_team_impl( + const TeamHandleType& teamHandle, InputIteratorType first_from, + InputIteratorType last_from, OutputIteratorType first_dest, + BinaryOpType binary_op) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first_from, + first_dest); + Impl::static_assert_iterators_have_matching_difference_type(first_from, + first_dest); + Impl::expect_valid_range(first_from, last_from); + + using value_type = + std::remove_const_t; + + static_assert( + ::Kokkos::is_detected_v, + "At the moment inclusive_scan doesn't support types without reduction " + "identity"); + + // #if defined(KOKKOS_ENABLE_CUDA) + + // aliases + using exe_space = typename TeamHandleType::execution_space; + using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor; + using func_type = TeamTransformInclusiveScanNoInitValueFunctor< + exe_space, value_type, InputIteratorType, OutputIteratorType, + BinaryOpType, unary_op_type>; + + // run + const auto num_elements = + Kokkos::Experimental::distance(first_from, last_from); + + ::Kokkos::parallel_scan( + TeamThreadRange(teamHandle, 0, num_elements), + func_type(first_from, first_dest, binary_op, unary_op_type())); + teamHandle.team_barrier(); + + return first_dest + num_elements; +} + +// ------------------------------------------------------------- +// inclusive_scan_custom_binary_op_impl with init_value +// ------------------------------------------------------------- +template +KOKKOS_FUNCTION OutputIteratorType inclusive_scan_custom_binary_op_team_impl( + const TeamHandleType& teamHandle, InputIteratorType first_from, + InputIteratorType last_from, OutputIteratorType first_dest, + 
BinaryOpType binary_op, ValueType init_value) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first_from, + first_dest); + Impl::static_assert_iterators_have_matching_difference_type(first_from, + first_dest); + Impl::expect_valid_range(first_from, last_from); + + static_assert( + ::Kokkos::is_detected_v, + "At the moment inclusive_scan doesn't support types without reduction " + "identity"); + + // #if defined(KOKKOS_ENABLE_CUDA) + + // aliases + using exe_space = typename TeamHandleType::execution_space; + using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor; + using func_type = TeamTransformInclusiveScanWithInitValueFunctor< + exe_space, ValueType, InputIteratorType, OutputIteratorType, BinaryOpType, + unary_op_type>; + + // run + const auto num_elements = + Kokkos::Experimental::distance(first_from, last_from); + ::Kokkos::parallel_scan(TeamThreadRange(teamHandle, 0, num_elements), + func_type(first_from, first_dest, binary_op, + unary_op_type(), std::move(init_value))); + teamHandle.team_barrier(); + + // return + return first_dest + num_elements; +} + } // namespace Impl } // namespace Experimental } // namespace Kokkos diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IsPartitioned.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IsPartitioned.hpp index 0fe2d246ff..281efca36b 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IsPartitioned.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IsPartitioned.hpp @@ -62,9 +62,9 @@ struct StdIsPartitionedFunctor { }; template -bool is_partitioned_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, - PredicateType pred) { +bool is_partitioned_exespace_impl(const std::string& label, + const ExecutionSpace& ex, IteratorType first, + IteratorType last, PredicateType pred) { // true if all elements in the range [first, last) that satisfy // the predicate "pred" appear before all 
elements that don't. // Also returns true if [first, last) is empty. @@ -97,6 +97,7 @@ bool is_partitioned_impl(const std::string& label, const ExecutionSpace& ex, const auto num_elements = Kokkos::Experimental::distance(first, last); ::Kokkos::parallel_reduce(label, RangePolicy(ex, 0, num_elements), + func_t(first, reducer, pred), reducer); // fence not needed because reducing into scalar @@ -109,8 +110,72 @@ bool is_partitioned_impl(const std::string& label, const ExecutionSpace& ex, if (red_result.max_loc_true != red_id_max && red_result.min_loc_false != red_id_min) { + // this occurs when the reduction yields nontrivial values return red_result.max_loc_true < red_result.min_loc_false; + } else if (red_result.max_loc_true == red_id_max && + red_result.min_loc_false == 0) { + // this occurs when all values do NOT satisfy + // the predicate, and this corner case should also be true + return true; } else if (first + red_result.max_loc_true == --last) { + // this occurs when all values satisfy the predicate, + // this corner case should also be true + return true; + } else { + return false; + } +} + +template +KOKKOS_FUNCTION bool is_partitioned_team_impl(const TeamHandleType& teamHandle, + IteratorType first, + IteratorType last, + PredicateType pred) { + /* see exespace impl for the description of the impl */ + + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first); + Impl::expect_valid_range(first, last); + + // trivial case + if (first == last) { + return true; + } + + // aliases + using index_type = typename IteratorType::difference_type; + using reducer_type = StdIsPartitioned; + using reduction_value_type = typename reducer_type::value_type; + using func_t = + StdIsPartitionedFunctor; + + // run + reduction_value_type red_result; + reducer_type reducer(red_result); + const auto num_elements = Kokkos::Experimental::distance(first, last); + ::Kokkos::parallel_reduce(TeamThreadRange(teamHandle, 0, num_elements), + func_t(first, 
reducer, pred), reducer); + + // fence not needed because reducing into scalar + + // decide and return + constexpr index_type red_id_min = + ::Kokkos::reduction_identity::min(); + constexpr index_type red_id_max = + ::Kokkos::reduction_identity::max(); + + if (red_result.max_loc_true != red_id_max && + red_result.min_loc_false != red_id_min) { + // this occurs when the reduction yields nontrivial values + return red_result.max_loc_true < red_result.min_loc_false; + } else if (red_result.max_loc_true == red_id_max && + red_result.min_loc_false == 0) { + // this occurs when all values do NOT satisfy + // the predicate, and this corner case should also be true + return true; + } else if (first + red_result.max_loc_true == --last) { + // this occurs when all values satisfy the predicate, + // this corner case should also be true return true; } else { return false; diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IsSorted.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IsSorted.hpp index 4696821586..b2c912848a 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IsSorted.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IsSorted.hpp @@ -48,10 +48,13 @@ struct StdIsSortedFunctor { : m_first(std::move(_first1)), m_comparator(std::move(comparator)) {} }; +// +// exespace impl +// template -bool is_sorted_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last, - ComparatorType comp) { +bool is_sorted_exespace_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, + ComparatorType comp) { // checks Impl::static_assert_random_access_and_accessible(ex, first); Impl::expect_valid_range(first, last); @@ -75,11 +78,49 @@ bool is_sorted_impl(const std::string& label, const ExecutionSpace& ex, } template -bool is_sorted_impl(const std::string& label, const ExecutionSpace& ex, - IteratorType first, IteratorType last) { +bool 
is_sorted_exespace_impl(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last) { using value_type = typename IteratorType::value_type; using pred_t = Impl::StdAlgoLessThanBinaryPredicate; - return is_sorted_impl(label, ex, first, last, pred_t()); + return is_sorted_exespace_impl(label, ex, first, last, pred_t()); +} + +// +// team impl +// +template +KOKKOS_FUNCTION bool is_sorted_team_impl(const TeamHandleType& teamHandle, + IteratorType first, IteratorType last, + ComparatorType comp) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first); + Impl::expect_valid_range(first, last); + + const auto num_elements = Kokkos::Experimental::distance(first, last); + if (num_elements <= 1) { + return true; + } + + // use num_elements-1 because each index handles i and i+1 + const auto num_elements_minus_one = num_elements - 1; + + // result is incremented by one if sorting breaks at index i + std::size_t result = 0; + ::Kokkos::parallel_reduce( + TeamThreadRange(teamHandle, 0, num_elements_minus_one), + // use CTAD here + StdIsSortedFunctor(first, std::move(comp)), result); + + return result == 0; +} + +template +KOKKOS_FUNCTION bool is_sorted_team_impl(const TeamHandleType& teamHandle, + IteratorType first, + IteratorType last) { + using value_type = typename IteratorType::value_type; + using pred_t = Impl::StdAlgoLessThanBinaryPredicate; + return is_sorted_team_impl(teamHandle, first, last, pred_t()); } } // namespace Impl diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IsSortedUntil.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IsSortedUntil.hpp index 2a0c112bf5..d33580ca53 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IsSortedUntil.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IsSortedUntil.hpp @@ -54,10 +54,15 @@ struct StdIsSortedUntilFunctor { m_reducer(std::move(reducer)) {} }; +// +// overloads accepting exespace +// template 
-IteratorType is_sorted_until_impl(const std::string& label, - const ExecutionSpace& ex, IteratorType first, - IteratorType last, ComparatorType comp) { +IteratorType is_sorted_until_exespace_impl(const std::string& label, + const ExecutionSpace& ex, + IteratorType first, + IteratorType last, + ComparatorType comp) { // checks Impl::static_assert_random_access_and_accessible(ex, first); Impl::expect_valid_range(first, last); @@ -81,7 +86,6 @@ IteratorType is_sorted_until_impl(const std::string& label, label, // use num_elements-1 because each index handles i and i+1 RangePolicy(ex, 0, num_elements - 1), - // use CTAD StdIsSortedUntilFunctor(first, comp, reducer), reducer); /* If the reduction result is equal to the initial value, @@ -98,12 +102,66 @@ IteratorType is_sorted_until_impl(const std::string& label, } template -IteratorType is_sorted_until_impl(const std::string& label, - const ExecutionSpace& ex, IteratorType first, - IteratorType last) { +IteratorType is_sorted_until_exespace_impl(const std::string& label, + const ExecutionSpace& ex, + IteratorType first, + IteratorType last) { using value_type = typename IteratorType::value_type; using pred_t = Impl::StdAlgoLessThanBinaryPredicate; - return is_sorted_until_impl(label, ex, first, last, pred_t()); + return is_sorted_until_exespace_impl(label, ex, first, last, pred_t()); +} + +// +// overloads accepting team handle +// +template +KOKKOS_FUNCTION IteratorType +is_sorted_until_team_impl(const ExecutionSpace& teamHandle, IteratorType first, + IteratorType last, ComparatorType comp) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first); + Impl::expect_valid_range(first, last); + + const auto num_elements = Kokkos::Experimental::distance(first, last); + + // trivial case + if (num_elements <= 1) { + return last; + } + + /* + Do a par_reduce computing the *min* index that breaks the sorting. 
+ If one such index is found, then the range is sorted until that element, + if no such index is found, then it means the range is sorted until the end. + */ + using index_type = typename IteratorType::difference_type; + index_type red_result; + index_type red_result_init; + ::Kokkos::Min reducer(red_result); + reducer.init(red_result_init); + ::Kokkos::parallel_reduce( // use num_elements-1 because each index handles i + // and i+1 + TeamThreadRange(teamHandle, 0, num_elements - 1), + StdIsSortedUntilFunctor(first, comp, reducer), reducer); + teamHandle.team_barrier(); + + /* If the reduction result is equal to the initial value, + and it means the range is sorted until the end */ + if (red_result == red_result_init) { + return last; + } else { + /* If such index is found, then the range is sorted until there and + we need to return an iterator past the element found so do +1 */ + return first + (red_result + 1); + } +} + +template +KOKKOS_FUNCTION IteratorType is_sorted_until_team_impl( + const ExecutionSpace& teamHandle, IteratorType first, IteratorType last) { + using value_type = typename IteratorType::value_type; + using pred_t = Impl::StdAlgoLessThanBinaryPredicate; + return is_sorted_until_team_impl(teamHandle, first, last, pred_t()); } } // namespace Impl diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_LexicographicalCompare.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_LexicographicalCompare.hpp index ad7f59232e..b95a66c3bd 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_LexicographicalCompare.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_LexicographicalCompare.hpp @@ -84,13 +84,15 @@ struct StdLexicographicalCompareFunctor { m_comparator(std::move(_comp)) {} }; +// +// exespace impl +// template -bool lexicographical_compare_impl(const std::string& label, - const ExecutionSpace& ex, - IteratorType1 first1, IteratorType1 last1, - IteratorType2 first2, IteratorType2 last2, - ComparatorType 
comp) { +bool lexicographical_compare_exespace_impl( + const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, + IteratorType1 last1, IteratorType2 first2, IteratorType2 last2, + ComparatorType comp) { // checks Impl::static_assert_random_access_and_accessible(ex, first1, first2); Impl::static_assert_iterators_have_matching_difference_type(first1, first2); @@ -139,16 +141,84 @@ bool lexicographical_compare_impl(const std::string& label, } template -bool lexicographical_compare_impl(const std::string& label, - const ExecutionSpace& ex, - IteratorType1 first1, IteratorType1 last1, - IteratorType2 first2, IteratorType2 last2) { +bool lexicographical_compare_exespace_impl( + const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, + IteratorType1 last1, IteratorType2 first2, IteratorType2 last2) { using value_type_1 = typename IteratorType1::value_type; using value_type_2 = typename IteratorType2::value_type; using predicate_t = Impl::StdAlgoLessThanBinaryPredicate; - return lexicographical_compare_impl(label, ex, first1, last1, first2, last2, - predicate_t()); + return lexicographical_compare_exespace_impl(label, ex, first1, last1, first2, + last2, predicate_t()); +} + +// +// team impl +// +template +KOKKOS_FUNCTION bool lexicographical_compare_team_impl( + const TeamHandleType& teamHandle, IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, IteratorType2 last2, ComparatorType comp) { + // checks + Impl::static_assert_random_access_and_accessible(teamHandle, first1, first2); + Impl::static_assert_iterators_have_matching_difference_type(first1, first2); + Impl::expect_valid_range(first1, last1); + Impl::expect_valid_range(first2, last2); + + // aliases + using index_type = typename IteratorType1::difference_type; + using reducer_type = FirstLoc; + using reduction_value_type = typename reducer_type::value_type; + + // run + const auto d1 = Kokkos::Experimental::distance(first1, last1); + const auto d2 = 
Kokkos::Experimental::distance(first2, last2); + const auto range = Kokkos::min(d1, d2); + reduction_value_type red_result; + reducer_type reducer(red_result); + using func1_t = + StdLexicographicalCompareFunctor; + + ::Kokkos::parallel_reduce(TeamThreadRange(teamHandle, 0, range), + func1_t(first1, first2, reducer, comp), reducer); + + teamHandle.team_barrier(); + + // no mismatch + if (red_result.min_loc_true == + ::Kokkos::reduction_identity::min()) { + auto new_last1 = first1 + range; + auto new_last2 = first2 + range; + bool is_prefix = (new_last1 == last1) && (new_last2 != last2); + return is_prefix; + } + + // check mismatched + int less = 0; + auto it1 = first1 + red_result.min_loc_true; + auto it2 = first2 + red_result.min_loc_true; + using func2_t = StdCompareFunctor; + ::Kokkos::parallel_reduce(TeamThreadRange(teamHandle, 0, 1), + func2_t(it1, it2, comp), less); + + teamHandle.team_barrier(); + + return static_cast(less); +} + +template +KOKKOS_FUNCTION bool lexicographical_compare_team_impl( + const TeamHandleType& teamHandle, IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, IteratorType2 last2) { + using value_type_1 = typename IteratorType1::value_type; + using value_type_2 = typename IteratorType2::value_type; + using predicate_t = + Impl::StdAlgoLessThanBinaryPredicate; + return lexicographical_compare_team_impl(teamHandle, first1, last1, first2, + last2, predicate_t()); } } // namespace Impl diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_MinMaxMinmaxElement.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_MinMaxMinmaxElement.hpp index 048420f7a8..2f51db03b4 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_MinMaxMinmaxElement.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_MinMaxMinmaxElement.hpp @@ -63,12 +63,16 @@ struct StdMinMaxElemFunctor { : m_first(std::move(first)), m_reducer(std::move(reducer)) {} }; +// +// exespace impl +// template