Merge branch 'develop' into general-triclinic

Axel Kohlmeyer
2024-03-08 21:32:01 -05:00
444 changed files with 10212 additions and 3787 deletions

.github/CODEOWNERS

@@ -37,7 +37,7 @@ src/MESONT/* @iafoss
src/ML-HDNNP/* @singraber
src/ML-IAP/* @athomps
src/ML-PACE/* @yury-lysogorskiy
src/ML-POD/* @exapde @rohskopf
src/ML-POD/* @exapde
src/MOFFF/* @hheenen
src/MOLFILE/* @akohlmey
src/NETCDF/* @pastewka
@@ -65,9 +65,12 @@ src/MANYBODY/pair_nb3b_screened.* @flodesani
src/REPLICA/*_grem.* @dstelter92
src/EXTRA-COMPUTE/compute_stress_mop*.* @RomainVermorel
src/EXTRA-COMPUTE/compute_born_matrix.* @Bibobu @athomps
src/EXTRA-FIX/fix_deform_pressure.* @jtclemm
src/MISC/*_tracker.* @jtclemm
src/MC/fix_gcmc.* @athomps
src/MC/fix_sgcmc.* @athomps
src/REAXFF/compute_reaxff_atom.* @rbberger
src/KOKKOS/compute_reaxff_atom_kokkos.* @rbberger
src/REPLICA/fix_pimd_langevin.* @Yi-FanLi
# core LAMMPS classes


@@ -222,6 +222,10 @@ endif()
add_executable(lmp ${MAIN_SOURCES})
target_link_libraries(lmp PRIVATE lammps)
set_target_properties(lmp PROPERTIES OUTPUT_NAME ${LAMMPS_BINARY})
# re-export all symbols for plugins
if(PKG_PLUGIN AND (NOT ((CMAKE_SYSTEM_NAME STREQUAL "Windows"))))
set_target_properties(lmp PROPERTIES ENABLE_EXPORTS TRUE)
endif()
install(TARGETS lmp EXPORT LAMMPS_Targets DESTINATION ${CMAKE_INSTALL_BINDIR})
option(CMAKE_VERBOSE_MAKEFILE "Generate verbose Makefiles" OFF)


@@ -45,8 +45,8 @@ if(DOWNLOAD_KOKKOS)
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_CXX_EXTENSIONS=${CMAKE_CXX_EXTENSIONS}")
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}")
include(ExternalProject)
set(KOKKOS_URL "https://github.com/kokkos/kokkos/archive/4.2.00.tar.gz" CACHE STRING "URL for KOKKOS tarball")
set(KOKKOS_MD5 "731647b61a4233f568d583702e9cd6d1" CACHE STRING "MD5 checksum of KOKKOS tarball")
set(KOKKOS_URL "https://github.com/kokkos/kokkos/archive/4.2.01.tar.gz" CACHE STRING "URL for KOKKOS tarball")
set(KOKKOS_MD5 "16b9b09ae947d434dfb58fc5c87c2b76" CACHE STRING "MD5 checksum of KOKKOS tarball")
mark_as_advanced(KOKKOS_URL)
mark_as_advanced(KOKKOS_MD5)
GetFallbackURL(KOKKOS_URL KOKKOS_FALLBACK)
@@ -71,7 +71,7 @@ if(DOWNLOAD_KOKKOS)
add_dependencies(LAMMPS::KOKKOSCORE kokkos_build)
add_dependencies(LAMMPS::KOKKOSCONTAINERS kokkos_build)
elseif(EXTERNAL_KOKKOS)
find_package(Kokkos 4.2.00 REQUIRED CONFIG)
find_package(Kokkos 4.2.01 REQUIRED CONFIG)
target_link_libraries(lammps PRIVATE Kokkos::kokkos)
else()
set(LAMMPS_LIB_KOKKOS_SRC_DIR ${LAMMPS_LIB_SOURCE_DIR}/kokkos)
@@ -139,8 +139,12 @@ if(PKG_KSPACE)
message(WARNING "Using KISS FFT with the CUDA backend of Kokkos may be sub-optimal.")
target_compile_definitions(lammps PRIVATE -DFFT_KOKKOS_KISS)
elseif(FFT_KOKKOS STREQUAL "CUFFT")
find_library(CUFFT_LIBRARY cufft)
if (CUFFT_LIBRARY STREQUAL "CUFFT_LIBRARY-NOTFOUND")
message(FATAL_ERROR "Required cuFFT library not found. Check your environment or set CUFFT_LIBRARY to its location")
endif()
target_compile_definitions(lammps PRIVATE -DFFT_KOKKOS_CUFFT)
target_link_libraries(lammps PRIVATE cufft)
target_link_libraries(lammps PRIVATE ${CUFFT_LIBRARY})
endif()
elseif(Kokkos_ENABLE_HIP)
if(NOT ((FFT_KOKKOS STREQUAL "KISS") OR (FFT_KOKKOS STREQUAL "HIPFFT")))


@@ -10,7 +10,7 @@ get_filename_component(NVCC_WRAPPER_CMD ${CMAKE_CURRENT_SOURCE_DIR}/../lib/kokko
set(CMAKE_CXX_COMPILER ${NVCC_WRAPPER_CMD} CACHE FILEPATH "" FORCE)
# If KSPACE is also enabled, use CUFFT for FFTs
set(FFT_KOKKOS "CUFFT" CACHE STRING FORCE)
set(FFT_KOKKOS "CUFFT" CACHE STRING "" FORCE)
# hide deprecation warnings temporarily for stable release
set(Kokkos_ENABLE_DEPRECATION_WARNINGS OFF CACHE BOOL "" FORCE)


@@ -13,7 +13,7 @@ set(CMAKE_CXX_COMPILER hipcc CACHE STRING "" FORCE)
set(CMAKE_TUNE_FLAGS "-munsafe-fp-atomics" CACHE STRING "" FORCE)
# If KSPACE is also enabled, use HIPFFT for FFTs
set(FFT_KOKKOS "HIPFFT" CACHE STRING FORCE)
set(FFT_KOKKOS "HIPFFT" CACHE STRING "" FORCE)
# hide deprecation warnings temporarily for stable release
set(Kokkos_ENABLE_DEPRECATION_WARNINGS OFF CACHE BOOL "" FORCE)


@@ -877,6 +877,9 @@ Bibliography
**(PLUMED)**
G.A. Tribello, M. Bonomi, D. Branduardi, C. Camilloni and G. Bussi, Comp. Phys. Comm 185, 604 (2014)
**(Pavlov)**
D. Pavlov, V. Galigerov, D. Kolotinskii, V. Nikolskiy, and V. Stegailov, International Journal of High Performance Computing Applications, 38, 34-49 (2024).
**(Paquay)**
Paquay and Kusters, Biophys. J., 110, 6, (2016). preprint available at `arXiv:1411.3019 <https://arxiv.org/abs/1411.3019/>`_.


@@ -61,6 +61,7 @@ OPT.
* :doc:`controller <fix_controller>`
* :doc:`damping/cundall <fix_damping_cundall>`
* :doc:`deform (k) <fix_deform>`
* :doc:`deform/pressure <fix_deform_pressure>`
* :doc:`deposit <fix_deposit>`
* :doc:`dpd/energy (k) <fix_dpd_energy>`
* :doc:`drag <fix_drag>`
@@ -262,6 +263,7 @@ OPT.
* :doc:`wall/body/polyhedron <fix_wall_body_polyhedron>`
* :doc:`wall/colloid <fix_wall>`
* :doc:`wall/ees <fix_wall_ees>`
* :doc:`wall/flow (k) <fix_wall_flow>`
* :doc:`wall/gran (k) <fix_wall_gran>`
* :doc:`wall/gran/region <fix_wall_gran_region>`
* :doc:`wall/harmonic <fix_wall>`


@@ -256,6 +256,7 @@ OPT.
* :doc:`rann <pair_rann>`
* :doc:`reaxff (ko) <pair_reaxff>`
* :doc:`rebo (io) <pair_airebo>`
* :doc:`rebomos (o) <pair_rebomos>`
* :doc:`resquared (go) <pair_resquared>`
* :doc:`saip/metal (t) <pair_saip_metal>`
* :doc:`sdpd/taitwater/isothermal <pair_sdpd_taitwater_isothermal>`


@@ -18,6 +18,7 @@ Available topics in mostly chronological order are:
- `Setting flags in the constructor`_
- `Rename of pack/unpack_comm() to pack/unpack_forward_comm()`_
- `Use ev_init() to initialize variables derived from eflag and vflag`_
- `Use utils::count_words() functions instead of atom->count_words()`_
- `Use utils::numeric() functions instead of force->numeric()`_
- `Use utils::open_potential() function to open potential files`_
- `Use symbolic Atom and AtomVec constants instead of numerical values`_
@@ -130,6 +131,41 @@ Not applying this change will not cause a compilation error, but
can lead to inconsistent behavior and incorrect tallying of
energy or virial.
Use utils::count_words() functions instead of atom->count_words()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. versionchanged:: 2Jun2020
The "count_words()" functions for parsing text have been moved from the
Atom class to the :doc:`utils namespace <Developer_utils>`. The
"count_words()" function in "utils" uses the Tokenizer class internally
to split a line into words and count them, thus it will not modify the
argument string as the function in the Atoms class did and thus had a
variant using a copy buffer. Unlike the old version, the new version
does not remove comments. For that you can use the
:cpp:func:`utils::trim_comment() function
<LAMMPS_NS::utils::trim_comment>` as shown in the example below.
Old:
.. code-block:: c++
nwords = atom->count_words(line);
int nwords = atom->count_words(buf);
New:
.. code-block:: c++
nwords = utils::count_words(line);
int nwords = utils::count_words(utils::trim_comment(buf));
.. seealso::
:cpp:func:`utils::count_words() <LAMMPS_NS::utils::count_words>`,
:cpp:func:`utils::trim_comment() <LAMMPS_NS::utils::trim_comment>`
Use utils::numeric() functions instead of force->numeric()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -137,11 +173,12 @@ Use utils::numeric() functions instead of force->numeric()
The "numeric()" conversion functions (including "inumeric()",
"bnumeric()", and "tnumeric()") have been moved from the Force class to
the utils namespace. Also they take an additional argument that selects
whether the ``Error::all()`` or ``Error::one()`` function should be
called in case of an error. The former should be used when *all* MPI
processes call the conversion function and the latter *must* be used
when they are called from only one or a subset of the MPI processes.
the :doc:`utils namespace <Developer_utils>`. Also they take an
additional argument that selects whether the ``Error::all()`` or
``Error::one()`` function should be called in case of an error. The
former should be used when *all* MPI processes call the conversion
function and the latter *must* be used when they are called from only
one or a subset of the MPI processes.
Old:


@@ -45,10 +45,15 @@ atoms, and should be used for granular system instead of the fix style
To model heat conduction, one must add the temperature and heatflow
atom variables with:
* :doc:`fix property/atom <fix_property_atom>`
a temperature integration fix
* :doc:`fix heat/flow <fix_heat_flow>`
and a heat conduction option defined in both
* :doc:`pair_style granular <pair_granular>`
* :doc:`fix wall/gran <fix_wall_gran>`
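A minimal sketch of these commands (the constant specific heat of 1.0
is illustrative; the heat conduction sub-options of the pair and wall
styles are omitted here):

.. code-block:: LAMMPS

   # add per-atom temperature and heat flow, then integrate the temperature
   fix prop all property/atom temperature heatflow
   fix hf   all heat/flow constant 1.0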


@@ -20,11 +20,28 @@ including Sikandar Mashayak (UIUC), Ray Shan (Sandia), and Dan Ibanez
(Sandia). For more information on developing using Kokkos abstractions
see the `Kokkos Wiki <https://github.com/kokkos/kokkos/wiki>`_.
Kokkos currently provides support for 4 modes of execution (per MPI
.. note::
The Kokkos library is under active development and tracking the
availability of accelerator hardware, and so is the KOKKOS package in
LAMMPS. This means that only a certain range of versions of the
Kokkos library are compatible with the KOKKOS package of a certain
range of LAMMPS versions. For that reason LAMMPS comes with a
bundled version of the Kokkos library that has been validated on
multiple platforms and may contain selected back-ported bug fixes
from upstream Kokkos versions. While it is possible to build LAMMPS
with an external version of Kokkos, it is untested and may result in
incorrect execution or crashes.
Kokkos currently provides full support for 4 modes of execution (per MPI
task). These are Serial (MPI-only for CPUs and Intel Phi), OpenMP
(threading for many-core CPUs and Intel Phi), CUDA (for NVIDIA
GPUs) and HIP (for AMD GPUs). You choose the mode at build time to
produce an executable compatible with a specific hardware.
(threading for many-core CPUs and Intel Phi), CUDA (for NVIDIA GPUs) and
HIP (for AMD GPUs). Additional modes (e.g. OpenMP target, Intel data
center GPUs) are under development. You choose the mode at build time
to produce an executable compatible with a specific hardware.
The following compatibility notes were last updated for LAMMPS
version 23 November 2023 and Kokkos version 4.2.
.. admonition:: C++17 support
:class: note
@@ -63,13 +80,13 @@ produce an executable compatible with a specific hardware.
LAMMPS command line or by using the command :doc:`package kokkos
gpu/aware off <package>` in the input file.
.. admonition:: AMD GPU support
.. admonition:: Intel Data Center GPU support
:class: note
To build with Kokkos the HIPCC compiler from the AMD ROCm software
version 3.5 or later is required. Supporting this Kokkos mode in
LAMMPS is still work in progress. Please contact the LAMMPS developers
if you run into problems.
Support for Kokkos with Intel Data Center GPU accelerators (formerly
known under the code name "Ponte Vecchio") in LAMMPS is still a work
in progress. Only a subset of the functionality works correctly.
Please contact the LAMMPS developers if you run into problems.
Building LAMMPS with the KOKKOS package
"""""""""""""""""""""""""""""""""""""""
@@ -292,6 +309,10 @@ one or more nodes, each with two GPUs:
settings. Experimenting with its options can provide a speed-up for
specific calculations. For example:
.. code-block:: bash
mpirun -np 2 lmp_kokkos_cuda_openmpi -k on g 2 -sf kk -pk kokkos newton on neigh half binsize 2.8 -in in.lj # Newton on, half neighbor list, set binsize = neighbor ghost cutoff
.. note::
The default binsize for :doc:`atom sorting <atom_modify>` on GPUs
@@ -302,9 +323,15 @@ one or more nodes, each with two GPUs:
frequent sorting than default (e.g. sorting every 100 time steps
instead of 1000) may improve performance.
.. code-block:: bash
.. note::
mpirun -np 2 lmp_kokkos_cuda_openmpi -k on g 2 -sf kk -pk kokkos newton on neigh half binsize 2.8 -in in.lj # Newton on, half neighbor list, set binsize = neighbor ghost cutoff
When running on GPUs with many MPI ranks (tens of thousands and
more), the creation of the atom map (required for molecular systems)
on the GPU can slow down significantly or run out of GPU memory and
thus slow down the whole calculation or cause a crash. You can use
the "-pk kokkos atom/map no" :doc:`command-line switch <Run_options>`
of the :doc:`package kokkos atom/map no <package>` command to create
the atom map on the CPU instead.
.. note::
@@ -420,11 +447,18 @@ Generally speaking, the following rules of thumb apply:
of using single or mixed precision with the GPU package depends
significantly on the hardware in use and the simulated system and pair
style.
* When running on Intel hardware, KOKKOS is not as fast as
* When running on Intel Phi hardware, KOKKOS is not as fast as
the INTEL package, which is optimized for x86 hardware (not just
from Intel) and compilation with the Intel compilers. The INTEL
package also can increase the vector length of vector instructions
by switching to single or mixed precision mode.
* The KOKKOS package by default assumes that you are using exactly one
MPI rank per GPU. When trying to use multiple MPI ranks per GPU it is
mandatory to enable `CUDA Multi-Process Service (MPS)
<https://docs.nvidia.com/deploy/mps/index.html>`_ to get good
performance. In this case it is better to not use all available
MPI ranks in order to avoid competing with the MPS daemon for
CPU resources.
See the `Benchmark page <https://www.lammps.org/bench.html>`_ of the
LAMMPS website for performance of the KOKKOS package on different


@@ -204,8 +204,23 @@ angles per atom satisfying the ADF criteria.
Restrictions
""""""""""""
This compute is part of the EXTRA-COMPUTE package. It is only enabled if
LAMMPS was built with that package. See the :doc:`Build package <Build_package>` page for more info.
This compute is part of the EXTRA-COMPUTE package. It is only enabled
if LAMMPS was built with that package. See the :doc:`Build package
<Build_package>` page for more info.
By default, the ADF is not computed for distances longer than the
largest force cutoff, since the neighbor list creation will only contain
pairs up to that distance (plus neighbor list skin). If you use outer
cutoffs larger than that, you must use :doc:`neighbor style 'bin' or
'nsq' <neighbor>`.
If you want an ADF for a larger outer cutoff, you can also use the
:doc:`rerun <rerun>` command to post-process a dump file, use :doc:`pair
style zero <pair_zero>` and set the force cutoff to be larger in the
rerun script. Note that in the rerun context, the force cutoff is
arbitrary: with pair style zero you are not computing any forces, and
since you are not running dynamics you are not changing the model that
generated the trajectory.
The ADF is not computed for neighbors outside the force cutoff,
since processors (in parallel) don't know about atom coordinates for


@@ -102,6 +102,8 @@ This compute is part of the EXTRA-COMPUTE package. It is only enabled
if LAMMPS was built with that package. See the :doc:`Build package
<Build_package>` page for more info.
This compute requires :doc:`neighbor styles 'bin' or 'nsq' <neighbor>`.
Related commands
""""""""""""""""


@@ -107,6 +107,8 @@ This compute is part of the EXTRA-COMPUTE package. It is only enabled
if LAMMPS was built with that package. See the :doc:`Build package
<Build_package>` page for more info.
This compute requires :doc:`neighbor styles 'bin' or 'nsq' <neighbor>`.
Related commands
""""""""""""""""


@@ -106,6 +106,8 @@ Restrictions
This compute is part of the EXTRA-COMPUTE package. It is only enabled if
LAMMPS was built with that package.
This compute requires :doc:`neighbor styles 'bin' or 'nsq' <neighbor>`.
Related commands
""""""""""""""""


@@ -64,7 +64,7 @@ tangential force tensor. The contact tensor is calculated as
.. math::
C_{ab} = \frac{15}{2} (\phi_{ab} - \mathrm{Tr}(\phi) \delta_{ab})
C_{ab} = \frac{15}{2} (\phi_{ab} - \frac{1}{3} \mathrm{Tr}(\phi) \delta_{ab})
where :math:`a` and :math:`b` are the :math:`x`, :math:`y`, :math:`z`
directions, :math:`\delta_{ab}` is the Kronecker delta function, and
@@ -83,7 +83,7 @@ The branch tensor is calculated as
.. math::
B_{ab} = \frac{15}{6 \mathrm{Tr}(D)} (D_{ab} - \mathrm{Tr}(D) \delta_{ab})
B_{ab} = \frac{15}{2\, \mathrm{Tr}(D)} (D_{ab} - \frac{1}{3} \mathrm{Tr}(D) \delta_{ab})
where the tensor :math:`D` is defined as
@@ -101,7 +101,7 @@ The normal force fabric tensor is calculated as
.. math::
F^n_{ab} = \frac{15}{6\, \mathrm{Tr}(N)} (N_{ab} - \mathrm{Tr}(N) \delta_{ab})
F^n_{ab} = \frac{15}{2\, \mathrm{Tr}(N)} (N_{ab} - \frac{1}{3} \mathrm{Tr}(N) \delta_{ab})
where the tensor :math:`N` is defined as
@@ -119,7 +119,7 @@ as
.. math::
F^t_{ab} = \frac{15}{9\, \mathrm{Tr}(N)} (T_{ab} - \mathrm{Tr}(T) \delta_{ab})
F^t_{ab} = \frac{5}{\mathrm{Tr}(N)} (T_{ab} - \frac{1}{3} \mathrm{Tr}(T) \delta_{ab})
where the tensor :math:`T` is defined as


@@ -23,6 +23,7 @@ Syntax
spx, spy, spz, sp, fmx, fmy, fmz,
nbonds,
radius, diameter, omegax, omegay, omegaz,
temperature, heatflow,
angmomx, angmomy, angmomz,
shapex, shapey, shapez,
quatw, quati, quatj, quatk, tqx, tqy, tqz,
@@ -56,6 +57,8 @@ Syntax
*nbonds* = number of bonds assigned to an atom
*radius,diameter* = radius,diameter of spherical particle
*omegax,omegay,omegaz* = angular velocity of spherical particle
*temperature* = internal temperature of spherical particle
*heatflow* = internal heat flow of spherical particle
*angmomx,angmomy,angmomz* = angular momentum of aspherical particle
*shapex,shapey,shapez* = 3 diameters of aspherical particle
*quatw,quati,quatj,quatk* = quaternion components for aspherical or body particles


@@ -176,22 +176,29 @@ also numbers :math:`\ge 0.0`.
Restrictions
""""""""""""
The RDF is not computed for distances longer than the force cutoff,
since processors (in parallel) do not know about atom coordinates for
atoms further away than that distance. If you want an RDF for larger
distances, you can use the :doc:`rerun <rerun>` command to post-process
a dump file and set the cutoff for the potential to be longer in the
By default, the RDF is not computed for distances longer than the
largest force cutoff, since the neighbor list creation will only contain
pairs up to that distance (plus neighbor list skin). This distance can
be increased using the *cutoff* keyword but this keyword is only valid
with :doc:`neighbor styles 'bin' and 'nsq' <neighbor>`.
If you want an RDF for larger distances, you can also use the
:doc:`rerun <rerun>` command to post-process a dump file, use :doc:`pair
style zero <pair_zero>` and set the force cutoff to be longer in the
rerun script. Note that in the rerun context, the force cutoff is
arbitrary, since you are not running dynamics and thus are not changing
your model. The definition of :math:`g(r)` used by LAMMPS is only appropriate
for characterizing atoms that are uniformly distributed throughout the
simulation cell. In such cases, the coordination number is still
correct and meaningful. As an example, if a large simulation cell
contains only one atom of type *itypeN* and one of *jtypeN*, then :math:`g(r)`
will register an arbitrarily large spike at whatever distance they
happen to be at, and zero everywhere else.
The function :math:`\text{coord}(r)` will show a step
change from zero to one at the location of the spike in :math:`g(r)`.
arbitrary: with pair style zero you are not computing any forces, and
since you are not running dynamics you are not changing the model that
generated the trajectory.
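A hedged sketch of this rerun workflow (file names and the cutoff of
10.0 are illustrative, assuming an atomic LJ system):

.. code-block:: LAMMPS

   units        lj
   atom_style   atomic
   read_data    data.snapshot
   pair_style   zero 10.0          # no forces; sets a larger force cutoff
   pair_coeff   * *
   compute      myRDF all rdf 100
   fix          avg all ave/time 1 1 1 c_myRDF[*] file rdf.out mode vector
   rerun        dump.lammpstrj dump x y z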
The definition of :math:`g(r)` used by LAMMPS is only appropriate for
characterizing atoms that are uniformly distributed throughout the
simulation cell. In such cases, the coordination number is still correct
and meaningful. As an example, if a large simulation cell contains only
one atom of type *itypeN* and one of *jtypeN*, then :math:`g(r)` will
register an arbitrarily large spike at whatever distance they happen to
be at, and zero everywhere else. The function :math:`\text{coord}(r)`
will show a step change from zero to one at the location of the spike in
:math:`g(r)`.
.. note::


@@ -104,7 +104,6 @@ Syntax
q, mux, muy, muz, mu,
radius, diameter, omegax, omegay, omegaz,
angmomx, angmomy, angmomz, tqx, tqy, tqz,
heatflow, temperature,
c_ID, c_ID[I], f_ID, f_ID[I], v_name,
i_name, d_name, i2_name[I], d2_name[I]
@@ -131,8 +130,6 @@ Syntax
omegax,omegay,omegaz = angular velocity of spherical particle
angmomx,angmomy,angmomz = angular momentum of aspherical particle
tqx,tqy,tqz = torque on finite-size particles
heatflow = rate of heat flow into particle
temperature = temperature of particle
c_ID = per-atom vector calculated by a compute with ID
c_ID[I] = Ith column of per-atom array calculated by a compute with ID, I can include wildcard (see below)
f_ID = per-atom vector calculated by a fix with ID


@@ -226,6 +226,7 @@ accelerated styles exist.
* :doc:`controller <fix_controller>` - apply control loop feedback mechanism
* :doc:`damping/cundall <fix_damping_cundall>` - Cundall non-viscous damping for granular simulations
* :doc:`deform <fix_deform>` - change the simulation box size/shape
* :doc:`deform/pressure <fix_deform_pressure>` - change the simulation box size/shape with additional loading conditions
* :doc:`deposit <fix_deposit>` - add new atoms above a surface
* :doc:`dpd/energy <fix_dpd_energy>` - constant energy dissipative particle dynamics
* :doc:`drag <fix_drag>` - drag atoms towards a defined coordinate
@@ -427,6 +428,7 @@ accelerated styles exist.
* :doc:`wall/body/polyhedron <fix_wall_body_polyhedron>` - time integration for body particles of style :doc:`rounded/polyhedron <Howto_body>`
* :doc:`wall/colloid <fix_wall>` - Lennard-Jones wall interacting with finite-size particles
* :doc:`wall/ees <fix_wall_ees>` - wall for ellipsoidal particles
* :doc:`wall/flow <fix_wall_flow>` - flow boundary conditions
* :doc:`wall/gran <fix_wall_gran>` - frictional wall(s) for granular simulations
* :doc:`wall/gran/region <fix_wall_gran_region>` - :doc:`fix wall/region <fix_wall_region>` equivalent for use with granular particles
* :doc:`wall/harmonic <fix_wall>` - harmonic spring wall


@@ -14,15 +14,15 @@ Syntax
* balance = style name of this fix command
* Nfreq = perform dynamic load balancing every this many steps
* thresh = imbalance threshold that must be exceeded to perform a re-balance
* style = *shift* or *rcb*
* style = *shift* or *rcb* or *report*
.. parsed-literal::
shift args = dimstr Niter stopthresh
*shift* args = dimstr Niter stopthresh
dimstr = sequence of letters containing *x* or *y* or *z*, each not more than once
Niter = # of times to iterate within each dimension of dimstr sequence
stopthresh = stop balancing when this imbalance threshold is reached
*rcb* args = none
*report* args = none
* zero or more keyword/arg pairs may be appended
* keyword = *weight* or *out*
@@ -70,6 +70,13 @@ re-balancing is performed periodically during the simulation. To
perform "static" balancing, before or between runs, see the
:doc:`balance <balance>` command.
.. versionadded:: TBD
The *report* balance style only computes the load imbalance but does
not attempt any re-balancing. This way the load imbalance information
can be used for other purposes, for instance to stop a run with
:doc:`fix halt <fix_halt>`.
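For example, a sketch of such a combination (the check interval of 250
steps and the threshold of 1.5 are illustrative):

.. code-block:: LAMMPS

   fix bal all balance 250 1.0 report
   variable imb equal f_bal
   fix stop all halt 250 v_imb > 1.5 error continue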
Load-balancing is typically most useful if the particles in the
simulation box have a spatially-varying density distribution or
where the computational cost varies significantly between different


@@ -253,11 +253,11 @@ built with that package. See the :doc:`Build package <Build_package>`
page for more info.
The :doc:`atom_style <atom_style>` used must contain the charge
property, for example, the style could be *charge* or *full*. Only
usable for 3D simulations. Atoms specified as free ions cannot be part
of rigid bodies or molecules and cannot have bonding interactions. The
scheme is limited to integer charges, any atoms with non-integer charges
will not be considered by the fix.
property and have per atom type masses, for example, the style could be
*charge* or *full*. Only usable for 3D simulations. Atoms specified as
free ions cannot be part of rigid bodies or molecules and cannot have
bonding interactions. The scheme is limited to integer charges; any
atoms with non-integer charges will not be considered by the fix.
All interaction potentials used must be continuous, otherwise the MD
integration and the particle exchange MC moves do not correspond to the


@@ -4,6 +4,9 @@
fix deform command
==================
:doc:`fix deform/pressure <fix_deform_pressure>` command
========================================================
Accelerator Variants: *deform/kk*
Syntax
@@ -11,18 +14,18 @@ Syntax
.. code-block:: LAMMPS
fix ID group-ID deform N parameter args ... keyword value ...
fix ID group-ID fix_style N parameter style args ... keyword value ...
* ID, group-ID are documented in :doc:`fix <fix>` command
* deform = style name of this fix command
* fix_style = *deform* or *deform/pressure*
* N = perform box deformation every this many timesteps
* one or more parameter/arg pairs may be appended
* one or more parameter/style/args sequences of arguments may be appended
.. parsed-literal::
parameter = *x* or *y* or *z* or *xy* or *xz* or *yz*
*x*, *y*, *z* args = style value(s)
style = *final* or *delta* or *scale* or *vel* or *erate* or *trate* or *volume* or *wiggle* or *variable*
style = *final* or *delta* or *scale* or *vel* or *erate* or *trate* or *volume* or *wiggle* or *variable* or *pressure* or *pressure/mean*
*final* values = lo hi
lo hi = box boundaries at end of run (distance units)
*delta* values = dlo dhi
@@ -43,8 +46,15 @@ Syntax
*variable* values = v_name1 v_name2
v_name1 = variable with name1 for box length change as function of time
v_name2 = variable with name2 for change rate as function of time
*pressure* values = target gain (ONLY available in :doc:`fix deform/pressure <fix_deform_pressure>` command)
target = target pressure (pressure units)
gain = proportional gain constant (1/(time * pressure) or 1/time units)
*pressure/mean* values = target gain (ONLY available in :doc:`fix deform/pressure <fix_deform_pressure>` command)
target = target pressure (pressure units)
gain = proportional gain constant (1/(time * pressure) or 1/time units)
*xy*, *xz*, *yz* args = style value
style = *final* or *delta* or *vel* or *erate* or *trate* or *wiggle*
style = *final* or *delta* or *vel* or *erate* or *trate* or *wiggle* or *variable*
*final* value = tilt
tilt = tilt factor at end of run (distance units)
*delta* value = dtilt
@@ -62,9 +72,12 @@ Syntax
*variable* values = v_name1 v_name2
v_name1 = variable with name1 for tilt change as function of time
v_name2 = variable with name2 for change rate as function of time
*pressure* values = target gain (ONLY available in :doc:`fix deform/pressure <fix_deform_pressure>` command)
target = target pressure (pressure units)
gain = proportional gain constant (1/(time * pressure) or 1/time units)
* zero or more keyword/value pairs may be appended
* keyword = *remap* or *flip* or *units*
* keyword = *remap* or *flip* or *units* or *couple* or *vol/balance/p* or *max/rate* or *normalize/pressure*
.. parsed-literal::
@@ -77,6 +90,15 @@ Syntax
*units* value = *lattice* or *box*
lattice = distances are defined in lattice units
box = distances are defined in simulation box units
*couple* value = *none* or *xyz* or *xy* or *yz* or *xz* (ONLY available in :doc:`fix deform/pressure <fix_deform_pressure>` command)
couple pressure values of various dimensions
*vol/balance/p* value = *yes* or *no* (ONLY available in :doc:`fix deform/pressure <fix_deform_pressure>` command)
Modifies the behavior of the *volume* option to try and balance pressures
*max/rate* value = *rate* (ONLY available in :doc:`fix deform/pressure <fix_deform_pressure>` command)
rate = maximum strain rate for pressure control
*normalize/pressure* value = *yes* or *no* (ONLY available in :doc:`fix deform/pressure <fix_deform_pressure>` command)
Modifies pressure controls such that the deviation in pressure is normalized by the target pressure
Examples
""""""""
@@ -88,6 +110,8 @@ Examples
fix 1 all deform 1 xy erate 0.001 remap v
fix 1 all deform 10 y delta -0.5 0.5 xz vel 1.0
See examples for :doc:`fix deform/pressure <fix_deform_pressure>` on its doc page
Description
"""""""""""
@@ -95,29 +119,46 @@ Change the volume and/or shape of the simulation box during a dynamics
run. Orthogonal simulation boxes have 3 adjustable parameters
(x,y,z). Triclinic (non-orthogonal) simulation boxes have 6
adjustable parameters (x,y,z,xy,xz,yz). Any or all of them can be
adjusted independently and simultaneously by this command.
adjusted independently and simultaneously.
This fix can be used to perform non-equilibrium MD (NEMD) simulations
of a continuously strained system. See the :doc:`fix nvt/sllod <fix_nvt_sllod>` and :doc:`compute temp/deform <compute_temp_deform>` commands for more details. Note
that simulation of a continuously extended system (extensional flow)
can be modeled using the :ref:`UEF package <PKG-UEF>` and its :doc:`fix commands <fix_nh_uef>`.
The fix deform command allows use of all the arguments listed above,
except those flagged as available ONLY for the :doc:`fix
deform/pressure <fix_deform_pressure>` command, which are
pressure-based controls. The fix deform/pressure command allows use
of all the arguments listed above.
The rest of this doc page explains the options common to both
commands. The :doc:`fix deform/pressure <fix_deform_pressure>` doc
page explains the options available ONLY with the fix deform/pressure
command. Note that a simulation can define only a single deformation
command: fix deform or fix deform/pressure.
Both these fixes can be used to perform non-equilibrium MD (NEMD)
simulations of a continuously strained system. See the :doc:`fix
nvt/sllod <fix_nvt_sllod>` and :doc:`compute temp/deform
<compute_temp_deform>` commands for more details. Note that
simulation of a continuously extended system (extensional flow) can be
modeled using the :ref:`UEF package <PKG-UEF>` and its :doc:`fix
commands <fix_nh_uef>`.
For the *x*, *y*, *z* parameters, the associated dimension cannot be
shrink-wrapped. For the *xy*, *yz*, *xz* parameters, the associated
second dimension cannot be shrink-wrapped. Dimensions not varied by this
command can be periodic or non-periodic. Dimensions corresponding to
unspecified parameters can also be controlled by a :doc:`fix npt <fix_nh>` or :doc:`fix nph <fix_nh>` command.
second dimension cannot be shrink-wrapped. Dimensions not varied by
this command can be periodic or non-periodic. Dimensions
corresponding to unspecified parameters can also be controlled by a
:doc:`fix npt <fix_nh>` or :doc:`fix nph <fix_nh>` command.
The size and shape of the simulation box at the beginning of the
simulation run were either specified by the
:doc:`create_box <create_box>` or :doc:`read_data <read_data>` or
:doc:`read_restart <read_restart>` command used to setup the simulation
initially if it is the first run, or they are the values from the end
of the previous run. The :doc:`create_box <create_box>`, :doc:`read data <read_data>`, and :doc:`read_restart <read_restart>` commands
specify whether the simulation box is orthogonal or non-orthogonal
(triclinic) and explain the meaning of the xy,xz,yz tilt factors. If
fix deform changes the xy,xz,yz tilt factors, then the simulation box
must be triclinic, even if its initial tilt factors are 0.0.
simulation run were either specified by the :doc:`create_box
<create_box>` or :doc:`read_data <read_data>` or :doc:`read_restart
<read_restart>` command used to setup the simulation initially if it
is the first run, or they are the values from the end of the previous
run. The :doc:`create_box <create_box>`, :doc:`read data
<read_data>`, and :doc:`read_restart <read_restart>` commands specify
whether the simulation box is orthogonal or non-orthogonal (triclinic)
and explain the meaning of the xy,xz,yz tilt factors. If fix deform
changes the xy,xz,yz tilt factors, then the simulation box must be
triclinic, even if its initial tilt factors are 0.0.
As described below, the desired simulation box size and shape at the
end of the run are determined by the parameters of the fix deform
@@ -258,21 +299,22 @@ of the units keyword below.
The *variable* style changes the specified box length dimension by
evaluating a variable, which presumably is a function of time. The
variable with *name1* must be an :doc:`equal-style variable <variable>`
and should calculate a change in box length in units of distance.
Note that this distance is in box units, not lattice units; see the
discussion of the *units* keyword below. The formula associated with
variable *name1* can reference the current timestep. Note that it
should return the "change" in box length, not the absolute box length.
This means it should evaluate to 0.0 when invoked on the initial
timestep of the run following the definition of fix deform. It should
evaluate to a value > 0.0 to dilate the box at future times, or a
value < 0.0 to compress the box.
variable with *name1* must be an :doc:`equal-style variable
<variable>` and should calculate a change in box length in units of
distance. Note that this distance is in box units, not lattice units;
see the discussion of the *units* keyword below. The formula
associated with variable *name1* can reference the current timestep.
Note that it should return the "change" in box length, not the
absolute box length. This means it should evaluate to 0.0 when
invoked on the initial timestep of the run following the definition of
fix deform. It should evaluate to a value > 0.0 to dilate the box at
future times, or a value < 0.0 to compress the box.
The variable *name2* must also be an :doc:`equal-style variable <variable>` and should calculate the rate of box length
change, in units of distance/time, i.e. the time-derivative of the
*name1* variable. This quantity is used internally by LAMMPS to reset
atom velocities when they cross periodic boundaries. It is computed
The variable *name2* must also be an :doc:`equal-style variable
<variable>` and should calculate the rate of box length change, in
units of distance/time, i.e. the time-derivative of the *name1*
variable. This quantity is used internally by LAMMPS to reset atom
velocities when they cross periodic boundaries. It is computed
internally for the other styles, but you must provide it when using an
arbitrary variable.
@@ -414,12 +456,13 @@ can reference the current timestep. Note that it should return the
should evaluate to 0.0 when invoked on the initial timestep of the run
following the definition of fix deform.
The variable *name2* must also be an :doc:`equal-style variable <variable>` and should calculate the rate of tilt change,
in units of distance/time, i.e. the time-derivative of the *name1*
variable. This quantity is used internally by LAMMPS to reset atom
velocities when they cross periodic boundaries. It is computed
internally for the other styles, but you must provide it when using an
arbitrary variable.
The variable *name2* must also be an :doc:`equal-style variable
<variable>` and should calculate the rate of tilt change, in units of
distance/time, i.e. the time-derivative of the *name1* variable. This
quantity is used internally by LAMMPS to reset atom velocities when
they cross periodic boundaries. It is computed internally for the
other styles, but you must provide it when using an arbitrary
variable.
Here is an example of using the *variable* style to perform the same
box deformation as the *wiggle* style formula listed above, where we
@@ -510,33 +553,40 @@ box without explicit remapping of their coordinates.
.. note::
For non-equilibrium MD (NEMD) simulations using "remap v" it is
usually desirable that the fluid (or flowing material, e.g. granular
particles) stream with a velocity profile consistent with the
deforming box. As mentioned above, using a thermostat such as :doc:`fix nvt/sllod <fix_nvt_sllod>` or :doc:`fix lavgevin <fix_langevin>`
(with a bias provided by :doc:`compute temp/deform <compute_temp_deform>`), will typically accomplish
that. If you do not use a thermostat, then there is no driving force
pushing the atoms to flow in a manner consistent with the deforming
box. E.g. for a shearing system the box deformation velocity may vary
usually desirable that the fluid (or flowing material,
e.g. granular particles) stream with a velocity profile consistent
with the deforming box. As mentioned above, using a thermostat
such as :doc:`fix nvt/sllod <fix_nvt_sllod>` or :doc:`fix langevin
<fix_langevin>` (with a bias provided by :doc:`compute temp/deform
<compute_temp_deform>`), will typically accomplish that. If you do
not use a thermostat, then there is no driving force pushing the
atoms to flow in a manner consistent with the deforming box.
E.g. for a shearing system the box deformation velocity may vary
from 0 at the bottom to 10 at the top of the box. But the stream
velocity profile of the atoms may vary from -5 at the bottom to +5 at
the top. You can monitor these effects using the :doc:`fix ave/chunk <fix_ave_chunk>`, :doc:`compute temp/deform <compute_temp_deform>`, and :doc:`compute temp/profile <compute_temp_profile>` commands. One way to induce
atoms to stream consistent with the box deformation is to give them an
velocity profile of the atoms may vary from -5 at the bottom to +5
at the top. You can monitor these effects using the :doc:`fix
ave/chunk <fix_ave_chunk>`, :doc:`compute temp/deform
<compute_temp_deform>`, and :doc:`compute temp/profile
<compute_temp_profile>` commands. One way to induce atoms to
stream consistent with the box deformation is to give them an
initial velocity profile, via the :doc:`velocity ramp <velocity>`
command, that matches the box deformation rate. This also typically
helps the system come to equilibrium more quickly, even if a
thermostat is used.
command, that matches the box deformation rate. This also
typically helps the system come to equilibrium more quickly, even
if a thermostat is used.
.. note::
If a :doc:`fix rigid <fix_rigid>` is defined for rigid bodies, and
*remap* is set to *x*, then the center-of-mass coordinates of rigid
bodies will be remapped to the changing simulation box. This will be
done regardless of whether atoms in the rigid bodies are in the fix
deform group or not. The velocity of the centers of mass are not
remapped even if *remap* is set to *v*, since :doc:`fix nvt/sllod <fix_nvt_sllod>` does not currently do anything special
bodies will be remapped to the changing simulation box. This will
be done regardless of whether atoms in the rigid bodies are in the
fix deform group or not. The velocity of the centers of mass are
not remapped even if *remap* is set to *v*, since :doc:`fix
nvt/sllod <fix_nvt_sllod>` does not currently do anything special
for rigid particles. If you wish to perform a NEMD simulation of
rigid particles, you can either thermostat them independently or
include a background fluid and thermostat the fluid via :doc:`fix nvt/sllod <fix_nvt_sllod>`.
include a background fluid and thermostat the fluid via :doc:`fix
nvt/sllod <fix_nvt_sllod>`.
The *flip* keyword allows the tilt factors for a triclinic box to
exceed half the distance of the parallel box length, as discussed
@@ -568,7 +618,8 @@ command if you want to include lattice spacings in a variable formula.
Restart, fix_modify, output, run start/stop, minimize info
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
This fix will restore the initial box settings from :doc:`binary restart files <restart>`, which allows the fix to be properly continue
This fix will restore the initial box settings from :doc:`binary
restart files <restart>`, which allows the fix to properly continue
deformation, when using the start/stop options of the :doc:`run <run>`
command. None of the :doc:`fix_modify <fix_modify>` options are
relevant to this fix. No global or per-atom quantities are stored by
@@ -586,12 +637,14 @@ Restrictions
You cannot apply x, y, or z deformations to a dimension that is
shrink-wrapped via the :doc:`boundary <boundary>` command.
You cannot apply xy, yz, or xz deformations to a second dimension (y in
xy) that is shrink-wrapped via the :doc:`boundary <boundary>` command.
You cannot apply xy, yz, or xz deformations to a second dimension (y
in xy) that is shrink-wrapped via the :doc:`boundary <boundary>`
command.
Related commands
""""""""""""""""
:doc:`fix deform/pressure <fix_deform_pressure>`,
:doc:`change_box <change_box>`
Default


@@ -0,0 +1,319 @@
.. index:: fix deform/pressure
fix deform/pressure command
===========================
Syntax
""""""
.. parsed-literal::
fix ID group-ID deform/pressure N parameter style args ... keyword value ...
* ID, group-ID are documented in :doc:`fix <fix>` command
* deform/pressure = style name of this fix command
* N = perform box deformation every this many timesteps
* one or more parameter/arg sequences may be appended
.. parsed-literal::
parameter = *x* or *y* or *z* or *xy* or *xz* or *yz* or *box*
*x*, *y*, *z* args = style value(s)
style = *final* or *delta* or *scale* or *vel* or *erate* or *trate* or *volume* or *wiggle* or *variable* or *pressure* or *pressure/mean*
*pressure* values = target gain
target = target pressure (pressure units)
gain = proportional gain constant (1/(time * pressure) or 1/time units)
*pressure/mean* values = target gain
target = target pressure (pressure units)
gain = proportional gain constant (1/(time * pressure) or 1/time units)
NOTE: All other styles are documented by the :doc:`fix deform <fix_deform>` command
*xy*, *xz*, *yz* args = style value
style = *final* or *delta* or *vel* or *erate* or *trate* or *wiggle* or *variable* or *pressure*
*pressure* values = target gain
target = target pressure (pressure units)
gain = proportional gain constant (1/(time * pressure) or 1/time units)
NOTE: All other styles are documented by the :doc:`fix deform <fix_deform>` command
*box* = style value
style = *volume* or *pressure*
*volume* value = none = isotropically adjust system to preserve volume of system
*pressure* values = target gain
target = target mean pressure (pressure units)
gain = proportional gain constant (1/(time * pressure) or 1/time units)
* zero or more keyword/value pairs may be appended
* keyword = *remap* or *flip* or *units* or *couple* or *vol/balance/p* or *max/rate* or *normalize/pressure*
.. parsed-literal::
*couple* value = *none* or *xyz* or *xy* or *yz* or *xz*
couple pressure values of various dimensions
*vol/balance/p* value = *yes* or *no*
Modifies the behavior of the *volume* option to try and balance pressures
*max/rate* value = *rate*
rate = maximum strain rate for pressure control
*normalize/pressure* value = *yes* or *no*
Modifies pressure controls such that the deviation in pressure is normalized by the target pressure
NOTE: All other keywords are documented by the :doc:`fix deform <fix_deform>` command
Examples
""""""""
.. code-block:: LAMMPS
fix 1 all deform/pressure 1 x pressure 2.0 0.1 normalize/pressure yes max/rate 0.001
fix 1 all deform/pressure 1 x trate 0.1 y volume z volume vol/balance/p yes
fix 1 all deform/pressure 1 x trate 0.1 y pressure/mean 0.0 1.0 z pressure/mean 0.0 1.0
Description
"""""""""""
.. versionadded:: TBD
This fix is an extension of the :doc:`fix deform <fix_deform>`
command, which allows all of its options to be used as well as new
pressure-based controls implemented by this command.
All arguments described on the :doc:`fix deform <fix_deform>` doc page
also apply to this fix unless otherwise noted below. The rest of this
doc page explains the arguments specific to this fix. Note that a
simulation can define only a single deformation command: fix deform or
fix deform/pressure.
----------
For the *x*, *y*, and *z* parameters, this is the meaning of the
styles and values provided by this fix.
The *pressure* style adjusts a dimension's box length to control the
corresponding component of the pressure tensor. This option attempts to
maintain a specified target pressure using a linear controller where the
box length :math:`L` evolves according to the equation
.. math::
\frac{d L(t)}{dt} = L(t) k (P_t - P)
where :math:`k` is a proportional gain constant, :math:`P_t` is the target
pressure, and :math:`P` is the current pressure along that dimension. This
approach is similar to the method used to control the pressure by
:doc:`fix press/berendsen <fix_press_berendsen>`. The target pressure
accepts either a constant numeric value or a LAMMPS :doc:`variable <variable>`.
Notably, this variable can be a function of time or other components of
the pressure tensor. By default, :math:`k` has units of 1/(time * pressure)
although this will change if the *normalize/pressure* option is set as
:ref:`discussed below <deform_normalize>`. There is no proven method
for choosing an appropriate value of :math:`k`, as it will depend on the
specific details of a simulation. Testing different values is recommended.
By default, there is no limit on the resulting strain rate in any dimension.
A maximum limit can be applied using the :ref:`max/rate <deform_max_rate>`
option. Akin to :doc:`fix nh <fix_nh>`, pressures in different dimensions
can be coupled using the :ref:`couple <deform_couple>` option. This means
the instantaneous pressures along coupled dimensions are averaged and the box
strains identically along the coupled dimensions.
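As a hedged sketch, the variable target mentioned above could be used
to ramp the pressure during a run (the ramp endpoints, gain, and rate
cap are illustrative):

.. code-block:: LAMMPS

   variable Pt equal ramp(0.0,5.0)
   fix 1 all deform/pressure 1 x pressure v_Pt 0.1 max/rate 0.001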
The *pressure/mean* style changes a dimension's box length to maintain
a constant mean pressure defined as the trace of the pressure tensor.
This option has identical arguments to the *pressure* style and a similar
functional equation, except the current and target pressures refer to the
mean trace of the pressure tensor. All options for the *pressure* style
also apply to the *pressure/mean* style except for the
:ref:`couple <deform_couple>` option.
Note that while this style can be identical to coupled *pressure* styles,
it is generally not the same. For instance in 2D, a coupled *pressure*
style in the *x* and *y* dimensions would be equivalent to using the
*pressure/mean* style with identical settings in each dimension. However,
it would not be the same if different settings (e.g. gain constants)
were used in the *x* and *y* dimensions or if the *pressure/mean* style
were only applied along one dimension.
----------
For the *xy*, *xz*, and *yz* parameters, this is the meaning of the
styles and values provided by this fix. Note that changing the
tilt factors of a triclinic box does not change its volume.
The *pressure* style adjusts a tilt factor to control the corresponding
off-diagonal component of the pressure tensor. This option attempts to
maintain a specified target value using a linear controller where the
tilt factor :math:`T` evolves according to the equation
.. math::
\frac{d T(t)}{dt} = L(t) k (P - P_t)
where :math:`k` is a proportional gain constant, :math:`P_t` is the
target pressure, :math:`P` is the current pressure, and :math:`L` is
the perpendicular box length. The target pressure accepts either a
constant numeric value or a LAMMPS :doc:`variable
<variable>`. Notably, this variable can be a function of time or other
components of the pressure tensor. By default, :math:`k` has units of
1/(time * pressure) although this will change if the
*normalize/pressure* option is set as :ref:`discussed below
<deform_normalize>`. There is no proven method for choosing an
appropriate value of :math:`k`, as it will depend on the specific
details of a simulation; testing different values is
recommended. One can also apply a maximum limit to the magnitude of
the applied strain using the :ref:`max/rate <deform_max_rate>` option.
----------
The *box* parameter provides an additional control over the *x*, *y*,
and *z* box lengths by isotropically dilating or contracting the box
to either maintain a fixed mean pressure or volume. This isotropic
scaling is applied after the box is deformed by the above *x*, *y*,
*z*, *xy*, *xz*, and *yz* styles, acting as a second deformation
step. This parameter will change the overall strain rate in the *x*,
*y*, or *z* dimensions. This parameter can only be used in
combination with the *x*, *y*, or *z* commands: *vel*, *erate*,
*trate*, *pressure*, or *wiggle*. This is the meaning of its styles
and values.
The *volume* style isotropically scales box lengths to maintain a constant
box volume in response to deformation from other parameters. This style
may be useful in scenarios where one wants to apply a constant deviatoric
pressure using *pressure* styles in the *x*, *y*, and *z* dimensions
(deforming the shape of the box), while maintaining a constant volume.
The *pressure* style isotropically scales box lengths in an attempt to
maintain a target mean pressure (the trace of the pressure tensor) of the
system. This is accomplished by isotropically scaling all box lengths
:math:`L` by an additional factor of :math:`k (P_t - P_m)` where :math:`k`
is the proportional gain constant, :math:`P_t` is the target pressure, and
:math:`P_m` is the current mean pressure. This style may be useful in
scenarios where one wants to apply a constant deviatoric strain rate
using various strain-based styles (e.g. *trate*) along the *x*, *y*, and *z*
dimensions (deforming the shape of the box), while maintaining a mean pressure.
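A sketch of the latter scenario (all numeric values illustrative):
straining *x* at a constant true strain rate while the *box* control
holds the mean pressure near zero:

.. code-block:: LAMMPS

   fix 1 all deform/pressure 1 x trate 0.1 box pressure 0.0 1.0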
----------
The optional keywords provided by this fix are described below.
.. _deform_normalize:
The *normalize/pressure* keyword changes how box dimensions evolve when
using the *pressure* or *pressure/mean* deformation styles. If the
*normalize/pressure* value is set to *yes*, then the deviation from the
target pressure is normalized by the absolute value of the target
pressure such that the proportional gain constant scales a percentage
error and has units of 1/time. If the target pressure is ever zero, this
will produce an error unless the *max/rate* keyword (described below)
is defined, which will cap the divergence.
.. _deform_max_rate:
The *max/rate* keyword sets an upper threshold, *rate*, that limits the
maximum magnitude of the instantaneous strain rate applied in any dimension.
This keyword only applies to the *pressure* and *pressure/mean* options. If
a pressure-controlled rate is used for both *box* and either *x*, *y*, or
*z*, then this threshold will apply separately to each individual controller
such that the cumulative strain rate on a box dimension may be up to twice
the value of *rate*.
.. _deform_couple:
The *couple* keyword allows two or three of the diagonal components of
the pressure tensor to be "coupled" together for the *pressure* option.
The value specified with the keyword determines which are coupled. For
example, *xz* means the *Pxx* and *Pzz* components of the stress tensor
are coupled, while *xyz* means all 3 diagonal components are coupled. Coupling
means two things: the instantaneous stress will be computed as an average
of the corresponding diagonal components, and the coupled box dimensions
will be changed together in lockstep, meaning coupled dimensions will be
dilated or contracted by the same percentage every timestep. If a *pressure*
style is defined for more than one coupled dimension, the target pressures
and gain constants must be identical. Alternatively, if a *pressure*
style is only defined for one of the coupled dimensions, its settings are
copied to other dimensions with undefined styles. Using *couple xyz* in
a 2d simulation is allowed; the *z* dimension is simply ignored.
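For example (values illustrative), the following couples the *x* and
*y* dimensions, which then strain in lockstep toward the same averaged
pressure:

.. code-block:: LAMMPS

   fix 1 all deform/pressure 1 x pressure 1.0 0.1 y pressure 1.0 0.1 couple xy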
.. _deform_balance:
The *vol/balance/p* keyword modifies the behavior of the *volume* style when
applied to two of the *x*, *y*, and *z* dimensions. Instead of straining
the two dimensions in lockstep, the two dimensions are allowed to
separately dilate or contract in a manner that maintains a constant
volume while simultaneously trying to keep the pressure along each
dimension equal using a method described in :ref:`(Huang2014) <Huang2014>`.
----------
If any pressure controls are used, this fix computes a temperature and
pressure each timestep. To do this, the fix creates its own computes
of style "temp" and "pressure", as if these commands had been issued:
.. code-block:: LAMMPS
compute fix-ID_temp group-ID temp
compute fix-ID_press group-ID pressure fix-ID_temp
See the :doc:`compute temp <compute_temp>` and :doc:`compute pressure
<compute_pressure>` commands for details. Note that the IDs of the
new computes are the fix-ID + underscore + "temp" or fix-ID
+ underscore + "press", and the group for the new computes is the same
as the fix group.
Note that these are NOT the computes used by thermodynamic output (see
the :doc:`thermo_style <thermo_style>` command) with ID =
*thermo_temp* and *thermo_press*. This means you can change the
attributes of this fix's temperature or pressure via the
:doc:`compute_modify <compute_modify>` command or print this
temperature or pressure during thermodynamic output via the
:doc:`thermo_style custom <thermo_style>` command using the
appropriate compute-ID. It also means that changing attributes of
*thermo_temp* or *thermo_press* will have no effect on this fix.
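For example, assuming the fix-ID is *1*, the pressure computed by this
fix could be printed with:

.. code-block:: LAMMPS

   thermo_style custom step temp press c_1_press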
Restart, fix_modify, output, run start/stop, minimize info
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
This fix will restore the initial box settings from :doc:`binary
restart files <restart>`, which allows the fix to properly continue
deformation, when using the start/stop options of the :doc:`run <run>`
command. No global or per-atom quantities are stored by this fix for
access by various :doc:`output commands <Howto_output>`.
If any pressure controls are used, the :doc:`fix_modify <fix_modify>`
*temp* and *press* options are supported by this fix, unlike in
:doc:`fix deform <fix_deform>`. You can use them to assign a
:doc:`compute <compute>` you have defined to this fix which will be
used in its temperature and pressure calculations. If you do this,
note that the kinetic energy derived from the compute temperature
should be consistent with the virial term computed using all atoms for
the pressure. LAMMPS will warn you if you choose to compute
temperature on a subset of atoms.
This fix can perform deformation over multiple runs, using the *start*
and *stop* keywords of the :doc:`run <run>` command. See the
:doc:`run <run>` command for details of how to do this.
This fix is not invoked during :doc:`energy minimization <minimize>`.
Restrictions
""""""""""""
You cannot apply x, y, or z deformations to a dimension that is
shrink-wrapped via the :doc:`boundary <boundary>` command.
You cannot apply xy, yz, or xz deformations to a second dimension (y
in xy) that is shrink-wrapped via the :doc:`boundary <boundary>`
command.
Related commands
""""""""""""""""
:doc:`fix deform <fix_deform>`, :doc:`change_box <change_box>`
Default
"""""""
The option defaults are normalize/pressure = no.
----------
.. _Huang2014:
**(Huang2014)** X. Huang, "Exploring critical-state behavior using DEM",
Doctoral dissertation, Imperial College. (2014). https://doi.org/10.25560/25316


@@ -45,7 +45,7 @@ Syntax
rng_v = integer used to initialize random number generator
* zero or more keyword/value pairs may be appended
* keyword = *algo* or *symm* or *couple* or *etypes* or *ffield* or *write_mat* or *write_inv* or *read_mat* or *read_inv*
* keyword = *algo* or *symm* or *couple* or *etypes* or *ffield* or *write_mat* or *write_inv* or *read_mat* or *read_inv* or *qtotal* or *eta*
.. parsed-literal::
@@ -68,6 +68,10 @@ Syntax
filename = file from which to read elastance matrix
*read_inv* value = filename
filename = file from which to read inverted matrix
*qtotal* value = number or *v_* equal-style variable
add overall potential so that all electrode charges add up to *qtotal*
*eta* value = d_propname
d_propname = a custom double vector defined via fix property/atom
Examples
""""""""
@@ -249,6 +253,26 @@ be enabled if any electrode particle has the same type as any
electrolyte particle (which would be unusual in a typical simulation)
and the fix will issue an error in that case.
.. versionadded:: TBD
The keyword *qtotal* causes *fix electrode/conp* and *fix electrode/thermo*
to add an overall potential to all electrodes so that the total charge on
the electrodes is a specified amount (which may be an equal-style variable).
For example, if a user wanted to simulate a solution of excess cations
such that the total electrolyte charge is +2, setting *qtotal -2* would cause
the total electrode charge to be -2, so that the simulation box remains overall
electroneutral. Since *fix electrode/conq* constrains the total charges of
individual electrodes, and since *symm on* constrains the total charge of all
electrodes to be zero, either option is incompatible with the *qtotal* keyword
(even if *qtotal* is set to zero).
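As a hedged illustration (the group names, applied potentials, and eta
value below are hypothetical), requesting a total electrode charge of -2
might look like:

.. code-block:: LAMMPS

   # qtotal cannot be combined with symm on or with fix electrode/conq
   fix conp bot electrode/conp 0.0 1.979 couple top 1.0 qtotal -2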
.. versionadded:: TBD
The keyword *eta* takes the name of a custom double vector defined via fix
property/atom. Its values are used instead of the standard eta value. The
property/atom fix must define a vector of double values and must use the
*ghost on* option.
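The electrode regression inputs added in this changeset (shown further
below) exercise this keyword; a condensed version of that pattern:

.. code-block:: LAMMPS

   fix feta all property/atom d_eta ghost on
   set group bot d_eta 0.5
   set group top d_eta 3.0
   fix conp bot electrode/conp 0 2 couple top 1 symm on eta d_eta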
Restart, fix_modify, output, run start/stop, minimize info
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
View File
@ -440,8 +440,11 @@ This fix is part of the MC package. It is only enabled if LAMMPS was
built with that package. See the :doc:`Build package <Build_package>`
doc page for more info.
This fix style requires an :doc:`atom style <atom_style>` with per atom
type masses.
Do not set "neigh_modify once yes" or else this fix will never be
called. Reneighboring is required.
called. Reneighboring is **required**.
Only usable for 3D simulations.
View File
@ -86,8 +86,8 @@ Both of these methods require defining a reference state. With the *fixed* refer
style, the user picks a specific timestep *nstep* at which particle positions are saved.
If peratom data is accessed from this compute prior to this timestep, it will simply be
zeroed. The *update* reference style implies the reference state will be updated every
*nstep* timesteps. The *offset* reference only applies to the *d2min* metric and will
update the reference state *nstep* timesteps before a multiple of *nevery* timesteps.
*nstep* timesteps. The *offset* reference will update the reference state *nstep*
timesteps before a multiple of *nevery* timesteps.
----------
View File
@ -22,6 +22,8 @@ Syntax
*mol* = molecule IDs
*q* = charge
*rmass* = per-atom mass
*temperature* = internal temperature of atom
*heatflow* = internal heat flow of atom
i_name = new integer vector referenced by name
d_name = new floating-point vector referenced by name
i2_name = new integer array referenced by name
@ -59,14 +61,18 @@ these properties for each atom in the system when a data file is read.
This fix augments the set of per-atom properties with new custom
ones. This can be useful in several scenarios.
If the atom style does not define molecule IDs, per-atom charge, or
per-atom mass, they can be added using the *mol*\ , *q* or *rmass*
If the atom style does not define molecule IDs, per-atom charge,
per-atom mass, internal temperature, or internal heat flow, they can
be added using the *mol*\ , *q*, *rmass*, *temperature*, or *heatflow*
keywords. This could be useful to define "molecules" to use as rigid
bodies with the :doc:`fix rigid <fix_rigid>` command, or to carry
around an extra flag with atoms (stored as a molecule ID) that can be
used by various commands like :doc:`compute chunk/atom
<compute_chunk_atom>` to group atoms without having to use the group
command (which is limited to a total of 32 groups including *all*\ ).
For finite-size particles, an internal temperature and heat flow can
be used to model heat conduction as in the
:doc:`GRANULAR package <Howto_granular>`.
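A short sketch of this use case, mirroring the granular example touched
later in this changeset (the fix/compute/dump IDs and the dump file name
are illustrative):

.. code-block:: LAMMPS

   fix heat all property/atom temperature heatflow
   compute 1 all property/atom temperature heatflow
   dump 1 all custom 1000 granular.dump id type radius mass x y z c_1[*]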
Another application is to use the *rmass* flag in order to have
per-atom masses instead of per-type masses. This could be used to
@ -85,9 +91,10 @@ properties that are not needed such as bond lists, which incurs some
overhead when there are no bonds.
In the future, we may add additional existing per-atom properties to
fix property/atom, similar to *mol*\ , *q* or *rmass*\ , which
"turn-on" specific properties defined by some atom styles, so they can
be easily used by atom styles that do not define them.
fix property/atom, similar to *mol*\ , *q*, *rmass*\ , *temperature*\ ,
or *heatflow* which "turn-on" specific properties defined by some atom
styles, so they can be easily used by atom styles that do not define
them.
More generally, the *i_name* and *d_name* options allow one or more
new custom per-atom vectors to be defined. Likewise the *i2_name* and
View File
@ -155,6 +155,9 @@ This fix is part of the MC package. It is only enabled if LAMMPS was
built with that package. See the :doc:`Build package <Build_package>`
page for more info.
This fix style requires an :doc:`atom style <atom_style>` with per atom
type masses.
At present the fix provides optimized subroutines for EAM type
potentials (see above) that calculate potential energy changes due to
*local* atom type swaps very efficiently. Other potentials are
View File
@ -96,11 +96,11 @@ each processor, which is acceptable when the overall grid is reasonably
small. For larger grids you should use fix *ttm/grid* instead.
Fix *ttm/mod* adds options to account for external heat sources (e.g. at
a surface) and for specifying parameters that allow the electronic
heat capacity to depend strongly on electronic temperature. It is
more expensive computationally than fix *ttm* because it treats the
thermal diffusion equation as non-linear. More details on fix *ttm/mod*
are given below.
a surface) and for specifying parameters that allow the electronic heat
capacity to depend strongly on electronic temperature. It is more
expensive computationally than fix *ttm* because it treats the thermal
diffusion equation as non-linear. More details on fix *ttm/mod* are
given below.
Heat transfer between the electronic and atomic subsystems is carried
out via an inhomogeneous Langevin thermostat. Only atoms in the fix
@ -303,15 +303,15 @@ The current fix ttm/mod implementation allows TTM simulations with a
vacuum. The vacuum region is defined as the grid cells with zero
electronic temperature. The numerical scheme does not allow energy
exchange with such cells. Since the material can expand to previously
unoccupied region in some simulations, the vacuum border can be
allowed to move. It is controlled by the *surface_movement* parameter
in the *init_file*. If it is set to 1, then "vacuum" cells can be
changed to "electron-filled" cells with the temperature *T_e_min* if
atoms move into them (currently only implemented for the case of
1-dimensional motion of flat surface normal to the X axis). The
initial borders of vacuum can be set in the *init_file* via *lsurface*
and *rsurface* parameters. In this case, electronic pressure gradient
is calculated as
unoccupied region in some simulations, the vacuum border can be allowed
to move. It is controlled by the *surface_movement* parameter in the
*init_file*. If it is set to 1, then "vacuum" cells can be changed to
"electron-filled" cells with the temperature *T_e_min* if atoms move
into them (currently only implemented for the case of 1-dimensional
motion of a flat surface normal to the X axis). The initial locations of
the interfaces of the electron density to the vacuum can be set in the
*init_file* via *lsurface* and *rsurface* parameters. In this case,
electronic pressure gradient is calculated as
.. math::
175
doc/src/fix_wall_flow.rst Normal file
View File
@ -0,0 +1,175 @@
.. index:: fix wall/flow
.. index:: fix wall/flow/kk
fix wall/flow command
=====================
Accelerator Variants: *wall/flow/kk*
Syntax
""""""
.. code-block:: LAMMPS
fix ID group-ID wall/flow axis vflow T seed N coords ... keyword value
* ID, group-ID are documented in :doc:`fix <fix>` command
* wall/flow = style name of this fix command
* axis = flow axis (*x*, *y*, or *z*)
* vflow = generated flow velocity in *axis* direction (velocity units)
* T = flow temperature (temperature units)
* seed = random seed for stochasticity (positive integer)
* N = number of walls
* coords = list of N wall positions along the *axis* direction in ascending order (distance units)
* zero or more keyword/value pairs may be appended
* keyword = *units*
.. parsed-literal::
*units* value = *lattice* or *box*
*lattice* = wall positions are defined in lattice units
*box* = wall positions are defined in simulation box units
Examples
""""""""
.. code-block:: LAMMPS
fix 1 all wall/flow x 0.4 1.5 593894 4 2.0 4.0 6.0 8.0
Description
"""""""""""
.. versionadded:: TBD
This fix implements flow boundary conditions (FBC) introduced in
:ref:`(Pavlov1) <fbc-Pavlov1>` and :ref:`(Pavlov2) <fbc-Pavlov2>`.
The goal is to generate a stationary flow with a shifted Maxwell
velocity distribution:
.. math::
f_a(v_a) \propto \exp{\left(-\frac{m (v_a-v_{\text{flow}})^2}{2 k_B T}\right)}
where :math:`v_a` is the component of velocity along the specified
*axis* argument (a = x,y,z), :math:`v_{\text{flow}}` is the flow
velocity specified as the *vflow* argument, *T* is the specified flow
temperature, *m* is the particle mass, and :math:`k_B` is the Boltzmann
constant.
This is achieved by defining a series of *N* transparent walls along
the flow *axis* direction. Each wall is at the specified position
listed in the *coords* argument. Note that an additional transparent
wall is defined by the code at the boundary of the (periodic)
simulation domain in the *axis* direction. So there are effectively
N+1 walls.
Each time a particle in the specified group passes through one of the
transparent walls, its velocity is re-assigned. Particles not in the
group do not interact with the wall. This can be used, for example, to
add obstacles composed of atoms, or to simulate a solution of complex
molecules in a one-atom liquid (note that the fix has been tested for
one-atom systems only).
Conceptually, the velocity re-assignment represents creation of a new
particle within the system with simultaneous removal of the particle
which passed through the wall. The velocity components in directions
parallel to the wall are re-assigned according to the standard Maxwell
velocity distribution for the specified temperature *T*. The velocity
component perpendicular to the wall is re-assigned according to the
shifted Maxwell distribution defined above:
.. math::
f_a^{\text{generated}}(v_a) \propto v_a f_a(v_a)
It can be shown that for an ideal-gas scenario this procedure makes
the velocity distribution of particles between walls exactly as
desired.
Because most simulated systems are not an ideal gas, a single wall may
not be sufficient to maintain a stationary flow without "congestion",
which can manifest itself as regions of increased particle density
located upstream from static obstacles. For this reason, multiple
walls can be defined.
For the same reason, the actual temperature and velocity of the
generated flow may differ from what is requested. The degree of
discrepancy is determined by how different from an ideal gas the
simulated system is. Therefore, a calibration procedure may be
required for such a system, as described in :ref:`(Pavlov2)
<fbc-Pavlov2>`.
Note that the interactions between particles on different sides of a
transparent wall are not disabled or neglected. Likewise, particle
positions are not altered by the velocity reassignment. This removes
the need to modify the force field to work correctly in cases when a
particle is close to a wall.
For example, if particle positions were uniformly redistributed across
the surface of a wall, two particles could end up too close to each
other, potentially causing the simulation to explode. As a consequence
of this compromise, however, some collective phenomena such as regions
with increased/decreased density or collective movements are not fully
removed when particles cross a wall. This unwanted effect can also be
mitigated by using more walls.
.. note::
When the specified flow has a high velocity, a lost atoms error can
occur (see :doc:`error messages <Errors_messages>`). If this
happens, you should ensure the checks for neighbor list rebuilds,
set via the :doc:`neigh_modify <neigh_modify>` command, are as
conservative as possible (every timestep if needed). Those are the
default settings.
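For instance, the most conservative rebuild check (equivalent to the
default settings) can be spelled out explicitly as:

.. code-block:: LAMMPS

   neigh_modify every 1 delay 0 check yes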
Restart, fix_modify, output, run start/stop, minimize info
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
No information about this fix is written to :doc:`binary restart files
<restart>`.
None of the :doc:`fix_modify <fix_modify>` options are relevant to
this fix.
No global or per-atom quantities are stored by this fix for access by
various :doc:`output commands <Howto_output>`.
No parameter of this fix can be used with the *start/stop* keywords of
the :doc:`run <run>` command.
This fix is not invoked during :doc:`energy minimization <minimize>`.
Restrictions
""""""""""""
Fix *wall/flow* is part of the EXTRA-FIX package. It is only enabled
if LAMMPS was built with that package. See the :doc:`Build package
<Build_package>` page for more info.
Flow boundary conditions should not be used with rigid bodies such as
those defined by the :doc:`fix rigid <fix_rigid>` command.
This fix can only be used with periodic boundary conditions along the
flow axis. The size of the box in this direction must not change. Also,
the fix is designed to work only in an orthogonal simulation box.
Related commands
""""""""""""""""
:doc:`fix wall/reflect <fix_wall>` command
Default
"""""""
The default for the *units* keyword is *lattice*.
----------
.. _fbc-Pavlov1:
**(Pavlov1)** Pavlov, Kolotinskii, Stegailov, "GPU-Based Molecular Dynamics of Turbulent Liquid Flows with OpenMM", Proceedings of PPAM-2022, LNCS (Springer), vol. 13826, pp. 346-358 (2023)
.. _fbc-Pavlov2:
**(Pavlov2)** Pavlov, Galigerov, Kolotinskii, Nikolskiy, Stegailov, "GPU-based Molecular Dynamics of Fluid Flows: Reaching for Turbulence", Int. J. High Perf. Comp. Appl. (2024)
View File
@ -195,8 +195,11 @@ doc page for more info.
Do not set "neigh_modify once yes" or else this fix will never be
called. Reneighboring is **required**.
Can be run in parallel, but aspects of the GCMC part will not scale well
in parallel. Only usable for 3D simulations.
This fix style requires an :doc:`atom style <atom_style>` with per atom
type masses.
Can be run in parallel, but some aspects of the insertion procedure
will not scale well in parallel. Only usable for 3D simulations.
Related commands
View File
@ -156,7 +156,7 @@ pair_coeff command:
The first 2 arguments must be \* \* so as to span all LAMMPS atom types.
The first three C arguments map LAMMPS atom types 1,2,3 to the C
element in the AIREBO file. The final H argument maps LAMMPS atom
type 4 to the H element in the SW file. If a mapping value is
type 4 to the H element in the AIREBO file. If a mapping value is
specified as NULL, the mapping is not performed. This can be used
when an *airebo* potential is used as part of the *hybrid* pair style.
The NULL values are placeholders for atom types that will be used with
@ -222,12 +222,12 @@ enabled if LAMMPS was built with that package. See the :doc:`Build package <Bui
These pair potentials require the :doc:`newton <newton>` setting to be
"on" for pair interactions.
The CH.airebo and CH.airebo-m potential files provided with LAMMPS
(see the potentials directory) are parameterized for metal :doc:`units <units>`.
You can use the AIREBO, AIREBO-M or REBO potential with any LAMMPS units,
but you would need to create your own AIREBO or AIREBO-M potential file
with coefficients listed in the appropriate units, if your simulation
does not use "metal" units.
The CH.airebo and CH.airebo-m potential files provided with LAMMPS (see
the potentials directory) are parameterized for metal :doc:`units
<units>`. You can use the pair styles with *any* LAMMPS units, but you
would need to create your own AIREBO or AIREBO-M potential file with
coefficients listed in the appropriate units, if your simulation does
not use "metal" units.
The pair styles provided here **only** support potential files parameterized
for the elements carbon and hydrogen (designated with "C" and "H" in the
View File
@ -138,8 +138,12 @@ This pair style can only be used via the *pair* keyword of the
Restrictions
""""""""""""
This style is part of the MC package. It is only enabled if LAMMPS
was built with that package. See the :doc:`Build package <Build_package>` page for more info.
This pair style is part of the MC package. It is only enabled if LAMMPS
was built with that package. See the :doc:`Build package
<Build_package>` page for more info.
This pair style requires an :doc:`atom style <atom_style>` with per
atom type masses.
Related commands
""""""""""""""""
View File
@ -427,8 +427,8 @@ package. They are only enabled if LAMMPS was built with that package.
See the :doc:`Build package <Build_package>` page for more info.
The maximum number of elements that can be read from the MEAM library
file, is determined at compile time. The default is 5. If you need
support for more elements, you have to change the the constant 'maxelt'
file, is determined at compile time. The default is 8. If you need
support for more elements, you have to change the constant 'MAXELT'
at the beginning of the file ``src/MEAM/meam.h`` and update/recompile
LAMMPS. There is no limit on the number of atom types.
150
doc/src/pair_rebomos.rst Normal file
View File
@ -0,0 +1,150 @@
.. index:: pair_style rebomos
.. index:: pair_style rebomos/omp
pair_style rebomos command
==========================
Accelerator Variants: *rebomos/omp*
Syntax
""""""
.. code-block:: LAMMPS
pair_style rebomos
* rebomos = name of this pair style
Examples
""""""""
.. code-block:: LAMMPS
pair_style rebomos
pair_coeff * * ../potentials/MoS.rebomos Mo S
Example input scripts available: examples/threebody/
Description
"""""""""""
.. versionadded:: TBD
The *rebomos* pair style computes the interactions between molybdenum
and sulfur atoms :ref:`(Stewart) <Stewart>` utilizing an adaptive
interatomic reactive empirical bond order potential that is similar in
form to the AIREBO potential :ref:`(Stuart) <Stuart2>`. The potential
is based on an earlier parameterization for :math:`\text{MoS}_2`
developed by :ref:`(Liang) <Liang>`.
The REBOMoS potential consists of two terms:
.. math::
E = \frac{1}{2} \sum_i \sum_{j \neq i}
\left[ E^{\text{REBO}}_{ij} + E^{\text{LJ}}_{ij} \right]
The :math:`E^{\text{REBO}}` term describes the covalently bonded
interactions between Mo and S atoms while the :math:`E^{\text{LJ}}` term
describes longer range dispersion forces between layers. A cubic spline
function is applied to smoothly switch between covalent bonding at short
distances to dispersion interactions at longer distances. This allows
the model to capture bond formation and breaking events which may occur
between adjacent :math:`\text{MoS}_2` layers, edges, defects, and more.
----------
Only a single pair_coeff command is used with the *rebomos* pair style
which specifies a REBOMoS potential file with parameters for Mo and S.
These are mapped to LAMMPS atom types by specifying N additional
arguments after the filename in the pair_coeff command, where N is the
number of LAMMPS atom types:
* filename
* :math:`N` element names = mapping of REBOMoS elements to atom types
See the :doc:`pair_coeff <pair_coeff>` page for alternate ways
to specify the path for the potential file.
As an example, if your LAMMPS simulation has three atom types and you want
the first two to be Mo, and the third to be S, you would use the following
pair_coeff command:
.. code-block:: LAMMPS
pair_coeff * * MoS.rebomos Mo Mo S
The first 2 arguments must be \* \* so as to span all LAMMPS atom types.
The first two Mo arguments map LAMMPS atom types 1 and 2 to the Mo
element in the REBOMoS file. The final S argument maps LAMMPS atom type
3 to the S element in the REBOMoS file. If a mapping value is specified
as NULL, the mapping is not performed. This can be used when a
*rebomos* potential is used as part of the *hybrid* pair style. The
NULL values are placeholders for atom types that will be used with other
potentials.
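A hedged sketch of such a hybrid setup (the second sub-style and all
numeric coefficients below are hypothetical):

.. code-block:: LAMMPS

   # types 1,2 = Mo,S via rebomos; type 3 and its cross terms via lj/cut
   pair_style hybrid rebomos lj/cut 10.0
   pair_coeff * * rebomos MoS.rebomos Mo S NULL
   pair_coeff 3 3 lj/cut 0.1 3.4
   pair_coeff 1*2 3 lj/cut 0.1 3.4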
----------
.. include:: accel_styles.rst
----------
Mixing, shift, table, tail correction, restart, rRESPA info
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
This pair style does not support the :doc:`pair_modify <pair_modify>`
mix, shift, table, and tail options.
This pair style does not write its information to :doc:`binary restart
files <restart>`, since it is stored in the potential file. Thus, you need
to re-specify the pair_style and pair_coeff commands in an input script
that reads a restart file.
This pair style can only be used via the *pair* keyword of the
:doc:`run_style respa <run_style>` command. It does not support the
*inner*, *middle*, *outer* keywords.
Restrictions
""""""""""""
This pair style is part of the MANYBODY package. It is only enabled if
LAMMPS was built with that package. See the :doc:`Build package
<Build_package>` page for more info.
These pair potentials require the :doc:`newton <newton>` setting to be
"on" for pair interactions.
The MoS.rebomos potential file provided with LAMMPS (see the potentials
directory) is parameterized for metal :doc:`units <units>`. You can use
the *rebomos* pair style with any LAMMPS units setting, but you would
need to create your own REBOMoS potential file with coefficients listed
in the appropriate units.
The pair style provided here **only** supports potential files parameterized
for the elements molybdenum and sulfur (designated with "Mo" and "S" in the
*pair_coeff* command). Using potential files for other elements will trigger
an error.
Related commands
""""""""""""""""
:doc:`pair_coeff <pair_coeff>`, :doc:`pair style rebo <pair_airebo>`
Default
"""""""
none
----------
.. _Stewart:
**(Stewart)** Stewart, Spearot, Modelling Simul. Mater. Sci. Eng. 21, 045003 (2013).
.. _Stuart2:
**(Stuart)** Stuart, Tutein, Harrison, J. Chem. Phys. 112, 6472-6486 (2000).
.. _Liang:
**(Liang)** Liang, Phillpot, Sinnott, Phys. Rev. B 79, 245110 (2009); Erratum: Phys. Rev. B 85, 199903(E) (2012)
View File
@ -333,6 +333,7 @@ accelerated styles exist.
* :doc:`rann <pair_rann>` -
* :doc:`reaxff <pair_reaxff>` - ReaxFF potential
* :doc:`rebo <pair_airebo>` - second generation REBO potential of Brenner
* :doc:`rebomos <pair_rebomos>` - REBOMoS potential for MoS2
* :doc:`resquared <pair_resquared>` - Everaers RE-Squared ellipsoidal potential
* :doc:`saip/metal <pair_saip_metal>` - interlayer potential for hetero-junctions formed with hexagonal 2D materials and metal surfaces
* :doc:`sdpd/taitwater/isothermal <pair_sdpd_taitwater_isothermal>` - smoothed dissipative particle dynamics for water at isothermal conditions
View File
@ -25,6 +25,8 @@ Syntax
*numa* params = none
*custom* params = infile
infile = file containing grid layout
*numa_nodes* arg = Nn
Nn = number of numa domains per node
*map* arg = *cart* or *cart/reorder* or *xyz* or *xzy* or *yxz* or *yzx* or *zxy* or *zyx*
cart = use MPI_Cart() methods to map processors to 3d grid with reorder = 0
cart/reorder = use MPI_Cart() methods to map processors to 3d grid with reorder = 1
@ -159,24 +161,28 @@ surface-to-volume ratio of each processor's subdomain.
The *numa* style operates similar to the *twolevel* keyword except
that it auto-detects which cores are running on which nodes.
Currently, it does this in only 2 levels, but it may be extended in
the future to account for socket topology and other non-uniform memory
access (NUMA) costs. It also uses a different algorithm than the
*twolevel* keyword for doing the two-level factorization of the
simulation box into a 3d processor grid to minimize off-node
communication, and it does its own MPI-based mapping of nodes and
It will also subdivide the cores into numa domains. Currently, the
number of numa domains is not autodetected and must be specified using
the *numa_nodes* keyword; otherwise, the default value is used. The
*numa* style uses a different algorithm than the *twolevel* keyword for
doing the two-level factorization of the simulation box into a 3d
processor grid to minimize off-node communication and communication
across numa domains. It does its own MPI-based mapping of nodes and
cores to the regular 3d grid. Thus it may produce a different layout
of the processors than the *twolevel* options.
The *numa* style will give an error if the number of MPI processes is
not divisible by the number of cores used per node, or any of the Px
or Py of Pz values is greater than 1.
or Py or Pz values is greater than 1.
.. note::
Unlike the *twolevel* style, the *numa* style does not require
any particular ordering of MPI ranks in order to work correctly. This
is because it auto-detects which processes are running on which nodes.
However, it assumes that the lowest ranks are in the first numa
domain, and so forth. MPI rank orderings that do not preserve this
property might result in more intranode communication between CPUs.
The *custom* style uses the file *infile* to define both the 3d
factorization and the mapping of processors to the grid.
@ -207,6 +213,14 @@ any order, but no processor ID should appear more than once.
----------
The *numa_nodes* keyword is used to specify the number of numa domains
per node. It is currently only used by the *numa* style for two-level
factorization to reduce the amount of MPI communication between CPUs.
A good setting will typically be equal to the number of CPU sockets
per node.
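For example, on nodes with two CPU sockets (assuming one numa domain
per socket), this could be set as:

.. code-block:: LAMMPS

   processors * * * grid numa numa_nodes 2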
----------
The *map* keyword affects how the P processor IDs (from 0 to P-1) are
mapped to the 3d grid of processors. It is only used by the
*onelevel* and *twolevel* grid settings.
@ -356,5 +370,5 @@ Related commands
Default
"""""""
The option defaults are Px Py Pz = \* \* \*, grid = onelevel, and map =
cart.
The option defaults are Px Py Pz = \* \* \*, grid = onelevel, map =
View File

@ -1783,6 +1783,7 @@ Kolafa
Kollman
kolmogorov
Kolmogorov
Kolotinskii
Kondor
konglt
Koning
@ -2279,6 +2280,7 @@ morris
Morriss
morse
Morteza
MoS
Mosayebi
Moseler
Moskalev
@ -2791,6 +2793,7 @@ PEigenDense
Peng
peptide
peratom
Perf
Pergamon
pergrid
peri
@ -3087,6 +3090,7 @@ reaxff
ReaxFF
REAXFF
rebo
rebomos
recurse
recursing
Ree
@ -3614,6 +3618,7 @@ tesselation
tesselations
Tetot
tex
textrm
tfac
tfmc
tfMC
@ -3901,7 +3906,9 @@ Verlet
versa
Verstraelen
ves
vf
vflag
vflow
vfrac
vhi
View File

@ -1,7 +1,7 @@
#!/usr/bin/env python3
import sys
import os.path as op
import sys
def rel_error(out, ref):
@ -49,5 +49,5 @@ for label, ref, out in out_lines:
error = rel_error(out, ref)
lines.append(f"{label}: {out:.5f}, {error:.5f}\n")
with open("madelung.txt", 'a') as f:
with open("madelung.txt", "a") as f:
f.writelines(lines)
View File
@ -0,0 +1,14 @@
boundary p p f
kspace_style ewald/electrode 1.0e-8
kspace_modify slab 8.0 # ew3dc
include "settings.mod" # styles, computes, groups and fixes
thermo_style custom step pe c_qbot c_qtop
fix feta all property/atom d_eta ghost on
set group bot d_eta 2.0
set group top d_eta 2.0
fix conp bot electrode/conp 0 2 couple top 1 symm on eta d_eta write_inv inv.csv write_vec vec.csv
run 0
View File
@ -0,0 +1,14 @@
boundary p p f
kspace_style ewald/electrode 1.0e-8
kspace_modify slab 8.0 # ew3dc
include "settings.mod" # styles, computes, groups and fixes
thermo_style custom step pe c_qbot c_qtop
fix feta all property/atom d_eta ghost on
set group bot d_eta 0.5
set group top d_eta 3.0
fix conp bot electrode/conp 0 2 couple top 1 symm on eta d_eta algo cg 1e-6
run 0
View File
@ -0,0 +1,14 @@
boundary p p f
kspace_style ewald/electrode 1.0e-8
kspace_modify slab 8.0 # ew3dc
include "settings.mod" # styles, computes, groups and fixes
thermo_style custom step pe c_qbot c_qtop
fix feta all property/atom d_eta ghost on
set group bot d_eta 0.5
set group top d_eta 3.0
fix conp bot electrode/conp 0 2 couple top 1 symm on eta d_eta write_inv inv.csv write_vec vec.csv
run 0
View File
@ -0,0 +1,138 @@
LAMMPS (21 Nov 2023 - Development - patch_21Nov2023-668-g5b6c0c6b56)
using 1 OpenMP thread(s) per MPI task
boundary p p f
kspace_style ewald/electrode 1.0e-8
kspace_modify slab 8.0 # ew3dc
include "settings.mod" # styles, computes, groups and fixes
# set boundary in main script because ffield is periodic
units real
# distribute electrode atoms among all processors:
if "$(extract_setting(world_size) % 2) == 0" then "processors * * 2"
atom_style full
pair_style lj/cut/coul/long 12
read_data "data.au-elyt"
Reading data file ...
orthogonal box = (0 0 -10) to (1 1 10)
1 by 1 by 1 MPI processor grid
reading atoms ...
4 atoms
Finding 1-2 1-3 1-4 neighbors ...
special bond factors lj: 0 0 0
special bond factors coul: 0 0 0
0 = max # of 1-2 neighbors
0 = max # of 1-3 neighbors
0 = max # of 1-4 neighbors
1 = max # of special neighbors
special bonds CPU = 0.000 seconds
read_data CPU = 0.003 seconds
group bot type 1
1 atoms in group bot
group top type 2
1 atoms in group top
# get electrode charges
variable q atom q
compute qbot bot reduce sum v_q
compute qtop top reduce sum v_q
compute compute_pe all pe
variable vpe equal c_compute_pe
variable charge equal c_qtop
fix fxprint all print 1 "${vpe}, ${charge}" file "out.csv"
thermo_style custom step pe c_qbot c_qtop
fix feta all property/atom d_eta ghost on
set group bot d_eta 2.0
Setting atom values ...
1 settings made for d_eta
set group top d_eta 2.0
Setting atom values ...
1 settings made for d_eta
fix conp bot electrode/conp 0 2 couple top 1 symm on eta d_eta write_inv inv.csv write_vec vec.csv
2 atoms in group conp_group
run 0
CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
Your simulation uses code contributions which should be cited:
- fix electrode command:
@article{Ahrens2022
author = {Ahrens-Iwers, Ludwig J.V. and Janssen, Mahijs and Tee, Shern R. and Mei{\ss}ner, Robert H.},
doi = {10.1063/5.0099239},
title = {{ELECTRODE: An electrochemistry package for LAMMPS}},
journal = {The Journal of Chemical Physics},
year = {2022}
volume = {157},
pages = {084801},
}
CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
WARNING: No fixes with time integration, atoms won't move (src/verlet.cpp:60)
Ewald/electrode initialization ...
using 12-bit tables for long-range coulomb (src/kspace.cpp:342)
WARNING: For better accuracy use 'pair_modify table 0' (src/kspace.cpp:365)
G vector (1/distance) = 0.32261103
estimated absolute RMS force accuracy = 3.8272011e-06
estimated relative force accuracy = 1.1525502e-08
KSpace vectors: actual max1d max3d = 52 50 515150
kxmax kymax kzmax = 1 1 50
Generated 3 of 3 mixed pair_coeff terms from geometric mixing rule
Neighbor list info ...
update: every = 1 steps, delay = 0 steps, check = yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 14
ghost atom cutoff = 14
binsize = 7, bins = 1 1 3
2 neighbor lists, perpetual/occasional/extra = 2 0 0
(1) pair lj/cut/coul/long, perpetual
attributes: half, newton on
pair build: half/bin/newton
stencil: half/bin/3d
bin: standard
(2) fix electrode/conp, perpetual, copy from (1)
attributes: half, newton on
pair build: copy
stencil: none
bin: none
WARNING: Proc sub-domain size < neighbor skin, could lead to lost atoms (src/domain.cpp:965)
139.943964815502, 0.279214485147238
Per MPI rank memory allocation (min/avg/max) = 144.2 | 144.2 | 144.2 Mbytes
Step PotEng c_qbot c_qtop
0 139.94396 -0.27921449 0.27921449
Loop time of 2.191e-06 on 1 procs for 0 steps with 4 atoms
91.3% CPU use with 1 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 0 | 0 | 0 | 0.0 | 0.00
Bond | 0 | 0 | 0 | 0.0 | 0.00
Kspace | 0 | 0 | 0 | 0.0 | 0.00
Neigh | 0 | 0 | 0 | 0.0 | 0.00
Comm | 0 | 0 | 0 | 0.0 | 0.00
Output | 0 | 0 | 0 | 0.0 | 0.00
Modify | 0 | 0 | 0 | 0.0 | 0.00
Other | | 2.191e-06 | | |100.00
Nlocal: 4 ave 4 max 4 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 3596 ave 3596 max 3596 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 4790 ave 4790 max 4790 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 4790
Ave neighs/atom = 1197.5
Ave special neighs/atom = 0
Neighbor list builds = 0
Dangerous builds = 0
Total wall time: 0:00:00
View File
@ -0,0 +1,139 @@
LAMMPS (21 Nov 2023 - Development - patch_21Nov2023-668-g5b6c0c6b56)
using 1 OpenMP thread(s) per MPI task
boundary p p f
kspace_style ewald/electrode 1.0e-8
kspace_modify slab 8.0 # ew3dc
include "settings.mod" # styles, computes, groups and fixes
# set boundary in main script because ffield is periodic
units real
# distribute electrode atoms among all processors:
if "$(extract_setting(world_size) % 2) == 0" then "processors * * 2"
atom_style full
pair_style lj/cut/coul/long 12
read_data "data.au-elyt"
Reading data file ...
orthogonal box = (0 0 -10) to (1 1 10)
1 by 1 by 1 MPI processor grid
reading atoms ...
4 atoms
Finding 1-2 1-3 1-4 neighbors ...
special bond factors lj: 0 0 0
special bond factors coul: 0 0 0
0 = max # of 1-2 neighbors
0 = max # of 1-3 neighbors
0 = max # of 1-4 neighbors
1 = max # of special neighbors
special bonds CPU = 0.000 seconds
read_data CPU = 0.003 seconds
group bot type 1
1 atoms in group bot
group top type 2
1 atoms in group top
# get electrode charges
variable q atom q
compute qbot bot reduce sum v_q
compute qtop top reduce sum v_q
compute compute_pe all pe
variable vpe equal c_compute_pe
variable charge equal c_qtop
fix fxprint all print 1 "${vpe}, ${charge}" file "out.csv"
thermo_style custom step pe c_qbot c_qtop
fix feta all property/atom d_eta ghost on
set group bot d_eta 0.5
Setting atom values ...
1 settings made for d_eta
set group top d_eta 3.0
Setting atom values ...
1 settings made for d_eta
fix conp bot electrode/conp 0 2 couple top 1 symm on eta d_eta algo cg 1e-6
2 atoms in group conp_group
run 0
CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
Your simulation uses code contributions which should be cited:
- fix electrode command:
@article{Ahrens2022
author = {Ahrens-Iwers, Ludwig J.V. and Janssen, Mahijs and Tee, Shern R. and Mei{\ss}ner, Robert H.},
doi = {10.1063/5.0099239},
title = {{ELECTRODE: An electrochemistry package for LAMMPS}},
journal = {The Journal of Chemical Physics},
year = {2022}
volume = {157},
pages = {084801},
}
CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
WARNING: No fixes with time integration, atoms won't move (src/verlet.cpp:60)
Ewald/electrode initialization ...
using 12-bit tables for long-range coulomb (src/kspace.cpp:342)
WARNING: For better accuracy use 'pair_modify table 0' (src/kspace.cpp:365)
G vector (1/distance) = 0.32261103
estimated absolute RMS force accuracy = 3.8272011e-06
estimated relative force accuracy = 1.1525502e-08
KSpace vectors: actual max1d max3d = 52 50 515150
kxmax kymax kzmax = 1 1 50
Generated 3 of 3 mixed pair_coeff terms from geometric mixing rule
Neighbor list info ...
update: every = 1 steps, delay = 0 steps, check = yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 14
ghost atom cutoff = 14
binsize = 7, bins = 1 1 3
2 neighbor lists, perpetual/occasional/extra = 2 0 0
(1) pair lj/cut/coul/long, perpetual
attributes: half, newton on
pair build: half/bin/newton
stencil: half/bin/3d
bin: standard
(2) fix electrode/conp, perpetual, copy from (1)
attributes: half, newton on
pair build: copy
stencil: none
bin: none
WARNING: Proc sub-domain size < neighbor skin, could lead to lost atoms (src/domain.cpp:965)
165.519373910316, 0.29521534552818
Per MPI rank memory allocation (min/avg/max) = 144.2 | 144.2 | 144.2 Mbytes
Step PotEng c_qbot c_qtop
0 165.51937 -0.29521535 0.29521535
Loop time of 2.797e-06 on 1 procs for 0 steps with 4 atoms
71.5% CPU use with 1 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 0 | 0 | 0 | 0.0 | 0.00
Bond | 0 | 0 | 0 | 0.0 | 0.00
Kspace | 0 | 0 | 0 | 0.0 | 0.00
Neigh | 0 | 0 | 0 | 0.0 | 0.00
Comm | 0 | 0 | 0 | 0.0 | 0.00
Output | 0 | 0 | 0 | 0.0 | 0.00
Modify | 0 | 0 | 0 | 0.0 | 0.00
Other | | 2.797e-06 | | |100.00
Nlocal: 4 ave 4 max 4 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 3596 ave 3596 max 3596 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 4790 ave 4790 max 4790 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 4790
Ave neighs/atom = 1197.5
Ave special neighs/atom = 0
Neighbor list builds = 0
Dangerous builds = 0
Average conjugate gradient steps: 1
Total wall time: 0:00:00
View File
@ -0,0 +1,138 @@
LAMMPS (21 Nov 2023 - Development - patch_21Nov2023-668-g5b6c0c6b56)
using 1 OpenMP thread(s) per MPI task
boundary p p f
kspace_style ewald/electrode 1.0e-8
kspace_modify slab 8.0 # ew3dc
include "settings.mod" # styles, computes, groups and fixes
# set boundary in main script because ffield is periodic
units real
# distribute electrode atoms among all processors:
if "$(extract_setting(world_size) % 2) == 0" then "processors * * 2"
atom_style full
pair_style lj/cut/coul/long 12
read_data "data.au-elyt"
Reading data file ...
orthogonal box = (0 0 -10) to (1 1 10)
1 by 1 by 1 MPI processor grid
reading atoms ...
4 atoms
Finding 1-2 1-3 1-4 neighbors ...
special bond factors lj: 0 0 0
special bond factors coul: 0 0 0
0 = max # of 1-2 neighbors
0 = max # of 1-3 neighbors
0 = max # of 1-4 neighbors
1 = max # of special neighbors
special bonds CPU = 0.000 seconds
read_data CPU = 0.003 seconds
group bot type 1
1 atoms in group bot
group top type 2
1 atoms in group top
# get electrode charges
variable q atom q
compute qbot bot reduce sum v_q
compute qtop top reduce sum v_q
compute compute_pe all pe
variable vpe equal c_compute_pe
variable charge equal c_qtop
fix fxprint all print 1 "${vpe}, ${charge}" file "out.csv"
thermo_style custom step pe c_qbot c_qtop
fix feta all property/atom d_eta ghost on
set group bot d_eta 0.5
Setting atom values ...
1 settings made for d_eta
set group top d_eta 3.0
Setting atom values ...
1 settings made for d_eta
fix conp bot electrode/conp 0 2 couple top 1 symm on eta d_eta write_inv inv.csv write_vec vec.csv
2 atoms in group conp_group
run 0
CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
Your simulation uses code contributions which should be cited:
- fix electrode command:
@article{Ahrens2022
author = {Ahrens-Iwers, Ludwig J.V. and Janssen, Mahijs and Tee, Shern R. and Mei{\ss}ner, Robert H.},
doi = {10.1063/5.0099239},
title = {{ELECTRODE: An electrochemistry package for LAMMPS}},
journal = {The Journal of Chemical Physics},
year = {2022}
volume = {157},
pages = {084801},
}
CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
WARNING: No fixes with time integration, atoms won't move (src/verlet.cpp:60)
Ewald/electrode initialization ...
using 12-bit tables for long-range coulomb (src/kspace.cpp:342)
WARNING: For better accuracy use 'pair_modify table 0' (src/kspace.cpp:365)
G vector (1/distance) = 0.32261103
estimated absolute RMS force accuracy = 3.8272011e-06
estimated relative force accuracy = 1.1525502e-08
KSpace vectors: actual max1d max3d = 52 50 515150
kxmax kymax kzmax = 1 1 50
Generated 3 of 3 mixed pair_coeff terms from geometric mixing rule
Neighbor list info ...
update: every = 1 steps, delay = 0 steps, check = yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 14
ghost atom cutoff = 14
binsize = 7, bins = 1 1 3
2 neighbor lists, perpetual/occasional/extra = 2 0 0
(1) pair lj/cut/coul/long, perpetual
attributes: half, newton on
pair build: half/bin/newton
stencil: half/bin/3d
bin: standard
(2) fix electrode/conp, perpetual, copy from (1)
attributes: half, newton on
pair build: copy
stencil: none
bin: none
WARNING: Proc sub-domain size < neighbor skin, could lead to lost atoms (src/domain.cpp:965)
165.519373910316, 0.295215345528172
Per MPI rank memory allocation (min/avg/max) = 144.2 | 144.2 | 144.2 Mbytes
Step PotEng c_qbot c_qtop
0 165.51937 -0.29521535 0.29521535
Loop time of 2.18e-06 on 1 procs for 0 steps with 4 atoms
91.7% CPU use with 1 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 0 | 0 | 0 | 0.0 | 0.00
Bond | 0 | 0 | 0 | 0.0 | 0.00
Kspace | 0 | 0 | 0 | 0.0 | 0.00
Neigh | 0 | 0 | 0 | 0.0 | 0.00
Comm | 0 | 0 | 0 | 0.0 | 0.00
Output | 0 | 0 | 0 | 0.0 | 0.00
Modify | 0 | 0 | 0 | 0.0 | 0.00
Other | | 2.18e-06 | | |100.00
Nlocal: 4 ave 4 max 4 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 3596 ave 3596 max 3596 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 4790 ave 4790 max 4790 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 4790
Ave neighs/atom = 1197.5
Ave special neighs/atom = 0
Neighbor list builds = 0
Dangerous builds = 0
Total wall time: 0:00:00
View File
@ -3,7 +3,6 @@
import numpy as np
from scipy.special import erf
ETA = 2
SQRT2 = np.sqrt(2)
COULOMB = 332.06371 # Coulomb constant in Lammps 'real' units
QE2F = 23.060549
@ -17,14 +16,14 @@ def lattice(length):
return np.array(np.meshgrid(x, y)).T.reshape(-1, 2)
def a_element(r):
def a_element(r, eta):
"""Coulomb contribution of two Gaussians"""
return erf(ETA / SQRT2 * r) / r
return erf(eta * r) / r
def b_element(r, q):
def b_element(r, q, eta):
"""Coulomb contribution of a Gaussian with a point charge"""
return q * erf(ETA * r) / r
return q * erf(eta * r) / r
a = 1 # nearest neighbor distance i.e. lattice constant / sqrt(2)
@ -38,27 +37,31 @@ v = np.array([-0.5, 0.5]) * (QE2F / COULOMB)
distances = a * np.linalg.norm(lattice(LENGTH), axis=1)
opposite_distances = np.sqrt(np.square(distances) + distance_plates**2)
for name, eta_elec in [("", [2.0, 2.0]), ("_eta_mix", [0.5, 3.0])]:
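# mixed width of two interacting Gaussians: eta_mix = eta1*eta2 / sqrt(eta1^2 + eta2^2)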
eta_mix = np.prod(eta_elec) / np.sqrt(np.sum(np.square(eta_elec)))
# self interaction and within original box
A_11 = np.sqrt(2 / np.pi) * ETA
A_12 = erf(ETA * distance_plates / SQRT2) / distance_plates
A_11 = np.sqrt(2 / np.pi) * eta_elec[0]
A_22 = np.sqrt(2 / np.pi) * eta_elec[1]
A_12 = erf(eta_mix * distance_plates) / distance_plates
# interaction with periodic images
A_11 += 4 * np.sum(a_element(distances))
A_12 += 4 * np.sum(a_element(opposite_distances))
A = np.array([[A_11, A_12], [A_12, A_11]])
A_11 += 4 * np.sum(a_element(distances, eta_elec[0] / SQRT2))
A_22 += 4 * np.sum(a_element(distances, eta_elec[1] / SQRT2))
A_12 += 4 * np.sum(a_element(opposite_distances, eta_mix))
A = np.array([[A_11, A_12], [A_12, A_22]])
inv = np.linalg.inv(A)
e = np.array([1, 1])
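# with symm on: project out the uniform-potential mode so electrode charges sum to zero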
inv -= np.matmul(inv, np.matmul(np.outer(e, e), inv)) / np.dot(e, np.dot(inv, e))
# electrode-electrolyte interaction
b = []
for x in x_elec:
for x, eta in zip(x_elec, eta_elec):
bi = 0
for y, q in zip(x_elyt, q_elyt):
d = abs(y - x)
bi += b_element(d, q)
bi += b_element(d, q, eta)
image_distances = np.sqrt(np.square(distances) + d**2)
bi += 4 * np.sum(b_element(image_distances, q))
bi += 4 * np.sum(b_element(image_distances, q, eta))
b.append(bi)
b = np.array(b)
@ -75,10 +78,11 @@ energy_elyt = 0.5 * np.dot(q_elyt, np.dot(elyt, q_elyt))
q = np.dot(inv, v - b)
energy = COULOMB * (0.5 * np.dot(q, np.dot(A, q)) + np.dot(b, q) + energy_elyt)
print(
"length, energy / kcal/mol, q1 / e, q2 / e, inv11 / A, inv12 / A, b1 / e/A, b2 / e/A"
with open(f"plate_cap{name}.csv", "w") as f:
f.write(
"length, energy / kcal/mol, q1 / e, q2 / e, inv11 / A, inv12 / A, b1 / e/A, b2 / e/A\n"
)
print(
f.write(
", ".join(
[
str(LENGTH),
@ -91,4 +95,5 @@ print(
f"{b[1]:.8f}",
]
)
+ "\n"
)
View File
@ -7,17 +7,27 @@ if [ ! -f $lmpbin ]; then
fi
ref_out="plate_cap.csv"
if [ ! -f $ref_out ]; then
ref_mix_out="plate_cap_eta_mix.csv"
if [ ! -f $ref_out ] || [ ! -f $ref_mix_out ]; then
echo "Generating reference data"
python3 plate_cap.py > $ref_out
python3 plate_cap.py
fi
echo "Running Lammps inputs"
# w/o eta mixing
rm -rf madelung.txt && touch madelung.txt
for file in in.*; do
for file in in.eta in.ewald-ew3dc in.ewald-ew2d in.pppm-ew3dc in.cg; do
printf "\n$file\n" >> madelung.txt
rm -f out.csv inv.csv vec.csv
$lmpbin -i $file &> /dev/null
python3 eval.py $ref_out out.csv inv.csv vec.csv
done
# with eta mixing
for file in in.eta_mix in.eta_cg; do
printf "\n$file\n" >> madelung.txt
rm -f out.csv inv.csv vec.csv
$lmpbin -i $file &> /dev/null
python3 eval.py $ref_mix_out out.csv inv.csv vec.csv
done
cat madelung.txt
View File
@ -73,7 +73,8 @@ thermo 100
timestep 0.001
#dump 1 all custom 1000 ${name}.dump id type radius mass x y z temperature heatflow
compute 1 all property/atom temperature heatflow
#dump 1 all custom 1000 ${name}.dump id type radius mass x y z c_1[*]
run 100000
View File
@ -1,30 +0,0 @@
bkgd_dyn = 1
emb_lin_neg = 1
augt1=0
ialloy=1
rc = 5.9
#H
attrac(1,1)=0.460
repuls(1,1)=0.460
Cmin(1,1,1)=1.3 # PuMS
Cmax(1,1,1)= 2.80
nn2(1,1)=1
#Ga
rho0(2) = 0.6
attrac(2,2)=0.097
repuls(2,2)=0.097
nn2(2,2)=1
#HGa
attrac(1,2)=0.300
repuls(1,2)=0.300
lattce(1,2)=l12
re(1,2)=3.19
delta(1,2)=-0.48
alpha(1,2)=6.6
Cmin(1,1,2)=2.0
Cmin(2,1,2)= 2.0
Cmin(1,2,1)=2.0
Cmin(2,2,1) = 1.4
Cmin(1,2,2) = 1.4
Cmin(1,1,2) = 1.4
nn2(1,2)=1
View File
@ -0,0 +1 @@
../../../potentials/HGa.msmeam
View File
@ -1,25 +0,0 @@
LAMMPS data file via write_data, version 16 Feb 2016, timestep = 1
3 atoms
2 atom types
-4.0000000000000000e+00 4.0000000000000000e+00 xlo xhi
-4.0000000000000000e+00 4.0000000000000000e+00 ylo yhi
-4.0000000000000000e+00 4.0000000000000000e+00 zlo zhi
Masses
1 1.0079
2 69.723
Atoms # atomic
1 1 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0 0 0
2 2 2.2000000000000002e+00 0.0000000000000000e+00 0.0000000000000000e+00 0 0 0
3 2 2.9999999999999999e-01 2.2999999999999998e+00 0.0000000000000000e+00 0 0 0
Velocities
1 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
2 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
3 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
View File
@ -1,5 +1,3 @@
echo both
log log.msmeam
# Test of MEAM potential for HGa
# ------------------------ INITIALIZATION ----------------------------
@ -26,6 +24,6 @@ compute pot_energy all pe/atom
compute stress all stress/atom NULL
dump 1 all custom 1 dump.msmeam id x y z fx fy fz c_pot_energy c_stress[1] c_stress[2] c_stress[3] c_stress[4] c_stress[5] c_stress[6]
run 1
write_data data.msmeam
#write_data data.msmeam
print "All done!"
View File
@ -1,14 +0,0 @@
# DATE: 2018-09-22 UNITS: metal CONTRIBUTOR: Steve Valone, smv@lanl.gov CITATION: Baskes, PRB 1992; smv, sr, mib, JNM 2010
# ms-meam data format May 2010
# elt lat z ielement atwt
# alpha b0 b1 b2 b3 b1m b2m b3m alat esub asub
# - t0 t1 t2 t3 t1m t2m t3m rozero ibar
# NOTE: leading character cannot be a space
'H' 'dim' 1.0 1 1.0079
2.960 2.960 3.0 1.0 1.0 1.0 3.0 1.0 0.741 2.235 2.50
1.0 0.44721 0.0 0.00 0.0 0.31623 0 6.70 0
'Ga4' 'fcc' 12.0 31 69.723
4.42 4.80 3.10 6.00 0.00 0.0 0.0 0.5 4.247 2.897 0.97
1.0 1.649 1.435 0.00 0.0 0.0 2.0 0.70 0
View File
@ -0,0 +1 @@
../../../potentials/library.msmeam
View File
@ -0,0 +1,126 @@
LAMMPS (7 Feb 2024 - Development - patch_7Feb2024_update1-182-g93942f2013-modified)
OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98)
using 1 OpenMP thread(s) per MPI task
# Test of MEAM potential for HGa
# ------------------------ INITIALIZATION ----------------------------
units metal
dimension 3
boundary p p p
atom_style atomic
variable latparam equal 4.646
variable ncell equal 3
# ----------------------- ATOM DEFINITION ----------------------------
region box block -4 4 -4 4 -4 4
create_box 2 box
Created orthogonal box = (-4 -4 -4) to (4 4 4)
1 by 1 by 1 MPI processor grid
#
include potential.mod
# NOTE: This script can be modified for different pair styles
# See in.elastic for more info.
variable Pu string H
print "potential chosen ${Pu}"
potential chosen H
# Choose potential
pair_style meam/ms
print "we just executed"
we just executed
pair_coeff * * library.msmeam ${Pu} Ga4 HGa.msmeam ${Pu} Ga4
pair_coeff * * library.msmeam H Ga4 HGa.msmeam ${Pu} Ga4
pair_coeff * * library.msmeam H Ga4 HGa.msmeam H Ga4
Reading MEAM library file library.msmeam with DATE: 2018-09-22
# Setup neighbor style
neighbor 1.0 bin
neigh_modify once no every 1 delay 0 check yes
# Setup minimization style
variable dmax equal 1.0e-2
min_style cg
min_modify dmax ${dmax} line quadratic
min_modify dmax 0.01 line quadratic
compute eng all pe/atom
compute eatoms all reduce sum c_eng
# Setup output
thermo 100
thermo_style custom step temp etotal press pxx pyy pzz pxy pxz pyz lx ly lz vol c_eatoms
thermo_modify norm yes
create_atoms 1 single 0 0 0 units box
Created 1 atoms
using box units in orthogonal box = (-4 -4 -4) to (4 4 4)
create_atoms CPU = 0.000 seconds
create_atoms 2 single 2.2 0 0 units box
Created 1 atoms
using box units in orthogonal box = (-4 -4 -4) to (4 4 4)
create_atoms CPU = 0.000 seconds
create_atoms 2 single 0.3 2.3 0 units box
Created 1 atoms
using box units in orthogonal box = (-4 -4 -4) to (4 4 4)
create_atoms CPU = 0.000 seconds
# ---------- Define Settings ---------------------
variable teng equal "c_eatoms"
compute pot_energy all pe/atom
compute stress all stress/atom NULL
# dump 1 all custom 1 dump.msmeam id x y z fx fy fz c_pot_energy c_stress[1] c_stress[2] c_stress[3] c_stress[4] c_stress[5] c_stress[6]
run 1
WARNING: No fixes with time integration, atoms won't move (src/verlet.cpp:60)
Neighbor list info ...
update: every = 1 steps, delay = 0 steps, check = yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 6.9
ghost atom cutoff = 6.9
binsize = 3.45, bins = 3 3 3
2 neighbor lists, perpetual/occasional/extra = 2 0 0
(1) pair meam/ms, perpetual
attributes: full, newton on
pair build: full/bin/atomonly
stencil: full/bin/3d
bin: standard
(2) pair meam/ms, perpetual, half/full from (1)
attributes: half, newton on
pair build: halffull/newton
stencil: none
bin: none
Per MPI rank memory allocation (min/avg/max) = 8.587 | 8.587 | 8.587 Mbytes
Step Temp TotEng Press Pxx Pyy Pzz Pxy Pxz Pyz Lx Ly Lz Volume c_eatoms
0 0 15.433079 491354.7 838670.96 635393.15 0 80195.797 0 0 8 8 8 512 15.433079
1 0 15.433079 491354.7 838670.96 635393.15 0 80195.797 0 0 8 8 8 512 15.433079
Loop time of 4.4446e-05 on 1 procs for 1 steps with 3 atoms
Performance: 1943.932 ns/day, 0.012 hours/ns, 22499.213 timesteps/s, 67.498 katom-step/s
31.5% CPU use with 1 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 2.9908e-05 | 2.9908e-05 | 2.9908e-05 | 0.0 | 67.29
Neigh | 0 | 0 | 0 | 0.0 | 0.00
Comm | 1.033e-06 | 1.033e-06 | 1.033e-06 | 0.0 | 2.32
Output | 9.347e-06 | 9.347e-06 | 9.347e-06 | 0.0 | 21.03
Modify | 2.02e-07 | 2.02e-07 | 2.02e-07 | 0.0 | 0.45
Other | | 3.956e-06 | | | 8.90
Nlocal: 3 ave 3 max 3 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 78 ave 78 max 78 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 7 ave 7 max 7 min
Histogram: 1 0 0 0 0 0 0 0 0 0
FullNghs: 14 ave 14 max 14 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 14
Ave neighs/atom = 4.6666667
Neighbor list builds = 0
Dangerous builds = 0
#write_data data.msmeam
print "All done!"
All done!
Total wall time: 0:00:00
View File
@ -0,0 +1,126 @@
LAMMPS (7 Feb 2024 - Development - patch_7Feb2024_update1-182-g93942f2013-modified)
OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98)
using 1 OpenMP thread(s) per MPI task
# Test of MEAM potential for HGa
# ------------------------ INITIALIZATION ----------------------------
units metal
dimension 3
boundary p p p
atom_style atomic
variable latparam equal 4.646
variable ncell equal 3
# ----------------------- ATOM DEFINITION ----------------------------
region box block -4 4 -4 4 -4 4
create_box 2 box
Created orthogonal box = (-4 -4 -4) to (4 4 4)
1 by 2 by 2 MPI processor grid
#
include potential.mod
# NOTE: This script can be modified for different pair styles
# See in.elastic for more info.
variable Pu string H
print "potential chosen ${Pu}"
potential chosen H
# Choose potential
pair_style meam/ms
print "we just executed"
we just executed
pair_coeff * * library.msmeam ${Pu} Ga4 HGa.msmeam ${Pu} Ga4
pair_coeff * * library.msmeam H Ga4 HGa.msmeam ${Pu} Ga4
pair_coeff * * library.msmeam H Ga4 HGa.msmeam H Ga4
Reading MEAM library file library.msmeam with DATE: 2018-09-22
# Setup neighbor style
neighbor 1.0 bin
neigh_modify once no every 1 delay 0 check yes
# Setup minimization style
variable dmax equal 1.0e-2
min_style cg
min_modify dmax ${dmax} line quadratic
min_modify dmax 0.01 line quadratic
compute eng all pe/atom
compute eatoms all reduce sum c_eng
# Setup output
thermo 100
thermo_style custom step temp etotal press pxx pyy pzz pxy pxz pyz lx ly lz vol c_eatoms
thermo_modify norm yes
create_atoms 1 single 0 0 0 units box
Created 1 atoms
using box units in orthogonal box = (-4 -4 -4) to (4 4 4)
create_atoms CPU = 0.000 seconds
create_atoms 2 single 2.2 0 0 units box
Created 1 atoms
using box units in orthogonal box = (-4 -4 -4) to (4 4 4)
create_atoms CPU = 0.000 seconds
create_atoms 2 single 0.3 2.3 0 units box
Created 1 atoms
using box units in orthogonal box = (-4 -4 -4) to (4 4 4)
create_atoms CPU = 0.000 seconds
# ---------- Define Settings ---------------------
variable teng equal "c_eatoms"
compute pot_energy all pe/atom
compute stress all stress/atom NULL
# dump 1 all custom 1 dump.msmeam id x y z fx fy fz c_pot_energy c_stress[1] c_stress[2] c_stress[3] c_stress[4] c_stress[5] c_stress[6]
run 1
WARNING: No fixes with time integration, atoms won't move (src/verlet.cpp:60)
Neighbor list info ...
update: every = 1 steps, delay = 0 steps, check = yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 6.9
ghost atom cutoff = 6.9
binsize = 3.45, bins = 3 3 3
2 neighbor lists, perpetual/occasional/extra = 2 0 0
(1) pair meam/ms, perpetual
attributes: full, newton on
pair build: full/bin/atomonly
stencil: full/bin/3d
bin: standard
(2) pair meam/ms, perpetual, half/full from (1)
attributes: half, newton on
pair build: halffull/newton
stencil: none
bin: none
Per MPI rank memory allocation (min/avg/max) = 7.965 | 8.123 | 8.594 Mbytes
Step Temp TotEng Press Pxx Pyy Pzz Pxy Pxz Pyz Lx Ly Lz Volume c_eatoms
0 0 15.433079 491354.7 838670.96 635393.15 0 80195.797 0 0 8 8 8 512 15.433079
1 0 15.433079 491354.7 838670.96 635393.15 0 80195.797 0 0 8 8 8 512 15.433079
Loop time of 8.70645e-05 on 4 procs for 1 steps with 3 atoms
Performance: 992.368 ns/day, 0.024 hours/ns, 11485.738 timesteps/s, 34.457 katom-step/s
29.0% CPU use with 4 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 4.3957e-05 | 4.67e-05 | 5.1056e-05 | 0.0 | 53.64
Neigh | 0 | 0 | 0 | 0.0 | 0.00
Comm | 1.105e-05 | 1.3822e-05 | 1.7033e-05 | 0.0 | 15.88
Output | 1.5765e-05 | 1.9045e-05 | 2.5216e-05 | 0.0 | 21.87
Modify | 2.58e-07 | 3.465e-07 | 3.81e-07 | 0.0 | 0.40
Other | | 7.151e-06 | | | 8.21
Nlocal: 0.75 ave 3 max 0 min
Histogram: 3 0 0 0 0 0 0 0 0 1
Nghost: 38.25 ave 42 max 36 min
Histogram: 2 0 0 0 0 1 0 0 0 1
Neighs: 1.75 ave 7 max 0 min
Histogram: 3 0 0 0 0 0 0 0 0 1
FullNghs: 3.5 ave 14 max 0 min
Histogram: 3 0 0 0 0 0 0 0 0 1
Total # of neighbors = 14
Ave neighs/atom = 4.6666667
Neighbor list builds = 0
Dangerous builds = 0
#write_data data.msmeam
print "All done!"
All done!
Total wall time: 0:00:00
View File
@ -1,107 +0,0 @@
# Test of MEAM potential for HGa
# ------------------------ INITIALIZATION ----------------------------
units metal
dimension 3
boundary p p p
atom_style atomic
variable latparam equal 4.646
variable ncell equal 3
# ----------------------- ATOM DEFINITION ----------------------------
region box block -4 4 -4 4 -4 4
create_box 2 box
Created orthogonal box = (-4 -4 -4) to (4 4 4)
1 by 1 by 1 MPI processor grid
#
include potential.mod
# NOTE: This script can be modified for different pair styles
# See in.elastic for more info.
variable Pu string H
print "potential chosen ${Pu}"
potential chosen H
# Choose potential
pair_style MSmeam
print "we just executed"
we just executed
pair_coeff * * library.MSmeam ${Pu} Ga4 HGaMS.meam ${Pu} Ga4
pair_coeff * * library.MSmeam H Ga4 HGaMS.meam ${Pu} Ga4
pair_coeff * * library.MSmeam H Ga4 HGaMS.meam H Ga4
Reading potential file library.MSmeam with DATE: 2018-09-22
# Setup neighbor style
neighbor 1.0 nsq
neigh_modify once no every 1 delay 0 check yes
# Setup minimization style
variable dmax equal 1.0e-2
min_style cg
min_modify dmax ${dmax} line quadratic
min_modify dmax 0.01 line quadratic
compute eng all pe/atom
compute eatoms all reduce sum c_eng
# Setup output
thermo 100
thermo_style custom step temp etotal press pxx pyy pzz pxy pxz pyz lx ly lz vol c_eatoms
thermo_modify norm yes
create_atoms 1 single 0 0 0 units box
Created 1 atoms
create_atoms 2 single 2.2 0 0 units box
Created 1 atoms
create_atoms 2 single 0.3 2.3 0 units box
Created 1 atoms
# ---------- Define Settings ---------------------
variable teng equal "c_eatoms"
compute pot_energy all pe/atom
compute stress all stress/atom NULL
dump 1 all custom 1 dump.msmeam id x y z fx fy fz c_pot_energy c_stress[1] c_stress[2] c_stress[3] c_stress[4] c_stress[5] c_stress[6]
run 1
WARNING: No fixes defined, atoms won't move (../verlet.cpp:55)
Neighbor list info ...
2 neighbor list requests
update every 1 steps, delay 0 steps, check yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 6.9
ghost atom cutoff = 6.9
Memory usage per processor = 12.9295 Mbytes
Step Temp TotEng Press Pxx Pyy Pzz Pxy Pxz Pyz Lx Ly Lz Volume eatoms
0 0 15.433079 491354.68 838670.91 635393.13 0 80195.793 0 0 8 8 8 512 15.433079
1 0 15.433079 491354.68 838670.91 635393.13 0 80195.793 0 0 8 8 8 512 15.433079
Loop time of 0.000172138 on 1 procs for 1 steps with 3 atoms
Performance: 501.922 ns/day, 0.048 hours/ns, 5809.285 timesteps/s
81.3% CPU use with 1 MPI tasks x no OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 6.6996e-05 | 6.6996e-05 | 6.6996e-05 | 0.0 | 38.92
Neigh | 0 | 0 | 0 | 0.0 | 0.00
Comm | 1.9073e-06 | 1.9073e-06 | 1.9073e-06 | 0.0 | 1.11
Output | 9.7036e-05 | 9.7036e-05 | 9.7036e-05 | 0.0 | 56.37
Modify | 0 | 0 | 0 | 0.0 | 0.00
Other | | 6.199e-06 | | | 3.60
Nlocal: 3 ave 3 max 3 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 78 ave 78 max 78 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 7 ave 7 max 7 min
Histogram: 1 0 0 0 0 0 0 0 0 0
FullNghs: 14 ave 14 max 14 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 14
Ave neighs/atom = 4.66667
Neighbor list builds = 0
Dangerous builds = 0
write_data data.msmeam
print "All done!"
All done!
Total wall time: 0:00:00

View File

@ -1,24 +0,0 @@
ITEM: TIMESTEP
0
ITEM: NUMBER OF ATOMS
3
ITEM: BOX BOUNDS pp pp pp
-4 4
-4 4
-4 4
ITEM: ATOMS id x y z fx fy fz c_pot_energy c_stress[1] c_stress[2] c_stress[3] c_stress[4] c_stress[5] c_stress[6]
1 0 0 0 -131.925 -88.3005 0 22.9153 -2.147e+08 -1.62661e+08 -0 -2.05301e+07 -0 -0
2 2.2 0 0 120.809 -0.482171 0 14.7692 -2.12028e+08 -0 -0 403352 -0 -0
3 0.3 2.3 0 11.1159 88.7827 0 8.61478 -2.67145e+06 -1.62661e+08 -0 -2.09335e+07 -0 -0
ITEM: TIMESTEP
1
ITEM: NUMBER OF ATOMS
3
ITEM: BOX BOUNDS pp pp pp
-4 4
-4 4
-4 4
ITEM: ATOMS id x y z fx fy fz c_pot_energy c_stress[1] c_stress[2] c_stress[3] c_stress[4] c_stress[5] c_stress[6]
1 0 0 0 -131.925 -88.3005 0 22.9153 -2.147e+08 -1.62661e+08 -0 -2.05301e+07 -0 -0
2 2.2 0 0 120.809 -0.482171 0 14.7692 -2.12028e+08 -0 -0 403352 -0 -0
3 0.3 2.3 0 11.1159 88.7827 0 8.61478 -2.67145e+06 -1.62661e+08 -0 -2.09335e+07 -0 -0

View File

@ -7,7 +7,7 @@ print "potential chosen ${Pu}"
pair_style meam/ms
print "we just executed"
pair_coeff * * library.msmeam ${Pu} Ga4 HGa.meam ${Pu} Ga4
pair_coeff * * library.msmeam ${Pu} Ga4 HGa.msmeam ${Pu} Ga4
# Setup neighbor style
neighbor 1.0 bin
neigh_modify once no every 1 delay 0 check yes

View File

@ -23,12 +23,14 @@ function(validate_option name values)
endfunction(validate_option)
#################################################################################
# LAMMPS C++ interface. We only need the header related parts.
# LAMMPS C++ interface. We only need the header related parts for shared linkage
# but the library .a file for real static or quasi-static linkage (of LAMMPS).
add_library(lammps INTERFACE)
target_include_directories(lammps INTERFACE ${LAMMPS_HEADER_DIR})
if((CMAKE_SYSTEM_NAME STREQUAL "Windows") AND CMAKE_CROSSCOMPILING)
target_link_libraries(lammps INTERFACE ${CMAKE_BINARY_DIR}/../liblammps.dll.a)
endif()
################################################################################
# MPI configuration
if(NOT CMAKE_CROSSCOMPILING)
@ -82,13 +84,9 @@ if(BUILD_MPI)
# Download and configure custom MPICH files for Windows
message(STATUS "Downloading and configuring MPICH-1.4.1 for Windows")
set(MPICH2_WIN64_DEVEL_URL "${LAMMPS_THIRDPARTY_URL}/mpich2-win64-devel.tar.gz" CACHE STRING "URL for MPICH2 (win64) tarball")
set(MPICH2_WIN32_DEVEL_URL "${LAMMPS_THIRDPARTY_URL}/mpich2-win32-devel.tar.gz" CACHE STRING "URL for MPICH2 (win32) tarball")
set(MPICH2_WIN64_DEVEL_MD5 "4939fdb59d13182fd5dd65211e469f14" CACHE STRING "MD5 checksum of MPICH2 (win64) tarball")
set(MPICH2_WIN32_DEVEL_MD5 "a61d153500dce44e21b755ee7257e031" CACHE STRING "MD5 checksum of MPICH2 (win32) tarball")
mark_as_advanced(MPICH2_WIN64_DEVEL_URL)
mark_as_advanced(MPICH2_WIN32_DEVEL_URL)
mark_as_advanced(MPICH2_WIN64_DEVEL_MD5)
mark_as_advanced(MPICH2_WIN32_DEVEL_MD5)
include(ExternalProject)
if(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
@ -131,6 +129,8 @@ else()
target_include_directories(lammps INTERFACE "${LAMMPS_SOURCE_DIR}/STUBS")
endif()
################
# integer size selection
set(LAMMPS_SIZES "smallbig" CACHE STRING "LAMMPS integer sizes (smallsmall: all 32-bit, smallbig: 64-bit #atoms #timesteps, bigbig: also 64-bit imageint, 64-bit atom ids)")
set(LAMMPS_SIZES_VALUES smallbig bigbig smallsmall)
set_property(CACHE LAMMPS_SIZES PROPERTY STRINGS ${LAMMPS_SIZES_VALUES})

View File

@ -0,0 +1 @@
../../potentials/MoS.rebomos

View File

@ -0,0 +1,35 @@
units metal
lattice custom 1.0 a1 3.1903157234 0.0000000000 0.0000000000 &
a2 -1.5964590311 2.7651481541 0.0000000000 &
a3 0.0000000000 0.0000000000 13.9827680588 &
basis 0.0000000000 0.000000000 $(3.0/4.0) &
basis 0.0000000000 0.000000000 $(1.0/4.0) &
basis $(2.0/3.0) $(1.0/3.0) 0.862008989 &
basis $(1.0/3.0) $(2.0/3.0) 0.137990996 &
basis $(1.0/3.0) $(2.0/3.0) 0.362008989 &
basis $(2.0/3.0) $(1.0/3.0) 0.637991011 &
origin 0.1 0.1 0.1
region box prism 0 4 0 8 0 1 -2.0 0.0 0.0
create_box 2 box
create_atoms 2 box &
basis 1 1 &
basis 2 1 &
basis 3 2 &
basis 4 2 &
basis 5 2 &
basis 6 2
mass 1 95.95 #Mo
mass 2 32.065 #S
pair_style rebomos
pair_coeff * * MoS.rebomos Mo S
thermo_style custom step temp press pe ke cellgamma vol
thermo 10
#dump 1 all atom 10 MoS.lammpstrj
fix 1 all nve
run 20
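As a sanity check on the geometry above: the cellgamma of 113.40187 degrees reported by the runs below follows directly from the triclinic box that the prism region creates. A minimal standalone sketch, using only the box edge ly and tilt factor xy printed in the accompanying logs:
// Recompute cellgamma for a LAMMPS triclinic box: with a = (lx, 0, 0) and
// b = (xy, ly, 0), cos(gamma) = xy / sqrt(xy^2 + ly^2).
#include <cmath>
#include <cstdio>
int main() {
  const double ly = 22.121185;   // box edge from the log
  const double xy = -9.5735495;  // tilt factor from the log
  const double gamma =
      std::acos(xy / std::sqrt(xy * xy + ly * ly)) * 180.0 / M_PI;
  std::printf("cellgamma = %.5f degrees\n", gamma);  // ~113.40187
  return 0;
}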

View File

@ -0,0 +1,31 @@
# monolayer MoS2
units metal
boundary p p f
processors * * 1
atom_modify map array
atom_style atomic
read_data single_layer_MoS2.data
mass * 32.065 # mass of sulphur atom, unit: a.u. = 1.66x10^(-27) kg
mass 1 95.94 # mass of molybdenum atom, unit: a.u. = 1.66x10^(-27) kg
########################## Define potentials ################################
pair_style rebomos
pair_coeff * * MoS.rebomos Mo S S
#########################################################################
### Simulation settings ####
timestep 0.001
velocity all create 300.0 12345 loop geom
############################
# Output
thermo 500
thermo_style custom step etotal pe ke temp
thermo_modify lost warn
###### Run molecular dynamics ######
fix thermostat all nve
run 5000
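The first thermo line of the logs below (KinEng = 29.742759 eV at 300 K for 768 atoms) is consistent with the default thermo temperature definition, which removes three center-of-mass degrees of freedom. A quick check, assuming DOF = 3N - 3 and the metal-units Boltzmann constant:
// Verify Temp = 2*KE / (DOF * kB) for the first thermo line of the logs.
#include <cstdio>
int main() {
  const double kB = 8.617343e-5;  // Boltzmann constant in eV/K (metal units)
  const double ke = 29.742759;    // KinEng from the log, eV
  const int dof = 3 * 768 - 3;    // 3N - 3 with center-of-mass motion removed
  std::printf("T = %.2f K\n", 2.0 * ke / (dof * kB));  // prints T = 300.00 K
  return 0;
}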

View File

@ -0,0 +1,85 @@
LAMMPS (7 Feb 2024 - Development - patch_7Feb2024_update1-73-g36fa601fe0)
using 1 OpenMP thread(s) per MPI task
units metal
lattice custom 1.0 a1 3.1903157234 0.0000000000 0.0000000000 a2 -1.5964590311 2.7651481541 0.0000000000 a3 0.0000000000 0.0000000000 13.9827680588 basis 0.0000000000 0.000000000 $(3.0/4.0) basis 0.0000000000 0.000000000 $(1.0/4.0) basis $(2.0/3.0) $(1.0/3.0) 0.862008989 basis $(1.0/3.0) $(2.0/3.0) 0.137990996 basis $(1.0/3.0) $(2.0/3.0) 0.362008989 basis $(2.0/3.0) $(1.0/3.0) 0.637991011 origin 0.1 0.1 0.1
lattice custom 1.0 a1 3.1903157234 0.0000000000 0.0000000000 a2 -1.5964590311 2.7651481541 0.0000000000 a3 0.0000000000 0.0000000000 13.9827680588 basis 0.0000000000 0.000000000 0.75 basis 0.0000000000 0.000000000 $(1.0/4.0) basis $(2.0/3.0) $(1.0/3.0) 0.862008989 basis $(1.0/3.0) $(2.0/3.0) 0.137990996 basis $(1.0/3.0) $(2.0/3.0) 0.362008989 basis $(2.0/3.0) $(1.0/3.0) 0.637991011 origin 0.1 0.1 0.1
lattice custom 1.0 a1 3.1903157234 0.0000000000 0.0000000000 a2 -1.5964590311 2.7651481541 0.0000000000 a3 0.0000000000 0.0000000000 13.9827680588 basis 0.0000000000 0.000000000 0.75 basis 0.0000000000 0.000000000 0.25 basis $(2.0/3.0) $(1.0/3.0) 0.862008989 basis $(1.0/3.0) $(2.0/3.0) 0.137990996 basis $(1.0/3.0) $(2.0/3.0) 0.362008989 basis $(2.0/3.0) $(1.0/3.0) 0.637991011 origin 0.1 0.1 0.1
lattice custom 1.0 a1 3.1903157234 0.0000000000 0.0000000000 a2 -1.5964590311 2.7651481541 0.0000000000 a3 0.0000000000 0.0000000000 13.9827680588 basis 0.0000000000 0.000000000 0.75 basis 0.0000000000 0.000000000 0.25 basis 0.66666666666666662966 $(1.0/3.0) 0.862008989 basis $(1.0/3.0) $(2.0/3.0) 0.137990996 basis $(1.0/3.0) $(2.0/3.0) 0.362008989 basis $(2.0/3.0) $(1.0/3.0) 0.637991011 origin 0.1 0.1 0.1
lattice custom 1.0 a1 3.1903157234 0.0000000000 0.0000000000 a2 -1.5964590311 2.7651481541 0.0000000000 a3 0.0000000000 0.0000000000 13.9827680588 basis 0.0000000000 0.000000000 0.75 basis 0.0000000000 0.000000000 0.25 basis 0.66666666666666662966 0.33333333333333331483 0.862008989 basis $(1.0/3.0) $(2.0/3.0) 0.137990996 basis $(1.0/3.0) $(2.0/3.0) 0.362008989 basis $(2.0/3.0) $(1.0/3.0) 0.637991011 origin 0.1 0.1 0.1
lattice custom 1.0 a1 3.1903157234 0.0000000000 0.0000000000 a2 -1.5964590311 2.7651481541 0.0000000000 a3 0.0000000000 0.0000000000 13.9827680588 basis 0.0000000000 0.000000000 0.75 basis 0.0000000000 0.000000000 0.25 basis 0.66666666666666662966 0.33333333333333331483 0.862008989 basis 0.33333333333333331483 $(2.0/3.0) 0.137990996 basis $(1.0/3.0) $(2.0/3.0) 0.362008989 basis $(2.0/3.0) $(1.0/3.0) 0.637991011 origin 0.1 0.1 0.1
lattice custom 1.0 a1 3.1903157234 0.0000000000 0.0000000000 a2 -1.5964590311 2.7651481541 0.0000000000 a3 0.0000000000 0.0000000000 13.9827680588 basis 0.0000000000 0.000000000 0.75 basis 0.0000000000 0.000000000 0.25 basis 0.66666666666666662966 0.33333333333333331483 0.862008989 basis 0.33333333333333331483 0.66666666666666662966 0.137990996 basis $(1.0/3.0) $(2.0/3.0) 0.362008989 basis $(2.0/3.0) $(1.0/3.0) 0.637991011 origin 0.1 0.1 0.1
lattice custom 1.0 a1 3.1903157234 0.0000000000 0.0000000000 a2 -1.5964590311 2.7651481541 0.0000000000 a3 0.0000000000 0.0000000000 13.9827680588 basis 0.0000000000 0.000000000 0.75 basis 0.0000000000 0.000000000 0.25 basis 0.66666666666666662966 0.33333333333333331483 0.862008989 basis 0.33333333333333331483 0.66666666666666662966 0.137990996 basis 0.33333333333333331483 $(2.0/3.0) 0.362008989 basis $(2.0/3.0) $(1.0/3.0) 0.637991011 origin 0.1 0.1 0.1
lattice custom 1.0 a1 3.1903157234 0.0000000000 0.0000000000 a2 -1.5964590311 2.7651481541 0.0000000000 a3 0.0000000000 0.0000000000 13.9827680588 basis 0.0000000000 0.000000000 0.75 basis 0.0000000000 0.000000000 0.25 basis 0.66666666666666662966 0.33333333333333331483 0.862008989 basis 0.33333333333333331483 0.66666666666666662966 0.137990996 basis 0.33333333333333331483 0.66666666666666662966 0.362008989 basis $(2.0/3.0) $(1.0/3.0) 0.637991011 origin 0.1 0.1 0.1
lattice custom 1.0 a1 3.1903157234 0.0000000000 0.0000000000 a2 -1.5964590311 2.7651481541 0.0000000000 a3 0.0000000000 0.0000000000 13.9827680588 basis 0.0000000000 0.000000000 0.75 basis 0.0000000000 0.000000000 0.25 basis 0.66666666666666662966 0.33333333333333331483 0.862008989 basis 0.33333333333333331483 0.66666666666666662966 0.137990996 basis 0.33333333333333331483 0.66666666666666662966 0.362008989 basis 0.66666666666666662966 $(1.0/3.0) 0.637991011 origin 0.1 0.1 0.1
lattice custom 1.0 a1 3.1903157234 0.0000000000 0.0000000000 a2 -1.5964590311 2.7651481541 0.0000000000 a3 0.0000000000 0.0000000000 13.9827680588 basis 0.0000000000 0.000000000 0.75 basis 0.0000000000 0.000000000 0.25 basis 0.66666666666666662966 0.33333333333333331483 0.862008989 basis 0.33333333333333331483 0.66666666666666662966 0.137990996 basis 0.33333333333333331483 0.66666666666666662966 0.362008989 basis 0.66666666666666662966 0.33333333333333331483 0.637991011 origin 0.1 0.1 0.1
Lattice spacing in x,y,z = 4.7867748 2.7651482 13.982768
region box prism 0 4 0 8 0 1 -2.0 0.0 0.0
create_box 2 box
Created triclinic box = (0 0 0) to (19.147099 22.121185 13.982768) with tilt (-9.5735495 0 0)
1 by 1 by 1 MPI processor grid
create_atoms 2 box basis 1 1 basis 2 1 basis 3 2 basis 4 2 basis 5 2 basis 6 2
Created 288 atoms
using lattice units in triclinic box = (0 0 0) to (19.147099 22.121185 13.982768) with tilt (-9.5735495 0 0)
create_atoms CPU = 0.000 seconds
mass 1 95.95 #Mo
mass 2 32.065 #S
pair_style rebomos
pair_coeff * * MoS.rebomos Mo S
Reading rebomos potential file MoS.rebomos with DATE: 2013-11-04
thermo_style custom step temp press pe ke cellgamma vol
thermo 10
#dump 1 all atom 10 MoS.lammpstrj
fix 1 all nve
run 20
Neighbor list info ...
update: every = 1 steps, delay = 0 steps, check = yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 13.4
ghost atom cutoff = 13.4
binsize = 6.7, bins = 5 4 3
1 neighbor lists, perpetual/occasional/extra = 1 0 0
(1) pair rebomos, perpetual
attributes: full, newton on, ghost
pair build: full/bin/ghost
stencil: full/ghost/bin/3d
bin: standard
Per MPI rank memory allocation (min/avg/max) = 4.996 | 4.996 | 4.996 Mbytes
Step Temp Press PotEng KinEng CellGamma Volume
0 0 28799.53 -2061.6112 0 113.40187 5922.4926
10 80.776057 13540.088 -2064.6132 2.9966028 113.40187 5922.4926
20 146.17503 -20669.371 -2067.0428 5.4227518 113.40187 5922.4926
Loop time of 0.058071 on 1 procs for 20 steps with 288 atoms
Performance: 29.757 ns/day, 0.807 hours/ns, 344.406 timesteps/s, 99.189 katom-step/s
99.8% CPU use with 1 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 0.057666 | 0.057666 | 0.057666 | 0.0 | 99.30
Neigh | 0 | 0 | 0 | 0.0 | 0.00
Comm | 0.00024654 | 0.00024654 | 0.00024654 | 0.0 | 0.42
Output | 2.3975e-05 | 2.3975e-05 | 2.3975e-05 | 0.0 | 0.04
Modify | 3.8394e-05 | 3.8394e-05 | 3.8394e-05 | 0.0 | 0.07
Other | | 9.596e-05 | | | 0.17
Nlocal: 288 ave 288 max 288 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 4285 ave 4285 max 4285 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 0 ave 0 max 0 min
Histogram: 1 0 0 0 0 0 0 0 0 0
FullNghs: 142848 ave 142848 max 142848 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 142848
Ave neighs/atom = 496
Neighbor list builds = 0
Dangerous builds = 0
Total wall time: 0:00:00

View File

@ -0,0 +1,85 @@
LAMMPS (7 Feb 2024 - Development - patch_7Feb2024_update1-73-g36fa601fe0)
using 1 OpenMP thread(s) per MPI task
units metal
lattice custom 1.0 a1 3.1903157234 0.0000000000 0.0000000000 a2 -1.5964590311 2.7651481541 0.0000000000 a3 0.0000000000 0.0000000000 13.9827680588 basis 0.0000000000 0.000000000 $(3.0/4.0) basis 0.0000000000 0.000000000 $(1.0/4.0) basis $(2.0/3.0) $(1.0/3.0) 0.862008989 basis $(1.0/3.0) $(2.0/3.0) 0.137990996 basis $(1.0/3.0) $(2.0/3.0) 0.362008989 basis $(2.0/3.0) $(1.0/3.0) 0.637991011 origin 0.1 0.1 0.1
lattice custom 1.0 a1 3.1903157234 0.0000000000 0.0000000000 a2 -1.5964590311 2.7651481541 0.0000000000 a3 0.0000000000 0.0000000000 13.9827680588 basis 0.0000000000 0.000000000 0.75 basis 0.0000000000 0.000000000 $(1.0/4.0) basis $(2.0/3.0) $(1.0/3.0) 0.862008989 basis $(1.0/3.0) $(2.0/3.0) 0.137990996 basis $(1.0/3.0) $(2.0/3.0) 0.362008989 basis $(2.0/3.0) $(1.0/3.0) 0.637991011 origin 0.1 0.1 0.1
lattice custom 1.0 a1 3.1903157234 0.0000000000 0.0000000000 a2 -1.5964590311 2.7651481541 0.0000000000 a3 0.0000000000 0.0000000000 13.9827680588 basis 0.0000000000 0.000000000 0.75 basis 0.0000000000 0.000000000 0.25 basis $(2.0/3.0) $(1.0/3.0) 0.862008989 basis $(1.0/3.0) $(2.0/3.0) 0.137990996 basis $(1.0/3.0) $(2.0/3.0) 0.362008989 basis $(2.0/3.0) $(1.0/3.0) 0.637991011 origin 0.1 0.1 0.1
lattice custom 1.0 a1 3.1903157234 0.0000000000 0.0000000000 a2 -1.5964590311 2.7651481541 0.0000000000 a3 0.0000000000 0.0000000000 13.9827680588 basis 0.0000000000 0.000000000 0.75 basis 0.0000000000 0.000000000 0.25 basis 0.66666666666666662966 $(1.0/3.0) 0.862008989 basis $(1.0/3.0) $(2.0/3.0) 0.137990996 basis $(1.0/3.0) $(2.0/3.0) 0.362008989 basis $(2.0/3.0) $(1.0/3.0) 0.637991011 origin 0.1 0.1 0.1
lattice custom 1.0 a1 3.1903157234 0.0000000000 0.0000000000 a2 -1.5964590311 2.7651481541 0.0000000000 a3 0.0000000000 0.0000000000 13.9827680588 basis 0.0000000000 0.000000000 0.75 basis 0.0000000000 0.000000000 0.25 basis 0.66666666666666662966 0.33333333333333331483 0.862008989 basis $(1.0/3.0) $(2.0/3.0) 0.137990996 basis $(1.0/3.0) $(2.0/3.0) 0.362008989 basis $(2.0/3.0) $(1.0/3.0) 0.637991011 origin 0.1 0.1 0.1
lattice custom 1.0 a1 3.1903157234 0.0000000000 0.0000000000 a2 -1.5964590311 2.7651481541 0.0000000000 a3 0.0000000000 0.0000000000 13.9827680588 basis 0.0000000000 0.000000000 0.75 basis 0.0000000000 0.000000000 0.25 basis 0.66666666666666662966 0.33333333333333331483 0.862008989 basis 0.33333333333333331483 $(2.0/3.0) 0.137990996 basis $(1.0/3.0) $(2.0/3.0) 0.362008989 basis $(2.0/3.0) $(1.0/3.0) 0.637991011 origin 0.1 0.1 0.1
lattice custom 1.0 a1 3.1903157234 0.0000000000 0.0000000000 a2 -1.5964590311 2.7651481541 0.0000000000 a3 0.0000000000 0.0000000000 13.9827680588 basis 0.0000000000 0.000000000 0.75 basis 0.0000000000 0.000000000 0.25 basis 0.66666666666666662966 0.33333333333333331483 0.862008989 basis 0.33333333333333331483 0.66666666666666662966 0.137990996 basis $(1.0/3.0) $(2.0/3.0) 0.362008989 basis $(2.0/3.0) $(1.0/3.0) 0.637991011 origin 0.1 0.1 0.1
lattice custom 1.0 a1 3.1903157234 0.0000000000 0.0000000000 a2 -1.5964590311 2.7651481541 0.0000000000 a3 0.0000000000 0.0000000000 13.9827680588 basis 0.0000000000 0.000000000 0.75 basis 0.0000000000 0.000000000 0.25 basis 0.66666666666666662966 0.33333333333333331483 0.862008989 basis 0.33333333333333331483 0.66666666666666662966 0.137990996 basis 0.33333333333333331483 $(2.0/3.0) 0.362008989 basis $(2.0/3.0) $(1.0/3.0) 0.637991011 origin 0.1 0.1 0.1
lattice custom 1.0 a1 3.1903157234 0.0000000000 0.0000000000 a2 -1.5964590311 2.7651481541 0.0000000000 a3 0.0000000000 0.0000000000 13.9827680588 basis 0.0000000000 0.000000000 0.75 basis 0.0000000000 0.000000000 0.25 basis 0.66666666666666662966 0.33333333333333331483 0.862008989 basis 0.33333333333333331483 0.66666666666666662966 0.137990996 basis 0.33333333333333331483 0.66666666666666662966 0.362008989 basis $(2.0/3.0) $(1.0/3.0) 0.637991011 origin 0.1 0.1 0.1
lattice custom 1.0 a1 3.1903157234 0.0000000000 0.0000000000 a2 -1.5964590311 2.7651481541 0.0000000000 a3 0.0000000000 0.0000000000 13.9827680588 basis 0.0000000000 0.000000000 0.75 basis 0.0000000000 0.000000000 0.25 basis 0.66666666666666662966 0.33333333333333331483 0.862008989 basis 0.33333333333333331483 0.66666666666666662966 0.137990996 basis 0.33333333333333331483 0.66666666666666662966 0.362008989 basis 0.66666666666666662966 $(1.0/3.0) 0.637991011 origin 0.1 0.1 0.1
lattice custom 1.0 a1 3.1903157234 0.0000000000 0.0000000000 a2 -1.5964590311 2.7651481541 0.0000000000 a3 0.0000000000 0.0000000000 13.9827680588 basis 0.0000000000 0.000000000 0.75 basis 0.0000000000 0.000000000 0.25 basis 0.66666666666666662966 0.33333333333333331483 0.862008989 basis 0.33333333333333331483 0.66666666666666662966 0.137990996 basis 0.33333333333333331483 0.66666666666666662966 0.362008989 basis 0.66666666666666662966 0.33333333333333331483 0.637991011 origin 0.1 0.1 0.1
Lattice spacing in x,y,z = 4.7867748 2.7651482 13.982768
region box prism 0 4 0 8 0 1 -2.0 0.0 0.0
create_box 2 box
Created triclinic box = (0 0 0) to (19.147099 22.121185 13.982768) with tilt (-9.5735495 0 0)
2 by 2 by 1 MPI processor grid
create_atoms 2 box basis 1 1 basis 2 1 basis 3 2 basis 4 2 basis 5 2 basis 6 2
Created 288 atoms
using lattice units in triclinic box = (0 0 0) to (19.147099 22.121185 13.982768) with tilt (-9.5735495 0 0)
create_atoms CPU = 0.000 seconds
mass 1 95.95 #Mo
mass 2 32.065 #S
pair_style rebomos
pair_coeff * * MoS.rebomos Mo S
Reading rebomos potential file MoS.rebomos with DATE: 2013-11-04
thermo_style custom step temp press pe ke cellgamma vol
thermo 10
#dump 1 all atom 10 MoS.lammpstrj
fix 1 all nve
run 20
Neighbor list info ...
update: every = 1 steps, delay = 0 steps, check = yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 13.4
ghost atom cutoff = 13.4
binsize = 6.7, bins = 5 4 3
1 neighbor lists, perpetual/occasional/extra = 1 0 0
(1) pair rebomos, perpetual
attributes: full, newton on, ghost
pair build: full/bin/ghost
stencil: full/ghost/bin/3d
bin: standard
Per MPI rank memory allocation (min/avg/max) = 4.15 | 4.151 | 4.151 Mbytes
Step Temp Press PotEng KinEng CellGamma Volume
0 0 28799.53 -2061.6112 0 113.40187 5922.4926
10 80.776057 13540.088 -2064.6132 2.9966028 113.40187 5922.4926
20 146.17503 -20669.371 -2067.0428 5.4227518 113.40187 5922.4926
Loop time of 0.0219485 on 4 procs for 20 steps with 288 atoms
Performance: 78.730 ns/day, 0.305 hours/ns, 911.225 timesteps/s, 262.433 katom-step/s
96.3% CPU use with 4 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 0.018118 | 0.019372 | 0.020087 | 0.5 | 88.26
Neigh | 0 | 0 | 0 | 0.0 | 0.00
Comm | 0.0015635 | 0.0023195 | 0.0035967 | 1.6 | 10.57
Output | 2.5017e-05 | 4.6834e-05 | 0.00010543 | 0.0 | 0.21
Modify | 1.3954e-05 | 1.423e-05 | 1.4594e-05 | 0.0 | 0.06
Other | | 0.0001957 | | | 0.89
Nlocal: 72 ave 72 max 72 min
Histogram: 4 0 0 0 0 0 0 0 0 0
Nghost: 2771.5 ave 2775 max 2768 min
Histogram: 2 0 0 0 0 0 0 0 0 2
Neighs: 0 ave 0 max 0 min
Histogram: 4 0 0 0 0 0 0 0 0 0
FullNghs: 35712 ave 35712 max 35712 min
Histogram: 4 0 0 0 0 0 0 0 0 0
Total # of neighbors = 142848
Ave neighs/atom = 496
Neighbor list builds = 0
Dangerous builds = 0
Total wall time: 0:00:00

View File

@ -0,0 +1,95 @@
LAMMPS (7 Feb 2024 - Development - patch_7Feb2024_update1-73-g36fa601fe0)
using 1 OpenMP thread(s) per MPI task
# monolayer MoS2
units metal
boundary p p f
processors * * 1
atom_modify map array
atom_style atomic
read_data single_layer_MoS2.data
Reading data file ...
triclinic box = (0 0 -100) to (51.15232 44.299209 100) with tilt (25.57616 0 0)
WARNING: Triclinic box skew is large. LAMMPS will run inefficiently. (src/domain.cpp:219)
1 by 1 by 1 MPI processor grid
reading atoms ...
768 atoms
read_data CPU = 0.002 seconds
mass * 32.065 # mass of sulphur atom, unit: a.u. = 1.66x10^(-27) kg
mass 1 95.94 # mass of molybdenum atom, unit: a.u. = 1.66x10^(-27) kg
########################## Define potentials ################################
pair_style rebomos
pair_coeff * * MoS.rebomos Mo S S
Reading rebomos potential file MoS.rebomos with DATE: 2013-11-04
#########################################################################
### Simulation settings ####
timestep 0.001
velocity all create 300.0 12345 loop geom
############################
# Output
thermo 500
thermo_style custom step etotal pe ke temp
thermo_modify lost warn
###### Run molecular dynamics ######
fix thermostat all nve
run 5000
Neighbor list info ...
update: every = 1 steps, delay = 0 steps, check = yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 13.4
ghost atom cutoff = 13.4
binsize = 6.7, bins = 12 7 30
1 neighbor lists, perpetual/occasional/extra = 1 0 0
(1) pair rebomos, perpetual
attributes: full, newton on, ghost
pair build: full/bin/ghost
stencil: full/ghost/bin/3d
bin: standard
Per MPI rank memory allocation (min/avg/max) = 4.473 | 4.473 | 4.473 Mbytes
Step TotEng PotEng KinEng Temp
0 -5466.9785 -5496.7212 29.742759 300
500 -5466.964 -5482.6985 15.734505 158.7059
1000 -5466.9615 -5480.9492 13.98763 141.08607
1500 -5466.964 -5482.6912 15.727258 158.63281
2000 -5466.9657 -5483.3606 16.394878 165.36675
2500 -5466.9624 -5481.6253 14.662948 147.89765
3000 -5466.9642 -5482.7515 15.7873 159.23842
3500 -5466.9654 -5483.3789 16.413502 165.5546
4000 -5466.9628 -5481.848 14.885236 150.13977
4500 -5466.9648 -5483.5045 16.539775 166.82825
5000 -5466.9649 -5483.4932 16.528298 166.71249
Loop time of 19.1009 on 1 procs for 5000 steps with 768 atoms
Performance: 22.617 ns/day, 1.061 hours/ns, 261.768 timesteps/s, 201.038 katom-step/s
99.9% CPU use with 1 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 19.042 | 19.042 | 19.042 | 0.0 | 99.69
Neigh | 0 | 0 | 0 | 0.0 | 0.00
Comm | 0.018451 | 0.018451 | 0.018451 | 0.0 | 0.10
Output | 0.00015575 | 0.00015575 | 0.00015575 | 0.0 | 0.00
Modify | 0.023931 | 0.023931 | 0.023931 | 0.0 | 0.13
Other | | 0.01658 | | | 0.09
Nlocal: 768 ave 768 max 768 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 1158 ave 1158 max 1158 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 0 ave 0 max 0 min
Histogram: 1 0 0 0 0 0 0 0 0 0
FullNghs: 141824 ave 141824 max 141824 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 141824
Ave neighs/atom = 184.66667
Neighbor list builds = 0
Dangerous builds = 0
Total wall time: 0:00:19

View File

@ -0,0 +1,95 @@
LAMMPS (7 Feb 2024 - Development - patch_7Feb2024_update1-73-g36fa601fe0)
using 1 OpenMP thread(s) per MPI task
# monolayer MoS2
units metal
boundary p p f
processors * * 1
atom_modify map array
atom_style atomic
read_data single_layer_MoS2.data
Reading data file ...
triclinic box = (0 0 -100) to (51.15232 44.299209 100) with tilt (25.57616 0 0)
WARNING: Triclinic box skew is large. LAMMPS will run inefficiently. (src/domain.cpp:219)
2 by 2 by 1 MPI processor grid
reading atoms ...
768 atoms
read_data CPU = 0.002 seconds
mass * 32.065 # mass of sulphur atom, unit: a.u. = 1.66x10^(-27) kg
mass 1 95.94 # mass of molybdenum atom, unit: a.u. = 1.66x10^(-27) kg
########################## Define potentials ################################
pair_style rebomos
pair_coeff * * MoS.rebomos Mo S S
Reading rebomos potential file MoS.rebomos with DATE: 2013-11-04
#########################################################################
### Simulation settings ####
timestep 0.001
velocity all create 300.0 12345 loop geom
############################
# Output
thermo 500
thermo_style custom step etotal pe ke temp
thermo_modify lost warn
###### Run molecular dynamics ######
fix thermostat all nve
run 5000
Neighbor list info ...
update: every = 1 steps, delay = 0 steps, check = yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 13.4
ghost atom cutoff = 13.4
binsize = 6.7, bins = 12 7 30
1 neighbor lists, perpetual/occasional/extra = 1 0 0
(1) pair rebomos, perpetual
attributes: full, newton on, ghost
pair build: full/bin/ghost
stencil: full/ghost/bin/3d
bin: standard
Per MPI rank memory allocation (min/avg/max) = 4.045 | 4.045 | 4.045 Mbytes
Step TotEng PotEng KinEng Temp
0 -5466.9785 -5496.7212 29.742759 300
500 -5466.964 -5482.6985 15.734505 158.7059
1000 -5466.9615 -5480.9492 13.98763 141.08607
1500 -5466.964 -5482.6912 15.727258 158.63281
2000 -5466.9657 -5483.3606 16.394878 165.36675
2500 -5466.9624 -5481.6253 14.662948 147.89765
3000 -5466.9642 -5482.7515 15.7873 159.23842
3500 -5466.9654 -5483.3789 16.413502 165.5546
4000 -5466.9628 -5481.848 14.885236 150.13977
4500 -5466.9648 -5483.5045 16.539775 166.82825
5000 -5466.9649 -5483.4932 16.528298 166.71249
Loop time of 5.69326 on 4 procs for 5000 steps with 768 atoms
Performance: 75.879 ns/day, 0.316 hours/ns, 878.231 timesteps/s, 674.482 katom-step/s
98.6% CPU use with 4 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 5.2611 | 5.3666 | 5.4358 | 3.0 | 94.26
Neigh | 0 | 0 | 0 | 0.0 | 0.00
Comm | 0.23476 | 0.30106 | 0.40642 | 12.8 | 5.29
Output | 0.00014996 | 0.0004478 | 0.0013353 | 0.0 | 0.01
Modify | 0.0068861 | 0.0069917 | 0.0072247 | 0.2 | 0.12
Other | | 0.01814 | | | 0.32
Nlocal: 192 ave 194 max 190 min
Histogram: 1 0 0 0 0 2 0 0 0 1
Nghost: 710 ave 712 max 708 min
Histogram: 1 0 0 0 0 2 0 0 0 1
Neighs: 0 ave 0 max 0 min
Histogram: 4 0 0 0 0 0 0 0 0 0
FullNghs: 35456 ave 35824 max 35088 min
Histogram: 1 0 0 0 0 2 0 0 0 1
Total # of neighbors = 141824
Ave neighs/atom = 184.66667
Neighbor list builds = 0
Dangerous builds = 0
Total wall time: 0:00:05

View File

@ -0,0 +1,79 @@
variable nrun equal 1000
variable dump_count equal 10
variable nwall equal 4
variable w1 equal 67
variable w2 equal 71
variable w3 equal 75
variable w4 equal 79
variable x_cylinder equal 20
variable y_cylinder equal 17
variable r_cylinder equal 4
variable MASS equal 1
variable TEMP equal 0.4
variable VFLOW equal 0.5
units lj
atom_style atomic
lattice fcc 0.3
region sim_box block 0 84 0 34 0 10
boundary p p p
create_box 2 sim_box
region reg_cylinder cylinder z ${x_cylinder} ${y_cylinder} ${r_cylinder} EDGE EDGE
create_atoms 1 box
## setup obstacle ##
group g_obst region reg_cylinder
group g_flow subtract all g_obst
set group g_obst type 2
mass 1 ${MASS}
mass 2 ${MASS}
velocity g_flow create ${TEMP} 4928459 rot yes dist gaussian
velocity g_obst set 0.0 0.0 0.0
pair_style lj/cut 1.122462
pair_coeff 1 1 1.0 1.0
pair_coeff 1 2 1.0 1.0
pair_coeff 2 2 1.0 1.0
pair_modify shift yes
neighbor 0.3 bin
neigh_modify delay 0 every 20 check no
fix 1 g_flow nve
fix 2 g_flow wall/flow x ${VFLOW} ${TEMP} 123 ${nwall} ${w1} ${w2} ${w3} ${w4}
variable dump_every equal ${nrun}/${dump_count}
variable thermo_every equal ${dump_every}
variable restart_every equal ${nrun}/10
##### uncomment for grid aggregation #####
#variable gr_Nx equal 42
#variable gr_Ny equal 17
#variable gr_Nz equal 1
#variable gr_Nevery equal ${dump_every}
#variable gr_Nrepeat equal 1
#variable gr_Nfreq equal ${dump_every}
#fix 3 g_flow ave/grid ${gr_Nevery} ${gr_Nrepeat} ${gr_Nfreq} ${gr_Nx} ${gr_Ny} ${gr_Nz} vx vy vz density/mass norm all ave one
#compute ct_gridId g_flow property/grid ${gr_Nx} ${gr_Ny} ${gr_Nz} id
#dump dmp_grid g_flow grid ${dump_every} grid.lammpstrj c_ct_gridId:grid:data f_3:grid:data[*]
##########################################
#dump dmp_coord all atom ${dump_every} dump.lammpstrj
#compute ct_Temp g_flow temp/com
#thermo_style custom step temp epair emol etotal press c_ct_Temp
#restart ${restart_every} flow.restart
timestep 0.005
thermo ${thermo_every}
run ${nrun}
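For sweeps over nrun or the wall positions, an input like this can also be driven through the LAMMPS C library interface instead of the lmp binary. A minimal sketch, assuming the script above is saved as in.wall_flow (that file name is hypothetical):
// Run the wall/flow input via the C library interface and read back thermo data.
#include "library.h"  // LAMMPS C library API
#include <mpi.h>
#include <cstdio>
int main(int argc, char **argv) {
  MPI_Init(&argc, &argv);
  void *lmp = lammps_open(0, nullptr, MPI_COMM_WORLD, nullptr);
  lammps_file(lmp, "in.wall_flow");              // hypothetical input file name
  double temp = lammps_get_thermo(lmp, "temp");  // temperature after the run
  std::printf("final temperature: %g\n", temp);
  lammps_close(lmp);
  MPI_Finalize();
  return 0;
}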

View File

@ -0,0 +1,182 @@
LAMMPS (21 Nov 2023 - Development - patch_21Nov2023-758-ge33590b2fc-modified)
using 1 OpenMP thread(s) per MPI task
variable nrun equal 1000
variable dump_count equal 10
variable nwall equal 4
variable w1 equal 67
variable w2 equal 71
variable w3 equal 75
variable w4 equal 79
variable x_cylinder equal 20
variable y_cylinder equal 17
variable r_cylinder equal 4
variable MASS equal 1
variable TEMP equal 0.4
variable VFLOW equal 0.5
units lj
atom_style atomic
lattice fcc 0.3
Lattice spacing in x,y,z = 2.3712622 2.3712622 2.3712622
region sim_box block 0 84 0 34 0 10
boundary p p p
create_box 2 sim_box
Created orthogonal box = (0 0 0) to (199.18603 80.622915 23.712622)
1 by 1 by 1 MPI processor grid
region reg_cylinder cylinder z ${x_cylinder} ${y_cylinder} ${r_cylinder} EDGE EDGE
region reg_cylinder cylinder z 20 ${y_cylinder} ${r_cylinder} EDGE EDGE
region reg_cylinder cylinder z 20 17 ${r_cylinder} EDGE EDGE
region reg_cylinder cylinder z 20 17 4 EDGE EDGE
create_atoms 1 box
Created 114240 atoms
using lattice units in orthogonal box = (0 0 0) to (199.18603 80.622915 23.712622)
create_atoms CPU = 0.010 seconds
## setup obstacle ##
group g_obst region reg_cylinder
1950 atoms in group g_obst
group g_flow subtract all g_obst
112290 atoms in group g_flow
set group g_obst type 2
Setting atom values ...
1950 settings made for type
mass 1 ${MASS}
mass 1 1
mass 2 ${MASS}
mass 2 1
velocity g_flow create ${TEMP} 4928459 rot yes dist gaussian
velocity g_flow create 0.4 4928459 rot yes dist gaussian
velocity g_obst set 0.0 0.0 0.0
pair_style lj/cut 1.122462
pair_coeff 1 1 1.0 1.0
pair_coeff 1 2 1.0 1.0
pair_coeff 2 2 1.0 1.0
pair_modify shift yes
neighbor 0.3 bin
neigh_modify delay 0 every 20 check no
fix 1 g_flow nve
fix 2 g_flow wall/flow x ${VFLOW} ${TEMP} 123 ${nwall} ${w1} ${w2} ${w3} ${w4}
fix 2 g_flow wall/flow x 0.5 ${TEMP} 123 ${nwall} ${w1} ${w2} ${w3} ${w4}
fix 2 g_flow wall/flow x 0.5 0.4 123 ${nwall} ${w1} ${w2} ${w3} ${w4}
fix 2 g_flow wall/flow x 0.5 0.4 123 4 ${w1} ${w2} ${w3} ${w4}
fix 2 g_flow wall/flow x 0.5 0.4 123 4 67 ${w2} ${w3} ${w4}
fix 2 g_flow wall/flow x 0.5 0.4 123 4 67 71 ${w3} ${w4}
fix 2 g_flow wall/flow x 0.5 0.4 123 4 67 71 75 ${w4}
fix 2 g_flow wall/flow x 0.5 0.4 123 4 67 71 75 79
variable dump_every equal ${nrun}/${dump_count}
variable dump_every equal 1000/${dump_count}
variable dump_every equal 1000/10
variable thermo_every equal ${dump_every}
variable thermo_every equal 100
variable restart_every equal ${nrun}/10
variable restart_every equal 1000/10
##### uncomment for grid aggregation #####
#variable gr_Nx equal 42
#variable gr_Ny equal 17
#variable gr_Nz equal 1
#variable gr_Nevery equal ${dump_every}
#variable gr_Nrepeat equal 1
#variable gr_Nfreq equal ${dump_every}
#fix 3 g_flow ave/grid ${gr_Nevery} ${gr_Nrepeat} ${gr_Nfreq} ${gr_Nx} ${gr_Ny} ${gr_Nz} vx vy vz density/mass norm all ave one
#compute ct_gridId g_flow property/grid ${gr_Nx} ${gr_Ny} ${gr_Nz} id
#dump dmp_grid g_flow grid ${dump_every} grid.lammpstrj c_ct_gridId:grid:data f_3:grid:data[*]
##########################################
#dump dmp_coord all atom ${dump_every} dump.lammpstrj
#compute ct_Temp g_flow temp/com
#thermo_style custom step temp epair emol etotal press c_ct_Temp
#restart ${restart_every} flow.restart
timestep 0.005
thermo ${thermo_every}
thermo 100
run ${nrun}
run 1000
CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
Your simulation uses code contributions which should be cited:
- fix wall/flow command: doi:10.1177/10943420231213013
@Article{Pavlov-etal-IJHPCA-2024,
author = {Daniil Pavlov and Vladislav Galigerov and Daniil Kolotinskii and Vsevolod Nikolskiy and Vladimir Stegailov},
title = {GPU-based molecular dynamics of fluid flows: Reaching for turbulence},
journal = {The International Journal of High Performance Computing Applications},
year = 2024,
volume = 38,
number = 1,
pages = 34-49
}
CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
Generated 0 of 1 mixed pair_coeff terms from geometric mixing rule
Neighbor list info ...
update: every = 20 steps, delay = 0 steps, check = no
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 1.422462
ghost atom cutoff = 1.422462
binsize = 0.711231, bins = 281 114 34
1 neighbor lists, perpetual/occasional/extra = 1 0 0
(1) pair lj/cut, perpetual
attributes: half, newton on
pair build: half/bin/atomonly/newton
stencil: half/bin/3d
bin: standard
Per MPI rank memory allocation (min/avg/max) = 26.69 | 26.69 | 26.69 Mbytes
Step Temp E_pair E_mol TotEng Press
0 0.39317221 0 0 0.58975315 0.11795063
100 0.3671684 0.045118445 0 0.59586622 0.27378331
200 0.3732041 0.036897471 0 0.59669873 0.24917809
300 0.37432305 0.036501844 0 0.5979815 0.24715194
400 0.37603886 0.035350565 0 0.59940392 0.24480762
500 0.37617142 0.036949771 0 0.60120196 0.24862985
600 0.37751983 0.036484268 0 0.60275905 0.24784635
700 0.37787831 0.037327783 0 0.60414029 0.25060427
800 0.37959242 0.036206184 0 0.60558983 0.2476903
900 0.38019033 0.036874395 0 0.6071549 0.24984211
1000 0.38070666 0.037068948 0 0.60812395 0.25041936
Loop time of 5.61598 on 1 procs for 1000 steps with 114240 atoms
Performance: 76923.319 tau/day, 178.063 timesteps/s, 20.342 Matom-step/s
99.7% CPU use with 1 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 2.6351 | 2.6351 | 2.6351 | 0.0 | 46.92
Neigh | 1.2994 | 1.2994 | 1.2994 | 0.0 | 23.14
Comm | 0.26576 | 0.26576 | 0.26576 | 0.0 | 4.73
Output | 0.0030531 | 0.0030531 | 0.0030531 | 0.0 | 0.05
Modify | 1.3019 | 1.3019 | 1.3019 | 0.0 | 23.18
Other | | 0.1107 | | | 1.97
Nlocal: 114240 ave 114240 max 114240 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 20119 ave 20119 max 20119 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 164018 ave 164018 max 164018 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 164018
Ave neighs/atom = 1.4357318
Neighbor list builds = 50
Dangerous builds not checked
Total wall time: 0:00:05

View File

@ -0,0 +1,182 @@
LAMMPS (21 Nov 2023 - Development - patch_21Nov2023-758-ge33590b2fc-modified)
using 1 OpenMP thread(s) per MPI task
variable nrun equal 1000
variable dump_count equal 10
variable nwall equal 4
variable w1 equal 67
variable w2 equal 71
variable w3 equal 75
variable w4 equal 79
variable x_cylinder equal 20
variable y_cylinder equal 17
variable r_cylinder equal 4
variable MASS equal 1
variable TEMP equal 0.4
variable VFLOW equal 0.5
units lj
atom_style atomic
lattice fcc 0.3
Lattice spacing in x,y,z = 2.3712622 2.3712622 2.3712622
region sim_box block 0 84 0 34 0 10
boundary p p p
create_box 2 sim_box
Created orthogonal box = (0 0 0) to (199.18603 80.622915 23.712622)
4 by 1 by 1 MPI processor grid
region reg_cylinder cylinder z ${x_cylinder} ${y_cylinder} ${r_cylinder} EDGE EDGE
region reg_cylinder cylinder z 20 ${y_cylinder} ${r_cylinder} EDGE EDGE
region reg_cylinder cylinder z 20 17 ${r_cylinder} EDGE EDGE
region reg_cylinder cylinder z 20 17 4 EDGE EDGE
create_atoms 1 box
Created 114240 atoms
using lattice units in orthogonal box = (0 0 0) to (199.18603 80.622915 23.712622)
create_atoms CPU = 0.003 seconds
## setup obstacle ##
group g_obst region reg_cylinder
1950 atoms in group g_obst
group g_flow subtract all g_obst
112290 atoms in group g_flow
set group g_obst type 2
Setting atom values ...
1950 settings made for type
mass 1 ${MASS}
mass 1 1
mass 2 ${MASS}
mass 2 1
velocity g_flow create ${TEMP} 4928459 rot yes dist gaussian
velocity g_flow create 0.4 4928459 rot yes dist gaussian
velocity g_obst set 0.0 0.0 0.0
pair_style lj/cut 1.122462
pair_coeff 1 1 1.0 1.0
pair_coeff 1 2 1.0 1.0
pair_coeff 2 2 1.0 1.0
pair_modify shift yes
neighbor 0.3 bin
neigh_modify delay 0 every 20 check no
fix 1 g_flow nve
fix 2 g_flow wall/flow x ${VFLOW} ${TEMP} 123 ${nwall} ${w1} ${w2} ${w3} ${w4}
fix 2 g_flow wall/flow x 0.5 ${TEMP} 123 ${nwall} ${w1} ${w2} ${w3} ${w4}
fix 2 g_flow wall/flow x 0.5 0.4 123 ${nwall} ${w1} ${w2} ${w3} ${w4}
fix 2 g_flow wall/flow x 0.5 0.4 123 4 ${w1} ${w2} ${w3} ${w4}
fix 2 g_flow wall/flow x 0.5 0.4 123 4 67 ${w2} ${w3} ${w4}
fix 2 g_flow wall/flow x 0.5 0.4 123 4 67 71 ${w3} ${w4}
fix 2 g_flow wall/flow x 0.5 0.4 123 4 67 71 75 ${w4}
fix 2 g_flow wall/flow x 0.5 0.4 123 4 67 71 75 79
variable dump_every equal ${nrun}/${dump_count}
variable dump_every equal 1000/${dump_count}
variable dump_every equal 1000/10
variable thermo_every equal ${dump_every}
variable thermo_every equal 100
variable restart_every equal ${nrun}/10
variable restart_every equal 1000/10
##### uncomment for grid aggregation #####
#variable gr_Nx equal 42
#variable gr_Ny equal 17
#variable gr_Nz equal 1
#variable gr_Nevery equal ${dump_every}
#variable gr_Nrepeat equal 1
#variable gr_Nfreq equal ${dump_every}
#fix 3 g_flow ave/grid ${gr_Nevery} ${gr_Nrepeat} ${gr_Nfreq} ${gr_Nx} ${gr_Ny} ${gr_Nz} vx vy vz density/mass norm all ave one
#compute ct_gridId g_flow property/grid ${gr_Nx} ${gr_Ny} ${gr_Nz} id
#dump dmp_grid g_flow grid ${dump_every} grid.lammpstrj c_ct_gridId:grid:data f_3:grid:data[*]
##########################################
#dump dmp_coord all atom ${dump_every} dump.lammpstrj
#compute ct_Temp g_flow temp/com
#thermo_style custom step temp epair emol etotal press c_ct_Temp
#restart ${restart_every} flow.restart
timestep 0.005
thermo ${thermo_every}
thermo 100
run ${nrun}
run 1000
CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
Your simulation uses code contributions which should be cited:
- fix wall/flow command: doi:10.1177/10943420231213013
@Article{Pavlov-etal-IJHPCA-2024,
author = {Daniil Pavlov and Vladislav Galigerov and Daniil Kolotinskii and Vsevolod Nikolskiy and Vladimir Stegailov},
title = {GPU-based molecular dynamics of fluid flows: Reaching for turbulence},
journal = {The International Journal of High Performance Computing Applications},
year = 2024,
volume = 38,
number = 1,
pages = 34-49
}
CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
Generated 0 of 1 mixed pair_coeff terms from geometric mixing rule
Neighbor list info ...
update: every = 20 steps, delay = 0 steps, check = no
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 1.422462
ghost atom cutoff = 1.422462
binsize = 0.711231, bins = 281 114 34
1 neighbor lists, perpetual/occasional/extra = 1 0 0
(1) pair lj/cut, perpetual
attributes: half, newton on
pair build: half/bin/atomonly/newton
stencil: half/bin/3d
bin: standard
Per MPI rank memory allocation (min/avg/max) = 8.496 | 8.496 | 8.496 Mbytes
Step Temp E_pair E_mol TotEng Press
0 0.39317221 0 0 0.58975315 0.11795063
100 0.36726398 0.045386014 0 0.59627716 0.27402111
200 0.37384538 0.036574547 0 0.5973377 0.24836729
300 0.37487455 0.036519645 0 0.59882654 0.24691726
400 0.37591417 0.036405755 0 0.60027207 0.24700641
500 0.37654714 0.037008829 0 0.60182459 0.24883444
600 0.3778008 0.03663706 0 0.6033333 0.24874392
700 0.37851338 0.036714175 0 0.60447928 0.24881829
800 0.37984876 0.036237049 0 0.6060052 0.24843003
900 0.38022763 0.036847615 0 0.60718407 0.24987198
1000 0.38084717 0.037139994 0 0.60840575 0.25070072
Loop time of 2.20347 on 4 procs for 1000 steps with 114240 atoms
Performance: 196054.093 tau/day, 453.829 timesteps/s, 51.845 Matom-step/s
95.6% CPU use with 4 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 0.67927 | 0.70882 | 0.73473 | 2.4 | 32.17
Neigh | 0.32928 | 0.34467 | 0.36084 | 2.0 | 15.64
Comm | 0.3211 | 0.36609 | 0.40741 | 6.1 | 16.61
Output | 0.0017748 | 0.0032465 | 0.0046508 | 2.1 | 0.15
Modify | 0.71135 | 0.74424 | 0.76001 | 2.3 | 33.78
Other | | 0.03641 | | | 1.65
Nlocal: 28560 ave 29169 max 27884 min
Histogram: 1 0 0 0 0 2 0 0 0 1
Nghost: 6452.25 ave 6546 max 6368 min
Histogram: 1 0 0 0 2 0 0 0 0 1
Neighs: 40893 ave 42032 max 39445 min
Histogram: 1 0 0 0 1 0 0 1 0 1
Total # of neighbors = 163572
Ave neighs/atom = 1.4318277
Neighbor list builds = 50
Dangerous builds not checked
Total wall time: 0:00:02

View File

@ -1,5 +1,26 @@
# CHANGELOG
## [4.2.01](https://github.com/kokkos/kokkos/tree/4.2.01) (2023-12-07)
[Full Changelog](https://github.com/kokkos/kokkos/compare/4.2.00...4.2.01)
### Backend and Architecture Enhancements:
#### CUDA:
- Add warp sync for `parallel_reduce` to avoid race condition [\#6630](https://github.com/kokkos/kokkos/pull/6630), [\#6746](https://github.com/kokkos/kokkos/pull/6746)
#### HIP:
- Fix Graph "multiple definition of" linking error (missing `inline` specifier) [\#6624](https://github.com/kokkos/kokkos/pull/6624)
- Add support for gfx940 (AMD Instinct MI300 GPU) [\#6671](https://github.com/kokkos/kokkos/pull/6671)
### Build System
- CMake: Don't let Kokkos set `CMAKE_CXX_FLAGS` for Trilinos builds [\#6742](https://github.com/kokkos/kokkos/pull/6742)
### Bug Fixes
- Remove deprecation warning for `AllocationMechanism` for GCC <11.0 [\#6653](https://github.com/kokkos/kokkos/pull/6653)
- Fix bug where tools were finalized early with non-default host execution instances [\#6635](https://github.com/kokkos/kokkos/pull/6635)
- Fix various issues for MSVC CUDA builds [\#6659](https://github.com/kokkos/kokkos/pull/6659)
- Fix "extra `;`" warning with `-pedantic` flag in `<Kokkos_SIMD_Scalar.hpp>` [\#6510](https://github.com/kokkos/kokkos/pull/6510)
## [4.2.00](https://github.com/kokkos/kokkos/tree/4.2.00) (2023-11-06)
[Full Changelog](https://github.com/kokkos/kokkos/compare/4.1.00...4.2.00)

View File

@ -151,7 +151,7 @@ ENDIF()
set(Kokkos_VERSION_MAJOR 4)
set(Kokkos_VERSION_MINOR 2)
set(Kokkos_VERSION_PATCH 0)
set(Kokkos_VERSION_PATCH 1)
set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}")
message(STATUS "Kokkos version: ${Kokkos_VERSION}")
math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}")
@ -252,7 +252,6 @@ ENDIF()
# subpackages
## This restores the old behavior of ProjectCompilerPostConfig.cmake
# It sets the CMAKE_CXX_FLAGS globally to those used by Kokkos
# We must do this before KOKKOS_PACKAGE_DECL
IF (KOKKOS_HAS_TRILINOS)
# Overwrite the old flags at the top-level
@ -280,21 +279,13 @@ IF (KOKKOS_HAS_TRILINOS)
SET(KOKKOSCORE_XCOMPILER_OPTIONS "${KOKKOSCORE_XCOMPILER_OPTIONS} -Xcompiler ${XCOMP_FLAG}")
LIST(APPEND KOKKOS_ALL_COMPILE_OPTIONS -Xcompiler ${XCOMP_FLAG})
ENDFOREACH()
SET(KOKKOSCORE_CXX_FLAGS "${KOKKOSCORE_COMPILE_OPTIONS} ${KOKKOSCORE_XCOMPILER_OPTIONS}")
IF (KOKKOS_ENABLE_CUDA)
STRING(REPLACE ";" " " KOKKOSCORE_CUDA_OPTIONS "${KOKKOS_CUDA_OPTIONS}")
FOREACH(CUDAFE_FLAG ${KOKKOS_CUDAFE_OPTIONS})
SET(KOKKOSCORE_CUDAFE_OPTIONS "${KOKKOSCORE_CUDAFE_OPTIONS} -Xcudafe ${CUDAFE_FLAG}")
LIST(APPEND KOKKOS_ALL_COMPILE_OPTIONS -Xcudafe ${CUDAFE_FLAG})
ENDFOREACH()
SET(KOKKOSCORE_CXX_FLAGS "${KOKKOSCORE_CXX_FLAGS} ${KOKKOSCORE_CUDA_OPTIONS} ${KOKKOSCORE_CUDAFE_OPTIONS}")
ENDIF()
# Both parent scope and this package
# In ProjectCompilerPostConfig.cmake, we capture the "global" flags Trilinos wants in
# TRILINOS_TOPLEVEL_CXX_FLAGS
SET(CMAKE_CXX_FLAGS "${TRILINOS_TOPLEVEL_CXX_FLAGS} ${KOKKOSCORE_CXX_FLAGS}" PARENT_SCOPE)
SET(CMAKE_CXX_FLAGS "${TRILINOS_TOPLEVEL_CXX_FLAGS} ${KOKKOSCORE_CXX_FLAGS}")
#CMAKE_CXX_FLAGS will get added to Kokkos and Kokkos dependencies automatically here
#These flags get set up in KOKKOS_PACKAGE_DECL, which means they
#must be configured before KOKKOS_PACKAGE_DECL
SET(KOKKOS_ALL_COMPILE_OPTIONS

View File

@ -12,7 +12,7 @@ endif
KOKKOS_VERSION_MAJOR = 4
KOKKOS_VERSION_MINOR = 2
KOKKOS_VERSION_PATCH = 0
KOKKOS_VERSION_PATCH = 1
KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc)
# Options: Cuda,HIP,SYCL,OpenMPTarget,OpenMP,Threads,Serial
@ -23,7 +23,7 @@ KOKKOS_DEVICES ?= "OpenMP"
# NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75,Ampere80,Ampere86,Ada89,Hopper90
# ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX
# IBM: BGQ,Power7,Power8,Power9
# AMD-GPUS: GFX906,GFX908,GFX90A,GFX942,GFX1030,GFX1100
# AMD-GPUS: GFX906,GFX908,GFX90A,GFX940,GFX942,GFX1030,GFX1100
# AMD-CPUS: AMDAVX,Zen,Zen2,Zen3
# Intel-GPUs: Gen9,Gen11,Gen12LP,DG1,XeHP,PVC
KOKKOS_ARCH ?= ""
@ -416,6 +416,8 @@ endif
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX906 := $(or $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA906),$(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX906))
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX908 := $(or $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA908),$(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX908))
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A := $(or $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA90A),$(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX90A))
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX940 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX940)
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX942)
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030 := $(or $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1030),$(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1030))
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100 := $(or $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1100),$(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1100))
@ -1113,6 +1115,11 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU")
KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx90a
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX940), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX940")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU")
KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx940
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX942")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU")

View File

@ -199,7 +199,8 @@ auto create_deep_copyable_compatible_view_with_same_extent(ViewType view) {
// this is needed for intel to avoid
// error #1011: missing return statement at end of non-void function
#if defined KOKKOS_COMPILER_INTEL || \
(defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130)
(defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
!defined(KOKKOS_COMPILER_MSVC))
__builtin_unreachable();
#endif
}

View File

@ -139,7 +139,7 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
auto intraTeamSentinelView_h = create_host_space_copy(intraTeamSentinelView);
Kokkos::View<ValueType**, Kokkos::HostSpace> stdDestView("stdDestView",
numTeams, numCols);
GreaterThanValueFunctor predicate(threshold);
GreaterThanValueFunctor<ValueType> predicate(threshold);
for (std::size_t i = 0; i < sourceView.extent(0); ++i) {
auto rowFrom = Kokkos::subview(sourceViewBeforeOp_h, i, Kokkos::ALL());
auto rowDest = Kokkos::subview(stdDestView, i, Kokkos::ALL());

View File

@ -191,7 +191,7 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId,
// -----------------------------------------------
auto returnView_h = create_host_space_copy(returnView);
auto intraTeamSentinelView_h = create_host_space_copy(intraTeamSentinelView);
GreaterThanValueFunctor predicate(threshold);
GreaterThanValueFunctor<ValueType> predicate(threshold);
for (std::size_t i = 0; i < dataView_dc_h.extent(0); ++i) {
auto myRow = Kokkos::subview(dataView_dc_h, i, Kokkos::ALL());

View File

@ -240,7 +240,7 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId,
"stdDestTrueView", numTeams, numCols);
Kokkos::View<ValueType**, Kokkos::HostSpace> stdDestFalseView(
"stdDestFalseView", numTeams, numCols);
GreaterThanValueFunctor predicate(threshold);
GreaterThanValueFunctor<ValueType> predicate(threshold);
for (std::size_t i = 0; i < sourceView_dc_h.extent(0); ++i) {
auto myRowSource = Kokkos::subview(sourceView_dc_h, i, Kokkos::ALL());

View File

@ -197,7 +197,7 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId,
auto distancesView_h = create_host_space_copy(distancesView);
auto dataViewAfterOp_h = create_host_space_copy(dataView);
auto intraTeamSentinelView_h = create_host_space_copy(intraTeamSentinelView);
GreaterThanValueFunctor predicate(threshold);
GreaterThanValueFunctor<ValueType> predicate(threshold);
for (std::size_t i = 0; i < dataView_dc_h.extent(0); ++i) {
auto myRow = Kokkos::subview(dataView_dc_h, i, Kokkos::ALL());

View File

@ -138,7 +138,7 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
auto intraTeamSentinelView_h = create_host_space_copy(intraTeamSentinelView);
Kokkos::View<ValueType**, Kokkos::HostSpace> stdDestView("stdDestView",
numTeams, numCols);
GreaterThanValueFunctor predicate(threshold);
GreaterThanValueFunctor<ValueType> predicate(threshold);
for (std::size_t i = 0; i < destViewAfterOp_h.extent(0); ++i) {
auto rowFrom =
Kokkos::subview(cloneOfSourceViewBeforeOp_h, i, Kokkos::ALL());

View File

@ -127,7 +127,7 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
// -----------------------------------------------
// check against std
// -----------------------------------------------
GreaterThanValueFunctor predicate(threshold);
GreaterThanValueFunctor<ValueType> predicate(threshold);
auto dataViewAfterOp_h = create_host_space_copy(dataView);
auto distancesView_h = create_host_space_copy(distancesView);
auto intraTeamSentinelView_h = create_host_space_copy(intraTeamSentinelView);

View File

@ -145,7 +145,7 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
auto intraTeamSentinelView_h = create_host_space_copy(intraTeamSentinelView);
Kokkos::View<ValueType**, Kokkos::HostSpace> stdDestView("stdDestView",
numTeams, numCols);
GreaterThanValueFunctor predicate(threshold);
GreaterThanValueFunctor<ValueType> predicate(threshold);
for (std::size_t i = 0; i < sourceView.extent(0); ++i) {
auto rowFrom =
Kokkos::subview(cloneOfSourceViewBeforeOp_h, i, Kokkos::ALL());

View File

@ -103,7 +103,7 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
stdDataView(i, j) = cloneOfDataViewBeforeOp_h(i, j);
}
}
GreaterThanValueFunctor predicate(threshold);
GreaterThanValueFunctor<ValueType> predicate(threshold);
for (std::size_t i = 0; i < dataView.extent(0); ++i) {
auto thisRow = Kokkos::subview(stdDataView, i, Kokkos::ALL());
std::replace_if(KE::begin(thisRow), KE::end(thisRow), predicate, newVal);

View File

@ -114,6 +114,7 @@
#cmakedefine KOKKOS_ARCH_AMD_GFX906
#cmakedefine KOKKOS_ARCH_AMD_GFX908
#cmakedefine KOKKOS_ARCH_AMD_GFX90A
#cmakedefine KOKKOS_ARCH_AMD_GFX940
#cmakedefine KOKKOS_ARCH_AMD_GFX942
#cmakedefine KOKKOS_ARCH_AMD_GFX1030
#cmakedefine KOKKOS_ARCH_AMD_GFX1100

View File

@ -94,9 +94,9 @@ IF(Kokkos_ENABLE_HIP OR Kokkos_ENABLE_OPENMPTARGET OR Kokkos_ENABLE_OPENACC OR K
ENDIF()
# AMD archs ordered in decreasing priority of autodetection
LIST(APPEND SUPPORTED_AMD_GPUS MI300)
LIST(APPEND SUPPORTED_AMD_ARCHS AMD_GFX942)
LIST(APPEND CORRESPONDING_AMD_FLAGS gfx942)
LIST(APPEND SUPPORTED_AMD_GPUS MI300 MI300)
LIST(APPEND SUPPORTED_AMD_ARCHS AMD_GFX942 AMD_GFX940)
LIST(APPEND CORRESPONDING_AMD_FLAGS gfx942 gfx940)
LIST(APPEND SUPPORTED_AMD_GPUS MI200 MI200 MI100 MI100)
LIST(APPEND SUPPORTED_AMD_ARCHS VEGA90A AMD_GFX90A VEGA908 AMD_GFX908)
LIST(APPEND CORRESPONDING_AMD_FLAGS gfx90a gfx90a gfx908 gfx908)

View File

@ -309,6 +309,11 @@ class ParallelReduce<CombinedFunctorReducerType,
if (CudaTraits::WarpSize < word_count.value) {
__syncthreads();
} else {
// In the above call to final(), shared might have been updated by a
// single thread within a warp without synchronization. Synchronize
// threads within warp to avoid potential race condition.
__syncwarp(0xffffffff);
}
for (unsigned i = threadIdx.y; i < word_count.value; i += blockDim.y) {
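The race these comments describe can be reproduced outside Kokkos; a hedged CUDA sketch of the hazard (one lane updates shared memory, the rest of the warp reads it) and of the fix:
// On Volta and newer GPUs, lanes of a warp may execute independently, so a
// single-lane write to shared memory is not automatically visible to the
// other lanes; __syncwarp() restores that guarantee for the full warp.
__global__ void broadcast(int *out) {
  __shared__ int value;
  if (threadIdx.x == 0) value = 42;  // one lane updates shared memory
  __syncwarp(0xffffffff);            // synchronize the warp before reading
  out[threadIdx.x] = value;          // now safe for all 32 lanes
}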

View File

@ -243,6 +243,12 @@ class ParallelReduce<CombinedFunctorReducerType, Kokkos::RangePolicy<Traits...>,
if (CudaTraits::WarpSize < word_count.value) {
__syncthreads();
} else if (word_count.value > 1) {
// Inside cuda_single_inter_block_reduce_scan() above, shared[i] below
// might have been updated by a single thread within a warp without
// synchronization afterwards. Synchronize threads within warp to avoid
// potential race condition.
__syncwarp(0xffffffff);
}
for (unsigned i = threadIdx.y; i < word_count.value; i += blockDim.y) {

View File

@ -742,6 +742,11 @@ class ParallelReduce<CombinedFunctorReducerType,
if (CudaTraits::WarpSize < word_count.value) {
__syncthreads();
} else {
// In the above call to final(), shared might have been updated by a
// single thread within a warp without synchronization. Synchronize
// threads within warp to avoid potential race condition.
__syncwarp(0xffffffff);
}
for (unsigned i = threadIdx.y; i < word_count.value; i += blockDim.y) {

View File

@ -83,7 +83,7 @@ class GraphImpl<Kokkos::HIP> {
hipGraphExec_t m_graph_exec = nullptr;
};
GraphImpl<Kokkos::HIP>::~GraphImpl() {
inline GraphImpl<Kokkos::HIP>::~GraphImpl() {
m_execution_space.fence("Kokkos::GraphImpl::~GraphImpl: Graph Destruction");
KOKKOS_EXPECTS(m_graph);
if (m_graph_exec) {
@ -92,12 +92,12 @@ GraphImpl<Kokkos::HIP>::~GraphImpl() {
KOKKOS_IMPL_HIP_SAFE_CALL(hipGraphDestroy(m_graph));
}
GraphImpl<Kokkos::HIP>::GraphImpl(Kokkos::HIP instance)
inline GraphImpl<Kokkos::HIP>::GraphImpl(Kokkos::HIP instance)
: m_execution_space(std::move(instance)) {
KOKKOS_IMPL_HIP_SAFE_CALL(hipGraphCreate(&m_graph, 0));
}
void GraphImpl<Kokkos::HIP>::add_node(
inline void GraphImpl<Kokkos::HIP>::add_node(
std::shared_ptr<aggregate_node_impl_t> const& arg_node_ptr) {
// All of the predecessors are just added as normal, so all we need to
// do here is add an empty node
@ -110,7 +110,7 @@ void GraphImpl<Kokkos::HIP>::add_node(
// Requires NodeImplPtr is a shared_ptr to specialization of GraphNodeImpl
// Also requires that the kernel has the graph node tag in it's policy
template <class NodeImpl>
void GraphImpl<Kokkos::HIP>::add_node(
inline void GraphImpl<Kokkos::HIP>::add_node(
std::shared_ptr<NodeImpl> const& arg_node_ptr) {
static_assert(NodeImpl::kernel_type::Policy::is_graph_kernel::value);
KOKKOS_EXPECTS(arg_node_ptr);
@ -129,8 +129,8 @@ void GraphImpl<Kokkos::HIP>::add_node(
// already been added to this graph and NodeImpl is a specialization of
// GraphNodeImpl that has already been added to this graph.
template <class NodeImplPtr, class PredecessorRef>
void GraphImpl<Kokkos::HIP>::add_predecessor(NodeImplPtr arg_node_ptr,
PredecessorRef arg_pred_ref) {
inline void GraphImpl<Kokkos::HIP>::add_predecessor(
NodeImplPtr arg_node_ptr, PredecessorRef arg_pred_ref) {
KOKKOS_EXPECTS(arg_node_ptr);
auto pred_ptr = GraphAccess::get_node_ptr(arg_pred_ref);
KOKKOS_EXPECTS(pred_ptr);
@ -145,7 +145,7 @@ void GraphImpl<Kokkos::HIP>::add_predecessor(NodeImplPtr arg_node_ptr,
hipGraphAddDependencies(m_graph, &pred_node, &node, 1));
}
void GraphImpl<Kokkos::HIP>::submit() {
inline void GraphImpl<Kokkos::HIP>::submit() {
if (!m_graph_exec) {
instantiate_graph();
}
@ -153,12 +153,12 @@ void GraphImpl<Kokkos::HIP>::submit() {
hipGraphLaunch(m_graph_exec, m_execution_space.hip_stream()));
}
Kokkos::HIP const& GraphImpl<Kokkos::HIP>::get_execution_space() const
inline Kokkos::HIP const& GraphImpl<Kokkos::HIP>::get_execution_space() const
noexcept {
return m_execution_space;
}
auto GraphImpl<Kokkos::HIP>::create_root_node_ptr() {
inline auto GraphImpl<Kokkos::HIP>::create_root_node_ptr() {
KOKKOS_EXPECTS(m_graph);
KOKKOS_EXPECTS(!m_graph_exec);
auto rv = std::make_shared<root_node_impl_t>(get_execution_space(),
@ -172,7 +172,7 @@ auto GraphImpl<Kokkos::HIP>::create_root_node_ptr() {
}
template <class... PredecessorRefs>
auto GraphImpl<Kokkos::HIP>::create_aggregate_ptr(PredecessorRefs&&...) {
inline auto GraphImpl<Kokkos::HIP>::create_aggregate_ptr(PredecessorRefs&&...) {
// The attachment to predecessors, which is all we really need, happens
// in the generic layer, which calls through to add_predecessor for
// each predecessor ref, so all we need to do here is create the (trivial)
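The common thread in these hunks: GraphImpl<Kokkos::HIP> is a full specialization defined in a header, and out-of-class definitions of its non-template members are ordinary functions, so each one needs inline to avoid multiple-definition errors once the header is included in more than one translation unit. A minimal sketch of the rule (hypothetical names):

// header.hpp (hypothetical)
template <class T>
struct Impl {
  void run();
};

// Member of the primary template: itself a template, so a header
// definition is fine without 'inline'.
template <class T>
void Impl<T>::run() {}

template <>
struct Impl<int> {
  void run();
};

// Member of a full specialization: an ordinary function. Without
// 'inline', every TU that includes this header defines it again and
// the link fails with a multiple-definition error.
inline void Impl<int>::run() {}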

View File

@ -30,7 +30,8 @@ namespace Impl {
struct HIPTraits {
#if defined(KOKKOS_ARCH_AMD_GFX906) || defined(KOKKOS_ARCH_AMD_GFX908) || \
defined(KOKKOS_ARCH_AMD_GFX90A) || defined(KOKKOS_ARCH_AMD_GFX942)
defined(KOKKOS_ARCH_AMD_GFX90A) || defined(KOKKOS_ARCH_AMD_GFX940) || \
defined(KOKKOS_ARCH_AMD_GFX942)
static constexpr int WarpSize = 64;
static constexpr int WarpIndexMask = 0x003f; /* hexadecimal for 63 */
static constexpr int WarpIndexShift = 6; /* WarpSize == 1 << WarpIndexShift */
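GFX940 (an MI300 variant) joins the CDNA targets with 64-wide wavefronts. For context, mask-and-shift pairs like these are typically used to split a thread id (illustrative only, not code from this diff):

// With WarpSize == 64: WarpIndexMask == 0x3f, WarpIndexShift == 6.
int lane = threadIdx.x & Kokkos::Impl::HIPTraits::WarpIndexMask;   // position inside the wavefront
int warp = threadIdx.x >> Kokkos::Impl::HIPTraits::WarpIndexShift; // wavefront index within the block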

View File

@ -75,7 +75,14 @@ class HostSpace {
/**\brief Non-default memory space instance to choose allocation mechanism,
* if available */
enum KOKKOS_DEPRECATED AllocationMechanism {
#if defined(KOKKOS_COMPILER_GNU) && KOKKOS_COMPILER_GNU < 1100
// With gcc before release 11, we see deprecation warnings even when the
// deprecated HostSpace constructor below is not used.
enum
#else
enum KOKKOS_DEPRECATED
#endif
AllocationMechanism {
STD_MALLOC,
POSIX_MEMALIGN,
POSIX_MMAP,
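The workaround drops the deprecation attribute on the enum for GCC releases before 11, which warn whenever the deprecated type is merely named in a declaration. A minimal repro sketch, following the behavior described in the hunk's comment (exact diagnostics vary by compiler release):

struct S {
  enum [[deprecated]] Mechanism { Default, Aligned };
  S() = default;
  // GCC 10 already emits -Wdeprecated-declarations for this declaration,
  // even if no caller ever uses Mechanism or this constructor.
  explicit S(Mechanism m) { (void)m; }
};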

View File

@ -31,7 +31,7 @@ namespace Kokkos {
// backends. The GPU backends always return 1 and NVHPC only compiles if we
// don't ask for the return value.
template <typename... Args>
KOKKOS_FORCEINLINE_FUNCTION void printf(const char* format, Args... args) {
KOKKOS_FUNCTION void printf(const char* format, Args... args) {
#ifdef KOKKOS_ENABLE_SYCL
// Some compilers warn if "args" is empty and format is not a string literal
if constexpr (sizeof...(Args) == 0)
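Kokkos::printf is the portable device-side printf wrapper; this hunk relaxes its annotation from forced inlining to a plain KOKKOS_FUNCTION. Typical usage, for context (a minimal sketch):

#include <Kokkos_Core.hpp>

int main(int argc, char* argv[]) {
  Kokkos::initialize(argc, argv);
  {
    Kokkos::parallel_for("debug_print", 4, KOKKOS_LAMBDA(const int i) {
      Kokkos::printf("iteration %d\n", i);  // works on host and device backends
    });
    Kokkos::fence();
  }
  Kokkos::finalize();
}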

View File

@ -359,8 +359,6 @@ void OpenMPInternal::finalize() {
}
m_initialized = false;
Kokkos::Profiling::finalize();
}
void OpenMPInternal::print_configuration(std::ostream &s) const {

View File

@ -219,6 +219,8 @@ KOKKOS_DEPRECATED void OpenMP::partition_master(F const& f, int num_partitions,
Exec::validate_partition_impl(prev_instance->m_pool_size, num_partitions,
partition_size);
OpenMP::memory_space space;
#pragma omp parallel num_threads(num_partitions)
{
Exec thread_local_instance(partition_size);
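The restored #pragma is what actually fans the partitions out; without it the block body runs on a single thread and only one partition is ever created. A usage sketch of this deprecated API, assuming (per the older partition_master documentation) that the callable receives the partition id and partition count:

// Deprecated API; sketch only, lambda parameters are an assumption.
Kokkos::OpenMP::partition_master(
    [](int partition_id, int num_partitions) {
      // each partition runs this once, on its own master thread
    },
    /*num_partitions=*/2, /*partition_size=*/4);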

View File

@ -58,8 +58,6 @@ void SerialInternal::finalize() {
m_thread_team_data.scratch_assign(nullptr, 0, 0, 0, 0, 0);
}
Kokkos::Profiling::finalize();
m_is_initialized = false;
}

View File

@ -30,6 +30,7 @@ static_assert(false,
#include <cstddef>
#include <iosfwd>
#include <iterator>
#include <mutex>
#include <thread>
#include <Kokkos_Core_fwd.hpp>

View File

@ -815,8 +815,6 @@ void ThreadsExec::finalize() {
s_threads_process.m_pool_size = 1;
s_threads_process.m_pool_fan_size = 0;
s_threads_process.m_pool_state = ThreadsExec::Inactive;
Kokkos::Profiling::finalize();
}
//----------------------------------------------------------------------------
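This hunk, together with the matching removals in OpenMPInternal::finalize() and SerialInternal::finalize() above, takes Kokkos::Profiling::finalize() out of the per-backend teardown: with several backends enabled, the first backend to finalize would shut profiling down while the others were still emitting events. The intended ordering, conceptually (a sketch, not the actual Kokkos code):

void finalize_kokkos() {
  // 1. finalize every enabled backend (Serial, OpenMP, Threads, ...);
  //    none of them shuts down profiling on its own anymore
  // 2. only then, exactly once for the whole library:
  Kokkos::Profiling::finalize();
}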

View File

@ -31,7 +31,8 @@
#endif
#if defined KOKKOS_COMPILER_INTEL || \
(defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130)
(defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
!defined(KOKKOS_COMPILER_MSVC))
#define MATHEMATICAL_FUNCTIONS_TEST_UNREACHABLE __builtin_unreachable();
#else
#define MATHEMATICAL_FUNCTIONS_TEST_UNREACHABLE
@ -394,10 +395,12 @@ DEFINE_UNARY_FUNCTION_EVAL(log2, 2);
DEFINE_UNARY_FUNCTION_EVAL(log1p, 2);
#endif
#ifndef KOKKOS_MATHEMATICAL_FUNCTIONS_SKIP_1
#ifndef KOKKOS_MATHEMATICAL_FUNCTIONS_SKIP_2
DEFINE_UNARY_FUNCTION_EVAL(sqrt, 2);
DEFINE_UNARY_FUNCTION_EVAL(cbrt, 2);
#endif
#ifndef KOKKOS_MATHEMATICAL_FUNCTIONS_SKIP_1
DEFINE_UNARY_FUNCTION_EVAL(sin, 2);
DEFINE_UNARY_FUNCTION_EVAL(cos, 2);
DEFINE_UNARY_FUNCTION_EVAL(tan, 2);
@ -483,11 +486,9 @@ DEFINE_UNARY_FUNCTION_EVAL(logb, 2);
}; \
constexpr char math_function_name<MathBinaryFunction_##FUNC>::name[]
#ifndef KOKKOS_MATHEMATICAL_FUNCTIONS_SKIP_1
#ifndef KOKKOS_MATHEMATICAL_FUNCTIONS_SKIP_2
DEFINE_BINARY_FUNCTION_EVAL(pow, 2);
DEFINE_BINARY_FUNCTION_EVAL(hypot, 2);
#endif
#ifndef KOKKOS_MATHEMATICAL_FUNCTIONS_SKIP_2
DEFINE_BINARY_FUNCTION_EVAL(nextafter, 1);
DEFINE_BINARY_FUNCTION_EVAL(copysign, 1);
#endif
@ -519,7 +520,7 @@ DEFINE_BINARY_FUNCTION_EVAL(copysign, 1);
}; \
constexpr char math_function_name<MathTernaryFunction_##FUNC>::name[]
#ifndef KOKKOS_MATHEMATICAL_FUNCTIONS_SKIP_1
#ifndef KOKKOS_MATHEMATICAL_FUNCTIONS_SKIP_2
DEFINE_TERNARY_FUNCTION_EVAL(hypot, 2);
DEFINE_TERNARY_FUNCTION_EVAL(fma, 2);
#endif
@ -787,7 +788,9 @@ TEST(TEST_CATEGORY, mathematical_functions_trigonometric_functions) {
// TODO atan2
}
#endif
#ifndef KOKKOS_MATHEMATICAL_FUNCTIONS_SKIP_2
TEST(TEST_CATEGORY, mathematical_functions_power_functions) {
TEST_MATH_FUNCTION(sqrt)({0, 1, 2, 3, 5, 7, 11});
TEST_MATH_FUNCTION(sqrt)({0l, 1l, 2l, 3l, 5l, 7l, 11l});
@ -1568,6 +1571,7 @@ TEST(TEST_CATEGORY, mathematical_functions_ieee_remainder_function) {
// TODO: TestFpClassify, see https://github.com/kokkos/kokkos/issues/6279
#ifndef KOKKOS_MATHEMATICAL_FUNCTIONS_SKIP_2
template <class Space>
struct TestIsFinite {
TestIsFinite() { run(); }
@ -1591,6 +1595,7 @@ struct TestIsFinite {
++e;
Kokkos::printf("failed isfinite(float)\n");
}
#if !(defined(KOKKOS_ENABLE_CUDA) && defined(KOKKOS_COMPILER_MSVC))
if (!isfinite(static_cast<KE::half_t>(2.f))
#ifndef KOKKOS_COMPILER_NVHPC // FIXME_NVHPC 23.7
|| isfinite(quiet_NaN<KE::half_t>::value) ||
@ -1611,6 +1616,7 @@ struct TestIsFinite {
++e;
Kokkos::printf("failed isfinite(KE::bhalf_t)\n");
}
#endif
if (!isfinite(3.)
#ifndef KOKKOS_COMPILER_NVHPC // FIXME_NVHPC 23.7
|| isfinite(quiet_NaN<double>::value) ||
@ -1670,6 +1676,7 @@ struct TestIsInf {
++e;
Kokkos::printf("failed isinf(float)\n");
}
#if !(defined(KOKKOS_ENABLE_CUDA) && defined(KOKKOS_COMPILER_MSVC))
if (isinf(static_cast<KE::half_t>(2.f))
#ifndef KOKKOS_COMPILER_NVHPC // FIXME_NVHPC 23.7
|| isinf(quiet_NaN<KE::half_t>::value) ||
@ -1690,6 +1697,7 @@ struct TestIsInf {
++e;
Kokkos::printf("failed isinf(KE::bhalf_t)\n");
}
#endif
if (isinf(3.)
#ifndef KOKKOS_COMPILER_NVHPC // FIXME_NVHPC 23.7
|| isinf(quiet_NaN<double>::value) ||
@ -1748,6 +1756,7 @@ struct TestIsNaN {
++e;
Kokkos::printf("failed isnan(float)\n");
}
#if !(defined(KOKKOS_ENABLE_CUDA) && defined(KOKKOS_COMPILER_MSVC))
if (isnan(static_cast<KE::half_t>(2.f))
#ifndef KOKKOS_COMPILER_NVHPC // FIXME_NVHPC 23.7
|| !isnan(quiet_NaN<KE::half_t>::value) ||
@ -1777,6 +1786,7 @@ struct TestIsNaN {
++e;
Kokkos::printf("failed isnan(double)\n");
}
#endif
#ifdef MATHEMATICAL_FUNCTIONS_HAVE_LONG_DOUBLE_OVERLOADS
if (isnan(4.l) || !isnan(quiet_NaN<long double>::value) ||
!isnan(signaling_NaN<long double>::value) ||
@ -1803,6 +1813,7 @@ struct TestIsNaN {
TEST(TEST_CATEGORY, mathematical_functions_isnan) {
TestIsNaN<TEST_EXECSPACE>();
}
#endif
// TODO: TestSignBit, see https://github.com/kokkos/kokkos/issues/6279
#endif
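The SKIP_1/SKIP_2 reshuffling above rebalances which tests land in which translation unit: the mathematical-functions test is too large to compile as a single TU, so the same source is compiled several times with different groups disabled. The pattern, sketched with assumed file names and macro pairing:

// part1.cpp (hypothetical): defining SKIP_2 drops the second group, so
// only the tests guarded by #ifndef ..._SKIP_1 are compiled here.
#define KOKKOS_MATHEMATICAL_FUNCTIONS_SKIP_2
#include "TestMathematicalFunctions.hpp"

// part2.cpp (hypothetical): the complement.
// #define KOKKOS_MATHEMATICAL_FUNCTIONS_SKIP_1
// #include "TestMathematicalFunctions.hpp"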

View File

@ -110,8 +110,8 @@ struct TestNumericTraits {
KOKKOS_FUNCTION void operator()(Epsilon, int, int& e) const {
using Kokkos::Experimental::epsilon;
auto const eps = epsilon<T>::value;
auto const one = T(1);
T const eps = epsilon<T>::value;
T const one = 1;
// Avoid higher precision intermediate representation
compare() = one + eps;
e += (int)!(compare() != one);
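Spelling the type out as T matters when the arithmetic would otherwise be carried out in a wider type (a half-precision value promoted to float, or x87 excess precision): one + eps must be computed, stored, and compared in T itself for the epsilon identity to be meaningful. A sketch of the pitfall (the wider type is illustrative):

// If the traits value were expressed in a wider type, 'auto' would
// propagate that width and the identity under test would be evaluated
// in the wrong precision:
auto const eps_wide = 0.000977f;  // e.g. a half-precision epsilon held as float
// 'one + eps_wide' is then float arithmetic; only the final store rounds
// back, so the test could pass even if T cannot separate 1 and 1 + eps.
// Declaring 'T const eps' and 'T const one', as in the hunk, keeps the
// computation in T from end to end.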

View File

@ -160,6 +160,7 @@ display_help_text() {
echo " AMD_GFX906 = AMD GPU MI50/MI60 GFX906"
echo " AMD_GFX908 = AMD GPU MI100 GFX908"
echo " AMD_GFX90A = AMD GPU MI200 GFX90A"
echo " AMD_GFX940 = AMD GPU MI300 GFX940"
echo " AMD_GFX942 = AMD GPU MI300 GFX942"
echo " AMD_GFX1030 = AMD GPU V620/W6800 GFX1030"
echo " AMD_GFX1100 = AMD GPU RX 7900 XT(X) GFX1100"

Some files were not shown because too many files have changed in this diff.