diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 1b4cae3aaa..ed37fa80b9 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -37,7 +37,7 @@ src/MESONT/*          @iafoss
 src/ML-HDNNP/*        @singraber
 src/ML-IAP/*          @athomps
 src/ML-PACE/*         @yury-lysogorskiy
-src/ML-POD/*          @exapde @rohskopf
+src/ML-POD/*          @exapde
 src/MOFFF/*           @hheenen
 src/MOLFILE/*         @akohlmey
 src/NETCDF/*          @pastewka
@@ -65,9 +65,12 @@ src/MANYBODY/pair_nb3b_screened.*   @flodesani
 src/REPLICA/*_grem.*                @dstelter92
 src/EXTRA-COMPUTE/compute_stress_mop*.* @RomainVermorel
 src/EXTRA-COMPUTE/compute_born_matrix.* @Bibobu @athomps
+src/EXTRA-FIX/fix_deform_pressure.* @jtclemm
 src/MISC/*_tracker.*                @jtclemm
 src/MC/fix_gcmc.*                   @athomps
 src/MC/fix_sgcmc.*                  @athomps
+src/REAXFF/compute_reaxff_atom.*    @rbberger
+src/KOKKOS/compute_reaxff_atom_kokkos.*    @rbberger
 src/REPLICA/fix_pimd_langevin.*     @Yi-FanLi
 
 # core LAMMPS classes
diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
index ad7f9da35e..f87c92396f 100644
--- a/cmake/CMakeLists.txt
+++ b/cmake/CMakeLists.txt
@@ -222,6 +222,10 @@ endif()
 add_executable(lmp ${MAIN_SOURCES})
 target_link_libraries(lmp PRIVATE lammps)
 set_target_properties(lmp PROPERTIES OUTPUT_NAME ${LAMMPS_BINARY})
+# re-export all symbols for plugins
+if(PKG_PLUGIN AND (NOT ((CMAKE_SYSTEM_NAME STREQUAL "Windows"))))
+  set_target_properties(lmp PROPERTIES ENABLE_EXPORTS TRUE)
+endif()
 install(TARGETS lmp EXPORT LAMMPS_Targets DESTINATION ${CMAKE_INSTALL_BINDIR})
 
 option(CMAKE_VERBOSE_MAKEFILE "Generate verbose Makefiles" OFF)
diff --git a/cmake/Modules/Packages/KOKKOS.cmake b/cmake/Modules/Packages/KOKKOS.cmake
index ce55c83b08..9324ea95c4 100644
--- a/cmake/Modules/Packages/KOKKOS.cmake
+++ b/cmake/Modules/Packages/KOKKOS.cmake
@@ -45,8 +45,8 @@ if(DOWNLOAD_KOKKOS)
   list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_CXX_EXTENSIONS=${CMAKE_CXX_EXTENSIONS}")
   list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}")
   include(ExternalProject)
-  set(KOKKOS_URL "https://github.com/kokkos/kokkos/archive/4.2.00.tar.gz" CACHE STRING "URL for KOKKOS tarball")
-  set(KOKKOS_MD5 "731647b61a4233f568d583702e9cd6d1" CACHE STRING "MD5 checksum of KOKKOS tarball")
+  set(KOKKOS_URL "https://github.com/kokkos/kokkos/archive/4.2.01.tar.gz" CACHE STRING "URL for KOKKOS tarball")
+  set(KOKKOS_MD5 "16b9b09ae947d434dfb58fc5c87c2b76" CACHE STRING "MD5 checksum of KOKKOS tarball")
   mark_as_advanced(KOKKOS_URL)
   mark_as_advanced(KOKKOS_MD5)
   GetFallbackURL(KOKKOS_URL KOKKOS_FALLBACK)
@@ -71,7 +71,7 @@ if(DOWNLOAD_KOKKOS)
   add_dependencies(LAMMPS::KOKKOSCORE kokkos_build)
   add_dependencies(LAMMPS::KOKKOSCONTAINERS kokkos_build)
 elseif(EXTERNAL_KOKKOS)
-  find_package(Kokkos 4.2.00 REQUIRED CONFIG)
+  find_package(Kokkos 4.2.01 REQUIRED CONFIG)
   target_link_libraries(lammps PRIVATE Kokkos::kokkos)
 else()
   set(LAMMPS_LIB_KOKKOS_SRC_DIR ${LAMMPS_LIB_SOURCE_DIR}/kokkos)
@@ -139,8 +139,12 @@ if(PKG_KSPACE)
       message(WARNING "Using KISS FFT with the CUDA backend of Kokkos may be sub-optimal.")
       target_compile_definitions(lammps PRIVATE -DFFT_KOKKOS_KISS)
     elseif(FFT_KOKKOS STREQUAL "CUFFT")
+      find_library(CUFFT_LIBRARY cufft)
+      if (CUFFT_LIBRARY STREQUAL "CUFFT_LIBRARY-NOTFOUND")
+        message(FATAL_ERROR "Required cuFFT library not found. Check your environment or set CUFFT_LIBRARY to its location")
+      endif()
       target_compile_definitions(lammps PRIVATE -DFFT_KOKKOS_CUFFT)
-      target_link_libraries(lammps PRIVATE cufft)
+      target_link_libraries(lammps PRIVATE ${CUFFT_LIBRARY})
     endif()
   elseif(Kokkos_ENABLE_HIP)
     if(NOT ((FFT_KOKKOS STREQUAL "KISS") OR (FFT_KOKKOS STREQUAL "HIPFFT")))
diff --git a/cmake/presets/kokkos-cuda.cmake b/cmake/presets/kokkos-cuda.cmake
index 3205387044..878ce0c566 100644
--- a/cmake/presets/kokkos-cuda.cmake
+++ b/cmake/presets/kokkos-cuda.cmake
@@ -10,7 +10,7 @@ get_filename_component(NVCC_WRAPPER_CMD ${CMAKE_CURRENT_SOURCE_DIR}/../lib/kokko
 set(CMAKE_CXX_COMPILER ${NVCC_WRAPPER_CMD} CACHE FILEPATH "" FORCE)
 
 # If KSPACE is also enabled, use CUFFT for FFTs
-set(FFT_KOKKOS "CUFFT" CACHE STRING FORCE)
+set(FFT_KOKKOS "CUFFT" CACHE STRING "" FORCE)
 
 # hide deprecation warnings temporarily for stable release
 set(Kokkos_ENABLE_DEPRECATION_WARNINGS OFF CACHE BOOL "" FORCE)
diff --git a/cmake/presets/kokkos-hip.cmake b/cmake/presets/kokkos-hip.cmake
index ffc259a225..38bf27092f 100644
--- a/cmake/presets/kokkos-hip.cmake
+++ b/cmake/presets/kokkos-hip.cmake
@@ -13,7 +13,7 @@ set(CMAKE_CXX_COMPILER hipcc CACHE STRING "" FORCE)
 set(CMAKE_TUNE_FLAGS "-munsafe-fp-atomics" CACHE STRING "" FORCE)
 
 # If KSPACE is also enabled, use CUFFT for FFTs
-set(FFT_KOKKOS "HIPFFT" CACHE STRING FORCE)
+set(FFT_KOKKOS "HIPFFT" CACHE STRING "" FORCE)
 
 # hide deprecation warnings temporarily for stable release
 set(Kokkos_ENABLE_DEPRECATION_WARNINGS OFF CACHE BOOL "" FORCE)
diff --git a/doc/src/Bibliography.rst b/doc/src/Bibliography.rst
index 4ed8e73dfe..9778340c94 100644
--- a/doc/src/Bibliography.rst
+++ b/doc/src/Bibliography.rst
@@ -877,6 +877,9 @@ Bibliography
 **(PLUMED)**
    G.A. Tribello, M. Bonomi, D. Branduardi, C. Camilloni and G. Bussi, Comp. Phys. Comm 185, 604 (2014)
 
+**(Pavlov)**
+D Pavlov, V Galigerov, D Kolotinskii, V Nikolskiy, V Stegailov, International Journal of High Performance Computing Applications, 38, 34-49 (2024).
+
 **(Paquay)**
    Paquay and Kusters, Biophys. J., 110, 6, (2016). preprint available at `arXiv:1411.3019 <https://arxiv.org/abs/1411.3019/>`_.
 
diff --git a/doc/src/Commands_fix.rst b/doc/src/Commands_fix.rst
index e89e302673..a7648218fa 100644
--- a/doc/src/Commands_fix.rst
+++ b/doc/src/Commands_fix.rst
@@ -61,6 +61,7 @@ OPT.
    * :doc:`controller <fix_controller>`
    * :doc:`damping/cundall <fix_damping_cundall>`
    * :doc:`deform (k) <fix_deform>`
+   * :doc:`deform/pressure <fix_deform_pressure>`
    * :doc:`deposit <fix_deposit>`
    * :doc:`dpd/energy (k) <fix_dpd_energy>`
    * :doc:`drag <fix_drag>`
@@ -262,6 +263,7 @@ OPT.
    * :doc:`wall/body/polyhedron <fix_wall_body_polyhedron>`
    * :doc:`wall/colloid <fix_wall>`
    * :doc:`wall/ees <fix_wall_ees>`
+   * :doc:`wall/flow (k) <fix_wall_flow>`
    * :doc:`wall/gran (k) <fix_wall_gran>`
    * :doc:`wall/gran/region <fix_wall_gran_region>`
    * :doc:`wall/harmonic <fix_wall>`
diff --git a/doc/src/Commands_pair.rst b/doc/src/Commands_pair.rst
index 9f2bdbce79..9bbe216dec 100644
--- a/doc/src/Commands_pair.rst
+++ b/doc/src/Commands_pair.rst
@@ -256,6 +256,7 @@ OPT.
    * :doc:`rann <pair_rann>`
    * :doc:`reaxff (ko) <pair_reaxff>`
    * :doc:`rebo (io) <pair_airebo>`
+   * :doc:`rebomos (o) <pair_rebomos>`
    * :doc:`resquared (go) <pair_resquared>`
    * :doc:`saip/metal (t) <pair_saip_metal>`
    * :doc:`sdpd/taitwater/isothermal <pair_sdpd_taitwater_isothermal>`
diff --git a/doc/src/Developer_updating.rst b/doc/src/Developer_updating.rst
index cd61eaa5a1..83491ac095 100644
--- a/doc/src/Developer_updating.rst
+++ b/doc/src/Developer_updating.rst
@@ -18,6 +18,7 @@ Available topics in mostly chronological order are:
 - `Setting flags in the constructor`_
 - `Rename of pack/unpack_comm() to pack/unpack_forward_comm()`_
 - `Use ev_init() to initialize variables derived from eflag and vflag`_
+- `Use utils::count_words() functions instead of atom->count_words()`_
 - `Use utils::numeric() functions instead of force->numeric()`_
 - `Use utils::open_potential() function to open potential files`_
 - `Use symbolic Atom and AtomVec constants instead of numerical values`_
@@ -130,6 +131,41 @@ Not applying this change will not cause a compilation error, but
 can lead to inconsistent behavior and incorrect tallying of
 energy or virial.
 
+Use utils::count_words() functions instead of atom->count_words()
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. versionchanged:: 2Jun2020
+
+The "count_words()" functions for parsing text have been moved from the
+Atom class to the :doc:`utils namespace <Developer_utils>`.  The
+"count_words()" function in "utils" uses the Tokenizer class internally
+to split a line into words and count them, thus it will not modify the
+argument string as the function in the Atoms class did and thus had a
+variant using a copy buffer.  Unlike the old version, the new version
+does not remove comments. For that you can use the
+:cpp:func:`utils::trim_comment() function
+<LAMMPS_NS::utils::trim_comment>` as shown in the example below.
+
+Old:
+
+.. code-block:: c++
+
+   nwords = atom->count_words(line);
+   int nwords = atom->count_words(buf);
+
+New:
+
+.. code-block:: c++
+
+   nwords = utils::count_words(line);
+   int nwords = utils::count_words(utils::trim_comment(buf));
+
+.. seealso::
+
+   :cpp:func:`utils::count_words() <LAMMPS_NS::utils::count_words>`,
+   :cpp:func:`utils::trim_comments() <LAMMPS_NS::utils::trim_comments>`
+
+
 Use utils::numeric() functions instead of force->numeric()
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
@@ -137,11 +173,12 @@ Use utils::numeric() functions instead of force->numeric()
 
 The "numeric()" conversion functions (including "inumeric()",
 "bnumeric()", and "tnumeric()") have been moved from the Force class to
-the utils namespace.  Also they take an additional argument that selects
-whether the ``Error::all()`` or ``Error::one()`` function should be
-called in case of an error.  The former should be used when *all* MPI
-processes call the conversion function and the latter *must* be used
-when they are called from only one or a subset of the MPI processes.
+the :doc:`utils namespace <Developer_utils>`.  Also they take an
+additional argument that selects whether the ``Error::all()`` or
+``Error::one()`` function should be called in case of an error.  The
+former should be used when *all* MPI processes call the conversion
+function and the latter *must* be used when they are called from only
+one or a subset of the MPI processes.
 
 Old:
 
diff --git a/doc/src/Howto_granular.rst b/doc/src/Howto_granular.rst
index c22cab66bc..b0c801be11 100644
--- a/doc/src/Howto_granular.rst
+++ b/doc/src/Howto_granular.rst
@@ -45,10 +45,15 @@ atoms, and should be used for granular system instead of the fix style
 
 To model heat conduction, one must add the temperature and heatflow
 atom variables with:
+
 * :doc:`fix property/atom <fix_property_atom>`
+
 a temperature integration fix
+
 * :doc:`fix heat/flow <fix_heat_flow>`
+
 and a heat conduction option defined in both
+
 * :doc:`pair_style granular <pair_granular>`
 * :doc:`fix wall/gran <fix_wall_gran>`
 
diff --git a/doc/src/Speed_kokkos.rst b/doc/src/Speed_kokkos.rst
index 1cae518f96..41ae4a4dfb 100644
--- a/doc/src/Speed_kokkos.rst
+++ b/doc/src/Speed_kokkos.rst
@@ -20,11 +20,28 @@ including Sikandar Mashayak (UIUC), Ray Shan (Sandia), and Dan Ibanez
 (Sandia). For more information on developing using Kokkos abstractions
 see the `Kokkos Wiki <https://github.com/kokkos/kokkos/wiki>`_.
 
-Kokkos currently provides support for 4 modes of execution (per MPI
+.. note::
+
+   The Kokkos library is under active development and tracking the
+   availability of accelerator hardware, so is the KOKKOS package in
+   LAMMPS.  This means that only a certain range of versions of the
+   Kokkos library are compatible with the KOKKOS package of a certain
+   range of LAMMPS versions.  For that reason LAMMPS comes with a
+   bundled version of the Kokkos library that has been validated on
+   multiple platforms and may contain selected back-ported bug fixes
+   from upstream Kokkos versions.  While it is possible to build LAMMPS
+   with an external version of Kokkos, it is untested and may result in
+   incorrect execution or crashes.
+
+Kokkos currently provides full support for 4 modes of execution (per MPI
 task). These are Serial (MPI-only for CPUs and Intel Phi), OpenMP
-(threading for many-core CPUs and Intel Phi), CUDA (for NVIDIA
-GPUs) and HIP (for AMD GPUs). You choose the mode at build time to
-produce an executable compatible with a specific hardware.
+(threading for many-core CPUs and Intel Phi), CUDA (for NVIDIA GPUs) and
+HIP (for AMD GPUs).  Additional modes (e.g. OpenMP target, Intel data
+center GPUs) are under development.  You choose the mode at build time
+to produce an executable compatible with a specific hardware.
+
+The following compatibility notes have been last updated for LAMMPS
+version 23 November 2023 and Kokkos version 4.2.
 
 .. admonition:: C++17 support
    :class: note
@@ -54,22 +71,22 @@ produce an executable compatible with a specific hardware.
    :class: note
 
    Kokkos with CUDA currently implicitly assumes that the MPI library is
-   GPU-aware. This is not always the case, especially when using
+   GPU-aware.  This is not always the case, especially when using
    pre-compiled MPI libraries provided by a Linux distribution. This is
    not a problem when using only a single GPU with a single MPI
-   rank. When running with multiple MPI ranks, you may see segmentation
+   rank.  When running with multiple MPI ranks, you may see segmentation
    faults without GPU-aware MPI support. These can be avoided by adding
    the flags :doc:`-pk kokkos gpu/aware off <Run_options>` to the
    LAMMPS command line or by using the command :doc:`package kokkos
    gpu/aware off <package>` in the input file.
 
-.. admonition:: AMD GPU support
+.. admonition:: Intel Data Center GPU support
    :class: note
 
-   To build with Kokkos the HIPCC compiler from the AMD ROCm software
-   version 3.5 or later is required.  Supporting this Kokkos mode in
-   LAMMPS is still work in progress.  Please contact the LAMMPS developers
-   if you run into problems.
+   Support for Kokkos with Intel Data Center GPU accelerators (formerly
+   known under the code name "Ponte Vecchio") in LAMMPS is still a work
+   in progress.  Only a subset of the functionality works correctly.
+   Please contact the LAMMPS developers if you run into problems.
 
 Building LAMMPS with the KOKKOS package
 """""""""""""""""""""""""""""""""""""""
@@ -292,6 +309,10 @@ one or more nodes, each with two GPUs:
    settings. Experimenting with its options can provide a speed-up for
    specific calculations. For example:
 
+.. code-block:: bash
+
+   mpirun -np 2 lmp_kokkos_cuda_openmpi -k on g 2 -sf kk -pk kokkos newton on neigh half binsize 2.8 -in in.lj      # Newton on, half neighbor list, set binsize = neighbor ghost cutoff
+
 .. note::
 
    The default binsize for :doc:`atom sorting <atom_modify>` on GPUs
@@ -302,9 +323,15 @@ one or more nodes, each with two GPUs:
    frequent sorting than default (e.g. sorting every 100 time steps
    instead of 1000) may improve performance.
 
-.. code-block:: bash
+.. note::
 
-   mpirun -np 2 lmp_kokkos_cuda_openmpi -k on g 2 -sf kk -pk kokkos newton on neigh half binsize 2.8 -in in.lj      # Newton on, half neighbor list, set binsize = neighbor ghost cutoff
+   When running on GPUs with many MPI ranks (tens of thousands and
+   more), the creation of the atom map (required for molecular systems)
+   on the GPU can slow down significantly or run out of GPU memory and
+   thus slow down the whole calculation or cause a crash.  You can use
+   the "-pk kokkos atom/map no" :doc:`command-line switch <Run_options>`
+   of the :doc:`package kokkos atom/map no <package>` command to create
+   the atom map on the CPU instead.
 
 .. note::
 
@@ -416,15 +443,22 @@ Generally speaking, the following rules of thumb apply:
   performance of a KOKKOS style is a bit slower than the OPENMP
   package.
 * When running large number of atoms per GPU, KOKKOS is typically faster
-  than the GPU package when compiled for double precision. The benefit
+  than the GPU package when compiled for double precision.  The benefit
   of using single or mixed precision with the GPU package depends
   significantly on the hardware in use and the simulated system and pair
   style.
-* When running on Intel hardware, KOKKOS is not as fast as
+* When running on Intel Phi hardware, KOKKOS is not as fast as
   the INTEL package, which is optimized for x86 hardware (not just
   from Intel) and compilation with the Intel compilers.  The INTEL
   package also can increase the vector length of vector instructions
   by switching to single or mixed precision mode.
+* The KOKKOS package by default assumes that you are using exactly one
+  MPI rank per GPU. When trying to use multiple MPI ranks per GPU it is
+  mandatory to enable `CUDA Multi-Process Service (MPS)
+  <https://docs.nvidia.com/deploy/mps/index.html>`_ to get good
+  performance.  In this case it is better to not use all available
+  MPI ranks in order to avoid competing with the MPS daemon for
+  CPU resources.
 
 See the `Benchmark page <https://www.lammps.org/bench.html>`_ of the
 LAMMPS website for performance of the KOKKOS package on different
diff --git a/doc/src/compute_adf.rst b/doc/src/compute_adf.rst
index fc1ad1ae0a..a43a10207c 100644
--- a/doc/src/compute_adf.rst
+++ b/doc/src/compute_adf.rst
@@ -204,8 +204,23 @@ angles per atom satisfying the ADF criteria.
 Restrictions
 """"""""""""
 
-This compute is part of the EXTRA-COMPUTE package.  It is only enabled if
-LAMMPS was built with that package.  See the :doc:`Build package <Build_package>` page for more info.
+This compute is part of the EXTRA-COMPUTE package.  It is only enabled
+if LAMMPS was built with that package.  See the :doc:`Build package
+<Build_package>` page for more info.
+
+By default, the ADF is not computed for distances longer than the
+largest force cutoff, since the neighbor list creation will only contain
+pairs up to that distance (plus neighbor list skin).  If you use outer
+cutoffs larger than that, you must use :doc:`neighbor style 'bin' or
+'nsq' <neighbor>`.
+
+If you want an ADF for a larger outer cutoff, you can also use the
+:doc:`rerun <rerun>` command to post-process a dump file, use :doc:`pair
+style zero <pair_zero>` and set the force cutoff to be larger in the
+rerun script.  Note that in the rerun context, the force cutoff is
+arbitrary and with pair style zero you are not computing any forces, and
+since you are not running dynamics you are not changing the model that
+generated the trajectory.
 
 The ADF is not computed for neighbors outside the force cutoff,
 since processors (in parallel) don't know about atom coordinates for
diff --git a/doc/src/compute_ave_sphere_atom.rst b/doc/src/compute_ave_sphere_atom.rst
index ecb67ae7b5..4640b8534a 100644
--- a/doc/src/compute_ave_sphere_atom.rst
+++ b/doc/src/compute_ave_sphere_atom.rst
@@ -102,6 +102,8 @@ This compute is part of the EXTRA-COMPUTE package.  It is only enabled
 if LAMMPS was built with that package.  See the :doc:`Build package
 <Build_package>` page for more info.
 
+This compute requires :doc:`neighbor styles 'bin' or 'nsq' <neighbor>`.
+
 Related commands
 """"""""""""""""
 
diff --git a/doc/src/compute_composition_atom.rst b/doc/src/compute_composition_atom.rst
index e973eaa234..c3e6fb7c60 100644
--- a/doc/src/compute_composition_atom.rst
+++ b/doc/src/compute_composition_atom.rst
@@ -107,6 +107,8 @@ This compute is part of the EXTRA-COMPUTE package.  It is only enabled
 if LAMMPS was built with that package.  See the :doc:`Build package
 <Build_package>` page for more info.
 
+This compute requires :doc:`neighbor styles 'bin' or 'nsq' <neighbor>`.
+
 Related commands
 """"""""""""""""
 
diff --git a/doc/src/compute_efield_wolf_atom.rst b/doc/src/compute_efield_wolf_atom.rst
index 1a709dc9f2..93bfa55151 100644
--- a/doc/src/compute_efield_wolf_atom.rst
+++ b/doc/src/compute_efield_wolf_atom.rst
@@ -106,6 +106,8 @@ Restrictions
 This compute is part of the EXTRA-COMPUTE package. It is only enabled if
 LAMMPS was built with that package.
 
+This compute requires :doc:`neighbor styles 'bin' or 'nsq' <neighbor>`.
+
 Related commands
 """"""""""""""""
 
diff --git a/doc/src/compute_fabric.rst b/doc/src/compute_fabric.rst
index b38ffafa48..77586e617a 100644
--- a/doc/src/compute_fabric.rst
+++ b/doc/src/compute_fabric.rst
@@ -64,7 +64,7 @@ tangential force tensor. The contact tensor is calculated as
 
 .. math::
 
-   C_{ab}  =  \frac{15}{2} (\phi_{ab} - \mathrm{Tr}(\phi) \delta_{ab})
+   C_{ab}  =  \frac{15}{2} (\phi_{ab} - \frac{1}{3} \mathrm{Tr}(\phi) \delta_{ab})
 
 where :math:`a` and :math:`b` are the :math:`x`, :math:`y`, :math:`z`
 directions, :math:`\delta_{ab}` is the Kronecker delta function, and
@@ -83,7 +83,7 @@ The branch tensor is calculated as
 
 .. math::
 
-   B_{ab}  =  \frac{15}{6 \mathrm{Tr}(D)} (D_{ab} - \mathrm{Tr}(D) \delta_{ab})
+   B_{ab}  =  \frac{15}{2\, \mathrm{Tr}(D)} (D_{ab} - \frac{1}{3} \mathrm{Tr}(D) \delta_{ab})
 
 where the tensor :math:`D` is defined as
 
@@ -101,7 +101,7 @@ The normal force fabric tensor is calculated as
 
 .. math::
 
-   F^n_{ab}  =  \frac{15}{6\, \mathrm{Tr}(N)} (N_{ab} - \mathrm{Tr}(N) \delta_{ab})
+   F^n_{ab}  =  \frac{15}{2\, \mathrm{Tr}(N)} (N_{ab} - \frac{1}{3} \mathrm{Tr}(N) \delta_{ab})
 
 where the tensor :math:`N` is defined as
 
@@ -119,7 +119,7 @@ as
 
 .. math::
 
-   F^t_{ab}  =  \frac{15}{9\, \mathrm{Tr}(N)} (T_{ab} - \mathrm{Tr}(T) \delta_{ab})
+   F^t_{ab}  =  \frac{5}{\mathrm{Tr}(N)} (T_{ab} - \frac{1}{3} \mathrm{Tr}(T) \delta_{ab})
 
 where the tensor :math:`T` is defined as
 
diff --git a/doc/src/compute_property_atom.rst b/doc/src/compute_property_atom.rst
index b03d6eb74e..4484c3b697 100644
--- a/doc/src/compute_property_atom.rst
+++ b/doc/src/compute_property_atom.rst
@@ -23,8 +23,9 @@ Syntax
                              spx, spy, spz, sp, fmx, fmy, fmz,
                              nbonds,
                              radius, diameter, omegax, omegay, omegaz,
+                             temperature, heatflow,
                              angmomx, angmomy, angmomz,
-                             shapex,shapey, shapez,
+                             shapex, shapey, shapez,
                              quatw, quati, quatj, quatk, tqx, tqy, tqz,
                              end1x, end1y, end1z, end2x, end2y, end2z,
                              corner1x, corner1y, corner1z,
@@ -56,6 +57,8 @@ Syntax
            *nbonds* = number of bonds assigned to an atom
            *radius,diameter* = radius,diameter of spherical particle
            *omegax,omegay,omegaz* = angular velocity of spherical particle
+           *temperature* = internal temperature of spherical particle
+           *heatflow* = internal heat flow of spherical particle
            *angmomx,angmomy,angmomz* = angular momentum of aspherical particle
            *shapex,shapey,shapez* = 3 diameters of aspherical particle
            *quatw,quati,quatj,quatk* = quaternion components for aspherical or body particles
diff --git a/doc/src/compute_rdf.rst b/doc/src/compute_rdf.rst
index ed73800f82..85e758016e 100644
--- a/doc/src/compute_rdf.rst
+++ b/doc/src/compute_rdf.rst
@@ -176,22 +176,29 @@ also numbers :math:`\ge 0.0`.
 Restrictions
 """"""""""""
 
-The RDF is not computed for distances longer than the force cutoff,
-since processors (in parallel) do not know about atom coordinates for
-atoms further away than that distance.  If you want an RDF for larger
-distances, you can use the :doc:`rerun <rerun>` command to post-process
-a dump file and set the cutoff for the potential to be longer in the
+By default, the RDF is not computed for distances longer than the
+largest force cutoff, since the neighbor list creation will only contain
+pairs up to that distance (plus neighbor list skin).  This distance can
+be increased using the *cutoff* keyword but this keyword is only valid
+with :doc:`neighbor styles 'bin' and 'nsq' <neighbor>`.
+
+If you want an RDF for larger distances, you can also use the
+:doc:`rerun <rerun>` command to post-process a dump file, use :doc:`pair
+style zero <pair_zero>` and set the force cutoff to be longer in the
 rerun script.  Note that in the rerun context, the force cutoff is
-arbitrary, since you are not running dynamics and thus are not changing
-your model.  The definition of :math:`g(r)` used by LAMMPS is only appropriate
-for characterizing atoms that are uniformly distributed throughout the
-simulation cell. In such cases, the coordination number is still
-correct and meaningful.  As an example, if a large simulation cell
-contains only one atom of type *itypeN* and one of *jtypeN*, then :math:`g(r)`
-will register an arbitrarily large spike at whatever distance they
-happen to be at, and zero everywhere else.
-The function :math:`\text{coord}(r)` will show a step
-change from zero to one at the location of the spike in :math:`g(r)`.
+arbitrary and with pair style zero you are not computing any forces, and
+you are not running dynamics you are not changing the model that
+generated the trajectory.
+
+The definition of :math:`g(r)` used by LAMMPS is only appropriate for
+characterizing atoms that are uniformly distributed throughout the
+simulation cell. In such cases, the coordination number is still correct
+and meaningful.  As an example, if a large simulation cell contains only
+one atom of type *itypeN* and one of *jtypeN*, then :math:`g(r)` will
+register an arbitrarily large spike at whatever distance they happen to
+be at, and zero everywhere else.  The function :math:`\text{coord}(r)`
+will show a step change from zero to one at the location of the spike in
+:math:`g(r)`.
 
 .. note::
 
diff --git a/doc/src/dump.rst b/doc/src/dump.rst
index 2df4f984cf..2e61ad9921 100644
--- a/doc/src/dump.rst
+++ b/doc/src/dump.rst
@@ -104,7 +104,6 @@ Syntax
                                q, mux, muy, muz, mu,
                                radius, diameter, omegax, omegay, omegaz,
                                angmomx, angmomy, angmomz, tqx, tqy, tqz,
-                               heatflow, temperature,
                                c_ID, c_ID[I], f_ID, f_ID[I], v_name,
                                i_name, d_name, i2_name[I], d2_name[I]
 
@@ -131,8 +130,6 @@ Syntax
            omegax,omegay,omegaz = angular velocity of spherical particle
            angmomx,angmomy,angmomz = angular momentum of aspherical particle
            tqx,tqy,tqz = torque on finite-size particles
-           heatflow = rate of heat flow into particle
-           temperature = temperature of particle
            c_ID = per-atom vector calculated by a compute with ID
            c_ID[I] = Ith column of per-atom array calculated by a compute with ID, I can include wildcard (see below)
            f_ID = per-atom vector calculated by a fix with ID
diff --git a/doc/src/fix.rst b/doc/src/fix.rst
index 69a7212487..4cd21353c7 100644
--- a/doc/src/fix.rst
+++ b/doc/src/fix.rst
@@ -226,6 +226,7 @@ accelerated styles exist.
 * :doc:`controller <fix_controller>` - apply control loop feedback mechanism
 * :doc:`damping/cundall <fix_damping_cundall>` - Cundall non-viscous damping for granular simulations
 * :doc:`deform <fix_deform>` - change the simulation box size/shape
+* :doc:`deform/pressure <fix_deform_pressure>` - change the simulation box size/shape with additional loading conditions
 * :doc:`deposit <fix_deposit>` - add new atoms above a surface
 * :doc:`dpd/energy <fix_dpd_energy>` - constant energy dissipative particle dynamics
 * :doc:`drag <fix_drag>` - drag atoms towards a defined coordinate
@@ -427,6 +428,7 @@ accelerated styles exist.
 * :doc:`wall/body/polyhedron <fix_wall_body_polyhedron>` - time integration for body particles of style :doc:`rounded/polyhedron <Howto_body>`
 * :doc:`wall/colloid <fix_wall>` - Lennard-Jones wall interacting with finite-size particles
 * :doc:`wall/ees <fix_wall_ees>` - wall for ellipsoidal particles
+* :doc:`wall/flow <fix_wall_flow>` - flow boundary conditions
 * :doc:`wall/gran <fix_wall_gran>` - frictional wall(s) for granular simulations
 * :doc:`wall/gran/region <fix_wall_gran_region>` - :doc:`fix wall/region <fix_wall_region>` equivalent for use with granular particles
 * :doc:`wall/harmonic <fix_wall>` - harmonic spring wall
diff --git a/doc/src/fix_balance.rst b/doc/src/fix_balance.rst
index 0672a05470..0a0ea64c6a 100644
--- a/doc/src/fix_balance.rst
+++ b/doc/src/fix_balance.rst
@@ -14,15 +14,15 @@ Syntax
 * balance = style name of this fix command
 * Nfreq = perform dynamic load balancing every this many steps
 * thresh = imbalance threshold that must be exceeded to perform a re-balance
-* style = *shift* or *rcb*
-
+* style = *shift* or *rcb* or *report*
   .. parsed-literal::
 
-       shift args = dimstr Niter stopthresh
+       *shift* args = dimstr Niter stopthresh
          dimstr = sequence of letters containing *x* or *y* or *z*, each not more than once
          Niter = # of times to iterate within each dimension of dimstr sequence
          stopthresh = stop balancing when this imbalance threshold is reached
        *rcb* args = none
+       *report* args = none
 
 * zero or more keyword/arg pairs may be appended
 * keyword = *weight* or *out*
@@ -70,6 +70,13 @@ re-balancing is performed periodically during the simulation.  To
 perform "static" balancing, before or between runs, see the
 :doc:`balance <balance>` command.
 
+.. versionadded:: TBD
+
+The *report* balance style only computes the load imbalance but
+does not attempt any re-balancing.  This way the load imbalance
+information can be used otherwise, for instance for stopping a
+run with :doc:`fix halt <fix_halt>`.
+
 Load-balancing is typically most useful if the particles in the
 simulation box have a spatially-varying density distribution or
 where the computational cost varies significantly between different
diff --git a/doc/src/fix_charge_regulation.rst b/doc/src/fix_charge_regulation.rst
index bc2651a55b..091eeae417 100644
--- a/doc/src/fix_charge_regulation.rst
+++ b/doc/src/fix_charge_regulation.rst
@@ -253,11 +253,11 @@ built with that package.  See the :doc:`Build package <Build_package>`
 page for more info.
 
 The :doc:`atom_style <atom_style>`, used must contain the charge
-property, for example, the style could be *charge* or *full*. Only
-usable for 3D simulations. Atoms specified as free ions cannot be part
-of rigid bodies or molecules and cannot have bonding interactions. The
-scheme is limited to integer charges, any atoms with non-integer charges
-will not be considered by the fix.
+property and have per atom type masses, for example, the style could be
+*charge* or *full*. Only usable for 3D simulations.  Atoms specified as
+free ions cannot be part of rigid bodies or molecules and cannot have
+bonding interactions.  The scheme is limited to integer charges, any
+atoms with non-integer charges will not be considered by the fix.
 
 All interaction potentials used must be continuous, otherwise the MD
 integration and the particle exchange MC moves do not correspond to the
diff --git a/doc/src/fix_deform.rst b/doc/src/fix_deform.rst
index ee010f5645..9146b987c8 100644
--- a/doc/src/fix_deform.rst
+++ b/doc/src/fix_deform.rst
@@ -4,6 +4,9 @@
 fix deform command
 ==================
 
+:doc:`fix deform/pressure <fix_deform_pressure>` command
+========================================================
+
 Accelerator Variants: *deform/kk*
 
 Syntax
@@ -11,18 +14,18 @@ Syntax
 
 .. code-block:: LAMMPS
 
-   fix ID group-ID deform N parameter args ... keyword value ...
+   fix ID group-ID fix_style N parameter style args ... keyword value ...
 
 * ID, group-ID are documented in :doc:`fix <fix>` command
-* deform = style name of this fix command
+* fix_style = *deform* or *deform/pressure*
 * N = perform box deformation every this many timesteps
-* one or more parameter/arg pairs may be appended
+* one or more parameter/style/args sequences of arguments may be appended
 
   .. parsed-literal::
 
      parameter = *x* or *y* or *z* or *xy* or *xz* or *yz*
        *x*, *y*, *z* args = style value(s)
-         style = *final* or *delta* or *scale* or *vel* or *erate* or *trate* or *volume* or *wiggle* or *variable*
+         style = *final* or *delta* or *scale* or *vel* or *erate* or *trate* or *volume* or *wiggle* or *variable* or *pressure* or *pressure/mean*
            *final* values = lo hi
              lo hi = box boundaries at end of run (distance units)
            *delta* values = dlo dhi
@@ -43,8 +46,15 @@ Syntax
            *variable* values = v_name1 v_name2
              v_name1 = variable with name1 for box length change as function of time
              v_name2 = variable with name2 for change rate as function of time
+           *pressure* values = target gain (ONLY available in :doc:`fix deform/pressure <fix_deform_pressure>` command)
+             target = target pressure (pressure units)
+             gain = proportional gain constant (1/(time * pressure) or 1/time units)
+           *pressure/mean* values = target gain (ONLY available in :doc:`fix deform/pressure <fix_deform_pressure>` command)
+             target = target pressure (pressure units)
+             gain = proportional gain constant (1/(time * pressure) or 1/time units)
+
        *xy*, *xz*, *yz* args = style value
-         style = *final* or *delta* or *vel* or *erate* or *trate* or *wiggle*
+         style = *final* or *delta* or *vel* or *erate* or *trate* or *wiggle* or *variable*
            *final* value = tilt
              tilt = tilt factor at end of run (distance units)
            *delta* value = dtilt
@@ -62,9 +72,12 @@ Syntax
            *variable* values = v_name1 v_name2
              v_name1 = variable with name1 for tilt change as function of time
              v_name2 = variable with name2 for change rate as function of time
+           *pressure* values = target gain (ONLY available in :doc:`fix deform/pressure <fix_deform_pressure>` command)
+             target = target pressure (pressure units)
+             gain = proportional gain constant (1/(time * pressure) or 1/time units)
 
 * zero or more keyword/value pairs may be appended
-* keyword = *remap* or *flip* or *units*
+* keyword = *remap* or *flip* or *units* or *couple* or *vol/balance/p* or *max/rate* or *normalize/pressure*
 
   .. parsed-literal::
 
@@ -77,6 +90,15 @@ Syntax
        *units* value = *lattice* or *box*
          lattice = distances are defined in lattice units
          box = distances are defined in simulation box units
+       *couple* value = *none* or *xyz* or *xy* or *yz* or *xz* (ONLY available in :doc:`fix deform/pressure <fix_deform_pressure>` command)
+         couple pressure values of various dimensions
+       *vol/balance/p* value = *yes* or *no* (ONLY available in :doc:`fix deform/pressure <fix_deform_pressure>` command)
+         Modifies the behavior of the *volume* option to try and balance pressures
+       *max/rate* value = *rate* (ONLY available in :doc:`fix deform/pressure <fix_deform_pressure>` command)
+         rate = maximum strain rate for pressure control
+       *normalize/pressure* value = *yes* or *no* (ONLY available in :doc:`fix deform/pressure <fix_deform_pressure>` command)
+         Modifies pressure controls such that the deviation in pressure is normalized by the target pressure
+
 
 Examples
 """"""""
@@ -88,6 +110,8 @@ Examples
    fix 1 all deform 1 xy erate 0.001 remap v
    fix 1 all deform 10 y delta -0.5 0.5 xz vel 1.0
 
+See examples for :doc:`fix deform/pressure <fix_deform_pressure>` on its doc page
+
 Description
 """""""""""
 
@@ -95,29 +119,46 @@ Change the volume and/or shape of the simulation box during a dynamics
 run.  Orthogonal simulation boxes have 3 adjustable parameters
 (x,y,z).  Triclinic (non-orthogonal) simulation boxes have 6
 adjustable parameters (x,y,z,xy,xz,yz).  Any or all of them can be
-adjusted independently and simultaneously by this command.
+adjusted independently and simultaneously.
 
-This fix can be used to perform non-equilibrium MD (NEMD) simulations
-of a continuously strained system.  See the :doc:`fix nvt/sllod <fix_nvt_sllod>` and :doc:`compute temp/deform <compute_temp_deform>` commands for more details.  Note
-that simulation of a continuously extended system (extensional flow)
-can be modeled using the :ref:`UEF package <PKG-UEF>` and its :doc:`fix commands <fix_nh_uef>`.
+The fix deform command allows use of all the arguments listed above,
+except those flagged as available ONLY for the :doc:`fix
+deform/pressure <fix_deform_pressure>` command, which are
+pressure-based controls.  The fix deform/pressure command allows use
+of all the arguments listed above.
+
+The rest of this doc page explains the options common to both
+commands.  The :doc:`fix deform/pressure <fix_deform_pressure>` doc
+page explains the options available ONLY with the fix deform/pressure
+command.  Note that a simulation can define only a single deformation
+command: fix deform or fix deform/pressure.
+
+Both these fixes can be used to perform non-equilibrium MD (NEMD)
+simulations of a continuously strained system.  See the :doc:`fix
+nvt/sllod <fix_nvt_sllod>` and :doc:`compute temp/deform
+<compute_temp_deform>` commands for more details.  Note that
+simulation of a continuously extended system (extensional flow) can be
+modeled using the :ref:`UEF package <PKG-UEF>` and its :doc:`fix
+commands <fix_nh_uef>`.
 
 For the *x*, *y*, *z* parameters, the associated dimension cannot be
 shrink-wrapped.  For the *xy*, *yz*, *xz* parameters, the associated
-second dimension cannot be shrink-wrapped.  Dimensions not varied by this
-command can be periodic or non-periodic.  Dimensions corresponding to
-unspecified parameters can also be controlled by a :doc:`fix npt <fix_nh>` or :doc:`fix nph <fix_nh>` command.
+second dimension cannot be shrink-wrapped.  Dimensions not varied by
+this command can be periodic or non-periodic.  Dimensions
+corresponding to unspecified parameters can also be controlled by a
+:doc:`fix npt <fix_nh>` or :doc:`fix nph <fix_nh>` command.
 
 The size and shape of the simulation box at the beginning of the
-simulation run were either specified by the
-:doc:`create_box <create_box>` or :doc:`read_data <read_data>` or
-:doc:`read_restart <read_restart>` command used to setup the simulation
-initially if it is the first run, or they are the values from the end
-of the previous run.  The :doc:`create_box <create_box>`, :doc:`read data <read_data>`, and :doc:`read_restart <read_restart>` commands
-specify whether the simulation box is orthogonal or non-orthogonal
-(triclinic) and explain the meaning of the xy,xz,yz tilt factors.  If
-fix deform changes the xy,xz,yz tilt factors, then the simulation box
-must be triclinic, even if its initial tilt factors are 0.0.
+simulation run were either specified by the :doc:`create_box
+<create_box>` or :doc:`read_data <read_data>` or :doc:`read_restart
+<read_restart>` command used to setup the simulation initially if it
+is the first run, or they are the values from the end of the previous
+run.  The :doc:`create_box <create_box>`, :doc:`read data
+<read_data>`, and :doc:`read_restart <read_restart>` commands specify
+whether the simulation box is orthogonal or non-orthogonal (triclinic)
+and explain the meaning of the xy,xz,yz tilt factors.  If fix deform
+changes the xy,xz,yz tilt factors, then the simulation box must be
+triclinic, even if its initial tilt factors are 0.0.
 
 As described below, the desired simulation box size and shape at the
 end of the run are determined by the parameters of the fix deform
@@ -258,21 +299,22 @@ of the units keyword below.
 
 The *variable* style changes the specified box length dimension by
 evaluating a variable, which presumably is a function of time.  The
-variable with *name1* must be an :doc:`equal-style variable <variable>`
-and should calculate a change in box length in units of distance.
-Note that this distance is in box units, not lattice units; see the
-discussion of the *units* keyword below.  The formula associated with
-variable *name1* can reference the current timestep.  Note that it
-should return the "change" in box length, not the absolute box length.
-This means it should evaluate to 0.0 when invoked on the initial
-timestep of the run following the definition of fix deform.  It should
-evaluate to a value > 0.0 to dilate the box at future times, or a
-value < 0.0 to compress the box.
+variable with *name1* must be an :doc:`equal-style variable
+<variable>` and should calculate a change in box length in units of
+distance.  Note that this distance is in box units, not lattice units;
+see the discussion of the *units* keyword below.  The formula
+associated with variable *name1* can reference the current timestep.
+Note that it should return the "change" in box length, not the
+absolute box length.  This means it should evaluate to 0.0 when
+invoked on the initial timestep of the run following the definition of
+fix deform.  It should evaluate to a value > 0.0 to dilate the box at
+future times, or a value < 0.0 to compress the box.
 
-The variable *name2* must also be an :doc:`equal-style variable <variable>` and should calculate the rate of box length
-change, in units of distance/time, i.e. the time-derivative of the
-*name1* variable.  This quantity is used internally by LAMMPS to reset
-atom velocities when they cross periodic boundaries.  It is computed
+The variable *name2* must also be an :doc:`equal-style variable
+<variable>` and should calculate the rate of box length change, in
+units of distance/time, i.e. the time-derivative of the *name1*
+variable.  This quantity is used internally by LAMMPS to reset atom
+velocities when they cross periodic boundaries.  It is computed
 internally for the other styles, but you must provide it when using an
 arbitrary variable.
 
@@ -414,12 +456,13 @@ can reference the current timestep.  Note that it should return the
 should evaluate to 0.0 when invoked on the initial timestep of the run
 following the definition of fix deform.
 
-The variable *name2* must also be an :doc:`equal-style variable <variable>` and should calculate the rate of tilt change,
-in units of distance/time, i.e. the time-derivative of the *name1*
-variable.  This quantity is used internally by LAMMPS to reset atom
-velocities when they cross periodic boundaries.  It is computed
-internally for the other styles, but you must provide it when using an
-arbitrary variable.
+The variable *name2* must also be an :doc:`equal-style variable
+<variable>` and should calculate the rate of tilt change, in units of
+distance/time, i.e. the time-derivative of the *name1* variable.  This
+quantity is used internally by LAMMPS to reset atom velocities when
+they cross periodic boundaries.  It is computed internally for the
+other styles, but you must provide it when using an arbitrary
+variable.
 
 Here is an example of using the *variable* style to perform the same
 box deformation as the *wiggle* style formula listed above, where we
@@ -510,33 +553,40 @@ box without explicit remapping of their coordinates.
 .. note::
 
    For non-equilibrium MD (NEMD) simulations using "remap v" it is
-   usually desirable that the fluid (or flowing material, e.g. granular
-   particles) stream with a velocity profile consistent with the
-   deforming box.  As mentioned above, using a thermostat such as :doc:`fix nvt/sllod <fix_nvt_sllod>` or :doc:`fix lavgevin <fix_langevin>`
-   (with a bias provided by :doc:`compute temp/deform <compute_temp_deform>`), will typically accomplish
-   that.  If you do not use a thermostat, then there is no driving force
-   pushing the atoms to flow in a manner consistent with the deforming
-   box.  E.g. for a shearing system the box deformation velocity may vary
+   usually desirable that the fluid (or flowing material,
+   e.g. granular particles) stream with a velocity profile consistent
+   with the deforming box.  As mentioned above, using a thermostat
+   such as :doc:`fix nvt/sllod <fix_nvt_sllod>` or :doc:`fix lavgevin
+   <fix_langevin>` (with a bias provided by :doc:`compute temp/deform
+   <compute_temp_deform>`), will typically accomplish that.  If you do
+   not use a thermostat, then there is no driving force pushing the
+   atoms to flow in a manner consistent with the deforming box.
+   E.g. for a shearing system the box deformation velocity may vary
    from 0 at the bottom to 10 at the top of the box.  But the stream
-   velocity profile of the atoms may vary from -5 at the bottom to +5 at
-   the top.  You can monitor these effects using the :doc:`fix ave/chunk <fix_ave_chunk>`, :doc:`compute temp/deform <compute_temp_deform>`, and :doc:`compute temp/profile <compute_temp_profile>` commands.  One way to induce
-   atoms to stream consistent with the box deformation is to give them an
+   velocity profile of the atoms may vary from -5 at the bottom to +5
+   at the top.  You can monitor these effects using the :doc:`fix
+   ave/chunk <fix_ave_chunk>`, :doc:`compute temp/deform
+   <compute_temp_deform>`, and :doc:`compute temp/profile
+   <compute_temp_profile>` commands.  One way to induce atoms to
+   stream consistent with the box deformation is to give them an
    initial velocity profile, via the :doc:`velocity ramp <velocity>`
-   command, that matches the box deformation rate.  This also typically
-   helps the system come to equilibrium more quickly, even if a
-   thermostat is used.
+   command, that matches the box deformation rate.  This also
+   typically helps the system come to equilibrium more quickly, even
+   if a thermostat is used.
 
 .. note::
 
    If a :doc:`fix rigid <fix_rigid>` is defined for rigid bodies, and
    *remap* is set to *x*, then the center-of-mass coordinates of rigid
-   bodies will be remapped to the changing simulation box.  This will be
-   done regardless of whether atoms in the rigid bodies are in the fix
-   deform group or not.  The velocity of the centers of mass are not
-   remapped even if *remap* is set to *v*, since :doc:`fix nvt/sllod <fix_nvt_sllod>` does not currently do anything special
+   bodies will be remapped to the changing simulation box.  This will
+   be done regardless of whether atoms in the rigid bodies are in the
+   fix deform group or not.  The velocity of the centers of mass are
+   not remapped even if *remap* is set to *v*, since :doc:`fix
+   nvt/sllod <fix_nvt_sllod>` does not currently do anything special
    for rigid particles.  If you wish to perform a NEMD simulation of
    rigid particles, you can either thermostat them independently or
-   include a background fluid and thermostat the fluid via :doc:`fix nvt/sllod <fix_nvt_sllod>`.
+   include a background fluid and thermostat the fluid via :doc:`fix
+   nvt/sllod <fix_nvt_sllod>`.
 
 The *flip* keyword allows the tilt factors for a triclinic box to
 exceed half the distance of the parallel box length, as discussed
@@ -568,7 +618,8 @@ command if you want to include lattice spacings in a variable formula.
 Restart, fix_modify, output, run start/stop, minimize info
 """""""""""""""""""""""""""""""""""""""""""""""""""""""""""
 
-This fix will restore the initial box settings from :doc:`binary restart files <restart>`, which allows the fix to be properly continue
+This fix will restore the initial box settings from :doc:`binary
+restart files <restart>`, which allows the fix to be properly continue
 deformation, when using the start/stop options of the :doc:`run <run>`
 command.  None of the :doc:`fix_modify <fix_modify>` options are
 relevant to this fix.  No global or per-atom quantities are stored by
@@ -586,12 +637,14 @@ Restrictions
 You cannot apply x, y, or z deformations to a dimension that is
 shrink-wrapped via the :doc:`boundary <boundary>` command.
 
-You cannot apply xy, yz, or xz deformations to a second dimension (y in
-xy) that is shrink-wrapped via the :doc:`boundary <boundary>` command.
+You cannot apply xy, yz, or xz deformations to a second dimension (y
+in xy) that is shrink-wrapped via the :doc:`boundary <boundary>`
+command.
 
 Related commands
 """"""""""""""""
 
+:doc:`fix deform/pressure <fix_deform_pressure>`,
 :doc:`change_box <change_box>`
 
 Default
diff --git a/doc/src/fix_deform_pressure.rst b/doc/src/fix_deform_pressure.rst
new file mode 100644
index 0000000000..f85ad37238
--- /dev/null
+++ b/doc/src/fix_deform_pressure.rst
@@ -0,0 +1,319 @@
+.. index:: fix deform/pressure
+
+fix deform/pressure command
+===========================
+
+Syntax
+""""""
+
+.. parsed-literal::
+
+   fix ID group-ID deform/pressure N parameter style args ... keyword value ...
+
+* ID, group-ID are documented in :doc:`fix <fix>` command
+* deform/pressure = style name of this fix command
+* N = perform box deformation every this many timesteps
+* one or more parameter/arg sequences may be appended
+
+  .. parsed-literal::
+
+     parameter = *x* or *y* or *z* or *xy* or *xz* or *yz* or *box*
+       *x*, *y*, *z* args = style value(s)
+         style = *final* or *delta* or *scale* or *vel* or *erate* or *trate* or *volume* or *wiggle* or *variable* or *pressure* or *pressure/mean*
+           *pressure* values = target gain
+             target = target pressure (pressure units)
+             gain = proportional gain constant (1/(time * pressure) or 1/time units)
+           *pressure/mean* values = target gain
+             target = target pressure (pressure units)
+             gain = proportional gain constant (1/(time * pressure) or 1/time units)
+           NOTE: All other styles are documented by the :doc:`fix deform <fix_deform>` command
+
+       *xy*, *xz*, *yz* args = style value
+         style = *final* or *delta* or *vel* or *erate* or *trate* or *wiggle* or *variable* or *pressure*
+           *pressure* values = target gain
+             target = target pressure (pressure units)
+             gain = proportional gain constant (1/(time * pressure) or 1/time units)
+           NOTE: All other styles are documented by the :doc:`fix deform <fix_deform>` command
+
+       *box* = style value
+         style = *volume* or *pressure*
+           *volume* value = none = isotropically adjust system to preserve volume of system
+           *pressure* values = target gain
+             target = target mean pressure (pressure units)
+             gain = proportional gain constant (1/(time * pressure) or 1/time units)
+
+* zero or more keyword/value pairs may be appended
+* keyword = *remap* or *flip* or *units* or *couple* or *vol/balance/p* or *max/rate* or *normalize/pressure*
+
+  .. parsed-literal::
+
+       *couple* value = *none* or *xyz* or *xy* or *yz* or *xz*
+         couple pressure values of various dimensions
+       *vol/balance/p* value = *yes* or *no*
+         Modifies the behavior of the *volume* option to try and balance pressures
+       *max/rate* value = *rate*
+         rate = maximum strain rate for pressure control
+       *normalize/pressure* value = *yes* or *no*
+         Modifies pressure controls such that the deviation in pressure is normalized by the target pressure
+       NOTE: All other keywords are documented by the :doc:`fix deform <fix_deform>` command
+
+Examples
+""""""""
+
+.. code-block:: LAMMPS
+
+   fix 1 all deform/pressure 1 x pressure 2.0 0.1 normalize/pressure yes max/rate 0.001
+   fix 1 all deform/pressure 1 x trate 0.1 y volume z volume vol/balance/p yes
+   fix 1 all deform/pressure 1 x trate 0.1 y pressure/mean 0.0 1.0 z pressure/mean 0.0 1.0
+
+Description
+"""""""""""
+
+.. versionadded:: TBD
+
+This fix is an extension of the :doc:`fix deform <fix_deform>`
+command, which allows all of its options to be used as well as new
+pressure-based controls implemented by this command.
+
+All arguments described on the :doc:`fix deform <fix_deform>` doc page
+also apply to this fix unless otherwise noted below.  The rest of this
+doc page explains the arguments specific to this fix.  Note that a
+simulation can define only a single deformation command: fix deform or
+fix deform/pressure.
+
+----------
+
+For the *x*, *y*, and *z* parameters, this is the meaning of the
+styles and values provided by this fix.
+
+The *pressure* style adjusts a dimension's box length to control the
+corresponding component of the pressure tensor. This option attempts to
+maintain a specified target pressure using a linear controller where the
+box length :math:`L` evolves according to the equation
+
+.. parsed-literal::
+
+   \frac{d L(t)}{dt} = L(t) k (P_t - P)
+
+where :math:`k` is a proportional gain constant, :math:`P_t` is the target
+pressure, and :math:`P` is the current pressure along that dimension. This
+approach is similar to the method used to control the pressure by
+:doc:`fix press/berendsen <fix_press_berendsen>`. The target pressure
+accepts either a constant numeric value or a LAMMPS :ref:`variable <variable>`.
+Notably, this variable can be a function of time or other components of
+the pressure tensor. By default, :math:`k` has units of 1/(time * pressure)
+although this will change if the *normalize/pressure* option is set as
+:ref:`discussed below <deform_normalize>`. There is no proven method
+to choosing an appropriate value of :math:`k` as it will depend on the
+specific details of a simulation. Testing different values is recommended.
+
+By default, there is no limit on the resulting strain rate in any dimension.
+A maximum limit can be applied using the :ref:`max/rate <deform_max_rate>`
+option. Akin to :doc:`fix nh <fix_nh>`, pressures in different dimensions
+can be coupled using the :ref:`couple <deform_couple>` option. This means
+the instantaneous pressure along coupled dimensions are averaged and the box
+strains identically along the coupled dimensions.
+
+The *pressure/mean* style changes a dimension's box length to maintain
+a constant mean pressure defined as the trace of the pressure tensor.
+This option has identical arguments to the *pressure* style and a similar
+functional equation, except the current and target pressures refer to the
+mean trace of the pressure tensor. All options for the *pressure* style
+also apply to the *pressure/mean* style except for the
+:ref:`couple <deform_couple>` option.
+
+Note that while this style can be identical to coupled *pressure* styles,
+it is generally not the same. For instance in 2D, a coupled *pressure*
+style in the *x* and *y* dimensions would be equivalent to using the
+*pressure/mean* style with identical settings in each dimension. However,
+it would not be the same if settings (e.g. gain constants) were used in
+the *x* and *y* dimensions or if the *pressure/mean* command was only applied
+along one dimension.
+
+----------
+
+For the *xy*, *xz*, and *yz* parameters, this is the meaning of the
+styles and values provided by this fix.  Note that changing the
+tilt factors of a triclinic box does not change its volume.
+
+The *pressure* style adjusts a tilt factor to control the corresponding
+off-diagonal component of the pressure tensor. This option attempts to
+maintain a specified target value using a linear controller where the
+tilt factor T evolves according to the equation
+
+.. parsed-literal::
+
+   \frac{d T(t)}{dt} = L(t) k (P - P_t)
+
+where :math:`k` is a proportional gain constant, :math:`P_t` is the
+target pressure, :math:`P` is the current pressure, and :math:`L` is
+the perpendicular box length. The target pressure accepts either a
+constant numeric value or a LAMMPS :ref:`variable
+<variable>`. Notably, this variable can be a function of time or other
+components of the pressure tensor. By default, :math:`k` has units of
+1/(time * pressure) although this will change if the
+*normalize/pessure* option is set as :ref:`discussed below
+<deform_normalize>`.  There is no proven method to choosing an
+appropriate value of :math:`k` as it will depend on the specific
+details of a simulation and testing different values is
+recommended. One can also apply a maximum limit to the magnitude of
+the applied strain using the :ref:`max/rate <deform_max_rate>` option.
+
+----------
+
+The *box* parameter provides an additional control over the *x*, *y*,
+and *z* box lengths by isotropically dilating or contracting the box
+to either maintain a fixed mean pressure or volume. This isotropic
+scaling is applied after the box is deformed by the above *x*, *y*,
+*z*, *xy*, *xz*, and *yz* styles, acting as a second deformation
+step. This parameter will change the overall strain rate in the *x*,
+*y*, or *z* dimensions.  This parameter can only be used in
+combination with the *x*, *y*, or *z* commands: *vel*, *erate*,
+*trate*, *pressure*, or *wiggle*. This is the meaning of its styles
+and values.
+
+The *volume* style isotropically scales box lengths to maintain a constant
+box volume in response to deformation from other parameters. This style
+may be useful in scenarios where one wants to apply a constant deviatoric
+pressure using *pressure* styles in the *x*, *y*, and *z* dimensions (
+deforming the shape of the box), while maintaining a constant volume.
+
+The *pressure* style isotropically scales box lengths in an attempt to
+maintain a target mean pressure (the trace of the pressure tensor) of the
+system. This is accomplished by isotropically scaling all box lengths
+:math:`L` by an additional factor of :math:`k (P_t - P_m)` where :math:`k`
+is the proportional gain constant, :math:`P_t` is the target pressure, and
+:math:`P_m` is the current mean pressure. This style may be useful in
+scenarios where one wants to apply a constant deviatoric strain rate
+using various strain-based styles (e.g. *trate*) along the *x*, *y*, and *z*
+dimensions (deforming the shape of the box), while maintaining a mean pressure.
+
+----------
+
+The optional keywords provided by this fix are described below.
+
+.. _deform_normalize:
+
+The *normalize/pressure* keyword changes how box dimensions evolve when
+using the *pressure* or *pressure/mean* deformation styles. If the
+*deform/normalize* value is set to *yes*, then the deviation from the
+target pressure is normalized by the absolute value of the target
+pressure such that the proportional gain constant scales a percentage
+error and has units of 1/time. If the target pressure is ever zero, this
+will produce an error unless the *max/rate* keyword is defined,
+described below, which will cap the divergence.
+
+.. _deform_max_rate:
+
+The *max/rate* keyword sets an upper threshold, *rate*, that limits the
+maximum magnitude of the instantaneous strain rate applied in any dimension.
+This keyword only applies to the *pressure* and *pressure/mean* options. If
+a pressure-controlled rate is used for both *box* and either *x*, *y*, or
+*z*, then this threshold will apply separately to each individual controller
+such that the cumulative strain rate on a box dimension may be up to twice
+the value of *rate*.
+
+.. _deform_couple:
+
+The *couple* keyword allows two or three of the diagonal components of
+the pressure tensor to be "coupled" together for the *pressure* option.
+The value specified with the keyword determines which are coupled. For
+example, *xz* means the *Pxx* and *Pzz* components of the stress tensor
+are coupled. *Xyz* means all 3 diagonal components are coupled. Coupling
+means two things: the instantaneous stress will be computed as an average
+of the corresponding diagonal components, and the coupled box dimensions
+will be changed together in lockstep, meaning coupled dimensions will be
+dilated or contracted by the same percentage every timestep. If a *pressure*
+style is defined for more than one coupled dimension, the target pressures
+and gain constants must be identical. Alternatively, if a *pressure*
+style is only defined for one of the coupled dimensions, its settings are
+copied to other dimensions with undefined styles. *Couple xyz* can be used
+for a 2d simulation; the *z* dimension is simply ignored.
+
+.. _deform_balance:
+
+The *vol/balance/p* keyword modifies the behavior of the *volume* style when
+applied to two of the *x*, *y*, and *z* dimensions. Instead of straining
+the two dimensions in lockstep, the two dimensions are allowed to
+separately dilate or contract in a manner to maintain a constant
+volume while simultaneously trying to keep the pressure along each
+dimension equal using a method described in :ref:`(Huang2014) <Huang2014>`.
+
+----------
+
+If any pressure controls are used, this fix computes a temperature and
+pressure each timestep. To do this, the fix creates its own computes
+of style "temp" and "pressure", as if these commands had been issued:
+
+.. code-block:: LAMMPS
+
+   compute fix-ID_temp group-ID temp
+   compute fix-ID_press group-ID pressure fix-ID_temp
+
+See the :doc:`compute temp <compute_temp>` and :doc:`compute pressure
+<compute_pressure>` commands for details.  Note that the IDs of the
+new computes are the fix-ID + underscore + "temp" or fix_ID
++ underscore + "press", and the group for the new computes is the same
+as the fix group.
+
+Note that these are NOT the computes used by thermodynamic output (see
+the :doc:`thermo_style <thermo_style>` command) with ID =
+*thermo_temp* and *thermo_press*.  This means you can change the
+attributes of this fix's temperature or pressure via the
+:doc:`compute_modify <compute_modify>` command or print this
+temperature or pressure during thermodynamic output via the
+:doc:`thermo_style custom <thermo_style>` command using the
+appropriate compute-ID. It also means that changing attributes of
+*thermo_temp* or *thermo_press* will have no effect on this fix.
+
+Restart, fix_modify, output, run start/stop, minimize info
+"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+
+This fix will restore the initial box settings from :doc:`binary
+restart files <restart>`, which allows the fix to be properly continue
+deformation, when using the start/stop options of the :doc:`run <run>`
+command.  No global or per-atom quantities are stored by this fix for
+access by various :doc:`output commands <Howto_output>`.
+
+If any pressure controls are used, the :doc:`fix_modify <fix_modify>`
+*temp* and *press* options are supported by this fix, unlike in
+:doc:`fix deform <fix_deform>`.  You can use them to assign a
+:doc:`compute <compute>` you have defined to this fix which will be
+used in its temperature and pressure calculations.  If you do this,
+note that the kinetic energy derived from the compute temperature
+should be consistent with the virial term computed using all atoms for
+the pressure.  LAMMPS will warn you if you choose to compute
+temperature on a subset of atoms.
+
+This fix can perform deformation over multiple runs, using the *start*
+and *stop* keywords of the :doc:`run <run>` command.  See the
+:doc:`run <run>` command for details of how to do this.
+
+This fix is not invoked during :doc:`energy minimization <minimize>`.
+
+Restrictions
+""""""""""""
+
+You cannot apply x, y, or z deformations to a dimension that is
+shrink-wrapped via the :doc:`boundary <boundary>` command.
+
+You cannot apply xy, yz, or xz deformations to a second dimension (y
+in xy) that is shrink-wrapped via the :doc:`boundary <boundary>`
+command.
+
+Related commands
+""""""""""""""""
+
+:doc:`fix deform <fix_deform>`, :doc:`change_box <change_box>`
+
+Default
+"""""""
+
+The option defaults are normalize/pressure = no.
+
+----------
+
+.. _Huang2014:
+
+**(Huang2014)** X. Huang, "Exploring critical-state behavior using DEM",
+Doctoral dissertation, Imperial College. (2014). https://doi.org/10.25560/25316
diff --git a/doc/src/fix_electrode.rst b/doc/src/fix_electrode.rst
index 3d543f08d2..8a7a44454d 100644
--- a/doc/src/fix_electrode.rst
+++ b/doc/src/fix_electrode.rst
@@ -45,7 +45,7 @@ Syntax
                 rng_v = integer used to initialize random number generator
 
 * zero or more keyword/value pairs may be appended
-* keyword = *algo* or *symm* or *couple* or *etypes* or *ffield* or *write_mat* or *write_inv* or *read_mat* or *read_inv*
+* keyword = *algo* or *symm* or *couple* or *etypes* or *ffield* or *write_mat* or *write_inv* or *read_mat* or *read_inv* or *qtotal* or *eta*
 
 .. parsed-literal::
 
@@ -68,6 +68,10 @@ Syntax
         filename = file from which to read elastance matrix
     *read_inv* value = filename
         filename = file from which to read inverted matrix
+    *qtotal* value = number or *v_* equal-style variable
+        add overall potential so that all electrode charges add up to *qtotal*
+    *eta* value = d_propname
+        d_propname = a custom double vector defined via fix property/atom
 
 Examples
 """"""""
@@ -249,6 +253,26 @@ be enabled if any electrode particle has the same type as any
 electrolyte particle (which would be unusual in a typical simulation)
 and the fix will issue an error in that case.
 
+.. versionadded:: TBD
+
+The keyword *qtotal* causes *fix electrode/conp* and *fix electrode/thermo*
+to add an overall potential to all electrodes so that the total charge on
+the electrodes is a specified amount (which may be an equal-style variable).
+For example, if a user wanted to simulate a solution of excess cations
+such that the total electrolyte charge is +2, setting *qtotal -2* would cause
+the total electrode charge to be -2, so that the simulation box remains overall
+electroneutral. Since *fix electrode/conq* constrains the total charges of
+individual electrodes, and since *symm on* constrains the total charge of all
+electrodes to be zero, either option is incompatible with the *qtotal* keyword
+(even if *qtotal* is set to zero).
+
+.. versionadded:: TBD
+
+The keyword *eta* takes the name of a custom double vector defined via fix
+property/atom.  The values will be used instead of the standard eta value.  The
+property/atom fix must be for vector of double values and use the *ghost on*
+option.
+
 Restart, fix_modify, output, run start/stop, minimize info
 """""""""""""""""""""""""""""""""""""""""""""""""""""""""""
 
diff --git a/doc/src/fix_gcmc.rst b/doc/src/fix_gcmc.rst
index 13ae509684..a21e85d803 100644
--- a/doc/src/fix_gcmc.rst
+++ b/doc/src/fix_gcmc.rst
@@ -440,8 +440,11 @@ This fix is part of the MC package.  It is only enabled if LAMMPS was
 built with that package.  See the :doc:`Build package <Build_package>`
 doc page for more info.
 
+This fix style requires an :doc:`atom style <atom_style>` with per atom
+type masses.
+
 Do not set "neigh_modify once yes" or else this fix will never be
-called.  Reneighboring is required.
+called.  Reneighboring is **required**.
 
 Only usable for 3D simulations.
 
diff --git a/doc/src/fix_nonaffine_displacement.rst b/doc/src/fix_nonaffine_displacement.rst
index c6dfbc2e49..0a271ebc32 100644
--- a/doc/src/fix_nonaffine_displacement.rst
+++ b/doc/src/fix_nonaffine_displacement.rst
@@ -86,8 +86,8 @@ Both of these methods require defining a reference state. With the *fixed* refer
 style, the user picks a specific timestep *nstep* at which particle positions are saved.
 If peratom data is accessed from this compute prior to this timestep, it will simply be
 zeroed. The *update* reference style implies the reference state will be updated every
-*nstep* timesteps. The *offset* reference only applies to the *d2min* metric and will
-update the reference state *nstep* timesteps before a multiple of *nevery* timesteps.
+*nstep* timesteps. The *offset* reference will update the reference state *nstep*
+timesteps before a multiple of *nevery* timesteps.
 
 
 ----------
diff --git a/doc/src/fix_property_atom.rst b/doc/src/fix_property_atom.rst
index b177fe9a96..d20358b7a7 100644
--- a/doc/src/fix_property_atom.rst
+++ b/doc/src/fix_property_atom.rst
@@ -22,6 +22,8 @@ Syntax
        *mol* = molecule IDs
        *q* = charge
        *rmass* = per-atom mass
+       *temperature* = internal temperature of atom
+       *heatflow* = internal heat flow of atom
        i_name = new integer vector referenced by name
        d_name = new floating-point vector referenced by name
        i2_name = new integer array referenced by name
@@ -59,14 +61,18 @@ these properties for each atom in the system when a data file is read.
 This fix augments the set of per-atom properties with new custom
 ones. This can be useful in several scenarios.
 
-If the atom style does not define molecule IDs, per-atom charge, or
-per-atom mass, they can be added using the *mol*\ , *q* or *rmass*
+If the atom style does not define molecule IDs, per-atom charge,
+per-atom mass, internal temperature, or internal heat flow, they can
+be added using the *mol*\ , *q*, *rmass*, *temperature*, or *heatflow*
 keywords.  This could be useful to define "molecules" to use as rigid
 bodies with the :doc:`fix rigid <fix_rigid>` command, or to carry
 around an extra flag with atoms (stored as a molecule ID) that can be
 used by various commands like :doc:`compute chunk/atom
 <compute_chunk_atom>` to group atoms without having to use the group
 command (which is limited to a total of 32 groups including *all*\ ).
+For finite-size particles, an internal temperature and heat flow can
+be used to model heat conduction as in the
+:doc:`GRANULAR package <Howto_granular>`.
 
 Another application is to use the *rmass* flag in order to have
 per-atom masses instead of per-type masses.  This could be used to
@@ -85,9 +91,10 @@ properties that are not needed such as bond lists, which incurs some
 overhead when there are no bonds.
 
 In the future, we may add additional existing per-atom properties to
-fix property/atom, similar to *mol*\ , *q* or *rmass*\ , which
-"turn-on" specific properties defined by some atom styles, so they can
-be easily used by atom styles that do not define them.
+fix property/atom, similar to *mol*\ , *q*, *rmass*\ , *temperature*\ ,
+or *heatflow* which "turn-on" specific properties defined by some atom
+styles, so they can be easily used by atom styles that do not define
+them.
 
 More generally, the *i_name* and *d_name* options allow one or more
 new custom per-atom vectors to be defined.  Likewise the *i2_name* and
diff --git a/doc/src/fix_sgcmc.rst b/doc/src/fix_sgcmc.rst
index 63cfaf22da..bcdbdf2736 100644
--- a/doc/src/fix_sgcmc.rst
+++ b/doc/src/fix_sgcmc.rst
@@ -155,6 +155,9 @@ This fix is part of the MC package. It is only enabled if LAMMPS was
 built with that package.  See the :doc:`Build package <Build_package>`
 page for more info.
 
+This fix style requires an :doc:`atom style <atom_style>` with per atom
+type masses.
+
 At present the fix provides optimized subroutines for EAM type
 potentials (see above) that calculate potential energy changes due to
 *local* atom type swaps very efficiently.  Other potentials are
diff --git a/doc/src/fix_ttm.rst b/doc/src/fix_ttm.rst
index ccf7f16554..5a7f864686 100644
--- a/doc/src/fix_ttm.rst
+++ b/doc/src/fix_ttm.rst
@@ -96,11 +96,11 @@ each processor, which is acceptable when the overall grid is reasonably
 small.  For larger grids you should use fix *ttm/grid* instead.
 
 Fix *ttm/mod* adds options to account for external heat sources (e.g. at
-a surface) and for specifying parameters that allow the electronic
-heat capacity to depend strongly on electronic temperature.  It is
-more expensive computationally than fix *ttm* because it treats the
-thermal diffusion equation as non-linear.  More details on fix *ttm/mod*
-are given below.
+a surface) and for specifying parameters that allow the electronic heat
+capacity to depend strongly on electronic temperature.  It is more
+expensive computationally than fix *ttm* because it treats the thermal
+diffusion equation as non-linear.  More details on fix *ttm/mod* are
+given below.
 
 Heat transfer between the electronic and atomic subsystems is carried
 out via an inhomogeneous Langevin thermostat.  Only atoms in the fix
@@ -303,15 +303,15 @@ The current fix ttm/mod implementation allows TTM simulations with a
 vacuum. The vacuum region is defined as the grid cells with zero
 electronic temperature. The numerical scheme does not allow energy
 exchange with such cells. Since the material can expand to previously
-unoccupied region in some simulations, the vacuum border can be
-allowed to move. It is controlled by the *surface_movement* parameter
-in the *init_file*. If it is set to 1, then "vacuum" cells can be
-changed to "electron-filled" cells with the temperature *T_e_min* if
-atoms move into them (currently only implemented for the case of
-1-dimensional motion of flat surface normal to the X axis). The
-initial borders of vacuum can be set in the *init_file* via *lsurface*
-and *rsurface* parameters. In this case, electronic pressure gradient
-is calculated as
+unoccupied region in some simulations, the vacuum border can be allowed
+to move. It is controlled by the *surface_movement* parameter in the
+*init_file*. If it is set to 1, then "vacuum" cells can be changed to
+"electron-filled" cells with the temperature *T_e_min* if atoms move
+into them (currently only implemented for the case of 1-dimensional
+motion of a flat surface normal to the X axis). The initial locations of
+the interfaces of the electron density to the vacuum can be set in the
+*init_file* via *lsurface* and *rsurface* parameters. In this case,
+electronic pressure gradient is calculated as
 
 .. math::
 
diff --git a/doc/src/fix_wall_flow.rst b/doc/src/fix_wall_flow.rst
new file mode 100644
index 0000000000..b40ba9697f
--- /dev/null
+++ b/doc/src/fix_wall_flow.rst
@@ -0,0 +1,175 @@
+.. index:: fix wall/flow
+.. index:: fix wall/flow/kk
+
+fix wall/flow command
+=====================
+
+Accelerator Variants: *wall/flow/kk*
+
+Syntax
+""""""
+
+.. code-block:: LAMMPS
+
+   fix ID group-ID wall/flow axis vflow T seed N coords ... keyword value
+
+* ID, group-ID are documented in :doc:`fix <fix>` command
+* wall/flow = style name of this fix command
+* axis = flow axis (*x*, *y*, or *z*)
+* vflow = generated flow velocity in *axis* direction (velocity units)
+* T = flow temperature (temperature units)
+* seed = random seed for stochasticity (positive integer)
+* N = number of walls
+* coords = list of N wall positions along the *axis* direction in ascending order (distance units)
+* zero or more keyword/value pairs may be appended
+* keyword = *units*
+
+  .. parsed-literal::
+
+       *units* value = *lattice* or *box*
+         *lattice* = wall positions are defined in lattice units
+         *box* = the wall positions are defined in simulation box units
+
+Examples
+""""""""
+
+.. code-block:: LAMMPS
+
+   fix 1 all wall/flow x 0.4 1.5 593894 4 2.0 4.0 6.0 8.0
+
+Description
+"""""""""""
+
+.. versionadded:: TBD
+
+This fix implements flow boundary conditions (FBC) introduced in
+:ref:`(Pavlov1) <fbc-Pavlov1>` and :ref:`(Pavlov2) <fbc-Pavlov2>`.
+The goal is to generate a stationary flow with a shifted Maxwell
+velocity distribution:
+
+.. math::
+
+   f_a(v_a) \propto \exp{\left(-\frac{m (v_a-v_{\text{flow}})^2}{2 kB T}\right)}
+
+where :math:`v_a` is the component of velocity along the specified
+*axis* argument (a = x,y,z), :math:`v_{\text{flow}}` is the flow
+velocity specified as the *vflow* argument, *T* is the specified flow
+temperature, *m* is the particle mass, and *kB* is the Boltzmann
+constant.
+
+This is achieved by defining a series of *N* transparent walls along
+the flow *axis* direction.  Each wall is at the specified position
+listed in the *coords* argument.  Note that an additional transparent
+wall is defined by the code at the boundary of the (periodic)
+simulation domain in the *axis* direction.  So there are effectively
+N+1 walls.
+
+Each time a particle in the specified group passes through one of the
+transparent walls, its velocity is re-assigned.  Particles not in the
+group do not interact with the wall. This can be used, for example, to
+add obstacles composed of atoms, or to simulate a solution of complex
+molecules in a one-atom liquid (note that the fix has been tested for
+one-atom systems only).
+
+Conceptually, the velocity re-assignment represents creation of a new
+particle within the system with simultaneous removal of the particle
+which passed through the wall.  The velocity components in directions
+parallel to the wall are re-assigned according to the standard Maxwell
+velocity distribution for the specified temperature *T*.  The velocity
+component perpendicular to the wall is re-assigned according to the
+shifted Maxwell distribution defined above:
+
+.. math::
+
+   f_{\text{a generated}}(v_a) \propto v_a f_a(v_a)
+
+It can be shown that for an ideal-gas scenario this procedure makes
+the velocity distribution of particles between walls exactly as
+desired.
+
+Since in most cases simulated systems are not an ideal gas, multiple
+walls can be defined, since a single wall may not be sufficient for
+maintaining a stationary flow without "congestion" which can manifest
+itself as regions in the flow with increased particle density located
+upstream from static obstacles.
+
+For the same reason, the actual temperature and velocity of the
+generated flow may differ from what is requested.  The degree of
+discrepancy is determined by how different from an ideal gas the
+simulated system is.  Therefore, a calibration procedure may be
+required for such a system as described in :ref:`(Pavlov)
+<fbc-Pavlov2>`.
+
+Note that the interactions between particles on different sides of a
+transparent wall are not disabled or neglected.  Likewise particle
+positions are not altered by the velocity reassignment.  This removes
+the need to modify the force field to work correctly in cases when a
+particle is close to a wall.
+
+For example, if particle positions were uniformly redistributed across
+the surface of a wall, two particles could end up too close to each
+other, potentially causing the simulation to explode.  However due to
+this compromise, some collective phenomena such as regions with
+increased/decreased density or collective movements are not fully
+removed when particles cross a wall.  This unwanted consequence can
+also be potentially mitigated by using more multiple walls.
+
+.. note::
+
+  When the specified flow has a high velocity, a lost atoms error can
+  occur (see :doc:`error messages <Errors_messages>`).  If this
+  happens, you should ensure the checks for neighbor list rebuilds,
+  set via the :doc:`neigh_modify <neigh_modify>` command, are as
+  conservative as possible (every timestep if needed).  Those are the
+  default settings.
+
+Restart, fix_modify, output, run start/stop, minimize info
+"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+
+No information about this fix is written to :doc:`binary restart files
+<restart>`.
+
+None of the :doc:`fix_modify <fix_modify>` options are relevant to
+this fix.
+
+No global or per-atom quantities are stored by this fix for access by
+various :doc:`output commands <Howto_output>`.
+
+No parameter of this fix can be used with the *start/stop* keywords of
+the :doc:`run <run>` command.
+
+This fix is not invoked during :doc:`energy minimization <minimize>`.
+
+Restrictions
+""""""""""""
+
+Fix *wall_flow* is part of the EXTRA-FIX package.  It is only enabled
+if LAMMPS was built with that package.  See the :doc:`Build package
+<Build_package>` page for more info.
+
+Flow boundary conditions should not be used with rigid bodies such as
+those defined by a "fix rigid" command.
+
+This fix can only be used with periodic boundary conditions along the
+flow axis. The size of the box in this direction must not change. Also,
+the fix is designed to work only in an orthogonal simulation box.
+
+Related commands
+""""""""""""""""
+
+:doc:`fix wall/reflect <fix_wall>` command
+
+Default
+"""""""
+
+The default for the units keyword is lattice.
+
+----------
+
+.. _fbc-Pavlov1:
+
+**(Pavlov1)** Pavlov, Kolotinskii, Stegailov, "GPU-Based Molecular Dynamics of Turbulent Liquid Flows with OpenMM", Proceedings of PPAM-2022, LNCS (Springer), vol. 13826, pp. 346-358 (2023)
+
+.. _fbc-Pavlov2:
+
+**(Pavlov2)** Pavlov, Galigerov, Kolotinskii, Nikolskiy, Stegailov, "GPU-based Molecular Dynamics of Fluid Flows: Reaching for Turbulence", Int. J. High Perf. Comp. Appl., (2024)
diff --git a/doc/src/fix_widom.rst b/doc/src/fix_widom.rst
index ff66095db5..43e081800f 100644
--- a/doc/src/fix_widom.rst
+++ b/doc/src/fix_widom.rst
@@ -195,8 +195,11 @@ doc page for more info.
 Do not set "neigh_modify once yes" or else this fix will never be
 called.  Reneighboring is **required**.
 
-Can be run in parallel, but aspects of the GCMC part will not scale well
-in parallel. Only usable for 3D simulations.
+This fix style requires an :doc:`atom style <atom_style>` with per atom
+type masses.
+
+Can be run in parallel, but some aspects of the insertion procedure
+will not scale well in parallel. Only usable for 3D simulations.
 
 
 Related commands
diff --git a/doc/src/pair_airebo.rst b/doc/src/pair_airebo.rst
index 9a1e4e5518..ce574cc734 100644
--- a/doc/src/pair_airebo.rst
+++ b/doc/src/pair_airebo.rst
@@ -156,7 +156,7 @@ pair_coeff command:
 The first 2 arguments must be \* \* so as to span all LAMMPS atom types.
 The first three C arguments map LAMMPS atom types 1,2,3 to the C
 element in the AIREBO file.  The final H argument maps LAMMPS atom
-type 4 to the H element in the SW file.  If a mapping value is
+type 4 to the H element in the AIREBO file.  If a mapping value is
 specified as NULL, the mapping is not performed.  This can be used
 when a *airebo* potential is used as part of the *hybrid* pair style.
 The NULL values are placeholders for atom types that will be used with
@@ -222,12 +222,12 @@ enabled if LAMMPS was built with that package.  See the :doc:`Build package <Bui
 These pair potentials require the :doc:`newton <newton>` setting to be
 "on" for pair interactions.
 
-The CH.airebo and CH.airebo-m potential files provided with LAMMPS
-(see the potentials directory) are parameterized for metal :doc:`units <units>`.
-You can use the AIREBO, AIREBO-M or REBO potential with any LAMMPS units,
-but you would need to create your own AIREBO or AIREBO-M potential file
-with coefficients listed in the appropriate units, if your simulation
-does not use "metal" units.
+The CH.airebo and CH.airebo-m potential files provided with LAMMPS (see
+the potentials directory) are parameterized for metal :doc:`units
+<units>`.  You can use the pair styles with *any* LAMMPS units, but you
+would need to create your own AIREBO or AIREBO-M potential file with
+coefficients listed in the appropriate units, if your simulation does
+not use "metal" units.
 
 The pair styles provided here **only** support potential files parameterized
 for the elements carbon and hydrogen (designated with "C" and "H" in the
diff --git a/doc/src/pair_dsmc.rst b/doc/src/pair_dsmc.rst
index edac1d7a65..09bb5d90af 100644
--- a/doc/src/pair_dsmc.rst
+++ b/doc/src/pair_dsmc.rst
@@ -138,8 +138,12 @@ This pair style can only be used via the *pair* keyword of the
 Restrictions
 """"""""""""
 
-This style is part of the MC package.  It is only enabled if LAMMPS
-was built with that package.  See the :doc:`Build package <Build_package>` page for more info.
+This pair style is part of the MC package.  It is only enabled if LAMMPS
+was built with that package.  See the :doc:`Build package
+<Build_package>` page for more info.
+
+This pair style requires an :doc:`atom style <atom_style>` with per
+atom type masses.
 
 Related commands
 """"""""""""""""
diff --git a/doc/src/pair_meam.rst b/doc/src/pair_meam.rst
index 57c40aa6ee..bafa9fb3c9 100644
--- a/doc/src/pair_meam.rst
+++ b/doc/src/pair_meam.rst
@@ -427,8 +427,8 @@ package. They are only enabled if LAMMPS was built with that package.
 See the :doc:`Build package <Build_package>` page for more info.
 
 The maximum number of elements, that can be read from the MEAM library
-file, is determined at compile time. The default is 5.  If you need
-support for more elements, you have to change the the constant 'maxelt'
+file, is determined at compile time. The default is 8.  If you need
+support for more elements, you have to change the the constant 'MAXELT'
 at the beginning of the file ``src/MEAM/meam.h`` and update/recompile
 LAMMPS.  There is no limit on the number of atoms types.
 
diff --git a/doc/src/pair_rebomos.rst b/doc/src/pair_rebomos.rst
new file mode 100644
index 0000000000..9f4b8006c1
--- /dev/null
+++ b/doc/src/pair_rebomos.rst
@@ -0,0 +1,150 @@
+.. index:: pair_style rebomos
+.. index:: pair_style rebomos/omp
+
+pair_style rebomos command
+==========================
+
+Accelerator Variants: *rebomos/omp*
+
+Syntax
+""""""
+
+.. code-block:: LAMMPS
+
+   pair_style rebomos
+
+* rebomos = name of this pair style
+
+Examples
+""""""""
+
+.. code-block:: LAMMPS
+
+   pair_style rebomos
+   pair_coeff * * ../potentials/MoS.rebomos Mo S
+
+Example input scripts available: examples/threebody/
+
+Description
+"""""""""""
+
+.. versionadded:: TBD
+
+The *rebomos* pair style computes the interactions between molybdenum
+and sulfur atoms :ref:`(Stewart) <Stewart>` utilizing an adaptive
+interatomic reactive empirical bond order potential that is similar in
+form to the AIREBO potential :ref:`(Stuart) <Stuart2>`.  The potential
+is based on an earlier parameterizations for :math:`\text{MoS}_2`
+developed by :ref:`(Liang) <Liang>`.
+
+The REBOMoS potential consists of two terms:
+
+.. math::
+
+   E & = \frac{1}{2} \sum_i \sum_{j \neq i}
+   \left[ E^{\text{REBO}}_{ij} + E^{\text{LJ}}_{ij}  \right] \\
+
+The :math:`E^{\text{REBO}}` term describes the covalently bonded
+interactions between Mo and S atoms while the :math:`E^{\text{LJ}}` term
+describes longer range dispersion forces between layers.  A cubic spline
+function is applied to smoothly switch between covalent bonding at short
+distances to dispersion interactions at longer distances. This allows
+the model to capture bond formation and breaking events which may occur
+between adjacent MoS2 layers, edges, defects, and more.
+
+----------
+
+Only a single pair_coeff command is used with the *rebomos* pair style
+which specifies an REBOMoS potential file with parameters for Mo and S.
+These are mapped to LAMMPS atom types by specifying N additional
+arguments after the filename in the pair_coeff command, where N is the
+number of LAMMPS atom types:
+
+* filename
+* :math:`N` element names = mapping of REBOMoS elements to atom types
+
+See the :doc:`pair_coeff <pair_coeff>` page for alternate ways
+to specify the path for the potential file.
+
+As an example, if your LAMMPS simulation has three atom types and you want
+the first two to be Mo, and the third to be S, you would use the following
+pair_coeff command:
+
+.. code-block:: LAMMPS
+
+   pair_coeff * * MoS.rebomos Mo Mo S
+
+The first 2 arguments must be \* \* so as to span all LAMMPS atom types.
+The first two Mo arguments map LAMMPS atom types 1 and 2 to the Mo
+element in the REBOMoS file.  The final S argument maps LAMMPS atom type
+3 to the S element in the REBOMoS file.  If a mapping value is specified
+as NULL, the mapping is not performed.  This can be used when a
+*rebomos* potential is used as part of the *hybrid* pair style.  The
+NULL values are placeholders for atom types that will be used with other
+potentials.
+
+----------
+
+.. include:: accel_styles.rst
+
+----------
+
+Mixing, shift, table, tail correction, restart, rRESPA info
+"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+
+This pair style does not support the :doc:`pair_modify <pair_modify>`
+mix, shift, table, and tail options.
+
+This pair style does not write their information to :doc:`binary restart
+files <restart>`, since it is stored in potential files.  Thus, you need
+to re-specify the pair_style and pair_coeff commands in an input script
+that reads a restart file.
+
+This pair styles can only be used via the *pair* keyword of the
+:doc:`run_style respa <run_style>` command.  It does not support the
+*inner*, *middle*, *outer* keywords.
+
+Restrictions
+""""""""""""
+
+This pair style is part of the MANYBODY package.  It is only enabled if
+LAMMPS was built with that package.  See the :doc:`Build package
+<Build_package>` page for more info.
+
+These pair potentials require the :doc:`newton <newton>` setting to be
+"on" for pair interactions.
+
+The MoS.rebomos potential file provided with LAMMPS (see the potentials
+directory) is parameterized for metal :doc:`units <units>`.  You can use
+the *rebomos* pair style with any LAMMPS units setting, but you would
+need to create your own REBOMoS potential file with coefficients listed
+in the appropriate units.
+
+The pair style provided here **only** supports potential files parameterized
+for the elements molybdenum and sulfur (designated with "Mo" and "S" in the
+*pair_coeff* command.  Using potential files for other elements will trigger
+an error.
+
+Related commands
+""""""""""""""""
+
+:doc:`pair_coeff <pair_coeff>`, :doc:`pair style rebo <pair_airebo>`
+
+Default
+"""""""
+
+none
+
+----------
+
+.. _Stewart:
+
+**(Steward)**  Stewart, Spearot, Modelling Simul. Mater. Sci. Eng. 21, 045003, (2013).
+
+.. _Stuart2:
+
+**(Stuart)** Stuart, Tutein, Harrison, J Chem Phys, 112, 6472-6486, (2000).
+
+.. _Liang:
+
+**(Liang)**  Liang, Phillpot, Sinnott Phys. Rev. B79 245110, (2009), Erratum: Phys. Rev. B85 199903(E), (2012)
diff --git a/doc/src/pair_style.rst b/doc/src/pair_style.rst
index a2467bff2b..53bf269e1c 100644
--- a/doc/src/pair_style.rst
+++ b/doc/src/pair_style.rst
@@ -333,6 +333,7 @@ accelerated styles exist.
 * :doc:`rann <pair_rann>` -
 * :doc:`reaxff <pair_reaxff>` - ReaxFF potential
 * :doc:`rebo <pair_airebo>` - second generation REBO potential of Brenner
+* :doc:`rebomos <pair_rebomos>` - REBOMoS potential for MoS2
 * :doc:`resquared <pair_resquared>` - Everaers RE-Squared ellipsoidal potential
 * :doc:`saip/metal <pair_saip_metal>` - interlayer potential for hetero-junctions formed with hexagonal 2D materials and metal surfaces
 * :doc:`sdpd/taitwater/isothermal <pair_sdpd_taitwater_isothermal>` - smoothed dissipative particle dynamics for water at isothermal conditions
diff --git a/doc/src/processors.rst b/doc/src/processors.rst
index 921bbcc667..a11febb1c2 100644
--- a/doc/src/processors.rst
+++ b/doc/src/processors.rst
@@ -25,6 +25,8 @@ Syntax
            *numa* params = none
            *custom* params = infile
              infile = file containing grid layout
+       *numa_nodes* arg = Nn
+             Nn = number of numa domains per node
        *map* arg = *cart* or *cart/reorder* or *xyz* or *xzy* or *yxz* or *yzx* or *zxy* or *zyx*
           cart = use MPI_Cart() methods to map processors to 3d grid with reorder = 0
           cart/reorder = use MPI_Cart() methods to map processors to 3d grid with reorder = 1
@@ -159,24 +161,28 @@ surface-to-volume ratio of each processor's subdomain.
 
 The *numa* style operates similar to the *twolevel* keyword except
 that it auto-detects which cores are running on which nodes.
-Currently, it does this in only 2 levels, but it may be extended in
-the future to account for socket topology and other non-uniform memory
-access (NUMA) costs.  It also uses a different algorithm than the
-*twolevel* keyword for doing the two-level factorization of the
-simulation box into a 3d processor grid to minimize off-node
-communication, and it does its own MPI-based mapping of nodes and
+It will also subdivide the cores into numa domains. Currently, the
+number of numa domains is not autodetected and must be specified using
+the *numa_nodes* keyword; otherwise, the default value is used. The
+*numa* style uses a different algorithm than the *twolevel* keyword for
+doing the two-level factorization of the simulation box into a 3d
+processor grid to minimize off-node communication and communication
+across numa domains. It does its own MPI-based mapping of nodes and
 cores to the regular 3d grid.  Thus it may produce a different layout
 of the processors than the *twolevel* options.
 
 The *numa* style will give an error if the number of MPI processes is
 not divisible by the number of cores used per node, or any of the Px
-or Py of Pz values is greater than 1.
+or Py or Pz values is greater than 1.
 
 .. note::
 
    Unlike the *twolevel* style, the *numa* style does not require
-   any particular ordering of MPI ranks i norder to work correctly.  This
+   any particular ordering of MPI ranks in order to work correctly. This
    is because it auto-detects which processes are running on which nodes.
+   However, it assumes that the lowest ranks are in the first numa
+   domain, and so forth. MPI rank orderings that do not preserve this
+   property might result in more intranode communication between CPUs.
 
 The *custom* style uses the file *infile* to define both the 3d
 factorization and the mapping of processors to the grid.
@@ -207,6 +213,14 @@ any order, but no processor ID should appear more than once.
 
 ----------
 
+The *numa_nodes* keyword is used to specifiy the number of numa domains
+per node. It is currently only used by the *numa* style for two-level
+factorization to reduce the amount of MPI communications between CPUs.
+A good setting for this will typically be equal to the number of CPU
+sockets per node.
+
+----------
+
 The *map* keyword affects how the P processor IDs (from 0 to P-1) are
 mapped to the 3d grid of processors.  It is only used by the
 *onelevel* and *twolevel* grid settings.
@@ -356,5 +370,5 @@ Related commands
 Default
 """""""
 
-The option defaults are Px Py Pz = \* \* \*, grid = onelevel, and map =
-cart.
+The option defaults are Px Py Pz = \* \* \*, grid = onelevel, map =
+cart, and numa_nodes = 2.
diff --git a/doc/utils/sphinx-config/false_positives.txt b/doc/utils/sphinx-config/false_positives.txt
index dc9fdf24bd..39be61d332 100644
--- a/doc/utils/sphinx-config/false_positives.txt
+++ b/doc/utils/sphinx-config/false_positives.txt
@@ -1783,6 +1783,7 @@ Kolafa
 Kollman
 kolmogorov
 Kolmogorov
+Kolotinskii
 Kondor
 konglt
 Koning
@@ -2279,6 +2280,7 @@ morris
 Morriss
 morse
 Morteza
+MoS
 Mosayebi
 Moseler
 Moskalev
@@ -2791,6 +2793,7 @@ PEigenDense
 Peng
 peptide
 peratom
+Perf
 Pergamon
 pergrid
 peri
@@ -3087,6 +3090,7 @@ reaxff
 ReaxFF
 REAXFF
 rebo
+rebomos
 recurse
 recursing
 Ree
@@ -3614,6 +3618,7 @@ tesselation
 tesselations
 Tetot
 tex
+textrm
 tfac
 tfmc
 tfMC
@@ -3901,7 +3906,9 @@ Verlet
 versa
 Verstraelen
 ves
+vf
 vflag
+vflow
 vfrac
 vhi
 vibrational
diff --git a/examples/PACKAGES/electrode/madelung/eval.py b/examples/PACKAGES/electrode/madelung/eval.py
index 2f5a355d9b..feda0e384e 100644
--- a/examples/PACKAGES/electrode/madelung/eval.py
+++ b/examples/PACKAGES/electrode/madelung/eval.py
@@ -1,7 +1,7 @@
 #!/usr/env/python3
 
-import sys
 import os.path as op
+import sys
 
 
 def rel_error(out, ref):
@@ -49,5 +49,5 @@ for label, ref, out in out_lines:
     error = rel_error(out, ref)
     lines.append(f"{label}: {out:.5f}, {error:.5f}\n")
 
-with open("madelung.txt", 'a') as f:
+with open("madelung.txt", "a") as f:
     f.writelines(lines)
diff --git a/examples/PACKAGES/electrode/madelung/in.eta b/examples/PACKAGES/electrode/madelung/in.eta
new file mode 100644
index 0000000000..3a45bb1bf5
--- /dev/null
+++ b/examples/PACKAGES/electrode/madelung/in.eta
@@ -0,0 +1,14 @@
+boundary p p f
+kspace_style ewald/electrode 1.0e-8
+kspace_modify slab 8.0 # ew3dc
+
+include "settings.mod" # styles, computes, groups and fixes
+
+thermo_style custom step pe c_qbot c_qtop
+fix feta all property/atom d_eta ghost on
+set group bot d_eta 2.0
+set group top d_eta 2.0
+fix conp bot electrode/conp 0 2 couple top 1 symm on eta d_eta write_inv inv.csv write_vec vec.csv
+
+run 0
+
diff --git a/examples/PACKAGES/electrode/madelung/in.eta_cg b/examples/PACKAGES/electrode/madelung/in.eta_cg
new file mode 100644
index 0000000000..5ac8cddf17
--- /dev/null
+++ b/examples/PACKAGES/electrode/madelung/in.eta_cg
@@ -0,0 +1,14 @@
+boundary p p f
+kspace_style ewald/electrode 1.0e-8
+kspace_modify slab 8.0 # ew3dc
+
+include "settings.mod" # styles, computes, groups and fixes
+
+thermo_style custom step pe c_qbot c_qtop
+fix feta all property/atom d_eta ghost on
+set group bot d_eta 0.5
+set group top d_eta 3.0
+fix conp bot electrode/conp 0 2 couple top 1 symm on eta d_eta algo cg 1e-6
+
+run 0
+
diff --git a/examples/PACKAGES/electrode/madelung/in.eta_mix b/examples/PACKAGES/electrode/madelung/in.eta_mix
new file mode 100644
index 0000000000..d00e008fa4
--- /dev/null
+++ b/examples/PACKAGES/electrode/madelung/in.eta_mix
@@ -0,0 +1,14 @@
+boundary p p f
+kspace_style ewald/electrode 1.0e-8
+kspace_modify slab 8.0 # ew3dc
+
+include "settings.mod" # styles, computes, groups and fixes
+
+thermo_style custom step pe c_qbot c_qtop
+fix feta all property/atom d_eta ghost on
+set group bot d_eta 0.5
+set group top d_eta 3.0
+fix conp bot electrode/conp 0 2 couple top 1 symm on eta d_eta write_inv inv.csv write_vec vec.csv
+
+run 0
+
diff --git a/examples/PACKAGES/electrode/madelung/log.19Feb2024.eta.g++.1 b/examples/PACKAGES/electrode/madelung/log.19Feb2024.eta.g++.1
new file mode 100644
index 0000000000..daf0563799
--- /dev/null
+++ b/examples/PACKAGES/electrode/madelung/log.19Feb2024.eta.g++.1
@@ -0,0 +1,138 @@
+LAMMPS (21 Nov 2023 - Development - patch_21Nov2023-668-g5b6c0c6b56)
+  using 1 OpenMP thread(s) per MPI task
+boundary p p f
+kspace_style ewald/electrode 1.0e-8
+kspace_modify slab 8.0 # ew3dc
+
+include "settings.mod" # styles, computes, groups and fixes
+# set boundary in main script because ffield is periodic
+units real
+# distribute electrode atoms among all processors:
+if "$(extract_setting(world_size) % 2) == 0" then "processors * * 2"
+
+atom_style full
+pair_style lj/cut/coul/long 12
+
+read_data "data.au-elyt"
+Reading data file ...
+  orthogonal box = (0 0 -10) to (1 1 10)
+  1 by 1 by 1 MPI processor grid
+  reading atoms ...
+  4 atoms
+Finding 1-2 1-3 1-4 neighbors ...
+  special bond factors lj:    0        0        0       
+  special bond factors coul:  0        0        0       
+     0 = max # of 1-2 neighbors
+     0 = max # of 1-3 neighbors
+     0 = max # of 1-4 neighbors
+     1 = max # of special neighbors
+  special bonds CPU = 0.000 seconds
+  read_data CPU = 0.003 seconds
+
+group bot type 1
+1 atoms in group bot
+group top type 2
+1 atoms in group top
+
+# get electrode charges
+variable q atom q
+compute qbot bot reduce sum v_q
+compute qtop top reduce sum v_q
+
+compute compute_pe all pe
+variable vpe equal c_compute_pe
+variable charge equal c_qtop
+fix fxprint all print 1 "${vpe}, ${charge}" file "out.csv"
+
+thermo_style custom step pe c_qbot c_qtop
+fix feta all property/atom d_eta ghost on
+set group bot d_eta 2.0
+Setting atom values ...
+  1 settings made for d_eta
+set group top d_eta 2.0
+Setting atom values ...
+  1 settings made for d_eta
+fix conp bot electrode/conp 0 2 couple top 1 symm on eta d_eta write_inv inv.csv write_vec vec.csv
+2 atoms in group conp_group
+
+run 0
+
+CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
+
+Your simulation uses code contributions which should be cited:
+
+- fix electrode command:
+
+@article{Ahrens2022
+author = {Ahrens-Iwers, Ludwig J.V. and Janssen, Mahijs and Tee, Shern R. and Mei{\ss}ner, Robert H.},
+doi = {10.1063/5.0099239},
+title = {{ELECTRODE: An electrochemistry package for LAMMPS}},
+journal = {The Journal of Chemical Physics},
+year = {2022}
+volume = {157},
+pages = {084801},
+}
+CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
+
+WARNING: No fixes with time integration, atoms won't move (src/verlet.cpp:60)
+Ewald/electrode initialization ...
+  using 12-bit tables for long-range coulomb (src/kspace.cpp:342)
+WARNING: For better accuracy use 'pair_modify table 0' (src/kspace.cpp:365)
+  G vector (1/distance) = 0.32261103
+  estimated absolute RMS force accuracy = 3.8272011e-06
+  estimated relative force accuracy = 1.1525502e-08
+  KSpace vectors: actual max1d max3d = 52 50 515150
+                  kxmax kymax kzmax  = 1 1 50
+Generated 3 of 3 mixed pair_coeff terms from geometric mixing rule
+Neighbor list info ...
+  update: every = 1 steps, delay = 0 steps, check = yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 14
+  ghost atom cutoff = 14
+  binsize = 7, bins = 1 1 3
+  2 neighbor lists, perpetual/occasional/extra = 2 0 0
+  (1) pair lj/cut/coul/long, perpetual
+      attributes: half, newton on
+      pair build: half/bin/newton
+      stencil: half/bin/3d
+      bin: standard
+  (2) fix electrode/conp, perpetual, copy from (1)
+      attributes: half, newton on
+      pair build: copy
+      stencil: none
+      bin: none
+WARNING: Proc sub-domain size < neighbor skin, could lead to lost atoms (src/domain.cpp:965)
+139.943964815502, 0.279214485147238
+Per MPI rank memory allocation (min/avg/max) = 144.2 | 144.2 | 144.2 Mbytes
+   Step         PotEng         c_qbot         c_qtop    
+         0   139.94396     -0.27921449     0.27921449   
+Loop time of 2.191e-06 on 1 procs for 0 steps with 4 atoms
+
+91.3% CPU use with 1 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0          | 0          | 0          |   0.0 |  0.00
+Bond    | 0          | 0          | 0          |   0.0 |  0.00
+Kspace  | 0          | 0          | 0          |   0.0 |  0.00
+Neigh   | 0          | 0          | 0          |   0.0 |  0.00
+Comm    | 0          | 0          | 0          |   0.0 |  0.00
+Output  | 0          | 0          | 0          |   0.0 |  0.00
+Modify  | 0          | 0          | 0          |   0.0 |  0.00
+Other   |            | 2.191e-06  |            |       |100.00
+
+Nlocal:              4 ave           4 max           4 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost:           3596 ave        3596 max        3596 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs:           4790 ave        4790 max        4790 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 4790
+Ave neighs/atom = 1197.5
+Ave special neighs/atom = 0
+Neighbor list builds = 0
+Dangerous builds = 0
+
+Total wall time: 0:00:00
diff --git a/examples/PACKAGES/electrode/madelung/log.19Feb2024.eta_cg.g++.1 b/examples/PACKAGES/electrode/madelung/log.19Feb2024.eta_cg.g++.1
new file mode 100644
index 0000000000..edb2e434e6
--- /dev/null
+++ b/examples/PACKAGES/electrode/madelung/log.19Feb2024.eta_cg.g++.1
@@ -0,0 +1,139 @@
+LAMMPS (21 Nov 2023 - Development - patch_21Nov2023-668-g5b6c0c6b56)
+  using 1 OpenMP thread(s) per MPI task
+boundary p p f
+kspace_style ewald/electrode 1.0e-8
+kspace_modify slab 8.0 # ew3dc
+
+include "settings.mod" # styles, computes, groups and fixes
+# set boundary in main script because ffield is periodic
+units real
+# distribute electrode atoms among all processors:
+if "$(extract_setting(world_size) % 2) == 0" then "processors * * 2"
+
+atom_style full
+pair_style lj/cut/coul/long 12
+
+read_data "data.au-elyt"
+Reading data file ...
+  orthogonal box = (0 0 -10) to (1 1 10)
+  1 by 1 by 1 MPI processor grid
+  reading atoms ...
+  4 atoms
+Finding 1-2 1-3 1-4 neighbors ...
+  special bond factors lj:    0        0        0       
+  special bond factors coul:  0        0        0       
+     0 = max # of 1-2 neighbors
+     0 = max # of 1-3 neighbors
+     0 = max # of 1-4 neighbors
+     1 = max # of special neighbors
+  special bonds CPU = 0.000 seconds
+  read_data CPU = 0.003 seconds
+
+group bot type 1
+1 atoms in group bot
+group top type 2
+1 atoms in group top
+
+# get electrode charges
+variable q atom q
+compute qbot bot reduce sum v_q
+compute qtop top reduce sum v_q
+
+compute compute_pe all pe
+variable vpe equal c_compute_pe
+variable charge equal c_qtop
+fix fxprint all print 1 "${vpe}, ${charge}" file "out.csv"
+
+thermo_style custom step pe c_qbot c_qtop
+fix feta all property/atom d_eta ghost on
+set group bot d_eta 0.5
+Setting atom values ...
+  1 settings made for d_eta
+set group top d_eta 3.0
+Setting atom values ...
+  1 settings made for d_eta
+fix conp bot electrode/conp 0 2 couple top 1 symm on eta d_eta algo cg 1e-6
+2 atoms in group conp_group
+
+run 0
+
+CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
+
+Your simulation uses code contributions which should be cited:
+
+- fix electrode command:
+
+@article{Ahrens2022
+author = {Ahrens-Iwers, Ludwig J.V. and Janssen, Mahijs and Tee, Shern R. and Mei{\ss}ner, Robert H.},
+doi = {10.1063/5.0099239},
+title = {{ELECTRODE: An electrochemistry package for LAMMPS}},
+journal = {The Journal of Chemical Physics},
+year = {2022}
+volume = {157},
+pages = {084801},
+}
+CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
+
+WARNING: No fixes with time integration, atoms won't move (src/verlet.cpp:60)
+Ewald/electrode initialization ...
+  using 12-bit tables for long-range coulomb (src/kspace.cpp:342)
+WARNING: For better accuracy use 'pair_modify table 0' (src/kspace.cpp:365)
+  G vector (1/distance) = 0.32261103
+  estimated absolute RMS force accuracy = 3.8272011e-06
+  estimated relative force accuracy = 1.1525502e-08
+  KSpace vectors: actual max1d max3d = 52 50 515150
+                  kxmax kymax kzmax  = 1 1 50
+Generated 3 of 3 mixed pair_coeff terms from geometric mixing rule
+Neighbor list info ...
+  update: every = 1 steps, delay = 0 steps, check = yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 14
+  ghost atom cutoff = 14
+  binsize = 7, bins = 1 1 3
+  2 neighbor lists, perpetual/occasional/extra = 2 0 0
+  (1) pair lj/cut/coul/long, perpetual
+      attributes: half, newton on
+      pair build: half/bin/newton
+      stencil: half/bin/3d
+      bin: standard
+  (2) fix electrode/conp, perpetual, copy from (1)
+      attributes: half, newton on
+      pair build: copy
+      stencil: none
+      bin: none
+WARNING: Proc sub-domain size < neighbor skin, could lead to lost atoms (src/domain.cpp:965)
+165.519373910316, 0.29521534552818
+Per MPI rank memory allocation (min/avg/max) = 144.2 | 144.2 | 144.2 Mbytes
+   Step         PotEng         c_qbot         c_qtop    
+         0   165.51937     -0.29521535     0.29521535   
+Loop time of 2.797e-06 on 1 procs for 0 steps with 4 atoms
+
+71.5% CPU use with 1 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0          | 0          | 0          |   0.0 |  0.00
+Bond    | 0          | 0          | 0          |   0.0 |  0.00
+Kspace  | 0          | 0          | 0          |   0.0 |  0.00
+Neigh   | 0          | 0          | 0          |   0.0 |  0.00
+Comm    | 0          | 0          | 0          |   0.0 |  0.00
+Output  | 0          | 0          | 0          |   0.0 |  0.00
+Modify  | 0          | 0          | 0          |   0.0 |  0.00
+Other   |            | 2.797e-06  |            |       |100.00
+
+Nlocal:              4 ave           4 max           4 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost:           3596 ave        3596 max        3596 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs:           4790 ave        4790 max        4790 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 4790
+Ave neighs/atom = 1197.5
+Ave special neighs/atom = 0
+Neighbor list builds = 0
+Dangerous builds = 0
+
+Average conjugate gradient steps: 1
+Total wall time: 0:00:00
diff --git a/examples/PACKAGES/electrode/madelung/log.19Feb2024.eta_mix.g++.1 b/examples/PACKAGES/electrode/madelung/log.19Feb2024.eta_mix.g++.1
new file mode 100644
index 0000000000..51eda0d870
--- /dev/null
+++ b/examples/PACKAGES/electrode/madelung/log.19Feb2024.eta_mix.g++.1
@@ -0,0 +1,138 @@
+LAMMPS (21 Nov 2023 - Development - patch_21Nov2023-668-g5b6c0c6b56)
+  using 1 OpenMP thread(s) per MPI task
+boundary p p f
+kspace_style ewald/electrode 1.0e-8
+kspace_modify slab 8.0 # ew3dc
+
+include "settings.mod" # styles, computes, groups and fixes
+# set boundary in main script because ffield is periodic
+units real
+# distribute electrode atoms among all processors:
+if "$(extract_setting(world_size) % 2) == 0" then "processors * * 2"
+
+atom_style full
+pair_style lj/cut/coul/long 12
+
+read_data "data.au-elyt"
+Reading data file ...
+  orthogonal box = (0 0 -10) to (1 1 10)
+  1 by 1 by 1 MPI processor grid
+  reading atoms ...
+  4 atoms
+Finding 1-2 1-3 1-4 neighbors ...
+  special bond factors lj:    0        0        0       
+  special bond factors coul:  0        0        0       
+     0 = max # of 1-2 neighbors
+     0 = max # of 1-3 neighbors
+     0 = max # of 1-4 neighbors
+     1 = max # of special neighbors
+  special bonds CPU = 0.000 seconds
+  read_data CPU = 0.003 seconds
+
+group bot type 1
+1 atoms in group bot
+group top type 2
+1 atoms in group top
+
+# get electrode charges
+variable q atom q
+compute qbot bot reduce sum v_q
+compute qtop top reduce sum v_q
+
+compute compute_pe all pe
+variable vpe equal c_compute_pe
+variable charge equal c_qtop
+fix fxprint all print 1 "${vpe}, ${charge}" file "out.csv"
+
+thermo_style custom step pe c_qbot c_qtop
+fix feta all property/atom d_eta ghost on
+set group bot d_eta 0.5
+Setting atom values ...
+  1 settings made for d_eta
+set group top d_eta 3.0
+Setting atom values ...
+  1 settings made for d_eta
+fix conp bot electrode/conp 0 2 couple top 1 symm on eta d_eta write_inv inv.csv write_vec vec.csv
+2 atoms in group conp_group
+
+run 0
+
+CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
+
+Your simulation uses code contributions which should be cited:
+
+- fix electrode command:
+
+@article{Ahrens2022
+author = {Ahrens-Iwers, Ludwig J.V. and Janssen, Mahijs and Tee, Shern R. and Mei{\ss}ner, Robert H.},
+doi = {10.1063/5.0099239},
+title = {{ELECTRODE: An electrochemistry package for LAMMPS}},
+journal = {The Journal of Chemical Physics},
+year = {2022}
+volume = {157},
+pages = {084801},
+}
+CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
+
+WARNING: No fixes with time integration, atoms won't move (src/verlet.cpp:60)
+Ewald/electrode initialization ...
+  using 12-bit tables for long-range coulomb (src/kspace.cpp:342)
+WARNING: For better accuracy use 'pair_modify table 0' (src/kspace.cpp:365)
+  G vector (1/distance) = 0.32261103
+  estimated absolute RMS force accuracy = 3.8272011e-06
+  estimated relative force accuracy = 1.1525502e-08
+  KSpace vectors: actual max1d max3d = 52 50 515150
+                  kxmax kymax kzmax  = 1 1 50
+Generated 3 of 3 mixed pair_coeff terms from geometric mixing rule
+Neighbor list info ...
+  update: every = 1 steps, delay = 0 steps, check = yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 14
+  ghost atom cutoff = 14
+  binsize = 7, bins = 1 1 3
+  2 neighbor lists, perpetual/occasional/extra = 2 0 0
+  (1) pair lj/cut/coul/long, perpetual
+      attributes: half, newton on
+      pair build: half/bin/newton
+      stencil: half/bin/3d
+      bin: standard
+  (2) fix electrode/conp, perpetual, copy from (1)
+      attributes: half, newton on
+      pair build: copy
+      stencil: none
+      bin: none
+WARNING: Proc sub-domain size < neighbor skin, could lead to lost atoms (src/domain.cpp:965)
+165.519373910316, 0.295215345528172
+Per MPI rank memory allocation (min/avg/max) = 144.2 | 144.2 | 144.2 Mbytes
+   Step         PotEng         c_qbot         c_qtop    
+         0   165.51937     -0.29521535     0.29521535   
+Loop time of 2.18e-06 on 1 procs for 0 steps with 4 atoms
+
+91.7% CPU use with 1 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0          | 0          | 0          |   0.0 |  0.00
+Bond    | 0          | 0          | 0          |   0.0 |  0.00
+Kspace  | 0          | 0          | 0          |   0.0 |  0.00
+Neigh   | 0          | 0          | 0          |   0.0 |  0.00
+Comm    | 0          | 0          | 0          |   0.0 |  0.00
+Output  | 0          | 0          | 0          |   0.0 |  0.00
+Modify  | 0          | 0          | 0          |   0.0 |  0.00
+Other   |            | 2.18e-06   |            |       |100.00
+
+Nlocal:              4 ave           4 max           4 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost:           3596 ave        3596 max        3596 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs:           4790 ave        4790 max        4790 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 4790
+Ave neighs/atom = 1197.5
+Ave special neighs/atom = 0
+Neighbor list builds = 0
+Dangerous builds = 0
+
+Total wall time: 0:00:00
diff --git a/examples/PACKAGES/electrode/madelung/plate_cap.py b/examples/PACKAGES/electrode/madelung/plate_cap.py
index 62d52fe102..fcca166869 100755
--- a/examples/PACKAGES/electrode/madelung/plate_cap.py
+++ b/examples/PACKAGES/electrode/madelung/plate_cap.py
@@ -3,7 +3,6 @@
 import numpy as np
 from scipy.special import erf
 
-ETA = 2
 SQRT2 = np.sqrt(2)
 COULOMB = 332.06371  #  Coulomb constant in Lammps 'real' units
 QE2F = 23.060549
@@ -17,14 +16,14 @@ def lattice(length):
     return np.array(np.meshgrid(x, y)).T.reshape(-1, 2)
 
 
-def a_element(r):
+def a_element(r, eta):
     """Coulomb contribution of two Gaussians"""
-    return erf(ETA / SQRT2 * r) / r
+    return erf(eta * r) / r
 
 
-def b_element(r, q):
+def b_element(r, q, eta):
     """Coulomb contribution of a Gaussian with a point charge"""
-    return q * erf(ETA * r) / r
+    return q * erf(eta * r) / r
 
 
 a = 1  # nearest neighbor distance i.e. lattice constant / sqrt(2)
@@ -36,59 +35,65 @@ v = np.array([-0.5, 0.5]) * (QE2F / COULOMB)
 
 # distances to images within electrode and to opposite electrode
 distances = a * np.linalg.norm(lattice(LENGTH), axis=1)
-opposite_distances = np.sqrt(np.square(distances) + distance_plates ** 2)
+opposite_distances = np.sqrt(np.square(distances) + distance_plates**2)
 
-# self interaction and within original box
-A_11 = np.sqrt(2 / np.pi) * ETA
-A_12 = erf(ETA * distance_plates / SQRT2) / distance_plates
+for name, eta_elec in [("", [2.0, 2.0]), ("_eta_mix", [0.5, 3.0])]:
+    eta_mix = np.prod(eta_elec) / np.sqrt(np.sum(np.square(eta_elec)))
+    # self interaction and within original box
+    A_11 = np.sqrt(2 / np.pi) * eta_elec[0]
+    A_22 = np.sqrt(2 / np.pi) * eta_elec[1]
+    A_12 = erf(eta_mix * distance_plates) / distance_plates
 
-# interaction with periodic images
-A_11 += 4 * np.sum(a_element(distances))
-A_12 += 4 * np.sum(a_element(opposite_distances))
-A = np.array([[A_11, A_12], [A_12, A_11]])
-inv = np.linalg.inv(A)
-e = np.array([1, 1])
-inv -= np.matmul(inv, np.matmul(np.outer(e, e), inv)) / np.dot(e, np.dot(inv, e))
+    # interaction with periodic images
+    A_11 += 4 * np.sum(a_element(distances, eta_elec[0] / SQRT2))
+    A_22 += 4 * np.sum(a_element(distances, eta_elec[1] / SQRT2))
+    A_12 += 4 * np.sum(a_element(opposite_distances, eta_mix))
+    A = np.array([[A_11, A_12], [A_12, A_22]])
+    inv = np.linalg.inv(A)
+    e = np.array([1, 1])
+    inv -= np.matmul(inv, np.matmul(np.outer(e, e), inv)) / np.dot(e, np.dot(inv, e))
 
-# electrode-electrolyte interaction
-b = []
-for x in x_elec:
-    bi = 0
-    for y, q in zip(x_elyt, q_elyt):
-        d = abs(y - x)
-        bi += b_element(d, q)
-        image_distances = np.sqrt(np.square(distances) + d ** 2)
-        bi += 4 * np.sum(b_element(image_distances, q))
-    b.append(bi)
-b = np.array(b)
+    # electrode-electrolyte interaction
+    b = []
+    for x, eta in zip(x_elec, eta_elec):
+        bi = 0
+        for y, q in zip(x_elyt, q_elyt):
+            d = abs(y - x)
+            bi += b_element(d, q, eta)
+            image_distances = np.sqrt(np.square(distances) + d**2)
+            bi += 4 * np.sum(b_element(image_distances, q, eta))
+        b.append(bi)
+    b = np.array(b)
 
-# electrolyte-electrolyte energy
-elyt_11 = 4 * np.sum(1 / distances)
-distance_elyt = x_elyt[1] - x_elyt[0]
-elyt_12 = 1 / distance_elyt + 4 * np.sum(
-    1 / np.sqrt(np.square(distances) + distance_elyt ** 2)
-)
-elyt = np.array([[elyt_11, elyt_12], [elyt_12, elyt_11]])
-energy_elyt = 0.5 * np.dot(q_elyt, np.dot(elyt, q_elyt))
-
-# electrode charges and energy
-q = np.dot(inv, v - b)
-energy = COULOMB * (0.5 * np.dot(q, np.dot(A, q)) + np.dot(b, q) + energy_elyt)
-
-print(
-    "length, energy / kcal/mol, q1 / e, q2 / e, inv11 / A, inv12 / A, b1 / e/A, b2 / e/A"
-)
-print(
-    ", ".join(
-        [
-            str(LENGTH),
-            f"{energy:.8f}",
-            f"{q[0]:.10f}",
-            f"{q[1]:.10f}",
-            f"{inv[0, 0]:.10f}",
-            f"{inv[0, 1]:.10f}",
-            f"{b[0]:.8f}",
-            f"{b[1]:.8f}",
-        ]
+    # electrolyte-electrolyte energy
+    elyt_11 = 4 * np.sum(1 / distances)
+    distance_elyt = x_elyt[1] - x_elyt[0]
+    elyt_12 = 1 / distance_elyt + 4 * np.sum(
+        1 / np.sqrt(np.square(distances) + distance_elyt**2)
     )
-)
+    elyt = np.array([[elyt_11, elyt_12], [elyt_12, elyt_11]])
+    energy_elyt = 0.5 * np.dot(q_elyt, np.dot(elyt, q_elyt))
+
+    # electrode charges and energy
+    q = np.dot(inv, v - b)
+    energy = COULOMB * (0.5 * np.dot(q, np.dot(A, q)) + np.dot(b, q) + energy_elyt)
+
+    with open(f"plate_cap{name}.csv", "w") as f:
+        f.write(
+            "length, energy / kcal/mol, q1 / e, q2 / e, inv11 / A, inv12 / A, b1 / e/A, b2 / e/A\n"
+        )
+        f.write(
+            ", ".join(
+                [
+                    str(LENGTH),
+                    f"{energy:.8f}",
+                    f"{q[0]:.10f}",
+                    f"{q[1]:.10f}",
+                    f"{inv[0, 0]:.10f}",
+                    f"{inv[0, 1]:.10f}",
+                    f"{b[0]:.8f}",
+                    f"{b[1]:.8f}",
+                ]
+            )
+            + "\n"
+        )
diff --git a/examples/PACKAGES/electrode/madelung/test.sh b/examples/PACKAGES/electrode/madelung/test.sh
index edac04f5b1..a558ee6711 100644
--- a/examples/PACKAGES/electrode/madelung/test.sh
+++ b/examples/PACKAGES/electrode/madelung/test.sh
@@ -7,17 +7,27 @@ if [ ! -f $lmpbin ]; then
 fi
 
 ref_out="plate_cap.csv"
-if [ ! -f $ref_out ]; then
+ref_mix_out="plate_cap_eta_mix.csv"
+if [ ! -f $ref_out ] || [ ! -f $ref_mix_out ]; then
     echo "Generating reference data"
-    python3 plate_cap.py > $ref_out
+    python3 plate_cap.py
 fi
 
 echo "Running Lammps inputs"
+# w/o eta mixing
 rm -rf madelung.txt && touch madelung.txt
-for file in in.*; do
+for file in in.eta in.ewald-ew3dc in.ewald-ew2d in.pppm-ew3dc in.cg; do
     printf "\n$file\n" >> madelung.txt
     rm -f out.csv inv.csv vec.csv 
     $lmpbin -i $file &> /dev/null
     python3 eval.py $ref_out out.csv inv.csv vec.csv
 done
+
+# with eta mixing
+for file in in.eta_mix in.eta_cg; do
+    printf "\n$file\n" >> madelung.txt
+    rm -f out.csv inv.csv vec.csv 
+    $lmpbin -i $file &> /dev/null
+    python3 eval.py $ref_mix_out out.csv inv.csv vec.csv
+done
 cat madelung.txt
diff --git a/examples/airebo/in.airebo-0-0 b/examples/airebo/in.airebo-0-0
index 077da68912..0e71644127 100644
--- a/examples/airebo/in.airebo-0-0
+++ b/examples/airebo/in.airebo-0-0
@@ -1,22 +1,22 @@
 # AIREBO polyethelene benchmark
 
-units		    metal
-atom_style	    atomic
+units               metal
+atom_style          atomic
 
-read_data	    data.airebo
+read_data           data.airebo
 
-replicate	    17 16 2
+replicate           17 16 2
 
-neighbor	    0.5 bin
-neigh_modify	    delay 5 every 1
+neighbor            0.5 bin
+neigh_modify        delay 5 every 1
 
-pair_style	    airebo 3.0 0 0
-pair_coeff	    * * CH.airebo C H
+pair_style          airebo 3.0 0 0
+pair_coeff          * * CH.airebo C H
 
-velocity	    all create 300.0 761341
+velocity            all create 300.0 761341
 
-fix		    1 all nve
-timestep	    0.0005
+fix                 1 all nve
+timestep            0.0005
 
-thermo		    10
-run		    100
+thermo              10
+run                 100
diff --git a/examples/airebo/in.rebo2 b/examples/airebo/in.rebo2
index e06cf462ca..319a60bd50 100644
--- a/examples/airebo/in.rebo2
+++ b/examples/airebo/in.rebo2
@@ -1,22 +1,22 @@
 # REBO polyethelene benchmark
 
-units		    metal
-atom_style	    atomic
+units               metal
+atom_style          atomic
 
-read_data	    data.airebo
+read_data           data.airebo
 
-replicate	    17 16 2
+replicate           17 16 2
 
-neighbor	    0.5 bin
-neigh_modify	    delay 5 every 1
+neighbor            0.5 bin
+neigh_modify        delay 5 every 1
 
-pair_style	    rebo
-pair_coeff	    * * CH.rebo C H
+pair_style          rebo
+pair_coeff          * * CH.rebo C H
 
-velocity	    all create 300.0 761341
+velocity            all create 300.0 761341
 
-fix		    1 all nve
-timestep	    0.0005
+fix                 1 all nve
+timestep            0.0005
 
-thermo		    10
-run		    100
+thermo              10
+run                 100
diff --git a/examples/granular/in.pour.heat b/examples/granular/in.pour.heat
index 907e56dc39..cc6b03f7d0 100644
--- a/examples/granular/in.pour.heat
+++ b/examples/granular/in.pour.heat
@@ -73,7 +73,8 @@ thermo          100
 
 timestep        0.001
 
-#dump           1 all custom 1000 ${name}.dump id type radius mass x y z temperature heatflow
+compute        1 all property/atom temperature heatflow
+#dump           1 all custom 1000 ${name}.dump id type radius mass x y z c_1[*]
 
 run             100000
 
diff --git a/examples/meam/msmeam/HGa.meam b/examples/meam/msmeam/HGa.meam
deleted file mode 100644
index 9f01501c16..0000000000
--- a/examples/meam/msmeam/HGa.meam
+++ /dev/null
@@ -1,30 +0,0 @@
-bkgd_dyn        =       1
-emb_lin_neg = 1
-augt1=0 
-ialloy=1 
-rc	=	 5.9 
-#H
-attrac(1,1)=0.460 
-repuls(1,1)=0.460 
-Cmin(1,1,1)=1.3 # PuMS
-Cmax(1,1,1)= 2.80 
-nn2(1,1)=1
-#Ga
-rho0(2)         =       0.6
-attrac(2,2)=0.097 
-repuls(2,2)=0.097 
-nn2(2,2)=1
-#HGa
-attrac(1,2)=0.300 
-repuls(1,2)=0.300 
-lattce(1,2)=l12 
-re(1,2)=3.19 
-delta(1,2)=-0.48  
-alpha(1,2)=6.6 
-Cmin(1,1,2)=2.0 
-Cmin(2,1,2)= 2.0 
-Cmin(1,2,1)=2.0 
-Cmin(2,2,1)     =       1.4
-Cmin(1,2,2)     =       1.4
-Cmin(1,1,2)     =       1.4
-nn2(1,2)=1
diff --git a/examples/meam/msmeam/HGa.msmeam b/examples/meam/msmeam/HGa.msmeam
new file mode 120000
index 0000000000..5629006d1d
--- /dev/null
+++ b/examples/meam/msmeam/HGa.msmeam
@@ -0,0 +1 @@
+../../../potentials/HGa.msmeam
\ No newline at end of file
diff --git a/examples/meam/msmeam/data.msmeam.bu b/examples/meam/msmeam/data.msmeam.bu
deleted file mode 100644
index 576a3c50de..0000000000
--- a/examples/meam/msmeam/data.msmeam.bu
+++ /dev/null
@@ -1,25 +0,0 @@
-LAMMPS data file via write_data, version 16 Feb 2016, timestep = 1
-
-3 atoms
-2 atom types
-
--4.0000000000000000e+00 4.0000000000000000e+00 xlo xhi
--4.0000000000000000e+00 4.0000000000000000e+00 ylo yhi
--4.0000000000000000e+00 4.0000000000000000e+00 zlo zhi
-
-Masses
-
-1 1.0079
-2 69.723
-
-Atoms # atomic
-
-1 1 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0 0 0
-2 2 2.2000000000000002e+00 0.0000000000000000e+00 0.0000000000000000e+00 0 0 0
-3 2 2.9999999999999999e-01 2.2999999999999998e+00 0.0000000000000000e+00 0 0 0
-
-Velocities
-
-1 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
-2 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
-3 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
diff --git a/examples/meam/msmeam/in.msmeam b/examples/meam/msmeam/in.msmeam
index 82ffb89a13..e8d13f8682 100644
--- a/examples/meam/msmeam/in.msmeam
+++ b/examples/meam/msmeam/in.msmeam
@@ -1,5 +1,3 @@
-echo	both
-log	log.msmeam
 # Test of MEAM potential for HGa
 
 # ------------------------ INITIALIZATION ----------------------------
@@ -21,11 +19,11 @@ create_atoms    1 single 0 0 0  units box
 create_atoms    2 single 2.2 0 0  units box
 create_atoms    2 single 0.3 2.3 0  units box
 # ---------- Define Settings ---------------------
-variable	teng equal "c_eatoms"
+variable        teng equal "c_eatoms"
 compute pot_energy all pe/atom
 compute stress all stress/atom NULL
 dump 1 all custom 1 dump.msmeam id x y z fx fy fz c_pot_energy c_stress[1] c_stress[2] c_stress[3] c_stress[4] c_stress[5] c_stress[6]
-run	1
-write_data	data.msmeam
+run     1
+#write_data      data.msmeam
 
 print "All done!"
diff --git a/examples/meam/msmeam/library.msmeam b/examples/meam/msmeam/library.msmeam
deleted file mode 100644
index 9937eaee08..0000000000
--- a/examples/meam/msmeam/library.msmeam
+++ /dev/null
@@ -1,14 +0,0 @@
-# DATE: 2018-09-22 UNITS: metal CONTRIBUTOR: Steve Valone, smv@lanl.gov CITATION: Baskes, PRB 1992; smv, sr, mib, JNM 2010
-# ms-meam data format May 2010
-#  elt   lat    z     ielement      atwt
-#  alpha b0     b1    b2     b3     b1m     b2m   b3m      alat   esub   asub
-#    -   t0     t1    t2     t3     t1m     t2m   t3m      rozero ibar
-#  NOTE:  leading character cannot be a space
-
-'H'    'dim'  1.0   1      1.0079
-2.960  2.960  3.0   1.0    1.0    1.0    3.0  1.0       0.741  2.235  2.50
-1.0    0.44721 0.0  0.00   0.0    0.31623 0  6.70 0
-
-'Ga4'  'fcc'  12.0  31     69.723
-4.42   4.80   3.10  6.00   0.00   0.0    0.0  0.5       4.247  2.897  0.97
-1.0    1.649 1.435  0.00   0.0    0.0  2.0       0.70   0
diff --git a/examples/meam/msmeam/library.msmeam b/examples/meam/msmeam/library.msmeam
new file mode 120000
index 0000000000..2226ef99da
--- /dev/null
+++ b/examples/meam/msmeam/library.msmeam
@@ -0,0 +1 @@
+../../../potentials/library.msmeam
\ No newline at end of file
diff --git a/examples/meam/msmeam/log.1Mar2024.msmeam.g++.1 b/examples/meam/msmeam/log.1Mar2024.msmeam.g++.1
new file mode 100644
index 0000000000..70fbbdd89c
--- /dev/null
+++ b/examples/meam/msmeam/log.1Mar2024.msmeam.g++.1
@@ -0,0 +1,126 @@
+LAMMPS (7 Feb 2024 - Development - patch_7Feb2024_update1-182-g93942f2013-modified)
+OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98)
+  using 1 OpenMP thread(s) per MPI task
+# Test of MEAM potential for HGa
+
+# ------------------------ INITIALIZATION ----------------------------
+units           metal
+dimension       3
+boundary        p       p       p
+atom_style      atomic
+variable latparam equal 4.646
+variable ncell equal 3
+
+# ----------------------- ATOM DEFINITION ----------------------------
+region          box block -4 4 -4 4 -4 4
+create_box      2 box
+Created orthogonal box = (-4 -4 -4) to (4 4 4)
+  1 by 1 by 1 MPI processor grid
+
+#
+
+include potential.mod
+# NOTE: This script can be modified for different pair styles
+# See in.elastic for more info.
+
+variable Pu string H
+print "potential chosen ${Pu}"
+potential chosen H
+# Choose potential
+pair_style meam/ms
+print		"we just executed"
+we just executed
+
+pair_coeff      * * library.msmeam ${Pu} Ga4  HGa.msmeam ${Pu} Ga4
+pair_coeff      * * library.msmeam H Ga4  HGa.msmeam ${Pu} Ga4
+pair_coeff      * * library.msmeam H Ga4  HGa.msmeam H Ga4
+Reading MEAM library file library.msmeam with DATE: 2018-09-22
+# Setup neighbor style
+neighbor 1.0 bin
+neigh_modify once no every 1 delay 0 check yes
+
+# Setup minimization style
+variable dmax equal 1.0e-2
+min_style	     cg
+min_modify	     dmax ${dmax} line quadratic
+min_modify	     dmax 0.01 line quadratic
+compute eng all pe/atom
+compute eatoms all reduce sum c_eng
+
+# Setup output
+thermo		100
+thermo_style custom step temp etotal  press pxx pyy pzz pxy pxz pyz lx ly lz vol c_eatoms
+thermo_modify norm yes
+create_atoms    1 single 0 0 0  units box
+Created 1 atoms
+  using box units in orthogonal box = (-4 -4 -4) to (4 4 4)
+  create_atoms CPU = 0.000 seconds
+create_atoms    2 single 2.2 0 0  units box
+Created 1 atoms
+  using box units in orthogonal box = (-4 -4 -4) to (4 4 4)
+  create_atoms CPU = 0.000 seconds
+create_atoms    2 single 0.3 2.3 0  units box
+Created 1 atoms
+  using box units in orthogonal box = (-4 -4 -4) to (4 4 4)
+  create_atoms CPU = 0.000 seconds
+# ---------- Define Settings ---------------------
+variable        teng equal "c_eatoms"
+compute pot_energy all pe/atom
+compute stress all stress/atom NULL
+# dump 1 all custom 1 dump.msmeam id x y z fx fy fz c_pot_energy c_stress[1] c_stress[2] c_stress[3] c_stress[4] c_stress[5] c_stress[6]
+run     1
+WARNING: No fixes with time integration, atoms won't move (src/verlet.cpp:60)
+Neighbor list info ...
+  update: every = 1 steps, delay = 0 steps, check = yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 6.9
+  ghost atom cutoff = 6.9
+  binsize = 3.45, bins = 3 3 3
+  2 neighbor lists, perpetual/occasional/extra = 2 0 0
+  (1) pair meam/ms, perpetual
+      attributes: full, newton on
+      pair build: full/bin/atomonly
+      stencil: full/bin/3d
+      bin: standard
+  (2) pair meam/ms, perpetual, half/full from (1)
+      attributes: half, newton on
+      pair build: halffull/newton
+      stencil: none
+      bin: none
+Per MPI rank memory allocation (min/avg/max) = 8.587 | 8.587 | 8.587 Mbytes
+   Step          Temp          TotEng         Press           Pxx            Pyy            Pzz            Pxy            Pxz            Pyz             Lx             Ly             Lz           Volume        c_eatoms   
+         0   0              15.433079      491354.7       838670.96      635393.15      0              80195.797      0              0              8              8              8              512            15.433079    
+         1   0              15.433079      491354.7       838670.96      635393.15      0              80195.797      0              0              8              8              8              512            15.433079    
+Loop time of 4.4446e-05 on 1 procs for 1 steps with 3 atoms
+
+Performance: 1943.932 ns/day, 0.012 hours/ns, 22499.213 timesteps/s, 67.498 katom-step/s
+31.5% CPU use with 1 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 2.9908e-05 | 2.9908e-05 | 2.9908e-05 |   0.0 | 67.29
+Neigh   | 0          | 0          | 0          |   0.0 |  0.00
+Comm    | 1.033e-06  | 1.033e-06  | 1.033e-06  |   0.0 |  2.32
+Output  | 9.347e-06  | 9.347e-06  | 9.347e-06  |   0.0 | 21.03
+Modify  | 2.02e-07   | 2.02e-07   | 2.02e-07   |   0.0 |  0.45
+Other   |            | 3.956e-06  |            |       |  8.90
+
+Nlocal:              3 ave           3 max           3 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost:             78 ave          78 max          78 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs:              7 ave           7 max           7 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+FullNghs:           14 ave          14 max          14 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 14
+Ave neighs/atom = 4.6666667
+Neighbor list builds = 0
+Dangerous builds = 0
+#write_data      data.msmeam
+
+print "All done!"
+All done!
+Total wall time: 0:00:00
diff --git a/examples/meam/msmeam/log.1Mar2024.msmeam.g++.4 b/examples/meam/msmeam/log.1Mar2024.msmeam.g++.4
new file mode 100644
index 0000000000..6951a64945
--- /dev/null
+++ b/examples/meam/msmeam/log.1Mar2024.msmeam.g++.4
@@ -0,0 +1,126 @@
+LAMMPS (7 Feb 2024 - Development - patch_7Feb2024_update1-182-g93942f2013-modified)
+OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98)
+  using 1 OpenMP thread(s) per MPI task
+# Test of MEAM potential for HGa
+
+# ------------------------ INITIALIZATION ----------------------------
+units           metal
+dimension       3
+boundary        p       p       p
+atom_style      atomic
+variable latparam equal 4.646
+variable ncell equal 3
+
+# ----------------------- ATOM DEFINITION ----------------------------
+region          box block -4 4 -4 4 -4 4
+create_box      2 box
+Created orthogonal box = (-4 -4 -4) to (4 4 4)
+  1 by 2 by 2 MPI processor grid
+
+#
+
+include potential.mod
+# NOTE: This script can be modified for different pair styles
+# See in.elastic for more info.
+
+variable Pu string H
+print "potential chosen ${Pu}"
+potential chosen H
+# Choose potential
+pair_style meam/ms
+print		"we just executed"
+we just executed
+
+pair_coeff      * * library.msmeam ${Pu} Ga4  HGa.msmeam ${Pu} Ga4
+pair_coeff      * * library.msmeam H Ga4  HGa.msmeam ${Pu} Ga4
+pair_coeff      * * library.msmeam H Ga4  HGa.msmeam H Ga4
+Reading MEAM library file library.msmeam with DATE: 2018-09-22
+# Setup neighbor style
+neighbor 1.0 bin
+neigh_modify once no every 1 delay 0 check yes
+
+# Setup minimization style
+variable dmax equal 1.0e-2
+min_style	     cg
+min_modify	     dmax ${dmax} line quadratic
+min_modify	     dmax 0.01 line quadratic
+compute eng all pe/atom
+compute eatoms all reduce sum c_eng
+
+# Setup output
+thermo		100
+thermo_style custom step temp etotal  press pxx pyy pzz pxy pxz pyz lx ly lz vol c_eatoms
+thermo_modify norm yes
+create_atoms    1 single 0 0 0  units box
+Created 1 atoms
+  using box units in orthogonal box = (-4 -4 -4) to (4 4 4)
+  create_atoms CPU = 0.000 seconds
+create_atoms    2 single 2.2 0 0  units box
+Created 1 atoms
+  using box units in orthogonal box = (-4 -4 -4) to (4 4 4)
+  create_atoms CPU = 0.000 seconds
+create_atoms    2 single 0.3 2.3 0  units box
+Created 1 atoms
+  using box units in orthogonal box = (-4 -4 -4) to (4 4 4)
+  create_atoms CPU = 0.000 seconds
+# ---------- Define Settings ---------------------
+variable        teng equal "c_eatoms"
+compute pot_energy all pe/atom
+compute stress all stress/atom NULL
+# dump 1 all custom 1 dump.msmeam id x y z fx fy fz c_pot_energy c_stress[1] c_stress[2] c_stress[3] c_stress[4] c_stress[5] c_stress[6]
+run     1
+WARNING: No fixes with time integration, atoms won't move (src/verlet.cpp:60)
+Neighbor list info ...
+  update: every = 1 steps, delay = 0 steps, check = yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 6.9
+  ghost atom cutoff = 6.9
+  binsize = 3.45, bins = 3 3 3
+  2 neighbor lists, perpetual/occasional/extra = 2 0 0
+  (1) pair meam/ms, perpetual
+      attributes: full, newton on
+      pair build: full/bin/atomonly
+      stencil: full/bin/3d
+      bin: standard
+  (2) pair meam/ms, perpetual, half/full from (1)
+      attributes: half, newton on
+      pair build: halffull/newton
+      stencil: none
+      bin: none
+Per MPI rank memory allocation (min/avg/max) = 7.965 | 8.123 | 8.594 Mbytes
+   Step          Temp          TotEng         Press           Pxx            Pyy            Pzz            Pxy            Pxz            Pyz             Lx             Ly             Lz           Volume        c_eatoms   
+         0   0              15.433079      491354.7       838670.96      635393.15      0              80195.797      0              0              8              8              8              512            15.433079    
+         1   0              15.433079      491354.7       838670.96      635393.15      0              80195.797      0              0              8              8              8              512            15.433079    
+Loop time of 8.70645e-05 on 4 procs for 1 steps with 3 atoms
+
+Performance: 992.368 ns/day, 0.024 hours/ns, 11485.738 timesteps/s, 34.457 katom-step/s
+29.0% CPU use with 4 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 4.3957e-05 | 4.67e-05   | 5.1056e-05 |   0.0 | 53.64
+Neigh   | 0          | 0          | 0          |   0.0 |  0.00
+Comm    | 1.105e-05  | 1.3822e-05 | 1.7033e-05 |   0.0 | 15.88
+Output  | 1.5765e-05 | 1.9045e-05 | 2.5216e-05 |   0.0 | 21.87
+Modify  | 2.58e-07   | 3.465e-07  | 3.81e-07   |   0.0 |  0.40
+Other   |            | 7.151e-06  |            |       |  8.21
+
+Nlocal:           0.75 ave           3 max           0 min
+Histogram: 3 0 0 0 0 0 0 0 0 1
+Nghost:          38.25 ave          42 max          36 min
+Histogram: 2 0 0 0 0 1 0 0 0 1
+Neighs:           1.75 ave           7 max           0 min
+Histogram: 3 0 0 0 0 0 0 0 0 1
+FullNghs:          3.5 ave          14 max           0 min
+Histogram: 3 0 0 0 0 0 0 0 0 1
+
+Total # of neighbors = 14
+Ave neighs/atom = 4.6666667
+Neighbor list builds = 0
+Dangerous builds = 0
+#write_data      data.msmeam
+
+print "All done!"
+All done!
+Total wall time: 0:00:00
diff --git a/examples/meam/msmeam/log.msmeam.bu b/examples/meam/msmeam/log.msmeam.bu
deleted file mode 100644
index 8eac453c1e..0000000000
--- a/examples/meam/msmeam/log.msmeam.bu
+++ /dev/null
@@ -1,107 +0,0 @@
-# Test of MEAM potential for HGa
-
-# ------------------------ INITIALIZATION ----------------------------
-units           metal
-dimension       3
-boundary        p       p       p
-atom_style      atomic
-variable latparam equal 4.646
-variable ncell equal 3
-
-# ----------------------- ATOM DEFINITION ----------------------------
-region          box block -4 4 -4 4 -4 4
-create_box      2 box
-Created orthogonal box = (-4 -4 -4) to (4 4 4)
-  1 by 1 by 1 MPI processor grid
-
-#
-
-include potential.mod
-# NOTE: This script can be modified for different pair styles
-# See in.elastic for more info.
-
-variable Pu string H
-print "potential chosen ${Pu}"
-potential chosen H
-# Choose potential
-pair_style      MSmeam
-print		"we just executed"
-we just executed
-
-pair_coeff      * * library.MSmeam ${Pu} Ga4  HGaMS.meam ${Pu} Ga4
-pair_coeff      * * library.MSmeam H Ga4  HGaMS.meam ${Pu} Ga4
-pair_coeff      * * library.MSmeam H Ga4  HGaMS.meam H Ga4
-Reading potential file library.MSmeam with DATE: 2018-09-22
-# Setup neighbor style
-neighbor 1.0 nsq
-neigh_modify once no every 1 delay 0 check yes
-
-# Setup minimization style
-variable dmax equal 1.0e-2
-min_style	     cg
-min_modify	     dmax ${dmax} line quadratic
-min_modify	     dmax 0.01 line quadratic
-compute eng all pe/atom
-compute eatoms all reduce sum c_eng
-
-# Setup output
-thermo		100
-thermo_style custom step temp etotal  press pxx pyy pzz pxy pxz pyz lx ly lz vol c_eatoms
-thermo_modify norm yes
-create_atoms    1 single 0 0 0  units box
-Created 1 atoms
-create_atoms    2 single 2.2 0 0  units box
-Created 1 atoms
-create_atoms    2 single 0.3 2.3 0  units box
-Created 1 atoms
-# ---------- Define Settings ---------------------
-variable	teng equal "c_eatoms"
-compute pot_energy all pe/atom
-compute stress all stress/atom NULL
-dump 1 all custom 1 dump.msmeam id x y z fx fy fz c_pot_energy c_stress[1] c_stress[2] c_stress[3] c_stress[4] c_stress[5] c_stress[6]
-run	1
-WARNING: No fixes defined, atoms won't move (../verlet.cpp:55)
-Neighbor list info ...
-  2 neighbor list requests
-  update every 1 steps, delay 0 steps, check yes
-  max neighbors/atom: 2000, page size: 100000
-  master list distance cutoff = 6.9
-  ghost atom cutoff = 6.9
-Memory usage per processor = 12.9295 Mbytes
-Step Temp TotEng Press Pxx Pyy Pzz Pxy Pxz Pyz Lx Ly Lz Volume eatoms 
-       0            0    15.433079    491354.68    838670.91    635393.13            0    80195.793            0            0            8            8            8          512    15.433079 
-       1            0    15.433079    491354.68    838670.91    635393.13            0    80195.793            0            0            8            8            8          512    15.433079 
-Loop time of 0.000172138 on 1 procs for 1 steps with 3 atoms
-
-Performance: 501.922 ns/day, 0.048 hours/ns, 5809.285 timesteps/s
-81.3% CPU use with 1 MPI tasks x no OpenMP threads
-
-MPI task timing breakdown:
-Section |  min time  |  avg time  |  max time  |%varavg| %total
----------------------------------------------------------------
-Pair    | 6.6996e-05 | 6.6996e-05 | 6.6996e-05 |   0.0 | 38.92
-Neigh   | 0          | 0          | 0          |   0.0 |  0.00
-Comm    | 1.9073e-06 | 1.9073e-06 | 1.9073e-06 |   0.0 |  1.11
-Output  | 9.7036e-05 | 9.7036e-05 | 9.7036e-05 |   0.0 | 56.37
-Modify  | 0          | 0          | 0          |   0.0 |  0.00
-Other   |            | 6.199e-06  |            |       |  3.60
-
-Nlocal:    3 ave 3 max 3 min
-Histogram: 1 0 0 0 0 0 0 0 0 0
-Nghost:    78 ave 78 max 78 min
-Histogram: 1 0 0 0 0 0 0 0 0 0
-Neighs:    7 ave 7 max 7 min
-Histogram: 1 0 0 0 0 0 0 0 0 0
-FullNghs:  14 ave 14 max 14 min
-Histogram: 1 0 0 0 0 0 0 0 0 0
-
-Total # of neighbors = 14
-Ave neighs/atom = 4.66667
-Neighbor list builds = 0
-Dangerous builds = 0
-write_data	data.msmeam
-
-print "All done!"
-All done!
-Total wall time: 0:00:00
-
diff --git a/examples/meam/msmeam/msmeam.dump.bu b/examples/meam/msmeam/msmeam.dump.bu
deleted file mode 100644
index 039f630073..0000000000
--- a/examples/meam/msmeam/msmeam.dump.bu
+++ /dev/null
@@ -1,24 +0,0 @@
-ITEM: TIMESTEP
-0
-ITEM: NUMBER OF ATOMS
-3
-ITEM: BOX BOUNDS pp pp pp
--4 4
--4 4
--4 4
-ITEM: ATOMS id x y z fx fy fz c_pot_energy c_stress[1] c_stress[2] c_stress[3] c_stress[4] c_stress[5] c_stress[6] 
-1 0 0 0 -131.925 -88.3005 0 22.9153 -2.147e+08 -1.62661e+08 -0 -2.05301e+07 -0 -0 
-2 2.2 0 0 120.809 -0.482171 0 14.7692 -2.12028e+08 -0 -0 403352 -0 -0 
-3 0.3 2.3 0 11.1159 88.7827 0 8.61478 -2.67145e+06 -1.62661e+08 -0 -2.09335e+07 -0 -0 
-ITEM: TIMESTEP
-1
-ITEM: NUMBER OF ATOMS
-3
-ITEM: BOX BOUNDS pp pp pp
--4 4
--4 4
--4 4
-ITEM: ATOMS id x y z fx fy fz c_pot_energy c_stress[1] c_stress[2] c_stress[3] c_stress[4] c_stress[5] c_stress[6] 
-1 0 0 0 -131.925 -88.3005 0 22.9153 -2.147e+08 -1.62661e+08 -0 -2.05301e+07 -0 -0 
-2 2.2 0 0 120.809 -0.482171 0 14.7692 -2.12028e+08 -0 -0 403352 -0 -0 
-3 0.3 2.3 0 11.1159 88.7827 0 8.61478 -2.67145e+06 -1.62661e+08 -0 -2.09335e+07 -0 -0 
diff --git a/examples/meam/msmeam/potential.mod b/examples/meam/msmeam/potential.mod
index 760cc93503..117736743b 100644
--- a/examples/meam/msmeam/potential.mod
+++ b/examples/meam/msmeam/potential.mod
@@ -7,7 +7,7 @@ print "potential chosen ${Pu}"
 pair_style meam/ms
 print		"we just executed"
 
-pair_coeff      * * library.msmeam ${Pu} Ga4  HGa.meam ${Pu} Ga4
+pair_coeff      * * library.msmeam ${Pu} Ga4  HGa.msmeam ${Pu} Ga4
 # Setup neighbor style
 neighbor 1.0 bin
 neigh_modify once no every 1 delay 0 check yes
diff --git a/examples/plugins/LAMMPSInterfaceCXX.cmake b/examples/plugins/LAMMPSInterfaceCXX.cmake
index 7eef5bd6e4..d1f8faec22 100644
--- a/examples/plugins/LAMMPSInterfaceCXX.cmake
+++ b/examples/plugins/LAMMPSInterfaceCXX.cmake
@@ -23,12 +23,14 @@ function(validate_option name values)
 endfunction(validate_option)
 
 #################################################################################
-# LAMMPS C++ interface. We only need the header related parts.
+# LAMMPS C++ interface. We only need the header related parts for shared linkage
+# but the library .a file for real static or quasi-static linkage (of LAMMPS).
 add_library(lammps INTERFACE)
 target_include_directories(lammps INTERFACE ${LAMMPS_HEADER_DIR})
 if((CMAKE_SYSTEM_NAME STREQUAL "Windows") AND CMAKE_CROSSCOMPILING)
   target_link_libraries(lammps INTERFACE ${CMAKE_BINARY_DIR}/../liblammps.dll.a)
 endif()
+
 ################################################################################
 # MPI configuration
 if(NOT CMAKE_CROSSCOMPILING)
@@ -82,13 +84,9 @@ if(BUILD_MPI)
       # Download and configure custom MPICH files for Windows
       message(STATUS "Downloading and configuring MPICH-1.4.1 for Windows")
       set(MPICH2_WIN64_DEVEL_URL "${LAMMPS_THIRDPARTY_URL}/mpich2-win64-devel.tar.gz" CACHE STRING "URL for MPICH2 (win64) tarball")
-      set(MPICH2_WIN32_DEVEL_URL "${LAMMPS_THIRDPARTY_URL}/mpich2-win32-devel.tar.gz" CACHE STRING "URL for MPICH2 (win32) tarball")
       set(MPICH2_WIN64_DEVEL_MD5 "4939fdb59d13182fd5dd65211e469f14" CACHE STRING "MD5 checksum of MPICH2 (win64) tarball")
-      set(MPICH2_WIN32_DEVEL_MD5 "a61d153500dce44e21b755ee7257e031" CACHE STRING "MD5 checksum of MPICH2 (win32) tarball")
       mark_as_advanced(MPICH2_WIN64_DEVEL_URL)
-      mark_as_advanced(MPICH2_WIN32_DEVEL_URL)
       mark_as_advanced(MPICH2_WIN64_DEVEL_MD5)
-      mark_as_advanced(MPICH2_WIN32_DEVEL_MD5)
 
       include(ExternalProject)
       if(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
@@ -131,6 +129,8 @@ else()
   target_include_directories(lammps INTERFACE "${LAMMPS_SOURCE_DIR}/STUBS")
 endif()
 
+################
+# integer size selection
 set(LAMMPS_SIZES "smallbig" CACHE STRING "LAMMPS integer sizes (smallsmall: all 32-bit, smallbig: 64-bit #atoms #timesteps, bigbig: also 64-bit imageint, 64-bit atom ids)")
 set(LAMMPS_SIZES_VALUES smallbig bigbig smallsmall)
 set_property(CACHE LAMMPS_SIZES PROPERTY STRINGS ${LAMMPS_SIZES_VALUES})
diff --git a/examples/rdf-adf/in.spce b/examples/rdf-adf/in.spce
index 9a9d99fd42..6627924adc 100644
--- a/examples/rdf-adf/in.spce
+++ b/examples/rdf-adf/in.spce
@@ -1,22 +1,22 @@
 # Liquid water RDFs and ADFs (~12 O-O-O/atom, ~1 O-H...O/atom)
 
-units		real	
-atom_style	full
+units           real
+atom_style      full
 
-read_data	data.spce
+read_data       data.spce
 
 pair_style      lj/cut/coul/long 12.0 12.0
 pair_coeff      * * 0.0     1.0
 pair_coeff      1 1 0.15535 3.166
-kspace_style	pppm 1.0e-6
+kspace_style    pppm 1.0e-6
 
-bond_style	harmonic
-angle_style	harmonic
-dihedral_style	none
-improper_style	none
+bond_style      harmonic
+angle_style     harmonic
+dihedral_style  none
+improper_style  none
 
-bond_coeff	1 1000.00 1.000
-angle_coeff	1 100.0 109.47
+bond_coeff      1 1000.00 1.000
+angle_coeff     1 100.0 109.47
 
 # need to set bond/angle inclusion to > 0.0
 # so that intramolecular pairs are included in neighbor lists (required for second ADF)
@@ -26,8 +26,8 @@ neighbor        2.0 bin
 timestep        2.0
 neigh_modify    every 1 delay 2 check yes
 
-fix		1 all shake 0.0001 20 0 b 1 a 1
-fix		2 all nvt temp 300.0 300.0 100.0
+fix             1 all shake 0.0001 20 0 b 1 a 1
+fix             2 all nvt temp 300.0 300.0 100.0
 
 velocity all create 300.0 6244325
 
diff --git a/examples/threebody/MoS.rebomos b/examples/threebody/MoS.rebomos
new file mode 120000
index 0000000000..6146c74c24
--- /dev/null
+++ b/examples/threebody/MoS.rebomos
@@ -0,0 +1 @@
+../../potentials/MoS.rebomos
\ No newline at end of file
diff --git a/examples/threebody/in.mos2-bulk b/examples/threebody/in.mos2-bulk
new file mode 100644
index 0000000000..032e71fce8
--- /dev/null
+++ b/examples/threebody/in.mos2-bulk
@@ -0,0 +1,35 @@
+units metal
+
+lattice custom 1.0 a1  3.1903157234 0.0000000000  0.0000000000 &
+                   a2 -1.5964590311 2.7651481541  0.0000000000 &
+                   a3  0.0000000000 0.0000000000 13.9827680588 &
+                basis  0.0000000000 0.000000000   $(3.0/4.0)   &
+                basis  0.0000000000 0.000000000   $(1.0/4.0)   &
+                basis  $(2.0/3.0)   $(1.0/3.0)    0.862008989  &
+                basis  $(1.0/3.0)   $(2.0/3.0)    0.137990996  &
+                basis  $(1.0/3.0)   $(2.0/3.0)    0.362008989  &
+                basis  $(2.0/3.0)   $(1.0/3.0)    0.637991011  &
+                origin 0.1 0.1 0.1
+
+region box prism 0 4 0 8 0 1 -2.0 0.0 0.0
+create_box 2 box
+create_atoms 2 box &
+   basis 1 1 &
+   basis 2 1 &
+   basis 3 2 &
+   basis 4 2 &
+   basis 5 2 &
+   basis 6 2
+
+mass                    1 95.95  #Mo
+mass                    2 32.065 #S
+
+pair_style rebomos
+pair_coeff * * MoS.rebomos Mo S
+
+thermo_style custom step temp press pe ke cellgamma vol
+thermo 10
+#dump 1 all atom 10 MoS.lammpstrj
+fix 1 all nve
+run 20
+
diff --git a/examples/threebody/in.mos2.rebomos b/examples/threebody/in.mos2.rebomos
new file mode 100644
index 0000000000..ca91f67003
--- /dev/null
+++ b/examples/threebody/in.mos2.rebomos
@@ -0,0 +1,31 @@
+# monolayer MoS2
+units           metal
+boundary        p p f
+processors      * * 1
+atom_modify map array
+
+atom_style      atomic
+read_data       single_layer_MoS2.data
+
+mass            * 32.065        # mass of sulphur atom , uint: a.u.=1.66X10^(-27)kg
+mass            1 95.94         # mass of molebdenum atom , uint: a.u.=1.66X10^(-27)kg
+
+########################## Define potentials ################################
+pair_style      rebomos
+pair_coeff      * * MoS.rebomos Mo S S
+#########################################################################
+
+### Simulation settings ####
+timestep        0.001
+velocity        all create 300.0 12345 loop geom
+
+############################
+
+# Output
+thermo          500
+thermo_style    custom step etotal pe ke temp
+thermo_modify   lost warn
+
+###### Run molecular dynamics ######
+fix             thermostat all nve
+run             5000
diff --git a/examples/threebody/log.22Feb24.mos2-bulk.g++.1 b/examples/threebody/log.22Feb24.mos2-bulk.g++.1
new file mode 100644
index 0000000000..8218026f3d
--- /dev/null
+++ b/examples/threebody/log.22Feb24.mos2-bulk.g++.1
@@ -0,0 +1,85 @@
+LAMMPS (7 Feb 2024 - Development - patch_7Feb2024_update1-73-g36fa601fe0)
+  using 1 OpenMP thread(s) per MPI task
+units metal
+
+lattice custom 1.0 a1  3.1903157234 0.0000000000  0.0000000000                    a2 -1.5964590311 2.7651481541  0.0000000000                    a3  0.0000000000 0.0000000000 13.9827680588                 basis  0.0000000000 0.000000000   $(3.0/4.0)                   basis  0.0000000000 0.000000000   $(1.0/4.0)                   basis  $(2.0/3.0)   $(1.0/3.0)    0.862008989                  basis  $(1.0/3.0)   $(2.0/3.0)    0.137990996                  basis  $(1.0/3.0)   $(2.0/3.0)    0.362008989                  basis  $(2.0/3.0)   $(1.0/3.0)    0.637991011                  origin 0.1 0.1 0.1
+lattice custom 1.0 a1  3.1903157234 0.0000000000  0.0000000000                    a2 -1.5964590311 2.7651481541  0.0000000000                    a3  0.0000000000 0.0000000000 13.9827680588                 basis  0.0000000000 0.000000000   0.75                   basis  0.0000000000 0.000000000   $(1.0/4.0)                   basis  $(2.0/3.0)   $(1.0/3.0)    0.862008989                  basis  $(1.0/3.0)   $(2.0/3.0)    0.137990996                  basis  $(1.0/3.0)   $(2.0/3.0)    0.362008989                  basis  $(2.0/3.0)   $(1.0/3.0)    0.637991011                  origin 0.1 0.1 0.1
+lattice custom 1.0 a1  3.1903157234 0.0000000000  0.0000000000                    a2 -1.5964590311 2.7651481541  0.0000000000                    a3  0.0000000000 0.0000000000 13.9827680588                 basis  0.0000000000 0.000000000   0.75                   basis  0.0000000000 0.000000000   0.25                   basis  $(2.0/3.0)   $(1.0/3.0)    0.862008989                  basis  $(1.0/3.0)   $(2.0/3.0)    0.137990996                  basis  $(1.0/3.0)   $(2.0/3.0)    0.362008989                  basis  $(2.0/3.0)   $(1.0/3.0)    0.637991011                  origin 0.1 0.1 0.1
+lattice custom 1.0 a1  3.1903157234 0.0000000000  0.0000000000                    a2 -1.5964590311 2.7651481541  0.0000000000                    a3  0.0000000000 0.0000000000 13.9827680588                 basis  0.0000000000 0.000000000   0.75                   basis  0.0000000000 0.000000000   0.25                   basis  0.66666666666666662966   $(1.0/3.0)    0.862008989                  basis  $(1.0/3.0)   $(2.0/3.0)    0.137990996                  basis  $(1.0/3.0)   $(2.0/3.0)    0.362008989                  basis  $(2.0/3.0)   $(1.0/3.0)    0.637991011                  origin 0.1 0.1 0.1
+lattice custom 1.0 a1  3.1903157234 0.0000000000  0.0000000000                    a2 -1.5964590311 2.7651481541  0.0000000000                    a3  0.0000000000 0.0000000000 13.9827680588                 basis  0.0000000000 0.000000000   0.75                   basis  0.0000000000 0.000000000   0.25                   basis  0.66666666666666662966   0.33333333333333331483    0.862008989                  basis  $(1.0/3.0)   $(2.0/3.0)    0.137990996                  basis  $(1.0/3.0)   $(2.0/3.0)    0.362008989                  basis  $(2.0/3.0)   $(1.0/3.0)    0.637991011                  origin 0.1 0.1 0.1
+lattice custom 1.0 a1  3.1903157234 0.0000000000  0.0000000000                    a2 -1.5964590311 2.7651481541  0.0000000000                    a3  0.0000000000 0.0000000000 13.9827680588                 basis  0.0000000000 0.000000000   0.75                   basis  0.0000000000 0.000000000   0.25                   basis  0.66666666666666662966   0.33333333333333331483    0.862008989                  basis  0.33333333333333331483   $(2.0/3.0)    0.137990996                  basis  $(1.0/3.0)   $(2.0/3.0)    0.362008989                  basis  $(2.0/3.0)   $(1.0/3.0)    0.637991011                  origin 0.1 0.1 0.1
+lattice custom 1.0 a1  3.1903157234 0.0000000000  0.0000000000                    a2 -1.5964590311 2.7651481541  0.0000000000                    a3  0.0000000000 0.0000000000 13.9827680588                 basis  0.0000000000 0.000000000   0.75                   basis  0.0000000000 0.000000000   0.25                   basis  0.66666666666666662966   0.33333333333333331483    0.862008989                  basis  0.33333333333333331483   0.66666666666666662966    0.137990996                  basis  $(1.0/3.0)   $(2.0/3.0)    0.362008989                  basis  $(2.0/3.0)   $(1.0/3.0)    0.637991011                  origin 0.1 0.1 0.1
+lattice custom 1.0 a1  3.1903157234 0.0000000000  0.0000000000                    a2 -1.5964590311 2.7651481541  0.0000000000                    a3  0.0000000000 0.0000000000 13.9827680588                 basis  0.0000000000 0.000000000   0.75                   basis  0.0000000000 0.000000000   0.25                   basis  0.66666666666666662966   0.33333333333333331483    0.862008989                  basis  0.33333333333333331483   0.66666666666666662966    0.137990996                  basis  0.33333333333333331483   $(2.0/3.0)    0.362008989                  basis  $(2.0/3.0)   $(1.0/3.0)    0.637991011                  origin 0.1 0.1 0.1
+lattice custom 1.0 a1  3.1903157234 0.0000000000  0.0000000000                    a2 -1.5964590311 2.7651481541  0.0000000000                    a3  0.0000000000 0.0000000000 13.9827680588                 basis  0.0000000000 0.000000000   0.75                   basis  0.0000000000 0.000000000   0.25                   basis  0.66666666666666662966   0.33333333333333331483    0.862008989                  basis  0.33333333333333331483   0.66666666666666662966    0.137990996                  basis  0.33333333333333331483   0.66666666666666662966    0.362008989                  basis  $(2.0/3.0)   $(1.0/3.0)    0.637991011                  origin 0.1 0.1 0.1
+lattice custom 1.0 a1  3.1903157234 0.0000000000  0.0000000000                    a2 -1.5964590311 2.7651481541  0.0000000000                    a3  0.0000000000 0.0000000000 13.9827680588                 basis  0.0000000000 0.000000000   0.75                   basis  0.0000000000 0.000000000   0.25                   basis  0.66666666666666662966   0.33333333333333331483    0.862008989                  basis  0.33333333333333331483   0.66666666666666662966    0.137990996                  basis  0.33333333333333331483   0.66666666666666662966    0.362008989                  basis  0.66666666666666662966   $(1.0/3.0)    0.637991011                  origin 0.1 0.1 0.1
+lattice custom 1.0 a1  3.1903157234 0.0000000000  0.0000000000                    a2 -1.5964590311 2.7651481541  0.0000000000                    a3  0.0000000000 0.0000000000 13.9827680588                 basis  0.0000000000 0.000000000   0.75                   basis  0.0000000000 0.000000000   0.25                   basis  0.66666666666666662966   0.33333333333333331483    0.862008989                  basis  0.33333333333333331483   0.66666666666666662966    0.137990996                  basis  0.33333333333333331483   0.66666666666666662966    0.362008989                  basis  0.66666666666666662966   0.33333333333333331483    0.637991011                  origin 0.1 0.1 0.1
+Lattice spacing in x,y,z = 4.7867748 2.7651482 13.982768
+
+region box prism 0 4 0 8 0 1 -2.0 0.0 0.0
+create_box 2 box
+Created triclinic box = (0 0 0) to (19.147099 22.121185 13.982768) with tilt (-9.5735495 0 0)
+  1 by 1 by 1 MPI processor grid
+create_atoms 2 box    basis 1 1    basis 2 1    basis 3 2    basis 4 2    basis 5 2    basis 6 2
+Created 288 atoms
+  using lattice units in triclinic box = (0 0 0) to (19.147099 22.121185 13.982768) with tilt (-9.5735495 0 0)
+  create_atoms CPU = 0.000 seconds
+
+mass                    1 95.95  #Mo
+mass                    2 32.065 #S
+
+pair_style rebomos
+pair_coeff * * MoS.rebomos Mo S
+Reading rebomos potential file MoS.rebomos with DATE: 2013-11-04
+
+thermo_style custom step temp press pe ke cellgamma vol
+thermo 10
+#dump 1 all atom 10 MoS.lammpstrj
+fix 1 all nve
+run 20
+Neighbor list info ...
+  update: every = 1 steps, delay = 0 steps, check = yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 13.4
+  ghost atom cutoff = 13.4
+  binsize = 6.7, bins = 5 4 3
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair rebomos, perpetual
+      attributes: full, newton on, ghost
+      pair build: full/bin/ghost
+      stencil: full/ghost/bin/3d
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 4.996 | 4.996 | 4.996 Mbytes
+   Step          Temp          Press          PotEng         KinEng       CellGamma        Volume    
+         0   0              28799.53      -2061.6112      0              113.40187      5922.4926    
+        10   80.776057      13540.088     -2064.6132      2.9966028      113.40187      5922.4926    
+        20   146.17503     -20669.371     -2067.0428      5.4227518      113.40187      5922.4926    
+Loop time of 0.058071 on 1 procs for 20 steps with 288 atoms
+
+Performance: 29.757 ns/day, 0.807 hours/ns, 344.406 timesteps/s, 99.189 katom-step/s
+99.8% CPU use with 1 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0.057666   | 0.057666   | 0.057666   |   0.0 | 99.30
+Neigh   | 0          | 0          | 0          |   0.0 |  0.00
+Comm    | 0.00024654 | 0.00024654 | 0.00024654 |   0.0 |  0.42
+Output  | 2.3975e-05 | 2.3975e-05 | 2.3975e-05 |   0.0 |  0.04
+Modify  | 3.8394e-05 | 3.8394e-05 | 3.8394e-05 |   0.0 |  0.07
+Other   |            | 9.596e-05  |            |       |  0.17
+
+Nlocal:            288 ave         288 max         288 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost:           4285 ave        4285 max        4285 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs:              0 ave           0 max           0 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+FullNghs:       142848 ave      142848 max      142848 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 142848
+Ave neighs/atom = 496
+Neighbor list builds = 0
+Dangerous builds = 0
+
+Total wall time: 0:00:00
diff --git a/examples/threebody/log.22Feb24.mos2-bulk.g++.4 b/examples/threebody/log.22Feb24.mos2-bulk.g++.4
new file mode 100644
index 0000000000..0b9cd3ed8a
--- /dev/null
+++ b/examples/threebody/log.22Feb24.mos2-bulk.g++.4
@@ -0,0 +1,85 @@
+LAMMPS (7 Feb 2024 - Development - patch_7Feb2024_update1-73-g36fa601fe0)
+  using 1 OpenMP thread(s) per MPI task
+units metal
+
+lattice custom 1.0 a1  3.1903157234 0.0000000000  0.0000000000                    a2 -1.5964590311 2.7651481541  0.0000000000                    a3  0.0000000000 0.0000000000 13.9827680588                 basis  0.0000000000 0.000000000   $(3.0/4.0)                   basis  0.0000000000 0.000000000   $(1.0/4.0)                   basis  $(2.0/3.0)   $(1.0/3.0)    0.862008989                  basis  $(1.0/3.0)   $(2.0/3.0)    0.137990996                  basis  $(1.0/3.0)   $(2.0/3.0)    0.362008989                  basis  $(2.0/3.0)   $(1.0/3.0)    0.637991011                  origin 0.1 0.1 0.1
+lattice custom 1.0 a1  3.1903157234 0.0000000000  0.0000000000                    a2 -1.5964590311 2.7651481541  0.0000000000                    a3  0.0000000000 0.0000000000 13.9827680588                 basis  0.0000000000 0.000000000   0.75                   basis  0.0000000000 0.000000000   $(1.0/4.0)                   basis  $(2.0/3.0)   $(1.0/3.0)    0.862008989                  basis  $(1.0/3.0)   $(2.0/3.0)    0.137990996                  basis  $(1.0/3.0)   $(2.0/3.0)    0.362008989                  basis  $(2.0/3.0)   $(1.0/3.0)    0.637991011                  origin 0.1 0.1 0.1
+lattice custom 1.0 a1  3.1903157234 0.0000000000  0.0000000000                    a2 -1.5964590311 2.7651481541  0.0000000000                    a3  0.0000000000 0.0000000000 13.9827680588                 basis  0.0000000000 0.000000000   0.75                   basis  0.0000000000 0.000000000   0.25                   basis  $(2.0/3.0)   $(1.0/3.0)    0.862008989                  basis  $(1.0/3.0)   $(2.0/3.0)    0.137990996                  basis  $(1.0/3.0)   $(2.0/3.0)    0.362008989                  basis  $(2.0/3.0)   $(1.0/3.0)    0.637991011                  origin 0.1 0.1 0.1
+lattice custom 1.0 a1  3.1903157234 0.0000000000  0.0000000000                    a2 -1.5964590311 2.7651481541  0.0000000000                    a3  0.0000000000 0.0000000000 13.9827680588                 basis  0.0000000000 0.000000000   0.75                   basis  0.0000000000 0.000000000   0.25                   basis  0.66666666666666662966   $(1.0/3.0)    0.862008989                  basis  $(1.0/3.0)   $(2.0/3.0)    0.137990996                  basis  $(1.0/3.0)   $(2.0/3.0)    0.362008989                  basis  $(2.0/3.0)   $(1.0/3.0)    0.637991011                  origin 0.1 0.1 0.1
+lattice custom 1.0 a1  3.1903157234 0.0000000000  0.0000000000                    a2 -1.5964590311 2.7651481541  0.0000000000                    a3  0.0000000000 0.0000000000 13.9827680588                 basis  0.0000000000 0.000000000   0.75                   basis  0.0000000000 0.000000000   0.25                   basis  0.66666666666666662966   0.33333333333333331483    0.862008989                  basis  $(1.0/3.0)   $(2.0/3.0)    0.137990996                  basis  $(1.0/3.0)   $(2.0/3.0)    0.362008989                  basis  $(2.0/3.0)   $(1.0/3.0)    0.637991011                  origin 0.1 0.1 0.1
+lattice custom 1.0 a1  3.1903157234 0.0000000000  0.0000000000                    a2 -1.5964590311 2.7651481541  0.0000000000                    a3  0.0000000000 0.0000000000 13.9827680588                 basis  0.0000000000 0.000000000   0.75                   basis  0.0000000000 0.000000000   0.25                   basis  0.66666666666666662966   0.33333333333333331483    0.862008989                  basis  0.33333333333333331483   $(2.0/3.0)    0.137990996                  basis  $(1.0/3.0)   $(2.0/3.0)    0.362008989                  basis  $(2.0/3.0)   $(1.0/3.0)    0.637991011                  origin 0.1 0.1 0.1
+lattice custom 1.0 a1  3.1903157234 0.0000000000  0.0000000000                    a2 -1.5964590311 2.7651481541  0.0000000000                    a3  0.0000000000 0.0000000000 13.9827680588                 basis  0.0000000000 0.000000000   0.75                   basis  0.0000000000 0.000000000   0.25                   basis  0.66666666666666662966   0.33333333333333331483    0.862008989                  basis  0.33333333333333331483   0.66666666666666662966    0.137990996                  basis  $(1.0/3.0)   $(2.0/3.0)    0.362008989                  basis  $(2.0/3.0)   $(1.0/3.0)    0.637991011                  origin 0.1 0.1 0.1
+lattice custom 1.0 a1  3.1903157234 0.0000000000  0.0000000000                    a2 -1.5964590311 2.7651481541  0.0000000000                    a3  0.0000000000 0.0000000000 13.9827680588                 basis  0.0000000000 0.000000000   0.75                   basis  0.0000000000 0.000000000   0.25                   basis  0.66666666666666662966   0.33333333333333331483    0.862008989                  basis  0.33333333333333331483   0.66666666666666662966    0.137990996                  basis  0.33333333333333331483   $(2.0/3.0)    0.362008989                  basis  $(2.0/3.0)   $(1.0/3.0)    0.637991011                  origin 0.1 0.1 0.1
+lattice custom 1.0 a1  3.1903157234 0.0000000000  0.0000000000                    a2 -1.5964590311 2.7651481541  0.0000000000                    a3  0.0000000000 0.0000000000 13.9827680588                 basis  0.0000000000 0.000000000   0.75                   basis  0.0000000000 0.000000000   0.25                   basis  0.66666666666666662966   0.33333333333333331483    0.862008989                  basis  0.33333333333333331483   0.66666666666666662966    0.137990996                  basis  0.33333333333333331483   0.66666666666666662966    0.362008989                  basis  $(2.0/3.0)   $(1.0/3.0)    0.637991011                  origin 0.1 0.1 0.1
+lattice custom 1.0 a1  3.1903157234 0.0000000000  0.0000000000                    a2 -1.5964590311 2.7651481541  0.0000000000                    a3  0.0000000000 0.0000000000 13.9827680588                 basis  0.0000000000 0.000000000   0.75                   basis  0.0000000000 0.000000000   0.25                   basis  0.66666666666666662966   0.33333333333333331483    0.862008989                  basis  0.33333333333333331483   0.66666666666666662966    0.137990996                  basis  0.33333333333333331483   0.66666666666666662966    0.362008989                  basis  0.66666666666666662966   $(1.0/3.0)    0.637991011                  origin 0.1 0.1 0.1
+lattice custom 1.0 a1  3.1903157234 0.0000000000  0.0000000000                    a2 -1.5964590311 2.7651481541  0.0000000000                    a3  0.0000000000 0.0000000000 13.9827680588                 basis  0.0000000000 0.000000000   0.75                   basis  0.0000000000 0.000000000   0.25                   basis  0.66666666666666662966   0.33333333333333331483    0.862008989                  basis  0.33333333333333331483   0.66666666666666662966    0.137990996                  basis  0.33333333333333331483   0.66666666666666662966    0.362008989                  basis  0.66666666666666662966   0.33333333333333331483    0.637991011                  origin 0.1 0.1 0.1
+Lattice spacing in x,y,z = 4.7867748 2.7651482 13.982768
+
+region box prism 0 4 0 8 0 1 -2.0 0.0 0.0
+create_box 2 box
+Created triclinic box = (0 0 0) to (19.147099 22.121185 13.982768) with tilt (-9.5735495 0 0)
+  2 by 2 by 1 MPI processor grid
+create_atoms 2 box    basis 1 1    basis 2 1    basis 3 2    basis 4 2    basis 5 2    basis 6 2
+Created 288 atoms
+  using lattice units in triclinic box = (0 0 0) to (19.147099 22.121185 13.982768) with tilt (-9.5735495 0 0)
+  create_atoms CPU = 0.000 seconds
+
+mass                    1 95.95  #Mo
+mass                    2 32.065 #S
+
+pair_style rebomos
+pair_coeff * * MoS.rebomos Mo S
+Reading rebomos potential file MoS.rebomos with DATE: 2013-11-04
+
+thermo_style custom step temp press pe ke cellgamma vol
+thermo 10
+#dump 1 all atom 10 MoS.lammpstrj
+fix 1 all nve
+run 20
+Neighbor list info ...
+  update: every = 1 steps, delay = 0 steps, check = yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 13.4
+  ghost atom cutoff = 13.4
+  binsize = 6.7, bins = 5 4 3
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair rebomos, perpetual
+      attributes: full, newton on, ghost
+      pair build: full/bin/ghost
+      stencil: full/ghost/bin/3d
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 4.15 | 4.151 | 4.151 Mbytes
+   Step          Temp          Press          PotEng         KinEng       CellGamma        Volume    
+         0   0              28799.53      -2061.6112      0              113.40187      5922.4926    
+        10   80.776057      13540.088     -2064.6132      2.9966028      113.40187      5922.4926    
+        20   146.17503     -20669.371     -2067.0428      5.4227518      113.40187      5922.4926    
+Loop time of 0.0219485 on 4 procs for 20 steps with 288 atoms
+
+Performance: 78.730 ns/day, 0.305 hours/ns, 911.225 timesteps/s, 262.433 katom-step/s
+96.3% CPU use with 4 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0.018118   | 0.019372   | 0.020087   |   0.5 | 88.26
+Neigh   | 0          | 0          | 0          |   0.0 |  0.00
+Comm    | 0.0015635  | 0.0023195  | 0.0035967  |   1.6 | 10.57
+Output  | 2.5017e-05 | 4.6834e-05 | 0.00010543 |   0.0 |  0.21
+Modify  | 1.3954e-05 | 1.423e-05  | 1.4594e-05 |   0.0 |  0.06
+Other   |            | 0.0001957  |            |       |  0.89
+
+Nlocal:             72 ave          72 max          72 min
+Histogram: 4 0 0 0 0 0 0 0 0 0
+Nghost:         2771.5 ave        2775 max        2768 min
+Histogram: 2 0 0 0 0 0 0 0 0 2
+Neighs:              0 ave           0 max           0 min
+Histogram: 4 0 0 0 0 0 0 0 0 0
+FullNghs:        35712 ave       35712 max       35712 min
+Histogram: 4 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 142848
+Ave neighs/atom = 496
+Neighbor list builds = 0
+Dangerous builds = 0
+
+Total wall time: 0:00:00
diff --git a/examples/threebody/log.22Feb24.mos2.rebomos.g++.1 b/examples/threebody/log.22Feb24.mos2.rebomos.g++.1
new file mode 100644
index 0000000000..f7c5b3c74d
--- /dev/null
+++ b/examples/threebody/log.22Feb24.mos2.rebomos.g++.1
@@ -0,0 +1,95 @@
+LAMMPS (7 Feb 2024 - Development - patch_7Feb2024_update1-73-g36fa601fe0)
+  using 1 OpenMP thread(s) per MPI task
+# monolayer MoS2
+units           metal
+boundary        p p f
+processors      * * 1
+atom_modify map array
+
+atom_style      atomic
+read_data       single_layer_MoS2.data
+Reading data file ...
+  triclinic box = (0 0 -100) to (51.15232 44.299209 100) with tilt (25.57616 0 0)
+WARNING: Triclinic box skew is large. LAMMPS will run inefficiently. (src/domain.cpp:219)
+  1 by 1 by 1 MPI processor grid
+  reading atoms ...
+  768 atoms
+  read_data CPU = 0.002 seconds
+
+mass            * 32.065        # mass of sulphur atom , uint: a.u.=1.66X10^(-27)kg
+mass            1 95.94         # mass of molebdenum atom , uint: a.u.=1.66X10^(-27)kg
+
+########################## Define potentials ################################
+pair_style      rebomos
+pair_coeff      * * MoS.rebomos Mo S S
+Reading rebomos potential file MoS.rebomos with DATE: 2013-11-04
+#########################################################################
+
+### Simulation settings ####
+timestep        0.001
+velocity        all create 300.0 12345 loop geom
+
+############################
+
+# Output
+thermo          500
+thermo_style    custom step etotal pe ke temp
+thermo_modify   lost warn
+
+###### Run molecular dynamics ######
+fix             thermostat all nve
+run             5000
+Neighbor list info ...
+  update: every = 1 steps, delay = 0 steps, check = yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 13.4
+  ghost atom cutoff = 13.4
+  binsize = 6.7, bins = 12 7 30
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair rebomos, perpetual
+      attributes: full, newton on, ghost
+      pair build: full/bin/ghost
+      stencil: full/ghost/bin/3d
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 4.473 | 4.473 | 4.473 Mbytes
+   Step         TotEng         PotEng         KinEng          Temp     
+         0  -5466.9785     -5496.7212      29.742759      300          
+       500  -5466.964      -5482.6985      15.734505      158.7059     
+      1000  -5466.9615     -5480.9492      13.98763       141.08607    
+      1500  -5466.964      -5482.6912      15.727258      158.63281    
+      2000  -5466.9657     -5483.3606      16.394878      165.36675    
+      2500  -5466.9624     -5481.6253      14.662948      147.89765    
+      3000  -5466.9642     -5482.7515      15.7873        159.23842    
+      3500  -5466.9654     -5483.3789      16.413502      165.5546     
+      4000  -5466.9628     -5481.848       14.885236      150.13977    
+      4500  -5466.9648     -5483.5045      16.539775      166.82825    
+      5000  -5466.9649     -5483.4932      16.528298      166.71249    
+Loop time of 19.1009 on 1 procs for 5000 steps with 768 atoms
+
+Performance: 22.617 ns/day, 1.061 hours/ns, 261.768 timesteps/s, 201.038 katom-step/s
+99.9% CPU use with 1 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 19.042     | 19.042     | 19.042     |   0.0 | 99.69
+Neigh   | 0          | 0          | 0          |   0.0 |  0.00
+Comm    | 0.018451   | 0.018451   | 0.018451   |   0.0 |  0.10
+Output  | 0.00015575 | 0.00015575 | 0.00015575 |   0.0 |  0.00
+Modify  | 0.023931   | 0.023931   | 0.023931   |   0.0 |  0.13
+Other   |            | 0.01658    |            |       |  0.09
+
+Nlocal:            768 ave         768 max         768 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost:           1158 ave        1158 max        1158 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs:              0 ave           0 max           0 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+FullNghs:       141824 ave      141824 max      141824 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 141824
+Ave neighs/atom = 184.66667
+Neighbor list builds = 0
+Dangerous builds = 0
+Total wall time: 0:00:19
diff --git a/examples/threebody/log.22Feb24.mos2.rebomos.g++.4 b/examples/threebody/log.22Feb24.mos2.rebomos.g++.4
new file mode 100644
index 0000000000..dc1cfa84d4
--- /dev/null
+++ b/examples/threebody/log.22Feb24.mos2.rebomos.g++.4
@@ -0,0 +1,95 @@
+LAMMPS (7 Feb 2024 - Development - patch_7Feb2024_update1-73-g36fa601fe0)
+  using 1 OpenMP thread(s) per MPI task
+# monolayer MoS2
+units           metal
+boundary        p p f
+processors      * * 1
+atom_modify map array
+
+atom_style      atomic
+read_data       single_layer_MoS2.data
+Reading data file ...
+  triclinic box = (0 0 -100) to (51.15232 44.299209 100) with tilt (25.57616 0 0)
+WARNING: Triclinic box skew is large. LAMMPS will run inefficiently. (src/domain.cpp:219)
+  2 by 2 by 1 MPI processor grid
+  reading atoms ...
+  768 atoms
+  read_data CPU = 0.002 seconds
+
+mass            * 32.065        # mass of sulphur atom , uint: a.u.=1.66X10^(-27)kg
+mass            1 95.94         # mass of molebdenum atom , uint: a.u.=1.66X10^(-27)kg
+
+########################## Define potentials ################################
+pair_style      rebomos
+pair_coeff      * * MoS.rebomos Mo S S
+Reading rebomos potential file MoS.rebomos with DATE: 2013-11-04
+#########################################################################
+
+### Simulation settings ####
+timestep        0.001
+velocity        all create 300.0 12345 loop geom
+
+############################
+
+# Output
+thermo          500
+thermo_style    custom step etotal pe ke temp
+thermo_modify   lost warn
+
+###### Run molecular dynamics ######
+fix             thermostat all nve
+run             5000
+Neighbor list info ...
+  update: every = 1 steps, delay = 0 steps, check = yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 13.4
+  ghost atom cutoff = 13.4
+  binsize = 6.7, bins = 12 7 30
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair rebomos, perpetual
+      attributes: full, newton on, ghost
+      pair build: full/bin/ghost
+      stencil: full/ghost/bin/3d
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 4.045 | 4.045 | 4.045 Mbytes
+   Step         TotEng         PotEng         KinEng          Temp     
+         0  -5466.9785     -5496.7212      29.742759      300          
+       500  -5466.964      -5482.6985      15.734505      158.7059     
+      1000  -5466.9615     -5480.9492      13.98763       141.08607    
+      1500  -5466.964      -5482.6912      15.727258      158.63281    
+      2000  -5466.9657     -5483.3606      16.394878      165.36675    
+      2500  -5466.9624     -5481.6253      14.662948      147.89765    
+      3000  -5466.9642     -5482.7515      15.7873        159.23842    
+      3500  -5466.9654     -5483.3789      16.413502      165.5546     
+      4000  -5466.9628     -5481.848       14.885236      150.13977    
+      4500  -5466.9648     -5483.5045      16.539775      166.82825    
+      5000  -5466.9649     -5483.4932      16.528298      166.71249    
+Loop time of 5.69326 on 4 procs for 5000 steps with 768 atoms
+
+Performance: 75.879 ns/day, 0.316 hours/ns, 878.231 timesteps/s, 674.482 katom-step/s
+98.6% CPU use with 4 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 5.2611     | 5.3666     | 5.4358     |   3.0 | 94.26
+Neigh   | 0          | 0          | 0          |   0.0 |  0.00
+Comm    | 0.23476    | 0.30106    | 0.40642    |  12.8 |  5.29
+Output  | 0.00014996 | 0.0004478  | 0.0013353  |   0.0 |  0.01
+Modify  | 0.0068861  | 0.0069917  | 0.0072247  |   0.2 |  0.12
+Other   |            | 0.01814    |            |       |  0.32
+
+Nlocal:            192 ave         194 max         190 min
+Histogram: 1 0 0 0 0 2 0 0 0 1
+Nghost:            710 ave         712 max         708 min
+Histogram: 1 0 0 0 0 2 0 0 0 1
+Neighs:              0 ave           0 max           0 min
+Histogram: 4 0 0 0 0 0 0 0 0 0
+FullNghs:        35456 ave       35824 max       35088 min
+Histogram: 1 0 0 0 0 2 0 0 0 1
+
+Total # of neighbors = 141824
+Ave neighs/atom = 184.66667
+Neighbor list builds = 0
+Dangerous builds = 0
+Total wall time: 0:00:05
diff --git a/examples/wall/in.wall.flow b/examples/wall/in.wall.flow
new file mode 100644
index 0000000000..9dfe001a55
--- /dev/null
+++ b/examples/wall/in.wall.flow
@@ -0,0 +1,79 @@
+variable nrun equal 1000
+variable dump_count equal 10
+
+variable nwall equal 4
+variable w1 equal 67
+variable w2 equal 71
+variable w3 equal 75
+variable w4 equal 79
+
+variable x_cylinder equal 20
+variable y_cylinder equal 17
+variable r_cylinder equal 4
+
+variable MASS  equal 1
+variable TEMP  equal 0.4
+variable VFLOW equal 0.5
+
+units         lj
+atom_style    atomic
+
+lattice       fcc 0.3
+region        sim_box block 0 84 0 34 0 10
+
+boundary      p p p
+
+create_box    2 sim_box
+region        reg_cylinder cylinder z ${x_cylinder} ${y_cylinder} ${r_cylinder} EDGE EDGE
+
+create_atoms  1 box
+
+## setup obstacle ##
+group  g_obst region reg_cylinder
+group  g_flow subtract all g_obst
+set    group g_obst type 2 
+
+mass          1 ${MASS}
+mass          2 ${MASS}
+
+velocity  g_flow create ${TEMP} 4928459 rot yes dist gaussian
+velocity  g_obst set    0.0 0.0 0.0
+
+pair_style  lj/cut 1.122462
+pair_coeff  1 1 1.0 1.0
+pair_coeff  1 2 1.0 1.0
+pair_coeff  2 2 1.0 1.0
+pair_modify shift yes
+
+neighbor      0.3 bin
+neigh_modify  delay 0 every 20 check no
+
+fix    1     g_flow nve
+fix    2     g_flow wall/flow  x ${VFLOW} ${TEMP} 123 ${nwall} ${w1} ${w2} ${w3} ${w4}
+
+variable dump_every equal ${nrun}/${dump_count}
+variable thermo_every equal ${dump_every}
+variable restart_every equal ${nrun}/10
+
+##### uncomment for grid aggregation #####
+#variable gr_Nx equal 42
+#variable gr_Ny equal 17
+#variable gr_Nz equal 1
+#variable gr_Nevery equal ${dump_every}
+#variable gr_Nrepeat equal 1
+#variable gr_Nfreq equal ${dump_every}
+#fix    3     g_flow ave/grid ${gr_Nevery} ${gr_Nrepeat} ${gr_Nfreq} ${gr_Nx} ${gr_Ny} ${gr_Nz} vx vy vz density/mass norm all ave one
+#compute ct_gridId g_flow property/grid ${gr_Nx} ${gr_Ny} ${gr_Nz} id
+#dump   dmp_grid  g_flow grid ${dump_every} grid.lammpstrj c_ct_gridId:grid:data f_3:grid:data[*]
+##########################################
+
+#dump   dmp_coord all atom ${dump_every} dump.lammpstrj
+
+#compute ct_Temp   g_flow temp/com
+#thermo_style custom step temp epair emol etotal press c_ct_Temp
+
+#restart  ${restart_every} flow.restart
+
+timestep 0.005
+thermo   ${thermo_every}
+run      ${nrun}
diff --git a/examples/wall/log.7Feb24.wall.flow.g++.1 b/examples/wall/log.7Feb24.wall.flow.g++.1
new file mode 100644
index 0000000000..75e8b66fe1
--- /dev/null
+++ b/examples/wall/log.7Feb24.wall.flow.g++.1
@@ -0,0 +1,182 @@
+LAMMPS (21 Nov 2023 - Development - patch_21Nov2023-758-ge33590b2fc-modified)
+  using 1 OpenMP thread(s) per MPI task
+variable nrun equal 1000
+variable dump_count equal 10
+
+variable nwall equal 4
+variable w1 equal 67
+variable w2 equal 71
+variable w3 equal 75
+variable w4 equal 79
+
+variable x_cylinder equal 20
+variable y_cylinder equal 17
+variable r_cylinder equal 4
+
+variable MASS  equal 1
+variable TEMP  equal 0.4
+variable VFLOW equal 0.5
+
+units         lj
+atom_style    atomic
+
+lattice       fcc 0.3
+Lattice spacing in x,y,z = 2.3712622 2.3712622 2.3712622
+region        sim_box block 0 84 0 34 0 10
+
+boundary      p p p
+
+create_box    2 sim_box
+Created orthogonal box = (0 0 0) to (199.18603 80.622915 23.712622)
+  1 by 1 by 1 MPI processor grid
+region        reg_cylinder cylinder z ${x_cylinder} ${y_cylinder} ${r_cylinder} EDGE EDGE
+region        reg_cylinder cylinder z 20 ${y_cylinder} ${r_cylinder} EDGE EDGE
+region        reg_cylinder cylinder z 20 17 ${r_cylinder} EDGE EDGE
+region        reg_cylinder cylinder z 20 17 4 EDGE EDGE
+
+create_atoms  1 box
+Created 114240 atoms
+  using lattice units in orthogonal box = (0 0 0) to (199.18603 80.622915 23.712622)
+  create_atoms CPU = 0.010 seconds
+
+## setup obstacle ##
+group  g_obst region reg_cylinder
+1950 atoms in group g_obst
+group  g_flow subtract all g_obst
+112290 atoms in group g_flow
+set    group g_obst type 2
+Setting atom values ...
+  1950 settings made for type
+
+mass          1 ${MASS}
+mass          1 1
+mass          2 ${MASS}
+mass          2 1
+
+velocity  g_flow create ${TEMP} 4928459 rot yes dist gaussian
+velocity  g_flow create 0.4 4928459 rot yes dist gaussian
+velocity  g_obst set    0.0 0.0 0.0
+
+pair_style  lj/cut 1.122462
+pair_coeff  1 1 1.0 1.0
+pair_coeff  1 2 1.0 1.0
+pair_coeff  2 2 1.0 1.0
+pair_modify shift yes
+
+neighbor      0.3 bin
+neigh_modify  delay 0 every 20 check no
+
+fix    1     g_flow nve
+fix    2     g_flow wall/flow  x ${VFLOW} ${TEMP} 123 ${nwall} ${w1} ${w2} ${w3} ${w4}
+fix    2     g_flow wall/flow  x 0.5 ${TEMP} 123 ${nwall} ${w1} ${w2} ${w3} ${w4}
+fix    2     g_flow wall/flow  x 0.5 0.4 123 ${nwall} ${w1} ${w2} ${w3} ${w4}
+fix    2     g_flow wall/flow  x 0.5 0.4 123 4 ${w1} ${w2} ${w3} ${w4}
+fix    2     g_flow wall/flow  x 0.5 0.4 123 4 67 ${w2} ${w3} ${w4}
+fix    2     g_flow wall/flow  x 0.5 0.4 123 4 67 71 ${w3} ${w4}
+fix    2     g_flow wall/flow  x 0.5 0.4 123 4 67 71 75 ${w4}
+fix    2     g_flow wall/flow  x 0.5 0.4 123 4 67 71 75 79
+
+variable dump_every equal ${nrun}/${dump_count}
+variable dump_every equal 1000/${dump_count}
+variable dump_every equal 1000/10
+variable thermo_every equal ${dump_every}
+variable thermo_every equal 100
+variable restart_every equal ${nrun}/10
+variable restart_every equal 1000/10
+
+##### uncomment for grid aggregation #####
+#variable gr_Nx equal 42
+#variable gr_Ny equal 17
+#variable gr_Nz equal 1
+#variable gr_Nevery equal ${dump_every}
+#variable gr_Nrepeat equal 1
+#variable gr_Nfreq equal ${dump_every}
+#fix    3     g_flow ave/grid ${gr_Nevery} ${gr_Nrepeat} ${gr_Nfreq} ${gr_Nx} ${gr_Ny} ${gr_Nz} vx vy vz density/mass norm all ave one
+#compute ct_gridId g_flow property/grid ${gr_Nx} ${gr_Ny} ${gr_Nz} id
+#dump   dmp_grid  g_flow grid ${dump_every} grid.lammpstrj c_ct_gridId:grid:data f_3:grid:data[*]
+##########################################
+
+#dump   dmp_coord all atom ${dump_every} dump.lammpstrj
+
+#compute ct_Temp   g_flow temp/com
+#thermo_style custom step temp epair emol etotal press c_ct_Temp
+
+#restart  ${restart_every} flow.restart
+
+timestep 0.005
+thermo   ${thermo_every}
+thermo   100
+run      ${nrun}
+run      1000
+
+CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
+
+Your simulation uses code contributions which should be cited:
+
+- fix wall/flow command: doi:10.1177/10943420231213013
+
+@Article{Pavlov-etal-IJHPCA-2024,
+ author = {Daniil Pavlov and Vladislav Galigerov and Daniil Kolotinskii and Vsevolod Nikolskiy and Vladimir Stegailov},
+ title = {GPU-based molecular dynamics of fluid flows: Reaching for turbulence},
+ journal = {The International Journal of High Performance Computing Applications},
+ year =    2024,
+ volume =  38,
+ number =  1,
+ pages =   34-49
+}
+
+CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
+
+Generated 0 of 1 mixed pair_coeff terms from geometric mixing rule
+Neighbor list info ...
+  update: every = 20 steps, delay = 0 steps, check = no
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 1.422462
+  ghost atom cutoff = 1.422462
+  binsize = 0.711231, bins = 281 114 34
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair lj/cut, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 26.69 | 26.69 | 26.69 Mbytes
+   Step          Temp          E_pair         E_mol          TotEng         Press     
+         0   0.39317221     0              0              0.58975315     0.11795063   
+       100   0.3671684      0.045118445    0              0.59586622     0.27378331   
+       200   0.3732041      0.036897471    0              0.59669873     0.24917809   
+       300   0.37432305     0.036501844    0              0.5979815      0.24715194   
+       400   0.37603886     0.035350565    0              0.59940392     0.24480762   
+       500   0.37617142     0.036949771    0              0.60120196     0.24862985   
+       600   0.37751983     0.036484268    0              0.60275905     0.24784635   
+       700   0.37787831     0.037327783    0              0.60414029     0.25060427   
+       800   0.37959242     0.036206184    0              0.60558983     0.2476903    
+       900   0.38019033     0.036874395    0              0.6071549      0.24984211   
+      1000   0.38070666     0.037068948    0              0.60812395     0.25041936   
+Loop time of 5.61598 on 1 procs for 1000 steps with 114240 atoms
+
+Performance: 76923.319 tau/day, 178.063 timesteps/s, 20.342 Matom-step/s
+99.7% CPU use with 1 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 2.6351     | 2.6351     | 2.6351     |   0.0 | 46.92
+Neigh   | 1.2994     | 1.2994     | 1.2994     |   0.0 | 23.14
+Comm    | 0.26576    | 0.26576    | 0.26576    |   0.0 |  4.73
+Output  | 0.0030531  | 0.0030531  | 0.0030531  |   0.0 |  0.05
+Modify  | 1.3019     | 1.3019     | 1.3019     |   0.0 | 23.18
+Other   |            | 0.1107     |            |       |  1.97
+
+Nlocal:         114240 ave      114240 max      114240 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost:          20119 ave       20119 max       20119 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs:         164018 ave      164018 max      164018 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 164018
+Ave neighs/atom = 1.4357318
+Neighbor list builds = 50
+Dangerous builds not checked
+Total wall time: 0:00:05
diff --git a/examples/wall/log.7Feb24.wall.flow.g++.4 b/examples/wall/log.7Feb24.wall.flow.g++.4
new file mode 100644
index 0000000000..1efe7bb28e
--- /dev/null
+++ b/examples/wall/log.7Feb24.wall.flow.g++.4
@@ -0,0 +1,182 @@
+LAMMPS (21 Nov 2023 - Development - patch_21Nov2023-758-ge33590b2fc-modified)
+  using 1 OpenMP thread(s) per MPI task
+variable nrun equal 1000
+variable dump_count equal 10
+
+variable nwall equal 4
+variable w1 equal 67
+variable w2 equal 71
+variable w3 equal 75
+variable w4 equal 79
+
+variable x_cylinder equal 20
+variable y_cylinder equal 17
+variable r_cylinder equal 4
+
+variable MASS  equal 1
+variable TEMP  equal 0.4
+variable VFLOW equal 0.5
+
+units         lj
+atom_style    atomic
+
+lattice       fcc 0.3
+Lattice spacing in x,y,z = 2.3712622 2.3712622 2.3712622
+region        sim_box block 0 84 0 34 0 10
+
+boundary      p p p
+
+create_box    2 sim_box
+Created orthogonal box = (0 0 0) to (199.18603 80.622915 23.712622)
+  4 by 1 by 1 MPI processor grid
+region        reg_cylinder cylinder z ${x_cylinder} ${y_cylinder} ${r_cylinder} EDGE EDGE
+region        reg_cylinder cylinder z 20 ${y_cylinder} ${r_cylinder} EDGE EDGE
+region        reg_cylinder cylinder z 20 17 ${r_cylinder} EDGE EDGE
+region        reg_cylinder cylinder z 20 17 4 EDGE EDGE
+
+create_atoms  1 box
+Created 114240 atoms
+  using lattice units in orthogonal box = (0 0 0) to (199.18603 80.622915 23.712622)
+  create_atoms CPU = 0.003 seconds
+
+## setup obstacle ##
+group  g_obst region reg_cylinder
+1950 atoms in group g_obst
+group  g_flow subtract all g_obst
+112290 atoms in group g_flow
+set    group g_obst type 2
+Setting atom values ...
+  1950 settings made for type
+
+mass          1 ${MASS}
+mass          1 1
+mass          2 ${MASS}
+mass          2 1
+
+velocity  g_flow create ${TEMP} 4928459 rot yes dist gaussian
+velocity  g_flow create 0.4 4928459 rot yes dist gaussian
+velocity  g_obst set    0.0 0.0 0.0
+
+pair_style  lj/cut 1.122462
+pair_coeff  1 1 1.0 1.0
+pair_coeff  1 2 1.0 1.0
+pair_coeff  2 2 1.0 1.0
+pair_modify shift yes
+
+neighbor      0.3 bin
+neigh_modify  delay 0 every 20 check no
+
+fix    1     g_flow nve
+fix    2     g_flow wall/flow  x ${VFLOW} ${TEMP} 123 ${nwall} ${w1} ${w2} ${w3} ${w4}
+fix    2     g_flow wall/flow  x 0.5 ${TEMP} 123 ${nwall} ${w1} ${w2} ${w3} ${w4}
+fix    2     g_flow wall/flow  x 0.5 0.4 123 ${nwall} ${w1} ${w2} ${w3} ${w4}
+fix    2     g_flow wall/flow  x 0.5 0.4 123 4 ${w1} ${w2} ${w3} ${w4}
+fix    2     g_flow wall/flow  x 0.5 0.4 123 4 67 ${w2} ${w3} ${w4}
+fix    2     g_flow wall/flow  x 0.5 0.4 123 4 67 71 ${w3} ${w4}
+fix    2     g_flow wall/flow  x 0.5 0.4 123 4 67 71 75 ${w4}
+fix    2     g_flow wall/flow  x 0.5 0.4 123 4 67 71 75 79
+
+variable dump_every equal ${nrun}/${dump_count}
+variable dump_every equal 1000/${dump_count}
+variable dump_every equal 1000/10
+variable thermo_every equal ${dump_every}
+variable thermo_every equal 100
+variable restart_every equal ${nrun}/10
+variable restart_every equal 1000/10
+
+##### uncomment for grid aggregation #####
+#variable gr_Nx equal 42
+#variable gr_Ny equal 17
+#variable gr_Nz equal 1
+#variable gr_Nevery equal ${dump_every}
+#variable gr_Nrepeat equal 1
+#variable gr_Nfreq equal ${dump_every}
+#fix    3     g_flow ave/grid ${gr_Nevery} ${gr_Nrepeat} ${gr_Nfreq} ${gr_Nx} ${gr_Ny} ${gr_Nz} vx vy vz density/mass norm all ave one
+#compute ct_gridId g_flow property/grid ${gr_Nx} ${gr_Ny} ${gr_Nz} id
+#dump   dmp_grid  g_flow grid ${dump_every} grid.lammpstrj c_ct_gridId:grid:data f_3:grid:data[*]
+##########################################
+
+#dump   dmp_coord all atom ${dump_every} dump.lammpstrj
+
+#compute ct_Temp   g_flow temp/com
+#thermo_style custom step temp epair emol etotal press c_ct_Temp
+
+#restart  ${restart_every} flow.restart
+
+timestep 0.005
+thermo   ${thermo_every}
+thermo   100
+run      ${nrun}
+run      1000
+
+CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
+
+Your simulation uses code contributions which should be cited:
+
+- fix wall/flow command: doi:10.1177/10943420231213013
+
+@Article{Pavlov-etal-IJHPCA-2024,
+ author = {Daniil Pavlov and Vladislav Galigerov and Daniil Kolotinskii and Vsevolod Nikolskiy and Vladimir Stegailov},
+ title = {GPU-based molecular dynamics of fluid flows: Reaching for turbulence},
+ journal = {The International Journal of High Performance Computing Applications},
+ year =    2024,
+ volume =  38,
+ number =  1,
+ pages =   34-49
+}
+
+CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
+
+Generated 0 of 1 mixed pair_coeff terms from geometric mixing rule
+Neighbor list info ...
+  update: every = 20 steps, delay = 0 steps, check = no
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 1.422462
+  ghost atom cutoff = 1.422462
+  binsize = 0.711231, bins = 281 114 34
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair lj/cut, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 8.496 | 8.496 | 8.496 Mbytes
+   Step          Temp          E_pair         E_mol          TotEng         Press     
+         0   0.39317221     0              0              0.58975315     0.11795063   
+       100   0.36726398     0.045386014    0              0.59627716     0.27402111   
+       200   0.37384538     0.036574547    0              0.5973377      0.24836729   
+       300   0.37487455     0.036519645    0              0.59882654     0.24691726   
+       400   0.37591417     0.036405755    0              0.60027207     0.24700641   
+       500   0.37654714     0.037008829    0              0.60182459     0.24883444   
+       600   0.3778008      0.03663706     0              0.6033333      0.24874392   
+       700   0.37851338     0.036714175    0              0.60447928     0.24881829   
+       800   0.37984876     0.036237049    0              0.6060052      0.24843003   
+       900   0.38022763     0.036847615    0              0.60718407     0.24987198   
+      1000   0.38084717     0.037139994    0              0.60840575     0.25070072   
+Loop time of 2.20347 on 4 procs for 1000 steps with 114240 atoms
+
+Performance: 196054.093 tau/day, 453.829 timesteps/s, 51.845 Matom-step/s
+95.6% CPU use with 4 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0.67927    | 0.70882    | 0.73473    |   2.4 | 32.17
+Neigh   | 0.32928    | 0.34467    | 0.36084    |   2.0 | 15.64
+Comm    | 0.3211     | 0.36609    | 0.40741    |   6.1 | 16.61
+Output  | 0.0017748  | 0.0032465  | 0.0046508  |   2.1 |  0.15
+Modify  | 0.71135    | 0.74424    | 0.76001    |   2.3 | 33.78
+Other   |            | 0.03641    |            |       |  1.65
+
+Nlocal:          28560 ave       29169 max       27884 min
+Histogram: 1 0 0 0 0 2 0 0 0 1
+Nghost:        6452.25 ave        6546 max        6368 min
+Histogram: 1 0 0 0 2 0 0 0 0 1
+Neighs:          40893 ave       42032 max       39445 min
+Histogram: 1 0 0 0 1 0 0 1 0 1
+
+Total # of neighbors = 163572
+Ave neighs/atom = 1.4318277
+Neighbor list builds = 50
+Dangerous builds not checked
+Total wall time: 0:00:02
diff --git a/lib/kokkos/CHANGELOG.md b/lib/kokkos/CHANGELOG.md
index c6115f4b3d..40e3c95f24 100644
--- a/lib/kokkos/CHANGELOG.md
+++ b/lib/kokkos/CHANGELOG.md
@@ -1,5 +1,26 @@
 # CHANGELOG
 
+## [4.2.01](https://github.com/kokkos/kokkos/tree/4.2.01) (2023-12-07)
+[Full Changelog](https://github.com/kokkos/kokkos/compare/4.2.00...4.2.01)
+
+### Backend and Architecture Enhancements:
+
+#### CUDA:
+- Add warp sync for `parallel_reduce` to avoid race condition [\#6630](https://github.com/kokkos/kokkos/pull/6630), [\#6746](https://github.com/kokkos/kokkos/pull/6746)
+
+#### HIP:
+- Fix Graph "multiple definition of" linking error (missing `inline` specifier) [\#6624](https://github.com/kokkos/kokkos/pull/6624)
+- Add support for gfx940 (AMD Instinct MI300 GPU) [\#6671](https://github.com/kokkos/kokkos/pull/6671)
+
+### Build System
+- CMake: Don't let Kokkos set `CMAKE_CXX_FLAGS` for Trilinos builds [\#6742](https://github.com/kokkos/kokkos/pull/6742)
+
+### Bug Fixes
+- Remove deprecation warning for `AllocationMechanism` for GCC <11.0 [\#6653](https://github.com/kokkos/kokkos/pull/6653)
+- Fix bug early tools finalize with non-default host execution instances [\#6635](https://github.com/kokkos/kokkos/pull/6635)
+- Fix various issues for MSVC CUDA builds [\#6659](https://github.com/kokkos/kokkos/pull/6659)
+- Fix "extra `;`" warning with `-pedantic` flag in `<Kokkos_SIMD_Scalar.hpp>` [\#6510](https://github.com/kokkos/kokkos/pull/6510)
+
 ## [4.2.00](https://github.com/kokkos/kokkos/tree/4.2.00) (2023-11-06)
 [Full Changelog](https://github.com/kokkos/kokkos/compare/4.1.00...4.2.00)
 
@@ -43,7 +64,7 @@
 
 #### SYCL:
 - Enforce external `sycl::queues` to be in-order [\#6246](https://github.com/kokkos/kokkos/pull/6246)
-- Improve reduction performance: [\#6272](https://github.com/kokkos/kokkos/pull/6272) [\#6271](https://github.com/kokkos/kokkos/pull/6271) [\#6270](https://github.com/kokkos/kokkos/pull/6270) [\#6264](https://github.com/kokkos/kokkos/pull/6264) 
+- Improve reduction performance: [\#6272](https://github.com/kokkos/kokkos/pull/6272) [\#6271](https://github.com/kokkos/kokkos/pull/6271) [\#6270](https://github.com/kokkos/kokkos/pull/6270) [\#6264](https://github.com/kokkos/kokkos/pull/6264)
 - Allow using the SYCL execution space on AMD GPUs [\#6321](https://github.com/kokkos/kokkos/pull/6321)
 - Allow sorting via native oneDPL to support Views with stride=1 [\#6322](https://github.com/kokkos/kokkos/pull/6322)
 - Make in-order queues the default via macro [\#6189](https://github.com/kokkos/kokkos/pull/6189)
@@ -64,7 +85,7 @@
 - Add converting assignment to `DualView`:  [\#6474](https://github.com/kokkos/kokkos/pull/6474)
 
 
-### Build System Changes 
+### Build System Changes
 
 - Export `Kokkos_CXX_COMPILER_VERSION` [\#6282](https://github.com/kokkos/kokkos/pull/6282)
 - Disable default oneDPL support in Trilinos [\#6342](https://github.com/kokkos/kokkos/pull/6342)
diff --git a/lib/kokkos/CMakeLists.txt b/lib/kokkos/CMakeLists.txt
index f6bd81058e..4a4e7a5501 100644
--- a/lib/kokkos/CMakeLists.txt
+++ b/lib/kokkos/CMakeLists.txt
@@ -151,7 +151,7 @@ ENDIF()
 
 set(Kokkos_VERSION_MAJOR 4)
 set(Kokkos_VERSION_MINOR 2)
-set(Kokkos_VERSION_PATCH 0)
+set(Kokkos_VERSION_PATCH 1)
 set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}")
 message(STATUS "Kokkos version: ${Kokkos_VERSION}")
 math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}")
@@ -252,7 +252,6 @@ ENDIF()
 # subpackages
 
 ## This restores the old behavior of ProjectCompilerPostConfig.cmake
-# It sets the CMAKE_CXX_FLAGS globally to those used by Kokkos
 # We must do this before KOKKOS_PACKAGE_DECL
 IF (KOKKOS_HAS_TRILINOS)
   # Overwrite the old flags at the top-level
@@ -280,21 +279,13 @@ IF (KOKKOS_HAS_TRILINOS)
     SET(KOKKOSCORE_XCOMPILER_OPTIONS "${KOKKOSCORE_XCOMPILER_OPTIONS} -Xcompiler ${XCOMP_FLAG}")
     LIST(APPEND KOKKOS_ALL_COMPILE_OPTIONS -Xcompiler ${XCOMP_FLAG})
   ENDFOREACH()
-  SET(KOKKOSCORE_CXX_FLAGS "${KOKKOSCORE_COMPILE_OPTIONS} ${KOKKOSCORE_XCOMPILER_OPTIONS}")
   IF (KOKKOS_ENABLE_CUDA)
     STRING(REPLACE ";" " " KOKKOSCORE_CUDA_OPTIONS    "${KOKKOS_CUDA_OPTIONS}")
     FOREACH(CUDAFE_FLAG ${KOKKOS_CUDAFE_OPTIONS})
       SET(KOKKOSCORE_CUDAFE_OPTIONS "${KOKKOSCORE_CUDAFE_OPTIONS} -Xcudafe ${CUDAFE_FLAG}")
       LIST(APPEND KOKKOS_ALL_COMPILE_OPTIONS -Xcudafe ${CUDAFE_FLAG})
     ENDFOREACH()
-    SET(KOKKOSCORE_CXX_FLAGS "${KOKKOSCORE_CXX_FLAGS} ${KOKKOSCORE_CUDA_OPTIONS} ${KOKKOSCORE_CUDAFE_OPTIONS}")
   ENDIF()
-  # Both parent scope and this package
-  # In ProjectCompilerPostConfig.cmake, we capture the "global" flags Trilinos wants in
-  # TRILINOS_TOPLEVEL_CXX_FLAGS
-  SET(CMAKE_CXX_FLAGS "${TRILINOS_TOPLEVEL_CXX_FLAGS} ${KOKKOSCORE_CXX_FLAGS}" PARENT_SCOPE)
-  SET(CMAKE_CXX_FLAGS "${TRILINOS_TOPLEVEL_CXX_FLAGS} ${KOKKOSCORE_CXX_FLAGS}")
-  #CMAKE_CXX_FLAGS will get added to Kokkos and Kokkos dependencies automatically here
   #These flags get set up in KOKKOS_PACKAGE_DECL, which means they
   #must be configured before KOKKOS_PACKAGE_DECL
   SET(KOKKOS_ALL_COMPILE_OPTIONS
diff --git a/lib/kokkos/Makefile.kokkos b/lib/kokkos/Makefile.kokkos
index c970f72755..393422d73c 100644
--- a/lib/kokkos/Makefile.kokkos
+++ b/lib/kokkos/Makefile.kokkos
@@ -12,7 +12,7 @@ endif
 
 KOKKOS_VERSION_MAJOR = 4
 KOKKOS_VERSION_MINOR = 2
-KOKKOS_VERSION_PATCH = 0
+KOKKOS_VERSION_PATCH = 1
 KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc)
 
 # Options: Cuda,HIP,SYCL,OpenMPTarget,OpenMP,Threads,Serial
@@ -23,7 +23,7 @@ KOKKOS_DEVICES ?= "OpenMP"
 # NVIDIA:   Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75,Ampere80,Ampere86,Ada89,Hopper90
 # ARM:      ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX
 # IBM:      BGQ,Power7,Power8,Power9
-# AMD-GPUS: GFX906,GFX908,GFX90A,GFX942,GFX1030,GFX1100
+# AMD-GPUS: GFX906,GFX908,GFX90A,GFX940,GFX942,GFX1030,GFX1100
 # AMD-CPUS: AMDAVX,Zen,Zen2,Zen3
 # Intel-GPUs: Gen9,Gen11,Gen12LP,DG1,XeHP,PVC
 KOKKOS_ARCH ?= ""
@@ -416,6 +416,8 @@ endif
 KOKKOS_INTERNAL_USE_ARCH_AMD_GFX906 := $(or $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA906),$(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX906))
 KOKKOS_INTERNAL_USE_ARCH_AMD_GFX908 := $(or $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA908),$(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX908))
 KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A := $(or $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA90A),$(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX90A))
+KOKKOS_INTERNAL_USE_ARCH_AMD_GFX940 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX940)
+KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX942)
 KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030 := $(or $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1030),$(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1030))
 KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100 := $(or $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1100),$(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1100))
 
@@ -1113,6 +1115,11 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A), 1)
   tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU")
   KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx90a
 endif
+ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX940), 1)
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX940")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU")
+  KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx940
+endif
 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942), 1)
   tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX942")
   tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU")
diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsCommon.hpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsCommon.hpp
index b962218b5f..3eb963faf2 100644
--- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsCommon.hpp
+++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsCommon.hpp
@@ -198,8 +198,9 @@ auto create_deep_copyable_compatible_view_with_same_extent(ViewType view) {
 
   // this is needed for intel to avoid
   // error #1011: missing return statement at end of non-void function
-#if defined KOKKOS_COMPILER_INTEL || \
-    (defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130)
+#if defined KOKKOS_COMPILER_INTEL ||                                  \
+    (defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
+     !defined(KOKKOS_COMPILER_MSVC))
   __builtin_unreachable();
 #endif
 }
diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamCopyIf.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamCopyIf.cpp
index b32a9be3a1..b5aa27c7c3 100644
--- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamCopyIf.cpp
+++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamCopyIf.cpp
@@ -139,7 +139,7 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
   auto intraTeamSentinelView_h = create_host_space_copy(intraTeamSentinelView);
   Kokkos::View<ValueType**, Kokkos::HostSpace> stdDestView("stdDestView",
                                                            numTeams, numCols);
-  GreaterThanValueFunctor predicate(threshold);
+  GreaterThanValueFunctor<ValueType> predicate(threshold);
   for (std::size_t i = 0; i < sourceView.extent(0); ++i) {
     auto rowFrom = Kokkos::subview(sourceViewBeforeOp_h, i, Kokkos::ALL());
     auto rowDest = Kokkos::subview(stdDestView, i, Kokkos::ALL());
diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamIsPartitioned.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamIsPartitioned.cpp
index 1928f95588..21da333e75 100644
--- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamIsPartitioned.cpp
+++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamIsPartitioned.cpp
@@ -191,7 +191,7 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId,
   // -----------------------------------------------
   auto returnView_h            = create_host_space_copy(returnView);
   auto intraTeamSentinelView_h = create_host_space_copy(intraTeamSentinelView);
-  GreaterThanValueFunctor predicate(threshold);
+  GreaterThanValueFunctor<ValueType> predicate(threshold);
 
   for (std::size_t i = 0; i < dataView_dc_h.extent(0); ++i) {
     auto myRow = Kokkos::subview(dataView_dc_h, i, Kokkos::ALL());
diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamPartitionCopy.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamPartitionCopy.cpp
index c0bbdfa390..78ab6bf1f8 100644
--- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamPartitionCopy.cpp
+++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamPartitionCopy.cpp
@@ -240,7 +240,7 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId,
       "stdDestTrueView", numTeams, numCols);
   Kokkos::View<ValueType**, Kokkos::HostSpace> stdDestFalseView(
       "stdDestFalseView", numTeams, numCols);
-  GreaterThanValueFunctor predicate(threshold);
+  GreaterThanValueFunctor<ValueType> predicate(threshold);
 
   for (std::size_t i = 0; i < sourceView_dc_h.extent(0); ++i) {
     auto myRowSource    = Kokkos::subview(sourceView_dc_h, i, Kokkos::ALL());
diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamPartitionPoint.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamPartitionPoint.cpp
index 954d461246..370e91cc1f 100644
--- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamPartitionPoint.cpp
+++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamPartitionPoint.cpp
@@ -197,7 +197,7 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId,
   auto distancesView_h         = create_host_space_copy(distancesView);
   auto dataViewAfterOp_h       = create_host_space_copy(dataView);
   auto intraTeamSentinelView_h = create_host_space_copy(intraTeamSentinelView);
-  GreaterThanValueFunctor predicate(threshold);
+  GreaterThanValueFunctor<ValueType> predicate(threshold);
 
   for (std::size_t i = 0; i < dataView_dc_h.extent(0); ++i) {
     auto myRow = Kokkos::subview(dataView_dc_h, i, Kokkos::ALL());
diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamRemoveCopyIf.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamRemoveCopyIf.cpp
index 2082fa9728..ce18eb4d31 100644
--- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamRemoveCopyIf.cpp
+++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamRemoveCopyIf.cpp
@@ -138,7 +138,7 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
   auto intraTeamSentinelView_h = create_host_space_copy(intraTeamSentinelView);
   Kokkos::View<ValueType**, Kokkos::HostSpace> stdDestView("stdDestView",
                                                            numTeams, numCols);
-  GreaterThanValueFunctor predicate(threshold);
+  GreaterThanValueFunctor<ValueType> predicate(threshold);
   for (std::size_t i = 0; i < destViewAfterOp_h.extent(0); ++i) {
     auto rowFrom =
         Kokkos::subview(cloneOfSourceViewBeforeOp_h, i, Kokkos::ALL());
diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamRemoveIf.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamRemoveIf.cpp
index 3315f281da..3dd7cb764c 100644
--- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamRemoveIf.cpp
+++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamRemoveIf.cpp
@@ -127,7 +127,7 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
   // -----------------------------------------------
   // check against std
   // -----------------------------------------------
-  GreaterThanValueFunctor predicate(threshold);
+  GreaterThanValueFunctor<ValueType> predicate(threshold);
   auto dataViewAfterOp_h       = create_host_space_copy(dataView);
   auto distancesView_h         = create_host_space_copy(distancesView);
   auto intraTeamSentinelView_h = create_host_space_copy(intraTeamSentinelView);
diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamReplaceCopyIf.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamReplaceCopyIf.cpp
index ae43a2a426..d0217aed7a 100644
--- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamReplaceCopyIf.cpp
+++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamReplaceCopyIf.cpp
@@ -145,7 +145,7 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
   auto intraTeamSentinelView_h = create_host_space_copy(intraTeamSentinelView);
   Kokkos::View<ValueType**, Kokkos::HostSpace> stdDestView("stdDestView",
                                                            numTeams, numCols);
-  GreaterThanValueFunctor predicate(threshold);
+  GreaterThanValueFunctor<ValueType> predicate(threshold);
   for (std::size_t i = 0; i < sourceView.extent(0); ++i) {
     auto rowFrom =
         Kokkos::subview(cloneOfSourceViewBeforeOp_h, i, Kokkos::ALL());
diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamReplaceIf.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamReplaceIf.cpp
index 1d5d9578f9..d79b53d355 100644
--- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamReplaceIf.cpp
+++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamReplaceIf.cpp
@@ -103,7 +103,7 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
       stdDataView(i, j) = cloneOfDataViewBeforeOp_h(i, j);
     }
   }
-  GreaterThanValueFunctor predicate(threshold);
+  GreaterThanValueFunctor<ValueType> predicate(threshold);
   for (std::size_t i = 0; i < dataView.extent(0); ++i) {
     auto thisRow = Kokkos::subview(stdDataView, i, Kokkos::ALL());
     std::replace_if(KE::begin(thisRow), KE::end(thisRow), predicate, newVal);
diff --git a/lib/kokkos/cmake/KokkosCore_config.h.in b/lib/kokkos/cmake/KokkosCore_config.h.in
index bec59ebd03..9930d2abf0 100644
--- a/lib/kokkos/cmake/KokkosCore_config.h.in
+++ b/lib/kokkos/cmake/KokkosCore_config.h.in
@@ -114,6 +114,7 @@
 #cmakedefine KOKKOS_ARCH_AMD_GFX906
 #cmakedefine KOKKOS_ARCH_AMD_GFX908
 #cmakedefine KOKKOS_ARCH_AMD_GFX90A
+#cmakedefine KOKKOS_ARCH_AMD_GFX940
 #cmakedefine KOKKOS_ARCH_AMD_GFX942
 #cmakedefine KOKKOS_ARCH_AMD_GFX1030
 #cmakedefine KOKKOS_ARCH_AMD_GFX1100
diff --git a/lib/kokkos/cmake/kokkos_arch.cmake b/lib/kokkos/cmake/kokkos_arch.cmake
index bccf674d76..30764bde86 100644
--- a/lib/kokkos/cmake/kokkos_arch.cmake
+++ b/lib/kokkos/cmake/kokkos_arch.cmake
@@ -94,9 +94,9 @@ IF(Kokkos_ENABLE_HIP OR Kokkos_ENABLE_OPENMPTARGET OR Kokkos_ENABLE_OPENACC OR K
 ENDIF()
 
 # AMD archs ordered in decreasing priority of autodetection
-LIST(APPEND SUPPORTED_AMD_GPUS       MI300)
-LIST(APPEND SUPPORTED_AMD_ARCHS      AMD_GFX942)
-LIST(APPEND CORRESPONDING_AMD_FLAGS  gfx942)
+LIST(APPEND SUPPORTED_AMD_GPUS       MI300 MI300)
+LIST(APPEND SUPPORTED_AMD_ARCHS      AMD_GFX942 AMD_GFX940)
+LIST(APPEND CORRESPONDING_AMD_FLAGS  gfx942 gfx940)
 LIST(APPEND SUPPORTED_AMD_GPUS       MI200    MI200       MI100    MI100)
 LIST(APPEND SUPPORTED_AMD_ARCHS      VEGA90A  AMD_GFX90A  VEGA908  AMD_GFX908)
 LIST(APPEND CORRESPONDING_AMD_FLAGS  gfx90a   gfx90a      gfx908   gfx908)
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel_MDRange.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel_MDRange.hpp
index 8aae27d091..49d6c112e3 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel_MDRange.hpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel_MDRange.hpp
@@ -309,6 +309,11 @@ class ParallelReduce<CombinedFunctorReducerType,
 
       if (CudaTraits::WarpSize < word_count.value) {
         __syncthreads();
+      } else {
+        // In the above call to final(), shared might have been updated by a
+        // single thread within a warp without synchronization. Synchronize
+        // threads within warp to avoid potential race condition.
+        __syncwarp(0xffffffff);
       }
 
       for (unsigned i = threadIdx.y; i < word_count.value; i += blockDim.y) {
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel_Range.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel_Range.hpp
index 5226c48bd9..3472999281 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel_Range.hpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel_Range.hpp
@@ -243,6 +243,12 @@ class ParallelReduce<CombinedFunctorReducerType, Kokkos::RangePolicy<Traits...>,
 
       if (CudaTraits::WarpSize < word_count.value) {
         __syncthreads();
+      } else if (word_count.value > 1) {
+        // Inside cuda_single_inter_block_reduce_scan() above, shared[i] below
+        // might have been updated by a single thread within a warp without
+        // synchronization afterwards. Synchronize threads within warp to avoid
+        // potential racecondition.
+        __syncwarp(0xffffffff);
       }
 
       for (unsigned i = threadIdx.y; i < word_count.value; i += blockDim.y) {
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel_Team.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel_Team.hpp
index 498e57f94a..b4679b4e0d 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel_Team.hpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel_Team.hpp
@@ -742,6 +742,11 @@ class ParallelReduce<CombinedFunctorReducerType,
 
       if (CudaTraits::WarpSize < word_count.value) {
         __syncthreads();
+      } else {
+        // In the above call to final(), shared might have been updated by a
+        // single thread within a warp without synchronization. Synchronize
+        // threads within warp to avoid potential race condition.
+        __syncwarp(0xffffffff);
       }
 
       for (unsigned i = threadIdx.y; i < word_count.value; i += blockDim.y) {
diff --git a/lib/kokkos/core/src/HIP/Kokkos_HIP_Graph_Impl.hpp b/lib/kokkos/core/src/HIP/Kokkos_HIP_Graph_Impl.hpp
index 3bde15444c..7cc06d02fb 100644
--- a/lib/kokkos/core/src/HIP/Kokkos_HIP_Graph_Impl.hpp
+++ b/lib/kokkos/core/src/HIP/Kokkos_HIP_Graph_Impl.hpp
@@ -83,7 +83,7 @@ class GraphImpl<Kokkos::HIP> {
   hipGraphExec_t m_graph_exec = nullptr;
 };
 
-GraphImpl<Kokkos::HIP>::~GraphImpl() {
+inline GraphImpl<Kokkos::HIP>::~GraphImpl() {
   m_execution_space.fence("Kokkos::GraphImpl::~GraphImpl: Graph Destruction");
   KOKKOS_EXPECTS(m_graph);
   if (m_graph_exec) {
@@ -92,12 +92,12 @@ GraphImpl<Kokkos::HIP>::~GraphImpl() {
   KOKKOS_IMPL_HIP_SAFE_CALL(hipGraphDestroy(m_graph));
 }
 
-GraphImpl<Kokkos::HIP>::GraphImpl(Kokkos::HIP instance)
+inline GraphImpl<Kokkos::HIP>::GraphImpl(Kokkos::HIP instance)
     : m_execution_space(std::move(instance)) {
   KOKKOS_IMPL_HIP_SAFE_CALL(hipGraphCreate(&m_graph, 0));
 }
 
-void GraphImpl<Kokkos::HIP>::add_node(
+inline void GraphImpl<Kokkos::HIP>::add_node(
     std::shared_ptr<aggregate_node_impl_t> const& arg_node_ptr) {
   // All of the predecessors are just added as normal, so all we need to
   // do here is add an empty node
@@ -110,7 +110,7 @@ void GraphImpl<Kokkos::HIP>::add_node(
 // Requires NodeImplPtr is a shared_ptr to specialization of GraphNodeImpl
 // Also requires that the kernel has the graph node tag in it's policy
 template <class NodeImpl>
-void GraphImpl<Kokkos::HIP>::add_node(
+inline void GraphImpl<Kokkos::HIP>::add_node(
     std::shared_ptr<NodeImpl> const& arg_node_ptr) {
   static_assert(NodeImpl::kernel_type::Policy::is_graph_kernel::value);
   KOKKOS_EXPECTS(arg_node_ptr);
@@ -129,8 +129,8 @@ void GraphImpl<Kokkos::HIP>::add_node(
 // already been added to this graph and NodeImpl is a specialization of
 // GraphNodeImpl that has already been added to this graph.
 template <class NodeImplPtr, class PredecessorRef>
-void GraphImpl<Kokkos::HIP>::add_predecessor(NodeImplPtr arg_node_ptr,
-                                             PredecessorRef arg_pred_ref) {
+inline void GraphImpl<Kokkos::HIP>::add_predecessor(
+    NodeImplPtr arg_node_ptr, PredecessorRef arg_pred_ref) {
   KOKKOS_EXPECTS(arg_node_ptr);
   auto pred_ptr = GraphAccess::get_node_ptr(arg_pred_ref);
   KOKKOS_EXPECTS(pred_ptr);
@@ -145,7 +145,7 @@ void GraphImpl<Kokkos::HIP>::add_predecessor(NodeImplPtr arg_node_ptr,
       hipGraphAddDependencies(m_graph, &pred_node, &node, 1));
 }
 
-void GraphImpl<Kokkos::HIP>::submit() {
+inline void GraphImpl<Kokkos::HIP>::submit() {
   if (!m_graph_exec) {
     instantiate_graph();
   }
@@ -153,12 +153,12 @@ void GraphImpl<Kokkos::HIP>::submit() {
       hipGraphLaunch(m_graph_exec, m_execution_space.hip_stream()));
 }
 
-Kokkos::HIP const& GraphImpl<Kokkos::HIP>::get_execution_space() const
+inline Kokkos::HIP const& GraphImpl<Kokkos::HIP>::get_execution_space() const
     noexcept {
   return m_execution_space;
 }
 
-auto GraphImpl<Kokkos::HIP>::create_root_node_ptr() {
+inline auto GraphImpl<Kokkos::HIP>::create_root_node_ptr() {
   KOKKOS_EXPECTS(m_graph);
   KOKKOS_EXPECTS(!m_graph_exec);
   auto rv = std::make_shared<root_node_impl_t>(get_execution_space(),
@@ -172,7 +172,7 @@ auto GraphImpl<Kokkos::HIP>::create_root_node_ptr() {
 }
 
 template <class... PredecessorRefs>
-auto GraphImpl<Kokkos::HIP>::create_aggregate_ptr(PredecessorRefs&&...) {
+inline auto GraphImpl<Kokkos::HIP>::create_aggregate_ptr(PredecessorRefs&&...) {
   // The attachment to predecessors, which is all we really need, happens
   // in the generic layer, which calls through to add_predecessor for
   // each predecessor ref, so all we need to do here is create the (trivial)
diff --git a/lib/kokkos/core/src/HIP/Kokkos_HIP_Instance.hpp b/lib/kokkos/core/src/HIP/Kokkos_HIP_Instance.hpp
index ef140ec46c..63ad66686b 100644
--- a/lib/kokkos/core/src/HIP/Kokkos_HIP_Instance.hpp
+++ b/lib/kokkos/core/src/HIP/Kokkos_HIP_Instance.hpp
@@ -30,7 +30,8 @@ namespace Impl {
 
 struct HIPTraits {
 #if defined(KOKKOS_ARCH_AMD_GFX906) || defined(KOKKOS_ARCH_AMD_GFX908) || \
-    defined(KOKKOS_ARCH_AMD_GFX90A) || defined(KOKKOS_ARCH_AMD_GFX942)
+    defined(KOKKOS_ARCH_AMD_GFX90A) || defined(KOKKOS_ARCH_AMD_GFX940) || \
+    defined(KOKKOS_ARCH_AMD_GFX942)
   static constexpr int WarpSize       = 64;
   static constexpr int WarpIndexMask  = 0x003f; /* hexadecimal for 63 */
   static constexpr int WarpIndexShift = 6;      /* WarpSize == 1 << WarpShift*/
diff --git a/lib/kokkos/core/src/Kokkos_HostSpace.hpp b/lib/kokkos/core/src/Kokkos_HostSpace.hpp
index 90d1404063..252aabd949 100644
--- a/lib/kokkos/core/src/Kokkos_HostSpace.hpp
+++ b/lib/kokkos/core/src/Kokkos_HostSpace.hpp
@@ -75,12 +75,19 @@ class HostSpace {
   /**\brief  Non-default memory space instance to choose allocation mechansim,
    * if available */
 
-  enum KOKKOS_DEPRECATED AllocationMechanism {
-    STD_MALLOC,
-    POSIX_MEMALIGN,
-    POSIX_MMAP,
-    INTEL_MM_ALLOC
-  };
+#if defined(KOKKOS_COMPILER_GNU) && KOKKOS_COMPILER_GNU < 1100
+  // We see deprecation warnings even when not using the deprecated
+  // HostSpace constructor below when using gcc before release 11.
+  enum
+#else
+  enum KOKKOS_DEPRECATED
+#endif
+      AllocationMechanism {
+        STD_MALLOC,
+        POSIX_MEMALIGN,
+        POSIX_MMAP,
+        INTEL_MM_ALLOC
+      };
 
   KOKKOS_DEPRECATED
   explicit HostSpace(const AllocationMechanism&);
diff --git a/lib/kokkos/core/src/Kokkos_Printf.hpp b/lib/kokkos/core/src/Kokkos_Printf.hpp
index af20221a5a..39f95825c3 100644
--- a/lib/kokkos/core/src/Kokkos_Printf.hpp
+++ b/lib/kokkos/core/src/Kokkos_Printf.hpp
@@ -31,7 +31,7 @@ namespace Kokkos {
 // backends. The GPU backends always return 1 and NVHPC only compiles if we
 // don't ask for the return value.
 template <typename... Args>
-KOKKOS_FORCEINLINE_FUNCTION void printf(const char* format, Args... args) {
+KOKKOS_FUNCTION void printf(const char* format, Args... args) {
 #ifdef KOKKOS_ENABLE_SYCL
   // Some compilers warn if "args" is empty and format is not a string literal
   if constexpr (sizeof...(Args) == 0)
diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Instance.cpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Instance.cpp
index 44f0fbc180..12bf3b71f7 100644
--- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Instance.cpp
+++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Instance.cpp
@@ -359,8 +359,6 @@ void OpenMPInternal::finalize() {
   }
 
   m_initialized = false;
-
-  Kokkos::Profiling::finalize();
 }
 
 void OpenMPInternal::print_configuration(std::ostream &s) const {
diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Instance.hpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Instance.hpp
index 4586406e16..03f5fff395 100644
--- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Instance.hpp
+++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Instance.hpp
@@ -219,6 +219,8 @@ KOKKOS_DEPRECATED void OpenMP::partition_master(F const& f, int num_partitions,
     Exec::validate_partition_impl(prev_instance->m_pool_size, num_partitions,
                                   partition_size);
 
+    OpenMP::memory_space space;
+
 #pragma omp parallel num_threads(num_partitions)
     {
       Exec thread_local_instance(partition_size);
diff --git a/lib/kokkos/core/src/Serial/Kokkos_Serial.cpp b/lib/kokkos/core/src/Serial/Kokkos_Serial.cpp
index e81e834939..071ecdbc4f 100644
--- a/lib/kokkos/core/src/Serial/Kokkos_Serial.cpp
+++ b/lib/kokkos/core/src/Serial/Kokkos_Serial.cpp
@@ -58,8 +58,6 @@ void SerialInternal::finalize() {
     m_thread_team_data.scratch_assign(nullptr, 0, 0, 0, 0, 0);
   }
 
-  Kokkos::Profiling::finalize();
-
   m_is_initialized = false;
 }
 
diff --git a/lib/kokkos/core/src/Serial/Kokkos_Serial.hpp b/lib/kokkos/core/src/Serial/Kokkos_Serial.hpp
index db1567610b..67119cac16 100644
--- a/lib/kokkos/core/src/Serial/Kokkos_Serial.hpp
+++ b/lib/kokkos/core/src/Serial/Kokkos_Serial.hpp
@@ -30,6 +30,7 @@ static_assert(false,
 
 #include <cstddef>
 #include <iosfwd>
+#include <iterator>
 #include <mutex>
 #include <thread>
 #include <Kokkos_Core_fwd.hpp>
diff --git a/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp b/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp
index c754091e87..801a1ac82e 100644
--- a/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp
+++ b/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp
@@ -815,8 +815,6 @@ void ThreadsExec::finalize() {
   s_threads_process.m_pool_size      = 1;
   s_threads_process.m_pool_fan_size  = 0;
   s_threads_process.m_pool_state     = ThreadsExec::Inactive;
-
-  Kokkos::Profiling::finalize();
 }
 
 //----------------------------------------------------------------------------
diff --git a/lib/kokkos/core/unit_test/TestMathematicalFunctions.hpp b/lib/kokkos/core/unit_test/TestMathematicalFunctions.hpp
index d32ef4ca23..424ba05a90 100644
--- a/lib/kokkos/core/unit_test/TestMathematicalFunctions.hpp
+++ b/lib/kokkos/core/unit_test/TestMathematicalFunctions.hpp
@@ -30,8 +30,9 @@
 #define MATHEMATICAL_FUNCTIONS_HAVE_LONG_DOUBLE_OVERLOADS
 #endif
 
-#if defined KOKKOS_COMPILER_INTEL || \
-    (defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130)
+#if defined KOKKOS_COMPILER_INTEL ||                                  \
+    (defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
+     !defined(KOKKOS_COMPILER_MSVC))
 #define MATHEMATICAL_FUNCTIONS_TEST_UNREACHABLE __builtin_unreachable();
 #else
 #define MATHEMATICAL_FUNCTIONS_TEST_UNREACHABLE
@@ -394,10 +395,12 @@ DEFINE_UNARY_FUNCTION_EVAL(log2, 2);
 DEFINE_UNARY_FUNCTION_EVAL(log1p, 2);
 #endif
 
-#ifndef KOKKOS_MATHEMATICAL_FUNCTIONS_SKIP_1
+#ifndef KOKKOS_MATHEMATICAL_FUNCTIONS_SKIP_2
 DEFINE_UNARY_FUNCTION_EVAL(sqrt, 2);
 DEFINE_UNARY_FUNCTION_EVAL(cbrt, 2);
+#endif
 
+#ifndef KOKKOS_MATHEMATICAL_FUNCTIONS_SKIP_1
 DEFINE_UNARY_FUNCTION_EVAL(sin, 2);
 DEFINE_UNARY_FUNCTION_EVAL(cos, 2);
 DEFINE_UNARY_FUNCTION_EVAL(tan, 2);
@@ -483,11 +486,9 @@ DEFINE_UNARY_FUNCTION_EVAL(logb, 2);
   };                                                                           \
   constexpr char math_function_name<MathBinaryFunction_##FUNC>::name[]
 
-#ifndef KOKKOS_MATHEMATICAL_FUNCTIONS_SKIP_1
+#ifndef KOKKOS_MATHEMATICAL_FUNCTIONS_SKIP_2
 DEFINE_BINARY_FUNCTION_EVAL(pow, 2);
 DEFINE_BINARY_FUNCTION_EVAL(hypot, 2);
-#endif
-#ifndef KOKKOS_MATHEMATICAL_FUNCTIONS_SKIP_2
 DEFINE_BINARY_FUNCTION_EVAL(nextafter, 1);
 DEFINE_BINARY_FUNCTION_EVAL(copysign, 1);
 #endif
@@ -519,7 +520,7 @@ DEFINE_BINARY_FUNCTION_EVAL(copysign, 1);
   };                                                                          \
   constexpr char math_function_name<MathTernaryFunction_##FUNC>::name[]
 
-#ifndef KOKKOS_MATHEMATICAL_FUNCTIONS_SKIP_1
+#ifndef KOKKOS_MATHEMATICAL_FUNCTIONS_SKIP_2
 DEFINE_TERNARY_FUNCTION_EVAL(hypot, 2);
 DEFINE_TERNARY_FUNCTION_EVAL(fma, 2);
 #endif
@@ -787,7 +788,9 @@ TEST(TEST_CATEGORY, mathematical_functions_trigonometric_functions) {
 
   // TODO atan2
 }
+#endif
 
+#ifndef KOKKOS_MATHEMATICAL_FUNCTIONS_SKIP_2
 TEST(TEST_CATEGORY, mathematical_functions_power_functions) {
   TEST_MATH_FUNCTION(sqrt)({0, 1, 2, 3, 5, 7, 11});
   TEST_MATH_FUNCTION(sqrt)({0l, 1l, 2l, 3l, 5l, 7l, 11l});
@@ -1568,6 +1571,7 @@ TEST(TEST_CATEGORY, mathematical_functions_ieee_remainder_function) {
 
 // TODO: TestFpClassify, see https://github.com/kokkos/kokkos/issues/6279
 
+#ifndef KOKKOS_MATHEMATICAL_FUNCTIONS_SKIP_2
 template <class Space>
 struct TestIsFinite {
   TestIsFinite() { run(); }
@@ -1591,6 +1595,7 @@ struct TestIsFinite {
       ++e;
       Kokkos::printf("failed isfinite(float)\n");
     }
+#if !(defined(KOKKOS_ENABLE_CUDA) && defined(KOKKOS_COMPILER_MSVC))
     if (!isfinite(static_cast<KE::half_t>(2.f))
 #ifndef KOKKOS_COMPILER_NVHPC  // FIXME_NVHPC 23.7
         || isfinite(quiet_NaN<KE::half_t>::value) ||
@@ -1611,6 +1616,7 @@ struct TestIsFinite {
       ++e;
       Kokkos::printf("failed isfinite(KE::bhalf_t)\n");
     }
+#endif
     if (!isfinite(3.)
 #ifndef KOKKOS_COMPILER_NVHPC  // FIXME_NVHPC 23.7
         || isfinite(quiet_NaN<double>::value) ||
@@ -1670,6 +1676,7 @@ struct TestIsInf {
       ++e;
       Kokkos::printf("failed isinf(float)\n");
     }
+#if !(defined(KOKKOS_ENABLE_CUDA) && defined(KOKKOS_COMPILER_MSVC))
     if (isinf(static_cast<KE::half_t>(2.f))
 #ifndef KOKKOS_COMPILER_NVHPC  // FIXME_NVHPC 23.7
         || isinf(quiet_NaN<KE::half_t>::value) ||
@@ -1690,6 +1697,7 @@ struct TestIsInf {
       ++e;
       Kokkos::printf("failed isinf(KE::bhalf_t)\n");
     }
+#endif
     if (isinf(3.)
 #ifndef KOKKOS_COMPILER_NVHPC  // FIXME_NVHPC 23.7
         || isinf(quiet_NaN<double>::value) ||
@@ -1748,6 +1756,7 @@ struct TestIsNaN {
       ++e;
       Kokkos::printf("failed isnan(float)\n");
     }
+#if !(defined(KOKKOS_ENABLE_CUDA) && defined(KOKKOS_COMPILER_MSVC))
     if (isnan(static_cast<KE::half_t>(2.f))
 #ifndef KOKKOS_COMPILER_NVHPC  // FIXME_NVHPC 23.7
         || !isnan(quiet_NaN<KE::half_t>::value) ||
@@ -1777,6 +1786,7 @@ struct TestIsNaN {
       ++e;
       Kokkos::printf("failed isnan(double)\n");
     }
+#endif
 #ifdef MATHEMATICAL_FUNCTIONS_HAVE_LONG_DOUBLE_OVERLOADS
     if (isnan(4.l) || !isnan(quiet_NaN<long double>::value) ||
         !isnan(signaling_NaN<long double>::value) ||
@@ -1803,6 +1813,7 @@ struct TestIsNaN {
 TEST(TEST_CATEGORY, mathematical_functions_isnan) {
   TestIsNaN<TEST_EXECSPACE>();
 }
+#endif
 
 // TODO: TestSignBit, see https://github.com/kokkos/kokkos/issues/6279
 #endif
diff --git a/lib/kokkos/core/unit_test/TestNumericTraits.hpp b/lib/kokkos/core/unit_test/TestNumericTraits.hpp
index 2b5531f29a..ec1c1e0ca0 100644
--- a/lib/kokkos/core/unit_test/TestNumericTraits.hpp
+++ b/lib/kokkos/core/unit_test/TestNumericTraits.hpp
@@ -110,8 +110,8 @@ struct TestNumericTraits {
 
   KOKKOS_FUNCTION void operator()(Epsilon, int, int& e) const {
     using Kokkos::Experimental::epsilon;
-    auto const eps = epsilon<T>::value;
-    auto const one = T(1);
+    T const eps = epsilon<T>::value;
+    T const one = 1;
     // Avoid higher precision intermediate representation
     compare() = one + eps;
     e += (int)!(compare() != one);
diff --git a/lib/kokkos/generate_makefile.bash b/lib/kokkos/generate_makefile.bash
index 1b216d9fe3..301a1fceb5 100755
--- a/lib/kokkos/generate_makefile.bash
+++ b/lib/kokkos/generate_makefile.bash
@@ -160,6 +160,7 @@ display_help_text() {
       echo "                 AMD_GFX906      = AMD GPU MI50/MI60 GFX906"
       echo "                 AMD_GFX908      = AMD GPU MI100 GFX908"
       echo "                 AMD_GFX90A      = AMD GPU MI200 GFX90A"
+      echo "                 AMD_GFX940      = AMD GPU MI300 GFX940"
       echo "                 AMD_GFX942      = AMD GPU MI300 GFX942"
       echo "                 AMD_GFX1030     = AMD GPU V620/W6800 GFX1030"
       echo "                 AMD_GFX1100     = AMD GPU RX 7900 XT(X) GFX1100"
diff --git a/lib/kokkos/master_history.txt b/lib/kokkos/master_history.txt
index fd0020b8d5..a43b5276a8 100644
--- a/lib/kokkos/master_history.txt
+++ b/lib/kokkos/master_history.txt
@@ -34,3 +34,4 @@ tag:  4.0.00     date: 02:23:2023    master: 5ad60966    release: 52ea2953
 tag:  4.0.01     date: 04:26:2023    master: aa1f48f3    release: 5893754f
 tag:  4.1.00     date: 06:20:2023    master: 62d2b6c8    release: adde1e6a
 tag:  4.2.00     date: 11:09:2023    master: 1a3ea28f    release: abe01c88
+tag:  4.2.01     date: 01:30:2024    master: 71a9bcae    release: 221e5f7a
diff --git a/lib/kokkos/simd/src/Kokkos_SIMD_Scalar.hpp b/lib/kokkos/simd/src/Kokkos_SIMD_Scalar.hpp
index af7cb1e2c6..7443f5596b 100644
--- a/lib/kokkos/simd/src/Kokkos_SIMD_Scalar.hpp
+++ b/lib/kokkos/simd/src/Kokkos_SIMD_Scalar.hpp
@@ -224,7 +224,7 @@ template <typename T>
   using data_type = std::conditional_t<std::is_floating_point_v<T>, T, double>;
   return Experimental::simd<data_type, Experimental::simd_abi::scalar>(
       Kokkos::floor(static_cast<data_type>(a[0])));
-};
+}
 
 template <typename T>
 [[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION auto ceil(
@@ -232,7 +232,7 @@ template <typename T>
   using data_type = std::conditional_t<std::is_floating_point_v<T>, T, double>;
   return Experimental::simd<data_type, Experimental::simd_abi::scalar>(
       Kokkos::ceil(static_cast<data_type>(a[0])));
-};
+}
 
 template <typename T>
 [[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION auto round(
@@ -240,7 +240,7 @@ template <typename T>
   using data_type = std::conditional_t<std::is_floating_point_v<T>, T, double>;
   return Experimental::simd<data_type, Experimental::simd_abi::scalar>(
       Experimental::round_half_to_nearest_even(static_cast<data_type>(a[0])));
-};
+}
 
 template <typename T>
 [[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION auto trunc(
@@ -248,7 +248,7 @@ template <typename T>
   using data_type = std::conditional_t<std::is_floating_point_v<T>, T, double>;
   return Experimental::simd<data_type, Experimental::simd_abi::scalar>(
       Kokkos::trunc(static_cast<data_type>(a[0])));
-};
+}
 
 template <class T>
 [[nodiscard]] KOKKOS_FORCEINLINE_FUNCTION
diff --git a/lib/kokkos/simd/unit_tests/include/TestSIMD_GeneratorCtors.hpp b/lib/kokkos/simd/unit_tests/include/TestSIMD_GeneratorCtors.hpp
index 4feff3a89d..4af08c266b 100644
--- a/lib/kokkos/simd/unit_tests/include/TestSIMD_GeneratorCtors.hpp
+++ b/lib/kokkos/simd/unit_tests/include/TestSIMD_GeneratorCtors.hpp
@@ -42,6 +42,7 @@ inline void host_check_gen_ctor() {
   simd_type blend;
   blend.copy_from(expected, Kokkos::Experimental::element_aligned_tag());
 
+#if !(defined(KOKKOS_ENABLE_CUDA) && defined(KOKKOS_COMPILER_MSVC))
   if constexpr (std::is_same_v<Abi, Kokkos::Experimental::simd_abi::scalar>) {
     simd_type basic(KOKKOS_LAMBDA(std::size_t i) { return init[i]; });
     host_check_equality(basic, rhs, lanes);
@@ -63,6 +64,7 @@ inline void host_check_gen_ctor() {
 
     host_check_equality(blend, result, lanes);
   }
+#endif
 }
 
 template <typename Abi, typename... DataTypes>
diff --git a/potentials/HGa.msmeam b/potentials/HGa.msmeam
index 9f01501c16..1f84d0f8a5 100644
--- a/potentials/HGa.msmeam
+++ b/potentials/HGa.msmeam
@@ -1,29 +1,29 @@
 bkgd_dyn        =       1
 emb_lin_neg = 1
-augt1=0 
-ialloy=1 
-rc	=	 5.9 
+augt1=0
+ialloy=1
+rc      =        5.9
 #H
-attrac(1,1)=0.460 
-repuls(1,1)=0.460 
+attrac(1,1)=0.460
+repuls(1,1)=0.460
 Cmin(1,1,1)=1.3 # PuMS
-Cmax(1,1,1)= 2.80 
+Cmax(1,1,1)= 2.80
 nn2(1,1)=1
 #Ga
 rho0(2)         =       0.6
-attrac(2,2)=0.097 
-repuls(2,2)=0.097 
+attrac(2,2)=0.097
+repuls(2,2)=0.097
 nn2(2,2)=1
 #HGa
-attrac(1,2)=0.300 
-repuls(1,2)=0.300 
-lattce(1,2)=l12 
-re(1,2)=3.19 
-delta(1,2)=-0.48  
-alpha(1,2)=6.6 
-Cmin(1,1,2)=2.0 
-Cmin(2,1,2)= 2.0 
-Cmin(1,2,1)=2.0 
+attrac(1,2)=0.300
+repuls(1,2)=0.300
+lattce(1,2)=l12
+re(1,2)=3.19
+delta(1,2)=-0.48
+alpha(1,2)=6.6
+Cmin(1,1,2)=2.0
+Cmin(2,1,2)= 2.0
+Cmin(1,2,1)=2.0
 Cmin(2,2,1)     =       1.4
 Cmin(1,2,2)     =       1.4
 Cmin(1,1,2)     =       1.4
diff --git a/potentials/MoS.rebomos b/potentials/MoS.rebomos
new file mode 100644
index 0000000000..ea96981a1e
--- /dev/null
+++ b/potentials/MoS.rebomos
@@ -0,0 +1,65 @@
+# DATE: 2013-11-04 UNITS: metal CONTRIBUTOR: J Stewart, K Dang, D Spearot (UArk) CITATION: Stewart J A and Spearot D E, Modelling Simul. Mater. Sci. Eng. 21.
+# MoS-S REBO Brenner/Sinnot Potential as published in
+# Liang T, Phillpot S R and Sinnott S B (2009) Phys. Rev. B79 245110, Erratum: Phys. Rev. B85 199903(E).
+
+3.50                rcmin_MM
+2.75                rcmin_MS
+2.30                rcmin_SS
+3.80                rcmax_MM
+3.05                rcmax_MS
+3.00                rcmax_SS
+3.419129390005910   Q_MM
+1.505537839153790   Q_MS
+0.254959104053671   Q_SS
+1.07500712999340    alpha_MM
+1.19267902218820    alpha_MS
+1.10775022439715    alpha_SS
+179.008013654688    A_MM
+575.509677721866    A_MS
+1228.43233679426    A_SS
+706.247903589221    BIJc_MM1
+1344.46820036159    BIJc_MS1
+1498.64815404145    BIJc_SS1
+1.16100322369589    Beta_MM1
+1.26973752204290    Beta_MS1
+1.12673623610320    Beta_SS1
+0.1326842550663270  M_b0
+-0.007642788338017  M_b1
+0.0341395775059370  M_b2
+0.2523050971380870  M_b3
+0.1227287372225670  M_b4
+-0.361387798398897  M_b5
+-0.282577591351457  M_b6
+0.120194301035280   M_bg0
+0.045238287358190   M_bg1
+0.067922807244030   M_bg2
+-0.03672511378682   M_bg3
+0.107516477513860   M_bg4
+0.004964711984940   M_bg5
+-0.12997598358652   M_bg6
+0.006848761596750   S_b0
+-0.02389964401024   S_b1
+0.137457353311170   S_b2
+0.033016467497740   S_b3
+-0.31064291544850   S_b4
+-0.08550273135791   S_b5
+0.149252790306880   S_b6
+-0.2850852	    S_bg0
+1.67102480	    S_bg1
+-3.5678516	    S_bg2
+3.45054990	    S_bg3
+-1.2186289	    S_bg4
+0.0		    S_bg5
+0.0		    S_bg6
+0.138040769883614   M_a0
+0.803625443023934   M_a1
+0.292412960851064   M_a2
+0.640588078946224   M_a3
+0.062978539843324   S_a0
+2.478617619878250   S_a1
+0.036666243238154   S_a2
+2.386431372486710   S_a3
+0.00058595          epsilon_MM
+0.01386             epsilon_SS
+4.200               sigma_MM 
+3.130               sigma_SS
diff --git a/potentials/README b/potentials/README
index c234f5f48b..2cb4a383c5 100644
--- a/potentials/README
+++ b/potentials/README
@@ -84,7 +84,7 @@ Au_u3 = Gold universal 3
 The suffix of each file indicates the pair style it is used with:
 
 adp           ADP angular dependent potential
-airebo        AI-REBO and REBO potentials
+airebo        AI-REBO potentials
 bop.table     BOP potential, tabulated form
 cdeam         concentration-dependent EAM
 comb          COMB potential
@@ -107,6 +107,8 @@ nb3b.harmonic nonbonded 3-body harmonic potential
 pod           ML potential with proper orthogonal descriptors (POD)
 poly          polymorphic 3-body potential
 reax          ReaxFF potential (see README.reax for more info)
+rebo          REBO potentials
+rebomos       REBOMoS potential
 smtbq         second moment tight binding QEq (SMTBQ) potential
 snap          SNAP potential
 snapcoeff     SNAP potential
diff --git a/src/.gitignore b/src/.gitignore
index 1e4c5b9ddb..88bb80fdc5 100644
--- a/src/.gitignore
+++ b/src/.gitignore
@@ -791,6 +791,8 @@
 /fix_cmap.h
 /fix_damping_cundall.cpp
 /fix_damping_cundall.h
+/fix_deform_pressure.cpp
+/fix_deform_pressure.h
 /fix_dpd_energy.cpp
 /fix_dpd_energy.h
 /fix_efield_tip4p.cpp
@@ -1023,6 +1025,8 @@
 /fix_wall_colloid.h
 /fix_wall_ees.cpp
 /fix_wall_ees.h
+/fix_wall_flow.cpp
+/fix_wall_flow.h
 /fix_wall_region_ees.cpp
 /fix_wall_region_ees.h
 /fix_wall_reflect_stochastic.cpp
@@ -1369,6 +1373,8 @@
 /pair_reaxff.h
 /pair_rebo.cpp
 /pair_rebo.h
+/pair_rebomos.cpp
+/pair_rebomos.h
 /pair_resquared.cpp
 /pair_resquared.h
 /pair_saip_metal.cpp
diff --git a/src/AMOEBA/amoeba_convolution.cpp b/src/AMOEBA/amoeba_convolution.cpp
index 3bdfdc9b74..44b7248815 100644
--- a/src/AMOEBA/amoeba_convolution.cpp
+++ b/src/AMOEBA/amoeba_convolution.cpp
@@ -15,16 +15,13 @@
 #include "amoeba_convolution.h"
 
 #include "comm.h"
-#include "domain.h"
 #include "fft3d_wrap.h"
 #include "grid3d.h"
-#include "math_extra.h"
 #include "memory.h"
 #include "neighbor.h"
 #include "remap_wrap.h"
 #include "timer.h"
 
-#include <cmath>
 #include <cstring>
 
 using namespace LAMMPS_NS;
diff --git a/src/AMOEBA/pair_amoeba.h b/src/AMOEBA/pair_amoeba.h
index 648fc86126..fd694a8037 100644
--- a/src/AMOEBA/pair_amoeba.h
+++ b/src/AMOEBA/pair_amoeba.h
@@ -20,7 +20,7 @@ PairStyle(amoeba,PairAmoeba);
 #ifndef LMP_PAIR_AMOEBA_H
 #define LMP_PAIR_AMOEBA_H
 
-#include "lmpfftsettings.h"
+#include "lmpfftsettings.h"    // IWYU pragma: export
 #include "pair.h"
 
 namespace LAMMPS_NS {
@@ -330,10 +330,10 @@ class PairAmoeba : public Pair {
   double *qfac;        // convoulution pre-factors
   double *gridfft1;    // copy of p_kspace FFT grid
 
-  double **cmp,**fmp;              // Cartesian and fractional multipoles
-  double **cphi,**fphi;
+  double **cmp, **fmp;    // Cartesian and fractional multipoles
+  double **cphi, **fphi;
 
-  double *_moduli_array;           // buffers for moduli
+  double *_moduli_array;    // buffers for moduli
   double *_moduli_bsarray;
   int _nfft_max;
 
@@ -345,11 +345,11 @@ class PairAmoeba : public Pair {
   double ctf[10][10];         // indices NOT flipped vs Fortran
   double ftc[10][10];         // indices NOT flipped vs Fortran
 
-  class AmoebaConvolution *m_kspace;   // multipole KSpace
-  class AmoebaConvolution *p_kspace;   // polar KSpace
+  class AmoebaConvolution *m_kspace;    // multipole KSpace
+  class AmoebaConvolution *p_kspace;    // polar KSpace
   class AmoebaConvolution *pc_kspace;
-  class AmoebaConvolution *d_kspace;   // dispersion KSpace
-  class AmoebaConvolution *i_kspace;   // induce KSpace
+  class AmoebaConvolution *d_kspace;    // dispersion KSpace
+  class AmoebaConvolution *i_kspace;    // induce KSpace
   class AmoebaConvolution *ic_kspace;
 
   // FFT grid size factors
@@ -362,8 +362,8 @@ class PairAmoeba : public Pair {
   void hal();
 
   virtual void repulsion();
-  void damprep(double, double, double, double, double, double, double, double,
-               int, double, double, double *);
+  void damprep(double, double, double, double, double, double, double, double, int, double, double,
+               double *);
 
   void dispersion();
   virtual void dispersion_real();
diff --git a/src/ASPHERE/compute_temp_asphere.cpp b/src/ASPHERE/compute_temp_asphere.cpp
index 7d5d6adce5..d99d9f30c8 100644
--- a/src/ASPHERE/compute_temp_asphere.cpp
+++ b/src/ASPHERE/compute_temp_asphere.cpp
@@ -18,30 +18,29 @@
 
 #include "compute_temp_asphere.h"
 
-#include <cstring>
-#include "math_extra.h"
 #include "atom.h"
 #include "atom_vec_ellipsoid.h"
-#include "update.h"
-#include "force.h"
 #include "domain.h"
-#include "modify.h"
-#include "group.h"
 #include "error.h"
+#include "force.h"
+#include "group.h"
+#include "math_extra.h"
+#include "modify.h"
+#include "update.h"
+
+#include <cstring>
 
 using namespace LAMMPS_NS;
 
-enum{ROTATE,ALL};
-
-static constexpr double INERTIA = 0.2;          // moment of inertia prefactor for ellipsoid
+enum { ROTATE, ALL };
+static constexpr double INERTIA = 0.2;    // moment of inertia prefactor for ellipsoid
 
 /* ---------------------------------------------------------------------- */
 
 ComputeTempAsphere::ComputeTempAsphere(LAMMPS *lmp, int narg, char **arg) :
-  Compute(lmp, narg, arg),
-  id_bias(nullptr), tbias(nullptr), avec(nullptr)
+    Compute(lmp, narg, arg), id_bias(nullptr), tbias(nullptr), avec(nullptr)
 {
-  if (narg < 3) error->all(FLERR,"Illegal compute temp/asphere command");
+  if (narg < 3) utils::missing_cmd_args(FLERR, "compute temp/asphere", error);
 
   scalar_flag = vector_flag = 1;
   size_vector = 6;
@@ -56,19 +55,17 @@ ComputeTempAsphere::ComputeTempAsphere(LAMMPS *lmp, int narg, char **arg) :
   int iarg = 3;
   while (iarg < narg) {
     if (strcmp(arg[iarg],"bias") == 0) {
-      if (iarg+2 > narg)
-        error->all(FLERR,"Illegal compute temp/asphere command");
+      if (iarg+2 > narg) utils::missing_cmd_args(FLERR, "compute temp/asphere bias", error);
       tempbias = 1;
       id_bias = utils::strdup(arg[iarg+1]);
       iarg += 2;
     } else if (strcmp(arg[iarg],"dof") == 0) {
-      if (iarg+2 > narg)
-        error->all(FLERR,"Illegal compute temp/asphere command");
+      if (iarg+2 > narg) utils::missing_cmd_args(FLERR, "compute temp/asphere dof", error);
       if (strcmp(arg[iarg+1],"rotate") == 0) mode = ROTATE;
       else if (strcmp(arg[iarg+1],"all") == 0) mode = ALL;
-      else error->all(FLERR,"Illegal compute temp/asphere command");
+      else error->all(FLERR,"Unknown compute temp/asphere dof keyword {}", arg[iarg+1]);
       iarg += 2;
-    } else error->all(FLERR,"Illegal compute temp/asphere command");
+    } else error->all(FLERR,"Unknown compute temp/asphere keyword {}", arg[iarg]);
   }
 
   // when computing only the rotational temperature,
@@ -84,8 +81,8 @@ ComputeTempAsphere::ComputeTempAsphere(LAMMPS *lmp, int narg, char **arg) :
 
 ComputeTempAsphere::~ComputeTempAsphere()
 {
-  delete [] id_bias;
-  delete [] vector;
+  delete[] id_bias;
+  delete[] vector;
 }
 
 /* ---------------------------------------------------------------------- */
@@ -107,17 +104,17 @@ void ComputeTempAsphere::init()
   for (int i = 0; i < nlocal; i++)
     if (mask[i] & groupbit)
       if (ellipsoid[i] < 0)
-        error->one(FLERR,"Compute temp/asphere requires extended particles");
+        error->one(FLERR,"Compute temp/asphere requires all extended particles");
 
   if (tempbias) {
-    int i = modify->find_compute(id_bias);
-    if (i < 0)
-      error->all(FLERR,"Could not find compute ID for temperature bias");
-    tbias = modify->compute[i];
+    tbias = modify->get_compute_by_id(id_bias);
+    if (!tbias)
+      error->all(FLERR,"Could not find compute ID {} for temperature bias", id_bias);
+
     if (tbias->tempflag == 0)
-      error->all(FLERR,"Bias compute does not calculate temperature");
+      error->all(FLERR,"Bias compute {} does not calculate temperature", id_bias);
     if (tbias->tempbias == 0)
-      error->all(FLERR,"Bias compute does not calculate a velocity bias");
+      error->all(FLERR,"Bias compute {} does not calculate a velocity bias", id_bias);
     if (tbias->igroup != igroup)
       error->all(FLERR,"Bias compute group does not match compute group");
     if (strcmp(tbias->style,"temp/region") == 0) tempbias = 2;
diff --git a/src/ATC/fix_atc.cpp b/src/ATC/fix_atc.cpp
index 436ffc9555..b1276d6788 100644
--- a/src/ATC/fix_atc.cpp
+++ b/src/ATC/fix_atc.cpp
@@ -20,15 +20,17 @@
 #include "group.h"
 #include "neighbor.h"
 
+#include "ATC_Error.h"
 #include "ATC_Method.h"
-#include "ATC_TransferKernel.h"
-#include "ATC_TransferPartitionOfUnity.h"
 #include "ATC_CouplingEnergy.h"
 #include "ATC_CouplingMomentum.h"
 #include "ATC_CouplingMass.h"
 #include "ATC_CouplingMomentumEnergy.h"
+#include "ATC_TransferKernel.h"
+#include "ATC_TransferPartitionOfUnity.h"
+#include "ATC_TypeDefs.h"
+#include "ExtrinsicModel.h"
 #include "LammpsInterface.h"
-
 #include <cstring>
 
 using namespace LAMMPS_NS;
diff --git a/src/AWPMD/atom_vec_wavepacket.cpp b/src/AWPMD/atom_vec_wavepacket.cpp
index ff0d660fb6..65b8214369 100644
--- a/src/AWPMD/atom_vec_wavepacket.cpp
+++ b/src/AWPMD/atom_vec_wavepacket.cpp
@@ -19,6 +19,8 @@
 
 #include "atom.h"
 
+#include <cstring>
+
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/BOCS/compute_pressure_bocs.cpp b/src/BOCS/compute_pressure_bocs.cpp
index a8bc9596cc..16d6c91a1f 100644
--- a/src/BOCS/compute_pressure_bocs.cpp
+++ b/src/BOCS/compute_pressure_bocs.cpp
@@ -42,7 +42,7 @@ ComputePressureBocs::ComputePressureBocs(LAMMPS *lmp, int narg, char **arg) :
   Compute(lmp, narg, arg),
   vptr(nullptr), id_temp(nullptr)
 {
-  if (narg < 4) error->all(FLERR,"Illegal compute pressure/bocs command");
+  if (narg < 4) utils::missing_cmd_args(FLERR,"compute pressure/bocs", error);
   if (igroup) error->all(FLERR,"Compute pressure/bocs must use group all");
 
   scalar_flag = vector_flag = 1;
@@ -62,12 +62,12 @@ ComputePressureBocs::ComputePressureBocs(LAMMPS *lmp, int narg, char **arg) :
   else {
     id_temp = utils::strdup(arg[3]);
 
-    int icompute = modify->find_compute(id_temp);
-    if (icompute < 0)
-      error->all(FLERR,"Could not find compute pressure/bocs temperature ID");
-    if (modify->compute[icompute]->tempflag == 0)
-      error->all(FLERR,"Compute pressure/bocs temperature ID does not "
-                 "compute temperature");
+    temperature = modify->get_compute_by_id(id_temp);
+    if (!temperature)
+      error->all(FLERR,"Could not find compute pressure/bocs temperature compute {}", id_temp);
+    if (temperature->tempflag == 0)
+      error->all(FLERR,"Compute pressure/bocs temperature compute {} does not compute "
+                 "temperature", id_temp);
   }
 
   // process optional args
@@ -137,10 +137,9 @@ void ComputePressureBocs::init()
   // fixes could have changed or compute_modify could have changed it
 
   if (keflag) {
-    int icompute = modify->find_compute(id_temp);
-    if (icompute < 0)
-      error->all(FLERR,"Could not find compute pressure/bocs temperature ID");
-    temperature = modify->compute[icompute];
+    temperature = modify->get_compute_by_id(id_temp);
+    if (!temperature)
+      error->all(FLERR,"Could not find compute pressure/bocs temperature compute {}", id_temp);
   }
 
   // detect contributions to virial
@@ -158,10 +157,8 @@ void ComputePressureBocs::init()
     if (improperflag && force->improper) nvirial++;
   }
   if (fixflag) {
-    Fix **fix = modify->fix;
-    int nfix = modify->nfix;
-    for (int i = 0; i < nfix; i++)
-      if (fix[i]->thermo_virial) nvirial++;
+    for (const auto &ifix : modify->get_fix_list())
+      if (ifix->thermo_virial) nvirial++;
   }
 
   if (nvirial) {
@@ -174,10 +171,11 @@ void ComputePressureBocs::init()
       vptr[nvirial++] = force->dihedral->virial;
     if (improperflag && force->improper)
       vptr[nvirial++] = force->improper->virial;
-    if (fixflag)
-      for (int i = 0; i < modify->nfix; i++)
-        if (modify->fix[i]->virial_global_flag && modify->fix[i]->thermo_virial)
-          vptr[nvirial++] = modify->fix[i]->virial;
+    if (fixflag) {
+      for (const auto &ifix : modify->get_fix_list())
+        if (ifix->virial_global_flag && ifix->thermo_virial)
+          vptr[nvirial++] = ifix->virial;
+    }
   }
 
   // flag Kspace contribution separately, since not summed across procs
diff --git a/src/BOCS/fix_bocs.cpp b/src/BOCS/fix_bocs.cpp
index 33f4f072e7..25471d04a4 100644
--- a/src/BOCS/fix_bocs.cpp
+++ b/src/BOCS/fix_bocs.cpp
@@ -25,7 +25,6 @@
 #include "error.h"
 #include "fix_deform.h"
 #include "force.h"
-#include "group.h"
 #include "irregular.h"
 #include "kspace.h"
 #include "memory.h"
@@ -64,15 +63,15 @@ enum { ISO, ANISO, TRICLINIC };
 /* ----------------------------------------------------------------------
    NVT,NPH,NPT integrators for improved Nose-Hoover equations of motion
  ---------------------------------------------------------------------- */
-// clang-format off
 
 FixBocs::FixBocs(LAMMPS *lmp, int narg, char **arg) :
-    Fix(lmp, narg, arg), id_dilate(nullptr), irregular(nullptr), id_temp(nullptr),
-    id_press(nullptr), eta(nullptr), eta_dot(nullptr), eta_dotdot(nullptr), eta_mass(nullptr),
-    etap(nullptr), etap_dot(nullptr), etap_dotdot(nullptr), etap_mass(nullptr)
+    Fix(lmp, narg, arg), irregular(nullptr), id_temp(nullptr), id_press(nullptr), eta(nullptr),
+    eta_dot(nullptr), eta_dotdot(nullptr), eta_mass(nullptr), etap(nullptr), etap_dot(nullptr),
+    etap_dotdot(nullptr), etap_mass(nullptr)
 {
   if (lmp->citeme) lmp->citeme->add(cite_user_bocs_package);
 
+  // clang-format off
   if (narg < 4) utils::missing_cmd_args(FLERR,"fix bocs",error);
 
   restart_global = 1;
@@ -89,8 +88,6 @@ FixBocs::FixBocs(LAMMPS *lmp, int narg, char **arg) :
 
   pcouple = NONE;
   drag = 0.0;
-  allremap = 1;
-  id_dilate = nullptr;
   mtchain = mpchain = 3;
   nc_tchain = nc_pchain = 1;
   mtk_flag = 1;
@@ -147,15 +144,14 @@ FixBocs::FixBocs(LAMMPS *lmp, int narg, char **arg) :
 
   while (iarg < narg) {
     if (strcmp(arg[iarg],"temp") == 0) {
-      if (iarg+4 > narg) error->all(FLERR,"Illegal fix bocs command");
+      if (iarg+4 > narg) utils::missing_cmd_args(FLERR,"fix bocs temp", error);
       tstat_flag = 1;
       t_start = utils::numeric(FLERR,arg[iarg+1],false,lmp);
       t_target = t_start;
       t_stop = utils::numeric(FLERR,arg[iarg+2],false,lmp);
       t_period = utils::numeric(FLERR,arg[iarg+3],false,lmp);
       if (t_start <= 0.0 || t_stop <= 0.0)
-        error->all(FLERR,
-                   "Target temperature for fix bocs cannot be 0.0");
+        error->all(FLERR, "Target temperature for fix bocs cannot be 0.0");
       iarg += 4;
     } else if (strcmp(arg[iarg],"iso") == 0) {
       error->all(FLERR,"Illegal fix bocs command. Pressure fix must be "
@@ -166,12 +162,9 @@ FixBocs::FixBocs(LAMMPS *lmp, int narg, char **arg) :
                          "followed by: P_0 P_f P_coupl");
       p_match_flag = 1;
       pcouple = XYZ;
-      p_start[0] = p_start[1] = p_start[2] =
-                                        utils::numeric(FLERR,arg[iarg+1],false,lmp);
-      p_stop[0] = p_stop[1] = p_stop[2] =
-                                        utils::numeric(FLERR,arg[iarg+2],false,lmp);
-      p_period[0] = p_period[1] = p_period[2] =
-                                        utils::numeric(FLERR,arg[iarg+3],false,lmp);
+      p_start[0] = p_start[1] = p_start[2] = utils::numeric(FLERR,arg[iarg+1],false,lmp);
+      p_stop[0] = p_stop[1] = p_stop[2] = utils::numeric(FLERR,arg[iarg+2],false,lmp);
+      p_period[0] = p_period[1] = p_period[2] = utils::numeric(FLERR,arg[iarg+3],false,lmp);
 
       p_flag[0] = p_flag[1] = p_flag[2] = 1;
       p_flag[3] = p_flag[4] = p_flag[5] = 0; // MRD
@@ -288,7 +281,6 @@ FixBocs::FixBocs(LAMMPS *lmp, int narg, char **arg) :
     if (p_flag[4]) box_change |= BOX_CHANGE_XZ;
     if (p_flag[5]) box_change |= BOX_CHANGE_XY;
     no_change_box = 1;
-    if (allremap == 0) restart_pbc = 1;
 
     pstyle = ISO; // MRD this is the only one that can happen
 
@@ -386,7 +378,7 @@ FixBocs::FixBocs(LAMMPS *lmp, int narg, char **arg) :
   // and thus its KE/temperature contribution should use group all
 
   id_temp = utils::strdup(std::string(id)+"_temp");
-  modify->add_compute(fmt::format("{} all temp",id_temp));
+  temperature = modify->add_compute(fmt::format("{} all temp",id_temp));
   tcomputeflag = 1;
 
   // create a new compute pressure style
@@ -394,7 +386,7 @@ FixBocs::FixBocs(LAMMPS *lmp, int narg, char **arg) :
   // pass id_temp as 4th arg to pressure constructor
 
   id_press = utils::strdup(std::string(id)+"_press");
-  modify->add_compute(fmt::format("{} all PRESSURE/BOCS {}",id_press,id_temp));
+  pressure = modify->add_compute(fmt::format("{} all PRESSURE/BOCS {}",id_press,id_temp));
   pcomputeflag = 1;
 
 /*~ MRD End of stuff copied from fix_npt.cpp~*/
@@ -407,7 +399,6 @@ FixBocs::~FixBocs()
 {
   if (copymode) return;
 
-  delete[] id_dilate;
   delete irregular;
 
   // delete temperature and pressure if fix created them
@@ -458,14 +449,6 @@ int FixBocs::setmask()
 
 void FixBocs::init()
 {
-  // recheck that dilate group has not been deleted
-  if (allremap == 0) {
-    int idilate = group->find(id_dilate);
-    if (idilate == -1)
-      error->all(FLERR,"Fix bocs dilate group ID does not exist");
-    dilate_group_bit = group->bitmask[idilate];
-  }
-
   // ensure no conflict with fix deform
 
   if (pstat_flag) {
@@ -1125,19 +1108,15 @@ void FixBocs::couple()
 }
 
 /* ----------------------------------------------------------------------
-   change box size
-   remap all atoms or dilate group atoms depending on allremap flag
+   change box size, remap all atoms
    if rigid bodies exist, scale rigid body centers-of-mass
 ------------------------------------------------------------------------- */
 
 void FixBocs::remap()
 {
-  int i;
   double oldlo,oldhi;
   double expfac;
 
-  double **x = atom->x;
-  int *mask = atom->mask;
   int nlocal = atom->nlocal;
   double *h = domain->h;
 
@@ -1147,12 +1126,7 @@ void FixBocs::remap()
 
   // convert pertinent atoms and rigid bodies to lamda coords
 
-  if (allremap) domain->x2lamda(nlocal);
-  else {
-    for (i = 0; i < nlocal; i++)
-      if (mask[i] & dilate_group_bit)
-        domain->x2lamda(x[i],x[i]);
-  }
+  domain->x2lamda(nlocal);
 
   for (auto &ifix : rfix) ifix->deform(0);
 
@@ -1292,12 +1266,7 @@ void FixBocs::remap()
 
   // convert pertinent atoms and rigid bodies back to box coords
 
-  if (allremap) domain->lamda2x(nlocal);
-  else {
-    for (i = 0; i < nlocal; i++)
-      if (mask[i] & dilate_group_bit)
-        domain->lamda2x(x[i],x[i]);
-  }
+  domain->lamda2x(nlocal);
 
   for (auto &ifix : rfix) ifix->deform(1);
 }
@@ -1461,24 +1430,22 @@ int FixBocs::modify_param(int narg, char **arg)
     delete[] id_temp;
     id_temp = utils::strdup(arg[1]);
 
-    int icompute = modify->find_compute(arg[1]);
-    if (icompute < 0)
-      error->all(FLERR,"Could not find fix_modify temperature ID");
-    temperature = modify->compute[icompute];
+    temperature = modify->get_compute_by_id(id_temp);
+    if (!temperature)
+      error->all(FLERR,"Could not find fix_modify temperature compute {}", id_temp);
 
     if (temperature->tempflag == 0)
-      error->all(FLERR,
-                 "Fix_modify temperature ID does not compute temperature");
+      error->all(FLERR, "Fix_modify temperature compute {} does not compute temperature", id_temp);
     if (temperature->igroup != 0 && comm->me == 0)
-      error->warning(FLERR,"Temperature for fix modify is not for group all");
+      error->warning(FLERR,"Temperature compute {} for fix modify is not for group all", id_temp);
 
     // reset id_temp of pressure to new temperature ID
 
     if (pstat_flag) {
-      icompute = modify->find_compute(id_press);
-      if (icompute < 0)
-        error->all(FLERR,"Pressure ID for fix modify does not exist");
-      modify->compute[icompute]->reset_extra_compute_fix(id_temp);
+      pressure = modify->get_compute_by_id(id_press);
+      if (!pressure)
+        error->all(FLERR,"Pressure ID {} for fix modify does not exist", id_press);
+      pressure->reset_extra_compute_fix(id_temp);
     }
 
     return 2;
diff --git a/src/BOCS/fix_bocs.h b/src/BOCS/fix_bocs.h
index af221fedb3..7f752a03f3 100644
--- a/src/BOCS/fix_bocs.h
+++ b/src/BOCS/fix_bocs.h
@@ -65,7 +65,7 @@ class FixBocs : public Fix {
   int tstat_flag;    // 1 if control T
   int pstat_flag;    // 1 if control P
 
-  int pstyle, pcouple, allremap;
+  int pstyle, pcouple;
   int p_flag[6];    // 1 if control P on this dim, 0 if not
   double p_start[6], p_stop[6];
   double p_freq[6], p_target[6];
@@ -75,9 +75,7 @@ class FixBocs : public Fix {
   double drag, tdrag_factor;     // drag factor on particle thermostat
   double pdrag_factor;           // drag factor on barostat
   int kspace_flag;               // 1 if KSpace invoked, 0 if not
-  int dilate_group_bit;          // mask for dilation group
   std::vector<Fix *> rfix;       // list of rigid fixes
-  char *id_dilate;               // group name to dilate
   class Irregular *irregular;    // for migrating atoms after box flips
 
   // MRD NJD
diff --git a/src/BODY/compute_temp_body.cpp b/src/BODY/compute_temp_body.cpp
index 319c2ff986..39b2518600 100644
--- a/src/BODY/compute_temp_body.cpp
+++ b/src/BODY/compute_temp_body.cpp
@@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    https://www.lammps.org/, Sandia National Laboratories
@@ -19,27 +18,28 @@
 
 #include "compute_temp_body.h"
 
-#include <cstring>
-#include "math_extra.h"
 #include "atom.h"
 #include "atom_vec_body.h"
-#include "update.h"
-#include "force.h"
 #include "domain.h"
-#include "modify.h"
-#include "group.h"
 #include "error.h"
+#include "force.h"
+#include "group.h"
+#include "math_extra.h"
+#include "modify.h"
+#include "update.h"
+
+#include <cstring>
 
 using namespace LAMMPS_NS;
 
-enum{ROTATE,ALL};
+enum { ROTATE, ALL };
 
 /* ---------------------------------------------------------------------- */
 
 ComputeTempBody::ComputeTempBody(LAMMPS *lmp, int narg, char **arg) :
-  Compute(lmp, narg, arg), id_bias(nullptr), tbias(nullptr), avec(nullptr)
+    Compute(lmp, narg, arg), id_bias(nullptr), tbias(nullptr), avec(nullptr)
 {
-  if (narg < 3) error->all(FLERR,"Illegal compute temp/body command");
+  if (narg < 3) utils::missing_cmd_args(FLERR, "compute temp/body", error);
 
   scalar_flag = vector_flag = 1;
   size_vector = 6;
@@ -48,25 +48,24 @@ ComputeTempBody::ComputeTempBody(LAMMPS *lmp, int narg, char **arg) :
   tempflag = 1;
 
   tempbias = 0;
-  id_bias = nullptr;
   mode = ALL;
 
+  // clang-format off
+
   int iarg = 3;
   while (iarg < narg) {
     if (strcmp(arg[iarg],"bias") == 0) {
-      if (iarg+2 > narg)
-        error->all(FLERR,"Illegal compute temp/body command");
+      if (iarg+2 > narg) utils::missing_cmd_args(FLERR, "compute temp/body bias", error);
       tempbias = 1;
       id_bias = utils::strdup(arg[iarg+1]);
       iarg += 2;
     } else if (strcmp(arg[iarg],"dof") == 0) {
-      if (iarg+2 > narg)
-        error->all(FLERR,"Illegal compute temp/body command");
+      if (iarg+2 > narg) utils::missing_cmd_args(FLERR, "compute temp/body dof", error);
       if (strcmp(arg[iarg+1],"rotate") == 0) mode = ROTATE;
       else if (strcmp(arg[iarg+1],"all") == 0) mode = ALL;
-      else error->all(FLERR,"Illegal compute temp/body command");
+      else error->all(FLERR,"Unknown compute temp/body dof keyword {}", arg[iarg+1]);
       iarg += 2;
-    } else error->all(FLERR,"Illegal compute temp/body command");
+    } else error->all(FLERR,"Unknown compute temp/body keyword {}", arg[iarg]);
   }
 
   vector = new double[size_vector];
@@ -77,8 +76,8 @@ ComputeTempBody::ComputeTempBody(LAMMPS *lmp, int narg, char **arg) :
 
 ComputeTempBody::~ComputeTempBody()
 {
-  delete [] id_bias;
-  delete [] vector;
+  delete[] id_bias;
+  delete[] vector;
 }
 
 /* ---------------------------------------------------------------------- */
@@ -88,8 +87,7 @@ void ComputeTempBody::init()
   // error check
 
   avec = dynamic_cast<AtomVecBody *>(atom->style_match("body"));
-  if (!avec)
-    error->all(FLERR,"Compute temp/body requires atom style body");
+  if (!avec) error->all(FLERR,"Compute temp/body requires atom style body");
 
   // check that all particles are finite-size, no point particles allowed
 
@@ -99,18 +97,16 @@ void ComputeTempBody::init()
 
   for (int i = 0; i < nlocal; i++)
     if (mask[i] & groupbit)
-      if (body[i] < 0)
-        error->one(FLERR,"Compute temp/body requires bodies");
+      if (body[i] < 0) error->one(FLERR,"Compute temp/body requires bodies");
 
   if (tempbias) {
-    int i = modify->find_compute(id_bias);
-    if (i < 0)
-      error->all(FLERR,"Could not find compute ID for temperature bias");
-    tbias = modify->compute[i];
+    tbias = modify->get_compute_by_id(id_bias);
+    if (!tbias)
+      error->all(FLERR,"Could not find compute {} for temperature bias", id_bias);
     if (tbias->tempflag == 0)
-      error->all(FLERR,"Bias compute does not calculate temperature");
+      error->all(FLERR,"Bias compute {} does not calculate temperature", id_bias);
     if (tbias->tempbias == 0)
-      error->all(FLERR,"Bias compute does not calculate a velocity bias");
+      error->all(FLERR,"Bias compute {} does not calculate a velocity bias", id_bias);
     if (tbias->igroup != igroup)
       error->all(FLERR,"Bias compute group does not match compute group");
     if (strcmp(tbias->style,"temp/region") == 0) tempbias = 2;
diff --git a/src/BPM/atom_vec_bpm_sphere.cpp b/src/BPM/atom_vec_bpm_sphere.cpp
index 37a36f4a59..4332d517b4 100644
--- a/src/BPM/atom_vec_bpm_sphere.cpp
+++ b/src/BPM/atom_vec_bpm_sphere.cpp
@@ -14,14 +14,11 @@
 #include "atom_vec_bpm_sphere.h"
 
 #include "atom.h"
-#include "comm.h"
 #include "error.h"
 #include "fix.h"
 #include "math_const.h"
 #include "modify.h"
 
-#include <cstring>
-
 using namespace LAMMPS_NS;
 using MathConst::MY_PI;
 
diff --git a/src/BPM/bond_bpm.cpp b/src/BPM/bond_bpm.cpp
index b484df7fab..f1482d4203 100644
--- a/src/BPM/bond_bpm.cpp
+++ b/src/BPM/bond_bpm.cpp
@@ -357,7 +357,6 @@ void BondBPM::process_broken(int i, int j)
   if (i < nlocal) {
     for (m = 0; m < num_bond[i]; m++) {
       if (bond_atom[i][m] == tag[j]) {
-        bond_type[i][m] = 0;
         n = num_bond[i];
         bond_type[i][m] = bond_type[i][n - 1];
         bond_atom[i][m] = bond_atom[i][n - 1];
@@ -372,7 +371,6 @@ void BondBPM::process_broken(int i, int j)
   if (j < nlocal) {
     for (m = 0; m < num_bond[j]; m++) {
       if (bond_atom[j][m] == tag[i]) {
-        bond_type[j][m] = 0;
         n = num_bond[j];
         bond_type[j][m] = bond_type[j][n - 1];
         bond_atom[j][m] = bond_atom[j][n - 1];
diff --git a/src/BPM/fix_update_special_bonds.cpp b/src/BPM/fix_update_special_bonds.cpp
index 04f5d94e7e..cdc72ee987 100644
--- a/src/BPM/fix_update_special_bonds.cpp
+++ b/src/BPM/fix_update_special_bonds.cpp
@@ -21,7 +21,6 @@
 #include "modify.h"
 #include "neigh_list.h"
 #include "neighbor.h"
-#include "pair.h"
 
 #include <utility>
 
diff --git a/src/CG-DNA/pair_oxdna_stk.cpp b/src/CG-DNA/pair_oxdna_stk.cpp
index 99ddf9c96a..1ef779dbd9 100644
--- a/src/CG-DNA/pair_oxdna_stk.cpp
+++ b/src/CG-DNA/pair_oxdna_stk.cpp
@@ -25,6 +25,7 @@
 #include "memory.h"
 #include "mf_oxdna.h"
 #include "neighbor.h"
+#include "neigh_list.h"
 
 #include <cmath>
 #include <cstring>
diff --git a/src/CG-DNA/pair_oxrna2_stk.cpp b/src/CG-DNA/pair_oxrna2_stk.cpp
index f6e12fffe1..f56aa572e8 100644
--- a/src/CG-DNA/pair_oxrna2_stk.cpp
+++ b/src/CG-DNA/pair_oxrna2_stk.cpp
@@ -26,6 +26,7 @@
 #include "memory.h"
 #include "mf_oxdna.h"
 #include "neighbor.h"
+#include "neigh_list.h"
 
 #include <cmath>
 #include <cstring>
diff --git a/src/COLVARS/colvarproxy_lammps.cpp b/src/COLVARS/colvarproxy_lammps.cpp
index 06a2a23ec0..1c5d84d62a 100644
--- a/src/COLVARS/colvarproxy_lammps.cpp
+++ b/src/COLVARS/colvarproxy_lammps.cpp
@@ -19,11 +19,7 @@
 #include "colvarmodule.h"
 #include "colvarproxy.h"
 
-#include <sys/stat.h>
-#include <cerrno>
-#include <cstring>
-#include <iostream>
-#include <memory>
+#include <sstream>
 
 #define HASH_FAIL  -1
 
diff --git a/src/COLVARS/fix_colvars.cpp b/src/COLVARS/fix_colvars.cpp
index baf0209c61..0b496ee71b 100644
--- a/src/COLVARS/fix_colvars.cpp
+++ b/src/COLVARS/fix_colvars.cpp
@@ -40,8 +40,6 @@
 
 #include <cstring>
 #include <iostream>
-#include <memory>
-#include <vector>
 
 #include "colvarproxy_lammps.h"
 #include "colvarmodule.h"
diff --git a/src/DIELECTRIC/fix_polarize_bem_gmres.cpp b/src/DIELECTRIC/fix_polarize_bem_gmres.cpp
index 40f7d0c853..67d79d57f0 100644
--- a/src/DIELECTRIC/fix_polarize_bem_gmres.cpp
+++ b/src/DIELECTRIC/fix_polarize_bem_gmres.cpp
@@ -41,7 +41,6 @@
 #include "comm.h"
 #include "error.h"
 #include "force.h"
-#include "group.h"
 #include "kspace.h"
 #include "math_const.h"
 #include "memory.h"
diff --git a/src/DIELECTRIC/pppm_dielectric.cpp b/src/DIELECTRIC/pppm_dielectric.cpp
index 49fa8ed128..e02cc33162 100644
--- a/src/DIELECTRIC/pppm_dielectric.cpp
+++ b/src/DIELECTRIC/pppm_dielectric.cpp
@@ -23,7 +23,6 @@
 #include "comm.h"
 #include "domain.h"
 #include "error.h"
-#include "fft3d_wrap.h"
 #include "force.h"
 #include "grid3d.h"
 #include "math_const.h"
diff --git a/src/DIELECTRIC/pppm_disp_dielectric.cpp b/src/DIELECTRIC/pppm_disp_dielectric.cpp
index c6108646be..e5149ae427 100644
--- a/src/DIELECTRIC/pppm_disp_dielectric.cpp
+++ b/src/DIELECTRIC/pppm_disp_dielectric.cpp
@@ -25,10 +25,12 @@
 #include "error.h"
 #include "force.h"
 #include "grid3d.h"
+#include "lmpfftsettings.h"
 #include "math_const.h"
 #include "memory.h"
 
 #include <cmath>
+#include <cstring>
 
 using namespace LAMMPS_NS;
 using namespace MathConst;
diff --git a/src/DIFFRACTION/compute_xrd.cpp b/src/DIFFRACTION/compute_xrd.cpp
index 426248b31e..11e0bb9a9f 100644
--- a/src/DIFFRACTION/compute_xrd.cpp
+++ b/src/DIFFRACTION/compute_xrd.cpp
@@ -35,7 +35,7 @@
 
 #include "omp_compat.h"
 using namespace LAMMPS_NS;
-using namespace MathConst;
+using MathConst::MY_PI;
 
 static const char cite_compute_xrd_c[] =
   "compute xrd command: doi:10.1088/0965-0393/21/5/055020\n\n"
@@ -261,7 +261,7 @@ void ComputeXRD::init()
   double ang = 0.0;
 
   double convf = 360 / MY_PI;
-  if (radflag ==1) convf = 1;
+  if (radflag == 1) convf = 2;
 
   int n = 0;
   for (int m = 0; m < mmax; m++) {
diff --git a/src/DIPOLE/pair_lj_long_dipole_long.cpp b/src/DIPOLE/pair_lj_long_dipole_long.cpp
index 6f46d9d024..2e1a9c4db0 100644
--- a/src/DIPOLE/pair_lj_long_dipole_long.cpp
+++ b/src/DIPOLE/pair_lj_long_dipole_long.cpp
@@ -401,7 +401,7 @@ void PairLJLongDipoleLong::compute(int eflag, int vflag)
   ev_init(eflag,vflag);
 
   double **x = atom->x, *x0 = x[0];
-  double **mu = atom->mu, *mu0 = mu[0], *imu, *jmu;
+  double **mu = atom->mu, *mu0 = mu[0];
   double **tq = atom->torque, *tq0 = tq[0], *tqi;
   double **f = atom->f, *f0 = f[0], *fi = f0, fx, fy, fz;
   double *q = atom->q, qi = 0, qj;
@@ -435,7 +435,7 @@ void PairLJLongDipoleLong::compute(int eflag, int vflag)
     lj1i = lj1[typei]; lj2i = lj2[typei]; lj3i = lj3[typei]; lj4i = lj4[typei];
     cutsqi = cutsq[typei]; cut_ljsqi = cut_ljsq[typei];
     memcpy(xi, x0+(i+(i<<1)), 3*sizeof(double));
-    memcpy(mui, imu = mu0+(i<<2), 3*sizeof(double));
+    memcpy(mui, mu0+(i<<2), 3*sizeof(double));
 
     jneighn = (jneigh = list->firstneigh[i])+list->numneigh[i];
 
@@ -453,7 +453,7 @@ void PairLJLongDipoleLong::compute(int eflag, int vflag)
       r2inv = 1.0/rsq;
 
       if (order3 && (rsq < cut_coulsq)) {               // dipole
-        memcpy(muj, jmu = mu0+(j<<2), 3*sizeof(double));
+        memcpy(muj, mu0+(j<<2), 3*sizeof(double));
         {                                               // series real space
           double r = sqrt(rsq);
           double x = g_ewald*r;
diff --git a/src/DPD-REACT/npair_half_bin_newton_ssa.cpp b/src/DPD-REACT/npair_half_bin_newton_ssa.cpp
index ce405da3ac..7393e54f78 100644
--- a/src/DPD-REACT/npair_half_bin_newton_ssa.cpp
+++ b/src/DPD-REACT/npair_half_bin_newton_ssa.cpp
@@ -18,6 +18,7 @@
 ------------------------------------------------------------------------- */
 
 #include "npair_half_bin_newton_ssa.h"
+
 #include "nstencil_ssa.h"
 #include "nbin_ssa.h"
 #include "neigh_list.h"
diff --git a/src/DRUDE/compute_temp_drude.cpp b/src/DRUDE/compute_temp_drude.cpp
index b309346934..0bf276924a 100644
--- a/src/DRUDE/compute_temp_drude.cpp
+++ b/src/DRUDE/compute_temp_drude.cpp
@@ -54,20 +54,19 @@ ComputeTempDrude::ComputeTempDrude(LAMMPS *lmp, int narg, char **arg) :
 
 ComputeTempDrude::~ComputeTempDrude()
 {
-  delete [] vector;
-  delete [] extlist;
-  delete [] id_temp;
+  delete[] vector;
+  delete[] extlist;
+  delete[] id_temp;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ComputeTempDrude::init()
 {
-  int ifix;
-  for (ifix = 0; ifix < modify->nfix; ifix++)
-    if (strcmp(modify->fix[ifix]->style,"drude") == 0) break;
-  if (ifix == modify->nfix) error->all(FLERR, "compute temp/drude requires fix drude");
-  fix_drude = dynamic_cast<FixDrude *>(modify->fix[ifix]);
+  // Fix drude already checks that there is only one fix drude instance
+  auto &fixes = modify->get_fix_by_style("^drude$");
+  if (fixes.size() == 0) error->all(FLERR, "compute temp/drude requires fix drude");
+  fix_drude = dynamic_cast<FixDrude *>(fixes[0]);
 
   if (!comm->ghost_velocity)
     error->all(FLERR,"compute temp/drude requires ghost velocities. Use comm_modify vel yes");
@@ -118,14 +117,12 @@ int ComputeTempDrude::modify_param(int narg, char **arg)
     delete [] id_temp;
     id_temp = utils::strdup(arg[1]);
 
-    int icompute = modify->find_compute(id_temp);
-    if (icompute < 0)
-      error->all(FLERR,"Could not find fix_modify temperature ID");
-    temperature = modify->compute[icompute];
+    temperature = modify->get_compute_by_id(id_temp);
+    if (!temperature)
+      error->all(FLERR,"Could not find fix_modify temperature compute {}", id_temp);
 
     if (temperature->tempflag == 0)
-      error->all(FLERR,
-                 "Fix_modify temperature ID does not compute temperature");
+      error->all(FLERR, "Fix_modify temperature compute {} does not compute temperature", id_temp);
     if (temperature->igroup != igroup && comm->me == 0)
       error->warning(FLERR,"Group for fix_modify temp != fix group");
     return 2;
diff --git a/src/DRUDE/fix_drude.cpp b/src/DRUDE/fix_drude.cpp
index 56a5cd42f6..6c0c84861a 100644
--- a/src/DRUDE/fix_drude.cpp
+++ b/src/DRUDE/fix_drude.cpp
@@ -22,7 +22,6 @@
 #include "modify.h"
 #include "molecule.h"
 
-#include <cstring>
 #include <map>
 #include <set>
 
@@ -83,10 +82,7 @@ FixDrude::~FixDrude()
 
 void FixDrude::init()
 {
-  int count = 0;
-  for (int i = 0; i < modify->nfix; i++)
-    if (strcmp(modify->fix[i]->style,"drude") == 0) count++;
-  if (count > 1) error->all(FLERR,"More than one fix drude");
+  if (modify->get_fix_by_style("^drude$").size() > 1) error->all(FLERR,"More than one fix drude");
 
   if (!rebuildflag) rebuild_special();
 }
diff --git a/src/DRUDE/fix_drude_transform.cpp b/src/DRUDE/fix_drude_transform.cpp
index 4a85b8ae4c..ad92740116 100644
--- a/src/DRUDE/fix_drude_transform.cpp
+++ b/src/DRUDE/fix_drude_transform.cpp
@@ -24,7 +24,6 @@
 #include "modify.h"
 
 #include <cmath>
-#include <cstring>
 
 using namespace LAMMPS_NS;
 using namespace FixConst;
@@ -54,7 +53,7 @@ void FixDrudeTransform<inverse>::init()
   std::string substyle = "direct";
   if (inverse) substyle = "inverse";
 
-  auto fixes = modify->get_fix_by_style("^drude");
+  auto fixes = modify->get_fix_by_style("^drude$");
   if (fixes.size() > 0) fix_drude = dynamic_cast<FixDrude *>(fixes[0]);
   if (!fix_drude)
     error->all(FLERR, "fix drude/transform/{} requires fix drude", substyle);
diff --git a/src/DRUDE/fix_tgnh_drude.cpp b/src/DRUDE/fix_tgnh_drude.cpp
index c067bf7adf..e15e865ce8 100644
--- a/src/DRUDE/fix_tgnh_drude.cpp
+++ b/src/DRUDE/fix_tgnh_drude.cpp
@@ -516,7 +516,7 @@ FixTGNHDrude::FixTGNHDrude(LAMMPS *lmp, int narg, char **arg) :
 
   // find fix drude
 
-  auto fdrude = modify->get_fix_by_style("^drude");
+  auto fdrude = modify->get_fix_by_style("^drude$");
   if (fdrude.size() < 1) error->all(FLERR, "Fix {} requires fix drude", style);
   fix_drude = dynamic_cast<FixDrude *>(fdrude[0]);
   if (!fix_drude) error->all(FLERR, "Fix {} requires fix drude", style);
diff --git a/src/ELECTRODE/README b/src/ELECTRODE/README
new file mode 100644
index 0000000000..72a95b7fe9
--- /dev/null
+++ b/src/ELECTRODE/README
@@ -0,0 +1,17 @@
+This package provides the "fix electrode/*" commands which can be used in a
+LAMMPS input script. These fixes implement the constant potential method, which
+minimizes the energy of electrodes as a function of atom charges at given
+electric potentials or electrode charges.
+
+See the doc page for the fix electrode/conp command to get started.  There are
+example scripts for using this package in examples/PACKAGES/electrode.
+
+This package uses an external library in lib/electrode which must be compiled
+before making LAMMPS. For a CMake build, the location of the LAPACK library
+should be linked automatically. Alternatively, the "USE_INTERNAL_LINALG" option
+can be used to enable the bundled library. See the doc page on "Packages with
+extra build options" for more information.
+
+The primary people who created this package are Ludwig Ahrens-Iwers, Shern Tee
+(s.tee@griffith.edu.au) and Robert Meißner (robert.meissner@tuhh.de). Contact
+them directly if you have questions.
diff --git a/src/ELECTRODE/electrode_math.h b/src/ELECTRODE/electrode_math.h
index 5992df2289..4a3cb7bac4 100644
--- a/src/ELECTRODE/electrode_math.h
+++ b/src/ELECTRODE/electrode_math.h
@@ -18,22 +18,20 @@
 #ifndef LMP_ELECTRODE_MATH_H
 #define LMP_ELECTRODE_MATH_H
 
+#include "ewald_const.h"
 #include "math_const.h"
 
+#include <cmath>
+
 namespace LAMMPS_NS {
+using namespace EwaldConst;
 
 namespace ElectrodeMath {
-  static constexpr double EWALD_P = 0.3275911;
-  static constexpr double A1 = 0.254829592;
-  static constexpr double A2 = -0.284496736;
-  static constexpr double A3 = 1.421413741;
-  static constexpr double A4 = -1.453152027;
-  static constexpr double A5 = 1.061405429;
   static constexpr double ERFCMAX = 5.8;    // erfc(ERFCMAX) < machine epsilon(double)
 
   static double safe_erfc(double x)
   {
-    if (x > ERFCMAX) return 0.;
+    if (x > ERFCMAX) return 0.0;
     double expm2 = exp(-x * x);
     double t = 1.0 / (1.0 + EWALD_P * x);
     return t * (A1 + t * (A2 + t * (A3 + t * (A4 + t * A5)))) * expm2;
@@ -42,14 +40,14 @@ namespace ElectrodeMath {
   static double safe_derfcr(double x, double &erfc)
   {
     if (x > ERFCMAX) {
-      erfc = 0.;
-      return 0.;
+      erfc = 0.0;
+      return 0.0;
     }
     double x2 = x * x;
     double expm2 = exp(-x2);
     double t = 1.0 / (1.0 + EWALD_P * x);
     erfc = t * (A1 + t * (A2 + t * (A3 + t * (A4 + t * A5)))) * expm2;
-    return -erfc - 2 * expm2 * x / MathConst::MY_PIS;
+    return -erfc - 2.0 * expm2 * x / MathConst::MY_PIS;
   }
 }    // namespace ElectrodeMath
 
diff --git a/src/ELECTRODE/electrode_matrix.cpp b/src/ELECTRODE/electrode_matrix.cpp
index 86761742d4..9c2da1d13b 100644
--- a/src/ELECTRODE/electrode_matrix.cpp
+++ b/src/ELECTRODE/electrode_matrix.cpp
@@ -43,6 +43,7 @@ ElectrodeMatrix::ElectrodeMatrix(LAMMPS *lmp, int electrode_group, double eta) :
   groupbit = group->bitmask[igroup];
   ngroup = group->count(igroup);
   this->eta = eta;
+  etaflag = false;
   tfflag = false;
 }
 
@@ -72,6 +73,14 @@ void ElectrodeMatrix::setup_tf(const std::map<int, double> &tf_types)
 
 /* ---------------------------------------------------------------------- */
 
+void ElectrodeMatrix::setup_eta(int index)
+{
+  etaflag = true;
+  eta_index = index;
+}
+
+/* ---------------------------------------------------------------------- */
+
 void ElectrodeMatrix::compute_array(double **array, bool timer_flag)
 {
   // setting all entries of coulomb matrix to zero
@@ -115,8 +124,6 @@ void ElectrodeMatrix::pair_contribution(double **array)
   int nlocal = atom->nlocal;
   int newton_pair = force->newton_pair;
 
-  double const etaij = eta * eta / sqrt(2.0 * eta * eta);    // see mw ewald theory eq. (29)-(30)
-
   // neighbor list will be ready because called from post_neighbor
   inum = list->inum;
   ilist = list->ilist;
@@ -135,6 +142,7 @@ void ElectrodeMatrix::pair_contribution(double **array)
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
+    double const eta_i = etaflag ? atom->dvector[eta_index][i] : eta;
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
@@ -152,6 +160,9 @@ void ElectrodeMatrix::pair_contribution(double **array)
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
+        double const eta_j = etaflag ? atom->dvector[eta_index][j] : eta;
+        double const etaij = eta_i * eta_j / sqrt(eta_i * eta_i + eta_j * eta_j);
+
         r = sqrt(rsq);
         rinv = 1.0 / r;
         aij = rinv;
@@ -178,7 +189,10 @@ void ElectrodeMatrix::self_contribution(double **array)
   const double preta = MY_SQRT2 / MY_PIS;
 
   for (int i = 0; i < nlocal; i++)
-    if (mask[i] & groupbit) { array[mpos[i]][mpos[i]] += preta * eta - selfint; }
+    if (mask[i] & groupbit) {
+      double const eta_i = etaflag ? atom->dvector[eta_index][i] : eta;
+      array[mpos[i]][mpos[i]] += preta * eta_i - selfint;
+    }
 }
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/ELECTRODE/electrode_matrix.h b/src/ELECTRODE/electrode_matrix.h
index 8499cfdb34..1c64d8a4c4 100644
--- a/src/ELECTRODE/electrode_matrix.h
+++ b/src/ELECTRODE/electrode_matrix.h
@@ -30,6 +30,7 @@ class ElectrodeMatrix : protected Pointers {
   ElectrodeMatrix(class LAMMPS *, int, double);
   void setup(const std::unordered_map<tagint, int> &, class Pair *, class NeighList *);
   void setup_tf(const std::map<int, double> &);
+  void setup_eta(int);
   void compute_array(double **, bool);
   int igroup;
 
@@ -39,6 +40,8 @@ class ElectrodeMatrix : protected Pointers {
   double **cutsq;
   double g_ewald, eta;
   bool tfflag;
+  bool etaflag;
+  int eta_index;
   std::map<int, double> tf_types;
   std::unordered_map<tagint, int> tag_to_iele;
   bool assigned;
diff --git a/src/ELECTRODE/electrode_vector.cpp b/src/ELECTRODE/electrode_vector.cpp
index 8511ddc17c..fc2cca5e46 100644
--- a/src/ELECTRODE/electrode_vector.cpp
+++ b/src/ELECTRODE/electrode_vector.cpp
@@ -29,6 +29,7 @@
 #include "neigh_list.h"
 #include "pair.h"
 
+#include <cassert>
 #include <cmath>
 #include <exception>
 
@@ -47,6 +48,7 @@ ElectrodeVector::ElectrodeVector(LAMMPS *lmp, int sensor_group, int source_group
   source_grpbit = group->bitmask[source_group];
   this->eta = eta;
   tfflag = false;
+  etaflag = false;
 
   kspace_time_total = 0;
   pair_time_total = 0;
@@ -93,6 +95,14 @@ void ElectrodeVector::setup_tf(const std::map<int, double> &tf_types)
 
 /* ---------------------------------------------------------------------- */
 
+void ElectrodeVector::setup_eta(int index)
+{
+  etaflag = true;
+  eta_index = index;
+}
+
+/* ---------------------------------------------------------------------- */
+
 void ElectrodeVector::compute_vector(double *vector)
 {
   MPI_Barrier(world);
@@ -121,7 +131,6 @@ void ElectrodeVector::compute_vector(double *vector)
 
 void ElectrodeVector::pair_contribution(double *vector)
 {
-  double const etaij = eta * MY_ISQRT2;
   double **x = atom->x;
   double *q = atom->q;
   int *type = atom->type;
@@ -142,6 +151,7 @@ void ElectrodeVector::pair_contribution(double *vector)
     double const xtmp = x[i][0];
     double const ytmp = x[i][1];
     double const ztmp = x[i][2];
+    double const eta_i = etaflag ? atom->dvector[eta_index][i] : eta;
     int itype = type[i];
     int *jlist = firstneigh[i];
     int jnum = numneigh[i];
@@ -158,18 +168,22 @@ void ElectrodeVector::pair_contribution(double *vector)
       double const rsq = delx * delx + dely * dely + delz * delz;
       int jtype = type[j];
       if (rsq >= cutsq[itype][jtype]) continue;
+      double const eta_j = etaflag ? atom->dvector[eta_index][j] : eta;
+      double etaij;
+      if (i_in_sensor && j_in_sensor) {
+        etaij = eta_i * eta_j / sqrt(eta_i * eta_i + eta_j * eta_j);
+      } else if (i_in_sensor) {
+        etaij = eta_i;
+      } else {
+        assert(j_in_sensor);
+        etaij = eta_j;
+      }
       double const r = sqrt(rsq);
       double const rinv = 1.0 / r;
       double aij = rinv;
       aij *= ElectrodeMath::safe_erfc(g_ewald * r);
-      if (invert_source)
-        aij -= ElectrodeMath::safe_erfc(eta * r) * rinv;
-      else
-        aij -= ElectrodeMath::safe_erfc(etaij * r) * rinv;
-      if (i_in_sensor) {
-        vector[i] += aij * q[j];
-        //} else if (j_in_sensor) {
-      }
+      aij -= ElectrodeMath::safe_erfc(etaij * r) * rinv;
+      if (i_in_sensor) { vector[i] += aij * q[j]; }
       if (j_in_sensor && (!invert_source || !i_in_sensor)) { vector[j] += aij * q[i]; }
     }
   }
@@ -189,9 +203,10 @@ void ElectrodeVector::self_contribution(double *vector)
 
   for (int ii = 0; ii < inum; ii++) {
     int const i = ilist[ii];
+    double const eta_i = etaflag ? atom->dvector[eta_index][i] : eta;
     bool const i_in_sensor = (mask[i] & groupbit);
     bool const i_in_source = !!(mask[i] & source_grpbit) != invert_source;
-    if (i_in_sensor && i_in_source) vector[i] += (preta * eta - selfint) * q[i];
+    if (i_in_sensor && i_in_source) vector[i] += (preta * eta_i - selfint) * q[i];
   }
 }
 
diff --git a/src/ELECTRODE/electrode_vector.h b/src/ELECTRODE/electrode_vector.h
index e7f637dd2d..a4f274a049 100644
--- a/src/ELECTRODE/electrode_vector.h
+++ b/src/ELECTRODE/electrode_vector.h
@@ -29,6 +29,7 @@ class ElectrodeVector : protected Pointers {
   ~ElectrodeVector() override;
   void setup(class Pair *, class NeighList *, bool);
   void setup_tf(const std::map<int, double> &);
+  void setup_eta(int);
   void compute_vector(double *);
   int igroup, source_group;
 
@@ -39,6 +40,8 @@ class ElectrodeVector : protected Pointers {
   double **cutsq;
   double g_ewald, eta;
   bool tfflag;
+  bool etaflag;
+  int eta_index;
   std::map<int, double> tf_types;
   class Pair *pair;
   class NeighList *list;
diff --git a/src/ELECTRODE/fix_electrode_conp.cpp b/src/ELECTRODE/fix_electrode_conp.cpp
index 9e2599ca8a..94c085de5c 100644
--- a/src/ELECTRODE/fix_electrode_conp.cpp
+++ b/src/ELECTRODE/fix_electrode_conp.cpp
@@ -43,6 +43,7 @@
 #include <cmath>
 #include <cstring>
 #include <exception>
+#include <memory>
 #include <utility>
 
 using namespace LAMMPS_NS;
@@ -97,26 +98,30 @@ FixElectrodeConp::FixElectrodeConp(LAMMPS *lmp, int narg, char **arg) :
   top_group = 0;
   intelflag = false;
   tfflag = false;
+  etaflag = false;
   timer_flag = false;
 
   update_time = 0;
   mult_time = 0;
   n_call = n_cg_step = 0;
 
+  qtotal = 0.;
+  qtotal_var_style = VarStyle::UNSET;
+
   // read fix command
   fixname = std::string(arg[0]);
   groups = std::vector<int>(1, igroup);
   group_bits = std::vector<int>(1, groupbit);
   group_psi_var_names = std::vector<std::string>(1);
   group_psi_var_styles = std::vector<VarStyle>(1, VarStyle::CONST);
-  group_psi = std::vector<double>(1);
+  group_psi_const = std::vector<double>(1);
   etypes_neighlists = false;
   if (strstr(arg[3], "v_") == arg[3]) {
     std::string vname = arg[3];
     group_psi_var_names[0] = vname.substr(2);
     group_psi_var_styles[0] = VarStyle::EQUAL;
   } else
-    group_psi[0] = utils::numeric(FLERR, arg[3], false, lmp);
+    group_psi_const[0] = utils::numeric(FLERR, arg[3], false, lmp);
   char *eta_str = arg[4];
   eta = utils::numeric(FLERR, eta_str, false, lmp);
   int iarg = 5;
@@ -132,12 +137,12 @@ FixElectrodeConp::FixElectrodeConp(LAMMPS *lmp, int narg, char **arg) :
         std::string vname = arg[iarg];
         group_psi_var_names.push_back(vname.substr(2));
         group_psi_var_styles.push_back(VarStyle::EQUAL);
-        group_psi.push_back(0.);
+        group_psi_const.push_back(0.);
       } else {
         std::string null;
         group_psi_var_names.push_back(null);
         group_psi_var_styles.push_back(VarStyle::CONST);
-        group_psi.push_back(utils::numeric(FLERR, arg[iarg], false, lmp));
+        group_psi_const.push_back(utils::numeric(FLERR, arg[iarg], false, lmp));
       }
     } else if ((strcmp(arg[iarg], "algo") == 0)) {
       if (!default_algo) error->one(FLERR, "Algorithm can be set only once");
@@ -195,8 +200,32 @@ FixElectrodeConp::FixElectrodeConp(LAMMPS *lmp, int narg, char **arg) :
       thermo_temp = force->boltz / force->qe2f * utils::numeric(FLERR, arg[++iarg], false, lmp);
       thermo_time = utils::numeric(FLERR, arg[++iarg], false, lmp);
       thermo_init = utils::inumeric(FLERR, arg[++iarg], false, lmp);
-      // toggle parameters
-    } else if ((strcmp(arg[iarg], "etypes") == 0)) {
+    } else if ((strcmp(arg[iarg], "qtotal") == 0)) {
+      if (iarg + 2 > narg) error->all(FLERR, "Need one argument after qtotal keyword");
+      if (strcmp(this->style, "electrode/conq") == 0)
+        error->all(FLERR, "qtotal keyword not available for electrode/conq");
+      ++iarg;
+      if (strstr(arg[iarg], "v_") == arg[iarg]) {
+        std::string vname = arg[iarg];
+        qtotal_var_name = vname.substr(2);
+        qtotal_var_style = VarStyle::EQUAL;
+      } else {
+        qtotal = utils::numeric(FLERR, arg[iarg], false, lmp);
+        qtotal_var_style = VarStyle::CONST;
+      }
+    } else if ((strcmp(arg[iarg], "eta") == 0)) {
+      if (iarg + 2 > narg) error->all(FLERR, "Need two arguments after eta command");
+      etaflag = true;
+      int is_double, cols, ghost;
+      eta_index = atom->find_custom_ghost(arg[++iarg] + 2, is_double, cols, ghost);
+      if (eta_index == -1)
+        error->all(FLERR, "eta keyword requires name of previously defined property");
+      if (!is_double) error->all(FLERR, "eta keyword requires double-valued property/atom vector");
+      if (cols != 0) error->all(FLERR, "eta keyword requires property/atom vector not an array");
+      if (!ghost) error->all(FLERR, "eta keyword requires property/atom fix with ghost on");
+    }
+    // toggle parameters
+    else if ((strcmp(arg[iarg], "etypes") == 0)) {
       etypes_neighlists = utils::logical(FLERR, arg[++iarg], false, lmp);
     } else if ((strncmp(arg[iarg], "symm", 4) == 0)) {
       symm = utils::logical(FLERR, arg[++iarg], false, lmp);
@@ -208,6 +237,12 @@ FixElectrodeConp::FixElectrodeConp(LAMMPS *lmp, int narg, char **arg) :
     iarg++;
   }
 
+  if (qtotal_var_style != VarStyle::UNSET) {
+    if (symm) error->all(FLERR, "{} cannot use qtotal keyword with symm on", this->style);
+  }
+
+  // computatonal potential
+  group_psi = std::vector<double>(groups.size());
   // union of all coupled groups
   std::string union_group = "conp_group";
   std::string group_cmd = union_group + " union";
@@ -225,6 +260,7 @@ FixElectrodeConp::FixElectrodeConp(LAMMPS *lmp, int narg, char **arg) :
   if (need_elec_vector) elec_vector = new ElectrodeVector(lmp, igroup, igroup, eta, false);
   assert(groups.size() == group_bits.size());
   assert(groups.size() == group_psi.size());
+  assert(groups.size() == group_psi_const.size());
   assert(groups.size() == group_psi_var_styles.size());
   assert(groups.size() == group_psi_var_names.size());
   assert(igroup == elyt_vector->igroup);
@@ -374,6 +410,31 @@ void FixElectrodeConp::init()
     if (strncmp(modify->fix[i]->style, "electrode", 9) == 0) count++;
   if (count > 1) error->all(FLERR, "More than one fix electrode");
 
+  // make sure electrode atoms are not integrated if a matrix is used for electrode-electrode interaction
+  int const nlocal = atom->nlocal;
+  int *mask = atom->mask;
+  Fix **fix = modify->fix;
+  if (matrix_algo) {
+    std::vector<char *> integrate_ids = std::vector<char *>();
+    for (int i = 0; i < modify->nfix; i++) {
+      if (fix[i]->time_integrate == 0) continue;
+      int electrode_mover = 0;
+      int fix_groupbit = fix[i]->groupbit;
+      for (int j = 0; j < nlocal; j++)
+        if ((mask[j] & fix_groupbit) && (mask[j] & groupbit)) electrode_mover = 1;
+      MPI_Allreduce(MPI_IN_PLACE, &electrode_mover, 1, MPI_INT, MPI_SUM, world);
+      if (electrode_mover && comm->me == 0) integrate_ids.push_back(fix[i]->id);
+    }
+    if (comm->me == 0)
+      for (char *fix_id : integrate_ids)
+        error->warning(
+            FLERR,
+            "Electrode atoms are integrated by fix {}, but fix electrode is using a matrix method. "
+            "For "
+            "mobile electrodes use the conjugate gradient algorithm without matrix ('algo cg').",
+            fix_id);
+  }
+
   // check for package intel
   if (etypes_neighlists)
     request_etypes_neighlists();
@@ -448,6 +509,7 @@ void FixElectrodeConp::setup_post_neighbor()
   // get equal-style variable ids:
   group_psi_var_ids = std::vector<int>(num_of_groups, -1);
   for (int g = 0; g < num_of_groups; g++) {
+    assert(group_psi_var_styles[g] != VarStyle::UNSET);
     if (group_psi_var_styles[g] == VarStyle::CONST) continue;
     const char *var_name = group_psi_var_names[g].c_str();
     int var_id = input->variable->find(var_name);
@@ -456,13 +518,23 @@ void FixElectrodeConp::setup_post_neighbor()
       error->all(FLERR, "Variable '{}' for fix {} is not equal-style", var_name, style);
     group_psi_var_ids[g] = var_id;
   }
+  if (qtotal_var_style == VarStyle::EQUAL) {
+    const char *var_name = qtotal_var_name.c_str();
+    int var_id = input->variable->find(var_name);
+    if (var_id < 0) error->all(FLERR, "Variable '{}' for fix electrode does not exist", var_name);
+    if (!input->variable->equalstyle(var_id))
+      error->all(FLERR, "Variable '{}' for fix electrode is not equal-style", var_name);
+    qtotal_var_id = var_id;
+  }
 
   // pair and list setups:
 
   evscale = force->qe2f / force->qqrd2e;
   elyt_vector->setup(pair, vec_neighlist, timer_flag);
+  if (etaflag) elyt_vector->setup_eta(eta_index);
   if (need_elec_vector) {
     elec_vector->setup(pair, mat_neighlist, timer_flag);
+    if (etaflag) elec_vector->setup_eta(eta_index);
     if (tfflag) elec_vector->setup_tf(tf_types);
   }
 
@@ -497,7 +569,8 @@ void FixElectrodeConp::setup_post_neighbor()
       if (etypes_neighlists) neighbor->build_one(mat_neighlist, 0);
       auto array_compute = std::unique_ptr<ElectrodeMatrix>(new ElectrodeMatrix(lmp, igroup, eta));
       array_compute->setup(tag_to_iele, pair, mat_neighlist);
-      if (tfflag) { array_compute->setup_tf(tf_types); }
+      if (etaflag) array_compute->setup_eta(eta_index);
+      if (tfflag) array_compute->setup_tf(tf_types);
       array_compute->compute_array(elastance, timer_flag);
     }    // write_mat before proceeding
     if (comm->me == 0 && write_mat) {
@@ -804,6 +877,8 @@ void FixElectrodeConp::update_charges()
     }
     MPI_Allreduce(MPI_IN_PLACE, &sb_charges.front(), num_of_groups, MPI_DOUBLE, MPI_SUM, world);
     update_psi();    // use for equal-style and conq
+    if (qtotal_var_style != VarStyle::UNSET)
+      update_psi_qtotal();    // use for qtotal; same for thermo
     for (int g = 0; g < num_of_groups; g++)
       for (int j = 0; j < nlocalele; j++) q_local[j] += sd_vectors[g][list_iele[j]] * group_psi[g];
     MPI_Barrier(world);
@@ -906,12 +981,22 @@ std::vector<double> FixElectrodeConp::gather_ngroup(std::vector<double> x_local)
 }
 
 /* ----------------------------------------------------------------------
-   ensure total electrode charge is 0 if symm
+   ensure total electrode charge is 0 if symm and qtotal if qtotal is used
 ------------------------------------------------------------------------- */
 
 std::vector<double> FixElectrodeConp::constraint_correction(std::vector<double> x)
 {
-  return constraint_projection(std::move(x));
+  if (symm || qtotal_var_style != VarStyle::UNSET) {
+    if (qtotal_var_style == VarStyle::EQUAL) qtotal = input->variable->compute_equal(qtotal_var_id);
+    double sum = 0.;
+    for (double xi : x) sum += xi;
+    MPI_Allreduce(MPI_IN_PLACE, &sum, 1, MPI_DOUBLE, MPI_SUM, world);
+    if (qtotal_var_style != VarStyle::UNSET) sum -= qtotal;
+    sum /= ngroup;
+    for (double &xi : x) xi -= sum;
+    return x;
+  }
+  return x;
 }
 
 /* ----------------------------------------------------------------------
@@ -920,7 +1005,7 @@ std::vector<double> FixElectrodeConp::constraint_correction(std::vector<double>
 
 std::vector<double> FixElectrodeConp::constraint_projection(std::vector<double> x)
 {
-  if (symm) {
+  if (symm || qtotal_var_style != VarStyle::UNSET) {
     double sum = 0.;
     for (double xi : x) sum += xi;
     MPI_Allreduce(MPI_IN_PLACE, &sum, 1, MPI_DOUBLE, MPI_SUM, world);
@@ -978,13 +1063,28 @@ std::vector<double> FixElectrodeConp::times_elastance(std::vector<double> x)
 void FixElectrodeConp::update_psi()
 {
   for (int g = 0; g < num_of_groups; g++) {
-    if (group_psi_var_styles[g] == VarStyle::CONST) continue;
-    group_psi[g] = input->variable->compute_equal(group_psi_var_ids[g]);
+    if (group_psi_var_styles[g] == VarStyle::CONST)
+      group_psi[g] = group_psi_const[g];
+    else
+      group_psi[g] = input->variable->compute_equal(group_psi_var_ids[g]);
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
+void FixElectrodeConp::update_psi_qtotal()
+{
+  if (qtotal_var_style == VarStyle::EQUAL) qtotal = input->variable->compute_equal(qtotal_var_id);
+  double q_current = 0.;
+  for (int i = 0; i < num_of_groups; i++) {
+    q_current += sb_charges[i];
+    for (int j = 0; j < num_of_groups; j++) q_current += macro_capacitance[i][j] * group_psi[j];
+  }
+  double add_psi = (qtotal - q_current) / macro_capacitance_sum;
+  for (int i = 0; i < num_of_groups; i++) group_psi[i] += add_psi;
+}
+/* ---------------------------------------------------------------------- */
+
 void FixElectrodeConp::compute_macro_matrices()
 {
   assert(algo == Algo::MATRIX_INV);
@@ -1040,6 +1140,10 @@ void FixElectrodeConp::compute_macro_matrices()
       }
     }
   }
+
+  macro_capacitance_sum = 0.;
+  for (int i = 0; i < num_of_groups; i++)
+    for (int j = 0; j < num_of_groups; j++) macro_capacitance_sum += macro_capacitance[i][j];
 }
 
 /* ---------------------------------------------------------------------- */
@@ -1096,7 +1200,7 @@ double FixElectrodeConp::self_energy(int eflag)
   // corrections to energy due to self interaction
   double const qqrd2e = force->qqrd2e;
   int const nlocal = atom->nlocal;
-  double const pre = eta / sqrt(MY_2PI) * qqrd2e;
+  double const pre = 1. / sqrt(MY_2PI) * qqrd2e;
   int *mask = atom->mask;
   int *type = atom->type;
   double *q = atom->q;
@@ -1104,7 +1208,8 @@ double FixElectrodeConp::self_energy(int eflag)
   for (int i = 0; i < nlocal; i++) {
     if (groupbit & mask[i]) {
       double const q2 = q[i] * q[i];
-      double e = pre * q2;
+      double ieta = etaflag ? atom->dvector[eta_index][i] : eta;
+      double e = ieta * pre * q2;
       if (tfflag && (groupbit & mask[i])) e += 0.5 * qqrd2e * q2 * tf_types[type[i]];
       energy += e;
       if (eflag) {
@@ -1144,6 +1249,7 @@ double FixElectrodeConp::gausscorr(int eflag, bool fflag)
     double xtmp = x[i][0];
     double ytmp = x[i][1];
     double ztmp = x[i][2];
+    double const eta_i = etaflag ? atom->dvector[eta_index][i] : eta;
     int itype = type[i];
     int *jlist = firstneigh[i];
     int jnum = numneigh[i];
@@ -1152,7 +1258,6 @@ double FixElectrodeConp::gausscorr(int eflag, bool fflag)
       int const j = jlist[jj] & NEIGHMASK;
       bool j_in_ele = groupbit & mask[j];
       if (!(i_in_ele || j_in_ele)) continue;
-      double eta_ij = (i_in_ele && j_in_ele) ? eta / MY_SQRT2 : eta;
 
       double delx = xtmp - x[j][0];
       double dely = ytmp - x[j][1];
@@ -1161,6 +1266,16 @@ double FixElectrodeConp::gausscorr(int eflag, bool fflag)
       int jtype = type[j];
 
       if (rsq < force->pair->cutsq[itype][jtype]) {
+        double const eta_j = etaflag ? atom->dvector[eta_index][j] : eta;
+        double eta_ij;
+        if (i_in_ele && j_in_ele)
+          eta_ij = eta_i * eta_j / sqrt(eta_i * eta_i + eta_j * eta_j);
+        else if (i_in_ele)
+          eta_ij = eta_i;
+        else {
+          assert(j_in_ele);
+          eta_ij = eta_j;
+        }
         double r2inv = 1.0 / rsq;
         double r = sqrt(rsq);
         double erfc_etar = 0.;
diff --git a/src/ELECTRODE/fix_electrode_conp.h b/src/ELECTRODE/fix_electrode_conp.h
index 1289d96281..a1d7530bd1 100644
--- a/src/ELECTRODE/fix_electrode_conp.h
+++ b/src/ELECTRODE/fix_electrode_conp.h
@@ -29,7 +29,6 @@ FixStyle(electrode/conp, FixElectrodeConp);
 #include "fix.h"
 
 #include <map>
-#include <memory>
 #include <unordered_map>
 
 namespace LAMMPS_NS {
@@ -71,7 +70,7 @@ class FixElectrodeConp : public Fix {
 
  protected:
   enum class Algo { MATRIX_INV, MATRIX_CG, CG };
-  enum class VarStyle { CONST, EQUAL };
+  enum class VarStyle { CONST, EQUAL, UNSET };
   virtual void update_psi();
   virtual void pre_update(){};
   virtual void recompute_potential(std::vector<double>, std::vector<double>){};
@@ -80,6 +79,7 @@ class FixElectrodeConp : public Fix {
   virtual void compute_macro_matrices();
   std::vector<double> ele_ele_interaction(const std::vector<double> &);
   std::vector<double> group_psi;
+  std::vector<double> group_psi_const;    // needed to undo qtotal psi updates
   std::vector<int> group_bits;
   std::vector<int> groups;
   int num_of_groups;
@@ -101,6 +101,10 @@ class FixElectrodeConp : public Fix {
   std::string fixname;    // used by electrode/ffield to set up internal efield
   bool intelflag;
   inline virtual void intel_pack_buffers() {}
+  double qtotal;
+  std::string qtotal_var_name;
+  int qtotal_var_id;
+  VarStyle qtotal_var_style;
 
  private:
   std::string output_file_inv, output_file_mat, output_file_vec;
@@ -133,6 +137,8 @@ class FixElectrodeConp : public Fix {
   int get_top_group();    // used by ffield
   int top_group;          // used by ffield
   bool tfflag;
+  int eta_index;    // index of atom property for eta
+  bool etaflag;     // eta specified as atom property
   bool timer_flag;
   std::map<int, double> tf_types;
   // cg
@@ -143,6 +149,9 @@ class FixElectrodeConp : public Fix {
   std::vector<double> gather_ngroup(std::vector<double>);
   std::vector<double> gather_elevec_local(ElectrodeVector *);
   void set_charges(std::vector<double>);
+  // qtotal
+  double macro_capacitance_sum;
+  void update_psi_qtotal();
 
   // fix-specific electrode ID storage system:
 
diff --git a/src/ELECTRODE/fix_electrode_conq.cpp b/src/ELECTRODE/fix_electrode_conq.cpp
index 0d3d1d2aaf..a6baa1e122 100644
--- a/src/ELECTRODE/fix_electrode_conq.cpp
+++ b/src/ELECTRODE/fix_electrode_conq.cpp
@@ -30,7 +30,7 @@ FixElectrodeConq::FixElectrodeConq(LAMMPS *lmp, int narg, char **arg) :
     FixElectrodeConp(lmp, narg, arg)
 {
   // copy const-style values across because update_psi will change group_psi
-  group_q = group_psi;
+  group_q = group_psi_const;
 
   if (symm) {
     if (num_of_groups == 1)
diff --git a/src/ELECTRODE/fix_electrode_thermo.cpp b/src/ELECTRODE/fix_electrode_thermo.cpp
index 343bf14069..92db4b3ee0 100644
--- a/src/ELECTRODE/fix_electrode_thermo.cpp
+++ b/src/ELECTRODE/fix_electrode_thermo.cpp
@@ -47,7 +47,8 @@ FixElectrodeThermo::FixElectrodeThermo(LAMMPS *lmp, int narg, char **arg) :
   if (thermo_time < SMALL) error->all(FLERR, "Keyword temp not set or zero in electrode/thermo");
 
   thermo_random = new RanMars(lmp, thermo_init);
-  if (group_psi_var_styles[0] == VarStyle::CONST) delta_psi_0 = group_psi[1] - group_psi[0];
+  if (group_psi_var_styles[0] == VarStyle::CONST)
+    delta_psi_0 = group_psi_const[1] - group_psi_const[0];
 }
 
 /* ----------------------------------------------------------------------- */
@@ -102,7 +103,7 @@ void FixElectrodeThermo::update_psi()
   double const delta_psi = group_psi_old[1] - group_psi_old[0];
 
   // target potential difference from input parameters
-  if (group_psi_var_styles[0] != VarStyle::CONST) {
+  if (group_psi_var_styles[0] == VarStyle::EQUAL) {
     delta_psi_0 = input->variable->compute_equal(group_psi_var_ids[1]) -
         input->variable->compute_equal(group_psi_var_ids[0]);
   }
diff --git a/src/ELECTRODE/pppm_electrode.cpp b/src/ELECTRODE/pppm_electrode.cpp
index 39e7c66ce5..ee34def74d 100644
--- a/src/ELECTRODE/pppm_electrode.cpp
+++ b/src/ELECTRODE/pppm_electrode.cpp
@@ -20,6 +20,7 @@
 #include "angle.h"
 #include "atom.h"
 #include "bond.h"
+#include "boundary_correction.h"
 #include "citeme.h"
 #include "comm.h"
 #include "domain.h"
@@ -437,6 +438,7 @@ void PPPMElectrode::compute(int eflag, int vflag)
 
   start_compute();
 
+  /*
   if (compute_vector_called && last_invert_source) {
     // electrolyte_density_brick is filled, so we can grab only electrode atoms.
     // Does not work for direct cg algorithm because electrode charges change after compute_vector.
@@ -452,15 +454,17 @@ void PPPMElectrode::compute(int eflag, int vflag)
           density_brick[nz][ny][nx] += electrolyte_density_brick[nz][ny][nx];
         }
   } else {
-    make_rho();
+  */
+  particle_map();
+  make_rho();
 
-    // all procs communicate density values from their ghost cells
-    //   to fully sum contribution in their 3d bricks
-    // remap from 3d decomposition to FFT decomposition
+  // all procs communicate density values from their ghost cells
+  //   to fully sum contribution in their 3d bricks
+  // remap from 3d decomposition to FFT decomposition
 
-    gc->reverse_comm(Grid3d::KSPACE, this, REVERSE_RHO, 1, sizeof(FFT_SCALAR), gc_buf1, gc_buf2,
-                     MPI_FFT_SCALAR);
-  }
+  gc->reverse_comm(Grid3d::KSPACE, this, REVERSE_RHO, 1, sizeof(FFT_SCALAR), gc_buf1, gc_buf2,
+                   MPI_FFT_SCALAR);
+  //}
 
   brick2fft();
 
@@ -583,6 +587,7 @@ void PPPMElectrode::compute_vector(double *vec, int sensor_grpbit, int source_gr
   // electrolyte density (without writing an additional function)
   FFT_SCALAR ***density_brick_real = density_brick;
   FFT_SCALAR *density_fft_real = density_fft;
+  particle_map();
   make_rho_in_brick(source_grpbit, electrolyte_density_brick, invert_source);
   density_brick = electrolyte_density_brick;
   density_fft = electrolyte_density_fft;
@@ -668,7 +673,8 @@ void PPPMElectrode::compute_matrix(bigint *imat, double **matrix, bool timer_fla
   // fft green's function k -> r (double)
   double *greens_real;
   memory->create(greens_real, nz_pppm * ny_pppm * nx_pppm, "pppm/electrode:greens_real");
-  memset(greens_real, 0, (std::size_t)nz_pppm * (std::size_t)ny_pppm * (std::size_t)nx_pppm * sizeof(double));
+  memset(greens_real, 0,
+         (std::size_t) nz_pppm * (std::size_t) ny_pppm * (std::size_t) nx_pppm * sizeof(double));
   for (int i = 0, n = 0; i < nfft; i++) {
     work2[n++] = greensfn[i];
     work2[n++] = ZEROF;
@@ -861,7 +867,7 @@ void PPPMElectrode::two_step_multiplication(bigint *imat, double *greens_real, d
 
   double **gw;
   memory->create(gw, nmat, nxyz, "pppm/electrode:gw");
-  memset(&(gw[0][0]), 0, (std::size_t)nmat * (std::size_t)nxyz * sizeof(double));
+  memset(&(gw[0][0]), 0, (std::size_t) nmat * (std::size_t) nxyz * sizeof(double));
 
   auto fmod = [](int x, int n) {    // fast unsigned mod
     int r = abs(x);
@@ -980,17 +986,18 @@ void PPPMElectrode::allocate()
   // returns local owned and ghost grid bounds
   // setup communication patterns and buffers
 
-  gc = new Grid3d(lmp,world,nx_pppm,ny_pppm,nz_pppm,
-                  nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
-                  nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out);
+  gc = new Grid3d(lmp, world, nx_pppm, ny_pppm, nz_pppm, nxlo_in, nxhi_in, nylo_in, nyhi_in,
+                  nzlo_in, nzhi_in, nxlo_out, nxhi_out, nylo_out, nyhi_out, nzlo_out, nzhi_out);
 
-  gc->setup_comm(ngc_buf1,ngc_buf2);
+  gc->setup_comm(ngc_buf1, ngc_buf2);
 
-  if (differentiation_flag) npergrid = 1;
-  else npergrid = 3;
+  if (differentiation_flag)
+    npergrid = 1;
+  else
+    npergrid = 3;
 
-  memory->create(gc_buf1,npergrid*ngc_buf1,"pppm:gc_buf1");
-  memory->create(gc_buf2,npergrid*ngc_buf2,"pppm:gc_buf2");
+  memory->create(gc_buf1, npergrid * ngc_buf1, "pppm:gc_buf1");
+  memory->create(gc_buf2, npergrid * ngc_buf2, "pppm:gc_buf2");
 
   // tally local grid sizes
   // ngrid = count of owned+ghost grid cells on this proc
@@ -999,67 +1006,63 @@ void PPPMElectrode::allocate()
   // nfft = FFT points in x-pencil FFT decomposition on this proc
   // nfft_both = greater of nfft and nfft_brick
 
-  ngrid = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) *
-    (nzhi_out-nzlo_out+1);
+  ngrid = (nxhi_out - nxlo_out + 1) * (nyhi_out - nylo_out + 1) * (nzhi_out - nzlo_out + 1);
 
-  nfft_brick = (nxhi_in-nxlo_in+1) * (nyhi_in-nylo_in+1) *
-    (nzhi_in-nzlo_in+1);
+  nfft_brick = (nxhi_in - nxlo_in + 1) * (nyhi_in - nylo_in + 1) * (nzhi_in - nzlo_in + 1);
 
-  nfft = (nxhi_fft-nxlo_fft+1) * (nyhi_fft-nylo_fft+1) *
-    (nzhi_fft-nzlo_fft+1);
+  nfft = (nxhi_fft - nxlo_fft + 1) * (nyhi_fft - nylo_fft + 1) * (nzhi_fft - nzlo_fft + 1);
 
-  nfft_both = MAX(nfft,nfft_brick);
+  nfft_both = MAX(nfft, nfft_brick);
 
   // allocate distributed grid data
 
-  memory->create3d_offset(density_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
-                          nxlo_out,nxhi_out,"pppm:density_brick");
+  memory->create3d_offset(density_brick, nzlo_out, nzhi_out, nylo_out, nyhi_out, nxlo_out, nxhi_out,
+                          "pppm:density_brick");
 
-  memory->create(density_fft,nfft_both,"pppm:density_fft");
-  memory->create(greensfn,nfft_both,"pppm:greensfn");
-  memory->create(work1,2*nfft_both,"pppm:work1");
-  memory->create(work2,2*nfft_both,"pppm:work2");
-  memory->create(vg,nfft_both,6,"pppm:vg");
+  memory->create(density_fft, nfft_both, "pppm:density_fft");
+  memory->create(greensfn, nfft_both, "pppm:greensfn");
+  memory->create(work1, 2 * nfft_both, "pppm:work1");
+  memory->create(work2, 2 * nfft_both, "pppm:work2");
+  memory->create(vg, nfft_both, 6, "pppm:vg");
 
   if (triclinic == 0) {
-    memory->create1d_offset(fkx,nxlo_fft,nxhi_fft,"pppm:fkx");
-    memory->create1d_offset(fky,nylo_fft,nyhi_fft,"pppm:fky");
-    memory->create1d_offset(fkz,nzlo_fft,nzhi_fft,"pppm:fkz");
+    memory->create1d_offset(fkx, nxlo_fft, nxhi_fft, "pppm:fkx");
+    memory->create1d_offset(fky, nylo_fft, nyhi_fft, "pppm:fky");
+    memory->create1d_offset(fkz, nzlo_fft, nzhi_fft, "pppm:fkz");
   } else {
-    memory->create(fkx,nfft_both,"pppm:fkx");
-    memory->create(fky,nfft_both,"pppm:fky");
-    memory->create(fkz,nfft_both,"pppm:fkz");
+    memory->create(fkx, nfft_both, "pppm:fkx");
+    memory->create(fky, nfft_both, "pppm:fky");
+    memory->create(fkz, nfft_both, "pppm:fkz");
   }
 
   if (differentiation_flag == 1) {
-    memory->create3d_offset(u_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
-                          nxlo_out,nxhi_out,"pppm:u_brick");
+    memory->create3d_offset(u_brick, nzlo_out, nzhi_out, nylo_out, nyhi_out, nxlo_out, nxhi_out,
+                            "pppm:u_brick");
 
-    memory->create(sf_precoeff1,nfft_both,"pppm:sf_precoeff1");
-    memory->create(sf_precoeff2,nfft_both,"pppm:sf_precoeff2");
-    memory->create(sf_precoeff3,nfft_both,"pppm:sf_precoeff3");
-    memory->create(sf_precoeff4,nfft_both,"pppm:sf_precoeff4");
-    memory->create(sf_precoeff5,nfft_both,"pppm:sf_precoeff5");
-    memory->create(sf_precoeff6,nfft_both,"pppm:sf_precoeff6");
+    memory->create(sf_precoeff1, nfft_both, "pppm:sf_precoeff1");
+    memory->create(sf_precoeff2, nfft_both, "pppm:sf_precoeff2");
+    memory->create(sf_precoeff3, nfft_both, "pppm:sf_precoeff3");
+    memory->create(sf_precoeff4, nfft_both, "pppm:sf_precoeff4");
+    memory->create(sf_precoeff5, nfft_both, "pppm:sf_precoeff5");
+    memory->create(sf_precoeff6, nfft_both, "pppm:sf_precoeff6");
 
   } else {
-    memory->create3d_offset(vdx_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
-                            nxlo_out,nxhi_out,"pppm:vdx_brick");
-    memory->create3d_offset(vdy_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
-                            nxlo_out,nxhi_out,"pppm:vdy_brick");
-    memory->create3d_offset(vdz_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
-                            nxlo_out,nxhi_out,"pppm:vdz_brick");
+    memory->create3d_offset(vdx_brick, nzlo_out, nzhi_out, nylo_out, nyhi_out, nxlo_out, nxhi_out,
+                            "pppm:vdx_brick");
+    memory->create3d_offset(vdy_brick, nzlo_out, nzhi_out, nylo_out, nyhi_out, nxlo_out, nxhi_out,
+                            "pppm:vdy_brick");
+    memory->create3d_offset(vdz_brick, nzlo_out, nzhi_out, nylo_out, nyhi_out, nxlo_out, nxhi_out,
+                            "pppm:vdz_brick");
   }
 
   // summation coeffs
 
   order_allocated = order;
-  if (!stagger_flag) memory->create(gf_b,order,"pppm:gf_b");
-  memory->create2d_offset(rho1d,3,-order/2,order/2,"pppm:rho1d");
-  memory->create2d_offset(drho1d,3,-order/2,order/2,"pppm:drho1d");
-  memory->create2d_offset(rho_coeff,order,(1-order)/2,order/2,"pppm:rho_coeff");
-  memory->create2d_offset(drho_coeff,order,(1-order)/2,order/2,
-                          "pppm:drho_coeff");
+  if (!stagger_flag) memory->create(gf_b, order, "pppm:gf_b");
+  memory->create2d_offset(rho1d, 3, -order / 2, order / 2, "pppm:rho1d");
+  memory->create2d_offset(drho1d, 3, -order / 2, order / 2, "pppm:drho1d");
+  memory->create2d_offset(rho_coeff, order, (1 - order) / 2, order / 2, "pppm:rho_coeff");
+  memory->create2d_offset(drho_coeff, order, (1 - order) / 2, order / 2, "pppm:drho_coeff");
 
   // create 2 FFTs and a Remap
   // 1st FFT keeps data in FFT decomposition
@@ -1068,20 +1071,17 @@ void PPPMElectrode::allocate()
 
   int tmp;
 
-  fft1 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm,
-                   nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
-                   nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
-                   0,0,&tmp,collective_flag);
+  fft1 = new FFT3d(lmp, world, nx_pppm, ny_pppm, nz_pppm, nxlo_fft, nxhi_fft, nylo_fft, nyhi_fft,
+                   nzlo_fft, nzhi_fft, nxlo_fft, nxhi_fft, nylo_fft, nyhi_fft, nzlo_fft, nzhi_fft,
+                   0, 0, &tmp, collective_flag);
 
-  fft2 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm,
-                   nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
-                   nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
-                   0,0,&tmp,collective_flag);
+  fft2 = new FFT3d(lmp, world, nx_pppm, ny_pppm, nz_pppm, nxlo_fft, nxhi_fft, nylo_fft, nyhi_fft,
+                   nzlo_fft, nzhi_fft, nxlo_in, nxhi_in, nylo_in, nyhi_in, nzlo_in, nzhi_in, 0, 0,
+                   &tmp, collective_flag);
 
-  remap = new Remap(lmp,world,
-                    nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
-                    nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
-                    1,0,0,FFT_PRECISION,collective_flag);
+  remap = new Remap(lmp, world, nxlo_in, nxhi_in, nylo_in, nyhi_in, nzlo_in, nzhi_in, nxlo_fft,
+                    nxhi_fft, nylo_fft, nyhi_fft, nzlo_fft, nzhi_fft, 1, 0, 0, FFT_PRECISION,
+                    collective_flag);
 
   // ELECTRODE specific allocations
 
diff --git a/src/EXTRA-COMPUTE/compute_adf.cpp b/src/EXTRA-COMPUTE/compute_adf.cpp
index 35ff8bfd33..20b1749fa9 100644
--- a/src/EXTRA-COMPUTE/compute_adf.cpp
+++ b/src/EXTRA-COMPUTE/compute_adf.cpp
@@ -34,32 +34,27 @@
 #include <cstring>
 
 using namespace LAMMPS_NS;
-using namespace MathConst;
+using MathConst::MY_PI;
+using MathConst::RAD2DEG;
 
-enum{DEGREE, RADIAN, COSINE};
+enum { DEGREE, RADIAN, COSINE };
 
 /* ----------------------------------------------------------------------
    compute angular distribution functions for I, J, K atoms
  ---------------------------------------------------------------------- */
 
 ComputeADF::ComputeADF(LAMMPS *lmp, int narg, char **arg) :
-  Compute(lmp, narg, arg),
-  ilo(nullptr), ihi(nullptr), jlo(nullptr), jhi(nullptr), klo(nullptr), khi(nullptr),
-  hist(nullptr), histall(nullptr),
-  rcutinnerj(nullptr), rcutinnerk(nullptr),
-  rcutouterj(nullptr), rcutouterk(nullptr),
-  list(nullptr),
-  iatomcount(nullptr), iatomcountall(nullptr), iatomflag(nullptr),
-  maxjatom(nullptr), maxkatom(nullptr),
-  numjatom(nullptr), numkatom(nullptr),
-  neighjatom(nullptr),neighkatom(nullptr),
-  jatomflag(nullptr), katomflag(nullptr),
-  maxjkatom(nullptr), numjkatom(nullptr),
-  neighjkatom(nullptr), bothjkatom(nullptr), delrjkatom(nullptr)
+    Compute(lmp, narg, arg), ilo(nullptr), ihi(nullptr), jlo(nullptr), jhi(nullptr), klo(nullptr),
+    khi(nullptr), hist(nullptr), histall(nullptr), rcutinnerj(nullptr), rcutinnerk(nullptr),
+    rcutouterj(nullptr), rcutouterk(nullptr), list(nullptr), iatomcount(nullptr),
+    iatomcountall(nullptr), iatomflag(nullptr), maxjatom(nullptr), maxkatom(nullptr),
+    numjatom(nullptr), numkatom(nullptr), neighjatom(nullptr), neighkatom(nullptr),
+    jatomflag(nullptr), katomflag(nullptr), maxjkatom(nullptr), numjkatom(nullptr),
+    neighjkatom(nullptr), bothjkatom(nullptr), delrjkatom(nullptr)
 {
   int nargsperadf = 7;
 
-  if (narg < 4 ) error->all(FLERR,"Illegal compute adf command");
+  if (narg < 4) utils::missing_cmd_args(FLERR, "compute adf", error);
 
   array_flag = 1;
   extarray = 0;
@@ -89,17 +84,16 @@ ComputeADF::ComputeADF(LAMMPS *lmp, int narg, char **arg) :
       if (strcmp(arg[iarg+1],"degree") == 0) ordinate_style = DEGREE;
       else if (strcmp(arg[iarg+1],"radian") == 0) ordinate_style = RADIAN;
       else if (strcmp(arg[iarg+1],"cosine") == 0) ordinate_style = COSINE;
-      else error->all(FLERR,"Illegal compute adf command");
+      else error->all(FLERR,"Unknown compute adf ordinate flag {}",arg[iarg+1]);
       iarg += 2;
-    } else error->all(FLERR,"Illegal compute adf command");
+    } else error->all(FLERR,"Unknown compute adf keyword {}", arg[iarg]);
   }
 
   // triplewise args
 
   if (!nargtriple) ntriples = 1;
   else {
-    if (nargtriple % nargsperadf)
-      error->all(FLERR,"Illegal compute adf command");
+    if (nargtriple % nargsperadf) error->all(FLERR,"Illegal compute adf command");
     ntriples = nargtriple/nargsperadf;
   }
 
@@ -107,12 +101,9 @@ ComputeADF::ComputeADF(LAMMPS *lmp, int narg, char **arg) :
   size_array_cols = 1 + 2*ntriples;
 
   int ntypes = atom->ntypes;
-  memory->create(iatomflag,ntriples,ntypes+1,
-                 "adf:iatomflag");
-  memory->create(jatomflag,ntriples,ntypes+1,
-                 "adf:jatomflag");
-  memory->create(katomflag,ntriples,ntypes+1,
-                 "adf:katomflag");
+  memory->create(iatomflag,ntriples,ntypes+1,"adf:iatomflag");
+  memory->create(jatomflag,ntriples,ntypes+1,"adf:jatomflag");
+  memory->create(katomflag,ntriples,ntypes+1,"adf:katomflag");
 
   ilo = new int[ntriples];
   ihi = new int[ntriples];
@@ -134,14 +125,14 @@ ComputeADF::ComputeADF(LAMMPS *lmp, int narg, char **arg) :
     klo[0] = 1; khi[0] = ntypes;
   } else {
     cutflag = 1;
+    if ((neighbor->style == Neighbor::MULTI) || (neighbor->style == Neighbor::MULTI_OLD))
+      error->all(FLERR, "Compute adf with custom cutoffs requires neighbor style 'bin' or 'nsq'");
     iarg = 4;
     for (int m = 0; m < ntriples; m++) {
       utils::bounds(FLERR,arg[iarg],1,atom->ntypes,ilo[m],ihi[m],error);
       utils::bounds(FLERR,arg[iarg+1],1,atom->ntypes,jlo[m],jhi[m],error);
       utils::bounds(FLERR,arg[iarg+2],1,atom->ntypes,klo[m],khi[m],error);
-      if (ilo[m] > ihi[m] ||
-          jlo[m] > jhi[m] ||
-          klo[m] > khi[m])
+      if ((ilo[m] > ihi[m]) || (jlo[m] > jhi[m]) || (klo[m] > khi[m]))
         error->all(FLERR,"Illegal compute adf command");
       rcutinnerj[m] = utils::numeric(FLERR,arg[iarg+3],false,lmp);
       rcutouterj[m] = utils::numeric(FLERR,arg[iarg+4],false,lmp);
@@ -221,8 +212,6 @@ ComputeADF::ComputeADF(LAMMPS *lmp, int narg, char **arg) :
     memory->create(bothjkatom[m],maxjkatom[m],"adf:bothjkatom");
     memory->create(delrjkatom[m],maxjkatom[m],4,"adf:delrjkatom");
   }
-
-  rad2deg = 180.0 / MY_PI;
 }
 
 /* ---------------------------------------------------------------------- */
@@ -230,47 +219,47 @@ ComputeADF::ComputeADF(LAMMPS *lmp, int narg, char **arg) :
 ComputeADF::~ComputeADF()
 {
   memory->destroy(iatomflag);
-  delete [] ilo;
-  delete [] ihi;
-  delete [] jlo;
-  delete [] jhi;
-  delete [] klo;
-  delete [] khi;
-  delete [] iatomcount;
-  delete [] iatomcountall;
+  delete[] ilo;
+  delete[] ihi;
+  delete[] jlo;
+  delete[] jhi;
+  delete[] klo;
+  delete[] khi;
+  delete[] iatomcount;
+  delete[] iatomcountall;
   memory->destroy(hist);
   memory->destroy(histall);
   memory->destroy(array);
 
   memory->destroy(jatomflag);
-  delete [] rcutinnerj;
-  delete [] rcutouterj;
-  delete [] maxjatom;
-  delete [] numjatom;
+  delete[] rcutinnerj;
+  delete[] rcutouterj;
+  delete[] maxjatom;
+  delete[] numjatom;
   for (int m = 0; m < ntriples; m++)
     memory->destroy(neighjatom[m]);
-  delete [] neighjatom;
+  delete[] neighjatom;
 
   memory->destroy(katomflag);
-  delete [] rcutinnerk;
-  delete [] rcutouterk;
-  delete [] maxkatom;
-  delete [] numkatom;
+  delete[] rcutinnerk;
+  delete[] rcutouterk;
+  delete[] maxkatom;
+  delete[] numkatom;
   for (int m = 0; m < ntriples; m++)
     memory->destroy(neighkatom[m]);
-  delete [] neighkatom;
+  delete[] neighkatom;
 
-  delete [] maxjkatom;
-  delete [] numjkatom;
+  delete[] maxjkatom;
+  delete[] numjkatom;
   for (int m = 0; m < ntriples; m++)
     memory->destroy(neighjkatom[m]);
-  delete [] neighjkatom;
+  delete[] neighjkatom;
   for (int m = 0; m < ntriples; m++)
     memory->destroy(bothjkatom[m]);
-  delete [] bothjkatom;
+  delete[] bothjkatom;
   for (int m = 0; m < ntriples; m++)
     memory->destroy(delrjkatom[m]);
-  delete [] delrjkatom;
+  delete[] delrjkatom;
 }
 
 /* ---------------------------------------------------------------------- */
@@ -282,8 +271,7 @@ void ComputeADF::init()
 
   if (!cutflag) {
     if (!force->pair)
-      error->all(FLERR,"Compute adf requires a pair style be defined "
-                 "or an outer cutoff specified");
+      error->all(FLERR,"Compute adf requires a pair style be defined or an outer cutoff specified");
     rcutinnerj[0] = 0.0;
     rcutinnerk[0] = 0.0;
     rcutouterj[0] = force->pair->cutforce;
@@ -298,7 +286,7 @@ void ComputeADF::init()
 
   // specify mycutneigh if force cutoff too small or non-existent
 
-  if (!(force->pair) || maxouter > force->pair->cutforce) {
+  if (!(force->pair) || (maxouter > force->pair->cutforce)) {
     double skin = neighbor->skin;
     mycutneigh = maxouter + skin;
     if (mycutneigh > comm->cutghostuser)
@@ -310,7 +298,7 @@ void ComputeADF::init()
 
   int x0;
   if (ordinate_style == DEGREE) {
-    deltax = MY_PI / nbin * rad2deg;
+    deltax = MY_PI / nbin * RAD2DEG;
     deltaxinv = nbin / MY_PI;
     x0 = 0.0;
 
@@ -337,7 +325,11 @@ void ComputeADF::init()
   //   than maxouter apart, just like a normal neighbor list does
 
   auto req = neighbor->add_request(this, NeighConst::REQ_FULL | NeighConst::REQ_OCCASIONAL);
-  if (mycutneigh > 0.0) req->set_cutoff(mycutneigh);
+  if (mycutneigh > 0.0) {
+    if ((neighbor->style == Neighbor::MULTI) || (neighbor->style == Neighbor::MULTI_OLD))
+      error->all(FLERR, "Compute adf with custom cutoffs requires neighbor style 'bin' or 'nsq'");
+    req->set_cutoff(mycutneigh);
+  }
 }
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/EXTRA-COMPUTE/compute_adf.h b/src/EXTRA-COMPUTE/compute_adf.h
index 5f30995aa2..f1f95d325e 100644
--- a/src/EXTRA-COMPUTE/compute_adf.h
+++ b/src/EXTRA-COMPUTE/compute_adf.h
@@ -59,7 +59,6 @@ class ComputeADF : public Compute {
   int **bothjkatom;        // 1 if atom is in both jatom and katom lists
   double ***delrjkatom;    // list of 4-vectors: delx, dely, delx, and 1/r
 
-  double rad2deg;        // conversion factor from radians to degrees
   int ordinate_style;    // DEGREE, RADIAN, or COSINE
   int cutflag;           // 1 if at least one outer cutoff specified
 };
diff --git a/src/EXTRA-COMPUTE/compute_ave_sphere_atom.cpp b/src/EXTRA-COMPUTE/compute_ave_sphere_atom.cpp
index 89011e7177..b3b920fef3 100644
--- a/src/EXTRA-COMPUTE/compute_ave_sphere_atom.cpp
+++ b/src/EXTRA-COMPUTE/compute_ave_sphere_atom.cpp
@@ -33,7 +33,7 @@
 #include <cstring>
 
 using namespace LAMMPS_NS;
-using namespace MathConst;
+using MathConst::MY_PI;
 
 /* ---------------------------------------------------------------------- */
 
@@ -108,6 +108,9 @@ void ComputeAveSphereAtom::init()
   else
     volume = MY_PI * cutsq;
 
+  if ((neighbor->style == Neighbor::MULTI) || (neighbor->style == Neighbor::MULTI_OLD))
+    error->all(FLERR, "Compute ave/sphere/atom requires neighbor style 'bin' or 'nsq'");
+
   // need an occasional full neighbor list
 
   auto req = neighbor->add_request(this, NeighConst::REQ_FULL | NeighConst::REQ_OCCASIONAL);
diff --git a/src/EXTRA-COMPUTE/compute_composition_atom.cpp b/src/EXTRA-COMPUTE/compute_composition_atom.cpp
index 48aaa68dea..d36cb96028 100644
--- a/src/EXTRA-COMPUTE/compute_composition_atom.cpp
+++ b/src/EXTRA-COMPUTE/compute_composition_atom.cpp
@@ -19,7 +19,6 @@
 
 #include "atom.h"
 #include "comm.h"
-#include "domain.h"
 #include "error.h"
 #include "force.h"
 #include "math_const.h"
@@ -103,6 +102,9 @@ void ComputeCompositionAtom::init()
 
   cutsq = cutoff * cutoff;
 
+  if ((neighbor->style == Neighbor::MULTI) || (neighbor->style == Neighbor::MULTI_OLD))
+    error->all(FLERR, "Compute composition/atom requires neighbor style 'bin' or 'nsq'");
+
   // need an occasional full neighbor list
 
   auto req = neighbor->add_request(this, NeighConst::REQ_FULL | NeighConst::REQ_OCCASIONAL);
diff --git a/src/EXTRA-COMPUTE/compute_dipole_tip4p_chunk.cpp b/src/EXTRA-COMPUTE/compute_dipole_tip4p_chunk.cpp
index 571f1d562d..466bc0e882 100644
--- a/src/EXTRA-COMPUTE/compute_dipole_tip4p_chunk.cpp
+++ b/src/EXTRA-COMPUTE/compute_dipole_tip4p_chunk.cpp
@@ -16,7 +16,6 @@
 #include "angle.h"
 #include "atom.h"
 #include "bond.h"
-#include "comm.h"
 #include "compute_chunk_atom.h"
 #include "domain.h"
 #include "error.h"
diff --git a/src/EXTRA-COMPUTE/compute_dipole_tip4p_chunk.h b/src/EXTRA-COMPUTE/compute_dipole_tip4p_chunk.h
index b3354c9ab9..126f9962aa 100644
--- a/src/EXTRA-COMPUTE/compute_dipole_tip4p_chunk.h
+++ b/src/EXTRA-COMPUTE/compute_dipole_tip4p_chunk.h
@@ -23,7 +23,6 @@ ComputeStyle(dipole/tip4p/chunk,ComputeDipoleTIP4PChunk);
 #include "compute_chunk.h"
 
 namespace LAMMPS_NS {
-class Fix;
 
 class ComputeDipoleTIP4PChunk : public ComputeChunk {
  public:
diff --git a/src/EXTRA-COMPUTE/compute_efield_wolf_atom.cpp b/src/EXTRA-COMPUTE/compute_efield_wolf_atom.cpp
index ba5a16d52b..ceb16de2ec 100644
--- a/src/EXTRA-COMPUTE/compute_efield_wolf_atom.cpp
+++ b/src/EXTRA-COMPUTE/compute_efield_wolf_atom.cpp
@@ -92,7 +92,11 @@ void ComputeEfieldWolfAtom::init()
   if (atom->mu_flag && (comm->me == 0))
     error->warning(FLERR, "Compute efield/wolf/atom does not support per-atom dipoles");
 
-  // need an occasional full neighbor list
+  if ((neighbor->style == Neighbor::MULTI) || (neighbor->style == Neighbor::MULTI_OLD))
+    error->all(FLERR, "Compute efield/wolf/atom requires neighbor style 'bin' or 'nsq'");
+
+  // request an occasional full neighbor list
+
   auto req = neighbor->add_request(this, NeighConst::REQ_FULL | NeighConst::REQ_OCCASIONAL);
   if (cutoff_flag) req->set_cutoff(cutoff);
 
diff --git a/src/EXTRA-COMPUTE/compute_rattlers_atom.cpp b/src/EXTRA-COMPUTE/compute_rattlers_atom.cpp
index 9dacf14171..5f707d8433 100644
--- a/src/EXTRA-COMPUTE/compute_rattlers_atom.cpp
+++ b/src/EXTRA-COMPUTE/compute_rattlers_atom.cpp
@@ -29,7 +29,6 @@
 #include "pair.h"
 #include "update.h"
 
-#include <cmath>
 #include <cstring>
 
 using namespace LAMMPS_NS;
diff --git a/src/EXTRA-COMPUTE/compute_slcsa_atom.cpp b/src/EXTRA-COMPUTE/compute_slcsa_atom.cpp
index 6c272938b6..e0b34b8ff1 100644
--- a/src/EXTRA-COMPUTE/compute_slcsa_atom.cpp
+++ b/src/EXTRA-COMPUTE/compute_slcsa_atom.cpp
@@ -22,12 +22,9 @@
 #include "citeme.h"
 #include "comm.h"
 #include "error.h"
-#include "force.h"
 #include "memory.h"
 #include "modify.h"
 #include "neigh_list.h"
-#include "neighbor.h"
-#include "pair.h"
 #include "potential_file_reader.h"
 #include "update.h"
 
diff --git a/src/EXTRA-FIX/fix_deform_pressure.cpp b/src/EXTRA-FIX/fix_deform_pressure.cpp
new file mode 100644
index 0000000000..ffa3f11d92
--- /dev/null
+++ b/src/EXTRA-FIX/fix_deform_pressure.cpp
@@ -0,0 +1,940 @@
+// clang-format off
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Joel Clemmer (SNL)
+------------------------------------------------------------------------- */
+
+#include "fix_deform_pressure.h"
+
+#include "comm.h"
+#include "compute.h"
+#include "domain.h"
+#include "error.h"
+#include "group.h"
+#include "input.h"
+#include "irregular.h"
+#include "math_const.h"
+#include "modify.h"
+#include "update.h"
+#include "variable.h"
+
+#include <cmath>
+#include <cstring>
+
+using namespace LAMMPS_NS;
+using namespace FixConst;
+using namespace MathConst;
+
+enum { NOCOUPLE = 0, XYZ, XY, YZ, XZ };
+
+/* ---------------------------------------------------------------------- */
+
+FixDeformPressure::FixDeformPressure(LAMMPS *lmp, int narg, char **arg) :
+  FixDeform(lmp, narg, arg), id_temp(nullptr), id_press(nullptr), temperature(nullptr),
+  pressure(nullptr)
+{
+  // set defaults
+
+  set_extra = new SetExtra[7];
+  memset(set_extra, 0, 7 * sizeof(SetExtra));
+  memset(&set_box, 0, sizeof(Set));
+
+  // parse only parameter/style arguments specific to this child class
+
+  int index, iarg;
+  std::size_t i = 0;
+  while (i < leftover_iarg.size()) {
+    iarg = leftover_iarg[i];
+    if (strcmp(arg[iarg], "x") == 0 ||
+        strcmp(arg[iarg], "y") == 0 ||
+        strcmp(arg[iarg], "z") == 0) {
+
+      if (strcmp(arg[iarg], "x") == 0) index = 0;
+      else if (strcmp(arg[iarg], "y") == 0) index = 1;
+      else if (strcmp(arg[iarg], "z") == 0) index = 2;
+
+      if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, "fix deform/pressure", error);
+      if (strcmp(arg[iarg + 1], "pressure") == 0) {
+        if (iarg + 4 > narg) utils::missing_cmd_args(FLERR, "fix deform/pressure pressure", error);
+        set[index].style = PRESSURE;
+        if (strstr(arg[iarg + 2], "v_") != arg[iarg + 2]) {
+          set_extra[index].ptarget = utils::numeric(FLERR, arg[iarg + 2], false, lmp);
+        } else {
+          set_extra[index].pstr = utils::strdup(&arg[iarg + 2][2]);
+          set_extra[index].pvar_flag = 1;
+        }
+        set_extra[index].pgain = utils::numeric(FLERR, arg[iarg + 3], false, lmp);
+        i += 4;
+      } else if (strcmp(arg[iarg + 1], "pressure/mean") == 0) {
+        if (iarg + 4 > narg) utils::missing_cmd_args(FLERR, "fix deform/pressure pressure/mean", error);
+        set[index].style = PMEAN;
+        if (strstr(arg[iarg + 2], "v_") != arg[iarg + 2]) {
+          set_extra[index].ptarget = utils::numeric(FLERR, arg[iarg + 2], false, lmp);
+        } else {
+          set_extra[index].pstr = utils::strdup(&arg[iarg + 2][2]);
+          set_extra[index].pvar_flag = 1;
+        }
+        set_extra[index].pgain = utils::numeric(FLERR, arg[iarg + 3], false, lmp);
+        i += 4;
+      } else error->all(FLERR, "Illegal fix deform/pressure command argument: {}", arg[iarg + 1]);
+
+    } else if (strcmp(arg[iarg], "xy") == 0 ||
+               strcmp(arg[iarg], "xz") == 0 ||
+               strcmp(arg[iarg], "yz") == 0) {
+
+      if (strcmp(arg[iarg], "xy") == 0) index = 5;
+      else if (strcmp(arg[iarg], "xz") == 0) index = 4;
+      else if (strcmp(arg[iarg], "yz") == 0) index = 3;
+
+      if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, "fix deform/pressure", error);
+      if (strcmp(arg[iarg + 1], "pressure") == 0) {
+        if (iarg + 4 > narg) utils::missing_cmd_args(FLERR, "fix deform/pressure pressure", error);
+        set[index].style = PRESSURE;
+        if (strstr(arg[iarg + 2], "v_") != arg[iarg + 2]) {
+          set_extra[index].ptarget = utils::numeric(FLERR, arg[iarg + 2], false, lmp);
+        } else {
+          set_extra[index].pstr = utils::strdup(&arg[iarg + 2][2]);
+          set_extra[index].pvar_flag = 1;
+        }
+        set_extra[index].pgain = utils::numeric(FLERR, arg[iarg + 3], false, lmp);
+        i += 4;
+      } else error->all(FLERR, "Illegal fix deform/pressure command: {}", arg[iarg + 1]);
+
+    } else if (strcmp(arg[iarg], "box") == 0) {
+      if (strcmp(arg[iarg + 1], "volume") == 0) {
+        set_box.style = VOLUME;
+        i += 2;
+      } else if (strcmp(arg[iarg + 1], "pressure") == 0) {
+        if (iarg + 4 > narg) utils::missing_cmd_args(FLERR, "fix deform/pressure pressure", error);
+        set_box.style = PRESSURE;
+        if (strstr(arg[iarg + 2], "v_") != arg[iarg + 2]) {
+          set_extra[6].ptarget = utils::numeric(FLERR, arg[iarg + 2], false, lmp);
+        } else {
+          set_extra[6].pstr = utils::strdup(&arg[iarg + 2][2]);
+          set_extra[6].pvar_flag = 1;
+        }
+        set_extra[6].pgain = utils::numeric(FLERR, arg[iarg + 3], false, lmp);
+        i += 4;
+      } else error->all(FLERR, "Illegal fix deform/pressure command argument: {}", arg[iarg + 1]);
+    } else break;
+  }
+
+  // read options from end of input line
+  // shift arguments before reading
+
+  iarg = iarg_options_start;
+  options(i, narg - iarg, &arg[iarg]);
+
+  // repeat: setup dimflags used by other classes to check for volume-change conflicts
+
+  for (int i = 0; i < 6; i++)
+    if (set[i].style == NONE) dimflag[i] = 0;
+    else dimflag[i] = 1;
+
+  if (set_box.style != NONE) {
+    dimflag[0] = 1;
+    dimflag[1] = 1;
+    dimflag[2] = 1;
+  }
+
+  if (dimflag[0]) box_change |= BOX_CHANGE_X;
+  if (dimflag[1]) box_change |= BOX_CHANGE_Y;
+  if (dimflag[2]) box_change |= BOX_CHANGE_Z;
+  if (dimflag[3]) box_change |= BOX_CHANGE_YZ;
+  if (dimflag[4]) box_change |= BOX_CHANGE_XZ;
+  if (dimflag[5]) box_change |= BOX_CHANGE_XY;
+
+  // repeat: no tensile deformation on shrink-wrapped dims
+  // b/c shrink wrap will change box-length
+
+  for (int i = 0; i < 3; i++)
+    if (set_box.style && (domain->boundary[i][0] >= 2 || domain->boundary[i][1] >= 2))
+      error->all(FLERR, "Cannot use fix deform/pressure on a shrink-wrapped boundary");
+
+  // repeat: no tilt deformation on shrink-wrapped 2nd dim
+  // b/c shrink wrap will change tilt factor in domain::reset_box()
+
+  if (set[3].style && (domain->boundary[2][0] >= 2 || domain->boundary[2][1] >= 2))
+    error->all(FLERR, "Cannot use fix deform/pressure tilt on a shrink-wrapped 2nd dim");
+  if (set[4].style && (domain->boundary[2][0] >= 2 || domain->boundary[2][1] >= 2))
+    error->all(FLERR, "Cannot use fix deform/pressure tilt on a shrink-wrapped 2nd dim");
+  if (set[5].style && (domain->boundary[1][0] >= 2 || domain->boundary[1][1] >= 2))
+    error->all(FLERR, "Cannot use fix deform/pressure tilt on a shrink-wrapped 2nd dim");
+
+  // for VOLUME, setup links to other dims
+  // fixed, dynamic1, dynamic2
+
+  for (int i = 0; i < 3; i++) {
+    if (set[i].style != VOLUME) continue;
+    int other1 = (i + 1) % 3;
+    int other2 = (i + 2) % 3;
+
+    // Cannot use VOLUME option without at least one deformed dimension
+    if (set[other1].style == NONE || set[other1].style == VOLUME)
+      if (set[other2].style == NONE || set[other2].style == VOLUME)
+        error->all(FLERR, "Fix {} volume setting is invalid", style);
+
+    if (set[other1].style == NONE) {
+      set[i].substyle = ONE_FROM_ONE;
+      set[i].fixed = other1;
+      set[i].dynamic1 = other2;
+    } else if (set[other2].style == NONE) {
+      set[i].substyle = ONE_FROM_ONE;
+      set[i].fixed = other2;
+      set[i].dynamic1 = other1;
+    } else if (set[other1].style == VOLUME) {
+      set[i].substyle = TWO_FROM_ONE;
+      set[i].fixed = other1;
+      set[i].dynamic1 = other2;
+    } else if (set[other2].style == VOLUME) {
+      set[i].substyle = TWO_FROM_ONE;
+      set[i].fixed = other2;
+      set[i].dynamic1 = other1;
+    } else {
+      set[i].substyle = ONE_FROM_TWO;
+      set[i].dynamic1 = other1;
+      set[i].dynamic2 = other2;
+    }
+  }
+
+  // repeat: set varflag
+
+  for (int i = 0; i < 7; i++)
+    if (set_extra[i].pvar_flag) varflag = 1;
+
+  // repeat: reneighboring only forced if flips can occur due to shape changes
+
+  if ((!force_reneighbor) && flipflag && (set[3].style || set[4].style || set[5].style)) {
+    force_reneighbor = 1;
+    irregular = new Irregular(lmp);
+  }
+
+  // set initial values at time fix deform/pressure is issued
+
+  set_box.vol_initial = domain->xprd * domain->yprd * domain->zprd;
+
+  // populate coupled pressure controls
+
+  if (pcouple != NOCOUPLE) {
+
+    if (domain->dimension == 2)
+      if (pcouple == XYZ || pcouple == XZ || pcouple == YZ)
+        error->all(FLERR, "Cannot couple Z dimension in fix deform/pressure in 2D");
+
+    int coupled_indices[3] = {0};
+    int j = -1;
+
+    if (pcouple == XYZ || pcouple == XY || pcouple == XZ)
+      coupled_indices[0] = 1;
+    if (pcouple == XYZ || pcouple == XY || pcouple == YZ)
+      coupled_indices[1] = 1;
+    if (pcouple == XYZ || pcouple == XZ || pcouple == YZ)
+      coupled_indices[2] = 1;
+
+    // Check coupled styles and find reference
+    for (int i = 0; i < 3; i++) {
+      if (coupled_indices[i]) {
+        set_extra[i].coupled_flag = 1;
+        if (set[i].style != PRESSURE && set[i].style != NONE)
+          error->all(FLERR, "Cannot couple non-pressure-controlled dimensions");
+        if (set[i].style == PRESSURE)
+          j = i;
+      }
+    }
+
+    if (j == -1)
+      error->all(FLERR, "Must specify deformation style for at least one coupled dimension");
+
+    // Copy or compare data for each coupled dimension
+
+    for (int i = 0; i < 3; i++) {
+      if (coupled_indices[i]) {
+        // Copy coupling information if dimension style is undefined
+        if (set[i].style == NONE) {
+          set[i].style = PRESSURE;
+          dimflag[i] = 1;
+          set_extra[i].pgain = set_extra[j].pgain;
+          if (set_extra[j].pvar_flag) {
+            set_extra[i].pstr = set_extra[j].pstr;
+            set_extra[i].pvar_flag = 1;
+          } else {
+            set_extra[i].ptarget = set_extra[j].ptarget;
+          }
+        } else {
+          // Check for incompatibilities in style
+          if (set[j].style != set[i].style && set[i].style != NONE)
+            error->all(FLERR, "Cannot couple dimensions with different control options");
+          if (set[j].style != PRESSURE) continue;
+
+          // If pressure controlled, check for incompatibilities in parameters
+          if (set_extra[i].pgain != set_extra[j].pgain || set_extra[i].pvar_flag != set_extra[j].pvar_flag ||
+              set_extra[i].ptarget != set_extra[j].ptarget)
+            error->all(FLERR, "Coupled dimensions must have identical gain parameters");
+
+          if (set_extra[j].pvar_flag)
+            if (strcmp(set_extra[i].pstr, set_extra[j].pstr) != 0)
+              error->all(FLERR, "Coupled dimensions must have the same target pressure");
+        }
+      }
+    }
+  }
+
+  // if vol/balance/p used, must have 2 free dimensions
+
+  if (vol_balance_flag) {
+    for (int i = 0; i < 3; i++) {
+      if (set[i].style != VOLUME) continue;
+      if (set[i].substyle != TWO_FROM_ONE)
+        error->all(FLERR, "Two dimensions must maintain constant volume to use the vol/balance/p option");
+    }
+  }
+
+  // set strain_flag
+
+  strain_flag = 0;
+  for (int i = 0; i < 6; i++)
+    if (set[i].style != NONE && set[i].style != VOLUME &&
+        set[i].style != PRESSURE && set[i].style != PMEAN)
+      strain_flag = 1;
+
+  // set pressure_flag
+
+  pressure_flag = 0;
+  for (int i = 0; i < 6; i++) {
+    if (set[i].style == PRESSURE || set[i].style == PMEAN) {
+      pressure_flag = 1;
+      if (set_extra[i].pgain <= 0.0)
+        error->all(FLERR, "Illegal fix deform/pressure gain constant, must be positive");
+    }
+    if (set_extra[i].coupled_flag) pressure_flag = 1;
+  }
+  if (set_box.style == PRESSURE) pressure_flag = 1;
+  if (vol_balance_flag) pressure_flag = 1;
+
+  // check conflict between constant volume/pressure
+
+  volume_flag = 0;
+  for (int i = 0; i < 3; i++)
+    if (set[i].style == VOLUME)
+      volume_flag = 1;
+
+  if (volume_flag)
+    for (int i = 0; i < 6; i++)
+      if (set[i].style == PMEAN)
+        error->all(FLERR, "Cannot use fix deform/pressure to assign constant volume and pressure");
+
+  // check conflicts between x,y,z styles and box
+
+  if (set_box.style)
+    for (int i = 0; i < 3; i++)
+      if (set[i].style == FINAL || set[i].style == DELTA || set[i].style == SCALE || set[i].style == PMEAN || set[i].style == VARIABLE)
+        error->all(FLERR, "Cannot use fix deform/pressure box parameter with x, y, or z styles other than vel, erate, trate, pressure, and wiggle");
+
+  // check pressure used for max rate and normalize error flag
+
+  if (!pressure_flag && max_h_rate != 0)
+    error->all(FLERR, "Can only assign a maximum strain rate using pressure-controlled dimensions");
+
+  if (!pressure_flag && normalize_pressure_flag)
+    error->all(FLERR, "Can only normalize error using pressure-controlled dimensions");
+
+  // Create pressure compute, if needed
+
+  pflag = 0;
+  tflag = 0;
+  if (pressure_flag) {
+    // create a new compute temp style
+    // id = fix-ID + temp
+    // compute group = all since pressure is always global (group all)
+    //   and thus its KE/temperature contribution should use group all
+
+    id_temp = utils::strdup(std::string(id) + "_temp");
+    temperature = modify->add_compute(fmt::format("{} all temp", id_temp));
+    tflag = 1;
+
+    // create a new compute pressure style
+    // id = fix-ID + press, compute group = all
+    // pass id_temp as 4th arg to pressure constructor
+
+    id_press = utils::strdup(std::string(id) + "_press");
+    pressure = modify->add_compute(fmt::format("{} all pressure {}", id_press, id_temp));
+    pflag = 1;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+FixDeformPressure::~FixDeformPressure()
+{
+  if (set_extra)
+    for (int i = 0; i < 7; i++)
+      delete[] set_extra[i].pstr;
+  delete[] set_extra;
+
+  delete[] set_box.hstr;
+  delete[] set_box.hratestr;
+
+  // delete temperature and pressure if fix created them
+
+  if (tflag) modify->delete_compute(id_temp);
+  if (pflag) modify->delete_compute(id_press);
+  delete [] id_temp;
+  delete [] id_press;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixDeformPressure::init()
+{
+  FixDeform::init();
+
+  set_box.vol_start = domain->xprd * domain->yprd * domain->zprd;
+
+  // check optional variables for PRESSURE or PMEAN style
+
+  for (int i = 0; i < 7; i++) {
+    if (!set_extra[i].pvar_flag) continue;
+    set_extra[i].pvar = input->variable->find(set_extra[i].pstr);
+    if (set_extra[i].pvar < 0)
+      error->all(FLERR, "Variable name {} for fix deform/pressure does not exist", set_extra[i].pstr);
+    if (!input->variable->equalstyle(set_extra[i].pvar))
+      error->all(FLERR, "Variable {} for fix deform/pressure is invalid style", set_extra[i].pstr);
+  }
+
+  // Find pressure/temp computes if needed
+
+  if (pressure_flag) {
+    temperature = modify->get_compute_by_id(id_temp);
+    if (!temperature)
+      error->all(FLERR, "Temperature ID {} for fix deform/pressure does not exist", id_temp);
+
+    pressure = modify->get_compute_by_id(id_press);
+    if (!pressure)
+      error->all(FLERR, "Pressure ID {} for fix deform/pressure does not exist", id_press);
+  }
+}
+
+/* ----------------------------------------------------------------------
+   compute T,P if needed before integrator starts
+------------------------------------------------------------------------- */
+
+void FixDeformPressure::setup(int /*vflag*/)
+{
+  // trigger virial computation on next timestep
+  if (pressure_flag) pressure->addstep(update->ntimestep+1);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixDeformPressure::end_of_step()
+{
+  // wrap variable evaluations with clear/add
+
+  if (varflag) modify->clearstep_compute();
+
+  // set new box size for strain-based dims
+
+  if (strain_flag) FixDeform::apply_strain();
+
+  // set new box size for pressure-based dims
+
+  if (pressure_flag) {
+    temperature->compute_vector();
+    pressure->compute_vector();
+    pressure->compute_scalar();
+    for (int i = 0; i < 3; i++) {
+      if (!set_extra[i].saved) {
+        set_extra[i].saved = 1;
+        set_extra[i].prior_rate = 0.0;
+        set_extra[i].prior_pressure = pressure->vector[i];
+      }
+    }
+    apply_pressure();
+  }
+
+  // set new box size for VOLUME dims that are linked to other dims
+  // NOTE: still need to set h_rate for these dims
+
+  if (volume_flag) apply_volume();
+
+  // apply any final box scalings
+
+  if (set_box.style) apply_box();
+
+  // Save pressure/strain rate if required
+
+  if (pressure_flag) {
+    for (int i = 0; i < 3; i++) {
+      set_extra[i].prior_pressure = pressure->vector[i];
+      set_extra[i].prior_rate = ((set[i].hi_target - set[i].lo_target) /
+                           (domain->boxhi[i] - domain->boxlo[i]) - 1.0)  / update->dt;
+    }
+  }
+
+  if (varflag) modify->addstep_compute(update->ntimestep + nevery);
+
+
+  FixDeform::update_domain();
+
+  // trigger virial computation, if needed, on next timestep
+
+  if (pressure_flag)
+    pressure->addstep(update->ntimestep+1);
+}
+
+/* ----------------------------------------------------------------------
+   apply pressure controls
+------------------------------------------------------------------------- */
+
+void FixDeformPressure::apply_pressure()
+{
+  // If variable pressure, calculate current target
+  for (int i = 0; i < 6; i++)
+    if (set[i].style == PRESSURE)
+      if (set_extra[i].pvar_flag)
+        set_extra[i].ptarget = input->variable->compute_equal(set_extra[i].pvar);
+
+  // Find current (possibly coupled/hydrostatic) pressure for X, Y, Z
+  double *tensor = pressure->vector;
+  double scalar = pressure->scalar;
+  double p_current[3] = {0.0, 0.0, 0.0};
+
+  if (pcouple == XYZ) {
+    double ave = THIRD * (tensor[0] + tensor[1] + tensor[2]);
+    p_current[0] = p_current[1] = p_current[2] = ave;
+  } else if (pcouple == XY) {
+    double ave = 0.5 * (tensor[0] + tensor[1]);
+    p_current[0] = p_current[1] = ave;
+    p_current[2] = tensor[2];
+  } else if (pcouple == YZ) {
+    double ave = 0.5 * (tensor[1] + tensor[2]);
+    p_current[1] = p_current[2] = ave;
+    p_current[0] = tensor[0];
+  } else if (pcouple == XZ) {
+    double ave = 0.5 * (tensor[0] + tensor[2]);
+    p_current[0] = p_current[2] = ave;
+    p_current[1] = tensor[1];
+  } else {
+    if (set[0].style == PRESSURE) p_current[0] = tensor[0];
+    else if (set[0].style == PMEAN) p_current[0] = scalar;
+
+    if (set[1].style == PRESSURE) p_current[1] = tensor[1];
+    else if (set[1].style == PMEAN) p_current[1] = scalar;
+
+    if (set[2].style == PRESSURE) p_current[2] = tensor[2];
+    else if (set[2].style == PMEAN) p_current[2] = scalar;
+  }
+
+  for (int i = 0; i < 3; i++) {
+    if (set[i].style != PRESSURE && set[i].style != PMEAN) continue;
+
+    h_rate[i] = set_extra[i].pgain * (p_current[i] - set_extra[i].ptarget);
+
+    if (normalize_pressure_flag) {
+      if (set_extra[i].ptarget == 0) {
+        if (max_h_rate == 0) {
+          error->all(FLERR, "Cannot normalize error for zero pressure without defining a max rate");
+        } else h_rate[i] = max_h_rate * h_rate[i] / fabs(h_rate[i]);
+      } else h_rate[i] /= fabs(set_extra[i].ptarget);
+    }
+
+    if (max_h_rate != 0)
+      if (fabs(h_rate[i]) > max_h_rate)
+        h_rate[i] = max_h_rate * h_rate[i] / fabs(h_rate[i]);
+
+    h_ratelo[i] = -0.5 * h_rate[i];
+
+    double offset = 0.5 * (domain->boxhi[i] - domain->boxlo[i]) * (1.0 + update->dt * h_rate[i]);
+    set[i].lo_target = 0.5 * (set[i].lo_start + set[i].hi_start) - offset;
+    set[i].hi_target = 0.5 * (set[i].lo_start + set[i].hi_start) + offset;
+  }
+
+  for (int i = 3; i < 6; i++) {
+    if (set[i].style != PRESSURE) continue;
+
+    double L, tilt, pcurrent;
+    if (i == 3) {
+      L = domain->zprd;
+      tilt = domain->yz;
+      pcurrent = tensor[5];
+    } else if (i == 4) {
+      L = domain->zprd;
+      tilt = domain->xz + update->dt;
+      pcurrent = tensor[4];
+    } else {
+      L = domain->yprd;
+      tilt = domain->xy;
+      pcurrent = tensor[3];
+    }
+
+    h_rate[i] = L * set_extra[i].pgain * (pcurrent - set_extra[i].ptarget);
+    if (normalize_pressure_flag) {
+      if (set_extra[i].ptarget == 0) {
+        if (max_h_rate == 0) {
+          error->all(FLERR, "Cannot normalize error for zero pressure without defining a max rate");
+        } else h_rate[i] = max_h_rate * h_rate[i] / fabs(h_rate[i]);
+      } else h_rate[i] /= fabs(set_extra[i].ptarget);
+    }
+
+    if (max_h_rate != 0)
+      if (fabs(h_rate[i]) > max_h_rate)
+        h_rate[i] = max_h_rate * h_rate[i] / fabs(h_rate[i]);
+
+    set[i].tilt_target = tilt + update->dt * h_rate[i];
+  }
+}
+
+/* ----------------------------------------------------------------------
+   apply volume controls
+------------------------------------------------------------------------- */
+
+void FixDeformPressure::apply_volume()
+{
+  double e1, e2;
+  int linked_pressure = 0;
+
+  for (int i = 0; i < 3; i++) {
+    if (set[i].style != VOLUME) continue;
+
+    int dynamic1 = set[i].dynamic1;
+    int dynamic2 = set[i].dynamic2;
+    int fixed = set[i].fixed;
+    double v0 = set[i].vol_start;
+    double shift = 0.0;
+
+    if (set[i].substyle == ONE_FROM_ONE) {
+      shift = 0.5 * (v0 / (set[dynamic1].hi_target - set[dynamic1].lo_target) /
+             (set[fixed].hi_start-set[fixed].lo_start));
+    } else if (set[i].substyle == ONE_FROM_TWO) {
+      shift = 0.5 * (v0 / (set[dynamic1].hi_target - set[dynamic1].lo_target) /
+             (set[dynamic2].hi_target - set[dynamic2].lo_target));
+    } else if (set[i].substyle == TWO_FROM_ONE) {
+      if (!vol_balance_flag) {
+        shift = 0.5 * sqrt(v0 * (set[i].hi_start - set[i].lo_start) /
+                   (set[dynamic1].hi_target - set[dynamic1].lo_target) /
+                   (set[fixed].hi_start - set[fixed].lo_start));
+      } else {
+        double dt = update->dt;
+        double e1i = set_extra[i].prior_rate;
+        double e2i = set_extra[fixed].prior_rate;
+        double L1i = domain->boxhi[i] - domain->boxlo[i];
+        double L2i = domain->boxhi[fixed] - domain->boxlo[fixed];
+        double L3i = domain->boxhi[dynamic1] - domain->boxlo[dynamic1];
+        double L3 = (set[dynamic1].hi_target - set[dynamic1].lo_target);
+        double Vi = L1i * L2i * L3i;
+        double V = L3 * L1i * L2i;
+        double e3 = (L3 / L3i - 1.0) / dt;
+        double p1 = pressure->vector[i];
+        double p2 = pressure->vector[fixed];
+        double p1i = set_extra[i].prior_pressure;
+        double p2i = set_extra[fixed].prior_pressure;
+        double denominator;
+
+        if (e3 == 0) {
+          e1 = 0.0;
+          e2 = 0.0;
+          shift = 0.5 * L1i;
+        } else if (e1i == 0 || e2i == 0 || (p2 == p2i && p1 == p1i)) {
+          // If no prior strain or no change in pressure (initial step) just scale shift by relative box lengths
+          shift = 0.5 * sqrt(v0 * L1i / L3 / L2i);
+        } else {
+          if (!linked_pressure) {
+            // Calculate first strain rate by expanding stress to linear order, p1(t+dt) = p2(t+dt)
+            // Calculate second strain rate to preserve volume
+            denominator = p2 - p2i + e2i * ((p1 - p1i) / e1i);
+            if (denominator != 0.0 && e1i != 0.0) {
+              e1 = (((p2 - p2i) * (Vi - V) / (V * dt)) - e2i * (p1 - p2)) / denominator;
+            } else {
+              e1 = e2i;
+            }
+            e2 = (Vi - V * (1 + e1 * dt)) / (V * (1 + e1 * dt) * dt);
+
+            // If strain rate exceeds limit in either dimension, cap it at the maximum compatible rate
+            if (max_h_rate != 0) {
+              if ((fabs(e1) > max_h_rate) || (fabs(e2) > max_h_rate)) {
+                if (fabs(e1) > fabs(e2))
+                  adjust_linked_rates(e1, e2, e3, Vi, V);
+                else
+                  adjust_linked_rates(e2, e1, e3, Vi, V);
+              }
+            }
+            shift = 0.5 * L1i * (1.0 + e1 * dt);
+            linked_pressure = 1;
+          } else {
+            // Already calculated value of e2
+            shift = 0.5 * L1i * (1.0 + e2 * dt);
+          }
+        }
+      }
+    }
+
+    h_rate[i] = (2.0 * shift / (domain->boxhi[i] - domain->boxlo[i]) - 1.0) / update->dt;
+    h_ratelo[i] = -0.5 * h_rate[i];
+
+    set[i].lo_target = 0.5 * (set[i].lo_start + set[i].hi_start) - shift;
+    set[i].hi_target = 0.5 * (set[i].lo_start + set[i].hi_start) + shift;
+  }
+}
+
+
+/* ----------------------------------------------------------------------
+   Rescale volume preserving strain rates to enforce max rate
+------------------------------------------------------------------------- */
+
+void FixDeformPressure::adjust_linked_rates(double &e_larger, double &e_smaller, double e3, double Vi, double V)
+{
+  double dt = update->dt;
+  double e_lim_positive = (Vi - V * (1 + max_h_rate * dt)) / (V * (1 + max_h_rate * dt) * dt);
+  double e_lim_negative = (Vi - V * (1 - max_h_rate * dt)) / (V * (1 - max_h_rate * dt) * dt);
+  if ((e_larger * e3) >= 0) {
+    if (e_larger > 0.0) {
+      // Same sign as primary strain rate, cap third dimension
+      e_smaller = -max_h_rate;
+      e_larger = e_lim_negative;
+    } else {
+      e_smaller = max_h_rate;
+      e_larger = e_lim_positive;
+    }
+  } else {
+    // Opposite sign, set to maxrate.
+    if (e_larger > 0.0) {
+      e_larger = max_h_rate;
+      e_smaller = e_lim_positive;
+    } else {
+      e_larger = -max_h_rate;
+      e_smaller = e_lim_negative;
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   apply box controls
+------------------------------------------------------------------------- */
+
+void FixDeformPressure::apply_box()
+{
+  int i;
+  double scale, shift = 0.0;
+  double v_rate;
+
+  if (set_box.style == VOLUME) {
+    double v0 = set_box.vol_start;
+    double v = 1.0;
+    for (i = 0; i < 3; i++)
+      v *= (set[i].hi_target - set[i].lo_target);
+
+    scale = std::pow(v0 / v, THIRD);
+    for (i = 0; i < 3; i++) {
+      shift = 0.5 * (set[i].hi_target - set[i].lo_target) * scale;
+      set[i].lo_target = 0.5 * (set[i].lo_start + set[i].hi_start) - shift;
+      set[i].hi_target = 0.5 * (set[i].lo_start + set[i].hi_start) + shift;
+
+      // Recalculate h_rate
+      h_rate[i] = (set[i].hi_target - set[i].lo_target) / (domain->boxhi[i] - domain->boxlo[i]) - 1.0;
+      h_rate[i] /= update->dt;
+      h_ratelo[i] = -0.5 * h_rate[i];
+    }
+
+  } else if (set_box.style == PRESSURE) {
+
+    // If variable pressure, calculate current target
+    if (set_extra[6].pvar_flag)
+      set_extra[6].ptarget = input->variable->compute_equal(set_extra[6].pvar);
+
+    v_rate = set_extra[6].pgain * (pressure->scalar - set_extra[6].ptarget);
+
+    if (normalize_pressure_flag) {
+      if (set_extra[6].ptarget == 0) {
+        if (max_h_rate == 0) {
+          error->all(FLERR, "Cannot normalize error for zero pressure without defining a max rate");
+        } else v_rate = max_h_rate * v_rate / fabs(v_rate);
+      } else v_rate /= fabs(set_extra[6].ptarget);
+    }
+
+    if (max_h_rate != 0)
+      if (fabs(v_rate) > max_h_rate)
+        v_rate = max_h_rate * v_rate / fabs(v_rate);
+
+    set_extra[6].cumulative_strain += update->dt * v_rate;
+    scale = (1.0 + set_extra[6].cumulative_strain);
+    for (i = 0; i < 3; i++) {
+      shift = 0.5 * (set[i].hi_target - set[i].lo_target) * scale;
+      set[i].lo_target = 0.5 * (set[i].lo_start + set[i].hi_start) - shift;
+      set[i].hi_target = 0.5 * (set[i].lo_start + set[i].hi_start) + shift;
+
+      // Recalculate h_rate
+      h_rate[i] = (set[i].hi_target - set[i].lo_target) / (domain->boxhi[i] - domain->boxlo[i]) - 1.0;
+      h_rate[i] /= update->dt;
+      h_ratelo[i] = -0.5 * h_rate[i];
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   write Set data to restart file
+------------------------------------------------------------------------- */
+
+void FixDeformPressure::write_restart(FILE *fp)
+{
+  if (comm->me == 0) {
+    int size = 9 * sizeof(double) + 7 * sizeof(Set) + 7 * sizeof(SetExtra);
+    fwrite(&size, sizeof(int), 1, fp);
+    fwrite(set, sizeof(Set), 6, fp);
+    fwrite(&set_box, sizeof(Set), 1, fp);
+    fwrite(set_extra, sizeof(SetExtra), 7, fp);
+  }
+}
+
+/* ----------------------------------------------------------------------
+   use selected state info from restart file to restart the Fix
+------------------------------------------------------------------------- */
+
+void FixDeformPressure::restart(char *buf)
+{
+  int n = 0;
+  auto list = (double *) buf;
+  for (int i = 0; i < 6; i++)
+    h_rate[i] = list[n++];
+  for (int i = 0; i < 3; i++)
+    h_ratelo[i] = list[n++];
+
+  n = n * sizeof(double);
+  int samestyle = 1;
+  Set *set_restart = (Set *) &buf[n];
+  for (int i = 0; i < 6; ++i) {
+    // restore data from initial state
+    set[i].lo_initial = set_restart[i].lo_initial;
+    set[i].hi_initial = set_restart[i].hi_initial;
+    set[i].vol_initial = set_restart[i].vol_initial;
+    set[i].tilt_initial = set_restart[i].tilt_initial;
+    // check if style settings are consistent (should do the whole set?)
+    if (set[i].style != set_restart[i].style)
+      samestyle = 0;
+    if (set[i].substyle != set_restart[i].substyle)
+      samestyle = 0;
+  }
+  n += 6 * sizeof(Set);
+
+  // Only restore relevant box variables & check consistency
+  Set set_box_restart;
+  memcpy(&set_box_restart, (Set *) &buf[n], sizeof(Set));
+  set_box.vol_initial = set_box_restart.vol_initial;
+  if (set_box.style != set_box_restart.style)
+    samestyle = 0;
+
+  if (!samestyle)
+    error->all(FLERR, "Fix deform/pressure settings not consistent with restart");
+
+  n += sizeof(Set);
+  SetExtra *set_extra_restart = (SetExtra *) &buf[n];
+  for (int i = 0; i < 7; ++i) {
+    set_extra[i].saved = set_extra_restart[i].saved;
+    set_extra[i].prior_rate = set_extra_restart[i].prior_rate;
+    set_extra[i].prior_pressure = set_extra_restart[i].prior_pressure;
+    set_extra[i].cumulative_strain = set_extra_restart[i].cumulative_strain;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixDeformPressure::options(int i, int narg, char **arg)
+{
+  pcouple = NOCOUPLE;
+  max_h_rate = 0.0;
+  vol_balance_flag = 0;
+  normalize_pressure_flag = 0;
+
+  // parse only options not handled by parent class
+
+  int iarg;
+  while (i < (int) leftover_iarg.size()) {
+    iarg = leftover_iarg[i];
+    if (strcmp(arg[iarg], "couple") == 0) {
+      if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, "fix deform/pressure couple", error);
+      if (strcmp(arg[iarg + 1], "xyz") == 0) pcouple = XYZ;
+      else if (strcmp(arg[iarg + 1], "xy") == 0) pcouple = XY;
+      else if (strcmp(arg[iarg + 1], "yz") == 0) pcouple = YZ;
+      else if (strcmp(arg[iarg + 1], "xz") == 0) pcouple = XZ;
+      else if (strcmp(arg[iarg + 1], "none") == 0) pcouple = NOCOUPLE;
+      else error->all(FLERR, "Illegal fix deform/pressure couple command: {}", arg[iarg + 1]);
+      i += 2;
+    } else if (strcmp(arg[iarg], "max/rate") == 0) {
+      if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, "fix deform/pressure max/rate", error);
+      max_h_rate = utils::numeric(FLERR, arg[iarg + 1], false, lmp);
+      if (max_h_rate <= 0.0)
+        error->all(FLERR, "Maximum strain rate must be a positive, non-zero value");
+      i += 2;
+    } else if (strcmp(arg[iarg], "normalize/pressure") == 0) {
+      if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, "fix deform/pressure normalize/pressure", error);
+      normalize_pressure_flag = utils::logical(FLERR, arg[iarg + 1], false, lmp);
+      i += 2;
+    } else if (strcmp(arg[iarg], "vol/balance/p") == 0) {
+      if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, "fix deform/pressure vol/balance/p", error);
+      vol_balance_flag = utils::logical(FLERR, arg[iarg + 1], false, lmp);
+      i += 2;
+    } else error->all(FLERR, "Illegal fix deform/pressure command: {}", arg[iarg]);
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+int FixDeformPressure::modify_param(int narg, char **arg)
+{
+  if (strcmp(arg[0], "temp") == 0) {
+    if (narg < 2) error->all(FLERR, "Illegal fix_modify command");
+    if (tflag) {
+      modify->delete_compute(id_temp);
+      tflag = 0;
+    }
+    delete[] id_temp;
+    id_temp = utils::strdup(arg[1]);
+
+    temperature = modify->get_compute_by_id(arg[1]);
+    if (!temperature)
+      error->all(FLERR, "Could not find fix_modify temperature compute ID: ", arg[1]);
+
+    if (temperature->tempflag == 0)
+      error->all(FLERR, "Fix_modify temperature compute {} does not compute temperature", arg[1]);
+    if (temperature->igroup != 0 && comm->me == 0)
+      error->warning(FLERR, "Temperature compute {} for fix {} is not for group all: {}",
+                     arg[1], style, group->names[temperature->igroup]);
+
+    // reset id_temp of pressure to new temperature ID
+
+    auto icompute = modify->get_compute_by_id(id_press);
+    if (!icompute)
+      error->all(FLERR, "Pressure compute ID {} for fix {} does not exist", id_press, style);
+    icompute->reset_extra_compute_fix(id_temp);
+
+    return 2;
+
+  } else if (strcmp(arg[0], "press") == 0) {
+    if (narg < 2) error->all(FLERR, "Illegal fix_modify command");
+    if (pflag) {
+      modify->delete_compute(id_press);
+      pflag = 0;
+    }
+    delete[] id_press;
+    id_press = utils::strdup(arg[1]);
+
+    pressure = modify->get_compute_by_id(arg[1]);
+    if (!pressure) error->all(FLERR, "Could not find fix_modify pressure compute ID: {}", arg[1]);
+    if (pressure->pressflag == 0)
+      error->all(FLERR, "Fix_modify pressure compute {} does not compute pressure", arg[1]);
+    return 2;
+  }
+
+  return 0;
+}
diff --git a/src/EXTRA-FIX/fix_deform_pressure.h b/src/EXTRA-FIX/fix_deform_pressure.h
new file mode 100644
index 0000000000..5a0d844bad
--- /dev/null
+++ b/src/EXTRA-FIX/fix_deform_pressure.h
@@ -0,0 +1,74 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef FIX_CLASS
+// clang-format off
+FixStyle(deform/pressure,FixDeformPressure);
+// clang-format on
+#else
+
+#ifndef LMP_FIX_DEFORM_PRESSURE_H
+#define LMP_FIX_DEFORM_PRESSURE_H
+
+#include "fix_deform.h"
+
+namespace LAMMPS_NS {
+
+class FixDeformPressure : public FixDeform {
+ public:
+  FixDeformPressure(class LAMMPS *, int, char **);
+  ~FixDeformPressure() override;
+  void init() override;
+  void setup(int) override;
+  void end_of_step() override;
+  void write_restart(FILE *) override;
+  void restart(char *buf) override;
+  int modify_param(int, char **) override;
+
+ protected:
+  int pcouple;
+  double max_h_rate;
+  int strain_flag;               // 1 if strain-based option is used, 0 if not
+  int pressure_flag;             // 1 if pressure tensor used, 0 if not
+  int volume_flag;               // 1 if VOLUME option is used, 0 if not
+  int normalize_pressure_flag;   // 1 if normalize pressure deviation by target
+  int vol_balance_flag;          // 1 if pressures balanced when maintaining const vol
+
+  char *id_temp, *id_press;
+  class Compute *temperature, *pressure;
+  int tflag, pflag;
+
+  struct SetExtra {
+    double ptarget, pgain;
+    double prior_pressure, prior_rate;
+    double cumulative_strain;
+    int saved;
+    char *pstr;
+    int pvar, pvar_flag;
+    int coupled_flag;
+  };
+  SetExtra *set_extra;
+  Set set_box;
+
+  void options(int, int, char **);
+  void apply_volume() override;
+  void apply_pressure();
+  void apply_box();
+  void couple();
+  void adjust_linked_rates(double&, double&, double, double, double);
+};
+
+}    // namespace LAMMPS_NS
+
+#endif
+#endif
diff --git a/src/EXTRA-FIX/fix_efield_tip4p.cpp b/src/EXTRA-FIX/fix_efield_tip4p.cpp
index 47b1d9e27a..a83939a620 100644
--- a/src/EXTRA-FIX/fix_efield_tip4p.cpp
+++ b/src/EXTRA-FIX/fix_efield_tip4p.cpp
@@ -16,7 +16,6 @@
 #include "angle.h"
 #include "atom.h"
 #include "bond.h"
-#include "comm.h"
 #include "domain.h"
 #include "error.h"
 #include "force.h"
@@ -25,11 +24,10 @@
 #include "modify.h"
 #include "pair.h"
 #include "region.h"
-#include "respa.h"
 #include "update.h"
 #include "variable.h"
 
-#include <cstring>
+#include <cmath>
 
 using namespace LAMMPS_NS;
 using namespace FixConst;
diff --git a/src/EXTRA-FIX/fix_nonaffine_displacement.cpp b/src/EXTRA-FIX/fix_nonaffine_displacement.cpp
index a426a8fb55..ee14822e98 100644
--- a/src/EXTRA-FIX/fix_nonaffine_displacement.cpp
+++ b/src/EXTRA-FIX/fix_nonaffine_displacement.cpp
@@ -35,6 +35,7 @@
 #include "pair.h"
 #include "update.h"
 
+#include <cmath>
 #include <cstring>
 
 using namespace LAMMPS_NS;
@@ -64,9 +65,9 @@ static const char cite_nonaffine_d2min[] =
 /* ---------------------------------------------------------------------- */
 
 FixNonaffineDisplacement::FixNonaffineDisplacement(LAMMPS *lmp, int narg, char **arg) :
-  Fix(lmp, narg, arg), id_fix(nullptr), X(nullptr), Y(nullptr), F(nullptr), norm(nullptr)
+  Fix(lmp, narg, arg), id_fix(nullptr), X(nullptr), Y(nullptr), F(nullptr), norm(nullptr), D2min(nullptr)
 {
-  if (narg < 4) error->all(FLERR,"Illegal fix nonaffine/displacement command");
+  if (narg < 4) utils::missing_cmd_args(FLERR,"fix nonaffine/displacement", error);
 
   nevery = utils::inumeric(FLERR, arg[3], false, lmp);
   if (nevery <= 0) error->all(FLERR,"Illegal nevery value {} in fix nonaffine/displacement", nevery);
@@ -75,17 +76,18 @@ FixNonaffineDisplacement::FixNonaffineDisplacement(LAMMPS *lmp, int narg, char *
   int iarg = 4;
   if (strcmp(arg[iarg], "integrated") == 0) {
     nad_style = INTEGRATED;
-    nevery = 1;
     iarg += 1;
   } else if (strcmp(arg[iarg], "d2min") == 0) {
-    if (iarg + 1 > narg) error->all(FLERR,"Illegal fix nonaffine/displacement command");
+    if (iarg + 1 > narg) utils::missing_cmd_args(FLERR,"fix nonaffine/displacement d2min", error);
     nad_style = D2MIN;
     if (strcmp(arg[iarg + 1], "type") == 0) {
       cut_style = TYPE;
     } else if (strcmp(arg[iarg + 1], "radius") == 0) {
       cut_style = RADIUS;
     } else if (strcmp(arg[iarg + 1], "custom") == 0) {
-      if (iarg + 2 > narg) error->all(FLERR,"Illegal fix nonaffine/displacement command");
+      if (iarg + 2 > narg) utils::missing_cmd_args(FLERR,"fix nonaffine/displacement custom", error);
+      if ((neighbor->style == Neighbor::MULTI) || (neighbor->style == Neighbor::MULTI_OLD))
+        error->all(FLERR, "Fix nonaffine/displacement with custom cutoff requires neighbor style 'bin' or 'nsq'");
       cut_style = CUSTOM;
       cutoff_custom = utils::numeric(FLERR, arg[iarg + 2], false, lmp);
       cutsq_custom = cutoff_custom * cutoff_custom;
@@ -96,7 +98,7 @@ FixNonaffineDisplacement::FixNonaffineDisplacement(LAMMPS *lmp, int narg, char *
     iarg += 2;
   } else error->all(FLERR,"Illegal nonaffine displacement style {} in fix nonaffine/displacement", arg[iarg]);
 
-  if (iarg + 2 > narg) error->all(FLERR,"Illegal fix nonaffine/displacement command");
+  if (iarg + 2 > narg) utils::missing_cmd_args(FLERR,"fix nonaffine/displacement", error);
   if (strcmp(arg[iarg], "fixed") == 0) {
     reference_style = FIXED;
     reference_timestep = utils::inumeric(FLERR, arg[iarg + 1], false, lmp);
@@ -105,7 +107,7 @@ FixNonaffineDisplacement::FixNonaffineDisplacement(LAMMPS *lmp, int narg, char *
   } else if (strcmp(arg[iarg], "update") == 0) {
     reference_style = UPDATE;
     update_timestep = utils::inumeric(FLERR, arg[iarg + 1], false, lmp);
-    if (update_timestep < 0)
+    if (update_timestep <= 0)
       error->all(FLERR, "Illegal update timestep {} in fix nonaffine/displacement", arg[iarg + 1]);
   } else if (strcmp(arg[iarg], "offset") == 0) {
     reference_style = OFFSET;
@@ -118,9 +120,6 @@ FixNonaffineDisplacement::FixNonaffineDisplacement(LAMMPS *lmp, int narg, char *
     if (cut_style == RADIUS && (!atom->radius_flag))
       error->all(FLERR, "Fix nonaffine/displacement radius style requires atom attribute radius");
 
-  if (nad_style == INTEGRATED && reference_style == OFFSET)
-    error->all(FLERR, "Fix nonaffine/displacement cannot use the integrated style with an offset reference state");
-
   peratom_flag = 1;
   peratom_freq = nevery;
   nmax = -1;
@@ -150,8 +149,10 @@ FixNonaffineDisplacement::~FixNonaffineDisplacement()
     memory->destroy(Y);
     memory->destroy(F);
     memory->destroy(norm);
-    memory->destroy(array_atom);
+    memory->destroy(D2min);
   }
+
+  memory->destroy(array_atom);
 }
 
 /* ---------------------------------------------------------------------- */
@@ -175,12 +176,7 @@ void FixNonaffineDisplacement::post_constructor()
   id_fix = utils::strdup(id + std::string("_FIX_PA"));
   fix = dynamic_cast<FixStoreAtom *>(modify->add_fix(fmt::format("{} {} STORE/ATOM 3 0 {} 1", id_fix, group->names[igroup], ghost_status)));
 
-  if (nad_style == INTEGRATED)
-    array_atom = fix->astore;
-
-  if (nad_style == D2MIN)
-    grow_arrays(atom->nmax);
-
+  grow_arrays(atom->nmax);
   for (int i = 0; i < atom->nlocal; i++)
     for (int j = 0; j < 3; j++) array_atom[i][j] = 0.0;
 }
@@ -206,6 +202,9 @@ void FixNonaffineDisplacement::init()
     } else {
       auto req = neighbor->add_request(this, NeighConst::REQ_OCCASIONAL);
       if (cut_style == CUSTOM) {
+        if ((neighbor->style == Neighbor::MULTI) || (neighbor->style == Neighbor::MULTI_OLD))
+          error->all(FLERR, "Fix nonaffine/displacement with custom cutoff requires neighbor style 'bin' or 'nsq'");
+
         double skin = neighbor->skin;
         mycutneigh = cutoff_custom + skin;
 
@@ -245,6 +244,15 @@ void FixNonaffineDisplacement::post_force(int /*vflag*/)
   if (reference_saved && (!update->setupflag)) {
     if (nad_style == INTEGRATED) {
       integrate_velocity();
+      if ((update->ntimestep % nevery) == 0) {
+        if (atom->nmax > nmax)
+          grow_arrays(atom->nmax);
+
+        double **x_nonaffine = fix->astore;
+        for (int i = 0; i < atom->nlocal; i++)
+          for (int m = 0; m < 3; m++)
+            array_atom[i][m] = x_nonaffine[i][m];
+      }
     } else {
       if ((update->ntimestep % nevery) == 0) calculate_D2Min();
     }
@@ -291,11 +299,12 @@ void FixNonaffineDisplacement::integrate_velocity()
 
   int *mask = atom->mask;
   int nlocal = atom->nlocal;
+  double **x_nonaffine = fix->astore;
 
   for (int m = 0; m < 3; m++) {
     for (int i = 0; i < nlocal; i++) {
       if (mask[i] & groupbit) {
-        array_atom[i][m] += dtv * v[i][m];
+        x_nonaffine[i][m] += dtv * v[i][m];
       }
     }
   }
@@ -306,6 +315,7 @@ void FixNonaffineDisplacement::integrate_velocity()
 void FixNonaffineDisplacement::save_reference_state()
 {
   double **x = atom->x;
+  double **x0 = fix->astore;
 
   int *mask = atom->mask;
   int nlocal = atom->nlocal;
@@ -314,13 +324,13 @@ void FixNonaffineDisplacement::save_reference_state()
   if (nad_style == D2MIN) {
     for (int m = 0; m < 3; m++) {
       for (int i = 0; i < nall; i++) {
-        if (mask[i] & groupbit)  array_atom[i][m] = x[i][m];
+        if (mask[i] & groupbit)  x0[i][m] = x[i][m];
       }
     }
   } else {
     for (int m = 0; m < 3; m++) {
       for (int i = 0; i < nall; i++) {
-        if (mask[i] & groupbit)  array_atom[i][m] = 0.0;
+        if (mask[i] & groupbit)  x0[i][m] = 0.0;
       }
     }
   }
@@ -358,7 +368,7 @@ void FixNonaffineDisplacement::calculate_D2Min()
   int *ilist, *jlist, *numneigh, **firstneigh;
 
   double **x = atom->x;
-  double **x0 = array_atom;
+  double **x0 = fix->astore;
   double *radius = atom->radius;
   int *type = atom->type;
   int *mask = atom->mask;
@@ -383,7 +393,7 @@ void FixNonaffineDisplacement::calculate_D2Min()
       }
     }
     norm[i] = 0;
-    array_atom[i][0] = 0;
+    D2min[i] = 0;
   }
 
   // First loop through neighbors
@@ -524,7 +534,7 @@ void FixNonaffineDisplacement::calculate_D2Min()
       }
 
       sub3(r, temp, temp);
-      array_atom[i][0] += lensq3(temp);
+      D2min[i] += lensq3(temp);
       norm[i] += 1;
 
       if (newton_pair || j < nlocal) {
@@ -535,7 +545,7 @@ void FixNonaffineDisplacement::calculate_D2Min()
         }
 
         sub3(r, temp, temp);
-        array_atom[j][0] += lensq3(temp);
+        D2min[j] += lensq3(temp);
         norm[j] += 1;
       }
     }
@@ -548,10 +558,9 @@ void FixNonaffineDisplacement::calculate_D2Min()
     if (!(mask[i] & groupbit)) continue;
 
     if (norm[i] != 0)
-      array_atom[i][0] /= norm[i];
+      D2min[i] /= norm[i];
     else
-      array_atom[i][0] = 0.0;
-    array_atom[i][0] = sqrt(array_atom[i][0]);
+      D2min[i] = 0.0;
 
     for (j = 0; j < 3; j++)
       for (k = 0; k < 3; k++)
@@ -571,6 +580,7 @@ void FixNonaffineDisplacement::calculate_D2Min()
 
     edev = sqrt(0.5 * j2);
 
+    array_atom[i][0] = sqrt(D2min[i]);
     array_atom[i][1] = evol;
     array_atom[i][2] = edev;
   }
@@ -593,7 +603,7 @@ int FixNonaffineDisplacement::pack_reverse_comm(int n, int first, double *buf)
         }
       }
     } else {
-      buf[m++] = array_atom[i][0];
+      buf[m++] = D2min[i];
       buf[m++] = ubuf(norm[i]).d;
     }
   }
@@ -617,7 +627,7 @@ void FixNonaffineDisplacement::unpack_reverse_comm(int n, int *list, double *buf
         }
       }
     } else {
-      array_atom[j][0] += buf[m++];
+      D2min[j] += buf[m++];
       norm[j] += (int) ubuf(buf[m++]).i;
     }
   }
@@ -723,12 +733,18 @@ void FixNonaffineDisplacement::minimum_image0(double *delta)
 void FixNonaffineDisplacement::grow_arrays(int nmax_new)
 {
   nmax = nmax_new;
-  memory->destroy(X);
-  memory->destroy(Y);
-  memory->destroy(F);
-  memory->destroy(norm);
-  memory->create(X, nmax, 3, 3, "fix_nonaffine_displacement:X");
-  memory->create(Y, nmax, 3, 3, "fix_nonaffine_displacement:Y");
-  memory->create(F, nmax, 3, 3, "fix_nonaffine_displacement:F");
-  memory->create(norm, nmax, "fix_nonaffine_displacement:norm");
+  memory->destroy(array_atom);
+  memory->create(array_atom, nmax, 3, "fix_nonaffine_displacement:array_atom");
+  if (nad_style == D2MIN) {
+    memory->destroy(X);
+    memory->destroy(Y);
+    memory->destroy(F);
+    memory->destroy(D2min);
+    memory->destroy(norm);
+    memory->create(X, nmax, 3, 3, "fix_nonaffine_displacement:X");
+    memory->create(Y, nmax, 3, 3, "fix_nonaffine_displacement:Y");
+    memory->create(F, nmax, 3, 3, "fix_nonaffine_displacement:F");
+    memory->create(D2min, nmax, "fix_nonaffine_displacement:D2min");
+    memory->create(norm, nmax, "fix_nonaffine_displacement:norm");
+  }
 }
diff --git a/src/EXTRA-FIX/fix_nonaffine_displacement.h b/src/EXTRA-FIX/fix_nonaffine_displacement.h
index 3341ab1834..79dbdabf49 100644
--- a/src/EXTRA-FIX/fix_nonaffine_displacement.h
+++ b/src/EXTRA-FIX/fix_nonaffine_displacement.h
@@ -52,7 +52,7 @@ class FixNonaffineDisplacement : public Fix {
   double cutoff_custom, cutsq_custom, mycutneigh;
   double xprd0, yprd0, zprd0, xprd0_half, yprd0_half, zprd0_half, xy0, xz0, yz0;
 
-  double ***X, ***Y, ***F;
+  double *D2min, ***X, ***Y, ***F;
   int *norm;
 
   class NeighList *list;    // half neighbor list
diff --git a/src/EXTRA-FIX/fix_tmd.cpp b/src/EXTRA-FIX/fix_tmd.cpp
index 401ed41573..242efcf41c 100644
--- a/src/EXTRA-FIX/fix_tmd.cpp
+++ b/src/EXTRA-FIX/fix_tmd.cpp
@@ -32,6 +32,7 @@
 
 #include <cmath>
 #include <cstring>
+#include <exception>
 
 using namespace LAMMPS_NS;
 using namespace FixConst;
diff --git a/src/EXTRA-FIX/fix_ttm_mod.cpp b/src/EXTRA-FIX/fix_ttm_mod.cpp
index 79af414f0a..335acdd853 100644
--- a/src/EXTRA-FIX/fix_ttm_mod.cpp
+++ b/src/EXTRA-FIX/fix_ttm_mod.cpp
@@ -486,12 +486,12 @@ void FixTTMMod::read_parameters(const std::string &filename)
       reader.next_line();
       intensity = reader.next_values(1).next_double();
 
-      // coordinate of 1st surface in x-direction (in box units) - constant
+      // coordinate of 1st surface in x-direction (electron grid units) - constant
 
       reader.next_line();
       surface_l = reader.next_values(1).next_int();
 
-      // coordinate of 2nd surface in x-direction (in box units) - constant
+      // coordinate of 2nd surface in x-direction (electron grid units) - constant
 
       reader.next_line();
       surface_r = reader.next_values(1).next_int();
diff --git a/src/EXTRA-FIX/fix_wall_flow.cpp b/src/EXTRA-FIX/fix_wall_flow.cpp
new file mode 100644
index 0000000000..35997b5b63
--- /dev/null
+++ b/src/EXTRA-FIX/fix_wall_flow.cpp
@@ -0,0 +1,323 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing authors: Vladislav Galigerov (HSE),
+                         Daniil Pavlov (MIPT)
+------------------------------------------------------------------------- */
+
+#include "fix_wall_flow.h"
+
+#include "atom.h"
+#include "citeme.h"
+#include "comm.h"
+#include "domain.h"
+#include "error.h"
+#include "force.h"
+#include "lattice.h"
+#include "math_const.h"
+#include "memory.h"
+#include "modify.h"
+#include "random_mars.h"
+
+#include <algorithm>
+#include <cmath>
+#include <cstring>
+#include <functional>
+
+using namespace LAMMPS_NS;
+using namespace FixConst;
+
+/* ---------------------------------------------------------------------- */
+
+static const char cite_fix_wall_flow_c[] =
+    "fix wall/flow command: doi:10.1177/10943420231213013\n\n"
+    "@Article{Pavlov-etal-IJHPCA-2024,\n"
+    " author = {Daniil Pavlov and Vladislav Galigerov and Daniil Kolotinskii and Vsevolod "
+    "Nikolskiy and Vladimir Stegailov},\n"
+    " title = {GPU-based molecular dynamics of fluid flows: Reaching for turbulence},\n"
+    " journal = {The International Journal of High Performance Computing Applications},\n"
+    " year =    2024,\n"
+    " volume =  38,\n"
+    " number =  1,\n"
+    " pages =   34-49\n"
+    "}\n\n";
+
+FixWallFlow::FixWallFlow(LAMMPS *lmp, int narg, char **arg) :
+    Fix(lmp, narg, arg), flowax(FlowAxis::AX_X), flowvel(0.0), flowdir(0), rndseed(0),
+    current_segment(nullptr)
+{
+  if (lmp->citeme) lmp->citeme->add(cite_fix_wall_flow_c);
+  if (narg < 9) utils::missing_cmd_args(FLERR, "fix wall/flow", error);
+
+  if (domain->triclinic != 0)
+    error->all(FLERR, "Fix wall/flow cannot be used with triclinic simulation box");
+
+  dynamic_group_allow = 1;
+  bool do_abort = false;
+
+  int iarg = 3;
+  // parsing axis
+  if (strcmp(arg[iarg], "x") == 0)
+    flowax = FlowAxis::AX_X;
+  else if (strcmp(arg[iarg], "y") == 0)
+    flowax = FlowAxis::AX_Y;
+  else if (strcmp(arg[iarg], "z") == 0)
+    flowax = FlowAxis::AX_Z;
+  else
+    error->all(FLERR, "Illegal fix wall/flow argument: axis must by x or y or z, but {} specified",
+               arg[iarg]);
+
+  if (domain->periodicity[flowax] != 1)
+    error->all(FLERR,
+               "Fix wall/flow cannot be used with a non-periodic boundary along the flow axis");
+
+  ++iarg;
+  // parsing velocity
+  flowvel = utils::numeric(FLERR, arg[iarg], do_abort, lmp);
+  if (flowvel == 0.0) error->all(FLERR, "Illegal fix wall/flow argument: velocity cannot be 0");
+  if (flowvel > 0.0)
+    flowdir = 1;
+  else
+    flowdir = -1;
+  if (flowdir < 0)
+    error->all(FLERR, "Illegal fix wall/flow argument: negative direction is not supported yet");
+
+  ++iarg;
+  // parsing temperature
+  double flowtemp = utils::numeric(FLERR, arg[iarg], do_abort, lmp);
+  kT = lmp->force->boltz * flowtemp / force->mvv2e;
+
+  ++iarg;
+  // parsing seed
+  rndseed = utils::inumeric(FLERR, arg[iarg], do_abort, lmp);
+  if (rndseed <= 0)
+    error->all(FLERR, "Illegal fix wall/flow argument: random seed must be positive integer");
+
+  ++iarg;
+  // parsing wall count
+  int wallcount = utils::inumeric(FLERR, arg[iarg], do_abort, lmp);
+  if (wallcount <= 0)
+    error->all(FLERR, "Illegal fix wall/flow argument: wall count must be positive integer");
+
+  ++iarg;
+  // parsing walls
+  if (narg - iarg != wallcount && narg - iarg != wallcount + 2)
+    error->all(FLERR, "Wrong fix wall/flow wall count");
+
+  double scale = 0.0;
+  if (flowax == FlowAxis::AX_X)
+    scale = domain->lattice->xlattice;
+  else if (flowax == FlowAxis::AX_Y)
+    scale = domain->lattice->ylattice;
+  else if (flowax == FlowAxis::AX_Z)
+    scale = domain->lattice->zlattice;
+
+  if (narg - iarg == wallcount + 2) {
+    if (strcmp(arg[narg - 2], "units") != 0) error->all(FLERR, "Wrong fix wall/flow units command");
+    if (strcmp(arg[narg - 1], "box") == 0)
+      scale = 1.0;
+    else if (strcmp(arg[narg - 1], "lattice") != 0)
+      error->all(FLERR, "Wrong fix wall/flow units command");
+  }
+
+  walls.resize(wallcount + 2);
+  walls.front() = domain->boxlo[flowax];
+  for (int w = 1; w <= wallcount; ++w, ++iarg) {
+    walls[w] = utils::numeric(FLERR, arg[iarg], do_abort, lmp) * scale;
+  }
+  walls.back() = domain->boxhi[flowax];
+  if (!std::is_sorted(walls.begin(), walls.end(), std::less_equal<double>())) {
+    error->all(FLERR,
+               "Wrong fix wall/flow wall ordering or some walls are outside simulation domain");
+  }
+
+  if (std::adjacent_find(walls.begin(), walls.end()) != walls.end()) {
+    error->all(FLERR,
+               "Wrong fix wall/flow wall coordinates: some walls have the same coordinates or lie "
+               "on the boundary");
+  }
+
+  memory->grow(current_segment, atom->nmax, "WallFlow::current_segment");
+  atom->add_callback(Atom::GROW);
+  if (restart_peratom) atom->add_callback(Atom::RESTART);
+
+  maxexchange = 1;
+
+  random = new RanMars(lmp, rndseed + comm->me);
+}
+
+/* ---------------------------------------------------------------------- */
+
+FixWallFlow::~FixWallFlow()
+{
+  if (copymode) return;
+  atom->delete_callback(id, Atom::GROW);
+  if (restart_peratom) atom->delete_callback(id, Atom::RESTART);
+  memory->destroy(current_segment);
+
+  delete random;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int FixWallFlow::setmask()
+{
+  int mask = 0;
+
+  mask |= END_OF_STEP;
+
+  return mask;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixWallFlow::init()
+{
+  if (domain->triclinic != 0)
+    error->all(FLERR, "Fix wall/flow cannot be used with triclinic simulation box");
+
+  int nrigid = 0;
+  int box_change_flowax = 0;
+  for (const auto &ifix : modify->get_fix_list()) {
+    if (ifix->rigid_flag) nrigid++;
+    switch (flowax) {
+      case FlowAxis::AX_X:
+        if (ifix->box_change & Fix::BOX_CHANGE_X) box_change_flowax++;
+        break;
+      case FlowAxis::AX_Y:
+        if (ifix->box_change & Fix::BOX_CHANGE_Y) box_change_flowax++;
+        break;
+      case FlowAxis::AX_Z:
+        if (ifix->box_change & Fix::BOX_CHANGE_Z) box_change_flowax++;
+        break;
+    }
+  }
+
+  if (nrigid) error->all(FLERR, "Fix wall/flow is not compatible with rigid bodies");
+  if (box_change_flowax)
+    error->all(
+        FLERR,
+        "Fix wall/flow is not compatible with simulation box size changing along flow direction");
+
+  for (int i = 0; i < atom->nlocal; ++i) {
+    double pos = atom->x[i][flowax];
+    current_segment[i] = compute_current_segment(pos);
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixWallFlow::end_of_step()
+{
+  double **x = atom->x;
+  int *mask = atom->mask;
+  int nlocal = atom->nlocal;
+
+  for (int i = 0; i < nlocal; ++i) {
+    if (mask[i] & groupbit) {
+      double pos = x[i][flowax];
+      int prev_segment = current_segment[i];
+      current_segment[i] = compute_current_segment(pos);
+
+      if (prev_segment != current_segment[i]) generate_velocity(i);
+    }
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixWallFlow::generate_velocity(int atom_i)
+{
+  const int newton_iteration_count = 10;
+  double *vel = atom->v[atom_i];
+
+  double *prmass = atom->rmass;
+  double *pmass = atom->mass;
+  double mass = 0.0;
+  if (prmass)
+    mass = prmass[atom_i];
+  else
+    mass = pmass[atom->type[atom_i]];
+
+  const double gamma = 1.0 / std::sqrt(2.0 * kT / mass);
+  double delta = gamma * flowvel;
+
+  const double edd = std::exp(-delta * delta) / MathConst::MY_PIS + delta * std::erf(delta);
+  const double probability_threshold = 0.5f * (1.f + delta / edd);
+
+  double direction = 1.0;
+
+  if (random->uniform() > probability_threshold) {
+    delta = -delta;
+    direction = -direction;
+  }
+
+  const double xi_0 = random->uniform();
+  const double F_inf = edd + delta;
+  const double xi = xi_0 * F_inf;
+  const double x_0 = (std::sqrt(delta * delta + 2) - delta) * 0.5;
+  double x = x_0;
+  for (int i = 0; i < newton_iteration_count; ++i) {
+    x -= (std::exp(x * x) * MathConst::MY_PIS * (xi - delta * std::erfc(x)) - 1.0) / (x + delta) *
+        0.5;
+  }
+
+  const double nu = x + delta;
+  const double v = nu / gamma;
+
+  vel[flowax] = v * direction;
+  vel[(flowax + 1) % 3] = random->gaussian() / (gamma * MathConst::MY_SQRT2);
+  vel[(flowax + 2) % 3] = random->gaussian() / (gamma * MathConst::MY_SQRT2);
+}
+
+/* ---------------------------------------------------------------------- */
+
+int FixWallFlow::compute_current_segment(double pos) const
+{
+  int result = 0;
+  for (; result < (int)walls.size() - 1; ++result) {
+    if (pos >= walls[result] && pos < walls[result + 1]) { return result; }
+  }
+  return -1;    // -1 is "out of box" region
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixWallFlow::grow_arrays(int nmax)
+{
+  memory->grow(current_segment, nmax, "WallFlow::current_segment");
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixWallFlow::copy_arrays(int i, int j, int)
+{
+  current_segment[j] = current_segment[i];
+}
+
+/* ---------------------------------------------------------------------- */
+
+int FixWallFlow::pack_exchange(int i, double *buf)
+{
+  buf[0] = static_cast<double>(current_segment[i]);
+  return 1;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int FixWallFlow::unpack_exchange(int i, double *buf)
+{
+  current_segment[i] = static_cast<int>(buf[0]);
+  return 1;
+}
diff --git a/src/EXTRA-FIX/fix_wall_flow.h b/src/EXTRA-FIX/fix_wall_flow.h
new file mode 100644
index 0000000000..6a662f3d94
--- /dev/null
+++ b/src/EXTRA-FIX/fix_wall_flow.h
@@ -0,0 +1,61 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef FIX_CLASS
+// clang-format off
+FixStyle(wall/flow,FixWallFlow);
+// clang-format on
+#else
+
+#ifndef LMP_FIX_WALL_FLOW_H
+#define LMP_FIX_WALL_FLOW_H
+
+#include "fix.h"
+
+namespace LAMMPS_NS {
+
+class FixWallFlow : public Fix {
+ public:
+  enum FlowAxis { AX_X = 0, AX_Y = 1, AX_Z = 2 };
+
+  FixWallFlow(class LAMMPS *, int, char **);
+  ~FixWallFlow() override;
+  int setmask() override;
+  void init() override;
+  void end_of_step() override;
+
+  void grow_arrays(int) override;
+  void copy_arrays(int, int, int) override;
+
+  int pack_exchange(int, double *) override;
+  int unpack_exchange(int, double *) override;
+
+ protected:
+  FlowAxis flowax;
+  double flowvel;
+  double kT;
+  std::vector<double> walls;
+
+  int flowdir;
+  int rndseed;
+  class RanMars *random;
+  int *current_segment;
+
+  int compute_current_segment(double pos) const;
+  void generate_velocity(int i);
+};
+
+}    // namespace LAMMPS_NS
+
+#endif
+#endif
diff --git a/src/EXTRA-PAIR/pair_born_gauss.cpp b/src/EXTRA-PAIR/pair_born_gauss.cpp
index f60cc4dc6f..4b1390889d 100644
--- a/src/EXTRA-PAIR/pair_born_gauss.cpp
+++ b/src/EXTRA-PAIR/pair_born_gauss.cpp
@@ -18,10 +18,8 @@
 #include "atom.h"
 #include "comm.h"
 #include "error.h"
-#include "fix.h"
 #include "force.h"
 #include "memory.h"
-#include "modify.h"
 #include "neigh_list.h"
 
 #include <cmath>
diff --git a/src/EXTRA-PAIR/pair_lj_cut_sphere.cpp b/src/EXTRA-PAIR/pair_lj_cut_sphere.cpp
index 3f16a96666..852b2eea1d 100644
--- a/src/EXTRA-PAIR/pair_lj_cut_sphere.cpp
+++ b/src/EXTRA-PAIR/pair_lj_cut_sphere.cpp
@@ -20,10 +20,7 @@
 #include "math_special.h"
 #include "memory.h"
 #include "neigh_list.h"
-#include "neighbor.h"
-#include "update.h"
 
-#include <cmath>
 #include <cstring>
 
 using namespace LAMMPS_NS;
diff --git a/src/EXTRA-PAIR/pair_lj_expand_sphere.cpp b/src/EXTRA-PAIR/pair_lj_expand_sphere.cpp
index 089a9deea7..c275a9f9ee 100644
--- a/src/EXTRA-PAIR/pair_lj_expand_sphere.cpp
+++ b/src/EXTRA-PAIR/pair_lj_expand_sphere.cpp
@@ -20,8 +20,6 @@
 #include "math_special.h"
 #include "memory.h"
 #include "neigh_list.h"
-#include "neighbor.h"
-#include "update.h"
 
 #include <cmath>
 #include <cstring>
diff --git a/src/GPU/pair_lj_cut_dipole_cut_gpu.cpp b/src/GPU/pair_lj_cut_dipole_cut_gpu.cpp
index b71e526bf2..1052e16c11 100644
--- a/src/GPU/pair_lj_cut_dipole_cut_gpu.cpp
+++ b/src/GPU/pair_lj_cut_dipole_cut_gpu.cpp
@@ -179,7 +179,7 @@ void PairLJCutDipoleCutGPU::cpu_compute(int start, int inum, int eflag, int vfla
   double qtmp, xtmp, ytmp, ztmp, delx, dely, delz, evdwl, ecoul, fx, fy, fz;
   double rsq, rinv, r2inv, r6inv, r3inv, r5inv, r7inv;
   double forcecoulx, forcecouly, forcecoulz, crossx, crossy, crossz;
-  double tixcoul, tiycoul, tizcoul, tjxcoul, tjycoul, tjzcoul;
+  double tixcoul, tiycoul, tizcoul;
   double fq, pdotp, pidotr, pjdotr, pre1, pre2, pre3, pre4;
   double forcelj, factor_coul, factor_lj;
   int *jlist;
@@ -230,7 +230,6 @@ void PairLJCutDipoleCutGPU::cpu_compute(int start, int inum, int eflag, int vfla
 
         forcecoulx = forcecouly = forcecoulz = 0.0;
         tixcoul = tiycoul = tizcoul = 0.0;
-        tjxcoul = tjycoul = tjzcoul = 0.0;
 
         if (rsq < cut_coulsq[itype][jtype]) {
 
@@ -268,9 +267,6 @@ void PairLJCutDipoleCutGPU::cpu_compute(int start, int inum, int eflag, int vfla
             tixcoul += crossx + pre2 * (mu[i][1] * delz - mu[i][2] * dely);
             tiycoul += crossy + pre2 * (mu[i][2] * delx - mu[i][0] * delz);
             tizcoul += crossz + pre2 * (mu[i][0] * dely - mu[i][1] * delx);
-            tjxcoul += -crossx + pre3 * (mu[j][1] * delz - mu[j][2] * dely);
-            tjycoul += -crossy + pre3 * (mu[j][2] * delx - mu[j][0] * delz);
-            tjzcoul += -crossz + pre3 * (mu[j][0] * dely - mu[j][1] * delx);
           }
 
           if (mu[i][3] > 0.0 && q[j] != 0.0) {
@@ -298,9 +294,6 @@ void PairLJCutDipoleCutGPU::cpu_compute(int start, int inum, int eflag, int vfla
             forcecoulx += pre1 * delx - pre2 * mu[j][0];
             forcecouly += pre1 * dely - pre2 * mu[j][1];
             forcecoulz += pre1 * delz - pre2 * mu[j][2];
-            tjxcoul += -pre2 * (mu[j][1] * delz - mu[j][2] * dely);
-            tjycoul += -pre2 * (mu[j][2] * delx - mu[j][0] * delz);
-            tjzcoul += -pre2 * (mu[j][0] * dely - mu[j][1] * delx);
           }
         }
 
diff --git a/src/GPU/pair_lj_sf_dipole_sf_gpu.cpp b/src/GPU/pair_lj_sf_dipole_sf_gpu.cpp
index 4d8fbb5139..57ba3ec353 100644
--- a/src/GPU/pair_lj_sf_dipole_sf_gpu.cpp
+++ b/src/GPU/pair_lj_sf_dipole_sf_gpu.cpp
@@ -178,8 +178,8 @@ void PairLJSFDipoleSFGPU::cpu_compute(int start, int inum, int eflag, int vflag,
   double qtmp, xtmp, ytmp, ztmp, delx, dely, delz, evdwl, ecoul, fx, fy, fz;
   double rsq, rinv, r2inv, r6inv, r3inv, r5inv;
   double forcecoulx, forcecouly, forcecoulz, crossx, crossy, crossz;
-  double tixcoul, tiycoul, tizcoul, tjxcoul, tjycoul, tjzcoul;
-  double fq, pdotp, pidotr, pjdotr, pre1, pre2, pre3, pre4;
+  double tixcoul, tiycoul, tizcoul;
+  double fq, pdotp, pidotr, pjdotr, pre1, pre2, pre4;
   double forcelj, factor_coul, factor_lj;
   double presf, afac, bfac, pqfac, qpfac, forceljcut, forceljsf;
   double aforcecoulx, aforcecouly, aforcecoulz;
@@ -233,7 +233,6 @@ void PairLJSFDipoleSFGPU::cpu_compute(int start, int inum, int eflag, int vflag,
 
         forcecoulx = forcecouly = forcecoulz = 0.0;
         tixcoul = tiycoul = tizcoul = 0.0;
-        tjxcoul = tjycoul = tjzcoul = 0.0;
 
         if (rsq < cut_coulsq[itype][jtype]) {
 
@@ -272,7 +271,6 @@ void PairLJSFDipoleSFGPU::cpu_compute(int start, int inum, int eflag, int vflag,
             forcecoulz += 3.0 * r5inv * (aforcecoulz + bforcecoulz);
 
             pre2 = 3.0 * bfac * r5inv * pjdotr;
-            pre3 = 3.0 * bfac * r5inv * pidotr;
             pre4 = -bfac * r3inv;
 
             crossx = pre4 * (mu[i][1] * mu[j][2] - mu[i][2] * mu[j][1]);
@@ -282,9 +280,6 @@ void PairLJSFDipoleSFGPU::cpu_compute(int start, int inum, int eflag, int vflag,
             tixcoul += crossx + pre2 * (mu[i][1] * delz - mu[i][2] * dely);
             tiycoul += crossy + pre2 * (mu[i][2] * delx - mu[i][0] * delz);
             tizcoul += crossz + pre2 * (mu[i][0] * dely - mu[i][1] * delx);
-            tjxcoul += -crossx + pre3 * (mu[j][1] * delz - mu[j][2] * dely);
-            tjycoul += -crossy + pre3 * (mu[j][2] * delx - mu[j][0] * delz);
-            tjzcoul += -crossz + pre3 * (mu[j][0] * dely - mu[j][1] * delx);
           }
 
           if (mu[i][3] > 0.0 && q[j] != 0.0) {
@@ -318,9 +313,6 @@ void PairLJSFDipoleSFGPU::cpu_compute(int start, int inum, int eflag, int vflag,
             forcecoulx += pre1 * delx - pre2 * mu[j][0];
             forcecouly += pre1 * dely - pre2 * mu[j][1];
             forcecoulz += pre1 * delz - pre2 * mu[j][2];
-            tjxcoul += -pre2 * (mu[j][1] * delz - mu[j][2] * dely);
-            tjycoul += -pre2 * (mu[j][2] * delx - mu[j][0] * delz);
-            tjzcoul += -pre2 * (mu[j][0] * dely - mu[j][1] * delx);
           }
         }
 
diff --git a/src/GRANULAR/compute_fabric.cpp b/src/GRANULAR/compute_fabric.cpp
index adaf242c92..04afc95280 100644
--- a/src/GRANULAR/compute_fabric.cpp
+++ b/src/GRANULAR/compute_fabric.cpp
@@ -184,7 +184,7 @@ void ComputeFabric::compute_vector()
   double nx, ny, nz;
   double ncinv, denom, fn, ft, prefactor;
   double br_tensor[6], ft_tensor[6], fn_tensor[6];
-  double trace_phi, trace_D, trace_Xfn, trace_Xft;
+  double trace_third_phi, trace_third_D, trace_third_Xfn, trace_third_Xft;
   double phi_ij[6] = {0.0};
   double Ac_ij[6] = {0.0};
   double D_ij[6] = {0.0};
@@ -295,11 +295,11 @@ void ComputeFabric::compute_vector()
   MPI_Allreduce(phi_ij, temp_dbl, 6, MPI_DOUBLE, MPI_SUM, world);
   for (i = 0; i < 6; i++) phi_ij[i] = temp_dbl[i] * ncinv;
 
-  trace_phi = (1.0 / 3.0) * (phi_ij[0] + phi_ij[1] + phi_ij[2]);
+  trace_third_phi = (1.0 / 3.0) * (phi_ij[0] + phi_ij[1] + phi_ij[2]);
 
-  Ac_ij[0] = (15.0 / 2.0) * (phi_ij[0] - trace_phi);
-  Ac_ij[1] = (15.0 / 2.0) * (phi_ij[1] - trace_phi);
-  Ac_ij[2] = (15.0 / 2.0) * (phi_ij[2] - trace_phi);
+  Ac_ij[0] = (15.0 / 2.0) * (phi_ij[0] - trace_third_phi);
+  Ac_ij[1] = (15.0 / 2.0) * (phi_ij[1] - trace_third_phi);
+  Ac_ij[2] = (15.0 / 2.0) * (phi_ij[2] - trace_third_phi);
   Ac_ij[3] = (15.0 / 2.0) * (phi_ij[3]);
   Ac_ij[4] = (15.0 / 2.0) * (phi_ij[4]);
   Ac_ij[5] = (15.0 / 2.0) * (phi_ij[5]);
@@ -419,14 +419,14 @@ void ComputeFabric::compute_vector()
     MPI_Allreduce(D_ij, temp_dbl, 6, MPI_DOUBLE, MPI_SUM, world);
     for (i = 0; i < 6; i++) D_ij[i] = temp_dbl[i];
 
-    trace_D = (1.0 / 3.0) * (D_ij[0] + D_ij[1] + D_ij[2]);
+    trace_third_D = (1.0 / 3.0) * (D_ij[0] + D_ij[1] + D_ij[2]);
 
-    br_tensor[0] = (15.0 / (6.0 * trace_D)) * (D_ij[0] - trace_D);
-    br_tensor[1] = (15.0 / (6.0 * trace_D)) * (D_ij[1] - trace_D);
-    br_tensor[2] = (15.0 / (6.0 * trace_D)) * (D_ij[2] - trace_D);
-    br_tensor[3] = (15.0 / (6.0 * trace_D)) * (D_ij[3]);
-    br_tensor[4] = (15.0 / (6.0 * trace_D)) * (D_ij[4]);
-    br_tensor[5] = (15.0 / (6.0 * trace_D)) * (D_ij[5]);
+    br_tensor[0] = (15.0 / (6.0 * trace_third_D)) * (D_ij[0] - trace_third_D);
+    br_tensor[1] = (15.0 / (6.0 * trace_third_D)) * (D_ij[1] - trace_third_D);
+    br_tensor[2] = (15.0 / (6.0 * trace_third_D)) * (D_ij[2] - trace_third_D);
+    br_tensor[3] = (15.0 / (6.0 * trace_third_D)) * (D_ij[3]);
+    br_tensor[4] = (15.0 / (6.0 * trace_third_D)) * (D_ij[4]);
+    br_tensor[5] = (15.0 / (6.0 * trace_third_D)) * (D_ij[5]);
 
     for (i = 0; i < ntensors; i++) {
       if (tensor_style[i] == BR) {
@@ -439,17 +439,17 @@ void ComputeFabric::compute_vector()
     MPI_Allreduce(Xfn_ij, temp_dbl, 6, MPI_DOUBLE, MPI_SUM, world);
     for (i = 0; i < 6; i++) Xfn_ij[i] = temp_dbl[i];
 
-    trace_Xfn = (1.0 / 3.0) * (Xfn_ij[0] + Xfn_ij[1] + Xfn_ij[2]);
+    trace_third_Xfn = (1.0 / 3.0) * (Xfn_ij[0] + Xfn_ij[1] + Xfn_ij[2]);
   }
 
   if (fn_flag) {
 
-    fn_tensor[0] = (15.0 / (6.0 * trace_Xfn)) * (Xfn_ij[0] - trace_Xfn);
-    fn_tensor[1] = (15.0 / (6.0 * trace_Xfn)) * (Xfn_ij[1] - trace_Xfn);
-    fn_tensor[2] = (15.0 / (6.0 * trace_Xfn)) * (Xfn_ij[2] - trace_Xfn);
-    fn_tensor[3] = (15.0 / (6.0 * trace_Xfn)) * (Xfn_ij[3]);
-    fn_tensor[4] = (15.0 / (6.0 * trace_Xfn)) * (Xfn_ij[4]);
-    fn_tensor[5] = (15.0 / (6.0 * trace_Xfn)) * (Xfn_ij[5]);
+    fn_tensor[0] = (15.0 / (6.0 * trace_third_Xfn)) * (Xfn_ij[0] - trace_third_Xfn);
+    fn_tensor[1] = (15.0 / (6.0 * trace_third_Xfn)) * (Xfn_ij[1] - trace_third_Xfn);
+    fn_tensor[2] = (15.0 / (6.0 * trace_third_Xfn)) * (Xfn_ij[2] - trace_third_Xfn);
+    fn_tensor[3] = (15.0 / (6.0 * trace_third_Xfn)) * (Xfn_ij[3]);
+    fn_tensor[4] = (15.0 / (6.0 * trace_third_Xfn)) * (Xfn_ij[4]);
+    fn_tensor[5] = (15.0 / (6.0 * trace_third_Xfn)) * (Xfn_ij[5]);
 
     for (i = 0; i < ntensors; i++) {
       if (tensor_style[i] == FN) {
@@ -462,14 +462,14 @@ void ComputeFabric::compute_vector()
     MPI_Allreduce(Xft_ij, temp_dbl, 6, MPI_DOUBLE, MPI_SUM, world);
     for (i = 0; i < 6; i++) Xft_ij[i] = temp_dbl[i];
 
-    trace_Xft = (1.0 / 3.0) * (Xft_ij[0] + Xft_ij[1] + Xft_ij[2]);
+    trace_third_Xft = (1.0 / 3.0) * (Xft_ij[0] + Xft_ij[1] + Xft_ij[2]);
 
-    ft_tensor[0] = (15.0 / (9.0 * trace_Xfn)) * (Xft_ij[0] - trace_Xft);
-    ft_tensor[1] = (15.0 / (9.0 * trace_Xfn)) * (Xft_ij[1] - trace_Xft);
-    ft_tensor[2] = (15.0 / (9.0 * trace_Xfn)) * (Xft_ij[2] - trace_Xft);
-    ft_tensor[3] = (15.0 / (9.0 * trace_Xfn)) * (Xft_ij[3]);
-    ft_tensor[4] = (15.0 / (9.0 * trace_Xfn)) * (Xft_ij[4]);
-    ft_tensor[5] = (15.0 / (9.0 * trace_Xfn)) * (Xft_ij[5]);
+    ft_tensor[0] = (15.0 / (9.0 * trace_third_Xfn)) * (Xft_ij[0] - trace_third_Xft);
+    ft_tensor[1] = (15.0 / (9.0 * trace_third_Xfn)) * (Xft_ij[1] - trace_third_Xft);
+    ft_tensor[2] = (15.0 / (9.0 * trace_third_Xfn)) * (Xft_ij[2] - trace_third_Xft);
+    ft_tensor[3] = (15.0 / (9.0 * trace_third_Xfn)) * (Xft_ij[3]);
+    ft_tensor[4] = (15.0 / (9.0 * trace_third_Xfn)) * (Xft_ij[4]);
+    ft_tensor[5] = (15.0 / (9.0 * trace_third_Xfn)) * (Xft_ij[5]);
 
     for (i = 0; i < ntensors; i++) {
       if (tensor_style[i] == FT) {
diff --git a/src/GRANULAR/fix_heat_flow.cpp b/src/GRANULAR/fix_heat_flow.cpp
index d0d7a73ce6..b7643c2c24 100644
--- a/src/GRANULAR/fix_heat_flow.cpp
+++ b/src/GRANULAR/fix_heat_flow.cpp
@@ -16,12 +16,12 @@
 #include "atom.h"
 #include "comm.h"
 #include "error.h"
-#include "force.h"
 #include "memory.h"
 #include "modify.h"
-#include "respa.h"
 #include "update.h"
 
+#include <cstring>
+
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
diff --git a/src/GRANULAR/fix_wall_gran.cpp b/src/GRANULAR/fix_wall_gran.cpp
index 5930280053..b8b06add2e 100644
--- a/src/GRANULAR/fix_wall_gran.cpp
+++ b/src/GRANULAR/fix_wall_gran.cpp
@@ -34,6 +34,7 @@
 #include "update.h"
 #include "variable.h"
 
+#include <cmath>
 #include <cstring>
 
 using namespace LAMMPS_NS;
diff --git a/src/GRANULAR/fix_wall_gran.h b/src/GRANULAR/fix_wall_gran.h
index 45e4e43844..cd1e8778c3 100644
--- a/src/GRANULAR/fix_wall_gran.h
+++ b/src/GRANULAR/fix_wall_gran.h
@@ -20,7 +20,6 @@ FixStyle(wall/gran,FixWallGran);
 #ifndef LMP_FIX_WALL_GRAN_H
 #define LMP_FIX_WALL_GRAN_H
 
-#include "granular_model.h"
 #include "fix.h"
 
 namespace LAMMPS_NS {
diff --git a/src/GRANULAR/fix_wall_gran_region.cpp b/src/GRANULAR/fix_wall_gran_region.cpp
index 1c2fd4bcc0..b90620f3aa 100644
--- a/src/GRANULAR/fix_wall_gran_region.cpp
+++ b/src/GRANULAR/fix_wall_gran_region.cpp
@@ -30,8 +30,6 @@
 #include "update.h"
 #include "variable.h"
 
-#include <cstring>
-
 using namespace LAMMPS_NS;
 using namespace Granular_NS;
 using namespace MathExtra;
diff --git a/src/GRANULAR/gran_sub_mod.cpp b/src/GRANULAR/gran_sub_mod.cpp
index bf945523dd..bac9c9edfe 100644
--- a/src/GRANULAR/gran_sub_mod.cpp
+++ b/src/GRANULAR/gran_sub_mod.cpp
@@ -21,7 +21,8 @@
 ----------------------------------------------------------------------- */
 
 #include "gran_sub_mod.h"
-#include "error.h"
+
+#include <cmath>
 
 using namespace LAMMPS_NS;
 using namespace Granular_NS;
diff --git a/src/GRANULAR/gran_sub_mod.h b/src/GRANULAR/gran_sub_mod.h
index 2524565332..88e77eb77b 100644
--- a/src/GRANULAR/gran_sub_mod.h
+++ b/src/GRANULAR/gran_sub_mod.h
@@ -14,50 +14,50 @@
 #ifndef LMP_GRAN_SUB_MOD_H
 #define LMP_GRAN_SUB_MOD_H
 
-#include "granular_model.h"
 #include "pointers.h"    // IWYU pragma: export
 
 namespace LAMMPS_NS {
 namespace Granular_NS {
+  class GranularModel;
 
-class GranSubMod : protected Pointers {
- public:
-  GranSubMod(class GranularModel *, class LAMMPS *);
-  ~GranSubMod() override;
+  class GranSubMod : protected Pointers {
+   public:
+    GranSubMod(class GranularModel *, class LAMMPS *);
+    ~GranSubMod() override;
 
-  int num_coeffs;
-  double *coeffs;
-  void read_restart();
-  virtual void mix_coeffs(double*, double*);
-  virtual void coeffs_to_local() {};
-  virtual void init() {}; // called after all sub models + coeffs defined
+    int num_coeffs;
+    double *coeffs;
+    void read_restart();
+    virtual void mix_coeffs(double *, double *);
+    virtual void coeffs_to_local(){};
+    virtual void init(){};    // called after all sub models + coeffs defined
 
-  void allocate_coeffs();
-  std::string name;
+    void allocate_coeffs();
+    std::string name;
 
-  int size_history;
-  int nondefault_history_transfer;
-  double *transfer_history_factor;
+    int size_history;
+    int nondefault_history_transfer;
+    double *transfer_history_factor;
 
-  int history_index;
-  int beyond_contact;       // If the sub model contact extends beyond overlap
-  int allow_cohesion;       // If the sub model works with a cohesive normal force
-  int contact_radius_flag;  // If the sub model requires contact radius
+    int history_index;
+    int beyond_contact;         // If the sub model contact extends beyond overlap
+    int allow_cohesion;         // If the sub model works with a cohesive normal force
+    int contact_radius_flag;    // If the sub model requires contact radius
 
-  GranularModel *gm;
+    GranularModel *gm;
 
- protected:
-  int allocated;
+   protected:
+    int allocated;
 
-  double mix_stiffnessE(double, double, double, double);
-  double mix_stiffnessG(double, double, double, double);
-  double mix_stiffnessE_wall(double, double);
-  double mix_stiffnessG_wall(double, double);
-  double mix_geom(double, double);
-  double mix_mean(double, double);
-};
+    double mix_stiffnessE(double, double, double, double);
+    double mix_stiffnessG(double, double, double, double);
+    double mix_stiffnessE_wall(double, double);
+    double mix_stiffnessG_wall(double, double);
+    double mix_geom(double, double);
+    double mix_mean(double, double);
+  };
 
-}    // namespace GranularModel
+}    // namespace Granular_NS
 }    // namespace LAMMPS_NS
 
 #endif /* GRAN_SUB_MOD_H */
diff --git a/src/GRANULAR/gran_sub_mod_damping.cpp b/src/GRANULAR/gran_sub_mod_damping.cpp
index 7d6a02b8f0..4386ed71fc 100644
--- a/src/GRANULAR/gran_sub_mod_damping.cpp
+++ b/src/GRANULAR/gran_sub_mod_damping.cpp
@@ -17,6 +17,8 @@
 #include "granular_model.h"
 #include "math_special.h"
 
+#include <cmath>
+
 using namespace LAMMPS_NS;
 using namespace Granular_NS;
 
@@ -130,6 +132,12 @@ void GranSubModDampingTsuji::init()
 
 double GranSubModDampingTsuji::calculate_forces()
 {
-  damp_prefactor = damp * sqrt(gm->meff * gm->Fnormal / gm->delta);
+  // in case argument <= 0 due to precision issues
+  double sqrt1;
+  if (gm->delta > 0.0)
+    sqrt1 = MAX(0.0, gm->meff * gm->Fnormal / gm->delta);
+  else
+    sqrt1 = 0.0;
+  damp_prefactor = damp * sqrt(sqrt1);
   return -damp_prefactor * gm->vnnr;
 }
diff --git a/src/GRANULAR/gran_sub_mod_normal.cpp b/src/GRANULAR/gran_sub_mod_normal.cpp
index ffc18b8c32..f4294bbc35 100644
--- a/src/GRANULAR/gran_sub_mod_normal.cpp
+++ b/src/GRANULAR/gran_sub_mod_normal.cpp
@@ -16,6 +16,8 @@
 #include "granular_model.h"
 #include "math_const.h"
 
+#include <cmath>
+
 using namespace LAMMPS_NS;
 using namespace Granular_NS;
 
diff --git a/src/GRANULAR/gran_sub_mod_rolling.cpp b/src/GRANULAR/gran_sub_mod_rolling.cpp
index 554aa7ab63..4b10bd2358 100644
--- a/src/GRANULAR/gran_sub_mod_rolling.cpp
+++ b/src/GRANULAR/gran_sub_mod_rolling.cpp
@@ -18,6 +18,8 @@
 #include "granular_model.h"
 #include "math_extra.h"
 
+#include <cmath>
+
 using namespace LAMMPS_NS;
 using namespace Granular_NS;
 using namespace MathExtra;
diff --git a/src/GRANULAR/gran_sub_mod_tangential.cpp b/src/GRANULAR/gran_sub_mod_tangential.cpp
index c74233701b..f8f39a38f9 100644
--- a/src/GRANULAR/gran_sub_mod_tangential.cpp
+++ b/src/GRANULAR/gran_sub_mod_tangential.cpp
@@ -19,6 +19,8 @@
 #include "granular_model.h"
 #include "math_extra.h"
 
+#include <cmath>
+
 using namespace LAMMPS_NS;
 using namespace Granular_NS;
 using namespace MathExtra;
diff --git a/src/GRANULAR/gran_sub_mod_twisting.cpp b/src/GRANULAR/gran_sub_mod_twisting.cpp
index 48af89c9e7..95c62ad342 100644
--- a/src/GRANULAR/gran_sub_mod_twisting.cpp
+++ b/src/GRANULAR/gran_sub_mod_twisting.cpp
@@ -19,6 +19,8 @@
 #include "granular_model.h"
 #include "math_const.h"
 
+#include <cmath>
+
 using namespace LAMMPS_NS;
 using namespace Granular_NS;
 
diff --git a/src/GRANULAR/granular_model.cpp b/src/GRANULAR/granular_model.cpp
index c1ad692fb3..6de147b34a 100644
--- a/src/GRANULAR/granular_model.cpp
+++ b/src/GRANULAR/granular_model.cpp
@@ -31,6 +31,7 @@
 #include "style_gran_sub_mod.h"    // IWYU pragma: keep
 
 #include <cmath>
+#include <cstring>
 
 using namespace LAMMPS_NS;
 using namespace Granular_NS;
diff --git a/src/GRANULAR/pair_granular.cpp b/src/GRANULAR/pair_granular.cpp
index 119feb1c38..d5179a19b7 100644
--- a/src/GRANULAR/pair_granular.cpp
+++ b/src/GRANULAR/pair_granular.cpp
@@ -37,7 +37,6 @@
 #include "update.h"
 
 #include <cstring>
-#include <vector>
 
 using namespace LAMMPS_NS;
 using namespace Granular_NS;
diff --git a/src/GRANULAR/pair_granular.h b/src/GRANULAR/pair_granular.h
index 956717d598..46c5570543 100644
--- a/src/GRANULAR/pair_granular.h
+++ b/src/GRANULAR/pair_granular.h
@@ -21,7 +21,6 @@ PairStyle(granular,PairGranular);
 #define LMP_PAIR_GRANULAR_H
 
 #include "pair.h"
-#include <vector>
 
 namespace LAMMPS_NS {
 
diff --git a/src/INTERLAYER/pair_aip_water_2dm.cpp b/src/INTERLAYER/pair_aip_water_2dm.cpp
index ea3812504d..2c6b222d45 100644
--- a/src/INTERLAYER/pair_aip_water_2dm.cpp
+++ b/src/INTERLAYER/pair_aip_water_2dm.cpp
@@ -24,9 +24,6 @@
 #include "error.h"
 #include "force.h"
 
-#include <cmath>
-#include <cstring>
-
 using namespace LAMMPS_NS;
 
 static const char cite_aip_water[] =
diff --git a/src/INTERLAYER/pair_ilp_tmd.h b/src/INTERLAYER/pair_ilp_tmd.h
index 8381c2e830..7e7edbb01b 100644
--- a/src/INTERLAYER/pair_ilp_tmd.h
+++ b/src/INTERLAYER/pair_ilp_tmd.h
@@ -20,7 +20,7 @@ PairStyle(ilp/tmd,PairILPTMD);
 #ifndef LMP_PAIR_ILP_TMD_H
 #define LMP_PAIR_ILP_TMD_H
 
-#include "pair_ilp_graphene_hbn.h"
+#include "pair_ilp_graphene_hbn.h"    // IWYU pragma: export
 
 namespace LAMMPS_NS {
 
diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh
index 462c0cbe57..75949c35d8 100755
--- a/src/KOKKOS/Install.sh
+++ b/src/KOKKOS/Install.sh
@@ -187,6 +187,8 @@ action fix_temp_rescale_kokkos.cpp
 action fix_temp_rescale_kokkos.h
 action fix_viscous_kokkos.cpp
 action fix_viscous_kokkos.h
+action fix_wall_flow_kokkos.cpp fix_wall_flow.cpp
+action fix_wall_flow_kokkos.h fix_wall_flow.h
 action fix_wall_gran_kokkos.cpp fix_wall_gran.cpp
 action fix_wall_gran_kokkos.h fix_wall_gran.h
 action fix_wall_gran_old.cpp fix_wall_gran.cpp
diff --git a/src/KOKKOS/atom_kokkos.cpp b/src/KOKKOS/atom_kokkos.cpp
index af1816c1d3..b1a066f165 100644
--- a/src/KOKKOS/atom_kokkos.cpp
+++ b/src/KOKKOS/atom_kokkos.cpp
@@ -16,7 +16,7 @@
 #include "atom_masks.h"
 #include "atom_vec.h"
 #include "atom_vec_kokkos.h"
-#include "comm_kokkos.h"
+#include "comm.h"
 #include "domain.h"
 #include "error.h"
 #include "kokkos.h"
diff --git a/src/KOKKOS/atom_map_kokkos.cpp b/src/KOKKOS/atom_map_kokkos.cpp
index 3f86afe788..8203e1e6a2 100644
--- a/src/KOKKOS/atom_map_kokkos.cpp
+++ b/src/KOKKOS/atom_map_kokkos.cpp
@@ -16,13 +16,8 @@
 #include "atom_masks.h"
 #include "comm.h"
 #include "error.h"
-#include "fix.h"
 #include "kokkos.h"
 #include "memory_kokkos.h"
-#include "modify.h"
-#include "neighbor_kokkos.h"
-
-#include <cmath>
 
 using namespace LAMMPS_NS;
 
diff --git a/src/KOKKOS/atom_vec_angle_kokkos.cpp b/src/KOKKOS/atom_vec_angle_kokkos.cpp
index 418c2d629d..dc8641a6b6 100644
--- a/src/KOKKOS/atom_vec_angle_kokkos.cpp
+++ b/src/KOKKOS/atom_vec_angle_kokkos.cpp
@@ -186,302 +186,13 @@ void AtomVecAngleKokkos::sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter
 
 /* ---------------------------------------------------------------------- */
 
-template<class DeviceType,int PBC_FLAG,int TRICLINIC>
-struct AtomVecAngleKokkos_PackComm {
-  typedef DeviceType device_type;
-
-  typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
-  typename ArrayTypes<DeviceType>::t_xfloat_2d_um _buf;
-  typename ArrayTypes<DeviceType>::t_int_2d_const _list;
-  const int _iswap;
-  X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
-  X_FLOAT _pbc[6];
-
-  AtomVecAngleKokkos_PackComm(
-      const typename DAT::tdual_x_array &x,
-      const typename DAT::tdual_xfloat_2d &buf,
-      const typename DAT::tdual_int_2d &list,
-      const int & iswap,
-      const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
-      const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc):
-      _x(x.view<DeviceType>()),_list(list.view<DeviceType>()),_iswap(iswap),
-      _xprd(xprd),_yprd(yprd),_zprd(zprd),
-      _xy(xy),_xz(xz),_yz(yz) {
-        const size_t maxsend = (buf.view<DeviceType>().extent(0)
-                                *buf.view<DeviceType>().extent(1))/3;
-        const size_t elements = 3;
-        buffer_view<DeviceType>(_buf,buf,maxsend,elements);
-        _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2];
-        _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5];
-  };
-
-  KOKKOS_INLINE_FUNCTION
-  void operator() (const int& i) const {
-        const int j = _list(_iswap,i);
-      if (PBC_FLAG == 0) {
-          _buf(i,0) = _x(j,0);
-          _buf(i,1) = _x(j,1);
-          _buf(i,2) = _x(j,2);
-      } else {
-        if (TRICLINIC == 0) {
-          _buf(i,0) = _x(j,0) + _pbc[0]*_xprd;
-          _buf(i,1) = _x(j,1) + _pbc[1]*_yprd;
-          _buf(i,2) = _x(j,2) + _pbc[2]*_zprd;
-        } else {
-          _buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz;
-          _buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz;
-          _buf(i,2) = _x(j,2) + _pbc[2]*_zprd;
-        }
-      }
-  }
-};
-
-/* ---------------------------------------------------------------------- */
-
-int AtomVecAngleKokkos::pack_comm_kokkos(const int &n,
-                                         const DAT::tdual_int_2d &list,
-                                         const int & iswap,
-                                         const DAT::tdual_xfloat_2d &buf,
-                                         const int &pbc_flag,
-                                         const int* const pbc)
-{
-  // Check whether to always run forward communication on the host
-  // Choose correct forward PackComm kernel
-
-  if (commKK->forward_comm_on_host) {
-    atomKK->sync(Host,X_MASK);
-    if (pbc_flag) {
-      if (domain->triclinic) {
-        struct AtomVecAngleKokkos_PackComm<LMPHostType,1,1> f(atomKK->k_x,buf,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-        Kokkos::parallel_for(n,f);
-      } else {
-        struct AtomVecAngleKokkos_PackComm<LMPHostType,1,0> f(atomKK->k_x,buf,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-        Kokkos::parallel_for(n,f);
-      }
-    } else {
-      if (domain->triclinic) {
-        struct AtomVecAngleKokkos_PackComm<LMPHostType,0,1> f(atomKK->k_x,buf,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-        Kokkos::parallel_for(n,f);
-      } else {
-        struct AtomVecAngleKokkos_PackComm<LMPHostType,0,0> f(atomKK->k_x,buf,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-        Kokkos::parallel_for(n,f);
-      }
-    }
-  } else {
-    atomKK->sync(Device,X_MASK);
-    if (pbc_flag) {
-      if (domain->triclinic) {
-        struct AtomVecAngleKokkos_PackComm<LMPDeviceType,1,1> f(atomKK->k_x,buf,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-        Kokkos::parallel_for(n,f);
-      } else {
-        struct AtomVecAngleKokkos_PackComm<LMPDeviceType,1,0> f(atomKK->k_x,buf,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-        Kokkos::parallel_for(n,f);
-      }
-    } else {
-      if (domain->triclinic) {
-        struct AtomVecAngleKokkos_PackComm<LMPDeviceType,0,1> f(atomKK->k_x,buf,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-        Kokkos::parallel_for(n,f);
-      } else {
-        struct AtomVecAngleKokkos_PackComm<LMPDeviceType,0,0> f(atomKK->k_x,buf,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-        Kokkos::parallel_for(n,f);
-      }
-    }
-  }
-
-  return n*size_forward;
-}
-
-/* ---------------------------------------------------------------------- */
-
-template<class DeviceType,int PBC_FLAG,int TRICLINIC>
-struct AtomVecAngleKokkos_PackCommSelf {
-  typedef DeviceType device_type;
-
-  typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
-  typename ArrayTypes<DeviceType>::t_x_array _xw;
-  int _nfirst;
-  typename ArrayTypes<DeviceType>::t_int_2d_const _list;
-  const int _iswap;
-  X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
-  X_FLOAT _pbc[6];
-
-  AtomVecAngleKokkos_PackCommSelf(
-      const typename DAT::tdual_x_array &x,
-      const int &nfirst,
-      const typename DAT::tdual_int_2d &list,
-      const int & iswap,
-      const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
-      const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc):
-      _x(x.view<DeviceType>()),_xw(x.view<DeviceType>()),_nfirst(nfirst),_list(list.view<DeviceType>()),_iswap(iswap),
-      _xprd(xprd),_yprd(yprd),_zprd(zprd),
-      _xy(xy),_xz(xz),_yz(yz) {
-        _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2];
-        _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5];
-  };
-
-  KOKKOS_INLINE_FUNCTION
-  void operator() (const int& i) const {
-        const int j = _list(_iswap,i);
-      if (PBC_FLAG == 0) {
-          _xw(i+_nfirst,0) = _x(j,0);
-          _xw(i+_nfirst,1) = _x(j,1);
-          _xw(i+_nfirst,2) = _x(j,2);
-      } else {
-        if (TRICLINIC == 0) {
-          _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd;
-          _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd;
-          _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd;
-        } else {
-          _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz;
-          _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz;
-          _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd;
-        }
-      }
-
-  }
-};
-
-/* ---------------------------------------------------------------------- */
-
-int AtomVecAngleKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
-                                       const int & iswap,
-                                       const int nfirst, const int &pbc_flag,
-                                       const int* const pbc) {
-  if (commKK->forward_comm_on_host) {
-    atomKK->sync(Host,X_MASK);
-    atomKK->modified(Host,X_MASK);
-    if (pbc_flag) {
-      if (domain->triclinic) {
-      struct AtomVecAngleKokkos_PackCommSelf<LMPHostType,1,1>
-        f(atomKK->k_x,nfirst,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-      Kokkos::parallel_for(n,f);
-      } else {
-      struct AtomVecAngleKokkos_PackCommSelf<LMPHostType,1,0>
-        f(atomKK->k_x,nfirst,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-      Kokkos::parallel_for(n,f);
-      }
-    } else {
-      if (domain->triclinic) {
-      struct AtomVecAngleKokkos_PackCommSelf<LMPHostType,0,1>
-        f(atomKK->k_x,nfirst,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-      Kokkos::parallel_for(n,f);
-      } else {
-      struct AtomVecAngleKokkos_PackCommSelf<LMPHostType,0,0>
-        f(atomKK->k_x,nfirst,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-      Kokkos::parallel_for(n,f);
-      }
-    }
-  } else {
-    atomKK->sync(Device,X_MASK);
-    atomKK->modified(Device,X_MASK);
-    if (pbc_flag) {
-      if (domain->triclinic) {
-      struct AtomVecAngleKokkos_PackCommSelf<LMPDeviceType,1,1>
-        f(atomKK->k_x,nfirst,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-      Kokkos::parallel_for(n,f);
-      } else {
-      struct AtomVecAngleKokkos_PackCommSelf<LMPDeviceType,1,0>
-        f(atomKK->k_x,nfirst,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-      Kokkos::parallel_for(n,f);
-      }
-    } else {
-      if (domain->triclinic) {
-      struct AtomVecAngleKokkos_PackCommSelf<LMPDeviceType,0,1>
-        f(atomKK->k_x,nfirst,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-      Kokkos::parallel_for(n,f);
-      } else {
-      struct AtomVecAngleKokkos_PackCommSelf<LMPDeviceType,0,0>
-        f(atomKK->k_x,nfirst,list,iswap,
-          domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-      Kokkos::parallel_for(n,f);
-      }
-    }
-  }
-        return n*3;
-}
-
-/* ---------------------------------------------------------------------- */
-
-template<class DeviceType>
-struct AtomVecAngleKokkos_UnpackComm {
-  typedef DeviceType device_type;
-
-  typename ArrayTypes<DeviceType>::t_x_array _x;
-  typename ArrayTypes<DeviceType>::t_xfloat_2d_const _buf;
-  int _first;
-
-  AtomVecAngleKokkos_UnpackComm(
-      const typename DAT::tdual_x_array &x,
-      const typename DAT::tdual_xfloat_2d &buf,
-      const int& first):_x(x.view<DeviceType>()),_buf(buf.view<DeviceType>()),
-                        _first(first) {};
-
-  KOKKOS_INLINE_FUNCTION
-  void operator() (const int& i) const {
-      _x(i+_first,0) = _buf(i,0);
-      _x(i+_first,1) = _buf(i,1);
-      _x(i+_first,2) = _buf(i,2);
-  }
-};
-
-/* ---------------------------------------------------------------------- */
-
-void AtomVecAngleKokkos::unpack_comm_kokkos(const int &n, const int &first,
-    const DAT::tdual_xfloat_2d &buf) {
-  if (commKK->forward_comm_on_host) {
-    atomKK->sync(Host,X_MASK);
-    atomKK->modified(Host,X_MASK);
-    struct AtomVecAngleKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first);
-    Kokkos::parallel_for(n,f);
-  } else {
-    atomKK->sync(Device,X_MASK);
-    atomKK->modified(Device,X_MASK);
-    struct AtomVecAngleKokkos_UnpackComm<LMPDeviceType> f(atomKK->k_x,buf,first);
-    Kokkos::parallel_for(n,f);
-  }
-}
-
-/* ---------------------------------------------------------------------- */
-
 template<class DeviceType,int PBC_FLAG>
 struct AtomVecAngleKokkos_PackBorder {
   typedef DeviceType device_type;
   typedef ArrayTypes<DeviceType> AT;
 
   typename AT::t_xfloat_2d _buf;
-  const typename AT::t_int_2d_const _list;
-  const int _iswap;
+  const typename AT::t_int_1d_const _list;
   const typename AT::t_x_array_randomread _x;
   const typename AT::t_tagint_1d _tag;
   const typename AT::t_int_1d _type;
@@ -491,21 +202,20 @@ struct AtomVecAngleKokkos_PackBorder {
 
   AtomVecAngleKokkos_PackBorder(
       const typename AT::t_xfloat_2d &buf,
-      const typename AT::t_int_2d_const &list,
-      const int & iswap,
+      const typename AT::t_int_1d_const &list,
       const typename AT::t_x_array &x,
       const typename AT::t_tagint_1d &tag,
       const typename AT::t_int_1d &type,
       const typename AT::t_int_1d &mask,
       const typename AT::t_tagint_1d &molecule,
       const X_FLOAT &dx, const X_FLOAT &dy, const X_FLOAT &dz):
-      _buf(buf),_list(list),_iswap(iswap),
+      _buf(buf),_list(list),
       _x(x),_tag(tag),_type(type),_mask(mask),_molecule(molecule),
       _dx(dx),_dy(dy),_dz(dz) {}
 
   KOKKOS_INLINE_FUNCTION
   void operator() (const int& i) const {
-      const int j = _list(_iswap,i);
+      const int j = _list(i);
       if (PBC_FLAG == 0) {
           _buf(i,0) = _x(j,0);
           _buf(i,1) = _x(j,1);
@@ -528,8 +238,8 @@ struct AtomVecAngleKokkos_PackBorder {
 
 /* ---------------------------------------------------------------------- */
 
-int AtomVecAngleKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
-                                          DAT::tdual_xfloat_2d buf,int iswap,
+int AtomVecAngleKokkos::pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist,
+                                          DAT::tdual_xfloat_2d buf,
                                           int pbc_flag, int *pbc, ExecutionSpace space)
 {
   X_FLOAT dx,dy,dz;
@@ -547,12 +257,12 @@ int AtomVecAngleKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
     if (space==Host) {
       AtomVecAngleKokkos_PackBorder<LMPHostType,1> f(
         buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
-        iswap,h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz);
+        h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz);
       Kokkos::parallel_for(n,f);
     } else {
       AtomVecAngleKokkos_PackBorder<LMPDeviceType,1> f(
         buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
-        iswap,d_x,d_tag,d_type,d_mask,d_molecule,dx,dy,dz);
+        d_x,d_tag,d_type,d_mask,d_molecule,dx,dy,dz);
       Kokkos::parallel_for(n,f);
     }
 
@@ -561,12 +271,12 @@ int AtomVecAngleKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
     if (space==Host) {
       AtomVecAngleKokkos_PackBorder<LMPHostType,0> f(
         buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
-        iswap,h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz);
+        h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz);
       Kokkos::parallel_for(n,f);
     } else {
       AtomVecAngleKokkos_PackBorder<LMPDeviceType,0> f(
         buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
-        iswap,d_x,d_tag,d_type,d_mask,d_molecule,dx,dy,dz);
+        d_x,d_tag,d_type,d_mask,d_molecule,dx,dy,dz);
       Kokkos::parallel_for(n,f);
     }
   }
diff --git a/src/KOKKOS/atom_vec_angle_kokkos.h b/src/KOKKOS/atom_vec_angle_kokkos.h
index 44f1d824b2..157e8b45cc 100644
--- a/src/KOKKOS/atom_vec_angle_kokkos.h
+++ b/src/KOKKOS/atom_vec_angle_kokkos.h
@@ -35,17 +35,8 @@ class AtomVecAngleKokkos : public AtomVecKokkos, public AtomVecAngle {
   void grow(int) override;
   void grow_pointers() override;
   void sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter) override;
-  int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist,
-                       const int & iswap,
-                       const DAT::tdual_xfloat_2d &buf,
-                       const int &pbc_flag, const int pbc[]) override;
-  void unpack_comm_kokkos(const int &n, const int &nfirst,
-                          const DAT::tdual_xfloat_2d &buf) override;
-  int pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
-                     const int & iswap, const int nfirst,
-                     const int &pbc_flag, const int pbc[]) override;
-  int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
-                         DAT::tdual_xfloat_2d buf,int iswap,
+  int pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist,
+                         DAT::tdual_xfloat_2d buf,
                          int pbc_flag, int *pbc, ExecutionSpace space) override;
   void unpack_border_kokkos(const int &n, const int &nfirst,
                             const DAT::tdual_xfloat_2d &buf,
diff --git a/src/KOKKOS/atom_vec_atomic_kokkos.cpp b/src/KOKKOS/atom_vec_atomic_kokkos.cpp
index 973ad2f7f2..81f4f55962 100644
--- a/src/KOKKOS/atom_vec_atomic_kokkos.cpp
+++ b/src/KOKKOS/atom_vec_atomic_kokkos.cpp
@@ -16,7 +16,6 @@
 
 #include "atom_kokkos.h"
 #include "atom_masks.h"
-#include "comm_kokkos.h"
 #include "domain.h"
 #include "error.h"
 #include "fix.h"
@@ -125,8 +124,7 @@ struct AtomVecAtomicKokkos_PackBorder {
   typedef DeviceType device_type;
 
   typename ArrayTypes<DeviceType>::t_xfloat_2d _buf;
-  const typename ArrayTypes<DeviceType>::t_int_2d_const _list;
-  const int _iswap;
+  const typename ArrayTypes<DeviceType>::t_int_1d_const _list;
   const typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
   const typename ArrayTypes<DeviceType>::t_tagint_1d _tag;
   const typename ArrayTypes<DeviceType>::t_int_1d _type;
@@ -135,20 +133,19 @@ struct AtomVecAtomicKokkos_PackBorder {
 
   AtomVecAtomicKokkos_PackBorder(
     const typename ArrayTypes<DeviceType>::t_xfloat_2d &buf,
-    const typename ArrayTypes<DeviceType>::t_int_2d_const &list,
-    const int &iswap,
+    const typename ArrayTypes<DeviceType>::t_int_1d_const &list,
     const typename ArrayTypes<DeviceType>::t_x_array &x,
     const typename ArrayTypes<DeviceType>::t_tagint_1d &tag,
     const typename ArrayTypes<DeviceType>::t_int_1d &type,
     const typename ArrayTypes<DeviceType>::t_int_1d &mask,
     const X_FLOAT &dx, const X_FLOAT &dy, const X_FLOAT &dz):
-    _buf(buf),_list(list),_iswap(iswap),
+    _buf(buf),_list(list),
     _x(x),_tag(tag),_type(type),_mask(mask),
     _dx(dx),_dy(dy),_dz(dz) {}
 
   KOKKOS_INLINE_FUNCTION
   void operator() (const int& i) const {
-    const int j = _list(_iswap,i);
+    const int j = _list(i);
     if (PBC_FLAG == 0) {
       _buf(i,0) = _x(j,0);
       _buf(i,1) = _x(j,1);
@@ -169,7 +166,7 @@ struct AtomVecAtomicKokkos_PackBorder {
 
 /* ---------------------------------------------------------------------- */
 
-int AtomVecAtomicKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, DAT::tdual_xfloat_2d buf,int iswap,
+int AtomVecAtomicKokkos::pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, DAT::tdual_xfloat_2d buf,
                                int pbc_flag, int *pbc, ExecutionSpace space)
 {
   X_FLOAT dx,dy,dz;
@@ -187,12 +184,12 @@ int AtomVecAtomicKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
     if (space==Host) {
       AtomVecAtomicKokkos_PackBorder<LMPHostType,1> f(
         buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
-        iswap,h_x,h_tag,h_type,h_mask,dx,dy,dz);
+        h_x,h_tag,h_type,h_mask,dx,dy,dz);
       Kokkos::parallel_for(n,f);
     } else {
       AtomVecAtomicKokkos_PackBorder<LMPDeviceType,1> f(
         buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
-        iswap,d_x,d_tag,d_type,d_mask,dx,dy,dz);
+        d_x,d_tag,d_type,d_mask,dx,dy,dz);
       Kokkos::parallel_for(n,f);
     }
 
@@ -201,12 +198,12 @@ int AtomVecAtomicKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
     if (space==Host) {
       AtomVecAtomicKokkos_PackBorder<LMPHostType,0> f(
         buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
-        iswap,h_x,h_tag,h_type,h_mask,dx,dy,dz);
+        h_x,h_tag,h_type,h_mask,dx,dy,dz);
       Kokkos::parallel_for(n,f);
     } else {
       AtomVecAtomicKokkos_PackBorder<LMPDeviceType,0> f(
         buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
-        iswap,d_x,d_tag,d_type,d_mask,dx,dy,dz);
+        d_x,d_tag,d_type,d_mask,dx,dy,dz);
       Kokkos::parallel_for(n,f);
     }
   }
diff --git a/src/KOKKOS/atom_vec_atomic_kokkos.h b/src/KOKKOS/atom_vec_atomic_kokkos.h
index 07631dda98..457b5b61a9 100644
--- a/src/KOKKOS/atom_vec_atomic_kokkos.h
+++ b/src/KOKKOS/atom_vec_atomic_kokkos.h
@@ -36,8 +36,8 @@ class AtomVecAtomicKokkos : public AtomVecKokkos, public AtomVecAtomic {
   void grow(int) override;
   void grow_pointers() override;
   void sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter) override;
-  int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
-                         DAT::tdual_xfloat_2d buf,int iswap,
+  int pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist,
+                         DAT::tdual_xfloat_2d buf,
                          int pbc_flag, int *pbc, ExecutionSpace space) override;
   void unpack_border_kokkos(const int &n, const int &nfirst,
                             const DAT::tdual_xfloat_2d &buf,
diff --git a/src/KOKKOS/atom_vec_bond_kokkos.cpp b/src/KOKKOS/atom_vec_bond_kokkos.cpp
index a4fd9ca1b5..6b32574c2a 100644
--- a/src/KOKKOS/atom_vec_bond_kokkos.cpp
+++ b/src/KOKKOS/atom_vec_bond_kokkos.cpp
@@ -16,7 +16,6 @@
 
 #include "atom_kokkos.h"
 #include "atom_masks.h"
-#include "comm_kokkos.h"
 #include "domain.h"
 #include "error.h"
 #include "fix.h"
@@ -158,8 +157,7 @@ struct AtomVecBondKokkos_PackBorder {
   typedef ArrayTypes<DeviceType> AT;
 
   typename AT::t_xfloat_2d _buf;
-  const typename AT::t_int_2d_const _list;
-  const int _iswap;
+  const typename AT::t_int_1d_const _list;
   const typename AT::t_x_array_randomread _x;
   const typename AT::t_tagint_1d _tag;
   const typename AT::t_int_1d _type;
@@ -169,21 +167,20 @@ struct AtomVecBondKokkos_PackBorder {
 
   AtomVecBondKokkos_PackBorder(
       const typename AT::t_xfloat_2d &buf,
-      const typename AT::t_int_2d_const &list,
-      const int & iswap,
+      const typename AT::t_int_1d_const &list,
       const typename AT::t_x_array &x,
       const typename AT::t_tagint_1d &tag,
       const typename AT::t_int_1d &type,
       const typename AT::t_int_1d &mask,
       const typename AT::t_tagint_1d &molecule,
       const X_FLOAT &dx, const X_FLOAT &dy, const X_FLOAT &dz):
-      _buf(buf),_list(list),_iswap(iswap),
+      _buf(buf),_list(list),
       _x(x),_tag(tag),_type(type),_mask(mask),_molecule(molecule),
       _dx(dx),_dy(dy),_dz(dz) {}
 
   KOKKOS_INLINE_FUNCTION
   void operator() (const int& i) const {
-      const int j = _list(_iswap,i);
+      const int j = _list(i);
       if (PBC_FLAG == 0) {
           _buf(i,0) = _x(j,0);
           _buf(i,1) = _x(j,1);
@@ -206,8 +203,8 @@ struct AtomVecBondKokkos_PackBorder {
 
 /* ---------------------------------------------------------------------- */
 
-int AtomVecBondKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
-                                          DAT::tdual_xfloat_2d buf,int iswap,
+int AtomVecBondKokkos::pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist,
+                                          DAT::tdual_xfloat_2d buf,
                                           int pbc_flag, int *pbc, ExecutionSpace space)
 {
   X_FLOAT dx,dy,dz;
@@ -225,12 +222,12 @@ int AtomVecBondKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
     if (space==Host) {
       AtomVecBondKokkos_PackBorder<LMPHostType,1> f(
         buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
-        iswap,h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz);
+        h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz);
       Kokkos::parallel_for(n,f);
     } else {
       AtomVecBondKokkos_PackBorder<LMPDeviceType,1> f(
         buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
-        iswap,d_x,d_tag,d_type,d_mask,d_molecule,dx,dy,dz);
+        d_x,d_tag,d_type,d_mask,d_molecule,dx,dy,dz);
       Kokkos::parallel_for(n,f);
     }
 
@@ -239,12 +236,12 @@ int AtomVecBondKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
     if (space==Host) {
       AtomVecBondKokkos_PackBorder<LMPHostType,0> f(
         buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
-        iswap,h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz);
+        h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz);
       Kokkos::parallel_for(n,f);
     } else {
       AtomVecBondKokkos_PackBorder<LMPDeviceType,0> f(
         buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
-        iswap,d_x,d_tag,d_type,d_mask,d_molecule,dx,dy,dz);
+        d_x,d_tag,d_type,d_mask,d_molecule,dx,dy,dz);
       Kokkos::parallel_for(n,f);
     }
   }
diff --git a/src/KOKKOS/atom_vec_bond_kokkos.h b/src/KOKKOS/atom_vec_bond_kokkos.h
index 5ed59432de..cad1ea86d7 100644
--- a/src/KOKKOS/atom_vec_bond_kokkos.h
+++ b/src/KOKKOS/atom_vec_bond_kokkos.h
@@ -35,8 +35,8 @@ class AtomVecBondKokkos : public AtomVecKokkos, public AtomVecBond {
   void grow(int) override;
   void grow_pointers() override;
   void sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter) override;
-  int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
-                         DAT::tdual_xfloat_2d buf,int iswap,
+  int pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist,
+                         DAT::tdual_xfloat_2d buf,
                          int pbc_flag, int *pbc, ExecutionSpace space) override;
   void unpack_border_kokkos(const int &n, const int &nfirst,
                             const DAT::tdual_xfloat_2d &buf,
diff --git a/src/KOKKOS/atom_vec_charge_kokkos.cpp b/src/KOKKOS/atom_vec_charge_kokkos.cpp
index 4fa814f1ac..637a219433 100644
--- a/src/KOKKOS/atom_vec_charge_kokkos.cpp
+++ b/src/KOKKOS/atom_vec_charge_kokkos.cpp
@@ -16,7 +16,6 @@
 
 #include "atom_kokkos.h"
 #include "atom_masks.h"
-#include "comm_kokkos.h"
 #include "domain.h"
 #include "error.h"
 #include "fix.h"
@@ -134,19 +133,17 @@ struct AtomVecChargeKokkos_PackComm {
 
   typename AT::t_x_array_randomread _x;
   typename AT::t_xfloat_2d_um _buf;
-  typename AT::t_int_2d_const _list;
-  const int _iswap;
+  typename AT::t_int_1d_const _list;
   X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
   X_FLOAT _pbc[6];
 
   AtomVecChargeKokkos_PackComm(
       const typename DAT::tdual_x_array &x,
       const typename DAT::tdual_xfloat_2d &buf,
-      const typename DAT::tdual_int_2d &list,
-      const int & iswap,
+      const typename DAT::tdual_int_1d &list,
       const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
       const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc):
-      _x(x.view<DeviceType>()),_list(list.view<DeviceType>()),_iswap(iswap),
+      _x(x.view<DeviceType>()),_list(list.view<DeviceType>()),
       _xprd(xprd),_yprd(yprd),_zprd(zprd),
       _xy(xy),_xz(xz),_yz(yz) {
         const size_t maxsend = (buf.view<DeviceType>().extent(0)*buf.view<DeviceType>().extent(1))/3;
@@ -158,7 +155,7 @@ struct AtomVecChargeKokkos_PackComm {
 
   KOKKOS_INLINE_FUNCTION
   void operator() (const int& i) const {
-        const int j = _list(_iswap,i);
+        const int j = _list(i);
       if (PBC_FLAG == 0) {
           _buf(i,0) = _x(j,0);
           _buf(i,1) = _x(j,1);
@@ -185,8 +182,7 @@ struct AtomVecChargeKokkos_PackBorder {
   typedef ArrayTypes<DeviceType> AT;
 
   typename AT::t_xfloat_2d _buf;
-  const typename AT::t_int_2d_const _list;
-  const int _iswap;
+  const typename AT::t_int_1d_const _list;
   const typename AT::t_x_array_randomread _x;
   const typename AT::t_tagint_1d _tag;
   const typename AT::t_int_1d _type;
@@ -196,21 +192,20 @@ struct AtomVecChargeKokkos_PackBorder {
 
   AtomVecChargeKokkos_PackBorder(
       const typename AT::t_xfloat_2d &buf,
-      const typename AT::t_int_2d_const &list,
-      const int & iswap,
+      const typename AT::t_int_1d_const &list,
       const typename AT::t_x_array &x,
       const typename AT::t_tagint_1d &tag,
       const typename AT::t_int_1d &type,
       const typename AT::t_int_1d &mask,
       const typename AT::t_float_1d &q,
       const X_FLOAT &dx, const X_FLOAT &dy, const X_FLOAT &dz):
-      _buf(buf),_list(list),_iswap(iswap),
+      _buf(buf),_list(list),
       _x(x),_tag(tag),_type(type),_mask(mask),_q(q),
       _dx(dx),_dy(dy),_dz(dz) {}
 
   KOKKOS_INLINE_FUNCTION
   void operator() (const int& i) const {
-      const int j = _list(_iswap,i);
+      const int j = _list(i);
       if (PBC_FLAG == 0) {
           _buf(i,0) = _x(j,0);
           _buf(i,1) = _x(j,1);
@@ -233,7 +228,7 @@ struct AtomVecChargeKokkos_PackBorder {
 
 /* ---------------------------------------------------------------------- */
 
-int AtomVecChargeKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, DAT::tdual_xfloat_2d buf,int iswap,
+int AtomVecChargeKokkos::pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, DAT::tdual_xfloat_2d buf,
                                int pbc_flag, int *pbc, ExecutionSpace space)
 {
   X_FLOAT dx,dy,dz;
@@ -251,12 +246,12 @@ int AtomVecChargeKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
     if (space==Host) {
       AtomVecChargeKokkos_PackBorder<LMPHostType,1> f(
         buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
-        iswap,h_x,h_tag,h_type,h_mask,h_q,dx,dy,dz);
+        h_x,h_tag,h_type,h_mask,h_q,dx,dy,dz);
       Kokkos::parallel_for(n,f);
     } else {
       AtomVecChargeKokkos_PackBorder<LMPDeviceType,1> f(
         buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
-        iswap,d_x,d_tag,d_type,d_mask,d_q,dx,dy,dz);
+        d_x,d_tag,d_type,d_mask,d_q,dx,dy,dz);
       Kokkos::parallel_for(n,f);
     }
 
@@ -265,12 +260,12 @@ int AtomVecChargeKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
     if (space==Host) {
       AtomVecChargeKokkos_PackBorder<LMPHostType,0> f(
         buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
-        iswap,h_x,h_tag,h_type,h_mask,h_q,dx,dy,dz);
+        h_x,h_tag,h_type,h_mask,h_q,dx,dy,dz);
       Kokkos::parallel_for(n,f);
     } else {
       AtomVecChargeKokkos_PackBorder<LMPDeviceType,0> f(
         buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
-        iswap,d_x,d_tag,d_type,d_mask,d_q,dx,dy,dz);
+        d_x,d_tag,d_type,d_mask,d_q,dx,dy,dz);
       Kokkos::parallel_for(n,f);
     }
   }
diff --git a/src/KOKKOS/atom_vec_charge_kokkos.h b/src/KOKKOS/atom_vec_charge_kokkos.h
index 397a5ee4c0..1d1c68735e 100644
--- a/src/KOKKOS/atom_vec_charge_kokkos.h
+++ b/src/KOKKOS/atom_vec_charge_kokkos.h
@@ -36,8 +36,8 @@ class AtomVecChargeKokkos : public AtomVecKokkos, public AtomVecCharge {
   void grow(int) override;
   void grow_pointers() override;
   void sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter) override;
-  int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
-                         DAT::tdual_xfloat_2d buf,int iswap,
+  int pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist,
+                         DAT::tdual_xfloat_2d buf,
                          int pbc_flag, int *pbc, ExecutionSpace space) override;
   void unpack_border_kokkos(const int &n, const int &nfirst,
                             const DAT::tdual_xfloat_2d &buf,
diff --git a/src/KOKKOS/atom_vec_dipole_kokkos.cpp b/src/KOKKOS/atom_vec_dipole_kokkos.cpp
index ecc0f3b497..7728fedd4b 100644
--- a/src/KOKKOS/atom_vec_dipole_kokkos.cpp
+++ b/src/KOKKOS/atom_vec_dipole_kokkos.cpp
@@ -16,7 +16,6 @@
 
 #include "atom_kokkos.h"
 #include "atom_masks.h"
-#include "comm_kokkos.h"
 #include "domain.h"
 #include "error.h"
 #include "fix.h"
@@ -136,8 +135,7 @@ struct AtomVecDipoleKokkos_PackComm {
   typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
   typename ArrayTypes<DeviceType>::t_mu_array_randomread _mu;
   typename ArrayTypes<DeviceType>::t_xfloat_2d_um _buf;
-  typename ArrayTypes<DeviceType>::t_int_2d_const _list;
-  const int _iswap;
+  typename ArrayTypes<DeviceType>::t_int_1d_const _list;
   X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
   X_FLOAT _pbc[6];
 
@@ -145,13 +143,12 @@ struct AtomVecDipoleKokkos_PackComm {
       const typename DAT::tdual_x_array &x,
       const typename DAT::tdual_float_1d_4 &mu,
       const typename DAT::tdual_xfloat_2d &buf,
-      const typename DAT::tdual_int_2d &list,
-      const int & iswap,
+      const typename DAT::tdual_int_1d &list,
       const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
       const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc):
       _x(x.view<DeviceType>()),
       _mu(mu.view<DeviceType>()),
-      _list(list.view<DeviceType>()),_iswap(iswap),
+      _list(list.view<DeviceType>()),
       _xprd(xprd),_yprd(yprd),_zprd(zprd),
       _xy(xy),_xz(xz),_yz(yz) {
         const size_t elements = 7; // size_forward
@@ -163,7 +160,7 @@ struct AtomVecDipoleKokkos_PackComm {
 
   KOKKOS_INLINE_FUNCTION
   void operator() (const int& i) const {
-    const int j = _list(_iswap,i);
+    const int j = _list(i);
     if (PBC_FLAG == 0) {
       _buf(i,0) = _x(j,0);
       _buf(i,1) = _x(j,1);
@@ -201,8 +198,7 @@ struct AtomVecDipoleKokkos_PackBorder {
   typedef DeviceType device_type;
 
   typename ArrayTypes<DeviceType>::t_xfloat_2d _buf;
-  const typename ArrayTypes<DeviceType>::t_int_2d_const _list;
-  const int _iswap;
+  const typename ArrayTypes<DeviceType>::t_int_1d_const _list;
   const typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
   const typename ArrayTypes<DeviceType>::t_tagint_1d _tag;
   const typename ArrayTypes<DeviceType>::t_int_1d _type;
@@ -213,8 +209,7 @@ struct AtomVecDipoleKokkos_PackBorder {
 
   AtomVecDipoleKokkos_PackBorder(
       const typename ArrayTypes<DeviceType>::t_xfloat_2d &buf,
-      const typename ArrayTypes<DeviceType>::t_int_2d_const &list,
-      const int & iswap,
+      const typename ArrayTypes<DeviceType>::t_int_1d_const &list,
       const typename ArrayTypes<DeviceType>::t_x_array &x,
       const typename ArrayTypes<DeviceType>::t_tagint_1d &tag,
       const typename ArrayTypes<DeviceType>::t_int_1d &type,
@@ -222,13 +217,13 @@ struct AtomVecDipoleKokkos_PackBorder {
       const typename ArrayTypes<DeviceType>::t_float_1d &q,
       const typename ArrayTypes<DeviceType>::t_mu_array_randomread &mu,
       const X_FLOAT &dx, const X_FLOAT &dy, const X_FLOAT &dz):
-      _buf(buf),_list(list),_iswap(iswap),
+      _buf(buf),_list(list),
       _x(x),_tag(tag),_type(type),_mask(mask),_q(q),_mu(mu),
       _dx(dx),_dy(dy),_dz(dz) {}
 
   KOKKOS_INLINE_FUNCTION
   void operator() (const int& i) const {
-      const int j = _list(_iswap,i);
+      const int j = _list(i);
       if (PBC_FLAG == 0) {
           _buf(i,0) = _x(j,0);
           _buf(i,1) = _x(j,1);
@@ -259,7 +254,7 @@ struct AtomVecDipoleKokkos_PackBorder {
 
 /* ---------------------------------------------------------------------- */
 
-int AtomVecDipoleKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, DAT::tdual_xfloat_2d buf,int iswap,
+int AtomVecDipoleKokkos::pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, DAT::tdual_xfloat_2d buf,
                                int pbc_flag, int *pbc, ExecutionSpace space)
 {
   X_FLOAT dx,dy,dz;
@@ -277,12 +272,12 @@ int AtomVecDipoleKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
     if (space==Host) {
       AtomVecDipoleKokkos_PackBorder<LMPHostType,1> f(
         buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
-        iswap,h_x,h_tag,h_type,h_mask,h_q,h_mu,dx,dy,dz);
+        h_x,h_tag,h_type,h_mask,h_q,h_mu,dx,dy,dz);
       Kokkos::parallel_for(n,f);
     } else {
       AtomVecDipoleKokkos_PackBorder<LMPDeviceType,1> f(
         buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
-        iswap,d_x,d_tag,d_type,d_mask,d_q,d_mu,dx,dy,dz);
+        d_x,d_tag,d_type,d_mask,d_q,d_mu,dx,dy,dz);
       Kokkos::parallel_for(n,f);
     }
 
@@ -291,12 +286,12 @@ int AtomVecDipoleKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
     if (space==Host) {
       AtomVecDipoleKokkos_PackBorder<LMPHostType,0> f(
         buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
-        iswap,h_x,h_tag,h_type,h_mask,h_q,h_mu,dx,dy,dz);
+        h_x,h_tag,h_type,h_mask,h_q,h_mu,dx,dy,dz);
       Kokkos::parallel_for(n,f);
     } else {
       AtomVecDipoleKokkos_PackBorder<LMPDeviceType,0> f(
         buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
-        iswap,d_x,d_tag,d_type,d_mask,d_q,d_mu,dx,dy,dz);
+        d_x,d_tag,d_type,d_mask,d_q,d_mu,dx,dy,dz);
       Kokkos::parallel_for(n,f);
     }
   }
diff --git a/src/KOKKOS/atom_vec_dipole_kokkos.h b/src/KOKKOS/atom_vec_dipole_kokkos.h
index 97ec92c6c6..46e102936a 100644
--- a/src/KOKKOS/atom_vec_dipole_kokkos.h
+++ b/src/KOKKOS/atom_vec_dipole_kokkos.h
@@ -36,8 +36,8 @@ class AtomVecDipoleKokkos : public AtomVecKokkos, public AtomVecDipole {
   void grow(int) override;
   void grow_pointers() override;
   void sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter) override;
-  int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
-                         DAT::tdual_xfloat_2d buf,int iswap,
+  int pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist,
+                         DAT::tdual_xfloat_2d buf,
                          int pbc_flag, int *pbc, ExecutionSpace space) override;
   void unpack_border_kokkos(const int &n, const int &nfirst,
                             const DAT::tdual_xfloat_2d &buf,
diff --git a/src/KOKKOS/atom_vec_dpd_kokkos.cpp b/src/KOKKOS/atom_vec_dpd_kokkos.cpp
index 70aedcc931..6152fa60fb 100644
--- a/src/KOKKOS/atom_vec_dpd_kokkos.cpp
+++ b/src/KOKKOS/atom_vec_dpd_kokkos.cpp
@@ -20,6 +20,7 @@
 #include "domain.h"
 #include "error.h"
 #include "fix.h"
+#include "kokkos.h"
 #include "memory_kokkos.h"
 #include "modify.h"
 
@@ -168,8 +169,7 @@ struct AtomVecDPDKokkos_PackComm {
   typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
   typename ArrayTypes<DeviceType>::t_efloat_1d _dpdTheta,_uCond,_uMech,_uChem;
   typename ArrayTypes<DeviceType>::t_xfloat_2d_um _buf;
-  typename ArrayTypes<DeviceType>::t_int_2d_const _list;
-  const int _iswap;
+  typename ArrayTypes<DeviceType>::t_int_1d_const _list;
   X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
   X_FLOAT _pbc[6];
 
@@ -180,8 +180,7 @@ struct AtomVecDPDKokkos_PackComm {
       const typename DAT::tdual_efloat_1d &uMech,
       const typename DAT::tdual_efloat_1d &uChem,
       const typename DAT::tdual_xfloat_2d &buf,
-      const typename DAT::tdual_int_2d &list,
-      const int & iswap,
+      const typename DAT::tdual_int_1d &list,
       const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
       const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc):
       _x(x.view<DeviceType>()),
@@ -189,7 +188,7 @@ struct AtomVecDPDKokkos_PackComm {
       _uCond(uCond.view<DeviceType>()),
       _uMech(uMech.view<DeviceType>()),
       _uChem(uChem.view<DeviceType>()),
-      _list(list.view<DeviceType>()),_iswap(iswap),
+      _list(list.view<DeviceType>()),
       _xprd(xprd),_yprd(yprd),_zprd(zprd),
       _xy(xy),_xz(xz),_yz(yz) {
         const size_t maxsend = (buf.view<DeviceType>().extent(0)*buf.view<DeviceType>().extent(1))/3;
@@ -201,7 +200,7 @@ struct AtomVecDPDKokkos_PackComm {
 
   KOKKOS_INLINE_FUNCTION
   void operator() (const int& i) const {
-      const int j = _list(_iswap,i);
+      const int j = _list(i);
       if (PBC_FLAG == 0) {
           _buf(i,0) = _x(j,0);
           _buf(i,1) = _x(j,1);
@@ -227,8 +226,7 @@ struct AtomVecDPDKokkos_PackComm {
 /* ---------------------------------------------------------------------- */
 
 int AtomVecDPDKokkos::pack_comm_kokkos(const int &n,
-                                          const DAT::tdual_int_2d &list,
-                                          const int & iswap,
+                                          const DAT::tdual_int_1d &list,
                                           const DAT::tdual_xfloat_2d &buf,
                                           const int &pbc_flag,
                                           const int* const pbc)
@@ -236,20 +234,20 @@ int AtomVecDPDKokkos::pack_comm_kokkos(const int &n,
   // Check whether to always run forward communication on the host
   // Choose correct forward PackComm kernel
 
-  if (commKK->forward_comm_on_host) {
+  if (lmp->kokkos->forward_comm_on_host) {
     atomKK->sync(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
     if (pbc_flag) {
       if (domain->triclinic) {
         struct AtomVecDPDKokkos_PackComm<LMPHostType,1,1> f(atomKK->k_x,
           atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem,
-          buf,list,iswap,
+          buf,list,
           domain->xprd,domain->yprd,domain->zprd,
           domain->xy,domain->xz,domain->yz,pbc);
         Kokkos::parallel_for(n,f);
       } else {
         struct AtomVecDPDKokkos_PackComm<LMPHostType,1,0> f(atomKK->k_x,
           atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem,
-          buf,list,iswap,
+          buf,list,
           domain->xprd,domain->yprd,domain->zprd,
           domain->xy,domain->xz,domain->yz,pbc);
         Kokkos::parallel_for(n,f);
@@ -258,14 +256,14 @@ int AtomVecDPDKokkos::pack_comm_kokkos(const int &n,
       if (domain->triclinic) {
         struct AtomVecDPDKokkos_PackComm<LMPHostType,0,1> f(atomKK->k_x,
           atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem,
-          buf,list,iswap,
+          buf,list,
           domain->xprd,domain->yprd,domain->zprd,
           domain->xy,domain->xz,domain->yz,pbc);
         Kokkos::parallel_for(n,f);
       } else {
         struct AtomVecDPDKokkos_PackComm<LMPHostType,0,0> f(atomKK->k_x,
           atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem,
-          buf,list,iswap,
+          buf,list,
           domain->xprd,domain->yprd,domain->zprd,
           domain->xy,domain->xz,domain->yz,pbc);
         Kokkos::parallel_for(n,f);
@@ -277,14 +275,14 @@ int AtomVecDPDKokkos::pack_comm_kokkos(const int &n,
       if (domain->triclinic) {
         struct AtomVecDPDKokkos_PackComm<LMPDeviceType,1,1> f(atomKK->k_x,
           atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem,
-          buf,list,iswap,
+          buf,list,
           domain->xprd,domain->yprd,domain->zprd,
           domain->xy,domain->xz,domain->yz,pbc);
         Kokkos::parallel_for(n,f);
       } else {
         struct AtomVecDPDKokkos_PackComm<LMPDeviceType,1,0> f(atomKK->k_x,
           atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem,
-          buf,list,iswap,
+          buf,list,
           domain->xprd,domain->yprd,domain->zprd,
           domain->xy,domain->xz,domain->yz,pbc);
         Kokkos::parallel_for(n,f);
@@ -293,14 +291,14 @@ int AtomVecDPDKokkos::pack_comm_kokkos(const int &n,
       if (domain->triclinic) {
         struct AtomVecDPDKokkos_PackComm<LMPDeviceType,0,1> f(atomKK->k_x,
           atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem,
-          buf,list,iswap,
+          buf,list,
           domain->xprd,domain->yprd,domain->zprd,
           domain->xy,domain->xz,domain->yz,pbc);
         Kokkos::parallel_for(n,f);
       } else {
         struct AtomVecDPDKokkos_PackComm<LMPDeviceType,0,0> f(atomKK->k_x,
           atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem,
-          buf,list,iswap,
+          buf,list,
           domain->xprd,domain->yprd,domain->zprd,
           domain->xy,domain->xz,domain->yz,pbc);
         Kokkos::parallel_for(n,f);
@@ -321,8 +319,7 @@ struct AtomVecDPDKokkos_PackCommSelf {
   typename ArrayTypes<DeviceType>::t_x_array _xw;
   typename ArrayTypes<DeviceType>::t_efloat_1d _dpdTheta,_uCond,_uMech,_uChem;
   int _nfirst;
-  typename ArrayTypes<DeviceType>::t_int_2d_const _list;
-  const int _iswap;
+  typename ArrayTypes<DeviceType>::t_int_1d_const _list;
   X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
   X_FLOAT _pbc[6];
 
@@ -333,8 +330,7 @@ struct AtomVecDPDKokkos_PackCommSelf {
       const typename DAT::tdual_efloat_1d &uMech,
       const typename DAT::tdual_efloat_1d &uChem,
       const int &nfirst,
-      const typename DAT::tdual_int_2d &list,
-      const int & iswap,
+      const typename DAT::tdual_int_1d &list,
       const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
       const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc):
       _x(x.view<DeviceType>()),_xw(x.view<DeviceType>()),
@@ -342,7 +338,7 @@ struct AtomVecDPDKokkos_PackCommSelf {
       _uCond(uCond.view<DeviceType>()),
       _uMech(uMech.view<DeviceType>()),
       _uChem(uChem.view<DeviceType>()),
-      _nfirst(nfirst),_list(list.view<DeviceType>()),_iswap(iswap),
+      _nfirst(nfirst),_list(list.view<DeviceType>()),
       _xprd(xprd),_yprd(yprd),_zprd(zprd),
       _xy(xy),_xz(xz),_yz(yz) {
         _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2];
@@ -351,7 +347,7 @@ struct AtomVecDPDKokkos_PackCommSelf {
 
   KOKKOS_INLINE_FUNCTION
   void operator() (const int& i) const {
-        const int j = _list(_iswap,i);
+        const int j = _list(i);
       if (PBC_FLAG == 0) {
           _xw(i+_nfirst,0) = _x(j,0);
           _xw(i+_nfirst,1) = _x(j,1);
@@ -376,23 +372,23 @@ struct AtomVecDPDKokkos_PackCommSelf {
 
 /* ---------------------------------------------------------------------- */
 
-int AtomVecDPDKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, const int & iswap,
+int AtomVecDPDKokkos::pack_comm_self(const int &n, const DAT::tdual_int_1d &list,
                                                                                 const int nfirst, const int &pbc_flag, const int* const pbc) {
-  if (commKK->forward_comm_on_host) {
+  if (lmp->kokkos->forward_comm_on_host) {
     atomKK->sync(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
     atomKK->modified(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
     if (pbc_flag) {
       if (domain->triclinic) {
       struct AtomVecDPDKokkos_PackCommSelf<LMPHostType,1,1> f(atomKK->k_x,
           atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem,
-          nfirst,list,iswap,
+          nfirst,list,
           domain->xprd,domain->yprd,domain->zprd,
           domain->xy,domain->xz,domain->yz,pbc);
       Kokkos::parallel_for(n,f);
       } else {
       struct AtomVecDPDKokkos_PackCommSelf<LMPHostType,1,0> f(atomKK->k_x,
           atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem,
-          nfirst,list,iswap,
+          nfirst,list,
           domain->xprd,domain->yprd,domain->zprd,
           domain->xy,domain->xz,domain->yz,pbc);
       Kokkos::parallel_for(n,f);
@@ -401,14 +397,14 @@ int AtomVecDPDKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list
       if (domain->triclinic) {
       struct AtomVecDPDKokkos_PackCommSelf<LMPHostType,0,1> f(atomKK->k_x,
           atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem,
-          nfirst,list,iswap,
+          nfirst,list,
           domain->xprd,domain->yprd,domain->zprd,
           domain->xy,domain->xz,domain->yz,pbc);
       Kokkos::parallel_for(n,f);
       } else {
       struct AtomVecDPDKokkos_PackCommSelf<LMPHostType,0,0> f(atomKK->k_x,
           atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem,
-          nfirst,list,iswap,
+          nfirst,list,
           domain->xprd,domain->yprd,domain->zprd,
           domain->xy,domain->xz,domain->yz,pbc);
       Kokkos::parallel_for(n,f);
@@ -421,14 +417,14 @@ int AtomVecDPDKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list
       if (domain->triclinic) {
       struct AtomVecDPDKokkos_PackCommSelf<LMPDeviceType,1,1> f(atomKK->k_x,
           atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem,
-          nfirst,list,iswap,
+          nfirst,list,
           domain->xprd,domain->yprd,domain->zprd,
           domain->xy,domain->xz,domain->yz,pbc);
       Kokkos::parallel_for(n,f);
       } else {
       struct AtomVecDPDKokkos_PackCommSelf<LMPDeviceType,1,0> f(atomKK->k_x,
           atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem,
-          nfirst,list,iswap,
+          nfirst,list,
           domain->xprd,domain->yprd,domain->zprd,
           domain->xy,domain->xz,domain->yz,pbc);
       Kokkos::parallel_for(n,f);
@@ -437,14 +433,14 @@ int AtomVecDPDKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list
       if (domain->triclinic) {
       struct AtomVecDPDKokkos_PackCommSelf<LMPDeviceType,0,1> f(atomKK->k_x,
           atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem,
-          nfirst,list,iswap,
+          nfirst,list,
           domain->xprd,domain->yprd,domain->zprd,
           domain->xy,domain->xz,domain->yz,pbc);
       Kokkos::parallel_for(n,f);
       } else {
       struct AtomVecDPDKokkos_PackCommSelf<LMPDeviceType,0,0> f(atomKK->k_x,
           atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem,
-          nfirst,list,iswap,
+          nfirst,list,
           domain->xprd,domain->yprd,domain->zprd,
           domain->xy,domain->xz,domain->yz,pbc);
       Kokkos::parallel_for(n,f);
@@ -496,7 +492,7 @@ struct AtomVecDPDKokkos_UnpackComm {
 
 void AtomVecDPDKokkos::unpack_comm_kokkos(const int &n, const int &first,
     const DAT::tdual_xfloat_2d &buf) {
-  if (commKK->forward_comm_on_host) {
+  if (lmp->kokkos->forward_comm_on_host) {
     atomKK->sync(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
     atomKK->modified(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
     struct AtomVecDPDKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,
@@ -520,8 +516,7 @@ struct AtomVecDPDKokkos_PackBorder {
   typedef DeviceType device_type;
 
   typename ArrayTypes<DeviceType>::t_xfloat_2d _buf;
-  const typename ArrayTypes<DeviceType>::t_int_2d_const _list;
-  const int _iswap;
+  const typename ArrayTypes<DeviceType>::t_int_1d_const _list;
   const typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
   const typename ArrayTypes<DeviceType>::t_tagint_1d _tag;
   const typename ArrayTypes<DeviceType>::t_int_1d _type;
@@ -531,8 +526,7 @@ struct AtomVecDPDKokkos_PackBorder {
 
   AtomVecDPDKokkos_PackBorder(
       const typename ArrayTypes<DeviceType>::t_xfloat_2d &buf,
-      const typename ArrayTypes<DeviceType>::t_int_2d_const &list,
-      const int & iswap,
+      const typename ArrayTypes<DeviceType>::t_int_1d_const &list,
       const typename ArrayTypes<DeviceType>::t_x_array &x,
       const typename ArrayTypes<DeviceType>::t_tagint_1d &tag,
       const typename ArrayTypes<DeviceType>::t_int_1d &type,
@@ -544,7 +538,7 @@ struct AtomVecDPDKokkos_PackBorder {
       const typename ArrayTypes<DeviceType>::t_efloat_1d &uCG,
       const typename ArrayTypes<DeviceType>::t_efloat_1d &uCGnew,
       const X_FLOAT &dx, const X_FLOAT &dy, const X_FLOAT &dz):
-      _buf(buf),_list(list),_iswap(iswap),
+      _buf(buf),_list(list),
       _x(x),_tag(tag),_type(type),_mask(mask),
       _dpdTheta(dpdTheta),
       _uCond(uCond),
@@ -556,7 +550,7 @@ struct AtomVecDPDKokkos_PackBorder {
 
   KOKKOS_INLINE_FUNCTION
   void operator() (const int& i) const {
-      const int j = _list(_iswap,i);
+      const int j = _list(i);
       if (PBC_FLAG == 0) {
           _buf(i,0) = _x(j,0);
           _buf(i,1) = _x(j,1);
@@ -580,7 +574,7 @@ struct AtomVecDPDKokkos_PackBorder {
 
 /* ---------------------------------------------------------------------- */
 
-int AtomVecDPDKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, DAT::tdual_xfloat_2d buf,int iswap,
+int AtomVecDPDKokkos::pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, DAT::tdual_xfloat_2d buf,
                                int pbc_flag, int *pbc, ExecutionSpace space)
 {
   X_FLOAT dx,dy,dz;
@@ -600,14 +594,14 @@ int AtomVecDPDKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, DA
     if (space==Host) {
       AtomVecDPDKokkos_PackBorder<LMPHostType,1> f(
         buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
-        iswap,h_x,h_tag,h_type,h_mask,
+        h_x,h_tag,h_type,h_mask,
         h_dpdTheta,h_uCond,h_uMech,h_uChem,h_uCG,h_uCGnew,
         dx,dy,dz);
       Kokkos::parallel_for(n,f);
     } else {
       AtomVecDPDKokkos_PackBorder<LMPDeviceType,1> f(
         buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
-        iswap,d_x,d_tag,d_type,d_mask,
+        d_x,d_tag,d_type,d_mask,
         d_dpdTheta,d_uCond,d_uMech,d_uChem,d_uCG,d_uCGnew,
         dx,dy,dz);
       Kokkos::parallel_for(n,f);
@@ -618,14 +612,14 @@ int AtomVecDPDKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, DA
     if (space==Host) {
       AtomVecDPDKokkos_PackBorder<LMPHostType,0> f(
         buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
-        iswap,h_x,h_tag,h_type,h_mask,
+        h_x,h_tag,h_type,h_mask,
         h_dpdTheta,h_uCond,h_uMech,h_uChem,h_uCG,h_uCGnew,
         dx,dy,dz);
       Kokkos::parallel_for(n,f);
     } else {
       AtomVecDPDKokkos_PackBorder<LMPDeviceType,0> f(
         buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
-        iswap,d_x,d_tag,d_type,d_mask,
+        d_x,d_tag,d_type,d_mask,
         d_dpdTheta,d_uCond,d_uMech,d_uChem,d_uCG,d_uCGnew,
         dx,dy,dz);
       Kokkos::parallel_for(n,f);
diff --git a/src/KOKKOS/atom_vec_dpd_kokkos.h b/src/KOKKOS/atom_vec_dpd_kokkos.h
index a76d7f908a..185422cfcf 100644
--- a/src/KOKKOS/atom_vec_dpd_kokkos.h
+++ b/src/KOKKOS/atom_vec_dpd_kokkos.h
@@ -36,17 +36,16 @@ class AtomVecDPDKokkos : public AtomVecKokkos, public AtomVecDPD {
   void grow(int) override;
   void grow_pointers() override;
   void sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter) override;
-  int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist,
-                       const int & iswap,
+  int pack_comm_kokkos(const int &n, const DAT::tdual_int_1d &k_sendlist,
                        const DAT::tdual_xfloat_2d &buf,
                        const int &pbc_flag, const int pbc[]) override;
   void unpack_comm_kokkos(const int &n, const int &nfirst,
                           const DAT::tdual_xfloat_2d &buf) override;
-  int pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
-                     const int & iswap, const int nfirst,
+  int pack_comm_self(const int &n, const DAT::tdual_int_1d &list,
+                     const int nfirst,
                      const int &pbc_flag, const int pbc[]) override;
-  int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
-                         DAT::tdual_xfloat_2d buf,int iswap,
+  int pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist,
+                         DAT::tdual_xfloat_2d buf,
                          int pbc_flag, int *pbc, ExecutionSpace space) override;
   void unpack_border_kokkos(const int &n, const int &nfirst,
                             const DAT::tdual_xfloat_2d &buf,
diff --git a/src/KOKKOS/atom_vec_full_kokkos.cpp b/src/KOKKOS/atom_vec_full_kokkos.cpp
index 732078a627..1ae93a3df6 100644
--- a/src/KOKKOS/atom_vec_full_kokkos.cpp
+++ b/src/KOKKOS/atom_vec_full_kokkos.cpp
@@ -16,7 +16,6 @@
 
 #include "atom_kokkos.h"
 #include "atom_masks.h"
-#include "comm_kokkos.h"
 #include "domain.h"
 #include "error.h"
 #include "fix.h"
@@ -275,8 +274,7 @@ struct AtomVecFullKokkos_PackBorder {
   typedef ArrayTypes<DeviceType> AT;
 
   typename AT::t_xfloat_2d _buf;
-  const typename AT::t_int_2d_const _list;
-  const int _iswap;
+  const typename AT::t_int_1d_const _list;
   const typename AT::t_x_array_randomread _x;
   const typename AT::t_tagint_1d _tag;
   const typename AT::t_int_1d _type;
@@ -287,8 +285,7 @@ struct AtomVecFullKokkos_PackBorder {
 
   AtomVecFullKokkos_PackBorder(
       const typename AT::t_xfloat_2d &buf,
-      const typename AT::t_int_2d_const &list,
-      const int & iswap,
+      const typename AT::t_int_1d_const &list,
       const typename AT::t_x_array &x,
       const typename AT::t_tagint_1d &tag,
       const typename AT::t_int_1d &type,
@@ -296,13 +293,13 @@ struct AtomVecFullKokkos_PackBorder {
       const typename AT::t_float_1d &q,
       const typename AT::t_tagint_1d &molecule,
       const X_FLOAT &dx, const X_FLOAT &dy, const X_FLOAT &dz):
-      _buf(buf),_list(list),_iswap(iswap),
+      _buf(buf),_list(list),
       _x(x),_tag(tag),_type(type),_mask(mask),_q(q),_molecule(molecule),
       _dx(dx),_dy(dy),_dz(dz) {}
 
   KOKKOS_INLINE_FUNCTION
   void operator() (const int& i) const {
-      const int j = _list(_iswap,i);
+      const int j = _list(i);
       if (PBC_FLAG == 0) {
           _buf(i,0) = _x(j,0);
           _buf(i,1) = _x(j,1);
@@ -327,8 +324,8 @@ struct AtomVecFullKokkos_PackBorder {
 
 /* ---------------------------------------------------------------------- */
 
-int AtomVecFullKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
-                                               DAT::tdual_xfloat_2d buf,int iswap,
+int AtomVecFullKokkos::pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist,
+                                               DAT::tdual_xfloat_2d buf,
                                                int pbc_flag, int *pbc, ExecutionSpace space)
 {
   X_FLOAT dx,dy,dz;
@@ -346,12 +343,12 @@ int AtomVecFullKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
     if (space==Host) {
       AtomVecFullKokkos_PackBorder<LMPHostType,1> f(
         buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
-        iswap,h_x,h_tag,h_type,h_mask,h_q,h_molecule,dx,dy,dz);
+        h_x,h_tag,h_type,h_mask,h_q,h_molecule,dx,dy,dz);
       Kokkos::parallel_for(n,f);
     } else {
       AtomVecFullKokkos_PackBorder<LMPDeviceType,1> f(
         buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
-        iswap,d_x,d_tag,d_type,d_mask,d_q,d_molecule,dx,dy,dz);
+        d_x,d_tag,d_type,d_mask,d_q,d_molecule,dx,dy,dz);
       Kokkos::parallel_for(n,f);
     }
 
@@ -360,12 +357,12 @@ int AtomVecFullKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
     if (space==Host) {
       AtomVecFullKokkos_PackBorder<LMPHostType,0> f(
         buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
-        iswap,h_x,h_tag,h_type,h_mask,h_q,h_molecule,dx,dy,dz);
+        h_x,h_tag,h_type,h_mask,h_q,h_molecule,dx,dy,dz);
       Kokkos::parallel_for(n,f);
     } else {
       AtomVecFullKokkos_PackBorder<LMPDeviceType,0> f(
         buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
-        iswap,d_x,d_tag,d_type,d_mask,d_q,d_molecule,dx,dy,dz);
+        d_x,d_tag,d_type,d_mask,d_q,d_molecule,dx,dy,dz);
       Kokkos::parallel_for(n,f);
     }
   }
diff --git a/src/KOKKOS/atom_vec_full_kokkos.h b/src/KOKKOS/atom_vec_full_kokkos.h
index 4937ef4152..9eea48ef95 100644
--- a/src/KOKKOS/atom_vec_full_kokkos.h
+++ b/src/KOKKOS/atom_vec_full_kokkos.h
@@ -35,8 +35,8 @@ class AtomVecFullKokkos : public AtomVecKokkos, public AtomVecFull {
   void grow(int) override;
   void grow_pointers() override;
   void sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter) override;
-  int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
-                         DAT::tdual_xfloat_2d buf,int iswap,
+  int pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist,
+                         DAT::tdual_xfloat_2d buf,
                          int pbc_flag, int *pbc, ExecutionSpace space) override;
   void unpack_border_kokkos(const int &n, const int &nfirst,
                             const DAT::tdual_xfloat_2d &buf,
diff --git a/src/KOKKOS/atom_vec_hybrid_kokkos.cpp b/src/KOKKOS/atom_vec_hybrid_kokkos.cpp
index 08bcaaef74..77af2cff05 100644
--- a/src/KOKKOS/atom_vec_hybrid_kokkos.cpp
+++ b/src/KOKKOS/atom_vec_hybrid_kokkos.cpp
@@ -15,14 +15,7 @@
 #include "atom_vec_hybrid_kokkos.h"
 
 #include "atom_kokkos.h"
-#include "atom_masks.h"
-#include "domain.h"
 #include "error.h"
-#include "fix.h"
-#include "memory_kokkos.h"
-#include "modify.h"
-
-#include <cstring>
 
 using namespace LAMMPS_NS;
 
@@ -63,8 +56,7 @@ void AtomVecHybridKokkos::sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorte
 
 /* ---------------------------------------------------------------------- */
 
-int AtomVecHybridKokkos::pack_comm_kokkos(const int &/*n*/, const DAT::tdual_int_2d &/*k_sendlist*/,
-                                          const int & /*iswap*/,
+int AtomVecHybridKokkos::pack_comm_kokkos(const int &/*n*/, const DAT::tdual_int_1d &/*k_sendlist*/,
                                           const DAT::tdual_xfloat_2d &/*buf*/,
                                           const int &/*pbc_flag*/, const int /*pbc*/[])
 {
@@ -78,16 +70,16 @@ void AtomVecHybridKokkos::unpack_comm_kokkos(const int &/*n*/, const int &/*nfir
   error->all(FLERR,"AtomVecHybridKokkos doesn't yet support threaded comm");
 }
 
-int AtomVecHybridKokkos::pack_comm_self(const int &/*n*/, const DAT::tdual_int_2d &/*list*/,
-                                        const int & /*iswap*/, const int /*nfirst*/,
-                                        const int &/*pbc_flag*/, const int /*pbc*/[])
+int AtomVecHybridKokkos::pack_comm_self(const int &/*n*/, const DAT::tdual_int_1d &/*list*/,
+                                        const int /*nfirst*/,
+                                        const int &/*pbc_flag*/, const int pbc[])
 {
   error->all(FLERR,"AtomVecHybridKokkos doesn't yet support threaded comm");
   return 0;
 }
 
-int AtomVecHybridKokkos::pack_border_kokkos(int /*n*/, DAT::tdual_int_2d /*k_sendlist*/,
-                                            DAT::tdual_xfloat_2d /*buf*/,int /*iswap*/,
+int AtomVecHybridKokkos::pack_border_kokkos(int /*n*/, DAT::tdual_int_1d /*k_sendlist*/,
+                                            DAT::tdual_xfloat_2d /*buf*/,
                                             int /*pbc_flag*/, int * /*pbc*/, ExecutionSpace /*space*/)
 {
   error->all(FLERR,"AtomVecHybridKokkos doesn't yet support threaded comm");
diff --git a/src/KOKKOS/atom_vec_hybrid_kokkos.h b/src/KOKKOS/atom_vec_hybrid_kokkos.h
index 6f81c93673..19210549f5 100644
--- a/src/KOKKOS/atom_vec_hybrid_kokkos.h
+++ b/src/KOKKOS/atom_vec_hybrid_kokkos.h
@@ -36,17 +36,16 @@ class AtomVecHybridKokkos : public AtomVecKokkos, public AtomVecHybrid {
   void grow(int) override;
   void sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter) override;
 
-  int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist,
-                       const int & iswap,
+  int pack_comm_kokkos(const int &n, const DAT::tdual_int_1d &k_sendlist,
                        const DAT::tdual_xfloat_2d &buf,
                        const int &pbc_flag, const int pbc[]) override;
   void unpack_comm_kokkos(const int &n, const int &nfirst,
                           const DAT::tdual_xfloat_2d &buf) override;
-  int pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
-                     const int & iswap, const int nfirst,
+  int pack_comm_self(const int &n, const DAT::tdual_int_1d &list,
+                     const int nfirst,
                      const int &pbc_flag, const int pbc[]) override;
-  int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
-                         DAT::tdual_xfloat_2d buf,int iswap,
+  int pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist,
+                         DAT::tdual_xfloat_2d buf,
                          int pbc_flag, int *pbc, ExecutionSpace space) override;
   void unpack_border_kokkos(const int &n, const int &nfirst,
                             const DAT::tdual_xfloat_2d &buf,
diff --git a/src/KOKKOS/atom_vec_kokkos.cpp b/src/KOKKOS/atom_vec_kokkos.cpp
index 93393e9e09..d54cc2c3af 100644
--- a/src/KOKKOS/atom_vec_kokkos.cpp
+++ b/src/KOKKOS/atom_vec_kokkos.cpp
@@ -17,8 +17,8 @@
 #include "atom_kokkos.h"
 #include "atom_masks.h"
 #include "comm_kokkos.h"
-#include "error.h"
 #include "domain.h"
+#include "kokkos.h"
 
 using namespace LAMMPS_NS;
 
@@ -37,7 +37,6 @@ AtomVecKokkos::AtomVecKokkos(LAMMPS *lmp) : AtomVec(lmp)
 
   k_count = DAT::tdual_int_1d("atom:k_count",1);
   atomKK = (AtomKokkos *) atom;
-  commKK = (CommKokkos *) comm;
 }
 
 /* ---------------------------------------------------------------------- */
@@ -57,19 +56,17 @@ struct AtomVecKokkos_PackComm {
 
   typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
   typename ArrayTypes<DeviceType>::t_xfloat_2d_um _buf;
-  typename ArrayTypes<DeviceType>::t_int_2d_const _list;
-  const int _iswap;
+  typename ArrayTypes<DeviceType>::t_int_1d_const _list;
   X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
   X_FLOAT _pbc[6];
 
   AtomVecKokkos_PackComm(
       const typename DAT::tdual_x_array &x,
       const typename DAT::tdual_xfloat_2d &buf,
-      const typename DAT::tdual_int_2d &list,
-      const int & iswap,
+      const typename DAT::tdual_int_1d &list,
       const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
       const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc):
-      _x(x.view<DeviceType>()),_list(list.view<DeviceType>()),_iswap(iswap),
+      _x(x.view<DeviceType>()),_list(list.view<DeviceType>()),
       _xprd(xprd),_yprd(yprd),_zprd(zprd),
       _xy(xy),_xz(xz),_yz(yz) {
         const size_t maxsend = (buf.view<DeviceType>().extent(0)*buf.view<DeviceType>().extent(1))/3;
@@ -81,7 +78,7 @@ struct AtomVecKokkos_PackComm {
 
   KOKKOS_INLINE_FUNCTION
   void operator() (const int& i) const {
-        const int j = _list(_iswap,i);
+        const int j = _list(i);
       if (PBC_FLAG == 0) {
           _buf(i,0) = _x(j,0);
           _buf(i,1) = _x(j,1);
@@ -103,8 +100,7 @@ struct AtomVecKokkos_PackComm {
 /* ---------------------------------------------------------------------- */
 
 int AtomVecKokkos::pack_comm_kokkos(const int &n,
-                                          const DAT::tdual_int_2d &list,
-                                          const int & iswap,
+                                          const DAT::tdual_int_1d &list,
                                           const DAT::tdual_xfloat_2d &buf,
                                           const int &pbc_flag,
                                           const int* const pbc)
@@ -112,28 +108,28 @@ int AtomVecKokkos::pack_comm_kokkos(const int &n,
   // Check whether to always run forward communication on the host
   // Choose correct forward PackComm kernel
 
-  if (commKK->forward_comm_on_host) {
+  if (lmp->kokkos->forward_comm_on_host) {
     atomKK->sync(Host,X_MASK);
     if (pbc_flag) {
       if (domain->triclinic) {
-        struct AtomVecKokkos_PackComm<LMPHostType,1,1> f(atomKK->k_x,buf,list,iswap,
+        struct AtomVecKokkos_PackComm<LMPHostType,1,1> f(atomKK->k_x,buf,list,
           domain->xprd,domain->yprd,domain->zprd,
           domain->xy,domain->xz,domain->yz,pbc);
         Kokkos::parallel_for(n,f);
       } else {
-        struct AtomVecKokkos_PackComm<LMPHostType,1,0> f(atomKK->k_x,buf,list,iswap,
+        struct AtomVecKokkos_PackComm<LMPHostType,1,0> f(atomKK->k_x,buf,list,
           domain->xprd,domain->yprd,domain->zprd,
           domain->xy,domain->xz,domain->yz,pbc);
         Kokkos::parallel_for(n,f);
       }
     } else {
       if (domain->triclinic) {
-        struct AtomVecKokkos_PackComm<LMPHostType,0,1> f(atomKK->k_x,buf,list,iswap,
+        struct AtomVecKokkos_PackComm<LMPHostType,0,1> f(atomKK->k_x,buf,list,
           domain->xprd,domain->yprd,domain->zprd,
           domain->xy,domain->xz,domain->yz,pbc);
         Kokkos::parallel_for(n,f);
       } else {
-        struct AtomVecKokkos_PackComm<LMPHostType,0,0> f(atomKK->k_x,buf,list,iswap,
+        struct AtomVecKokkos_PackComm<LMPHostType,0,0> f(atomKK->k_x,buf,list,
           domain->xprd,domain->yprd,domain->zprd,
           domain->xy,domain->xz,domain->yz,pbc);
         Kokkos::parallel_for(n,f);
@@ -143,24 +139,24 @@ int AtomVecKokkos::pack_comm_kokkos(const int &n,
     atomKK->sync(Device,X_MASK);
     if (pbc_flag) {
       if (domain->triclinic) {
-        struct AtomVecKokkos_PackComm<LMPDeviceType,1,1> f(atomKK->k_x,buf,list,iswap,
+        struct AtomVecKokkos_PackComm<LMPDeviceType,1,1> f(atomKK->k_x,buf,list,
           domain->xprd,domain->yprd,domain->zprd,
           domain->xy,domain->xz,domain->yz,pbc);
         Kokkos::parallel_for(n,f);
       } else {
-        struct AtomVecKokkos_PackComm<LMPDeviceType,1,0> f(atomKK->k_x,buf,list,iswap,
+        struct AtomVecKokkos_PackComm<LMPDeviceType,1,0> f(atomKK->k_x,buf,list,
           domain->xprd,domain->yprd,domain->zprd,
           domain->xy,domain->xz,domain->yz,pbc);
         Kokkos::parallel_for(n,f);
       }
     } else {
       if (domain->triclinic) {
-        struct AtomVecKokkos_PackComm<LMPDeviceType,0,1> f(atomKK->k_x,buf,list,iswap,
+        struct AtomVecKokkos_PackComm<LMPDeviceType,0,1> f(atomKK->k_x,buf,list,
           domain->xprd,domain->yprd,domain->zprd,
           domain->xy,domain->xz,domain->yz,pbc);
         Kokkos::parallel_for(n,f);
       } else {
-        struct AtomVecKokkos_PackComm<LMPDeviceType,0,0> f(atomKK->k_x,buf,list,iswap,
+        struct AtomVecKokkos_PackComm<LMPDeviceType,0,0> f(atomKK->k_x,buf,list,
           domain->xprd,domain->yprd,domain->zprd,
           domain->xy,domain->xz,domain->yz,pbc);
         Kokkos::parallel_for(n,f);
@@ -168,7 +164,7 @@ int AtomVecKokkos::pack_comm_kokkos(const int &n,
     }
   }
 
-        return n*size_forward;
+  return n*size_forward;
 }
 
 /* ---------------------------------------------------------------------- */
@@ -180,19 +176,17 @@ struct AtomVecKokkos_PackCommSelf {
   typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
   typename ArrayTypes<DeviceType>::t_x_array _xw;
   int _nfirst;
-  typename ArrayTypes<DeviceType>::t_int_2d_const _list;
-  const int _iswap;
+  typename ArrayTypes<DeviceType>::t_int_1d_const _list;
   X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
   X_FLOAT _pbc[6];
 
   AtomVecKokkos_PackCommSelf(
       const typename DAT::tdual_x_array &x,
       const int &nfirst,
-      const typename DAT::tdual_int_2d &list,
-      const int & iswap,
+      const typename DAT::tdual_int_1d &list,
       const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
       const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc):
-      _x(x.view<DeviceType>()),_xw(x.view<DeviceType>()),_nfirst(nfirst),_list(list.view<DeviceType>()),_iswap(iswap),
+      _x(x.view<DeviceType>()),_xw(x.view<DeviceType>()),_nfirst(nfirst),_list(list.view<DeviceType>()),
       _xprd(xprd),_yprd(yprd),_zprd(zprd),
       _xy(xy),_xz(xz),_yz(yz) {
         _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2];
@@ -201,7 +195,7 @@ struct AtomVecKokkos_PackCommSelf {
 
   KOKKOS_INLINE_FUNCTION
   void operator() (const int& i) const {
-        const int j = _list(_iswap,i);
+        const int j = _list(i);
       if (PBC_FLAG == 0) {
           _xw(i+_nfirst,0) = _x(j,0);
           _xw(i+_nfirst,1) = _x(j,1);
@@ -223,66 +217,67 @@ struct AtomVecKokkos_PackCommSelf {
 
 /* ---------------------------------------------------------------------- */
 
-int AtomVecKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, const int & iswap,
+int AtomVecKokkos::pack_comm_self(const int &n, const DAT::tdual_int_1d &list,
                                         const int nfirst, const int &pbc_flag, const int* const pbc) {
-  if (commKK->forward_comm_on_host) {
+  if (lmp->kokkos->forward_comm_on_host) {
     atomKK->sync(Host,X_MASK);
-    atomKK->modified(Host,X_MASK);
     if (pbc_flag) {
       if (domain->triclinic) {
-      struct AtomVecKokkos_PackCommSelf<LMPHostType,1,1> f(atomKK->k_x,nfirst,list,iswap,
+        struct AtomVecKokkos_PackCommSelf<LMPHostType,1,1> f(atomKK->k_x,nfirst,list,
           domain->xprd,domain->yprd,domain->zprd,
           domain->xy,domain->xz,domain->yz,pbc);
-      Kokkos::parallel_for(n,f);
+        Kokkos::parallel_for(n,f);
       } else {
-      struct AtomVecKokkos_PackCommSelf<LMPHostType,1,0> f(atomKK->k_x,nfirst,list,iswap,
+        struct AtomVecKokkos_PackCommSelf<LMPHostType,1,0> f(atomKK->k_x,nfirst,list,
           domain->xprd,domain->yprd,domain->zprd,
           domain->xy,domain->xz,domain->yz,pbc);
-      Kokkos::parallel_for(n,f);
+        Kokkos::parallel_for(n,f);
       }
     } else {
       if (domain->triclinic) {
-      struct AtomVecKokkos_PackCommSelf<LMPHostType,0,1> f(atomKK->k_x,nfirst,list,iswap,
+        struct AtomVecKokkos_PackCommSelf<LMPHostType,0,1> f(atomKK->k_x,nfirst,list,
           domain->xprd,domain->yprd,domain->zprd,
           domain->xy,domain->xz,domain->yz,pbc);
-      Kokkos::parallel_for(n,f);
+        Kokkos::parallel_for(n,f);
       } else {
-      struct AtomVecKokkos_PackCommSelf<LMPHostType,0,0> f(atomKK->k_x,nfirst,list,iswap,
+        struct AtomVecKokkos_PackCommSelf<LMPHostType,0,0> f(atomKK->k_x,nfirst,list,
           domain->xprd,domain->yprd,domain->zprd,
           domain->xy,domain->xz,domain->yz,pbc);
-      Kokkos::parallel_for(n,f);
+        Kokkos::parallel_for(n,f);
       }
     }
+    atomKK->modified(Host,X_MASK);
   } else {
     atomKK->sync(Device,X_MASK);
-    atomKK->modified(Device,X_MASK);
     if (pbc_flag) {
       if (domain->triclinic) {
-      struct AtomVecKokkos_PackCommSelf<LMPDeviceType,1,1> f(atomKK->k_x,nfirst,list,iswap,
+        struct AtomVecKokkos_PackCommSelf<LMPDeviceType,1,1> f(atomKK->k_x,nfirst,list,
           domain->xprd,domain->yprd,domain->zprd,
           domain->xy,domain->xz,domain->yz,pbc);
-      Kokkos::parallel_for(n,f);
+        Kokkos::parallel_for(n,f);
       } else {
-      struct AtomVecKokkos_PackCommSelf<LMPDeviceType,1,0> f(atomKK->k_x,nfirst,list,iswap,
+        struct AtomVecKokkos_PackCommSelf<LMPDeviceType,1,0> f(atomKK->k_x,nfirst,list,
           domain->xprd,domain->yprd,domain->zprd,
           domain->xy,domain->xz,domain->yz,pbc);
-      Kokkos::parallel_for(n,f);
+        Kokkos::parallel_for(n,f);
       }
     } else {
       if (domain->triclinic) {
-      struct AtomVecKokkos_PackCommSelf<LMPDeviceType,0,1> f(atomKK->k_x,nfirst,list,iswap,
+        struct AtomVecKokkos_PackCommSelf<LMPDeviceType,0,1> f(atomKK->k_x,nfirst,list,
           domain->xprd,domain->yprd,domain->zprd,
           domain->xy,domain->xz,domain->yz,pbc);
-      Kokkos::parallel_for(n,f);
+        Kokkos::parallel_for(n,f);
       } else {
-      struct AtomVecKokkos_PackCommSelf<LMPDeviceType,0,0> f(atomKK->k_x,nfirst,list,iswap,
+        struct AtomVecKokkos_PackCommSelf<LMPDeviceType,0,0> f(atomKK->k_x,nfirst,list,
           domain->xprd,domain->yprd,domain->zprd,
           domain->xy,domain->xz,domain->yz,pbc);
-      Kokkos::parallel_for(n,f);
+        Kokkos::parallel_for(n,f);
       }
     }
+    atomKK->modified(Device,X_MASK);
   }
-        return n*3;
+
+  return n*3;
 }
 
 
@@ -361,35 +356,36 @@ struct AtomVecKokkos_PackCommSelfFused {
 int AtomVecKokkos::pack_comm_self_fused(const int &n, const DAT::tdual_int_2d &list, const DAT::tdual_int_1d &sendnum_scan,
                                          const DAT::tdual_int_1d &firstrecv, const DAT::tdual_int_1d &pbc_flag, const DAT::tdual_int_2d &pbc,
                                          const DAT::tdual_int_1d &g2l) {
-  if (commKK->forward_comm_on_host) {
+  if (lmp->kokkos->forward_comm_on_host) {
     atomKK->sync(Host,X_MASK);
-    atomKK->modified(Host,X_MASK);
     if (domain->triclinic) {
-    struct AtomVecKokkos_PackCommSelfFused<LMPHostType,1> f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l,
+      struct AtomVecKokkos_PackCommSelfFused<LMPHostType,1> f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l,
         domain->xprd,domain->yprd,domain->zprd,
         domain->xy,domain->xz,domain->yz);
-    Kokkos::parallel_for(n,f);
+      Kokkos::parallel_for(n,f);
     } else {
-    struct AtomVecKokkos_PackCommSelfFused<LMPHostType,0> f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l,
+      struct AtomVecKokkos_PackCommSelfFused<LMPHostType,0> f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l,
         domain->xprd,domain->yprd,domain->zprd,
         domain->xy,domain->xz,domain->yz);
-    Kokkos::parallel_for(n,f);
+      Kokkos::parallel_for(n,f);
     }
+    atomKK->modified(Host,X_MASK);
   } else {
     atomKK->sync(Device,X_MASK);
-    atomKK->modified(Device,X_MASK);
     if (domain->triclinic) {
-    struct AtomVecKokkos_PackCommSelfFused<LMPDeviceType,1> f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l,
+      struct AtomVecKokkos_PackCommSelfFused<LMPDeviceType,1> f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l,
         domain->xprd,domain->yprd,domain->zprd,
         domain->xy,domain->xz,domain->yz);
-    Kokkos::parallel_for(n,f);
+      Kokkos::parallel_for(n,f);
     } else {
-    struct AtomVecKokkos_PackCommSelfFused<LMPDeviceType,0> f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l,
+      struct AtomVecKokkos_PackCommSelfFused<LMPDeviceType,0> f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l,
         domain->xprd,domain->yprd,domain->zprd,
         domain->xy,domain->xz,domain->yz);
-    Kokkos::parallel_for(n,f);
+      Kokkos::parallel_for(n,f);
     }
+    atomKK->modified(Device,X_MASK);
   }
+
   return n*3;
 }
 
@@ -421,20 +417,19 @@ struct AtomVecKokkos_UnpackComm {
 
 void AtomVecKokkos::unpack_comm_kokkos(const int &n, const int &first,
     const DAT::tdual_xfloat_2d &buf) {
-  if (commKK->forward_comm_on_host) {
+  if (lmp->kokkos->forward_comm_on_host) {
     atomKK->sync(Host,X_MASK);
-    atomKK->modified(Host,X_MASK);
     struct AtomVecKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first);
     Kokkos::parallel_for(n,f);
+    atomKK->modified(Host,X_MASK);
   } else {
     atomKK->sync(Device,X_MASK);
-    atomKK->modified(Device,X_MASK);
     struct AtomVecKokkos_UnpackComm<LMPDeviceType> f(atomKK->k_x,buf,first);
     Kokkos::parallel_for(n,f);
+    atomKK->modified(Device,X_MASK);
   }
 }
 
-
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType,int PBC_FLAG,int TRICLINIC,int DEFORM_VREMAP>
@@ -445,8 +440,7 @@ struct AtomVecKokkos_PackCommVel {
   typename ArrayTypes<DeviceType>::t_int_1d _mask;
   typename ArrayTypes<DeviceType>::t_v_array _v;
   typename ArrayTypes<DeviceType>::t_xfloat_2d_um _buf;
-  typename ArrayTypes<DeviceType>::t_int_2d_const _list;
-  const int _iswap;
+  typename ArrayTypes<DeviceType>::t_int_1d_const _list;
   X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
   X_FLOAT _pbc[6];
   X_FLOAT _h_rate[6];
@@ -457,8 +451,7 @@ struct AtomVecKokkos_PackCommVel {
     const typename DAT::tdual_int_1d &mask,
     const typename DAT::tdual_v_array &v,
     const typename DAT::tdual_xfloat_2d &buf,
-    const typename DAT::tdual_int_2d &list,
-    const int &iswap,
+    const typename DAT::tdual_int_1d &list,
     const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
     const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc,
     const double * const h_rate,
@@ -466,7 +459,7 @@ struct AtomVecKokkos_PackCommVel {
     _x(x.view<DeviceType>()),
     _mask(mask.view<DeviceType>()),
     _v(v.view<DeviceType>()),
-    _list(list.view<DeviceType>()),_iswap(iswap),
+    _list(list.view<DeviceType>()),
     _xprd(xprd),_yprd(yprd),_zprd(zprd),
     _xy(xy),_xz(xz),_yz(yz),
     _deform_vremap(deform_vremap)
@@ -482,7 +475,7 @@ struct AtomVecKokkos_PackCommVel {
 
   KOKKOS_INLINE_FUNCTION
   void operator() (const int& i) const {
-    const int j = _list(_iswap,i);
+    const int j = _list(i);
     if (PBC_FLAG == 0) {
       _buf(i,0) = _x(j,0);
       _buf(i,1) = _x(j,1);
@@ -524,13 +517,12 @@ struct AtomVecKokkos_PackCommVel {
 
 int AtomVecKokkos::pack_comm_vel_kokkos(
   const int &n,
-  const DAT::tdual_int_2d &list,
-  const int & iswap,
+  const DAT::tdual_int_1d &list,
   const DAT::tdual_xfloat_2d &buf,
   const int &pbc_flag,
   const int* const pbc)
 {
-  if (commKK->forward_comm_on_host) {
+  if (lmp->kokkos->forward_comm_on_host) {
     atomKK->sync(Host,X_MASK|V_MASK);
     if (pbc_flag) {
       if (deform_vremap) {
@@ -538,7 +530,7 @@ int AtomVecKokkos::pack_comm_vel_kokkos(
           struct AtomVecKokkos_PackCommVel<LMPHostType,1,1,1> f(
             atomKK->k_x,atomKK->k_mask,
             atomKK->k_v,
-            buf,list,iswap,
+            buf,list,
             domain->xprd,domain->yprd,domain->zprd,
             domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
           Kokkos::parallel_for(n,f);
@@ -546,7 +538,7 @@ int AtomVecKokkos::pack_comm_vel_kokkos(
           struct AtomVecKokkos_PackCommVel<LMPHostType,1,0,1> f(
             atomKK->k_x,atomKK->k_mask,
             atomKK->k_v,
-            buf,list,iswap,
+            buf,list,
             domain->xprd,domain->yprd,domain->zprd,
             domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
           Kokkos::parallel_for(n,f);
@@ -556,7 +548,7 @@ int AtomVecKokkos::pack_comm_vel_kokkos(
           struct AtomVecKokkos_PackCommVel<LMPHostType,1,1,0> f(
             atomKK->k_x,atomKK->k_mask,
             atomKK->k_v,
-            buf,list,iswap,
+            buf,list,
             domain->xprd,domain->yprd,domain->zprd,
             domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
           Kokkos::parallel_for(n,f);
@@ -564,7 +556,7 @@ int AtomVecKokkos::pack_comm_vel_kokkos(
           struct AtomVecKokkos_PackCommVel<LMPHostType,1,0,0> f(
             atomKK->k_x,atomKK->k_mask,
             atomKK->k_v,
-            buf,list,iswap,
+            buf,list,
             domain->xprd,domain->yprd,domain->zprd,
             domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
           Kokkos::parallel_for(n,f);
@@ -575,7 +567,7 @@ int AtomVecKokkos::pack_comm_vel_kokkos(
         struct AtomVecKokkos_PackCommVel<LMPHostType,0,1,0> f(
           atomKK->k_x,atomKK->k_mask,
           atomKK->k_v,
-          buf,list,iswap,
+          buf,list,
           domain->xprd,domain->yprd,domain->zprd,
           domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
         Kokkos::parallel_for(n,f);
@@ -583,7 +575,7 @@ int AtomVecKokkos::pack_comm_vel_kokkos(
         struct AtomVecKokkos_PackCommVel<LMPHostType,0,0,0> f(
           atomKK->k_x,atomKK->k_mask,
           atomKK->k_v,
-          buf,list,iswap,
+          buf,list,
           domain->xprd,domain->yprd,domain->zprd,
           domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
         Kokkos::parallel_for(n,f);
@@ -597,7 +589,7 @@ int AtomVecKokkos::pack_comm_vel_kokkos(
           struct AtomVecKokkos_PackCommVel<LMPDeviceType,1,1,1> f(
             atomKK->k_x,atomKK->k_mask,
             atomKK->k_v,
-            buf,list,iswap,
+            buf,list,
             domain->xprd,domain->yprd,domain->zprd,
             domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
           Kokkos::parallel_for(n,f);
@@ -605,7 +597,7 @@ int AtomVecKokkos::pack_comm_vel_kokkos(
           struct AtomVecKokkos_PackCommVel<LMPDeviceType,1,0,1> f(
             atomKK->k_x,atomKK->k_mask,
             atomKK->k_v,
-            buf,list,iswap,
+            buf,list,
             domain->xprd,domain->yprd,domain->zprd,
             domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
           Kokkos::parallel_for(n,f);
@@ -615,7 +607,7 @@ int AtomVecKokkos::pack_comm_vel_kokkos(
           struct AtomVecKokkos_PackCommVel<LMPDeviceType,1,1,0> f(
             atomKK->k_x,atomKK->k_mask,
             atomKK->k_v,
-            buf,list,iswap,
+            buf,list,
             domain->xprd,domain->yprd,domain->zprd,
             domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
           Kokkos::parallel_for(n,f);
@@ -623,7 +615,7 @@ int AtomVecKokkos::pack_comm_vel_kokkos(
           struct AtomVecKokkos_PackCommVel<LMPDeviceType,1,0,0> f(
             atomKK->k_x,atomKK->k_mask,
             atomKK->k_v,
-            buf,list,iswap,
+            buf,list,
             domain->xprd,domain->yprd,domain->zprd,
             domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
           Kokkos::parallel_for(n,f);
@@ -634,7 +626,7 @@ int AtomVecKokkos::pack_comm_vel_kokkos(
         struct AtomVecKokkos_PackCommVel<LMPDeviceType,0,1,0> f(
           atomKK->k_x,atomKK->k_mask,
           atomKK->k_v,
-          buf,list,iswap,
+          buf,list,
           domain->xprd,domain->yprd,domain->zprd,
           domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
         Kokkos::parallel_for(n,f);
@@ -642,13 +634,14 @@ int AtomVecKokkos::pack_comm_vel_kokkos(
         struct AtomVecKokkos_PackCommVel<LMPDeviceType,0,0,0> f(
           atomKK->k_x,atomKK->k_mask,
           atomKK->k_v,
-          buf,list,iswap,
+          buf,list,
           domain->xprd,domain->yprd,domain->zprd,
           domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
         Kokkos::parallel_for(n,f);
       }
     }
   }
+
   return n*6;
 }
 
@@ -692,16 +685,16 @@ struct AtomVecKokkos_UnpackCommVel {
 
 void AtomVecKokkos::unpack_comm_vel_kokkos(const int &n, const int &first,
     const DAT::tdual_xfloat_2d &buf) {
-  if (commKK->forward_comm_on_host) {
+  if (lmp->kokkos->forward_comm_on_host) {
     atomKK->sync(Host,X_MASK|V_MASK);
-    atomKK->modified(Host,X_MASK|V_MASK);
     struct AtomVecKokkos_UnpackCommVel<LMPHostType> f(atomKK->k_x,atomKK->k_v,buf,first);
     Kokkos::parallel_for(n,f);
+    atomKK->modified(Host,X_MASK|V_MASK);
   } else {
     atomKK->sync(Device,X_MASK|V_MASK);
-    atomKK->modified(Device,X_MASK|V_MASK);
     struct AtomVecKokkos_UnpackCommVel<LMPDeviceType> f(atomKK->k_x,atomKK->k_v,buf,first);
     Kokkos::parallel_for(n,f);
+    atomKK->modified(Device,X_MASK|V_MASK);
   }
 }
 
@@ -733,7 +726,7 @@ struct AtomVecKokkos_PackReverse {
 
 int AtomVecKokkos::pack_reverse_kokkos(const int &n, const int &first,
     const DAT::tdual_ffloat_2d &buf) {
-  if (commKK->reverse_comm_on_host) {
+  if (lmp->kokkos->reverse_comm_on_host) {
     atomKK->sync(Host,F_MASK);
     struct AtomVecKokkos_PackReverse<LMPHostType> f(atomKK->k_f,buf,first);
     Kokkos::parallel_for(n,f);
@@ -755,20 +748,18 @@ struct AtomVecKokkos_UnPackReverseSelf {
   typename ArrayTypes<DeviceType>::t_f_array_randomread _f;
   typename ArrayTypes<DeviceType>::t_f_array _fw;
   int _nfirst;
-  typename ArrayTypes<DeviceType>::t_int_2d_const _list;
-  const int _iswap;
+  typename ArrayTypes<DeviceType>::t_int_1d_const _list;
 
   AtomVecKokkos_UnPackReverseSelf(
       const typename DAT::tdual_f_array &f,
       const int &nfirst,
-      const typename DAT::tdual_int_2d &list,
-      const int & iswap):
-      _f(f.view<DeviceType>()),_fw(f.view<DeviceType>()),_nfirst(nfirst),_list(list.view<DeviceType>()),_iswap(iswap) {
+      const typename DAT::tdual_int_1d &list):
+      _f(f.view<DeviceType>()),_fw(f.view<DeviceType>()),_nfirst(nfirst),_list(list.view<DeviceType>()) {
   };
 
   KOKKOS_INLINE_FUNCTION
   void operator() (const int& i) const {
-    const int j = _list(_iswap,i);
+    const int j = _list(i);
     _fw(j,0) += _f(i+_nfirst,0);
     _fw(j,1) += _f(i+_nfirst,1);
     _fw(j,2) += _f(i+_nfirst,2);
@@ -777,19 +768,20 @@ struct AtomVecKokkos_UnPackReverseSelf {
 
 /* ---------------------------------------------------------------------- */
 
-int AtomVecKokkos::unpack_reverse_self(const int &n, const DAT::tdual_int_2d &list, const int & iswap,
-                                        const int nfirst) {
-  if (commKK->reverse_comm_on_host) {
+int AtomVecKokkos::pack_reverse_self(const int &n, const DAT::tdual_int_1d &list,
+                                     const int nfirst) {
+  if (lmp->kokkos->reverse_comm_on_host) {
     atomKK->sync(Host,F_MASK);
-    struct AtomVecKokkos_UnPackReverseSelf<LMPHostType> f(atomKK->k_f,nfirst,list,iswap);
+    struct AtomVecKokkos_UnPackReverseSelf<LMPHostType> f(atomKK->k_f,nfirst,list);
     Kokkos::parallel_for(n,f);
     atomKK->modified(Host,F_MASK);
   } else {
     atomKK->sync(Device,F_MASK);
-    struct AtomVecKokkos_UnPackReverseSelf<LMPDeviceType> f(atomKK->k_f,nfirst,list,iswap);
+    struct AtomVecKokkos_UnPackReverseSelf<LMPDeviceType> f(atomKK->k_f,nfirst,list);
     Kokkos::parallel_for(n,f);
     atomKK->modified(Device,F_MASK);
   }
+
   return n*3;
 }
 
@@ -801,15 +793,13 @@ struct AtomVecKokkos_UnPackReverse {
 
   typename ArrayTypes<DeviceType>::t_f_array _f;
   typename ArrayTypes<DeviceType>::t_ffloat_2d_const _buf;
-  typename ArrayTypes<DeviceType>::t_int_2d_const _list;
-  const int _iswap;
+  typename ArrayTypes<DeviceType>::t_int_1d_const _list;
 
   AtomVecKokkos_UnPackReverse(
       const typename DAT::tdual_f_array &f,
       const typename DAT::tdual_ffloat_2d &buf,
-      const typename DAT::tdual_int_2d &list,
-      const int & iswap):
-      _f(f.view<DeviceType>()),_list(list.view<DeviceType>()),_iswap(iswap) {
+      const typename DAT::tdual_int_1d &list):
+      _f(f.view<DeviceType>()),_list(list.view<DeviceType>()) {
         const size_t maxsend = (buf.view<DeviceType>().extent(0)*buf.view<DeviceType>().extent(1))/3;
         const size_t elements = 3;
         buffer_view<DeviceType>(_buf,buf,maxsend,elements);
@@ -817,7 +807,7 @@ struct AtomVecKokkos_UnPackReverse {
 
   KOKKOS_INLINE_FUNCTION
   void operator() (const int& i) const {
-    const int j = _list(_iswap,i);
+    const int j = _list(i);
     _f(j,0) += _buf(i,0);
     _f(j,1) += _buf(i,1);
     _f(j,2) += _buf(i,2);
@@ -827,19 +817,18 @@ struct AtomVecKokkos_UnPackReverse {
 /* ---------------------------------------------------------------------- */
 
 void AtomVecKokkos::unpack_reverse_kokkos(const int &n,
-                                          const DAT::tdual_int_2d &list,
-                                          const int & iswap,
+                                          const DAT::tdual_int_1d &list,
                                           const DAT::tdual_ffloat_2d &buf)
 {
   // Check whether to always run reverse communication on the host
   // Choose correct reverse UnPackReverse kernel
 
-  if (commKK->reverse_comm_on_host) {
-    struct AtomVecKokkos_UnPackReverse<LMPHostType> f(atomKK->k_f,buf,list,iswap);
+  if (lmp->kokkos->reverse_comm_on_host) {
+    struct AtomVecKokkos_UnPackReverse<LMPHostType> f(atomKK->k_f,buf,list);
     Kokkos::parallel_for(n,f);
     atomKK->modified(Host,F_MASK);
   } else {
-    struct AtomVecKokkos_UnPackReverse<LMPDeviceType> f(atomKK->k_f,buf,list,iswap);
+    struct AtomVecKokkos_UnPackReverse<LMPDeviceType> f(atomKK->k_f,buf,list);
     Kokkos::parallel_for(n,f);
     atomKK->modified(Device,F_MASK);
   }
diff --git a/src/KOKKOS/atom_vec_kokkos.h b/src/KOKKOS/atom_vec_kokkos.h
index c10ff5b40a..646d9a4840 100644
--- a/src/KOKKOS/atom_vec_kokkos.h
+++ b/src/KOKKOS/atom_vec_kokkos.h
@@ -50,8 +50,8 @@ class AtomVecKokkos : virtual public AtomVec {
   virtual void sync_overlapping_device(ExecutionSpace space, unsigned int mask) = 0;
 
   virtual int
-    pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
-                   const int & iswap, const int nfirst,
+    pack_comm_self(const int &n, const DAT::tdual_int_1d &list,
+                   const int nfirst,
                    const int &pbc_flag, const int pbc[]);
 
   virtual int
@@ -63,8 +63,8 @@ class AtomVecKokkos : virtual public AtomVec {
                          const DAT::tdual_int_1d &g2l);
 
   virtual int
-    pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &list,
-                     const int & iswap, const DAT::tdual_xfloat_2d &buf,
+    pack_comm_kokkos(const int &n, const DAT::tdual_int_1d &list,
+                     const DAT::tdual_xfloat_2d &buf,
                      const int &pbc_flag, const int pbc[]);
 
   virtual void
@@ -72,8 +72,8 @@ class AtomVecKokkos : virtual public AtomVec {
                        const DAT::tdual_xfloat_2d &buf);
 
   virtual int
-    pack_comm_vel_kokkos(const int &n, const DAT::tdual_int_2d &list,
-                         const int & iswap, const DAT::tdual_xfloat_2d &buf,
+    pack_comm_vel_kokkos(const int &n, const DAT::tdual_int_1d &list,
+                         const DAT::tdual_xfloat_2d &buf,
                          const int &pbc_flag, const int pbc[]);
 
   virtual void
@@ -81,20 +81,20 @@ class AtomVecKokkos : virtual public AtomVec {
                            const DAT::tdual_xfloat_2d &buf);
 
   virtual int
-    unpack_reverse_self(const int &n, const DAT::tdual_int_2d &list,
-                      const int & iswap, const int nfirst);
+    pack_reverse_self(const int &n, const DAT::tdual_int_1d &list,
+                      const int nfirst);
 
   virtual int
     pack_reverse_kokkos(const int &n, const int &nfirst,
                         const DAT::tdual_ffloat_2d &buf);
 
   virtual void
-    unpack_reverse_kokkos(const int &n, const DAT::tdual_int_2d &list,
-                          const int & iswap, const DAT::tdual_ffloat_2d &buf);
+    unpack_reverse_kokkos(const int &n, const DAT::tdual_int_1d &list,
+                          const DAT::tdual_ffloat_2d &buf);
 
   virtual int
-    pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
-                       DAT::tdual_xfloat_2d buf,int iswap,
+    pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist,
+                       DAT::tdual_xfloat_2d buf,
                        int pbc_flag, int *pbc, ExecutionSpace space) = 0;
 
   virtual void
@@ -103,8 +103,8 @@ class AtomVecKokkos : virtual public AtomVec {
                          ExecutionSpace space) = 0;
 
   virtual int
-    pack_border_vel_kokkos(int /*n*/, DAT::tdual_int_2d /*k_sendlist*/,
-                           DAT::tdual_xfloat_2d /*buf*/,int /*iswap*/,
+    pack_border_vel_kokkos(int /*n*/, DAT::tdual_int_1d /*k_sendlist*/,
+                           DAT::tdual_xfloat_2d /*buf*/,
                            int /*pbc_flag*/, int * /*pbc*/, ExecutionSpace /*space*/) { return 0; }
 
   virtual void
@@ -133,7 +133,6 @@ class AtomVecKokkos : virtual public AtomVec {
   HAT::t_v_array h_v;
   HAT::t_f_array h_f;
 
-  class CommKokkos *commKK;
   size_t buffer_size;
   void* buffer;
 
diff --git a/src/KOKKOS/atom_vec_molecular_kokkos.cpp b/src/KOKKOS/atom_vec_molecular_kokkos.cpp
index ec98ff9239..68369e2fad 100644
--- a/src/KOKKOS/atom_vec_molecular_kokkos.cpp
+++ b/src/KOKKOS/atom_vec_molecular_kokkos.cpp
@@ -260,295 +260,13 @@ void AtomVecMolecularKokkos::sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &So
 
 /* ---------------------------------------------------------------------- */
 
-template<class DeviceType,int PBC_FLAG,int TRICLINIC>
-struct AtomVecMolecularKokkos_PackComm {
-  typedef DeviceType device_type;
-
-  typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
-  typename ArrayTypes<DeviceType>::t_xfloat_2d_um _buf;
-  typename ArrayTypes<DeviceType>::t_int_2d_const _list;
-  const int _iswap;
-  X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
-  X_FLOAT _pbc[6];
-
-  AtomVecMolecularKokkos_PackComm(
-      const typename DAT::tdual_x_array &x,
-      const typename DAT::tdual_xfloat_2d &buf,
-      const typename DAT::tdual_int_2d &list,
-      const int & iswap,
-      const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
-      const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc):
-      _x(x.view<DeviceType>()),_list(list.view<DeviceType>()),_iswap(iswap),
-      _xprd(xprd),_yprd(yprd),_zprd(zprd),
-      _xy(xy),_xz(xz),_yz(yz) {
-        const size_t maxsend = (buf.view<DeviceType>().extent(0)
-                                *buf.view<DeviceType>().extent(1))/3;
-        const size_t elements = 3;
-        buffer_view<DeviceType>(_buf,buf,maxsend,elements);
-        _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2];
-        _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5];
-  };
-
-  KOKKOS_INLINE_FUNCTION
-  void operator() (const int& i) const {
-        const int j = _list(_iswap,i);
-      if (PBC_FLAG == 0) {
-          _buf(i,0) = _x(j,0);
-          _buf(i,1) = _x(j,1);
-          _buf(i,2) = _x(j,2);
-      } else {
-        if (TRICLINIC == 0) {
-          _buf(i,0) = _x(j,0) + _pbc[0]*_xprd;
-          _buf(i,1) = _x(j,1) + _pbc[1]*_yprd;
-          _buf(i,2) = _x(j,2) + _pbc[2]*_zprd;
-        } else {
-          _buf(i,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz;
-          _buf(i,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz;
-          _buf(i,2) = _x(j,2) + _pbc[2]*_zprd;
-        }
-      }
-  }
-};
-
-/* ---------------------------------------------------------------------- */
-
-int AtomVecMolecularKokkos::pack_comm_kokkos(const int &n,
-                                             const DAT::tdual_int_2d &list,
-                                             const int & iswap,
-                                             const DAT::tdual_xfloat_2d &buf,
-                                             const int &pbc_flag,
-                                             const int* const pbc)
-{
-  // Check whether to always run forward communication on the host
-  // Choose correct forward PackComm kernel
-
-  if (commKK->forward_comm_on_host) {
-    atomKK->sync(Host,X_MASK);
-    if (pbc_flag) {
-      if (domain->triclinic) {
-        struct AtomVecMolecularKokkos_PackComm<LMPHostType,1,1>
-          f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
-            domain->xy,domain->xz,domain->yz,pbc);
-        Kokkos::parallel_for(n,f);
-      } else {
-        struct AtomVecMolecularKokkos_PackComm<LMPHostType,1,0>
-          f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
-            domain->xy,domain->xz,domain->yz,pbc);
-        Kokkos::parallel_for(n,f);
-      }
-    } else {
-      if (domain->triclinic) {
-        struct AtomVecMolecularKokkos_PackComm<LMPHostType,0,1>
-          f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
-            domain->xy,domain->xz,domain->yz,pbc);
-        Kokkos::parallel_for(n,f);
-      } else {
-        struct AtomVecMolecularKokkos_PackComm<LMPHostType,0,0>
-          f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
-            domain->xy,domain->xz,domain->yz,pbc);
-        Kokkos::parallel_for(n,f);
-      }
-    }
-  } else {
-    atomKK->sync(Device,X_MASK);
-    if (pbc_flag) {
-      if (domain->triclinic) {
-        struct AtomVecMolecularKokkos_PackComm<LMPDeviceType,1,1>
-          f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
-            domain->xy,domain->xz,domain->yz,pbc);
-        Kokkos::parallel_for(n,f);
-      } else {
-        struct AtomVecMolecularKokkos_PackComm<LMPDeviceType,1,0>
-          f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
-            domain->xy,domain->xz,domain->yz,pbc);
-        Kokkos::parallel_for(n,f);
-      }
-    } else {
-      if (domain->triclinic) {
-        struct AtomVecMolecularKokkos_PackComm<LMPDeviceType,0,1>
-          f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
-            domain->xy,domain->xz,domain->yz,pbc);
-        Kokkos::parallel_for(n,f);
-      } else {
-        struct AtomVecMolecularKokkos_PackComm<LMPDeviceType,0,0>
-          f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
-            domain->xy,domain->xz,domain->yz,pbc);
-        Kokkos::parallel_for(n,f);
-      }
-    }
-  }
-
-        return n*size_forward;
-}
-
-/* ---------------------------------------------------------------------- */
-
-template<class DeviceType,int PBC_FLAG,int TRICLINIC>
-struct AtomVecMolecularKokkos_PackCommSelf {
-  typedef DeviceType device_type;
-
-  typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
-  typename ArrayTypes<DeviceType>::t_x_array _xw;
-  int _nfirst;
-  typename ArrayTypes<DeviceType>::t_int_2d_const _list;
-  const int _iswap;
-  X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
-  X_FLOAT _pbc[6];
-
-  AtomVecMolecularKokkos_PackCommSelf(
-      const typename DAT::tdual_x_array &x,
-      const int &nfirst,
-      const typename DAT::tdual_int_2d &list,
-      const int & iswap,
-      const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
-      const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc):
-    _x(x.view<DeviceType>()),_xw(x.view<DeviceType>()),_nfirst(nfirst),
-    _list(list.view<DeviceType>()),_iswap(iswap),
-    _xprd(xprd),_yprd(yprd),_zprd(zprd),
-    _xy(xy),_xz(xz),_yz(yz) {
-    _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2];
-    _pbc[3] = pbc[3]; _pbc[4] = pbc[4]; _pbc[5] = pbc[5];
-  };
-
-  KOKKOS_INLINE_FUNCTION
-  void operator() (const int& i) const {
-        const int j = _list(_iswap,i);
-      if (PBC_FLAG == 0) {
-          _xw(i+_nfirst,0) = _x(j,0);
-          _xw(i+_nfirst,1) = _x(j,1);
-          _xw(i+_nfirst,2) = _x(j,2);
-      } else {
-        if (TRICLINIC == 0) {
-          _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd;
-          _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd;
-          _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd;
-        } else {
-          _xw(i+_nfirst,0) = _x(j,0) + _pbc[0]*_xprd + _pbc[5]*_xy + _pbc[4]*_xz;
-          _xw(i+_nfirst,1) = _x(j,1) + _pbc[1]*_yprd + _pbc[3]*_yz;
-          _xw(i+_nfirst,2) = _x(j,2) + _pbc[2]*_zprd;
-        }
-      }
-
-  }
-};
-
-/* ---------------------------------------------------------------------- */
-
-int AtomVecMolecularKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
-                                           const int & iswap,
-                                           const int nfirst, const int &pbc_flag,
-                                           const int* const pbc) {
-  if (commKK->forward_comm_on_host) {
-    atomKK->sync(Host,X_MASK);
-    atomKK->modified(Host,X_MASK);
-    if (pbc_flag) {
-      if (domain->triclinic) {
-      struct AtomVecMolecularKokkos_PackCommSelf<LMPHostType,1,1>
-        f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-      Kokkos::parallel_for(n,f);
-      } else {
-      struct AtomVecMolecularKokkos_PackCommSelf<LMPHostType,1,0>
-        f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-      Kokkos::parallel_for(n,f);
-      }
-    } else {
-      if (domain->triclinic) {
-      struct AtomVecMolecularKokkos_PackCommSelf<LMPHostType,0,1>
-        f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-      Kokkos::parallel_for(n,f);
-      } else {
-      struct AtomVecMolecularKokkos_PackCommSelf<LMPHostType,0,0>
-        f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-      Kokkos::parallel_for(n,f);
-      }
-    }
-  } else {
-    atomKK->sync(Device,X_MASK);
-    atomKK->modified(Device,X_MASK);
-    if (pbc_flag) {
-      if (domain->triclinic) {
-      struct AtomVecMolecularKokkos_PackCommSelf<LMPDeviceType,1,1>
-        f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-      Kokkos::parallel_for(n,f);
-      } else {
-      struct AtomVecMolecularKokkos_PackCommSelf<LMPDeviceType,1,0>
-        f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-      Kokkos::parallel_for(n,f);
-      }
-    } else {
-      if (domain->triclinic) {
-      struct AtomVecMolecularKokkos_PackCommSelf<LMPDeviceType,0,1>
-        f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-      Kokkos::parallel_for(n,f);
-      } else {
-      struct AtomVecMolecularKokkos_PackCommSelf<LMPDeviceType,0,0>
-        f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
-          domain->xy,domain->xz,domain->yz,pbc);
-      Kokkos::parallel_for(n,f);
-      }
-    }
-  }
-        return n*3;
-}
-
-/* ---------------------------------------------------------------------- */
-
-template<class DeviceType>
-struct AtomVecMolecularKokkos_UnpackComm {
-  typedef DeviceType device_type;
-
-  typename ArrayTypes<DeviceType>::t_x_array _x;
-  typename ArrayTypes<DeviceType>::t_xfloat_2d_const _buf;
-  int _first;
-
-  AtomVecMolecularKokkos_UnpackComm(
-      const typename DAT::tdual_x_array &x,
-      const typename DAT::tdual_xfloat_2d &buf,
-      const int& first):_x(x.view<DeviceType>()),_buf(buf.view<DeviceType>()),
-                        _first(first) {};
-
-  KOKKOS_INLINE_FUNCTION
-  void operator() (const int& i) const {
-      _x(i+_first,0) = _buf(i,0);
-      _x(i+_first,1) = _buf(i,1);
-      _x(i+_first,2) = _buf(i,2);
-  }
-};
-
-/* ---------------------------------------------------------------------- */
-
-void AtomVecMolecularKokkos::unpack_comm_kokkos(const int &n, const int &first,
-    const DAT::tdual_xfloat_2d &buf) {
-  if (commKK->forward_comm_on_host) {
-    atomKK->sync(Host,X_MASK);
-    atomKK->modified(Host,X_MASK);
-    struct AtomVecMolecularKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first);
-    Kokkos::parallel_for(n,f);
-  } else {
-    atomKK->sync(Device,X_MASK);
-    atomKK->modified(Device,X_MASK);
-    struct AtomVecMolecularKokkos_UnpackComm<LMPDeviceType> f(atomKK->k_x,buf,first);
-    Kokkos::parallel_for(n,f);
-  }
-}
-
-/* ---------------------------------------------------------------------- */
-
 template<class DeviceType,int PBC_FLAG>
 struct AtomVecMolecularKokkos_PackBorder {
   typedef DeviceType device_type;
   typedef ArrayTypes<DeviceType> AT;
 
   typename AT::t_xfloat_2d _buf;
-  const typename AT::t_int_2d_const _list;
-  const int _iswap;
+  const typename AT::t_int_1d_const _list;
   const typename AT::t_x_array_randomread _x;
   const typename AT::t_tagint_1d _tag;
   const typename AT::t_int_1d _type;
@@ -558,21 +276,20 @@ struct AtomVecMolecularKokkos_PackBorder {
 
   AtomVecMolecularKokkos_PackBorder(
       const typename AT::t_xfloat_2d &buf,
-      const typename AT::t_int_2d_const &list,
-      const int & iswap,
+      const typename AT::t_int_1d_const &list,
       const typename AT::t_x_array &x,
       const typename AT::t_tagint_1d &tag,
       const typename AT::t_int_1d &type,
       const typename AT::t_int_1d &mask,
       const typename AT::t_tagint_1d &molecule,
       const X_FLOAT &dx, const X_FLOAT &dy, const X_FLOAT &dz):
-      _buf(buf),_list(list),_iswap(iswap),
+      _buf(buf),_list(list),
       _x(x),_tag(tag),_type(type),_mask(mask),_molecule(molecule),
       _dx(dx),_dy(dy),_dz(dz) {}
 
   KOKKOS_INLINE_FUNCTION
   void operator() (const int& i) const {
-      const int j = _list(_iswap,i);
+      const int j = _list(i);
       if (PBC_FLAG == 0) {
           _buf(i,0) = _x(j,0);
           _buf(i,1) = _x(j,1);
@@ -595,8 +312,8 @@ struct AtomVecMolecularKokkos_PackBorder {
 
 /* ---------------------------------------------------------------------- */
 
-int AtomVecMolecularKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
-                                               DAT::tdual_xfloat_2d buf,int iswap,
+int AtomVecMolecularKokkos::pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist,
+                                               DAT::tdual_xfloat_2d buf,
                                                int pbc_flag, int *pbc, ExecutionSpace space)
 {
   X_FLOAT dx,dy,dz;
@@ -614,12 +331,12 @@ int AtomVecMolecularKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendli
     if (space==Host) {
       AtomVecMolecularKokkos_PackBorder<LMPHostType,1> f(
         buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
-        iswap,h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz);
+        h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz);
       Kokkos::parallel_for(n,f);
     } else {
       AtomVecMolecularKokkos_PackBorder<LMPDeviceType,1> f(
         buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
-        iswap,d_x,d_tag,d_type,d_mask,d_molecule,dx,dy,dz);
+        d_x,d_tag,d_type,d_mask,d_molecule,dx,dy,dz);
       Kokkos::parallel_for(n,f);
     }
 
@@ -628,12 +345,12 @@ int AtomVecMolecularKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendli
     if (space==Host) {
       AtomVecMolecularKokkos_PackBorder<LMPHostType,0> f(
         buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
-        iswap,h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz);
+        h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz);
       Kokkos::parallel_for(n,f);
     } else {
       AtomVecMolecularKokkos_PackBorder<LMPDeviceType,0> f(
         buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
-        iswap,d_x,d_tag,d_type,d_mask,d_molecule,dx,dy,dz);
+        d_x,d_tag,d_type,d_mask,d_molecule,dx,dy,dz);
       Kokkos::parallel_for(n,f);
     }
   }
diff --git a/src/KOKKOS/atom_vec_molecular_kokkos.h b/src/KOKKOS/atom_vec_molecular_kokkos.h
index eb976e9073..a92784fbd9 100644
--- a/src/KOKKOS/atom_vec_molecular_kokkos.h
+++ b/src/KOKKOS/atom_vec_molecular_kokkos.h
@@ -35,17 +35,8 @@ class AtomVecMolecularKokkos : public AtomVecKokkos, public AtomVecMolecular {
   void grow(int) override;
   void grow_pointers() override;
   void sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter) override;
-  int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist,
-                       const int & iswap,
-                       const DAT::tdual_xfloat_2d &buf,
-                       const int &pbc_flag, const int pbc[]) override;
-  void unpack_comm_kokkos(const int &n, const int &nfirst,
-                          const DAT::tdual_xfloat_2d &buf) override;
-  int pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
-                     const int & iswap, const int nfirst,
-                     const int &pbc_flag, const int pbc[]) override;
-  int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
-                         DAT::tdual_xfloat_2d buf,int iswap,
+  int pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist,
+                         DAT::tdual_xfloat_2d buf,
                          int pbc_flag, int *pbc, ExecutionSpace space) override;
   void unpack_border_kokkos(const int &n, const int &nfirst,
                             const DAT::tdual_xfloat_2d &buf,
diff --git a/src/KOKKOS/atom_vec_sphere_kokkos.cpp b/src/KOKKOS/atom_vec_sphere_kokkos.cpp
index 3dfb5143cd..8802275e31 100644
--- a/src/KOKKOS/atom_vec_sphere_kokkos.cpp
+++ b/src/KOKKOS/atom_vec_sphere_kokkos.cpp
@@ -20,13 +20,11 @@
 #include "domain.h"
 #include "error.h"
 #include "fix.h"
+#include "kokkos.h"
 #include "math_const.h"
-#include "memory.h"
 #include "memory_kokkos.h"
 #include "modify.h"
 
-#include <cstring>
-
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
@@ -152,8 +150,7 @@ struct AtomVecSphereKokkos_PackComm {
   typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
   typename ArrayTypes<DeviceType>::t_float_1d _radius,_rmass;
   typename ArrayTypes<DeviceType>::t_xfloat_2d_um _buf;
-  typename ArrayTypes<DeviceType>::t_int_2d_const _list;
-  const int _iswap;
+  typename ArrayTypes<DeviceType>::t_int_1d_const _list;
   X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
   X_FLOAT _pbc[6];
 
@@ -162,14 +159,13 @@ struct AtomVecSphereKokkos_PackComm {
     const typename DAT::tdual_float_1d &radius,
     const typename DAT::tdual_float_1d &rmass,
     const typename DAT::tdual_xfloat_2d &buf,
-    const typename DAT::tdual_int_2d &list,
-    const int & iswap,
+    const typename DAT::tdual_int_1d &list,
     const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
     const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc):
     _x(x.view<DeviceType>()),
     _radius(radius.view<DeviceType>()),
     _rmass(rmass.view<DeviceType>()),
-    _list(list.view<DeviceType>()),_iswap(iswap),
+    _list(list.view<DeviceType>()),
     _xprd(xprd),_yprd(yprd),_zprd(zprd),
     _xy(xy),_xz(xz),_yz(yz) {
     const size_t elements = 5;
@@ -181,7 +177,7 @@ struct AtomVecSphereKokkos_PackComm {
 
   KOKKOS_INLINE_FUNCTION
   void operator() (const int& i) const {
-    const int j = _list(_iswap,i);
+    const int j = _list(i);
     if (PBC_FLAG == 0) {
       _buf(i,0) = _x(j,0);
       _buf(i,1) = _x(j,1);
@@ -206,25 +202,24 @@ struct AtomVecSphereKokkos_PackComm {
 
 int AtomVecSphereKokkos::pack_comm_kokkos(
   const int &n,
-  const DAT::tdual_int_2d &list,
-  const int & iswap,
+  const DAT::tdual_int_1d &list,
   const DAT::tdual_xfloat_2d &buf,
   const int &pbc_flag,
   const int* const pbc)
 {
   // Fallback to AtomVecKokkos if radvary == 0
   if (radvary == 0)
-    return AtomVecKokkos::pack_comm_kokkos(n,list,iswap,buf,pbc_flag,pbc);
+    return AtomVecKokkos::pack_comm_kokkos(n,list,buf,pbc_flag,pbc);
   // Check whether to always run forward communication on the host
   // Choose correct forward PackComm kernel
-  if (commKK->forward_comm_on_host) {
+  if (lmp->kokkos->forward_comm_on_host) {
     atomKK->sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK);
     if (pbc_flag) {
       if (domain->triclinic) {
         struct AtomVecSphereKokkos_PackComm<LMPHostType,1,1> f(
           atomKK->k_x,
           atomKK->k_radius,atomKK->k_rmass,
-          buf,list,iswap,
+          buf,list,
           domain->xprd,domain->yprd,domain->zprd,
           domain->xy,domain->xz,domain->yz,pbc);
         Kokkos::parallel_for(n,f);
@@ -232,7 +227,7 @@ int AtomVecSphereKokkos::pack_comm_kokkos(
         struct AtomVecSphereKokkos_PackComm<LMPHostType,1,0> f(
           atomKK->k_x,
           atomKK->k_radius,atomKK->k_rmass,
-          buf,list,iswap,
+          buf,list,
           domain->xprd,domain->yprd,domain->zprd,
           domain->xy,domain->xz,domain->yz,pbc);
         Kokkos::parallel_for(n,f);
@@ -242,7 +237,7 @@ int AtomVecSphereKokkos::pack_comm_kokkos(
         struct AtomVecSphereKokkos_PackComm<LMPHostType,0,1> f(
           atomKK->k_x,
           atomKK->k_radius,atomKK->k_rmass,
-          buf,list,iswap,
+          buf,list,
           domain->xprd,domain->yprd,domain->zprd,
           domain->xy,domain->xz,domain->yz,pbc);
         Kokkos::parallel_for(n,f);
@@ -250,7 +245,7 @@ int AtomVecSphereKokkos::pack_comm_kokkos(
         struct AtomVecSphereKokkos_PackComm<LMPHostType,0,0> f(
           atomKK->k_x,
           atomKK->k_radius,atomKK->k_rmass,
-          buf,list,iswap,
+          buf,list,
           domain->xprd,domain->yprd,domain->zprd,
           domain->xy,domain->xz,domain->yz,pbc);
         Kokkos::parallel_for(n,f);
@@ -263,7 +258,7 @@ int AtomVecSphereKokkos::pack_comm_kokkos(
         struct AtomVecSphereKokkos_PackComm<LMPDeviceType,1,1> f(
           atomKK->k_x,
           atomKK->k_radius,atomKK->k_rmass,
-          buf,list,iswap,
+          buf,list,
           domain->xprd,domain->yprd,domain->zprd,
           domain->xy,domain->xz,domain->yz,pbc);
         Kokkos::parallel_for(n,f);
@@ -271,7 +266,7 @@ int AtomVecSphereKokkos::pack_comm_kokkos(
         struct AtomVecSphereKokkos_PackComm<LMPDeviceType,1,0> f(
           atomKK->k_x,
           atomKK->k_radius,atomKK->k_rmass,
-          buf,list,iswap,
+          buf,list,
           domain->xprd,domain->yprd,domain->zprd,
           domain->xy,domain->xz,domain->yz,pbc);
         Kokkos::parallel_for(n,f);
@@ -281,7 +276,7 @@ int AtomVecSphereKokkos::pack_comm_kokkos(
         struct AtomVecSphereKokkos_PackComm<LMPDeviceType,0,1> f(
           atomKK->k_x,
           atomKK->k_radius,atomKK->k_rmass,
-          buf,list,iswap,
+          buf,list,
           domain->xprd,domain->yprd,domain->zprd,
           domain->xy,domain->xz,domain->yz,pbc);
         Kokkos::parallel_for(n,f);
@@ -289,7 +284,7 @@ int AtomVecSphereKokkos::pack_comm_kokkos(
         struct AtomVecSphereKokkos_PackComm<LMPDeviceType,0,0> f(
           atomKK->k_x,
           atomKK->k_radius,atomKK->k_rmass,
-          buf,list,iswap,
+          buf,list,
           domain->xprd,domain->yprd,domain->zprd,
           domain->xy,domain->xz,domain->yz,pbc);
         Kokkos::parallel_for(n,f);
@@ -310,8 +305,7 @@ struct AtomVecSphereKokkos_PackCommVel {
   typename ArrayTypes<DeviceType>::t_float_1d _radius,_rmass;
   typename ArrayTypes<DeviceType>::t_v_array _v, _omega;
   typename ArrayTypes<DeviceType>::t_xfloat_2d_um _buf;
-  typename ArrayTypes<DeviceType>::t_int_2d_const _list;
-  const int _iswap;
+  typename ArrayTypes<DeviceType>::t_int_1d_const _list;
   X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
   X_FLOAT _pbc[6];
   X_FLOAT _h_rate[6];
@@ -325,8 +319,7 @@ struct AtomVecSphereKokkos_PackCommVel {
     const typename DAT::tdual_v_array &v,
     const typename DAT::tdual_v_array &omega,
     const typename DAT::tdual_xfloat_2d &buf,
-    const typename DAT::tdual_int_2d &list,
-    const int &iswap,
+    const typename DAT::tdual_int_1d &list,
     const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
     const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc,
     const double * const h_rate,
@@ -337,7 +330,7 @@ struct AtomVecSphereKokkos_PackCommVel {
     _rmass(rmass.view<DeviceType>()),
     _v(v.view<DeviceType>()),
     _omega(omega.view<DeviceType>()),
-    _list(list.view<DeviceType>()),_iswap(iswap),
+    _list(list.view<DeviceType>()),
     _xprd(xprd),_yprd(yprd),_zprd(zprd),
     _xy(xy),_xz(xz),_yz(yz),
     _deform_vremap(deform_vremap)
@@ -353,7 +346,7 @@ struct AtomVecSphereKokkos_PackCommVel {
 
   KOKKOS_INLINE_FUNCTION
   void operator() (const int& i) const {
-    const int j = _list(_iswap,i);
+    const int j = _list(i);
     if (PBC_FLAG == 0) {
       _buf(i,0) = _x(j,0);
       _buf(i,1) = _x(j,1);
@@ -398,13 +391,12 @@ struct AtomVecSphereKokkos_PackCommVel {
 
 int AtomVecSphereKokkos::pack_comm_vel_kokkos(
   const int &n,
-  const DAT::tdual_int_2d &list,
-  const int & iswap,
+  const DAT::tdual_int_1d &list,
   const DAT::tdual_xfloat_2d &buf,
   const int &pbc_flag,
   const int* const pbc)
 {
-  if (commKK->forward_comm_on_host) {
+  if (lmp->kokkos->forward_comm_on_host) {
     atomKK->sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
     if (pbc_flag) {
       if (deform_vremap) {
@@ -414,7 +406,7 @@ int AtomVecSphereKokkos::pack_comm_vel_kokkos(
               atomKK->k_x,atomKK->k_mask,
               atomKK->k_radius,atomKK->k_rmass,
               atomKK->k_v,atomKK->k_omega,
-              buf,list,iswap,
+              buf,list,
               domain->xprd,domain->yprd,domain->zprd,
               domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
             Kokkos::parallel_for(n,f);
@@ -423,7 +415,7 @@ int AtomVecSphereKokkos::pack_comm_vel_kokkos(
               atomKK->k_x,atomKK->k_mask,
               atomKK->k_radius,atomKK->k_rmass,
               atomKK->k_v,atomKK->k_omega,
-              buf,list,iswap,
+              buf,list,
               domain->xprd,domain->yprd,domain->zprd,
               domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
             Kokkos::parallel_for(n,f);
@@ -434,7 +426,7 @@ int AtomVecSphereKokkos::pack_comm_vel_kokkos(
               atomKK->k_x,atomKK->k_mask,
               atomKK->k_radius,atomKK->k_rmass,
               atomKK->k_v,atomKK->k_omega,
-              buf,list,iswap,
+              buf,list,
               domain->xprd,domain->yprd,domain->zprd,
               domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
             Kokkos::parallel_for(n,f);
@@ -443,7 +435,7 @@ int AtomVecSphereKokkos::pack_comm_vel_kokkos(
               atomKK->k_x,atomKK->k_mask,
               atomKK->k_radius,atomKK->k_rmass,
               atomKK->k_v,atomKK->k_omega,
-              buf,list,iswap,
+              buf,list,
               domain->xprd,domain->yprd,domain->zprd,
               domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
             Kokkos::parallel_for(n,f);
@@ -456,7 +448,7 @@ int AtomVecSphereKokkos::pack_comm_vel_kokkos(
               atomKK->k_x,atomKK->k_mask,
               atomKK->k_radius,atomKK->k_rmass,
               atomKK->k_v,atomKK->k_omega,
-              buf,list,iswap,
+              buf,list,
               domain->xprd,domain->yprd,domain->zprd,
               domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
             Kokkos::parallel_for(n,f);
@@ -465,7 +457,7 @@ int AtomVecSphereKokkos::pack_comm_vel_kokkos(
               atomKK->k_x,atomKK->k_mask,
               atomKK->k_radius,atomKK->k_rmass,
               atomKK->k_v,atomKK->k_omega,
-              buf,list,iswap,
+              buf,list,
               domain->xprd,domain->yprd,domain->zprd,
               domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
             Kokkos::parallel_for(n,f);
@@ -476,7 +468,7 @@ int AtomVecSphereKokkos::pack_comm_vel_kokkos(
               atomKK->k_x,atomKK->k_mask,
               atomKK->k_radius,atomKK->k_rmass,
               atomKK->k_v,atomKK->k_omega,
-              buf,list,iswap,
+              buf,list,
               domain->xprd,domain->yprd,domain->zprd,
               domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
             Kokkos::parallel_for(n,f);
@@ -485,7 +477,7 @@ int AtomVecSphereKokkos::pack_comm_vel_kokkos(
               atomKK->k_x,atomKK->k_mask,
               atomKK->k_radius,atomKK->k_rmass,
               atomKK->k_v,atomKK->k_omega,
-              buf,list,iswap,
+              buf,list,
               domain->xprd,domain->yprd,domain->zprd,
               domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
             Kokkos::parallel_for(n,f);
@@ -499,7 +491,7 @@ int AtomVecSphereKokkos::pack_comm_vel_kokkos(
             atomKK->k_x,atomKK->k_mask,
             atomKK->k_radius,atomKK->k_rmass,
             atomKK->k_v,atomKK->k_omega,
-            buf,list,iswap,
+            buf,list,
             domain->xprd,domain->yprd,domain->zprd,
             domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
           Kokkos::parallel_for(n,f);
@@ -508,7 +500,7 @@ int AtomVecSphereKokkos::pack_comm_vel_kokkos(
             atomKK->k_x,atomKK->k_mask,
             atomKK->k_radius,atomKK->k_rmass,
             atomKK->k_v,atomKK->k_omega,
-            buf,list,iswap,
+            buf,list,
             domain->xprd,domain->yprd,domain->zprd,
             domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
           Kokkos::parallel_for(n,f);
@@ -519,7 +511,7 @@ int AtomVecSphereKokkos::pack_comm_vel_kokkos(
             atomKK->k_x,atomKK->k_mask,
             atomKK->k_radius,atomKK->k_rmass,
             atomKK->k_v,atomKK->k_omega,
-            buf,list,iswap,
+            buf,list,
             domain->xprd,domain->yprd,domain->zprd,
             domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
           Kokkos::parallel_for(n,f);
@@ -528,7 +520,7 @@ int AtomVecSphereKokkos::pack_comm_vel_kokkos(
             atomKK->k_x,atomKK->k_mask,
             atomKK->k_radius,atomKK->k_rmass,
             atomKK->k_v,atomKK->k_omega,
-            buf,list,iswap,
+            buf,list,
             domain->xprd,domain->yprd,domain->zprd,
             domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
           Kokkos::parallel_for(n,f);
@@ -545,7 +537,7 @@ int AtomVecSphereKokkos::pack_comm_vel_kokkos(
               atomKK->k_x,atomKK->k_mask,
               atomKK->k_radius,atomKK->k_rmass,
               atomKK->k_v,atomKK->k_omega,
-              buf,list,iswap,
+              buf,list,
               domain->xprd,domain->yprd,domain->zprd,
               domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
             Kokkos::parallel_for(n,f);
@@ -554,7 +546,7 @@ int AtomVecSphereKokkos::pack_comm_vel_kokkos(
               atomKK->k_x,atomKK->k_mask,
               atomKK->k_radius,atomKK->k_rmass,
               atomKK->k_v,atomKK->k_omega,
-              buf,list,iswap,
+              buf,list,
               domain->xprd,domain->yprd,domain->zprd,
               domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
             Kokkos::parallel_for(n,f);
@@ -565,7 +557,7 @@ int AtomVecSphereKokkos::pack_comm_vel_kokkos(
               atomKK->k_x,atomKK->k_mask,
               atomKK->k_radius,atomKK->k_rmass,
               atomKK->k_v,atomKK->k_omega,
-              buf,list,iswap,
+              buf,list,
               domain->xprd,domain->yprd,domain->zprd,
               domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
             Kokkos::parallel_for(n,f);
@@ -574,7 +566,7 @@ int AtomVecSphereKokkos::pack_comm_vel_kokkos(
               atomKK->k_x,atomKK->k_mask,
               atomKK->k_radius,atomKK->k_rmass,
               atomKK->k_v,atomKK->k_omega,
-              buf,list,iswap,
+              buf,list,
               domain->xprd,domain->yprd,domain->zprd,
               domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
             Kokkos::parallel_for(n,f);
@@ -587,7 +579,7 @@ int AtomVecSphereKokkos::pack_comm_vel_kokkos(
               atomKK->k_x,atomKK->k_mask,
               atomKK->k_radius,atomKK->k_rmass,
               atomKK->k_v,atomKK->k_omega,
-              buf,list,iswap,
+              buf,list,
               domain->xprd,domain->yprd,domain->zprd,
               domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
             Kokkos::parallel_for(n,f);
@@ -596,7 +588,7 @@ int AtomVecSphereKokkos::pack_comm_vel_kokkos(
               atomKK->k_x,atomKK->k_mask,
               atomKK->k_radius,atomKK->k_rmass,
               atomKK->k_v,atomKK->k_omega,
-              buf,list,iswap,
+              buf,list,
               domain->xprd,domain->yprd,domain->zprd,
               domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
             Kokkos::parallel_for(n,f);
@@ -607,7 +599,7 @@ int AtomVecSphereKokkos::pack_comm_vel_kokkos(
               atomKK->k_x,atomKK->k_mask,
               atomKK->k_radius,atomKK->k_rmass,
               atomKK->k_v,atomKK->k_omega,
-              buf,list,iswap,
+              buf,list,
               domain->xprd,domain->yprd,domain->zprd,
               domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
             Kokkos::parallel_for(n,f);
@@ -616,7 +608,7 @@ int AtomVecSphereKokkos::pack_comm_vel_kokkos(
               atomKK->k_x,atomKK->k_mask,
               atomKK->k_radius,atomKK->k_rmass,
               atomKK->k_v,atomKK->k_omega,
-              buf,list,iswap,
+              buf,list,
               domain->xprd,domain->yprd,domain->zprd,
               domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
             Kokkos::parallel_for(n,f);
@@ -630,7 +622,7 @@ int AtomVecSphereKokkos::pack_comm_vel_kokkos(
             atomKK->k_x,atomKK->k_mask,
             atomKK->k_radius,atomKK->k_rmass,
             atomKK->k_v,atomKK->k_omega,
-            buf,list,iswap,
+            buf,list,
             domain->xprd,domain->yprd,domain->zprd,
             domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
           Kokkos::parallel_for(n,f);
@@ -639,7 +631,7 @@ int AtomVecSphereKokkos::pack_comm_vel_kokkos(
             atomKK->k_x,atomKK->k_mask,
             atomKK->k_radius,atomKK->k_rmass,
             atomKK->k_v,atomKK->k_omega,
-            buf,list,iswap,
+            buf,list,
             domain->xprd,domain->yprd,domain->zprd,
             domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
           Kokkos::parallel_for(n,f);
@@ -650,7 +642,7 @@ int AtomVecSphereKokkos::pack_comm_vel_kokkos(
             atomKK->k_x,atomKK->k_mask,
             atomKK->k_radius,atomKK->k_rmass,
             atomKK->k_v,atomKK->k_omega,
-            buf,list,iswap,
+            buf,list,
             domain->xprd,domain->yprd,domain->zprd,
             domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
           Kokkos::parallel_for(n,f);
@@ -659,7 +651,7 @@ int AtomVecSphereKokkos::pack_comm_vel_kokkos(
             atomKK->k_x,atomKK->k_mask,
             atomKK->k_radius,atomKK->k_rmass,
             atomKK->k_v,atomKK->k_omega,
-            buf,list,iswap,
+            buf,list,
             domain->xprd,domain->yprd,domain->zprd,
             domain->xy,domain->xz,domain->yz,pbc,h_rate,deform_vremap);
           Kokkos::parallel_for(n,f);
@@ -680,8 +672,7 @@ struct AtomVecSphereKokkos_PackCommSelf {
   typename ArrayTypes<DeviceType>::t_x_array _xw;
   typename ArrayTypes<DeviceType>::t_float_1d _radius,_rmass;
   int _nfirst;
-  typename ArrayTypes<DeviceType>::t_int_2d_const _list;
-  const int _iswap;
+  typename ArrayTypes<DeviceType>::t_int_1d_const _list;
   X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
   X_FLOAT _pbc[6];
 
@@ -690,14 +681,13 @@ struct AtomVecSphereKokkos_PackCommSelf {
     const typename DAT::tdual_float_1d &radius,
     const typename DAT::tdual_float_1d &rmass,
     const int &nfirst,
-    const typename DAT::tdual_int_2d &list,
-    const int & iswap,
+    const typename DAT::tdual_int_1d &list,
     const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
     const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc):
     _x(x.view<DeviceType>()),_xw(x.view<DeviceType>()),
     _radius(radius.view<DeviceType>()),
     _rmass(rmass.view<DeviceType>()),
-    _nfirst(nfirst),_list(list.view<DeviceType>()),_iswap(iswap),
+    _nfirst(nfirst),_list(list.view<DeviceType>()),
     _xprd(xprd),_yprd(yprd),_zprd(zprd),
     _xy(xy),_xz(xz),_yz(yz) {
     _pbc[0] = pbc[0]; _pbc[1] = pbc[1]; _pbc[2] = pbc[2];
@@ -706,7 +696,7 @@ struct AtomVecSphereKokkos_PackCommSelf {
 
   KOKKOS_INLINE_FUNCTION
   void operator() (const int& i) const {
-    const int j = _list(_iswap,i);
+    const int j = _list(i);
     if (PBC_FLAG == 0) {
       _xw(i+_nfirst,0) = _x(j,0);
       _xw(i+_nfirst,1) = _x(j,1);
@@ -730,12 +720,12 @@ struct AtomVecSphereKokkos_PackCommSelf {
 /* ---------------------------------------------------------------------- */
 
 int AtomVecSphereKokkos::pack_comm_self(
-  const int &n, const DAT::tdual_int_2d &list, const int &iswap,
+  const int &n, const DAT::tdual_int_1d &list,
   const int nfirst, const int &pbc_flag, const int* const pbc) {
   // Fallback to AtomVecKokkos if radvary == 0
   if (radvary == 0)
-    return AtomVecKokkos::pack_comm_self(n,list,iswap,nfirst,pbc_flag,pbc);
-  if (commKK->forward_comm_on_host) {
+    return AtomVecKokkos::pack_comm_self(n,list,nfirst,pbc_flag,pbc);
+  if (lmp->kokkos->forward_comm_on_host) {
     atomKK->sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK);
     atomKK->modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK);
     if (pbc_flag) {
@@ -743,7 +733,7 @@ int AtomVecSphereKokkos::pack_comm_self(
         struct AtomVecSphereKokkos_PackCommSelf<LMPHostType,1,1> f(
           atomKK->k_x,
           atomKK->k_radius,atomKK->k_rmass,
-          nfirst,list,iswap,
+          nfirst,list,
           domain->xprd,domain->yprd,domain->zprd,
           domain->xy,domain->xz,domain->yz,pbc);
         Kokkos::parallel_for(n,f);
@@ -751,7 +741,7 @@ int AtomVecSphereKokkos::pack_comm_self(
         struct AtomVecSphereKokkos_PackCommSelf<LMPHostType,1,0> f(
           atomKK->k_x,
           atomKK->k_radius,atomKK->k_rmass,
-          nfirst,list,iswap,
+          nfirst,list,
           domain->xprd,domain->yprd,domain->zprd,
           domain->xy,domain->xz,domain->yz,pbc);
         Kokkos::parallel_for(n,f);
@@ -761,7 +751,7 @@ int AtomVecSphereKokkos::pack_comm_self(
         struct AtomVecSphereKokkos_PackCommSelf<LMPHostType,0,1> f(
           atomKK->k_x,
           atomKK->k_radius,atomKK->k_rmass,
-          nfirst,list,iswap,
+          nfirst,list,
           domain->xprd,domain->yprd,domain->zprd,
           domain->xy,domain->xz,domain->yz,pbc);
         Kokkos::parallel_for(n,f);
@@ -769,7 +759,7 @@ int AtomVecSphereKokkos::pack_comm_self(
         struct AtomVecSphereKokkos_PackCommSelf<LMPHostType,0,0> f(
           atomKK->k_x,
           atomKK->k_radius,atomKK->k_rmass,
-          nfirst,list,iswap,
+          nfirst,list,
           domain->xprd,domain->yprd,domain->zprd,
           domain->xy,domain->xz,domain->yz,pbc);
         Kokkos::parallel_for(n,f);
@@ -783,7 +773,7 @@ int AtomVecSphereKokkos::pack_comm_self(
         struct AtomVecSphereKokkos_PackCommSelf<LMPDeviceType,1,1> f(
           atomKK->k_x,
           atomKK->k_radius,atomKK->k_rmass,
-          nfirst,list,iswap,
+          nfirst,list,
           domain->xprd,domain->yprd,domain->zprd,
           domain->xy,domain->xz,domain->yz,pbc);
         Kokkos::parallel_for(n,f);
@@ -791,7 +781,7 @@ int AtomVecSphereKokkos::pack_comm_self(
         struct AtomVecSphereKokkos_PackCommSelf<LMPDeviceType,1,0> f(
           atomKK->k_x,
           atomKK->k_radius,atomKK->k_rmass,
-          nfirst,list,iswap,
+          nfirst,list,
           domain->xprd,domain->yprd,domain->zprd,
           domain->xy,domain->xz,domain->yz,pbc);
         Kokkos::parallel_for(n,f);
@@ -801,7 +791,7 @@ int AtomVecSphereKokkos::pack_comm_self(
         struct AtomVecSphereKokkos_PackCommSelf<LMPDeviceType,0,1> f(
           atomKK->k_x,
           atomKK->k_radius,atomKK->k_rmass,
-          nfirst,list,iswap,
+          nfirst,list,
           domain->xprd,domain->yprd,domain->zprd,
           domain->xy,domain->xz,domain->yz,pbc);
         Kokkos::parallel_for(n,f);
@@ -809,7 +799,7 @@ int AtomVecSphereKokkos::pack_comm_self(
         struct AtomVecSphereKokkos_PackCommSelf<LMPDeviceType,0,0> f(
           atomKK->k_x,
           atomKK->k_radius,atomKK->k_rmass,
-          nfirst,list,iswap,
+          nfirst,list,
           domain->xprd,domain->yprd,domain->zprd,
           domain->xy,domain->xz,domain->yz,pbc);
         Kokkos::parallel_for(n,f);
@@ -866,7 +856,7 @@ void AtomVecSphereKokkos::unpack_comm_kokkos(
     AtomVecKokkos::unpack_comm_kokkos(n,first,buf);
     return;
   }
-  if (commKK->forward_comm_on_host) {
+  if (lmp->kokkos->forward_comm_on_host) {
     atomKK->modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK);
     struct AtomVecSphereKokkos_UnpackComm<LMPHostType> f(
       atomKK->k_x,
@@ -938,7 +928,7 @@ struct AtomVecSphereKokkos_UnpackCommVel {
 void AtomVecSphereKokkos::unpack_comm_vel_kokkos(
   const int &n, const int &first,
   const DAT::tdual_xfloat_2d &buf) {
-  if (commKK->forward_comm_on_host) {
+  if (lmp->kokkos->forward_comm_on_host) {
     atomKK->modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
     if (radvary == 0) {
       struct AtomVecSphereKokkos_UnpackCommVel<LMPHostType,0> f(
@@ -982,8 +972,7 @@ struct AtomVecSphereKokkos_PackBorder {
   typedef DeviceType device_type;
 
   typename ArrayTypes<DeviceType>::t_xfloat_2d_um _buf;
-  const typename ArrayTypes<DeviceType>::t_int_2d_const _list;
-  const int _iswap;
+  const typename ArrayTypes<DeviceType>::t_int_1d_const _list;
   const typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
   const typename ArrayTypes<DeviceType>::t_tagint_1d _tag;
   const typename ArrayTypes<DeviceType>::t_int_1d _type;
@@ -993,8 +982,7 @@ struct AtomVecSphereKokkos_PackBorder {
 
   AtomVecSphereKokkos_PackBorder(
     const typename ArrayTypes<DeviceType>::t_xfloat_2d &buf,
-    const typename ArrayTypes<DeviceType>::t_int_2d_const &list,
-    const int &iswap,
+    const typename ArrayTypes<DeviceType>::t_int_1d_const &list,
     const typename ArrayTypes<DeviceType>::t_x_array &x,
     const typename ArrayTypes<DeviceType>::t_tagint_1d &tag,
     const typename ArrayTypes<DeviceType>::t_int_1d &type,
@@ -1002,7 +990,7 @@ struct AtomVecSphereKokkos_PackBorder {
     const typename ArrayTypes<DeviceType>::t_float_1d &radius,
     const typename ArrayTypes<DeviceType>::t_float_1d &rmass,
     const X_FLOAT &dx, const X_FLOAT &dy, const X_FLOAT &dz):
-    _list(list),_iswap(iswap),
+    _buf(buf),_list(list),
     _x(x),_tag(tag),_type(type),_mask(mask),
     _radius(radius),
     _rmass(rmass),
@@ -1015,7 +1003,7 @@ struct AtomVecSphereKokkos_PackBorder {
 
   KOKKOS_INLINE_FUNCTION
   void operator() (const int& i) const {
-    const int j = _list(_iswap,i);
+    const int j = _list(i);
     if (PBC_FLAG == 0) {
       _buf(i,0) = _x(j,0);
       _buf(i,1) = _x(j,1);
@@ -1036,7 +1024,7 @@ struct AtomVecSphereKokkos_PackBorder {
 /* ---------------------------------------------------------------------- */
 
 int AtomVecSphereKokkos::pack_border_kokkos(
-  int n, DAT::tdual_int_2d k_sendlist, DAT::tdual_xfloat_2d buf,int iswap,
+  int n, DAT::tdual_int_1d k_sendlist, DAT::tdual_xfloat_2d buf,
   int pbc_flag, int *pbc, ExecutionSpace space)
 {
   X_FLOAT dx,dy,dz;
@@ -1057,14 +1045,14 @@ int AtomVecSphereKokkos::pack_border_kokkos(
     if (space==Host) {
       AtomVecSphereKokkos_PackBorder<LMPHostType,1> f(
         buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
-        iswap,h_x,h_tag,h_type,h_mask,
+        h_x,h_tag,h_type,h_mask,
         h_radius,h_rmass,
         dx,dy,dz);
       Kokkos::parallel_for(n,f);
     } else {
       AtomVecSphereKokkos_PackBorder<LMPDeviceType,1> f(
         buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
-        iswap,d_x,d_tag,d_type,d_mask,
+        d_x,d_tag,d_type,d_mask,
         d_radius,d_rmass,
         dx,dy,dz);
       Kokkos::parallel_for(n,f);
@@ -1074,14 +1062,14 @@ int AtomVecSphereKokkos::pack_border_kokkos(
     if (space==Host) {
       AtomVecSphereKokkos_PackBorder<LMPHostType,0> f(
         buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
-        iswap,h_x,h_tag,h_type,h_mask,
+        h_x,h_tag,h_type,h_mask,
         h_radius,h_rmass,
         dx,dy,dz);
       Kokkos::parallel_for(n,f);
     } else {
       AtomVecSphereKokkos_PackBorder<LMPDeviceType,0> f(
         buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
-        iswap,d_x,d_tag,d_type,d_mask,
+        d_x,d_tag,d_type,d_mask,
         d_radius,d_rmass,
         dx,dy,dz);
       Kokkos::parallel_for(n,f);
@@ -1097,8 +1085,7 @@ struct AtomVecSphereKokkos_PackBorderVel {
   typedef DeviceType device_type;
 
   typename ArrayTypes<DeviceType>::t_xfloat_2d_um _buf;
-  const typename ArrayTypes<DeviceType>::t_int_2d_const _list;
-  const int _iswap;
+  const typename ArrayTypes<DeviceType>::t_int_1d_const _list;
   const typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
   const typename ArrayTypes<DeviceType>::t_tagint_1d _tag;
   const typename ArrayTypes<DeviceType>::t_int_1d _type;
@@ -1110,8 +1097,7 @@ struct AtomVecSphereKokkos_PackBorderVel {
 
   AtomVecSphereKokkos_PackBorderVel(
     const typename ArrayTypes<DeviceType>::t_xfloat_2d &buf,
-    const typename ArrayTypes<DeviceType>::t_int_2d_const &list,
-    const int &iswap,
+    const typename ArrayTypes<DeviceType>::t_int_1d_const &list,
     const typename ArrayTypes<DeviceType>::t_x_array &x,
     const typename ArrayTypes<DeviceType>::t_tagint_1d &tag,
     const typename ArrayTypes<DeviceType>::t_int_1d &type,
@@ -1123,7 +1109,7 @@ struct AtomVecSphereKokkos_PackBorderVel {
     const X_FLOAT &dx, const X_FLOAT &dy, const X_FLOAT &dz,
     const X_FLOAT &dvx, const X_FLOAT &dvy, const X_FLOAT &dvz,
     const int &deform_groupbit):
-    _buf(buf),_list(list),_iswap(iswap),
+    _buf(buf),_list(list),
     _x(x),_tag(tag),_type(type),_mask(mask),
     _radius(radius),
     _rmass(rmass),
@@ -1139,7 +1125,7 @@ struct AtomVecSphereKokkos_PackBorderVel {
 
   KOKKOS_INLINE_FUNCTION
   void operator() (const int& i) const {
-    const int j = _list(_iswap,i);
+    const int j = _list(i);
     if (PBC_FLAG == 0) {
       _buf(i,0) = _x(j,0);
       _buf(i,1) = _x(j,1);
@@ -1175,7 +1161,7 @@ struct AtomVecSphereKokkos_PackBorderVel {
 /* ---------------------------------------------------------------------- */
 
 int AtomVecSphereKokkos::pack_border_vel_kokkos(
-  int n, DAT::tdual_int_2d k_sendlist, DAT::tdual_xfloat_2d buf,int iswap,
+  int n, DAT::tdual_int_1d k_sendlist, DAT::tdual_xfloat_2d buf,
   int pbc_flag, int *pbc, ExecutionSpace space)
 {
   X_FLOAT dx=0,dy=0,dz=0;
@@ -1198,7 +1184,7 @@ int AtomVecSphereKokkos::pack_border_vel_kokkos(
       if (space==Host) {
         AtomVecSphereKokkos_PackBorderVel<LMPHostType,1,0> f(
           buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
-          iswap,h_x,h_tag,h_type,h_mask,
+          h_x,h_tag,h_type,h_mask,
           h_radius,h_rmass,
           h_v, h_omega,
           dx,dy,dz,dvx,dvy,dvz,
@@ -1207,7 +1193,7 @@ int AtomVecSphereKokkos::pack_border_vel_kokkos(
       } else {
         AtomVecSphereKokkos_PackBorderVel<LMPDeviceType,1,0> f(
           buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
-          iswap,d_x,d_tag,d_type,d_mask,
+          d_x,d_tag,d_type,d_mask,
           d_radius,d_rmass,
           d_v, d_omega,
           dx,dy,dz,dvx,dvy,dvz,
@@ -1222,7 +1208,7 @@ int AtomVecSphereKokkos::pack_border_vel_kokkos(
       if (space==Host) {
         AtomVecSphereKokkos_PackBorderVel<LMPHostType,1,1> f(
           buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
-          iswap,h_x,h_tag,h_type,h_mask,
+          h_x,h_tag,h_type,h_mask,
           h_radius,h_rmass,
           h_v, h_omega,
           dx,dy,dz,dvx,dvy,dvz,
@@ -1231,7 +1217,7 @@ int AtomVecSphereKokkos::pack_border_vel_kokkos(
       } else {
         AtomVecSphereKokkos_PackBorderVel<LMPDeviceType,1,1> f(
           buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
-          iswap,d_x,d_tag,d_type,d_mask,
+          d_x,d_tag,d_type,d_mask,
           d_radius,d_rmass,
           d_v, d_omega,
           dx,dy,dz,dvx,dvy,dvz,
@@ -1243,7 +1229,7 @@ int AtomVecSphereKokkos::pack_border_vel_kokkos(
     if (space==Host) {
       AtomVecSphereKokkos_PackBorderVel<LMPHostType,0,0> f(
         buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
-        iswap,h_x,h_tag,h_type,h_mask,
+        h_x,h_tag,h_type,h_mask,
         h_radius,h_rmass,
         h_v, h_omega,
         dx,dy,dz,dvx,dvy,dvz,
@@ -1252,7 +1238,7 @@ int AtomVecSphereKokkos::pack_border_vel_kokkos(
     } else {
       AtomVecSphereKokkos_PackBorderVel<LMPDeviceType,0,0> f(
         buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
-        iswap,d_x,d_tag,d_type,d_mask,
+        d_x,d_tag,d_type,d_mask,
         d_radius,d_rmass,
         d_v, d_omega,
         dx,dy,dz,dvx,dvy,dvz,
diff --git a/src/KOKKOS/atom_vec_sphere_kokkos.h b/src/KOKKOS/atom_vec_sphere_kokkos.h
index 34529320d9..b28da45c6a 100644
--- a/src/KOKKOS/atom_vec_sphere_kokkos.h
+++ b/src/KOKKOS/atom_vec_sphere_kokkos.h
@@ -37,29 +37,27 @@ class AtomVecSphereKokkos : public AtomVecKokkos, public AtomVecSphere {
   void grow_pointers() override;
   void sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter) override;
 
-  int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist,
-                       const int & iswap,
+  int pack_comm_kokkos(const int &n, const DAT::tdual_int_1d &k_sendlist,
                        const DAT::tdual_xfloat_2d &buf,
                        const int &pbc_flag, const int pbc[]) override;
   void unpack_comm_kokkos(const int &n, const int &nfirst,
                           const DAT::tdual_xfloat_2d &buf) override;
-  int pack_comm_vel_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist,
-                           const int & iswap,
+  int pack_comm_vel_kokkos(const int &n, const DAT::tdual_int_1d &k_sendlist,
                            const DAT::tdual_xfloat_2d &buf,
                            const int &pbc_flag, const int pbc[]) override;
   void unpack_comm_vel_kokkos(const int &n, const int &nfirst,
                               const DAT::tdual_xfloat_2d &buf) override;
-  int pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
-                     const int & iswap, const int nfirst,
+  int pack_comm_self(const int &n, const DAT::tdual_int_1d &list,
+                     const int nfirst,
                      const int &pbc_flag, const int pbc[]) override;
-  int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
-                         DAT::tdual_xfloat_2d buf,int iswap,
+  int pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist,
+                         DAT::tdual_xfloat_2d buf,
                          int pbc_flag, int *pbc, ExecutionSpace space) override;
   void unpack_border_kokkos(const int &n, const int &nfirst,
                             const DAT::tdual_xfloat_2d &buf,
                             ExecutionSpace space) override;
-  int pack_border_vel_kokkos(int n, DAT::tdual_int_2d k_sendlist,
-                             DAT::tdual_xfloat_2d buf,int iswap,
+  int pack_border_vel_kokkos(int n, DAT::tdual_int_1d k_sendlist,
+                             DAT::tdual_xfloat_2d buf,
                              int pbc_flag, int *pbc, ExecutionSpace space) override;
   void unpack_border_vel_kokkos(const int &n, const int &nfirst,
                                 const DAT::tdual_xfloat_2d &buf,
diff --git a/src/KOKKOS/atom_vec_spin_kokkos.cpp b/src/KOKKOS/atom_vec_spin_kokkos.cpp
index c6ee7c1d3b..7cd0b8934c 100644
--- a/src/KOKKOS/atom_vec_spin_kokkos.cpp
+++ b/src/KOKKOS/atom_vec_spin_kokkos.cpp
@@ -26,7 +26,6 @@
 
 #include "atom_kokkos.h"
 #include "atom_masks.h"
-#include "comm_kokkos.h"
 #include "domain.h"
 #include "error.h"
 #include "fix.h"
@@ -157,8 +156,7 @@ struct AtomVecSpinKokkos_PackComm {
   typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
   typename ArrayTypes<DeviceType>::t_sp_array_randomread _sp;
   typename ArrayTypes<DeviceType>::t_xfloat_2d_um _buf;
-  typename ArrayTypes<DeviceType>::t_int_2d_const _list;
-  const int _iswap;
+  typename ArrayTypes<DeviceType>::t_int_1d_const _list;
   X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
   X_FLOAT _pbc[6];
 
@@ -166,12 +164,11 @@ struct AtomVecSpinKokkos_PackComm {
       const typename DAT::tdual_x_array &x,
       const typename DAT::tdual_float_1d_4 &sp,
       const typename DAT::tdual_xfloat_2d &buf,
-      const typename DAT::tdual_int_2d &list,
-      const int & iswap,
+      const typename DAT::tdual_int_1d &list,
       const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
       const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz, const int* const pbc):
       _x(x.view<DeviceType>()),_sp(sp.view<DeviceType>()),
-      _list(list.view<DeviceType>()),_iswap(iswap),
+      _list(list.view<DeviceType>()),
       _xprd(xprd),_yprd(yprd),_zprd(zprd),
       _xy(xy),_xz(xz),_yz(yz) {
         const size_t maxsend = (buf.view<DeviceType>().extent(0)*buf.view<DeviceType>().extent(1))/3;
@@ -183,7 +180,7 @@ struct AtomVecSpinKokkos_PackComm {
 
   KOKKOS_INLINE_FUNCTION
   void operator() (const int& i) const {
-        const int j = _list(_iswap,i);
+        const int j = _list(i);
       if (PBC_FLAG == 0) {
           _buf(i,0) = _x(j,0);
           _buf(i,1) = _x(j,1);
@@ -221,8 +218,7 @@ struct AtomVecSpinKokkos_PackBorder {
   typedef DeviceType device_type;
 
   typename ArrayTypes<DeviceType>::t_xfloat_2d _buf;
-  const typename ArrayTypes<DeviceType>::t_int_2d_const _list;
-  const int _iswap;
+  const typename ArrayTypes<DeviceType>::t_int_1d_const _list;
   const typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
   const typename ArrayTypes<DeviceType>::t_tagint_1d _tag;
   const typename ArrayTypes<DeviceType>::t_int_1d _type;
@@ -232,21 +228,20 @@ struct AtomVecSpinKokkos_PackBorder {
 
   AtomVecSpinKokkos_PackBorder(
       const typename ArrayTypes<DeviceType>::t_xfloat_2d &buf,
-      const typename ArrayTypes<DeviceType>::t_int_2d_const &list,
-      const int & iswap,
+      const typename ArrayTypes<DeviceType>::t_int_1d_const &list,
       const typename ArrayTypes<DeviceType>::t_x_array &x,
       const typename ArrayTypes<DeviceType>::t_tagint_1d &tag,
       const typename ArrayTypes<DeviceType>::t_int_1d &type,
       const typename ArrayTypes<DeviceType>::t_int_1d &mask,
       const typename ArrayTypes<DeviceType>::t_sp_array &sp,
       const X_FLOAT &dx, const X_FLOAT &dy, const X_FLOAT &dz):
-  _buf(buf),_list(list),_iswap(iswap),
+  _buf(buf),_list(list),
     _x(x),_tag(tag),_type(type),_mask(mask),_sp(sp),
     _dx(dx),_dy(dy),_dz(dz) {}
 
   KOKKOS_INLINE_FUNCTION
   void operator() (const int& i) const {
-      const int j = _list(_iswap,i);
+      const int j = _list(i);
       if (PBC_FLAG == 0) {
           _buf(i,0) = _x(j,0);
           _buf(i,1) = _x(j,1);
@@ -275,7 +270,7 @@ struct AtomVecSpinKokkos_PackBorder {
 
 /* ---------------------------------------------------------------------- */
 
-int AtomVecSpinKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, DAT::tdual_xfloat_2d buf,int iswap,
+int AtomVecSpinKokkos::pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist, DAT::tdual_xfloat_2d buf,
                                int pbc_flag, int *pbc, ExecutionSpace space)
 {
   X_FLOAT dx,dy,dz;
@@ -293,12 +288,12 @@ int AtomVecSpinKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, D
     if(space==Host) {
       AtomVecSpinKokkos_PackBorder<LMPHostType,1> f(
         buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
-        iswap,h_x,h_tag,h_type,h_mask,h_sp,dx,dy,dz);
+        h_x,h_tag,h_type,h_mask,h_sp,dx,dy,dz);
       Kokkos::parallel_for(n,f);
     } else {
       AtomVecSpinKokkos_PackBorder<LMPDeviceType,1> f(
         buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
-        iswap,d_x,d_tag,d_type,d_mask,d_sp,dx,dy,dz);
+        d_x,d_tag,d_type,d_mask,d_sp,dx,dy,dz);
       Kokkos::parallel_for(n,f);
     }
 
@@ -307,12 +302,12 @@ int AtomVecSpinKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, D
     if(space==Host) {
       AtomVecSpinKokkos_PackBorder<LMPHostType,0> f(
         buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
-        iswap,h_x,h_tag,h_type,h_mask,h_sp,dx,dy,dz);
+        h_x,h_tag,h_type,h_mask,h_sp,dx,dy,dz);
       Kokkos::parallel_for(n,f);
     } else {
       AtomVecSpinKokkos_PackBorder<LMPDeviceType,0> f(
         buf.view<LMPDeviceType>(), k_sendlist.view<LMPDeviceType>(),
-        iswap,d_x,d_tag,d_type,d_mask,d_sp,dx,dy,dz);
+        d_x,d_tag,d_type,d_mask,d_sp,dx,dy,dz);
       Kokkos::parallel_for(n,f);
     }
   }
diff --git a/src/KOKKOS/atom_vec_spin_kokkos.h b/src/KOKKOS/atom_vec_spin_kokkos.h
index f0145e4db7..6f968dcd25 100644
--- a/src/KOKKOS/atom_vec_spin_kokkos.h
+++ b/src/KOKKOS/atom_vec_spin_kokkos.h
@@ -36,8 +36,8 @@ class AtomVecSpinKokkos : public AtomVecKokkos, public AtomVecSpin {
   void grow_pointers() override;
   void force_clear(int, size_t) override;
   void sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter) override;
-  int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
-                         DAT::tdual_xfloat_2d buf,int iswap,
+  int pack_border_kokkos(int n, DAT::tdual_int_1d k_sendlist,
+                         DAT::tdual_xfloat_2d buf,
                          int pbc_flag, int *pbc, ExecutionSpace space) override;
   void unpack_border_kokkos(const int &n, const int &nfirst,
                             const DAT::tdual_xfloat_2d &buf,
diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp
index 6bb2d7e4d0..2425857d61 100644
--- a/src/KOKKOS/comm_kokkos.cpp
+++ b/src/KOKKOS/comm_kokkos.cpp
@@ -142,11 +142,15 @@ void CommKokkos::init()
   if (force->newton == 0) check_reverse = 0;
   if (force->pair) check_reverse += force->pair->comm_reverse_off;
 
-  if (!comm_f_only) // not all Kokkos atom_vec styles have reverse pack/unpack routines yet
+  if (!comm_f_only) {// not all Kokkos atom_vec styles have reverse pack/unpack routines yet
     reverse_comm_classic = true;
+    lmp->kokkos->reverse_comm_classic = 1;
+  }
 
-  if (ghost_velocity && atomKK->avecKK->no_comm_vel_flag) // not all Kokkos atom_vec styles have comm vel pack/unpack routines yet
+  if (ghost_velocity && atomKK->avecKK->no_comm_vel_flag) { // not all Kokkos atom_vec styles have comm vel pack/unpack routines yet
     forward_comm_classic = true;
+    lmp->kokkos->forward_comm_classic = 1;
+  }
 }
 
 /* ----------------------------------------------------------------------
@@ -157,8 +161,8 @@ void CommKokkos::init()
 void CommKokkos::forward_comm(int dummy)
 {
   if (!forward_comm_classic) {
-    if (forward_comm_on_host) forward_comm_device<LMPHostType>(dummy);
-    else forward_comm_device<LMPDeviceType>(dummy);
+    if (forward_comm_on_host) forward_comm_device<LMPHostType>();
+    else forward_comm_device<LMPDeviceType>();
     return;
   }
 
@@ -181,7 +185,7 @@ void CommKokkos::forward_comm(int dummy)
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
-void CommKokkos::forward_comm_device(int)
+void CommKokkos::forward_comm_device()
 {
   int n;
   MPI_Request request;
@@ -192,7 +196,6 @@ void CommKokkos::forward_comm_device(int)
   // if comm_x_only set, exchange or copy directly to x, don't unpack
 
   k_sendlist.sync<DeviceType>();
-  atomKK->sync(ExecutionSpaceFromDevice<DeviceType>::space,X_MASK);
 
   if (comm->nprocs == 1 && !ghost_velocity) {
     k_swap.sync<DeviceType>();
@@ -211,26 +214,26 @@ void CommKokkos::forward_comm_device(int)
             MPI_Irecv(buf,size_forward_recv[iswap],MPI_DOUBLE,
                       recvproc[iswap],0,world,&request);
           }
-          n = atomKK->avecKK->pack_comm_kokkos(sendnum[iswap],k_sendlist,
-                                     iswap,k_buf_send,pbc_flag[iswap],pbc[iswap]);
+          auto k_sendlist_iswap = Kokkos::subview(k_sendlist,iswap,Kokkos::ALL);
+          n = atomKK->avecKK->pack_comm_kokkos(sendnum[iswap],k_sendlist_iswap,
+                                     k_buf_send,pbc_flag[iswap],pbc[iswap]);
           DeviceType().fence();
           if (n) {
             MPI_Send(k_buf_send.view<DeviceType>().data(),
                      n,MPI_DOUBLE,sendproc[iswap],0,world);
           }
 
-          if (size_forward_recv[iswap]) {
+          if (size_forward_recv[iswap])
             MPI_Wait(&request,MPI_STATUS_IGNORE);
-            atomKK->modified(ExecutionSpaceFromDevice<DeviceType>::
-                             space,X_MASK);
-          }
+
         } else if (ghost_velocity) {
           if (size_forward_recv[iswap]) {
             MPI_Irecv(k_buf_recv.view<DeviceType>().data(),
                       size_forward_recv[iswap],MPI_DOUBLE,
                       recvproc[iswap],0,world,&request);
           }
-          n = atomKK->avecKK->pack_comm_vel_kokkos(sendnum[iswap],k_sendlist,iswap,
+          auto k_sendlist_iswap = Kokkos::subview(k_sendlist,iswap,Kokkos::ALL);
+          n = atomKK->avecKK->pack_comm_vel_kokkos(sendnum[iswap],k_sendlist_iswap,
                                          k_buf_send,pbc_flag[iswap],pbc[iswap]);
           DeviceType().fence();
           if (n) {
@@ -245,7 +248,8 @@ void CommKokkos::forward_comm_device(int)
             MPI_Irecv(k_buf_recv.view<DeviceType>().data(),
                       size_forward_recv[iswap],MPI_DOUBLE,
                       recvproc[iswap],0,world,&request);
-          n = atomKK->avecKK->pack_comm_kokkos(sendnum[iswap],k_sendlist,iswap,
+          auto k_sendlist_iswap = Kokkos::subview(k_sendlist,iswap,Kokkos::ALL);
+          n = atomKK->avecKK->pack_comm_kokkos(sendnum[iswap],k_sendlist_iswap,
                                      k_buf_send,pbc_flag[iswap],pbc[iswap]);
           DeviceType().fence();
           if (n)
@@ -257,12 +261,15 @@ void CommKokkos::forward_comm_device(int)
         }
       } else {
         if (!ghost_velocity) {
-          if (sendnum[iswap])
-            n = atomKK->avecKK->pack_comm_self(sendnum[iswap],k_sendlist,iswap,
+          if (sendnum[iswap]) {
+            auto k_sendlist_iswap = Kokkos::subview(k_sendlist,iswap,Kokkos::ALL);
+            n = atomKK->avecKK->pack_comm_self(sendnum[iswap],k_sendlist_iswap,
                                      firstrecv[iswap],pbc_flag[iswap],pbc[iswap]);
-          DeviceType().fence();
+            DeviceType().fence();
+          }
         } else {
-          n = atomKK->avecKK->pack_comm_vel_kokkos(sendnum[iswap],k_sendlist,iswap,
+          auto k_sendlist_iswap = Kokkos::subview(k_sendlist,iswap,Kokkos::ALL);
+          n = atomKK->avecKK->pack_comm_vel_kokkos(sendnum[iswap],k_sendlist_iswap,
                                          k_buf_send,pbc_flag[iswap],pbc[iswap]);
           DeviceType().fence();
           atomKK->avecKK->unpack_comm_vel_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_send);
@@ -299,10 +306,10 @@ void CommKokkos::reverse_comm()
     atomKK->modified(Host,F_MASK);
   else
     atomKK->modified(Host,ALL_MASK);
-
-  //atomKK->sync(Device,ALL_MASK); // is this needed?
 }
 
+/* ---------------------------------------------------------------------- */
+
 template<class DeviceType>
 void CommKokkos::reverse_comm_device()
 {
@@ -315,7 +322,6 @@ void CommKokkos::reverse_comm_device()
   // if comm_f_only set, exchange or copy directly from f, don't pack
 
   k_sendlist.sync<DeviceType>();
-  atomKK->sync(ExecutionSpaceFromDevice<DeviceType>::space,F_MASK);
 
   for (int iswap = nswap-1; iswap >= 0; iswap--) {
     if (sendproc[iswap] != me) {
@@ -330,11 +336,9 @@ void CommKokkos::reverse_comm_device()
           MPI_Send(buf,size_reverse_send[iswap],MPI_DOUBLE,
                    recvproc[iswap],0,world);
         }
-        if (size_reverse_recv[iswap]) {
+        if (size_reverse_recv[iswap])
           MPI_Wait(&request,MPI_STATUS_IGNORE);
-          atomKK->modified(ExecutionSpaceFromDevice<DeviceType>::
-                           space,F_MASK);
-        }
+
       } else {
         if (size_reverse_recv[iswap])
           MPI_Irecv(k_buf_recv.view<DeviceType>().data(),
@@ -347,18 +351,28 @@ void CommKokkos::reverse_comm_device()
                    MPI_DOUBLE,recvproc[iswap],0,world);
         if (size_reverse_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE);
       }
-      atomKK->avecKK->unpack_reverse_kokkos(sendnum[iswap],k_sendlist,iswap,
+      auto k_sendlist_iswap = Kokkos::subview(k_sendlist,iswap,Kokkos::ALL);
+      atomKK->avecKK->unpack_reverse_kokkos(sendnum[iswap],k_sendlist_iswap,
                                 k_buf_recv);
       DeviceType().fence();
     } else {
-      if (sendnum[iswap])
-        n = atomKK->avecKK->unpack_reverse_self(sendnum[iswap],k_sendlist,iswap,
+      if (sendnum[iswap]) {
+        auto k_sendlist_iswap = Kokkos::subview(k_sendlist,iswap,Kokkos::ALL);
+        n = atomKK->avecKK->pack_reverse_self(sendnum[iswap],k_sendlist_iswap,
                                  firstrecv[iswap]);
+      }
     }
   }
 }
 
-/* ---------------------------------------------------------------------- */
+/* ----------------------------------------------------------------------
+   forward communication invoked by a Fix
+   size/nsize used only to set recv buffer limit
+   size = 0 (default) -> use comm_forward from Fix
+   size > 0 -> Fix passes max size per atom
+   the latter is only useful if Fix does several comm modes,
+     some are smaller than max stored in its comm_forward
+------------------------------------------------------------------------- */
 
 void CommKokkos::forward_comm(Fix *fix, int size)
 {
@@ -371,6 +385,8 @@ void CommKokkos::forward_comm(Fix *fix, int size)
   }
 }
 
+/* ---------------------------------------------------------------------- */
+
 template<class DeviceType>
 void CommKokkos::forward_comm_device(Fix *fix, int size)
 {
@@ -393,8 +409,9 @@ void CommKokkos::forward_comm_device(Fix *fix, int size)
 
     // pack buffer
 
-    n = fixKKBase->pack_forward_comm_kokkos(sendnum[iswap],k_sendlist,
-                                      iswap,k_buf_send_fix,pbc_flag[iswap],pbc[iswap]);
+    auto k_sendlist_iswap = Kokkos::subview(k_sendlist,iswap,Kokkos::ALL);
+    n = fixKKBase->pack_forward_comm_kokkos(sendnum[iswap],k_sendlist_iswap,
+                                      k_buf_send_fix,pbc_flag[iswap],pbc[iswap]);
     DeviceType().fence();
 
     // exchange with another proc
@@ -435,7 +452,14 @@ void CommKokkos::forward_comm_device(Fix *fix, int size)
   }
 }
 
-/* ---------------------------------------------------------------------- */
+/* ----------------------------------------------------------------------
+   reverse communication invoked by a Fix
+   size/nsize used only to set recv buffer limit
+   size = 0 (default) -> use comm_forward from Fix
+   size > 0 -> Fix passes max size per atom
+   the latter is only useful if Fix does several comm modes,
+     some are smaller than max stored in its comm_forward
+------------------------------------------------------------------------- */
 
 void CommKokkos::reverse_comm(Fix *fix, int size)
 {
@@ -443,18 +467,66 @@ void CommKokkos::reverse_comm(Fix *fix, int size)
   CommBrick::reverse_comm(fix, size);
 }
 
+
+/* ----------------------------------------------------------------------
+   reverse communication invoked by a Fix with variable size data
+   query fix for pack size to ensure buf_send is big enough
+   handshake sizes before each Irecv/Send to ensure buf_recv is big enough
+------------------------------------------------------------------------- */
+
+void CommKokkos::reverse_comm_variable(Fix *fix)
+{
+  k_sendlist.sync<LMPHostType>();
+  CommBrick::reverse_comm_variable(fix);
+}
+
+/* ----------------------------------------------------------------------
+   forward communication invoked by a Compute
+   nsize used only to set recv buffer limit
+------------------------------------------------------------------------- */
+
 void CommKokkos::forward_comm(Compute *compute)
 {
   k_sendlist.sync<LMPHostType>();
   CommBrick::forward_comm(compute);
 }
 
+/* ----------------------------------------------------------------------
+   forward communication invoked by a Bond
+   nsize used only to set recv buffer limit
+------------------------------------------------------------------------- */
+
+void CommKokkos::forward_comm(Bond *bond)
+{
+  CommBrick::forward_comm(bond);
+}
+
+/* ----------------------------------------------------------------------
+   reverse communication invoked by a Bond
+   nsize used only to set recv buffer limit
+------------------------------------------------------------------------- */
+
+void CommKokkos::reverse_comm(Bond *bond)
+{
+  CommBrick::reverse_comm(bond);
+}
+
+/* ----------------------------------------------------------------------
+   reverse communication invoked by a Compute
+   nsize used only to set recv buffer limit
+------------------------------------------------------------------------- */
+
 void CommKokkos::reverse_comm(Compute *compute)
 {
   k_sendlist.sync<LMPHostType>();
   CommBrick::reverse_comm(compute);
 }
 
+/* ----------------------------------------------------------------------
+   forward communication invoked by a Dump
+   nsize used only to set recv buffer limit
+------------------------------------------------------------------------- */
+
 void CommKokkos::forward_comm(Pair *pair)
 {
   if (pair->execution_space == Host || forward_pair_comm_classic) {
@@ -466,6 +538,8 @@ void CommKokkos::forward_comm(Pair *pair)
   }
 }
 
+/* ---------------------------------------------------------------------- */
+
 template<class DeviceType>
 void CommKokkos::forward_comm_device(Pair *pair)
 {
@@ -488,8 +562,9 @@ void CommKokkos::forward_comm_device(Pair *pair)
 
     // pack buffer
 
-    n = pairKKBase->pack_forward_comm_kokkos(sendnum[iswap],k_sendlist,
-                                       iswap,k_buf_send_pair,pbc_flag[iswap],pbc[iswap]);
+    auto k_sendlist_iswap = Kokkos::subview(k_sendlist,iswap,Kokkos::ALL);
+    n = pairKKBase->pack_forward_comm_kokkos(sendnum[iswap],k_sendlist_iswap,
+                                       k_buf_send_pair,pbc_flag[iswap],pbc[iswap]);
     DeviceType().fence();
 
     // exchange with another proc
@@ -530,18 +605,24 @@ void CommKokkos::forward_comm_device(Pair *pair)
   }
 }
 
+/* ---------------------------------------------------------------------- */
+
 void CommKokkos::grow_buf_pair(int n) {
   max_buf_pair = n * BUFFACTOR;
   k_buf_send_pair.resize(max_buf_pair);
   k_buf_recv_pair.resize(max_buf_pair);
 }
 
+/* ---------------------------------------------------------------------- */
+
 void CommKokkos::grow_buf_fix(int n) {
   max_buf_fix = n * BUFFACTOR;
   k_buf_send_fix.resize(max_buf_fix);
   k_buf_recv_fix.resize(max_buf_fix);
 }
 
+/* ---------------------------------------------------------------------- */
+
 void CommKokkos::reverse_comm(Pair *pair)
 {
   if (pair->execution_space == Host || !pair->reverse_comm_device || reverse_pair_comm_classic) {
@@ -553,6 +634,8 @@ void CommKokkos::reverse_comm(Pair *pair)
   }
 }
 
+/* ---------------------------------------------------------------------- */
+
 template<class DeviceType>
 void CommKokkos::reverse_comm_device(Pair *pair)
 {
@@ -610,18 +693,23 @@ void CommKokkos::reverse_comm_device(Pair *pair)
 
     // unpack buffer
 
-    pairKKBase->unpack_reverse_comm_kokkos(sendnum[iswap],k_sendlist,
-                                       iswap,k_buf_tmp);
+    auto k_sendlist_iswap = Kokkos::subview(k_sendlist,iswap,Kokkos::ALL);
+    pairKKBase->unpack_reverse_comm_kokkos(sendnum[iswap],k_sendlist_iswap,
+                                       k_buf_tmp);
     DeviceType().fence();
   }
 }
 
+/* ---------------------------------------------------------------------- */
+
 void CommKokkos::forward_comm(Dump *dump)
 {
   k_sendlist.sync<LMPHostType>();
   CommBrick::forward_comm(dump);
 }
 
+/* ---------------------------------------------------------------------- */
+
 void CommKokkos::reverse_comm(Dump *dump)
 {
   k_sendlist.sync<LMPHostType>();
@@ -668,6 +756,7 @@ void CommKokkos::exchange()
           }
         }
         exchange_comm_classic = true;
+        lmp->kokkos->exchange_comm_classic = 1;
       }
     }
   }
@@ -980,6 +1069,7 @@ void CommKokkos::borders()
                       "switching to classic exchange/border communication");
       }
       exchange_comm_classic = true;
+      lmp->kokkos->exchange_comm_classic = 1;
     }
   }
 
@@ -1202,14 +1292,15 @@ void CommKokkos::borders_device() {
       if (nsend*size_border > maxsend)
         grow_send_kokkos(nsend*size_border,0);
       if (ghost_velocity) {
+        auto k_sendlist_iswap = Kokkos::subview(k_sendlist,iswap,Kokkos::ALL);
         n = atomKK->avecKK->
-          pack_border_vel_kokkos(nsend,k_sendlist,k_buf_send,iswap,
+          pack_border_vel_kokkos(nsend,k_sendlist_iswap,k_buf_send,
                                  pbc_flag[iswap],pbc[iswap],exec_space);
         DeviceType().fence();
-      }
-      else {
+      } else {
+        auto k_sendlist_iswap = Kokkos::subview(k_sendlist,iswap,Kokkos::ALL);
         n = atomKK->avecKK->
-          pack_border_kokkos(nsend,k_sendlist,k_buf_send,iswap,
+          pack_border_kokkos(nsend,k_sendlist_iswap,k_buf_send,
                              pbc_flag[iswap],pbc[iswap],exec_space);
         DeviceType().fence();
       }
diff --git a/src/KOKKOS/comm_kokkos.h b/src/KOKKOS/comm_kokkos.h
index e06810b939..4fb4dfbe29 100644
--- a/src/KOKKOS/comm_kokkos.h
+++ b/src/KOKKOS/comm_kokkos.h
@@ -45,18 +45,21 @@ class CommKokkos : public CommBrick {
   void exchange() override;                     // move atoms to new procs
   void borders() override;                      // setup list of atoms to comm
 
-  void forward_comm(class Pair *) override;    // forward comm from a Pair
-  void reverse_comm(class Pair *) override;    // reverse comm from a Pair
-  void forward_comm(class Fix *, int size=0) override;      // forward comm from a Fix
-  void reverse_comm(class Fix *, int size=0) override;      // reverse comm from a Fix
-  void forward_comm(class Compute *) override;  // forward from a Compute
-  void reverse_comm(class Compute *) override;  // reverse from a Compute
-  void forward_comm(class Dump *) override;    // forward comm from a Dump
-  void reverse_comm(class Dump *) override;    // reverse comm from a Dump
+  void forward_comm(class Pair *) override;                 // forward comm from a Pair
+  void reverse_comm(class Pair *) override;                 // reverse comm from a Pair
+  void forward_comm(class Bond *) override;                 // forward comm from a Bond
+  void reverse_comm(class Bond *) override;                 // reverse comm from a Bond
+  void forward_comm(class Fix *, int size = 0) override;    // forward comm from a Fix
+  void reverse_comm(class Fix *, int size = 0) override;    // reverse comm from a Fix
+  void reverse_comm_variable(class Fix *) override;         // variable size reverse comm from a Fix
+  void forward_comm(class Compute *) override;              // forward from a Compute
+  void reverse_comm(class Compute *) override;              // reverse from a Compute
+  void forward_comm(class Dump *) override;                 // forward comm from a Dump
+  void reverse_comm(class Dump *) override;                 // reverse comm from a Dump
 
   void forward_comm_array(int, double **) override;            // forward comm of array
 
-  template<class DeviceType> void forward_comm_device(int dummy);
+  template<class DeviceType> void forward_comm_device();
   template<class DeviceType> void reverse_comm_device();
   template<class DeviceType> void forward_comm_device(Pair *pair);
   template<class DeviceType> void reverse_comm_device(Pair *pair);
@@ -95,7 +98,6 @@ class CommKokkos : public CommBrick {
   void copy_swap_info();
 };
 
-}
+}    // namespace LAMMPS_NS
 
 #endif
-
diff --git a/src/KOKKOS/comm_tiled_kokkos.cpp b/src/KOKKOS/comm_tiled_kokkos.cpp
index 5b35527b7e..2e4ca30bed 100644
--- a/src/KOKKOS/comm_tiled_kokkos.cpp
+++ b/src/KOKKOS/comm_tiled_kokkos.cpp
@@ -16,13 +16,28 @@
 
 #include "atom_kokkos.h"
 #include "atom_masks.h"
-#include "atom_vec.h"
+#include "atom_vec_kokkos.h"
+#include "compute.h"
+#include "dump.h"
+#include "fix.h"
+#include "force.h"
+#include "kokkos.h"
+#include "memory_kokkos.h"
+#include "modify.h"
+#include "output.h"
 
 using namespace LAMMPS_NS;
 
+static constexpr double BUFFACTOR = 1.5;
+static constexpr int BUFMIN = 1024;
+static constexpr int BUFEXTRA = 1000;
+
 /* ---------------------------------------------------------------------- */
 
-CommTiledKokkos::CommTiledKokkos(LAMMPS *_lmp) : CommTiled(_lmp) {}
+CommTiledKokkos::CommTiledKokkos(LAMMPS *_lmp) : CommTiled(_lmp)
+{
+  sendlist = nullptr;
+}
 
 /* ---------------------------------------------------------------------- */
 //IMPORTANT: we *MUST* pass "*oldcomm" to the Comm initializer here, as
@@ -31,10 +46,69 @@ CommTiledKokkos::CommTiledKokkos(LAMMPS *_lmp) : CommTiled(_lmp) {}
 //           The call to Comm::copy_arrays() then converts the shallow copy
 //           into a deep copy of the class with the new layout.
 
-CommTiledKokkos::CommTiledKokkos(LAMMPS *_lmp, Comm *oldcomm) : CommTiled(_lmp,oldcomm) {}
+CommTiledKokkos::CommTiledKokkos(LAMMPS *_lmp, Comm *oldcomm) : CommTiled(_lmp,oldcomm)
+{
+  sendlist = nullptr;
+}
 
 /* ---------------------------------------------------------------------- */
 
+CommTiledKokkos::~CommTiledKokkos()
+{
+  memoryKK->destroy_kokkos(k_sendlist,sendlist);
+  sendlist = nullptr;
+  buf_send = nullptr;
+  buf_recv = nullptr;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void CommTiledKokkos::init()
+{
+  atomKK = (AtomKokkos *) atom;
+  exchange_comm_classic = lmp->kokkos->exchange_comm_classic;
+  forward_comm_classic = lmp->kokkos->forward_comm_classic;
+  forward_pair_comm_classic = lmp->kokkos->forward_pair_comm_classic;
+  reverse_pair_comm_classic = lmp->kokkos->reverse_pair_comm_classic;
+  forward_fix_comm_classic = lmp->kokkos->forward_fix_comm_classic;
+  reverse_comm_classic = lmp->kokkos->reverse_comm_classic;
+  exchange_comm_on_host = lmp->kokkos->exchange_comm_on_host;
+  forward_comm_on_host = lmp->kokkos->forward_comm_on_host;
+  reverse_comm_on_host = lmp->kokkos->reverse_comm_on_host;
+
+  CommTiled::init();
+
+  int check_forward = 0;
+  int check_reverse = 0;
+  if (force->pair && (force->pair->execution_space == Host))
+    check_forward += force->pair->comm_forward;
+  if (force->pair && (force->pair->execution_space == Host))
+    check_reverse += force->pair->comm_reverse;
+
+  for (const auto &fix : modify->get_fix_list()) {
+    check_forward += fix->comm_forward;
+    check_reverse += fix->comm_reverse;
+  }
+
+  for (const auto &compute : modify->get_compute_list()) {
+    check_forward += compute->comm_forward;
+    check_reverse += compute->comm_reverse;
+  }
+
+  for (const auto &dump : output->get_dump_list()) {
+    check_forward += dump->comm_forward;
+    check_reverse += dump->comm_reverse;
+  }
+
+  if (force->newton == 0) check_reverse = 0;
+  if (force->pair) check_reverse += force->pair->comm_reverse_off;
+
+  if (!comm_f_only) { // not all Kokkos atom_vec styles have reverse pack/unpack routines yet
+    reverse_comm_classic = true;
+    lmp->kokkos->reverse_comm_classic = 1;
+  }
+}
+
 /* ----------------------------------------------------------------------
    forward communication of atom coords every timestep
    other per-atom attributes may also be sent via pack/unpack routines
@@ -42,6 +116,14 @@ CommTiledKokkos::CommTiledKokkos(LAMMPS *_lmp, Comm *oldcomm) : CommTiled(_lmp,o
 
 void CommTiledKokkos::forward_comm(int dummy)
 {
+  if (!forward_comm_classic) {
+    if (forward_comm_on_host) forward_comm_device<LMPHostType>();
+    else forward_comm_device<LMPDeviceType>();
+    return;
+  }
+
+  k_sendlist.sync<LMPHostType>();
+
   if (comm_x_only) {
     atomKK->sync(Host,X_MASK);
     atomKK->modified(Host,X_MASK);
@@ -56,6 +138,127 @@ void CommTiledKokkos::forward_comm(int dummy)
   CommTiled::forward_comm(dummy);
 }
 
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void CommTiledKokkos::forward_comm_device()
+{
+  int i,irecv,n,nsend,nrecv;
+  double *buf;
+
+  // exchange data with another set of procs in each swap
+  // post recvs from all procs except self
+  // send data to all procs except self
+  // copy data to self if sendself is set
+  // wait on all procs except self and unpack received data
+  // if comm_x_only set, exchange or copy directly to x, don't unpack
+
+  k_sendlist.sync<DeviceType>();
+
+  for (int iswap = 0; iswap < nswap; iswap++) {
+    nsend = nsendproc[iswap] - sendself[iswap];
+    nrecv = nrecvproc[iswap] - sendself[iswap];
+
+    if (comm_x_only) {
+      if (recvother[iswap]) {
+        for (i = 0; i < nrecv; i++) {
+          buf = atomKK->k_x.view<DeviceType>().data() +
+            firstrecv[iswap][i]*atomKK->k_x.view<DeviceType>().extent(1);
+          MPI_Irecv(buf,size_forward_recv[iswap][i],
+                    MPI_DOUBLE,recvproc[iswap][i],0,world,&requests[i]);
+        }
+      }
+      if (sendother[iswap]) {
+        for (i = 0; i < nsend; i++) {
+          auto k_sendlist_small = Kokkos::subview(k_sendlist,iswap,i,Kokkos::ALL);
+          n = atomKK->avecKK->pack_comm_kokkos(sendnum[iswap][i],k_sendlist_small,
+                              k_buf_send,pbc_flag[iswap][i],pbc[iswap][i]);
+          DeviceType().fence();
+          MPI_Send(k_buf_send.view<DeviceType>().data(),n,MPI_DOUBLE,sendproc[iswap][i],0,world);
+        }
+      }
+      if (sendself[iswap]) {
+        auto k_sendlist_small = Kokkos::subview(k_sendlist,iswap,nsend,Kokkos::ALL);
+        atomKK->avecKK->pack_comm_self(sendnum[iswap][nsend],k_sendlist_small,
+                        firstrecv[iswap][nrecv],pbc_flag[iswap][nsend],pbc[iswap][nsend]);
+        DeviceType().fence();
+      }
+      if (recvother[iswap]) MPI_Waitall(nrecv,requests,MPI_STATUS_IGNORE);
+
+    } else if (ghost_velocity) {
+      if (recvother[iswap]) {
+        for (i = 0; i < nrecv; i++) {
+          buf = k_buf_recv.view<DeviceType>().data() +
+            forward_recv_offset[iswap][i]*k_buf_recv.view<DeviceType>().extent(1);
+          MPI_Irecv(buf,
+                    size_forward_recv[iswap][i],MPI_DOUBLE,recvproc[iswap][i],0,world,&requests[i]);
+        }
+      }
+      if (sendother[iswap]) {
+        for (i = 0; i < nsend; i++) {
+          auto k_sendlist_small = Kokkos::subview(k_sendlist,iswap,i,Kokkos::ALL);
+          n = atomKK->avecKK->pack_comm_vel_kokkos(sendnum[iswap][i],k_sendlist_small,
+                                  k_buf_send,pbc_flag[iswap][i],pbc[iswap][i]);
+          DeviceType().fence();
+          MPI_Send(k_buf_send.view<DeviceType>().data(),n,
+                   MPI_DOUBLE,sendproc[iswap][i],0,world);
+        }
+      }
+      if (sendself[iswap]) {
+        auto k_sendlist_small = Kokkos::subview(k_sendlist,iswap,nsend,Kokkos::ALL);
+        atomKK->avecKK->pack_comm_vel_kokkos(sendnum[iswap][nsend],k_sendlist_small,
+                            k_buf_send,pbc_flag[iswap][nsend],pbc[iswap][nsend]);
+        DeviceType().fence();
+        atomKK->avecKK->unpack_comm_vel_kokkos(recvnum[iswap][nrecv],firstrecv[iswap][nrecv],k_buf_send);
+        DeviceType().fence();
+      }
+      if (recvother[iswap]) {
+        for (i = 0; i < nrecv; i++) {
+          MPI_Waitany(nrecv,requests,&irecv,MPI_STATUS_IGNORE);
+          auto k_buf_recv_offset = Kokkos::subview(k_buf_recv,std::pair<int,int>(forward_recv_offset[iswap][irecv],(int)k_buf_recv.extent(0)),Kokkos::ALL);
+          atomKK->avecKK->unpack_comm_vel_kokkos(recvnum[iswap][irecv],firstrecv[iswap][irecv],
+                                k_buf_recv_offset);
+          DeviceType().fence();
+        }
+      }
+
+    } else {
+      if (recvother[iswap]) {
+        for (i = 0; i < nrecv; i++) {
+          buf = k_buf_recv.view<DeviceType>().data() +
+            forward_recv_offset[iswap][i]*k_buf_recv.view<DeviceType>().extent(1);
+          MPI_Irecv(buf,
+                    size_forward_recv[iswap][i],MPI_DOUBLE,recvproc[iswap][i],0,world,&requests[i]);
+        }
+      }
+      if (sendother[iswap]) {
+        for (i = 0; i < nsend; i++) {
+          auto k_sendlist_small = Kokkos::subview(k_sendlist,iswap,i,Kokkos::ALL);
+          n = atomKK->avecKK->pack_comm_kokkos(sendnum[iswap][i],k_sendlist_small,
+                              k_buf_send,pbc_flag[iswap][i],pbc[iswap][i]);
+          DeviceType().fence();
+          MPI_Send(k_buf_send.view<DeviceType>().data(),n,MPI_DOUBLE,sendproc[iswap][i],0,world);
+        }
+      }
+      if (sendself[iswap]) {
+        auto k_sendlist_small = Kokkos::subview(k_sendlist,iswap,nsend,Kokkos::ALL);
+        n = atomKK->avecKK->pack_comm_kokkos(sendnum[iswap][nsend],k_sendlist_small,
+                        k_buf_send,pbc_flag[iswap][nsend],pbc[iswap][nsend]);
+        DeviceType().fence();
+      }
+      if (recvother[iswap]) {
+        for (i = 0; i < nrecv; i++) {
+          MPI_Waitany(nrecv,requests,&irecv,MPI_STATUS_IGNORE);
+          auto k_buf_recv_offset = Kokkos::subview(k_buf_recv,std::pair<int,int>(forward_recv_offset[iswap][irecv],(int)k_buf_recv.extent(0)),Kokkos::ALL);
+          atomKK->avecKK->unpack_comm_kokkos(recvnum[iswap][irecv],firstrecv[iswap][irecv],
+                                   k_buf_recv_offset);
+          DeviceType().fence();
+        }
+      }
+    }
+  }
+}
+
 /* ----------------------------------------------------------------------
    reverse communication of forces on atoms every timestep
    other per-atom attributes may also be sent via pack/unpack routines
@@ -63,16 +266,117 @@ void CommTiledKokkos::forward_comm(int dummy)
 
 void CommTiledKokkos::reverse_comm()
 {
+  if (!reverse_comm_classic) {
+    if (reverse_comm_on_host) reverse_comm_device<LMPHostType>();
+    else reverse_comm_device<LMPDeviceType>();
+    return;
+  }
+
+  k_sendlist.sync<LMPHostType>();
+
   if (comm_f_only)
     atomKK->sync(Host,F_MASK);
   else
     atomKK->sync(Host,ALL_MASK);
+
   CommTiled::reverse_comm();
+
   if (comm_f_only)
     atomKK->modified(Host,F_MASK);
   else
     atomKK->modified(Host,ALL_MASK);
-  atomKK->sync(Device,ALL_MASK);
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void CommTiledKokkos::reverse_comm_device()
+{
+  int i,irecv,n,nsend,nrecv;
+  double *buf;
+
+  // exchange data with another set of procs in each swap
+  // post recvs from all procs except self
+  // send data to all procs except self
+  // copy data to self if sendself is set
+  // wait on all procs except self and unpack received data
+  // if comm_f_only set, exchange or copy directly from f, don't pack
+
+  k_sendlist.sync<DeviceType>();
+
+  for (int iswap = nswap-1; iswap >= 0; iswap--) {
+    nsend = nsendproc[iswap] - sendself[iswap];
+    nrecv = nrecvproc[iswap] - sendself[iswap];
+
+    if (comm_f_only) {
+      if (sendother[iswap]) {
+        for (i = 0; i < nsend; i++) {
+          buf = k_buf_recv.view<DeviceType>().data() +
+            reverse_recv_offset[iswap][i]*k_buf_recv.view<DeviceType>().extent(1);
+          MPI_Irecv(buf,
+                    size_reverse_recv[iswap][i],MPI_DOUBLE,sendproc[iswap][i],0,world,&requests[i]);
+        }
+      }
+      if (recvother[iswap]) {
+        for (i = 0; i < nrecv; i++) {
+          buf = atomKK->k_f.view<DeviceType>().data() +
+            firstrecv[iswap][i]*atomKK->k_f.view<DeviceType>().extent(1);
+          MPI_Send(buf,size_reverse_send[iswap][i],
+                   MPI_DOUBLE,recvproc[iswap][i],0,world);
+        }
+      }
+      if (sendself[iswap]) {
+        auto k_sendlist_small = Kokkos::subview(k_sendlist,iswap,nsend,Kokkos::ALL);
+        atomKK->avecKK->pack_reverse_self(sendnum[iswap][nsend],k_sendlist_small,
+                             firstrecv[iswap][nrecv]);
+        DeviceType().fence();
+      }
+      if (sendother[iswap]) {
+        for (i = 0; i < nsend; i++) {
+          MPI_Waitany(nsend,requests,&irecv,MPI_STATUS_IGNORE);
+          auto k_sendlist_small = Kokkos::subview(k_sendlist,iswap,irecv,Kokkos::ALL);
+          auto k_buf_recv_offset = Kokkos::subview(k_buf_recv,std::pair<int,int>(reverse_recv_offset[iswap][irecv],(int)k_buf_recv.extent(0)),Kokkos::ALL);
+          atomKK->avecKK->unpack_reverse_kokkos(sendnum[iswap][irecv],k_sendlist_small,
+                                      k_buf_recv_offset);
+          DeviceType().fence();
+        }
+      }
+
+    } else {
+      if (sendother[iswap]) {
+        for (i = 0; i < nsend; i++) {
+          buf = k_buf_recv.view<DeviceType>().data() +
+            reverse_recv_offset[iswap][i]*k_buf_recv.view<DeviceType>().extent(1);
+          MPI_Irecv(buf,
+                    size_reverse_recv[iswap][i],MPI_DOUBLE,sendproc[iswap][i],0,world,&requests[i]);
+        }
+      }
+      if (recvother[iswap]) {
+        for (i = 0; i < nrecv; i++) {
+          n = atomKK->avecKK->pack_reverse_kokkos(recvnum[iswap][i],firstrecv[iswap][i],k_buf_send);
+          DeviceType().fence();
+          MPI_Send(k_buf_send.view<DeviceType>().data(),n,MPI_DOUBLE,recvproc[iswap][i],0,world);
+        }
+      }
+      if (sendself[iswap]) {
+        auto k_sendlist_small = Kokkos::subview(k_sendlist,iswap,nsend,Kokkos::ALL);
+        atomKK->avecKK->pack_reverse_kokkos(recvnum[iswap][nrecv],firstrecv[iswap][nrecv],k_buf_send);
+        DeviceType().fence();
+        atomKK->avecKK->unpack_reverse_kokkos(sendnum[iswap][nsend],k_sendlist_small,k_buf_send);
+        DeviceType().fence();
+      }
+      if (sendother[iswap]) {
+        for (i = 0; i < nsend; i++) {
+          MPI_Waitany(nsend,requests,&irecv,MPI_STATUS_IGNORE);
+          auto k_sendlist_small = Kokkos::subview(k_sendlist,iswap,irecv,Kokkos::ALL);
+          auto k_buf_recv_offset = Kokkos::subview(k_buf_recv,std::pair<int,int>(reverse_recv_offset[iswap][irecv],(int)k_buf_recv.extent(0)),Kokkos::ALL);
+          atomKK->avecKK->unpack_reverse_kokkos(sendnum[iswap][irecv],k_sendlist_small,
+                               k_buf_recv_offset);
+          DeviceType().fence();
+        }
+      }
+    }
+  }
 }
 
 /* ----------------------------------------------------------------------
@@ -108,6 +412,7 @@ void CommTiledKokkos::borders()
   atomKK->sync(Host,ALL_MASK);
   CommTiled::borders();
   atomKK->modified(Host,ALL_MASK);
+  k_sendlist.modify_host();
 }
 
 /* ----------------------------------------------------------------------
@@ -130,6 +435,26 @@ void CommTiledKokkos::reverse_comm(Pair *pair)
   CommTiled::reverse_comm(pair);
 }
 
+/* ----------------------------------------------------------------------
+   forward communication invoked by a Bond
+   nsize used only to set recv buffer limit
+------------------------------------------------------------------------- */
+
+void CommTiledKokkos::forward_comm(Bond *bond)
+{
+  CommTiled::forward_comm(bond);
+}
+
+/* ----------------------------------------------------------------------
+   reverse communication invoked by a Bond
+   nsize used only to set recv buffer limit
+------------------------------------------------------------------------- */
+
+void CommTiledKokkos::reverse_comm(Bond *bond)
+{
+  CommTiled::reverse_comm(bond);
+}
+
 /* ----------------------------------------------------------------------
    forward communication invoked by a Fix
    size/nsize used only to set recv buffer limit
@@ -218,3 +543,141 @@ void CommTiledKokkos::forward_comm_array(int nsize, double **array)
 {
   CommTiled::forward_comm_array(nsize,array);
 }
+
+/* ----------------------------------------------------------------------
+   realloc the size of the send buffer as needed with BUFFACTOR and bufextra
+   if flag = 1, realloc
+   if flag = 0, don't need to realloc with copy, just free/malloc
+------------------------------------------------------------------------- */
+
+void CommTiledKokkos::grow_send(int n, int flag)
+{
+  grow_send_kokkos(n,flag,Host);
+}
+
+/* ----------------------------------------------------------------------
+   free/malloc the size of the recv buffer as needed with BUFFACTOR
+------------------------------------------------------------------------- */
+
+void CommTiledKokkos::grow_recv(int n, int flag)
+{
+  grow_recv_kokkos(n,flag,Host);
+}
+
+/* ----------------------------------------------------------------------
+   realloc the size of the send buffer as needed with BUFFACTOR & BUFEXTRA
+   if flag = 1, realloc
+   if flag = 0, don't need to realloc with copy, just free/malloc
+------------------------------------------------------------------------- */
+
+void CommTiledKokkos::grow_send_kokkos(int n, int flag, ExecutionSpace space)
+{
+
+  maxsend = static_cast<int> (BUFFACTOR * n);
+  int maxsend_border = (maxsend+BUFEXTRA)/atomKK->avecKK->size_border;
+  if (flag) {
+    if (space == Device)
+      k_buf_send.modify<LMPDeviceType>();
+    else
+      k_buf_send.modify<LMPHostType>();
+
+    if (ghost_velocity)
+      k_buf_send.resize(maxsend_border,
+                        atomKK->avecKK->size_border + atomKK->avecKK->size_velocity);
+    else
+      k_buf_send.resize(maxsend_border,atomKK->avecKK->size_border);
+    buf_send = k_buf_send.view<LMPHostType>().data();
+  } else {
+    if (ghost_velocity)
+      MemoryKokkos::realloc_kokkos(k_buf_send,"comm:k_buf_send",maxsend_border,
+                        atomKK->avecKK->size_border + atomKK->avecKK->size_velocity);
+    else
+      MemoryKokkos::realloc_kokkos(k_buf_send,"comm:k_buf_send",maxsend_border,
+                        atomKK->avecKK->size_border);
+    buf_send = k_buf_send.view<LMPHostType>().data();
+  }
+}
+
+/* ----------------------------------------------------------------------
+   free/malloc the size of the recv buffer as needed with BUFFACTOR
+------------------------------------------------------------------------- */
+
+void CommTiledKokkos::grow_recv_kokkos(int n, int flag, ExecutionSpace /*space*/)
+{
+  if (flag) maxrecv = n;
+  else maxrecv = static_cast<int> (BUFFACTOR * n);
+
+  int maxrecv_border = (maxrecv+BUFEXTRA)/atomKK->avecKK->size_border;
+
+  MemoryKokkos::realloc_kokkos(k_buf_recv,"comm:k_buf_recv",maxrecv_border,
+    atomKK->avecKK->size_border);
+  buf_recv = k_buf_recv.view<LMPHostType>().data();
+}
+
+/* ----------------------------------------------------------------------
+   realloc the size of the iswap sendlist as needed with BUFFACTOR
+------------------------------------------------------------------------- */
+
+void CommTiledKokkos::grow_list(int iswap, int iwhich, int n)
+{
+  int size = static_cast<int> (BUFFACTOR * n);
+
+  k_sendlist.sync<LMPHostType>();
+  k_sendlist.modify<LMPHostType>();
+
+  if (size > (int)k_sendlist.extent(2)) {
+    memoryKK->grow_kokkos(k_sendlist,sendlist,maxswap,maxsend,size,"comm:sendlist");
+
+    for (int i = 0; i < maxswap; i++)
+      maxsendlist[iswap][iwhich] = size;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   grow info for swap I, to allow for N procs to communicate with
+   ditto for complementary recv for swap I+1 or I-1, as invoked by caller
+------------------------------------------------------------------------- */
+
+void CommTiledKokkos::grow_swap_send(int i, int n, int /*nold*/)
+{
+  delete [] sendproc[i];
+  sendproc[i] = new int[n];
+  delete [] sendnum[i];
+  sendnum[i] = new int[n];
+
+  delete [] size_reverse_recv[i];
+  size_reverse_recv[i] = new int[n];
+  delete [] reverse_recv_offset[i];
+  reverse_recv_offset[i] = new int[n];
+
+  delete [] pbc_flag[i];
+  pbc_flag[i] = new int[n];
+  memory->destroy(pbc[i]);
+  memory->create(pbc[i],n,6,"comm:pbc_flag");
+  memory->destroy(sendbox[i]);
+  memory->create(sendbox[i],n,6,"comm:sendbox");
+  grow_swap_send_multi(i,n);
+  memory->destroy(sendbox_multiold[i]);
+  memory->create(sendbox_multiold[i],n,atom->ntypes+1,6,"comm:sendbox_multiold");
+
+  delete [] maxsendlist[i];
+  maxsendlist[i] = new int[n];
+
+  for (int j = 0; j < n; j++)
+    maxsendlist[i][j] = BUFMIN;
+
+  if (sendlist && !k_sendlist.d_view.data()) {
+    for (int ii = 0; ii < maxswap; ii++) {
+      if (sendlist[ii]) {
+        for (int jj = 0; jj < nprocmax[ii]; jj++)
+          memory->destroy(sendlist[ii][jj]);
+        delete [] sendlist[ii];
+      }
+    }
+    delete [] sendlist;
+  } else {
+    memoryKK->destroy_kokkos(k_sendlist,sendlist);
+  }
+
+  memoryKK->create_kokkos(k_sendlist,sendlist,maxswap,n,BUFMIN,"comm:sendlist");
+}
diff --git a/src/KOKKOS/comm_tiled_kokkos.h b/src/KOKKOS/comm_tiled_kokkos.h
index c80436b454..9033714796 100644
--- a/src/KOKKOS/comm_tiled_kokkos.h
+++ b/src/KOKKOS/comm_tiled_kokkos.h
@@ -25,28 +25,59 @@ class CommTiledKokkos : public CommTiled {
   CommTiledKokkos(class LAMMPS *);
   CommTiledKokkos(class LAMMPS *, class Comm *);
 
+  ~CommTiledKokkos() override;
+
+  bool exchange_comm_classic;
+  bool forward_comm_classic;
+  bool forward_pair_comm_classic;
+  bool reverse_pair_comm_classic;
+  bool forward_fix_comm_classic;
+  bool reverse_comm_classic;
+  bool exchange_comm_on_host;
+  bool forward_comm_on_host;
+  bool reverse_comm_on_host;
+
   using CommTiled::forward_comm;
   using CommTiled::reverse_comm;
+
+  void init() override;
   void forward_comm(int dummy = 0) override;    // forward comm of atom coords
   void reverse_comm() override;                 // reverse comm of forces
   void exchange() override;                     // move atoms to new procs
   void borders() override;                      // setup list of atoms to comm
 
-  void forward_comm(class Pair *) override;    // forward comm from a Pair
-  void reverse_comm(class Pair *) override;    // reverse comm from a Pair
-  void forward_comm(class Fix *, int size=0) override;
-                                                   // forward comm from a Fix
-  void reverse_comm(class Fix *, int size=0) override;
-                                                   // reverse comm from a Fix
-  void reverse_comm_variable(class Fix *) override;
-                                     // variable size reverse comm from a Fix
-  void forward_comm(class Compute *) override;  // forward from a Compute
-  void reverse_comm(class Compute *) override;  // reverse from a Compute
-  void forward_comm(class Dump *) override;    // forward comm from a Dump
-  void reverse_comm(class Dump *) override;    // reverse comm from a Dump
+  void forward_comm(class Pair *) override;                 // forward comm from a Pair
+  void reverse_comm(class Pair *) override;                 // reverse comm from a Pair
+  void forward_comm(class Bond *) override;                 // forward comm from a Bond
+  void reverse_comm(class Bond *) override;                 // reverse comm from a Bond
+  void forward_comm(class Fix *, int size = 0) override;    // forward comm from a Fix
+  void reverse_comm(class Fix *, int size = 0) override;    // reverse comm from a Fix
+  void reverse_comm_variable(class Fix *) override;         // variable size reverse comm from a Fix
+  void forward_comm(class Compute *) override;              // forward from a Compute
+  void reverse_comm(class Compute *) override;              // reverse from a Compute
+  void forward_comm(class Dump *) override;                 // forward comm from a Dump
+  void reverse_comm(class Dump *) override;                 // reverse comm from a Dump
 
   void forward_comm_array(int, double **) override;          // forward comm of array
-};
-}
-#endif
 
+  template<class DeviceType> void forward_comm_device();
+  template<class DeviceType> void reverse_comm_device();
+
+ protected:
+
+  DAT::tdual_int_3d k_sendlist;
+  //DAT::tdual_int_scalar k_total_send;
+  DAT::tdual_xfloat_2d k_buf_send,k_buf_recv;
+  //DAT::tdual_int_scalar k_count;
+
+  void grow_send(int, int) override;
+  void grow_recv(int, int flag = 0) override;
+  void grow_send_kokkos(int, int, ExecutionSpace space = Host);
+  void grow_recv_kokkos(int, int, ExecutionSpace space = Host);
+  void grow_list(int, int, int) override;
+  void grow_swap_send(int, int, int) override;     // grow swap arrays for send and recv
+};
+
+}    // namespace LAMMPS_NS
+
+#endif
diff --git a/src/KOKKOS/compute_ave_sphere_atom_kokkos.cpp b/src/KOKKOS/compute_ave_sphere_atom_kokkos.cpp
index 87fe3621e4..57f1f2bb18 100644
--- a/src/KOKKOS/compute_ave_sphere_atom_kokkos.cpp
+++ b/src/KOKKOS/compute_ave_sphere_atom_kokkos.cpp
@@ -21,19 +21,12 @@
 #include "atom_masks.h"
 #include "comm.h"
 #include "domain.h"
-#include "error.h"
 #include "force.h"
 #include "memory_kokkos.h"
-#include "modify.h"
-#include "neigh_list.h"
 #include "neigh_request.h"
 #include "neighbor_kokkos.h"
-#include "pair.h"
 #include "update.h"
 
-#include <cmath>
-#include <cstring>
-
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/KOKKOS/compute_composition_atom_kokkos.cpp b/src/KOKKOS/compute_composition_atom_kokkos.cpp
index 2b0e663011..47056951e7 100644
--- a/src/KOKKOS/compute_composition_atom_kokkos.cpp
+++ b/src/KOKKOS/compute_composition_atom_kokkos.cpp
@@ -19,21 +19,11 @@
 
 #include "atom_kokkos.h"
 #include "atom_masks.h"
-#include "comm.h"
-#include "domain.h"
-#include "error.h"
-#include "force.h"
 #include "memory_kokkos.h"
-#include "modify.h"
-#include "neigh_list.h"
 #include "neigh_request.h"
 #include "neighbor_kokkos.h"
-#include "pair.h"
 #include "update.h"
 
-#include <cmath>
-#include <cstring>
-
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/KOKKOS/compute_coord_atom_kokkos.cpp b/src/KOKKOS/compute_coord_atom_kokkos.cpp
index 089a94f498..2d56a53abe 100644
--- a/src/KOKKOS/compute_coord_atom_kokkos.cpp
+++ b/src/KOKKOS/compute_coord_atom_kokkos.cpp
@@ -20,8 +20,6 @@
 #include "compute_orientorder_atom_kokkos.h"
 #include "error.h"
 #include "memory_kokkos.h"
-#include "modify.h"
-#include "neigh_list.h"
 #include "neigh_request.h"
 #include "neighbor_kokkos.h"
 #include "update.h"
diff --git a/src/KOKKOS/compute_erotate_sphere_kokkos.cpp b/src/KOKKOS/compute_erotate_sphere_kokkos.cpp
index 9fc477b3a0..d18aa3d27a 100644
--- a/src/KOKKOS/compute_erotate_sphere_kokkos.cpp
+++ b/src/KOKKOS/compute_erotate_sphere_kokkos.cpp
@@ -16,8 +16,6 @@
 
 #include "atom_kokkos.h"
 #include "atom_masks.h"
-#include "error.h"
-#include "force.h"
 #include "update.h"
 
 using namespace LAMMPS_NS;
diff --git a/src/KOKKOS/compute_orientorder_atom_kokkos.cpp b/src/KOKKOS/compute_orientorder_atom_kokkos.cpp
index 35699cb5c1..447f15b830 100644
--- a/src/KOKKOS/compute_orientorder_atom_kokkos.cpp
+++ b/src/KOKKOS/compute_orientorder_atom_kokkos.cpp
@@ -24,14 +24,11 @@
 
 #include "atom_kokkos.h"
 #include "atom_masks.h"
-#include "kokkos.h"
 #include "math_const.h"
 #include "math_special.h"
 #include "memory_kokkos.h"
-#include "neigh_list.h"
 #include "neigh_request.h"
 #include "neighbor_kokkos.h"
-#include "pair.h"
 #include "update.h"
 
 #include <cmath>
diff --git a/src/KOKKOS/compute_reaxff_atom_kokkos.cpp b/src/KOKKOS/compute_reaxff_atom_kokkos.cpp
index 2b5cbff13d..0683e63752 100644
--- a/src/KOKKOS/compute_reaxff_atom_kokkos.cpp
+++ b/src/KOKKOS/compute_reaxff_atom_kokkos.cpp
@@ -18,16 +18,12 @@
 
 #include "compute_reaxff_atom_kokkos.h"
 #include "atom.h"
-#include "molecule.h"
 #include "update.h"
-#include "force.h"
-#include "memory.h"
 #include "error.h"
 #include "neigh_list.h"
 
 #include "memory_kokkos.h"
 #include "pair_reaxff_kokkos.h"
-#include "reaxff_api.h"
 
 using namespace LAMMPS_NS;
 using namespace ReaxFF;
diff --git a/src/KOKKOS/compute_temp_deform_kokkos.cpp b/src/KOKKOS/compute_temp_deform_kokkos.cpp
index 55db344d6a..03aba5b10d 100644
--- a/src/KOKKOS/compute_temp_deform_kokkos.cpp
+++ b/src/KOKKOS/compute_temp_deform_kokkos.cpp
@@ -24,7 +24,6 @@
 #include "domain_kokkos.h"
 #include "error.h"
 #include "force.h"
-#include "memory_kokkos.h"
 #include "update.h"
 
 using namespace LAMMPS_NS;
diff --git a/src/KOKKOS/compute_temp_kokkos.cpp b/src/KOKKOS/compute_temp_kokkos.cpp
index ebdd6971e0..78a35440c8 100644
--- a/src/KOKKOS/compute_temp_kokkos.cpp
+++ b/src/KOKKOS/compute_temp_kokkos.cpp
@@ -21,8 +21,6 @@
 #include "force.h"
 #include "update.h"
 
-#include <cstring>
-
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/KOKKOS/dynamical_matrix_kokkos.cpp b/src/KOKKOS/dynamical_matrix_kokkos.cpp
index ec2cc17ef2..e4c454c7f2 100644
--- a/src/KOKKOS/dynamical_matrix_kokkos.cpp
+++ b/src/KOKKOS/dynamical_matrix_kokkos.cpp
@@ -23,27 +23,18 @@
 #include "atom_masks.h"
 #include "bond.h"
 #include "comm.h"
-#include "compute.h"
 #include "dihedral.h"
 #include "domain.h"
-#include "error.h"
-#include "finish.h"
 #include "force.h"
-#include "group.h"
 #include "improper.h"
 #include "kokkos.h"
 #include "kspace.h"
-#include "memory.h"
 #include "modify.h"
 #include "neighbor.h"
 #include "pair.h"
 #include "timer.h"
 #include "update.h"
 
-#include <cmath>
-#include <cstring>
-#include <algorithm>
-
 using namespace LAMMPS_NS;
 enum{REGULAR,ESKM};
 
diff --git a/src/KOKKOS/fix_acks2_reaxff_kokkos.cpp b/src/KOKKOS/fix_acks2_reaxff_kokkos.cpp
index 617660d5ef..308df20c0e 100644
--- a/src/KOKKOS/fix_acks2_reaxff_kokkos.cpp
+++ b/src/KOKKOS/fix_acks2_reaxff_kokkos.cpp
@@ -24,13 +24,11 @@
 #include "comm.h"
 #include "error.h"
 #include "force.h"
-#include "integrate.h"
 #include "kokkos.h"
 #include "memory_kokkos.h"
 #include "neigh_list_kokkos.h"
 #include "neigh_request.h"
 #include "neighbor.h"
-#include "pair_reaxff_kokkos.h"
 #include "update.h"
 
 #include <cmath>
@@ -536,7 +534,7 @@ void FixACKS2ReaxFFKokkos<DeviceType>::deallocate_array()
 {
   memoryKK->destroy_kokkos(k_s,s);
   memoryKK->destroy_kokkos(k_chi_field,chi_field);
-  memoryKK->destroy_kokkos(X_diag);
+  memoryKK->destroy_kokkos(k_X_diag,X_diag);
   memoryKK->destroy_kokkos(k_d,d);
   memoryKK->destroy_kokkos(k_q_hat,q_hat);
   memoryKK->destroy_kokkos(k_y,y);
diff --git a/src/KOKKOS/fix_acks2_reaxff_kokkos.h b/src/KOKKOS/fix_acks2_reaxff_kokkos.h
index c27719c364..cb16b4cd24 100644
--- a/src/KOKKOS/fix_acks2_reaxff_kokkos.h
+++ b/src/KOKKOS/fix_acks2_reaxff_kokkos.h
@@ -246,9 +246,8 @@ class FixACKS2ReaxFFKokkos : public FixACKS2ReaxFF, public KokkosBase {
   int count, isuccess;
   double alpha, beta, omega, cutsq;
 
-  int iswap;
   int first;
-  typename AT::t_int_2d d_sendlist;
+  typename AT::t_int_1d d_sendlist;
   typename AT::t_xfloat_1d_um v_buf;
 
   void grow_arrays(int) override;
diff --git a/src/KOKKOS/fix_deform_kokkos.cpp b/src/KOKKOS/fix_deform_kokkos.cpp
index d49e335986..90c4380da9 100644
--- a/src/KOKKOS/fix_deform_kokkos.cpp
+++ b/src/KOKKOS/fix_deform_kokkos.cpp
@@ -21,7 +21,6 @@
 #include "atom_kokkos.h"
 #include "atom_masks.h"
 #include "domain_kokkos.h"
-#include "error.h"
 #include "force.h"
 #include "input.h"
 #include "irregular.h"
@@ -32,7 +31,6 @@
 #include "variable.h"
 
 #include <cmath>
-#include <cstring>
 
 using namespace LAMMPS_NS;
 using namespace FixConst;
@@ -120,11 +118,11 @@ void FixDeformKokkos::end_of_step()
     } else if (set[i].style == WIGGLE) {
       double delt = (update->ntimestep - update->beginstep) * update->dt;
       set[i].lo_target = set[i].lo_start -
-        0.5*set[i].amplitude * sin(TWOPI*delt/set[i].tperiod);
+        0.5*set[i].amplitude * sin(MY_2PI*delt/set[i].tperiod);
       set[i].hi_target = set[i].hi_start +
-        0.5*set[i].amplitude * sin(TWOPI*delt/set[i].tperiod);
-      h_rate[i] = TWOPI/set[i].tperiod * set[i].amplitude *
-        cos(TWOPI*delt/set[i].tperiod);
+        0.5*set[i].amplitude * sin(MY_2PI*delt/set[i].tperiod);
+      h_rate[i] = MY_2PI/set[i].tperiod * set[i].amplitude *
+        cos(MY_2PI*delt/set[i].tperiod);
       h_ratelo[i] = -0.5*h_rate[i];
     } else if (set[i].style == VARIABLE) {
       double del = input->variable->compute_equal(set[i].hvar);
@@ -212,9 +210,9 @@ void FixDeformKokkos::end_of_step()
       } else if (set[i].style == WIGGLE) {
         double delt = (update->ntimestep - update->beginstep) * update->dt;
         set[i].tilt_target = set[i].tilt_start +
-          set[i].amplitude * sin(TWOPI*delt/set[i].tperiod);
-        h_rate[i] = TWOPI/set[i].tperiod * set[i].amplitude *
-          cos(TWOPI*delt/set[i].tperiod);
+          set[i].amplitude * sin(MY_2PI*delt/set[i].tperiod);
+        h_rate[i] = MY_2PI/set[i].tperiod * set[i].amplitude *
+          cos(MY_2PI*delt/set[i].tperiod);
       } else if (set[i].style == VARIABLE) {
         double delta_tilt = input->variable->compute_equal(set[i].hvar);
         set[i].tilt_target = set[i].tilt_start + delta_tilt;
diff --git a/src/KOKKOS/fix_dt_reset_kokkos.cpp b/src/KOKKOS/fix_dt_reset_kokkos.cpp
index 83cdc5f26f..df354f19c8 100644
--- a/src/KOKKOS/fix_dt_reset_kokkos.cpp
+++ b/src/KOKKOS/fix_dt_reset_kokkos.cpp
@@ -18,15 +18,13 @@
 #include "atom_masks.h"
 #include "error.h"
 #include "force.h"
-#include "input.h"
-#include "integrate.h"
-#include "kokkos_base.h"
-#include "memory_kokkos.h"
 #include "modify.h"
 #include "output.h"
 #include "pair.h"
 #include "update.h"
 
+#include <cmath>
+
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
diff --git a/src/KOKKOS/fix_efield_kokkos.cpp b/src/KOKKOS/fix_efield_kokkos.cpp
index ffe1c34e97..4009773982 100644
--- a/src/KOKKOS/fix_efield_kokkos.cpp
+++ b/src/KOKKOS/fix_efield_kokkos.cpp
@@ -30,8 +30,6 @@
 #include "atom_masks.h"
 #include "kokkos_base.h"
 
-#include <cstring>
-
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
diff --git a/src/KOKKOS/fix_gravity_kokkos.cpp b/src/KOKKOS/fix_gravity_kokkos.cpp
index 42a16eda78..01fcc0780c 100644
--- a/src/KOKKOS/fix_gravity_kokkos.cpp
+++ b/src/KOKKOS/fix_gravity_kokkos.cpp
@@ -16,7 +16,6 @@
 
 #include "atom_kokkos.h"
 #include "atom_masks.h"
-#include "atom_vec.h"
 #include "input.h"
 #include "modify.h"
 #include "update.h"
diff --git a/src/KOKKOS/fix_langevin_kokkos.h b/src/KOKKOS/fix_langevin_kokkos.h
index 4fc22a1df1..fc25a0a748 100644
--- a/src/KOKKOS/fix_langevin_kokkos.h
+++ b/src/KOKKOS/fix_langevin_kokkos.h
@@ -27,7 +27,6 @@ FixStyle(langevin/kk/host,FixLangevinKokkos<LMPHostType>);
 #include "kokkos_type.h"
 #include "kokkos_base.h"
 #include "Kokkos_Random.hpp"
-#include "comm_kokkos.h"
 
 namespace LAMMPS_NS {
 
@@ -103,8 +102,6 @@ namespace LAMMPS_NS {
       void end_of_step_rmass_item(int) const;
 
   private:
-    class CommKokkos *commKK;
-
     typename ArrayTypes<DeviceType>::t_float_1d rmass;
     typename ArrayTypes<DeviceType>::t_float_1d mass;
     typename ArrayTypes<DeviceType>::tdual_double_2d k_franprev;
diff --git a/src/KOKKOS/fix_minimize_kokkos.cpp b/src/KOKKOS/fix_minimize_kokkos.cpp
index e2106b3d03..585c357992 100644
--- a/src/KOKKOS/fix_minimize_kokkos.cpp
+++ b/src/KOKKOS/fix_minimize_kokkos.cpp
@@ -19,6 +19,8 @@
 #include "domain.h"
 #include "memory_kokkos.h"
 
+#include <cmath>
+
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
diff --git a/src/KOKKOS/fix_momentum_kokkos.cpp b/src/KOKKOS/fix_momentum_kokkos.cpp
index b9220a417f..fa959cd582 100644
--- a/src/KOKKOS/fix_momentum_kokkos.cpp
+++ b/src/KOKKOS/fix_momentum_kokkos.cpp
@@ -18,11 +18,8 @@
 #include "atom_masks.h"
 #include "domain_kokkos.h"
 #include "group.h"
-#include "error.h"
 #include "kokkos_few.h"
 
-#include <cstring>
-
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
diff --git a/src/KOKKOS/fix_neigh_history_kokkos.cpp b/src/KOKKOS/fix_neigh_history_kokkos.cpp
index 49fe3f1177..d3df48354e 100644
--- a/src/KOKKOS/fix_neigh_history_kokkos.cpp
+++ b/src/KOKKOS/fix_neigh_history_kokkos.cpp
@@ -17,7 +17,6 @@
 #include "atom_kokkos.h"
 #include "error.h"
 #include "memory_kokkos.h"
-#include "modify.h"
 #include "neigh_list_kokkos.h"
 #include "pair_kokkos.h"
 #include "atom_vec_kokkos.h"
diff --git a/src/KOKKOS/fix_nh_kokkos.cpp b/src/KOKKOS/fix_nh_kokkos.cpp
index d038093317..1b87b3c775 100644
--- a/src/KOKKOS/fix_nh_kokkos.cpp
+++ b/src/KOKKOS/fix_nh_kokkos.cpp
@@ -18,18 +18,14 @@
 
 #include "fix_nh_kokkos.h"
 
-#include "atom.h"
 #include "atom_kokkos.h"
 #include "atom_masks.h"
-#include "comm.h"
 #include "compute.h"
 #include "domain_kokkos.h"
 #include "error.h"
-#include "fix_deform.h"
 #include "force.h"
 #include "irregular.h"
 #include "kspace.h"
-#include "memory_kokkos.h"
 #include "neighbor.h"
 #include "update.h"
 
diff --git a/src/KOKKOS/fix_nve_kokkos.cpp b/src/KOKKOS/fix_nve_kokkos.cpp
index 59cc90c088..11b5184310 100644
--- a/src/KOKKOS/fix_nve_kokkos.cpp
+++ b/src/KOKKOS/fix_nve_kokkos.cpp
@@ -17,8 +17,6 @@
 #include "atom_kokkos.h"
 #include "atom_masks.h"
 
-#include <cstring>
-
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
diff --git a/src/KOKKOS/fix_nve_sphere_kokkos.cpp b/src/KOKKOS/fix_nve_sphere_kokkos.cpp
index 38f6a40792..aed45c938c 100644
--- a/src/KOKKOS/fix_nve_sphere_kokkos.cpp
+++ b/src/KOKKOS/fix_nve_sphere_kokkos.cpp
@@ -15,7 +15,8 @@
 #include "fix_nve_sphere_kokkos.h"
 #include "atom_masks.h"
 #include "atom_kokkos.h"
-#include "error.h"
+
+#include <cmath>
 
 using namespace LAMMPS_NS;
 
diff --git a/src/KOKKOS/fix_nvt_kokkos.cpp b/src/KOKKOS/fix_nvt_kokkos.cpp
index 16328c5e3a..7a8badd569 100644
--- a/src/KOKKOS/fix_nvt_kokkos.cpp
+++ b/src/KOKKOS/fix_nvt_kokkos.cpp
@@ -18,8 +18,6 @@
 #include "group.h"
 #include "modify.h"
 
-#include <cstring>
-
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
diff --git a/src/KOKKOS/fix_nvt_sllod_kokkos.cpp b/src/KOKKOS/fix_nvt_sllod_kokkos.cpp
index 948e3b88f6..ddcc0c728c 100644
--- a/src/KOKKOS/fix_nvt_sllod_kokkos.cpp
+++ b/src/KOKKOS/fix_nvt_sllod_kokkos.cpp
@@ -18,21 +18,19 @@
 
 #include "fix_nvt_sllod_kokkos.h"
 
-#include "atom.h"
-#include "atom.h"
 #include "atom_kokkos.h"
 #include "atom_masks.h"
 #include "compute.h"
 #include "domain.h"
 #include "error.h"
-#include "fix.h"
-#include "fix_deform_kokkos.h"
+#include "fix_deform.h"
 #include "group.h"
 #include "kokkos_few.h"
 #include "math_extra.h"
-#include "memory_kokkos.h"
 #include "modify.h"
 
+#include <cstring>
+
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
diff --git a/src/KOKKOS/fix_property_atom_kokkos.cpp b/src/KOKKOS/fix_property_atom_kokkos.cpp
index dcd943cac6..10cea48e90 100644
--- a/src/KOKKOS/fix_property_atom_kokkos.cpp
+++ b/src/KOKKOS/fix_property_atom_kokkos.cpp
@@ -16,7 +16,6 @@
 
 #include "atom_kokkos.h"
 #include "atom_masks.h"
-#include "error.h"
 #include "memory_kokkos.h"
 
 #include <cstring>
diff --git a/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp b/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp
index ba25d79ad5..deb41944bc 100644
--- a/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp
+++ b/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp
@@ -27,10 +27,8 @@
 
 #include "fix_qeq_reaxff_kokkos.h"
 
-#include "atom.h"
 #include "atom_kokkos.h"
 #include "atom_masks.h"
-#include "atom_vec_kokkos.h"
 #include "comm.h"
 #include "error.h"
 #include "force.h"
@@ -1117,12 +1115,11 @@ void FixQEqReaxFFKokkos<DeviceType>::operator()(TagQEqCalculateQ, const int &ii)
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
-int FixQEqReaxFFKokkos<DeviceType>::pack_forward_comm_kokkos(int n, DAT::tdual_int_2d k_sendlist,
-                                                        int iswap_in, DAT::tdual_xfloat_1d &k_buf,
+int FixQEqReaxFFKokkos<DeviceType>::pack_forward_comm_kokkos(int n, DAT::tdual_int_1d k_sendlist,
+                                                        DAT::tdual_xfloat_1d &k_buf,
                                                         int /*pbc_flag*/, int * /*pbc*/)
 {
   d_sendlist = k_sendlist.view<DeviceType>();
-  iswap = iswap_in;
   d_buf = k_buf.view<DeviceType>();
   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagQEqPackForwardComm>(0,n),*this);
   if (pack_flag == 3) return n;
@@ -1134,7 +1131,7 @@ int FixQEqReaxFFKokkos<DeviceType>::pack_forward_comm_kokkos(int n, DAT::tdual_i
 template<class DeviceType>
 KOKKOS_INLINE_FUNCTION
 void FixQEqReaxFFKokkos<DeviceType>::operator()(TagQEqPackForwardComm, const int &i) const {
-  int j = d_sendlist(iswap, i);
+  int j = d_sendlist(i);
 
   if (pack_flag == 1) {
     if (!(converged & 1))
diff --git a/src/KOKKOS/fix_qeq_reaxff_kokkos.h b/src/KOKKOS/fix_qeq_reaxff_kokkos.h
index 6aa345fba6..92026b209d 100644
--- a/src/KOKKOS/fix_qeq_reaxff_kokkos.h
+++ b/src/KOKKOS/fix_qeq_reaxff_kokkos.h
@@ -154,7 +154,7 @@ class FixQEqReaxFFKokkos : public FixQEqReaxFF, public KokkosBase {
     F_FLOAT chi, eta, gamma;
   };
 
-  int pack_forward_comm_kokkos(int, DAT::tdual_int_2d, int, DAT::tdual_xfloat_1d&,
+  int pack_forward_comm_kokkos(int, DAT::tdual_int_1d, DAT::tdual_xfloat_1d&,
                        int, int *) override;
   void unpack_forward_comm_kokkos(int, int, DAT::tdual_xfloat_1d&) override;
   int pack_forward_comm(int, int *, double *, int, int *) override;
@@ -255,9 +255,9 @@ class FixQEqReaxFFKokkos : public FixQEqReaxFF, public KokkosBase {
   DupScatterView<F_FLOAT**, typename AT::t_ffloat2_1d::array_layout> dup_o;
   NonDupScatterView<F_FLOAT**, typename AT::t_ffloat2_1d::array_layout> ndup_o;
 
-  int iswap,nsend;
+  int nsend;
   int first;
-  typename AT::t_int_2d d_sendlist;
+  typename AT::t_int_1d d_sendlist;
   typename AT::t_xfloat_1d d_buf;
   typename AT::t_int_1d d_copylist;
   typename AT::t_int_1d d_indices;
diff --git a/src/KOKKOS/fix_reaxff_species_kokkos.cpp b/src/KOKKOS/fix_reaxff_species_kokkos.cpp
index 960ba07a86..7d742a8fa4 100644
--- a/src/KOKKOS/fix_reaxff_species_kokkos.cpp
+++ b/src/KOKKOS/fix_reaxff_species_kokkos.cpp
@@ -23,13 +23,11 @@
 #include "comm.h"
 #include "error.h"
 #include "force.h"
-#include "input.h"
-#include "memory_kokkos.h"
-#include "neigh_list.h"
-#include "neigh_request.h"
+#include "kokkos_type.h"
+#include "neigh_list_kokkos.h"
 
 #include "fix_ave_atom.h"
-#include "pair_reaxff_kokkos.h"
+#include "pair_reaxff.h"
 #include "reaxff_defs.h"
 
 using namespace LAMMPS_NS;
diff --git a/src/KOKKOS/fix_rx_kokkos.cpp b/src/KOKKOS/fix_rx_kokkos.cpp
index 0ba56c611e..0d1c250b3d 100644
--- a/src/KOKKOS/fix_rx_kokkos.cpp
+++ b/src/KOKKOS/fix_rx_kokkos.cpp
@@ -25,13 +25,12 @@
 #include "math_special_kokkos.h"
 #include "memory_kokkos.h"
 #include "modify.h"
-#include "neigh_list_kokkos.h"
 #include "neigh_request.h"
 #include "neighbor.h"
 #include "update.h"
 
 #include <cfloat> // DBL_EPSILON
-#include <cstring>
+#include <cmath>
 
 using namespace LAMMPS_NS;
 using namespace FixConst;
@@ -85,9 +84,6 @@ FixRxKokkos<DeviceType>::~FixRxKokkos()
     memoryKK->destroy_kokkos(k_dpdThetaLocal, dpdThetaLocal);
 
   memoryKK->destroy_kokkos(k_sumWeights, sumWeights);
-  memoryKK->destroy_kokkos(d_scratchSpace);
-
-  memoryKK->destroy_kokkos(k_cutsq);
 }
 
 /* ---------------------------------------------------------------------- */
@@ -1463,8 +1459,8 @@ void FixRxKokkos<DeviceType>::solve_reactions(const int /*vflag*/, const bool is
   this->scratchSpaceSize = (8*nspecies + 2*nreactions);
 
   if (nlocal*scratchSpaceSize > d_scratchSpace.extent(0)) {
-    memoryKK->destroy_kokkos (d_scratchSpace);
-    memoryKK->create_kokkos (d_scratchSpace, nlocal*scratchSpaceSize, "FixRxKokkos::d_scratchSpace");
+    d_scratchSpace = typename AT::t_double_1d();
+    d_scratchSpace = typename AT::t_double_1d("FixRxKokkos::d_scratchSpace", nlocal*scratchSpaceSize);
   }
 
   if (setRatesToZero)
@@ -1822,8 +1818,8 @@ void FixRxKokkos<DeviceType>::computeLocalTemperature()
     const int ntypes = atom->ntypes;
 
     if (ntypes+1 > (int) k_cutsq.extent(0)) {
-      memoryKK->destroy_kokkos (k_cutsq);
-      memoryKK->create_kokkos (k_cutsq, ntypes+1, ntypes+1, "FixRxKokkos::k_cutsq");
+      k_cutsq = typename AT::tdual_ffloat_2d();
+      k_cutsq = typename AT::tdual_ffloat_2d("FixRxKokkos::k_cutsq", ntypes+1, ntypes+1);
       d_cutsq = k_cutsq.template view<DeviceType>();
     }
 
@@ -1843,7 +1839,7 @@ void FixRxKokkos<DeviceType>::computeLocalTemperature()
 
   if (sumWeightsCt > (int)k_sumWeights.template view<DeviceType>().extent(0)) {
     memoryKK->destroy_kokkos(k_sumWeights, sumWeights);
-    memoryKK->create_kokkos (k_sumWeights, sumWeightsCt, "FixRxKokkos::sumWeights");
+    memoryKK->create_kokkos(k_sumWeights, sumWeights, sumWeightsCt, "FixRxKokkos::sumWeights");
     d_sumWeights = k_sumWeights.template view<DeviceType>();
     h_sumWeights = k_sumWeights.h_view;
   }
diff --git a/src/KOKKOS/fix_setforce_kokkos.cpp b/src/KOKKOS/fix_setforce_kokkos.cpp
index 9f193bc6e4..e8f376643f 100644
--- a/src/KOKKOS/fix_setforce_kokkos.cpp
+++ b/src/KOKKOS/fix_setforce_kokkos.cpp
@@ -17,7 +17,6 @@
 #include "atom_kokkos.h"
 #include "update.h"
 #include "modify.h"
-#include "domain.h"
 #include "region.h"
 #include "input.h"
 #include "variable.h"
@@ -26,8 +25,6 @@
 #include "atom_masks.h"
 #include "kokkos_base.h"
 
-#include <cstring>
-
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
diff --git a/src/KOKKOS/fix_shake_kokkos.cpp b/src/KOKKOS/fix_shake_kokkos.cpp
index 791738e5a4..b25e2dad59 100644
--- a/src/KOKKOS/fix_shake_kokkos.cpp
+++ b/src/KOKKOS/fix_shake_kokkos.cpp
@@ -14,20 +14,12 @@
 
 #include "fix_shake_kokkos.h"
 
-#include "fix_rattle.h"
 #include "atom_kokkos.h"
-#include "atom_vec.h"
-#include "molecule.h"
 #include "update.h"
-#include "respa.h"
-#include "modify.h"
 #include "domain.h"
 #include "force.h"
-#include "bond.h"
-#include "angle.h"
 #include "comm.h"
 #include "group.h"
-#include "fix_respa.h"
 #include "math_const.h"
 #include "memory_kokkos.h"
 #include "error.h"
@@ -35,7 +27,6 @@
 #include "atom_masks.h"
 
 #include <cmath>
-#include <cstring>
 
 using namespace LAMMPS_NS;
 using namespace FixConst;
@@ -1808,12 +1799,11 @@ int FixShakeKokkos<DeviceType>::unpack_exchange(int nlocal, double *buf)
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
-int FixShakeKokkos<DeviceType>::pack_forward_comm_kokkos(int n, DAT::tdual_int_2d k_sendlist,
-                                                        int iswap_in, DAT::tdual_xfloat_1d &k_buf,
-                                                        int pbc_flag, int* pbc)
+int FixShakeKokkos<DeviceType>::pack_forward_comm_kokkos(int n, DAT::tdual_int_1d k_sendlist,
+                                                         DAT::tdual_xfloat_1d &k_buf,
+                                                         int pbc_flag, int* pbc)
 {
   d_sendlist = k_sendlist.view<DeviceType>();
-  iswap = iswap_in;
   d_buf = k_buf.view<DeviceType>();
 
   if (domain->triclinic == 0) {
@@ -1837,7 +1827,7 @@ template<class DeviceType>
 template<int PBC_FLAG>
 KOKKOS_INLINE_FUNCTION
 void FixShakeKokkos<DeviceType>::operator()(TagFixShakePackForwardComm<PBC_FLAG>, const int &i) const {
-  const int j = d_sendlist(iswap, i);
+  const int j = d_sendlist(i);
 
   if (PBC_FLAG == 0) {
     d_buf[3*i] = d_xshake(j,0);
diff --git a/src/KOKKOS/fix_shake_kokkos.h b/src/KOKKOS/fix_shake_kokkos.h
index 19f3a2343d..31a6c340be 100644
--- a/src/KOKKOS/fix_shake_kokkos.h
+++ b/src/KOKKOS/fix_shake_kokkos.h
@@ -66,7 +66,7 @@ class FixShakeKokkos : public FixShake, public KokkosBase {
 
   int pack_exchange(int, double *) override;
   int unpack_exchange(int, double *) override;
-  int pack_forward_comm_kokkos(int, DAT::tdual_int_2d, int, DAT::tdual_xfloat_1d&,
+  int pack_forward_comm_kokkos(int, DAT::tdual_int_1d, DAT::tdual_xfloat_1d&,
                        int, int *) override;
   void unpack_forward_comm_kokkos(int, int, DAT::tdual_xfloat_1d&) override;
   int pack_forward_comm(int, int *, double *, int, int *) override;
@@ -211,9 +211,9 @@ class FixShakeKokkos : public FixShake, public KokkosBase {
   KOKKOS_INLINE_FUNCTION
   void v_tally(EV_FLOAT&, int, int *, double, double *) const;
 
-  int iswap,first,nsend;
+  int first,nsend;
 
-  typename AT::t_int_2d d_sendlist;
+  typename AT::t_int_1d d_sendlist;
   typename AT::t_xfloat_1d_um d_buf;
 
   typename AT::t_int_1d d_exchange_sendlist;
diff --git a/src/KOKKOS/fix_shardlow_kokkos.cpp b/src/KOKKOS/fix_shardlow_kokkos.cpp
index 4cbadc4803..a64adbcc38 100644
--- a/src/KOKKOS/fix_shardlow_kokkos.cpp
+++ b/src/KOKKOS/fix_shardlow_kokkos.cpp
@@ -43,7 +43,6 @@
 #include "domain.h"
 #include "error.h"
 #include "force.h"
-#include "memory_kokkos.h"
 #include "neigh_list_kokkos.h"
 #include "neigh_request.h"
 #include "neighbor.h"
diff --git a/src/KOKKOS/fix_spring_self_kokkos.cpp b/src/KOKKOS/fix_spring_self_kokkos.cpp
index 6571db37ed..9ba796b1ab 100644
--- a/src/KOKKOS/fix_spring_self_kokkos.cpp
+++ b/src/KOKKOS/fix_spring_self_kokkos.cpp
@@ -20,17 +20,10 @@
 
 #include "atom_kokkos.h"
 #include "update.h"
-#include "modify.h"
 #include "domain_kokkos.h"
-#include "region.h"
-#include "input.h"
-#include "variable.h"
 #include "memory_kokkos.h"
 #include "error.h"
 #include "atom_masks.h"
-#include "kokkos_base.h"
-
-#include <cstring>
 
 using namespace LAMMPS_NS;
 using namespace FixConst;
diff --git a/src/KOKKOS/fix_temp_berendsen_kokkos.cpp b/src/KOKKOS/fix_temp_berendsen_kokkos.cpp
index b986b3189a..8aaf586194 100644
--- a/src/KOKKOS/fix_temp_berendsen_kokkos.cpp
+++ b/src/KOKKOS/fix_temp_berendsen_kokkos.cpp
@@ -15,11 +15,9 @@
 #include "fix_temp_berendsen_kokkos.h"
 
 #include "atom_kokkos.h"
-#include "comm.h"
 #include "compute.h"
 #include "error.h"
 #include "force.h"
-#include "group.h"
 #include "input.h"
 #include "modify.h"
 #include "update.h"
@@ -27,7 +25,6 @@
 #include "atom_masks.h"
 
 #include <cmath>
-#include <cstring>
 
 using namespace LAMMPS_NS;
 using namespace FixConst;
diff --git a/src/KOKKOS/fix_temp_rescale_kokkos.cpp b/src/KOKKOS/fix_temp_rescale_kokkos.cpp
index 3a1c6ddd26..5c295634e7 100644
--- a/src/KOKKOS/fix_temp_rescale_kokkos.cpp
+++ b/src/KOKKOS/fix_temp_rescale_kokkos.cpp
@@ -15,11 +15,9 @@
 #include "fix_temp_rescale_kokkos.h"
 
 #include "atom_kokkos.h"
-#include "comm.h"
 #include "compute.h"
 #include "error.h"
 #include "force.h"
-#include "group.h"
 #include "input.h"
 #include "modify.h"
 #include "update.h"
@@ -27,7 +25,6 @@
 #include "atom_masks.h"
 
 #include <cmath>
-#include <cstring>
 
 using namespace LAMMPS_NS;
 using namespace FixConst;
diff --git a/src/KOKKOS/fix_viscous_kokkos.cpp b/src/KOKKOS/fix_viscous_kokkos.cpp
index 80ddff2fce..86a5a35910 100644
--- a/src/KOKKOS/fix_viscous_kokkos.cpp
+++ b/src/KOKKOS/fix_viscous_kokkos.cpp
@@ -16,12 +16,8 @@
 
 #include "atom_kokkos.h"
 #include "update.h"
-#include "modify.h"
-#include "input.h"
-#include "memory_kokkos.h"
 #include "error.h"
 #include "atom_masks.h"
-#include "kokkos_base.h"
 
 using namespace LAMMPS_NS;
 using namespace FixConst;
diff --git a/src/KOKKOS/fix_wall_flow_kokkos.cpp b/src/KOKKOS/fix_wall_flow_kokkos.cpp
new file mode 100644
index 0000000000..e86cad54b4
--- /dev/null
+++ b/src/KOKKOS/fix_wall_flow_kokkos.cpp
@@ -0,0 +1,293 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing authors: Vladislav Galigerov (HSE),
+                         Daniil Pavlov (MIPT)
+------------------------------------------------------------------------- */
+
+#include "fix_wall_flow_kokkos.h"
+#include "atom_kokkos.h"
+#include "atom_masks.h"
+#include "comm.h"
+#include "math_const.h"
+#include "memory_kokkos.h"
+
+#include <cmath>
+
+using namespace LAMMPS_NS;
+
+template <class DeviceType>
+FixWallFlowKokkos<DeviceType>::FixWallFlowKokkos(LAMMPS *lmp, int narg, char **arg) :
+    FixWallFlow(lmp, narg, arg), rand_pool(rndseed + comm->me)
+{
+  kokkosable = 1;
+  exchange_comm_device = sort_device = 1;
+  atomKK = (AtomKokkos *) atom;
+  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
+  datamask_read = X_MASK | RMASS_MASK | TYPE_MASK | MASK_MASK;
+  datamask_modify = V_MASK;
+
+  memory->destroy(current_segment);
+  current_segment = nullptr;
+  grow_arrays(atomKK->nmax);
+
+  d_walls = d_walls_t("FixWallFlowKokkos::walls", walls.size());
+  auto h_walls = Kokkos::create_mirror_view(d_walls);
+  for (int i = 0; i < (int) walls.size(); ++i) h_walls(i) = walls[i];
+  Kokkos::deep_copy(d_walls, h_walls);
+}
+
+template <class DeviceType> FixWallFlowKokkos<DeviceType>::~FixWallFlowKokkos()
+{
+  if (copymode) return;
+  memoryKK->destroy_kokkos(k_current_segment, current_segment);
+}
+
+template <class DeviceType> void FixWallFlowKokkos<DeviceType>::init()
+{
+  atomKK->sync(execution_space, datamask_read);
+  k_current_segment.template sync<DeviceType>();
+  d_x = atomKK->k_x.template view<DeviceType>();
+
+  copymode = 1;
+  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixWallFlowInit>(0, atom->nlocal), *this);
+  copymode = 0;
+
+  k_current_segment.template modify<DeviceType>();
+}
+
+template <class DeviceType>
+KOKKOS_INLINE_FUNCTION void FixWallFlowKokkos<DeviceType>::operator()(TagFixWallFlowInit,
+                                                                      const int &i) const
+{
+  double pos = d_x(i, flowax);
+  d_current_segment(i) = compute_current_segment_kk(pos);
+}
+
+template <class DeviceType> void FixWallFlowKokkos<DeviceType>::end_of_step()
+{
+  atomKK->sync(execution_space, datamask_read);
+  k_current_segment.template sync<DeviceType>();
+
+  d_x = atomKK->k_x.template view<DeviceType>();
+  d_v = atomKK->k_v.template view<DeviceType>();
+  d_type = atomKK->k_type.template view<DeviceType>();
+  d_mask = atomKK->k_mask.template view<DeviceType>();
+  d_mass = atomKK->k_mass.template view<DeviceType>();
+  d_rmass = atomKK->k_rmass.template view<DeviceType>();
+
+  copymode = 1;
+  if (d_rmass.data()) {
+    Kokkos::parallel_for(
+        Kokkos::RangePolicy<DeviceType, TagFixWallFlowEndOfStep<RMassTag>>(0, atom->nlocal), *this);
+  } else {
+    Kokkos::parallel_for(
+        Kokkos::RangePolicy<DeviceType, TagFixWallFlowEndOfStep<MassTag>>(0, atom->nlocal), *this);
+  }
+  copymode = 0;
+  atomKK->modified(execution_space, datamask_modify);
+  k_current_segment.template modify<DeviceType>();
+}
+
+template <class DeviceType>
+template <class MTag>
+KOKKOS_INLINE_FUNCTION void FixWallFlowKokkos<DeviceType>::operator()(TagFixWallFlowEndOfStep<MTag>,
+                                                                      const int &atom_i) const
+{
+  if (d_mask[atom_i] & groupbit) {
+    double pos = d_x(atom_i, flowax);
+    int prev_segment = d_current_segment(atom_i);
+    d_current_segment(atom_i) = compute_current_segment_kk(pos);
+    if (prev_segment != d_current_segment(atom_i)) { generate_velocity_kk<MTag>(atom_i); }
+  }
+}
+
+template <class DeviceType>
+template <class MTag>
+KOKKOS_INLINE_FUNCTION void FixWallFlowKokkos<DeviceType>::generate_velocity_kk(int atom_i) const
+{
+  const int newton_iteration_count = 10;
+  double mass = get_mass(MTag(), atom_i);
+  const double gamma = 1.0 / std::sqrt(2.0 * kT / mass);
+  double delta = gamma * flowvel;
+
+  const double edd = std::exp(-delta * delta) / MathConst::MY_PIS + delta * std::erf(delta);
+  const double probability_threshold = 0.5 * (1. + delta / edd);
+
+  double direction = 1.0;
+
+  rand_type_t rand_gen = rand_pool.get_state();
+
+  if (/*random->uniform()*/ rand_gen.drand() > probability_threshold) {
+    delta = -delta;
+    direction = -direction;
+  }
+
+  const double xi_0 = rand_gen.drand();    //random->uniform();
+  const double F_inf = edd + delta;
+  const double xi = xi_0 * F_inf;
+  const double x_0 = (std::sqrt(delta * delta + 2) - delta) * 0.5;
+  double x = x_0;
+  for (int i = 0; i < newton_iteration_count; ++i) {
+    x -= (std::exp(x * x) * MathConst::MY_PIS * (xi - delta * std::erfc(x)) - 1.0) / (x + delta) *
+        0.5;
+  }
+
+  const double nu = x + delta;
+  const double v = nu / gamma;
+
+  d_v(atom_i, flowax) = v * direction;
+  d_v(atom_i, (flowax + 1) % 3) =
+      /*random->gaussian()*/ rand_gen.normal() / (gamma * MathConst::MY_SQRT2);
+  d_v(atom_i, (flowax + 2) % 3) =
+      /*random->gaussian()*/ rand_gen.normal() / (gamma * MathConst::MY_SQRT2);
+
+  rand_pool.free_state(rand_gen);
+}
+
+template <class DeviceType>
+KOKKOS_INLINE_FUNCTION int
+FixWallFlowKokkos<DeviceType>::compute_current_segment_kk(double pos) const
+{
+  int result = 0;
+  for (; result < (int) d_walls.extent(0) - 1; ++result) {
+    if (pos >= d_walls[result] && pos < d_walls[result + 1]) { return result; }
+  }
+  return -1;    // -1 is "out of box" region
+}
+
+template <class DeviceType> void FixWallFlowKokkos<DeviceType>::grow_arrays(int nmax)
+{
+  k_current_segment.template sync<DeviceType>();
+  memoryKK->grow_kokkos(k_current_segment, current_segment, nmax, "WallFlowKK::current_segment");
+  k_current_segment.template modify<DeviceType>();
+
+  d_current_segment = k_current_segment.template view<DeviceType>();
+  h_current_segment = k_current_segment.template view<LMPHostType>();
+}
+
+template <class DeviceType> void FixWallFlowKokkos<DeviceType>::copy_arrays(int i, int j, int)
+{
+  k_current_segment.template sync<LMPHostType>();
+  h_current_segment(j) = h_current_segment(i);
+  k_current_segment.template modify<LMPHostType>();
+}
+
+/* ----------------------------------------------------------------------
+   sort local atom-based arrays
+------------------------------------------------------------------------- */
+
+template <class DeviceType>
+void FixWallFlowKokkos<DeviceType>::sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter)
+{
+  // always sort on the device
+
+  k_current_segment.sync_device();
+
+  Sorter.sort(LMPDeviceType(), k_current_segment.d_view);
+
+  k_current_segment.modify_device();
+}
+
+template <class DeviceType> int FixWallFlowKokkos<DeviceType>::pack_exchange(int i, double *buf)
+{
+  k_current_segment.sync_host();
+  buf[0] = static_cast<double>(h_current_segment(i));
+  return 1;
+}
+
+template <class DeviceType>
+KOKKOS_INLINE_FUNCTION void FixWallFlowKokkos<DeviceType>::operator()(TagFixWallFlowPackExchange,
+                                                                      const int &mysend) const
+{
+  const int send_i = d_sendlist(mysend);
+  const int segment = d_current_segment(send_i);
+  d_buf(mysend) = static_cast<double>(segment);
+
+  const int copy_i = d_copylist(mysend);
+  if (copy_i > -1) { d_current_segment(send_i) = d_current_segment(copy_i); }
+}
+
+template <class DeviceType>
+int FixWallFlowKokkos<DeviceType>::pack_exchange_kokkos(const int &nsend,
+                                                        DAT::tdual_xfloat_2d &k_buf,
+                                                        DAT::tdual_int_1d k_sendlist,
+                                                        DAT::tdual_int_1d k_copylist,
+                                                        ExecutionSpace /*space*/)
+{
+  k_current_segment.template sync<DeviceType>();
+
+  k_buf.template sync<DeviceType>();
+  k_sendlist.template sync<DeviceType>();
+  k_copylist.template sync<DeviceType>();
+
+  d_sendlist = k_sendlist.view<DeviceType>();
+  d_copylist = k_copylist.view<DeviceType>();
+
+  d_buf = typename ArrayTypes<DeviceType>::t_xfloat_1d_um(k_buf.template view<DeviceType>().data(),
+                                                          k_buf.extent(0) * k_buf.extent(1));
+
+  copymode = 1;
+
+  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixWallFlowPackExchange>(0, nsend),
+                       *this);
+
+  copymode = 0;
+
+  k_buf.template modify<DeviceType>();
+  k_current_segment.template modify<DeviceType>();
+
+  return nsend;
+}
+
+template <class DeviceType> int FixWallFlowKokkos<DeviceType>::unpack_exchange(int i, double *buf)
+{
+  k_current_segment.sync_host();
+  h_current_segment(i) = static_cast<int>(buf[0]);
+  k_current_segment.modify_host();
+  return 1;
+}
+
+template <class DeviceType>
+KOKKOS_INLINE_FUNCTION void FixWallFlowKokkos<DeviceType>::operator()(TagFixWallFlowUnpackExchange,
+                                                                      const int &i) const
+{
+  int index = d_indices(i);
+  if (index > -1) { d_current_segment(index) = static_cast<int>(d_buf(i)); }
+}
+
+template <class DeviceType>
+void FixWallFlowKokkos<DeviceType>::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,
+                                                           DAT::tdual_int_1d &k_indices, int nrecv,
+                                                           int /*nrecv1*/, int /*nextrarecv1*/,
+                                                           ExecutionSpace /*space*/)
+{
+  d_buf = typename ArrayTypes<DeviceType>::t_xfloat_1d_um(k_buf.template view<DeviceType>().data(),
+                                                          k_buf.extent(0) * k_buf.extent(1));
+  d_indices = k_indices.view<DeviceType>();
+
+  copymode = 1;
+  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixWallFlowUnpackExchange>(0, nrecv),
+                       *this);
+  copymode = 0;
+
+  k_current_segment.template modify<DeviceType>();
+}
+
+namespace LAMMPS_NS {
+template class FixWallFlowKokkos<LMPDeviceType>;
+#ifdef LMP_KOKKOS_GPU
+template class FixWallFlowKokkos<LMPHostType>;
+#endif
+}    // namespace LAMMPS_NS
diff --git a/src/KOKKOS/fix_wall_flow_kokkos.h b/src/KOKKOS/fix_wall_flow_kokkos.h
new file mode 100644
index 0000000000..3535c74eb7
--- /dev/null
+++ b/src/KOKKOS/fix_wall_flow_kokkos.h
@@ -0,0 +1,129 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef FIX_CLASS
+// clang-format off
+FixStyle(wall/flow/kk,FixWallFlowKokkos<LMPDeviceType>);
+FixStyle(wall/flow/kk/device,FixWallFlowKokkos<LMPDeviceType>);
+FixStyle(wall/flow/kk/host,FixWallFlowKokkos<LMPHostType>);
+// clang-format on
+#else
+
+// clang-format off
+#ifndef LMP_FIX_WALL_FLOW_KOKKOS_H
+#define LMP_FIX_WALL_FLOW_KOKKOS_H
+
+#include "fix_wall_flow.h"
+#include "kokkos_type.h"
+#include "kokkos_base.h"
+#include "Kokkos_Random.hpp"
+
+namespace LAMMPS_NS {
+
+struct TagFixWallFlowInit{};
+template<class MTag>
+struct TagFixWallFlowEndOfStep{};
+struct TagFixWallFlowPackExchange{};
+struct TagFixWallFlowUnpackExchange{};
+
+template<class DeviceType>
+class FixWallFlowKokkos : public FixWallFlow, public KokkosBase {
+ public:
+  typedef DeviceType device_type;
+  typedef ArrayTypes<DeviceType> AT;
+  struct MassTag{};
+  struct RMassTag{};
+  FixWallFlowKokkos(class LAMMPS *, int, char **);
+  ~FixWallFlowKokkos();
+
+  void init() override;
+  void end_of_step() override;
+  void grow_arrays(int) override;
+  void copy_arrays(int, int, int) override;
+  void sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter) override;
+  int pack_exchange(int, double *) override;
+  int unpack_exchange(int, double *) override;
+
+  KOKKOS_INLINE_FUNCTION
+  void operator() (TagFixWallFlowInit, const int&) const;
+
+  template<class MTag>
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagFixWallFlowEndOfStep<MTag>, const int&) const;
+
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagFixWallFlowPackExchange, const int&) const;
+
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagFixWallFlowUnpackExchange, const int&) const;
+
+  int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
+                           DAT::tdual_int_1d k_sendlist,
+                           DAT::tdual_int_1d k_copylist,
+                           ExecutionSpace space) override;
+
+  void unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,
+                              DAT::tdual_int_1d &indices,int nrecv,
+                              int /*nrecv1*/, int /*nextrarecv1*/,
+                              ExecutionSpace space) override;
+ protected:
+  typename AT::t_x_array d_x;
+  typename AT::t_v_array d_v;
+  typename AT::t_int_1d d_type;
+  typename AT::t_int_1d d_mask;
+
+  typename AT::t_float_1d d_mass;
+  typename AT::t_float_1d d_rmass;
+
+  typedef typename AT::t_xfloat_1d d_walls_t;
+  typedef Kokkos::Random_XorShift64_Pool<DeviceType> rand_pool_t;
+  typedef typename rand_pool_t::generator_type rand_type_t;
+
+  typename AT::tdual_int_1d k_current_segment;
+  typename AT::t_int_1d d_current_segment;
+  typename HAT::t_int_1d h_current_segment;
+
+  typename AT::t_int_1d d_sendlist;
+  typename AT::t_xfloat_1d d_buf;
+  typename AT::t_int_1d d_copylist;
+  typename AT::t_int_1d d_indices;
+
+  d_walls_t d_walls;
+
+  rand_pool_t rand_pool;
+
+  template<class MTag>
+  KOKKOS_INLINE_FUNCTION
+  void generate_velocity_kk(int atom_i) const;
+
+  KOKKOS_INLINE_FUNCTION
+  int compute_current_segment_kk(double pos) const;
+
+  KOKKOS_INLINE_FUNCTION
+  double get_mass(MassTag, int atom_i) const
+  {
+    return d_mass(d_type(atom_i));
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  double get_mass(RMassTag, int atom_i) const
+  {
+    return d_rmass(atom_i);
+  }
+};
+
+}
+
+#endif
+#endif
+
diff --git a/src/KOKKOS/fix_wall_gran_kokkos.cpp b/src/KOKKOS/fix_wall_gran_kokkos.cpp
index 25e405c798..3ff97084fe 100644
--- a/src/KOKKOS/fix_wall_gran_kokkos.cpp
+++ b/src/KOKKOS/fix_wall_gran_kokkos.cpp
@@ -15,7 +15,6 @@
 #include "atom_kokkos.h"
 #include "error.h"
 #include "memory_kokkos.h"
-#include "atom_vec_kokkos.h"
 #include "atom_masks.h"
 #include "update.h"
 
diff --git a/src/KOKKOS/fix_wall_lj93_kokkos.cpp b/src/KOKKOS/fix_wall_lj93_kokkos.cpp
index dff47f1c30..93bcc9abc9 100644
--- a/src/KOKKOS/fix_wall_lj93_kokkos.cpp
+++ b/src/KOKKOS/fix_wall_lj93_kokkos.cpp
@@ -13,7 +13,7 @@
 ------------------------------------------------------------------------- */
 
 #include "fix_wall_lj93_kokkos.h"
-#include <cmath>
+
 #include "atom_kokkos.h"
 #include "error.h"
 #include "atom_masks.h"
@@ -51,8 +51,6 @@ void FixWallLJ93Kokkos<DeviceType>::wall_particle(int m_in, int which, double co
   x = atomKK->k_x.view<DeviceType>();
   f = atomKK->k_f.view<DeviceType>();
   mask = atomKK->k_mask.view<DeviceType>();
-  DAT::tdual_int_scalar k_oneflag = DAT::tdual_int_scalar("fix:oneflag");
-  d_oneflag = k_oneflag.view<DeviceType>();
 
   int nlocal = atom->nlocal;
 
@@ -66,10 +64,6 @@ void FixWallLJ93Kokkos<DeviceType>::wall_particle(int m_in, int which, double co
   copymode = 0;
 
   atomKK->modified(execution_space, F_MASK);
-
-  k_oneflag.template modify<DeviceType>();
-  k_oneflag.template sync<LMPHostType>();
-  if (k_oneflag.h_view()) error->one(FLERR,"Particle on or inside fix wall surface");
 }
 
 template <class DeviceType>
@@ -80,10 +74,8 @@ void FixWallLJ93Kokkos<DeviceType>::wall_particle_item(int i, value_type ewall)
     if (side < 0) delta = x(i,dim) - coord;
     else delta = coord - x(i,dim);
     if (delta >= cutoff[m]) return;
-    if (delta <= 0.0) {
-      d_oneflag() = 1;
-      return;
-    }
+    if (delta <= 0.0)
+      Kokkos::abort("Particle on or inside fix wall surface");
     double rinv = 1.0/delta;
     double r2inv = rinv*rinv;
     double r4inv = r2inv*r2inv;
diff --git a/src/KOKKOS/fix_wall_lj93_kokkos.h b/src/KOKKOS/fix_wall_lj93_kokkos.h
index 2bc78f3781..720e586f5d 100644
--- a/src/KOKKOS/fix_wall_lj93_kokkos.h
+++ b/src/KOKKOS/fix_wall_lj93_kokkos.h
@@ -50,7 +50,6 @@ class FixWallLJ93Kokkos : public FixWallLJ93 {
   typename AT::t_x_array x;
   typename AT::t_f_array f;
   typename AT::t_int_1d mask;
-  typename AT::t_int_scalar d_oneflag;
 };
 
 template <class DeviceType>
diff --git a/src/KOKKOS/fix_wall_reflect_kokkos.cpp b/src/KOKKOS/fix_wall_reflect_kokkos.cpp
index a8a01c1926..731ce11f10 100644
--- a/src/KOKKOS/fix_wall_reflect_kokkos.cpp
+++ b/src/KOKKOS/fix_wall_reflect_kokkos.cpp
@@ -21,8 +21,6 @@
 #include "update.h"
 #include "variable.h"
 
-#include <cstring>
-
 using namespace LAMMPS_NS;
 
 enum{XLO=0,XHI=1,YLO=2,YHI=3,ZLO=4,ZHI=5};
diff --git a/src/KOKKOS/kokkos_base.h b/src/KOKKOS/kokkos_base.h
index 24fcc47579..22ed1687a9 100644
--- a/src/KOKKOS/kokkos_base.h
+++ b/src/KOKKOS/kokkos_base.h
@@ -26,18 +26,18 @@ class KokkosBase {
   KokkosBase() {}
 
   // Pair
-  virtual int pack_forward_comm_kokkos(int, DAT::tdual_int_2d,
-                                       int, DAT::tdual_xfloat_1d &,
+  virtual int pack_forward_comm_kokkos(int, DAT::tdual_int_1d,
+                                       DAT::tdual_xfloat_1d &,
                                        int, int *) {return 0;};
   virtual void unpack_forward_comm_kokkos(int, int, DAT::tdual_xfloat_1d &) {}
 
   virtual int pack_reverse_comm_kokkos(int, int, DAT::tdual_xfloat_1d &) {return 0;};
-  virtual void unpack_reverse_comm_kokkos(int, DAT::tdual_int_2d,
-                                          int, DAT::tdual_xfloat_1d &) {}
+  virtual void unpack_reverse_comm_kokkos(int, DAT::tdual_int_1d,
+                                          DAT::tdual_xfloat_1d &) {}
 
   // Fix
-  virtual int pack_forward_comm_fix_kokkos(int, DAT::tdual_int_2d,
-                                           int, DAT::tdual_xfloat_1d &,
+  virtual int pack_forward_comm_fix_kokkos(int, DAT::tdual_int_1d,
+                                           DAT::tdual_xfloat_1d &,
                                            int, int *) {return 0;};
   virtual void unpack_forward_comm_fix_kokkos(int, int, DAT::tdual_xfloat_1d &) {}
 
diff --git a/src/KOKKOS/kokkos_type.h b/src/KOKKOS/kokkos_type.h
index 1009e43196..cc4e00819f 100644
--- a/src/KOKKOS/kokkos_type.h
+++ b/src/KOKKOS/kokkos_type.h
@@ -689,6 +689,14 @@ typedef tdual_int_2d_dl::t_dev_um t_int_2d_um_dl;
 typedef tdual_int_2d_dl::t_dev_const_um t_int_2d_const_um_dl;
 typedef tdual_int_2d_dl::t_dev_const_randomread t_int_2d_randomread_dl;
 
+typedef Kokkos::
+  DualView<int***, Kokkos::LayoutRight, LMPDeviceType> tdual_int_3d;
+typedef tdual_int_3d::t_dev t_int_3d;
+typedef tdual_int_3d::t_dev_const t_int_3d_const;
+typedef tdual_int_3d::t_dev_um t_int_3d_um;
+typedef tdual_int_3d::t_dev_const_um t_int_3d_const_um;
+typedef tdual_int_3d::t_dev_const_randomread t_int_3d_randomread;
+
 typedef Kokkos::
   DualView<LAMMPS_NS::tagint*, LMPDeviceType::array_layout, LMPDeviceType>
   tdual_tagint_1d;
@@ -1006,6 +1014,13 @@ typedef tdual_int_2d_dl::t_host_um t_int_2d_um_dl;
 typedef tdual_int_2d_dl::t_host_const_um t_int_2d_const_um_dl;
 typedef tdual_int_2d_dl::t_host_const_randomread t_int_2d_randomread_dl;
 
+typedef Kokkos::DualView<int***, Kokkos::LayoutRight, LMPDeviceType> tdual_int_3d;
+typedef tdual_int_3d::t_host t_int_3d;
+typedef tdual_int_3d::t_host_const t_int_3d_const;
+typedef tdual_int_3d::t_host_um t_int_3d_um;
+typedef tdual_int_3d::t_host_const_um t_int_3d_const_um;
+typedef tdual_int_3d::t_host_const_randomread t_int_3d_randomread;
+
 typedef Kokkos::DualView<LAMMPS_NS::tagint*, LMPDeviceType::array_layout, LMPDeviceType> tdual_tagint_1d;
 typedef tdual_tagint_1d::t_host t_tagint_1d;
 typedef tdual_tagint_1d::t_host_const t_tagint_1d_const;
diff --git a/src/KOKKOS/meam_force_kokkos.h b/src/KOKKOS/meam_force_kokkos.h
index fec923f5b2..a546ab54d4 100644
--- a/src/KOKKOS/meam_force_kokkos.h
+++ b/src/KOKKOS/meam_force_kokkos.h
@@ -601,8 +601,31 @@ KOKKOS_INLINE_FUNCTION void MEAMKokkos<DeviceType>::operator()(TagMEAMForce<NEIG
             drho2mds2 = a2 * rhoa2mi * arg1j2m - 2.0 / 3.0 * d_arho2mb[j] * rhoa2mi;
             drho3mds1 = a3 * rhoa3mj * arg1i3m - a3a * rhoa3mj * arg3i3m;
             drho3mds2 = a3 * rhoa3mi * arg1j3m - a3a * rhoa3mi * arg3j3m;
+            drho1mds1 *= -1;
+            drho1mds2 *= -1;
             drho3mds1 *= -1;
             drho3mds2 *= -1;
+
+            t1i = 1.0;
+            t2i = 1.0;
+            t3i = 1.0;
+            t1j = 1.0;
+            t2j = 1.0;
+            t3j = 1.0;
+            dt1dr1 = 0.0;
+            dt1dr2 = 0.0;
+            dt2dr1 = 0.0;
+            dt2dr2 = 0.0;
+            dt3dr1 = 0.0;
+            dt3dr2 = 0.0;
+
+            // these formulae are simplifed by substituting t=1, dt=0 from above
+
+            drhods1 = d_dgamma1[i] * drho0ds1 + d_dgamma2[i]
+              * ((drho1ds1 - drho1mds1) + (drho2ds1 - drho2mds1) + (drho3ds1 - drho3mds1));
+            drhods2 = d_dgamma1[j] * drho0ds2 + d_dgamma2[j]
+              * ((drho1ds2 - drho1mds2) + (drho2ds2 - drho2mds2) + (drho3ds2 - drho3mds2));
+
           } else {
             drho1mds1 = 0.0;
             drho1mds2 = 0.0;
@@ -610,61 +633,49 @@ KOKKOS_INLINE_FUNCTION void MEAMKokkos<DeviceType>::operator()(TagMEAMForce<NEIG
             drho2mds2 = 0.0;
             drho3mds1 = 0.0;
             drho3mds2 = 0.0;
-          }
 
-          if (ialloy == 1) {
-            a1i = fdiv_zero_kk(rhoa0j, d_tsq_ave(i, 0));
-            a1j = fdiv_zero_kk(rhoa0i, d_tsq_ave(j, 0));
-            a2i = fdiv_zero_kk(rhoa0j, d_tsq_ave(i, 1));
-            a2j = fdiv_zero_kk(rhoa0i, d_tsq_ave(j, 1));
-            a3i = fdiv_zero_kk(rhoa0j, d_tsq_ave(i, 2));
-            a3j = fdiv_zero_kk(rhoa0i, d_tsq_ave(j, 2));
+            if (ialloy == 1) {
 
-            dt1ds1 = a1i * (t1mj - t1i * MathSpecialKokkos::square(t1mj));
-            dt1ds2 = a1j * (t1mi - t1j * MathSpecialKokkos::square(t1mi));
-            dt2ds1 = a2i * (t2mj - t2i * MathSpecialKokkos::square(t2mj));
-            dt2ds2 = a2j * (t2mi - t2j * MathSpecialKokkos::square(t2mi));
-            dt3ds1 = a3i * (t3mj - t3i * MathSpecialKokkos::square(t3mj));
-            dt3ds2 = a3j * (t3mi - t3j * MathSpecialKokkos::square(t3mi));
+              a1i = fdiv_zero_kk(rhoa0j, d_tsq_ave(i, 0));
+              a1j = fdiv_zero_kk(rhoa0i, d_tsq_ave(j, 0));
+              a2i = fdiv_zero_kk(rhoa0j, d_tsq_ave(i, 1));
+              a2j = fdiv_zero_kk(rhoa0i, d_tsq_ave(j, 1));
+              a3i = fdiv_zero_kk(rhoa0j, d_tsq_ave(i, 2));
+              a3j = fdiv_zero_kk(rhoa0i, d_tsq_ave(j, 2));
 
-          } else if (ialloy == 2) {
+              dt1ds1 = a1i * (t1mj - t1i * MathSpecialKokkos::square(t1mj));
+              dt1ds2 = a1j * (t1mi - t1j * MathSpecialKokkos::square(t1mi));
+              dt2ds1 = a2i * (t2mj - t2i * MathSpecialKokkos::square(t2mj));
+              dt2ds2 = a2j * (t2mi - t2j * MathSpecialKokkos::square(t2mi));
+              dt3ds1 = a3i * (t3mj - t3i * MathSpecialKokkos::square(t3mj));
+              dt3ds2 = a3j * (t3mi - t3j * MathSpecialKokkos::square(t3mi));
 
-            dt1ds1 = 0.0;
-            dt1ds2 = 0.0;
-            dt2ds1 = 0.0;
-            dt2ds2 = 0.0;
-            dt3ds1 = 0.0;
-            dt3ds2 = 0.0;
+            } else if (ialloy == 2) {
 
-          } else {
+              dt1ds1 = 0.0;
+              dt1ds2 = 0.0;
+              dt2ds1 = 0.0;
+              dt2ds2 = 0.0;
+              dt3ds1 = 0.0;
+              dt3ds2 = 0.0;
 
-            ai = 0.0;
-            if (!iszero_kk(d_rho0[i])) ai = rhoa0j / d_rho0[i];
-            aj = 0.0;
-            if (!iszero_kk(d_rho0[j])) aj = rhoa0i / d_rho0[j];
+            } else {
 
-            dt1ds1 = ai * (t1mj - t1i);
-            dt1ds2 = aj * (t1mi - t1j);
-            dt2ds1 = ai * (t2mj - t2i);
-            dt2ds2 = aj * (t2mi - t2j);
-            dt3ds1 = ai * (t3mj - t3i);
-            dt3ds2 = aj * (t3mi - t3j);
-          }
+              ai = 0.0;
+              if (!iszero_kk(d_rho0[i])) ai = rhoa0j / d_rho0[i];
+              aj = 0.0;
+              if (!iszero_kk(d_rho0[j])) aj = rhoa0i / d_rho0[j];
+
+              dt1ds1 = ai * (t1mj - t1i);
+              dt1ds2 = aj * (t1mi - t1j);
+              dt2ds1 = ai * (t2mj - t2i);
+              dt2ds2 = aj * (t2mi - t2j);
+              dt3ds1 = ai * (t3mj - t3i);
+              dt3ds2 = aj * (t3mi - t3j);
+            }
 
-          if (msmeamflag) {
             drhods1 = d_dgamma1[i] * drho0ds1 +
-              d_dgamma2[i] * (dt1ds1 * d_rho1[i] + t1i * (drho1ds1 - drho1mds1) +
-                              dt2ds1 * d_rho2[i] + t2i * (drho2ds1 - drho2mds1) +
-                              dt3ds1 * d_rho3[i] + t3i * (drho3ds1 - drho3mds1)) -
-              d_dgamma3[i] * (shpi[0] * dt1ds1 + shpi[1] * dt2ds1 + shpi[2] * dt3ds1);
-            drhods2 = d_dgamma1[j] * drho0ds2 +
-              d_dgamma2[j] * (dt1ds2 * d_rho1[j] + t1j * (drho1ds2 - drho1mds2) +
-                              dt2ds2 * d_rho2[j] + t2j * (drho2ds2 - drho2mds2) +
-                              dt3ds2 * d_rho3[j] + t3j * (drho3ds2 - drho3mds2)) -
-              d_dgamma3[j] * (shpj[0] * dt1ds2 + shpj[1] * dt2ds2 + shpj[2] * dt3ds2);
-          } else {
-            drhods1 = d_dgamma1[i] * drho0ds1 +
-                d_dgamma2[i] *
+              d_dgamma2[i] *
                     (dt1ds1 * d_rho1[i] + t1i * drho1ds1 + dt2ds1 * d_rho2[i] + t2i * drho2ds1 +
                      dt3ds1 * d_rho3[i] + t3i * drho3ds1) -
                 d_dgamma3[i] * (shpi[0] * dt1ds1 + shpi[1] * dt2ds1 + shpi[2] * dt3ds1);
diff --git a/src/KOKKOS/memory_kokkos.h b/src/KOKKOS/memory_kokkos.h
index 9d894a344a..0c7555875e 100644
--- a/src/KOKKOS/memory_kokkos.h
+++ b/src/KOKKOS/memory_kokkos.h
@@ -65,8 +65,7 @@ template <typename TYPE, typename HTYPE>
 }
 
 /* ----------------------------------------------------------------------
-   grow or shrink 1st dim of a 1d array
-   last dim must stay the same
+   grow or shrink a 1d array
 ------------------------------------------------------------------------- */
 
 template <typename TYPE>
@@ -80,6 +79,10 @@ TYPE grow_kokkos(TYPE &data, typename TYPE::value_type *&array,
   return data;
 }
 
+/* ----------------------------------------------------------------------
+   destroy a 1d array
+------------------------------------------------------------------------- */
+
 template <typename TYPE>
 void destroy_kokkos(TYPE data, typename TYPE::value_type* &array)
 {
@@ -92,71 +95,6 @@ void destroy_kokkos(TYPE data, typename TYPE::value_type* &array)
    create a 2d array
 ------------------------------------------------------------------------- */
 
-template <typename TYPE>
-TYPE destroy_kokkos(TYPE &data)
-{
-  /*if (data.data()!=nullptr)
-    free(data.data());*/
-  data = TYPE();
-  return data;
-}
-
-template <typename TYPE>
-TYPE create_kokkos(TYPE &data, int n1, const char *name)
-{
-  /*typename TYPE::non_const_value_type* ptr = (typename TYPE::non_const_value_type*)
-    malloc(n1*sizeof(typename TYPE::non_const_value_type)*4);*/
-  data = TYPE(std::string(name),n1);
-  return data;
-}
-
-template <typename TYPE>
-TYPE create_kokkos(TYPE &data, int n1, int n2, const char *name)
-{
-  /*typename TYPE::non_const_value_type* ptr = (typename TYPE::non_const_value_type*)
-    malloc(n1*n2*sizeof(typename TYPE::non_const_value_type)*4);*/
-  data = TYPE(std::string(name),n1,n2);
-  return data;
-}
-
-template <typename TYPE>
-TYPE create_kokkos(TYPE &data, int n1, int n2, int n3 ,const char *name)
-{
-  /*typename TYPE::non_const_value_type* ptr = (typename TYPE::non_const_value_type*)
-    malloc(n1*n2*n3*sizeof(typename TYPE::non_const_value_type)*4);*/
-  data = TYPE(std::string(name),n1,n2,n3);
-  return data;
-}
-
-template <typename TYPE>
-TYPE create_kokkos(TYPE &data, int n1, int n2, int n3, int n4 ,const char *name)
-{
-  /*typename TYPE::non_const_value_type* ptr = (typename TYPE::non_const_value_type*)
-    malloc(n1*n2*n3*n4*sizeof(typename TYPE::non_const_value_type)*4);*/
-  data = TYPE(std::string(name),n1,n2,n3,n4);
-  return data;
-}
-
-template <typename TYPE>
-TYPE create_kokkos(TYPE &data, int n1, int n2, int n3, int n4, int n5 ,const char *name)
-{
-  /*typename TYPE::non_const_value_type* ptr = (typename TYPE::non_const_value_type*)
-    malloc(n1*n2*n3*n4*n5*sizeof(typename TYPE::non_const_value_type)*4);*/
-  data = TYPE(std::string(name),n1,n2,n3,n4,n5);
-  return data;
-}
-
-template <typename TYPE>
-TYPE create_kokkos(TYPE &data, int n1, int n2, int n3, int n4, int n5 , int n6 ,const char *name)
-{
-  /*typename TYPE::non_const_value_type* ptr = (typename TYPE::non_const_value_type*)
-    malloc(n1*n2*n3*n4*n5*n6*sizeof(typename TYPE::non_const_value_type)*4);*/
-  data = TYPE(std::string(name) ,n1,n2,n3,n4,n5,n6);
-  return data;
-}
-
-
-
 template <typename TYPE, typename HTYPE>
   TYPE create_kokkos(TYPE &data, HTYPE &h_data, int n1, int n2,
                      const char *name)
@@ -202,20 +140,87 @@ template <typename TYPE, typename HTYPE>
   return data;
 }
 
-template <typename TYPE, typename HTYPE>
-  TYPE create_kokkos(TYPE &data, HTYPE &h_data, int n1, int n2, int n3,
-                     const char *name)
+template <typename TYPE>
+TYPE create_kokkos(TYPE &data, typename TYPE::value_type **&array,
+                   int n1, const char *name)
 {
-  data = TYPE(std::string(name),n1,n2,n3);
-  h_data = Kokkos::create_mirror_view(data);
+  data = TYPE(std::string(name),n1);
+  bigint nbytes = ((bigint) sizeof(typename TYPE::value_type *)) * n1;
+  array = (typename TYPE::value_type **) smalloc(nbytes,name);
+
+  for (int i = 0; i < n1; i++)
+    if (data.h_view.extent(1) == 0)
+      array[i] = nullptr;
+    else
+      array[i] = &data.h_view(i,0);
+
   return data;
 }
 
+/* ----------------------------------------------------------------------
+   grow or shrink a 2d array
+------------------------------------------------------------------------- */
+
+template <typename TYPE>
+TYPE grow_kokkos(TYPE &data, typename TYPE::value_type **&array,
+                 int n1, int n2, const char *name)
+{
+  if (array == nullptr) return create_kokkos(data,array,n1,n2,name);
+  data.resize(n1,n2);
+  bigint nbytes = ((bigint) sizeof(typename TYPE::value_type *)) * n1;
+  array = (typename TYPE::value_type**) srealloc(array,nbytes,name);
+
+  for (int i = 0; i < n1; i++)
+    if (n2 == 0)
+      array[i] = nullptr;
+    else
+      array[i] = &data.h_view(i,0);
+
+  return data;
+}
+
+template <typename TYPE>
+TYPE grow_kokkos(TYPE &data, typename TYPE::value_type **&array,
+                 int n1, const char *name)
+{
+  if (array == nullptr) return create_kokkos(data,array,n1,name);
+
+  data.resize(n1);
+
+  bigint nbytes = ((bigint) sizeof(typename TYPE::value_type *)) * n1;
+  array = (typename TYPE::value_type **) srealloc(array,nbytes,name);
+
+  for (int i = 0; i < n1; i++)
+    if (data.h_view.extent(1) == 0)
+      array[i] = nullptr;
+    else
+      array[i] = &data.h_view(i,0);
+
+  return data;
+}
+
+/* ----------------------------------------------------------------------
+   destroy a 2d array
+------------------------------------------------------------------------- */
+
+template <typename TYPE>
+void destroy_kokkos(TYPE data, typename TYPE::value_type** &array)
+{
+  if (array == nullptr) return;
+  data = TYPE();
+  sfree(array);
+  array = nullptr;
+}
+
+/* ----------------------------------------------------------------------
+   create a 3d array
+------------------------------------------------------------------------- */
+
 template <typename TYPE>
 TYPE create_kokkos(TYPE &data, typename TYPE::value_type ***&array,
                    int n1, int n2, int n3, const char *name)
 {
-  data = TYPE(std::string(name),n1,n2);
+  data = TYPE(std::string(name),n1,n2,n3);
   bigint nbytes = ((bigint) sizeof(typename TYPE::value_type **)) * n1;
   array = (typename TYPE::value_type ***) smalloc(nbytes,name);
 
@@ -263,79 +268,46 @@ template <typename TYPE, typename HTYPE>
   return data;
 }
 
+template <typename TYPE, typename HTYPE>
+  TYPE create_kokkos(TYPE &data, HTYPE &h_data, int n1, int n2, int n3,
+                     const char *name)
+{
+  data = TYPE(std::string(name),n1,n2,n3);
+  h_data = Kokkos::create_mirror_view(data);
+  return data;
+}
+
+
 /* ----------------------------------------------------------------------
-   grow or shrink 1st dim of a 2d array
-   last dim must stay the same
+   grow or shrink a 3d array
 ------------------------------------------------------------------------- */
 
 template <typename TYPE>
-TYPE grow_kokkos(TYPE &data, typename TYPE::value_type **&array,
-                 int n1, int n2, const char *name)
+TYPE grow_kokkos(TYPE &data, typename TYPE::value_type ***&array,
+                   int n1, int n2, int n3, const char *name)
 {
-  if (array == nullptr) return create_kokkos(data,array,n1,n2,name);
-  data.resize(n1,n2);
-  bigint nbytes = ((bigint) sizeof(typename TYPE::value_type *)) * n1;
-  array = (typename TYPE::value_type**) srealloc(array,nbytes,name);
+  if (array == nullptr) return create_kokkos(data,array,n1,n2,n3,name);
+  data.resize(n1,n2,n3);
+  bigint nbytes = ((bigint) sizeof(typename TYPE::value_type **)) * n1;
+  array = (typename TYPE::value_type ***) smalloc(nbytes,name);
 
-  for (int i = 0; i < n1; i++)
-    if (n2 == 0)
+  for (int i = 0; i < n1; i++) {
+    if (n2 == 0) {
       array[i] = nullptr;
-    else
-      array[i] = &data.h_view(i,0);
-
+    } else {
+      nbytes = ((bigint) sizeof(typename TYPE::value_type *)) * n2;
+      array[i] = (typename TYPE::value_type **) smalloc(nbytes,name);
+      for (int j = 0; j < n2; j++) {
+        if (n3 == 0)
+           array[i][j] = nullptr;
+         else
+           array[i][j] = &data.h_view(i,j,0);
+      }
+    }
+  }
   return data;
 }
 
-template <typename TYPE>
-TYPE create_kokkos(TYPE &data, typename TYPE::value_type **&array,
-                   int n1, const char *name)
-{
-  data = TYPE(std::string(name),n1);
-  bigint nbytes = ((bigint) sizeof(typename TYPE::value_type *)) * n1;
-  array = (typename TYPE::value_type **) smalloc(nbytes,name);
-
-  for (int i = 0; i < n1; i++)
-    if (data.h_view.extent(1) == 0)
-      array[i] = nullptr;
-    else
-      array[i] = &data.h_view(i,0);
-
-  return data;
-}
-
-template <typename TYPE>
-TYPE grow_kokkos(TYPE &data, typename TYPE::value_type **&array,
-                 int n1, const char *name)
-{
-  if (array == nullptr) return create_kokkos(data,array,n1,name);
-
-  data.resize(n1);
-
-  bigint nbytes = ((bigint) sizeof(typename TYPE::value_type *)) * n1;
-  array = (typename TYPE::value_type **) srealloc(array,nbytes,name);
-
-  for (int i = 0; i < n1; i++)
-    if (data.h_view.extent(1) == 0)
-      array[i] = nullptr;
-    else
-      array[i] = &data.h_view(i,0);
-
-  return data;
-}
-
-/* ----------------------------------------------------------------------
-   destroy a 2d array
-------------------------------------------------------------------------- */
-
-template <typename TYPE>
-void destroy_kokkos(TYPE data, typename TYPE::value_type** &array)
-{
-  if (array == nullptr) return;
-  data = TYPE();
-  sfree(array);
-  array = nullptr;
-}
-
 /* ----------------------------------------------------------------------
    destroy a 3d array
 ------------------------------------------------------------------------- */
@@ -374,6 +346,65 @@ static double memory_usage(TYPE &data)
   return data.span() * sizeof(typename TYPE::value_type);
 }
 
+/* ----------------------------------------------------------------------
+  legacy functions
+------------------------------------------------------------------------- */
+
+template <typename TYPE>
+TYPE destroy_kokkos(TYPE &data)
+{
+  data = TYPE();
+  return data;
+}
+
+template <typename TYPE>
+TYPE create_kokkos(TYPE &data, int n1, const char *name)
+{
+  data = TYPE();
+  data = TYPE(std::string(name),n1);
+  return data;
+}
+
+template <typename TYPE>
+TYPE create_kokkos(TYPE &data, int n1, int n2, const char *name)
+{
+  data = TYPE();
+  data = TYPE(std::string(name),n1,n2);
+  return data;
+}
+
+template <typename TYPE>
+TYPE create_kokkos(TYPE &data, int n1, int n2, int n3 ,const char *name)
+{
+  data = TYPE();
+  data = TYPE(std::string(name),n1,n2,n3);
+  return data;
+}
+
+template <typename TYPE>
+TYPE create_kokkos(TYPE &data, int n1, int n2, int n3, int n4 ,const char *name)
+{
+  data = TYPE();
+  data = TYPE(std::string(name),n1,n2,n3,n4);
+  return data;
+}
+
+template <typename TYPE>
+TYPE create_kokkos(TYPE &data, int n1, int n2, int n3, int n4, int n5 ,const char *name)
+{
+  data = TYPE();
+  data = TYPE(std::string(name),n1,n2,n3,n4,n5);
+  return data;
+}
+
+template <typename TYPE>
+TYPE create_kokkos(TYPE &data, int n1, int n2, int n3, int n4, int n5 , int n6 ,const char *name)
+{
+  data = TYPE();
+  data = TYPE(std::string(name) ,n1,n2,n3,n4,n5,n6);
+  return data;
+}
+
 };
 
 }
diff --git a/src/KOKKOS/min_cg_kokkos.cpp b/src/KOKKOS/min_cg_kokkos.cpp
index 2ac869e4ea..17cce19a70 100644
--- a/src/KOKKOS/min_cg_kokkos.cpp
+++ b/src/KOKKOS/min_cg_kokkos.cpp
@@ -13,15 +13,16 @@
 ------------------------------------------------------------------------- */
 
 #include "min_cg_kokkos.h"
-#include <mpi.h>
-#include <cmath>
-#include "update.h"
-#include "output.h"
-#include "timer.h"
+
 #include "atom_kokkos.h"
 #include "atom_masks.h"
 #include "error.h"
 #include "fix_minimize_kokkos.h"
+#include "output.h"
+#include "timer.h"
+#include "update.h"
+
+#include <cmath>
 
 using namespace LAMMPS_NS;
 
diff --git a/src/KOKKOS/mliap_data_kokkos.cpp b/src/KOKKOS/mliap_data_kokkos.cpp
index dc8fe7dd83..fd5a852114 100644
--- a/src/KOKKOS/mliap_data_kokkos.cpp
+++ b/src/KOKKOS/mliap_data_kokkos.cpp
@@ -22,8 +22,6 @@
 #include "kokkos_type.h"
 #include "pair_mliap_kokkos.h"
 #include "atom_masks.h"
-#include "mliap_descriptor.h"
-#include "lammps.h"
 #include "kokkos.h"
 
 /* ---------------------------------------------------------------------- */
@@ -59,7 +57,6 @@ MLIAPDataKokkos<DeviceType>::~MLIAPDataKokkos() {
   memoryKK->destroy_kokkos(k_pair_i,pair_i);
   memoryKK->destroy_kokkos(k_jelems,jelems);
   memoryKK->destroy_kokkos(k_elems,elems);
-  memoryKK->destroy_kokkos(k_ij);
   memoryKK->destroy_kokkos(k_rij,rij);
   memoryKK->destroy_kokkos(k_graddesc,graddesc);
 }
@@ -213,7 +210,6 @@ void MLIAPDataKokkos<DeviceType>::grow_neigharrays() {
     memoryKK->create_kokkos(k_iatoms, iatoms, natomneigh_max, "mliap_data:iatoms");
     memoryKK->destroy_kokkos(k_ielems,ielems);
     memoryKK->create_kokkos(k_ielems, ielems, natomneigh_max, "mliap_data:ielems");
-    memoryKK->destroy_kokkos(k_ij);
     memoryKK->create_kokkos(k_ij, natomneigh_max, "mliap_data:ij");
     memoryKK->destroy_kokkos(k_numneighs,numneighs);
     memoryKK->create_kokkos(k_numneighs, numneighs, natomneigh_max, "mliap_data:numneighs");
diff --git a/src/KOKKOS/mliap_descriptor_kokkos.h b/src/KOKKOS/mliap_descriptor_kokkos.h
index 075f0e9fed..d4ab72ff96 100644
--- a/src/KOKKOS/mliap_descriptor_kokkos.h
+++ b/src/KOKKOS/mliap_descriptor_kokkos.h
@@ -29,13 +29,11 @@ template <class DeviceType> class MLIAPDescriptorKokkos : virtual protected Poin
   MLIAPDescriptorKokkos(LAMMPS *lmp, MLIAPDescriptor *descriptor_in) :
       Pointers(lmp), descriptor(descriptor_in)
   {
-    memoryKK->destroy_kokkos(k_wjelem);
   }
 
   void init_data()
   {
     int num_elems = descriptor->nelements;
-    memoryKK->destroy_kokkos(k_wjelem);
     memoryKK->create_kokkos(k_wjelem, num_elems, "MLIAPDescriptorKokkos::k_wjelem");
     for (int i = 0; i < num_elems; ++i) k_wjelem.h_view(i) = descriptor->wjelem[i];
     k_wjelem.modify<LMPHostType>();
@@ -44,7 +42,6 @@ template <class DeviceType> class MLIAPDescriptorKokkos : virtual protected Poin
 
   virtual ~MLIAPDescriptorKokkos()
   {
-    memoryKK->destroy_kokkos(k_wjelem);
   }
 
   MLIAPDescriptor *descriptor;
diff --git a/src/KOKKOS/mliap_descriptor_so3_kokkos.cpp b/src/KOKKOS/mliap_descriptor_so3_kokkos.cpp
index 7e30ab8cc7..1cf368e952 100644
--- a/src/KOKKOS/mliap_descriptor_so3_kokkos.cpp
+++ b/src/KOKKOS/mliap_descriptor_so3_kokkos.cpp
@@ -21,13 +21,9 @@
 #include "atom_kokkos.h"
 #include "comm.h"
 #include "error.h"
-#include "memory.h"
 #include "mliap_data_kokkos.h"
 #include "mliap_so3_kokkos.h"
 #include "pair_mliap.h"
-#include "tokenizer.h"
-
-#include <cstring>
 
 using namespace LAMMPS_NS;
 
diff --git a/src/KOKKOS/mliap_model_kokkos.h b/src/KOKKOS/mliap_model_kokkos.h
index 72077f5988..4840798cc1 100644
--- a/src/KOKKOS/mliap_model_kokkos.h
+++ b/src/KOKKOS/mliap_model_kokkos.h
@@ -29,14 +29,12 @@ template <class DeviceType> class MLIAPModelKokkos : protected Pointers {
   MLIAPModelKokkos(LAMMPS *lmp, MLIAPModel *model_in) : Pointers(lmp), model(model_in) {}
   virtual ~MLIAPModelKokkos()
   {
-    memoryKK->destroy_kokkos(k_coeffelem);
     model->coeffelem = nullptr;
   }
 
   void set_k_coeffelem()
   {
     double **tmp = nullptr;
-    memoryKK->destroy_kokkos(k_coeffelem);
     memoryKK->create_kokkos(k_coeffelem, tmp, model->nelements, model->nparams,
                             "MLIAPModelKokkos::coeffelem");
     for (int i = 0; i < model->nelements; ++i)
diff --git a/src/KOKKOS/mliap_model_linear_kokkos.cpp b/src/KOKKOS/mliap_model_linear_kokkos.cpp
index b294dad294..f4fef82023 100644
--- a/src/KOKKOS/mliap_model_linear_kokkos.cpp
+++ b/src/KOKKOS/mliap_model_linear_kokkos.cpp
@@ -19,7 +19,6 @@
 #include "mliap_model_linear_kokkos.h"
 
 #include "mliap_data_kokkos.h"
-#include "error.h"
 
 using namespace LAMMPS_NS;
 
diff --git a/src/KOKKOS/mliap_so3_kokkos.cpp b/src/KOKKOS/mliap_so3_kokkos.cpp
index 3f6370a6fc..155bee9294 100644
--- a/src/KOKKOS/mliap_so3_kokkos.cpp
+++ b/src/KOKKOS/mliap_so3_kokkos.cpp
@@ -21,7 +21,6 @@
 #include "error.h"
 #include "math_const.h"
 #include "math_special_kokkos.h"
-#include "memory.h"
 #include "memory_kokkos.h"
 #include "mliap_so3_math.h"
 
@@ -60,44 +59,6 @@ MLIAP_SO3Kokkos<DeviceType>::MLIAP_SO3Kokkos(LAMMPS *lmp, double vrcut, int vlma
 template <class DeviceType>
 MLIAP_SO3Kokkos<DeviceType>::~MLIAP_SO3Kokkos()
 {
-  memoryKK->destroy_kokkos(m_ellpl1);
-  memoryKK->destroy_kokkos(m_ellm1);
-  memoryKK->destroy_kokkos(m_pfac);
-  memoryKK->destroy_kokkos(m_Ylms);
-  memoryKK->destroy_kokkos(m_dfac0);
-  memoryKK->destroy_kokkos(m_dfac1);
-  memoryKK->destroy_kokkos(m_dfac2);
-  memoryKK->destroy_kokkos(m_dfac3);
-  memoryKK->destroy_kokkos(m_dfac4);
-  memoryKK->destroy_kokkos(m_dfac5);
-  memoryKK->destroy_kokkos(m_w);
-  memoryKK->destroy_kokkos(m_g_array);
-
-  memoryKK->destroy_kokkos(m_rootpq);
-  memoryKK->destroy_kokkos(m_idxu_block);
-  memoryKK->destroy_kokkos(m_idxylm);
-
-  memoryKK->destroy_kokkos(m_rip_array);
-  memoryKK->destroy_kokkos(m_rip_darray);
-
-  memoryKK->destroy_kokkos(m_sbes_array);
-  memoryKK->destroy_kokkos(m_sbes_darray);
-
-  memoryKK->destroy_kokkos(m_plist_r);
-
-  memoryKK->destroy_kokkos(m_ulist_r);
-  memoryKK->destroy_kokkos(m_ulist_i);
-
-  memoryKK->destroy_kokkos(m_dYlm_r);
-  memoryKK->destroy_kokkos(m_dYlm_i);
-
-  memoryKK->destroy_kokkos(k_dplist_r);
-
-  memoryKK->destroy_kokkos(m_dclist);
-
-  memoryKK->destroy_kokkos(m_clisttot_r);
-  memoryKK->destroy_kokkos(m_clisttot_i);
-
   t_numneighs = int_1d();
   t_jelems = int_1d();
   t_wjelem = float_1d();
@@ -121,9 +82,7 @@ void MLIAP_SO3Kokkos<DeviceType>::init()
   int totali;
 
   totali = m_lmax + 1;
-  memoryKK->destroy_kokkos(m_ellpl1);
   memoryKK->create_kokkos(m_ellpl1, totali, "MLIAP_SO3Kokkos:m_ellpl1");
-  memoryKK->destroy_kokkos(m_ellm1);
   memoryKK->create_kokkos(m_ellm1, totali, "MLIAP_SO3Kokkos:m_ellm1");
   alloc_init = 2.0 * totali * sizeof(double);
   using range=Kokkos::RangePolicy<DeviceType>;
@@ -139,9 +98,7 @@ void MLIAP_SO3Kokkos<DeviceType>::init()
   m_pfac_l1 = m_lmax + 2;
   m_pfac_l2 = (m_lmax + 2) * (m_lmax + 2) + 1;
   totali = m_pfac_l1 * m_pfac_l2;
-  memoryKK->destroy_kokkos(m_pfac);
   memoryKK->create_kokkos(m_pfac, totali, "MLIAP_SO3Kokkos:m_pfac");
-  memoryKK->destroy_kokkos(m_Ylms);
   memoryKK->create_kokkos(m_Ylms, totali, "MLIAP_SO3Kokkos:m_Ylms");
   alloc_init += 2 * totali * sizeof(double);
 
@@ -161,17 +118,11 @@ void MLIAP_SO3Kokkos<DeviceType>::init()
   m_dfac_l1 = m_lmax + 1;
   m_dfac_l2 = m_numYlms + 1;
   totali = m_dfac_l1 * m_dfac_l2;
-  memoryKK->destroy_kokkos(m_dfac0);
   memoryKK->create_kokkos(m_dfac0, totali, "MLIAP_SO3Kokkos:m_dfac0");
-  memoryKK->destroy_kokkos(m_dfac1);
   memoryKK->create_kokkos(m_dfac1, totali, "MLIAP_SO3Kokkos:m_dfac1");
-  memoryKK->destroy_kokkos(m_dfac2);
   memoryKK->create_kokkos(m_dfac2, totali, "MLIAP_SO3Kokkos:m_dfac2");
-  memoryKK->destroy_kokkos(m_dfac3);
   memoryKK->create_kokkos(m_dfac3, totali, "MLIAP_SO3Kokkos:m_dfac3");
-  memoryKK->destroy_kokkos(m_dfac4);
   memoryKK->create_kokkos(m_dfac4, totali, "MLIAP_SO3Kokkos:m_dfac4");
-  memoryKK->destroy_kokkos(m_dfac5);
   memoryKK->create_kokkos(m_dfac5, totali, "MLIAP_SO3Kokkos:m_dfac5");
   alloc_init += 6.0 * totali * sizeof(double);
 
@@ -197,12 +148,10 @@ void MLIAP_SO3Kokkos<DeviceType>::init()
   });
 
   totali = m_nmax * m_nmax;
-  memoryKK->destroy_kokkos(m_w);
   memoryKK->create_kokkos(m_w, totali, "MLIAP_SO3Kokkos:w");
   alloc_init += totali * sizeof(double);
 
   totali = m_nmax * m_Nmax;
-  memoryKK->destroy_kokkos(m_g_array);
   memoryKK->create_kokkos(m_g_array, totali, "MLIAP_SO3Kokkos:g_array");
   alloc_init += totali * sizeof(double);
 
@@ -218,7 +167,6 @@ void MLIAP_SO3Kokkos<DeviceType>::init()
   twolmax = 2 * (m_lmax + 1);
   int m_ldim = twolmax + 1;
   totali = m_ldim * m_ldim;
-  memoryKK->destroy_kokkos(m_rootpq);
   memoryKK->create_kokkos(m_rootpq, totali, "MLIAP_SO3Kokkos:rootpq");
   alloc_init += totali * sizeof(double);
 
@@ -229,12 +177,10 @@ void MLIAP_SO3Kokkos<DeviceType>::init()
       rootpq[p * ldim + q] = sqrt(static_cast<double>(p) / q);
   });
 
-  memoryKK->destroy_kokkos(m_idxu_block);
   memoryKK->create_kokkos(m_idxu_block, m_ldim, "MLIAP_SO3Kokkos:idxu_bloc");
   alloc_init += totali * sizeof(double);
 
   totali = square(m_lmax + 2);
-  memoryKK->destroy_kokkos(m_idxylm);
   memoryKK->create_kokkos(m_idxylm, totali, "MLIAP_SO3Kokkos:idxylm");
   alloc_init += totali * sizeof(double);
 
@@ -278,7 +224,6 @@ void MLIAP_SO3Kokkos<DeviceType>::init_arrays(int nlocal, int ncoefs)
 
   int totali = nlocal * ncoefs;
   if ( nlocal > (int)m_plist_r.extent(0)) {
-    memoryKK->destroy_kokkos(m_plist_r);
     memoryKK->create_kokkos(m_plist_r, nlocal, ncoefs, "MLIAP_SO3Kokkos:m_plist_r");
     alloc_arrays = totali * sizeof(double);
   }
@@ -286,26 +231,19 @@ void MLIAP_SO3Kokkos<DeviceType>::init_arrays(int nlocal, int ncoefs)
   int num_of_temp = std::min(nlocal, m_chunk_size);
   if ((int)m_ulist_r.extent(0) < num_of_temp ) {
     totali = m_idxu_count;
-    memoryKK->destroy_kokkos(m_ulist_r);
     memoryKK->create_kokkos(m_ulist_r, num_of_temp, totali, "MLIAP_SO3Kokkos:m_ulist_r");
-    memoryKK->destroy_kokkos(m_ulist_i);
     memoryKK->create_kokkos(m_ulist_i, num_of_temp, totali, "MLIAP_SO3Kokkos:m_ulist_i");
     alloc_arrays += 2.0 * totali * num_of_temp * sizeof(double);
 
     totali = m_numYlms * 3;
-    memoryKK->destroy_kokkos(m_dYlm_r);
     memoryKK->create_kokkos(m_dYlm_r, num_of_temp, m_numYlms, 3, "MLIAP_SO3Kokkos:m_dYlm_r");
-    memoryKK->destroy_kokkos(m_dYlm_i);
     memoryKK->create_kokkos(m_dYlm_i, num_of_temp, m_numYlms, 3, "MLIAP_SO3Kokkos:m_dYlm_i");
     alloc_arrays += 2.0 * m_numYlms * 3 * num_of_temp * sizeof(double);
 
-    memoryKK->destroy_kokkos(m_dclist);
     memoryKK->create_kokkos(m_dclist, num_of_temp, m_nmax, m_numYlms, 3, "MLIAP_SO3Kokkos:k_dclist_r");
     alloc_arrays += m_nmax * m_numYlms * 3 * num_of_temp* sizeof(double);
 
-    memoryKK->destroy_kokkos(m_clisttot_r);
     memoryKK->create_kokkos(m_clisttot_r, num_of_temp, m_nmax, m_numYlms, "MLIAP_SO3Kokkos:m_clisttot_r");
-    memoryKK->destroy_kokkos(m_clisttot_i);
     memoryKK->create_kokkos(m_clisttot_i, num_of_temp, m_nmax, m_numYlms, "MLIAP_SO3Kokkos:m_clisttot_i");
     alloc_arrays += 2.0 * m_nmax * m_numYlms * num_of_temp * sizeof(double);
     m_init_arrays = 1;
@@ -850,21 +788,15 @@ void MLIAP_SO3Kokkos<DeviceType>::spectrum_dxdr(int nlocal, DAT::tdual_int_1d nu
   bigint totali;
 
   if ( nlocal > (int)m_clisttot_r.extent(0)){
-    memoryKK->destroy_kokkos(m_clisttot_r);
     memoryKK->create_kokkos(m_clisttot_r, nlocal, m_nmax, m_numYlms, "MLIAP_SO3Kokkos:m_clisttot_r");
-    memoryKK->destroy_kokkos(m_clisttot_i);
     memoryKK->create_kokkos(m_clisttot_i, nlocal, m_nmax, m_numYlms, "MLIAP_SO3Kokkos:m_clisttot_i");
     int num_of_temp = std::min(nlocal, m_chunk_size);
     int delta=num_of_temp-m_ulist_r.extent(0);
     if (delta > 0){
-      memoryKK->destroy_kokkos(m_ulist_r);
       memoryKK->create_kokkos(m_ulist_r, num_of_temp, m_idxu_count, "MLIAP_SO3Kokkos:m_ulist_r");
-      memoryKK->destroy_kokkos(m_ulist_i);
       memoryKK->create_kokkos(m_ulist_i, num_of_temp, m_idxu_count, "MLIAP_SO3Kokkos:m_ulist_i");
       alloc_arrays += 2.0 * m_idxu_count * delta * sizeof(double);
-      memoryKK->destroy_kokkos(m_dYlm_r);
       memoryKK->create_kokkos(m_dYlm_r, num_of_temp, m_numYlms, 3, "MLIAP_SO3Kokkos:m_dYlm_r");
-      memoryKK->destroy_kokkos(m_dYlm_i);
       memoryKK->create_kokkos(m_dYlm_i, num_of_temp, m_numYlms, 3, "MLIAP_SO3Kokkos:m_dYlm_i");
       alloc_arrays += 2.0 * m_numYlms * 3 * delta * sizeof(double);
     }
@@ -872,18 +804,13 @@ void MLIAP_SO3Kokkos<DeviceType>::spectrum_dxdr(int nlocal, DAT::tdual_int_1d nu
 
   totali = totaln * m_Nmax * (m_lmax + 1);
   if ( totali > (int)m_sbes_array.extent(0)) {
-    memoryKK->destroy_kokkos(m_sbes_array);
     memoryKK->create_kokkos(m_sbes_array, totali, "MLIAP_SO3Kokkos:m_sbes_array");
-    memoryKK->destroy_kokkos(m_sbes_darray);
     memoryKK->create_kokkos(m_sbes_darray, totali, "MLIAP_SO3Kokkos:m_sbes_darray");
 
     totali = totaln * m_nmax * (m_lmax + 1);
-    memoryKK->destroy_kokkos(m_rip_array);
     memoryKK->create_kokkos(m_rip_array, totali, "MLIAP_SO3Kokkos:m_rip_array");
-    memoryKK->destroy_kokkos(m_rip_darray);
     memoryKK->create_kokkos(m_rip_darray, totali, "MLIAP_SO3Kokkos:m_rip_darray");
 
-    memoryKK->destroy_kokkos(k_dplist_r);
     memoryKK->create_kokkos(k_dplist_r, (int)totaln, ncoefs, 3, "MLIAP_SO3Kokkos:m_dplist_r");
   }
 
diff --git a/src/KOKKOS/mliap_unified_couple_kokkos.pyx b/src/KOKKOS/mliap_unified_couple_kokkos.pyx
index 97d807ac33..385a770bb3 100644
--- a/src/KOKKOS/mliap_unified_couple_kokkos.pyx
+++ b/src/KOKKOS/mliap_unified_couple_kokkos.pyx
@@ -13,6 +13,7 @@ from libc.stdint cimport uintptr_t
 cimport cython
 from cpython.ref cimport PyObject
 from libc.stdlib cimport malloc, free
+from libc.string cimport memcpy
 
 
 cdef extern from "lammps.h" namespace "LAMMPS_NS":
@@ -451,15 +452,24 @@ cdef public object mliap_unified_connect_kokkos(char *fname, MLIAPDummyModel * m
 
     cdef int nelements = <int>len(unified.element_types)
     cdef char **elements = <char**>malloc(nelements * sizeof(char*))
+    cdef char * c_str
+    cdef char * s
+    cdef ssize_t slen
 
     if not elements:
         raise MemoryError("failed to allocate memory for element names")
 
-    cdef char *elem_name
     for i, elem in enumerate(unified.element_types):
-        elem_name_bytes = elem.encode('UTF-8')
-        elem_name = elem_name_bytes
-        elements[i] = &elem_name[0]
+        py_str = elem.encode('UTF-8')
+        s = py_str
+        slen = len(py_str)
+        c_str = <char *>malloc((slen+1)*sizeof(char))
+        if not c_str:
+            raise MemoryError("failed to allocate memory for element names")
+        memcpy(c_str, s, slen)
+        c_str[slen] = 0
+        elements[i] = c_str
+
     unified_int.descriptor.set_elements(elements, nelements)
     unified_int.model.nelements = nelements
 
diff --git a/src/KOKKOS/mliap_unified_kokkos.cpp b/src/KOKKOS/mliap_unified_kokkos.cpp
index 4c38e4f1d6..e85f836254 100644
--- a/src/KOKKOS/mliap_unified_kokkos.cpp
+++ b/src/KOKKOS/mliap_unified_kokkos.cpp
@@ -315,7 +315,6 @@ void LAMMPS_NS::update_pair_forces(MLIAPDataKokkosDevice *data, double *fij)
     int i = pair_i[ii];
     int j = j_atoms[ii];
     // must not count any contribution where i is not a local atom
-    if (i < nlocal) {
       Kokkos::atomic_add(&f[i*3+0], fij[ii3+0]);
       Kokkos::atomic_add(&f[i*3+1], fij[ii3+1]);
       Kokkos::atomic_add(&f[i*3+2], fij[ii3+2]);
@@ -352,7 +351,6 @@ void LAMMPS_NS::update_pair_forces(MLIAPDataKokkosDevice *data, double *fij)
           Kokkos::atomic_add(&d_vatom(j,3), 0.5*v[3]);
           Kokkos::atomic_add(&d_vatom(j,4), 0.5*v[4]);
           Kokkos::atomic_add(&d_vatom(j,5), 0.5*v[5]);
-        }
       }
     }
   });
@@ -382,11 +380,9 @@ void LAMMPS_NS::update_atom_energy(MLIAPDataKokkosDevice *data, double *ei)
 
   Kokkos::parallel_reduce(nlocal, KOKKOS_LAMBDA(int i, double &local_sum){
     double e = ei[i];
-    // must not count any contribution where i is not a local atom
-    if (i < nlocal) {
-      d_eatoms[i] = e;
-      local_sum += e;
-    }
+
+    d_eatoms[i] = e;
+    local_sum += e;
   },*data->energy);
 }
 
diff --git a/src/KOKKOS/nbin_ssa_kokkos.cpp b/src/KOKKOS/nbin_ssa_kokkos.cpp
index 6e7390d3c6..63c2fe22c1 100644
--- a/src/KOKKOS/nbin_ssa_kokkos.cpp
+++ b/src/KOKKOS/nbin_ssa_kokkos.cpp
@@ -18,13 +18,12 @@
 ------------------------------------------------------------------------- */
 
 #include "nbin_ssa_kokkos.h"
-#include "neighbor.h"
+
 #include "atom_kokkos.h"
 #include "domain.h"
 #include "update.h"
 #include "atom_masks.h"
-
-// #include "memory_kokkos.h"
+#include "kokkos_type.h"
 
 using namespace LAMMPS_NS;
 
diff --git a/src/KOKKOS/neigh_bond_kokkos.cpp b/src/KOKKOS/neigh_bond_kokkos.cpp
index 85ca6c916e..9e1c8c273b 100644
--- a/src/KOKKOS/neigh_bond_kokkos.cpp
+++ b/src/KOKKOS/neigh_bond_kokkos.cpp
@@ -27,6 +27,7 @@
 #include "force.h"
 #include "memory_kokkos.h"
 #include "modify.h"
+#include "neighbor.h"
 #include "output.h"
 #include "thermo.h"
 #include "update.h"
diff --git a/src/KOKKOS/neigh_bond_kokkos.h b/src/KOKKOS/neigh_bond_kokkos.h
index 480726c602..f60903f73e 100644
--- a/src/KOKKOS/neigh_bond_kokkos.h
+++ b/src/KOKKOS/neigh_bond_kokkos.h
@@ -15,9 +15,7 @@
 #ifndef LMP_NEIGH_BOND_KOKKOS_H
 #define LMP_NEIGH_BOND_KOKKOS_H
 
-#include "neighbor.h"
 #include "kokkos_type.h"
-#include "domain_kokkos.h"
 #include "pointers.h"
 #include <Kokkos_UnorderedMap.hpp>
 
diff --git a/src/KOKKOS/neighbor_kokkos.cpp b/src/KOKKOS/neighbor_kokkos.cpp
index efb1247560..214b2e86d9 100644
--- a/src/KOKKOS/neighbor_kokkos.cpp
+++ b/src/KOKKOS/neighbor_kokkos.cpp
@@ -18,16 +18,14 @@
 #include "atom_kokkos.h"
 #include "atom_masks.h"
 #include "bond.h"
-#include "comm.h"
+#include "domain.h"
 #include "dihedral.h"
 #include "error.h"
-#include "fix.h"
 #include "force.h"
 #include "improper.h"
 #include "kokkos.h"
 #include "memory_kokkos.h"
 #include "neigh_request.h"
-#include "pair.h"
 #include "style_nbin.h"
 #include "style_npair.h"
 #include "style_nstencil.h"
diff --git a/src/KOKKOS/npair_skip_kokkos.cpp b/src/KOKKOS/npair_skip_kokkos.cpp
index 15c0487010..91a2cfa17e 100644
--- a/src/KOKKOS/npair_skip_kokkos.cpp
+++ b/src/KOKKOS/npair_skip_kokkos.cpp
@@ -16,7 +16,6 @@
 
 #include "atom_kokkos.h"
 #include "atom_masks.h"
-#include "atom_vec.h"
 #include "neigh_list_kokkos.h"
 
 using namespace LAMMPS_NS;
diff --git a/src/KOKKOS/pair_adp_kokkos.cpp b/src/KOKKOS/pair_adp_kokkos.cpp
index d02edc43ab..1297d62651 100644
--- a/src/KOKKOS/pair_adp_kokkos.cpp
+++ b/src/KOKKOS/pair_adp_kokkos.cpp
@@ -472,12 +472,11 @@ void PairADPKokkos<DeviceType>::interpolate(int n, double delta, double *f, t_ho
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
-int PairADPKokkos<DeviceType>::pack_forward_comm_kokkos(int n, DAT::tdual_int_2d k_sendlist,
-                                                        int iswap_in, DAT::tdual_xfloat_1d &buf,
+int PairADPKokkos<DeviceType>::pack_forward_comm_kokkos(int n, DAT::tdual_int_1d k_sendlist,
+                                                        DAT::tdual_xfloat_1d &buf,
                                                         int /*pbc_flag*/, int * /*pbc*/)
 {
   d_sendlist = k_sendlist.view<DeviceType>();
-  iswap = iswap_in;
   v_buf = buf.view<DeviceType>();
   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairADPPackForwardComm>(0,n),*this);
   return n*10;
@@ -486,7 +485,7 @@ int PairADPKokkos<DeviceType>::pack_forward_comm_kokkos(int n, DAT::tdual_int_2d
 template<class DeviceType>
 KOKKOS_INLINE_FUNCTION
 void PairADPKokkos<DeviceType>::operator()(TagPairADPPackForwardComm, const int &i) const {
-  int j = d_sendlist(iswap, i);
+  int j = d_sendlist(i);
   v_buf[10 * i] = d_fp(j);
   v_buf[10 * i + 1] = d_mu(j, 0);
   v_buf[10 * i + 2] = d_mu(j, 1);
diff --git a/src/KOKKOS/pair_adp_kokkos.h b/src/KOKKOS/pair_adp_kokkos.h
index 5714bdb699..41328a567e 100644
--- a/src/KOKKOS/pair_adp_kokkos.h
+++ b/src/KOKKOS/pair_adp_kokkos.h
@@ -108,7 +108,7 @@ class PairADPKokkos : public PairADP, public KokkosBase
       const F_FLOAT &epair, const F_FLOAT &fx, const F_FLOAT &fy, const F_FLOAT &fz, const F_FLOAT &delx,
                   const F_FLOAT &dely, const F_FLOAT &delz) const;
 
-  int pack_forward_comm_kokkos(int, DAT::tdual_int_2d, int, DAT::tdual_xfloat_1d&,
+  int pack_forward_comm_kokkos(int, DAT::tdual_int_1d, DAT::tdual_xfloat_1d&,
                        int, int *) override;
   void unpack_forward_comm_kokkos(int, int, DAT::tdual_xfloat_1d&) override;
   int pack_forward_comm(int, int *, double *, int, int *) override;
@@ -185,9 +185,8 @@ class PairADPKokkos : public PairADP, public KokkosBase
   typename AT::t_int_1d d_ilist;
   typename AT::t_int_1d d_numneigh;
 
-  int iswap;
   int first;
-  typename AT::t_int_2d d_sendlist;
+  typename AT::t_int_1d d_sendlist;
   typename AT::t_xfloat_1d_um v_buf;
 
   int neighflag,newton_pair;
diff --git a/src/KOKKOS/pair_buck_coul_cut_kokkos.cpp b/src/KOKKOS/pair_buck_coul_cut_kokkos.cpp
index a859b232be..ebe49b59a0 100644
--- a/src/KOKKOS/pair_buck_coul_cut_kokkos.cpp
+++ b/src/KOKKOS/pair_buck_coul_cut_kokkos.cpp
@@ -25,14 +25,12 @@
 #include "kokkos.h"
 #include "math_const.h"
 #include "memory_kokkos.h"
-#include "neigh_list.h"
 #include "neigh_request.h"
 #include "neighbor.h"
 #include "respa.h"
 #include "update.h"
 
 #include <cmath>
-#include <cstring>
 
 using namespace LAMMPS_NS;
 using namespace MathConst;
diff --git a/src/KOKKOS/pair_buck_kokkos.cpp b/src/KOKKOS/pair_buck_kokkos.cpp
index b549fcd329..88b0445dc8 100644
--- a/src/KOKKOS/pair_buck_kokkos.cpp
+++ b/src/KOKKOS/pair_buck_kokkos.cpp
@@ -24,14 +24,12 @@
 #include "force.h"
 #include "kokkos.h"
 #include "memory_kokkos.h"
-#include "neigh_list.h"
 #include "neigh_request.h"
 #include "neighbor.h"
 #include "respa.h"
 #include "update.h"
 
 #include <cmath>
-#include <cstring>
 
 using namespace LAMMPS_NS;
 
diff --git a/src/KOKKOS/pair_coul_cut_kokkos.cpp b/src/KOKKOS/pair_coul_cut_kokkos.cpp
index 283a4b2b69..1796bd93fd 100644
--- a/src/KOKKOS/pair_coul_cut_kokkos.cpp
+++ b/src/KOKKOS/pair_coul_cut_kokkos.cpp
@@ -20,7 +20,6 @@
 #include "force.h"
 #include "kokkos.h"
 #include "memory_kokkos.h"
-#include "neigh_list.h"
 #include "neigh_request.h"
 #include "neighbor.h"
 
diff --git a/src/KOKKOS/pair_coul_debye_kokkos.cpp b/src/KOKKOS/pair_coul_debye_kokkos.cpp
index 4c0e610e89..eb61716640 100644
--- a/src/KOKKOS/pair_coul_debye_kokkos.cpp
+++ b/src/KOKKOS/pair_coul_debye_kokkos.cpp
@@ -24,14 +24,10 @@
 #include "force.h"
 #include "kokkos.h"
 #include "memory_kokkos.h"
-#include "neigh_list.h"
 #include "neigh_request.h"
 #include "neighbor.h"
-#include "respa.h"
-#include "update.h"
 
 #include <cmath>
-#include <cstring>
 
 using namespace LAMMPS_NS;
 
diff --git a/src/KOKKOS/pair_coul_dsf_kokkos.cpp b/src/KOKKOS/pair_coul_dsf_kokkos.cpp
index 5184c42096..9e0db27f36 100644
--- a/src/KOKKOS/pair_coul_dsf_kokkos.cpp
+++ b/src/KOKKOS/pair_coul_dsf_kokkos.cpp
@@ -20,7 +20,6 @@
 
 #include "atom_kokkos.h"
 #include "atom_masks.h"
-#include "error.h"
 #include "ewald_const.h"
 #include "force.h"
 #include "kokkos.h"
diff --git a/src/KOKKOS/pair_coul_wolf_kokkos.cpp b/src/KOKKOS/pair_coul_wolf_kokkos.cpp
index 2ccf7a5a15..af5067db16 100644
--- a/src/KOKKOS/pair_coul_wolf_kokkos.cpp
+++ b/src/KOKKOS/pair_coul_wolf_kokkos.cpp
@@ -20,7 +20,6 @@
 
 #include "atom_kokkos.h"
 #include "atom_masks.h"
-#include "error.h"
 #include "force.h"
 #include "kokkos.h"
 #include "math_const.h"
diff --git a/src/KOKKOS/pair_eam_alloy_kokkos.cpp b/src/KOKKOS/pair_eam_alloy_kokkos.cpp
index 93ed9fc620..b02faced1e 100644
--- a/src/KOKKOS/pair_eam_alloy_kokkos.cpp
+++ b/src/KOKKOS/pair_eam_alloy_kokkos.cpp
@@ -449,12 +449,11 @@ void PairEAMAlloyKokkos<DeviceType>::interpolate(int n, double delta, double *f,
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
-int PairEAMAlloyKokkos<DeviceType>::pack_forward_comm_kokkos(int n, DAT::tdual_int_2d k_sendlist,
-                                                             int iswap_in, DAT::tdual_xfloat_1d &buf,
+int PairEAMAlloyKokkos<DeviceType>::pack_forward_comm_kokkos(int n, DAT::tdual_int_1d k_sendlist,
+                                                             DAT::tdual_xfloat_1d &buf,
                                                              int /*pbc_flag*/, int * /*pbc*/)
 {
   d_sendlist = k_sendlist.view<DeviceType>();
-  iswap = iswap_in;
   v_buf = buf.view<DeviceType>();
   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagPairEAMAlloyPackForwardComm>(0,n),*this);
   return n;
@@ -463,7 +462,7 @@ int PairEAMAlloyKokkos<DeviceType>::pack_forward_comm_kokkos(int n, DAT::tdual_i
 template<class DeviceType>
 KOKKOS_INLINE_FUNCTION
 void PairEAMAlloyKokkos<DeviceType>::operator()(TagPairEAMAlloyPackForwardComm, const int &i) const {
-  int j = d_sendlist(iswap, i);
+  int j = d_sendlist(i);
   v_buf[i] = d_fp[j];
 }
 
diff --git a/src/KOKKOS/pair_eam_alloy_kokkos.h b/src/KOKKOS/pair_eam_alloy_kokkos.h
index 2eb40189ac..572dc1aca8 100644
--- a/src/KOKKOS/pair_eam_alloy_kokkos.h
+++ b/src/KOKKOS/pair_eam_alloy_kokkos.h
@@ -122,7 +122,7 @@ class PairEAMAlloyKokkos : public PairEAM, public KokkosBase {
       const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx,
                   const F_FLOAT &dely, const F_FLOAT &delz) const;
 
-  int pack_forward_comm_kokkos(int, DAT::tdual_int_2d, int, DAT::tdual_xfloat_1d&,
+  int pack_forward_comm_kokkos(int, DAT::tdual_int_1d, DAT::tdual_xfloat_1d&,
                        int, int *) override;
   void unpack_forward_comm_kokkos(int, int, DAT::tdual_xfloat_1d&) override;
   int pack_forward_comm(int, int *, double *, int, int *) override;
@@ -190,9 +190,8 @@ class PairEAMAlloyKokkos : public PairEAM, public KokkosBase {
   typename AT::t_int_1d d_ilist;
   typename AT::t_int_1d d_numneigh;
 
-  int iswap;
   int first;
-  typename AT::t_int_2d d_sendlist;
+  typename AT::t_int_1d d_sendlist;
   typename AT::t_xfloat_1d_um v_buf;
 
   int neighflag,newton_pair;
diff --git a/src/KOKKOS/pair_eam_fs_kokkos.cpp b/src/KOKKOS/pair_eam_fs_kokkos.cpp
index 5dee601302..4da146e68e 100644
--- a/src/KOKKOS/pair_eam_fs_kokkos.cpp
+++ b/src/KOKKOS/pair_eam_fs_kokkos.cpp
@@ -449,12 +449,11 @@ void PairEAMFSKokkos<DeviceType>::interpolate(int n, double delta, double *f, t_
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
-int PairEAMFSKokkos<DeviceType>::pack_forward_comm_kokkos(int n, DAT::tdual_int_2d k_sendlist,
-                                                          int iswap_in, DAT::tdual_xfloat_1d &buf,
+int PairEAMFSKokkos<DeviceType>::pack_forward_comm_kokkos(int n, DAT::tdual_int_1d k_sendlist,
+                                                          DAT::tdual_xfloat_1d &buf,
                                                           int /*pbc_flag*/, int * /*pbc*/)
 {
   d_sendlist = k_sendlist.view<DeviceType>();
-  iswap = iswap_in;
   v_buf = buf.view<DeviceType>();
   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagPairEAMFSPackForwardComm>(0,n),*this);
   return n;
@@ -463,7 +462,7 @@ int PairEAMFSKokkos<DeviceType>::pack_forward_comm_kokkos(int n, DAT::tdual_int_
 template<class DeviceType>
 KOKKOS_INLINE_FUNCTION
 void PairEAMFSKokkos<DeviceType>::operator()(TagPairEAMFSPackForwardComm, const int &i) const {
-  int j = d_sendlist(iswap, i);
+  int j = d_sendlist(i);
   v_buf[i] = d_fp[j];
 }
 
diff --git a/src/KOKKOS/pair_eam_fs_kokkos.h b/src/KOKKOS/pair_eam_fs_kokkos.h
index bd03ab0015..8e4ee7851e 100644
--- a/src/KOKKOS/pair_eam_fs_kokkos.h
+++ b/src/KOKKOS/pair_eam_fs_kokkos.h
@@ -122,7 +122,7 @@ class PairEAMFSKokkos : public PairEAM, public KokkosBase {
       const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx,
                   const F_FLOAT &dely, const F_FLOAT &delz) const;
 
-  int pack_forward_comm_kokkos(int, DAT::tdual_int_2d, int, DAT::tdual_xfloat_1d&,
+  int pack_forward_comm_kokkos(int, DAT::tdual_int_1d, DAT::tdual_xfloat_1d&,
                        int, int *) override;
   void unpack_forward_comm_kokkos(int, int, DAT::tdual_xfloat_1d&) override;
   int pack_forward_comm(int, int *, double *, int, int *) override;
@@ -190,9 +190,8 @@ class PairEAMFSKokkos : public PairEAM, public KokkosBase {
   typename AT::t_int_1d d_ilist;
   typename AT::t_int_1d d_numneigh;
 
-  int iswap;
   int first;
-  typename AT::t_int_2d d_sendlist;
+  typename AT::t_int_1d d_sendlist;
   typename AT::t_xfloat_1d_um v_buf;
 
   int neighflag,newton_pair;
diff --git a/src/KOKKOS/pair_eam_kokkos.cpp b/src/KOKKOS/pair_eam_kokkos.cpp
index 32f4afe225..54ffa84f2d 100644
--- a/src/KOKKOS/pair_eam_kokkos.cpp
+++ b/src/KOKKOS/pair_eam_kokkos.cpp
@@ -444,12 +444,11 @@ void PairEAMKokkos<DeviceType>::interpolate(int n, double delta, double *f, t_ho
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
-int PairEAMKokkos<DeviceType>::pack_forward_comm_kokkos(int n, DAT::tdual_int_2d k_sendlist,
-                                                        int iswap_in, DAT::tdual_xfloat_1d &buf,
+int PairEAMKokkos<DeviceType>::pack_forward_comm_kokkos(int n, DAT::tdual_int_1d k_sendlist,
+                                                        DAT::tdual_xfloat_1d &buf,
                                                         int /*pbc_flag*/, int * /*pbc*/)
 {
   d_sendlist = k_sendlist.view<DeviceType>();
-  iswap = iswap_in;
   v_buf = buf.view<DeviceType>();
   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMPackForwardComm>(0,n),*this);
   return n;
@@ -458,7 +457,7 @@ int PairEAMKokkos<DeviceType>::pack_forward_comm_kokkos(int n, DAT::tdual_int_2d
 template<class DeviceType>
 KOKKOS_INLINE_FUNCTION
 void PairEAMKokkos<DeviceType>::operator()(TagPairEAMPackForwardComm, const int &i) const {
-  int j = d_sendlist(iswap, i);
+  int j = d_sendlist(i);
   v_buf[i] = d_fp[j];
 }
 
diff --git a/src/KOKKOS/pair_eam_kokkos.h b/src/KOKKOS/pair_eam_kokkos.h
index 9d066d40a0..950db43fb2 100644
--- a/src/KOKKOS/pair_eam_kokkos.h
+++ b/src/KOKKOS/pair_eam_kokkos.h
@@ -120,7 +120,7 @@ class PairEAMKokkos : public PairEAM, public KokkosBase {
       const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx,
                   const F_FLOAT &dely, const F_FLOAT &delz) const;
 
-  int pack_forward_comm_kokkos(int, DAT::tdual_int_2d, int, DAT::tdual_xfloat_1d&,
+  int pack_forward_comm_kokkos(int, DAT::tdual_int_1d, DAT::tdual_xfloat_1d&,
                        int, int *) override;
   void unpack_forward_comm_kokkos(int, int, DAT::tdual_xfloat_1d&) override;
   int pack_forward_comm(int, int *, double *, int, int *) override;
@@ -186,9 +186,8 @@ class PairEAMKokkos : public PairEAM, public KokkosBase {
   typename AT::t_int_1d d_ilist;
   typename AT::t_int_1d d_numneigh;
 
-  int iswap;
   int first;
-  typename AT::t_int_2d d_sendlist;
+  typename AT::t_int_1d d_sendlist;
   typename AT::t_xfloat_1d_um v_buf;
 
   int neighflag,newton_pair;
diff --git a/src/KOKKOS/pair_hybrid_kokkos.cpp b/src/KOKKOS/pair_hybrid_kokkos.cpp
index eabce17a1c..84d43bcec8 100644
--- a/src/KOKKOS/pair_hybrid_kokkos.cpp
+++ b/src/KOKKOS/pair_hybrid_kokkos.cpp
@@ -14,14 +14,10 @@
 ------------------------------------------------------------------------- */
 
 #include "pair_hybrid_kokkos.h"
-#include <cstring>
+
 #include "atom_kokkos.h"
 #include "force.h"
-#include "pair.h"
-#include "neighbor.h"
-#include "neigh_request.h"
 #include "update.h"
-#include "memory_kokkos.h"
 #include "respa.h"
 #include "atom_masks.h"
 #include "kokkos.h"
diff --git a/src/KOKKOS/pair_lj_cut_coul_cut_kokkos.cpp b/src/KOKKOS/pair_lj_cut_coul_cut_kokkos.cpp
index 7c61c684e4..626f05106c 100644
--- a/src/KOKKOS/pair_lj_cut_coul_cut_kokkos.cpp
+++ b/src/KOKKOS/pair_lj_cut_coul_cut_kokkos.cpp
@@ -20,14 +20,12 @@
 #include "force.h"
 #include "kokkos.h"
 #include "memory_kokkos.h"
-#include "neigh_list.h"
 #include "neigh_request.h"
 #include "neighbor.h"
 #include "respa.h"
 #include "update.h"
 
 #include <cmath>
-#include <cstring>
 
 using namespace LAMMPS_NS;
 
diff --git a/src/KOKKOS/pair_lj_cut_kokkos.cpp b/src/KOKKOS/pair_lj_cut_kokkos.cpp
index 9a1ced9da3..566d74088c 100644
--- a/src/KOKKOS/pair_lj_cut_kokkos.cpp
+++ b/src/KOKKOS/pair_lj_cut_kokkos.cpp
@@ -20,14 +20,11 @@
 #include "force.h"
 #include "kokkos.h"
 #include "memory_kokkos.h"
-#include "neigh_list.h"
 #include "neigh_request.h"
 #include "neighbor.h"
 #include "respa.h"
 #include "update.h"
 
-#include <cstring>
-
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/KOKKOS/pair_meam_kokkos.cpp b/src/KOKKOS/pair_meam_kokkos.cpp
index 4361a387ba..9082c410e0 100644
--- a/src/KOKKOS/pair_meam_kokkos.cpp
+++ b/src/KOKKOS/pair_meam_kokkos.cpp
@@ -338,11 +338,10 @@ void PairMEAMKokkos<DeviceType>::init_style()
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
-int PairMEAMKokkos<DeviceType>::pack_forward_comm_kokkos(int n, DAT::tdual_int_2d k_sendlist, int iswap_in, DAT::tdual_xfloat_1d &buf,
+int PairMEAMKokkos<DeviceType>::pack_forward_comm_kokkos(int n, DAT::tdual_int_1d k_sendlist, DAT::tdual_xfloat_1d &buf,
                                 int /*pbc_flag*/, int * /*pbc*/)
 {
   d_sendlist = k_sendlist.view<DeviceType>();
-  iswap = iswap_in;
   v_buf = buf.view<DeviceType>();
   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMEAMPackForwardComm>(0,n),*this);
   return n*comm_forward;
@@ -353,7 +352,7 @@ int PairMEAMKokkos<DeviceType>::pack_forward_comm_kokkos(int n, DAT::tdual_int_2
 template<class DeviceType>
 KOKKOS_INLINE_FUNCTION
 void PairMEAMKokkos<DeviceType>::operator()(TagPairMEAMPackForwardComm, const int &i) const {
-  int j = d_sendlist(iswap, i);
+  int j = d_sendlist(i);
   int m = i*comm_forward;
   v_buf[m++] = d_rho0[j];
   v_buf[m++] = d_rho1[j];
@@ -782,10 +781,9 @@ int PairMEAMKokkos<DeviceType>::pack_reverse_comm(int n, int first, double *buf)
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
-void PairMEAMKokkos<DeviceType>::unpack_reverse_comm_kokkos(int n, DAT::tdual_int_2d k_sendlist, int iswap_in, DAT::tdual_xfloat_1d &buf)
+void PairMEAMKokkos<DeviceType>::unpack_reverse_comm_kokkos(int n, DAT::tdual_int_1d k_sendlist, DAT::tdual_xfloat_1d &buf)
 {
   d_sendlist = k_sendlist.view<DeviceType>();
-  iswap = iswap_in;
   v_buf = buf.view<DeviceType>();
   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMEAMUnpackReverseComm>(0,n),*this);
 }
@@ -795,7 +793,7 @@ void PairMEAMKokkos<DeviceType>::unpack_reverse_comm_kokkos(int n, DAT::tdual_in
 template<class DeviceType>
 KOKKOS_INLINE_FUNCTION
 void PairMEAMKokkos<DeviceType>::operator()(TagPairMEAMUnpackReverseComm, const int &i) const {
-  int j = d_sendlist(iswap, i);
+  int j = d_sendlist(i);
   //int m = i*30;
   int m = i*comm_reverse;
 
diff --git a/src/KOKKOS/pair_meam_kokkos.h b/src/KOKKOS/pair_meam_kokkos.h
index 0d0d7667f3..66b5700a72 100644
--- a/src/KOKKOS/pair_meam_kokkos.h
+++ b/src/KOKKOS/pair_meam_kokkos.h
@@ -76,15 +76,15 @@ class PairMEAMKokkos : public PairMEAM, public KokkosBase {
   KOKKOS_INLINE_FUNCTION
   void operator()(TagPairMEAMOffsets,  const int, int&) const;
 
-  int pack_forward_comm_kokkos(int, DAT::tdual_int_2d, int, DAT::tdual_xfloat_1d&,
+  int pack_forward_comm_kokkos(int, DAT::tdual_int_1d, DAT::tdual_xfloat_1d&,
                                int, int *) override;
   int pack_forward_comm(int, int *, double *, int, int *) override;
   void unpack_forward_comm_kokkos(int, int, DAT::tdual_xfloat_1d&) override;
   void unpack_forward_comm(int, int, double *) override;
   int pack_reverse_comm_kokkos(int, int, DAT::tdual_xfloat_1d&) override;
   int pack_reverse_comm(int, int, double *) override;
-  void unpack_reverse_comm_kokkos(int, DAT::tdual_int_2d,
-                                  int, DAT::tdual_xfloat_1d&) override;
+  void unpack_reverse_comm_kokkos(int, DAT::tdual_int_1d,
+                                  DAT::tdual_xfloat_1d&) override;
   void unpack_reverse_comm(int, int *, double *) override;
 
  protected:
@@ -108,10 +108,10 @@ class PairMEAMKokkos : public PairMEAM, public KokkosBase {
   typename AT::t_neighbors_2d d_neighbors_half;
   typename AT::t_int_1d d_numneigh_full;
   typename AT::t_neighbors_2d d_neighbors_full;
-  typename AT::t_int_2d d_sendlist;
+  typename AT::t_int_1d d_sendlist;
   typename AT::t_xfloat_1d_um v_buf;
 
-  int iswap,first;
+  int first;
   int neighflag,nlocal,nall,eflag,vflag;
 
   typename ArrayTypes<DeviceType>::t_ffloat_1d d_rho, d_rho0, d_rho1, d_rho2, d_rho3, d_frhop;
diff --git a/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp b/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp
index 41fcac126d..2a20c9e013 100644
--- a/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp
+++ b/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp
@@ -646,12 +646,11 @@ void PairMultiLucyRXKokkos<DeviceType>::getMixingWeights(int id, double &mixWtSi
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
-int PairMultiLucyRXKokkos<DeviceType>::pack_forward_comm_kokkos(int n, DAT::tdual_int_2d k_sendlist, int iswap_in, DAT::tdual_xfloat_1d &buf, int /*pbc_flag*/, int * /*pbc*/)
+int PairMultiLucyRXKokkos<DeviceType>::pack_forward_comm_kokkos(int n, DAT::tdual_int_1d k_sendlist, DAT::tdual_xfloat_1d &buf, int /*pbc_flag*/, int * /*pbc*/)
 {
   atomKK->sync(execution_space,DPDRHO_MASK);
 
   d_sendlist = k_sendlist.view<DeviceType>();
-  iswap = iswap_in;
   v_buf = buf.view<DeviceType>();
   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairMultiLucyRXPackForwardComm>(0,n),*this);
   return n;
@@ -660,7 +659,7 @@ int PairMultiLucyRXKokkos<DeviceType>::pack_forward_comm_kokkos(int n, DAT::tdua
 template<class DeviceType>
 KOKKOS_INLINE_FUNCTION
 void PairMultiLucyRXKokkos<DeviceType>::operator()(TagPairMultiLucyRXPackForwardComm, const int &i) const {
-  int j = d_sendlist(iswap, i);
+  int j = d_sendlist(i);
   v_buf[i] = rho[j];
 }
 
diff --git a/src/KOKKOS/pair_multi_lucy_rx_kokkos.h b/src/KOKKOS/pair_multi_lucy_rx_kokkos.h
index c335ed526f..753012e0c7 100644
--- a/src/KOKKOS/pair_multi_lucy_rx_kokkos.h
+++ b/src/KOKKOS/pair_multi_lucy_rx_kokkos.h
@@ -61,7 +61,7 @@ class PairMultiLucyRXKokkos : public PairMultiLucyRX, public KokkosBase {
   void compute_style(int, int);
 
   void init_style() override;
-  int pack_forward_comm_kokkos(int, DAT::tdual_int_2d, int, DAT::tdual_xfloat_1d&,
+  int pack_forward_comm_kokkos(int, DAT::tdual_int_1d, DAT::tdual_xfloat_1d&,
                                int, int *) override;
   void unpack_forward_comm_kokkos(int, int, DAT::tdual_xfloat_1d&) override;
   int pack_forward_comm(int, int *, double *, int, int *) override;
@@ -181,9 +181,8 @@ class PairMultiLucyRXKokkos : public PairMultiLucyRX, public KokkosBase {
   typename AT::tdual_ffloat_2d k_cutsq;
   typename AT::t_ffloat_2d d_cutsq;
 
-  int iswap;
   int first;
-  typename AT::t_int_2d d_sendlist;
+  typename AT::t_int_1d d_sendlist;
   typename AT::t_xfloat_1d_um v_buf;
 
   friend void pair_virial_fdotr_compute<PairMultiLucyRXKokkos>(PairMultiLucyRXKokkos*);
diff --git a/src/KOKKOS/third_order_kokkos.cpp b/src/KOKKOS/third_order_kokkos.cpp
index 04c467777f..569a94a773 100644
--- a/src/KOKKOS/third_order_kokkos.cpp
+++ b/src/KOKKOS/third_order_kokkos.cpp
@@ -23,27 +23,18 @@
 #include "atom_masks.h"
 #include "bond.h"
 #include "comm.h"
-#include "compute.h"
 #include "dihedral.h"
 #include "domain.h"
-#include "error.h"
-#include "finish.h"
 #include "force.h"
-#include "group.h"
 #include "improper.h"
 #include "kokkos.h"
 #include "kspace.h"
-#include "memory.h"
 #include "modify.h"
 #include "neighbor.h"
 #include "pair.h"
 #include "timer.h"
 #include "update.h"
 
-#include <cmath>
-#include <cstring>
-#include <algorithm>
-
 using namespace LAMMPS_NS;
 enum{REGULAR,ESKM};
 
diff --git a/src/KOKKOS/verlet_kokkos.cpp b/src/KOKKOS/verlet_kokkos.cpp
index 7570f1d8fa..858df5df6c 100644
--- a/src/KOKKOS/verlet_kokkos.cpp
+++ b/src/KOKKOS/verlet_kokkos.cpp
@@ -29,7 +29,6 @@
 #include "update.h"
 #include "modify_kokkos.h"
 #include "timer.h"
-#include "memory_kokkos.h"
 #include "kokkos.h"
 
 using namespace LAMMPS_NS;
diff --git a/src/KSPACE/pppm.cpp b/src/KSPACE/pppm.cpp
index 2616282973..4fe5075f44 100644
--- a/src/KSPACE/pppm.cpp
+++ b/src/KSPACE/pppm.cpp
@@ -24,14 +24,12 @@
 #include "angle.h"
 #include "atom.h"
 #include "bond.h"
-#include "comm.h"
 #include "domain.h"
 #include "error.h"
 #include "fft3d_wrap.h"
 #include "force.h"
 #include "grid3d.h"
 #include "math_const.h"
-#include "math_extra.h"
 #include "math_special.h"
 #include "memory.h"
 #include "neighbor.h"
diff --git a/src/KSPACE/pppm_cg.cpp b/src/KSPACE/pppm_cg.cpp
index e5ae0b05c8..cebf9e0000 100644
--- a/src/KSPACE/pppm_cg.cpp
+++ b/src/KSPACE/pppm_cg.cpp
@@ -26,7 +26,6 @@
 #include "math_const.h"
 #include "memory.h"
 #include "neighbor.h"
-#include "remap.h"
 
 #include <cmath>
 #include <cstring>
diff --git a/src/LEPTON/angle_lepton.cpp b/src/LEPTON/angle_lepton.cpp
index 9fe565f8ee..6efded950f 100644
--- a/src/LEPTON/angle_lepton.cpp
+++ b/src/LEPTON/angle_lepton.cpp
@@ -27,6 +27,8 @@
 #include "neighbor.h"
 
 #include <cmath>
+#include <cstring>
+#include <exception>
 
 #include "Lepton.h"
 #include "lepton_utils.h"
diff --git a/src/LEPTON/bond_lepton.cpp b/src/LEPTON/bond_lepton.cpp
index 8679d0ed62..63c66011a1 100644
--- a/src/LEPTON/bond_lepton.cpp
+++ b/src/LEPTON/bond_lepton.cpp
@@ -25,6 +25,8 @@
 #include "neighbor.h"
 
 #include <cmath>
+#include <cstring>
+#include <exception>
 
 #include "Lepton.h"
 #include "lepton_utils.h"
diff --git a/src/LEPTON/dihedral_lepton.cpp b/src/LEPTON/dihedral_lepton.cpp
index 069ff13d74..16975a8f52 100644
--- a/src/LEPTON/dihedral_lepton.cpp
+++ b/src/LEPTON/dihedral_lepton.cpp
@@ -29,6 +29,7 @@
 #include "neighbor.h"
 
 #include <cmath>
+#include <exception>
 
 #include "Lepton.h"
 #include "lepton_utils.h"
diff --git a/src/LEPTON/fix_wall_lepton.cpp b/src/LEPTON/fix_wall_lepton.cpp
index 7530188c00..320efb090e 100644
--- a/src/LEPTON/fix_wall_lepton.cpp
+++ b/src/LEPTON/fix_wall_lepton.cpp
@@ -19,6 +19,8 @@
 #include "Lepton.h"
 #include "lepton_utils.h"
 
+#include <exception>
+
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/LEPTON/lepton_utils.cpp b/src/LEPTON/lepton_utils.cpp
index 89e69beddd..c4e527d7d7 100644
--- a/src/LEPTON/lepton_utils.cpp
+++ b/src/LEPTON/lepton_utils.cpp
@@ -17,7 +17,6 @@
 
 #include "lepton_utils.h"
 
-#include "error.h"
 #include "input.h"
 #include "lammps.h"
 #include "pair_zbl_const.h"
diff --git a/src/LEPTON/pair_lepton.cpp b/src/LEPTON/pair_lepton.cpp
index adc07cbfa8..90003e9091 100644
--- a/src/LEPTON/pair_lepton.cpp
+++ b/src/LEPTON/pair_lepton.cpp
@@ -23,12 +23,12 @@
 #include "force.h"
 #include "memory.h"
 #include "neigh_list.h"
-#include "update.h"
 
 #include "Lepton.h"
 #include "lepton_utils.h"
-#include <array>
+
 #include <cmath>
+#include <exception>
 #include <map>
 
 using namespace LAMMPS_NS;
diff --git a/src/LEPTON/pair_lepton_coul.cpp b/src/LEPTON/pair_lepton_coul.cpp
index f7d2042874..bb6b8ed55f 100644
--- a/src/LEPTON/pair_lepton_coul.cpp
+++ b/src/LEPTON/pair_lepton_coul.cpp
@@ -21,16 +21,16 @@
 #include "comm.h"
 #include "error.h"
 #include "force.h"
-#include "memory.h"
 #include "neigh_list.h"
 #include "neighbor.h"
-#include "update.h"
 
 #include "Lepton.h"
 #include "lepton_utils.h"
 
 #include <array>
 #include <cmath>
+#include <cstring>
+#include <exception>
 
 using namespace LAMMPS_NS;
 
diff --git a/src/LEPTON/pair_lepton_sphere.cpp b/src/LEPTON/pair_lepton_sphere.cpp
index 72d0e85d0b..63b082774f 100644
--- a/src/LEPTON/pair_lepton_sphere.cpp
+++ b/src/LEPTON/pair_lepton_sphere.cpp
@@ -21,15 +21,15 @@
 #include "comm.h"
 #include "error.h"
 #include "force.h"
-#include "memory.h"
 #include "neigh_list.h"
 #include "neighbor.h"
-#include "update.h"
 
 #include "Lepton.h"
 #include "lepton_utils.h"
+
 #include <array>
 #include <cmath>
+#include <exception>
 
 using namespace LAMMPS_NS;
 
diff --git a/src/MACHDYN/fix_smd_wall_surface.cpp b/src/MACHDYN/fix_smd_wall_surface.cpp
index f8a8ef970c..25e76e1dab 100644
--- a/src/MACHDYN/fix_smd_wall_surface.cpp
+++ b/src/MACHDYN/fix_smd_wall_surface.cpp
@@ -26,6 +26,7 @@
 #include "text_file_reader.h"
 
 #include <cstring>
+#include <exception>
 #include <Eigen/Eigen>
 
 using namespace LAMMPS_NS;
diff --git a/src/MANYBODY/pair_airebo.cpp b/src/MANYBODY/pair_airebo.cpp
index 13d2b97433..41c8faf752 100644
--- a/src/MANYBODY/pair_airebo.cpp
+++ b/src/MANYBODY/pair_airebo.cpp
@@ -44,6 +44,8 @@ using namespace MathSpecial;
 static constexpr double TOL = 1.0e-9;
 static constexpr int PGDELTA = 1;
 
+static const char *style[3] = {"airebo", "rebo", "airebo/morse"};
+
 /* ---------------------------------------------------------------------- */
 
 PairAIREBO::PairAIREBO(LAMMPS *lmp)
@@ -150,7 +152,7 @@ void PairAIREBO::allocate()
 void PairAIREBO::settings(int narg, char **arg)
 {
   if (narg != 1 && narg != 3 && narg != 4)
-    error->all(FLERR,"Illegal pair_style command");
+    error->all(FLERR,"Illegal pair_style {} command", style[variant]);
 
   cutlj = utils::numeric(FLERR,arg[0],false,lmp);
 
@@ -175,12 +177,7 @@ void PairAIREBO::coeff(int narg, char **arg)
   if (!allocated) allocate();
 
   if (narg != 3 + atom->ntypes)
-    error->all(FLERR,"Incorrect args for pair coefficients");
-
-  // ensure I,J args are * *
-
-  if (strcmp(arg[0],"*") != 0 || strcmp(arg[1],"*") != 0)
-    error->all(FLERR,"Incorrect args for pair coefficients");
+    error->all(FLERR,"Incorrect number of args for pair coefficient.");
 
   // read args that map atom types to C and H
   // map[i] = which element (0,1) the Ith atom type is, -1 if "NULL"
@@ -193,7 +190,7 @@ void PairAIREBO::coeff(int narg, char **arg)
       map[i-2] = 0;
     } else if (strcmp(arg[i],"H") == 0) {
       map[i-2] = 1;
-    } else error->all(FLERR,"Incorrect args for pair coefficients");
+    } else error->all(FLERR,"Element {} not supported by pair style {}", arg[i], style[variant]);
   }
 
   // read potential file and initialize fitting splines
@@ -228,13 +225,13 @@ void PairAIREBO::coeff(int narg, char **arg)
 void PairAIREBO::init_style()
 {
   if (atom->tag_enable == 0)
-    error->all(FLERR,"Pair style AIREBO requires atom IDs");
+    error->all(FLERR,"Pair style {} requires atom IDs", style[variant]);
   if (force->newton_pair == 0)
-    error->all(FLERR,"Pair style AIREBO requires newton pair on");
+    error->all(FLERR,"Pair style {} requires newton pair on", style[variant]);
 
   // need a full neighbor list, including neighbors of ghosts
 
-  neighbor->add_request(this,NeighConst::REQ_FULL|NeighConst::REQ_GHOST);
+  neighbor->add_request(this, NeighConst::REQ_FULL | NeighConst::REQ_GHOST);
 
   // local REBO neighbor list
   // create pages if first time or if neighbor pgsize/oneatom has changed
diff --git a/src/MANYBODY/pair_meam_spline.cpp b/src/MANYBODY/pair_meam_spline.cpp
index e888e2274c..e3d17f6fae 100644
--- a/src/MANYBODY/pair_meam_spline.cpp
+++ b/src/MANYBODY/pair_meam_spline.cpp
@@ -46,6 +46,7 @@
 
 #include <cmath>
 #include <cstring>
+#include <exception>
 
 using namespace LAMMPS_NS;
 
diff --git a/src/MANYBODY/pair_meam_sw_spline.cpp b/src/MANYBODY/pair_meam_sw_spline.cpp
index 5b5713dc4c..a19e1cc0fa 100644
--- a/src/MANYBODY/pair_meam_sw_spline.cpp
+++ b/src/MANYBODY/pair_meam_sw_spline.cpp
@@ -33,6 +33,7 @@
 
 #include <cmath>
 #include <cstring>
+#include <exception>
 
 using namespace LAMMPS_NS;
 
diff --git a/src/MANYBODY/pair_rebomos.cpp b/src/MANYBODY/pair_rebomos.cpp
new file mode 100644
index 0000000000..0941ca0a3a
--- /dev/null
+++ b/src/MANYBODY/pair_rebomos.cpp
@@ -0,0 +1,1123 @@
+// clang-format off
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS Development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   References:
+
+   This code:
+   Stewart J A and Spearot D E (2013) Atomistic simulations of nanoindentation on the basal plane of crystalline molybdenum disulfide. Modelling Simul. Mater. Sci. Eng. 21.
+
+   Based on:
+   Liang T, Phillpot S R and Sinnott S B (2009) Parameterization of a reactive many-body potential for Mo2S systems. Phys. Rev. B79 245110.
+   Liang T, Phillpot S R and Sinnott S B (2012) Erratum: Parameterization of a reactive many-body potential for Mo-S systems. (Phys. Rev. B79 245110 (2009)) Phys. Rev. B85 199903(E).
+
+   LAMMPS file contributing authors: James Stewart, Khanh Dang and Douglas Spearot (University of Arkansas)
+------------------------------------------------------------------------- */
+
+// clang-format on
+
+#include "pair_rebomos.h"
+
+#include "atom.h"
+#include "comm.h"
+#include "error.h"
+#include "force.h"
+#include "math_special.h"
+#include "memory.h"
+#include "my_page.h"
+#include "neigh_list.h"
+#include "neighbor.h"
+#include "potential_file_reader.h"
+#include "text_file_reader.h"
+
+#include <cmath>
+#include <cstring>
+
+using namespace LAMMPS_NS;
+using MathSpecial::cube;
+using MathSpecial::powint;
+using MathSpecial::square;
+
+static constexpr double TOL = 1.0e-9;
+static constexpr int PGDELTA = 1;
+
+/* ---------------------------------------------------------------------- */
+
+PairREBOMoS::PairREBOMoS(LAMMPS *lmp) :
+    Pair(lmp), lj1(nullptr), lj2(nullptr), lj3(nullptr), lj4(nullptr), ipage(nullptr),
+    REBO_numneigh(nullptr), REBO_firstneigh(nullptr), nM(nullptr), nS(nullptr)
+{
+  single_enable = 0;
+  restartinfo = 0;
+  one_coeff = 1;
+  ghostneigh = 1;
+  manybody_flag = 1;
+  centroidstressflag = CENTROID_NOTAVAIL;
+
+  cut3rebo = 0.0;
+  maxlocal = 0;
+  pgsize = oneatom = 0;
+}
+
+// clang-format off
+
+/* ----------------------------------------------------------------------
+   Check if allocated, since class can be destructed when incomplete
+------------------------------------------------------------------------- */
+
+PairREBOMoS::~PairREBOMoS()
+{
+  memory->destroy(REBO_numneigh);
+  memory->sfree(REBO_firstneigh);
+  delete[] ipage;
+  memory->destroy(nM);
+  memory->destroy(nS);
+
+  if (allocated) {
+    memory->destroy(setflag);
+    memory->destroy(cutsq);
+    memory->destroy(cutghost);
+
+    memory->destroy(lj1);
+    memory->destroy(lj2);
+    memory->destroy(lj3);
+    memory->destroy(lj4);
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairREBOMoS::compute(int eflag, int vflag)
+{
+  ev_init(eflag,vflag);
+
+  REBO_neigh();
+  FREBO(eflag);
+  FLJ(eflag);
+
+  if (vflag_fdotr) virial_fdotr_compute();
+}
+
+/* ----------------------------------------------------------------------
+   allocate all arrays
+------------------------------------------------------------------------- */
+
+void PairREBOMoS::allocate()
+{
+  allocated = 1;
+  int n = atom->ntypes;
+
+  memory->create(setflag,n+1,n+1,"pair:setflag");
+  for (int i = 1; i <= n; i++)
+    for (int j = i; j <= n; j++)
+      setflag[i][j] = 0;
+
+  memory->create(cutsq,n+1,n+1,"pair:cutsq");
+  memory->create(cutghost,n+1,n+1,"pair:cutghost");
+
+  // only sized by M,S = 2 types
+
+  memory->create(lj1,2,2,"pair:lj1");
+  memory->create(lj2,2,2,"pair:lj2");
+  memory->create(lj3,2,2,"pair:lj3");
+  memory->create(lj4,2,2,"pair:lj4");
+
+  map = new int[n+1];
+}
+
+/* ----------------------------------------------------------------------
+   global settings
+------------------------------------------------------------------------- */
+
+void PairREBOMoS::settings(int narg, char ** /* arg */)
+{
+  if (narg != 0) error->all(FLERR,"Illegal pair_style command");
+}
+
+/* ----------------------------------------------------------------------
+   set coeffs for one or more type pairs
+------------------------------------------------------------------------- */
+
+void PairREBOMoS::coeff(int narg, char **arg)
+{
+  if (!allocated) allocate();
+
+  if (narg != 3 + atom->ntypes)
+    error->all(FLERR,"Incorrect args for pair coefficients");
+
+  // insure I,J args are * *
+
+  if (strcmp(arg[0],"*") != 0 || strcmp(arg[1],"*") != 0)
+    error->all(FLERR,"Incorrect args for pair coefficients");
+
+  // read args that map atom types to Mo and S
+  // map[i] = which element (0,1) the Ith atom type is, -1 if NULL
+
+  for (int i = 3; i < narg; i++) {
+    if (strcmp(arg[i],"NULL") == 0) {
+      map[i-2] = -1;
+      continue;
+    } else if (strcmp(arg[i],"Mo") == 0) {
+      map[i-2] = 0;
+    } else if (strcmp(arg[i],"M") == 0) { // backward compatibility
+      map[i-2] = 0;
+    } else if (strcmp(arg[i],"S") == 0) {
+      map[i-2] = 1;
+    } else error->all(FLERR,"Incorrect args for pair coefficients");
+  }
+
+  // read potential file and initialize fitting splines
+
+  read_file(arg[2]);
+
+  // clear setflag since coeff() called once with I,J = * *
+
+  int n = atom->ntypes;
+  for (int i = 1; i <= n; i++)
+    for (int j = i; j <= n; j++)
+      setflag[i][j] = 0;
+
+  // set setflag i,j for type pairs where both are mapped to elements
+
+  int count = 0;
+  for (int i = 1; i <= n; i++)
+    for (int j = i; j <= n; j++)
+      if (map[i] >= 0 && map[j] >= 0) {
+        setflag[i][j] = 1;
+        count++;
+      }
+
+  if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
+}
+
+/* ----------------------------------------------------------------------
+   init specific to this pair style
+------------------------------------------------------------------------- */
+
+void PairREBOMoS::init_style()
+{
+  if (atom->tag_enable == 0)
+    error->all(FLERR,"Pair style REBOMoS requires atom IDs");
+  if (force->newton_pair == 0)
+    error->all(FLERR,"Pair style REBOMoS requires newton pair on");
+
+  // need a full neighbor list, including neighbors of ghosts
+
+  neighbor->add_request(this,NeighConst::REQ_FULL|NeighConst::REQ_GHOST);
+
+  // local REBO neighbor list
+  // create pages if first time or if neighbor pgsize/oneatom has changed
+
+  int create = 0;
+  if (ipage == nullptr) create = 1;
+  if (pgsize != neighbor->pgsize) create = 1;
+  if (oneatom != neighbor->oneatom) create = 1;
+
+  if (create) {
+    delete[] ipage;
+    pgsize = neighbor->pgsize;
+    oneatom = neighbor->oneatom;
+
+    int nmypage= comm->nthreads;
+    ipage = new MyPage<int>[nmypage];
+    for (int i = 0; i < nmypage; i++)
+      ipage[i].init(oneatom,pgsize,PGDELTA);
+  }
+}
+
+/* ----------------------------------------------------------------------
+   init for one type pair i,j and corresponding j,i
+------------------------------------------------------------------------- */
+
+double PairREBOMoS::init_one(int i, int j)
+{
+  if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");
+
+  // convert to Mo,S types
+
+  int ii = map[i];
+  int jj = map[j];
+
+  // use Mo-Mo values for these cutoffs since M atoms are biggest
+
+  // cut3rebo = 3 REBO distances
+
+  cut3rebo = 3.0 * rcmax[0][0];
+
+  // cutghost = REBO cutoff used in REBO_neigh() for neighbors of ghosts
+
+  cutghost[i][j] = rcmax[ii][jj];
+  lj1[ii][jj] = 48.0 * epsilon[ii][jj] * powint(sigma[ii][jj],12);
+  lj2[ii][jj] = 24.0 * epsilon[ii][jj] * powint(sigma[ii][jj],6);
+  lj3[ii][jj] = 4.0 * epsilon[ii][jj] * powint(sigma[ii][jj],12);
+  lj4[ii][jj] = 4.0 * epsilon[ii][jj] * powint(sigma[ii][jj],6);
+
+  cutghost[j][i] = cutghost[i][j];
+  lj1[jj][ii] = lj1[ii][jj];
+  lj2[jj][ii] = lj2[ii][jj];
+  lj3[jj][ii] = lj3[ii][jj];
+  lj4[jj][ii] = lj4[ii][jj];
+
+  return cut3rebo;
+}
+
+/* ----------------------------------------------------------------------
+   create REBO neighbor list from main neighbor list
+   REBO neighbor list stores neighbors of ghost atoms
+------------------------------------------------------------------------- */
+
+void PairREBOMoS::REBO_neigh()
+{
+  int i,j,ii,jj,n,allnum,jnum,itype,jtype;
+  double xtmp,ytmp,ztmp,delx,dely,delz,rsq,dS;
+  int *ilist,*jlist,*numneigh,**firstneigh;
+  int *neighptr;
+
+  double **x = atom->x;
+  int *type = atom->type;
+
+  if (atom->nmax > maxlocal) {
+    maxlocal = atom->nmax;
+    memory->destroy(REBO_numneigh);
+    memory->sfree(REBO_firstneigh);
+    memory->destroy(nM);
+    memory->destroy(nS);
+    memory->create(REBO_numneigh,maxlocal,"REBOMoS:numneigh");
+    REBO_firstneigh = (int **) memory->smalloc(maxlocal*sizeof(int *),
+                                               "REBOMoS:firstneigh");
+    memory->create(nM,maxlocal,"REBOMoS:nM");
+    memory->create(nS,maxlocal,"REBOMoS:nS");
+  }
+
+  allnum = list->inum + list->gnum;
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  // store all REBO neighs of owned and ghost atoms
+  // scan full neighbor list of I
+
+  ipage->reset();
+
+  for (ii = 0; ii < allnum; ii++) {
+    i = ilist[ii];
+
+    n = 0;
+    neighptr = ipage->vget();
+
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    itype = map[type[i]];
+    nM[i] = nS[i] = 0.0;
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      j &= NEIGHMASK;
+      jtype = map[type[j]];
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+
+      if (rsq < rcmaxsq[itype][jtype]) {
+        neighptr[n++] = j;
+        if (jtype == 0)
+          nM[i] += Sp(sqrt(rsq),rcmin[itype][jtype],rcmax[itype][jtype],dS);
+        else
+          nS[i] += Sp(sqrt(rsq),rcmin[itype][jtype],rcmax[itype][jtype],dS);
+      }
+    }
+
+    REBO_firstneigh[i] = neighptr;
+    REBO_numneigh[i] = n;
+    ipage->vgot(n);
+    if (ipage->status())
+      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
+  }
+}
+
+/* ----------------------------------------------------------------------
+   REBO forces and energy
+------------------------------------------------------------------------- */
+
+void PairREBOMoS::FREBO(int eflag)
+{
+  int i,j,k,ii,inum,itype,jtype;
+  tagint itag, jtag;
+  double delx,dely,delz,evdwl,fpair,xtmp,ytmp,ztmp;
+  double rsq,rij,wij;
+  double Qij,Aij,alphaij,VR,pre,dVRdi,VA,bij,dVAdi,dVA;
+  double dwij,del[3];
+  int *ilist,*REBO_neighs;
+
+  evdwl = 0.0;
+
+  double **x = atom->x;
+  double **f = atom->f;
+  int *type = atom->type;
+  tagint *tag = atom->tag;
+  int nlocal = atom->nlocal;
+
+  inum = list->inum;
+  ilist = list->ilist;
+
+  // two-body interactions from REBO neighbor list, skip half of them
+
+  for (ii = 0; ii < inum; ii++) {
+    i = ilist[ii];
+    itag = tag[i];
+    itype = map[type[i]];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    REBO_neighs = REBO_firstneigh[i];
+
+    for (k = 0; k < REBO_numneigh[i]; k++) {
+      j = REBO_neighs[k];
+      jtag = tag[j];
+
+      if (itag > jtag) {
+        if ((itag+jtag) % 2 == 0) continue;
+      } else if (itag < jtag) {
+        if ((itag+jtag) % 2 == 1) continue;
+      } else {
+        if (x[j][2] < ztmp) continue;
+        if (x[j][2] == ztmp && x[j][1] < ytmp) continue;
+        if (x[j][2] == ztmp && x[j][1] == ytmp && x[j][0] < xtmp) continue;
+      }
+
+      jtype = map[type[j]];
+
+      delx = x[i][0] - x[j][0];
+      dely = x[i][1] - x[j][1];
+      delz = x[i][2] - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+      rij = sqrt(rsq);
+      wij = Sp(rij,rcmin[itype][jtype],rcmax[itype][jtype],dwij);
+      if (wij <= TOL) continue;
+
+      Qij = Q[itype][jtype];
+      Aij = A[itype][jtype];
+      alphaij = alpha[itype][jtype];
+
+      VR = wij*(1.0+(Qij/rij)) * Aij*exp(-alphaij*rij);
+      pre = wij*Aij * exp(-alphaij*rij);
+      dVRdi = pre * ((-alphaij)-(Qij/rsq)-(Qij*alphaij/rij));
+      dVRdi += VR/wij * dwij;
+
+      VA = dVA = 0.0;
+      VA = -wij * BIJc[itype][jtype] * exp(-Beta[itype][jtype]*rij);
+
+      dVA = -Beta[itype][jtype] * VA;
+      dVA += VA/wij * dwij;
+
+      del[0] = delx;
+      del[1] = dely;
+      del[2] = delz;
+      bij = bondorder(i,j,del,rij,VA,f);
+      dVAdi = bij*dVA;
+
+      fpair = -(dVRdi+dVAdi) / rij;
+      f[i][0] += delx*fpair;
+      f[i][1] += dely*fpair;
+      f[i][2] += delz*fpair;
+      f[j][0] -= delx*fpair;
+      f[j][1] -= dely*fpair;
+      f[j][2] -= delz*fpair;
+
+      if (eflag) evdwl = VR + bij*VA;
+      if (evflag) ev_tally(i,j,nlocal,/*newton_pair*/1,evdwl,0.0,fpair,delx,dely,delz);
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   compute LJ forces and energy
+------------------------------------------------------------------------- */
+
+void PairREBOMoS::FLJ(int eflag)
+{
+  int i,j,ii,jj,inum,jnum,itype,jtype;
+  tagint itag,jtag;
+  double evdwl,fpair,xtmp,ytmp,ztmp;
+  double rij,delij[3],rijsq;
+  double VLJ,dVLJ;
+  double vdw,dvdw;
+  double r2inv,r6inv;
+  int *ilist,*jlist,*numneigh,**firstneigh;
+  double c2,c3,dr,drp,r6;
+
+  // I-J interaction from full neighbor list
+  // skip 1/2 of interactions since only consider each pair once
+
+  evdwl = 0.0;
+
+  double **x = atom->x;
+  double **f = atom->f;
+  tagint *tag = atom->tag;
+  int *type = atom->type;
+  int nlocal = atom->nlocal;
+
+  inum = list->inum;
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  // loop over neighbors of my atoms
+
+  for (ii = 0; ii < inum; ii++) {
+    i = ilist[ii];
+    itag = tag[i];
+    itype = map[type[i]];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      j &= NEIGHMASK;
+      jtag = tag[j];
+
+      if (itag > jtag) {
+        if ((itag+jtag) % 2 == 0) continue;
+      } else if (itag < jtag) {
+        if ((itag+jtag) % 2 == 1) continue;
+      } else {
+        if (x[j][2] < ztmp) continue;
+        if (x[j][2] == ztmp && x[j][1] < ytmp) continue;
+        if (x[j][2] == ztmp && x[j][1] == ytmp && x[j][0] < xtmp) continue;
+      }
+      jtype = map[type[j]];
+
+      delij[0] = xtmp - x[j][0];
+      delij[1] = ytmp - x[j][1];
+      delij[2] = ztmp - x[j][2];
+      rijsq = delij[0]*delij[0] + delij[1]*delij[1] + delij[2]*delij[2];
+      rij = sqrt(rijsq);
+
+      // compute LJ forces and energy
+
+      // Outside Rmax
+      if (rij > rcLJmax[itype][jtype] || rij < rcLJmin[itype][jtype]){
+          VLJ = 0;
+          dVLJ = 0;
+      }
+
+      // Inside Rmax and above 0.95*sigma
+      else if (rij <= rcLJmax[itype][jtype] && rij >= 0.95*sigma[itype][jtype]){
+              r2inv = 1.0/rijsq;
+              r6inv = r2inv*r2inv*r2inv;
+              VLJ = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]);
+              dVLJ = -r6inv*(lj1[itype][jtype]*r6inv - lj2[itype][jtype])/rij;
+      }
+
+      // Below 0.95*sigma
+      else if (rij < 0.95*sigma[itype][jtype] && rij >= rcLJmin[itype][jtype]){
+              dr = 0.95*sigma[itype][jtype] - rcLJmin[itype][jtype];
+              r6 = powint((sigma[itype][jtype]/(0.95*sigma[itype][jtype])),6);
+              vdw = 4*epsilon[itype][jtype]*r6*(r6 - 1.0);
+              dvdw = (-4*epsilon[itype][jtype]/(0.95*sigma[itype][jtype]))*r6*(12.0*r6 - 6.0);
+              c2 = ((3.0/dr)*vdw - dvdw)/dr;
+              c3 = (vdw/(dr*dr) - c2)/dr;
+
+              drp = rij - rcLJmin[itype][jtype];
+              VLJ = drp*drp*(drp*c3 + c2);
+              dVLJ = drp*(3.0*drp*c3 + 2.0*c2);
+      }
+
+      fpair = -dVLJ/rij;
+      f[i][0] += delij[0]*fpair;
+      f[i][1] += delij[1]*fpair;
+      f[i][2] += delij[2]*fpair;
+      f[j][0] -= delij[0]*fpair;
+      f[j][1] -= delij[1]*fpair;
+      f[j][2] -= delij[2]*fpair;
+
+      if (eflag) evdwl = VLJ;
+      if (evflag) ev_tally(i,j,nlocal,/*newton_pair*/1,evdwl,0.0,fpair,delij[0],delij[1],delij[2]);
+
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   Bij function
+
+   The bond order term modified the attractive portion of the REBO
+   potential based on the number of atoms around a specific pair
+   and the bond angle between sets of three atoms.
+
+   The functions G(cos(theta)) and P(N) are evaluated and their
+   derivatives are also computed for use in the force calculation.
+------------------------------------------------------------------------- */
+
+double PairREBOMoS::bondorder(int i, int j, double rij[3], double rijmag, double VA, double **f)
+{
+  int atomi,atomj,atomk,atoml;
+  int k,l;
+  int itype, jtype, ktype, ltype;
+  double rik[3], rjl[3], rji[3], rki[3],rlj[3], dwjl, bij;
+  double NijM,NijS,NjiM,NjiS,wik,dwik,wjl;
+  double rikmag,rjlmag,cosjik,cosijl,g,tmp2;
+  double Etmp,pij,tmp,dwij,dS;
+  double dgdc,pji;
+  double dcosjikdri[3],dcosijldri[3],dcosjikdrk[3];
+  double dp;
+  double dcosjikdrj[3],dcosijldrj[3],dcosijldrl[3];
+  double fi[3],fj[3],fk[3],fl[3];
+  double PijS, PjiS;
+  int *REBO_neighs;
+
+  double **x = atom->x;
+  int *type = atom->type;
+
+  atomi = i;
+  atomj = j;
+  itype = map[type[i]];
+  jtype = map[type[j]];
+  Sp(rijmag,rcmin[itype][jtype],rcmax[itype][jtype],dwij);
+  NijM = nM[i];
+  NijS = nS[i];
+  NjiM = nM[j];
+  NjiS = nS[j];
+  bij = 0.0;
+  tmp = 0.0;
+  tmp2 = 0.0;
+  dgdc = 0.0;
+  Etmp = 0.0;
+
+  REBO_neighs = REBO_firstneigh[i];
+  for (k = 0; k < REBO_numneigh[i]; k++) {
+    atomk = REBO_neighs[k];
+    if (atomk != atomj) {
+      ktype = map[type[atomk]];
+      rik[0] = x[atomi][0]-x[atomk][0];
+      rik[1] = x[atomi][1]-x[atomk][1];
+      rik[2] = x[atomi][2]-x[atomk][2];
+      rikmag = sqrt((rik[0]*rik[0])+(rik[1]*rik[1])+(rik[2]*rik[2]));
+      wik = Sp(rikmag,rcmin[itype][ktype],rcmax[itype][ktype],dS);
+      cosjik = ((rij[0]*rik[0])+(rij[1]*rik[1])+(rij[2]*rik[2])) / (rijmag*rikmag);
+      cosjik = MIN(cosjik,1.0);
+      cosjik = MAX(cosjik,-1.0);
+
+      // evaluate g and derivative dg
+
+      g = gSpline(cosjik,itype,dgdc);
+      Etmp = Etmp+(wik*g);
+    }
+  }
+
+  dp = 0.0;
+  PijS = PijSpline(NijM,NijS,itype,dp);
+  pij = 1.0/sqrt(1.0+Etmp+PijS);
+  tmp = -0.5*cube(pij);
+
+  // derivative calculations
+
+  REBO_neighs = REBO_firstneigh[i];
+  for (k = 0; k < REBO_numneigh[i]; k++) {
+    atomk = REBO_neighs[k];
+    if (atomk != atomj) {
+      ktype = map[type[atomk]];
+      rik[0] = x[atomi][0]-x[atomk][0];
+      rik[1] = x[atomi][1]-x[atomk][1];
+      rik[2] = x[atomi][2]-x[atomk][2];
+      rikmag = sqrt((rik[0]*rik[0])+(rik[1]*rik[1])+(rik[2]*rik[2]));
+      wik = Sp(rikmag,rcmin[itype][ktype],rcmax[itype][ktype],dwik);
+      cosjik = (rij[0]*rik[0] + rij[1]*rik[1] + rij[2]*rik[2]) / (rijmag*rikmag);
+      cosjik = MIN(cosjik,1.0);
+      cosjik = MAX(cosjik,-1.0);
+
+      dcosjikdri[0] = ((rij[0]+rik[0])/(rijmag*rikmag)) -
+        (cosjik*((rij[0]/(rijmag*rijmag))+(rik[0]/(rikmag*rikmag))));
+      dcosjikdri[1] = ((rij[1]+rik[1])/(rijmag*rikmag)) -
+        (cosjik*((rij[1]/(rijmag*rijmag))+(rik[1]/(rikmag*rikmag))));
+      dcosjikdri[2] = ((rij[2]+rik[2])/(rijmag*rikmag)) -
+        (cosjik*((rij[2]/(rijmag*rijmag))+(rik[2]/(rikmag*rikmag))));
+      dcosjikdrk[0] = (-rij[0]/(rijmag*rikmag)) +
+        (cosjik*(rik[0]/(rikmag*rikmag)));
+      dcosjikdrk[1] = (-rij[1]/(rijmag*rikmag)) +
+        (cosjik*(rik[1]/(rikmag*rikmag)));
+      dcosjikdrk[2] = (-rij[2]/(rijmag*rikmag)) +
+        (cosjik*(rik[2]/(rikmag*rikmag)));
+      dcosjikdrj[0] = (-rik[0]/(rijmag*rikmag)) +
+        (cosjik*(rij[0]/(rijmag*rijmag)));
+      dcosjikdrj[1] = (-rik[1]/(rijmag*rikmag)) +
+        (cosjik*(rij[1]/(rijmag*rijmag)));
+      dcosjikdrj[2] = (-rik[2]/(rijmag*rikmag)) +
+        (cosjik*(rij[2]/(rijmag*rijmag)));
+
+      g = gSpline(cosjik,itype,dgdc);
+      tmp2 = VA*0.5*(tmp*wik*dgdc);
+      fj[0] = -tmp2*dcosjikdrj[0];
+      fj[1] = -tmp2*dcosjikdrj[1];
+      fj[2] = -tmp2*dcosjikdrj[2];
+      fi[0] = -tmp2*dcosjikdri[0];
+      fi[1] = -tmp2*dcosjikdri[1];
+      fi[2] = -tmp2*dcosjikdri[2];
+      fk[0] = -tmp2*dcosjikdrk[0];
+      fk[1] = -tmp2*dcosjikdrk[1];
+      fk[2] = -tmp2*dcosjikdrk[2];
+
+      // coordination forces
+
+      // dwik forces (from partial derivative)
+
+      tmp2 = VA*0.5*(tmp*dwik*g)/rikmag;
+      fi[0] -= tmp2*rik[0];
+      fi[1] -= tmp2*rik[1];
+      fi[2] -= tmp2*rik[2];
+      fk[0] += tmp2*rik[0];
+      fk[1] += tmp2*rik[1];
+      fk[2] += tmp2*rik[2];
+
+      // PIJ forces (from coordination P(N) term)
+
+      tmp2 = VA*0.5*(tmp*dp*dwik)/rikmag;
+      fi[0] -= tmp2*rik[0];
+      fi[1] -= tmp2*rik[1];
+      fi[2] -= tmp2*rik[2];
+      fk[0] += tmp2*rik[0];
+      fk[1] += tmp2*rik[1];
+      fk[2] += tmp2*rik[2];
+
+      // dgdN forces are removed
+
+      f[atomi][0] += fi[0]; f[atomi][1] += fi[1]; f[atomi][2] += fi[2];
+      f[atomj][0] += fj[0]; f[atomj][1] += fj[1]; f[atomj][2] += fj[2];
+      f[atomk][0] += fk[0]; f[atomk][1] += fk[1]; f[atomk][2] += fk[2];
+
+      if (vflag_either) {
+        rji[0] = -rij[0]; rji[1] = -rij[1]; rji[2] = -rij[2];
+        rki[0] = -rik[0]; rki[1] = -rik[1]; rki[2] = -rik[2];
+        v_tally3(atomi,atomj,atomk,fj,fk,rji,rki);
+      }
+    }
+  }
+
+  // PIJ force contribution additional term
+  tmp2 = -VA*0.5*(tmp*dp*dwij)/rijmag;
+
+  f[atomi][0] += rij[0]*tmp2;
+  f[atomi][1] += rij[1]*tmp2;
+  f[atomi][2] += rij[2]*tmp2;
+  f[atomj][0] -= rij[0]*tmp2;
+  f[atomj][1] -= rij[1]*tmp2;
+  f[atomj][2] -= rij[2]*tmp2;
+
+  if (vflag_either) v_tally2(atomi,atomj,tmp2,rij);
+
+  tmp = 0.0;
+  tmp2 = 0.0;
+  Etmp = 0.0;
+
+  REBO_neighs = REBO_firstneigh[j];
+  for (l = 0; l < REBO_numneigh[j]; l++) {
+    atoml = REBO_neighs[l];
+    if (atoml != atomi) {
+      ltype = map[type[atoml]];
+      rjl[0] = x[atomj][0]-x[atoml][0];
+      rjl[1] = x[atomj][1]-x[atoml][1];
+      rjl[2] = x[atomj][2]-x[atoml][2];
+      rjlmag = sqrt((rjl[0]*rjl[0])+(rjl[1]*rjl[1])+(rjl[2]*rjl[2]));
+      wjl = Sp(rjlmag,rcmin[jtype][ltype],rcmax[jtype][ltype],dS);
+      cosijl = -1.0*((rij[0]*rjl[0])+(rij[1]*rjl[1])+(rij[2]*rjl[2])) / (rijmag*rjlmag);
+      cosijl = MIN(cosijl,1.0);
+      cosijl = MAX(cosijl,-1.0);
+
+      // evaluate g and derivative dg
+
+      g = gSpline(cosijl,jtype,dgdc);
+      Etmp = Etmp+(wjl*g);
+    }
+  }
+
+  dp = 0.0;
+  PjiS = PijSpline(NjiM,NjiS,jtype,dp);
+  pji = 1.0/sqrt(1.0+Etmp+PjiS);
+  tmp = -0.5*cube(pji);
+
+  REBO_neighs = REBO_firstneigh[j];
+  for (l = 0; l < REBO_numneigh[j]; l++) {
+    atoml = REBO_neighs[l];
+    if (atoml != atomi) {
+      ltype = map[type[atoml]];
+      rjl[0] = x[atomj][0]-x[atoml][0];
+      rjl[1] = x[atomj][1]-x[atoml][1];
+      rjl[2] = x[atomj][2]-x[atoml][2];
+      rjlmag = sqrt((rjl[0]*rjl[0])+(rjl[1]*rjl[1])+(rjl[2]*rjl[2]));
+      wjl = Sp(rjlmag,rcmin[jtype][ltype],rcmax[jtype][ltype],dwjl);
+      cosijl = (-1.0*((rij[0]*rjl[0])+(rij[1]*rjl[1])+(rij[2]*rjl[2]))) / (rijmag*rjlmag);
+      cosijl = MIN(cosijl,1.0);
+      cosijl = MAX(cosijl,-1.0);
+
+      dcosijldri[0] = (-rjl[0]/(rijmag*rjlmag)) - (cosijl*rij[0]/(rijmag*rijmag));
+      dcosijldri[1] = (-rjl[1]/(rijmag*rjlmag)) - (cosijl*rij[1]/(rijmag*rijmag));
+      dcosijldri[2] = (-rjl[2]/(rijmag*rjlmag)) - (cosijl*rij[2]/(rijmag*rijmag));
+      dcosijldrj[0] = ((-rij[0]+rjl[0])/(rijmag*rjlmag)) +
+        (cosijl*((rij[0]/square(rijmag))-(rjl[0]/(rjlmag*rjlmag))));
+      dcosijldrj[1] = ((-rij[1]+rjl[1])/(rijmag*rjlmag)) +
+        (cosijl*((rij[1]/square(rijmag))-(rjl[1]/(rjlmag*rjlmag))));
+      dcosijldrj[2] = ((-rij[2]+rjl[2])/(rijmag*rjlmag)) +
+        (cosijl*((rij[2]/square(rijmag))-(rjl[2]/(rjlmag*rjlmag))));
+      dcosijldrl[0] = (rij[0]/(rijmag*rjlmag))+(cosijl*rjl[0]/(rjlmag*rjlmag));
+      dcosijldrl[1] = (rij[1]/(rijmag*rjlmag))+(cosijl*rjl[1]/(rjlmag*rjlmag));
+      dcosijldrl[2] = (rij[2]/(rijmag*rjlmag))+(cosijl*rjl[2]/(rjlmag*rjlmag));
+
+      // evaluate g and derivatives dg
+
+      g = gSpline(cosijl,jtype,dgdc);
+      tmp2 = VA*0.5*(tmp*wjl*dgdc);
+      fi[0] = -tmp2*dcosijldri[0];
+      fi[1] = -tmp2*dcosijldri[1];
+      fi[2] = -tmp2*dcosijldri[2];
+      fj[0] = -tmp2*dcosijldrj[0];
+      fj[1] = -tmp2*dcosijldrj[1];
+      fj[2] = -tmp2*dcosijldrj[2];
+      fl[0] = -tmp2*dcosijldrl[0];
+      fl[1] = -tmp2*dcosijldrl[1];
+      fl[2] = -tmp2*dcosijldrl[2];
+
+      // coordination forces
+
+      // dwik forces (from partial derivative)
+
+      tmp2 = VA*0.5*(tmp*dwjl*g)/rjlmag;
+      fj[0] -= tmp2*rjl[0];
+      fj[1] -= tmp2*rjl[1];
+      fj[2] -= tmp2*rjl[2];
+      fl[0] += tmp2*rjl[0];
+      fl[1] += tmp2*rjl[1];
+      fl[2] += tmp2*rjl[2];
+
+      // PIJ forces (coordination)
+
+      tmp2 = VA*0.5*(tmp*dp*dwjl)/rjlmag;
+      fj[0] -= tmp2*rjl[0];
+      fj[1] -= tmp2*rjl[1];
+      fj[2] -= tmp2*rjl[2];
+      fl[0] += tmp2*rjl[0];
+      fl[1] += tmp2*rjl[1];
+      fl[2] += tmp2*rjl[2];
+
+      // dgdN forces are removed
+
+      f[atomi][0] += fi[0]; f[atomi][1] += fi[1]; f[atomi][2] += fi[2];
+      f[atomj][0] += fj[0]; f[atomj][1] += fj[1]; f[atomj][2] += fj[2];
+      f[atoml][0] += fl[0]; f[atoml][1] += fl[1]; f[atoml][2] += fl[2];
+
+      if (vflag_either) {
+        rlj[0] = -rjl[0]; rlj[1] = -rjl[1]; rlj[2] = -rjl[2];
+        v_tally3(atomi,atomj,atoml,fi,fl,rij,rlj);
+      }
+    }
+  }
+
+  // PIJ force contribution additional term
+
+  tmp2 = -VA*0.5*(tmp*dp*dwij)/rijmag;
+  f[atomi][0] += rij[0]*tmp2;
+  f[atomi][1] += rij[1]*tmp2;
+  f[atomi][2] += rij[2]*tmp2;
+  f[atomj][0] -= rij[0]*tmp2;
+  f[atomj][1] -= rij[1]*tmp2;
+  f[atomj][2] -= rij[2]*tmp2;
+
+  if (vflag_either) v_tally2(atomi,atomj,tmp2,rij);
+
+  bij = (0.5*(pij+pji));
+  return bij;
+}
+
+/* ----------------------------------------------------------------------
+   G calculation
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   read REBO potential file
+------------------------------------------------------------------------- */
+
+void PairREBOMoS::read_file(char *filename)
+{
+  // REBO Parameters (Mo-S REBO)
+
+  double rcmin_MM,rcmin_MS,rcmin_SS,rcmax_MM,rcmax_MS,rcmax_SS;
+  double Q_MM,Q_MS,Q_SS,alpha_MM,alpha_MS,alpha_SS,A_MM,A_MS,A_SS;
+  double BIJc_MM1,BIJc_MS1,BIJc_SS1;
+  double Beta_MM1,Beta_MS1,Beta_SS1;
+  double M_bg0,M_bg1,M_bg2,M_bg3,M_bg4,M_bg5,M_bg6;
+  double S_bg0,S_bg1,S_bg2,S_bg3,S_bg4,S_bg5,S_bg6;
+  double M_b0,M_b1,M_b2,M_b3,M_b4,M_b5,M_b6;
+  double S_b0,S_b1,S_b2,S_b3,S_b4,S_b5,S_b6;
+  double M_a0,M_a1,M_a2,M_a3;
+  double S_a0,S_a1,S_a2,S_a3;
+
+  // LJ Parameters (Mo-S REBO)
+
+  double epsilon_MM,epsilon_SS;
+  double sigma_MM,sigma_SS;
+
+  // read file on proc 0
+
+  if (comm->me == 0) {
+    PotentialFileReader reader(lmp, filename, "rebomos");
+
+    // read parameters
+
+    std::vector<double*> params {
+      &rcmin_MM,
+      &rcmin_MS,
+      &rcmin_SS,
+      &rcmax_MM,
+      &rcmax_MS,
+      &rcmax_SS,
+      &Q_MM,
+      &Q_MS,
+      &Q_SS,
+      &alpha_MM,
+      &alpha_MS,
+      &alpha_SS,
+      &A_MM,
+      &A_MS,
+      &A_SS,
+      &BIJc_MM1,
+      &BIJc_MS1,
+      &BIJc_SS1,
+      &Beta_MM1,
+      &Beta_MS1,
+      &Beta_SS1,
+      &M_b0,
+      &M_b1,
+      &M_b2,
+      &M_b3,
+      &M_b4,
+      &M_b5,
+      &M_b6,
+      &M_bg0,
+      &M_bg1,
+      &M_bg2,
+      &M_bg3,
+      &M_bg4,
+      &M_bg5,
+      &M_bg6,
+      &S_b0,
+      &S_b1,
+      &S_b2,
+      &S_b3,
+      &S_b4,
+      &S_b5,
+      &S_b6,
+      &S_bg0,
+      &S_bg1,
+      &S_bg2,
+      &S_bg3,
+      &S_bg4,
+      &S_bg5,
+      &S_bg6,
+      &M_a0,
+      &M_a1,
+      &M_a2,
+      &M_a3,
+      &S_a0,
+      &S_a1,
+      &S_a2,
+      &S_a3,
+
+      // LJ parameters
+      &epsilon_MM,
+      &epsilon_SS,
+      &sigma_MM,
+      &sigma_SS,
+    };
+
+    try {
+      for (auto &param : params) {
+        *param = reader.next_double();
+      }
+    } catch (TokenizerException &e) {
+      error->one(FLERR, "reading rebomos potential file {}\nREASON: {}\n", filename, e.what());
+    } catch (FileReaderException &fre) {
+      error->one(FLERR, "reading rebomos potential file {}\nREASON: {}\n", filename, fre.what());
+    }
+
+    // store read-in values in arrays
+
+    // REBO
+
+    rcmin[0][0] = rcmin_MM;
+    rcmin[0][1] = rcmin_MS;
+    rcmin[1][0] = rcmin[0][1];
+    rcmin[1][1] = rcmin_SS;
+
+    rcmax[0][0] = rcmax_MM;
+    rcmax[0][1] = rcmax_MS;
+    rcmax[1][0] = rcmax[0][1];
+    rcmax[1][1] = rcmax_SS;
+
+    rcmaxsq[0][0] = rcmax[0][0]*rcmax[0][0];
+    rcmaxsq[1][0] = rcmax[1][0]*rcmax[1][0];
+    rcmaxsq[0][1] = rcmax[0][1]*rcmax[0][1];
+    rcmaxsq[1][1] = rcmax[1][1]*rcmax[1][1];
+
+    Q[0][0] = Q_MM;
+    Q[0][1] = Q_MS;
+    Q[1][0] = Q[0][1];
+    Q[1][1] = Q_SS;
+
+    alpha[0][0] = alpha_MM;
+    alpha[0][1] = alpha_MS;
+    alpha[1][0] = alpha[0][1];
+    alpha[1][1] = alpha_SS;
+
+    A[0][0] = A_MM;
+    A[0][1] = A_MS;
+    A[1][0] = A[0][1];
+    A[1][1] = A_SS;
+
+    BIJc[0][0] = BIJc_MM1;
+    BIJc[0][1] = BIJc_MS1;
+    BIJc[1][0] = BIJc_MS1;
+    BIJc[1][1] = BIJc_SS1;
+
+    Beta[0][0] = Beta_MM1;
+    Beta[0][1] = Beta_MS1;
+    Beta[1][0] = Beta_MS1;
+    Beta[1][1] = Beta_SS1;
+
+    b0[0] = M_b0;
+    b1[0] = M_b1;
+    b2[0] = M_b2;
+    b3[0] = M_b3;
+    b4[0] = M_b4;
+    b5[0] = M_b5;
+    b6[0] = M_b6;
+
+    bg0[0] = M_bg0;
+    bg1[0] = M_bg1;
+    bg2[0] = M_bg2;
+    bg3[0] = M_bg3;
+    bg4[0] = M_bg4;
+    bg5[0] = M_bg5;
+    bg6[0] = M_bg6;
+
+    b0[1] = S_b0;
+    b1[1] = S_b1;
+    b2[1] = S_b2;
+    b3[1] = S_b3;
+    b4[1] = S_b4;
+    b5[1] = S_b5;
+    b6[1] = S_b6;
+
+    bg0[1] = S_bg0;
+    bg1[1] = S_bg1;
+    bg2[1] = S_bg2;
+    bg3[1] = S_bg3;
+    bg4[1] = S_bg4;
+    bg5[1] = S_bg5;
+    bg6[1] = S_bg6;
+
+    a0[0] = M_a0;
+    a1[0] = M_a1;
+    a2[0] = M_a2;
+    a3[0] = M_a3;
+
+    a0[1] = S_a0;
+    a1[1] = S_a1;
+    a2[1] = S_a2;
+    a3[1] = S_a3;
+
+    // LJ
+
+    sigma[0][0] = sigma_MM;
+    sigma[0][1] = (sigma_MM + sigma_SS)/2;
+    sigma[1][0] = sigma[0][1];
+    sigma[1][1] = sigma_SS;
+
+    epsilon[0][0] = epsilon_MM;
+    epsilon[0][1] = sqrt(epsilon_MM*epsilon_SS);
+    epsilon[1][0] = epsilon[0][1];
+    epsilon[1][1] = epsilon_SS;
+
+    rcLJmin[0][0] = rcmin_MM;
+    rcLJmin[0][1] = rcmin_MS;
+    rcLJmin[1][0] = rcmin[0][1];
+    rcLJmin[1][1] = rcmin_SS;
+
+    rcLJmax[0][0] = 2.5*sigma[0][0];
+    rcLJmax[0][1] = 2.5*sigma[0][1];
+    rcLJmax[1][0] = rcLJmax[0][1];
+    rcLJmax[1][1] = 2.5*sigma[1][1];
+  }
+
+  // broadcast read-in and setup values
+
+  MPI_Bcast(&rcmin[0][0],4,MPI_DOUBLE,0,world);
+  MPI_Bcast(&rcmax[0][0],4,MPI_DOUBLE,0,world);
+  MPI_Bcast(&rcmaxsq[0][0],4,MPI_DOUBLE,0,world);
+  MPI_Bcast(&rcmaxp[0][0],4,MPI_DOUBLE,0,world);
+
+  MPI_Bcast(&Q[0][0],4,MPI_DOUBLE,0,world);
+  MPI_Bcast(&alpha[0][0],4,MPI_DOUBLE,0,world);
+  MPI_Bcast(&A[0][0],4,MPI_DOUBLE,0,world);
+  MPI_Bcast(&BIJc[0][0],4,MPI_DOUBLE,0,world);
+  MPI_Bcast(&Beta[0][0],4,MPI_DOUBLE,0,world);
+
+  MPI_Bcast(&b0[0],2,MPI_DOUBLE,0,world);
+  MPI_Bcast(&b1[0],2,MPI_DOUBLE,0,world);
+  MPI_Bcast(&b2[0],2,MPI_DOUBLE,0,world);
+  MPI_Bcast(&b3[0],2,MPI_DOUBLE,0,world);
+  MPI_Bcast(&b4[0],2,MPI_DOUBLE,0,world);
+  MPI_Bcast(&b5[0],2,MPI_DOUBLE,0,world);
+  MPI_Bcast(&b6[0],2,MPI_DOUBLE,0,world);
+
+  MPI_Bcast(&a0[0],2,MPI_DOUBLE,0,world);
+  MPI_Bcast(&a1[0],2,MPI_DOUBLE,0,world);
+  MPI_Bcast(&a2[0],2,MPI_DOUBLE,0,world);
+  MPI_Bcast(&a3[0],2,MPI_DOUBLE,0,world);
+
+  MPI_Bcast(&bg0[0],2,MPI_DOUBLE,0,world);
+  MPI_Bcast(&bg1[0],2,MPI_DOUBLE,0,world);
+  MPI_Bcast(&bg2[0],2,MPI_DOUBLE,0,world);
+  MPI_Bcast(&bg3[0],2,MPI_DOUBLE,0,world);
+  MPI_Bcast(&bg4[0],2,MPI_DOUBLE,0,world);
+  MPI_Bcast(&bg5[0],2,MPI_DOUBLE,0,world);
+  MPI_Bcast(&bg6[0],2,MPI_DOUBLE,0,world);
+
+  MPI_Bcast(&rcLJmin[0][0],4,MPI_DOUBLE,0,world);
+  MPI_Bcast(&rcLJmax[0][0],4,MPI_DOUBLE,0,world);
+  MPI_Bcast(&epsilon[0][0],4,MPI_DOUBLE,0,world);
+  MPI_Bcast(&sigma[0][0],4,MPI_DOUBLE,0,world);
+}
+
+/* ----------------------------------------------------------------------
+   memory usage of local atom-based arrays
+------------------------------------------------------------------------- */
+
+double PairREBOMoS::memory_usage()
+{
+  double bytes = 0.0;
+  bytes += (double)maxlocal * sizeof(int);
+  bytes += (double)maxlocal * sizeof(int *);
+
+  for (int i = 0; i < comm->nthreads; i++)
+    bytes += ipage[i].size();
+
+  bytes += 3.0 * maxlocal * sizeof(double);
+  return bytes;
+}
diff --git a/src/MANYBODY/pair_rebomos.h b/src/MANYBODY/pair_rebomos.h
new file mode 100644
index 0000000000..856a52ca81
--- /dev/null
+++ b/src/MANYBODY/pair_rebomos.h
@@ -0,0 +1,216 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS Development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+// clang-format off
+PairStyle(rebomos,PairREBOMoS);
+// clang-format on
+#else
+
+#ifndef LMP_PAIR_REBOMOS_H
+#define LMP_PAIR_REBOMOS_H
+
+#include "math_const.h"
+#include "pair.h"
+
+#include <cmath>
+
+namespace LAMMPS_NS {
+
+class PairREBOMoS : public Pair {
+ public:
+  PairREBOMoS(class LAMMPS *);
+  ~PairREBOMoS() override;
+  void compute(int, int) override;
+  void settings(int, char **) override;
+  void coeff(int, char **) override;
+  void init_style() override;
+  double init_one(int, int) override;
+  double memory_usage() override;
+
+ protected:
+  double **lj1, **lj2, **lj3, **lj4;    // pre-computed LJ coeffs for M,S types
+  double cut3rebo;                      // maximum distance for 3rd REBO neigh
+
+  int maxlocal;             // size of numneigh, firstneigh arrays
+  int pgsize;               // size of neighbor page
+  int oneatom;              // max # of neighbors for one atom
+  MyPage<int> *ipage;       // neighbor list pages
+  int *REBO_numneigh;       // # of pair neighbors for each atom
+  int **REBO_firstneigh;    // ptr to 1st neighbor of each atom
+
+  double *nM, *nS;          // sum of weighting fns with REBO neighs
+
+  double rcmin[2][2], rcmax[2][2], rcmaxsq[2][2], rcmaxp[2][2];
+  double Q[2][2], alpha[2][2], A[2][2], BIJc[2][2], Beta[2][2];
+  double b0[2], b1[2], b2[2], b3[2], b4[2], b5[2], b6[2];
+  double bg0[2], bg1[2], bg2[2], bg3[2], bg4[2], bg5[2], bg6[2];
+  double a0[2], a1[2], a2[2], a3[2];
+  double rcLJmin[2][2], rcLJmax[2][2];
+  double epsilon[2][2], sigma[2][2];
+
+  void REBO_neigh();
+  void FREBO(int);
+  void FLJ(int);
+
+  double bondorder(int, int, double *, double, double, double **);
+
+  inline double gSpline(const double costh, const int typei, double &dgdc) const
+  {
+    const double b0i = b0[typei];
+    const double b1i = b1[typei];
+    const double b2i = b2[typei];
+    const double b3i = b3[typei];
+    const double b4i = b4[typei];
+    const double b5i = b5[typei];
+    const double b6i = b6[typei];
+    double g = 0.0;
+
+    if (costh >= -1.0 && costh < 0.5) {
+      g = b6i * costh;
+      double dg = 6.0 * b6i * costh;
+      g += b5i;
+      dg += 5.0 * b5i;
+      g *= costh;
+      dg *= costh;
+      g += b4i;
+      dg += 4.0 * b4i;
+      g *= costh;
+      dg *= costh;
+      g += b3i;
+      dg += 3.0 * b3i;
+      g *= costh;
+      dg *= costh;
+      g += b2i;
+      dg += 2.0 * b2i;
+      g *= costh;
+      dg *= costh;
+      g += b1i;
+      dg += b1i;
+      g *= costh;
+      g += b0i;
+      dgdc = dg;
+
+    } else if (costh >= 0.5 && costh <= 1.0) {
+      double gcos = b6i * costh;
+      double dgcos = 6.0 * b6i * costh;
+      gcos += b5i;
+      dgcos += 5.0 * b5i;
+      gcos *= costh;
+      dgcos *= costh;
+      gcos += b4i;
+      dgcos += 4.0 * b4i;
+      gcos *= costh;
+      dgcos *= costh;
+      gcos += b3i;
+      dgcos += 3.0 * b3i;
+      gcos *= costh;
+      dgcos *= costh;
+      gcos += b2i;
+      dgcos += 2.0 * b2i;
+      gcos *= costh;
+      dgcos *= costh;
+      gcos += b1i;
+      dgcos += b1i;
+      gcos *= costh;
+      gcos += b0i;
+
+      const double bg0i = bg0[typei];
+      const double bg1i = bg1[typei];
+      const double bg2i = bg2[typei];
+      const double bg3i = bg3[typei];
+      const double bg4i = bg4[typei];
+      const double bg5i = bg5[typei];
+      const double bg6i = bg6[typei];
+      double gamma = bg6i * costh;
+      double dgamma = 6.0 * bg6i * costh;
+      gamma += bg5i;
+      dgamma += 5.0 * bg5i;
+      gamma *= costh;
+      dgamma *= costh;
+      gamma += bg4i;
+      dgamma += 4.0 * bg4i;
+      gamma *= costh;
+      dgamma *= costh;
+      gamma += bg3i;
+      dgamma += 3.0 * bg3i;
+      gamma *= costh;
+      dgamma *= costh;
+      gamma += bg2i;
+      dgamma += 2.0 * bg2i;
+      gamma *= costh;
+      dgamma *= costh;
+      gamma += bg1i;
+      dgamma += bg1i;
+      gamma *= costh;
+      gamma += bg0i;
+
+      const double tmp = MathConst::MY_2PI * (costh - 0.5);
+      const double psi = 0.5 * (1 - cos(tmp));
+      const double dpsi = MathConst::MY_PI * sin(tmp);
+      g = gcos + psi * (gamma - gcos);
+      dgdc = dgcos + dpsi * (gamma - gcos) + psi * (dgamma - dgcos);
+    } else {
+      dgdc = 0.0;
+    }
+    return g;
+  }
+
+  /* ----------------------------------------------------------------------
+    Pij calculation
+ ------------------------------------------------------------------------- */
+
+  inline double PijSpline(const double NM, const double NS, const int typei, double &dp) const
+  {
+    const double N = NM + NS;
+
+    dp = -a0[typei] + a1[typei] * a2[typei] * exp(-a2[typei] * N);
+    return -a0[typei] * (N - 1) - a1[typei] * exp(-a2[typei] * N) + a3[typei];
+  }
+
+  void read_file(char *);
+  void allocate();
+
+  // ----------------------------------------------------------------------
+  // S'(t) and S(t) cutoff functions
+  // added to header for inlining
+  // ----------------------------------------------------------------------
+
+  /* ----------------------------------------------------------------------
+     cutoff function Sprime
+     return cutoff and dX = derivative
+     no side effects
+  ------------------------------------------------------------------------- */
+
+  inline double Sp(double Xij, double Xmin, double Xmax, double &dX) const
+  {
+    double cutoff;
+
+    const double t = (Xij - Xmin) / (Xmax - Xmin);
+    if (t <= 0.0) {
+      cutoff = 1.0;
+      dX = 0.0;
+    } else if (t >= 1.0) {
+      cutoff = 0.0;
+      dX = 0.0;
+    } else {
+      cutoff = 0.5 * (1.0 + cos(t * MathConst::MY_PI));
+      dX = (-0.5 * MathConst::MY_PI * sin(t * MathConst::MY_PI)) / (Xmax - Xmin);
+    }
+    return cutoff;
+  };
+};
+}    // namespace LAMMPS_NS
+
+#endif
+#endif
diff --git a/src/MANYBODY/pair_sw_angle_table.cpp b/src/MANYBODY/pair_sw_angle_table.cpp
index 8e605caebd..12592f4af6 100644
--- a/src/MANYBODY/pair_sw_angle_table.cpp
+++ b/src/MANYBODY/pair_sw_angle_table.cpp
@@ -26,7 +26,6 @@
 #include "math_const.h"
 #include "memory.h"
 #include "neigh_list.h"
-#include "neighbor.h"
 #include "table_file_reader.h"
 #include "potential_file_reader.h"
 
diff --git a/src/MC/fix_charge_regulation.cpp b/src/MC/fix_charge_regulation.cpp
index 2fade2be79..4358513095 100644
--- a/src/MC/fix_charge_regulation.cpp
+++ b/src/MC/fix_charge_regulation.cpp
@@ -46,6 +46,7 @@
 
 #include <cmath>
 #include <cstring>
+#include <exception>
 #include <memory>
 
 using namespace LAMMPS_NS;
@@ -191,6 +192,11 @@ int FixChargeRegulation::setmask() {
 
 void FixChargeRegulation::init() {
 
+  if (!atom->mass) error->all(FLERR, "Fix charge/regulation requires per atom type masses");
+  if (atom->rmass_flag && (comm->me == 0))
+    error->warning(FLERR, "Fix charge/regulation will use per atom type masses for "
+                   "velocity initialization");
+
   triclinic = domain->triclinic;
   int ipe = modify->find_compute("thermo_pe");
   c_pe = modify->compute[ipe];
diff --git a/src/MC/fix_gcmc.cpp b/src/MC/fix_gcmc.cpp
index b3d000e8b1..bd7e46b3d1 100644
--- a/src/MC/fix_gcmc.cpp
+++ b/src/MC/fix_gcmc.cpp
@@ -45,6 +45,7 @@
 
 #include <cmath>
 #include <cstring>
+#include <exception>
 
 using namespace LAMMPS_NS;
 using namespace FixConst;
@@ -463,6 +464,10 @@ int FixGCMC::setmask()
 
 void FixGCMC::init()
 {
+  if (!atom->mass) error->all(FLERR, "Fix gcmc requires per atom type masses");
+  if (atom->rmass_flag && (comm->me == 0))
+    error->warning(FLERR, "Fix gcmc will use per atom type masses for velocity initialization");
+
   triclinic = domain->triclinic;
 
   // set index and check validity of region
diff --git a/src/MC/fix_sgcmc.cpp b/src/MC/fix_sgcmc.cpp
index a70f3240db..ae0e69d77e 100644
--- a/src/MC/fix_sgcmc.cpp
+++ b/src/MC/fix_sgcmc.cpp
@@ -235,11 +235,13 @@ int FixSemiGrandCanonicalMC::setmask()
  *********************************************************************/
 void FixSemiGrandCanonicalMC::init()
 {
-  // Make sure the user has defined only one Monte-Carlo fix.
-  int count = 0;
-  for (int i = 0; i < modify->nfix; i++)
-    if (strcmp(modify->fix[i]->style,"sgcmc") == 0) count++;
-  if (count > 1) error->all(FLERR, "More than one fix sgcmc defined.");
+  if (!atom->mass) error->all(FLERR, "Fix sgcmc requires per atom type masses");
+  if (atom->rmass_flag && (comm->me == 0))
+    error->warning(FLERR, "Fix sgcmc will use per atom type masses for velocity initialization");
+
+  // Make sure the user has defined only one Semi-Grand Monte-Carlo fix.
+  if (modify->get_fix_by_style("sgcmc").size() > 1)
+    error->all(FLERR, "More than one fix sgcmc defined.");
 
   // Save a pointer to the EAM potential.
   pairEAM = dynamic_cast<PairEAM*>(force->pair);
@@ -248,7 +250,8 @@ void FixSemiGrandCanonicalMC::init()
       utils::logmesg(lmp, "  SGC - Using naive total energy calculation for MC -> SLOW!\n");
 
     if (comm->nprocs > 1)
-      error->all(FLERR, "Can not run fix vcsgc with naive total energy calculation and more than one MPI process.");
+      error->all(FLERR, "Can not run fix sgcmc with naive total energy calculation "
+                 "and more than one MPI process.");
 
     // Create a compute that will provide the total energy of the system.
     // This is needed by computeTotalEnergy().
diff --git a/src/MC/fix_widom.cpp b/src/MC/fix_widom.cpp
index 2e48630f59..9871dc8f60 100644
--- a/src/MC/fix_widom.cpp
+++ b/src/MC/fix_widom.cpp
@@ -45,6 +45,7 @@
 
 #include <cmath>
 #include <cstring>
+#include <exception>
 
 using namespace LAMMPS_NS;
 using namespace FixConst;
@@ -280,6 +281,10 @@ int FixWidom::setmask()
 
 void FixWidom::init()
 {
+  if (!atom->mass) error->all(FLERR, "Fix widom requires per atom type masses");
+  if (atom->rmass_flag && (comm->me == 0))
+    error->warning(FLERR, "Fix widom will use per atom type masses for velocity initialization");
+
   triclinic = domain->triclinic;
 
   // set index and check validity of region
diff --git a/src/MC/pair_dsmc.cpp b/src/MC/pair_dsmc.cpp
index 4773ac7653..1c152906b3 100644
--- a/src/MC/pair_dsmc.cpp
+++ b/src/MC/pair_dsmc.cpp
@@ -271,6 +271,8 @@ void PairDSMC::coeff(int narg, char **arg)
 
 void PairDSMC::init_style()
 {
+  if (!atom->mass) error->all(FLERR, "Pair style dsmc requires per atom type masses");
+
   ncellsx = ncellsy = ncellsz = 1;
   while (((domain->boxhi[0] - domain->boxlo[0])/ncellsx) > max_cell_size)
     ncellsx++;
diff --git a/src/MDI/fix_mdi_engine.cpp b/src/MDI/fix_mdi_engine.cpp
index 0494d08b2d..fe896d906f 100644
--- a/src/MDI/fix_mdi_engine.cpp
+++ b/src/MDI/fix_mdi_engine.cpp
@@ -19,7 +19,6 @@
 #include "fix_mdi_engine.h"
 
 #include "error.h"
-#include "update.h"
 
 #include "mdi_engine.h"
 
diff --git a/src/MDI/fix_mdi_qm.cpp b/src/MDI/fix_mdi_qm.cpp
index 86dc87fd35..46071c5c90 100644
--- a/src/MDI/fix_mdi_qm.cpp
+++ b/src/MDI/fix_mdi_qm.cpp
@@ -22,6 +22,8 @@
 #include "modify.h"
 #include "update.h"
 
+#include <cstring>
+
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
diff --git a/src/MDI/fix_mdi_qmmm.cpp b/src/MDI/fix_mdi_qmmm.cpp
index c6df52333d..d45290811f 100644
--- a/src/MDI/fix_mdi_qmmm.cpp
+++ b/src/MDI/fix_mdi_qmmm.cpp
@@ -12,6 +12,7 @@
 ------------------------------------------------------------------------- */
 
 #include "fix_mdi_qmmm.h"
+
 #include "atom.h"
 #include "comm.h"
 #include "domain.h"
@@ -25,6 +26,9 @@
 #include "pair.h"
 #include "update.h"
 
+#include <cmath>
+#include <cstring>
+
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
diff --git a/src/MDI/mdi_command.h b/src/MDI/mdi_command.h
index 88e1901ab1..0bdfe81279 100644
--- a/src/MDI/mdi_command.h
+++ b/src/MDI/mdi_command.h
@@ -21,7 +21,7 @@ CommandStyle(mdi,MDICommand);
 #define LMP_MDI_COMMAND_H
 
 #include "command.h"
-#include "mdi.h"
+#include <mdi.h>
 
 namespace LAMMPS_NS {
 
diff --git a/src/MDI/mdi_engine.cpp b/src/MDI/mdi_engine.cpp
index 9c58de7560..6d7c604fa6 100644
--- a/src/MDI/mdi_engine.cpp
+++ b/src/MDI/mdi_engine.cpp
@@ -25,12 +25,10 @@
 #include "error.h"
 #include "fix_mdi_engine.h"
 #include "force.h"
-#include "group.h"
 #include "input.h"
 #include "integrate.h"
 #include "irregular.h"
 #include "library.h"
-#include "library_mdi.h"
 #include "memory.h"
 #include "min.h"
 #include "modify.h"
diff --git a/src/MDI/mdi_plugin.cpp b/src/MDI/mdi_plugin.cpp
index 92b78b6afb..6294292229 100644
--- a/src/MDI/mdi_plugin.cpp
+++ b/src/MDI/mdi_plugin.cpp
@@ -21,9 +21,7 @@
 #include "error.h"
 #include "input.h"
 #include "memory.h"
-#include "modify.h"
 
-#include <cstdlib>
 #include <cstring>
 
 #include <mdi.h>
diff --git a/src/MEAM/meam.h b/src/MEAM/meam.h
index 5a131bdc34..7aca094912 100644
--- a/src/MEAM/meam.h
+++ b/src/MEAM/meam.h
@@ -17,7 +17,7 @@
 #include <cmath>
 #include <string>
 
-constexpr int maxelt = 5;
+constexpr int MAXELT = 8;
 
 namespace LAMMPS_NS {
 class Memory;
@@ -88,30 +88,30 @@ class MEAM {
   // stheta_meam = sin(theta/2) in radian used in line, zigzag, and trimer reference structures
   // ctheta_meam = cos(theta/2) in radian used in line, zigzag, and trimer reference structures
 
-  double Ec_meam[maxelt][maxelt], re_meam[maxelt][maxelt];
-  double A_meam[maxelt], alpha_meam[maxelt][maxelt], rho0_meam[maxelt];
-  double delta_meam[maxelt][maxelt];
-  double beta0_meam[maxelt], beta1_meam[maxelt];
-  double beta2_meam[maxelt], beta3_meam[maxelt];
-  double t0_meam[maxelt], t1_meam[maxelt];
-  double t2_meam[maxelt], t3_meam[maxelt];
-  double rho_ref_meam[maxelt];
-  int ibar_meam[maxelt], ielt_meam[maxelt];
-  lattice_t lattce_meam[maxelt][maxelt];
-  int nn2_meam[maxelt][maxelt];
-  int zbl_meam[maxelt][maxelt];
-  int eltind[maxelt][maxelt];
+  double Ec_meam[MAXELT][MAXELT], re_meam[MAXELT][MAXELT];
+  double A_meam[MAXELT], alpha_meam[MAXELT][MAXELT], rho0_meam[MAXELT];
+  double delta_meam[MAXELT][MAXELT];
+  double beta0_meam[MAXELT], beta1_meam[MAXELT];
+  double beta2_meam[MAXELT], beta3_meam[MAXELT];
+  double t0_meam[MAXELT], t1_meam[MAXELT];
+  double t2_meam[MAXELT], t3_meam[MAXELT];
+  double rho_ref_meam[MAXELT];
+  int ibar_meam[MAXELT], ielt_meam[MAXELT];
+  lattice_t lattce_meam[MAXELT][MAXELT];
+  int nn2_meam[MAXELT][MAXELT];
+  int zbl_meam[MAXELT][MAXELT];
+  int eltind[MAXELT][MAXELT];
   int neltypes;
 
   double **phir;
 
   double **phirar, **phirar1, **phirar2, **phirar3, **phirar4, **phirar5, **phirar6;
 
-  double attrac_meam[maxelt][maxelt], repuls_meam[maxelt][maxelt];
+  double attrac_meam[MAXELT][MAXELT], repuls_meam[MAXELT][MAXELT];
 
-  double Cmin_meam[maxelt][maxelt][maxelt];
-  double Cmax_meam[maxelt][maxelt][maxelt];
-  double rc_meam, delr_meam, ebound_meam[maxelt][maxelt];
+  double Cmin_meam[MAXELT][MAXELT][MAXELT];
+  double Cmax_meam[MAXELT][MAXELT][MAXELT];
+  double rc_meam, delr_meam, ebound_meam[MAXELT][MAXELT];
   int augt1, ialloy, mix_ref_t, erose_form;
   int emb_lin_neg, bkgd_dyn;
   double gsmooth_factor;
@@ -124,8 +124,8 @@ class MEAM {
 
   // MS-MEAM parameters
 
-  double t1m_meam[maxelt], t2m_meam[maxelt], t3m_meam[maxelt];
-  double beta1m_meam[maxelt], beta2m_meam[maxelt], beta3m_meam[maxelt];
+  double t1m_meam[MAXELT], t2m_meam[MAXELT], t3m_meam[MAXELT];
+  double beta1m_meam[MAXELT], beta2m_meam[MAXELT], beta3m_meam[MAXELT];
   //int msmeamflag; // made public for pair style settings
 
  public:
@@ -142,8 +142,8 @@ class MEAM {
   double *scrfcn, *dscrfcn, *fcpair;
 
   //angle for trimer, zigzag, line reference structures
-  double stheta_meam[maxelt][maxelt];
-  double ctheta_meam[maxelt][maxelt];
+  double stheta_meam[MAXELT][MAXELT];
+  double ctheta_meam[MAXELT][MAXELT];
 
  protected:
   // meam_funcs.cpp
diff --git a/src/MEAM/meam_force.cpp b/src/MEAM/meam_force.cpp
index 23230e0fbc..6fc3fd762f 100644
--- a/src/MEAM/meam_force.cpp
+++ b/src/MEAM/meam_force.cpp
@@ -65,11 +65,11 @@ void MEAM::meam_force(int i, int eflag_global, int eflag_atom, int vflag_global,
   double rhoa2mj,drhoa2mj,rhoa2mi,drhoa2mi;
   double rhoa3mj, drhoa3mj, rhoa3mi, drhoa3mi;
   double arg1i1m, arg1j1m, arg1i2m, arg1j2m, arg1i3m, arg1j3m, arg3i3m, arg3j3m;
-  double drho1mdr1, drho1mdr2, drho1mds1, drho1mds2;
+  double drho1mdr1, drho1mdr2;
   double drho1mdrm1[3], drho1mdrm2[3];
-  double drho2mdr1, drho2mdr2, drho2mds1, drho2mds2;
+  double drho2mdr1, drho2mdr2;
   double drho2mdrm1[3], drho2mdrm2[3];
-  double drho3mdr1, drho3mdr2, drho3mds1, drho3mds2;
+  double drho3mdr1, drho3mdr2;
   double drho3mdrm1[3], drho3mdrm2[3];
 
   third = 1.0 / 3.0;
@@ -527,78 +527,75 @@ void MEAM::meam_force(int i, int eflag_global, int eflag_atom, int vflag_global,
           drho3ds2 = a3 * rhoa3i * arg1j3 - a3a * rhoa3i * arg3j3;
 
           if (msmeamflag) {
-            drho1mds1 = a1 * rhoa1mj * arg1i1m;
-            drho1mds2 = a1 * rhoa1mi * arg1j1m;
-            drho2mds1 = a2 * rhoa2mj * arg1i2m - 2.0 / 3.0 * arho2mb[i] * rhoa2mj;
-            drho2mds2 = a2 * rhoa2mi * arg1j2m - 2.0 / 3.0 * arho2mb[j] * rhoa2mi;
-            drho3mds1 = a3 * rhoa3mj * arg1i3m - a3a * rhoa3mj * arg3i3m;
-            drho3mds2 = a3 * rhoa3mi * arg1j3m - a3a * rhoa3mi * arg3j3m;
-            drho3mds1 *= -1;
-            drho3mds2 *= -1;
-          } else {
-            drho1mds1 = 0.0;
-            drho1mds2 = 0.0;
-            drho2mds1 = 0.0;
-            drho2mds2 = 0.0;
-            drho3mds1 = 0.0;
-            drho3mds2 = 0.0;
-          }
 
-          if (ialloy == 1) {
+            const double drho1mds1 = -a1 * rhoa1mj * arg1i1m;
+            const double drho1mds2 = -a1 * rhoa1mi * arg1j1m;
+            const double drho2mds1 =  a2 * rhoa2mj * arg1i2m - 2.0 / 3.0 * arho2mb[i] * rhoa2mj;
+            const double drho2mds2 =  a2 * rhoa2mi * arg1j2m - 2.0 / 3.0 * arho2mb[j] * rhoa2mi;
+            const double drho3mds1 = -a3 * rhoa3mj * arg1i3m + a3a * rhoa3mj * arg3i3m;
+            const double drho3mds2 = -a3 * rhoa3mi * arg1j3m + a3a * rhoa3mi * arg3j3m;
 
-            a1i = fdiv_zero(rhoa0j, tsq_ave[i][0]);
-            a1j = fdiv_zero(rhoa0i, tsq_ave[j][0]);
-            a2i = fdiv_zero(rhoa0j, tsq_ave[i][1]);
-            a2j = fdiv_zero(rhoa0i, tsq_ave[j][1]);
-            a3i = fdiv_zero(rhoa0j, tsq_ave[i][2]);
-            a3j = fdiv_zero(rhoa0i, tsq_ave[j][2]);
+            t1i = 1.0;
+            t2i = 1.0;
+            t3i = 1.0;
+            t1j = 1.0;
+            t2j = 1.0;
+            t3j = 1.0;
+            dt1dr1 = 0.0;
+            dt1dr2 = 0.0;
+            dt2dr1 = 0.0;
+            dt2dr2 = 0.0;
+            dt3dr1 = 0.0;
+            dt3dr2 = 0.0;
 
-            dt1ds1 = a1i * (t1mj - t1i * MathSpecial::square(t1mj));
-            dt1ds2 = a1j * (t1mi - t1j * MathSpecial::square(t1mi));
-            dt2ds1 = a2i * (t2mj - t2i * MathSpecial::square(t2mj));
-            dt2ds2 = a2j * (t2mi - t2j * MathSpecial::square(t2mi));
-            dt3ds1 = a3i * (t3mj - t3i * MathSpecial::square(t3mj));
-            dt3ds2 = a3j * (t3mi - t3j * MathSpecial::square(t3mi));
-
-          } else if (ialloy == 2) {
-
-            dt1ds1 = 0.0;
-            dt1ds2 = 0.0;
-            dt2ds1 = 0.0;
-            dt2ds2 = 0.0;
-            dt3ds1 = 0.0;
-            dt3ds2 = 0.0;
+            // these formulae are simplifed by substituting t=1, dt=0 from above
 
+            drhods1 = dgamma1[i] * drho0ds1 + dgamma2[i]
+              * ((drho1ds1 - drho1mds1) + (drho2ds1 - drho2mds1) + (drho3ds1 - drho3mds1));
+            drhods2 = dgamma1[j] * drho0ds2 + dgamma2[j]
+              * ((drho1ds2 - drho1mds2) + (drho2ds2 - drho2mds2) + (drho3ds2 - drho3mds2));
           } else {
 
-            ai = 0.0;
-            if (!iszero(rho0[i]))
-              ai = rhoa0j / rho0[i];
-            aj = 0.0;
-            if (!iszero(rho0[j]))
-              aj = rhoa0i / rho0[j];
+            if (ialloy == 1) {
 
-            dt1ds1 = ai * (t1mj - t1i);
-            dt1ds2 = aj * (t1mi - t1j);
-            dt2ds1 = ai * (t2mj - t2i);
-            dt2ds2 = aj * (t2mi - t2j);
-            dt3ds1 = ai * (t3mj - t3i);
-            dt3ds2 = aj * (t3mi - t3j);
-          }
+              a1i = fdiv_zero(rhoa0j, tsq_ave[i][0]);
+              a1j = fdiv_zero(rhoa0i, tsq_ave[j][0]);
+              a2i = fdiv_zero(rhoa0j, tsq_ave[i][1]);
+              a2j = fdiv_zero(rhoa0i, tsq_ave[j][1]);
+              a3i = fdiv_zero(rhoa0j, tsq_ave[i][2]);
+              a3j = fdiv_zero(rhoa0i, tsq_ave[j][2]);
+
+              dt1ds1 = a1i * (t1mj - t1i * MathSpecial::square(t1mj));
+              dt1ds2 = a1j * (t1mi - t1j * MathSpecial::square(t1mi));
+              dt2ds1 = a2i * (t2mj - t2i * MathSpecial::square(t2mj));
+              dt2ds2 = a2j * (t2mi - t2j * MathSpecial::square(t2mi));
+              dt3ds1 = a3i * (t3mj - t3i * MathSpecial::square(t3mj));
+              dt3ds2 = a3j * (t3mi - t3j * MathSpecial::square(t3mi));
+
+            } else if (ialloy == 2) {
+
+              dt1ds1 = 0.0;
+              dt1ds2 = 0.0;
+              dt2ds1 = 0.0;
+              dt2ds2 = 0.0;
+              dt3ds1 = 0.0;
+              dt3ds2 = 0.0;
+
+            } else {
+
+              ai = 0.0;
+              if (!iszero(rho0[i])) ai = rhoa0j / rho0[i];
+              aj = 0.0;
+              if (!iszero(rho0[j])) aj = rhoa0i / rho0[j];
+
+              dt1ds1 = ai * (t1mj - t1i);
+              dt1ds2 = aj * (t1mi - t1j);
+              dt2ds1 = ai * (t2mj - t2i);
+              dt2ds2 = aj * (t2mi - t2j);
+              dt3ds1 = ai * (t3mj - t3i);
+              dt3ds2 = aj * (t3mi - t3j);
+            }
 
-          if (msmeamflag) {
-            drhods1 = dgamma1[i] * drho0ds1 +
-              dgamma2[i] * (dt1ds1 * rho1[i] + t1i * (drho1ds1 - drho1mds1) +
-                            dt2ds1 * rho2[i] + t2i * (drho2ds1 - drho2mds1) +
-                            dt3ds1 * rho3[i] + t3i * (drho3ds1 - drho3mds1)) -
-              dgamma3[i] * (shpi[0] * dt1ds1 + shpi[1] * dt2ds1 + shpi[2] * dt3ds1);
-            drhods2 = dgamma1[j] * drho0ds2 +
-              dgamma2[j] * (dt1ds2 * rho1[j] + t1j * (drho1ds2 - drho1mds2) +
-                            dt2ds2 * rho2[j] + t2j * (drho2ds2 - drho2mds2) +
-                            dt3ds2 * rho3[j] + t3j * (drho3ds2 - drho3mds2)) -
-              dgamma3[j] * (shpj[0] * dt1ds2 + shpj[1] * dt2ds2 + shpj[2] * dt3ds2);
-          }
-          else {
             drhods1 = dgamma1[i] * drho0ds1 +
               dgamma2[i] * (dt1ds1 * rho1[i] + t1i * drho1ds1 + dt2ds1 * rho2[i] + t2i * drho2ds1 +
                             dt3ds1 * rho3[i] + t3i * drho3ds1) -
diff --git a/src/MEAM/meam_impl.cpp b/src/MEAM/meam_impl.cpp
index 473b491b01..41248c192b 100644
--- a/src/MEAM/meam_impl.cpp
+++ b/src/MEAM/meam_impl.cpp
@@ -42,12 +42,12 @@ MEAM::MEAM(Memory *mem) : memory(mem)
   copymode = 0;
 
   neltypes = 0;
-  for (int i = 0; i < maxelt; i++) {
+  for (int i = 0; i < MAXELT; i++) {
     A_meam[i] = rho0_meam[i] = beta0_meam[i] = beta1_meam[i] = beta2_meam[i] = beta3_meam[i] =
         t0_meam[i] = t1_meam[i] = t2_meam[i] = t3_meam[i] = rho_ref_meam[i] = ibar_meam[i] =
             ielt_meam[i] = t1m_meam[i] = t2m_meam[i] = t3m_meam[i] = beta1m_meam[i] =
                 beta2m_meam[i] = beta3m_meam[i] = 0.0;
-    for (int j = 0; j < maxelt; j++) {
+    for (int j = 0; j < MAXELT; j++) {
       lattce_meam[i][j] = FCC;
       Ec_meam[i][j] = re_meam[i][j] = alpha_meam[i][j] = delta_meam[i][j] = ebound_meam[i][j] =
           attrac_meam[i][j] = repuls_meam[i][j] = 0.0;
diff --git a/src/MEAM/meam_setup_done.cpp b/src/MEAM/meam_setup_done.cpp
index 4adfd68f19..ce756051e4 100644
--- a/src/MEAM/meam_setup_done.cpp
+++ b/src/MEAM/meam_setup_done.cpp
@@ -33,7 +33,7 @@ void MEAM::meam_setup_done(double* cutmax)
   *cutmax = cutforce;
 
   //     Augment t1 term
-  for (int i = 0; i < maxelt; i++)
+  for (int i = 0; i < MAXELT; i++)
     t1_meam[i] = t1_meam[i] + augt1 * 3.0 / 5.0 * t3_meam[i];
 
   //     Compute off-diagonal alloy parameters
diff --git a/src/MEAM/meam_setup_global.cpp b/src/MEAM/meam_setup_global.cpp
index 299fc4da61..1487a53b2e 100644
--- a/src/MEAM/meam_setup_global.cpp
+++ b/src/MEAM/meam_setup_global.cpp
@@ -38,7 +38,7 @@ void MEAM::meam_setup_global(int nelt, lattice_t *lat, int *ielement, double * /
                              double *b2m, double *b3m, double *t1m, double *t2m, double *t3m)
 {
   int i;
-  double tmplat[maxelt];
+  double tmplat[MAXELT];
 
   neltypes = nelt;
 
@@ -123,4 +123,7 @@ void MEAM::meam_setup_global(int nelt, lattice_t *lat, int *ielement, double * /
   // for trimer, zigzag, line refernece structure, sungkwang
   setall2d(stheta_meam, 1.0);    // stheta = sin(theta/2*pi/180) where theta is 180, so 1.0
   setall2d(ctheta_meam, 0.0);    // stheta = cos(theta/2*pi/180) where theta is 180, so 0
+
+  if (msmeamflag) ialloy = 1;
+
 }
diff --git a/src/MEAM/pair_meam.cpp b/src/MEAM/pair_meam.cpp
index c8932d9a31..d9d2b53885 100644
--- a/src/MEAM/pair_meam.cpp
+++ b/src/MEAM/pair_meam.cpp
@@ -206,7 +206,12 @@ void PairMEAM::coeff(int narg, char **arg)
   // check for presence of first meam file
 
   std::string lib_file = utils::get_potential_file_path(arg[2]);
-  if (lib_file.empty()) error->all(FLERR, "Cannot open MEAM library file {}", lib_file);
+  if (lib_file.empty()) {
+    if (msmeamflag)
+      error->all(FLERR, "Cannot open MS-MEAM library file {}", lib_file);
+    else
+      error->all(FLERR, "Cannot open MEAM library file {}", lib_file);
+  }
 
   // find meam parameter file in arguments:
   // first word that is a file or "NULL" after the MEAM library file
@@ -226,7 +231,12 @@ void PairMEAM::coeff(int narg, char **arg)
       break;
     }
   }
-  if (paridx < 0) error->all(FLERR, "No MEAM parameter file in pair coefficients");
+  if (paridx < 0) {
+    if (msmeamflag)
+      error->all(FLERR, "No MS-MEAM parameter file in pair coefficients");
+    else
+      error->all(FLERR, "No MEAM parameter file in pair coefficients");
+  }
   if ((narg - paridx - 1) != atom->ntypes)
     error->all(FLERR, "Incorrect args for pair style {} coefficients", myname);
 
@@ -241,11 +251,10 @@ void PairMEAM::coeff(int narg, char **arg)
 
   nlibelements = paridx - 3;
   if (nlibelements < 1) error->all(FLERR, "Incorrect args for pair coefficients");
-  if (nlibelements > maxelt)
+  if (nlibelements > MAXELT)
     error->all(FLERR,
                "Too many elements extracted from MEAM library (current limit: {}). "
-               "Increase 'maxelt' in meam.h and recompile.",
-               maxelt);
+               "Increase 'MAXELT' in meam.h and recompile.", MAXELT);
 
   for (int i = 0; i < nlibelements; i++) {
     if (std::any_of(libelements.begin(), libelements.end(), [&](const std::string &elem) {
diff --git a/src/MEAM/pair_meam_ms.cpp b/src/MEAM/pair_meam_ms.cpp
index 982a54f546..e5cb960b59 100644
--- a/src/MEAM/pair_meam_ms.cpp
+++ b/src/MEAM/pair_meam_ms.cpp
@@ -12,6 +12,7 @@
 ------------------------------------------------------------------------- */
 
 #include "pair_meam_ms.h"
+
 #include "meam.h"
 
 using namespace LAMMPS_NS;
diff --git a/src/MESONT/angle_mesocnt.cpp b/src/MESONT/angle_mesocnt.cpp
index 06ec135e3c..c6dae4b0fb 100644
--- a/src/MESONT/angle_mesocnt.cpp
+++ b/src/MESONT/angle_mesocnt.cpp
@@ -30,6 +30,7 @@
 #include "update.h"
 
 #include <cmath>
+#include <cstring>
 
 using namespace LAMMPS_NS;
 using MathConst::DEG2RAD;
diff --git a/src/MESONT/bond_mesocnt.cpp b/src/MESONT/bond_mesocnt.cpp
index 1623c4b1fc..5f468bd720 100644
--- a/src/MESONT/bond_mesocnt.cpp
+++ b/src/MESONT/bond_mesocnt.cpp
@@ -24,7 +24,6 @@
 #include "force.h"
 #include "math_const.h"
 #include "memory.h"
-#include "neighbor.h"
 #include "update.h"
 
 #include <cmath>
diff --git a/src/MESONT/pair_mesocnt.cpp b/src/MESONT/pair_mesocnt.cpp
index 133170f883..21b04268c4 100644
--- a/src/MESONT/pair_mesocnt.cpp
+++ b/src/MESONT/pair_mesocnt.cpp
@@ -34,9 +34,9 @@
 #include <algorithm>
 #include <cmath>
 #include <cstring>
-#include <string>
+#include <exception>
+#include <stdexcept>
 #include <unordered_map>
-#include <vector>
 
 using namespace LAMMPS_NS;
 using namespace MathExtra;
diff --git a/src/MESONT/pair_mesocnt_viscous.cpp b/src/MESONT/pair_mesocnt_viscous.cpp
index 9beabe0d2f..f7ad7b0aa6 100644
--- a/src/MESONT/pair_mesocnt_viscous.cpp
+++ b/src/MESONT/pair_mesocnt_viscous.cpp
@@ -25,7 +25,6 @@
 #include "math_const.h"
 #include "math_extra.h"
 #include "memory.h"
-#include "neigh_list.h"
 #include "neighbor.h"
 #include "update.h"
 
diff --git a/src/ML-IAP/compute_mliap.cpp b/src/ML-IAP/compute_mliap.cpp
index 4fa88abad1..0ee35ea080 100644
--- a/src/ML-IAP/compute_mliap.cpp
+++ b/src/ML-IAP/compute_mliap.cpp
@@ -19,10 +19,10 @@
 #include "compute_mliap.h"
 
 #include "mliap_data.h"
-#include "mliap_model_linear.h"
-#include "mliap_model_quadratic.h"
 #include "mliap_descriptor_snap.h"
 #include "mliap_descriptor_so3.h"
+#include "mliap_model_linear.h"
+#include "mliap_model_quadratic.h"
 #ifdef MLIAP_PYTHON
 #include "mliap_model_python.h"
 #endif
@@ -34,6 +34,7 @@
 #include "memory.h"
 #include "modify.h"
 #include "neighbor.h"
+#include "neigh_list.h"
 #include "pair.h"
 #include "update.h"
 
@@ -41,17 +42,17 @@
 
 using namespace LAMMPS_NS;
 
-enum{SCALAR,VECTOR,ARRAY};
+enum { SCALAR, VECTOR, ARRAY };
 
 ComputeMLIAP::ComputeMLIAP(LAMMPS *lmp, int narg, char **arg) :
-  Compute(lmp, narg, arg), mliaparray(nullptr),
-  mliaparrayall(nullptr), map(nullptr)
+    Compute(lmp, narg, arg), mliaparray(nullptr), mliaparrayall(nullptr), list(nullptr),
+    map(nullptr), model(nullptr), descriptor(nullptr), data(nullptr), c_pe(nullptr),
+    c_virial(nullptr)
 {
   array_flag = 1;
   extarray = 0;
 
-  if (narg < 4)
-    error->all(FLERR,"Illegal compute mliap command");
+  if (narg < 4) utils::missing_cmd_args(FLERR, "compute mliap", error);
 
   // default values
 
@@ -130,7 +131,6 @@ ComputeMLIAP::ComputeMLIAP(LAMMPS *lmp, int narg, char **arg) :
 
 ComputeMLIAP::~ComputeMLIAP()
 {
-
   modify->delete_compute(id_virial);
 
   memory->destroy(mliaparray);
@@ -180,23 +180,13 @@ void ComputeMLIAP::init()
 
   // find compute for reference energy
 
-  std::string id_pe = std::string("thermo_pe");
-  int ipe = modify->find_compute(id_pe);
-  if (ipe == -1)
-    error->all(FLERR,"compute thermo_pe does not exist.");
-  c_pe = modify->compute[ipe];
+  c_pe = modify->get_compute_by_id("thermo_pe");
+  if (!c_pe) error->all(FLERR,"Compute thermo_pe does not exist.");
 
   // add compute for reference virial tensor
 
   id_virial = id + std::string("_press");
-  std::string pcmd = id_virial + " all pressure NULL virial";
-  modify->add_compute(pcmd);
-
-  int ivirial = modify->find_compute(id_virial);
-  if (ivirial == -1)
-    error->all(FLERR,"compute mliap_press does not exist.");
-  c_virial = modify->compute[ivirial];
-
+  c_virial = modify->add_compute(id_virial + " all pressure NULL virial");
 }
 
 
diff --git a/src/ML-IAP/mliap_unified.cpp b/src/ML-IAP/mliap_unified.cpp
index de1d0bcb7d..7697204e44 100644
--- a/src/ML-IAP/mliap_unified.cpp
+++ b/src/ML-IAP/mliap_unified.cpp
@@ -254,10 +254,8 @@ void LAMMPS_NS::update_pair_energy(MLIAPData *data, double *eij)
     double e = 0.5 * eij[ii];
 
     // must not count any contribution where i is not a local atom
-    if (i < nlocal) {
-      data->eatoms[i] += e;
-      e_total += e;
-    }
+    data->eatoms[i] += e;
+    e_total += e;
   }
   data->energy = e_total;
 }
@@ -277,17 +275,14 @@ void LAMMPS_NS::update_pair_forces(MLIAPData *data, double *fij)
     int i = data->pair_i[ii];
     int j = data->jatoms[ii];
 
-    // must not count any contribution where i is not a local atom
-    if (i < nlocal) {
-      f[i][0] += fij[ii3];
-      f[i][1] += fij[ii3 + 1];
-      f[i][2] += fij[ii3 + 2];
-      f[j][0] -= fij[ii3];
-      f[j][1] -= fij[ii3 + 1];
-      f[j][2] -= fij[ii3 + 2];
+    f[i][0] += fij[ii3];
+    f[i][1] += fij[ii3 + 1];
+    f[i][2] += fij[ii3 + 2];
+    f[j][0] -= fij[ii3];
+    f[j][1] -= fij[ii3 + 1];
+    f[j][2] -= fij[ii3 + 2];
 
-      if (data->vflag) data->pairmliap->v_tally(i, j, &fij[ii3], data->rij[ii]);
-    }
+    if (data->vflag) data->pairmliap->v_tally(i, j, &fij[ii3], data->rij[ii]);
   }
 }
 
diff --git a/src/ML-IAP/mliap_unified_couple.pyx b/src/ML-IAP/mliap_unified_couple.pyx
index 3148b96b51..6c8331d0fa 100644
--- a/src/ML-IAP/mliap_unified_couple.pyx
+++ b/src/ML-IAP/mliap_unified_couple.pyx
@@ -8,6 +8,7 @@ import lammps.mliap
 cimport cython
 from cpython.ref cimport PyObject
 from libc.stdlib cimport malloc, free
+from libc.string cimport memcpy
 
 
 cdef extern from "lammps.h" namespace "LAMMPS_NS":
@@ -387,15 +388,26 @@ cdef public object mliap_unified_connect(char *fname, MLIAPDummyModel * model,
 
     cdef int nelements = <int>len(unified.element_types)
     cdef char **elements = <char**>malloc(nelements * sizeof(char*))
+    cdef char * c_str
+    cdef char * s
+    cdef ssize_t slen
 
     if not elements:
         raise MemoryError("failed to allocate memory for element names")
 
-    cdef char *elem_name
     for i, elem in enumerate(unified.element_types):
-        elem_name_bytes = elem.encode('UTF-8')
-        elem_name = elem_name_bytes
-        elements[i] = &elem_name[0]
+        py_str = elem.encode('UTF-8')
+
+        s = py_str
+        slen = len(py_str)
+        c_str = <char *>malloc((slen+1)*sizeof(char))
+        if not c_str:
+            raise MemoryError("failed to allocate memory for element names")
+        memcpy(c_str, s, slen)
+        c_str[slen] = 0
+        
+        elements[i] = c_str
+    
     unified_int.descriptor.set_elements(elements, nelements)
     unified_int.model.nelements = nelements
 
diff --git a/src/ML-PACE/compute_pace.cpp b/src/ML-PACE/compute_pace.cpp
index b96432cfe3..fee9a0fa0e 100644
--- a/src/ML-PACE/compute_pace.cpp
+++ b/src/ML-PACE/compute_pace.cpp
@@ -11,24 +11,21 @@
 ------------------------------------------------------------------------- */
 
 #include "compute_pace.h"
-#include "ace-evaluator/ace_evaluator.h"
+
 #include "ace-evaluator/ace_c_basis.h"
-#include "ace-evaluator/ace_abstract_basis.h"
+#include "ace-evaluator/ace_evaluator.h"
 #include "ace-evaluator/ace_types.h"
-#include <cstring>
-#include <map>
 
 #include "atom.h"
-#include "update.h"
-#include "modify.h"
-#include "neighbor.h"
-#include "neigh_list.h"
-#include "neigh_request.h"
-#include "force.h"
-#include "pair.h"
 #include "comm.h"
-#include "memory.h"
 #include "error.h"
+#include "force.h"
+#include "memory.h"
+#include "modify.h"
+#include "neigh_list.h"
+#include "neighbor.h"
+#include "pair.h"
+#include "update.h"
 
 namespace LAMMPS_NS {
 struct ACECimpl {
@@ -41,14 +38,14 @@ struct ACECimpl {
   ACECTildeBasisSet *basis_set;
   ACECTildeEvaluator *ace;
 };
-}
+}    // namespace LAMMPS_NS
 
 using namespace LAMMPS_NS;
 
 enum { SCALAR, VECTOR, ARRAY };
 ComputePACE::ComputePACE(LAMMPS *lmp, int narg, char **arg) :
-  Compute(lmp, narg, arg), cutsq(nullptr), list(nullptr), pace(nullptr), paceall(nullptr),
-  pace_peratom(nullptr), map(nullptr), cg(nullptr), c_pe(nullptr), c_virial(nullptr)
+    Compute(lmp, narg, arg), cutsq(nullptr), list(nullptr), pace(nullptr), paceall(nullptr),
+    pace_peratom(nullptr), map(nullptr), c_pe(nullptr), c_virial(nullptr), acecimpl(nullptr)
 {
   array_flag = 1;
   extarray = 0;
@@ -111,6 +108,8 @@ ComputePACE::ComputePACE(LAMMPS *lmp, int narg, char **arg) :
 
 ComputePACE::~ComputePACE()
 {
+  modify->delete_compute(id_virial);
+
   delete acecimpl;
   memory->destroy(pace);
   memory->destroy(paceall);
@@ -132,10 +131,7 @@ void ComputePACE::init()
   // need an occasional full neighbor list
   neighbor->add_request(this, NeighConst::REQ_FULL | NeighConst::REQ_OCCASIONAL);
 
-  int count = 0;
-  for (int i = 0; i < modify->ncompute; i++)
-    if (strcmp(modify->compute[i]->style,"pace") == 0) count++;
-  if (count > 1 && comm->me == 0)
+  if (modify->get_compute_by_style("pace").size() > 1 && comm->me == 0)
     error->warning(FLERR,"More than one compute pace");
 
   // allocate memory for global array
@@ -145,22 +141,13 @@ void ComputePACE::init()
 
   // find compute for reference energy
 
-  std::string id_pe = std::string("thermo_pe");
-  int ipe = modify->find_compute(id_pe);
-  if (ipe == -1)
-    error->all(FLERR,"compute thermo_pe does not exist.");
-  c_pe = modify->compute[ipe];
+  c_pe = modify->get_compute_by_id("thermo_pe");
+  if (!c_pe) error->all(FLERR,"Compute thermo_pe does not exist.");
 
   // add compute for reference virial tensor
 
-  std::string id_virial = std::string("pace_press");
-  std::string pcmd = id_virial + " all pressure NULL virial";
-  modify->add_compute(pcmd);
-
-  int ivirial = modify->find_compute(id_virial);
-  if (ivirial == -1)
-    error->all(FLERR,"compute pace_press does not exist.");
-  c_virial = modify->compute[ivirial];
+  id_virial = id + std::string("_press");
+  c_virial = modify->add_compute(id_virial + " all pressure NULL virial");
 }
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/ML-PACE/compute_pace.h b/src/ML-PACE/compute_pace.h
index 496c8a16d3..23243b0066 100644
--- a/src/ML-PACE/compute_pace.h
+++ b/src/ML-PACE/compute_pace.h
@@ -43,10 +43,11 @@ class ComputePACE : public Compute {
   double **pace_peratom;
   int *map;    // map types to [0,nelements)
   int bikflag, bik_rows, dgradflag, dgrad_rows;
-  double *cg;
   double cutmax;
+
   Compute *c_pe;
   Compute *c_virial;
+  std::string id_virial;
 
   void dbdotr_compute();
   struct ACECimpl *acecimpl;
diff --git a/src/ML-PACE/pair_pace_extrapolation.cpp b/src/ML-PACE/pair_pace_extrapolation.cpp
index d9b8d3588a..ec42d232af 100644
--- a/src/ML-PACE/pair_pace_extrapolation.cpp
+++ b/src/ML-PACE/pair_pace_extrapolation.cpp
@@ -29,15 +29,12 @@ Copyright 2022 Yury Lysogorskiy^1, Anton Bochkarev^1, Matous Mrovec^1, Ralf Drau
 #include "force.h"
 #include "math_const.h"
 #include "memory.h"
-#include "modify.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
 #include "neighbor.h"
 #include "update.h"
 
-#include <cmath>
-#include <cstdlib>
 #include <cstring>
+#include <exception>
 
 #include "ace/ace_b_basis.h"
 #include "ace/ace_b_evaluator.h"
diff --git a/src/ML-PACE/pair_pace_extrapolation.h b/src/ML-PACE/pair_pace_extrapolation.h
index 2dcec04d4b..440d999029 100644
--- a/src/ML-PACE/pair_pace_extrapolation.h
+++ b/src/ML-PACE/pair_pace_extrapolation.h
@@ -28,7 +28,6 @@ PairStyle(pace/extrapolation,PairPACEExtrapolation)
 #define LMP_PAIR_PACE_AL_H
 
 #include "pair.h"
-#include <vector>
 
 namespace LAMMPS_NS {
 
diff --git a/src/ML-SNAP/compute_sna_atom.cpp b/src/ML-SNAP/compute_sna_atom.cpp
index b1b4a46482..c3582f200c 100644
--- a/src/ML-SNAP/compute_sna_atom.cpp
+++ b/src/ML-SNAP/compute_sna_atom.cpp
@@ -26,6 +26,7 @@
 #include "memory.h"
 #include "error.h"
 
+#include <cmath>
 #include <cstring>
 
 using namespace LAMMPS_NS;
diff --git a/src/ML-SNAP/compute_snap.cpp b/src/ML-SNAP/compute_snap.cpp
index 3141791f6c..99a9a96361 100644
--- a/src/ML-SNAP/compute_snap.cpp
+++ b/src/ML-SNAP/compute_snap.cpp
@@ -30,14 +30,13 @@
 
 using namespace LAMMPS_NS;
 
-enum{SCALAR,VECTOR,ARRAY};
+enum { SCALAR, VECTOR, ARRAY };
 
 ComputeSnap::ComputeSnap(LAMMPS *lmp, int narg, char **arg) :
-  Compute(lmp, narg, arg), cutsq(nullptr), list(nullptr), snap(nullptr),
-  snapall(nullptr), snap_peratom(nullptr), radelem(nullptr), wjelem(nullptr),
-  sinnerelem(nullptr), dinnerelem(nullptr), snaptr(nullptr)
+    Compute(lmp, narg, arg), cutsq(nullptr), list(nullptr), snap(nullptr), snapall(nullptr),
+    snap_peratom(nullptr), radelem(nullptr), wjelem(nullptr), map(nullptr), sinnerelem(nullptr),
+    dinnerelem(nullptr), snaptr(nullptr), c_pe(nullptr), c_virial(nullptr)
 {
-
   array_flag = 1;
   extarray = 0;
 
@@ -172,22 +171,18 @@ ComputeSnap::ComputeSnap(LAMMPS *lmp, int narg, char **arg) :
   }
 
   if (switchinnerflag && !(sinnerflag && dinnerflag))
-    error->all(
-        FLERR,
-        "Illegal compute {} command: switchinnerflag = 1, missing sinner/dinner keyword",
-        style);
+    error->all(FLERR, "Illegal compute {} command: switchinnerflag = 1, "
+               "missing sinner/dinner keyword", style);
 
   if (!switchinnerflag && (sinnerflag || dinnerflag))
-    error->all(
-        FLERR,
-        "Illegal compute {} command: switchinnerflag = 0, unexpected sinner/dinner keyword",
-        style);
+    error->all(FLERR, "Illegal compute {} command: switchinnerflag = 0, "
+               "unexpected sinner/dinner keyword", style);
 
   if (dgradflag && !bikflag)
-    error->all(FLERR,"Illegal compute snap command: dgradflag=1 requires bikflag=1");
+    error->all(FLERR, "Illegal compute snap command: dgradflag=1 requires bikflag=1");
 
   if (dgradflag && quadraticflag)
-    error->all(FLERR,"Illegal compute snap command: dgradflag=1 not implemented for quadratic SNAP");
+    error->all(FLERR, "Illegal compute snap command: dgradflag=1 not implemented for quadratic SNAP");
 
   snaptr = new SNA(lmp, rfac0, twojmax, rmin0, switchflag, bzeroflag, chemflag, bnormflag,
                    wselfallflag, nelements, switchinnerflag);
@@ -210,7 +205,8 @@ ComputeSnap::ComputeSnap(LAMMPS *lmp, int narg, char **arg) :
   if (dgradflag) {
     size_array_rows = bik_rows + 3*natoms*natoms + 1;
     size_array_cols = nvalues + 3;
-    error->warning(FLERR,"dgradflag=1 creates a N^2 array, beware of large systems.");
+    if (comm->me == 0)
+      error->warning(FLERR, "dgradflag=1 creates a N^2 array, beware of large systems.");
   }
   else size_array_cols = nvalues*atom->ntypes + 1;
   lastcol = size_array_cols-1;
@@ -249,7 +245,8 @@ void ComputeSnap::init()
     error->all(FLERR,"Compute snap requires a pair style be defined");
 
   if (cutmax > force->pair->cutforce)
-    error->all(FLERR,"Compute snap cutoff is longer than pairwise cutoff");
+    error->all(FLERR,"Compute snap cutoff {} is longer than pairwise cutoff {}",
+               cutmax, force->pair->cutforce);
 
   // need an occasional full neighbor list
 
@@ -261,31 +258,19 @@ void ComputeSnap::init()
 
   // allocate memory for global array
 
-  memory->create(snap,size_array_rows,size_array_cols,
-                 "snap:snap");
-  memory->create(snapall,size_array_rows,size_array_cols,
-                 "snap:snapall");
+  memory->create(snap,size_array_rows,size_array_cols, "snap:snap");
+  memory->create(snapall,size_array_rows,size_array_cols, "snap:snapall");
   array = snapall;
 
-  // find compute for reference energy
+  // find compute for global reference potential energy
 
-  std::string id_pe = std::string("thermo_pe");
-  int ipe = modify->find_compute(id_pe);
-  if (ipe == -1)
-    error->all(FLERR,"compute thermo_pe does not exist.");
-  c_pe = modify->compute[ipe];
+  c_pe = modify->get_compute_by_id("thermo_pe");
+  if (!c_pe) error->all(FLERR,"compute thermo_pe does not exist.");
 
-  // add compute for reference virial tensor
-
-  std::string id_virial = std::string("snap_press");
-  std::string pcmd = id_virial + " all pressure NULL virial";
-  modify->add_compute(pcmd);
-
-  int ivirial = modify->find_compute(id_virial);
-  if (ivirial == -1)
-    error->all(FLERR,"compute snap_press does not exist.");
-  c_virial = modify->compute[ivirial];
+  // add compute for global reference virial tensor
 
+  id_virial = id + std::string("_press");
+  c_virial = modify->add_compute(id_virial + " all pressure NULL virial");
 }
 
 
@@ -309,8 +294,7 @@ void ComputeSnap::compute_array()
   if (atom->nmax > nmax) {
     memory->destroy(snap_peratom);
     nmax = atom->nmax;
-    memory->create(snap_peratom,nmax,size_peratom,
-                   "snap:snap_peratom");
+    memory->create(snap_peratom,nmax,size_peratom, "snap:snap_peratom");
   }
 
   // clear global array
diff --git a/src/ML-SNAP/compute_snap.h b/src/ML-SNAP/compute_snap.h
index 2b8b972bbc..fe0b35d9e3 100644
--- a/src/ML-SNAP/compute_snap.h
+++ b/src/ML-SNAP/compute_snap.h
@@ -28,6 +28,7 @@ class ComputeSnap : public Compute {
  public:
   ComputeSnap(class LAMMPS *, int, char **);
   ~ComputeSnap() override;
+
   void init() override;
   void init_list(int, class NeighList *) override;
   void compute_array() override;
@@ -56,10 +57,10 @@ class ComputeSnap : public Compute {
 
   Compute *c_pe;
   Compute *c_virial;
+  std::string id_virial;
 
   void dbdotr_compute();
 };
-
 }    // namespace LAMMPS_NS
 
 #endif
diff --git a/src/MOLECULE/bond_gromos.cpp b/src/MOLECULE/bond_gromos.cpp
index badb808007..1917f18686 100644
--- a/src/MOLECULE/bond_gromos.cpp
+++ b/src/MOLECULE/bond_gromos.cpp
@@ -24,6 +24,7 @@
 #include "memory.h"
 #include "neighbor.h"
 
+#include <cmath>
 #include <cstring>
 
 using namespace LAMMPS_NS;
diff --git a/src/MOLECULE/pair_tip4p_cut.cpp b/src/MOLECULE/pair_tip4p_cut.cpp
index 6d27c1a164..73a5651e6b 100644
--- a/src/MOLECULE/pair_tip4p_cut.cpp
+++ b/src/MOLECULE/pair_tip4p_cut.cpp
@@ -30,6 +30,7 @@
 #include "neighbor.h"
 
 #include <cmath>
+#include <cstring>
 
 using namespace LAMMPS_NS;
 
diff --git a/src/MOLFILE/molfile_interface.cpp b/src/MOLFILE/molfile_interface.cpp
index 8f5ac8545e..84aa63cefc 100644
--- a/src/MOLFILE/molfile_interface.cpp
+++ b/src/MOLFILE/molfile_interface.cpp
@@ -26,7 +26,6 @@
 
 #include <cstdio>
 #include <cstring>
-#include <cstdlib>
 
 #if vmdplugin_ABIVERSION < 16
 #error "unsupported VMD molfile plugin ABI version"
diff --git a/src/OPENMP/angle_lepton_omp.cpp b/src/OPENMP/angle_lepton_omp.cpp
index f57cf916a2..918fb57871 100644
--- a/src/OPENMP/angle_lepton_omp.cpp
+++ b/src/OPENMP/angle_lepton_omp.cpp
@@ -16,13 +16,16 @@
 ------------------------------------------------------------------------- */
 
 #include "angle_lepton_omp.h"
+
 #include "atom.h"
 #include "comm.h"
+#include "error.h"
 #include "force.h"
 #include "neighbor.h"
 #include "suffix.h"
 
 #include <cmath>
+#include <exception>
 
 #include "Lepton.h"
 #include "lepton_utils.h"
diff --git a/src/OPENMP/bond_lepton_omp.cpp b/src/OPENMP/bond_lepton_omp.cpp
index d9982b08f8..995e2fac09 100644
--- a/src/OPENMP/bond_lepton_omp.cpp
+++ b/src/OPENMP/bond_lepton_omp.cpp
@@ -18,11 +18,13 @@
 #include "bond_lepton_omp.h"
 #include "atom.h"
 #include "comm.h"
+#include "error.h"
 #include "force.h"
 #include "neighbor.h"
 #include "suffix.h"
 
 #include <cmath>
+#include <exception>
 
 #include "Lepton.h"
 #include "lepton_utils.h"
diff --git a/src/OPENMP/dihedral_lepton_omp.cpp b/src/OPENMP/dihedral_lepton_omp.cpp
index 37748ce9d5..206749fcfa 100644
--- a/src/OPENMP/dihedral_lepton_omp.cpp
+++ b/src/OPENMP/dihedral_lepton_omp.cpp
@@ -18,12 +18,14 @@
 #include "dihedral_lepton_omp.h"
 #include "atom.h"
 #include "comm.h"
+#include "error.h"
 #include "force.h"
 #include "math_extra.h"
 #include "neighbor.h"
 #include "suffix.h"
 
 #include <cmath>
+#include <exception>
 
 #include "Lepton.h"
 #include "lepton_utils.h"
diff --git a/src/OPENMP/fix_nh_omp.cpp b/src/OPENMP/fix_nh_omp.cpp
index d3565c4994..7ef69af0fe 100644
--- a/src/OPENMP/fix_nh_omp.cpp
+++ b/src/OPENMP/fix_nh_omp.cpp
@@ -22,7 +22,6 @@
 #include "compute.h"
 #include "domain.h"
 #include "error.h"
-#include "modify.h"
 
 #include <cmath>
 
diff --git a/src/OPENMP/fix_omp.cpp b/src/OPENMP/fix_omp.cpp
index 3a249bad82..f7828f43ee 100644
--- a/src/OPENMP/fix_omp.cpp
+++ b/src/OPENMP/fix_omp.cpp
@@ -161,12 +161,15 @@ void FixOMP::init()
 {
   // OPENMP package cannot be used with atom_style template
   if (atom->molecular == Atom::TEMPLATE)
-    error->all(FLERR,"OPENMP package does not (yet) work with "
-               "atom_style template");
+    error->all(FLERR,"OPENMP package does not (yet) work with atom_style template");
 
   // adjust number of data objects when the number of OpenMP
   // threads has been changed somehow
   const int nthreads = comm->nthreads;
+#if defined(_OPENMP)
+  // make certain threads are initialized correctly. avoids segfaults with LAMMPS-GUI
+  if (nthreads != omp_get_max_threads()) omp_set_num_threads(nthreads);
+#endif
   if (_nthr != nthreads) {
     if (comm->me == 0)
       utils::logmesg(lmp,"Re-init OPENMP for {} OpenMP thread(s)\n", nthreads);
@@ -212,7 +215,7 @@ void FixOMP::init()
   // kspace_split < 0  : master partition, does not do kspace
   // kspace_split > 0  : slave partition, only does kspace
 
-  if (strstr(update->integrate_style,"verlet/split") != nullptr) {
+  if (utils::strmatch(update->integrate_style, "^verlet/split")) {
     if (universe->iworld == 0) kspace_split = -1;
     else kspace_split = 1;
   } else {
diff --git a/src/OPENMP/fix_rigid_nh_omp.cpp b/src/OPENMP/fix_rigid_nh_omp.cpp
index 19e5d4f240..f5e4a1f49d 100644
--- a/src/OPENMP/fix_rigid_nh_omp.cpp
+++ b/src/OPENMP/fix_rigid_nh_omp.cpp
@@ -30,7 +30,6 @@
 #include "kspace.h"
 #include "math_const.h"
 #include "math_extra.h"
-#include "modify.h"
 #include "rigid_const.h"
 #include "update.h"
 
diff --git a/src/OPENMP/npair_bin_omp.cpp b/src/OPENMP/npair_bin_omp.cpp
index 5b2189dec2..7922d76612 100644
--- a/src/OPENMP/npair_bin_omp.cpp
+++ b/src/OPENMP/npair_bin_omp.cpp
@@ -25,6 +25,8 @@
 #include "my_page.h"
 #include "neigh_list.h"
 
+#include <cmath>
+
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/OPENMP/npair_multi_old_omp.cpp b/src/OPENMP/npair_multi_old_omp.cpp
index d45f2d1f5f..fa790e177a 100644
--- a/src/OPENMP/npair_multi_old_omp.cpp
+++ b/src/OPENMP/npair_multi_old_omp.cpp
@@ -24,6 +24,8 @@
 #include "my_page.h"
 #include "neigh_list.h"
 
+#include <cmath>
+
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/OPENMP/npair_multi_omp.cpp b/src/OPENMP/npair_multi_omp.cpp
index 3f8604572c..cbc21ebc29 100644
--- a/src/OPENMP/npair_multi_omp.cpp
+++ b/src/OPENMP/npair_multi_omp.cpp
@@ -26,6 +26,8 @@
 #include "neigh_list.h"
 #include "neighbor.h"
 
+#include <cmath>
+
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/OPENMP/npair_nsq_omp.cpp b/src/OPENMP/npair_nsq_omp.cpp
index c482fc8f2d..5d6aa518b0 100644
--- a/src/OPENMP/npair_nsq_omp.cpp
+++ b/src/OPENMP/npair_nsq_omp.cpp
@@ -27,6 +27,8 @@
 #include "neigh_list.h"
 #include "neighbor.h"
 
+#include <cmath>
+
 using namespace LAMMPS_NS;
 using namespace NeighConst;
 
diff --git a/src/OPENMP/npair_respa_bin_omp.cpp b/src/OPENMP/npair_respa_bin_omp.cpp
index c958167ba0..a069affb06 100644
--- a/src/OPENMP/npair_respa_bin_omp.cpp
+++ b/src/OPENMP/npair_respa_bin_omp.cpp
@@ -25,6 +25,8 @@
 #include "my_page.h"
 #include "neigh_list.h"
 
+#include <cmath>
+
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/OPENMP/npair_respa_nsq_omp.cpp b/src/OPENMP/npair_respa_nsq_omp.cpp
index 6815b21544..deba473678 100644
--- a/src/OPENMP/npair_respa_nsq_omp.cpp
+++ b/src/OPENMP/npair_respa_nsq_omp.cpp
@@ -26,6 +26,8 @@
 #include "my_page.h"
 #include "neigh_list.h"
 
+#include <cmath>
+
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/OPENMP/pair_airebo_omp.cpp b/src/OPENMP/pair_airebo_omp.cpp
index 9f992aefda..6736b10f8f 100644
--- a/src/OPENMP/pair_airebo_omp.cpp
+++ b/src/OPENMP/pair_airebo_omp.cpp
@@ -38,8 +38,7 @@ static constexpr double TOL = 1.0e-9;
 
 /* ---------------------------------------------------------------------- */
 
-PairAIREBOOMP::PairAIREBOOMP(LAMMPS *lmp) :
-  PairAIREBO(lmp), ThrOMP(lmp, THR_PAIR)
+PairAIREBOOMP::PairAIREBOOMP(LAMMPS *lmp) : PairAIREBO(lmp), ThrOMP(lmp, THR_PAIR)
 {
   suffix_flag |= Suffix::OMP;
   respa_enable = 0;
@@ -1121,12 +1120,9 @@ double PairAIREBOOMP::bondorder_thr(int i, int j, double rij[3], double rijmag,
       cosjik = MIN(cosjik,1.0);
       cosjik = MAX(cosjik,-1.0);
 
-      dcosjikdri[0] = ((rij[0]+rik[0])*invrijkm) -
-        (cosjik*((rij[0]*invrijm2)+(rik[0]*invrikm2)));
-      dcosjikdri[1] = ((rij[1]+rik[1])*invrijkm) -
-        (cosjik*((rij[1]*invrijm2)+(rik[1]*invrikm2)));
-      dcosjikdri[2] = ((rij[2]+rik[2])*invrijkm) -
-        (cosjik*((rij[2]*invrijm2)+(rik[2]*invrikm2)));
+      dcosjikdri[0] = ((rij[0]+rik[0])*invrijkm) - (cosjik*((rij[0]*invrijm2)+(rik[0]*invrikm2)));
+      dcosjikdri[1] = ((rij[1]+rik[1])*invrijkm) - (cosjik*((rij[1]*invrijm2)+(rik[1]*invrikm2)));
+      dcosjikdri[2] = ((rij[2]+rik[2])*invrijkm) - (cosjik*((rij[2]*invrijm2)+(rik[2]*invrikm2)));
       dcosjikdrk[0] = (-rij[0]*invrijkm) + (cosjik*(rik[0]*invrikm2));
       dcosjikdrk[1] = (-rij[1]*invrijkm) + (cosjik*(rik[1]*invrikm2));
       dcosjikdrk[2] = (-rij[2]*invrijkm) + (cosjik*(rik[2]*invrikm2));
diff --git a/src/OPENMP/pair_lepton_coul_omp.cpp b/src/OPENMP/pair_lepton_coul_omp.cpp
index 532c16d797..ae737ef1cb 100644
--- a/src/OPENMP/pair_lepton_coul_omp.cpp
+++ b/src/OPENMP/pair_lepton_coul_omp.cpp
@@ -16,6 +16,7 @@
 
 #include "atom.h"
 #include "comm.h"
+#include "error.h"
 #include "force.h"
 #include "neigh_list.h"
 #include "suffix.h"
@@ -26,6 +27,7 @@
 
 #include <array>
 #include <cmath>
+#include <exception>
 
 using namespace LAMMPS_NS;
 
diff --git a/src/OPENMP/pair_lepton_omp.cpp b/src/OPENMP/pair_lepton_omp.cpp
index 58692e52d6..3b07a7b757 100644
--- a/src/OPENMP/pair_lepton_omp.cpp
+++ b/src/OPENMP/pair_lepton_omp.cpp
@@ -16,6 +16,7 @@
 
 #include "atom.h"
 #include "comm.h"
+#include "error.h"
 #include "force.h"
 #include "neigh_list.h"
 #include "suffix.h"
@@ -23,8 +24,9 @@
 #include "Lepton.h"
 #include "lepton_utils.h"
 #include "omp_compat.h"
-#include <array>
+
 #include <cmath>
+#include <exception>
 
 using namespace LAMMPS_NS;
 
diff --git a/src/OPENMP/pair_lepton_sphere_omp.cpp b/src/OPENMP/pair_lepton_sphere_omp.cpp
index 79afe27717..6de9714f3e 100644
--- a/src/OPENMP/pair_lepton_sphere_omp.cpp
+++ b/src/OPENMP/pair_lepton_sphere_omp.cpp
@@ -16,6 +16,7 @@
 
 #include "atom.h"
 #include "comm.h"
+#include "error.h"
 #include "force.h"
 #include "neigh_list.h"
 #include "suffix.h"
@@ -26,6 +27,7 @@
 
 #include <array>
 #include <cmath>
+#include <exception>
 
 using namespace LAMMPS_NS;
 
diff --git a/src/OPENMP/pair_lj_expand_sphere_omp.cpp b/src/OPENMP/pair_lj_expand_sphere_omp.cpp
index c19d3e7a7f..40f878cdc2 100644
--- a/src/OPENMP/pair_lj_expand_sphere_omp.cpp
+++ b/src/OPENMP/pair_lj_expand_sphere_omp.cpp
@@ -21,6 +21,8 @@
 #include "neigh_list.h"
 #include "suffix.h"
 
+#include <cmath>
+
 #include "omp_compat.h"
 using namespace LAMMPS_NS;
 using MathSpecial::powint;
diff --git a/src/OPENMP/pair_rebomos_omp.cpp b/src/OPENMP/pair_rebomos_omp.cpp
new file mode 100644
index 0000000000..06b979d41a
--- /dev/null
+++ b/src/OPENMP/pair_rebomos_omp.cpp
@@ -0,0 +1,701 @@
+// clang-format off
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS Development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   References:
+
+   This code:
+   Stewart J A and Spearot D E (2013) Atomistic simulations of nanoindentation on the basal plane of crystalline molybdenum disulfide. Modelling Simul. Mater. Sci. Eng. 21.
+
+   Based on:
+   Liang T, Phillpot S R and Sinnott S B (2009) Parameterization of a reactive many-body potential for Mo2S systems. Phys. Rev. B79 245110.
+   Liang T, Phillpot S R and Sinnott S B (2012) Erratum: Parameterization of a reactive many-body potential for Mo-S systems. (Phys. Rev. B79 245110 (2009)) Phys. Rev. B85 199903(E).
+
+   LAMMPS file contributing authors: James Stewart, Khanh Dang and Douglas Spearot (University of Arkansas)
+------------------------------------------------------------------------- */
+
+// clang-format on
+
+#include "pair_rebomos_omp.h"
+
+#include "atom.h"
+#include "comm.h"
+#include "error.h"
+#include "math_special.h"
+#include "memory.h"
+#include "my_page.h"
+#include "neigh_list.h"
+
+#include "suffix.h"
+
+#include <cmath>
+
+#include "omp_compat.h"
+#if defined(_OPENMP)
+#include <omp.h>
+#endif
+
+using namespace LAMMPS_NS;
+using MathSpecial::cube;
+using MathSpecial::powint;
+using MathSpecial::square;
+
+static constexpr double TOL = 1.0e-9;
+
+/* ---------------------------------------------------------------------- */
+
+PairREBOMoSOMP::PairREBOMoSOMP(LAMMPS *lmp) : PairREBOMoS(lmp), ThrOMP(lmp, THR_PAIR)
+{
+  suffix_flag |= Suffix::OMP;
+  respa_enable = 0;
+}
+
+// clang-format off
+
+/* ---------------------------------------------------------------------- */
+
+void PairREBOMoSOMP::compute(int eflag, int vflag)
+{
+  ev_init(eflag,vflag);
+
+  REBO_neigh_thr();
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+
+#if defined(_OPENMP)
+#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    thr->timer(Timer::START);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, nullptr, thr);
+
+    FREBO_thr(ifrom,ito,eflag,thr);
+    FLJ_thr(ifrom,ito,eflag,thr);
+
+    thr->timer(Timer::PAIR);
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+/* ----------------------------------------------------------------------
+   create REBO neighbor list from main neighbor list
+   REBO neighbor list stores neighbors of ghost atoms
+------------------------------------------------------------------------- */
+
+void PairREBOMoSOMP::REBO_neigh_thr()
+{
+  const int nthreads = comm->nthreads;
+
+  if (atom->nmax > maxlocal) {
+    maxlocal = atom->nmax;
+    memory->destroy(REBO_numneigh);
+    memory->sfree(REBO_firstneigh);
+    memory->destroy(nM);
+    memory->destroy(nS);
+    memory->create(REBO_numneigh,maxlocal,"REBOMoS:numneigh");
+    REBO_firstneigh = (int **) memory->smalloc(maxlocal*sizeof(int *),
+                                               "REBOMoS:firstneigh");
+    memory->create(nM,maxlocal,"REBOMoS:nM");
+    memory->create(nS,maxlocal,"REBOMoS:nS");
+  }
+
+#if defined(_OPENMP)
+#pragma omp parallel LMP_DEFAULT_NONE
+#endif
+  {
+    int i,j,ii,jj,n,jnum,itype,jtype;
+    double xtmp,ytmp,ztmp,delx,dely,delz,rsq,dS;
+    int *ilist,*jlist,*numneigh,**firstneigh;
+    int *neighptr;
+
+    double **x = atom->x;
+    int *type = atom->type;
+
+    const int allnum = list->inum + list->gnum;
+    ilist = list->ilist;
+    numneigh = list->numneigh;
+    firstneigh = list->firstneigh;
+
+#if defined(_OPENMP)
+    const int tid = omp_get_thread_num();
+#else
+    const int tid = 0;
+#endif
+
+    const int iidelta = 1 + allnum/nthreads;
+    const int iifrom = tid*iidelta;
+    const int iito = ((iifrom+iidelta)>allnum) ? allnum : (iifrom+iidelta);
+
+    // store all REBO neighs of owned and ghost atoms
+    // scan full neighbor list of I
+
+    // each thread has its own page allocator
+    MyPage<int> &ipg = ipage[tid];
+    ipg.reset();
+
+    for (ii = iifrom; ii < iito; ii++) {
+      i = ilist[ii];
+
+      n = 0;
+      neighptr = ipg.vget();
+
+      xtmp = x[i][0];
+      ytmp = x[i][1];
+      ztmp = x[i][2];
+      itype = map[type[i]];
+      nM[i] = nS[i] = 0.0;
+      jlist = firstneigh[i];
+      jnum = numneigh[i];
+
+      for (jj = 0; jj < jnum; jj++) {
+        j = jlist[jj];
+        j &= NEIGHMASK;
+        jtype = map[type[j]];
+        delx = xtmp - x[j][0];
+        dely = ytmp - x[j][1];
+        delz = ztmp - x[j][2];
+        rsq = delx*delx + dely*dely + delz*delz;
+
+        if (rsq < rcmaxsq[itype][jtype]) {
+          neighptr[n++] = j;
+          if (jtype == 0)
+            nM[i] += Sp(sqrt(rsq),rcmin[itype][jtype],rcmax[itype][jtype],dS);
+          else
+            nS[i] += Sp(sqrt(rsq),rcmin[itype][jtype],rcmax[itype][jtype],dS);
+        }
+      }
+
+      REBO_firstneigh[i] = neighptr;
+      REBO_numneigh[i] = n;
+      ipg.vgot(n);
+      if (ipg.status())
+        error->one(FLERR,"REBO list overflow, boost neigh_modify one");
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   REBO forces and energy
+------------------------------------------------------------------------- */
+
+void PairREBOMoSOMP::FREBO_thr(int ifrom, int ito, int eflag, ThrData * const thr)
+{
+  int i,j,k,ii,itype,jtype;
+  tagint itag, jtag;
+  double delx,dely,delz,evdwl,fpair,xtmp,ytmp,ztmp;
+  double rsq,rij,wij;
+  double Qij,Aij,alphaij,VR,pre,dVRdi,VA,bij,dVAdi,dVA;
+  double dwij,del[3];
+  int *ilist,*REBO_neighs;
+
+  evdwl = 0.0;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const type = atom->type;
+  const tagint * const tag = atom->tag;
+  const int nlocal = atom->nlocal;
+
+  ilist = list->ilist;
+
+  // two-body interactions from REBO neighbor list, skip half of them
+
+  for (ii = ifrom; ii < ito; ii++) {
+    i = ilist[ii];
+    itag = tag[i];
+    itype = map[type[i]];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    REBO_neighs = REBO_firstneigh[i];
+
+    for (k = 0; k < REBO_numneigh[i]; k++) {
+      j = REBO_neighs[k];
+      jtag = tag[j];
+
+      if (itag > jtag) {
+        if ((itag+jtag) % 2 == 0) continue;
+      } else if (itag < jtag) {
+        if ((itag+jtag) % 2 == 1) continue;
+      } else {
+        if (x[j][2] < ztmp) continue;
+        if (x[j][2] == ztmp && x[j][1] < ytmp) continue;
+        if (x[j][2] == ztmp && x[j][1] == ytmp && x[j][0] < xtmp) continue;
+      }
+
+      jtype = map[type[j]];
+
+      delx = x[i][0] - x[j][0];
+      dely = x[i][1] - x[j][1];
+      delz = x[i][2] - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+      rij = sqrt(rsq);
+      wij = Sp(rij,rcmin[itype][jtype],rcmax[itype][jtype],dwij);
+      if (wij <= TOL) continue;
+
+      Qij = Q[itype][jtype];
+      Aij = A[itype][jtype];
+      alphaij = alpha[itype][jtype];
+
+      VR = wij*(1.0+(Qij/rij)) * Aij*exp(-alphaij*rij);
+      pre = wij*Aij * exp(-alphaij*rij);
+      dVRdi = pre * ((-alphaij)-(Qij/rsq)-(Qij*alphaij/rij));
+      dVRdi += VR/wij * dwij;
+
+      VA = dVA = 0.0;
+      VA = -wij * BIJc[itype][jtype] * exp(-Beta[itype][jtype]*rij);
+
+      dVA = -Beta[itype][jtype] * VA;
+      dVA += VA/wij * dwij;
+
+      del[0] = delx;
+      del[1] = dely;
+      del[2] = delz;
+      bij = bondorder_thr(i,j,del,rij,VA,thr);
+      dVAdi = bij*dVA;
+
+      fpair = -(dVRdi+dVAdi) / rij;
+      f[i][0] += delx*fpair;
+      f[i][1] += dely*fpair;
+      f[i][2] += delz*fpair;
+      f[j][0] -= delx*fpair;
+      f[j][1] -= dely*fpair;
+      f[j][2] -= delz*fpair;
+
+      if (eflag) evdwl = VR + bij*VA;
+      if (evflag) ev_tally_thr(this,i,j,nlocal,/* newton_pair */1,evdwl,0.0,fpair,delx,dely,delz,thr);
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   compute LJ forces and energy
+------------------------------------------------------------------------- */
+
+void PairREBOMoSOMP::FLJ_thr(int ifrom, int ito, int eflag, ThrData * const thr)
+{
+  int i,j,ii,jj,jnum,itype,jtype;
+  tagint itag,jtag;
+  double evdwl,fpair,xtmp,ytmp,ztmp;
+  double rij,delij[3],rijsq;
+  double VLJ,dVLJ;
+  double vdw,dvdw;
+  double r2inv,r6inv;
+  int *ilist,*jlist,*numneigh,**firstneigh;
+  double c2,c3,dr,drp,r6;
+
+  // I-J interaction from full neighbor list
+  // skip 1/2 of interactions since only consider each pair once
+
+  evdwl = 0.0;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const tagint * const tag = atom->tag;
+  const int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  // loop over neighbors of my atoms
+
+  for (ii = ifrom; ii < ito; ii++) {
+    i = ilist[ii];
+    itag = tag[i];
+    itype = map[type[i]];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      j &= NEIGHMASK;
+      jtag = tag[j];
+
+      if (itag > jtag) {
+        if ((itag+jtag) % 2 == 0) continue;
+      } else if (itag < jtag) {
+        if ((itag+jtag) % 2 == 1) continue;
+      } else {
+        if (x[j][2] < ztmp) continue;
+        if (x[j][2] == ztmp && x[j][1] < ytmp) continue;
+        if (x[j][2] == ztmp && x[j][1] == ytmp && x[j][0] < xtmp) continue;
+      }
+      jtype = map[type[j]];
+
+      delij[0] = xtmp - x[j][0];
+      delij[1] = ytmp - x[j][1];
+      delij[2] = ztmp - x[j][2];
+      rijsq = delij[0]*delij[0] + delij[1]*delij[1] + delij[2]*delij[2];
+      rij = sqrt(rijsq);
+
+      // compute LJ forces and energy
+
+      // Outside Rmax
+      if (rij > rcLJmax[itype][jtype] || rij < rcLJmin[itype][jtype]){
+          VLJ = 0;
+          dVLJ = 0;
+      }
+
+      // Inside Rmax and above 0.95*sigma
+      else if (rij <= rcLJmax[itype][jtype] && rij >= 0.95*sigma[itype][jtype]){
+              r2inv = 1.0/rijsq;
+              r6inv = r2inv*r2inv*r2inv;
+              VLJ = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]);
+              dVLJ = -r6inv*(lj1[itype][jtype]*r6inv - lj2[itype][jtype])/rij;
+      }
+
+      // Below 0.95*sigma
+      else if (rij < 0.95*sigma[itype][jtype] && rij >= rcLJmin[itype][jtype]){
+              dr = 0.95*sigma[itype][jtype] - rcLJmin[itype][jtype];
+              r6 = powint((sigma[itype][jtype]/(0.95*sigma[itype][jtype])),6);
+              vdw = 4*epsilon[itype][jtype]*r6*(r6 - 1.0);
+              dvdw = (-4*epsilon[itype][jtype]/(0.95*sigma[itype][jtype]))*r6*(12.0*r6 - 6.0);
+              c2 = ((3.0/dr)*vdw - dvdw)/dr;
+              c3 = (vdw/(dr*dr) - c2)/dr;
+
+              drp = rij - rcLJmin[itype][jtype];
+              VLJ = drp*drp*(drp*c3 + c2);
+              dVLJ = drp*(3.0*drp*c3 + 2.0*c2);
+      }
+
+      fpair = -dVLJ/rij;
+      f[i][0] += delij[0]*fpair;
+      f[i][1] += delij[1]*fpair;
+      f[i][2] += delij[2]*fpair;
+      f[j][0] -= delij[0]*fpair;
+      f[j][1] -= delij[1]*fpair;
+      f[j][2] -= delij[2]*fpair;
+
+      if (eflag) evdwl = VLJ;
+      if (evflag) ev_tally_thr(this,i,j,nlocal,/*newton_pair*/1,evdwl,0.0,fpair,delij[0],delij[1],delij[2],thr);
+
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   Bij function
+
+   The bond order term modified the attractive portion of the REBO
+   potential based on the number of atoms around a specific pair
+   and the bond angle between sets of three atoms.
+
+   The functions G(cos(theta)) and P(N) are evaluated and their
+   derivatives are also computed for use in the force calculation.
+------------------------------------------------------------------------- */
+
+double PairREBOMoSOMP::bondorder_thr(int i, int j, double rij[3], double rijmag, double VA, ThrData *thr)
+{
+  int atomi,atomj,atomk,atoml;
+  int k,l;
+  int itype, jtype, ktype, ltype;
+  double rik[3], rjl[3], rji[3], rki[3],rlj[3], dwjl, bij;
+  double NijM,NijS,NjiM,NjiS,wik,dwik,wjl;
+  double rikmag,rjlmag,cosjik,cosijl,g,tmp2;
+  double Etmp,pij,tmp,dwij,dS;
+  double dgdc,pji;
+  double dcosjikdri[3],dcosijldri[3],dcosjikdrk[3];
+  double dp;
+  double dcosjikdrj[3],dcosijldrj[3],dcosijldrl[3];
+  double fi[3],fj[3],fk[3],fl[3];
+  double PijS, PjiS;
+  int *REBO_neighs;
+
+  const double * const * const x = atom->x;
+  double * const * const f = thr->get_f();
+  const int * const type = atom->type;
+
+  atomi = i;
+  atomj = j;
+  itype = map[type[i]];
+  jtype = map[type[j]];
+  Sp(rijmag,rcmin[itype][jtype],rcmax[itype][jtype],dwij);
+  NijM = nM[i];
+  NijS = nS[i];
+  NjiM = nM[j];
+  NjiS = nS[j];
+  bij = 0.0;
+  tmp = 0.0;
+  tmp2 = 0.0;
+  dgdc = 0.0;
+  Etmp = 0.0;
+
+  REBO_neighs = REBO_firstneigh[i];
+  for (k = 0; k < REBO_numneigh[i]; k++) {
+    atomk = REBO_neighs[k];
+    if (atomk != atomj) {
+      ktype = map[type[atomk]];
+      rik[0] = x[atomi][0]-x[atomk][0];
+      rik[1] = x[atomi][1]-x[atomk][1];
+      rik[2] = x[atomi][2]-x[atomk][2];
+      rikmag = sqrt((rik[0]*rik[0])+(rik[1]*rik[1])+(rik[2]*rik[2]));
+      wik = Sp(rikmag,rcmin[itype][ktype],rcmax[itype][ktype],dS);
+      cosjik = ((rij[0]*rik[0])+(rij[1]*rik[1])+(rij[2]*rik[2])) / (rijmag*rikmag);
+      cosjik = MIN(cosjik,1.0);
+      cosjik = MAX(cosjik,-1.0);
+
+      // evaluate g and derivative dg
+
+      g = gSpline(cosjik,itype,dgdc);
+      Etmp = Etmp+(wik*g);
+    }
+  }
+
+  dp = 0.0;
+  PijS = PijSpline(NijM,NijS,itype,dp);
+  pij = 1.0/sqrt(1.0+Etmp+PijS);
+  tmp = -0.5*cube(pij);
+
+  // derivative calculations
+
+  REBO_neighs = REBO_firstneigh[i];
+  for (k = 0; k < REBO_numneigh[i]; k++) {
+    atomk = REBO_neighs[k];
+    if (atomk != atomj) {
+      ktype = map[type[atomk]];
+      rik[0] = x[atomi][0]-x[atomk][0];
+      rik[1] = x[atomi][1]-x[atomk][1];
+      rik[2] = x[atomi][2]-x[atomk][2];
+      rikmag = sqrt((rik[0]*rik[0])+(rik[1]*rik[1])+(rik[2]*rik[2]));
+      wik = Sp(rikmag,rcmin[itype][ktype],rcmax[itype][ktype],dwik);
+      cosjik = (rij[0]*rik[0] + rij[1]*rik[1] + rij[2]*rik[2]) / (rijmag*rikmag);
+      cosjik = MIN(cosjik,1.0);
+      cosjik = MAX(cosjik,-1.0);
+
+      dcosjikdri[0] = ((rij[0]+rik[0])/(rijmag*rikmag)) -
+        (cosjik*((rij[0]/(rijmag*rijmag))+(rik[0]/(rikmag*rikmag))));
+      dcosjikdri[1] = ((rij[1]+rik[1])/(rijmag*rikmag)) -
+        (cosjik*((rij[1]/(rijmag*rijmag))+(rik[1]/(rikmag*rikmag))));
+      dcosjikdri[2] = ((rij[2]+rik[2])/(rijmag*rikmag)) -
+        (cosjik*((rij[2]/(rijmag*rijmag))+(rik[2]/(rikmag*rikmag))));
+      dcosjikdrk[0] = (-rij[0]/(rijmag*rikmag)) +
+        (cosjik*(rik[0]/(rikmag*rikmag)));
+      dcosjikdrk[1] = (-rij[1]/(rijmag*rikmag)) +
+        (cosjik*(rik[1]/(rikmag*rikmag)));
+      dcosjikdrk[2] = (-rij[2]/(rijmag*rikmag)) +
+        (cosjik*(rik[2]/(rikmag*rikmag)));
+      dcosjikdrj[0] = (-rik[0]/(rijmag*rikmag)) +
+        (cosjik*(rij[0]/(rijmag*rijmag)));
+      dcosjikdrj[1] = (-rik[1]/(rijmag*rikmag)) +
+        (cosjik*(rij[1]/(rijmag*rijmag)));
+      dcosjikdrj[2] = (-rik[2]/(rijmag*rikmag)) +
+        (cosjik*(rij[2]/(rijmag*rijmag)));
+
+      g = gSpline(cosjik,itype,dgdc);
+      tmp2 = VA*0.5*(tmp*wik*dgdc);
+      fj[0] = -tmp2*dcosjikdrj[0];
+      fj[1] = -tmp2*dcosjikdrj[1];
+      fj[2] = -tmp2*dcosjikdrj[2];
+      fi[0] = -tmp2*dcosjikdri[0];
+      fi[1] = -tmp2*dcosjikdri[1];
+      fi[2] = -tmp2*dcosjikdri[2];
+      fk[0] = -tmp2*dcosjikdrk[0];
+      fk[1] = -tmp2*dcosjikdrk[1];
+      fk[2] = -tmp2*dcosjikdrk[2];
+
+      // coordination forces
+
+      // dwik forces (from partial derivative)
+
+      tmp2 = VA*0.5*(tmp*dwik*g)/rikmag;
+      fi[0] -= tmp2*rik[0];
+      fi[1] -= tmp2*rik[1];
+      fi[2] -= tmp2*rik[2];
+      fk[0] += tmp2*rik[0];
+      fk[1] += tmp2*rik[1];
+      fk[2] += tmp2*rik[2];
+
+      // PIJ forces (from coordination P(N) term)
+
+      tmp2 = VA*0.5*(tmp*dp*dwik)/rikmag;
+      fi[0] -= tmp2*rik[0];
+      fi[1] -= tmp2*rik[1];
+      fi[2] -= tmp2*rik[2];
+      fk[0] += tmp2*rik[0];
+      fk[1] += tmp2*rik[1];
+      fk[2] += tmp2*rik[2];
+
+      // dgdN forces are removed
+
+      f[atomi][0] += fi[0]; f[atomi][1] += fi[1]; f[atomi][2] += fi[2];
+      f[atomj][0] += fj[0]; f[atomj][1] += fj[1]; f[atomj][2] += fj[2];
+      f[atomk][0] += fk[0]; f[atomk][1] += fk[1]; f[atomk][2] += fk[2];
+
+      if (vflag_either) {
+        rji[0] = -rij[0]; rji[1] = -rij[1]; rji[2] = -rij[2];
+        rki[0] = -rik[0]; rki[1] = -rik[1]; rki[2] = -rik[2];
+        v_tally3_thr(this,atomi,atomj,atomk,fj,fk,rji,rki,thr);
+      }
+    }
+  }
+
+  // PIJ force contribution additional term
+  tmp2 = -VA*0.5*(tmp*dp*dwij)/rijmag;
+
+  f[atomi][0] += rij[0]*tmp2;
+  f[atomi][1] += rij[1]*tmp2;
+  f[atomi][2] += rij[2]*tmp2;
+  f[atomj][0] -= rij[0]*tmp2;
+  f[atomj][1] -= rij[1]*tmp2;
+  f[atomj][2] -= rij[2]*tmp2;
+
+  if (vflag_either) v_tally2_thr(this,atomi,atomj,tmp2,rij,thr);
+
+  tmp = 0.0;
+  tmp2 = 0.0;
+  Etmp = 0.0;
+
+  REBO_neighs = REBO_firstneigh[j];
+  for (l = 0; l < REBO_numneigh[j]; l++) {
+    atoml = REBO_neighs[l];
+    if (atoml != atomi) {
+      ltype = map[type[atoml]];
+      rjl[0] = x[atomj][0]-x[atoml][0];
+      rjl[1] = x[atomj][1]-x[atoml][1];
+      rjl[2] = x[atomj][2]-x[atoml][2];
+      rjlmag = sqrt((rjl[0]*rjl[0])+(rjl[1]*rjl[1])+(rjl[2]*rjl[2]));
+      wjl = Sp(rjlmag,rcmin[jtype][ltype],rcmax[jtype][ltype],dS);
+      cosijl = -1.0*((rij[0]*rjl[0])+(rij[1]*rjl[1])+(rij[2]*rjl[2])) / (rijmag*rjlmag);
+      cosijl = MIN(cosijl,1.0);
+      cosijl = MAX(cosijl,-1.0);
+
+      // evaluate g and derivative dg
+
+      g = gSpline(cosijl,jtype,dgdc);
+      Etmp = Etmp+(wjl*g);
+    }
+  }
+
+  dp = 0.0;
+  PjiS = PijSpline(NjiM,NjiS,jtype,dp);
+  pji = 1.0/sqrt(1.0+Etmp+PjiS);
+  tmp = -0.5*cube(pji);
+
+  REBO_neighs = REBO_firstneigh[j];
+  for (l = 0; l < REBO_numneigh[j]; l++) {
+    atoml = REBO_neighs[l];
+    if (atoml != atomi) {
+      ltype = map[type[atoml]];
+      rjl[0] = x[atomj][0]-x[atoml][0];
+      rjl[1] = x[atomj][1]-x[atoml][1];
+      rjl[2] = x[atomj][2]-x[atoml][2];
+      rjlmag = sqrt((rjl[0]*rjl[0])+(rjl[1]*rjl[1])+(rjl[2]*rjl[2]));
+      wjl = Sp(rjlmag,rcmin[jtype][ltype],rcmax[jtype][ltype],dwjl);
+      cosijl = (-1.0*((rij[0]*rjl[0])+(rij[1]*rjl[1])+(rij[2]*rjl[2]))) / (rijmag*rjlmag);
+      cosijl = MIN(cosijl,1.0);
+      cosijl = MAX(cosijl,-1.0);
+
+      dcosijldri[0] = (-rjl[0]/(rijmag*rjlmag)) -
+        (cosijl*rij[0]/(rijmag*rijmag));
+      dcosijldri[1] = (-rjl[1]/(rijmag*rjlmag)) -
+        (cosijl*rij[1]/(rijmag*rijmag));
+      dcosijldri[2] = (-rjl[2]/(rijmag*rjlmag)) -
+        (cosijl*rij[2]/(rijmag*rijmag));
+      dcosijldrj[0] = ((-rij[0]+rjl[0])/(rijmag*rjlmag)) +
+        (cosijl*((rij[0]/square(rijmag))-(rjl[0]/(rjlmag*rjlmag))));
+      dcosijldrj[1] = ((-rij[1]+rjl[1])/(rijmag*rjlmag)) +
+        (cosijl*((rij[1]/square(rijmag))-(rjl[1]/(rjlmag*rjlmag))));
+      dcosijldrj[2] = ((-rij[2]+rjl[2])/(rijmag*rjlmag)) +
+        (cosijl*((rij[2]/square(rijmag))-(rjl[2]/(rjlmag*rjlmag))));
+      dcosijldrl[0] = (rij[0]/(rijmag*rjlmag))+(cosijl*rjl[0]/(rjlmag*rjlmag));
+      dcosijldrl[1] = (rij[1]/(rijmag*rjlmag))+(cosijl*rjl[1]/(rjlmag*rjlmag));
+      dcosijldrl[2] = (rij[2]/(rijmag*rjlmag))+(cosijl*rjl[2]/(rjlmag*rjlmag));
+
+      // evaluate g and derivatives dg
+
+      g = gSpline(cosijl,jtype,dgdc);
+      tmp2 = VA*0.5*(tmp*wjl*dgdc);
+      fi[0] = -tmp2*dcosijldri[0];
+      fi[1] = -tmp2*dcosijldri[1];
+      fi[2] = -tmp2*dcosijldri[2];
+      fj[0] = -tmp2*dcosijldrj[0];
+      fj[1] = -tmp2*dcosijldrj[1];
+      fj[2] = -tmp2*dcosijldrj[2];
+      fl[0] = -tmp2*dcosijldrl[0];
+      fl[1] = -tmp2*dcosijldrl[1];
+      fl[2] = -tmp2*dcosijldrl[2];
+
+      // coordination forces
+
+      // dwik forces (from partial derivative)
+
+      tmp2 = VA*0.5*(tmp*dwjl*g)/rjlmag;
+      fj[0] -= tmp2*rjl[0];
+      fj[1] -= tmp2*rjl[1];
+      fj[2] -= tmp2*rjl[2];
+      fl[0] += tmp2*rjl[0];
+      fl[1] += tmp2*rjl[1];
+      fl[2] += tmp2*rjl[2];
+
+      // PIJ forces (coordination)
+
+      tmp2 = VA*0.5*(tmp*dp*dwjl)/rjlmag;
+      fj[0] -= tmp2*rjl[0];
+      fj[1] -= tmp2*rjl[1];
+      fj[2] -= tmp2*rjl[2];
+      fl[0] += tmp2*rjl[0];
+      fl[1] += tmp2*rjl[1];
+      fl[2] += tmp2*rjl[2];
+
+      // dgdN forces are removed
+
+      f[atomi][0] += fi[0]; f[atomi][1] += fi[1]; f[atomi][2] += fi[2];
+      f[atomj][0] += fj[0]; f[atomj][1] += fj[1]; f[atomj][2] += fj[2];
+      f[atoml][0] += fl[0]; f[atoml][1] += fl[1]; f[atoml][2] += fl[2];
+
+      if (vflag_either) {
+        rlj[0] = -rjl[0]; rlj[1] = -rjl[1]; rlj[2] = -rjl[2];
+        v_tally3_thr(this,atomi,atomj,atoml,fi,fl,rij,rlj,thr);
+      }
+    }
+  }
+
+  // PIJ force contribution additional term
+
+  tmp2 = -VA*0.5*(tmp*dp*dwij)/rijmag;
+  f[atomi][0] += rij[0]*tmp2;
+  f[atomi][1] += rij[1]*tmp2;
+  f[atomi][2] += rij[2]*tmp2;
+  f[atomj][0] -= rij[0]*tmp2;
+  f[atomj][1] -= rij[1]*tmp2;
+  f[atomj][2] -= rij[2]*tmp2;
+
+  if (vflag_either) v_tally2_thr(this,atomi,atomj,tmp2,rij,thr);
+
+  bij = (0.5*(pij+pji));
+  return bij;
+}
+
+/* ----------------------------------------------------------------------
+   memory usage of local atom-based arrays
+------------------------------------------------------------------------- */
+
+double PairREBOMoSOMP::memory_usage()
+{
+  double bytes = memory_usage_thr();
+  bytes += PairREBOMoS::memory_usage();
+
+  return bytes;
+}
diff --git a/src/OPENMP/pair_rebomos_omp.h b/src/OPENMP/pair_rebomos_omp.h
new file mode 100644
index 0000000000..ea87f51950
--- /dev/null
+++ b/src/OPENMP/pair_rebomos_omp.h
@@ -0,0 +1,46 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS Development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+// clang-format off
+PairStyle(rebomos/omp,PairREBOMoSOMP);
+// clang-format on
+#else
+
+#ifndef LMP_PAIR_REBOMOS_OMP_H
+#define LMP_PAIR_REBOMOS_OMP_H
+
+#include "pair_rebomos.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairREBOMoSOMP : public PairREBOMoS, public ThrOMP {
+ public:
+  PairREBOMoSOMP(class LAMMPS *);
+
+  void compute(int, int) override;
+  double memory_usage() override;
+
+ protected:
+  void FREBO_thr(int ifrom, int ito, int eflag, ThrData *const thr);
+  void FLJ_thr(int ifrom, int ito, int eflag, ThrData *const thr);
+
+  void REBO_neigh_thr();
+
+  double bondorder_thr(int, int, double *, double, double, ThrData *const thr);
+};
+}    // namespace LAMMPS_NS
+
+#endif
+#endif
diff --git a/src/POEMS/fix_poems.cpp b/src/POEMS/fix_poems.cpp
index a2720a3f64..eb9c790422 100644
--- a/src/POEMS/fix_poems.cpp
+++ b/src/POEMS/fix_poems.cpp
@@ -36,7 +36,7 @@
 
 #include <cmath>
 #include <cstring>
-#include <vector>
+#include <exception>
 
 using namespace LAMMPS_NS;
 using namespace FixConst;
diff --git a/src/PTM/compute_ptm_atom.cpp b/src/PTM/compute_ptm_atom.cpp
index e66bc1a17d..4d6cd4bc01 100644
--- a/src/PTM/compute_ptm_atom.cpp
+++ b/src/PTM/compute_ptm_atom.cpp
@@ -28,7 +28,6 @@ under
 #include "memory.h"
 #include "modify.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
 #include "neighbor.h"
 #include "update.h"
 
diff --git a/src/PTM/ptm_convex_hull_incremental.h b/src/PTM/ptm_convex_hull_incremental.h
index 796c787937..81fc92d829 100644
--- a/src/PTM/ptm_convex_hull_incremental.h
+++ b/src/PTM/ptm_convex_hull_incremental.h
@@ -11,7 +11,6 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
 #define PTM_CONVEX_HULL_INCREMENTAL_H
 
 #include "ptm_constants.h"
-#include <cstdbool>
 #include <cstdint>
 
 namespace ptm {
diff --git a/src/PYTHON/python_impl.cpp b/src/PYTHON/python_impl.cpp
index 0db468d701..87a57187bf 100644
--- a/src/PYTHON/python_impl.cpp
+++ b/src/PYTHON/python_impl.cpp
@@ -17,7 +17,6 @@
 
 #include "python_impl.h"
 
-#include "comm.h"
 #include "error.h"
 #include "input.h"
 #include "memory.h"
diff --git a/src/QEQ/fix_qeq.cpp b/src/QEQ/fix_qeq.cpp
index 22632cf786..411bdfb60b 100644
--- a/src/QEQ/fix_qeq.cpp
+++ b/src/QEQ/fix_qeq.cpp
@@ -27,9 +27,7 @@
 #include "memory.h"
 #include "modify.h"
 #include "neigh_list.h"
-#include "pair.h"
 #include "respa.h"
-#include "suffix.h"
 #include "text_file_reader.h"
 #include "update.h"
 
diff --git a/src/REAXFF/compute_reaxff_atom.cpp b/src/REAXFF/compute_reaxff_atom.cpp
index 212d117ac7..0371f75120 100644
--- a/src/REAXFF/compute_reaxff_atom.cpp
+++ b/src/REAXFF/compute_reaxff_atom.cpp
@@ -17,8 +17,8 @@
 ------------------------------------------------------------------------- */
 
 #include "compute_reaxff_atom.h"
+
 #include "atom.h"
-#include "molecule.h"
 #include "update.h"
 #include "force.h"
 #include "memory.h"
diff --git a/src/REAXFF/fix_acks2_reaxff.cpp b/src/REAXFF/fix_acks2_reaxff.cpp
index 68de1c8ed1..4fd86605fa 100644
--- a/src/REAXFF/fix_acks2_reaxff.cpp
+++ b/src/REAXFF/fix_acks2_reaxff.cpp
@@ -33,6 +33,7 @@
 
 #include <cmath>
 #include <cstring>
+#include <exception>
 
 using namespace LAMMPS_NS;
 using namespace FixConst;
diff --git a/src/REAXFF/reaxff_ffield.cpp b/src/REAXFF/reaxff_ffield.cpp
index 6ca8dc6256..b941d7d0f0 100644
--- a/src/REAXFF/reaxff_ffield.cpp
+++ b/src/REAXFF/reaxff_ffield.cpp
@@ -30,7 +30,6 @@
 #include "error.h"
 #include "memory.h"
 #include "text_file_reader.h"
-#include "tokenizer.h"
 #include "utils.h"
 
 #include <cmath>
diff --git a/src/REPLICA/fix_alchemy.cpp b/src/REPLICA/fix_alchemy.cpp
index 2fe4417161..b14c6bc22d 100644
--- a/src/REPLICA/fix_alchemy.cpp
+++ b/src/REPLICA/fix_alchemy.cpp
@@ -21,7 +21,6 @@
 #include "input.h"
 #include "memory.h"
 #include "modify.h"
-#include "respa.h"
 #include "universe.h"
 #include "update.h"
 #include "variable.h"
diff --git a/src/REPLICA/fix_pimd_langevin.cpp b/src/REPLICA/fix_pimd_langevin.cpp
index 01cfa66ebd..c6886fbed7 100644
--- a/src/REPLICA/fix_pimd_langevin.cpp
+++ b/src/REPLICA/fix_pimd_langevin.cpp
@@ -41,7 +41,6 @@
 #include "random_mars.h"
 #include "universe.h"
 #include "update.h"
-#include "utils.h"
 
 #include <cmath>
 #include <cstring>
diff --git a/src/REPLICA/temper.cpp b/src/REPLICA/temper.cpp
index adbdb4d742..77bc45e6e3 100644
--- a/src/REPLICA/temper.cpp
+++ b/src/REPLICA/temper.cpp
@@ -33,7 +33,6 @@
 #include "update.h"
 
 #include <cmath>
-#include <cstring>
 
 using namespace LAMMPS_NS;
 
diff --git a/src/REPLICA/temper_npt.cpp b/src/REPLICA/temper_npt.cpp
index d814bf6725..aa72047fe7 100644
--- a/src/REPLICA/temper_npt.cpp
+++ b/src/REPLICA/temper_npt.cpp
@@ -35,7 +35,6 @@
 #include "update.h"
 
 #include <cmath>
-#include <cstring>
 
 using namespace LAMMPS_NS;
 
diff --git a/src/SMTBQ/pair_smatb.cpp b/src/SMTBQ/pair_smatb.cpp
index 85446b7a62..ab6aee557e 100644
--- a/src/SMTBQ/pair_smatb.cpp
+++ b/src/SMTBQ/pair_smatb.cpp
@@ -26,6 +26,7 @@
 #include "neighbor.h"
 
 #include <cmath>
+#include <cstring>
 
 using namespace LAMMPS_NS;
 
diff --git a/src/SMTBQ/pair_smatb_single.cpp b/src/SMTBQ/pair_smatb_single.cpp
index 756941b2b7..4506a1093c 100644
--- a/src/SMTBQ/pair_smatb_single.cpp
+++ b/src/SMTBQ/pair_smatb_single.cpp
@@ -26,6 +26,7 @@
 #include "neighbor.h"
 
 #include <cmath>
+#include <cstring>
 
 using namespace LAMMPS_NS;
 
diff --git a/src/SMTBQ/pair_smtbq.cpp b/src/SMTBQ/pair_smtbq.cpp
index 4e4a6109fa..3696a3ab6c 100644
--- a/src/SMTBQ/pair_smtbq.cpp
+++ b/src/SMTBQ/pair_smtbq.cpp
@@ -60,6 +60,7 @@
 #include <cstring>
 
 #include <algorithm>
+#include <exception>
 #include <fstream>
 #include <iomanip>
 
diff --git a/src/SPIN/neb_spin.cpp b/src/SPIN/neb_spin.cpp
index ac54f069a9..fb8b7d8353 100644
--- a/src/SPIN/neb_spin.cpp
+++ b/src/SPIN/neb_spin.cpp
@@ -44,6 +44,7 @@
 
 #include <cmath>
 #include <cstring>
+#include <exception>
 
 using namespace LAMMPS_NS;
 
diff --git a/src/angle_write.cpp b/src/angle_write.cpp
index 0a0c457000..48420ae7be 100644
--- a/src/angle_write.cpp
+++ b/src/angle_write.cpp
@@ -25,7 +25,6 @@
 #include "error.h"
 #include "force.h"
 #include "input.h"
-#include "lammps.h"
 #include "math_const.h"
 #include "update.h"
 
diff --git a/src/atom_vec_sphere.cpp b/src/atom_vec_sphere.cpp
index 3e8c2fd2a3..3c7be5d3ee 100644
--- a/src/atom_vec_sphere.cpp
+++ b/src/atom_vec_sphere.cpp
@@ -19,8 +19,6 @@
 #include "math_const.h"
 #include "modify.h"
 
-#include <cstring>
-
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
diff --git a/src/comm.cpp b/src/comm.cpp
index 1293dd3d6d..02999fd541 100644
--- a/src/comm.cpp
+++ b/src/comm.cpp
@@ -420,6 +420,7 @@ void Comm::set_processors(int narg, char **arg)
     error->all(FLERR,"Specified processors != physical processors");
 
   int iarg = 3;
+  numa_nodes = 2;
   while (iarg < narg) {
     if (strcmp(arg[iarg],"grid") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal processors command");
@@ -514,6 +515,12 @@ void Comm::set_processors(int narg, char **arg)
       outfile = utils::strdup(arg[iarg+1]);
       iarg += 2;
 
+    } else if (strcmp(arg[iarg],"numa_nodes") == 0) {
+      if (iarg+2 > narg) error->all(FLERR,"Illegal processors command");
+      numa_nodes = utils::inumeric(FLERR,arg[iarg+1],false,lmp);
+      if (numa_nodes < 1) numa_nodes = 2;
+      iarg += 2;
+
     } else error->all(FLERR,"Illegal processors command");
   }
 
@@ -565,7 +572,7 @@ void Comm::set_proc_grid(int outflag)
                         otherflag,other_style,other_procgrid,other_coregrid);
 
   } else if (gridflag == NUMA) {
-    pmap->numa_grid(nprocs,user_procgrid,procgrid,coregrid);
+    pmap->numa_grid(numa_nodes,nprocs,user_procgrid,procgrid,coregrid);
 
   } else if (gridflag == CUSTOM) {
     pmap->custom_grid(customfile,nprocs,user_procgrid,procgrid);
diff --git a/src/comm.h b/src/comm.h
index 5d803c1afa..fde4c3b81f 100644
--- a/src/comm.h
+++ b/src/comm.h
@@ -146,6 +146,7 @@ class Comm : protected Pointers {
   char xyz[4];         // xyz mapping of procs to 3d grid
   char *customfile;    // file with custom proc map
   char *outfile;       // proc grid/map output file
+  int numa_nodes;      // Number of numa domains per socket for 3d grid
 
   int otherflag;            // 1 if this partition dependent on another
   int other_style;          // style of dependency
diff --git a/src/comm_tiled.cpp b/src/comm_tiled.cpp
index 65cbfad0b5..b864e0523d 100644
--- a/src/comm_tiled.cpp
+++ b/src/comm_tiled.cpp
@@ -47,14 +47,9 @@ CommTiled::CommTiled(LAMMPS *lmp) : Comm(lmp)
 {
   style = Comm::TILED;
   layout = Comm::LAYOUT_UNIFORM;
-  pbc_flag = nullptr;
-  buf_send = nullptr;
-  buf_recv = nullptr;
-  overlap = nullptr;
-  rcbinfo = nullptr;
-  cutghostmulti = nullptr;
-  cutghostmultiold = nullptr;
-  init_buffers();
+  init_pointers();
+  init_buffers_flag = 0;
+  maxswap = 0;
 }
 
 /* ---------------------------------------------------------------------- */
@@ -69,7 +64,9 @@ CommTiled::CommTiled(LAMMPS * /*lmp*/, Comm *oldcomm) : Comm(*oldcomm)
   style = Comm::TILED;
   layout = oldcomm->layout;
   Comm::copy_arrays(oldcomm);
-  init_buffers();
+  init_pointers();
+  init_buffers_flag = 0;
+  maxswap = 0;
 }
 
 /* ---------------------------------------------------------------------- */
@@ -85,24 +82,59 @@ CommTiled::~CommTiled()
   memory->destroy(cutghostmultiold);
 }
 
+/* ----------------------------------------------------------------------
+   initialize comm pointers to nullptr
+------------------------------------------------------------------------- */
+
+void CommTiled::init_pointers()
+{
+  buf_send = buf_recv = nullptr;
+  overlap = nullptr;
+  rcbinfo = nullptr;
+  cutghostmulti = nullptr;
+  cutghostmultiold = nullptr;
+
+  nsendproc = nullptr;
+  nrecvproc = nullptr;
+  sendother = nullptr;
+  recvother = nullptr;
+  sendself = nullptr;
+  sendproc = nullptr;
+  recvproc = nullptr;
+  sendnum = nullptr;
+  recvnum = nullptr;
+  size_forward_recv = nullptr;
+  firstrecv = nullptr;
+  size_reverse_send = nullptr;
+  size_reverse_recv = nullptr;
+  forward_recv_offset = nullptr;
+  reverse_recv_offset = nullptr;
+  pbc_flag = nullptr;
+  pbc = nullptr;
+  sendbox = nullptr;
+  sendbox_multi = nullptr;
+  sendbox_multiold = nullptr;
+  maxsendlist = nullptr;
+  sendlist = nullptr;
+  requests = nullptr;
+  nprocmax = nullptr;
+  nexchproc = nullptr;
+  nexchprocmax = nullptr;
+  exchproc = nullptr;
+  exchnum = nullptr;
+}
+
 /* ----------------------------------------------------------------------
    initialize comm buffers and other data structs local to CommTiled
 ------------------------------------------------------------------------- */
 
 void CommTiled::init_buffers()
 {
-  buf_send = buf_recv = nullptr;
   maxsend = maxrecv = BUFMIN;
   grow_send(maxsend,2);
-  memory->create(buf_recv,maxrecv,"comm:buf_recv");
+  grow_recv(maxrecv,1);
 
   maxoverlap = 0;
-  overlap = nullptr;
-  rcbinfo = nullptr;
-  cutghostmulti = nullptr;
-  cutghostmultiold = nullptr;
-  sendbox_multi = nullptr;
-  sendbox_multiold = nullptr;
 
   // Note this may skip growing multi arrays, will call again in init()
   maxswap = 6;
@@ -113,6 +145,11 @@ void CommTiled::init_buffers()
 
 void CommTiled::init()
 {
+  if (!init_buffers_flag) {
+    init_buffers();
+    init_buffers_flag = 1;
+  }
+
   Comm::init();
 
   // cannot set nswap in init_buffers() b/c
@@ -2236,12 +2273,15 @@ void CommTiled::grow_send(int n, int flag)
 }
 
 /* ----------------------------------------------------------------------
-   free/malloc the size of the recv buffer as needed with BUFFACTOR
+   free/malloc the size of the recv buffer as needed
+   flag = 0, realloc with BUFFACTOR
+   flag = 1, free/malloc w/out BUFFACTOR
 ------------------------------------------------------------------------- */
 
-void CommTiled::grow_recv(int n)
+void CommTiled::grow_recv(int n, int flag)
 {
-  maxrecv = static_cast<int> (BUFFACTOR * n);
+  if (flag) maxrecv = n;
+  else maxrecv = static_cast<int> (BUFFACTOR * n);
   memory->destroy(buf_recv);
   memory->create(buf_recv,maxrecv,"comm:buf_recv");
 }
@@ -2428,8 +2468,10 @@ void CommTiled::deallocate_swap(int n)
 
     delete [] maxsendlist[i];
 
-    for (int j = 0; j < nprocmax[i]; j++) memory->destroy(sendlist[i][j]);
-    delete [] sendlist[i];
+    if (sendlist && sendlist[i]) {
+      for (int j = 0; j < nprocmax[i]; j++) memory->destroy(sendlist[i][j]);
+      delete [] sendlist[i];
+    }
   }
 
   delete [] sendproc;
diff --git a/src/comm_tiled.h b/src/comm_tiled.h
index c9434e6164..857cddf033 100644
--- a/src/comm_tiled.h
+++ b/src/comm_tiled.h
@@ -51,7 +51,7 @@ class CommTiled : public Comm {
 
   double memory_usage() override;
 
- private:
+ protected:
   int nswap;      // # of swaps to perform = 2*dim
   int maxswap;    // largest nswap can be = 6
 
@@ -117,8 +117,9 @@ class CommTiled : public Comm {
   double *sublo, *subhi;
   int dimension;
 
-  // NOTE: init_buffers is called from a constructor and must not be made virtual
+  void init_pointers();
   void init_buffers();
+  int init_buffers_flag;
 
   // box drop and other functions
 
@@ -145,11 +146,11 @@ class CommTiled : public Comm {
   int point_drop_tiled_recurse(double *, int, int);
   int closer_subbox_edge(int, double *);
 
-  void grow_send(int, int);               // reallocate send buffer
-  void grow_recv(int);                    // free/allocate recv buffer
-  void grow_list(int, int, int);          // reallocate sendlist for one swap/proc
+  virtual void grow_send(int, int);               // reallocate send buffer
+  virtual void grow_recv(int, int flag = 0);      // free/allocate recv buffer
+  virtual void grow_list(int, int, int);          // reallocate sendlist for one swap/proc
   void allocate_swap(int);                // allocate swap arrays
-  void grow_swap_send(int, int, int);     // grow swap arrays for send and recv
+  virtual void grow_swap_send(int, int, int);     // grow swap arrays for send and recv
   void grow_swap_send_multi(int, int);    // grow multi swap arrays for send and recv
   void grow_swap_recv(int, int);
   void deallocate_swap(int);    // deallocate swap arrays
diff --git a/src/compute_aggregate_atom.cpp b/src/compute_aggregate_atom.cpp
index 5a489092b7..8c6f7165a2 100644
--- a/src/compute_aggregate_atom.cpp
+++ b/src/compute_aggregate_atom.cpp
@@ -31,7 +31,6 @@
 #include "update.h"
 
 #include <cmath>
-#include <cstring>
 
 using namespace LAMMPS_NS;
 
diff --git a/src/compute_cluster_atom.cpp b/src/compute_cluster_atom.cpp
index ba0f263747..0021d32e2c 100644
--- a/src/compute_cluster_atom.cpp
+++ b/src/compute_cluster_atom.cpp
@@ -25,7 +25,6 @@
 #include "update.h"
 
 #include <cmath>
-#include <cstring>
 
 using namespace LAMMPS_NS;
 
diff --git a/src/compute_cna_atom.cpp b/src/compute_cna_atom.cpp
index a09a671c07..b92dca8f86 100644
--- a/src/compute_cna_atom.cpp
+++ b/src/compute_cna_atom.cpp
@@ -29,7 +29,6 @@
 #include "update.h"
 
 #include <cmath>
-#include <cstring>
 
 using namespace LAMMPS_NS;
 
diff --git a/src/compute_count_type.cpp b/src/compute_count_type.cpp
index 3d4815f9ff..d430b23e11 100644
--- a/src/compute_count_type.cpp
+++ b/src/compute_count_type.cpp
@@ -14,12 +14,12 @@
 #include "compute_count_type.h"
 
 #include "atom.h"
-#include "domain.h"
 #include "error.h"
 #include "force.h"
-#include "group.h"
 #include "update.h"
 
+#include <cstring>
+
 using namespace LAMMPS_NS;
 
 enum { ATOM, BOND, ANGLE, DIHEDRAL, IMPROPER };
diff --git a/src/compute_dipole_chunk.h b/src/compute_dipole_chunk.h
index 603e6a4353..3ed875283f 100644
--- a/src/compute_dipole_chunk.h
+++ b/src/compute_dipole_chunk.h
@@ -23,7 +23,6 @@ ComputeStyle(dipole/chunk,ComputeDipoleChunk);
 #include "compute_chunk.h"
 
 namespace LAMMPS_NS {
-class Fix;
 
 class ComputeDipoleChunk : public ComputeChunk {
  public:
@@ -43,8 +42,6 @@ class ComputeDipoleChunk : public ComputeChunk {
 
   void allocate() override;
 };
-
 }    // namespace LAMMPS_NS
-
 #endif
 #endif
diff --git a/src/compute_erotate_sphere_atom.cpp b/src/compute_erotate_sphere_atom.cpp
index b1aca68614..fa1ce8a180 100644
--- a/src/compute_erotate_sphere_atom.cpp
+++ b/src/compute_erotate_sphere_atom.cpp
@@ -12,28 +12,25 @@
 ------------------------------------------------------------------------- */
 
 #include "compute_erotate_sphere_atom.h"
-#include <cstring>
+
 #include "atom.h"
-#include "update.h"
-#include "modify.h"
 #include "comm.h"
+#include "error.h"
 #include "force.h"
 #include "memory.h"
-#include "error.h"
+#include "modify.h"
+#include "update.h"
 
 using namespace LAMMPS_NS;
 
-static constexpr double INERTIA = 0.4;          // moment of inertia prefactor for sphere
+static constexpr double INERTIA = 0.4;    // moment of inertia prefactor for sphere
 
 /* ---------------------------------------------------------------------- */
 
-ComputeErotateSphereAtom::
-ComputeErotateSphereAtom(LAMMPS *lmp, int narg, char **arg) :
-  Compute(lmp, narg, arg),
-  erot(nullptr)
+ComputeErotateSphereAtom::ComputeErotateSphereAtom(LAMMPS *lmp, int narg, char **arg) :
+    Compute(lmp, narg, arg), erot(nullptr)
 {
-  if (narg != 3)
-    error->all(FLERR,"Illegal compute erotate/sphere//atom command");
+  if (narg != 3) error->all(FLERR, "Illegal compute erotate/sphere//atom command");
 
   peratom_flag = 1;
   size_peratom_cols = 0;
@@ -41,9 +38,9 @@ ComputeErotateSphereAtom(LAMMPS *lmp, int narg, char **arg) :
   // error check
 
   if (!atom->omega_flag)
-    error->all(FLERR,"Compute erotate/sphere/atom requires atom attribute omega");
+    error->all(FLERR, "Compute erotate/sphere/atom requires atom attribute omega");
   if (!atom->radius_flag)
-    error->all(FLERR,"Compute erotate/sphere/atom requires atom attribute radius");
+    error->all(FLERR, "Compute erotate/sphere/atom requires atom attribute radius");
 
   nmax = 0;
 }
@@ -76,7 +73,7 @@ void ComputeErotateSphereAtom::compute_peratom()
   if (atom->nmax > nmax) {
     memory->destroy(erot);
     nmax = atom->nmax;
-    memory->create(erot,nmax,"erotate/sphere/atom:erot");
+    memory->create(erot, nmax, "erotate/sphere/atom:erot");
     vector_atom = erot;
   }
 
@@ -91,10 +88,12 @@ void ComputeErotateSphereAtom::compute_peratom()
 
   for (int i = 0; i < nlocal; i++) {
     if (mask[i] & groupbit) {
-      erot[i] = (omega[i][0]*omega[i][0] + omega[i][1]*omega[i][1] +
-                 omega[i][2]*omega[i][2]) * radius[i]*radius[i]*rmass[i];
+      erot[i] =
+          (omega[i][0] * omega[i][0] + omega[i][1] * omega[i][1] + omega[i][2] * omega[i][2]) *
+          radius[i] * radius[i] * rmass[i];
       erot[i] *= pfactor;
-    } else erot[i] = 0.0;
+    } else
+      erot[i] = 0.0;
   }
 }
 
@@ -104,6 +103,6 @@ void ComputeErotateSphereAtom::compute_peratom()
 
 double ComputeErotateSphereAtom::memory_usage()
 {
-  double bytes = (double)nmax * sizeof(double);
+  double bytes = (double) nmax * sizeof(double);
   return bytes;
 }
diff --git a/src/compute_heat_flux.cpp b/src/compute_heat_flux.cpp
index 62b2c8b63b..64cc8e69f3 100644
--- a/src/compute_heat_flux.cpp
+++ b/src/compute_heat_flux.cpp
@@ -19,12 +19,11 @@
 
 #include "compute_heat_flux.h"
 
-#include <cstring>
 #include "atom.h"
-#include "update.h"
-#include "modify.h"
-#include "force.h"
 #include "error.h"
+#include "force.h"
+#include "modify.h"
+#include "update.h"
 
 using namespace LAMMPS_NS;
 
diff --git a/src/compute_ke_atom.cpp b/src/compute_ke_atom.cpp
index 9a329232b3..e8ab1b8b25 100644
--- a/src/compute_ke_atom.cpp
+++ b/src/compute_ke_atom.cpp
@@ -12,6 +12,7 @@
 ------------------------------------------------------------------------- */
 
 #include "compute_ke_atom.h"
+
 #include "atom.h"
 #include "comm.h"
 #include "error.h"
@@ -19,7 +20,6 @@
 #include "memory.h"
 #include "modify.h"
 #include "update.h"
-#include <cstring>
 
 using namespace LAMMPS_NS;
 
diff --git a/src/compute_property_atom.cpp b/src/compute_property_atom.cpp
index c3c101b995..b95b7267dc 100644
--- a/src/compute_property_atom.cpp
+++ b/src/compute_property_atom.cpp
@@ -205,6 +205,14 @@ ComputePropertyAtom::ComputePropertyAtom(LAMMPS *lmp, int narg, char **arg) :
       if (!atom->omega_flag)
         error->all(FLERR,"Compute property/atom {} is not available", arg[iarg]);
       pack_choice[i] = &ComputePropertyAtom::pack_omegaz;
+    } else if (strcmp(arg[iarg],"temperature") == 0) {
+      if (!atom->temperature_flag)
+        error->all(FLERR,"Compute property/atom {} is not available", arg[iarg]);
+      pack_choice[i] = &ComputePropertyAtom::pack_temperature;
+    } else if (strcmp(arg[iarg],"heatflow") == 0) {
+      if (!atom->heatflow_flag)
+        error->all(FLERR,"Compute property/atom {} is not available", arg[iarg]);
+      pack_choice[i] = &ComputePropertyAtom::pack_heatflow;
     } else if (strcmp(arg[iarg],"angmomx") == 0) {
       if (!atom->angmom_flag)
         error->all(FLERR,"Compute property/atom {} is not available", arg[iarg]);
@@ -1213,6 +1221,36 @@ void ComputePropertyAtom::pack_omegaz(int n)
 
 /* ---------------------------------------------------------------------- */
 
+void ComputePropertyAtom::pack_temperature(int n)
+{
+  double *temperature = atom->temperature;
+  int *mask = atom->mask;
+  int nlocal = atom->nlocal;
+
+  for (int i = 0; i < nlocal; i++) {
+    if (mask[i] & groupbit) buf[n] = temperature[i];
+    else buf[n] = 0.0;
+    n += nvalues;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void ComputePropertyAtom::pack_heatflow(int n)
+{
+  double *heatflow = atom->heatflow;
+  int *mask = atom->mask;
+  int nlocal = atom->nlocal;
+
+  for (int i = 0; i < nlocal; i++) {
+    if (mask[i] & groupbit) buf[n] = heatflow[i];
+    else buf[n] = 0.0;
+    n += nvalues;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
 void ComputePropertyAtom::pack_angmomx(int n)
 {
   double **angmom = atom->angmom;
diff --git a/src/compute_property_atom.h b/src/compute_property_atom.h
index 034b2901c2..d4f4db564f 100644
--- a/src/compute_property_atom.h
+++ b/src/compute_property_atom.h
@@ -95,6 +95,8 @@ class ComputePropertyAtom : public Compute {
   void pack_omegax(int);
   void pack_omegay(int);
   void pack_omegaz(int);
+  void pack_temperature(int);
+  void pack_heatflow(int);
   void pack_angmomx(int);
   void pack_angmomy(int);
   void pack_angmomz(int);
diff --git a/src/compute_rdf.cpp b/src/compute_rdf.cpp
index 17fe450fe5..89f3c91017 100644
--- a/src/compute_rdf.cpp
+++ b/src/compute_rdf.cpp
@@ -46,7 +46,7 @@ ComputeRDF::ComputeRDF(LAMMPS *lmp, int narg, char **arg) :
   hist(nullptr), histall(nullptr), typecount(nullptr), icount(nullptr), jcount(nullptr),
   duplicates(nullptr)
 {
-  if (narg < 4) error->all(FLERR,"Illegal compute rdf command");
+  if (narg < 4) utils::missing_cmd_args(FLERR,"compute rdf", error);
 
   array_flag = 1;
   extarray = 0;
@@ -67,12 +67,14 @@ ComputeRDF::ComputeRDF(LAMMPS *lmp, int narg, char **arg) :
 
   while (iarg < narg) {
     if (strcmp(arg[iarg],"cutoff") == 0) {
-      if (iarg+2 > narg) error->all(FLERR,"Illegal compute rdf command");
+      if (iarg+2 > narg) utils::missing_cmd_args(FLERR,"compute rdf cutoff", error);
+      if ((neighbor->style == Neighbor::MULTI) || (neighbor->style == Neighbor::MULTI_OLD))
+        error->all(FLERR, "Compute rdf with custom cutoff requires neighbor style 'bin' or 'nsq'");
       cutoff_user = utils::numeric(FLERR,arg[iarg+1],false,lmp);
       if (cutoff_user <= 0.0) cutflag = 0;
       else cutflag = 1;
       iarg += 2;
-    } else error->all(FLERR,"Illegal compute rdf command");
+    } else error->all(FLERR,"Unknown compute rdf keyword {}", arg[iarg]);
   }
 
   // pairwise args
@@ -94,7 +96,7 @@ ComputeRDF::ComputeRDF(LAMMPS *lmp, int narg, char **arg) :
   jlo = new int[npairs];
   jhi = new int[npairs];
 
-  if (nargpair == 0) {
+  if (!nargpair) {
     ilo[0] = 1; ihi[0] = ntypes;
     jlo[0] = 1; jhi[0] = ntypes;
   } else {
@@ -139,17 +141,17 @@ ComputeRDF::~ComputeRDF()
 {
   memory->destroy(rdfpair);
   memory->destroy(nrdfpair);
-  delete [] ilo;
-  delete [] ihi;
-  delete [] jlo;
-  delete [] jhi;
+  delete[] ilo;
+  delete[] ihi;
+  delete[] jlo;
+  delete[] jhi;
   memory->destroy(hist);
   memory->destroy(histall);
   memory->destroy(array);
-  delete [] typecount;
-  delete [] icount;
-  delete [] jcount;
-  delete [] duplicates;
+  delete[] typecount;
+  delete[] icount;
+  delete[] jcount;
+  delete[] duplicates;
 }
 
 /* ---------------------------------------------------------------------- */
@@ -158,8 +160,7 @@ void ComputeRDF::init()
 {
 
   if (!force->pair && !cutflag)
-    error->all(FLERR,"Compute rdf requires a pair style be defined "
-               "or cutoff specified");
+    error->all(FLERR,"Compute rdf requires a pair style or an explicit cutoff");
 
   if (cutflag) {
     double skin = neighbor->skin;
@@ -205,7 +206,11 @@ void ComputeRDF::init()
   //   than cutoff_user apart, just like a normal neighbor list does
 
   auto req = neighbor->add_request(this, NeighConst::REQ_OCCASIONAL);
-  if (cutflag) req->set_cutoff(mycutneigh);
+  if (cutflag) {
+    if ((neighbor->style == Neighbor::MULTI) || (neighbor->style == Neighbor::MULTI_OLD))
+      error->all(FLERR, "Compute rdf with custom cutoff requires neighbor style 'bin' or 'nsq'");
+    req->set_cutoff(mycutneigh);
+  }
 }
 
 /* ---------------------------------------------------------------------- */
@@ -254,7 +259,7 @@ void ComputeRDF::init_norm()
   for (i = 0; i < npairs; i++) jcount[i] = scratch[i];
   MPI_Allreduce(duplicates,scratch,npairs,MPI_INT,MPI_SUM,world);
   for (i = 0; i < npairs; i++) duplicates[i] = scratch[i];
-  delete [] scratch;
+  delete[] scratch;
 }
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/dihedral_write.cpp b/src/dihedral_write.cpp
index 375373523b..dd1ca1de6a 100644
--- a/src/dihedral_write.cpp
+++ b/src/dihedral_write.cpp
@@ -25,7 +25,6 @@
 #include "error.h"
 #include "force.h"
 #include "input.h"
-#include "lammps.h"
 #include "math_const.h"
 #include "update.h"
 
diff --git a/src/dump_custom.cpp b/src/dump_custom.cpp
index 76441bb39d..6e7ff619b6 100644
--- a/src/dump_custom.cpp
+++ b/src/dump_custom.cpp
@@ -41,7 +41,7 @@ enum{ID,MOL,PROC,PROCP1,TYPE,ELEMENT,MASS,
      XSU,YSU,ZSU,XSUTRI,YSUTRI,ZSUTRI,
      IX,IY,IZ,
      VX,VY,VZ,FX,FY,FZ,
-     Q,MUX,MUY,MUZ,MU,RADIUS,DIAMETER,HEATFLOW,TEMPERATURE,
+     Q,MUX,MUY,MUZ,MU,RADIUS,DIAMETER,
      OMEGAX,OMEGAY,OMEGAZ,ANGMOMX,ANGMOMY,ANGMOMZ,
      TQX,TQY,TQZ,
      COMPUTE,FIX,VARIABLE,IVEC,DVEC,IARRAY,DARRAY};
@@ -983,18 +983,6 @@ int DumpCustom::count()
         for (i = 0; i < nlocal; i++) dchoose[i] = 2.0*radius[i];
         ptr = dchoose;
         nstride = 1;
-      } else if (thresh_array[ithresh] == HEATFLOW) {
-        if (!atom->heatflow_flag)
-          error->all(FLERR,
-                     "Threshold for an atom property that isn't allocated");
-        ptr = atom->heatflow;
-        nstride = 1;
-      } else if (thresh_array[ithresh] == TEMPERATURE) {
-        if (!atom->temperature_flag)
-          error->all(FLERR,
-                     "Threshold for an atom property that isn't allocated");
-        ptr = atom->temperature;
-        nstride = 1;
       } else if (thresh_array[ithresh] == OMEGAX) {
         if (!atom->omega_flag)
           error->all(FLERR,
@@ -1466,18 +1454,6 @@ int DumpCustom::parse_fields(int narg, char **arg)
         error->all(FLERR,"Dumping an atom property that isn't allocated");
       pack_choice[iarg] = &DumpCustom::pack_diameter;
       vtype[iarg] = Dump::DOUBLE;
-
-    } else if (strcmp(arg[iarg],"heatflow") == 0) {
-      if (!atom->heatflow_flag)
-        error->all(FLERR,"Dumping an atom property that isn't allocated");
-      pack_choice[iarg] = &DumpCustom::pack_heatflow;
-      vtype[iarg] = Dump::DOUBLE;
-    } else if (strcmp(arg[iarg],"temperature") == 0) {
-      if (!atom->temperature_flag)
-        error->all(FLERR,"Dumping an atom property that isn't allocated");
-      pack_choice[iarg] = &DumpCustom::pack_temperature;
-      vtype[iarg] = Dump::DOUBLE;
-
     } else if (strcmp(arg[iarg],"omegax") == 0) {
       if (!atom->omega_flag)
         error->all(FLERR,"Dumping an atom property that isn't allocated");
@@ -1975,8 +1951,6 @@ int DumpCustom::modify_param(int narg, char **arg)
 
     else if (strcmp(arg[1],"radius") == 0) thresh_array[nthresh] = RADIUS;
     else if (strcmp(arg[1],"diameter") == 0) thresh_array[nthresh] = DIAMETER;
-    else if (strcmp(arg[1],"heatflow") == 0) thresh_array[nthresh] = HEATFLOW;
-    else if (strcmp(arg[1],"temperature") == 0) thresh_array[nthresh] = TEMPERATURE;
     else if (strcmp(arg[1],"omegax") == 0) thresh_array[nthresh] = OMEGAX;
     else if (strcmp(arg[1],"omegay") == 0) thresh_array[nthresh] = OMEGAY;
     else if (strcmp(arg[1],"omegaz") == 0) thresh_array[nthresh] = OMEGAZ;
@@ -3137,30 +3111,6 @@ void DumpCustom::pack_diameter(int n)
 
 /* ---------------------------------------------------------------------- */
 
-void DumpCustom::pack_heatflow(int n)
-{
-  double *heatflow = atom->heatflow;
-
-  for (int i = 0; i < nchoose; i++) {
-    buf[n] = heatflow[clist[i]];
-    n += size_one;
-  }
-}
-
-/* ---------------------------------------------------------------------- */
-
-void DumpCustom::pack_temperature(int n)
-{
-  double *temperature = atom->temperature;
-
-  for (int i = 0; i < nchoose; i++) {
-    buf[n] = temperature[clist[i]];
-    n += size_one;
-  }
-}
-
-/* ---------------------------------------------------------------------- */
-
 void DumpCustom::pack_omegax(int n)
 {
   double **omega = atom->omega;
diff --git a/src/dump_custom.h b/src/dump_custom.h
index ceb85ea3c2..6b4ca88b0b 100644
--- a/src/dump_custom.h
+++ b/src/dump_custom.h
@@ -215,9 +215,6 @@ class DumpCustom : public Dump {
   void pack_radius(int);
   void pack_diameter(int);
 
-  void pack_heatflow(int);
-  void pack_temperature(int);
-
   void pack_omegax(int);
   void pack_omegay(int);
   void pack_omegaz(int);
diff --git a/src/dump_grid.cpp b/src/dump_grid.cpp
index 4c89b05739..ac42a85b01 100644
--- a/src/dump_grid.cpp
+++ b/src/dump_grid.cpp
@@ -23,7 +23,6 @@
 #include "grid3d.h"
 #include "memory.h"
 #include "modify.h"
-#include "region.h"
 #include "update.h"
 
 #include <cstring>
diff --git a/src/fix_ave_chunk.cpp b/src/fix_ave_chunk.cpp
index 8ed518efc2..a0d25cf2c7 100644
--- a/src/fix_ave_chunk.cpp
+++ b/src/fix_ave_chunk.cpp
@@ -301,8 +301,7 @@ FixAveChunk::FixAveChunk(LAMMPS *lmp, int narg, char **arg) :
   if (fp && comm->me == 0) {
     clearerr(fp);
     if (title1) fprintf(fp,"%s\n",title1);
-    else fprintf(fp,"# Chunk-averaged data for fix %s and group %s\n",
-                 id, group);
+    else fprintf(fp,"# Chunk-averaged data for fix %s and group %s\n", id, group);
     if (title2) fprintf(fp,"%s\n",title2);
     else fprintf(fp,"# Timestep Number-of-chunks Total-count\n");
     if (title3) fprintf(fp,"%s\n",title3);
diff --git a/src/fix_balance.cpp b/src/fix_balance.cpp
index 23a56c0a9d..2a32e96106 100644
--- a/src/fix_balance.cpp
+++ b/src/fix_balance.cpp
@@ -61,20 +61,36 @@ FixBalance::FixBalance(LAMMPS *lmp, int narg, char **arg) :
   if (nevery < 0) error->all(FLERR,"Illegal fix balance command");
   thresh = utils::numeric(FLERR,arg[4],false,lmp);
 
-  if (strcmp(arg[5],"shift") == 0) lbstyle = SHIFT;
-  else if (strcmp(arg[5],"rcb") == 0) lbstyle = BISECTION;
-  else error->all(FLERR,"Illegal fix balance command");
+  reportonly = 0;
+  if (strcmp(arg[5],"shift") == 0) {
+    lbstyle = SHIFT;
+  } else if (strcmp(arg[5],"rcb") == 0) {
+    lbstyle = BISECTION;
+  } else if (strcmp(arg[5],"report") == 0) {
+    lbstyle = SHIFT;
+    reportonly = 1;
+  } else error->all(FLERR,"Unknown fix balance style {}", arg[5]);
 
   int iarg = 5;
   if (lbstyle == SHIFT) {
-    if (iarg+4 > narg) utils::missing_cmd_args(FLERR, "fix balance shift", error);
-    bstr = arg[iarg+1];
-    if (bstr.size() > Balance::BSTR_SIZE) error->all(FLERR,"Illegal fix balance shift command");
-    nitermax = utils::inumeric(FLERR,arg[iarg+2],false,lmp);
-    if (nitermax <= 0) error->all(FLERR,"Illegal fix balance command");
-    stopthresh = utils::numeric(FLERR,arg[iarg+3],false,lmp);
-    if (stopthresh < 1.0) error->all(FLERR,"Illegal fix balance command");
-    iarg += 4;
+    if (reportonly) {
+      if (dimension == 2)
+        bstr = "xy";
+      else
+        bstr = "xyz";
+      nitermax = 5;
+      stopthresh = 1.1;
+      iarg++;
+    } else {
+      if (iarg+4 > narg) utils::missing_cmd_args(FLERR, "fix balance shift", error);
+      bstr = arg[iarg+1];
+      if (bstr.size() > Balance::BSTR_SIZE) error->all(FLERR,"Illegal fix balance shift command");
+      nitermax = utils::inumeric(FLERR,arg[iarg+2],false,lmp);
+      if (nitermax <= 0) error->all(FLERR,"Illegal fix balance command");
+      stopthresh = utils::numeric(FLERR,arg[iarg+3],false,lmp);
+      if (stopthresh < 1.0) error->all(FLERR,"Illegal fix balance command");
+      iarg += 4;
+    }
 
   } else if (lbstyle == BISECTION) {
     iarg++;
@@ -175,7 +191,7 @@ void FixBalance::setup(int /*vflag*/)
 void FixBalance::setup_pre_exchange()
 {
   // do not allow rebalancing twice on same timestep
-  // even if wanted to, can mess up elapsed time in ImbalanceTime
+  // even if you wanted to, it can mess up elapsed time in ImbalanceTime
 
   if (update->ntimestep == lastbalance) return;
   lastbalance = update->ntimestep;
@@ -195,6 +211,7 @@ void FixBalance::setup_pre_exchange()
 
   balance->set_weights();
   imbnow = balance->imbalance_factor(maxloadperproc);
+
   if (imbnow > thresh) rebalance();
 
   // next timestep to rebalance
@@ -263,6 +280,13 @@ void FixBalance::pre_neighbor()
 
 void FixBalance::rebalance()
 {
+  // return immediately if only reporting of the imbalance is requested
+
+  if (reportonly) {
+    imbprev = imbfinal = imbnow;
+    return;
+  }
+
   imbprev = imbnow;
 
   // invoke balancer and reset comm->uniform flag
diff --git a/src/fix_balance.h b/src/fix_balance.h
index 964357a634..a319710ac6 100644
--- a/src/fix_balance.h
+++ b/src/fix_balance.h
@@ -45,6 +45,7 @@ class FixBalance : public Fix {
   std::string bstr;
   int wtflag;               // 1 for weighted balancing
   int sortflag;             // 1 for sorting comm messages
+  int reportonly;           // 1 if skipping rebalancing and only computing imbalance
 
   double imbnow;            // current imbalance factor
   double imbprev;           // imbalance factor before last rebalancing
diff --git a/src/fix_deform.cpp b/src/fix_deform.cpp
index 02aaae5940..bb27faeaa8 100644
--- a/src/fix_deform.cpp
+++ b/src/fix_deform.cpp
@@ -34,171 +34,204 @@
 
 #include <cmath>
 #include <cstring>
+#include <unordered_map>
+#include <unordered_set>
 
 using namespace LAMMPS_NS;
 using namespace FixConst;
 using namespace MathConst;
 
-enum{NONE=0,FINAL,DELTA,SCALE,VEL,ERATE,TRATE,VOLUME,WIGGLE,VARIABLE};
-enum{ONE_FROM_ONE,ONE_FROM_TWO,TWO_FROM_ONE};
-
 /* ---------------------------------------------------------------------- */
 
 FixDeform::FixDeform(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg),
 irregular(nullptr), set(nullptr)
 {
-  if (narg < 4) error->all(FLERR,"Illegal fix deform command");
+  const std::string thiscmd = fmt::format("fix {}", style);
+  if (narg < 4) utils::missing_cmd_args(FLERR, thiscmd, error);
 
   no_change_box = 1;
   restart_global = 1;
   pre_exchange_migrate = 1;
 
-  nevery = utils::inumeric(FLERR,arg[3],false,lmp);
-  if (nevery <= 0) error->all(FLERR,"Illegal fix deform command");
+  nevery = utils::inumeric(FLERR, arg[3], false, lmp);
+  if (nevery <= 0) error->all(FLERR, "Fix {} Nevery must be > 0", style);
+
+  // arguments for child classes
+
+  std::unordered_set<std::string> child_parameters;
+  std::unordered_map<std::string, int> child_styles;
+  int nskip;
+  if (utils::strmatch(style, "^deform/pressure")) {
+    child_parameters.insert("box");
+    child_styles.insert({{"pressure", 4}, {"pressure/mean", 4}, {"volume", 2}});
+  }
 
   // set defaults
 
   set = new Set[6];
-  memset(set,0,6*sizeof(Set));
+  memset(set, 0, 6 * sizeof(Set));
 
-  // parse arguments
+  // parse all parameter/style arguments for this parent and also child classes
+  // for child classes, simply store them in leftover_iarg and skip over them
 
   triclinic = domain->triclinic;
 
   int index;
   int iarg = 4;
+
   while (iarg < narg) {
-    if (strcmp(arg[iarg],"x") == 0 ||
-        strcmp(arg[iarg],"y") == 0 ||
-        strcmp(arg[iarg],"z") == 0) {
+    if ((strcmp(arg[iarg], "x") == 0)
+        || (strcmp(arg[iarg], "y") == 0)
+        || (strcmp(arg[iarg], "z") == 0)) {
 
-      if (strcmp(arg[iarg],"x") == 0) index = 0;
-      else if (strcmp(arg[iarg],"y") == 0) index = 1;
-      else if (strcmp(arg[iarg],"z") == 0) index = 2;
+      if (strcmp(arg[iarg], "x") == 0) index = 0;
+      else if (strcmp(arg[iarg], "y") == 0) index = 1;
+      else if (strcmp(arg[iarg], "z") == 0) index = 2;
 
-      if (iarg+2 > narg) error->all(FLERR,"Illegal fix deform command");
-      if (strcmp(arg[iarg+1],"final") == 0) {
-        if (iarg+4 > narg) error->all(FLERR,"Illegal fix deform command");
+      if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, thiscmd, error);
+      if (strcmp(arg[iarg + 1], "final") == 0) {
+        if (iarg + 4 > narg) utils::missing_cmd_args(FLERR, thiscmd + " final", error);
         set[index].style = FINAL;
-        set[index].flo = utils::numeric(FLERR,arg[iarg+2],false,lmp);
-        set[index].fhi = utils::numeric(FLERR,arg[iarg+3],false,lmp);
+        set[index].flo = utils::numeric(FLERR, arg[iarg + 2], false, lmp);
+        set[index].fhi = utils::numeric(FLERR, arg[iarg + 3], false, lmp);
         iarg += 4;
-      } else if (strcmp(arg[iarg+1],"delta") == 0) {
-        if (iarg+4 > narg) error->all(FLERR,"Illegal fix deform command");
+      } else if (strcmp(arg[iarg + 1], "delta") == 0) {
+        if (iarg + 4 > narg) utils::missing_cmd_args(FLERR, thiscmd + " delta", error);
         set[index].style = DELTA;
-        set[index].dlo = utils::numeric(FLERR,arg[iarg+2],false,lmp);
-        set[index].dhi = utils::numeric(FLERR,arg[iarg+3],false,lmp);
+        set[index].dlo = utils::numeric(FLERR, arg[iarg + 2], false, lmp);
+        set[index].dhi = utils::numeric(FLERR, arg[iarg + 3], false, lmp);
         iarg += 4;
-      } else if (strcmp(arg[iarg+1],"scale") == 0) {
-        if (iarg+3 > narg) error->all(FLERR,"Illegal fix deform command");
+      } else if (strcmp(arg[iarg + 1], "scale") == 0) {
+        if (iarg + 3 > narg) utils::missing_cmd_args(FLERR, thiscmd + " scale", error);
         set[index].style = SCALE;
-        set[index].scale = utils::numeric(FLERR,arg[iarg+2],false,lmp);
+        set[index].scale = utils::numeric(FLERR, arg[iarg + 2], false, lmp);
         iarg += 3;
-      } else if (strcmp(arg[iarg+1],"vel") == 0) {
-        if (iarg+3 > narg) error->all(FLERR,"Illegal fix deform command");
+      } else if (strcmp(arg[iarg + 1], "vel") == 0) {
+        if (iarg + 3 > narg) utils::missing_cmd_args(FLERR, thiscmd + " vel", error);
         set[index].style = VEL;
-        set[index].vel = utils::numeric(FLERR,arg[iarg+2],false,lmp);
+        set[index].vel = utils::numeric(FLERR, arg[iarg + 2], false, lmp);
         iarg += 3;
-      } else if (strcmp(arg[iarg+1],"erate") == 0) {
-        if (iarg+3 > narg) error->all(FLERR,"Illegal fix deform command");
+      } else if (strcmp(arg[iarg + 1], "erate") == 0) {
+        if (iarg + 3 > narg) utils::missing_cmd_args(FLERR, thiscmd + " erate", error);
         set[index].style = ERATE;
-        set[index].rate = utils::numeric(FLERR,arg[iarg+2],false,lmp);
+        set[index].rate = utils::numeric(FLERR, arg[iarg + 2], false, lmp);
         iarg += 3;
-      } else if (strcmp(arg[iarg+1],"trate") == 0) {
-        if (iarg+3 > narg) error->all(FLERR,"Illegal fix deform command");
+      } else if (strcmp(arg[iarg + 1], "trate") == 0) {
+        if (iarg + 3 > narg) utils::missing_cmd_args(FLERR, thiscmd + " trate", error);
         set[index].style = TRATE;
-        set[index].rate = utils::numeric(FLERR,arg[iarg+2],false,lmp);
+        set[index].rate = utils::numeric(FLERR, arg[iarg + 2], false, lmp);
         iarg += 3;
-      } else if (strcmp(arg[iarg+1],"volume") == 0) {
+      } else if (strcmp(arg[iarg + 1], "volume") == 0) {
         set[index].style = VOLUME;
         iarg += 2;
-      } else if (strcmp(arg[iarg+1],"wiggle") == 0) {
-        if (iarg+4 > narg) error->all(FLERR,"Illegal fix deform command");
+      } else if (strcmp(arg[iarg + 1], "wiggle") == 0) {
+        if (iarg + 4 > narg) utils::missing_cmd_args(FLERR, thiscmd + " wiggle", error);
         set[index].style = WIGGLE;
-        set[index].amplitude = utils::numeric(FLERR,arg[iarg+2],false,lmp);
-        set[index].tperiod = utils::numeric(FLERR,arg[iarg+3],false,lmp);
+        set[index].amplitude = utils::numeric(FLERR, arg[iarg + 2], false, lmp);
+        set[index].tperiod = utils::numeric(FLERR, arg[iarg + 3], false, lmp);
         if (set[index].tperiod <= 0.0)
-          error->all(FLERR,"Illegal fix deform command");
+          error->all(FLERR, "Illegal fix {} wiggle period, must be positive", style);
         iarg += 4;
-      } else if (strcmp(arg[iarg+1],"variable") == 0) {
-        if (iarg+4 > narg) error->all(FLERR,"Illegal fix deform command");
+      } else if (strcmp(arg[iarg + 1], "variable") == 0) {
+        if (iarg + 4 > narg) utils::missing_cmd_args(FLERR, thiscmd + " variable", error);
         set[index].style = VARIABLE;
-        if (strstr(arg[iarg+2],"v_") != arg[iarg+2])
-          error->all(FLERR,"Illegal fix deform command");
-        if (strstr(arg[iarg+3],"v_") != arg[iarg+3])
-          error->all(FLERR,"Illegal fix deform command");
+        if (strstr(arg[iarg + 2], "v_") != arg[iarg + 2])
+          error->all(FLERR, "Illegal fix {} variable name {}", style, arg[iarg + 2]);
+        if (strstr(arg[iarg + 3], "v_") != arg[iarg + 3])
+          error->all(FLERR, "Illegal fix {} variable name {}", style, arg[iarg + 3]);
         delete[] set[index].hstr;
         delete[] set[index].hratestr;
-        set[index].hstr = utils::strdup(&arg[iarg+2][2]);
-        set[index].hratestr = utils::strdup(&arg[iarg+3][2]);
+        set[index].hstr = utils::strdup(&arg[iarg + 2][2]);
+        set[index].hratestr = utils::strdup(&arg[iarg + 3][2]);
         iarg += 4;
-      } else error->all(FLERR,"Illegal fix deform command");
+      } else if (child_styles.find(arg[iarg + 1]) != child_styles.end()) {
+        nskip = child_styles[arg[iarg + 1]];
+        if (iarg + nskip > narg)
+          utils::missing_cmd_args(FLERR, fmt::format("fix {} {}", style, arg[iarg + 1]), error);
+        for (int i = 0; i < nskip; i++) leftover_iarg.push_back(iarg + i);
+        iarg += nskip;
+      } else error->all(FLERR, "Illegal fix {} command argument: {}", style, arg[iarg + 1]);
 
-    } else if (strcmp(arg[iarg],"xy") == 0 ||
-               strcmp(arg[iarg],"xz") == 0 ||
-               strcmp(arg[iarg],"yz") == 0) {
+    } else if ((strcmp(arg[iarg], "xy") == 0)
+               || (strcmp(arg[iarg], "xz") == 0)
+               || (strcmp(arg[iarg], "yz") == 0)) {
 
-      if (triclinic == 0)
-        error->all(FLERR,"Fix deform tilt factors require triclinic box");
-      if (strcmp(arg[iarg],"xy") == 0) index = 5;
-      else if (strcmp(arg[iarg],"xz") == 0) index = 4;
-      else if (strcmp(arg[iarg],"yz") == 0) index = 3;
+      if (triclinic == 0) error->all(FLERR,"Fix {} tilt factors require triclinic box", style);
+      if (strcmp(arg[iarg], "xy") == 0) index = 5;
+      else if (strcmp(arg[iarg], "xz") == 0) index = 4;
+      else if (strcmp(arg[iarg], "yz") == 0) index = 3;
 
-      if (iarg+2 > narg) error->all(FLERR,"Illegal fix deform command");
-      if (strcmp(arg[iarg+1],"final") == 0) {
-        if (iarg+3 > narg) error->all(FLERR,"Illegal fix deform command");
+      if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, thiscmd, error);
+      if (strcmp(arg[iarg + 1], "final") == 0) {
+        if (iarg + 3 > narg) utils::missing_cmd_args(FLERR, thiscmd + " final", error);
         set[index].style = FINAL;
-        set[index].ftilt = utils::numeric(FLERR,arg[iarg+2],false,lmp);
+        set[index].ftilt = utils::numeric(FLERR, arg[iarg + 2], false, lmp);
         iarg += 3;
-      } else if (strcmp(arg[iarg+1],"delta") == 0) {
-        if (iarg+3 > narg) error->all(FLERR,"Illegal fix deform command");
+      } else if (strcmp(arg[iarg + 1], "delta") == 0) {
+        if (iarg + 3 > narg) utils::missing_cmd_args(FLERR, thiscmd + " delta", error);
         set[index].style = DELTA;
-        set[index].dtilt = utils::numeric(FLERR,arg[iarg+2],false,lmp);
+        set[index].dtilt = utils::numeric(FLERR, arg[iarg + 2], false, lmp);
         iarg += 3;
-      } else if (strcmp(arg[iarg+1],"vel") == 0) {
-        if (iarg+3 > narg) error->all(FLERR,"Illegal fix deform command");
+      } else if (strcmp(arg[iarg + 1], "vel") == 0) {
+        if (iarg + 3 > narg) utils::missing_cmd_args(FLERR, thiscmd + " vel", error);
         set[index].style = VEL;
-        set[index].vel = utils::numeric(FLERR,arg[iarg+2],false,lmp);
+        set[index].vel = utils::numeric(FLERR, arg[iarg + 2], false, lmp);
         iarg += 3;
-      } else if (strcmp(arg[iarg+1],"erate") == 0) {
-        if (iarg+3 > narg) error->all(FLERR,"Illegal fix deform command");
+      } else if (strcmp(arg[iarg + 1], "erate") == 0) {
+        if (iarg + 3 > narg) utils::missing_cmd_args(FLERR, thiscmd + " erate", error);
         set[index].style = ERATE;
-        set[index].rate = utils::numeric(FLERR,arg[iarg+2],false,lmp);
+        set[index].rate = utils::numeric(FLERR, arg[iarg + 2], false, lmp);
         iarg += 3;
-      } else if (strcmp(arg[iarg+1],"trate") == 0) {
-        if (iarg+3 > narg) error->all(FLERR,"Illegal fix deform command");
+      } else if (strcmp(arg[iarg + 1], "trate") == 0) {
+        if (iarg + 3 > narg) utils::missing_cmd_args(FLERR, thiscmd + " trate", error);
         set[index].style = TRATE;
-        set[index].rate = utils::numeric(FLERR,arg[iarg+2],false,lmp);
+        set[index].rate = utils::numeric(FLERR, arg[iarg + 2], false, lmp);
         iarg += 3;
-      } else if (strcmp(arg[iarg+1],"wiggle") == 0) {
-        if (iarg+4 > narg) error->all(FLERR,"Illegal fix deform command");
+      } else if (strcmp(arg[iarg + 1], "wiggle") == 0) {
+        if (iarg + 4 > narg) utils::missing_cmd_args(FLERR, thiscmd + " wiggle", error);
         set[index].style = WIGGLE;
-        set[index].amplitude = utils::numeric(FLERR,arg[iarg+2],false,lmp);
-        set[index].tperiod = utils::numeric(FLERR,arg[iarg+3],false,lmp);
+        set[index].amplitude = utils::numeric(FLERR, arg[iarg + 2], false, lmp);
+        set[index].tperiod = utils::numeric(FLERR, arg[iarg + 3], false, lmp);
         if (set[index].tperiod <= 0.0)
-          error->all(FLERR,"Illegal fix deform command");
+          error->all(FLERR, "Illegal fix {} wiggle period, must be positive", style);
         iarg += 4;
-      } else if (strcmp(arg[iarg+1],"variable") == 0) {
-        if (iarg+4 > narg) error->all(FLERR,"Illegal fix deform command");
+      } else if (strcmp(arg[iarg + 1], "variable") == 0) {
+        if (iarg + 4 > narg) utils::missing_cmd_args(FLERR, thiscmd + " variable", error);
         set[index].style = VARIABLE;
-        if (strstr(arg[iarg+2],"v_") != arg[iarg+2])
-          error->all(FLERR,"Illegal fix deform command");
-        if (strstr(arg[iarg+3],"v_") != arg[iarg+3])
-          error->all(FLERR,"Illegal fix deform command");
+        if (strstr(arg[iarg + 2], "v_") != arg[iarg + 2])
+          error->all(FLERR, "Illegal fix {} variable name {}", style, arg[iarg + 2]);
+        if (strstr(arg[iarg + 3], "v_") != arg[iarg + 3])
+          error->all(FLERR, "Illegal fix {} variable name {}", style, arg[iarg + 3]);
         delete[] set[index].hstr;
         delete[] set[index].hratestr;
-        set[index].hstr = utils::strdup(&arg[iarg+2][2]);
-        set[index].hratestr = utils::strdup(&arg[iarg+3][2]);
+        set[index].hstr = utils::strdup(&arg[iarg + 2][2]);
+        set[index].hratestr = utils::strdup(&arg[iarg + 3][2]);
         iarg += 4;
-      } else error->all(FLERR,"Illegal fix deform command");
-
+      } else if (child_styles.find(arg[iarg + 1]) != child_styles.end()) {
+        nskip = child_styles[arg[iarg + 1]];
+        if (iarg + nskip > narg)
+         utils::missing_cmd_args(FLERR, fmt::format("fix {} {}", style, arg[iarg + 1]), error);
+        for (int i = 0; i < nskip; i++) leftover_iarg.push_back(iarg + i);
+        iarg += nskip;
+      } else error->all(FLERR, "Illegal fix {} command argument: {}", style, arg[iarg + 1]);
+    } else if (child_parameters.find(arg[iarg]) != child_parameters.end()) {
+      if (child_styles.find(arg[iarg + 1]) != child_styles.end()) {
+        nskip = child_styles[arg[iarg + 1]];
+        if (iarg + nskip > narg)
+         utils::missing_cmd_args(FLERR, fmt::format("fix {} {}", style, arg[iarg + 1]), error);
+        for (int i = 0; i < nskip; i++) leftover_iarg.push_back(iarg + i);
+        iarg += nskip;
+      } else error->all(FLERR, "Illegal fix {} command argument: {}", style, arg[iarg + 1]);
     } else break;
   }
 
   // read options from end of input line
+
+  iarg_options_start = iarg;
+  options(narg - iarg, &arg[iarg]);
+
   // no x remap effectively moves atoms within box, so set restart_pbc
 
-  options(narg-iarg,&arg[iarg]);
   if (remapflag != Domain::X_REMAP) restart_pbc = 1;
 
   // setup dimflags used by other classes to check for volume-change conflicts
@@ -217,28 +250,19 @@ irregular(nullptr), set(nullptr)
   // no tensile deformation on shrink-wrapped dims
   // b/c shrink wrap will change box-length
 
-  if (set[0].style &&
-      (domain->boundary[0][0] >= 2 || domain->boundary[0][1] >= 2))
-      error->all(FLERR,"Cannot use fix deform on a shrink-wrapped boundary");
-  if (set[1].style &&
-      (domain->boundary[1][0] >= 2 || domain->boundary[1][1] >= 2))
-      error->all(FLERR,"Cannot use fix deform on a shrink-wrapped boundary");
-  if (set[2].style &&
-      (domain->boundary[2][0] >= 2 || domain->boundary[2][1] >= 2))
-      error->all(FLERR,"Cannot use fix deform on a shrink-wrapped boundary");
+  for (int i = 0; i < 3; i++)
+    if (set[i].style && (domain->boundary[i][0] >= 2 || domain->boundary[i][1] >= 2))
+      error->all(FLERR, "Cannot use fix {} on a shrink-wrapped boundary", style);
 
   // no tilt deformation on shrink-wrapped 2nd dim
   // b/c shrink wrap will change tilt factor in domain::reset_box()
 
-  if (set[3].style &&
-      (domain->boundary[2][0] >= 2 || domain->boundary[2][1] >= 2))
-    error->all(FLERR,"Cannot use fix deform tilt on a shrink-wrapped 2nd dim");
-  if (set[4].style &&
-      (domain->boundary[2][0] >= 2 || domain->boundary[2][1] >= 2))
-    error->all(FLERR,"Cannot use fix deform tilt on a shrink-wrapped 2nd dim");
-  if (set[5].style &&
-      (domain->boundary[1][0] >= 2 || domain->boundary[1][1] >= 2))
-    error->all(FLERR,"Cannot use fix deform tilt on a shrink-wrapped 2nd dim");
+  if (set[3].style && (domain->boundary[2][0] >= 2 || domain->boundary[2][1] >= 2))
+    error->all(FLERR, "Cannot use fix {} tilt on a shrink-wrapped 2nd dim", style);
+  if (set[4].style && (domain->boundary[2][0] >= 2 || domain->boundary[2][1] >= 2))
+    error->all(FLERR, "Cannot use fix {} tilt on a shrink-wrapped 2nd dim", style);
+  if (set[5].style && (domain->boundary[1][0] >= 2 || domain->boundary[1][1] >= 2))
+    error->all(FLERR, "Cannot use fix {} tilt on a shrink-wrapped 2nd dim", style);
 
   // apply scaling to FINAL,DELTA,VEL,WIGGLE since they have dist/vel units
 
@@ -247,7 +271,7 @@ irregular(nullptr), set(nullptr)
     if (set[i].style == FINAL || set[i].style == DELTA ||
         set[i].style == VEL || set[i].style == WIGGLE) flag = 1;
 
-  double xscale,yscale,zscale;
+  double xscale, yscale, zscale;
   if (flag && scaleflag) {
     xscale = domain->lattice->xlattice;
     yscale = domain->lattice->ylattice;
@@ -284,40 +308,40 @@ irregular(nullptr), set(nullptr)
 
   // for VOLUME, setup links to other dims
   // fixed, dynamic1, dynamic2
+  // only check for parent, otherwise child will check
 
-  for (int i = 0; i < 3; i++) {
-    if (set[i].style != VOLUME) continue;
-    int other1 = (i+1) % 3;
-    int other2 = (i+2) % 3;
+  if (strcmp(style, "deform") == 0) {
+    for (int i = 0; i < 3; i++) {
+      if (set[i].style != VOLUME) continue;
+      int other1 = (i + 1) % 3;
+      int other2 = (i + 2) % 3;
 
-    if (set[other1].style == NONE) {
-      if (set[other2].style == NONE || set[other2].style == VOLUME)
-        error->all(FLERR,"Fix deform volume setting is invalid");
-      set[i].substyle = ONE_FROM_ONE;
-      set[i].fixed = other1;
-      set[i].dynamic1 = other2;
-    } else if (set[other2].style == NONE) {
+      // Cannot use VOLUME option without at least one deformed dimension
       if (set[other1].style == NONE || set[other1].style == VOLUME)
-        error->all(FLERR,"Fix deform volume setting is invalid");
-      set[i].substyle = ONE_FROM_ONE;
-      set[i].fixed = other2;
-      set[i].dynamic1 = other1;
-    } else if (set[other1].style == VOLUME) {
-      if (set[other2].style == NONE || set[other2].style == VOLUME)
-        error->all(FLERR,"Fix deform volume setting is invalid");
-      set[i].substyle = TWO_FROM_ONE;
-      set[i].fixed = other1;
-      set[i].dynamic1 = other2;
-    } else if (set[other2].style == VOLUME) {
-      if (set[other1].style == NONE || set[other1].style == VOLUME)
-        error->all(FLERR,"Fix deform volume setting is invalid");
-      set[i].substyle = TWO_FROM_ONE;
-      set[i].fixed = other2;
-      set[i].dynamic1 = other1;
-    } else {
-      set[i].substyle = ONE_FROM_TWO;
-      set[i].dynamic1 = other1;
-      set[i].dynamic2 = other2;
+        if (set[other2].style == NONE || set[other2].style == VOLUME)
+          error->all(FLERR, "Fix {} volume setting is invalid", style);
+
+      if (set[other1].style == NONE) {
+        set[i].substyle = ONE_FROM_ONE;
+        set[i].fixed = other1;
+        set[i].dynamic1 = other2;
+      } else if (set[other2].style == NONE) {
+        set[i].substyle = ONE_FROM_ONE;
+        set[i].fixed = other2;
+        set[i].dynamic1 = other1;
+      } else if (set[other1].style == VOLUME) {
+        set[i].substyle = TWO_FROM_ONE;
+        set[i].fixed = other1;
+        set[i].dynamic1 = other2;
+      } else if (set[other2].style == VOLUME) {
+        set[i].substyle = TWO_FROM_ONE;
+        set[i].fixed = other2;
+        set[i].dynamic1 = other1;
+      } else {
+        set[i].substyle = ONE_FROM_TWO;
+        set[i].dynamic1 = other1;
+        set[i].dynamic2 = other2;
+      }
     }
   }
 
@@ -348,8 +372,6 @@ irregular(nullptr), set(nullptr)
 
   if (force_reneighbor) irregular = new Irregular(lmp);
   else irregular = nullptr;
-
-  TWOPI = 2.0*MY_PI;
 }
 
 /* ---------------------------------------------------------------------- */
@@ -394,7 +416,7 @@ void FixDeform::init()
   // domain, fix nvt/sllod, compute temp/deform only work on single h_rate
 
   if (modify->get_fix_by_style("deform").size() > 1)
-    error->all(FLERR,"More than one fix deform");
+    error->all(FLERR, "More than one fix deform");
 
   // Kspace setting
 
@@ -411,14 +433,14 @@ void FixDeform::init()
     if (set[i].style != VARIABLE) continue;
     set[i].hvar = input->variable->find(set[i].hstr);
     if (set[i].hvar < 0)
-      error->all(FLERR,"Variable name for fix deform does not exist");
+      error->all(FLERR, "Variable name {} for fix {} does not exist", set[i].hstr, style);
     if (!input->variable->equalstyle(set[i].hvar))
-      error->all(FLERR,"Variable for fix deform is invalid style");
+      error->all(FLERR, "Variable {} for fix {} is invalid style", set[i].hstr, style);
     set[i].hratevar = input->variable->find(set[i].hratestr);
     if (set[i].hratevar < 0)
-      error->all(FLERR,"Variable name for fix deform does not exist");
+      error->all(FLERR, "Variable name {} for fix {} does not exist", set[i].hratestr, style);
     if (!input->variable->equalstyle(set[i].hratevar))
-      error->all(FLERR,"Variable for fix deform is invalid style");
+      error->all(FLERR, "Variable {} for fix {} is invalid style", set[i].hratestr, style);
   }
 
   // set start/stop values for box size and shape
@@ -445,30 +467,26 @@ void FixDeform::init()
       set[i].lo_stop = set[i].lo_start + set[i].dlo;
       set[i].hi_stop = set[i].hi_start + set[i].dhi;
     } else if (set[i].style == SCALE) {
-      set[i].lo_stop = 0.5*(set[i].lo_start+set[i].hi_start) -
-        0.5*set[i].scale*(set[i].hi_start-set[i].lo_start);
-      set[i].hi_stop = 0.5*(set[i].lo_start+set[i].hi_start) +
-        0.5*set[i].scale*(set[i].hi_start-set[i].lo_start);
+      double shift = 0.5 * set[i].scale * (set[i].hi_start - set[i].lo_start);
+      set[i].lo_stop = 0.5 * (set[i].lo_start + set[i].hi_start) - shift;
+      set[i].hi_stop = 0.5 * (set[i].lo_start + set[i].hi_start) + shift;
     } else if (set[i].style == VEL) {
-      set[i].lo_stop = set[i].lo_start - 0.5*delt*set[i].vel;
-      set[i].hi_stop = set[i].hi_start + 0.5*delt*set[i].vel;
+      set[i].lo_stop = set[i].lo_start - 0.5 * delt * set[i].vel;
+      set[i].hi_stop = set[i].hi_start + 0.5 * delt * set[i].vel;
     } else if (set[i].style == ERATE) {
-      set[i].lo_stop = set[i].lo_start -
-        0.5*delt*set[i].rate * (set[i].hi_start-set[i].lo_start);
-      set[i].hi_stop = set[i].hi_start +
-        0.5*delt*set[i].rate * (set[i].hi_start-set[i].lo_start);
+      double shift = 0.5 * delt * set[i].rate * (set[i].hi_start - set[i].lo_start);
+      set[i].lo_stop = set[i].lo_start - shift;
+      set[i].hi_stop = set[i].hi_start + shift;
       if (set[i].hi_stop <= set[i].lo_stop)
-        error->all(FLERR,"Final box dimension due to fix deform is < 0.0");
+        error->all(FLERR, "Final box dimension due to fix {} is < 0.0", style);
     } else if (set[i].style == TRATE) {
-      set[i].lo_stop = 0.5*(set[i].lo_start+set[i].hi_start) -
-        0.5*((set[i].hi_start-set[i].lo_start) * exp(set[i].rate*delt));
-      set[i].hi_stop = 0.5*(set[i].lo_start+set[i].hi_start) +
-        0.5*((set[i].hi_start-set[i].lo_start) * exp(set[i].rate*delt));
+      double shift = 0.5 * ((set[i].hi_start - set[i].lo_start) * exp(set[i].rate * delt));
+      set[i].lo_stop = 0.5 * (set[i].lo_start + set[i].hi_start) - shift;
+      set[i].hi_stop = 0.5 * (set[i].lo_start + set[i].hi_start) + shift;
     } else if (set[i].style == WIGGLE) {
-      set[i].lo_stop = set[i].lo_start -
-        0.5*set[i].amplitude * sin(TWOPI*delt/set[i].tperiod);
-      set[i].hi_stop = set[i].hi_start +
-        0.5*set[i].amplitude * sin(TWOPI*delt/set[i].tperiod);
+      double shift = 0.5 * set[i].amplitude * sin(MY_2PI * delt / set[i].tperiod);
+      set[i].lo_stop = set[i].lo_start - shift;
+      set[i].hi_stop = set[i].hi_start + shift;
     }
   }
 
@@ -484,50 +502,46 @@ void FixDeform::init()
     } else if (set[i].style == DELTA) {
       set[i].tilt_stop = set[i].tilt_start + set[i].dtilt;
     } else if (set[i].style == VEL) {
-      set[i].tilt_stop = set[i].tilt_start + delt*set[i].vel;
+      set[i].tilt_stop = set[i].tilt_start + delt * set[i].vel;
     } else if (set[i].style == ERATE) {
       if (i == 3) set[i].tilt_stop = set[i].tilt_start +
-                    delt*set[i].rate * (set[2].hi_start-set[2].lo_start);
+                    delt * set[i].rate * (set[2].hi_start - set[2].lo_start);
       if (i == 4) set[i].tilt_stop = set[i].tilt_start +
-                    delt*set[i].rate * (set[2].hi_start-set[2].lo_start);
+                    delt * set[i].rate * (set[2].hi_start - set[2].lo_start);
       if (i == 5) set[i].tilt_stop = set[i].tilt_start +
-                    delt*set[i].rate * (set[1].hi_start-set[1].lo_start);
+                    delt * set[i].rate * (set[1].hi_start - set[1].lo_start);
     } else if (set[i].style == TRATE) {
-      set[i].tilt_stop = set[i].tilt_start * exp(set[i].rate*delt);
+      set[i].tilt_stop = set[i].tilt_start * exp(set[i].rate * delt);
     } else if (set[i].style == WIGGLE) {
-      set[i].tilt_stop = set[i].tilt_start +
-        set[i].amplitude * sin(TWOPI*delt/set[i].tperiod);
+      double shift = set[i].amplitude * sin(MY_2PI * delt / set[i].tperiod);
+      set[i].tilt_stop = set[i].tilt_start + shift;
 
       // compute min/max for WIGGLE = extrema tilt factor will ever reach
 
       if (set[i].amplitude >= 0.0) {
-        if (delt < 0.25*set[i].tperiod) {
+        if (delt < 0.25 * set[i].tperiod) {
           set[i].tilt_min = set[i].tilt_start;
-          set[i].tilt_max = set[i].tilt_start +
-            set[i].amplitude*sin(TWOPI*delt/set[i].tperiod);
-        } else if (delt < 0.5*set[i].tperiod) {
+          set[i].tilt_max = set[i].tilt_start + shift;
+        } else if (delt < 0.5 * set[i].tperiod) {
           set[i].tilt_min = set[i].tilt_start;
           set[i].tilt_max = set[i].tilt_start + set[i].amplitude;
-        } else if (delt < 0.75*set[i].tperiod) {
-          set[i].tilt_min = set[i].tilt_start -
-            set[i].amplitude*sin(TWOPI*delt/set[i].tperiod);
+        } else if (delt < 0.75 * set[i].tperiod) {
+          set[i].tilt_min = set[i].tilt_start - shift;
           set[i].tilt_max = set[i].tilt_start + set[i].amplitude;
         } else {
           set[i].tilt_min = set[i].tilt_start - set[i].amplitude;
           set[i].tilt_max = set[i].tilt_start + set[i].amplitude;
         }
       } else {
-        if (delt < 0.25*set[i].tperiod) {
-          set[i].tilt_min = set[i].tilt_start -
-            set[i].amplitude*sin(TWOPI*delt/set[i].tperiod);
+        if (delt < 0.25 * set[i].tperiod) {
+          set[i].tilt_min = set[i].tilt_start - shift;
           set[i].tilt_max = set[i].tilt_start;
-        } else if (delt < 0.5*set[i].tperiod) {
+        } else if (delt < 0.5 * set[i].tperiod) {
           set[i].tilt_min = set[i].tilt_start - set[i].amplitude;
           set[i].tilt_max = set[i].tilt_start;
-        } else if (delt < 0.75*set[i].tperiod) {
+        } else if (delt < 0.75 * set[i].tperiod) {
           set[i].tilt_min = set[i].tilt_start - set[i].amplitude;
-          set[i].tilt_max = set[i].tilt_start +
-            set[i].amplitude*sin(TWOPI*delt/set[i].tperiod);
+          set[i].tilt_max = set[i].tilt_start + shift;
         } else {
           set[i].tilt_min = set[i].tilt_start - set[i].amplitude;
           set[i].tilt_max = set[i].tilt_start + set[i].amplitude;
@@ -540,7 +554,7 @@ void FixDeform::init()
 
   for (int i = 3; i < 6; i++)
     if (set[i].style == TRATE && set[i].tilt_start == 0.0)
-      error->all(FLERR,"Cannot use fix deform trate on a box with zero tilt");
+      error->all(FLERR, "Cannot use fix {} trate on a box with zero tilt", style);
 
   // if yz changes and will cause box flip, then xy cannot be changing
   // yz = [3], xy = [5]
@@ -555,20 +569,20 @@ void FixDeform::init()
     int flag = 0;
     double lo,hi;
     if (flipflag && set[3].style == VARIABLE)
-      error->all(FLERR,"Fix deform cannot use yz variable with xy");
+      error->all(FLERR, "Fix {} cannot use yz variable with xy", style);
     if (set[3].style == WIGGLE) {
       lo = set[3].tilt_min;
       hi = set[3].tilt_max;
     } else lo = hi = set[3].tilt_stop;
     if (flipflag) {
-      if (lo/(set[1].hi_start-set[1].lo_start) < -0.5 ||
-          hi/(set[1].hi_start-set[1].lo_start) > 0.5) flag = 1;
+      if (lo / (set[1].hi_start - set[1].lo_start) < -0.5 ||
+          hi / (set[1].hi_start - set[1].lo_start) > 0.5) flag = 1;
       if (set[1].style) {
-        if (lo/(set[1].hi_stop-set[1].lo_stop) < -0.5 ||
-            hi/(set[1].hi_stop-set[1].lo_stop) > 0.5) flag = 1;
+        if (lo / (set[1].hi_stop - set[1].lo_stop) < -0.5 ||
+            hi / (set[1].hi_stop - set[1].lo_stop) > 0.5) flag = 1;
       }
       if (flag)
-        error->all(FLERR,"Fix deform is changing yz too much with xy");
+        error->all(FLERR, "Fix {} is changing yz too much with xy", style);
     }
   }
 
@@ -584,7 +598,7 @@ void FixDeform::init()
     if (set[i].style == FINAL || set[i].style == DELTA ||
         set[i].style == SCALE || set[i].style == VEL ||
         set[i].style == ERATE) {
-      double dlo_dt,dhi_dt;
+      double dlo_dt, dhi_dt;
       if (delt != 0.0) {
         dlo_dt = (set[i].lo_stop - set[i].lo_start) / delt;
         dhi_dt = (set[i].hi_stop - set[i].hi_start) / delt;
@@ -633,7 +647,7 @@ void FixDeform::pre_exchange()
   domain->set_global_box();
   domain->set_local_box();
 
-  domain->image_flip(flipxy,flipxz,flipyz);
+  domain->image_flip(flipxy, flipxz, flipyz);
 
   double **x = atom->x;
   imageint *image = atom->image;
@@ -651,104 +665,72 @@ void FixDeform::pre_exchange()
 
 void FixDeform::end_of_step()
 {
-  int i;
-
-  double delta = update->ntimestep - update->beginstep;
-  if (delta != 0.0) delta /= update->endstep - update->beginstep;
-
   // wrap variable evaluations with clear/add
 
   if (varflag) modify->clearstep_compute();
 
-  // set new box size
+  // set new box size for strain-based dims
+
+  apply_strain();
+
+  // set new box size for VOLUME dims that are linked to other dims
+  // NOTE: still need to set h_rate for these dims
+
+  apply_volume();
+
+  if (varflag) modify->addstep_compute(update->ntimestep + nevery);
+
+  update_domain();
+
+  // redo KSpace coeffs since box has changed
+
+  if (kspace_flag) force->kspace->setup();
+}
+
+/* ----------------------------------------------------------------------
+   apply strain controls
+------------------------------------------------------------------------- */
+
+void FixDeform::apply_strain()
+{
   // for NONE, target is current box size
   // for TRATE, set target directly based on current time, also set h_rate
   // for WIGGLE, set target directly based on current time, also set h_rate
   // for VARIABLE, set target directly via variable eval, also set h_rate
   // for others except VOLUME, target is linear value between start and stop
 
-  for (i = 0; i < 3; i++) {
+  double delta = update->ntimestep - update->beginstep;
+  if (delta != 0.0) delta /= update->endstep - update->beginstep;
+
+  for (int i = 0; i < 3; i++) {
     if (set[i].style == NONE) {
       set[i].lo_target = domain->boxlo[i];
       set[i].hi_target = domain->boxhi[i];
     } else if (set[i].style == TRATE) {
       double delt = (update->ntimestep - update->beginstep) * update->dt;
-      set[i].lo_target = 0.5*(set[i].lo_start+set[i].hi_start) -
-        0.5*((set[i].hi_start-set[i].lo_start) * exp(set[i].rate*delt));
-      set[i].hi_target = 0.5*(set[i].lo_start+set[i].hi_start) +
-        0.5*((set[i].hi_start-set[i].lo_start) * exp(set[i].rate*delt));
+      double shift = 0.5 * ((set[i].hi_start - set[i].lo_start) * exp(set[i].rate * delt));
+      set[i].lo_target = 0.5 * (set[i].lo_start + set[i].hi_start) - shift;
+      set[i].hi_target = 0.5 * (set[i].lo_start + set[i].hi_start) + shift;
       h_rate[i] = set[i].rate * domain->h[i];
-      h_ratelo[i] = -0.5*h_rate[i];
+      h_ratelo[i] = -0.5 * h_rate[i];
     } else if (set[i].style == WIGGLE) {
       double delt = (update->ntimestep - update->beginstep) * update->dt;
-      set[i].lo_target = set[i].lo_start -
-        0.5*set[i].amplitude * sin(TWOPI*delt/set[i].tperiod);
-      set[i].hi_target = set[i].hi_start +
-        0.5*set[i].amplitude * sin(TWOPI*delt/set[i].tperiod);
-      h_rate[i] = TWOPI/set[i].tperiod * set[i].amplitude *
-        cos(TWOPI*delt/set[i].tperiod);
-      h_ratelo[i] = -0.5*h_rate[i];
+      double shift = 0.5 * set[i].amplitude * sin(MY_2PI * delt / set[i].tperiod);
+      set[i].lo_target = set[i].lo_start - shift;
+      set[i].hi_target = set[i].hi_start + shift;
+      h_rate[i] = MY_2PI / set[i].tperiod * set[i].amplitude *
+        cos(MY_2PI * delt / set[i].tperiod);
+      h_ratelo[i] = -0.5 * h_rate[i];
     } else if (set[i].style == VARIABLE) {
       double del = input->variable->compute_equal(set[i].hvar);
-      set[i].lo_target = set[i].lo_start - 0.5*del;
-      set[i].hi_target = set[i].hi_start + 0.5*del;
+      set[i].lo_target = set[i].lo_start - 0.5 * del;
+      set[i].hi_target = set[i].hi_start + 0.5 * del;
       h_rate[i] = input->variable->compute_equal(set[i].hratevar);
-      h_ratelo[i] = -0.5*h_rate[i];
-    } else if (set[i].style != VOLUME) {
-      set[i].lo_target = set[i].lo_start +
-        delta*(set[i].lo_stop - set[i].lo_start);
-      set[i].hi_target = set[i].hi_start +
-        delta*(set[i].hi_stop - set[i].hi_start);
-    }
-  }
-
-  // set new box size for VOLUME dims that are linked to other dims
-  // NOTE: still need to set h_rate for these dims
-
-  for (i = 0; i < 3; i++) {
-    if (set[i].style != VOLUME) continue;
-
-    if (set[i].substyle == ONE_FROM_ONE) {
-      set[i].lo_target = 0.5*(set[i].lo_start+set[i].hi_start) -
-        0.5*(set[i].vol_start /
-             (set[set[i].dynamic1].hi_target -
-              set[set[i].dynamic1].lo_target) /
-             (set[set[i].fixed].hi_start-set[set[i].fixed].lo_start));
-      set[i].hi_target = 0.5*(set[i].lo_start+set[i].hi_start) +
-        0.5*(set[i].vol_start /
-             (set[set[i].dynamic1].hi_target -
-              set[set[i].dynamic1].lo_target) /
-             (set[set[i].fixed].hi_start-set[set[i].fixed].lo_start));
-
-    } else if (set[i].substyle == ONE_FROM_TWO) {
-      set[i].lo_target = 0.5*(set[i].lo_start+set[i].hi_start) -
-        0.5*(set[i].vol_start /
-             (set[set[i].dynamic1].hi_target -
-              set[set[i].dynamic1].lo_target) /
-             (set[set[i].dynamic2].hi_target -
-              set[set[i].dynamic2].lo_target));
-      set[i].hi_target = 0.5*(set[i].lo_start+set[i].hi_start) +
-        0.5*(set[i].vol_start /
-             (set[set[i].dynamic1].hi_target -
-              set[set[i].dynamic1].lo_target) /
-             (set[set[i].dynamic2].hi_target -
-              set[set[i].dynamic2].lo_target));
-
-    } else if (set[i].substyle == TWO_FROM_ONE) {
-      set[i].lo_target = 0.5*(set[i].lo_start+set[i].hi_start) -
-        0.5*sqrt(set[i].vol_start /
-                 (set[set[i].dynamic1].hi_target -
-                  set[set[i].dynamic1].lo_target) /
-                 (set[set[i].fixed].hi_start -
-                  set[set[i].fixed].lo_start) *
-                 (set[i].hi_start - set[i].lo_start));
-      set[i].hi_target = 0.5*(set[i].lo_start+set[i].hi_start) +
-        0.5*sqrt(set[i].vol_start /
-                 (set[set[i].dynamic1].hi_target -
-                  set[set[i].dynamic1].lo_target) /
-                 (set[set[i].fixed].hi_start -
-                  set[set[i].fixed].lo_start) *
-                 (set[i].hi_start - set[i].lo_start));
+      h_ratelo[i] = -0.5 * h_rate[i];
+    } else if (set[i].style == FINAL || set[i].style == DELTA || set[i].style == SCALE ||
+               set[i].style == VEL || set[i].style == ERATE) {
+      set[i].lo_target = set[i].lo_start + delta * (set[i].lo_stop - set[i].lo_start);
+      set[i].hi_target = set[i].hi_start + delta * (set[i].hi_stop - set[i].hi_start);
     }
   }
 
@@ -760,55 +742,97 @@ void FixDeform::end_of_step()
   // for other styles, target is linear value between start and stop values
 
   if (triclinic) {
-    double *h = domain->h;
-
-    for (i = 3; i < 6; i++) {
+    for (int i = 3; i < 6; i++) {
       if (set[i].style == NONE) {
         if (i == 5) set[i].tilt_target = domain->xy;
         else if (i == 4) set[i].tilt_target = domain->xz;
         else if (i == 3) set[i].tilt_target = domain->yz;
       } else if (set[i].style == TRATE) {
         double delt = (update->ntimestep - update->beginstep) * update->dt;
-        set[i].tilt_target = set[i].tilt_start * exp(set[i].rate*delt);
+        set[i].tilt_target = set[i].tilt_start * exp(set[i].rate * delt);
         h_rate[i] = set[i].rate * domain->h[i];
       } else if (set[i].style == WIGGLE) {
         double delt = (update->ntimestep - update->beginstep) * update->dt;
         set[i].tilt_target = set[i].tilt_start +
-          set[i].amplitude * sin(TWOPI*delt/set[i].tperiod);
-        h_rate[i] = TWOPI/set[i].tperiod * set[i].amplitude *
-          cos(TWOPI*delt/set[i].tperiod);
+          set[i].amplitude * sin(MY_2PI * delt / set[i].tperiod);
+        h_rate[i] = MY_2PI / set[i].tperiod * set[i].amplitude *
+          cos(MY_2PI * delt / set[i].tperiod);
       } else if (set[i].style == VARIABLE) {
         double delta_tilt = input->variable->compute_equal(set[i].hvar);
         set[i].tilt_target = set[i].tilt_start + delta_tilt;
         h_rate[i] = input->variable->compute_equal(set[i].hratevar);
       } else {
-        set[i].tilt_target = set[i].tilt_start +
-          delta*(set[i].tilt_stop - set[i].tilt_start);
+        set[i].tilt_target = set[i].tilt_start + delta * (set[i].tilt_stop - set[i].tilt_start);
       }
+    }
+  }
+}
 
-      // tilt_target can be large positive or large negative value
-      // add/subtract box lengths until tilt_target is closest to current value
+/* ----------------------------------------------------------------------
+   apply volume controls
+------------------------------------------------------------------------- */
 
+void FixDeform::apply_volume()
+{
+  for (int i = 0; i < 3; i++) {
+    if (set[i].style != VOLUME) continue;
+
+    int dynamic1 = set[i].dynamic1;
+    int dynamic2 = set[i].dynamic2;
+    int fixed = set[i].fixed;
+    double v0 = set[i].vol_start;
+    double shift = 0.0;
+
+    if (set[i].substyle == ONE_FROM_ONE) {
+      shift = 0.5 * (v0 / (set[dynamic1].hi_target - set[dynamic1].lo_target) /
+             (set[fixed].hi_start - set[fixed].lo_start));
+    } else if (set[i].substyle == ONE_FROM_TWO) {
+      shift = 0.5 * (v0 / (set[dynamic1].hi_target - set[dynamic1].lo_target) /
+             (set[dynamic2].hi_target - set[dynamic2].lo_target));
+    } else if (set[i].substyle == TWO_FROM_ONE) {
+      shift = 0.5 * sqrt(v0 * (set[i].hi_start - set[i].lo_start) /
+                 (set[dynamic1].hi_target - set[dynamic1].lo_target) /
+                 (set[fixed].hi_start - set[fixed].lo_start));
+    }
+
+    h_rate[i] = (2.0 * shift / (domain->boxhi[i] - domain->boxlo[i]) - 1.0) / update->dt;
+    h_ratelo[i] = -0.5 * h_rate[i];
+
+    set[i].lo_target = 0.5 * (set[i].lo_start + set[i].hi_start) - shift;
+    set[i].hi_target = 0.5 * (set[i].lo_start + set[i].hi_start) + shift;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   Update box domain
+------------------------------------------------------------------------- */
+
+void FixDeform::update_domain()
+{
+  // tilt_target can be large positive or large negative value
+  // add/subtract box lengths until tilt_target is closest to current value
+
+  if (triclinic) {
+    double *h = domain->h;
+    for (int i = 3; i < 6; i++) {
       int idenom = 0;
       if (i == 5) idenom = 0;
       else if (i == 4) idenom = 0;
       else if (i == 3) idenom = 1;
       double denom = set[idenom].hi_target - set[idenom].lo_target;
 
-      double current = h[i]/h[idenom];
+      double current = h[i] / h[idenom];
 
-      while (set[i].tilt_target/denom - current > 0.0)
+      while (set[i].tilt_target / denom - current > 0.0)
         set[i].tilt_target -= denom;
-      while (set[i].tilt_target/denom - current < 0.0)
+      while (set[i].tilt_target / denom - current < 0.0)
         set[i].tilt_target += denom;
-      if (fabs(set[i].tilt_target/denom - 1.0 - current) <
-          fabs(set[i].tilt_target/denom - current))
+      if (fabs(set[i].tilt_target / denom - 1.0 - current) <
+          fabs(set[i].tilt_target / denom - current))
         set[i].tilt_target -= denom;
     }
   }
 
-  if (varflag) modify->addstep_compute(update->ntimestep + nevery);
-
   // if any tilt ratios exceed 0.5, set flip = 1 and compute new tilt values
   // do not flip in x or y if non-periodic (can tilt but not flip)
   //   this is b/c the box length would be changed (dramatically) by flip
@@ -823,12 +847,12 @@ void FixDeform::end_of_step()
     double yprd = set[1].hi_target - set[1].lo_target;
     double xprdinv = 1.0 / xprd;
     double yprdinv = 1.0 / yprd;
-    if (set[3].tilt_target*yprdinv < -0.5 ||
-                                     set[3].tilt_target*yprdinv > 0.5 ||
-        set[4].tilt_target*xprdinv < -0.5 ||
-                                     set[4].tilt_target*xprdinv > 0.5 ||
-        set[5].tilt_target*xprdinv < -0.5 ||
-                                     set[5].tilt_target*xprdinv > 0.5) {
+    if (set[3].tilt_target * yprdinv < -0.5 ||
+        set[3].tilt_target * yprdinv > 0.5 ||
+        set[4].tilt_target * xprdinv < -0.5 ||
+        set[4].tilt_target * xprdinv > 0.5 ||
+        set[5].tilt_target * xprdinv < -0.5 ||
+        set[5].tilt_target * xprdinv > 0.5) {
       set[3].tilt_flip = set[3].tilt_target;
       set[4].tilt_flip = set[4].tilt_target;
       set[5].tilt_flip = set[5].tilt_target;
@@ -836,30 +860,30 @@ void FixDeform::end_of_step()
       flipxy = flipxz = flipyz = 0;
 
       if (domain->yperiodic) {
-        if (set[3].tilt_flip*yprdinv < -0.5) {
+        if (set[3].tilt_flip * yprdinv < -0.5) {
           set[3].tilt_flip += yprd;
           set[4].tilt_flip += set[5].tilt_flip;
           flipyz = 1;
-        } else if (set[3].tilt_flip*yprdinv > 0.5) {
+        } else if (set[3].tilt_flip * yprdinv > 0.5) {
           set[3].tilt_flip -= yprd;
           set[4].tilt_flip -= set[5].tilt_flip;
           flipyz = -1;
         }
       }
       if (domain->xperiodic) {
-        if (set[4].tilt_flip*xprdinv < -0.5) {
+        if (set[4].tilt_flip * xprdinv < -0.5) {
           set[4].tilt_flip += xprd;
           flipxz = 1;
         }
-        if (set[4].tilt_flip*xprdinv > 0.5) {
+        if (set[4].tilt_flip * xprdinv > 0.5) {
           set[4].tilt_flip -= xprd;
           flipxz = -1;
         }
-        if (set[5].tilt_flip*xprdinv < -0.5) {
+        if (set[5].tilt_flip * xprdinv < -0.5) {
           set[5].tilt_flip += xprd;
           flipxy = 1;
         }
-        if (set[5].tilt_flip*xprdinv > 0.5) {
+        if (set[5].tilt_flip * xprdinv > 0.5) {
           set[5].tilt_flip -= xprd;
           flipxy = -1;
         }
@@ -878,9 +902,9 @@ void FixDeform::end_of_step()
     int *mask = atom->mask;
     int nlocal = atom->nlocal;
 
-    for (i = 0; i < nlocal; i++)
+    for (int i = 0; i < nlocal; i++)
       if (mask[i] & groupbit)
-        domain->x2lamda(x[i],x[i]);
+        domain->x2lamda(x[i], x[i]);
 
     for (auto &ifix : rfix)
       ifix->deform(0);
@@ -889,22 +913,22 @@ void FixDeform::end_of_step()
   // reset global and local box to new size/shape
   // only if deform fix is controlling the dimension
 
-  if (set[0].style) {
+  if (dimflag[0]) {
     domain->boxlo[0] = set[0].lo_target;
     domain->boxhi[0] = set[0].hi_target;
   }
-  if (set[1].style) {
+  if (dimflag[1]) {
     domain->boxlo[1] = set[1].lo_target;
     domain->boxhi[1] = set[1].hi_target;
   }
-  if (set[2].style) {
+  if (dimflag[2]) {
     domain->boxlo[2] = set[2].lo_target;
     domain->boxhi[2] = set[2].hi_target;
   }
   if (triclinic) {
-    if (set[3].style) domain->yz = set[3].tilt_target;
-    if (set[4].style) domain->xz = set[4].tilt_target;
-    if (set[5].style) domain->xy = set[5].tilt_target;
+    if (dimflag[3]) domain->yz = set[3].tilt_target;
+    if (dimflag[4]) domain->xz = set[4].tilt_target;
+    if (dimflag[5]) domain->xy = set[5].tilt_target;
   }
 
   domain->set_global_box();
@@ -917,17 +941,13 @@ void FixDeform::end_of_step()
     int *mask = atom->mask;
     int nlocal = atom->nlocal;
 
-    for (i = 0; i < nlocal; i++)
+    for (int i = 0; i < nlocal; i++)
       if (mask[i] & groupbit)
-        domain->lamda2x(x[i],x[i]);
+        domain->lamda2x(x[i], x[i]);
 
     for (auto &ifix : rfix)
       ifix->deform(1);
   }
-
-  // redo KSpace coeffs since box has changed
-
-  if (kspace_flag) force->kspace->setup();
 }
 
 /* ----------------------------------------------------------------------
@@ -937,9 +957,9 @@ void FixDeform::end_of_step()
 void FixDeform::write_restart(FILE *fp)
 {
   if (comm->me == 0) {
-    int size = 6*sizeof(Set);
-    fwrite(&size,sizeof(int),1,fp);
-    fwrite(set,sizeof(Set),6,fp);
+    int size = 6 * sizeof(Set);
+    fwrite(&size, sizeof(int), 1, fp);
+    fwrite(set, sizeof(Set), 6, fp);
   }
 }
 
@@ -951,7 +971,7 @@ void FixDeform::restart(char *buf)
 {
   int samestyle = 1;
   Set *set_restart = (Set *) buf;
-  for (int i=0; i<6; ++i) {
+  for (int i = 0; i < 6; ++i) {
     // restore data from initial state
     set[i].lo_initial = set_restart[i].lo_initial;
     set[i].hi_initial = set_restart[i].hi_initial;
@@ -964,39 +984,57 @@ void FixDeform::restart(char *buf)
       samestyle = 0;
   }
   if (!samestyle)
-    error->all(FLERR,"Fix deform settings not consistent with restart");
+    error->all(FLERR, "Fix {} settings not consistent with restart", style);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixDeform::options(int narg, char **arg)
 {
-  if (narg < 0) error->all(FLERR,"Illegal fix deform command");
+  const std::string thiscmd = fmt::format("fix {}", style);
+  if (narg < 0) utils::missing_cmd_args(FLERR, thiscmd, error);
 
   remapflag = Domain::X_REMAP;
   scaleflag = 1;
   flipflag = 1;
 
+  // arguments for child classes
+
+  std::unordered_map<std::string, int> child_options;
+  if (utils::strmatch(style, "^deform/pressure")) {
+    child_options.insert({{"couple", 2}, {"max/rate", 2}, {"normalize/pressure", 2},
+                          {"vol/balance/p", 2}});
+  }
+
+  // parse all optional arguments for this parent and also child classes
+  // for child classes, simply store them in leftover_iarg and skip over them
+
   int iarg = 0;
   while (iarg < narg) {
-    if (strcmp(arg[iarg],"remap") == 0) {
-      if (iarg+2 > narg) error->all(FLERR,"Illegal fix deform command");
-      if (strcmp(arg[iarg+1],"x") == 0) remapflag = Domain::X_REMAP;
-      else if (strcmp(arg[iarg+1],"v") == 0) remapflag = Domain::V_REMAP;
-      else if (strcmp(arg[iarg+1],"none") == 0) remapflag = Domain::NO_REMAP;
-      else error->all(FLERR,"Illegal fix deform command");
+    if (strcmp(arg[iarg], "remap") == 0) {
+      if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, thiscmd + " remap", error);
+      if (strcmp(arg[iarg + 1], "x") == 0) remapflag = Domain::X_REMAP;
+      else if (strcmp(arg[iarg + 1], "v") == 0) remapflag = Domain::V_REMAP;
+      else if (strcmp(arg[iarg + 1], "none") == 0) remapflag = Domain::NO_REMAP;
+      else error->all(FLERR, "Illegal fix {} remap command: {}", style, arg[iarg + 1]);
       iarg += 2;
-    } else if (strcmp(arg[iarg],"units") == 0) {
-      if (iarg+2 > narg) error->all(FLERR,"Illegal fix deform command");
-      if (strcmp(arg[iarg+1],"box") == 0) scaleflag = 0;
-      else if (strcmp(arg[iarg+1],"lattice") == 0) scaleflag = 1;
-      else error->all(FLERR,"Illegal fix deform command");
+    } else if (strcmp(arg[iarg], "units") == 0) {
+      if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, thiscmd + " units", error);
+      if (strcmp(arg[iarg + 1], "box") == 0) scaleflag = 0;
+      else if (strcmp(arg[iarg + 1], "lattice") == 0) scaleflag = 1;
+      else error->all(FLERR, "Illegal fix {} units command: {}", style, arg[iarg + 1]);
       iarg += 2;
-    } else if (strcmp(arg[iarg],"flip") == 0) {
-      if (iarg+2 > narg) error->all(FLERR,"Illegal fix deform command");
-      flipflag = utils::logical(FLERR,arg[iarg+1],false,lmp);
+    } else if (strcmp(arg[iarg], "flip") == 0) {
+      if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, thiscmd + " flip", error);
+      flipflag = utils::logical(FLERR, arg[iarg + 1], false, lmp);
       iarg += 2;
-    } else error->all(FLERR,"Illegal fix deform command");
+    } else if (child_options.find(arg[iarg]) != child_options.end()) {
+      auto nskip = child_options[arg[iarg]];
+      if (iarg + nskip > narg)
+        utils::missing_cmd_args(FLERR, fmt::format("fix {} {}", style, arg[iarg]), error);
+      for (int i = 0; i < nskip; i++) leftover_iarg.push_back(iarg + i);
+      iarg += nskip;
+    } else error->all(FLERR, "Unknown fix {} keyword: {}", style, arg[iarg]);
   }
 }
 
diff --git a/src/fix_deform.h b/src/fix_deform.h
index 20f6ac5901..b133729444 100644
--- a/src/fix_deform.h
+++ b/src/fix_deform.h
@@ -29,14 +29,17 @@ class FixDeform : public Fix {
   int remapflag;     // whether x,v are remapped across PBC
   int dimflag[6];    // which dims are deformed
 
+  enum { NONE, FINAL, DELTA, SCALE, VEL, ERATE, TRATE, VOLUME, WIGGLE, VARIABLE, PRESSURE, PMEAN };
+  enum { ONE_FROM_ONE, ONE_FROM_TWO, TWO_FROM_ONE };
+
   FixDeform(class LAMMPS *, int, char **);
   ~FixDeform() override;
   int setmask() override;
   void init() override;
   void pre_exchange() override;
   void end_of_step() override;
-  void write_restart(FILE *) override;
-  void restart(char *buf) override;
+  void virtual write_restart(FILE *) override;
+  void virtual restart(char *buf) override;
   double memory_usage() override;
 
  protected:
@@ -48,8 +51,6 @@ class FixDeform : public Fix {
   std::vector<Fix *> rfix;       // pointers to rigid fixes
   class Irregular *irregular;    // for migrating atoms after box flips
 
-  double TWOPI;
-
   struct Set {
     int style, substyle;
     double flo, fhi, ftilt;
@@ -67,7 +68,13 @@ class FixDeform : public Fix {
   };
   Set *set;
 
+  std::vector<int> leftover_iarg;
+  int iarg_options_start;
+
   void options(int, char **);
+  void virtual apply_volume();
+  void apply_strain();
+  void update_domain();
 };
 
 }    // namespace LAMMPS_NS
diff --git a/src/fix_enforce2d.cpp b/src/fix_enforce2d.cpp
index c13e2147a3..048f8de543 100644
--- a/src/fix_enforce2d.cpp
+++ b/src/fix_enforce2d.cpp
@@ -17,7 +17,6 @@
 #include "atom.h"
 #include "domain.h"
 #include "error.h"
-#include "modify.h"
 #include "respa.h"
 #include "update.h"
 
diff --git a/src/fix_pair.cpp b/src/fix_pair.cpp
index 66212684a8..da56b01f9e 100644
--- a/src/fix_pair.cpp
+++ b/src/fix_pair.cpp
@@ -21,7 +21,8 @@
 #include "memory.h"
 #include "pair.h"
 #include "update.h"
-#include "fmt/format.h"
+
+#include <cstring>
 
 using namespace LAMMPS_NS;
 using namespace FixConst;
diff --git a/src/fix_press_langevin.cpp b/src/fix_press_langevin.cpp
index d8d2a3b04a..8116d66c0a 100644
--- a/src/fix_press_langevin.cpp
+++ b/src/fix_press_langevin.cpp
@@ -24,7 +24,6 @@
 #include "error.h"
 #include "fix_deform.h"
 #include "force.h"
-#include "group.h"
 #include "irregular.h"
 #include "kspace.h"
 #include "modify.h"
diff --git a/src/fix_vector.cpp b/src/fix_vector.cpp
index e18b53f615..7c75f93a3a 100644
--- a/src/fix_vector.cpp
+++ b/src/fix_vector.cpp
@@ -22,6 +22,8 @@
 #include "update.h"
 #include "variable.h"
 
+#include <cstring>
+
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
diff --git a/src/fix_wall_lj126.cpp b/src/fix_wall_lj126.cpp
index f0f7750edb..d526390153 100644
--- a/src/fix_wall_lj126.cpp
+++ b/src/fix_wall_lj126.cpp
@@ -17,8 +17,6 @@
 #include "error.h"
 #include "math_special.h"
 
-#include <cmath>
-
 using namespace LAMMPS_NS;
 using MathSpecial::powint;
 
diff --git a/src/fix_wall_lj93.cpp b/src/fix_wall_lj93.cpp
index c0c5e86ce3..dda0298be1 100644
--- a/src/fix_wall_lj93.cpp
+++ b/src/fix_wall_lj93.cpp
@@ -17,8 +17,6 @@
 #include "error.h"
 #include "math_special.h"
 
-#include <cmath>
-
 using namespace LAMMPS_NS;
 using MathSpecial::powint;
 
diff --git a/src/grid2d.cpp b/src/grid2d.cpp
index ea6e8e4123..e1265839f9 100644
--- a/src/grid2d.cpp
+++ b/src/grid2d.cpp
@@ -23,6 +23,8 @@
 #include "memory.h"
 #include "pair.h"
 
+#include <cstring>
+
 using namespace LAMMPS_NS;
 
 static constexpr int DELTA = 16;
diff --git a/src/grid3d.cpp b/src/grid3d.cpp
index 4ce1978660..f11e5d0513 100644
--- a/src/grid3d.cpp
+++ b/src/grid3d.cpp
@@ -24,6 +24,8 @@
 #include "math_extra.h"
 #include "memory.h"
 
+#include <cstring>
+
 using namespace LAMMPS_NS;
 
 static constexpr int DELTA = 16;
diff --git a/src/imbalance_neigh.cpp b/src/imbalance_neigh.cpp
index 8d4a17976b..f6b2be2cad 100644
--- a/src/imbalance_neigh.cpp
+++ b/src/imbalance_neigh.cpp
@@ -14,10 +14,8 @@
 #include "imbalance_neigh.h"
 
 #include "accelerator_kokkos.h"
-#include "atom.h"
 #include "comm.h"
 #include "error.h"
-#include "neighbor.h"
 
 using namespace LAMMPS_NS;
 
diff --git a/src/integrate.h b/src/integrate.h
index e622f6328d..d078f4a997 100644
--- a/src/integrate.h
+++ b/src/integrate.h
@@ -15,9 +15,9 @@
 #define LMP_INTEGRATE_H
 
 #include "pointers.h"
-#include "compute.h"
 
 namespace LAMMPS_NS {
+class Compute;
 
 class Integrate : protected Pointers {
  public:
diff --git a/src/lammps.cpp b/src/lammps.cpp
index 3329cb8d7b..b3659fdf50 100644
--- a/src/lammps.cpp
+++ b/src/lammps.cpp
@@ -66,7 +66,6 @@
 
 #include "lmpinstalledpkgs.h"
 #include "lmpgitversion.h"
-#include "lmpfftsettings.h"
 
 #if defined(LAMMPS_UPDATE)
 #define UPDATE_STRING " - " LAMMPS_UPDATE
diff --git a/src/min.h b/src/min.h
index b94d937fa5..a395a98bbc 100644
--- a/src/min.h
+++ b/src/min.h
@@ -15,9 +15,9 @@
 #define LMP_MIN_H
 
 #include "pointers.h"    // IWYU pragma: export
-#include "compute.h"
 
 namespace LAMMPS_NS {
+class Compute;
 
 class Min : protected Pointers {
  public:
@@ -116,7 +116,7 @@ class Min : protected Pointers {
   int narray;                         // # of arrays stored by fix_minimize
   class FixMinimize *fix_minimize;    // fix that stores auxiliary data
 
-  class Compute *pe_compute;    // compute for potential energy
+  Compute *pe_compute;    // compute for potential energy
   double ecurrent;              // current potential energy
 
   bigint ndoftotal;    // total dof for entire problem
diff --git a/src/npair_bin.cpp b/src/npair_bin.cpp
index d3d3415bc0..2c6fbbb49b 100644
--- a/src/npair_bin.cpp
+++ b/src/npair_bin.cpp
@@ -23,6 +23,8 @@
 #include "neigh_list.h"
 #include "neighbor.h"
 
+#include <cmath>
+
 using namespace LAMMPS_NS;
 using namespace NeighConst;
 
diff --git a/src/npair_halffull.cpp b/src/npair_halffull.cpp
index b3d8d4f50e..aa560b5731 100644
--- a/src/npair_halffull.cpp
+++ b/src/npair_halffull.cpp
@@ -19,6 +19,8 @@
 #include "my_page.h"
 #include "neigh_list.h"
 
+#include <cmath>
+
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/npair_multi.cpp b/src/npair_multi.cpp
index b5f813c0a8..44c68a9195 100644
--- a/src/npair_multi.cpp
+++ b/src/npair_multi.cpp
@@ -24,6 +24,8 @@
 #include "neighbor.h"
 #include "neigh_list.h"
 
+#include <cmath>
+
 using namespace LAMMPS_NS;
 using namespace NeighConst;
 
diff --git a/src/npair_multi_old.cpp b/src/npair_multi_old.cpp
index a4ca1e7361..3d983bd7fd 100644
--- a/src/npair_multi_old.cpp
+++ b/src/npair_multi_old.cpp
@@ -23,6 +23,8 @@
 #include "my_page.h"
 #include "neigh_list.h"
 
+#include <cmath>
+
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/npair_nsq.cpp b/src/npair_nsq.cpp
index c2f43d9cb0..9f469de480 100644
--- a/src/npair_nsq.cpp
+++ b/src/npair_nsq.cpp
@@ -24,6 +24,8 @@
 #include "neigh_list.h"
 #include "neighbor.h"
 
+#include <cmath>
+
 using namespace LAMMPS_NS;
 using namespace NeighConst;
 
diff --git a/src/npair_respa_bin.cpp b/src/npair_respa_bin.cpp
index f2fb9f7486..5347f4d3a0 100644
--- a/src/npair_respa_bin.cpp
+++ b/src/npair_respa_bin.cpp
@@ -23,6 +23,8 @@
 #include "my_page.h"
 #include "neigh_list.h"
 
+#include <cmath>
+
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/npair_respa_nsq.cpp b/src/npair_respa_nsq.cpp
index 9ca166a491..065c9eb6d6 100644
--- a/src/npair_respa_nsq.cpp
+++ b/src/npair_respa_nsq.cpp
@@ -24,6 +24,8 @@
 #include "my_page.h"
 #include "neigh_list.h"
 
+#include <cmath>
+
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/procmap.cpp b/src/procmap.cpp
index 71e1cf5a6b..b520354f53 100644
--- a/src/procmap.cpp
+++ b/src/procmap.cpp
@@ -150,13 +150,9 @@ void ProcMap::twolevel_grid(int nprocs, int *user_procgrid, int *procgrid,
    auto-detects NUMA sockets within a multi-core node
 ------------------------------------------------------------------------- */
 
-void ProcMap::numa_grid(int nprocs, int *user_procgrid, int *procgrid,
-                        int *numagrid)
+void ProcMap::numa_grid(int numa_nodes, int nprocs, int *user_procgrid,
+                        int *procgrid, int *numagrid)
 {
-  // hardwire this for now
-
-  int numa_nodes = 1;
-
   // get names of all nodes
 
   int name_length;
@@ -181,6 +177,7 @@ void ProcMap::numa_grid(int nprocs, int *user_procgrid, int *procgrid,
   }
   procs_per_node = name_map.begin()->second;
   procs_per_numa = procs_per_node / numa_nodes;
+  if (procs_per_numa < 1) procs_per_numa = 1;
 
   delete [] node_names;
 
@@ -192,6 +189,24 @@ void ProcMap::numa_grid(int nprocs, int *user_procgrid, int *procgrid,
       user_procgrid[2] > 1)
     error->all(FLERR,"Could not create numa grid of processors");
 
+  // factorization for the grid of NUMA nodes
+
+  int node_count = nprocs / procs_per_numa;
+
+  int **nodefactors;
+  int nodepossible = factor(node_count,nullptr);
+  memory->create(nodefactors,nodepossible,3,"procmap:nodefactors");
+  nodepossible = factor(node_count,nodefactors);
+
+  if (domain->dimension == 2)
+    nodepossible = cull_2d(nodepossible,nodefactors,3);
+  nodepossible = cull_user(nodepossible,nodefactors,3,user_procgrid);
+
+  if (nodepossible == 0)
+    error->all(FLERR,"Could not create numa grid of processors");
+
+  best_factors(nodepossible,nodefactors,nodegrid,1,1,1);
+
   // user settings for the factorization per numa node
   // currently not user settable
   // if user specifies 1 for a proc grid dimension,
@@ -204,6 +219,7 @@ void ProcMap::numa_grid(int nprocs, int *user_procgrid, int *procgrid,
   if (user_procgrid[1] == 1) user_numagrid[1] = 1;
   if (user_procgrid[2] == 1) user_numagrid[2] = 1;
 
+  // perform NUMA node factorization using subdomain sizes
   // initial factorization within NUMA node
 
   int **numafactors;
@@ -218,38 +234,6 @@ void ProcMap::numa_grid(int nprocs, int *user_procgrid, int *procgrid,
   if (numapossible == 0)
     error->all(FLERR,"Could not create numa grid of processors");
 
-  best_factors(numapossible,numafactors,numagrid,1,1,1);
-
-  // user_nodegrid = implied user constraints on nodes
-
-  int user_nodegrid[3];
-  user_nodegrid[0] = user_procgrid[0] / numagrid[0];
-  user_nodegrid[1] = user_procgrid[1] / numagrid[1];
-  user_nodegrid[2] = user_procgrid[2] / numagrid[2];
-
-  // factorization for the grid of NUMA nodes
-
-  int node_count = nprocs / procs_per_numa;
-
-  int **nodefactors;
-  int nodepossible = factor(node_count,nullptr);
-  memory->create(nodefactors,nodepossible,3,"procmap:nodefactors");
-  nodepossible = factor(node_count,nodefactors);
-
-  if (domain->dimension == 2)
-    nodepossible = cull_2d(nodepossible,nodefactors,3);
-  nodepossible = cull_user(nodepossible,nodefactors,3,user_nodegrid);
-
-  if (nodepossible == 0)
-    error->all(FLERR,"Could not create numa grid of processors");
-
-  best_factors(nodepossible,nodefactors,nodegrid,
-               numagrid[0],numagrid[1],numagrid[2]);
-
-  // repeat NUMA node factorization using subdomain sizes
-  // refines the factorization if the user specified the node layout
-  // NOTE: this will not re-enforce user-procgrid constraint will it?
-
   best_factors(numapossible,numafactors,numagrid,
                nodegrid[0],nodegrid[1],nodegrid[2]);
 
@@ -270,6 +254,7 @@ void ProcMap::numa_grid(int nprocs, int *user_procgrid, int *procgrid,
   procgrid[0] = nodegrid[0] * numagrid[0];
   procgrid[1] = nodegrid[1] * numagrid[1];
   procgrid[2] = nodegrid[2] * numagrid[2];
+
 }
 
 /* ----------------------------------------------------------------------
diff --git a/src/procmap.h b/src/procmap.h
index 06867837c6..2731aec984 100644
--- a/src/procmap.h
+++ b/src/procmap.h
@@ -24,7 +24,7 @@ class ProcMap : protected Pointers {
 
   void onelevel_grid(int, int *, int *, int, int, int *, int *);
   void twolevel_grid(int, int *, int *, int, int *, int *, int, int, int *, int *);
-  void numa_grid(int, int *, int *, int *);
+  void numa_grid(int, int, int *, int *, int *);
   void custom_grid(char *, int, int *, int *);
   void cart_map(int, int *, int *, int[3][2], int ***);
   void cart_map(int, int *, int, int *, int *, int[3][2], int ***);
diff --git a/src/rerun.cpp b/src/rerun.cpp
index 74a8956994..96491bab36 100644
--- a/src/rerun.cpp
+++ b/src/rerun.cpp
@@ -26,6 +26,7 @@
 #include "update.h"
 #include "variable.h"
 
+#include <cmath>
 #include <cstring>
 
 using namespace LAMMPS_NS;
diff --git a/src/reset_atoms_image.cpp b/src/reset_atoms_image.cpp
index 84df5bf746..63030c632b 100644
--- a/src/reset_atoms_image.cpp
+++ b/src/reset_atoms_image.cpp
@@ -22,7 +22,6 @@
 #include "group.h"
 #include "input.h"
 #include "modify.h"
-#include "update.h"
 #include "variable.h"
 
 #include <cmath>
diff --git a/src/utils.cpp b/src/utils.cpp
index bde6dffca5..8aa33e14e6 100644
--- a/src/utils.cpp
+++ b/src/utils.cpp
@@ -33,6 +33,7 @@
 #include <cerrno>
 #include <cstring>
 #include <ctime>
+#include <stdexcept>
 
 /*! \file utils.cpp */
 
diff --git a/src/write_dump.cpp b/src/write_dump.cpp
index 89dd48e0ba..8cd7dec88e 100644
--- a/src/write_dump.cpp
+++ b/src/write_dump.cpp
@@ -45,11 +45,16 @@ void WriteDump::command(int narg, char **arg)
   // create the Dump instance
   // create dump command line with extra required args
 
+  // work around "fix not computed at compatible times" errors.
+
+  int dumpfreq = MAX(1, update->nsteps);
+  dumpfreq += update->ntimestep % dumpfreq;
+
   auto dumpargs = new char *[modindex + 2];
-  dumpargs[0] = (char *) "WRITE_DUMP";                                       // dump id
-  dumpargs[1] = arg[0];                                                      // group
-  dumpargs[2] = arg[1];                                                      // dump style
-  dumpargs[3] = utils::strdup(std::to_string(MAX(update->ntimestep, 1)));    // dump frequency
+  dumpargs[0] = (char *) "WRITE_DUMP";                      // dump id
+  dumpargs[1] = arg[0];                                     // group
+  dumpargs[2] = arg[1];                                     // dump style
+  dumpargs[3] = utils::strdup(std::to_string(dumpfreq));    // dump frequency
 
   for (int i = 2; i < modindex; ++i) dumpargs[i + 2] = arg[i];
 
diff --git a/unittest/force-styles/tests/atomic-pair-meam_ms.yaml b/unittest/force-styles/tests/atomic-pair-meam_ms.yaml
index e479514017..fff938d940 100644
--- a/unittest/force-styles/tests/atomic-pair-meam_ms.yaml
+++ b/unittest/force-styles/tests/atomic-pair-meam_ms.yaml
@@ -1,7 +1,7 @@
 ---
-lammps_version: 22 Dec 2022
+lammps_version: 7 Feb 2024
 tags: slow
-date_generated: Thu Jan 26 15:27:03 2023
+date_generated: Wed Feb 28 17:07:42 2024
 epsilon: 2.5e-12
 skip_tests:
 prerequisites: ! |
@@ -20,75 +20,75 @@ natoms: 32
 init_vdwl: 785.6030480758675
 init_coul: 0
 init_stress: ! |2-
-   3.3502530994900699e+03  3.6405858278699407e+03  3.6349804214165547e+03 -3.1609283411508039e+02 -7.9448207656135153e+01 -1.9854140603340727e+02
+   3.3484562777721521e+03  3.6420237714822438e+03  3.6353392995221716e+03 -3.1708756217128752e+02 -7.7972133915567312e+01 -1.9763732315997078e+02
 init_forces: ! |2
-    1  1.2872255079741514e+01 -7.5031848810810864e-01  4.5969595156096510e+01
-    2 -3.9028679722038632e+01 -1.5647800180326567e+02 -1.6643992152928173e+00
-    3 -6.1521549955194672e+01  2.6970968316419874e+02 -9.6866430262650326e+01
-    4  3.1462579880342336e+01  4.0240291291218455e+01  1.1654869213327775e+01
-    5  1.4859248182951113e+01 -3.4132880749392825e+01  6.7430378007130244e+01
-    6  6.4609571260694096e+00 -3.8973222482916441e+01 -2.8510000379627442e+01
-    7  7.8114612113500250e+00 -1.0421431668544374e+01 -4.2887607385766536e+01
-    8 -4.8934215863351795e+01 -6.3567347969802590e-01  1.1845972792272754e+02
-    9  9.4089549606898402e+01 -7.4342942103394511e+00  2.5331198575951383e+01
-   10  1.5130369934140692e+01 -5.9245630928969938e+01 -6.7469126603400198e+01
-   11 -2.5176547213746847e+01  1.1577205529172168e+02 -2.2897457133540517e+01
-   12  6.2237686199502349e+01  2.0501996047945163e+01 -2.8805091517252826e+01
-   13 -5.9438589221526925e+01  3.0453092653824072e+01 -1.9919245831196157e+01
-   14  6.9128305482543766e+01 -7.7400771634148342e+01  3.3376079908119145e+01
-   15 -4.9671207786831857e+01 -4.9520814527298228e+01  8.4325181097614305e+01
-   16 -1.1782591146017666e+01 -3.2478963020209051e+01  1.5503663677714293e+01
-   17  9.0881787245915220e+00  6.2377477671714963e+01 -4.0411006180232363e+01
-   18 -4.2285082775720454e+01  2.4883979527636967e+01 -4.4858149086530510e+00
-   19 -8.0259798420493979e+01  9.6356660229207137e+01  6.0543230952477984e+01
-   20  8.0924547938759346e+01  7.1034504027236025e+01 -7.1958482512489610e+01
-   21  1.0833434220705425e+02 -1.5973910256481020e+02 -2.5432700070393153e+01
-   22 -2.3754601906353900e+00  5.2216955012971823e+01  4.7112051341131576e+00
-   23 -2.7227169255996543e+01  8.1968603165764222e+01  4.6535834898716878e+01
-   24 -2.9230758067555616e+01  6.5909555829367733e+01 -2.8250697734131258e+01
-   25 -5.1310041582953993e+01 -3.0895272949222822e+01 -5.4271286813003794e+00
-   26  3.9605941911194620e+01 -5.5919050176828883e+01 -1.0209061328106253e+01
-   27  8.2934427989660890e+01  6.1956200199325636e+01  5.0072108788590960e+01
-   28 -7.8572755094413296e+01 -3.9613391730681300e+01 -2.6183413623428891e+00
-   29  6.9475725072041925e+01 -6.0535433603583563e+01 -1.4566536349135829e+01
-   30 -2.4347184151182930e+01 -1.9359391333689970e+02 -2.6718379302915952e+01
-   31  7.7351971629808688e+01 -7.0102650745312999e+01 -5.4615048867524763e+01
-   32 -1.5060591772899014e+02  8.4489763988097266e+01  2.9799482293372058e+01
-run_vdwl: 682.3107192428497
+    1  1.2671882952049245e+01 -1.3853231851077510e+00  4.5439363666101173e+01
+    2 -3.8831765890428208e+01 -1.5598031052421931e+02 -1.8130309648194238e+00
+    3 -6.1494363653409785e+01  2.6941705868913857e+02 -9.6911726427029521e+01
+    4  3.1464910628044116e+01  4.0207585968399570e+01  1.2103864047414767e+01
+    5  1.4903052489944621e+01 -3.4527783082194034e+01  6.6654211762662953e+01
+    6  5.5548661222686047e+00 -3.9623462470037410e+01 -2.8613497963820279e+01
+    7  8.0397023763303324e+00 -1.0757727989654303e+01 -4.3166301457121655e+01
+    8 -4.7852982145265067e+01 -4.5739689932561944e-02  1.1843318640063390e+02
+    9  9.3964008212392528e+01 -7.2172885595082938e+00  2.5433781685526100e+01
+   10  1.5271997609830436e+01 -5.8803169091726673e+01 -6.7108575386479288e+01
+   11 -2.5045971665663821e+01  1.1625946130045016e+02 -2.3085965300813825e+01
+   12  6.2629055188396414e+01  2.0611095490210126e+01 -2.8436306261149859e+01
+   13 -5.9236259945824884e+01  3.0240277910808498e+01 -1.9273929877935466e+01
+   14  6.8663923609350405e+01 -7.7397309055975725e+01  3.3069829655105920e+01
+   15 -4.9647288254699909e+01 -4.9489231564386692e+01  8.4478943742851953e+01
+   16 -1.2170657087644635e+01 -3.2040178726180827e+01  1.4894301261010035e+01
+   17  8.8923416811864868e+00  6.2137744083574610e+01 -4.0135012964265592e+01
+   18 -4.1397401572483183e+01  2.5783976616022077e+01 -4.0437450156810772e+00
+   19 -7.9531558557888033e+01  9.6620429213911947e+01  6.0896893032878161e+01
+   20  8.1086619135335383e+01  7.0912440236148569e+01 -7.1683775489576050e+01
+   21  1.0782008087175012e+02 -1.5990396045249986e+02 -2.5346325379258598e+01
+   22 -2.2473039860970294e+00  5.2408111378634587e+01  4.9048274485706003e+00
+   23 -2.7403676909994125e+01  8.1791101929225192e+01  4.6914817444621839e+01
+   24 -3.0326733771468501e+01  6.5019994519315844e+01 -2.8273999335586893e+01
+   25 -5.1399449796211421e+01 -3.1235356363355280e+01 -5.1117557575884405e+00
+   26  3.9830854256806596e+01 -5.5977510234235112e+01 -1.0703040355620866e+01
+   27  8.2947330460814442e+01  6.2103713508335836e+01  5.0160761445057645e+01
+   28 -7.8724584730369727e+01 -3.9670230601217270e+01 -2.7333809658021124e+00
+   29  6.8825650256056235e+01 -6.1113096673842620e+01 -1.5031235234703393e+01
+   30 -2.4157792055811530e+01 -1.9333002107500442e+02 -2.6525581060126832e+01
+   31  7.7150429127214139e+01 -6.9426709034866391e+01 -5.5252913067136127e+01
+   32 -1.5024891495451018e+02  8.4411417529769210e+01  2.9865316672080176e+01
+run_vdwl: 682.3135170292994
 run_coul: 0
 run_stress: ! |2-
-   3.2247564044913129e+03  3.3749506031067485e+03  3.3223794967215117e+03 -2.8460979167554797e+02 -7.2614457076660575e+00 -3.1510685747732862e+02
+   3.2220423802202231e+03  3.3749844973876429e+03  3.3217643573058244e+03 -2.8493376779188281e+02 -5.1850956493150733e+00 -3.1548413267512802e+02
 run_forces: ! |2
-    1 -1.2037185973996296e+01 -2.5090364403764944e+01  1.4014184973113366e+01
-    2 -3.7365848425239264e+01 -1.5871199357658887e+02  3.7846333470446991e+00
-    3 -3.2057228694304293e+01  2.5316344962361612e+02 -6.0679585186816752e+01
-    4  2.9086197614116237e+01  4.8267528016068823e+01  4.3387429619749920e+00
-    5 -1.1672554618399744e+01 -2.6840760926124332e+01  4.9694308545223279e+01
-    6  1.1892092913978592e+01 -4.9360840569608243e+01 -2.3083171938147949e+01
-    7  2.1084251901459215e+01 -4.8251731643401072e+00 -3.8474871193885967e+01
-    8 -5.7775944085787714e+01  1.3522956442661442e+01  1.1661345819661486e+02
-    9  7.2926105059437930e+01  4.8686056096860133e+00  2.3817134806042311e+01
-   10  1.7307367990304396e+01 -3.0865570121704572e+01 -1.2314307646704794e+01
-   11 -1.1341297645054201e+01  9.1441145595173211e+01 -2.1806407500802493e+01
-   12  4.0645024127126625e+01  1.2207243511090397e+01 -2.6757649464936929e+01
-   13 -5.2283270287937697e+01  3.4023912643812679e+01 -1.9030352703627774e+01
-   14  8.4403128243303399e+01 -9.3773678297574406e+01  1.6481720093363641e+01
-   15 -4.2790833192154764e+01 -4.3242943642279130e+01  7.1075696811865868e+01
-   16 -1.5041912007490836e+01 -3.3544044565611586e+01  2.4823109532967212e+01
-   17 -9.6413207346836316e-01  4.5826021602656141e+01 -3.9155163702194102e+01
-   18 -2.0337015515785971e+01  7.2815285567550134e+00 -8.2049879725129813e+00
-   19 -6.4105384732081120e+01  1.1564665740933788e+02  2.4163791756721466e+01
-   20  8.5723654185276146e+01  8.3354105531647818e+01 -6.6380939444134356e+01
-   21  7.2614253221132458e+01 -1.0858997173537107e+02 -9.7505297776024449e+00
-   22 -7.0420361713052930e+00  5.3431098224890221e+01  3.3089063930822551e+00
-   23 -2.6591358240682062e+01  5.7408565880721866e+01  2.7437106471305679e+01
-   24 -4.1792038450554799e+01  5.1730557789864775e+01 -4.0814677464080816e+01
-   25 -4.1432062506590214e+01 -2.5839213423062226e+01  4.2240164846210408e+00
-   26  4.7210066329871566e+01 -5.2462761136081880e+01 -7.3222050314410501e+00
-   27  7.1880187551772764e+01  6.4264938765955392e+01  4.3600944370341068e+01
-   28 -8.4540787660053340e+01 -3.5402262816619938e+01 -1.8100280797937039e+01
-   29  6.9538301274653790e+01 -6.3441028093040622e+01 -1.4636386232064458e+01
-   30 -1.0347208112535196e+01 -1.7647584813608077e+02  7.2581082578181517e+00
-   31  5.5139777976761025e+01 -4.2081916983382541e+01 -4.6602437208067727e+01
-   32 -1.0993230999577290e+02  3.4110056387297462e+01  1.8478090262857769e+01
+    1 -1.2080548564836072e+01 -2.5477429439416710e+01  1.3806658731417755e+01
+    2 -3.7225520761818601e+01 -1.5818788485602025e+02  3.6451090218322113e+00
+    3 -3.2025464407673795e+01  2.5325620480420051e+02 -6.1031861941385266e+01
+    4  2.8988993769053742e+01  4.8033753615324457e+01  4.5694168356438283e+00
+    5 -1.2111374313827447e+01 -2.6961881177725004e+01  4.9632634513889542e+01
+    6  1.1311954816991424e+01 -5.0058087854226159e+01 -2.3204914001781273e+01
+    7  2.1516323786702536e+01 -5.0655040143290933e+00 -3.8825663342733620e+01
+    8 -5.6894831060887554e+01  1.3949056084318798e+01  1.1660108213204735e+02
+    9  7.2998009821741320e+01  4.8416842517465923e+00  2.3999831954683799e+01
+   10  1.7317912093622368e+01 -3.0947927635823405e+01 -1.2110169744224052e+01
+   11 -1.1067246227356863e+01  9.1960655066099605e+01 -2.2189177432965508e+01
+   12  4.1022329509360503e+01  1.2454231845197597e+01 -2.6395875385015671e+01
+   13 -5.2111607038923381e+01  3.3882585667855231e+01 -1.8629522935187623e+01
+   14  8.4036225281616041e+01 -9.3803437140562750e+01  1.6331247969198998e+01
+   15 -4.2720657484421395e+01 -4.3226247350447558e+01  7.1228336312410107e+01
+   16 -1.5337511847731712e+01 -3.3391288191194008e+01  2.4548585347623519e+01
+   17 -1.1747625966656585e+00  4.5776133215622977e+01 -3.9163692487889747e+01
+   18 -1.9491081233389249e+01  8.1566249764824512e+00 -7.8409414667475472e+00
+   19 -6.3157066638040050e+01  1.1562375461943864e+02  2.4100335394819872e+01
+   20  8.5633177613771736e+01  8.3249649056120163e+01 -6.5949837133210536e+01
+   21  7.2627995354135379e+01 -1.0905523323056843e+02 -9.7050849900191878e+00
+   22 -6.9254927960387569e+00  5.3565487765173124e+01  3.5264724633499451e+00
+   23 -2.6913648610060896e+01  5.7376365847813567e+01  2.7795031305112104e+01
+   24 -4.2988718884544781e+01  5.0965555337988739e+01 -4.0879720794423847e+01
+   25 -4.1545681275179334e+01 -2.6070548274497312e+01  4.6030493990981363e+00
+   26  4.7343706717511481e+01 -5.2491830895913587e+01 -7.5334343348923181e+00
+   27  7.1967929368447670e+01  6.4565679128450881e+01  4.3787238332434981e+01
+   28 -8.4353146891369605e+01 -3.5766877650630498e+01 -1.8194167121606220e+01
+   29  6.8890904369057438e+01 -6.3904080920499425e+01 -1.5203579536664831e+01
+   30 -1.0556200802502623e+01 -1.7613156919674918e+02  7.5443620074181723e+00
+   31  5.4944350533734486e+01 -4.1411243100978645e+01 -4.7232700660537759e+01
+   32 -1.0991925160047839e+02  3.4293649647748623e+01  1.8370951588304823e+01
 ...
diff --git a/unittest/force-styles/tests/in.conp b/unittest/force-styles/tests/in.conp
index 92d2f63cd1..08673ec20b 100644
--- a/unittest/force-styles/tests/in.conp
+++ b/unittest/force-styles/tests/in.conp
@@ -22,3 +22,4 @@ angle_coeff *
 
 group bot type 1
 group top type 2
+group ele type 1 2
diff --git a/unittest/force-styles/tests/kspace-ewald_conp_charge.yaml b/unittest/force-styles/tests/kspace-ewald_conp_charge.yaml
index d0eaee6b92..9bc190a766 100644
--- a/unittest/force-styles/tests/kspace-ewald_conp_charge.yaml
+++ b/unittest/force-styles/tests/kspace-ewald_conp_charge.yaml
@@ -1,6 +1,6 @@
 ---
-lammps_version: 23 Jun 2022
-date_generated: Wed Sep 21 13:52:53 2022
+lammps_version: 7 Feb 2024
+date_generated: Mon Mar  4 09:44:30 2024
 epsilon: 1e-12
 skip_tests: gpu kokkos_omp omp
 prerequisites: ! |
@@ -16,6 +16,7 @@ post_commands: ! |
   kspace_modify gewald 0.23118
   kspace_modify slab ew2d
   fix fxcpm bot electrode/conp -1.0 1.805 couple top 1.0 symm on
+  fix fxforce ele setforce 0 0 0
 input_file: in.conp
 pair_style: coul/long 15.0
 pair_coeff: ! |
@@ -27,97 +28,97 @@ init_coul: 2.215589572896434
 init_stress: ! |2-
    0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
 init_forces: ! |2
-    1  2.0780648532795694e-04  1.9949672015209204e-03  3.1005914149473996e+00
-    2 -1.6777235182686288e-02  2.1481432256290419e-03  3.0881659196467988e+00
-    3  6.0082164895554737e-04  5.1573260226633801e-03  3.1029192412328555e+00
-    4 -1.6728974802490675e-02  6.1174723156886242e-03  3.0909324782862346e+00
-    5  4.2029366155132378e-02 -2.3455526736195693e-03 -1.5659617577954634e+00
-    6  5.5635790919204904e-02 -2.4542947062522369e-03 -1.5693827709331334e+00
-    7  4.2014920784252008e-02 -7.5287470219125008e-04 -1.5671265392163820e+00
-    8  5.5808767852333470e-02 -9.9105389808573120e-04 -1.5707104957299389e+00
-    9 -5.0959878750421551e-02 -2.3630298689785601e-03 -1.5769250181497101e+00
-   10 -3.3526564930579039e-02 -2.3802275431282884e-03 -1.5617801011657175e+00
-   11 -5.1236396351794389e-02 -4.9531100598979201e-04 -1.5779995894034005e+00
-   12 -3.3740693032952060e-02 -1.0210406243572182e-03 -1.5630986537874150e+00
-   13 -1.1437102611353016e-03 -4.6454866413029015e-05  5.4282837980149448e-03
-   14  2.3914999373115431e-03 -1.6478680244651469e-04  2.9802178734319239e-02
-   15  3.9287193302652786e-05 -2.5715673267285659e-05  2.8944525105129479e-03
-   16  2.0458480716482328e-03 -1.2119161321908735e-04  3.3689550843809452e-02
-   17 -2.7146073277767471e-03 -8.2376243258224663e-04  2.6564130941474612e-02
-   18  1.3669692885198135e-03 -4.2357196145489820e-04  3.2396141113926739e-02
-   19  3.0143371860819995e-04 -8.6218593339583785e-04  2.6284521141350669e-02
-   20  1.1542435168435056e-03 -2.7252318260838826e-04  3.4237916528138110e-02
-   21 -1.2350056952573553e-03  4.8655691135364269e-04  5.9284283442393631e-03
-   22  2.3656743884722890e-03  9.6575340844312705e-04  2.9811074931784823e-02
-   23  4.6754986244969657e-05  3.0149464050350903e-04  3.4630785686112129e-03
-   24  2.0301227080749633e-03  6.3879578068684812e-04  3.3653437189053413e-02
-   25 -2.3656211013513076e-03 -8.0454594828768334e-04  2.8476980555362911e-02
-   26  1.1566723797447039e-03 -3.9614599888570504e-04  3.2873323713155905e-02
-   27  2.8784994028036400e-04 -8.3661697184444898e-04  2.8317655886021253e-02
-   28  9.3882364605486020e-04 -2.3327601777843495e-04  3.4334676606415648e-02
-   29 -4.7969977052124917e-04 -1.2933334305373028e-04 -1.2336987392568071e-02
-   30  6.4733118786851766e-05 -1.3190918849005797e-04 -1.2737933567178844e-02
-   31  2.4269094157913586e-04 -1.3093943526788584e-04 -1.2136133260085013e-02
-   32  1.7452552740941527e-04 -1.1792779046242341e-04 -1.4181538324619835e-02
-   33 -3.8366266481516803e-04 -7.1061854758754556e-05 -1.3699106365426135e-02
-   34  2.8849004082563746e-05 -5.7838605310673531e-05 -1.3764181266896890e-02
-   35  2.2648059665862587e-04 -7.2851385190891320e-05 -1.3537361892926607e-02
-   36  1.2929221129083645e-04 -4.2862960950045859e-05 -1.4926105930886896e-02
-   37 -4.7698025941707008e-04  2.9971529466656788e-04 -1.2393604822896313e-02
-   38  6.4231095731188766e-05  2.7548977518460050e-04 -1.2789498345723021e-02
-   39  2.4169204779864826e-04  3.0552093685810269e-04 -1.2193908285665961e-02
-   40  1.7324998349441456e-04  2.2898000918153004e-04 -1.4225267020837207e-02
-   41 -3.4345772150395188e-04 -9.8012060153887415e-05 -1.4482722052972283e-02
-   42  2.0345466940577010e-05 -8.5250083485342566e-05 -1.4497101004472062e-02
-   43  2.0917627239292995e-04 -1.0051271468149899e-04 -1.4335313646556430e-02
-   44  1.1456796622437295e-04 -6.7553675788598551e-05 -1.5543196158604005e-02
+    1  2.0780648532797705e-04  1.9949672015210172e-03  3.1005914149473988e+00
+    2 -1.6777235182686239e-02  2.1481432256291962e-03  3.0881659196467979e+00
+    3  6.0082164895566240e-04  5.1573260226632518e-03  3.1029192412328550e+00
+    4 -1.6728974802490665e-02  6.1174723156885227e-03  3.0909324782862333e+00
+    5  4.2029366155132364e-02 -2.3455526736195940e-03 -1.5659617577954636e+00
+    6  5.5635790919204904e-02 -2.4542947062522933e-03 -1.5693827709331336e+00
+    7  4.2014920784251980e-02 -7.5287470219118655e-04 -1.5671265392163822e+00
+    8  5.5808767852333491e-02 -9.9105389808568046e-04 -1.5707104957299391e+00
+    9 -5.0959878750421538e-02 -2.3630298689785965e-03 -1.5769250181497101e+00
+   10 -3.3526564930579081e-02 -2.3802275431283552e-03 -1.5617801011657175e+00
+   11 -5.1236396351794437e-02 -4.9531100598972577e-04 -1.5779995894034005e+00
+   12 -3.3740693032952088e-02 -1.0210406243571744e-03 -1.5630986537874152e+00
+   13  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   14  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   15  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   16  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   17  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   18  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   19  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   20  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   21  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   22  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   23  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   24  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   25  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   26  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   27  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   28  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   29  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   30  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   31  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   32  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   33  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   34  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   35  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   36  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   37  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   38  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   39  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   40  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   41  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   42  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   43  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   44  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
 run_vdwl: 0
-run_coul: 6.662694629990089
+run_coul: 6.662694556930397
 run_stress: ! |2-
    0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
 run_forces: ! |2
-    1  2.4590612609445102e-04  1.9614041218568861e-03  3.0874291949281147e+00
-    2 -1.6646393233505193e-02  2.1136941574790400e-03  3.0751132981100078e+00
-    3  6.3535217476586373e-04  5.1012487117746350e-03  3.0897301317927290e+00
-    4 -1.6598677148313409e-02  6.0535154567108685e-03  3.0778495346381409e+00
-    5  4.2257888534896988e-02 -2.3028533365965051e-03 -1.5593100596807521e+00
-    6  5.5690508027606708e-02 -2.4104721624763235e-03 -1.5626897542440843e+00
-    7  4.2243973130370149e-02 -7.6153220413259775e-04 -1.5604618260035832e+00
-    8  5.5862703939049158e-02 -9.9825803703216718e-04 -1.5640031014325448e+00
-    9 -5.1059409954744304e-02 -2.3195553026588347e-03 -1.5701677851024036e+00
-   10 -3.3824298857146967e-02 -2.3375522139358631e-03 -1.5551647619109401e+00
-   11 -5.1334079184640377e-02 -5.0583705005136689e-04 -1.5712298444761112e+00
-   12 -3.4037363466305925e-02 -1.0275978089057873e-03 -1.5564691026885336e+00
-   13 -1.1767076011504501e-03 -4.7681963272732406e-05  5.6177800544716262e-03
-   14  2.3826294437743331e-03 -1.6349140124449633e-04  2.9836275824428962e-02
-   15  4.1635367214843796e-05 -2.7287283914685102e-05  3.0967012748694773e-03
-   16  2.0334138778396313e-03 -1.1986039772787527e-04  3.3679727731055195e-02
-   17 -2.7026084826568797e-03 -8.1815079485725360e-04  2.6574868248546435e-02
-   18  1.3603406762441243e-03 -4.1902595052936860e-04  3.2373613783594497e-02
-   19  2.9940467686436986e-04 -8.5646794759970863e-04  2.6296909514905095e-02
-   20  1.1469475577225402e-03 -2.6907093945665336e-04  3.4197589258157073e-02
-   21 -1.2662578686531134e-03  4.9770031968890231e-04  6.1141650873547037e-03
-   22  2.3568409810395020e-03  9.5671841592085381e-04  2.9844352492872490e-02
-   23  4.8965507374742117e-05  3.1616579858329929e-04  3.6613180489820005e-03
-   24  2.0177821554069170e-03  6.3083810187911310e-04  3.3643353017422439e-02
-   25 -2.3537455003017457e-03 -7.9846295760147956e-04  2.8468250829639500e-02
-   26  1.1507655048236000e-03 -3.9159985067612060e-04  3.2839870487003708e-02
-   27  2.8582562554448814e-04 -8.3038492818152999e-04  2.8309777443009273e-02
-   28  9.3274285761092680e-04 -2.2997823984283208e-04  3.4287630335266286e-02
-   29 -4.7502414048888327e-04 -1.2847214455389489e-04 -1.2453998829891042e-02
-   30  6.3675154563755000e-05 -1.3104204562344653e-04 -1.2848240218511071e-02
-   31  2.4068203429808906e-04 -1.3007692195448562e-04 -1.2254443488117142e-02
-   32  1.7286880375665112e-04 -1.1713944254614034e-04 -1.4279748536149278e-02
-   33 -3.7975304094097439e-04 -7.0481503989778179e-05 -1.3805675914786045e-02
-   34  2.8172580422574018e-05 -5.7414812953860394e-05 -1.3867744309035916e-02
-   35  2.2448042926710853e-04 -7.2258368796568220e-05 -1.3645037267085249e-02
-   36  1.2804302797390986e-04 -4.2547023166907131e-05 -1.5019384166440140e-02
-   37 -4.7231656130619122e-04  2.9759414220405656e-04 -1.2510308090752414e-02
-   38  6.3181133759042352e-05  2.7366531591578288e-04 -1.2899551129187765e-02
-   39  2.3968383978790597e-04  3.0338116638639894e-04 -1.2311919409216509e-02
-   40  1.7159804905743131e-04  2.2744508340904917e-04 -1.4323246783684936e-02
-   41 -3.3986046011853923e-04 -9.7274166717457145e-05 -1.4583591616566860e-02
-   42  1.9775995840422978e-05 -8.4675117269364898e-05 -1.4595729099929700e-02
-   43  2.0725822207231137e-04 -9.9764702521265135e-05 -1.4437236974797743e-02
-   44  1.1345006523014360e-04 -6.7103771021504086e-05 -1.5632251527466189e-02
+    1  2.4590683616588566e-04  1.9614031577864796e-03  3.0874292323214036e+00
+    2 -1.6646391056109287e-02  2.1136931488459688e-03  3.0751133344119794e+00
+    3  6.3535284261976926e-04  5.1012486499620825e-03  3.0897301693141319e+00
+    4 -1.6598674977089559e-02  6.0535154273402107e-03  3.0778495712965124e+00
+    5  4.2257890248609438e-02 -2.3028528771736243e-03 -1.5593100743454236e+00
+    6  5.5690508840794850e-02 -2.4104716736066006e-03 -1.5626897684026868e+00
+    7  4.2243974848895645e-02 -7.6153214117129057e-04 -1.5604618407191602e+00
+    8  5.5862704771247280e-02 -9.9825804058299890e-04 -1.5640031157582626e+00
+    9 -5.1059412079591346e-02 -2.3195548367148894e-03 -1.5701677996288432e+00
+   10 -3.3824301875619085e-02 -2.3375517362837230e-03 -1.5551647759943836e+00
+   11 -5.1334081278836916e-02 -5.0583707575582921e-04 -1.5712298590599834e+00
+   12 -3.4037366496628307e-02 -1.0275977984217173e-03 -1.5564691169384559e+00
+   13  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   14  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   15  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   16  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   17  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   18  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   19  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   20  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   21  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   22  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   23  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   24  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   25  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   26  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   27  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   28  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   29  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   30  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   31  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   32  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   33  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   34  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   35  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   36  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   37  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   38  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   39  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   40  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   41  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   42  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   43  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   44  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
 ...
diff --git a/unittest/force-styles/tests/kspace-pppm_conp_charge.yaml b/unittest/force-styles/tests/kspace-pppm_conp_charge.yaml
index 0555d12ed4..38161ee044 100644
--- a/unittest/force-styles/tests/kspace-pppm_conp_charge.yaml
+++ b/unittest/force-styles/tests/kspace-pppm_conp_charge.yaml
@@ -1,6 +1,6 @@
 ---
-lammps_version: 23 Jun 2022
-date_generated: Wed Sep 21 13:52:39 2022
+lammps_version: 7 Feb 2024
+date_generated: Mon Mar  4 09:44:31 2024
 epsilon: 3e-12
 skip_tests: gpu kokkos_omp omp
 prerequisites: ! |
@@ -16,6 +16,7 @@ post_commands: ! |
   kspace_modify gewald 0.23118
   kspace_modify slab 3.0
   fix fxcpm bot electrode/conp -1.0 1.805 couple top 1.0 symm on
+  fix fxforce ele setforce 0 0 0
 input_file: in.conp
 pair_style: coul/long 15.0
 pair_coeff: ! |
@@ -23,101 +24,101 @@ pair_coeff: ! |
 extract: ! ""
 natoms: 44
 init_vdwl: 0
-init_coul: 2.2156402256727614
+init_coul: 2.215640225672775
 init_stress: ! |2-
    0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
 init_forces: ! |2
-    1  2.0996096688279944e-04  1.9837586784580306e-03  3.1004822661058822e+00
-    2 -1.6783332510617883e-02  2.1368843599407611e-03  3.0880130470329230e+00
-    3  6.0300296042517466e-04  5.1688381279905342e-03  3.1028182137891114e+00
-    4 -1.6735061532950901e-02  6.1290626039690339e-03  3.0907879891042755e+00
-    5  4.2014131860757913e-02 -2.3478381081742388e-03 -1.5658874682481487e+00
-    6  5.5659823770659422e-02 -2.4566144388410410e-03 -1.5693278833316506e+00
-    7  4.1999624791768865e-02 -7.5066218795240259e-04 -1.5670569809441617e+00
-    8  5.5832732887661884e-02 -9.8883264742169940e-04 -1.5706605160409139e+00
-    9 -5.0976953599115804e-02 -2.3653810185280950e-03 -1.5768945194236066e+00
-   10 -3.3513771125456657e-02 -2.3824712764543426e-03 -1.5616806812004886e+00
-   11 -5.1253442064492741e-02 -4.9304425051529275e-04 -1.5779738349804424e+00
-   12 -3.3727836471637192e-02 -1.0188844490582761e-03 -1.5630041309277038e+00
-   13 -1.1453068449918257e-03 -4.7335833322794788e-05  5.4292779404649470e-03
-   14  2.3900993287279790e-03 -1.6878550058260119e-04  2.9808528147740175e-02
-   15  4.0078428215627730e-05 -2.6184607051201481e-05  2.8941780231019881e-03
-   16  2.0473353699190459e-03 -1.2552209515766962e-04  3.3684989110502959e-02
-   17 -2.7210216747431955e-03 -8.2349543008294359e-04  2.6567504438257068e-02
-   18  1.3656002828979516e-03 -4.2323438710338486e-04  3.2404938547366383e-02
-   19  3.0785575286292939e-04 -8.6186674263511191e-04  2.6288541663855129e-02
-   20  1.1555469330548321e-03 -2.7230960410720359e-04  3.4235148032534163e-02
-   21 -1.2368093613861506e-03  4.8760847861882366e-04  5.9296798954256557e-03
-   22  2.3643140421916085e-03  9.6975102599399746e-04  2.9817231402721564e-02
-   23  4.7705653522709085e-05  3.0203836842154655e-04  3.4631818106649337e-03
-   24  2.0316297431160258e-03  6.4335031755788927e-04  3.3648629802522749e-02
-   25 -2.3728144718995455e-03 -8.0497592536520339e-04  2.8474707915345274e-02
-   26  1.1555985481661916e-03 -3.9649433660109970e-04  3.2876098209196375e-02
-   27  2.9459292459149998e-04 -8.3700881746301306e-04  2.8316136079038545e-02
-   28  9.4027352090446912e-04 -2.3371025598546553e-04  3.4325153603153732e-02
-   29 -5.2133856931286127e-04 -1.4498587872629142e-04 -1.2345168780426297e-02
-   30  7.0344538924238829e-05 -1.4805754895657979e-04 -1.2765142487049358e-02
-   31  2.7857644686035687e-04 -1.4667349483298643e-04 -1.2140095836769501e-02
-   32  1.7479865631996218e-04 -1.3335074368636031e-04 -1.4152171307753206e-02
-   33 -4.2607366742485959e-04 -7.2661709209033136e-05 -1.3713642029394900e-02
-   34  3.4224570995904750e-05 -5.9352423088202727e-05 -1.3797063100154012e-02
-   35  2.6332638434852483e-04 -7.4573909050170201e-05 -1.3548481212572565e-02
-   36  1.2956589882656260e-04 -4.4469640559387641e-05 -1.4903750420442119e-02
-   37 -5.1855202137555164e-04  3.1768672664149645e-04 -1.2402413876588990e-02
-   38  6.9744637010106164e-05  2.9379923413403201e-04 -1.2817238930047764e-02
-   39  2.7753815671547851e-04  3.2364879063853462e-04 -1.2198687630220518e-02
-   40  1.7359211286247436e-04  2.4661766514410969e-04 -1.4196607980261094e-02
-   41 -3.8501793941197521e-04 -9.8818538537990245e-05 -1.4472863167957050e-02
-   42  2.5779944975443997e-05 -8.5975255466014692e-05 -1.4504933662725301e-02
-   43  2.4535595442148292e-04 -1.0127263490049206e-04 -1.4321758087878972e-02
-   44  1.1457678622968817e-04 -6.8200688092186871e-05 -1.5499407046729242e-02
+    1  2.0996096688288336e-04  1.9837586784579777e-03  3.1004822661058866e+00
+    2 -1.6783332510618088e-02  2.1368843599407052e-03  3.0880130470329270e+00
+    3  6.0300296042527755e-04  5.1688381279906869e-03  3.1028182137891140e+00
+    4 -1.6735061532951116e-02  6.1290626039692056e-03  3.0907879891042778e+00
+    5  4.2014131860757857e-02 -2.3478381081742123e-03 -1.5658874682481501e+00
+    6  5.5659823770659547e-02 -2.4566144388410201e-03 -1.5693278833316524e+00
+    7  4.1999624791768810e-02 -7.5066218795247393e-04 -1.5670569809441637e+00
+    8  5.5832732887661961e-02 -9.8883264742177161e-04 -1.5706605160409157e+00
+    9 -5.0976953599115846e-02 -2.3653810185280759e-03 -1.5768945194236084e+00
+   10 -3.3513771125456573e-02 -2.3824712764543218e-03 -1.5616806812004898e+00
+   11 -5.1253442064492769e-02 -4.9304425051536007e-04 -1.5779738349804435e+00
+   12 -3.3727836471637081e-02 -1.0188844490583557e-03 -1.5630041309277052e+00
+   13  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   14  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   15  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   16  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   17  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   18  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   19  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   20  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   21  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   22  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   23  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   24  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   25  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   26  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   27  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   28  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   29  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   30  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   31  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   32  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   33  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   34  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   35  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   36  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   37  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   38  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   39  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   40  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   41  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   42  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   43  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   44  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
 run_vdwl: 0
-run_coul: 6.662844717848837
+run_coul: 6.662844644802024
 run_stress: ! |2-
    0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
 run_forces: ! |2
-    1  2.4838374656870440e-04  1.9503798034564181e-03  3.0873204052231675e+00
-    2 -1.6652792550963507e-02  2.1026197438206527e-03  3.0749612313228378e+00
-    3  6.3785681925848106e-04  5.1125747842690368e-03  3.0896293583134606e+00
-    4 -1.6605065971975488e-02  6.0649203428150876e-03  3.0777057441315305e+00
-    5  4.2242720963296274e-02 -2.3050719334346786e-03 -1.5592361839228894e+00
-    6  5.5714272244614366e-02 -2.4127241272198356e-03 -1.5626351053810128e+00
-    7  4.2228744507856318e-02 -7.5938680980830215e-04 -1.5603926173148708e+00
-    8  5.5886400463161408e-02 -9.9610508394880446e-04 -1.5639532943562677e+00
-    9 -5.1076260774663269e-02 -2.3218376339398856e-03 -1.5701372580807029e+00
-   10 -3.3811558405906099e-02 -2.3397301017438945e-03 -1.5550660295253020e+00
-   11 -5.1350900750795785e-02 -5.0363945411724615e-04 -1.5712039970018119e+00
-   12 -3.4024561045577786e-02 -1.0255079139096881e-03 -1.5563752017113657e+00
-   13 -1.1783355827329363e-03 -4.8584425213699084e-05  5.6187810005819084e-03
-   14  2.3812433011153062e-03 -1.6745821939573657e-04  2.9842604329757397e-02
-   15  4.2472634867315832e-05 -2.7784504406519571e-05  3.0964622176844602e-03
-   16  2.0348857368272344e-03 -1.2415016403438035e-04  3.3675205082768264e-02
-   17 -2.7089567432503573e-03 -8.1788662401764114e-04  2.6578231998707000e-02
-   18  1.3589859834844708e-03 -4.1869284075205220e-04  3.2382362625908642e-02
-   19  3.0575998684987288e-04 -8.5615261685325662e-04  2.6300918365183709e-02
-   20  1.1482382726252934e-03 -2.6885864047674810e-04  3.4194853807722227e-02
-   21 -1.2680891310638792e-03  4.9877122346546224e-04  6.1154164862940522e-03
-   22  2.3554944751877239e-03  9.6068685589809240e-04  2.9850487598267021e-02
-   23  4.9959016556344255e-05  3.1673714227676154e-04  3.6614479955528112e-03
-   24  2.0192733259603224e-03  6.3534849028298586e-04  3.3638585635455770e-02
-   25 -2.3608563352120440e-03 -7.9888874314704573e-04  2.8465994834953420e-02
-   26  1.1497012824588045e-03 -3.9194609517719160e-04  3.2842627585052867e-02
-   27  2.9249325092285189e-04 -8.3077344407106490e-04  2.8308269755484061e-02
-   28  9.3417705968603434e-04 -2.3040781495046246e-04  3.4278191255128133e-02
-   29 -5.1652528928022433e-04 -1.4404353042683099e-04 -1.2462150875064196e-02
-   30  6.9288940488795056e-05 -1.4710788156542239e-04 -1.2875332550432134e-02
-   31  2.7644608312646314e-04 -1.4573040212804145e-04 -1.2258366148270420e-02
-   32  1.7312052380602741e-04 -1.3246895843787418e-04 -1.4250473428514749e-02
-   33 -4.2201273060055557e-04 -7.2075937985441126e-05 -1.3820153961237609e-02
-   34  3.3549241776445280e-05 -5.8924381188893540e-05 -1.3900492261623465e-02
-   35  2.6119063773180797e-04 -7.3975274656908229e-05 -1.3656086631368676e-02
-   36  1.2829798186810219e-04 -4.4146305074977061e-05 -1.4997087695938056e-02
-   37 -5.1375024459660446e-04  3.1547360096919003e-04 -1.2519087424517190e-02
-   38  6.8697062203298403e-05  2.9188411177715840e-04 -1.2927175328444108e-02
-   39  2.7540798890579723e-04  3.2141722701930868e-04 -1.2316658300695350e-02
-   40  1.7191884579607650e-04  2.4497453169432579e-04 -1.4294679585099376e-02
-   41 -3.8125358429239129e-04 -9.8073404256391242e-05 -1.4573806404289218e-02
-   42  2.5208712883704254e-05 -8.5394829999053783e-05 -1.4603558816163261e-02
-   43  2.4329125095209453e-04 -1.0051714757303402e-04 -1.4423743772053488e-02
-   44  1.1343880007511943e-04 -6.7742613833467660e-05 -1.5588639087563531e-02
+    1  2.4838446863956616e-04  1.9503788466163226e-03  3.0873204426106526e+00
+    2 -1.6652790386577014e-02  2.1026187422225435e-03  3.0749612676149027e+00
+    3  6.3785749905510489e-04  5.1125747155211397e-03  3.0896293958296610e+00
+    4 -1.6605063813845172e-02  6.0649203065202620e-03  3.0777057807809554e+00
+    5  4.2242722672783749e-02 -2.3050714764837649e-03 -1.5592361985876177e+00
+    6  5.5714273062644336e-02 -2.4127236407636803e-03 -1.5626351195306318e+00
+    7  4.2228746222139832e-02 -7.5938674438442760e-04 -1.5603926320308752e+00
+    8  5.5886401300220948e-02 -9.9610508505008281e-04 -1.5639533086735800e+00
+    9 -5.1076262902731158e-02 -2.3218371704875301e-03 -1.5701372726008263e+00
+   10 -3.3811561421032657e-02 -2.3397296264742822e-03 -1.5550660436064543e+00
+   11 -5.1350902848165500e-02 -5.0363947736732217e-04 -1.5712040115797643e+00
+   12 -3.4024564072498521e-02 -1.0255079009805182e-03 -1.5563752159595889e+00
+   13  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   14  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   15  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   16  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   17  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   18  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   19  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   20  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   21  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   22  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   23  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   24  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   25  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   26  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   27  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   28  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   29  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   30  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   31  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   32  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   33  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   34  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   35  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   36  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   37  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   38  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   39  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   40  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   41  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   42  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   43  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
+   44  0.0000000000000000e+00  0.0000000000000000e+00  0.0000000000000000e+00
 ...
diff --git a/unittest/force-styles/tests/manybody-pair-rebomos.yaml b/unittest/force-styles/tests/manybody-pair-rebomos.yaml
new file mode 100644
index 0000000000..74fbe2b001
--- /dev/null
+++ b/unittest/force-styles/tests/manybody-pair-rebomos.yaml
@@ -0,0 +1,125 @@
+---
+lammps_version: 7 Feb 2024
+tags: slow
+date_generated: Thu Feb 22 09:08:59 2024
+epsilon: 1e-12
+skip_tests:
+prerequisites: ! |
+  pair rebomos
+pre_commands: ! |
+  variable newton_pair delete
+  if "$(is_active(package,gpu)) > 0.0" then "variable newton_pair index off" else "variable newton_pair index on"
+post_commands: ! ""
+input_file: in.airebo
+pair_style: rebomos
+pair_coeff: ! |
+  * * MoS.rebomos Mo S
+extract: ! ""
+natoms: 48
+init_vdwl: 3158.017726833385
+init_coul: 0
+init_stress: ! |2-
+   6.8398718310371441e+03  6.7325636075141883e+03  6.1154248388685965e+03 -3.2850057579078185e+02 -6.6397329123828470e+01 -3.4208234997867203e+02
+init_forces: ! |2
+    1 -3.7681565852737293e+00  2.4378308384489483e+02 -2.6969740060923279e+01
+    2  2.6266038011491645e+02 -2.0681295165426090e+02 -1.9964225279104706e+01
+    3 -2.5778010496370018e+02 -2.3678496612327552e+02  4.3829487525746238e+01
+    4 -1.0244959408607365e+01 -2.6794342328905179e+02 -2.6250177085935768e+01
+    5  2.2561151172717553e+02  2.0458598509633319e+02 -4.2183143539914880e+01
+    6 -2.4766208404861507e+02  1.8231884233051196e+02  1.2358374858464346e+01
+    7 -2.8974107341390795e+01  1.1031227139363692e+02 -1.2014739688866064e+02
+    8  2.1125868843851663e+02 -2.8863731327540540e+02  1.6869558243398984e+01
+    9 -3.1045083889038222e+02 -1.7569259168198960e+02  7.9653354234911163e+01
+   10  3.9886149586070935e+01 -1.0347554263069082e+02  1.5430519714983259e+02
+   11  1.8975960895859026e+02  1.4943835159807929e+02 -7.3224202319244824e+01
+   12 -2.1525843617159188e+02  2.2144411990369784e+02 -1.4893230739895195e+01
+   13 -4.5214798787473534e+00  1.1303681465405538e+02 -1.1385660017739841e+02
+   14  2.3203040375527121e+02 -3.4422786145586961e+01  1.8438743650405590e+02
+   15 -9.7170644212253990e+01 -2.9205504121924292e+02  1.9237141908302370e+01
+   16 -5.1044169101209107e+01 -1.4503490444304657e+02  1.5798442448724001e+02
+   17  2.8968061091312188e+02  8.7626477818666558e+01 -5.8802357863256212e+01
+   18 -1.7269858357220591e+02  8.9805161881631591e+01 -1.7847636055101773e+02
+   19  1.0760757939777642e+02  1.2642591396366257e+02  1.5914528700154095e+01
+   20  8.4608047558760049e+01 -2.4114295115066687e+02 -8.5792447633922421e+01
+   21 -8.8196263713344706e+01 -1.2718199182512282e+02 -7.0146571930858386e+01
+   22 -9.8957412653883708e+01 -1.4706747771082254e+02 -4.1887992372999022e+01
+   23  1.3739608451158423e+02  2.5576040689729510e+02  3.0309933932861746e+01
+   24 -1.7919184387909232e+02  1.4014151619269020e+02  1.6188399417577682e+02
+   25 -7.7347072523993347e+01  1.5555894832740958e+02  2.6182670200573220e+01
+   26  7.1360176064432650e+01 -2.9346871053231695e+02 -4.0766232856732509e+00
+   27 -2.6964124483003886e+01  9.8094028485618843e+00  2.4520760165936114e+01
+   28  1.2261226472559841e+02 -2.1507437048459283e+02 -3.0529457478077248e+01
+   29  2.2123474818204119e+01  2.8827028167050270e+02  1.0492685172067040e+02
+   30 -3.0573547162322302e+02  6.5880918489915032e+01 -1.1459287271946665e+02
+   31  6.0299759271951014e+00  1.7876996651274851e+02 -1.2964028093837922e+02
+   32  2.3752911820416773e+02 -1.2903240767402318e+02  1.2355068302179771e+02
+   33 -2.7462402312452838e+02 -5.3957296643601545e+01  9.5908146508348565e+01
+   34 -2.4514118579997412e+01 -1.0385194449627924e+02  1.4149428440602534e+02
+   35  1.2613298864651166e+02  6.2171771831118461e+00 -2.0642312645163636e+02
+   36 -6.9996802234927173e+01  2.9354514776980676e+02 -2.0963604805137543e+01
+   37  5.0241789394721707e+01  2.4495589528252060e+02 -1.1814425338523709e+01
+   38  2.2684897209808264e+02 -1.8673318434123004e+02  1.5220428086050066e+00
+   39 -2.5107271651763259e+02 -1.6443685417603410e+02 -3.8081572318769169e+01
+   40 -5.6754727284178912e+01 -2.5956532443118618e+02 -5.9343761520548126e+00
+   41  2.7426370608616236e+02  1.7742153146823480e+02  1.3451000229991498e+01
+   42 -2.2732796257204490e+02  2.5279459742055184e+02 -2.7020668313372529e+01
+   43  1.6217419052721431e+02  1.3946372126978235e+02 -2.0623397237537300e+00
+   44  3.0490806490775839e+01  2.1072542774667330e+01  8.2426559376766466e+00
+   45 -1.8440339212966003e+02 -2.5521581334939563e+02  2.6487508881411735e+01
+   46 -3.5020523296396959e+01 -2.5119105337038474e+02  8.0096253171487604e+00
+   47  3.2722641189388571e+02  9.7949745935413119e+01 -1.1898751818014821e+01
+   48 -1.3785292104885133e+02  3.2239007811982697e+02  2.4602884867061839e+01
+run_vdwl: 838.005031679853
+run_coul: 0
+run_stress: ! |2-
+   2.0454406617512559e+03  2.1642975706136021e+03  3.2875013790709349e+03  2.4615001869678809e+02  9.3532964794023911e+01 -1.6795786485689854e+02
+run_forces: ! |2
+    1 -2.5386905249574866e+01 -9.9909927316940710e+01  1.9622022776949393e+00
+    2 -3.6801215363277215e+01  1.3354608905768052e+02  1.3651128931415545e+02
+    3  2.1845136723115344e+00  2.3527273580981478e+00  2.4005941692097457e+00
+    4 -3.7818914621624749e+01  3.2946537814811997e+01  4.3018847892043375e+01
+    5  9.6010433504280890e+00  4.3327442916171194e+01 -1.0513311611209312e+02
+    6  8.4308572792324540e+01 -2.3503352214725012e+01  5.0539884405443850e+01
+    7 -1.1497740491253731e+01 -6.4938790153209212e+01  3.8948855627091397e+01
+    8 -4.2897157525993697e+00 -3.3530261299383611e+01  9.5574489119024229e+00
+    9  4.8767710902858741e-01 -3.5176606358955858e+00 -1.4806227207057554e-01
+   10 -1.4766722110672411e+01  2.3818988804615220e+01 -5.3134339246512809e+01
+   11 -1.3935431402422819e+02 -1.5665571900752980e+02 -3.7627334252485582e+01
+   12  7.7074185304589484e+00 -1.2031768398608595e+01  1.8274050209872662e+01
+   13  2.3670515505343992e+01 -6.4282829210616583e+01  4.6336363412751325e+01
+   14 -2.5980738378663424e+00  8.4167979467127569e+00 -2.3849686761968631e+00
+   15  2.6909213334137871e+01  9.4711670949167956e+00  1.4728430099051328e+00
+   16  2.6072330920174934e+01  1.5861581881675242e+01 -5.1429144994723586e+01
+   17 -4.5656802478750279e+01 -1.0557685195759221e+02 -1.8486644164722456e+01
+   18  1.3345348571390588e+02  5.3791289185967734e+01  3.1330462313925089e+01
+   19 -3.4744179120880638e+01 -2.9727735009574776e+01  1.3758697899848009e+01
+   20  3.5805745710027459e+00  2.3635583506556830e+00 -9.8641913612550636e-01
+   21 -1.9112967021807076e+01  2.4093036642722907e+01 -1.8643238887150801e+01
+   22 -9.0201617792954476e+00  3.7160721932912736e+01 -5.0873237843214003e+00
+   23 -2.0559670924056718e+01 -1.3037172781831362e+01 -4.1555140102832148e+01
+   24  3.5016464856031341e+01 -1.2488249708112150e+02 -5.5132899737529639e+00
+   25  2.4856779395007460e+00 -6.7533673428345381e+01 -1.7153563726726016e+01
+   26 -8.6470517146734750e+01 -7.9057942497429794e+00  3.8434105758101332e+01
+   27  3.1062776626038996e+01  2.5568916700234454e+00 -2.3812255018358194e+01
+   28 -3.6128021960690276e+01  8.8535946500139033e+01 -3.1718315624050986e-01
+   29 -1.7473599694524488e+01  5.1605700760038529e+00  3.1477016370448561e+00
+   30  3.2722165297173231e+00 -5.9807188688706105e+00  3.0903475914489444e+01
+   31 -5.5852599877204412e-01 -3.7635607965114964e+01  5.5407547347738578e+01
+   32 -4.8419811021707950e+01  1.2083134296973620e+02 -1.4299477119499166e+01
+   33  2.5993167832212944e+01  5.4415694550334088e+01 -2.2817585641730194e+01
+   34  1.3632918027769463e+01  1.0165209000060044e+02 -6.0709294862836366e+01
+   35 -1.7896100047169217e+01  5.7265522876438499e+00  6.7274680861552483e+00
+   36 -1.8125137765726237e+01  2.7564787465936047e+00 -9.5278575701404478e+00
+   37 -3.5898678616111592e+00 -7.2362382910010311e+01 -9.3588358281326709e+00
+   38 -3.6726824963125949e+01  2.0008528283799198e+01  4.2387983046244827e+01
+   39  1.4756349734330427e+00  3.4387817449154888e+01  4.6004570693407864e+01
+   40  1.3004687263347805e+01  6.5948251357408196e+01 -9.3870897441198231e+00
+   41 -9.2360908880622592e+00  3.0113497063365648e+00 -1.1785409504259610e+01
+   42 -1.1795417170666214e+01 -1.1546754933247897e+01 -1.2188857610119777e+01
+   43 -6.5511968637617390e-01 -3.9133448932537512e+01  5.5788437875425423e-01
+   44 -4.2976958767216473e+01  1.8814792063050870e+01 -3.2742042602021932e+01
+   45  1.8589630046648591e+02  5.3367997582431663e+01  3.8324469380182308e+01
+   46  2.2463767362468872e+01  7.7485287617829670e+01  2.6113916756613811e+01
+   47  3.0703470673161828e+00  2.4966324680483410e+01 -5.0826014249556524e+00
+   48  7.6310071304830785e+01 -9.3082908173611301e+01 -1.1280958703044767e+02
+...