diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index f99a336dbb..9b316fbeb9 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -135,6 +135,7 @@ src/timer.*               @akohlmey
 src/utils.*               @akohlmey @rbberger
 src/verlet.*              @sjplimp @stanmoore1
 src/math_eigen_impl.h     @jewettaij
+src/fix_press_langevin.*  @Bibobu
 
 # tools
 tools/coding_standard/* @akohlmey @rbberger
diff --git a/cmake/Modules/Packages/ML-PACE.cmake b/cmake/Modules/Packages/ML-PACE.cmake
index 6cdb751617..ce8f02f5f4 100644
--- a/cmake/Modules/Packages/ML-PACE.cmake
+++ b/cmake/Modules/Packages/ML-PACE.cmake
@@ -1,6 +1,6 @@
-set(PACELIB_URL "https://github.com/ICAMS/lammps-user-pace/archive/refs/tags/v.2023.01.3.fix.tar.gz" CACHE STRING "URL for PACE evaluator library sources")
+set(PACELIB_URL "https://github.com/ICAMS/lammps-user-pace/archive/refs/tags/v.2023.10.04.tar.gz" CACHE STRING "URL for PACE evaluator library sources")
 
-set(PACELIB_MD5 "4f0b3b5b14456fe9a73b447de3765caa" CACHE STRING "MD5 checksum of PACE evaluator library tarball")
+set(PACELIB_MD5 "70ff79f4e59af175e55d24f3243ad1ff" CACHE STRING "MD5 checksum of PACE evaluator library tarball")
 mark_as_advanced(PACELIB_URL)
 mark_as_advanced(PACELIB_MD5)
 GetFallbackURL(PACELIB_URL PACELIB_FALLBACK)
diff --git a/cmake/packaging/build_windows_vs.cmake b/cmake/packaging/build_windows_vs.cmake
index bd55f3f442..283425ff65 100644
--- a/cmake/packaging/build_windows_vs.cmake
+++ b/cmake/packaging/build_windows_vs.cmake
@@ -21,6 +21,13 @@ file(WRITE qtdeploy.bat "@ECHO OFF\r\nset VSCMD_DEBUG=0\r\nCALL ${VC_INIT} x64\r
 execute_process(COMMAND cmd.exe /c qtdeploy.bat COMMAND_ECHO STDERR)
 file(REMOVE qtdeploy.bat)
 
+# download and uncompress static FFmpeg and gzip binaries
+file(DOWNLOAD "https://download.lammps.org/thirdparty/ffmpeg-gzip.zip" ffmpeg-gzip.zip)
+file(WRITE unpackzip.ps1 "Expand-Archive -Path ffmpeg-gzip.zip -DestinationPath LAMMPS_GUI")
+execute_process(COMMAND powershell -ExecutionPolicy Bypass -File unpackzip.ps1)
+file(REMOVE unpackzip.ps1)
+file(REMOVE ffmpeg-gzip.zip)
+
 # create zip archive
 file(WRITE makearchive.ps1 "Compress-Archive -Path LAMMPS_GUI -CompressionLevel Optimal -DestinationPath LAMMPS_GUI-Win10-amd64.zip")
 execute_process(COMMAND powershell -ExecutionPolicy Bypass -File makearchive.ps1)
diff --git a/cmake/presets/macos-multiarch.cmake b/cmake/presets/macos-multiarch.cmake
index 58ef013f68..8ceaec11f8 100644
--- a/cmake/presets/macos-multiarch.cmake
+++ b/cmake/presets/macos-multiarch.cmake
@@ -10,5 +10,3 @@ set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG" CACHE STRING "" FORCE)
 set(CMAKE_C_FLAGS_RELEASE "-O3 -DNDEBUG" CACHE STRING "" FORCE)
 
 set(BUILD_MPI FALSE CACHE BOOL "" FORCE)
-set(BUILD_SHARED_LIBS FALSE CACHE BOOL "" FORCE)
-set(LAMMPS_EXCEPTIONS TRUE CACHE BOOL "" FORCE)
diff --git a/doc/src/Commands_fix.rst b/doc/src/Commands_fix.rst
index 5aabb7967c..7301d1345e 100644
--- a/doc/src/Commands_fix.rst
+++ b/doc/src/Commands_fix.rst
@@ -69,7 +69,7 @@ OPT.
    * :doc:`drude/transform/inverse <fix_drude_transform>`
    * :doc:`dt/reset (k) <fix_dt_reset>`
    * :doc:`edpd/source <fix_dpd_source>`
-   * :doc:`efield <fix_efield>`
+   * :doc:`efield (k) <fix_efield>`
    * :doc:`efield/tip4p <fix_efield>`
    * :doc:`ehex <fix_ehex>`
    * :doc:`electrode/conp (i) <fix_electrode>`
@@ -181,6 +181,7 @@ OPT.
    * :doc:`pour <fix_pour>`
    * :doc:`precession/spin <fix_precession_spin>`
    * :doc:`press/berendsen <fix_press_berendsen>`
+   * :doc:`press/langevin <fix_press_langevin>`
    * :doc:`print <fix_print>`
    * :doc:`propel/self <fix_propel_self>`
    * :doc:`property/atom (k) <fix_property_atom>`
@@ -232,7 +233,7 @@ OPT.
    * :doc:`spring <fix_spring>`
    * :doc:`spring/chunk <fix_spring_chunk>`
    * :doc:`spring/rg <fix_spring_rg>`
-   * :doc:`spring/self <fix_spring_self>`
+   * :doc:`spring/self (k) <fix_spring_self>`
    * :doc:`srd <fix_srd>`
    * :doc:`store/force <fix_store_force>`
    * :doc:`store/state <fix_store_state>`
diff --git a/doc/src/Commands_pair.rst b/doc/src/Commands_pair.rst
index c45a1d778c..923c040aaf 100644
--- a/doc/src/Commands_pair.rst
+++ b/doc/src/Commands_pair.rst
@@ -265,7 +265,7 @@ OPT.
    * :doc:`smd/tri_surface <pair_smd_triangulated_surface>`
    * :doc:`smd/ulsph <pair_smd_ulsph>`
    * :doc:`smtbq <pair_smtbq>`
-   * :doc:`snap (k) <pair_snap>`
+   * :doc:`snap (ik) <pair_snap>`
    * :doc:`soft (go) <pair_soft>`
    * :doc:`sph/heatconduction <pair_sph_heatconduction>`
    * :doc:`sph/idealgas <pair_sph_idealgas>`
@@ -305,5 +305,5 @@ OPT.
    * :doc:`wf/cut <pair_wf_cut>`
    * :doc:`ylz <pair_ylz>`
    * :doc:`yukawa (gko) <pair_yukawa>`
-   * :doc:`yukawa/colloid (go) <pair_yukawa_colloid>`
+   * :doc:`yukawa/colloid (gko) <pair_yukawa_colloid>`
    * :doc:`zbl (gko) <pair_zbl>`
diff --git a/doc/src/Howto_lammps_gui.rst b/doc/src/Howto_lammps_gui.rst
index ee190b540e..165ed84d95 100644
--- a/doc/src/Howto_lammps_gui.rst
+++ b/doc/src/Howto_lammps_gui.rst
@@ -5,48 +5,61 @@ This document describes **LAMMPS GUI version 1.5**.
 
 -----
 
-LAMMPS GUI is a simple graphical text editor that is linked to the
-:ref:`LAMMPS library <lammps_c_api>` and thus can run LAMMPS directly
-using the contents of the editor's text buffer as input.  It can
-retrieve and display information from LAMMPS while it is running and is
-adapted in multiple ways specifically for editing LAMMPS input files.
+LAMMPS GUI is a graphical text editor customized for editing LAMMPS
+input files that is linked to the :ref:`LAMMPS library <lammps_c_api>`
+and thus can run LAMMPS directly using the contents of the editor's text
+buffer as input.  It can retrieve and display information from LAMMPS
+while it is running, display visualizations created with the :doc:`dump
+image command <dump_image>`, and is adapted specifically for editing
+LAMMPS input files through text completion and reformatting, and linking
+to the online LAMMPS documentation for known LAMMPS commands and styles.
 
 .. note::
 
    Pre-compiled, ready-to-use LAMMPS GUI executables for Linux (Ubuntu
    20.04LTS or later and compatible), macOS (version 11 aka Big Sur or
    later), and Windows (version 10 or later) :ref:`are available
-   <lammps-gui-install>` for download.  The executables are linked to
-   a current version of LAMMPS as well.  The source code for the
-   LAMMPS GUI is included in the ``tools/lammps-gui`` folder of the
-   LAMMPS distribution and it can be compiled alongside LAMMPS with
-   CMake.
+   <lammps_gui_install>` for download.  They may be linked to a
+   development version of LAMMPS in case they need features not yet
+   available in a released version. Serial LAMMPS executables of the
+   same LAMMPS version are included as well.  The source code for the
+   LAMMPS GUI is included in the LAMMPS source code and can be found in
+   the ``tools/lammps-gui`` folder.  It can be compiled alongside LAMMPS
+   when :doc:`compiling with CMake <Build_cmake>`.
 
-LAMMPS GUI tries to be similar to what people traditionally would do
-to run LAMMPS using a command line window: editing inputs with a text
-editor, run LAMMPS on the input with selected command line flags, and
-then extract data from the created files and view them.  That
-procedure is quite effective and often required when running LAMMPS on
-high-performance computing facilities, or for people proficient in
-using the command line, as that allows them to use tools for the
-individual steps which they are most comfortable with.
+LAMMPS GUI tries to provide an experience similar to what people
+traditionally would do to run LAMMPS using a command line window:
 
-The main benefit of a GUI application is that many basic tasks can be
-done directly from the GUI without switching to a text console or
-requiring external programs, let alone scripts to extract data from
-the generated output.  It also integrates well with graphical desktop
-environments.
+- editing inputs with a text editor
+- running LAMMPS on the input with selected command line flags
+- and then using or extracting data from the created files and visualizing it
+
+That procedure is quite effective for people proficient in using the
+command line, as that allows them to use tools for the individual steps
+which they are most comfortable with.  It is often required when running
+LAMMPS on high-performance computing facilities.
+
+The main benefit of using the LAMMPS GUI application instead is that
+many basic tasks can be done directly from the GUI without switching to
+a text console window or using external programs, let alone writing
+scripts to extract data from the generated output.  It also integrates
+well with graphical desktop environments.
 
 LAMMPS GUI thus makes it easier for beginners to get started running
-simple LAMMPS simulations.  It is very suitable for tutorials on
-LAMMPS since you only need to learn how to use a single program.  It
-is also designed to keep the barrier low when you decide to switch to
-a full featured, standalone programming editor and more sophisticated
+simple LAMMPS simulations.  It is very suitable for tutorials on LAMMPS
+since you only need to learn how to use a single program for most tasks
+and thus time can be saved and people can focus on learning LAMMPS.  It
+is also designed to keep the barrier low when you decide to switch to a
+full featured, standalone programming editor and more sophisticated
 visualization and analysis tools and run LAMMPS from a command line.
 
 The following text provides a detailed tour of the features and
 functionality of the LAMMPS GUI.
 
+Suggestions for new features and reports of bugs are always welcome.
+You can use :doc:`the same channels as for LAMMPS itself
+<Errors_bugs>` for that purpose.
+
 -----
 
 Main window
@@ -86,9 +99,9 @@ save them.
 Running LAMMPS
 ^^^^^^^^^^^^^^
 
-From within the LAMMPS GUI main window LAMMPS can be started either
-from the ``Run`` menu using the ``Run LAMMPS from Editor Buffer``
-entry, by the hotkey `Ctrl-Enter` (`Command-Enter` on macOS), or by
+From within the LAMMPS GUI main window LAMMPS can be started either from
+the ``Run`` menu using the ``Run LAMMPS from Editor Buffer`` entry, by
+the keyboard shortcut `Ctrl-Enter` (`Command-Enter` on macOS), or by
 clicking on the green "Run" button in the status bar.  All of these
 operations will cause LAMMPS to process the entire input script, which
 may contain multiple :doc:`run <run>` or :doc:`minimize <minimize>`
@@ -147,10 +160,10 @@ More information on those windows and how to adjust their behavior and
 contents is given below.
 
 An active LAMMPS run can be stopped cleanly by using either the ``Stop
-LAMMPS`` entry in the ``Run`` menu, the hotkey `Ctrl-/` (`Command-/`
-on macOS), or by clicking on the red button in the status bar.  This
-will cause the running LAMMPS process to complete the current timestep
-(or iteration for energy minimization) and then complete the
+LAMMPS`` entry in the ``Run`` menu, the keyboard shortcut `Ctrl-/`
+(`Command-/` on macOS), or by clicking on the red button in the status
+bar.  This will cause the running LAMMPS process to complete the current
+timestep (or iteration for energy minimization) and then complete the
 processing of the buffer while skipping all run or minimize commands.
 This is equivalent to the input script command :doc:`timer timeout 0
 <timer>` and is implemented by calling the
@@ -172,17 +185,20 @@ be seen in the command line window, as shown below.
 LAMMPS GUI captures the screen output as it is generated and updates
 the log window regularly during a run.
 
-By default, there will be a new window for each run, so that it is
-possible to visually compare outputs from different runs.  It is also
-possible to change the behavior of LAMMPS GUI in the preferences dialog
-to *replace* an existing log window for a new run or to not show the log
-window by default.  It is also possible to show or hide the current log
-window from the ``View`` menu.
+By default, the log window will be replaced each time a run is started.
+The runs are counted and the run number for the current run is displayed
+in the window title.  It is possible to change the behavior of LAMMPS
+GUI in the preferences dialog to create a *new* log window for every run
+or to not show the current log window.  It is also possible to show or
+hide the *current* log window from the ``View`` menu.
 
 The text in the log window is read-only and cannot be modified, but
-editor commands to select and copy all or parts of the text can be used.
-The "Select All" and "Copy" functions are also available via a context
-menu by clicking with the right mouse button.
+keyboard shortcuts to select and copy all or parts of the text can be
+used to transfer text to another program. Also, the keyboard shortcut
+`Ctrl-S` (`Command-S` on macOS) is available to save the log buffer to a
+file.  The "Select All" and "Copy" functions, as well as a "Save Log to
+File" option are also available from a context menu by clicking with the
+right mouse button into the log window text area.
 
 Chart Window
 ------------
@@ -199,10 +215,16 @@ The drop down menu on the top right allows selection of different
 properties that are computed and written to thermo output.  Only one
 property can be shown at a time.  The plots will be updated with new
 data as the run progresses, so they can be used to visually monitor the
-evolution of available properties.  From the ``File`` menu on the top
-left, it is possible to save an image of the currently displayed plot or
-export the data in either plain text columns (for use by plotting tools
-like `gnuplot <http://www.gnuplot.info/>`_ or `grace
+evolution of available properties.  The window title will show the
+current run number that this chart window corresponds to.  Same as
+explained for the log window above, by default, the chart window will
+be replaced on each new run, but the behavior can be changed in the
+preferences dialog.
+
+From the ``File`` menu on the top left, it is possible to save an image
+of the currently displayed plot or export the data in either plain text
+columns (for use by plotting tools like `gnuplot
+<http://www.gnuplot.info/>`_ or `grace
 <https://plasma-gate.weizmann.ac.il/Grace/>`_), or as CSV data which can
 be imported for further processing with Microsoft Excel or `pandas
 <https://pandas.pydata.org/>`_
@@ -225,19 +247,20 @@ displays the images created by LAMMPS as they are written.
    :align: center
    :scale: 50%
 
-The various buttons at the bottom right of the window allow either
-single stepping through the sequence of images or playing an animation
-(as a continuous loop or once from first to last).  It is also possible
-to zoom in or zoom out of the displayed images.  The slide show window
-will be closed when a new file is loaded.
+The various buttons at the bottom right of the window allow single
+stepping through the sequence of images or playing an animation (as a
+continuous loop or once from first to last).  It is also possible to
+zoom in or zoom out of the displayed images, and to export the slide
+show animation to a movie file, if `ffmpeg <https://ffmpeg.org/>`_ is
+installed.
 
 Variable Info
 -------------
 
-During a run, it may be of interest to monitor the value of input
-script variables, for example to monitor the progress of loops.  This
-can be done by enabling the "Variables Window" in the ``View`` menu or
-by using the `Ctrl-Shift-W` hotkey.  This will show info similar to
+During a run, it may be of interest to monitor the value of input script
+variables, for example to monitor the progress of loops.  This can be
+done by enabling the "Variables Window" in the ``View`` menu or by using
+the `Ctrl-Shift-W` keyboard shortcut.  This will show info similar to
 the :doc:`info variables <info>` command in a separate window as shown
 below.
 
@@ -250,16 +273,27 @@ during a run.  It will show "(none)" if there are no variables
 defined.  Note that it is also possible to *set* :doc:`index style
 variables <variable>`, that would normally be set via command line
 flags, via the "Set Variables..." dialog from the ``Run`` menu.
+LAMMPS GUI will automatically set the variable "gui_run" to the
+current value of the run counter.  That way it would be possible
+to automatically record a log for each run attempt by using the
+command
+
+.. code-block:: LAMMPS
+
+   log logfile-${gui_run}.txt
+
+at the beginning of an input file. That would record logs to files
+``logfile-1.txt``, ``logfile-2.txt``, and so on for successive runs.
 
 Viewing Snapshot Images
 -----------------------
 
 By selecting the ``Create Image`` entry in the ``Run`` menu, or by
-hitting the `Ctrl-I` (`Command-I` on macOS) hotkey, or by clicking on
-the "palette" button in the status bar, LAMMPS GUI will send a custom
-:doc:`write_dump image <dump_image>` command to LAMMPS and read the
-resulting snapshot image with the current state of the system into an
-image viewer window.  This functionality is not available *during* an
+hitting the `Ctrl-I` (`Command-I` on macOS) keyboard shortcut, or by
+clicking on the "palette" button in the status bar, LAMMPS GUI will send
+a custom :doc:`write_dump image <dump_image>` command to LAMMPS and read
+the resulting snapshot image with the current state of the system into
+an image viewer window.  This functionality is not available *during* an
 ongoing run.  When LAMMPS is not yet initialized, LAMMPS GUI will try to
 identify the line with the first run or minimize command and execute all
 command up to that line from the input buffer and then add a "run 0"
@@ -306,34 +340,41 @@ contents to a file.
 Context Specific Word Completion
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-By default, LAMMPS GUI will display a small popup frame with possible
-completions for LAMMPS input script commands or styles after 2
-characters of a word have been typed. The word can then be completed
-through selecting an entry by scrolling down with the cursor keys and
-selecting with the 'Enter' key or by clicking on the entry with the
-mouse.  The automatic completion popup can be disabled in the
-``Preferences`` dialog, but the completion can still be requested
-manually by either hitting the 'Shift-TAB' key or by right-clicking with
-the mouse and selecting the option from the context menu.  Most of the
-completion information is taken from the LAMMPS instance and thus it
-will be adjusted to only show options available that have been enabled
-while compiling LAMMPS, however that excludes accelerated styles and
-commands, only non-suffix versions are shown.
+By default, LAMMPS GUI will display a small pop-up frame with possible
+choices for LAMMPS input script commands or styles after 2 characters of
+a word have been typed.
+
+.. image:: JPG/lammps-gui-complete.png
+   :align: center
+   :scale: 75%
+
+The word can then be completed through selecting an entry by scrolling
+up and down with the cursor keys and selecting with the 'Enter' key or
+by clicking on the entry with the mouse.  The automatic completion
+pop-up can be disabled in the ``Preferences`` dialog, but the completion
+can still be requested manually by either hitting the 'Shift-TAB' key or
+by right-clicking with the mouse and selecting the option from the
+context menu.  Most of the completion information is taken from the
+LAMMPS instance and thus it will be adjusted to only show available
+options that have been enabled while compiling LAMMPS. That, however,
+excludes accelerated styles and commands; for improved clarity, only the
+non-suffix versions of styles are shown.
 
 Line Reformatting
 ^^^^^^^^^^^^^^^^^
 
-The editor supports reformatting lines according to the syntax in
-order to have consistently aligned lines.  This primarily means adding
-space padding to commands, type specifiers, IDs and names.  This
+The editor supports reformatting lines according to the syntax in order
+to have consistently aligned lines.  This primarily means adding
+whitespace padding to commands, type specifiers, IDs and names.  This
 reformatting is performed by default when hitting the 'Enter' key to
-start a new line.  This feature can be turned off in the
+start a new line.  This feature can be turned on or off in the
 ``Preferences`` dialog, but it can still be manually performed by
-hitting the 'TAB' key.
+hitting the 'TAB' key.  The amount of padding can also be changed in the
+``Preferences`` dialog.
 
 Internally this functionality is achieved by splitting the line into
 "words" and then putting it back together with padding added where the
-context can be detected; otherwise a single space is used.
+context can be detected; otherwise a single space is used between words.
 
 Context Specific Help
 ^^^^^^^^^^^^^^^^^^^^^
@@ -343,23 +384,23 @@ Context Specific Help
    :scale: 50%
 
 A unique feature of the LAMMPS GUI is the option to look up the
-documentation for the command in the current line.  This can be done
-by either clicking the right mouse button or by using the `Ctrl-?`
-hotkey.  When clicking the mouse there are additional entries in the
-context menu that will open the corresponding documentation page in
-the online LAMMPS documentation.  When using the hotkey, the first of
+documentation for the command in the current line.  This can be done by
+either clicking the right mouse button or by using the `Ctrl-?` keyboard
+shortcut.  When clicking the mouse there are additional entries in the
+context menu that will open the corresponding documentation page in the
+online LAMMPS documentation.  When using the keyboard, the first of
 those entries will be chosen directly.
 
 Menu
 ----
 
 The menu bar has entries ``File``, ``Edit``, ``Run``, ``View``, and
-``About``.  Instead of using the mouse to click on them, the
-individual menus can also be activated by hitting the `Alt` key
-together with the corresponding underlined letter, that is `Alt-F`
-will activate the ``File`` menu.  For the corresponding activated
-sub-menus, the underlined letter together with the `Alt` key can again
-be used to select entries instead of using the mouse.
+``About``.  Instead of using the mouse to click on them, the individual
+menus can also be activated by hitting the `Alt` key together with the
+corresponding underlined letter, that is `Alt-F` will activate the
+``File`` menu.  For the corresponding activated sub-menus, the key
+corresponding to the underlined letters can again be used to select entries
+instead of using the mouse.
 
 File
 ^^^^
@@ -385,8 +426,8 @@ Edit
 
 The ``Edit`` menu offers the usual editor functions like ``Undo``,
 ``Redo``, ``Cut``, ``Copy``, ``Paste``.  It can also open a
-``Preferences`` dialog (hotkey `Ctrl-P`) and allows deletion of all
-stored preferences so they will be reset to default values.
+``Preferences`` dialog (keyboard shortcut `Ctrl-P`) and allows deletion
+of all stored preferences so they will be reset to default values.
 
 Run
 ^^^
@@ -516,7 +557,7 @@ General Settings:
 - *Replace image window on new render:* when checked, an existing
   chart window will be replaced when a new snapshot image is requested,
   otherwise each command will create a new image window.
-- *Path to LAMMPS Shared Library File:* this options is only available
+- *Path to LAMMPS Shared Library File:* this option is only visible
   when LAMMPS GUI was compiled to load the LAMMPS library at run time
   instead of being linked to it directly.  With the ``Browse..`` button
   or by changing the text, a different shared library file with a
@@ -574,26 +615,26 @@ the range between 1 and 32.
 
 The two settings which follow enable or disable the automatic
 reformatting when hitting the 'Enter' key and the automatic display of
-the completion popup window.
+the completion pop-up window.
 
 -----------
 
-Hotkeys
--------
+Keyboard Shortcuts
+------------------
 
-Almost all functionality is accessible from the menu or via hotkeys.
-The following hotkeys are available (On macOS use the Command key
-instead of Ctrl/Control).
+Almost all functionality is accessible from the menu of the editor
+window or through keyboard shortcuts.  The following shortcuts are
+available (On macOS use the Command key instead of Ctrl/Control).
 
 .. list-table::
    :header-rows: 1
    :widths: auto
 
-   * - Hotkey
+   * - Shortcut
      - Function
-     - Hotkey
+     - Shortcut
      - Function
-     - Hotkey
+     - Shortcut
      - Function
    * - Ctrl+N
      - New File
@@ -620,7 +661,7 @@ instead of Ctrl/Control).
      - Ctrl+I
      - Snapshot Image
    * - Ctrl+Q
-     - Quit
+     - Quit Application
      - Ctrl+V
      - Paste text
      - Ctrl+L
@@ -653,3 +694,7 @@ instead of Ctrl/Control).
 Further editing keybindings `are documented with the Qt documentation
 <https://doc.qt.io/qt-5/qplaintextedit.html#editing-key-bindings>`_.  In
 case of conflicts the list above takes precedence.
+
+All other windows only support a subset of the keyboard shortcuts listed
+above.  Typically, the shortcuts `Ctrl-/` (Stop Run), `Ctrl-W` (Close
+Window), and `Ctrl-Q` (Quit Application) are supported.
diff --git a/doc/src/Howto_output.rst b/doc/src/Howto_output.rst
index 851b7703fd..6fcd36ab56 100644
--- a/doc/src/Howto_output.rst
+++ b/doc/src/Howto_output.rst
@@ -1,7 +1,7 @@
 Output from LAMMPS (thermo, dumps, computes, fixes, variables)
 ==============================================================
 
-There are four basic kinds of LAMMPS output:
+There are four basic forms of LAMMPS output:
 
 * :doc:`Thermodynamic output <thermo_style>`, which is a list of
   quantities printed every few timesteps to the screen and logfile.
@@ -20,18 +20,17 @@ output files, depending on what :doc:`dump <dump>` and :doc:`fix <fix>`
 commands you specify.
 
 As discussed below, LAMMPS gives you a variety of ways to determine
-what quantities are computed and printed when the thermodynamics,
+what quantities are calculated and printed when the thermodynamics,
 dump, or fix commands listed above perform output.  Throughout this
 discussion, note that users can also :doc:`add their own computes and
-fixes to LAMMPS <Modify>` which can then generate values that can then
-be output with these commands.
+fixes to LAMMPS <Modify>` which can generate values that can then be
+output with these commands.
 
 The following subsections discuss different LAMMPS commands related
 to output and the kind of data they operate on and produce:
 
 * :ref:`Global/per-atom/local/per-grid data <global>`
 * :ref:`Scalar/vector/array data <scalar>`
-* :ref:`Per-grid data <grid>`
 * :ref:`Disambiguation <disambiguation>`
 * :ref:`Thermodynamic output <thermo>`
 * :ref:`Dump file output <dump>`
@@ -48,34 +47,65 @@ to output and the kind of data they operate on and produce:
 Global/per-atom/local/per-grid data
 -----------------------------------
 
-Various output-related commands work with four different styles of
+Various output-related commands work with four different "styles" of
 data: global, per-atom, local, and per-grid.  A global datum is one or
 more system-wide values, e.g. the temperature of the system.  A
 per-atom datum is one or more values per atom, e.g. the kinetic energy
 of each atom.  Local datums are calculated by each processor based on
-the atoms it owns, but there may be zero or more per atom, e.g. a list
+the atoms it owns, and there may be zero or more per atom, e.g. a list
 of bond distances.
 
 A per-grid datum is one or more values per grid cell, for a grid which
-overlays the simulation domain.  The grid cells and the data they
-store are distributed across processors; each processor owns the grid
-cells whose center point falls within its subdomain.
+overlays the simulation domain.  Similar to atoms and per-atom data,
+the grid cells and the data they store are distributed across
+processors; each processor owns the grid cells whose center points
+fall within its subdomain.
 
 .. _scalar:
 
 Scalar/vector/array data
 ------------------------
 
-Global, per-atom, and local datums can come in three kinds: a single
-scalar value, a vector of values, or a 2d array of values.  The doc
-page for a "compute" or "fix" or "variable" that generates data will
-specify both the style and kind of data it produces, e.g. a per-atom
-vector.
+Global, per-atom, local, and per-grid datums can come in three
+"kinds": a single scalar value, a vector of values, or a 2d array of
+values.  More specifically these are the valid kinds for each style:
 
-When a quantity is accessed, as in many of the output commands
-discussed below, it can be referenced via the following bracket
-notation, where ID in this case is the ID of a compute.  The leading
-"c\_" would be replaced by "f\_" for a fix, or "v\_" for a variable:
+* global scalar
+* global vector
+* global array
+* per-atom vector
+* per-atom array
+* local vector
+* local array
+* per-grid vector
+* per-grid array
+
+A per-atom vector means a single value per atom; the length of the
+"vector" is the number of atoms.  A per-atom array means multiple values
+per atom.  Similarly a local vector or array means one or multiple
+values per entity (e.g. per bond in the system).  And a per-grid
+vector or array means one or multiple values per grid cell.
+
+The doc page for a compute or fix or variable that generates data will
+specify both the styles and kinds of data it produces, e.g. a per-atom
+vector.  Note that a compute or fix may generate multiple styles and
+kinds of output.  However, for per-atom data only a vector or array is
+output, never both.  Likewise for local and per-grid data.  An
+example of a fix which generates multiple styles and kinds of data is
+the :doc:`fix mdi/qm <fix_mdi_qm>` command.  It outputs a global
+scalar, global vector, and per-atom array for the quantum mechanical
+energy and virial of the system and forces on each atom.
+
+By contrast, different variable styles generate only a single kind of
+data: a global scalar for an equal-style variable, global vector for a
+vector-style variable, and a per-atom vector for an atom-style
+variable.
+
+When data is accessed by another command, as in many of the output
+commands discussed below, it can be referenced via the following
+bracket notation, where ID in this case is the ID of a compute.  The
+leading "c\_" would be replaced by "f\_" for a fix, or "v\_" for a
+variable (and ID would be the name of the variable):
 
 +-------------+--------------------------------------------+
 | c_ID        | entire scalar, vector, or array            |
@@ -85,40 +115,56 @@ notation, where ID in this case is the ID of a compute.  The leading
 | c_ID[I][J]  | one element of array                       |
 +-------------+--------------------------------------------+
 
-In other words, using one bracket reduces the dimension of the data
-once (vector -> scalar, array -> vector).  Using two brackets reduces
-the dimension twice (array -> scalar).  Thus a command that uses
-scalar values as input can typically also process elements of a vector
-or array.
+Note that using one bracket reduces the dimension of the data once
+(vector -> scalar, array -> vector).  Using two brackets reduces the
+dimension twice (array -> scalar).  Thus a command that uses scalar
+values as input can also conceptually operate on an element of a
+vector or array.
 
-.. _grid:
-
-Per-grid data
-------------------------
-
-Per-grid data can come in two kinds: a vector of values (one per grid
-cekk), or a 2d array of values (multiple values per grid ckk).  The
-doc page for a "compute" or "fix" that generates data will specify
-names for both the grid(s) and datum(s) it produces, e.g. per-grid
-vectors or arrays, which can be referenced by other commands.  See the
-:doc:`Howto grid <Howto_grid>` doc page for more details.
+Per-grid vectors or arrays are accessed similarly, except that the ID
+for the compute or fix includes a grid name and a data name.  This is
+because a fix or compute can create multiple grids (of different
+sizes) and multiple sets of data (for each grid).  The fix or compute
+defines names for each grid and for each data set, so that all of them
+can be accessed by other commands.  See the :doc:`Howto grid
+<Howto_grid>` doc page for more details.
 
 .. _disambiguation:
 
 Disambiguation
 --------------
 
-Some computes and fixes produce data in multiple styles, e.g. a global
-scalar and a per-atom vector. Usually the context in which the input
-script references the data determines which style is meant. Example:
-if a compute provides both a global scalar and a per-atom vector, the
-former will be accessed by using ``c_ID`` in an equal-style variable,
-while the latter will be accessed by using ``c_ID`` in an atom-style
-variable.  Note that atom-style variable formulas can also access
-global scalars, but in this case it is not possible to do this
-directly because of the ambiguity.  Instead, an equal-style variable
-can be defined which accesses the global scalar, and that variable can
-be used in the atom-style variable formula in place of ``c_ID``.
+When a compute or fix produces data in multiple styles, e.g. global
+and per-atom, a reference to the data can sometimes be ambiguous.
+Usually the context in which the input script references the data
+determines which style is meant.
+
+For example, if a compute outputs a global vector and a per-atom
+array, an element of the global vector will be accessed by using
+``c_ID[I]`` in :doc:`thermodynamic output <thermo_style>`, while a
+column of the per-atom array will be accessed by using ``c_ID[I]`` in
+a :doc:`dump custom <dump>` command.
+
+However, if an :doc:`atom-style variable <variable>` references
+``c_ID[I]``, then it could be intended to refer to a single element of
+the global vector or a column of the per-atom array.  The doc page for
+any command that has a potential ambiguity (variables are the most
+common) will explain how to resolve the ambiguity.
+
+In this case, an atom-style variable references per-atom data if it
+exists.  If access to an element of a global vector is needed (as in
+this example), an equal-style variable which references the value can
+be defined and used in the atom-style variable formula instead.
+
+Similarly, :doc:`thermodynamic output <thermo_style>` can only
+reference global data from a compute or fix.  But you can indirectly
+access per-atom data as follows.  The reference ``c_ID[245][2]`` for
+the ID of a :doc:`compute displace/atom <compute_displace_atom>`
+command refers to the y-component of displacement for the atom with
+ID 245.  While you cannot use that reference directly in the
+:doc:`thermo_style <thermo_style>` command, you can use it in an
+equal-style variable formula, and then reference the variable in
+thermodynamic output.
 
 .. _thermo:
 
@@ -389,7 +435,7 @@ output and input data types must match, e.g. global/per-atom/local
 data and scalar/vector/array data.
 
 Also note that, as described above, when a command takes a scalar as
-input, that could be an element of a vector or array.  Likewise a
+input, that could also be an element of a vector or array.  Likewise a
 vector input could be a column of an array.
 
 +--------------------------------------------------------+----------------------------------------------+----------------------------------------------------+
diff --git a/doc/src/Howto_triclinic.rst b/doc/src/Howto_triclinic.rst
index 0efadbcc8c..2983d013c6 100644
--- a/doc/src/Howto_triclinic.rst
+++ b/doc/src/Howto_triclinic.rst
@@ -12,7 +12,8 @@ is created, e.g. by the :doc:`create_box <create_box>` or
 :doc:`read_data <read_data>` or :doc:`read_restart <read_restart>`
 commands.  Additionally, LAMMPS defines box size parameters lx,ly,lz
 where lx = xhi-xlo, and similarly in the y and z dimensions.  The 6
-parameters, as well as lx,ly,lz, can be output via the :doc:`thermo_style custom <thermo_style>` command.
+parameters, as well as lx,ly,lz, can be output via the
+:doc:`thermo_style custom <thermo_style>` command.
 
 LAMMPS also allows simulations to be performed in triclinic
 (non-orthogonal) simulation boxes shaped as a parallelepiped with
diff --git a/doc/src/Intro_nonfeatures.rst b/doc/src/Intro_nonfeatures.rst
index 3289b838d6..ea12e8c0b5 100644
--- a/doc/src/Intro_nonfeatures.rst
+++ b/doc/src/Intro_nonfeatures.rst
@@ -5,7 +5,7 @@ LAMMPS is designed to be a fast, parallel engine for molecular
 dynamics (MD) simulations.  It provides only a modest amount of
 functionality for setting up simulations and analyzing their output.
 
-Specifically, LAMMPS was not conceived and designed for:
+Originally, LAMMPS was not conceived and designed for:
 
 * being run through a GUI
 * building molecular systems, or building molecular topologies
@@ -14,9 +14,10 @@ Specifically, LAMMPS was not conceived and designed for:
 * visualize your MD simulation interactively
 * plot your output data
 
-Over the years some of these limitations have been reduced or
-removed, through features added to LAMMPS or external tools
-that either closely interface with LAMMPS or extend LAMMPS.
+Over the years many of these limitations have been reduced or
+removed, in part through features added to LAMMPS and in part
+through external tools that either closely interface with LAMMPS
+or extend LAMMPS.
 
 Here are suggestions on how to perform these tasks:
 
@@ -24,8 +25,9 @@ Here are suggestions on how to perform these tasks:
   wraps the library interface is provided.  Thus, GUI interfaces can be
   written in Python or C/C++ that run LAMMPS and visualize or plot its
   output.  Examples of this are provided in the python directory and
-  described on the :doc:`Python <Python_head>` doc page.  Also, there
-  are several external wrappers or GUI front ends.
+  described on the :doc:`Python <Python_head>` doc page.  As of version
+  2 August 2023 :ref:`a GUI tool <lammps_gui>` is included in LAMMPS.
+  Also, there are several external wrappers or GUI front ends.
 * **Builder:** Several pre-processing tools are packaged with LAMMPS.
   Some of them convert input files in formats produced by other MD codes
   such as CHARMM, AMBER, or Insight into LAMMPS input formats.  Some of
diff --git a/doc/src/JPG/lammps-gui-chart.png b/doc/src/JPG/lammps-gui-chart.png
index 447e709625..a16fcb167c 100644
Binary files a/doc/src/JPG/lammps-gui-chart.png and b/doc/src/JPG/lammps-gui-chart.png differ
diff --git a/doc/src/JPG/lammps-gui-complete.png b/doc/src/JPG/lammps-gui-complete.png
new file mode 100644
index 0000000000..8e80aa1998
Binary files /dev/null and b/doc/src/JPG/lammps-gui-complete.png differ
diff --git a/doc/src/JPG/lammps-gui-log.png b/doc/src/JPG/lammps-gui-log.png
index 2865dc4d86..3dbae1e424 100644
Binary files a/doc/src/JPG/lammps-gui-log.png and b/doc/src/JPG/lammps-gui-log.png differ
diff --git a/doc/src/JPG/lammps-gui-main.png b/doc/src/JPG/lammps-gui-main.png
index 1e43827aa4..f700b4264f 100644
Binary files a/doc/src/JPG/lammps-gui-main.png and b/doc/src/JPG/lammps-gui-main.png differ
diff --git a/doc/src/JPG/lammps-gui-popup-help.png b/doc/src/JPG/lammps-gui-popup-help.png
index 0d692f1795..395e06ff43 100644
Binary files a/doc/src/JPG/lammps-gui-popup-help.png and b/doc/src/JPG/lammps-gui-popup-help.png differ
diff --git a/doc/src/JPG/lammps-gui-prefs-accel.png b/doc/src/JPG/lammps-gui-prefs-accel.png
index ffc558f736..3cab94136a 100644
Binary files a/doc/src/JPG/lammps-gui-prefs-accel.png and b/doc/src/JPG/lammps-gui-prefs-accel.png differ
diff --git a/doc/src/JPG/lammps-gui-prefs-editor.png b/doc/src/JPG/lammps-gui-prefs-editor.png
index 5dbe5ef380..5d2fe50380 100644
Binary files a/doc/src/JPG/lammps-gui-prefs-editor.png and b/doc/src/JPG/lammps-gui-prefs-editor.png differ
diff --git a/doc/src/JPG/lammps-gui-prefs-general.png b/doc/src/JPG/lammps-gui-prefs-general.png
index e079f15963..765ba8f84d 100644
Binary files a/doc/src/JPG/lammps-gui-prefs-general.png and b/doc/src/JPG/lammps-gui-prefs-general.png differ
diff --git a/doc/src/JPG/lammps-gui-prefs-image.png b/doc/src/JPG/lammps-gui-prefs-image.png
index 978685dc5b..b87e8f2d20 100644
Binary files a/doc/src/JPG/lammps-gui-prefs-image.png and b/doc/src/JPG/lammps-gui-prefs-image.png differ
diff --git a/doc/src/JPG/lammps-gui-slideshow.png b/doc/src/JPG/lammps-gui-slideshow.png
index 188a078429..21ef80e210 100644
Binary files a/doc/src/JPG/lammps-gui-slideshow.png and b/doc/src/JPG/lammps-gui-slideshow.png differ
diff --git a/doc/src/Tools.rst b/doc/src/Tools.rst
index e9ae37a463..49022a4ee9 100644
--- a/doc/src/Tools.rst
+++ b/doc/src/Tools.rst
@@ -645,9 +645,14 @@ LAMMPS GUI
 Overview
 ^^^^^^^^
 
-LAMMPS GUI is a simple graphical text editor that is linked to the
-:ref:`LAMMPS C-library interface <lammps_c_api>` and thus can run LAMMPS
-directly using the contents of the editor's text buffer as input.
+LAMMPS GUI is a graphical text editor customized for editing LAMMPS
+input files that is linked to the :ref:`LAMMPS C-library <lammps_c_api>`
+and thus can run LAMMPS directly using the contents of the editor's text
+buffer as input.  It can retrieve and display information from LAMMPS
+while it is running, display visualizations created with the :doc:`dump
+image command <dump_image>`, and is adapted specifically for editing
+LAMMPS input files through text completion and reformatting, and linking
+to the online LAMMPS documentation for known LAMMPS commands and styles.
 
 This is similar to what people traditionally would do to run LAMMPS:
 using a regular text editor to edit the input and run the necessary
@@ -656,9 +661,9 @@ terminal window.  This similarity is a design goal. While making it easy
 for beginners to start with LAMMPS, it is also the intention to simplify
 the transition to workflows like most experienced LAMMPS users do.
 
-All features have been extensively exposed to hotkeys, so that there is
-also appeal for experienced LAMMPS users, too, especially for
-prototyping and testing simulations setups.
+All features have been extensively exposed to keyboard shortcuts, so
+that there is also appeal for experienced LAMMPS users for prototyping
+and testing simulation setups.
 
 Features
 ^^^^^^^^
@@ -697,22 +702,26 @@ Prerequisites and portability
 LAMMPS GUI is programmed in C++ based on the C++11 standard and using
 the `Qt GUI framework <https://www.qt.io/product/framework>`_.
 Currently, Qt version 5.12 or later is required; Qt 5.15LTS is
-recommended; Qt 6.x not (yet) supported.  Building LAMMPS with CMake is
-required.  The LAMMPS GUI has been successfully compiled and tested on:
+recommended; support for Qt version 6.x is under active development and
+thus far only tested with Qt 6.5LTS on Linux.  Building LAMMPS with
+CMake is required.
+
+The LAMMPS GUI has been successfully compiled and tested on:
 
 - Ubuntu Linux 20.04LTS x86_64 using GCC 9, Qt version 5.12
 - Fedora Linux 38 x86\_64 using GCC 13 and Clang 16, Qt version 5.15LTS
+- Fedora Linux 38 x86\_64 using GCC 13, Qt version 6.5LTS
 - Apple macOS 12 (Monterey) and macOS 13 (Ventura) with Xcode on arm64 and x86\_64, Qt version 5.15LTS
 - Windows 10 and 11 x86_64 with Visual Studio 2022 and Visual C++ 14.36, Qt version 5.15LTS
 - Windows 10 and 11 x86_64 with MinGW / GCC 10.0 cross-compiler on Fedora 38, Qt version 5.15LTS
 
-.. _lammps-gui-install:
+.. _lammps_gui_install:
 
 
 Pre-compiled executables
 ^^^^^^^^^^^^^^^^^^^^^^^^
 
-Pre-compiled LAMMPS executables including the GUI are currently
+Pre-compiled LAMMPS executable packages that include the GUI are currently
 available from https://download.lammps.org/static or
 https://github.com/lammps/lammps/releases.  You can unpack the archives
 (or mount the macOS disk image) and run the GUI directly in place. The
@@ -737,7 +746,10 @@ stored in a location where CMake can find them without additional help.
 Otherwise, the location of the Qt library installation must be indicated
 by setting ``-D Qt5_DIR=/path/to/qt5/lib/cmake/Qt5``, which is a path to
 a folder inside the Qt installation that contains the file
-``Qt5Config.cmake``.
+``Qt5Config.cmake``. Similarly, for Qt6 the location of the Qt library
+installation can be indicated by setting ``-D Qt6_DIR=/path/to/qt6/lib/cmake/Qt6``,
+if necessary.  When both Qt5 and Qt6 are available, Qt6 will be preferred
+unless ``-D LAMMPS_GUI_USE_QT5=yes`` is set.
 
 It should be possible to build the LAMMPS GUI as a standalone
 compilation (e.g. when LAMMPS has been compiled with traditional make),
diff --git a/doc/src/atom_modify.rst b/doc/src/atom_modify.rst
index 1e5a3d49ff..21590e6680 100644
--- a/doc/src/atom_modify.rst
+++ b/doc/src/atom_modify.rst
@@ -65,6 +65,11 @@ switch.  This is described on the :doc:`Build_settings <Build_settings>`
 doc page.  If atom IDs are not used, they must be specified as 0 for
 all atoms, e.g. in a data or restart file.
 
+.. note::
+
+   If a :doc:`triclinic simulation box <Howto_triclinic>` is used,
+   atom IDs are required, due to how neighbor lists are built.
+
 The *map* keyword determines how atoms with specific IDs are found
 when required.  An example are the bond (angle, etc) methods which
 need to find the local index of an atom with a specific global ID
diff --git a/doc/src/compute.rst b/doc/src/compute.rst
index abc89fb663..6737203618 100644
--- a/doc/src/compute.rst
+++ b/doc/src/compute.rst
@@ -27,58 +27,62 @@ Examples
 Description
 """""""""""
 
-Define a computation that will be performed on a group of atoms.
-Quantities calculated by a compute are instantaneous values, meaning
-they are calculated from information about atoms on the current
-timestep or iteration, though a compute may internally store some
-information about a previous state of the system.  Defining a compute
-does not perform a computation.  Instead computes are invoked by other
-LAMMPS commands as needed (e.g., to calculate a temperature needed for
-a thermostat fix or to generate thermodynamic or dump file output).
-See the :doc:`Howto output <Howto_output>` page for a summary of
-various LAMMPS output options, many of which involve computes.
+Define a diagnostic computation that will be performed on a group of
+atoms.  Quantities calculated by a compute are instantaneous values,
+meaning they are calculated from information about atoms on the
+current timestep or iteration, though internally a compute may store
+some information about a previous state of the system.  Defining a
+compute does not perform the computation.  Instead computes are
+invoked by other LAMMPS commands as needed (e.g., to calculate a
+temperature needed for a thermostat fix or to generate thermodynamic
+or dump file output).  See the :doc:`Howto output <Howto_output>` page
+for a summary of various LAMMPS output options, many of which involve
+computes.
 
 The ID of a compute can only contain alphanumeric characters and
 underscores.
 
 ----------
 
-Computes calculate one or more of four styles of quantities: global,
-per-atom, local, or per-atom.  A global quantity is one or more
-system-wide values, e.g. the temperature of the system.  A per-atom
-quantity is one or more values per atom, e.g. the kinetic energy of
-each atom.  Per-atom values are set to 0.0 for atoms not in the
-specified compute group.  Local quantities are calculated by each
-processor based on the atoms it owns, but there may be zero or more
-per atom, e.g. a list of bond distances.  Per-grid quantities are
-calculated on a regular 2d or 3d grid which overlays a 2d or 3d
-simulation domain.  The grid points and the data they store are
-distributed across processors; each processor owns the grid points
-which fall within its subdomain.
+Computes calculate and store any of four *styles* of quantities:
+global, per-atom, local, or per-grid.
 
-Computes that produce per-atom quantities have the word "atom" at the
-end of their style, e.g. *ke/atom*\ .  Computes that produce local
-quantities have the word "local" at the end of their style,
-e.g. *bond/local*\ .  Computes that produce per-grid quantities have
-the word "grid" at the end of their style, e.g. *property/grid*\ .
-Styles with neither "atom" or "local" or "grid" at the end of their
-style name produce global quantities.
+A global quantity is one or more system-wide values, e.g. the
+temperature of the system.  A per-atom quantity is one or more values
+per atom, e.g. the kinetic energy of each atom.  Per-atom values are
+set to 0.0 for atoms not in the specified compute group.  Local
+quantities are calculated by each processor based on the atoms it
+owns, but there may be zero or more per atom, e.g. a list of bond
+distances.  Per-grid quantities are calculated on a regular 2d or 3d
+grid which overlays a 2d or 3d simulation domain.  The grid points and
+the data they store are distributed across processors; each processor
+owns the grid points which fall within its subdomain.
 
-Note that a single compute typically produces either global or
-per-atom or local or per-grid values.  It does not compute both global
-and per-atom values.  It can produce local values or per-grid values
-in tandem with global or per-atom quantities.  The compute doc page
-will explain the details.
+As a general rule of thumb, computes that produce per-atom quantities
+have the word "atom" at the end of their style, e.g. *ke/atom*\ .
+Computes that produce local quantities have the word "local" at the
+end of their style, e.g. *bond/local*\ .  Computes that produce
+per-grid quantities have the word "grid" at the end of their style,
+e.g. *property/grid*\ .  And styles with neither "atom" nor "local" nor
+"grid" at the end of their style name produce global quantities.
 
-Global, per-atom, local, and per-grid quantities come in three kinds:
-a single scalar value, a vector of values, or a 2d array of values.
-The doc page for each compute describes the style and kind of values
-it produces, e.g. a per-atom vector.  Some computes produce more than
-one kind of a single style, e.g. a global scalar and a global vector.
+Global, per-atom, local, and per-grid quantities can also be of three
+*kinds*: a single scalar value (global only), a vector of values, or a
+2d array of values.  For per-atom, local, and per-grid quantities, a
+"vector" means a single value for each atom, each local entity
+(e.g. bond), or grid cell.  Likewise an "array" means multiple values
+for each atom, each local entity, or each grid cell.
 
-When a compute quantity is accessed, as in many of the output commands
-discussed below, it can be referenced via the following bracket
-notation, where ID is the ID of the compute:
+Note that a single compute can produce any combination of global,
+per-atom, local, or per-grid values.  Likewise it can produce any
+combination of scalar, vector, or array output for each style.  The
+exception is that for per-atom, local, and per-grid output, either a
+vector or array can be produced, but not both.  The doc page for each
+compute explains the values it produces.
+
+When a compute output is accessed by another input script command it
+is referenced via the following bracket notation, where ID is the ID
+of the compute:
 
 +-------------+--------------------------------------------+
 | c_ID        | entire scalar, vector, or array            |
@@ -89,17 +93,23 @@ notation, where ID is the ID of the compute:
 +-------------+--------------------------------------------+
 
 In other words, using one bracket reduces the dimension of the
-quantity once (vector :math:`\to` scalar, array :math:`\to` vector).  Using two
-brackets reduces the dimension twice (array :math:`\to` scalar).  Thus a
-command that uses scalar compute values as input can also process elements of a
-vector or array.
+quantity once (vector :math:`\to` scalar, array :math:`\to` vector).
+Using two brackets reduces the dimension twice (array :math:`\to`
+scalar).  Thus, for example, a command that uses global scalar compute
+values as input can also process elements of a vector or array.
+Depending on the command, this can either be done directly using the
+syntax in the table, or by first defining a :doc:`variable <variable>`
+of the appropriate style to store the quantity, then using the
+variable as an input to the command.
 
-Note that commands and :doc:`variables <variable>` which use compute
-quantities typically do not allow for all kinds (e.g., a command may
-require a vector of values, not a scalar).  This means there is no
-ambiguity about referring to a compute quantity as c_ID even if it
-produces, for example, both a scalar and vector.  The doc pages for
-various commands explain the details.
+Note that commands and :doc:`variables <variable>` which take compute
+outputs as input typically do not allow for all styles and kinds of
+data (e.g., a command may require global but not per-atom values, or
+it may require a vector of values, not a scalar).  This means there is
+typically no ambiguity about referring to a compute output as c_ID
+even if it produces, for example, both a scalar and vector.  The doc
+pages for various commands explain the details, including how any
+ambiguities are resolved.
 
 ----------
 
diff --git a/doc/src/compute_reduce.rst b/doc/src/compute_reduce.rst
index 204f1c090d..6820d2ee04 100644
--- a/doc/src/compute_reduce.rst
+++ b/doc/src/compute_reduce.rst
@@ -37,13 +37,16 @@ Syntax
        v_name = per-atom vector calculated by an atom-style variable with name
 
 * zero or more keyword/args pairs may be appended
-* keyword = *replace*
+* keyword = *replace* or *inputs*
 
   .. parsed-literal::
 
        *replace* args = vec1 vec2
          vec1 = reduced value from this input vector will be replaced
          vec2 = replace it with vec1[N] where N is index of max/min value from vec2
+       *inputs* arg = peratom or local
+         peratom = all inputs are per-atom quantities (default)
+         local = all inputs are local quantities
 
 Examples
 """"""""
@@ -60,38 +63,44 @@ Description
 """""""""""
 
 Define a calculation that "reduces" one or more vector inputs into
-scalar values, one per listed input.  The inputs can be per-atom or
-local quantities; they cannot be global quantities.  Atom attributes
-are per-atom quantities, :doc:`computes <compute>` and :doc:`fixes <fix>`
-may generate any of the three kinds of quantities, and :doc:`atom-style variables <variable>` generate per-atom quantities.  See the
-:doc:`variable <variable>` command and its special functions which can
-perform the same operations as the compute reduce command on global
-vectors.
+scalar values, one per listed input.  For the compute reduce command,
+the inputs can be either per-atom or local quantities and must all be
+of the same kind (per-atom or local); see discussion of the optional
+*inputs* keyword below.  The compute reduce/region command can only be
+used with per-atom inputs.
+
+Atom attributes are per-atom quantities, :doc:`computes <compute>` and
+:doc:`fixes <fix>` can generate either per-atom or local quantities,
+and :doc:`atom-style variables <variable>` generate per-atom
+quantities.  See the :doc:`variable <variable>` command and its
+special functions which can perform the same reduction operations as
+the compute reduce command on global vectors.
 
 The reduction operation is specified by the *mode* setting.  The *sum*
 option adds the values in the vector into a global total.  The *min*
 or *max* options find the minimum or maximum value across all vector
 values.  The *minabs* or *maxabs* options find the minimum or maximum
 value across all absolute vector values.  The *ave* setting adds the
-vector values into a global total, then divides by the number of values
-in the vector.  The *sumsq* option sums the square of the values in the
-vector into a global total.  The *avesq* setting does the same as *sumsq*,
-then divides the sum of squares by the number of values.  The last two options
-can be useful for calculating the variance of some quantity (e.g., variance =
-sumsq :math:`-` ave\ :math:`^2`).  The *sumabs* option sums the absolute
-values in the vector into a global total.  The *aveabs* setting does the same
-as *sumabs*, then divides the sum of absolute values by the number of
+vector values into a global total, then divides by the number of
+values in the vector.  The *sumsq* option sums the square of the
+values in the vector into a global total.  The *avesq* setting does
+the same as *sumsq*, then divides the sum of squares by the number of
+values.  The last two options can be useful for calculating the
+variance of some quantity (e.g., variance = sumsq :math:`-` ave\
+:math:`^2`).  The *sumabs* option sums the absolute values in the
+vector into a global total.  The *aveabs* setting does the same as
+*sumabs*, then divides the sum of absolute values by the number of
 values.
 
 Each listed input is operated on independently.  For per-atom inputs,
 the group specified with this command means only atoms within the
-group contribute to the result.  For per-atom inputs, if the compute
-reduce/region command is used, the atoms must also currently be within
-the region.  Note that an input that produces per-atom quantities may
-define its own group which affects the quantities it returns.  For
-example, if a compute is used as an input which generates a per-atom
-vector, it will generate values of 0.0 for atoms that are not in the
-group specified for that compute.
+group contribute to the result.  Likewise for per-atom inputs, if the
+compute reduce/region command is used, the atoms must also currently
+be within the region.  Note that an input that produces per-atom
+quantities may define its own group which affects the quantities it
+returns.  For example, if a compute is used as an input which
+generates a per-atom vector, it will generate values of 0.0 for atoms
+that are not in the group specified for that compute.
 
 Each listed input can be an atom attribute (position, velocity, force
 component) or can be the result of a :doc:`compute <compute>` or
@@ -123,52 +132,54 @@ array with six columns:
 
 ----------
 
-The atom attribute values (*x*, *y*, *z*, *vx*, *vy*, *vz*, *fx*, *fy*, and
-*fz*) are self-explanatory.  Note that other atom attributes can be used as
-inputs to this fix by using the
-:doc:`compute property/atom <compute_property_atom>` command and then specifying
-an input value from that compute.
+The atom attribute values (*x*, *y*, *z*, *vx*, *vy*, *vz*, *fx*,
+*fy*, and *fz*) are self-explanatory.  Note that other atom attributes
+can be used as inputs to this compute by using the :doc:`compute
+property/atom <compute_property_atom>` command and then specifying an
+input value from that compute.
 
 If a value begins with "c\_", a compute ID must follow which has been
-previously defined in the input script.  Computes can generate
-per-atom or local quantities.  See the individual
-:doc:`compute <compute>` page for details.  If no bracketed integer
-is appended, the vector calculated by the compute is used.  If a
-bracketed integer is appended, the Ith column of the array calculated
-by the compute is used.  Users can also write code for their own
-compute styles and :doc:`add them to LAMMPS <Modify>`.  See the
-discussion above for how :math:`I` can be specified with a wildcard asterisk
-to effectively specify multiple values.
+previously defined in the input script.  Valid computes can generate
+per-atom or local quantities.  See the individual :doc:`compute
+<compute>` page for details.  If no bracketed integer is appended, the
+vector calculated by the compute is used.  If a bracketed integer is
+appended, the Ith column of the array calculated by the compute is
+used.  Users can also write code for their own compute styles and
+:doc:`add them to LAMMPS <Modify>`.  See the discussion above for how
+:math:`I` can be specified with a wildcard asterisk to effectively
+specify multiple values.
 
 If a value begins with "f\_", a fix ID must follow which has been
-previously defined in the input script.  Fixes can generate per-atom
-or local quantities.  See the individual :doc:`fix <fix>` page for
-details.  Note that some fixes only produce their values on certain
-timesteps, which must be compatible with when compute reduce
+previously defined in the input script.  Valid fixes can generate
+per-atom or local quantities.  See the individual :doc:`fix <fix>`
+page for details.  Note that some fixes only produce their values on
+certain timesteps, which must be compatible with when compute reduce
 references the values, else an error results.  If no bracketed integer
 is appended, the vector calculated by the fix is used.  If a bracketed
 integer is appended, the Ith column of the array calculated by the fix
 is used.  Users can also write code for their own fix style and
 :doc:`add them to LAMMPS <Modify>`.  See the discussion above for how
-:math:`I` can be specified with a wildcard asterisk to effectively specify
-multiple values.
+:math:`I` can be specified with a wildcard asterisk to effectively
+specify multiple values.
 
 If a value begins with "v\_", a variable name must follow which has
 been previously defined in the input script.  It must be an
 :doc:`atom-style variable <variable>`.  Atom-style variables can
 reference thermodynamic keywords and various per-atom attributes, or
 invoke other computes, fixes, or variables when they are evaluated, so
-this is a very general means of generating per-atom quantities to reduce.
+this is a very general means of generating per-atom quantities to
+reduce.
 
 ----------
 
 If the *replace* keyword is used, two indices *vec1* and *vec2* are
-specified, where each index ranges from 1 to the number of input values.
-The replace keyword can only be used if the *mode* is *min* or *max*\ .
-It works as follows.  A min/max is computed as usual on the *vec2*
-input vector.  The index :math:`N` of that value within *vec2* is also stored.
-Then, instead of performing a min/max on the *vec1* input vector, the
-stored index is used to select the :math:`N`\ th element of the *vec1* vector.
+specified, where each index ranges from 1 to the number of input
+values.  The replace keyword can only be used if the *mode* is *min*
+or *max*\ .  It works as follows.  A min/max is computed as usual on
+the *vec2* input vector.  The index :math:`N` of that value within
+*vec2* is also stored.  Then, instead of performing a min/max on the
+*vec1* input vector, the stored index is used to select the :math:`N`\
+th element of the *vec1* vector.
 
 Thus, for example, if you wish to use this compute to find the bond
 with maximum stretch, you can do it as follows:
@@ -190,6 +201,16 @@ information in this context, the *replace* keywords will extract the
 atom IDs for the two atoms in the bond of maximum stretch.  These atom
 IDs and the bond stretch will be printed with thermodynamic output.
 
+.. versionadded:: TBD
+
+The *inputs* keyword allows selection of whether all the inputs are
+per-atom or local quantities.  As noted above, all the inputs must be
+the same kind (per-atom or local).  Per-atom is the default setting.
+If a compute or fix is specified as an input, it must produce per-atom
+or local data to match this setting.  If it produces both, e.g. for
+the :doc:`compute voronoi/atom <compute_voronoi_atom>` command, then
+this keyword selects between them.
+
 ----------
 
 If a single input is specified this compute produces a global scalar
@@ -197,38 +218,41 @@ value.  If multiple inputs are specified, this compute produces a
 global vector of values, the length of which is equal to the number of
 inputs specified.
 
-As discussed below, for the *sum*, *sumabs*, and *sumsq* modes, the value(s)
-produced by this compute are all "extensive", meaning their value
-scales linearly with the number of atoms involved.  If normalized
-values are desired, this compute can be accessed by the
+As discussed below, for the *sum*, *sumabs*, and *sumsq* modes, the
+value(s) produced by this compute are all "extensive", meaning their
+value scales linearly with the number of atoms involved.  If
+normalized values are desired, this compute can be accessed by the
 :doc:`thermo_style custom <thermo_style>` command with
-:doc:`thermo_modify norm yes <thermo_modify>` set as an option.
-Or it can be accessed by a
-:doc:`variable <variable>` that divides by the appropriate atom count.
+:doc:`thermo_modify norm yes <thermo_modify>` set as an option.  Or it
+can be accessed by a :doc:`variable <variable>` that divides by the
+appropriate atom count.
 
 ----------
 
 Output info
 """""""""""
 
-This compute calculates a global scalar if a single input value is specified
-or a global vector of length :math:`N`, where :math:`N` is the number of
-inputs, and which can be accessed by indices 1 to :math:`N`.  These values can
-be used by any command that uses global scalar or vector values from a
-compute as input.  See the :doc:`Howto output <Howto_output>` doc page
-for an overview of LAMMPS output options.
+This compute calculates a global scalar if a single input value is
+specified or a global vector of length :math:`N`, where :math:`N` is
+the number of inputs, and which can be accessed by indices 1 to
+:math:`N`.  These values can be used by any command that uses global
+scalar or vector values from a compute as input.  See the :doc:`Howto
+output <Howto_output>` doc page for an overview of LAMMPS output
+options.
 
 All the scalar or vector values calculated by this compute are
 "intensive", except when the *sum*, *sumabs*, or *sumsq* modes are used on
 per-atom or local vectors, in which case the calculated values are
 "extensive".
 
-The scalar or vector values will be in whatever :doc:`units <units>` the
-quantities being reduced are in.
+The scalar or vector values will be in whatever :doc:`units <units>`
+the quantities being reduced are in.
 
 Restrictions
 """"""""""""
- none
+
+As noted above, the compute reduce/region command can only be used
+with per-atom inputs.
 
 Related commands
 """"""""""""""""
@@ -238,4 +262,4 @@ Related commands
 Default
 """""""
 
-none
+The default value for the *inputs* keyword is peratom.
diff --git a/doc/src/compute_voronoi_atom.rst b/doc/src/compute_voronoi_atom.rst
index 274be1b702..9607401ccd 100644
--- a/doc/src/compute_voronoi_atom.rst
+++ b/doc/src/compute_voronoi_atom.rst
@@ -13,7 +13,7 @@ Syntax
 * ID, group-ID are documented in :doc:`compute <compute>` command
 * voronoi/atom = style name of this compute command
 * zero or more keyword/value pairs may be appended
-* keyword = *only_group* or *occupation* or *surface* or *radius* or *edge_histo* or *edge_threshold* or *face_threshold* or *neighbors* or *peratom*
+* keyword = *only_group* or *occupation* or *surface* or *radius* or *edge_histo* or *edge_threshold* or *face_threshold* or *neighbors*
 
   .. parsed-literal::
 
@@ -31,7 +31,6 @@ Syntax
        *face_threshold* arg = minarea
          minarea = minimum area for a face to be counted
        *neighbors* value = *yes* or *no* = store list of all neighbors or no
-       *peratom* value = *yes* or *no* = per-atom quantities accessible or no
 
 Examples
 """"""""
@@ -53,14 +52,12 @@ atoms in the simulation box.  The tessellation is calculated using all
 atoms in the simulation, but non-zero values are only stored for atoms
 in the group.
 
-By default two per-atom quantities are calculated by this compute.
-The first is the volume of the Voronoi cell around each atom.  Any
-point in an atom's Voronoi cell is closer to that atom than any other.
-The second is the number of faces of the Voronoi cell. This is
-equal to the number of nearest neighbors of the central atom,
-plus any exterior faces (see note below). If the *peratom* keyword
-is set to "no", the per-atom quantities are still calculated,
-but they are not accessible.
+Two per-atom quantities are calculated by this compute.  The first is
+the volume of the Voronoi cell around each atom.  Any point in an
+atom's Voronoi cell is closer to that atom than any other.  The second
+is the number of faces of the Voronoi cell. This is equal to the
+number of nearest neighbors of the central atom, plus any exterior
+faces (see note below).
 
 ----------
 
@@ -97,13 +94,13 @@ present in atom_style sphere for granular models.
 
 The *edge_histo* keyword activates the compilation of a histogram of
 number of edges on the faces of the Voronoi cells in the compute
-group. The argument *maxedge* of the this keyword is the largest number
-of edges on a single Voronoi cell face expected to occur in the
-sample. This keyword adds the generation of a global vector with
-*maxedge*\ +1 entries. The last entry in the vector contains the number of
-faces with more than *maxedge* edges. Since the polygon with the
-smallest amount of edges is a triangle, entries 1 and 2 of the vector
-will always be zero.
+group. The argument *maxedge* of this keyword is the largest
+number of edges on a single Voronoi cell face expected to occur in the
+sample. This keyword generates output of a global vector by this
+compute with *maxedge*\ +1 entries. The last entry in the vector
+contains the number of faces with more than *maxedge* edges. Since the
+polygon with the smallest amount of edges is a triangle, entries 1 and
+2 of the vector will always be zero.
 
 The *edge_threshold* and *face_threshold* keywords allow the
 suppression of edges below a given minimum length and faces below a
@@ -127,8 +124,8 @@ to locate vacancies (the coordinates are given by the atom coordinates
 at the time step when the compute was first invoked), while column two
 data can be used to identify interstitial atoms.
 
-If the *neighbors* value is set to yes, then this compute creates a
-local array with 3 columns. There is one row for each face of each
+If the *neighbors* value is set to yes, then this compute also creates
+a local array with 3 columns. There is one row for each face of each
 Voronoi cell. The 3 columns are the atom ID of the atom that owns the
 cell, the atom ID of the atom in the neighboring cell (or zero if the
 face is external), and the area of the face.  The array can be
@@ -143,8 +140,8 @@ containing all the Voronoi neighbors in a system:
    compute 6 all voronoi/atom neighbors yes
    dump d2 all local 1 dump.neighbors index c_6[1] c_6[2] c_6[3]
 
-If the *face_threshold* keyword is used, then only faces
-with areas greater than the threshold are stored.
+If the *face_threshold* keyword is used, then only faces with areas
+greater than the threshold are stored.
 
 ----------
 
@@ -158,48 +155,52 @@ Voro++ software in the src/VORONOI/README file.
 
 .. note::
 
-   The calculation of Voronoi volumes is performed by each processor for
-   the atoms it owns, and includes the effect of ghost atoms stored by
-   the processor.  This assumes that the Voronoi cells of owned atoms
-   are not affected by atoms beyond the ghost atom cut-off distance.
-   This is usually a good assumption for liquid and solid systems, but
-   may lead to underestimation of Voronoi volumes in low density
-   systems.  By default, the set of ghost atoms stored by each processor
-   is determined by the cutoff used for :doc:`pair_style <pair_style>`
-   interactions.  The cutoff can be set explicitly via the
-   :doc:`comm_modify cutoff <comm_modify>` command.  The Voronoi cells
-   for atoms adjacent to empty regions will extend into those regions up
-   to the communication cutoff in :math:`x`, :math:`y`, or :math:`z`.
-   In that situation, an exterior face is created at the cutoff distance
-   normal to the :math:`x`, :math:`y`, or :math:`z` direction.  For
-   triclinic systems, the exterior face is parallel to the corresponding
-   reciprocal lattice vector.
+   The calculation of Voronoi volumes is performed by each processor
+   for the atoms it owns, and includes the effect of ghost atoms
+   stored by the processor.  This assumes that the Voronoi cells of
+   owned atoms are not affected by atoms beyond the ghost atom cut-off
+   distance.  This is usually a good assumption for liquid and solid
+   systems, but may lead to underestimation of Voronoi volumes in low
+   density systems.  By default, the set of ghost atoms stored by each
+   processor is determined by the cutoff used for :doc:`pair_style
+   <pair_style>` interactions.  The cutoff can be set explicitly via
+   the :doc:`comm_modify cutoff <comm_modify>` command.  The Voronoi
+   cells for atoms adjacent to empty regions will extend into those
+   regions up to the communication cutoff in :math:`x`, :math:`y`, or
+   :math:`z`.  In that situation, an exterior face is created at the
+   cutoff distance normal to the :math:`x`, :math:`y`, or :math:`z`
+   direction.  For triclinic systems, the exterior face is parallel to
+   the corresponding reciprocal lattice vector.
 
 .. note::
 
-   The Voro++ package performs its calculation in 3d.  This will
-   still work for a 2d LAMMPS simulation, provided all the atoms have the
-   same :math:`z`-coordinate. The Voronoi cell of each atom will be a columnar
-   polyhedron with constant cross-sectional area along the :math:`z`-direction
-   and two exterior faces at the top and bottom of the simulation box. If
-   the atoms do not all have the same :math:`z`-coordinate, then the columnar
-   cells will be accordingly distorted. The cross-sectional area of each
-   Voronoi cell can be obtained by dividing its volume by the :math:`z` extent
-   of the simulation box.  Note that you define the :math:`z` extent of the
-   simulation box for 2d simulations when using the
-   :doc:`create_box <create_box>` or :doc:`read_data <read_data>` commands.
+   The Voro++ package performs its calculation in 3d.  This will still
+   work for a 2d LAMMPS simulation, provided all the atoms have the
+   same :math:`z`-coordinate. The Voronoi cell of each atom will be a
+   columnar polyhedron with constant cross-sectional area along the
+   :math:`z`-direction and two exterior faces at the top and bottom of
+   the simulation box. If the atoms do not all have the same
+   :math:`z`-coordinate, then the columnar cells will be accordingly
+   distorted. The cross-sectional area of each Voronoi cell can be
+   obtained by dividing its volume by the :math:`z` extent of the
+   simulation box.  Note that you define the :math:`z` extent of the
+   simulation box for 2d simulations when using the :doc:`create_box
+   <create_box>` or :doc:`read_data <read_data>` commands.
 
 Output info
 """""""""""
 
-By default, this compute calculates a per-atom array with two
-columns. In regular dynamic tessellation mode the first column is the
-Voronoi volume, the second is the neighbor count, as described above
-(read above for the output data in case the *occupation* keyword is
-specified).  These values can be accessed by any command that uses
-per-atom values from a compute as input.  See the :doc:`Howto output <Howto_output>` page for an overview of LAMMPS output
-options. If the *peratom* keyword is set to "no", the per-atom array
-is still created, but it is not accessible.
+.. deprecated:: TBD
+
+   The *peratom* keyword was removed as it is no longer required.
+
+This compute calculates a per-atom array with two columns. In regular
+dynamic tessellation mode the first column is the Voronoi volume, the
+second is the neighbor count, as described above (read above for the
+output data in case the *occupation* keyword is specified).  These
+values can be accessed by any command that uses per-atom values from a
+compute as input.  See the :doc:`Howto output <Howto_output>` page for
+an overview of LAMMPS output options.
 
 If the *edge_histo* keyword is used, then this compute generates a
 global vector of length *maxedge*\ +1, containing a histogram of the
@@ -209,17 +210,6 @@ If the *neighbors* value is set to *yes*, then this compute calculates a
 local array with three columns. There is one row for each face of each
 Voronoi cell.
 
-.. note::
-
-   Some LAMMPS commands such as the :doc:`compute reduce <compute_reduce>`
-   command can accept either a per-atom or local quantity. If this compute
-   produces both quantities, the command
-   may access the per-atom quantity, even if you want to access the local
-   quantity.  This effect can be eliminated by using the *peratom*
-   keyword to turn off the production of the per-atom quantities.  For
-   the default value *yes* both quantities are produced.  For the value
-   *no*, only the local array is produced.
-
 The Voronoi cell volume will be in distance :doc:`units <units>` cubed.
 The Voronoi face area will be in distance :doc:`units <units>` squared.
 
@@ -227,7 +217,8 @@ Restrictions
 """"""""""""
 
 This compute is part of the VORONOI package.  It is only enabled if
-LAMMPS was built with that package.  See the :doc:`Build package <Build_package>` page for more info.
+LAMMPS was built with that package.  See the :doc:`Build package
+<Build_package>` page for more info.
 
 It also requires you have a copy of the Voro++ library built and
 installed on your system.  See instructions on obtaining and
@@ -241,5 +232,4 @@ Related commands
 Default
 """""""
 
-*neighbors* no, *peratom* yes
-
+The default for the *neighbors* keyword is no.
diff --git a/doc/src/fix.rst b/doc/src/fix.rst
index 09fc05d500..0889fe281f 100644
--- a/doc/src/fix.rst
+++ b/doc/src/fix.rst
@@ -77,35 +77,44 @@ for individual fixes for info on which ones can be restarted.
 
 ----------
 
-Some fixes calculate one or more of four styles of quantities: global,
-per-atom, local, or per-grid, which can be used by other commands or
-output as described below.  A global quantity is one or more
-system-wide values, e.g. the energy of a wall interacting with
-particles.  A per-atom quantity is one or more values per atom,
-e.g. the displacement vector for each atom since time 0.  Per-atom
-values are set to 0.0 for atoms not in the specified fix group.  Local
-quantities are calculated by each processor based on the atoms it
-owns, but there may be zero or more per atoms.  Per-grid quantities
-are calculated on a regular 2d or 3d grid which overlays a 2d or 3d
-simulation domain.  The grid points and the data they store are
-distributed across processors; each processor owns the grid points
-which fall within its subdomain.
+Some fixes calculate and store any of four *styles* of quantities:
+global, per-atom, local, or per-grid.
 
-Note that a single fix typically produces either global or per-atom or
-local or per-grid values (or none at all).  It does not produce both
-global and per-atom.  It can produce local or per-grid values in
-tandem with global or per-atom values.  The fix doc page will explain
-the details.
+A global quantity is one or more system-wide values, e.g. the energy
+of a wall interacting with particles.  A per-atom quantity is one or
+more values per atom, e.g. the original coordinates of each atom at
+time 0.  Per-atom values are set to 0.0 for atoms not in the specified
+fix group.  Local quantities are calculated by each processor based on
+the atoms it owns, but there may be zero or more per atom, e.g. values
+for each bond.  Per-grid quantities are calculated on a regular 2d or
+3d grid which overlays a 2d or 3d simulation domain.  The grid points
+and the data they store are distributed across processors; each
+processor owns the grid points which fall within its subdomain.
 
-Global, per-atom, local, and per-grid quantities come in three kinds:
-a single scalar value, a vector of values, or a 2d array of values.
-The doc page for each fix describes the style and kind of values it
-produces, e.g. a per-atom vector.  Some fixes produce more than one
-kind of a single style, e.g. a global scalar and a global vector.
+As a general rule of thumb, fixes that produce per-atom quantities
+have the word "atom" at the end of their style, e.g. *ave/atom*\ .
+Fixes that produce local quantities have the word "local" at the end
+of their style, e.g. *store/local*\ .  Fixes that produce per-grid
+quantities have the word "grid" at the end of their style,
+e.g. *ave/grid*\ .
 
-When a fix quantity is accessed, as in many of the output commands
-discussed below, it can be referenced via the following bracket
-notation, where ID is the ID of the fix:
+Global, per-atom, local, and per-grid quantities can also be of three
+*kinds*: a single scalar value (global only), a vector of values, or a
+2d array of values.  For per-atom, local, and per-grid quantities, a
+"vector" means a single value for each atom, each local entity
+(e.g. bond), or grid cell.  Likewise, an "array" means multiple values
+for each atom, each local entity, or each grid cell.
+
+Note that a single fix can produce any combination of global,
+per-atom, local, or per-grid values.  Likewise it can produce any
+combination of scalar, vector, or array output for each style.  The
+exception is that for per-atom, local, and per-grid output, either a
+vector or array can be produced, but not both.  The doc page for each
+fix explains the values it produces, if any.
+
+When a fix output is accessed by another input script command it is
+referenced via the following bracket notation, where ID is the ID of
+the fix:
 
 +-------------+--------------------------------------------+
 | f_ID        | entire scalar, vector, or array            |
@@ -116,19 +125,23 @@ notation, where ID is the ID of the fix:
 +-------------+--------------------------------------------+
 
 In other words, using one bracket reduces the dimension of the
-quantity once (vector :math:`\to` scalar, array :math:`\to` vector).  Using two
-brackets reduces the dimension twice (array :math:`\to` scalar).  Thus, a
-command that uses scalar fix values as input can also process elements of a
-vector or array.
+quantity once (vector :math:`\to` scalar, array :math:`\to` vector).
+Using two brackets reduces the dimension twice (array :math:`\to`
+scalar).  Thus, for example, a command that uses global scalar fix
+values as input can also process elements of a vector or array.
+Depending on the command, this can either be done directly using the
+syntax in the table, or by first defining a :doc:`variable <variable>`
+of the appropriate style to store the quantity, then using the
+variable as an input to the command.
 
-Note that commands and :doc:`variables <variable>` that use fix
-quantities typically do not allow for all kinds (e.g., a command may
-require a vector of values, not a scalar), and even if they do, the context
-in which they are called can be used to resolve which output is being
-requested.  This means there is no
-ambiguity about referring to a fix quantity as f_ID even if it
-produces, for example, both a scalar and vector.  The doc pages for
-various commands explain the details.
+Note that commands and :doc:`variables <variable>` which take fix
+outputs as input typically do not allow for all styles and kinds of
+data (e.g., a command may require global but not per-atom values, or
+it may require a vector of values, not a scalar).  This means there is
+typically no ambiguity about referring to a fix output as f_ID even if
+it produces, for example, both a scalar and vector.  The doc pages for
+various commands explain the details, including how any ambiguities
+are resolved.
 
 ----------
 
@@ -333,6 +346,7 @@ accelerated styles exist.
 * :doc:`pour <fix_pour>` - pour new atoms/molecules into a granular simulation domain
 * :doc:`precession/spin <fix_precession_spin>` - apply a precession torque to each magnetic spin
 * :doc:`press/berendsen <fix_press_berendsen>` - pressure control by Berendsen barostat
+* :doc:`press/langevin <fix_press_langevin>` - pressure control by Langevin barostat
 * :doc:`print <fix_print>` - print text and variables during a simulation
 * :doc:`propel/self <fix_propel_self>` - model self-propelled particles
 * :doc:`property/atom <fix_property_atom>` - add customized per-atom values
diff --git a/doc/src/fix_ave_histo.rst b/doc/src/fix_ave_histo.rst
index 8bb66f0615..9699e4238c 100644
--- a/doc/src/fix_ave_histo.rst
+++ b/doc/src/fix_ave_histo.rst
@@ -79,9 +79,10 @@ Description
 
 Use one or more values as inputs every few timesteps to create a
 single histogram.  The histogram can then be averaged over longer
-timescales.  The resulting histogram can be used by other :doc:`output commands <Howto_output>`, and can also be written to a file.  The
-fix ave/histo/weight command has identical syntax to fix ave/histo,
-except that exactly two values must be specified.  See details below.
+timescales.  The resulting histogram can be used by other :doc:`output
+commands <Howto_output>`, and can also be written to a file.  The fix
+ave/histo/weight command has identical syntax to fix ave/histo, except
+that exactly two values must be specified.  See details below.
 
 The group specified with this command is ignored for global and local
 input values.  For per-atom input values, only atoms in the group
@@ -96,14 +97,18 @@ different ways; see the discussion of the *beyond* keyword below.
 
 Each input value can be an atom attribute (position, velocity, force
 component) or can be the result of a :doc:`compute <compute>` or
-:doc:`fix <fix>` or the evaluation of an equal-style or vector-style or
-atom-style :doc:`variable <variable>`.  The set of input values can be
-either all global, all per-atom, or all local quantities.  Inputs of
-different kinds (e.g. global and per-atom) cannot be mixed.  Atom
-attributes are per-atom vector values.  See the page for
-individual "compute" and "fix" commands to see what kinds of
-quantities they generate.  See the optional *kind* keyword below for
-how to force the fix ave/histo command to disambiguate if necessary.
+:doc:`fix <fix>` or the evaluation of an equal-style or vector-style
+or atom-style :doc:`variable <variable>`.  The set of input values can
+be either all global, all per-atom, or all local quantities.  Inputs
+of different kinds (e.g. global and per-atom) cannot be mixed.  Atom
+attributes are per-atom vector values.  See the page for individual
+"compute" and "fix" commands to see what kinds of quantities they
+generate.
+
+Note that a compute or fix can produce multiple kinds of data (global,
+per-atom, local).  If LAMMPS cannot unambiguously determine which kind
+of data to use, the optional *kind* keyword discussed below can force
+the desired disambiguation.
 
 Note that the output of this command is a single histogram for all
 input values combined together, not one histogram per input value.
@@ -258,13 +263,14 @@ keyword is set to *vector*, then all input values must be global or
 per-atom or local vectors, or columns of global or per-atom or local
 arrays.
 
-The *kind* keyword only needs to be set if a compute or fix produces
-more than one kind of output (global, per-atom, local).  If this is
-not the case, then LAMMPS will determine what kind of input is
-provided and whether all the input arguments are consistent.  If a
-compute or fix produces more than one kind of output, the *kind*
-keyword should be used to specify which output will be used.  The
-remaining input arguments must still be consistent.
+The *kind* keyword only needs to be used if any of the specified input
+computes or fixes produce more than one kind of output (global,
+per-atom, local).  Otherwise, LAMMPS determines the kind of data all
+the inputs produce and verifies it is all the same kind; if it is
+not, an error is triggered.  If a compute or fix produces more than one
+kind of output, the *kind* keyword should be used to specify which
+output will be used.  The other input arguments must still be
+consistent.
 
 The *beyond* keyword determines how input values that fall outside the
 *lo* to *hi* bounds are treated.  Values such that *lo* :math:`\le` value
diff --git a/doc/src/fix_efield.rst b/doc/src/fix_efield.rst
index 2958d89794..a870590856 100644
--- a/doc/src/fix_efield.rst
+++ b/doc/src/fix_efield.rst
@@ -1,4 +1,5 @@
 .. index:: fix efield
+.. index:: fix efield/kk
 .. index:: fix efield/tip4p
 
 fix efield command
@@ -210,6 +211,12 @@ the iteration count during the minimization.
    system (the quantity being minimized), you MUST enable the
    :doc:`fix_modify <fix_modify>` *energy* option for this fix.
 
+----------
+
+.. include:: accel_styles.rst
+
+----------
+
 Restrictions
 """"""""""""
 
diff --git a/doc/src/fix_press_langevin.rst b/doc/src/fix_press_langevin.rst
new file mode 100644
index 0000000000..8438d72192
--- /dev/null
+++ b/doc/src/fix_press_langevin.rst
@@ -0,0 +1,301 @@
+.. index:: fix press/langevin
+
+fix press/langevin command
+===========================
+
+Syntax
+""""""
+
+.. parsed-literal::
+
+   fix ID group-ID press/langevin keyword value ...
+
+* ID, group-ID are documented in :doc:`fix <fix>` command
+* press/langevin = style name of this fix command
+
+  .. parsed-literal::
+
+     one or more keyword value pairs may be appended
+     keyword = *iso* or *aniso* or *tri* or *x* or *y* or *z* or *xy* or *xz* or *yz* or *couple* or *friction* or *dilate* or *modulus* or *temp* or *flip*
+       *iso* or *aniso* or *tri* values = Pstart Pstop Pdamp
+         Pstart,Pstop = scalar external pressure at start/end of run (pressure units)
+         Pdamp = pressure damping parameter (time units)
+       *x* or *y* or *z* or *xy* or *xz* or *yz* values = Pstart Pstop Pdamp
+         Pstart,Pstop = external stress tensor component at start/end of run (pressure units)
+         Pdamp = pressure damping parameter
+       *flip* value = *yes* or *no* = allow or disallow box flips when it becomes highly skewed
+       *couple* = *none* or *xyz* or *xy* or *yz* or *xz*
+       *friction* value = Friction coefficient for the barostat (time units)
+       *temp* values = Tstart Tstop seed
+         Tstart,Tstop = target temperature used for the barostat at start/end of run
+         seed = seed of the random number generator
+       *dilate* value = *all* or *partial*
+
+Examples
+""""""""
+
+.. code-block:: LAMMPS
+
+   fix 1 all press/langevin iso 0.0 0.0 1000.0 temp 300 300 487374
+   fix 2 all press/langevin aniso 0.0 0.0 1000.0 temp 100 300 238 dilate partial
+
+Description
+"""""""""""
+
+Adjust the pressure of the system by using a Langevin stochastic barostat
+:ref:`(Gronbech) <Gronbech>`, which rescales the system volume and
+(optionally) the atom coordinates within the simulation box every
+timestep.
+
+The Langevin barostat couples each direction *L* with a pseudo-particle that obeys
+the Langevin equation such that:
+
+.. math::
+
+   f_P = & \frac{N k_B T_{target}}{V} + \frac{1}{V d}\sum_{i=1}^{N} \vec r_i \cdot \vec f_i - P_{target} \\
+   Q\ddot{L} + \alpha{}\dot{L} = & f_P + \beta(t)\\
+   L^{n+1} = & L^{n} + bdt\dot{L}^{n} + \frac{bdt^{2}}{2Q}f^{n}_{P} + \frac{bdt}{2Q}\beta^{n+1} \\
+   \dot{L}^{n+1} = & a\dot{L}^{n} + \frac{dt}{2Q}\left(a f^{n}_{P} + f^{n+1}_{P}\right) + \frac{b}{Q}\beta^{n+1} \\
+   a = & \frac{1-\frac{\alpha{}dt}{2Q}}{1+\frac{\alpha{}dt}{2Q}} \\
+   b = & \frac{1}{1+\frac{\alpha{}dt}{2Q}} \\
+   \left< \beta(t)\beta(t') \right> = & 2\alpha k_B Tdt
+
+Where :math:`dt` is the timestep, :math:`\dot{L}` and :math:`\ddot{L}` are the first
+and second derivatives of the coupled direction with regard to time,
+:math:`\alpha` is a friction coefficient, :math:`\beta` is a random Gaussian
+variable, and :math:`Q` is the effective mass of the coupled pseudoparticle. The
+first two terms on the right-hand side of the first equation are the virial
+expression of the canonical pressure. It is to be noted that the temperature
+used to compute the pressure is not based on the atom velocities but rather on
+the canonical
+target temperature directly. This temperature is specified using the *temp*
+keyword parameter and should be close to the expected target temperature of the
+system.
+
+Regardless of what atoms are in the fix group, a global pressure is
+computed for all atoms. Similarly, when the size of the simulation
+box is changed, all atoms are re-scaled to new positions, unless the
+keyword *dilate* is specified with a value of *partial*, in which case
+only the atoms in the fix group are re-scaled. The latter can be
+useful for leaving the coordinates of atoms in a solid substrate
+unchanged and controlling the pressure of a surrounding fluid.
+
+.. note::
+
+   Unlike the :doc:`fix npt <fix_nh>` or :doc:`fix nph <fix_nh>` commands which
+   perform Nose-Hoover barostatting AND time integration, this fix does NOT
+   perform time integration of the atoms but only of the barostat coupled
+   coordinate. It then only modifies the box size and atom coordinates to
+   effect barostatting. Thus you must use a separate time integration fix,
+   like :doc:`fix nve <fix_nve>` or :doc:`fix nvt <fix_nh>` to actually update
+   the positions and velocities of atoms.  This fix can be used in conjunction
+   with thermostatting fixes to control the temperature, such as :doc:`fix nvt
+   <fix_nh>` or :doc:`fix langevin <fix_langevin>` or :doc:`fix temp/berendsen
+   <fix_temp_berendsen>`.
+
+See the :doc:`Howto barostat <Howto_barostat>` page for a
+discussion of different ways to perform barostatting.
+
+----------
+
+The barostat is specified using one or more of the *iso*, *aniso*, *tri*, *x*,
+*y*, *z*, *xy*, *xz*, *yz*, and *couple* keywords.  These keywords give you the
+ability to specify the 3 diagonal components of an external stress tensor, and
+to couple various of these components together so that the dimensions they
+represent are varied together during a constant-pressure simulation.
+
+The target pressures for each of the 6 components of the stress tensor
+can be specified independently via the *x*, *y*, *z* keywords, which
+correspond to the 3 simulation box dimensions, and the *xy*, *xz* and *yz*
+keywords which correspond to the 3 simulation box tilt factors. For each
+component, the external pressure or tensor component at each timestep is a
+ramped value during the run from *Pstart* to *Pstop*\ . If a target pressure is
+specified for a component, then the corresponding box dimension will change
+during a simulation.  For example, if the *y* keyword is used, the y-box length
+will change.  A box dimension will not change if that component is not
+specified, although you have the option to change that dimension via the
+:doc:`fix deform <fix_deform>` command.
+
+The *Pdamp* parameter can be seen in the same way as a Nose-Hoover parameter as
+it is used to compute the mass of the fictitious particle. Without friction,
+the barostat can be compared to a single particle Nose-Hoover barostat and
+should follow a similar decay in time. The mass of the barostat is
+linked to *Pdamp* by the relation
+:math:`Q=(N_{at}+1)\cdot{}k_BT_{target}\cdot{}P_{damp}^2`. Note that *Pdamp*
+should be expressed in time units.
+
+.. note::
+
+   As with the Berendsen barostat, a Langevin barostat will not work well for
+   arbitrary values of *Pdamp*\ .  If *Pdamp* is too small, the pressure and
+   volume can fluctuate wildly; if it is too large, the pressure will take a
+   very long time to equilibrate.  A good choice for many models is a *Pdamp*
+   of around 1000 timesteps.  However, note that *Pdamp* is specified in time
+   units, and that timesteps are NOT the same as time units for most
+   :doc:`units <units>` settings.
+
+----------
+
+The *temp* keyword sets the temperature to use in the equation of motion of the
+barostat. This value is used to compute the value of the force :math:`f_P` in
+the equation of motion. It is important to note that this value is not the
+instantaneous temperature but a target temperature that ramps from *Tstart* to
+*Tstop*. Also the required argument *seed* sets the seed for the random
+number generator used in the generation of the random forces.
+
+----------
+
+The *couple* keyword allows two or three of the diagonal components of
+the pressure tensor to be "coupled" together.  The value specified
+with the keyword determines which are coupled.  For example, *xz*
+means the *Pxx* and *Pzz* components of the stress tensor are coupled.
+*Xyz* means all 3 diagonal components are coupled.  Coupling means two
+things: the instantaneous stress will be computed as an average of the
+corresponding diagonal components, and the coupled box dimensions will
+be changed together in lockstep, meaning coupled dimensions will be
+dilated or contracted by the same percentage every timestep.  The
+*Pstart*, *Pstop*, *Pdamp* parameters for any coupled dimensions must
+be identical.  *Couple xyz* can be used for a 2d simulation; the *z*
+dimension is simply ignored.
+
+----------
+
+The *iso*, *aniso* and *tri* keywords are simply shortcuts that are
+equivalent to specifying several other keywords together.
+
+The keyword *iso* means couple all 3 diagonal components together when
+pressure is computed (hydrostatic pressure), and dilate/contract the
+dimensions together.  Using "iso Pstart Pstop Pdamp" is the same as
+specifying these 4 keywords:
+
+.. parsed-literal::
+
+   x Pstart Pstop Pdamp
+   y Pstart Pstop Pdamp
+   z Pstart Pstop Pdamp
+   couple xyz
+
+The keyword *aniso* means *x*, *y*, and *z* dimensions are controlled
+independently using the *Pxx*, *Pyy*, and *Pzz* components of the
+stress tensor as the driving forces, and the specified scalar external
+pressure.  Using "aniso Pstart Pstop Pdamp" is the same as specifying
+these 4 keywords:
+
+.. parsed-literal::
+
+   x Pstart Pstop Pdamp
+   y Pstart Pstop Pdamp
+   z Pstart Pstop Pdamp
+   couple none
+
+The keyword *tri* is the same as *aniso* but also adds control of the shear
+stress coupled with the tilt factors.  It is the same as these 7 keywords:
+
+.. parsed-literal::
+
+   x Pstart Pstop Pdamp
+   y Pstart Pstop Pdamp
+   z Pstart Pstop Pdamp
+   xy Pstart Pstop Pdamp
+   xz Pstart Pstop Pdamp
+   yz Pstart Pstop Pdamp
+   couple none
+
+----------
+
+The *flip* keyword allows the tilt factors for a triclinic box to
+exceed half the distance of the parallel box length, as discussed
+below.  If the *flip* value is set to *yes*, the bound is enforced by
+flipping the box when it is exceeded.  If the *flip* value is set to
+*no*, the tilt will continue to change without flipping.  Note that if
+applied stress induces large deformations (e.g. in a liquid), this
+means the box shape can tilt dramatically and LAMMPS will run less
+efficiently, due to the large volume of communication needed to
+acquire ghost atoms around a processor's irregular-shaped subdomain.
+For extreme values of tilt, LAMMPS may also lose atoms and generate an
+error.
+
+----------
+
+The *friction* keyword sets the friction parameter :math:`\alpha` in the
+equations of motion of the barostat. For each barostat direction, the value of
+:math:`\alpha` depends on both *Pdamp* and *friction*. The value given as a
+parameter is the Langevin characteristic time
+:math:`\tau_{L}=\frac{Q}{\alpha}` in time units. The Langevin time can be understood as a
+decorrelation time for the pressure. A long Langevin time value will make the
+barostat act as an underdamped oscillator while a short value will make it
+act as an overdamped oscillator. The ideal configuration would be to find
+the critical parameter of the barostat. Empirically this is observed to
+occur for :math:`\tau_{L}\approx{}P_{damp}`.  For this reason, if the *friction*
+keyword is not used, the default value *Pdamp* is used for each barostat direction.
+
+----------
+
+This fix computes pressure each timestep. To do
+this, the fix creates its own compute of style "pressure",
+as if this command had been issued:
+
+.. code-block:: LAMMPS
+
+   compute fix-ID_press group-ID pressure NULL virial
+
+The kinetic contribution to the pressure is taken as the ensemble value
+:math:`\frac{Nk_bT}{V}` and computed by the fix itself.
+
+See the :doc:`compute pressure <compute_pressure>` command for details.  Note
+that the ID of the new compute is the fix-ID + underscore + "press" and the
+group for the new compute is the same as the fix group.
+
+Note that this is NOT the compute used by thermodynamic output (see the
+:doc:`thermo_style <thermo_style>` command) with ID = *thermo_press*. This
+means you can change the attributes of this fix's pressure via the
+:doc:`compute_modify <compute_modify>` command or print this
+pressure during thermodynamic output via the :doc:`thermo_style custom
+<thermo_style>` command using the appropriate compute-ID. It also means that
+changing attributes of *thermo_temp* or *thermo_press* will have no effect on
+this fix.
+
+Restart, fix_modify, output, run start/stop, minimize info
+"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+
+No information about this fix is written to :doc:`binary restart files <restart>`.
+
+The :doc:`fix_modify <fix_modify>` *press* option is
+supported by this fix.  You can use it to assign a
+:doc:`compute <compute>` you have defined to this fix which will be used
+in its pressure calculations.
+
+No global or per-atom quantities are stored by this fix for access by
+various :doc:`output commands <Howto_output>`.
+
+This fix can ramp its target pressure and temperature over multiple runs, using
+the *start* and *stop* keywords of the :doc:`run <run>` command.  See the
+:doc:`run <run>` command for details of how to do this. It is recommended that
+the ramped temperature is the same as the effective temperature of the
+thermostatted system. That is, if the system's temperature is ramped by other
+commands, it is recommended to do the same with this pressure control.
+
+This fix is not invoked during :doc:`energy minimization <minimize>`.
+
+Restrictions
+""""""""""""
+
+Any dimension being adjusted by this fix must be periodic.
+
+Related commands
+""""""""""""""""
+
+:doc:`fix press/berendsen <fix_press_berendsen>`,
+:doc:`fix nve <fix_nve>`, :doc:`fix nph <fix_nh>`, :doc:`fix npt <fix_nh>`, :doc:`fix langevin <fix_langevin>`,
+:doc:`fix_modify <fix_modify>`
+
+Default
+"""""""
+
+The keyword defaults are *dilate* = all, *flip* = yes, and *friction* = *Pdamp*.
+
+----------
+
+.. _Gronbech:
+
+**(Gronbech)** Gronbech-Jensen, Farago, J Chem Phys, 141, 194108 (2014).
diff --git a/doc/src/fix_rigid.rst b/doc/src/fix_rigid.rst
index 89759da817..a50e215681 100644
--- a/doc/src/fix_rigid.rst
+++ b/doc/src/fix_rigid.rst
@@ -843,7 +843,7 @@ stress/atom <compute_stress_atom>` commands.  The former can be
 accessed by :doc:`thermodynamic output <thermo_style>`.  The default
 setting for this fix is :doc:`fix_modify virial yes <fix_modify>`.
 
-All of the *rigid* styles (not the *rigid/small* styles) compute a
+All of the *rigid* styles (but not the *rigid/small* styles) compute a
 global array of values which can be accessed by various :doc:`output
 commands <Howto_output>`.  Similar information about the bodies
 defined by the *rigid/small* styles can be accessed via the
@@ -887,7 +887,8 @@ Restrictions
 """"""""""""
 
 These fixes are all part of the RIGID package.  It is only enabled if
-LAMMPS was built with that package.  See the :doc:`Build package <Build_package>` page for more info.
+LAMMPS was built with that package.  See the :doc:`Build package
+<Build_package>` page for more info.
 
 Assigning a temperature via the :doc:`velocity create <velocity>`
 command to a system with :doc:`rigid bodies <fix_rigid>` may not have
diff --git a/doc/src/fix_spring_self.rst b/doc/src/fix_spring_self.rst
index 3383f27ebb..4453fd61c5 100644
--- a/doc/src/fix_spring_self.rst
+++ b/doc/src/fix_spring_self.rst
@@ -1,4 +1,5 @@
 .. index:: fix spring/self
+.. index:: fix spring/self/kk
 
 fix spring/self command
 =======================
@@ -80,6 +81,12 @@ invoked by the :doc:`minimize <minimize>` command.
    you MUST enable the :doc:`fix_modify <fix_modify>` *energy* option for
    this fix.
 
+----------
+
+.. include:: accel_styles.rst
+
+----------
+
 Restrictions
 """"""""""""
  none
diff --git a/doc/src/fix_srd.rst b/doc/src/fix_srd.rst
index 1fc574a7ad..59044a6e3b 100644
--- a/doc/src/fix_srd.rst
+++ b/doc/src/fix_srd.rst
@@ -61,24 +61,30 @@ Description
 Treat a group of particles as stochastic rotation dynamics (SRD)
 particles that serve as a background solvent when interacting with big
 (colloidal) particles in groupbig-ID.  The SRD formalism is described
-in :ref:`(Hecht) <Hecht>`.  The key idea behind using SRD particles as a
-cheap coarse-grained solvent is that SRD particles do not interact
-with each other, but only with the solute particles, which in LAMMPS
-can be spheroids, ellipsoids, or line segments, or triangles, or rigid
-bodies containing multiple spheroids or ellipsoids or line segments
-or triangles.  The collision and rotation properties of the model
-imbue the SRD particles with fluid-like properties, including an
-effective viscosity.  Thus simulations with large solute particles can
-be run more quickly, to measure solute properties like diffusivity
-and viscosity in a background fluid.  The usual LAMMPS fixes for such
-simulations, such as :doc:`fix deform <fix_deform>`, :doc:`fix viscosity <fix_viscosity>`, and :doc:`fix nvt/sllod <fix_nvt_sllod>`,
-can be used in conjunction with the SRD model.
+in :ref:`(Hecht) <Hecht>`.  The same methodology is also called
+multi-particle collision dynamics (MPCD) in the literature.
 
-For more details on how the SRD model is implemented in LAMMPS, :ref:`this paper <Petersen1>` describes the implementation and usage of pure SRD
-fluids.  :ref:`This paper <Lechman>`, which is nearly complete, describes
-the implementation and usage of mixture systems (solute particles in
-an SRD fluid).  See the examples/srd directory for sample input
-scripts using SRD particles in both settings.
+The key idea behind using SRD particles as a cheap coarse-grained
+solvent is that SRD particles do not interact with each other, but
+only with the solute particles, which in LAMMPS can be spheroids,
+ellipsoids, or line segments, or triangles, or rigid bodies containing
+multiple spheroids or ellipsoids or line segments or triangles.  The
+collision and rotation properties of the model imbue the SRD particles
+with fluid-like properties, including an effective viscosity.  Thus
+simulations with large solute particles can be run more quickly, to
+measure solute properties like diffusivity and viscosity in a
+background fluid.  The usual LAMMPS fixes for such simulations, such
+as :doc:`fix deform <fix_deform>`, :doc:`fix viscosity
+<fix_viscosity>`, and :doc:`fix nvt/sllod <fix_nvt_sllod>`, can be
+used in conjunction with the SRD model.
+
+These 3 papers give more details on how the SRD model is implemented
+in LAMMPS.  :ref:`(Petersen) <Petersen1>` describes pure SRD fluid
+systems.  :ref:`(Bolintineanu1) <Bolintineanu1>` describes models
+where pure SRD fluids interact with boundary walls.
+:ref:`(Bolintineanu2) <Bolintineanu2>` describes mixture models where
+large colloidal particles are solvated by an SRD fluid.  See the
+``examples/srd`` directory for sample input scripts.
 
 This fix does two things:
 
@@ -357,28 +363,28 @@ These are the 12 quantities.  All are values for the current timestep,
 except for quantity 5 and the last three, each of which are
 cumulative quantities since the beginning of the run.
 
-* (1) # of SRD/big collision checks performed
-* (2) # of SRDs which had a collision
-* (3) # of SRD/big collisions (including multiple bounces)
-* (4) # of SRD particles inside a big particle
-* (5) # of SRD particles whose velocity was rescaled to be < Vmax
-* (6) # of bins for collision searching
-* (7) # of bins for SRD velocity rotation
-* (8) # of bins in which SRD temperature was computed
-* (9) SRD temperature
-* (10) # of SRD particles which have undergone max # of bounces
-* (11) max # of bounces any SRD particle has had in a single step
-* (12) # of reneighborings due to SRD particles moving too far
+(1) # of SRD/big collision checks performed
+(2) # of SRDs which had a collision
+(3) # of SRD/big collisions (including multiple bounces)
+(4) # of SRD particles inside a big particle
+(5) # of SRD particles whose velocity was rescaled to be < Vmax
+(6) # of bins for collision searching
+(7) # of bins for SRD velocity rotation
+(8) # of bins in which SRD temperature was computed
+(9) SRD temperature
+(10) # of SRD particles which have undergone max # of bounces
+(11) max # of bounces any SRD particle has had in a single step
+(12) # of reneighborings due to SRD particles moving too far
 
 No parameter of this fix can be used with the *start/stop* keywords of
-the :doc:`run <run>` command.  This fix is not invoked during :doc:`energy minimization <minimize>`.
+the :doc:`run <run>` command.  This fix is not invoked during
+:doc:`energy minimization <minimize>`.
 
 Restrictions
 """"""""""""
 
-This command can only be used if LAMMPS was built with the SRD
-package.  See the :doc:`Build package <Build_package>` doc
-page for more info.
+This command can only be used if LAMMPS was built with the SRD package.
+See the :doc:`Build package <Build_package>` doc page for more info.
 
 Related commands
 """"""""""""""""
@@ -404,6 +410,12 @@ no, and rescale = yes.
 **(Petersen)** Petersen, Lechman, Plimpton, Grest, in' t Veld, Schunk, J
 Chem Phys, 132, 174106 (2010).
 
-.. _Lechman:
+.. _Bolintineanu1:
 
-**(Lechman)** Lechman, et al, in preparation (2010).
+**(Bolintineanu1)**
+Bolintineanu, Lechman, Plimpton, Grest, Phys Rev E, 86, 066703 (2012).
+
+.. _Bolintineanu2:
+
+**(Bolintineanu2)** Bolintineanu, Grest, Lechman, Pierce, Plimpton,
+Schunk, Comp Particle Mechanics, 1, 321-356 (2014).
diff --git a/doc/src/pair_ilp_tmd.rst b/doc/src/pair_ilp_tmd.rst
index 482d75a100..70a4768389 100644
--- a/doc/src/pair_ilp_tmd.rst
+++ b/doc/src/pair_ilp_tmd.rst
@@ -22,12 +22,12 @@ Examples
 .. code-block:: LAMMPS
 
    pair_style  hybrid/overlay ilp/tmd 16.0 1
-   pair_coeff  * * ilp/tmd  TMD.ILP Mo S S
+   pair_coeff  * * ilp/tmd  MoS2.ILP Mo S S
 
    pair_style  hybrid/overlay sw/mod sw/mod ilp/tmd 16.0
    pair_coeff  * * sw/mod 1  tmd.sw.mod Mo S S NULL NULL NULL
    pair_coeff  * * sw/mod 2  tmd.sw.mod NULL NULL NULL Mo S S
-   pair_coeff  * * ilp/tmd   TMD.ILP    Mo S S Mo S S
+   pair_coeff  * * ilp/tmd   MoS2.ILP   Mo S S Mo S S
 
 Description
 """""""""""
@@ -69,7 +69,7 @@ calculating the normals.
    each atom `i`, its six nearest neighboring atoms belonging to the same
    sub-layer are chosen to define the normal vector `{\bf n}_i`.
 
-The parameter file (e.g. TMD.ILP), is intended for use with *metal*
+The parameter file (e.g. MoS2.ILP), is intended for use with *metal*
 :doc:`units <units>`, with energies in meV. Two additional parameters,
 *S*, and *rcut* are included in the parameter file. *S* is designed to
 facilitate scaling of energies. *rcut* is designed to build the neighbor
@@ -77,7 +77,7 @@ list for calculating the normals for each atom pair.
 
 .. note::
 
-   The parameters presented in the parameter file (e.g. TMD.ILP),
+   The parameters presented in the parameter file (e.g. MoS2.ILP),
    are fitted with taper function by setting the cutoff equal to 16.0
    Angstrom.  Using different cutoff or taper function should be careful.
    These parameters provide a good description in both short- and long-range
@@ -133,10 +133,10 @@ if LAMMPS was built with that package.  See the :doc:`Build package
 This pair style requires the newton setting to be *on* for pair
 interactions.
 
-The TMD.ILP potential file provided with LAMMPS (see the potentials
+The MoS2.ILP potential file provided with LAMMPS (see the potentials
 directory) are parameterized for *metal* units.  You can use this
 potential with any LAMMPS units, but you would need to create your own
-custom TMD.ILP potential file with coefficients listed in the appropriate
+custom MoS2.ILP potential file with coefficients listed in the appropriate
 units, if your simulation does not use *metal* units.
 
 Related commands
diff --git a/doc/src/pair_reaxff.rst b/doc/src/pair_reaxff.rst
index 4dac9baf85..067eb3afc3 100644
--- a/doc/src/pair_reaxff.rst
+++ b/doc/src/pair_reaxff.rst
@@ -43,22 +43,22 @@ Examples
 Description
 """""""""""
 
-Style *reaxff* computes the ReaxFF potential of van Duin, Goddard and
-co-workers.  ReaxFF uses distance-dependent bond-order functions to
+Pair style *reaxff* computes the ReaxFF potential of van Duin, Goddard
+and co-workers.  ReaxFF uses distance-dependent bond-order functions to
 represent the contributions of chemical bonding to the potential
-energy. There is more than one version of ReaxFF. The version
+energy.  There is more than one version of ReaxFF.  The version
 implemented in LAMMPS uses the functional forms documented in the
 supplemental information of the following paper:
-:ref:`(Chenoweth et al., 2008) <Chenoweth_20082>`.  The version integrated
-into LAMMPS matches the version of ReaxFF From Summer 2010.  For more
-technical details about the pair reaxff implementation of ReaxFF, see
-the :ref:`(Aktulga) <Aktulga>` paper. The *reaxff* style was initially
-implemented as a stand-alone C code and is now converted to C++ and
-integrated into LAMMPS as a package.
+:ref:`(Chenoweth et al., 2008) <Chenoweth_20082>` and matches the
+version of the reference ReaxFF implementation from Summer 2010.  For
+more technical details about the implementation of ReaxFF in pair style
+*reaxff*, see the :ref:`(Aktulga) <Aktulga>` paper. The *reaxff* style
+was initially implemented as a stand-alone C code and is now converted
+to C++ and integrated into LAMMPS as a package.
 
 The *reaxff/kk* style is a Kokkos version of the ReaxFF potential that
-is derived from the *reaxff* style. The Kokkos version can run on GPUs
-and can also use OpenMP multithreading. For more information about the
+is derived from the *reaxff* style.  The Kokkos version can run on GPUs
+and can also use OpenMP multithreading.  For more information about the
 Kokkos package, see :doc:`Packages details <Packages_details>` and
 :doc:`Speed kokkos <Speed_kokkos>` doc pages.  One important
 consideration when using the *reaxff/kk* style is the choice of either
diff --git a/doc/src/pair_snap.rst b/doc/src/pair_snap.rst
index ebedb288c1..ffc43c712a 100644
--- a/doc/src/pair_snap.rst
+++ b/doc/src/pair_snap.rst
@@ -1,10 +1,11 @@
 .. index:: pair_style snap
+.. index:: pair_style snap/intel
 .. index:: pair_style snap/kk
 
 pair_style snap command
 =======================
 
-Accelerator Variants: *snap/kk*
+Accelerator Variants: *snap/intel*, *snap/kk*
 
 Syntax
 """"""
@@ -260,6 +261,14 @@ This style is part of the ML-SNAP package.  It is only enabled if LAMMPS
 was built with that package.  See the :doc:`Build package
 <Build_package>` page for more info.
 
+The *snap/intel* accelerator variant will *only* be available if LAMMPS
+is built with Intel *compilers* and for CPUs with AVX-512 support.
+While the INTEL package in general allows multiple floating point
+precision modes to be selected, *snap/intel* will currently always use
+full double precision regardless of the precision mode selected.
+Additionally, the *intel* variant of snap will **NOT** use multiple
+threads with OpenMP.
+
 Related commands
 """"""""""""""""
 
diff --git a/doc/src/pair_yukawa_colloid.rst b/doc/src/pair_yukawa_colloid.rst
index 6611ea04e4..c6f201d249 100644
--- a/doc/src/pair_yukawa_colloid.rst
+++ b/doc/src/pair_yukawa_colloid.rst
@@ -1,11 +1,12 @@
 .. index:: pair_style yukawa/colloid
 .. index:: pair_style yukawa/colloid/gpu
+.. index:: pair_style yukawa/colloid/kk
 .. index:: pair_style yukawa/colloid/omp
 
 pair_style yukawa/colloid command
 =================================
 
-Accelerator Variants: *yukawa/colloid/gpu*, *yukawa/colloid/omp*
+Accelerator Variants: *yukawa/colloid/gpu*, *yukawa/colloid/kk*, *yukawa/colloid/omp*
 
 Syntax
 """"""
@@ -131,6 +132,12 @@ per-type polydispersity is allowed.  This means all particles of the
 same type must have the same diameter.  Each type can have a different
 diameter.
 
+----------
+
+.. include:: accel_styles.rst
+
+----------
+
 Related commands
 """"""""""""""""
 
diff --git a/doc/src/thermo_style.rst b/doc/src/thermo_style.rst
index 63ad59e553..89a2c0b740 100644
--- a/doc/src/thermo_style.rst
+++ b/doc/src/thermo_style.rst
@@ -385,19 +385,20 @@ creates a global vector with 6 values.
 The *c_ID* and *c_ID[I]* and *c_ID[I][J]* keywords allow global values
 calculated by a compute to be output.  As discussed on the
 :doc:`compute <compute>` doc page, computes can calculate global,
-per-atom, or local values.  Only global values can be referenced by
-this command.  However, per-atom compute values for an individual atom
-can be referenced in a :doc:`variable <variable>` and the variable
-referenced by thermo_style custom, as discussed below.  See the
-discussion above for how the I in *c_ID[I]* can be specified with a
-wildcard asterisk to effectively specify multiple values from a global
-compute vector.
+per-atom, local, and per-grid values.  Only global values can be
+referenced by this command.  However, per-atom compute values for an
+individual atom can be referenced in an :doc:`equal-style variable
+<variable>` and the variable referenced by thermo_style custom, as
+discussed below.  See the discussion above for how the I in *c_ID[I]*
+can be specified with a wildcard asterisk to effectively specify
+multiple values from a global compute vector.
 
 The ID in the keyword should be replaced by the actual ID of a compute
 that has been defined elsewhere in the input script.  See the
-:doc:`compute <compute>` command for details.  If the compute calculates
-a global scalar, vector, or array, then the keyword formats with 0, 1,
-or 2 brackets will reference a scalar value from the compute.
+:doc:`compute <compute>` command for details.  If the compute
+calculates a global scalar, vector, or array, then the keyword formats
+with 0, 1, or 2 brackets will reference a scalar value from the
+compute.
 
 Note that some computes calculate "intensive" global quantities like
 temperature; others calculate "extensive" global quantities like
@@ -410,13 +411,14 @@ norm <thermo_modify>` option being used.
 
 The *f_ID* and *f_ID[I]* and *f_ID[I][J]* keywords allow global values
 calculated by a fix to be output.  As discussed on the :doc:`fix
-<fix>` doc page, fixes can calculate global, per-atom, or local
-values.  Only global values can be referenced by this command.
-However, per-atom fix values can be referenced for an individual atom
-in a :doc:`variable <variable>` and the variable referenced by
-thermo_style custom, as discussed below.  See the discussion above for
-how the I in *f_ID[I]* can be specified with a wildcard asterisk to
-effectively specify multiple values from a global fix vector.
+<fix>` doc page, fixes can calculate global, per-atom, local, and
+per-grid values.  Only global values can be referenced by this
+command.  However, per-atom fix values can be referenced for an
+individual atom in an :doc:`equal-style variable <variable>` and the
+variable referenced by thermo_style custom, as discussed below.  See
+the discussion above for how the I in *f_ID[I]* can be specified with
+a wildcard asterisk to effectively specify multiple values from a
+global fix vector.
 
 The ID in the keyword should be replaced by the actual ID of a fix
 that has been defined elsewhere in the input script.  See the
@@ -438,14 +440,15 @@ output.  The name in the keyword should be replaced by the variable
 name that has been defined elsewhere in the input script.  Only
 equal-style and vector-style variables can be referenced; the latter
 requires a bracketed term to specify the Ith element of the vector
-calculated by the variable.  However, an atom-style variable can be
-referenced for an individual atom by an equal-style variable and that
-variable referenced.  See the :doc:`variable <variable>` command for
-details.  Variables of style *equal* and *vector* and *atom* define a
-formula which can reference per-atom properties or thermodynamic
-keywords, or they can invoke other computes, fixes, or variables when
-evaluated, so this is a very general means of creating thermodynamic
-output.
+calculated by the variable.  However, an equal-style variable can use
+an atom-style variable in its formula indexed by the ID of an
+individual atom.  This is a way to output a specific atom's per-atom
+coordinates or other per-atom properties in thermo output.  See the
+:doc:`variable <variable>` command for details.  Note that variables
+of style *equal* and *vector* and *atom* define a formula which can
+reference per-atom properties or thermodynamic keywords, or they can
+invoke other computes, fixes, or variables when evaluated, so this is
+a very general means of creating thermodynamic output.
 
 Note that equal-style and vector-style variables are assumed to
 produce "intensive" global quantities, which are thus printed as-is,
diff --git a/doc/src/variable.rst b/doc/src/variable.rst
index 28c0d29799..92a78ee3c1 100644
--- a/doc/src/variable.rst
+++ b/doc/src/variable.rst
@@ -550,12 +550,11 @@ variables.
 Most of the formula elements produce a scalar value.  Some produce a
 global or per-atom vector of values.  Global vectors can be produced
 by computes or fixes or by other vector-style variables.  Per-atom
-vectors are produced by atom vectors, compute references that
-represent a per-atom vector, fix references that represent a per-atom
-vector, and variables that are atom-style variables.  Math functions
-that operate on scalar values produce a scalar value; math function
-that operate on global or per-atom vectors do so element-by-element
-and produce a global or per-atom vector.
+vectors are produced by atom vectors, computes or fixes which output a
+per-atom vector or array, and variables that are atom-style variables.
+Math functions that operate on scalar values produce a scalar value;
+math functions that operate on global or per-atom vectors do so
+element-by-element and produce a global or per-atom vector.
 
 A formula for equal-style variables cannot use any formula element
 that produces a global or per-atom vector.  A formula for a
@@ -564,12 +563,13 @@ scalar value or a global vector value, but cannot use a formula
 element that produces a per-atom vector.  A formula for an atom-style
 variable can use formula elements that produce either a scalar value
 or a per-atom vector, but not one that produces a global vector.
+
 Atom-style variables are evaluated by other commands that define a
-:doc:`group <group>` on which they operate, e.g. a :doc:`dump <dump>` or
-:doc:`compute <compute>` or :doc:`fix <fix>` command.  When they invoke
-the atom-style variable, only atoms in the group are included in the
-formula evaluation.  The variable evaluates to 0.0 for atoms not in
-the group.
+:doc:`group <group>` on which they operate, e.g. a :doc:`dump <dump>`
+or :doc:`compute <compute>` or :doc:`fix <fix>` command.  When they
+invoke the atom-style variable, only atoms in the group are included
+in the formula evaluation.  The variable evaluates to 0.0 for atoms
+not in the group.
 
 ----------
 
@@ -1138,69 +1138,74 @@ only defined if an :doc:`atom_style <atom_style>` is being used that
 defines molecule IDs.
 
 Note that many other atom attributes can be used as inputs to a
-variable by using the :doc:`compute property/atom <compute_property_atom>` command and then specifying
-a quantity from that compute.
+variable by using the :doc:`compute property/atom
+<compute_property_atom>` command and then specifying a quantity from
+that compute.
 
 ----------
 
 Compute References
 ------------------
 
-Compute references access quantities calculated by a
-:doc:`compute <compute>`.  The ID in the reference should be replaced by
-the ID of a compute defined elsewhere in the input script.  As
-discussed in the page for the :doc:`compute <compute>` command,
-computes can produce global, per-atom, or local values.  Only global
-and per-atom values can be used in a variable.  Computes can also
-produce a scalar, vector, or array.
+Compute references access quantities calculated by a :doc:`compute
+<compute>`.  The ID in the reference should be replaced by the ID of a
+compute defined elsewhere in the input script.
 
-An equal-style variable can only use scalar values, which means a
-global scalar, or an element of a global or per-atom vector or array.
-A vector-style variable can use scalar values or a global vector of
-values, or a column of a global array of values.  Atom-style variables
-can use global scalar values.  They can also use per-atom vector
-values, or a column of a per-atom array.  See the doc pages for
-individual computes to see what kind of values they produce.
+As discussed on the page for the :doc:`compute <compute>` command,
+computes can produce global, per-atom, local, and per-grid values.
+Only global and per-atom values can be used in a variable.  Computes
+can also produce scalars (global only), vectors, and arrays.  See the
+doc pages for individual computes to see what different kinds of data
+they produce.
 
-Examples of different kinds of compute references are as follows.
-There is typically no ambiguity (see exception below) as to what a
-reference means, since computes only produce either global or per-atom
-quantities, never both.
+An equal-style variable can only use scalar values, either from global
+or per-atom data.  In the case of per-atom data, this would be a value
+for a specific atom.
 
-+-------------+-------------------------------------------------------------------------------------------------------+
-| c_ID       | global scalar, or per-atom vector                                                                      |
-+-------------+-------------------------------------------------------------------------------------------------------+
-| c_ID[I]    | Ith element of global vector, or atom I's value in per-atom vector, or Ith column from per-atom array  |
-+-------------+-------------------------------------------------------------------------------------------------------+
-| c_ID[I][J] | I,J element of global array, or atom I's Jth value in per-atom array                                   |
-+-------------+-------------------------------------------------------------------------------------------------------+
+A vector-style variable can use scalar values (same as for equal-style
+variables), or global vectors of values.  The latter can also be a
+column of a global array.
 
-For I and J indices, integers can be specified or a variable name,
-specified as v_name, where name is the name of the variable.  The
-rules for this syntax are the same as for the "Atom Values and
-Vectors" discussion above.
+Atom-style variables can use scalar values (same as for equal-style
+variables), or per-atom vectors of values.  The latter can also be a
+column of a per-atom array.
 
-One source of ambiguity for compute references is when a vector-style
-variable refers to a compute that produces both a global scalar and a
-global vector.  Consider a compute with ID "foo" that does this,
-referenced as follows by variable "a", where "myVec" is another
-vector-style variable:
+The various allowed compute references in the variable formulas for
+equal-, vector-, and atom-style variables are listed in the following
+table:
 
-.. code-block:: LAMMPS
++--------+------------+------------------------------------------+
+| equal  | c_ID       | global scalar                            |
+| equal  | c_ID[I]    | element of global vector                 |
+| equal  | c_ID[I][J] | element of global array                  |
+| equal  | C_ID[I]    | element of per-atom vector (I = atom ID) |
+| equal  | C_ID[I][J] | element of per-atom array (I = atom ID)  |
++--------+------------+------------------------------------------+
+| vector | c_ID       | global vector                            |
+| vector | c_ID[I]    | column of global array                   |
++--------+------------+------------------------------------------+
+| atom   | c_ID       | per-atom vector                          |
+| atom   | c_ID[I]    | column of per-atom array                 |
++--------+------------+------------------------------------------+
 
-   variable a vector c_foo*v_myVec
+Note that if an equal-style variable formula wishes to access per-atom
+data from a compute, it must use capital "C" as the ID prefix and not
+lower-case "c".
 
-The reference "c_foo" could refer to either the global scalar or
-global vector produced by compute "foo".  In this case, "c_foo" will
-always refer to the global scalar, and "C_foo" can be used to
-reference the global vector.  Similarly if the compute produces both a
-global vector and global array, then "c_foo[I]" will always refer to
-an element of the global vector, and "C_foo[I]" can be used to
-reference the Ith column of the global array.
+Also note that if a vector- or atom-style variable formula needs to
+access a scalar value from a compute (i.e. the 5 kinds of values in
+the first 5 lines of the table), it can not do so directly.  Instead,
+it can use a reference to an equal-style variable which stores the
+scalar value from the compute.
 
-Note that if a variable containing a compute is evaluated directly in
-an input script (not during a run), then the values accessed by the
-compute must be current.  See the discussion below about "Variable
+The I and J indices in these compute references can be integers or can
+be a variable name, specified as v_name, where name is the name of the
+variable.  The rules for this syntax are the same as for indices in
+the "Atom Values and Vectors" discussion above.
+
+If a variable containing a compute is evaluated directly in an input
+script (not during a run), then the values accessed by the compute
+should be current.  See the discussion below about "Variable
 Accuracy".
 
 ----------
@@ -1208,51 +1213,59 @@ Accuracy".
 Fix References
 --------------
 
-Fix references access quantities calculated by a :doc:`fix <compute>`.
+Fix references access quantities calculated by a :doc:`fix <fix>`.
 The ID in the reference should be replaced by the ID of a fix defined
-elsewhere in the input script.  As discussed in the page for the
-:doc:`fix <fix>` command, fixes can produce global, per-atom, or local
-values.  Only global and per-atom values can be used in a variable.
-Fixes can also produce a scalar, vector, or array.  An equal-style
-variable can only use scalar values, which means a global scalar, or
-an element of a global or per-atom vector or array.  Atom-style
-variables can use the same scalar values.  They can also use per-atom
-vector values.  A vector value can be a per-atom vector itself, or a
-column of an per-atom array.  See the doc pages for individual fixes
-to see what kind of values they produce.
+elsewhere in the input script.
 
-The different kinds of fix references are exactly the same as the
-compute references listed in the above table, where "c\_" is replaced
-by "f\_".  Again, there is typically no ambiguity (see exception below)
-as to what a reference means, since fixes only produce either global
-or per-atom quantities, never both.
+As discussed on the page for the :doc:`fix <fix>` command, fixes can
+produce global, per-atom, local, and per-grid values.  Only global and
+per-atom values can be used in a variable.  Fixes can also produce
+scalars (global only), vectors, and arrays.  See the doc pages for
+individual fixes to see what different kinds of data they produce.
 
-+-------------+-------------------------------------------------------------------------------------------------------+
-| f_ID       | global scalar, or per-atom vector                                                                      |
-+-------------+-------------------------------------------------------------------------------------------------------+
-| f_ID[I]    | Ith element of global vector, or atom I's value in per-atom vector, or Ith column from per-atom array  |
-+-------------+-------------------------------------------------------------------------------------------------------+
-| f_ID[I][J] | I,J element of global array, or atom I's Jth value in per-atom array                                   |
-+-------------+-------------------------------------------------------------------------------------------------------+
+An equal-style variable can only use scalar values, either from global
+or per-atom data.  In the case of per-atom data, this would be a value
+for a specific atom.
 
-For I and J indices, integers can be specified or a variable name,
-specified as v_name, where name is the name of the variable.  The
-rules for this syntax are the same as for the "Atom Values and
-Vectors" discussion above.
+A vector-style variable can use scalar values (same as for equal-style
+variables), or global vectors of values.  The latter can also be a
+column of a global array.
 
-One source of ambiguity for fix references is the same ambiguity
-discussed for compute references above.  Namely when a vector-style
-variable refers to a fix that produces both a global scalar and a
-global vector.  The solution is the same as for compute references.
-For a fix with ID "foo", "f_foo" will always refer to the global
-scalar, and "F_foo" can be used to reference the global vector.  And
-similarly for distinguishing between a fix's global vector versus
-global array with "f_foo[I]" versus "F_foo[I]".
+Atom-style variables can use scalar values (same as for equal-style
+variables), or per-atom vectors of values.  The latter can also be a
+column of a per-atom array.
 
-Note that if a variable containing a fix is evaluated directly in an
-input script (not during a run), then the values accessed by the fix
-should be current.  See the discussion below about "Variable
-Accuracy".
+The allowed fix references in variable formulas for equal-, vector-,
+and atom-style variables are listed in the following table:
+
++--------+------------+------------------------------------------+
+| equal  | f_ID       | global scalar                            |
+| equal  | f_ID[I]    | element of global vector                 |
+| equal  | f_ID[I][J] | element of global array                  |
+| equal  | F_ID[I]    | element of per-atom vector (I = atom ID) |
+| equal  | F_ID[I][J] | element of per-atom array (I = atom ID)  |
++--------+------------+------------------------------------------+
+| vector | f_ID       | global vector                            |
+| vector | f_ID[I]    | column of global array                   |
++--------+------------+------------------------------------------+
+| atom   | f_ID       | per-atom vector                          |
+| atom   | f_ID[I]    | column of per-atom array                 |
++--------+------------+------------------------------------------+
+
+Note that if an equal-style variable formula wishes to access per-atom
+data from a fix, it must use capital "F" as the ID prefix and not
+lower-case "f".
+
+Also note that if a vector- or atom-style variable formula needs to
+access a scalar value from a fix (i.e. the 5 kinds of values in the
+first 5 lines of the table), it can not do so directly.  Instead, it
+can use a reference to an equal-style variable which stores the scalar
+value from the fix.
+
+The I and J indices in these fix references can be integers or can be
+a variable name, specified as v_name, where name is the name of the
+variable.  The rules for this syntax are the same as for indices in
+the "Atom Values and Vectors" discussion above.
 
 Note that some fixes only generate quantities on certain timesteps.
 If a variable attempts to access the fix on non-allowed timesteps, an
@@ -1260,6 +1273,10 @@ error is generated.  For example, the :doc:`fix ave/time <fix_ave_time>`
 command may only generate averaged quantities every 100 steps.  See
 the doc pages for individual fix commands for details.
 
+If a variable containing a fix is evaluated directly in an input
+script (not during a run), then the values accessed by the fix should
+be current.  See the discussion below about "Variable Accuracy".
+
 ----------
 
 Variable References
@@ -1294,26 +1311,32 @@ including other atom-style or atomfile-style variables.  If it uses a
 vector-style variable, a subscript must be used to access a single
 value from the vector-style variable.
 
-Examples of different kinds of variable references are as follows.
-There is no ambiguity as to what a reference means, since variables
-produce only a global scalar or global vector or per-atom vector.
+The allowed variable references in variable formulas for equal-,
+vector-, and atom-style variables are listed in the following table.
+Note that there is no ambiguity as to what a reference means, since
+referenced variables produce only a global scalar or global vector or
+per-atom vector.
 
-+------------+----------------------------------------------------------------------+
-| v_name    | global scalar from equal-style variable                               |
-+------------+----------------------------------------------------------------------+
-| v_name    | global vector from vector-style variable                              |
-+------------+----------------------------------------------------------------------+
-| v_name    | per-atom vector from atom-style or atomfile-style variable            |
-+------------+----------------------------------------------------------------------+
-| v_name[I] | Ith element of a global vector from vector-style variable             |
-+------------+----------------------------------------------------------------------+
-| v_name[I] | value of atom with ID = I from atom-style or atomfile-style variable  |
-+------------+----------------------------------------------------------------------+
++--------+-----------+-----------------------------------------------------------------------------------+
+| equal  | v_name    | global scalar from an equal-style variable                                        |
+| equal  | v_name[I] | element of global vector from a vector-style variable                             |
+| equal  | v_name[I] | element of per-atom vector (I = atom ID) from an atom- or atomfile-style variable |
++--------+-----------+-----------------------------------------------------------------------------------+
+| vector | v_name    | global scalar from an equal-style variable                                        |
+| vector | v_name    | global vector from a vector-style variable                                        |
+| vector | v_name[I] | element of global vector from a vector-style variable                             |
+| vector | v_name[I] | element of per-atom vector (I = atom ID) from an atom- or atomfile-style variable |
++--------+-----------+-----------------------------------------------------------------------------------+
+| atom   | v_name    | global scalar from an equal-style variable                                        |
+| atom   | v_name    | per-atom vector from an atom-style or atomfile-style variable                     |
+| atom   | v_name[I] | element of global vector from a vector-style variable                             |
+| atom   | v_name[I] | element of per-atom vector (I = atom ID) from an atom- or atomfile-style variable |
++--------+-----------+-----------------------------------------------------------------------------------+
 
 For the I index, an integer can be specified or a variable name,
 specified as v_name, where name is the name of the variable.  The
-rules for this syntax are the same as for the "Atom Values and
-Vectors" discussion above.
+rules for this syntax are the same as for indices in the "Atom Values
+and Vectors" discussion above.
 
 ----------
 
diff --git a/doc/utils/sphinx-config/false_positives.txt b/doc/utils/sphinx-config/false_positives.txt
index 8f3486b1a7..75589e3115 100644
--- a/doc/utils/sphinx-config/false_positives.txt
+++ b/doc/utils/sphinx-config/false_positives.txt
@@ -2892,6 +2892,7 @@ pscrozi
 pseudocode
 Pseudocode
 pseudodynamics
+pseudoparticle
 pseudopotential
 psllod
 pSp
@@ -3755,6 +3756,7 @@ uncomment
 uncommented
 uncompress
 uncompute
+underdamped
 underprediction
 undump
 uniaxial
diff --git a/examples/mliap/in.mliap.quadratic.compute b/examples/mliap/in.mliap.quadratic.compute
index 929dbf3824..cc9ad331b5 100644
--- a/examples/mliap/in.mliap.quadratic.compute
+++ b/examples/mliap/in.mliap.quadratic.compute
@@ -65,7 +65,7 @@ compute         bsum2 snapgroup2 reduce sum c_b[*]
 # fix 		bsum2 all ave/time 1 1 1 c_bsum2 file bsum2.dat mode vector
 compute		vbsum all reduce sum c_vb[*]
 # fix 		vbsum all ave/time 1 1 1 c_vbsum file vbsum.dat mode vector
-variable	db_2_100 equal c_db[2][100]
+variable	db_2_100 equal C_db[2][100]
 
 # test output:   1: total potential energy
 #                2: xy component of stress tensor
diff --git a/examples/mliap/in.mliap.snap.compute b/examples/mliap/in.mliap.snap.compute
index 4cfccedbdf..c49365f55f 100644
--- a/examples/mliap/in.mliap.snap.compute
+++ b/examples/mliap/in.mliap.snap.compute
@@ -65,7 +65,7 @@ compute         bsum2 snapgroup2 reduce sum c_b[*]
 # fix 		bsum2 all ave/time 1 1 1 c_bsum2 file bsum2.dat mode vector
 compute		vbsum all reduce sum c_vb[*]
 # fix 		vbsum all ave/time 1 1 1 c_vbsum file vbsum.dat mode vector
-variable	db_2_25 equal c_db[2][25]
+variable	db_2_25 equal C_db[2][25]
 
 thermo 		100
 
diff --git a/examples/snap/in.grid.snap b/examples/snap/in.grid.snap
index 08c95a004f..da48957d97 100644
--- a/examples/snap/in.grid.snap
+++ b/examples/snap/in.grid.snap
@@ -67,18 +67,18 @@ compute 	mygridlocal all sna/grid/local grid ${ngrid} ${ngrid} ${ngrid} &
 
 # define output
 
-variable	B5atom equal c_b[2][5]
+variable	B5atom equal C_b[2][5]
 variable	B5grid equal c_mygrid[8][8]
 
 variable	rmse_global equal "sqrt(   &
 	 (c_mygrid[8][1] - x[2])^2 +      &
 	 (c_mygrid[8][2] - y[2])^2 +      &
 	 (c_mygrid[8][3] - z[2])^2 +      &
-	 (c_mygrid[8][4] - c_b[2][1])^2 + &
-	 (c_mygrid[8][5] - c_b[2][2])^2 + &
-	 (c_mygrid[8][6] - c_b[2][3])^2 + &
-	 (c_mygrid[8][7] - c_b[2][4])^2 + &
-	 (c_mygrid[8][8] - c_b[2][5])^2   &
+	 (c_mygrid[8][4] - C_b[2][1])^2 + &
+	 (c_mygrid[8][5] - C_b[2][2])^2 + &
+	 (c_mygrid[8][6] - C_b[2][3])^2 + &
+	 (c_mygrid[8][7] - C_b[2][4])^2 + &
+	 (c_mygrid[8][8] - C_b[2][5])^2   &
 	 )"
 
 thermo_style	custom step v_B5atom v_B5grid v_rmse_global
diff --git a/examples/snap/in.grid.tri b/examples/snap/in.grid.tri
index 5283957eb8..95a14f3bb4 100644
--- a/examples/snap/in.grid.tri
+++ b/examples/snap/in.grid.tri
@@ -87,18 +87,18 @@ compute 	mygridlocal all sna/grid/local grid ${ngridx} ${ngridy} ${ngridz} &
 
 # define output
 
-variable	B5atom equal c_b[7][5]
+variable	B5atom equal C_b[7][5]
 variable	B5grid equal c_mygrid[13][8]
 
 # do not compare x,y,z because assignment of ids
 # to atoms is not unnique for different processor grids
 
 variable	rmse_global equal "sqrt(    &
-	 (c_mygrid[13][4] - c_b[7][1])^2 + &
-	 (c_mygrid[13][5] - c_b[7][2])^2 + &
-	 (c_mygrid[13][6] - c_b[7][3])^2 + &
-	 (c_mygrid[13][7] - c_b[7][4])^2 + &
-	 (c_mygrid[13][8] - c_b[7][5])^2   &
+	 (c_mygrid[13][4] - C_b[7][1])^2 + &
+	 (c_mygrid[13][5] - C_b[7][2])^2 + &
+	 (c_mygrid[13][6] - C_b[7][3])^2 + &
+	 (c_mygrid[13][7] - C_b[7][4])^2 + &
+	 (c_mygrid[13][8] - C_b[7][5])^2   &
 	 )"
 
 thermo_style	custom step v_B5atom v_B5grid v_rmse_global
diff --git a/examples/snap/in.snap.compute b/examples/snap/in.snap.compute
index b0c7314882..8d2ffe8b96 100644
--- a/examples/snap/in.snap.compute
+++ b/examples/snap/in.snap.compute
@@ -70,7 +70,7 @@ compute         bsum2 snapgroup2 reduce sum c_b[*]
 # fix 		bsum2 all ave/time 1 1 1 c_bsum2 file bsum2.dat mode vector
 compute		vbsum all reduce sum c_vb[*]
 # fix 		vbsum all ave/time 1 1 1 c_vbsum file vbsum.dat mode vector
-variable	db_2_25 equal c_db[2][25]
+variable	db_2_25 equal C_db[2][25]
 
 # set up compute snap generating global array
 
diff --git a/examples/snap/in.snap.compute.quadratic b/examples/snap/in.snap.compute.quadratic
index e03d4af3bf..20d5ed3039 100644
--- a/examples/snap/in.snap.compute.quadratic
+++ b/examples/snap/in.snap.compute.quadratic
@@ -70,7 +70,7 @@ compute         bsum2 snapgroup2 reduce sum c_b[*]
 # fix 		bsum2 all ave/time 1 1 1 c_bsum2 file bsum2.dat mode vector
 compute		vbsum all reduce sum c_vb[*]
 # fix 		vbsum all ave/time 1 1 1 c_vbsum file vbsum.dat mode vector
-variable	db_2_100 equal c_db[2][100]
+variable	db_2_100 equal C_db[2][100]
 
 # set up compute snap generating global array
 
diff --git a/examples/voronoi/in.voronoi b/examples/voronoi/in.voronoi
index 5254969fbd..79b6c6efec 100644
--- a/examples/voronoi/in.voronoi
+++ b/examples/voronoi/in.voronoi
@@ -146,10 +146,10 @@ variable i2 equal 257
 compute v1 all voronoi/atom occupation
 compute r0 all   reduce sum c_v1[1]
 compute r1 all   reduce sum c_v1[2]
-variable d5a equal c_v1[${i1}][1]
-variable d5b equal c_v1[${i2}][1]
-variable d5c equal c_v1[${i1}][2]
-variable d5d equal c_v1[${i2}][2]
+variable d5a equal C_v1[${i1}][1]
+variable d5b equal C_v1[${i2}][1]
+variable d5c equal C_v1[${i1}][2]
+variable d5d equal C_v1[${i2}][2]
 thermo_style custom c_r0 c_r1 v_d5a v_d5b v_d5c v_d5d
 
 run 0
diff --git a/examples/voronoi/in.voronoi.data b/examples/voronoi/in.voronoi.data
index 853c2c2bd1..e5d925c498 100644
--- a/examples/voronoi/in.voronoi.data
+++ b/examples/voronoi/in.voronoi.data
@@ -63,11 +63,9 @@ undump          dlocal
 # TEST 2: 
 #
 
-# This compute voronoi generates  
-# local and global quantities, but
-# not per-atom quantities
+# This compute voronoi generates peratom and local and global quantities
 
-compute 	v2 all voronoi/atom neighbors yes edge_histo 6 peratom no
+compute 	v2 all voronoi/atom neighbors yes edge_histo 6
 
 # write voronoi local quantities to a file
 
@@ -75,7 +73,7 @@ dump            d2 all local  1 dump.neighbors2 index c_v2[1] c_v2[2] c_v2[3]
 
 # sum up a voronoi local quantity
 
-compute 	sumarea all reduce sum c_v2[3]
+compute 	sumarea all reduce sum c_v2[3] inputs local
 
 # output voronoi global quantities
 
@@ -83,6 +81,3 @@ thermo_style 	custom c_sumarea c_v2[3] c_v2[4] c_v2[5] c_v2[6] c_v2[7]
 thermo 		1
 
 run  		0
-
-
-
diff --git a/lib/pace/Install.py b/lib/pace/Install.py
index 4f3cf299ac..8d31852e44 100644
--- a/lib/pace/Install.py
+++ b/lib/pace/Install.py
@@ -18,11 +18,11 @@ from install_helpers import fullpath, geturl, checkmd5sum, getfallback
 # settings
 
 thisdir = fullpath('.')
-version ='v.2023.01.3.fix'
+version ='v.2023.10.04'
 
 # known checksums for different PACE versions. used to validate the download.
 checksums = { \
-    'v.2023.01.3.fix': '4f0b3b5b14456fe9a73b447de3765caa'
+    'v.2023.10.04': '70ff79f4e59af175e55d24f3243ad1ff'
 }
 
 parser = ArgumentParser(prog='Install.py', description="LAMMPS library build wrapper script")
diff --git a/src/BOCS/fix_bocs.cpp b/src/BOCS/fix_bocs.cpp
index d17884855a..17bb1af002 100644
--- a/src/BOCS/fix_bocs.cpp
+++ b/src/BOCS/fix_bocs.cpp
@@ -1024,7 +1024,10 @@ void FixBocs::final_integrate()
 
   if (pstat_flag) {
     if (pstyle == ISO) pressure->compute_scalar();
-    else pressure->compute_vector();
+    else {
+      temperature->compute_vector();
+      pressure->compute_vector();
+    }
     couple();
     pressure->addstep(update->ntimestep+1);
   }
@@ -1961,6 +1964,7 @@ void FixBocs::nhc_press_integrate()
   int ich,i,pdof;
   double expfac,factor_etap,kecurrent;
   double kt = boltz * t_target;
+  double lkt_press;
 
   // Update masses, to preserve initial freq, if flag set
 
@@ -2006,7 +2010,8 @@ void FixBocs::nhc_press_integrate()
     }
   }
 
-  double lkt_press = pdof * kt;
+  if (pstyle == ISO) lkt_press = kt;
+  else lkt_press = pdof * kt;
   etap_dotdot[0] = (kecurrent - lkt_press)/etap_mass[0];
 
   double ncfac = 1.0/nc_pchain;
diff --git a/src/Depend.sh b/src/Depend.sh
index 4004f12686..dbffb2dba0 100755
--- a/src/Depend.sh
+++ b/src/Depend.sh
@@ -64,6 +64,7 @@ fi
 
 if (test $1 = "COLLOID") then
   depend GPU
+  depend KOKKOS
   depend OPENMP
 fi
 
@@ -185,6 +186,7 @@ fi
 if (test $1 = "ML-SNAP") then
   depend ML-IAP
   depend KOKKOS
+  depend INTEL
 fi
 
 if (test $1 = "CG-SPICA") then
diff --git a/src/EXTRA-DUMP/dump_yaml.cpp b/src/EXTRA-DUMP/dump_yaml.cpp
index 3ca5c59edf..6c21c24f77 100644
--- a/src/EXTRA-DUMP/dump_yaml.cpp
+++ b/src/EXTRA-DUMP/dump_yaml.cpp
@@ -24,6 +24,8 @@
 
 using namespace LAMMPS_NS;
 
+static constexpr char special_chars[] = "{}[],&:*#?|-<>=!%@\\";
+
 /* ---------------------------------------------------------------------- */
 DumpYAML::DumpYAML(class LAMMPS *_lmp, int narg, char **args) :
     DumpCustom(_lmp, narg, args), thermo(false)
@@ -67,7 +69,12 @@ void DumpYAML::write_header(bigint ndump)
       const auto &fields = th->get_fields();
 
       thermo_data += "thermo:\n  - keywords: [ ";
-      for (int i = 0; i < nfield; ++i) thermo_data += fmt::format("{}, ", keywords[i]);
+      for (int i = 0; i < nfield; ++i) {
+        if (keywords[i].find_first_of(special_chars) == std::string::npos)
+          thermo_data += fmt::format("{}, ", keywords[i]);
+        else
+          thermo_data += fmt::format("'{}', ", keywords[i]);
+      }
       thermo_data += "]\n  - data: [ ";
 
       for (int i = 0; i < nfield; ++i) {
@@ -107,7 +114,12 @@ void DumpYAML::write_header(bigint ndump)
     if (domain->triclinic) fmt::print(fp, "  - [ {}, {}, {} ]\n", boxxy, boxxz, boxyz);
 
     fmt::print(fp, "keywords: [ ");
-    for (const auto &item : utils::split_words(columns)) fmt::print(fp, "{}, ", item);
+    for (const auto &item : utils::split_words(columns)) {
+      if (item.find_first_of(special_chars) == std::string::npos)
+        fmt::print(fp, "{}, ", item);
+      else
+        fmt::print(fp, "'{}', ", item);
+    }
     fputs(" ]\ndata:\n", fp);
   } else    // reset so that the remainder of the output is not multi-proc
     filewriter = 0;
diff --git a/src/INTEL/TEST/in.intel.snap b/src/INTEL/TEST/in.intel.snap
new file mode 100644
index 0000000000..4e45fe01f5
--- /dev/null
+++ b/src/INTEL/TEST/in.intel.snap
@@ -0,0 +1,70 @@
+# Toy demonstration of SNAP "scale" parameter, using fix/adapt and hybrid/overlay
+# Mixing linear and quadratic SNAP Ni potentials by Zuo et al. JPCA 2020
+
+variable	w index 10	# Warmup Timesteps
+variable	t index 100	# Main Run Timesteps
+variable	m index 1	# Main Run Timestep Multiplier
+variable	n index 0	# Use NUMA Mapping for Multi-Node
+
+variable	x index 4
+variable	y index 2
+variable	z index 2
+
+variable	rr equal floor($t*$m)
+variable        root getenv LMP_ROOT
+
+if "$n > 0"	then "processors * * * grid numa"
+
+# mixing parameter
+
+variable lambda equal 0.2
+
+# Initialize simulation
+
+variable a equal 3.52
+units           metal
+
+# generate the box and atom positions using a FCC lattice
+variable nx equal 20*$x
+variable ny equal 20*$y
+variable nz equal 20*$z
+
+boundary        p p p
+
+lattice         fcc $a
+region          box block 0 ${nx} 0 ${ny} 0 ${nz}
+create_box      1 box
+create_atoms    1 box
+
+mass 1 34.
+
+# choose bundled SNAP Ni potential from Zuo et al. JPCA 2020
+pair_style hybrid/overlay snap snap
+pair_coeff * * snap 1 &
+    ${root}/examples/snap/Ni_Zuo_JPCA2020.snapcoeff &
+    ${root}/examples/snap/Ni_Zuo_JPCA2020.snapparam Ni
+pair_coeff * * snap 2 &
+    ${root}/examples/snap/Ni_Zuo_JPCA2020.quadratic.snapcoeff &
+    ${root}/examples/snap/Ni_Zuo_JPCA2020.quadratic.snapparam Ni
+
+# scale according to mixing parameter
+variable l1 equal ${lambda}
+variable l2 equal 1.0-${lambda}
+fix scale1 all adapt 1 pair snap:1 scale * * v_l1
+fix scale2 all adapt 1 pair snap:2 scale * * v_l2
+
+# Setup output
+thermo          1
+thermo_modify norm yes
+
+# Set up NVE run
+timestep 0.5e-3
+neighbor 1.0 bin
+neigh_modify every 1 delay 0 check yes
+
+# Run MD
+velocity all create 300.0 4928459 loop geom
+fix 1 all nve
+
+if "$w > 0"	then "run $w"
+run		${rr}
diff --git a/src/INTEL/TEST/run_benchmarks.sh b/src/INTEL/TEST/run_benchmarks.sh
index 82eb51c928..eeb9f07a11 100755
--- a/src/INTEL/TEST/run_benchmarks.sh
+++ b/src/INTEL/TEST/run_benchmarks.sh
@@ -35,7 +35,7 @@ export I_MPI_PIN_DOMAIN=core
 # End settings for your system
 #########################################################################
 
-export WORKLOADS="lj rhodo lc sw water eam airebo dpd tersoff"
+export WORKLOADS="lj rhodo lc sw water eam airebo dpd tersoff snap"
 export LMP_ARGS="-pk intel 0 -sf intel -screen none -v d 1"
 export RLMP_ARGS="-pk intel 0 lrt yes -sf intel -screen none -v d 1"
 
diff --git a/src/INTEL/fix_intel.cpp b/src/INTEL/fix_intel.cpp
index 0a3d27a978..cb60149885 100644
--- a/src/INTEL/fix_intel.cpp
+++ b/src/INTEL/fix_intel.cpp
@@ -20,6 +20,7 @@
 #include "fix_intel.h"
 
 #include "comm.h"
+#include "domain.h"
 #include "error.h"
 #include "force.h"
 #include "neighbor.h"
@@ -470,6 +471,7 @@ void FixIntel::pair_init_check(const bool cdmessage)
 
   int need_tag = 0;
   if (atom->molecular != Atom::ATOMIC || three_body_neighbor()) need_tag = 1;
+  if (domain->triclinic && force->newton_pair) need_tag = 1;
 
   // Clear buffers used for pair style
   char kmode[80];
diff --git a/src/INTEL/intel_simd.h b/src/INTEL/intel_simd.h
index 37842621dc..9f31580dd2 100644
--- a/src/INTEL/intel_simd.h
+++ b/src/INTEL/intel_simd.h
@@ -46,13 +46,38 @@ namespace ip_simd {
 
   typedef __mmask16 SIMD_mask;
 
+  inline bool any(const SIMD_mask &m) { return m != 0; }
+
   struct SIMD_int {
     __m512i v;
     SIMD_int() {}
     SIMD_int(const __m512i in) : v(in) {}
+    inline int & operator[](const int i) { return ((int *)&(v))[i]; }
+    inline const int & operator[](const int i) const
+      { return ((int *)&(v))[i]; }
     operator __m512i() const { return v;}
   };
 
+  struct SIMD256_int {                  // wrapper around one __m256i of 32-bit ints
+    __m256i v;
+    SIMD256_int() {}                    // v is left uninitialized
+    SIMD256_int(const __m256i in) : v(in) {}
+    SIMD256_int(const int in) : v(_mm256_set1_epi32(in)) {}  // broadcast
+    inline int & operator[](const int i) { return ((int *)&(v))[i]; }
+    inline const int & operator[](const int i) const
+      { return ((const int *)&(v))[i]; }
+#ifdef __INTEL_LLVM_COMPILER
+    inline SIMD256_int & operator&=(const int i)
+      { v=_mm256_and_epi32(v, _mm256_set1_epi32(i)); return *this; }
+#else
+    inline SIMD256_int & operator&=(const int i)
+      { v=_mm256_and_si256(v, _mm256_set1_epi32(i)); return *this; }
+#endif
+    inline SIMD256_int & operator+=(const int i)
+      { v=_mm256_add_epi32(v, _mm256_set1_epi32(i)); return *this; }
+    operator __m256i() const { return v;}
+  };
+
   struct SIMD_float {
     __m512 v;
     SIMD_float() {}
@@ -64,7 +89,24 @@ namespace ip_simd {
     __m512d v;
     SIMD_double() {}
     SIMD_double(const __m512d in) : v(in) {}
+    SIMD_double(const double in) { v=_mm512_set1_pd(in); }
+    inline double & operator[](const int i) { return ((double *)&(v))[i]; }
+    inline const double & operator[](const int i) const
+      { return ((double *)&(v))[i]; }
     operator __m512d() const { return v;}
+
+    SIMD_double & operator=(const double i)  // broadcast i into every lane
+      { v = _mm512_set1_pd(i); return *this; }
+    SIMD_double &operator=(const SIMD_double &i)
+      { v = i.v; return *this; }
+
+    SIMD_double operator-() { return _mm512_xor_pd(v, _mm512_set1_pd(-0.0)); }
+    SIMD_double & operator+=(const SIMD_double & two)
+      { v = _mm512_add_pd(v, two.v); return *this; }
+    SIMD_double & operator-=(const SIMD_double & two)
+      { v = _mm512_sub_pd(v, two.v); return *this; }
+    SIMD_double & operator*=(const SIMD_double & two)
+      { v = _mm512_mul_pd(v, two.v); return *this; }
   };
 
   template<class flt_t>
@@ -99,6 +141,12 @@ namespace ip_simd {
 
   // ------- Set Operations
 
+  inline SIMD256_int SIMD256_set(const int l0, const int l1, const int l2,
+                                 const int l3, const int l4, const int l5,
+                                 const int l6, const int l7) {
+    return _mm256_setr_epi32(l0,l1,l2,l3,l4,l5,l6,l7);
+  }
+
   inline SIMD_int SIMD_set(const int l0, const int l1, const int l2,
                            const int l3, const int l4, const int l5,
                            const int l6, const int l7, const int l8,
@@ -109,6 +157,10 @@ namespace ip_simd {
                              l8,l9,l10,l11,l12,l13,l14,l15);
   }
 
+  inline SIMD256_int SIMD256_set(const int l) {
+    return _mm256_set1_epi32(l);
+  }
+
   inline SIMD_int SIMD_set(const int l) {
     return _mm512_set1_epi32(l);
   }
@@ -121,6 +173,10 @@ namespace ip_simd {
     return _mm512_set1_pd(l);
   }
 
+  inline SIMD256_int SIMD256_count() {
+    return SIMD256_set(0,1,2,3,4,5,6,7);
+  }
+
   inline SIMD_int SIMD_zero_masked(const SIMD_mask &m, const SIMD_int &one) {
     return _mm512_maskz_mov_epi32(m, one);
   }
@@ -147,6 +203,10 @@ namespace ip_simd {
 
   // -------- Load Operations
 
+  inline SIMD256_int SIMD_load(const SIMD256_int *p) {
+    return _mm256_load_epi32((int *)p);
+  }
+
   inline SIMD_int SIMD_load(const int *p) {
     return _mm512_load_epi32(p);
   }
@@ -159,6 +219,10 @@ namespace ip_simd {
     return _mm512_load_pd(p);
   }
 
+  inline SIMD_double SIMD_load(const SIMD_double *p) {
+    return _mm512_load_pd((double *)p);
+  }
+
   inline SIMD_int SIMD_loadz(const SIMD_mask &m, const int *p) {
     return _mm512_maskz_load_epi32(m, p);
   }
@@ -171,6 +235,10 @@ namespace ip_simd {
     return _mm512_maskz_load_pd(m, p);
   }
 
+  inline SIMD256_int SIMD_gather(const int *p, const SIMD256_int &i) {
+    return _mm256_i32gather_epi32(p, i, _MM_SCALE_4);
+  }
+
   inline SIMD_int SIMD_gather(const int *p, const SIMD_int &i) {
     return _mm512_i32gather_epi32(i, p, _MM_SCALE_4);
   }
@@ -179,6 +247,10 @@ namespace ip_simd {
     return _mm512_i32gather_ps(i, p, _MM_SCALE_4);
   }
 
+  inline SIMD_double SIMD_gather(const double *p, const SIMD256_int &i) {
+    return _mm512_i32gather_pd(i, p, _MM_SCALE_8);
+  }
+
   inline SIMD_double SIMD_gather(const double *p, const SIMD_int &i) {
     return _mm512_i32gather_pd(_mm512_castsi512_si256(i), p, _MM_SCALE_8);
   }
@@ -201,6 +273,12 @@ namespace ip_simd {
                                     _mm512_castsi512_si256(i), p, _MM_SCALE_8);
   }
 
+  inline SIMD_double SIMD_gather(const SIMD_mask &m, const double *p,
+                                 const SIMD256_int &i) {
+    return _mm512_mask_i32gather_pd(_mm512_undefined_pd(), m,
+                                    i, p, _MM_SCALE_8);
+  }
+
   template <typename T>
   inline SIMD_int SIMD_gatherz_offset(const SIMD_mask &m, const int *p,
                                       const SIMD_int &i) {
@@ -252,6 +330,15 @@ namespace ip_simd {
     return _mm512_store_pd(p,one);
   }
 
+  inline void SIMD_store(SIMD_double *p, const SIMD_double &one) {
+    return _mm512_store_pd((double *)p,one);
+  }
+
+  inline void SIMD_scatter(const SIMD_mask &m, int *p,
+                           const SIMD256_int &i, const SIMD256_int &vec) {
+    _mm256_mask_i32scatter_epi32(p, m, i, vec, _MM_SCALE_4);
+  }
+
   inline void SIMD_scatter(const SIMD_mask &m, int *p,
                            const SIMD_int &i, const SIMD_int &vec) {
     _mm512_mask_i32scatter_epi32(p, m, i, vec, _MM_SCALE_4);
@@ -268,8 +355,22 @@ namespace ip_simd {
                               _MM_SCALE_8);
   }
 
+  inline void SIMD_scatter(const SIMD_mask &m, double *p,
+                           const SIMD256_int &i, const SIMD_double &vec) {
+    _mm512_mask_i32scatter_pd(p, m, i, vec, _MM_SCALE_8);
+  }
+
+  inline void SIMD_scatter(double *p,
+                           const SIMD256_int &i, const SIMD_double &vec) {
+    _mm512_i32scatter_pd(p, i, vec, _MM_SCALE_8);
+  }
+
   // ------- Arithmetic Operations
 
+  inline SIMD256_int operator+(const SIMD256_int &one, const SIMD256_int &two) {
+    return _mm256_add_epi32(one,two);
+  }
+
   inline SIMD_int operator+(const SIMD_int &one, const SIMD_int &two) {
     return _mm512_add_epi32(one,two);
   }
@@ -286,6 +387,10 @@ namespace ip_simd {
     return _mm512_add_epi32(one,SIMD_set(two));
   }
 
+  inline SIMD256_int operator+(const SIMD256_int &one, const int two) {
+    return _mm256_add_epi32(one,SIMD256_set(two));
+  }
+
   inline SIMD_float operator+(const SIMD_float &one, const float two) {
     return _mm512_add_ps(one,SIMD_set(two));
   }
@@ -299,6 +404,11 @@ namespace ip_simd {
     return _mm512_mask_add_epi32(one,m,one,SIMD_set(two));
   }
 
+  inline SIMD256_int SIMD_add(const SIMD_mask &m,
+                           const SIMD256_int &one, const int two) {
+    return _mm256_mask_add_epi32(one,m,one,SIMD256_set(two));
+  }
+
   inline SIMD_float SIMD_add(const SIMD_mask &m,
                              const SIMD_float &one, const float two) {
     return _mm512_mask_add_ps(one,m,one,SIMD_set(two));
@@ -309,6 +419,11 @@ namespace ip_simd {
     return _mm512_mask_add_pd(one,m,one,SIMD_set(two));
   }
 
+  inline SIMD_double SIMD_add(const SIMD_mask &m,
+                              const SIMD_double &one, const SIMD_double &two) {
+    return _mm512_mask_add_pd(one,m,one,two);
+  }
+
   inline SIMD_int SIMD_add(const SIMD_int &s, const SIMD_mask &m,
                            const SIMD_int &one, const SIMD_int &two) {
     return _mm512_mask_add_epi32(s,m,one,two);
@@ -387,6 +502,10 @@ namespace ip_simd {
     return _mm512_mul_pd(one,two);
   }
 
+  inline SIMD256_int operator*(const SIMD256_int &one, const int two) {
+    return _mm256_mullo_epi32(one,SIMD256_set(two));
+  }
+
   inline SIMD_int operator*(const SIMD_int &one, const int two) {
     return _mm512_mullo_epi32(one,SIMD_set(two));
   }
@@ -417,6 +536,12 @@ namespace ip_simd {
     return _mm512_fmadd_pd(one,two,three);
   }
 
+  inline SIMD_double SIMD_fma(const SIMD_mask m, const SIMD_double &one,
+                              const SIMD_double &two,
+                              const SIMD_double &three) {
+    return _mm512_mask3_fmadd_pd(one,two,three,m);
+  }
+
   inline SIMD_float SIMD_fms(const SIMD_float &one, const SIMD_float &two,
                              const SIMD_float &three) {
     return _mm512_fmsub_ps(one,two,three);
@@ -493,6 +618,10 @@ namespace ip_simd {
     return _mm512_pow_pd(one, two);
   }
 
+  inline SIMD_double SIMD_pow(const SIMD_double &one, const double two) {
+    return _mm512_pow_pd(one, SIMD_set(two));
+  }
+
   inline SIMD_float SIMD_exp(const SIMD_float &one) {
     return _mm512_exp_ps(one);
   }
@@ -501,6 +630,18 @@ namespace ip_simd {
     return _mm512_exp_pd(one);
   }
 
+  inline SIMD_double SIMD_cos(const SIMD_double &one) {
+    return _mm512_cos_pd(one);
+  }
+
+  inline SIMD_double SIMD_sin(const SIMD_double &one) {
+    return _mm512_sin_pd(one);
+  }
+
+  inline SIMD_double SIMD_tan(const SIMD_double &one) {
+    return _mm512_tan_pd(one);
+  }
+
   // ------- Comparison operations
 
   inline SIMD_mask SIMD_lt(SIMD_mask m, const SIMD_int &one,
@@ -533,6 +674,14 @@ namespace ip_simd {
     return _mm512_mask_cmplt_pd_mask(m, SIMD_set(one), two);
   }
 
+  inline SIMD_mask operator<(const SIMD256_int &one, const SIMD256_int &two) {
+    return _mm256_cmplt_epi32_mask(one,two);
+  }
+
+  inline SIMD_mask operator<(const int one, const SIMD256_int &two) {
+    return _mm256_cmplt_epi32_mask(SIMD256_set(one),two);
+  }
+
   inline SIMD_mask operator<(const SIMD_int &one, const SIMD_int &two) {
     return _mm512_cmplt_epi32_mask(one,two);
   }
@@ -577,6 +726,10 @@ namespace ip_simd {
     return _mm512_cmple_ps_mask(SIMD_set(one), two);
   }
 
+  inline SIMD_mask operator<=(const SIMD_double &one, const SIMD_double &two) {
+    return _mm512_cmple_pd_mask(one, two);
+  }
+
   inline SIMD_mask operator<=(const double one, const SIMD_double &two) {
     return _mm512_cmple_pd_mask(SIMD_set(one), two);
   }
@@ -593,6 +746,14 @@ namespace ip_simd {
     return _mm512_cmplt_pd_mask(two,one);
   }
 
+  inline SIMD_mask operator>(const SIMD_double &one, const double two) {
+    return _mm512_cmplt_pd_mask(SIMD_set(two),one);
+  }
+
+  inline SIMD_mask operator==(const SIMD256_int &one, const int two) {
+    return _mm256_cmpeq_epi32_mask(one,_mm256_set1_epi32(two));
+  }
+
   inline SIMD_mask operator==(const SIMD_int &one, const SIMD_int &two) {
     return _mm512_cmpeq_epi32_mask(one,two);
   }
diff --git a/src/INTEL/npair_halffull_intel.cpp b/src/INTEL/npair_halffull_intel.cpp
index 42ecb716f5..5a70edae8b 100644
--- a/src/INTEL/npair_halffull_intel.cpp
+++ b/src/INTEL/npair_halffull_intel.cpp
@@ -20,7 +20,9 @@
 
 #include "atom.h"
 #include "comm.h"
+#include "domain.h"
 #include "error.h"
+#include "force.h"
 #include "modify.h"
 #include "my_page.h"
 #include "neigh_list.h"
@@ -56,6 +58,9 @@ void NPairHalffullNewtonIntel::build_t(NeighList *list,
   const int * _noalias const numneigh_full = list->listfull->numneigh;
   const int ** _noalias const firstneigh_full = (const int ** const)list->listfull->firstneigh;  // NOLINT
 
+  const double delta = 0.01 * force->angstrom;
+  const int triclinic = domain->triclinic;
+
   #if defined(_OPENMP)
   #pragma omp parallel
   #endif
@@ -82,25 +87,50 @@ void NPairHalffullNewtonIntel::build_t(NeighList *list,
       const int * _noalias const jlist = firstneigh_full[i];
       const int jnum = numneigh_full[i];
 
-      #if defined(LMP_SIMD_COMPILER)
-      #pragma vector aligned
-      #pragma ivdep
-      #endif
-      for (int jj = 0; jj < jnum; jj++) {
-        const int joriginal = jlist[jj];
-        const int j = joriginal & NEIGHMASK;
-        int addme = 1;
-        if (j < nlocal) {
-          if (i > j) addme = 0;
-        } else {
-          if (x[j].z < ztmp) addme = 0;
-          if (x[j].z == ztmp) {
-            if (x[j].y < ytmp) addme = 0;
-            if (x[j].y == ytmp && x[j].x < xtmp) addme = 0;
+      if (!triclinic) {
+        #if defined(LMP_SIMD_COMPILER)
+        #pragma vector aligned
+        #pragma ivdep
+        #endif
+        for (int jj = 0; jj < jnum; jj++) {
+          const int joriginal = jlist[jj];
+          const int j = joriginal & NEIGHMASK;
+          int addme = 1;
+          if (j < nlocal) {
+            if (i > j) addme = 0;
+          } else {
+            if (x[j].z < ztmp) addme = 0;
+            if (x[j].z == ztmp) {
+              if (x[j].y < ytmp) addme = 0;
+              if (x[j].y == ytmp && x[j].x < xtmp) addme = 0;
+            }
           }
+          if (addme)
+            neighptr[n++] = joriginal;
+        }
+      } else {
+        #if defined(LMP_SIMD_COMPILER)
+        #pragma vector aligned
+        #pragma ivdep
+        #endif
+        for (int jj = 0; jj < jnum; jj++) {
+          const int joriginal = jlist[jj];
+          const int j = joriginal & NEIGHMASK;
+          int addme = 1;
+          if (j < nlocal) {
+            if (i > j) addme = 0;
+          } else {
+            if (fabs(x[j].z-ztmp) > delta) {
+              if (x[j].z < ztmp) addme = 0;
+            } else if (fabs(x[j].y-ytmp) > delta) {
+              if (x[j].y < ytmp) addme = 0;
+            } else {
+              if (x[j].x < xtmp) addme = 0;
+            }
+          }
+          if (addme)
+            neighptr[n++] = joriginal;
         }
-        if (addme)
-          neighptr[n++] = joriginal;
       }
 
       ilist[ii] = i;
@@ -203,7 +233,7 @@ void NPairHalffullNewtonIntel::build_t3(NeighList *list, int *numhalf)
 
 void NPairHalffullNewtonIntel::build(NeighList *list)
 {
-  if (_fix->three_body_neighbor() == 0) {
+  if (_fix->three_body_neighbor() == 0 || domain->triclinic) {
     if (_fix->precision() == FixIntel::PREC_MODE_MIXED)
       build_t(list, _fix->get_mixed_buffers());
     else if (_fix->precision() == FixIntel::PREC_MODE_DOUBLE)
@@ -255,6 +285,8 @@ void NPairHalffullNewtonTrimIntel::build_t(NeighList *list,
   const int ** _noalias const firstneigh_full = (const int ** const)list->listfull->firstneigh;  // NOLINT
 
   const flt_t cutsq_custom = cutoff_custom * cutoff_custom;
+  const double delta = 0.01 * force->angstrom;
+  const int triclinic = domain->triclinic;
 
   #if defined(_OPENMP)
   #pragma omp parallel
@@ -282,35 +314,70 @@ void NPairHalffullNewtonTrimIntel::build_t(NeighList *list,
       const int * _noalias const jlist = firstneigh_full[i];
       const int jnum = numneigh_full[i];
 
-      #if defined(LMP_SIMD_COMPILER)
-      #pragma vector aligned
-      #pragma ivdep
-      #endif
-      for (int jj = 0; jj < jnum; jj++) {
-        const int joriginal = jlist[jj];
-        const int j = joriginal & NEIGHMASK;
-        int addme = 1;
-        if (j < nlocal) {
-          if (i > j) addme = 0;
-        } else {
-          if (x[j].z < ztmp) addme = 0;
-          if (x[j].z == ztmp) {
-            if (x[j].y < ytmp) addme = 0;
-            if (x[j].y == ytmp && x[j].x < xtmp) addme = 0;
+      if (!triclinic) {
+        #if defined(LMP_SIMD_COMPILER)
+        #pragma vector aligned
+        #pragma ivdep
+        #endif
+        for (int jj = 0; jj < jnum; jj++) {
+          const int joriginal = jlist[jj];
+          const int j = joriginal & NEIGHMASK;
+          int addme = 1;
+          if (j < nlocal) {
+            if (i > j) addme = 0;
+          } else {
+            if (x[j].z < ztmp) addme = 0;
+            if (x[j].z == ztmp) {
+              if (x[j].y < ytmp) addme = 0;
+              if (x[j].y == ytmp && x[j].x < xtmp) addme = 0;
+            }
           }
+
+          // trim to shorter cutoff
+
+          const flt_t delx = xtmp - x[j].x;
+          const flt_t dely = ytmp - x[j].y;
+          const flt_t delz = ztmp - x[j].z;
+          const flt_t rsq = delx * delx + dely * dely + delz * delz;
+
+          if (rsq > cutsq_custom) addme = 0;
+
+          if (addme)
+            neighptr[n++] = joriginal;
         }
+      } else {
+        #if defined(LMP_SIMD_COMPILER)
+        #pragma vector aligned
+        #pragma ivdep
+        #endif
+        for (int jj = 0; jj < jnum; jj++) {
+          const int joriginal = jlist[jj];
+          const int j = joriginal & NEIGHMASK;
+          int addme = 1;
+          if (j < nlocal) {
+            if (i > j) addme = 0;
+          } else {
+            if (fabs(x[j].z-ztmp) > delta) {
+              if (x[j].z < ztmp) addme = 0;
+            } else if (fabs(x[j].y-ytmp) > delta) {
+              if (x[j].y < ytmp) addme = 0;
+            } else {
+              if (x[j].x < xtmp) addme = 0;
+            }
+          }
 
-        // trim to shorter cutoff
+          // trim to shorter cutoff
 
-        const flt_t delx = xtmp - x[j].x;
-        const flt_t dely = ytmp - x[j].y;
-        const flt_t delz = ztmp - x[j].z;
-        const flt_t rsq = delx * delx + dely * dely + delz * delz;
+          const flt_t delx = xtmp - x[j].x;
+          const flt_t dely = ytmp - x[j].y;
+          const flt_t delz = ztmp - x[j].z;
+          const flt_t rsq = delx * delx + dely * dely + delz * delz;
 
-        if (rsq > cutsq_custom) addme = 0;
+          if (rsq > cutsq_custom) addme = 0;
 
-        if (addme)
-          neighptr[n++] = joriginal;
+          if (addme)
+            neighptr[n++] = joriginal;
+        }
       }
 
       ilist[ii] = i;
@@ -433,7 +500,7 @@ void NPairHalffullNewtonTrimIntel::build_t3(NeighList *list, int *numhalf,
 
 void NPairHalffullNewtonTrimIntel::build(NeighList *list)
 {
-  if (_fix->three_body_neighbor() == 0) {
+  if (_fix->three_body_neighbor() == 0 || domain->triclinic) {
     if (_fix->precision() == FixIntel::PREC_MODE_MIXED)
       build_t(list, _fix->get_mixed_buffers());
     else if (_fix->precision() == FixIntel::PREC_MODE_DOUBLE)
diff --git a/src/INTEL/npair_intel.cpp b/src/INTEL/npair_intel.cpp
index 600109d7ae..dcfb66e05f 100644
--- a/src/INTEL/npair_intel.cpp
+++ b/src/INTEL/npair_intel.cpp
@@ -204,6 +204,8 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
   }
   const int special_bound = sb;
 
+  const double delta = 0.01 * force->angstrom;
+
   #ifdef _LMP_INTEL_OFFLOAD
   const int * _noalias const binhead = this->binhead;
   const int * _noalias const bins = this->bins;
@@ -229,7 +231,7 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
     in(ncache_stride,maxnbors,nthreads,maxspecial,nstencil,e_nall,offload) \
     in(offload_end,separate_buffers,astart,aend,nlocal,molecular) \
     in(ntypes,xperiodic,yperiodic,zperiodic,xprd_half,yprd_half,zprd_half) \
-    in(pack_width,special_bound)                                        \
+    in(pack_width,special_bound,delta)                                  \
     out(overflow:length(5) alloc_if(0) free_if(0)) \
     out(timer_compute:length(1) alloc_if(0) free_if(0)) \
     signal(tag)
@@ -331,7 +333,7 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
         const flt_t ztmp = x[i].z;
         const int itype = x[i].w;
         tagint itag;
-        if (THREE) itag = tag[i];
+        if (THREE || (TRI && !FULL)) itag = tag[i];
         const int ioffset = ntypes * itype;
 
         const int ibin = atombin[i];
@@ -365,7 +367,7 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
             ty[u] = x[j].y;
             tz[u] = x[j].z;
             tjtype[u] = x[j].w;
-            if (THREE) ttag[u] = tag[j];
+            if (THREE || (TRI && !FULL)) ttag[u] = tag[j];
           }
 
           if (FULL == 0 && TRI != 1) {
@@ -486,12 +488,32 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
 
           // Triclinic
           if (TRI) {
-            if (tz[u] < ztmp) addme = 0;
-            if (tz[u] == ztmp) {
-              if (ty[u] < ytmp) addme = 0;
-              if (ty[u] == ytmp) {
-                if (tx[u] < xtmp) addme = 0;
-                if (tx[u] == xtmp && j <= i) addme = 0;
+            if (FULL) {
+              if (tz[u] < ztmp) addme = 0;
+              if (tz[u] == ztmp) {
+                if (ty[u] < ytmp) addme = 0;
+                if (ty[u] == ytmp) {
+                  if (tx[u] < xtmp) addme = 0;
+                  if (tx[u] == xtmp && j <= i) addme = 0;
+                }
+              }
+            } else {
+              if (j <= i) addme = 0;
+              if (j >= nlocal) {
+                const tagint jtag = ttag[u];
+                if (itag > jtag) {
+                  if ((itag+jtag) % 2 == 0) addme = 0;
+                } else if (itag < jtag) {
+                  if ((itag+jtag) % 2 == 1) addme = 0;
+                } else {
+                  if (fabs(tz[u]-ztmp) > delta) {
+                    if (tz[u] < ztmp) addme = 0;
+                  } else if (fabs(ty[u]-ytmp) > delta) {
+                    if (ty[u] < ytmp) addme = 0;
+                  } else {
+                    if (tx[u] < xtmp) addme = 0;
+                  }
+                }
               }
             }
           }
diff --git a/src/INTEL/pair_snap_intel.cpp b/src/INTEL/pair_snap_intel.cpp
new file mode 100644
index 0000000000..d91f0adc36
--- /dev/null
+++ b/src/INTEL/pair_snap_intel.cpp
@@ -0,0 +1,779 @@
+// clang-format off
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#if defined(__AVX512F__)
+#if defined(__INTEL_COMPILER) || defined(__INTEL_LLVM_COMPILER)
+
+#include "pair_snap_intel.h"
+
+#include "atom.h"
+#include "comm.h"
+#include "error.h"
+#include "force.h"
+#include "memory.h"
+#include "modify.h"
+#include "neigh_list.h"
+#include "neighbor.h"
+#include "sna_intel.h"
+#include "tokenizer.h"
+
+#include <cmath>
+#include <cstring>
+
+using namespace LAMMPS_NS;
+using namespace ip_simd;
+
+#define MAXLINE 1024
+#define MAXWORD 3
+
+/* ---------------------------------------------------------------------- */
+
+PairSNAPIntel::PairSNAPIntel(LAMMPS *lmp) : Pair(lmp)
+{
+  // every pointer the destructor releases starts out null
+  snaptr = nullptr;
+  bispectrum = nullptr;
+  beta = nullptr;
+  dinnerelem = nullptr;
+  sinnerelem = nullptr;
+  coeffelem = nullptr;
+  wjelem = nullptr;
+  radelem = nullptr;
+
+  // pair-style flags
+  centroidstressflag = CENTROID_NOTAVAIL;
+  manybody_flag = 1;
+  one_coeff = 1;
+  restartinfo = 0;
+  single_enable = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+PairSNAPIntel::~PairSNAPIntel()
+{
+  if (!copymode) {
+    // tables indexed by element
+    memory->destroy(radelem);
+    memory->destroy(wjelem);
+    memory->destroy(coeffelem);
+    memory->destroy(sinnerelem);
+    memory->destroy(dinnerelem);
+    // scratch vectors used by compute()
+    memory->destroy(beta);
+    memory->destroy(bispectrum);
+    // the snaptr object
+    delete snaptr;
+    // arrays created by allocate()
+    if (allocated) {
+      memory->destroy(setflag);
+      memory->destroy(cutsq);
+      memory->destroy(scale);
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   straightforward implementation; handles vw atoms of the list per pass
+   ---------------------------------------------------------------------- */
+
+void PairSNAPIntel::compute(int eflag, int vflag)
+{
+  SNA_DVEC fij[3];
+  int *jlist,*numneigh,**firstneigh;
+
+  ev_init(eflag,vflag);
+  int tally_xyz = 0;
+  if (vflag_atom || (vflag && !vflag_fdotr)) tally_xyz = 1;
+
+  // flat view of the coordinates: atom a, component d is read at _x[3*a + d]
+  double *_x = atom->x[0];
+  double **f = atom->f;
+  int *type = atom->type;
+  int nlocal = atom->nlocal;
+  int newton_pair = force->newton_pair;
+
+  // compute dE_i/dB_i = beta_i for all i in list
+
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  SNA_DVEC sevdwl(0);
+
+  const int vw = snaptr->vector_width();
+  for (int ii = 0; ii < list->inum; ii+=vw) {
+    SNA_IVEC i, jnum;
+    int max_jnum = 0;
+    for (int l = 0; l < vw; l++) {
+      if (ii + l < list->inum) {
+        i[l] = list->ilist[ii + l];
+        jnum[l] = numneigh[i[l]];
+      } else {
+        i[l] = list->ilist[0];     // padding lane: alias a valid atom index
+        jnum[l] = 0;               // ... and give it no neighbors
+      }
+      if (jnum[l] > max_jnum) max_jnum = jnum[l];
+    }
+
+    // ensure rij, inside, wj, and rcutij are of size jnum
+
+    snaptr->grow_rij(max_jnum);
+
+    SNA_IVEC zero_vec(0);
+
+    const SNA_DVEC xtmp = SIMD_gather(_x, i * 3);
+    const SNA_DVEC ytmp = SIMD_gather(_x, i * 3 + 1);
+    const SNA_DVEC ztmp = SIMD_gather(_x, i * 3 + 2);
+    const SNA_IVEC itype = SIMD_gather(type, i);
+    const SNA_IVEC ielem = SIMD_gather(map, itype);
+    const SNA_DVEC radi = SIMD_gather(radelem, ielem);
+
+    // rij[][3] = displacements between atom I and those neighbors
+    // inside = indices of neighbors of I within cutoff
+    // wj = weights for neighbors of I within cutoff
+    // rcutij = cutoffs for neighbors of I within cutoff
+    // note Rij sign convention => dU/dRij = dU/dRj = -dU/dRi
+
+    SNA_IVEC ninside(0);
+    for (int jj = 0; jj < max_jnum; jj++) {
+      SIMD_mask m(SIMD256_set(jj) < jnum);
+
+      SNA_IVEC j;
+      SV_for (int l = 0; l < vw; l++) {
+        jlist = firstneigh[i[l]];
+        if (jj < jnum[l]) j[l] = jlist[jj];
+        else j[l] = 0;
+      }
+      j &= NEIGHMASK;
+
+      const SNA_DVEC delx = SIMD_gather(m, _x, j * 3) - xtmp;
+      const SNA_DVEC dely = SIMD_gather(m, _x, j * 3 + 1) - ytmp;
+      const SNA_DVEC delz = SIMD_gather(m, _x, j * 3 + 2) - ztmp;
+      const SNA_IVEC jtype = SIMD_gather(type, j);
+      const SNA_DVEC rsq = delx*delx + dely*dely + delz*delz;
+      const SNA_DVEC vcut = SIMD_gather(m, cutsq[0],
+                                        itype * (atom->ntypes + 1) + jtype);
+
+      m &= rsq < vcut;
+      m &= rsq > SIMD_set(1e-20);
+      const SNA_IVEC jelem = SIMD_gather(map, jtype);
+      const SNA_IVEC ni3 = ninside * vw * 3 + SIMD256_count();
+      SIMD_scatter(m, (double *)(snaptr->rij[0]), ni3, delx);
+      SIMD_scatter(m, (double *)(snaptr->rij[0] + 1), ni3, dely);
+      SIMD_scatter(m, (double *)(snaptr->rij[0] + 2), ni3, delz);
+      const SNA_IVEC ni = ninside * vw + SIMD256_count();
+      SIMD_scatter(m, (int *)(snaptr->inside), ni, j);
+      SIMD_scatter(m, (double *)(snaptr->wj), ni,
+                   SIMD_gather(m, wjelem, jelem));
+      SIMD_scatter(m, (double *)(snaptr->rcutij), ni,
+                   (radi + SIMD_gather(m, radelem, jelem)) * rcutfac);
+      if (switchinnerflag) {
+        SIMD_scatter(m, (double *)(snaptr->sinnerij), ni,
+                     (SIMD_gather(m, sinnerelem, ielem) +
+                      SIMD_gather(m, sinnerelem, jelem)) * 0.5);
+        SIMD_scatter(m, (double *)(snaptr->dinnerij), ni,
+                     (SIMD_gather(m, dinnerelem, ielem) +
+                      SIMD_gather(m, dinnerelem, jelem)) * 0.5);
+      }
+      if (chemflag)
+        SIMD_scatter(m, (int *)(snaptr->element), ni, jelem);
+      ninside = SIMD_add(m, ninside, 1);
+    } // for jj
+
+    // compute Ui, Yi for atom I
+
+    if (chemflag)
+      snaptr->compute_ui(ninside, ielem, max_jnum);
+    else
+      snaptr->compute_ui(ninside, zero_vec, max_jnum);
+
+    // Compute bispectrum
+    if (quadraticflag || eflag) {
+      snaptr->compute_zi_or_yi<0>(beta);
+      if (chemflag)
+        snaptr->compute_bi(ielem);
+      else
+        snaptr->compute_bi(zero_vec);
+      for (int icoeff = 0; icoeff < ncoeff; icoeff++)
+        SIMD_store(bispectrum + icoeff, SIMD_load(snaptr->blist + icoeff));
+    }
+
+    // Compute beta
+    for (int icoeff = 0; icoeff < ncoeff; icoeff++)
+      SIMD_store(beta + icoeff, SIMD_gather(coeffelem[0],
+                                            ielem * ncoeffall + icoeff + 1));
+
+    if (quadraticflag) {
+      int k = ncoeff+1;
+      for (int icoeff = 0; icoeff < ncoeff; icoeff++) {
+        SNA_DVEC bveci = SIMD_load(bispectrum + icoeff);
+        SNA_DVEC beta_i = SIMD_load(beta + icoeff) +
+          SIMD_gather(coeffelem[0], ielem * ncoeffall + k) * bveci;
+        k++;
+        for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) {
+          const SNA_DVEC ci = SIMD_gather(coeffelem[0], ielem * ncoeffall + k);
+          beta_i = beta_i + ci * SIMD_load(bispectrum + jcoeff);
+          SIMD_store(beta + jcoeff, ci * bveci + SIMD_load(beta + jcoeff));
+          k++;
+        }
+        SIMD_store(beta + icoeff, beta_i);
+      }
+    }
+
+    // for neighbors of I within cutoff:
+    // compute Fij = dEi/dRj = -dEi/dRi
+    // add to Fi, subtract from Fj
+    // scaling is that for type I
+
+    if (quadraticflag || eflag)
+      snaptr->compute_yi_from_zi(beta);
+    else
+      snaptr->compute_zi_or_yi<1>(beta);
+
+    SNA_DVEC fi_x(0.0), fi_y(0.0), fi_z(0.0);
+    SNA_DVEC scalev = SIMD_gather(scale[0], itype * (atom->ntypes+1) + itype);
+    for (int jj = 0; jj < max_jnum; jj++) {
+      snaptr->compute_duidrj(jj, ninside);
+      if (chemflag && nelements > 1)
+        snaptr->compute_deidrj_e(jj, ninside, fij);
+      else
+        snaptr->compute_deidrj(jj, ninside, fij);
+
+      SNA_DVEC fijs_x = fij[0] * scalev;
+      SNA_DVEC fijs_y = fij[1] * scalev;
+      SNA_DVEC fijs_z = fij[2] * scalev;
+
+      fi_x += fijs_x;
+      fi_y += fijs_y;
+      fi_z += fijs_z;
+
+      for (int l = 0; l < vw; l++) {
+        if (jj < ninside[l]) {
+          int j = snaptr->inside[jj][l];
+          f[j][0] -= fijs_x[l];
+          f[j][1] -= fijs_y[l];
+          f[j][2] -= fijs_z[l];
+
+          if (tally_xyz)
+            ev_tally_xyz(i[l],j,nlocal,newton_pair,0.0,0.0,
+                         fij[0][l],fij[1][l],fij[2][l],
+                         -snaptr->rij[jj][0][l],-snaptr->rij[jj][1][l],
+                         -snaptr->rij[jj][2][l]);
+        }
+      } // for l
+    } // for jj
+    SIMD_mask m((SIMD256_count() + ii) < list->inum);  // lanes holding a real atom
+    SNA_DVEC fix = SIMD_gather(m, f[0], i * 3) +  fi_x;
+    SIMD_scatter(m, f[0], i * 3, fix);
+    SNA_DVEC fiy = SIMD_gather(m, f[0], i * 3 + 1) +  fi_y;
+    SIMD_scatter(m, f[0], i * 3 + 1, fiy);
+    SNA_DVEC fiz = SIMD_gather(m, f[0], i * 3 + 2) +  fi_z;
+    SIMD_scatter(m, f[0], i * 3 + 2, fiz);
+
+    // tally energy contribution
+
+    if (eflag) {
+      SNA_DVEC evdwl = SIMD_gather(coeffelem[0], ielem * ncoeffall);
+      for (int icoeff = 0; icoeff < ncoeff; icoeff++)
+        evdwl += SIMD_gather(coeffelem[0], ielem * ncoeffall + icoeff +1) *
+          bispectrum[icoeff];
+
+      if (quadraticflag) {
+        int k = ncoeff+1;
+        for (int icoeff = 0; icoeff < ncoeff; icoeff++) {
+          SNA_DVEC bveci = SIMD_load(bispectrum + icoeff);
+          SNA_DVEC c = SIMD_gather(coeffelem[0], ielem * ncoeffall + k);
+          k++;
+          evdwl += c * 0.5 * bveci * bveci;
+          for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) {
+            SNA_DVEC bvecj = SIMD_load(bispectrum + jcoeff);
+            SNA_DVEC cj = SIMD_gather(coeffelem[0], ielem * ncoeffall + k);
+            k++;
+            evdwl += cj * bveci * bvecj;
+          }
+        }
+      }
+      sevdwl = SIMD_add(m, sevdwl, scalev * evdwl);  // leave out padding lanes
+      if (eatom) {
+        SNA_DVEC ea = SIMD_gather(m, eatom, i) + scalev * evdwl;
+        SIMD_scatter(m, eatom, i, ea);
+      }
+    } // if (eflag)
+  } // for ii
+  if (eflag) eng_vdwl += SIMD_sum(sevdwl);
+  if (vflag_fdotr) virial_fdotr_compute();
+}
+
+/* ----------------------------------------------------------------------
+   allocate the per-type-pair tables (setflag, cutsq, scale) and the type map
+------------------------------------------------------------------------- */
+
+void PairSNAPIntel::allocate()
+{
+  const int dim = atom->ntypes + 1;
+  allocated = 1;
+  memory->create(setflag, dim, dim, "pair:setflag");
+  memory->create(cutsq, dim, dim, "pair:cutsq");
+  memory->create(scale, dim, dim, "pair:scale");
+  map = new int[dim];
+}
+
+/* ----------------------------------------------------------------------
+   global settings: pair_style takes no arguments; rank 0 warns if threads > 1
+------------------------------------------------------------------------- */
+
+void PairSNAPIntel::settings(int narg, char ** /* arg */)
+{
+  if (narg > 0) error->all(FLERR,"Illegal pair_style command");
+  const bool warn_threads = (comm->nthreads > 1) && (comm->me == 0);
+  if (warn_threads)
+    error->warning(FLERR, "Pair style snap/intel does not use OpenMP threads");
+}
+
+/* ----------------------------------------------------------------------
+   set coeffs for one or more type pairs
+   arg[2] = coefficient file, arg[3] = parameter file, arg[4..] = one entry per atom type
+   builds the SNAIntel helper and resets scale[][] to 1.0
+------------------------------------------------------------------------- */
+
+void PairSNAPIntel::coeff(int narg, char **arg)
+{
+  if (!allocated) allocate();
+  if (narg != 4 + atom->ntypes) error->all(FLERR,"Incorrect args for pair coefficients");
+
+  map_element2type(narg-4,arg+4);
+
+  // load the coefficient and parameter files; this sets ncoeffall and the flags used below
+  read_files(arg[2],arg[3]);
+
+  if (quadraticflag) {
+
+    // quadratic model: ncoeffall = 1 + ncoeff + ncoeff*(ncoeff+1)/2 = (ncoeff+2)*(ncoeff+1)/2
+    // so ncoeff = floor(sqrt(2*ncoeffall))-1; verify by rebuilding the total
+
+    ncoeff = static_cast<int>(sqrt(2.0*ncoeffall)-1);
+    ncoeffq = (ncoeff*(ncoeff+1))/2;
+    const int rebuilt = 1+ncoeff+ncoeffq;
+    if (rebuilt != ncoeffall) error->all(FLERR,"Incorrect SNAP coeff file");
+  } else {
+    ncoeff = ncoeffall - 1;
+  }
+
+  // create the SNA helper from the settings parsed out of the parameter file
+  snaptr = new SNAIntel(lmp, rfac0, twojmax, rmin0, switchflag, bzeroflag, chemflag,
+                        bnormflag, wselfallflag, nelements, switchinnerflag);
+
+  // the coefficient file and the SNA helper must agree on the number of coefficients
+  if (snaptr->ncoeff != ncoeff) {
+    if (comm->me == 0)
+      printf("ncoeff = %d snancoeff = %d \n",ncoeff,snaptr->ncoeff);
+    error->all(FLERR,"Incorrect SNAP parameter file");
+  }
+
+  // Calculate maximum cutoff for all elements: largest 2*radelem*rcutfac
+  rcutmax = 0.0;
+  for (int ielem = 0; ielem < nelements; ielem++) {
+    const double cut = 2.0*radelem[ielem]*rcutfac;
+    if (cut > rcutmax) rcutmax = cut;
+  }
+
+  // set default scaling
+  const int ntypes = atom->ntypes;
+  for (int itype = 0; itype <= ntypes; itype++)
+    for (int jtype = 0; jtype <= ntypes; jtype++)
+      scale[itype][jtype] = 1.0;
+}
+
+/* ----------------------------------------------------------------------
+   init specific to this pair style: checks newton_pair, requests a full
+   neighbor list, looks up the package_intel fix, allocates work arrays
+------------------------------------------------------------------------- */
+
+void PairSNAPIntel::init_style()
+{
+  if (!force->newton_pair) error->all(FLERR,"Pair style SNAP requires newton pair on");
+  // need a full neighbor list
+  neighbor->add_request(this, NeighConst::REQ_FULL);
+
+  snaptr->init();
+
+  // the fix with ID "package_intel" must already exist
+  fix = static_cast<FixIntel *>(modify->get_fix_by_id("package_intel"));
+  if (fix == nullptr)
+    error->all(FLERR, "The 'package intel' command is required for /intel styles");
+  fix->pair_init_check();
+
+  // one SNA_DVEC per coefficient for each work array
+  memory->create(bispectrum,ncoeff,"PairSNAP:bispectrum");
+  memory->create(beta,ncoeff,"PairSNAP:beta");
+}
+
+/* ----------------------------------------------------------------------
+   init for one type pair i,j and corresponding j,i
+------------------------------------------------------------------------- */
+
+double PairSNAPIntel::init_one(int i, int j)
+{
+  if (!setflag[i][j]) error->all(FLERR,"All pair coeffs are not set");
+  scale[j][i] = scale[i][j];    // mirror the scale entry onto (j,i)
+  const double radius_sum = radelem[map[i]] + radelem[map[j]];
+  return radius_sum*rcutfac;    // returned as the cutoff for this type pair
+}
+
+/* ----------------------------------------------------------------------
+   read the SNAP coefficient file and the SNAP parameter file
+   rank 0 reads each line and broadcasts it; every rank then parses it
+   coefffilename: header "nelem ncoeffall", then per element one line
+     "name radius weight" followed by ncoeffall coefficient lines
+   paramfilename: "keyword value(s)" lines; rcutfac and twojmax are required
+   any format problem aborts through error->all() or error->one()
+------------------------------------------------------------------------- */
+
+void PairSNAPIntel::read_files(char *coefffilename, char *paramfilename)
+{
+  // open SNAP coefficient file on proc 0
+
+  FILE *fpcoeff = nullptr;
+  if (comm->me == 0) {
+    fpcoeff = utils::open_potential(coefffilename,lmp,nullptr);
+    if (fpcoeff == nullptr)
+      error->one(FLERR,"Cannot open SNAP coefficient file {}: {}",
+                                   coefffilename, utils::getsyserror());
+  }
+
+  char line[MAXLINE],*ptr;
+  int eof = 0;
+  int nwords = 0;
+  while (nwords == 0) {
+    if (comm->me == 0) {
+      ptr = fgets(line,MAXLINE,fpcoeff);
+      if (ptr == nullptr) {
+        eof = 1;
+        fclose(fpcoeff);
+      }
+    }
+    MPI_Bcast(&eof,1,MPI_INT,0,world);
+    if (eof) break;
+    MPI_Bcast(line,MAXLINE,MPI_CHAR,0,world);
+
+    // strip comment, skip line if blank
+    nwords = utils::count_words(utils::trim_comment(line));
+  }
+  if (nwords != 2)
+    error->all(FLERR,"Incorrect format in SNAP coefficient file");
+
+  // strip single and double quotes from words
+  int nelemtmp = 0;
+  try {
+    ValueTokenizer words(utils::trim_comment(line),"\"' \t\n\r\f");
+    nelemtmp = words.next_int();
+    ncoeffall = words.next_int();
+  } catch (TokenizerException &e) {
+    error->all(FLERR,"Incorrect format in SNAP coefficient file: {}", e.what());
+  }
+
+  // clean out old arrays and set up element lists
+  memory->destroy(radelem);
+  memory->destroy(wjelem);
+  memory->destroy(coeffelem);
+  memory->destroy(sinnerelem);
+  memory->destroy(dinnerelem);
+  memory->create(radelem,nelements,"pair:radelem");
+  memory->create(wjelem,nelements,"pair:wjelem");
+  memory->create(coeffelem,nelements,ncoeffall,"pair:coeffelem");
+  memory->create(sinnerelem,nelements,"pair:sinnerelem");
+  memory->create(dinnerelem,nelements,"pair:dinnerelem");
+
+  // initialize checklist for all required nelements
+  int *elementflags = new int[nelements];
+  for (int jelem = 0; jelem < nelements; jelem++)
+      elementflags[jelem] = 0;
+
+  // loop over nelemtmp blocks in the SNAP coefficient file
+  for (int ielem = 0; ielem < nelemtmp; ielem++) {
+
+    if (comm->me == 0) {
+      ptr = fgets(line,MAXLINE,fpcoeff);
+      if (ptr == nullptr) {
+        eof = 1;
+        fclose(fpcoeff);
+      }
+    }
+    MPI_Bcast(&eof,1,MPI_INT,0,world);
+    if (eof)
+      error->all(FLERR,"Incorrect format in SNAP coefficient file");
+    MPI_Bcast(line,MAXLINE,MPI_CHAR,0,world);
+
+    std::vector<std::string> words;
+    try {
+      words = Tokenizer(utils::trim_comment(line),"\"' \t\n\r\f").as_vector();
+    } catch (TokenizerException &) {
+      // ignore
+    }
+    if (words.size() != 3)
+      error->all(FLERR,"Incorrect format in SNAP coefficient file");
+
+    int jelem;
+    for (jelem = 0; jelem < nelements; jelem++)
+      if (words[0] == elements[jelem]) break;
+
+    // if this element not needed, skip this block
+    if (jelem == nelements) {
+      if (comm->me == 0) {
+        for (int icoeff = 0; icoeff < ncoeffall; icoeff++) {
+          ptr = fgets(line,MAXLINE,fpcoeff);
+          if (ptr == nullptr) {
+            eof = 1;
+            fclose(fpcoeff);
+            break;    // the stream is closed: do not read from it again
+          }
+        }
+      }
+      MPI_Bcast(&eof,1,MPI_INT,0,world);
+      if (eof)
+        error->all(FLERR,"Incorrect format in SNAP coefficient file");
+      continue;
+    }
+
+    if (elementflags[jelem] == 1)    // the same element appears in two blocks
+      error->all(FLERR,"Incorrect format in SNAP coefficient file");
+    else
+      elementflags[jelem] = 1;
+
+    radelem[jelem] = utils::numeric(FLERR,words[1],false,lmp);
+    wjelem[jelem] = utils::numeric(FLERR,words[2],false,lmp);
+
+    if (comm->me == 0)
+      utils::logmesg(lmp,"SNAP Element = {}, Radius {}, Weight {}\n",
+                     elements[jelem], radelem[jelem], wjelem[jelem]);
+
+    for (int icoeff = 0; icoeff < ncoeffall; icoeff++) {
+      if (comm->me == 0) {
+        ptr = fgets(line,MAXLINE,fpcoeff);
+        if (ptr == nullptr) {
+          eof = 1;
+          fclose(fpcoeff);
+        }
+      }
+
+      MPI_Bcast(&eof,1,MPI_INT,0,world);
+      if (eof)
+        error->all(FLERR,"Incorrect format in SNAP coefficient file");
+      MPI_Bcast(line,MAXLINE,MPI_CHAR,0,world);
+
+      try {
+        ValueTokenizer coeff(utils::trim_comment(line));
+        if (coeff.count() != 1)
+          error->all(FLERR,"Incorrect format in SNAP coefficient file");
+
+        coeffelem[jelem][icoeff] = coeff.next_double();
+      } catch (TokenizerException &e) {
+        error->all(FLERR,"Incorrect format in SNAP coefficient file: {}", e.what());
+      }
+    }
+  }
+
+  if (comm->me == 0) fclose(fpcoeff);
+
+  for (int jelem = 0; jelem < nelements; jelem++) {
+    if (elementflags[jelem] == 0)
+      error->all(FLERR,"Element {} not found in SNAP coefficient file", elements[jelem]);
+  }
+  delete[] elementflags;
+
+  // set flags for required keywords
+  rcutfacflag = 0;
+  twojmaxflag = 0;
+
+  // Set defaults for optional keywords
+
+  rfac0 = 0.99363;
+  rmin0 = 0.0;
+  switchflag = 1;
+  bzeroflag = 1;
+  quadraticflag = 0;
+  chemflag = 0;
+  bnormflag = 0;
+  wselfallflag = 0;
+  switchinnerflag = 0;
+  chunksize = 32768;
+  parallel_thresh = 8192;
+
+  // set local input checks
+
+  int sinnerflag = 0;
+  int dinnerflag = 0;
+
+  // open SNAP parameter file on proc 0
+
+  FILE *fpparam = nullptr;
+  if (comm->me == 0) {
+    fpparam = utils::open_potential(paramfilename,lmp,nullptr);
+    if (fpparam == nullptr)
+      error->one(FLERR,"Cannot open SNAP parameter file {}: {}",
+                                   paramfilename, utils::getsyserror());
+  }
+
+  eof = 0;
+  while (true) {
+    if (comm->me == 0) {
+      ptr = fgets(line,MAXLINE,fpparam);
+      if (ptr == nullptr) {
+        eof = 1;
+        fclose(fpparam);
+      }
+    }
+    MPI_Bcast(&eof,1,MPI_INT,0,world);
+    if (eof) break;
+    MPI_Bcast(line,MAXLINE,MPI_CHAR,0,world);
+
+    // words = ptrs to all words in line
+    // strip single and double quotes from words
+
+    std::vector<std::string> words;
+    try {
+      words = Tokenizer(utils::trim_comment(line),"\"' \t\n\r\f").as_vector();
+    } catch (TokenizerException &) {
+      // ignore
+    }
+
+    if (words.size() == 0) continue;
+
+    if (words.size() < 2)
+      error->all(FLERR,"Incorrect format in SNAP parameter file");
+
+    auto keywd = words[0];
+    auto keyval = words[1];
+
+    // check for keywords with more than one value per element
+
+    if (keywd == "sinner" || keywd == "dinner") {
+
+      if ((int)words.size() != nelements+1)
+        error->all(FLERR,"Incorrect SNAP parameter file");
+
+      // innerlogstr collects all values of sinner or dinner for log output below
+
+      std::string innerlogstr;
+
+      int iword = 1;
+
+      if (keywd == "sinner") {
+        for (int ielem = 0; ielem < nelements; ielem++) {
+          keyval = words[iword];
+          sinnerelem[ielem] = utils::numeric(FLERR,keyval,false,lmp);
+          iword++;
+          innerlogstr += keyval + " ";
+        }
+        sinnerflag = 1;
+      } else if (keywd == "dinner") {
+        for (int ielem = 0; ielem < nelements; ielem++) {
+          keyval = words[iword];
+          dinnerelem[ielem] = utils::numeric(FLERR,keyval,false,lmp);
+          iword++;
+          innerlogstr += keyval + " ";
+        }
+        dinnerflag = 1;
+      }
+
+      if (comm->me == 0)
+        utils::logmesg(lmp,"SNAP keyword {} {} ... \n", keywd, innerlogstr);
+
+    } else {
+
+      // all other keywords take one value
+
+      if (words.size() != 2)    // count the words of this line, not of the coefficient header
+        error->all(FLERR,"Incorrect SNAP parameter file");
+
+      if (comm->me == 0)
+        utils::logmesg(lmp,"SNAP keyword {} {}\n",keywd,keyval);
+
+      if (keywd == "rcutfac") {
+        rcutfac = utils::numeric(FLERR,keyval,false,lmp);
+        rcutfacflag = 1;
+      } else if (keywd == "twojmax") {
+        twojmax = utils::inumeric(FLERR,keyval,false,lmp);
+        twojmaxflag = 1;
+      } else if (keywd == "rfac0")
+        rfac0 = utils::numeric(FLERR,keyval,false,lmp);
+      else if (keywd == "rmin0")
+        rmin0 = utils::numeric(FLERR,keyval,false,lmp);
+      else if (keywd == "switchflag")
+        switchflag = utils::inumeric(FLERR,keyval,false,lmp);
+      else if (keywd == "bzeroflag")
+        bzeroflag = utils::inumeric(FLERR,keyval,false,lmp);
+      else if (keywd == "quadraticflag")
+        quadraticflag = utils::inumeric(FLERR,keyval,false,lmp);
+      else if (keywd == "chemflag")
+        chemflag = utils::inumeric(FLERR,keyval,false,lmp);
+      else if (keywd == "bnormflag")
+        bnormflag = utils::inumeric(FLERR,keyval,false,lmp);
+      else if (keywd == "wselfallflag")
+        wselfallflag = utils::inumeric(FLERR,keyval,false,lmp);
+      else if (keywd == "switchinnerflag")
+        switchinnerflag = utils::inumeric(FLERR,keyval,false,lmp);
+      else if (keywd == "chunksize")
+        chunksize = utils::inumeric(FLERR,keyval,false,lmp);
+      else if (keywd == "parallelthresh")
+        parallel_thresh = utils::inumeric(FLERR,keyval,false,lmp);
+      else
+        error->all(FLERR,"Unknown parameter '{}' in SNAP parameter file", keywd);
+    }
+  }
+
+  if (rcutfacflag == 0 || twojmaxflag == 0)
+    error->all(FLERR,"Incorrect SNAP parameter file");
+
+  if (chemflag && nelemtmp != nelements)
+    error->all(FLERR,"Incorrect SNAP parameter file");
+
+  if (switchinnerflag && !(sinnerflag && dinnerflag))
+    error->all(FLERR,"Incorrect SNAP parameter file");
+
+  if (!switchinnerflag && (sinnerflag || dinnerflag))
+    error->all(FLERR,"Incorrect SNAP parameter file");
+}
+
+/* ----------------------------------------------------------------------
+   memory usage: base Pair tally plus the arrays owned by this style
+------------------------------------------------------------------------- */
+
+double PairSNAPIntel::memory_usage()
+{
+  const double ntab = atom->ntypes+1;    // extent of each per-type dimension
+  const double npairs = ntab*ntab;
+  const double vecbytes = sizeof(SNA_DVEC);
+
+  double total = Pair::memory_usage();
+  total += npairs*sizeof(int);       // setflag
+  total += npairs*sizeof(double);    // cutsq
+  total += npairs*sizeof(double);    // scale
+  total += ntab*sizeof(int);         // map
+  total += ncoeff*vecbytes;          // bispectrum
+  total += ncoeff*vecbytes;          // beta
+  total += snaptr->memory_usage();   // SNA object
+  return total;
+}
+
+/* expose "scale" (dim = 2) to callers; any other name yields nullptr */
+
+void *PairSNAPIntel::extract(const char *str, int &dim)
+{
+  dim = 2;
+  if (strcmp(str,"scale") != 0) return nullptr;
+  return (void *) scale;
+}
+
+#endif
+#endif
diff --git a/src/INTEL/pair_snap_intel.h b/src/INTEL/pair_snap_intel.h
new file mode 100644
index 0000000000..2dc758f244
--- /dev/null
+++ b/src/INTEL/pair_snap_intel.h
@@ -0,0 +1,83 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#if defined(__AVX512F__)
+#if defined(__INTEL_COMPILER) || defined(__INTEL_LLVM_COMPILER)
+
+#ifdef PAIR_CLASS
+// clang-format off
+PairStyle(snap/intel,PairSNAPIntel);
+// clang-format on
+#else
+
+#ifndef LMP_PAIR_SNAP_INTEL_H
+#define LMP_PAIR_SNAP_INTEL_H
+
+#include "fix_intel.h"
+#include "pair.h"
+
+namespace ip_simd { class SIMD_double; class SIMD_int; };    // forward declarations only
+#define SNA_DVEC ip_simd::SIMD_double
+#define SNA_IVEC ip_simd::SIMD256_int    // NOTE(review): SIMD256_int is not forward-declared above - confirm
+
+namespace LAMMPS_NS {
+
+class PairSNAPIntel : public Pair {    // pair style snap/intel: SNAP evaluated with SIMD vectors
+ public:
+  PairSNAPIntel(class LAMMPS *);
+  ~PairSNAPIntel() override;
+  void compute(int, int) override;
+  void settings(int, char **) override;           // accepts no arguments
+  void coeff(int, char **) override;              // reads the coefficient and parameter files
+  void init_style() override;                     // full neighbor list, needs the package_intel fix
+  double init_one(int, int) override;             // returns the cutoff for one type pair
+  double memory_usage() override;
+  void *extract(const char *, int &) override;    // exposes "scale" with dim = 2
+
+  double rcutfac, quadraticflag;    // declared public to workaround gcc 4.9
+  int ncoeff;                       //  compiler bug, manifest in KOKKOS package
+
+ protected:
+  FixIntel *fix;    // fix with ID "package_intel", looked up in init_style()
+
+  int ncoeffq, ncoeffall;    // quadratic term count; coefficients per element in the file
+  class SNAIntel *snaptr;    // SNA helper object, created in coeff()
+  virtual void allocate();
+  void read_files(char *, char *);    // (coefficient file, parameter file)
+  inline int equal(double *x, double *y);
+  inline double dist2(double *x, double *y);
+
+  double rcutmax;         // max cutoff for all elements
+  double *radelem;        // element radii
+  double *wjelem;         // elements weights
+  double **coeffelem;     // element bispectrum coefficients
+  SNA_DVEC *beta;          // ncoeff SIMD vectors, allocated in init_style()
+  SNA_DVEC *bispectrum;    // ncoeff SIMD vectors of bispectrum components, allocated in init_style()
+  double **scale;         // for thermodynamic integration
+  int twojmax, switchflag, bzeroflag, bnormflag;    // parameter-file keywords of the same names
+  int chemflag, wselfallflag;                       // parameter-file keywords of the same names
+  int switchinnerflag;    // inner cutoff switch
+  double *sinnerelem;     // element inner cutoff midpoint
+  double *dinnerelem;     // element inner cutoff half-width
+  int chunksize, parallel_thresh;    // keywords chunksize / parallelthresh (defaults 32768 / 8192)
+  double rfac0, rmin0, wj1, wj2;
+  int rcutfacflag, twojmaxflag;    // flags for required parameters
+};
+
+}    // namespace LAMMPS_NS
+
+#endif
+#endif
+
+#endif
+#endif
diff --git a/src/INTEL/sna_intel.cpp b/src/INTEL/sna_intel.cpp
new file mode 100644
index 0000000000..b83c90688d
--- /dev/null
+++ b/src/INTEL/sna_intel.cpp
@@ -0,0 +1,1505 @@
+// clang-format off
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing authors: W. Michael Brown, Intel
+------------------------------------------------------------------------- */
+
+#if defined(__AVX512F__)
+#if defined(__INTEL_COMPILER) || defined(__INTEL_LLVM_COMPILER)
+
+#include "sna_intel.h"
+
+#include "comm.h"
+#include "error.h"
+#include "math_const.h"
+#include "math_special.h"
+#include "memory.h"
+
+#include <cmath>
+
+using namespace std;
+using namespace LAMMPS_NS;
+using namespace MathConst;
+using namespace MathSpecial;
+using namespace ip_simd;
+
+/* ----------------------------------------------------------------------
+
+   this implementation is based on the method outlined
+   in Bartok[1], using formulae from VMK[2].
+
+   for the Clebsch-Gordan coefficients, we
+   convert the VMK half-integral labels
+   a, b, c, alpha, beta, gamma
+   to array offsets j1, j2, j, m1, m2, m
+   using the following relations:
+
+   j1 = 2*a
+   j2 = 2*b
+   j =  2*c
+
+   m1 = alpha+a      2*alpha = 2*m1 - j1
+   m2 = beta+b    or 2*beta = 2*m2 - j2
+   m =  gamma+c      2*gamma = 2*m - j
+
+   in this way:
+
+   -a <= alpha <= a
+   -b <= beta <= b
+   -c <= gamma <= c
+
+   becomes:
+
+   0 <= m1 <= j1
+   0 <= m2 <= j2
+   0 <= m <= j
+
+   and the requirement that
+   a+b+c be integral implies that
+   j1+j2+j must be even.
+   The requirement that:
+
+   gamma = alpha+beta
+
+   becomes:
+
+   2*m - j = 2*m1 - j1 + 2*m2 - j2
+
+   Similarly, for the Wigner U-functions U(J,m,m') we
+   convert the half-integral labels J,m,m' to
+   array offsets j,ma,mb:
+
+   j = 2*J
+   ma = J+m
+   mb = J+m'
+
+   so that:
+
+   0 <= j <= 2*Jmax
+   0 <= ma, mb <= j.
+
+   For the bispectrum components B(J1,J2,J) we convert to:
+
+   j1 = 2*J1
+   j2 = 2*J2
+   j = 2*J
+
+   and the requirement:
+
+   |J1-J2| <= J <= J1+J2, for J1+J2+J integral
+
+   becomes:
+
+   |j1-j2| <= j <= j1+j2, for j1+j2+j even integer
+
+   or
+
+   j = |j1-j2|, |j1-j2|+2,...,j1+j2-2,j1+j2
+
+   [1] Albert Bartok-Partay, "Gaussian Approximation..."
+   Doctoral Thesis, Cambridge University, (2009)
+
+   [2] D. A. Varshalovich, A. N. Moskalev, and V. K. Khersonskii,
+   "Quantum Theory of Angular Momentum," World Scientific (1988)
+
+------------------------------------------------------------------------- */
+
+// Store the SNAP settings, build the index lists and allocate the twojmax-sized arrays.
+// nelements_in is used only when chem_flag_in is nonzero; otherwise nelements is set to 1.
+SNAIntel::SNAIntel(LAMMPS* lmp, double rfac0_in, int twojmax_in,
+                   double rmin0_in, int switch_flag_in, int bzero_flag_in,
+                   int chem_flag_in, int bnorm_flag_in, int wselfall_flag_in,
+                   int nelements_in, int switch_inner_flag_in) : Pointers(lmp)
+{
+  wself = 1.0;
+  rfac0 = rfac0_in;
+  rmin0 = rmin0_in;
+  switch_flag = switch_flag_in;
+  switch_inner_flag = switch_inner_flag_in;
+  bzero_flag = bzero_flag_in;
+  chem_flag = chem_flag_in;
+  bnorm_flag = bnorm_flag_in;
+  wselfall_flag = wselfall_flag_in;
+
+  if (bnorm_flag != chem_flag)
+    lmp->error->warning(FLERR, "bnormflag and chemflag are not equal. "
+                        "This is probably not what you intended");
+
+  if (chem_flag)
+    nelements = nelements_in;
+  else
+    nelements = 1;
+
+  twojmax = twojmax_in;
+
+  compute_ncoeff();
+
+  // per-neighbor buffers start empty; grow_rij() allocates them on demand
+  rij = nullptr;
+  inside = nullptr;
+  wj = nullptr;
+  rcutij = nullptr;
+  sinnerij = nullptr;
+  dinnerij = nullptr;
+  element = nullptr;
+  nmax = 0;
+  idxz = nullptr;
+  idxb = nullptr;
+  ulist_r_ij = nullptr;
+  ulist_i_ij = nullptr;
+
+  build_indexlist();
+  create_twojmax_arrays();    // presumably allocates bzero[] used below - confirm
+
+  if (bzero_flag) {
+    double www = wself*wself*wself;
+    for (int j = 0; j <= twojmax; j++)
+      if (bnorm_flag)
+        bzero[j] = www;
+      else
+        bzero[j] = www*(j+1);
+  }
+}
+
+/* release the index lists, the per-neighbor buffers and the twojmax-sized arrays */
+
+SNAIntel::~SNAIntel()
+{
+  delete[] idxb;    // index lists allocated with new[] in build_indexlist()
+  delete[] idxz;
+  memory->destroy(ulist_i_ij);    // per-neighbor buffers allocated in grow_rij()
+  memory->destroy(ulist_r_ij);
+  if (chem_flag) memory->destroy(element);
+  memory->destroy(dinnerij);
+  memory->destroy(sinnerij);
+  memory->destroy(rcutij);
+  memory->destroy(wj);
+  memory->destroy(inside);
+  memory->destroy(rij);
+  destroy_twojmax_arrays();
+}
+
+void SNAIntel::build_indexlist()
+{
+  // Precompute the offsets and index lists shared by the SNA kernels:
+  //   idxcg_block[j1][j2][j]  start of the (j1,j2,j) block in cglist
+  //   idxu_block[j]           start of level j in the u-type arrays
+  //   idxb[], idxb_block      triples with j >= j1 and their reverse lookup
+  //   idxz[], idxz_block      per-(j1,j2,j,ma,mb) summation ranges
+  // plus the totals idxcg_max, idxu_max, idxb_max and idxz_max.
+  // Every triple loop below visits
+  //   0 <= j2 <= j1 <= twojmax,  j = j1-j2, j1-j2+2, ..., MIN(twojmax, j1+j2)
+
+  const int jdim = twojmax + 1;
+
+  // index list for cglist
+  // each (j1,j2,j) block holds (j1+1)*(j2+1) entries, one per (m1,m2) pair
+
+  memory->create(idxcg_block, jdim, jdim, jdim, "sna:idxcg_block");
+
+  int ncg = 0;
+  for (int j1 = 0; j1 <= twojmax; j1++)
+    for (int j2 = 0; j2 <= j1; j2++)
+      for (int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2) {
+        idxcg_block[j1][j2][j] = ncg;
+        ncg += (j1 + 1) * (j2 + 1);
+      }
+  idxcg_max = ncg;
+
+  // index list for uarray
+  // need to include both halves: level j holds a full (j+1) x (j+1) block
+
+  memory->create(idxu_block, jdim, "sna:idxu_block");
+
+  int nu = 0;
+  for (int j = 0; j <= twojmax; j++) {
+    idxu_block[j] = nu;
+    nu += (j + 1) * (j + 1);
+  }
+  idxu_max = nu;
+
+  // index list for beta and B
+  // first pass only counts the triples with j >= j1 so that idxb can be sized
+
+  int nbis = 0;
+  for (int j1 = 0; j1 <= twojmax; j1++)
+    for (int j2 = 0; j2 <= j1; j2++)
+      for (int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2)
+        if (j >= j1) nbis++;
+
+  idxb_max = nbis;
+  idxb = new SNA_BINDICES[idxb_max];
+
+  // second pass stores the (j1,j2,j) labels of every kept triple
+
+  nbis = 0;
+  for (int j1 = 0; j1 <= twojmax; j1++)
+    for (int j2 = 0; j2 <= j1; j2++)
+      for (int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2) {
+        if (j < j1) continue;
+        idxb[nbis].j1 = j1;
+        idxb[nbis].j2 = j2;
+        idxb[nbis].j = j;
+        nbis++;
+      }
+
+  // reverse index list for beta and b
+
+  memory->create(idxb_block, jdim, jdim, jdim, "sna:idxb_block");
+
+  nbis = 0;
+  for (int j1 = 0; j1 <= twojmax; j1++)
+    for (int j2 = 0; j2 <= j1; j2++)
+      for (int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2)
+        if (j >= j1) idxb_block[j1][j2][j] = nbis++;
+
+  // index list for zlist
+  // each triple contributes (j/2+1) values of mb (2*mb <= j) times (j+1) values of ma
+
+  int nz = 0;
+  for (int j1 = 0; j1 <= twojmax; j1++)
+    for (int j2 = 0; j2 <= j1; j2++)
+      for (int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2)
+        nz += (j / 2 + 1) * (j + 1);
+
+  idxz_max = nz;
+  idxz = new SNA_ZINDICES[idxz_max];
+
+  memory->create(idxz_block, jdim, jdim, jdim, "sna:idxz_block");
+
+  // fill idxz[] and record in idxz_block where each triple starts
+
+  nz = 0;
+  for (int j1 = 0; j1 <= twojmax; j1++)
+    for (int j2 = 0; j2 <= j1; j2++)
+      for (int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2) {
+        idxz_block[j1][j2][j] = nz;
+
+        for (int mb = 0; 2*mb <= j; mb++)
+          for (int ma = 0; ma <= j; ma++) {
+            SNA_ZINDICES &z = idxz[nz];
+            z.j1 = j1;
+            z.j2 = j2;
+            z.j = j;
+
+            // start indices and trip counts of the ia and ib loops in compute_zi_or_yi()
+            z.ma1min = MAX(0, (2 * ma - j - j2 + j1) / 2);
+            z.ma2max = (2 * ma - j - (2 * z.ma1min - j1) + j2) / 2;
+            z.na = MIN(j1, (2 * ma - j + j2 + j1) / 2) - z.ma1min + 1;
+            z.mb1min = MAX(0, (2 * mb - j - j2 + j1) / 2);
+            z.mb2max = (2 * mb - j - (2 * z.mb1min - j1) + j2) / 2;
+            z.nb = MIN(j1, (2 * mb - j + j2 + j1) / 2) - z.mb1min + 1;
+
+            // offset of (ma,mb) inside the level-j block of the u-type arrays
+
+            z.jju = idxu_block[j] + (j+1)*mb + ma;
+
+            nz++;
+          }
+      }
+}
+
+/* run the two table initializers; called from PairSNAPIntel::init_style() */
+
+void SNAIntel::init()
+{
+  init_clebsch_gordan();
+  //   print_clebsch_gordan();    (left disabled)
+  init_rootpqarray();
+}
+
+void SNAIntel::grow_rij(int newnmax)
+{
+  // buffers only ever grow; old contents are discarded, not copied
+  if (nmax >= newnmax) return;
+  nmax = newnmax;
+
+  memory->destroy(rij);
+  memory->create(rij, nmax, 3, "pair:rij");
+  memory->destroy(inside);
+  memory->create(inside, nmax, "pair:inside");
+  memory->destroy(wj);
+  memory->create(wj, nmax, "pair:wj");
+  memory->destroy(rcutij);
+  memory->create(rcutij, nmax, "pair:rcutij");
+  memory->destroy(sinnerij);
+  memory->create(sinnerij, nmax, "pair:sinnerij");
+  memory->destroy(dinnerij);
+  memory->create(dinnerij, nmax, "pair:dinnerij");
+  if (chem_flag) memory->destroy(element);
+  if (chem_flag) memory->create(element, nmax, "sna:element");
+  memory->destroy(ulist_r_ij);
+  memory->create(ulist_r_ij, nmax, idxu_max, "sna:ulist_ij");
+  memory->destroy(ulist_i_ij);
+  memory->create(ulist_i_ij, nmax, idxu_max, "sna:ulist_ij");
+}
+
+/* ----------------------------------------------------------------------
+   compute Ui by summing over neighbors j
+------------------------------------------------------------------------- */
+
+void SNAIntel::compute_ui(const SNA_IVEC &jnum, const SNA_IVEC &ielem,
+                          const int max_jnum)
+{
+  // utot(j,ma,mb) = 0 for all j,ma,mb
+  // utot(j,ma,ma) = 1 for all j,ma
+  // for j in neighbors of i:
+  //   compute r0 = (x,y,z,z0)
+  //   utot(j,ma,mb) += u(r0;j,ma,mb) for all j,ma,mb
+
+  zero_uarraytot(ielem);
+
+  for (int jnbor = 0; jnbor < max_jnum; jnbor++) {
+    const SNA_DVEC dx = rij[jnbor][0];
+    const SNA_DVEC dy = rij[jnbor][1];
+    const SNA_DVEC dz = rij[jnbor][2];
+    const SNA_DVEC rsq = dx * dx + dy * dy + dz * dz;
+    const SNA_DVEC r = SIMD_sqrt(rsq);
+
+    // theta0 = (r - rmin0) * rfac0 * pi / (rcut - rmin0),  z0 = r / tan(theta0)
+    const SNA_DVEC rcut = rcutij[jnbor];
+    const SNA_DVEC rscale0 = SIMD_rcp(rcut - rmin0) * rfac0 * MY_PI;
+    const SNA_DVEC theta0 = (r - rmin0) * rscale0;
+    const SNA_DVEC z0 = r * SIMD_rcp(SIMD_tan(theta0));
+    compute_uarray(dx, dy, dz, z0, r, jnbor, jnum);
+    add_uarraytot(r, jnbor, jnum);
+  }
+}
+
+/* ----------------------------------------------------------------------
+   pick out right beta value
+   returns the multiplicity factor for (j1,j2,j) and stores in itriple the
+   flat index of the matching beta entry for the element triple
+------------------------------------------------------------------------- */
+
+double SNAIntel::choose_beta(const int j, const int j1, const int j2,
+                             const int elem1, const int elem2, const int elem3,
+                             int &itriple)
+{
+  // the three cases differ only in which permutation of (j1,j2,j) and of the
+  // elements addresses idxb_block, and in the multiplicity of the triple
+  int jjb, ea, eb, ec;
+  double bfactor = 1.0;
+
+  if (j >= j1) {
+    // j1 <= j: labels are used in the given order
+    jjb = idxb_block[j1][j2][j];
+    ea = elem1; eb = elem2; ec = elem3;
+    if (j1 == j) bfactor = (j2 == j) ? 3.0 : 2.0;
+  } else if (j >= j2) {
+    // j2 <= j < j1: swap the roles of j1 and j
+    jjb = idxb_block[j][j2][j1];
+    ea = elem3; eb = elem2; ec = elem1;
+    if (j2 == j) bfactor = 2.0;
+  } else {
+    // j is below both j1 and j2
+    jjb = idxb_block[j2][j][j1];
+    ea = elem2; eb = elem3; ec = elem1;
+  }
+
+  itriple = ((ea * nelements + eb) * nelements + ec) * idxb_max + jjb;
+
+  // without bnorm_flag, rescale by (j1+1)/(j+1) whenever j1 exceeds j
+
+  if (!bnorm_flag && j1 > j)
+    bfactor *= (1.0 + j1) / (1.0 + j);
+
+  return bfactor;
+}
+
+/* ----------------------------------------------------------------------
+   COMPUTE_YI != 0: compute Yi from Ui without storing Zi; otherwise store Zi
+------------------------------------------------------------------------- */
+
+template <int COMPUTE_YI>
+void SNAIntel::compute_zi_or_yi(const SNA_DVEC* beta)
+{
+  if (COMPUTE_YI) {
+    memset(ylist_r,0,idxu_max*nelements*sizeof(SNA_DVEC));
+    memset(ylist_i,0,idxu_max*nelements*sizeof(SNA_DVEC));
+  }
+
+  double *zlist_rp = (double *)zlist_r;
+  double *zlist_ip = (double *)zlist_i;
+
+  int zlist_i = 0;    // running output offset; NOTE: shadows the zlist_i read on the line above
+
+  for (int elem1 = 0; elem1 < nelements; elem1++)
+    for (int elem2 = 0; elem2 < nelements; elem2++) {
+      for (int jjz = 0; jjz < idxz_max; jjz++) {
+        const int j1 = idxz[jjz].j1;
+        const int j2 = idxz[jjz].j2;
+        const int j = idxz[jjz].j;
+        const int ma1min = idxz[jjz].ma1min;
+        const int ma2max = idxz[jjz].ma2max;
+        const int na = idxz[jjz].na;
+        const int mb1min = idxz[jjz].mb1min;
+        const int mb2max = idxz[jjz].mb2max;
+        const int nb = idxz[jjz].nb;
+
+        const double *cgblock = cglist + idxcg_block[j1][j2][j];
+
+        SNA_DVEC ztmp_r = 0.0;
+        SNA_DVEC ztmp_i = 0.0;
+
+        const double *u_r = (double *)ulisttot_r;
+        const double *u_i = (double *)ulisttot_i;
+
+        int jju1 = elem1 * idxu_max + idxu_block[j1] + (j1 + 1) * mb1min;
+        int jju2 = elem2 * idxu_max + idxu_block[j2] + (j2 + 1) * mb2max;
+        jju1 *= vector_width();    // scale entry index to an offset into the double* views
+        jju2 *= vector_width();
+        int icgb = mb1min * (j2 + 1) + mb2max;
+        for (int ib = 0; ib < nb; ib++) {
+
+          SNA_DVEC suma1_r = 0.0;
+          SNA_DVEC suma1_i = 0.0;
+
+          int ma1 = ma1min * vector_width();
+          int ma2 = ma2max * vector_width();
+          int icga = ma1min * (j2 + 1) + ma2max;
+
+          for (int ia = 0; ia < na; ia++) {
+            const SNA_DVEC u1_r = SIMD_load(u_r + jju1 + ma1);
+            const SNA_DVEC u2_r = SIMD_load(u_r + jju2 + ma2);
+            const SNA_DVEC u1_i = SIMD_load(u_i + jju1 + ma1);
+            const SNA_DVEC u2_i = SIMD_load(u_i + jju2 + ma2);
+            suma1_r += (u1_r*u2_r - u1_i*u2_i) * cgblock[icga];    // real part of u1*u2
+            suma1_i += (u1_r*u2_i + u1_i*u2_r) * cgblock[icga];    // imaginary part of u1*u2
+            ma1+= vector_width();    // first index steps forward ...
+            ma2-= vector_width();    // ... while the second steps backward
+            icga += j2;
+          } // end loop over ia
+
+          ztmp_r += suma1_r * cgblock[icgb];
+          ztmp_i += suma1_i * cgblock[icgb];
+
+          jju1 += (j1 + 1) * vector_width();    // next row of level j1
+          jju2 -= (j2 + 1) * vector_width();    // previous row of level j2
+          icgb += j2;
+        } // end loop over ib
+
+        // apply to z(j1,j2,j,ma,mb) to unique element of y(j)
+        // find right y_list[jju] and beta[jjb] entries
+        // multiply and divide by j+1 factors
+        // account for multiplicity of 1, 2, or 3
+
+        if (bnorm_flag) {
+          ztmp_i *= SIMD_rcp(SIMD_set(static_cast<double>(j+1)));
+          ztmp_r *= SIMD_rcp(SIMD_set(static_cast<double>(j+1)));
+        }
+
+        if (COMPUTE_YI) {
+          int jju = idxz[jjz].jju;
+          for (int elem3 = 0; elem3 < nelements; elem3++) {
+            int itriple;    // set by choose_beta()
+            double bfactor = choose_beta(j, j1, j2, elem1, elem2, elem3,
+                                         itriple);
+            const SNA_DVEC betaj = beta[itriple] * bfactor;
+            const int i = elem3 * idxu_max + jju;
+            SIMD_store(&(ylist_r[i]), SIMD_load(ylist_r + i) + betaj * ztmp_r);    // accumulate into ylist
+            SIMD_store(&(ylist_i[i]), SIMD_load(ylist_i + i) + betaj * ztmp_i);
+          }
+        } else {
+          SIMD_store(zlist_rp + zlist_i, ztmp_r);
+          SIMD_store(zlist_ip + zlist_i, ztmp_i);
+          zlist_i += vector_width();    // advance the local offset by one SIMD store
+        }
+      }// end loop over jjz
+    }
+}
+
+/* ----------------------------------------------------------------------
+   compute Yi from the Zi values already held in zlist_r / zlist_i
+------------------------------------------------------------------------- */
+
+void SNAIntel::compute_yi_from_zi(const SNA_DVEC* beta)
+{
+  memset(ylist_r,0,idxu_max*nelements*sizeof(SNA_DVEC));
+  memset(ylist_i,0,idxu_max*nelements*sizeof(SNA_DVEC));
+
+  double *zlist_rp = (double *)zlist_r;
+  double *zlist_ip = (double *)zlist_i;
+  // read cursor into zlist_rp/zlist_ip, counted in doubles
+  int zoff = 0;
+
+  for (int elem1 = 0; elem1 < nelements; elem1++)
+    for (int elem2 = 0; elem2 < nelements; elem2++) {
+      for (int jjz = 0; jjz < idxz_max; jjz++) {
+        const int j1 = idxz[jjz].j1;
+        const int j2 = idxz[jjz].j2;
+        const int j = idxz[jjz].j;
+
+        const SNA_DVEC ztmp_r = SIMD_load(zlist_rp + zoff);
+        const SNA_DVEC ztmp_i = SIMD_load(zlist_ip + zoff);
+        zoff += vector_width();   // one SIMD vector per (elem1,elem2,jjz)
+
+        int jju = idxz[jjz].jju;
+        for (int elem3 = 0; elem3 < nelements; elem3++) {
+          int itriple;   // beta index, set by choose_beta()
+          double bfactor = choose_beta(j, j1, j2, elem1, elem2, elem3,
+                                       itriple);
+          const SNA_DVEC betaj = beta[itriple] * bfactor;
+          const int i = elem3 * idxu_max + jju;
+          SIMD_store(&(ylist_r[i]), SIMD_load(ylist_r + i) + betaj * ztmp_r);
+          SIMD_store(&(ylist_i[i]), SIMD_load(ylist_i + i) + betaj * ztmp_i);
+        }
+      } // end loop over jjz
+    }
+}
+
+/* ----------------------------------------------------------------------
+   compute dEidRj for neighbor jj; Y is gathered per lane by neighbor element
+------------------------------------------------------------------------- */
+
+void SNAIntel::compute_deidrj_e(const int jj, const SNA_IVEC &jnum,
+                                SNA_DVEC* dedr)
+{
+  double *ylist_rp = (double *)ylist_r;
+  double *ylist_ip = (double *)ylist_i;
+  double *dulist_rp = (double *)(dulist_r[0]);
+  double *dulist_ip = (double *)(dulist_i[0]);
+
+  for (int k = 0; k < 3; k++)
+    dedr[k] = SIMD_set(0.0);
+
+  SNA_IVEC jelem;  // per-lane element of neighbor jj; all zero without chem_flag
+  if (chem_flag) jelem = SIMD_load(element + jj);
+  else jelem = SIMD256_set(0);
+
+  SIMD_mask m(jj < jnum);  // lanes where jj < jnum; used to mask gathers/sums
+
+  for (int j = 0; j <= twojmax; j++) {
+    int jju = idxu_block[j] * vector_width();
+    int jju3 = jju * 3;  // dulist is read as 3 vectors (k = 0,1,2) per entry
+    SNA_IVEC i = jelem*idxu_max*vector_width() + jju + SIMD256_count();
+    // i: per-lane gather index into ylist (element block + entry offset);
+    // NOTE(review): SIMD256_count() presumably yields the lane numbers - confirm
+    for (int mb = 0; 2*mb < j; mb++)
+      for (int ma = 0; ma <= j; ma++) {
+        SNA_DVEC jjjmambyarray_r = SIMD_gather(m, ylist_rp, i);
+        SNA_DVEC jjjmambyarray_i = SIMD_gather(m, ylist_ip, i);
+        for (int k = 0; k < 3; k++) {
+          SNA_DVEC du_r = SIMD_load(dulist_rp + jju3);
+          SNA_DVEC du_i = SIMD_load(dulist_ip + jju3);
+          SNA_DVEC du = du_r * jjjmambyarray_r + du_i * jjjmambyarray_i;
+          dedr[k] = SIMD_add(m, dedr[k], du);
+          jju3 += vector_width();
+        }
+        i = i + vector_width();
+      }
+    // for even j: column mb = j/2, entries ma < mb count fully ...
+    if (j%2 == 0) {
+      int mb = j / 2;
+      for (int ma = 0; ma < mb; ma++) {
+        SNA_DVEC jjjmambyarray_r = SIMD_gather(m, ylist_rp, i);
+        SNA_DVEC jjjmambyarray_i = SIMD_gather(m, ylist_ip, i);
+        for (int k = 0; k < 3; k++) {
+          SNA_DVEC du_r = SIMD_load(dulist_rp + jju3);
+          SNA_DVEC du_i = SIMD_load(dulist_ip + jju3);
+          SNA_DVEC du = du_r * jjjmambyarray_r + du_i * jjjmambyarray_i;
+          dedr[k] = SIMD_add(m, dedr[k], du);
+          jju3 += vector_width();
+        }
+        i = i + vector_width();
+      }
+      // ... and the entry ma == mb is added with weight 0.5
+      SNA_DVEC jjjmambyarray_r = SIMD_gather(m, ylist_rp, i);
+      SNA_DVEC jjjmambyarray_i = SIMD_gather(m, ylist_ip, i);
+      for (int k = 0; k < 3; k++) {
+        SNA_DVEC du_r = SIMD_load(dulist_rp + jju3);
+        SNA_DVEC du_i = SIMD_load(dulist_ip + jju3);
+        SNA_DVEC du = du_r * jjjmambyarray_r + du_i * jjjmambyarray_i;
+        dedr[k] = SIMD_fma(m, SIMD_set(0.5), du, dedr[k]);
+        jju3 += vector_width();
+      }
+    } // if j%2
+  } // for j
+
+  for (int k = 0; k < 3; k++)
+    dedr[k] = dedr[k] * 2.0;  // only mb <= j/2 was summed above; double it
+}
+
+/* ----------------------------------------------------------------------
+   compute dEidRj for neighbor jj; Y is read from the start of ylist
+------------------------------------------------------------------------- */
+
+void SNAIntel::compute_deidrj(const int jj, const SNA_IVEC &jnum,
+                              SNA_DVEC* dedr)
+{
+  double *ylist_rp = (double *)ylist_r;
+  double *ylist_ip = (double *)ylist_i;
+  double *dulist_rp = (double *)(dulist_r[0]);
+  double *dulist_ip = (double *)(dulist_i[0]);
+
+  for (int k = 0; k < 3; k++)
+    dedr[k] = SIMD_set(0.0);
+
+  SIMD_mask m(jj < jnum);  // lanes where jj < jnum; used to mask the sums
+
+  for (int j = 0; j <= twojmax; j++) {
+    int jju = idxu_block[j] * vector_width();  // no element offset is added
+    int jju3 = jju * 3;  // dulist is read as 3 vectors (k = 0,1,2) per entry
+    // columns mb with 2*mb < j: every ma entry counts with weight 1
+    for (int mb = 0; 2*mb < j; mb++)
+      for (int ma = 0; ma <= j; ma++) {
+        SNA_DVEC jjjmambyarray_r = SIMD_load(ylist_rp + jju);
+        SNA_DVEC jjjmambyarray_i = SIMD_load(ylist_ip + jju);
+        for (int k = 0; k < 3; k++) {
+          SNA_DVEC du_r = SIMD_load(dulist_rp + jju3);
+          SNA_DVEC du_i = SIMD_load(dulist_ip + jju3);
+          SNA_DVEC du = du_r * jjjmambyarray_r + du_i * jjjmambyarray_i;
+          dedr[k] = SIMD_add(m, dedr[k], du);
+          jju3 += vector_width();
+        }
+        jju += vector_width();
+      }
+    // for even j: column mb = j/2, entries ma < mb count fully ...
+    if (j%2 == 0) {
+      int mb = j / 2;
+      for (int ma = 0; ma < mb; ma++) {
+        SNA_DVEC jjjmambyarray_r = SIMD_load(ylist_rp + jju);
+        SNA_DVEC jjjmambyarray_i = SIMD_load(ylist_ip + jju);
+        for (int k = 0; k < 3; k++) {
+          SNA_DVEC du_r = SIMD_load(dulist_rp + jju3);
+          SNA_DVEC du_i = SIMD_load(dulist_ip + jju3);
+          SNA_DVEC du = du_r * jjjmambyarray_r + du_i * jjjmambyarray_i;
+          dedr[k] = SIMD_add(m, dedr[k], du);
+          jju3 += vector_width();
+        }
+        jju += vector_width();
+      }
+      // ... and the entry ma == mb is added with weight 0.5
+      SNA_DVEC jjjmambyarray_r = SIMD_load(ylist_rp + jju);
+      SNA_DVEC jjjmambyarray_i = SIMD_load(ylist_ip + jju);
+      for (int k = 0; k < 3; k++) {
+        SNA_DVEC du_r = SIMD_load(dulist_rp + jju3);
+        SNA_DVEC du_i = SIMD_load(dulist_ip + jju3);
+        SNA_DVEC du = du_r * jjjmambyarray_r + du_i * jjjmambyarray_i;
+        dedr[k] = SIMD_fma(m, SIMD_set(0.5), du, dedr[k]);
+        jju3 += vector_width();
+      }
+    } // if j%2
+  } // for j
+
+  for (int k = 0; k < 3; k++)
+    dedr[k] = dedr[k] * 2.0;  // only mb <= j/2 was summed above; double it
+}
+
+/* ----------------------------------------------------------------------
+   compute Bi by summing conj(Ui)*Zi, then apply the optional bzero shift
+------------------------------------------------------------------------- */
+
+void SNAIntel::compute_bi(const SNA_IVEC &ielem) {
+  // for j1 = 0,...,twojmax
+  //   for j2 = 0,twojmax
+  //     for j = |j1-j2|,Min(twojmax,j1+j2),2
+  //        b(j1,j2,j) = 0
+  //        for mb = 0,...,jmid
+  //          for ma = 0,...,j
+  //            b(j1,j2,j) +=
+  //              2*Conj(u(j,ma,mb))*z(j1,j2,j,ma,mb)
+
+  double *ulisttot_rp = (double *)ulisttot_r;
+  double *ulisttot_ip = (double *)ulisttot_i;
+  double *blistp = (double *)blist;
+
+  int itriple = 0;  // running (elem1,elem2,elem3) index into blist
+  int idouble = 0;  // running (elem1,elem2) index into zlist
+  for (int elem1 = 0; elem1 < nelements; elem1++)
+    for (int elem2 = 0; elem2 < nelements; elem2++) {
+      // start of the zlist block belonging to this (elem1,elem2) pair
+      double *zlist_rp = (double *)(zlist_r + idouble*idxz_max);
+      double *zlist_ip = (double *)(zlist_i + idouble*idxz_max);
+
+      for (int elem3 = 0; elem3 < nelements; elem3++) {
+        for (int jjb = 0; jjb < idxb_max; jjb++) {
+          const int j1 = idxb[jjb].j1;
+          const int j2 = idxb[jjb].j2;
+          const int j = idxb[jjb].j;
+
+          int jjz = idxz_block[j1][j2][j] * vector_width();
+          int jju = (elem3 * idxu_max + idxu_block[j]) * vector_width();
+          SNA_DVEC sumzu(0.0);  // accumulates u_r*z_r + u_i*z_i per lane
+          for (int mb = 0; 2 * mb < j; mb++)
+            for (int ma = 0; ma <= j; ma++) {
+              const SNA_DVEC utot_r = SIMD_load(ulisttot_rp + jju);
+              const SNA_DVEC utot_i = SIMD_load(ulisttot_ip + jju);
+              const SNA_DVEC z_r = SIMD_load(zlist_rp + jjz);
+              const SNA_DVEC z_i = SIMD_load(zlist_ip + jjz);
+              sumzu = sumzu + utot_r * z_r + utot_i * z_i;
+              jjz += vector_width();
+              jju += vector_width();
+            } // end loop over ma, mb
+
+          // For j even, handle middle column
+          // (entries ma < mb count fully, the entry ma == mb with weight 0.5)
+          if (j % 2 == 0) {
+            int mb = j / 2;
+            for (int ma = 0; ma < mb; ma++) {
+              const SNA_DVEC utot_r = SIMD_load(ulisttot_rp + jju);
+              const SNA_DVEC utot_i = SIMD_load(ulisttot_ip + jju);
+              const SNA_DVEC z_r = SIMD_load(zlist_rp + jjz);
+              const SNA_DVEC z_i = SIMD_load(zlist_ip + jjz);
+              sumzu = sumzu + utot_r * z_r + utot_i * z_i;
+              jjz += vector_width();
+              jju += vector_width();
+            }
+
+            const SNA_DVEC utot_r = SIMD_load(ulisttot_rp + jju);
+            const SNA_DVEC utot_i = SIMD_load(ulisttot_ip + jju);
+            const SNA_DVEC z_r = SIMD_load(zlist_rp + jjz);
+            const SNA_DVEC z_i = SIMD_load(zlist_ip + jjz);
+            sumzu = sumzu + (utot_r * z_r + utot_i * z_i) * 0.5;
+          } // end if jeven
+          // the factor 2 of the formula above is applied on store
+          SIMD_store(blistp + (itriple*idxb_max+jjb) * vector_width(),
+                     sumzu * 2.0);
+        }
+        itriple++;
+      }
+      idouble++;
+    }
+
+  // apply bzero shift
+  // (bzero[j] is subtracted from the affected blist entries)
+  if (bzero_flag) {
+    if (!wselfall_flag) {
+      // per lane, only the triple (ielem,ielem,ielem) is shifted
+      SNA_IVEC itriplev = (ielem*nelements+ielem)*nelements+ielem;
+      for (int jjb = 0; jjb < idxb_max; jjb++) {
+        const int j = idxb[jjb].j;
+        SNA_IVEC i = (itriplev*idxb_max+jjb) * vector_width() + SIMD256_count();
+        SIMD_scatter(blistp, i, SIMD_gather(blistp, i) - bzero[j]);
+      } // end loop over JJ
+    } else {
+      int itriple = 0;  // local counter; hides the function-level itriple
+      for (int elem1 = 0; elem1 < nelements; elem1++)
+        for (int elem2 = 0; elem2 < nelements; elem2++) {
+          for (int elem3 = 0; elem3 < nelements; elem3++) {
+            for (int jjb = 0; jjb < idxb_max; jjb++) {
+              const int j = idxb[jjb].j;
+              int i = (itriple*idxb_max+jjb) * vector_width();
+              SIMD_store(blistp + i, SIMD_load(blistp + i) - bzero[j]);
+            } // end loop over JJ
+            itriple++;
+          } // end loop over elem3
+        } // end loop over elem1,elem2
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   prepare the geometry of neighbor jj and evaluate the derivatives of Ui
+------------------------------------------------------------------------- */
+
+void SNAIntel::compute_duidrj(const int jj, const SNA_IVEC &jnum)
+{
+  const SNA_DVEC delx = rij[jj][0];
+  const SNA_DVEC dely = rij[jj][1];
+  const SNA_DVEC delz = rij[jj][2];
+  const SNA_DVEC cut = rcutij[jj];
+  const SNA_DVEC dist2 = delx * delx + dely * dely + delz * delz;
+  const SNA_DVEC dist = SIMD_sqrt(dist2);
+  const SNA_DVEC scale = SIMD_rcp(cut - rmin0) * rfac0 * MY_PI;
+  const SNA_DVEC theta = (dist - rmin0) * scale;
+  const SNA_DVEC zeta = dist * SIMD_rcp(SIMD_tan(theta));
+  const SNA_DVEC dzeta = zeta * SIMD_rcp(dist) -
+    (dist*scale) * (dist2 + zeta * zeta) * SIMD_rcp(dist2);
+  compute_duarray(delx, dely, delz, zeta, dist, dzeta, wj[jj], cut, jj, jnum);
+}
+
+/* ---------------------------------------------------------------------- */
+// reset ulisttot for every element block; diagonal entries may get wself
+void SNAIntel::zero_uarraytot(const SNA_IVEC &ielem)
+{
+  double *ulisttot_rp = (double *)ulisttot_r;
+  double *ulisttot_ip = (double *)ulisttot_i;
+  for (int jelem = 0; jelem < nelements; jelem++)
+    for (int j = 0; j <= twojmax; j++) {
+      int jju = (jelem * idxu_max + idxu_block[j]) * vector_width();
+      for (int mb = 0; mb <= j; mb++) {
+        for (int ma = 0; ma <= j; ma++) {
+          SIMD_store(ulisttot_rp + jju, SIMD_set(0.0));
+          SIMD_store(ulisttot_ip + jju, SIMD_set(0.0));
+
+          // utot(j,ma,ma) = wself, sometimes
+          if (ma == mb) {
+            if (wselfall_flag || nelements == 1)
+              SIMD_store(ulisttot_rp + jju, SIMD_set(wself));
+            else {
+              SIMD_mask m(ielem == jelem);  // lanes whose element is jelem
+              SIMD_store(ulisttot_rp + jju,  // NOTE(review): presumably wself
+                         SIMD_zero_masked(~m, SIMD_set(wself)));  // on m lanes, 0 elsewhere
+            }
+          }
+          jju += vector_width();
+        }
+      }
+    }
+}
+
+
+
+/* ----------------------------------------------------------------------
+   add sfac*wj-weighted Wigner U-functions of neighbor jj to ulisttot
+------------------------------------------------------------------------- */
+
+void SNAIntel::add_uarraytot(const SNA_DVEC &r, const int jj,
+                             const SNA_IVEC &jnum)
+{
+  SNA_DVEC sfac = compute_sfac(r, rcutij[jj], sinnerij[jj], dinnerij[jj]);
+  sfac *= wj[jj];  // per-lane weight applied to every U entry below
+
+  double *ulisttot_rp = (double *)ulisttot_r;
+  double *ulisttot_ip = (double *)ulisttot_i;
+  const double* ulist_r = (double *)(ulist_r_ij[jj]);
+  const double* ulist_i = (double *)(ulist_i_ij[jj]);
+
+  SIMD_mask m(jj < jnum);  // lanes where jj < jnum
+  // with several chemical elements each lane updates its own element block
+  if (chem_flag && nelements > 1) {
+    SNA_IVEC jelem = SIMD_load(element+jj);
+    for (int j = 0; j <= twojmax; j++) {
+      int jju = idxu_block[j] * vector_width();
+      SNA_IVEC i = jelem*idxu_max*vector_width() + jju + SIMD256_count();
+      for (int mb = 0; mb <= j; mb++)
+        for (int ma = 0; ma <= j; ma++) {
+          SNA_DVEC utot_r = SIMD_gather(m, ulisttot_rp, i);
+          SNA_DVEC utot_i = SIMD_gather(m, ulisttot_ip, i);
+          utot_r = SIMD_fma(m, sfac, SIMD_load(ulist_r + jju), utot_r);
+          utot_i = SIMD_fma(m, sfac, SIMD_load(ulist_i + jju), utot_i);
+          SIMD_scatter(m, ulisttot_rp, i, utot_r);
+          SIMD_scatter(m, ulisttot_ip, i, utot_i);
+          jju += vector_width();
+          i = i + vector_width();
+        }
+    }
+  } else {  // otherwise all lanes accumulate into the block at offset 0
+    for (int j = 0; j <= twojmax; j++) {
+      int jju = idxu_block[j] * vector_width();
+      for (int mb = 0; mb <= j; mb++)
+        for (int ma = 0; ma <= j; ma++) {
+          SNA_DVEC utot_r = SIMD_load(ulisttot_rp + jju);
+          SNA_DVEC utot_i = SIMD_load(ulisttot_ip + jju);
+          utot_r = SIMD_fma(m, sfac, SIMD_load(ulist_r + jju), utot_r);
+          utot_i = SIMD_fma(m, sfac, SIMD_load(ulist_i + jju), utot_i);
+          SIMD_store(ulisttot_rp + jju, utot_r);
+          SIMD_store(ulisttot_ip + jju, utot_i);
+          jju += vector_width();
+        }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   compute Wigner U-functions of neighbor jj into ulist_r_ij/ulist_i_ij[jj]
+------------------------------------------------------------------------- */
+
+void SNAIntel::compute_uarray(const SNA_DVEC &x, const SNA_DVEC &y,
+                              const SNA_DVEC &z, const SNA_DVEC &z0,
+                              const SNA_DVEC &r, const int jj,
+                              const SNA_IVEC &jnum)  // jnum is not read here
+{
+  // compute Cayley-Klein parameters for unit quaternion
+
+  const SNA_DVEC r0inv = SIMD_invsqrt(r * r + z0 * z0);
+  const SNA_DVEC a_r = z0 * r0inv;
+  const SNA_DVEC a_i = -z * r0inv;
+  const SNA_DVEC b_r = y * r0inv;
+  const SNA_DVEC b_i = -x * r0inv;
+
+  // VMK Section 4.8.2
+
+  double *ulist_rp = (double *)(ulist_r_ij[jj]);
+  double *ulist_ip = (double *)(ulist_i_ij[jj]);
+
+  SIMD_store(ulist_rp, SIMD_set(1.0));  // layer j = 0 is the single entry 1 + 0i
+  SIMD_store(ulist_ip, SIMD_set(0.0));
+
+  for (int j = 1; j <= twojmax; j++) {
+    int jju = idxu_block[j] * vector_width();     // write offset in layer j
+    int jjup = idxu_block[j-1] * vector_width();  // read offset in layer j-1
+
+    // fill in left side of matrix layer from previous layer
+
+    for (int mb = 0; 2*mb <= j; mb++) {
+      SIMD_store(ulist_rp + jju, SIMD_set(0.0));
+      SIMD_store(ulist_ip + jju, SIMD_set(0.0));
+
+      for (int ma = 0; ma < j; ma++) {
+        double rootpq = rootpqarray[j - ma][j - mb];
+        SNA_DVEC u_r = SIMD_load(ulist_rp + jju);
+        SNA_DVEC u_i = SIMD_load(ulist_ip + jju);
+        const SNA_DVEC up_r = SIMD_load(ulist_rp + jjup);
+        const SNA_DVEC up_i = SIMD_load(ulist_ip + jjup);
+
+        SNA_DVEC u_ro, u_io;
+        // add rootpq * conj(a) * up to the current entry
+        u_ro = a_r * up_r + a_i * up_i;
+        u_r = SIMD_fma(SIMD_set(rootpq), u_ro, u_r);
+        SIMD_store(ulist_rp + jju, u_r);
+        u_io = a_r * up_i - a_i * up_r;
+        u_i = SIMD_fma(SIMD_set(rootpq), u_io, u_i);
+        SIMD_store(ulist_ip + jju, u_i);
+
+        jju += vector_width();
+        // seed the next entry with -rootpq * conj(b) * up
+        rootpq = -rootpqarray[ma + 1][j - mb];
+        u_r = (b_r * up_r + b_i * up_i) * rootpq;
+        SIMD_store(ulist_rp + jju, u_r);
+        u_i = (b_r * up_i - b_i * up_r) * rootpq;
+        SIMD_store(ulist_ip + jju, u_i);
+
+        jjup += vector_width();
+      }
+      jju += vector_width();
+    }
+
+    // copy left side to right side with inversion symmetry VMK 4.4(2)
+    // u[ma-j][mb-j] = (-1)^(ma-mb)*Conj([u[ma][mb])
+
+    jju = idxu_block[j];
+    jjup = (jju+(j+1)*(j+1)-1) * vector_width();  // last entry of layer j
+    jju *=  vector_width();
+    int mbpar = 1;
+    for (int mb = 0; 2*mb <= j; mb++) {
+      int mapar = mbpar;  // sign (-1)^(ma-mb), tracked as +1 / -1
+      for (int ma = 0; ma <= j; ma++) {
+        if (mapar == 1) {
+          SIMD_store(ulist_rp + jjup, SIMD_load(ulist_rp + jju));
+          SIMD_store(ulist_ip + jjup, -SIMD_load(ulist_ip + jju));
+        } else {
+          SIMD_store(ulist_rp + jjup, -SIMD_load(ulist_rp + jju));
+          SIMD_store(ulist_ip + jjup, SIMD_load(ulist_ip + jju));
+        }
+        mapar = -mapar;
+        jju += vector_width();   // source walks forward ...
+        jjup -= vector_width();  // ... mirrored destination walks backward
+      }
+      mbpar = -mbpar;
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   compute derivatives of the Wigner U-functions of neighbor jj into
+   dulist, switching function included; see comments in compute_uarray()
+------------------------------------------------------------------------- */
+
+void SNAIntel::compute_duarray(const SNA_DVEC &x, const SNA_DVEC &y,
+                               const SNA_DVEC &z, const SNA_DVEC &z0,
+                               const SNA_DVEC &r, const SNA_DVEC &dz0dr,
+                               const SNA_DVEC &wj, const SNA_DVEC &rcut,
+                               const int jj, const SNA_IVEC &jnum)
+{
+  const SNA_DVEC rinv = SIMD_rcp(r);
+  const SNA_DVEC r0inv = SIMD_invsqrt(r * r + z0 * z0);
+  SNA_DVEC up[3];   // components of (x,y,z) scaled by rinv
+  up[0] = x * rinv;
+  up[1] = y * rinv;
+  up[2] = z * rinv;
+  const SNA_DVEC a_r = z0 * r0inv;
+  const SNA_DVEC a_i = -z * r0inv;
+  const SNA_DVEC b_r = y * r0inv;
+  const SNA_DVEC b_i = -x * r0inv;
+  const SNA_DVEC dr0invdr = -SIMD_pow(r0inv, 3.0) * (r + z0 * dz0dr);
+
+  SNA_DVEC dr0inv[3], da_r[3], da_i[3];   // derivatives of r0inv and a
+  for (int k = 0; k < 3; k++) {
+    dr0inv[k] = dr0invdr * up[k];
+    da_r[k] = dz0dr * up[k] * r0inv + z0 * dr0inv[k];
+    da_i[k] = -z * dr0inv[k];
+  }
+  da_i[2] += -r0inv;
+
+  double *ulist_rp = (double *)(ulist_r_ij[jj]);
+  double *ulist_ip = (double *)(ulist_i_ij[jj]);
+  double *dulist_rp = (double *)(dulist_r[0]);
+  double *dulist_ip = (double *)(dulist_i[0]);
+
+  SNA_DVEC db_r[3], db_i[3];   // derivatives of b
+  for (int k = 0; k < 3; k++) {
+    SIMD_store(dulist_rp + k * vector_width(), SIMD_set(0.0));
+    SIMD_store(dulist_ip + k * vector_width(), SIMD_set(0.0));
+    db_r[k] = y * dr0inv[k];
+    db_i[k] = -x * dr0inv[k];
+  }
+  db_i[0] -= r0inv;
+  db_r[1] += r0inv;
+
+  for (int j = 1; j <= twojmax; j++) {
+    int jju3 = idxu_block[j] * 3 * vector_width();   // 3 vectors per entry
+    int jjup = idxu_block[j-1] * vector_width();
+    int jjup3 = jjup * 3;
+    for (int mb = 0; 2*mb <= j; mb++) {
+      for (int k = 0; k < 3; k++) {
+        SIMD_store(dulist_rp + jju3 + k * vector_width(), SIMD_set(0.0));
+        SIMD_store(dulist_ip + jju3 + k * vector_width(), SIMD_set(0.0));
+      }
+
+      for (int ma = 0; ma < j; ma++) {
+        const double rootpq = rootpqarray[j - ma][j - mb];
+        const double mrootpq = -rootpqarray[ma + 1][j - mb];
+        const SNA_DVEC up_r = SIMD_load(ulist_rp + jjup);
+        const SNA_DVEC up_i = SIMD_load(ulist_ip + jjup);
+        for (int k = 0; k < 3; k++) {
+          SNA_DVEC du_r = SIMD_load(dulist_rp + jju3);
+          SNA_DVEC du_i = SIMD_load(dulist_ip + jju3);
+          const SNA_DVEC dup_r = SIMD_load(dulist_rp + jjup3);
+          const SNA_DVEC dup_i = SIMD_load(dulist_ip + jjup3);
+
+          SNA_DVEC du_ro, du_io;
+
+          // product rule on conj(a)*up, accumulated into the current entry
+          du_ro = (da_r[k]*up_r + da_i[k]*up_i + a_r*dup_r + a_i*dup_i);
+          du_r = SIMD_fma(SIMD_set(rootpq), du_ro, du_r);
+          SIMD_store(dulist_rp + jju3, du_r);
+
+          du_io = (da_r[k]*up_i - da_i[k]*up_r + a_r*dup_i - a_i*dup_r);
+          du_i = SIMD_fma(SIMD_set(rootpq), du_io, du_i);
+          SIMD_store(dulist_ip + jju3, du_i);
+
+          // product rule on conj(b)*up, seeding the next entry (3 vectors on)
+          du_r = (db_r[k]*up_r + db_i[k]*up_i + b_r*dup_r + b_i*dup_i);
+          SIMD_store(dulist_rp + jju3 + 3 * vector_width(), du_r * mrootpq);
+
+          du_i = (db_r[k]*up_i - db_i[k]*up_r + b_r*dup_i - b_i*dup_r);
+          SIMD_store(dulist_ip + jju3 + 3 * vector_width(), du_i * mrootpq);
+
+          jju3 += vector_width();
+          jjup3 += vector_width();
+        }
+        jjup += vector_width();
+      } // for ma
+      jju3 += 3 * vector_width();
+    } // for mb
+
+    // copy left side to right side with inversion symmetry VMK 4.4(2)
+    // u[ma-j][mb-j] = (-1)^(ma-mb)*Conj([u[ma][mb])
+
+    // jju walks the source entries forward; jjup starts at the last entry
+    // of layer j and walks backward, each entry being 3 vectors wide
+
+    int jju = idxu_block[j];
+    jjup = (jju+(j+1)*(j+1)-1) * 3 * vector_width();
+    jju *=  3 * vector_width();
+    int mbpar = 1;
+    for (int mb = 0; 2*mb <= j; mb++) {
+      int mapar = mbpar;
+      for (int ma = 0; ma <= j; ma++) {
+        if (mapar == 1) {
+          for (int k = 0; k < 3; k++) {
+            SIMD_store(dulist_rp + jjup, SIMD_load(dulist_rp + jju));
+            SIMD_store(dulist_ip + jjup, -SIMD_load(dulist_ip + jju));
+            jju += vector_width();
+            jjup += vector_width();
+          }
+        } else {
+          for (int k = 0; k < 3; k++) {
+            SIMD_store(dulist_rp + jjup, -SIMD_load(dulist_rp + jju));
+            SIMD_store(dulist_ip + jjup, SIMD_load(dulist_ip + jju));
+            jju += vector_width();
+            jjup += vector_width();
+          }
+        }
+        mapar = -mapar;
+        jjup -= 6 * vector_width();   // undo the 3 steps above, back 1 entry
+      } // for ma
+      mbpar = -mbpar;
+    } // for mb
+  } // for j
+
+  SNA_DVEC dsfac;   // set by compute_sfac_dsfac()
+  SNA_DVEC sfac = compute_sfac_dsfac(r, rcut, sinnerij[jj], dinnerij[jj],
+                                      dsfac);
+  sfac = sfac * wj;
+  dsfac = dsfac * wj;
+
+  // du <- dsfac * u * up[k] + sfac * du, for the columns with 2*mb <= j
+  for (int j = 0; j <= twojmax; j++) {
+    int jju = idxu_block[j] * vector_width();
+    int jju3 = jju * 3;
+    for (int mb = 0; 2*mb <= j; mb++)
+      for (int ma = 0; ma <= j; ma++) {
+        const SNA_DVEC ur_dsfac = dsfac * SIMD_load(ulist_rp + jju);
+        const SNA_DVEC ui_dsfac = dsfac * SIMD_load(ulist_ip + jju);
+        jju += vector_width();
+        for (int k = 0; k < 3; k++) {
+          SNA_DVEC du_r = ur_dsfac * up[k] + sfac * SIMD_load(dulist_rp+jju3);
+          SIMD_store(dulist_rp + jju3, du_r);
+          SNA_DVEC du_i = ui_dsfac * up[k] + sfac * SIMD_load(dulist_ip+jju3);
+          SIMD_store(dulist_ip + jju3, du_i);
+          jju3 += vector_width();
+        }
+      }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   tally of the bytes attributed to each array, returned as a double
+------------------------------------------------------------------------- */
+
+double SNAIntel::memory_usage()
+{
+  const double dvec = sizeof(SNA_DVEC);    // one SIMD vector of doubles
+  const double ivec = sizeof(SNA_IVEC);    // one SIMD vector of ints
+  const double npq = twojmax + 2;
+  const double nj = twojmax + 1;
+  const double nj3 = nj * nj * nj;
+  double bytes = 0.0;
+
+  bytes += npq * npq * sizeof(double);                           // pqarray
+  bytes += (double)idxcg_max * sizeof(double);                   // cglist
+
+  bytes += (double)nmax * idxu_max * dvec * 2;                   // ulist_ij
+  bytes += (double)idxu_max * nelements * dvec * 2;              // ulisttot
+  bytes += (double)idxu_max * 3 * dvec * 2;                      // dulist
+
+  bytes += (double)idxz_max * ndoubles * dvec * 2;               // zlist
+  bytes += (double)idxb_max * ntriples * dvec;                   // blist
+  bytes += (double)idxb_max * ntriples * 3 * sizeof(double);     // dblist
+  bytes += (double)idxu_max * nelements * dvec * 2;              // ylist
+
+  bytes += nj3 * sizeof(int);                                    // idxcg_block
+  bytes += nj * sizeof(int);                                     // idxu_block
+  bytes += nj3 * sizeof(int);                                    // idxz_block
+  bytes += nj3 * sizeof(int);                                    // idxb_block
+
+  bytes += (double)idxz_max * sizeof(SNA_ZINDICES);              // idxz
+  bytes += (double)idxb_max * sizeof(SNA_BINDICES);              // idxb
+
+  if (bzero_flag) bytes += nj * sizeof(double);                  // bzero
+
+  bytes += (double)nmax * 3 * dvec;                              // rij
+  bytes += (double)nmax * ivec;                                  // inside
+  bytes += (double)nmax * dvec;                                  // wj
+  bytes += (double)nmax * dvec;                                  // rcutij
+  bytes += (double)nmax * dvec;                                  // sinnerij
+  bytes += (double)nmax * dvec;                                  // dinnerij
+  if (chem_flag) bytes += (double)nmax * ivec;                   // element
+
+  return bytes;
+}
+
+/* ---------------------------------------------------------------------- */
+// allocate the arrays whose sizes follow from twojmax and the index counts
+void SNAIntel::create_twojmax_arrays()
+{
+  const int npq = twojmax + 2;             // rootpqarray is npq x npq
+  const int nutot = idxu_max*nelements;    // length of ulisttot and ylist
+  const int nz = idxz_max*ndoubles;        // length of zlist
+  const int nb = idxb_max*ntriples;        // length of blist, rows of dblist
+
+  memory->create(rootpqarray, npq, npq, "sna:rootpqarray");
+  memory->create(cglist, idxcg_max, "sna:cglist");
+  memory->create(ulisttot_r, nutot, "sna:ulisttot");
+  memory->create(ulisttot_i, nutot, "sna:ulisttot");
+  memory->create(dulist_r, idxu_max, 3, "sna:dulist");
+  memory->create(dulist_i, idxu_max, 3, "sna:dulist");
+  memory->create(zlist_r, nz, "sna:zlist");
+  memory->create(zlist_i, nz, "sna:zlist");
+  memory->create(blist, nb, "sna:blist");
+  memory->create(dblist, nb, 3, "sna:dblist");
+  memory->create(ylist_r, nutot, "sna:ylist");
+  memory->create(ylist_i, nutot, "sna:ylist");
+
+  bzero = nullptr;                         // stays null unless bzero_flag
+  if (bzero_flag) memory->create(bzero, twojmax+1,"sna:bzero");
+}
+
+/* ---------------------------------------------------------------------- */
+// release the twojmax-sized arrays and the index block tables
+void SNAIntel::destroy_twojmax_arrays()
+{
+  // arrays allocated in create_twojmax_arrays()
+  memory->destroy(rootpqarray);
+  memory->destroy(cglist);
+  memory->destroy(ulisttot_r);
+  memory->destroy(ulisttot_i);
+  memory->destroy(dulist_r);
+  memory->destroy(dulist_i);
+  memory->destroy(zlist_r);
+  memory->destroy(zlist_i);
+  memory->destroy(blist);
+  memory->destroy(dblist);
+  memory->destroy(ylist_r);
+  memory->destroy(ylist_i);
+
+  // index block tables (not allocated in create_twojmax_arrays())
+  memory->destroy(idxcg_block);
+  memory->destroy(idxu_block);
+  memory->destroy(idxz_block);
+  memory->destroy(idxb_block);
+
+  if (bzero_flag) memory->destroy(bzero);
+}
+
+/* ----------------------------------------------------------------------
+   evaluate the function delta(j1,j2,j) of VMK Eq. 8.2(1) from factorials
+------------------------------------------------------------------------- */
+
+double SNAIntel::deltacg(int j1, int j2, int j)
+{
+  const double denom = factorial((j1 + j2 + j) / 2 + 1);
+  const double numer = factorial((j1 + j2 - j) / 2) *
+    factorial((j1 - j2 + j) / 2) * factorial((-j1 + j2 + j) / 2);
+  return sqrt(numer / denom);
+}
+
+/* ----------------------------------------------------------------------
+   fill cglist with Clebsch-Gordan coefficients computed from
+   the quasi-binomial formula VMK 8.2.1(3)
+------------------------------------------------------------------------- */
+
+void SNAIntel::init_clebsch_gordan()
+{
+  double sum,sfaccg;
+  int m, aa2, bb2, cc2;
+  int ifac;
+
+  int idxcg_count = 0;   // next free slot in cglist
+  for (int j1 = 0; j1 <= twojmax; j1++)
+    for (int j2 = 0; j2 <= j1; j2++)
+      for (int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2) {
+        const double dcg = deltacg(j1, j2, j);   // same for every (m1,m2)
+        for (int m1 = 0; m1 <= j1; m1++) {
+          aa2 = 2 * m1 - j1;
+
+          for (int m2 = 0; m2 <= j2; m2++) {
+
+            // -c <= cc <= c
+
+            bb2 = 2 * m2 - j2;
+            m = (aa2 + bb2 + j) / 2;
+
+            // out-of-range projection: coefficient is stored as zero
+            if (m < 0 || m > j) {
+              cglist[idxcg_count] = 0.0;
+              idxcg_count++;
+              continue;
+            }
+
+            sum = 0.0;
+
+            // alternating sum over z of reciprocal factorial products
+            for (int z = MAX(0, MAX(-(j - j2 + aa2)
+                                    / 2, -(j - j1 - bb2) / 2));
+                 z <= MIN((j1 + j2 - j) / 2,
+                          MIN((j1 - aa2) / 2, (j2 + bb2) / 2));
+                 z++) {
+              ifac = z % 2 ? -1 : 1;
+              sum += ifac /
+                (factorial(z) *
+                 factorial((j1 + j2 - j) / 2 - z) *
+                 factorial((j1 - aa2) / 2 - z) *
+                 factorial((j2 + bb2) / 2 - z) *
+                 factorial((j - j2 + aa2) / 2 + z) *
+                 factorial((j - j1 - bb2) / 2 + z));
+            }
+
+            cc2 = 2 * m - j;
+            sfaccg = sqrt(factorial((j1 + aa2) / 2) *
+                          factorial((j1 - aa2) / 2) *
+                          factorial((j2 + bb2) / 2) *
+                          factorial((j2 - bb2) / 2) *
+                          factorial((j  + cc2) / 2) *
+                          factorial((j  - cc2) / 2) *
+                          (j + 1));
+
+            cglist[idxcg_count] = sum * dcg * sfaccg;
+            idxcg_count++;
+          }
+        }
+      }
+}
+
+/* ----------------------------------------------------------------------
+   print the stored Clebsch-Gordan coefficients (only where comm->me is 0)
+   format and notation follows VMK Table 8.11
+------------------------------------------------------------------------- */
+
+void SNAIntel::print_clebsch_gordan()
+{
+  if (comm->me != 0) return;
+
+  for (int jc = 0; jc <= twojmax; ++jc) {
+    printf("c = %g\n",jc/2.0);
+    printf("a alpha b beta C_{a alpha b beta}^{c alpha+beta}\n");
+    for (int ja = 0; ja <= twojmax; ++ja) {
+      for (int jb = 0; jb <= ja; ++jb) {
+        if (ja-jb > jc || ja+jb < jc || (ja+jb+jc)%2 != 0) continue;
+        int icg = idxcg_block[ja][jb][jc];   // first coefficient of the block
+        for (int ma = 0; ma <= ja; ++ma) {
+          const int alpha2 = 2*ma-ja;
+          for (int mb = 0; mb <= jb; ++mb, ++icg) {
+            const int beta2 = 2*mb-jb;
+            const int gamma2 = alpha2+beta2;
+            if (gamma2 < -jc || gamma2 > jc) continue;
+            const bool show = ja != jb || (alpha2 > beta2 && alpha2 >= -beta2) ||
+              (alpha2 == beta2 && alpha2 >= 0);
+            if (show)
+              printf("%4g %4g %4g %4g %10.6g\n",
+                     ja/2.0,alpha2/2.0,jb/2.0,beta2/2.0,cglist[icg]);
+          }
+        }
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   pre-compute table of sqrt[p/q], p, q = 1,twojmax
+   the p = 0, q = 0 entries are allocated and skipped for convenience.
+------------------------------------------------------------------------- */
+
+void SNAIntel::init_rootpqarray()
+{
+  // fill rootpqarray[1..twojmax][1..twojmax]; row 0 and column 0 are not written
+  for (int num = 1; num <= twojmax; ++num)
+    for (int den = 1; den <= twojmax; ++den) rootpqarray[num][den] = sqrt(double(num)/den);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void SNAIntel::compute_ncoeff()
+{
+  // count the (j1,j2,j) triples with j2 <= j1 <= j, where j runs from j1-j2
+  // up to MIN(twojmax, j1+j2) in steps of 2
+  int ntriplets_j = 0;
+  for (int j1 = 0; j1 <= twojmax; ++j1) {
+    for (int j2 = 0; j2 <= j1; ++j2) {
+      const int jhi = MIN(twojmax, j1 + j2);
+      for (int j = j1 - j2; j <= jhi; j += 2) {
+        if (j >= j1) ++ntriplets_j;
+      }
+    }
+  }
+
+  ndoubles = nelements*nelements;
+  ntriples = nelements*nelements*nelements;
+  // with chem_flag set, the count is multiplied by the number of element triples
+  ncoeff = chem_flag ? ntriplets_j*ntriples : ntriplets_j;
+}
+
+/* ---------------------------------------------------------------------- */
+
+double SNAIntel::compute_sfac(double r, double rcut, double sinner, double dinner)
+{
+  // outer factor: 1 when switch_flag is 0 or r <= rmin0, 0 when r > rcut,
+  // otherwise the cosine expression evaluated below
+
+  double outer = 1.0;
+  if (switch_flag != 0 && !(r <= rmin0)) {
+    if (r > rcut) {
+      outer = 0.0;
+    } else {
+      const double arg = (r - rmin0) * (MY_PI / (rcut - rmin0));
+      outer = 0.5 * (cos(arg) + 1.0);
+    }
+  }
+
+  if (switch_inner_flag != 1 || !(r < sinner + dinner)) return outer;
+
+  // inner factor, rarely visited: only reached for r < sinner + dinner;
+  // unless r > sinner - dinner the result is forced to zero
+
+  if (!(r > sinner - dinner)) return 0.0;
+
+  const double inner_arg = MY_PI2 + (r - sinner) * (MY_PI2 / dinner);
+  return outer * (0.5 * (1.0 - cos(inner_arg)));
+}
+
+/* ---------------------------------------------------------------------- */
+
+SNA_DVEC SNAIntel::compute_sfac(const SNA_DVEC &r, const SNA_DVEC &rcut,
+                                const SNA_DVEC &sinner, const SNA_DVEC &dinner)
+{
+  // vector counterpart of the scalar compute_sfac(): each lane returns
+  // sfac = sfac_outer * sfac_inner for its own r, rcut, sinner and dinner
+  // lanes with switch_flag == 0 or r <= rmin0 keep sfac_outer = 1
+  SNA_DVEC sfac = SIMD_set(1.0);
+  if (switch_flag != 0) {
+    // r <= rcut && r > rmin0
+    const SIMD_mask i(r > rmin0);
+    const SIMD_mask m(r <= rcut);
+    const SNA_DVEC rcutfac = SIMD_rcp(rcut - rmin0) * MY_PI;
+    const SNA_DVEC sfac_m = (SIMD_cos((r - rmin0) * rcutfac) + 1.0) * 0.5;
+    sfac = SIMD_set(sfac, m & i, sfac_m);
+    // zero lanes with (r > rcut) && (r > rmin0); assumes SIMD_zero_masked() keeps lanes whose mask bit is set - TODO confirm
+    sfac = SIMD_zero_masked(m | (~i), sfac);
+  }
+
+  // calculate sfac *= sfac_inner, rarely visited
+  if (switch_inner_flag == 1) {
+    const SIMD_mask m(r < sinner + dinner);
+    // sinner - dinner < r < sinner + dinner: scale the outer factor by sfac_inner
+    const SIMD_mask i(r > sinner - dinner);
+    const SNA_DVEC rcutfac = SIMD_rcp(dinner) * MY_PI2;
+    const SNA_DVEC sfac_m = (SIMD_set(1.0) - SIMD_cos((r-sinner) * rcutfac +
+                                                      MY_PI2)) * 0.5;
+    sfac = SIMD_set(sfac, m & i, sfac * sfac_m);
+    // lanes inside m but not in i (r <= sinner - dinner) are zeroed, as in the scalar version
+    sfac = SIMD_zero_masked((~m) | i, sfac);
+  }
+
+  return sfac;
+}
+
+/* ---------------------------------------------------------------------- */
+
+SNA_DVEC SNAIntel::compute_sfac_dsfac(const SNA_DVEC & r,
+                                      const SNA_DVEC & rcut,
+                                      const SNA_DVEC & sinner,
+                                      const SNA_DVEC & dinner,
+                                      SNA_DVEC &dsfac)
+{
+  // returns the per-lane switching factor sfac = sfac_outer * sfac_inner and
+  // writes its derivative with respect to r into dsfac
+  // lanes with switch_flag == 0 or r <= rmin0 keep sfac = 1 and dsfac = 0
+  SNA_DVEC sfac = SIMD_set(1.0);
+  dsfac = SIMD_set(0.0);
+  if (switch_flag != 0) {
+    // r <= rcut && r > rmin0
+    const SIMD_mask i(r > rmin0);
+    const SIMD_mask m(r <= rcut);
+    const SNA_DVEC rcutfac = SIMD_rcp(rcut - rmin0) * MY_PI;
+    const SNA_DVEC trig_arg = (r - rmin0) * rcutfac;
+    const SNA_DVEC sfac_m = (SIMD_cos(trig_arg) + 1.0) * 0.5;
+    const SNA_DVEC dsfac_m = SIMD_sin(trig_arg) * rcutfac * -0.5;
+    sfac = SIMD_set(sfac, m & i, sfac_m);
+    dsfac = SIMD_set(dsfac, m & i, dsfac_m);
+    // zero lanes with (r > rcut) && (r > rmin0); assumes SIMD_zero_masked() keeps lanes whose mask bit is set - TODO confirm
+    sfac = SIMD_zero_masked(m | (~i), sfac);
+  }
+
+  // calculate sfac *= sfac_inner (and dsfac by the product rule), rarely visited
+
+  if (switch_inner_flag == 1) {
+    const SIMD_mask m(r < sinner + dinner);
+    const SIMD_mask i(r > sinner - dinner);
+    if (any(m & i)) {
+      const SNA_DVEC rcutfac = SIMD_rcp(dinner) * MY_PI2;
+      const SNA_DVEC trig_arg = (r - sinner) * rcutfac + MY_PI2;
+      const SNA_DVEC sfac_inner = (SIMD_set(1.0) - SIMD_cos(trig_arg)) * 0.5;
+      const SNA_DVEC dsfac_inner = rcutfac * 0.5 * SIMD_sin(trig_arg);
+      dsfac = SIMD_set(dsfac, m & i, dsfac * sfac_inner +
+                       sfac * dsfac_inner);    // uses the outer sfac, so it must precede the sfac update
+      sfac = SIMD_set(sfac, m & i, sfac * sfac_inner);
+    }
+    sfac = SIMD_zero_masked((~m) | i, sfac);
+    dsfac = SIMD_zero_masked((~m) | i, dsfac);
+  }
+
+  return sfac;
+}
+
+template void SNAIntel::compute_zi_or_yi<1>(const SNA_DVEC *);
+template void SNAIntel::compute_zi_or_yi<0>(const SNA_DVEC *);
+
+#endif
+#endif
diff --git a/src/INTEL/sna_intel.h b/src/INTEL/sna_intel.h
new file mode 100644
index 0000000000..7900dee51b
--- /dev/null
+++ b/src/INTEL/sna_intel.h
@@ -0,0 +1,187 @@
+/* -*- c++ -*- -------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing authors: W. Michael Brown, Intel
+------------------------------------------------------------------------- */
+
+#ifndef LMP_SNA_INTEL_H
+#define LMP_SNA_INTEL_H
+
+#if defined(__AVX512F__)
+#if defined(__INTEL_COMPILER) || defined(__INTEL_LLVM_COMPILER)
+
+#include "pointers.h"
+#include "intel_buffers.h"
+#include "intel_simd.h"
+
+#define SVW 8
+
+#if defined(LMP_SIMD_COMPILER)
+#if defined(USE_OMP_SIMD)
+#define SV_for _Pragma("omp simd") _Pragma("vector aligned") for
+#else
+#define SV_for _Pragma("simd assert") _Pragma("vector aligned") for
+#endif
+#else
+#define SV_for for
+#endif
+
+namespace LAMMPS_NS {
+
+struct SNA_ZINDICES {    // record of ten ints; SNAIntel keeps an array of these in idxz
+  int j1, j2, j, ma1min, ma2max, mb1min;    // NOTE(review): field meanings are set where idxz is filled (not visible here)
+  int mb2max, na, nb, jju;
+};
+
+struct SNA_BINDICES {    // record of three ints; SNAIntel keeps an array of these in idxb
+  int j1, j2, j;
+};
+
+#define SNA_DVEC ip_simd::SIMD_double
+#define SNA_IVEC ip_simd::SIMD256_int
+
+class SNAIntel : protected Pointers {
+  // SIMD bispectrum helper built on SNA_DVEC / SNA_IVEC vectors of width SVW
+ public:
+  SNAIntel(LAMMPS *, double, int, double, int, int, int, int, int, int, int);    // NOTE(review): argument meanings are not visible in this header - see the definition
+
+  SNAIntel(LAMMPS *lmp) : Pointers(lmp){};    // only forwards lmp to Pointers; no member is initialized here
+  ~SNAIntel() override;
+  void build_indexlist();
+  void init();
+  double memory_usage();
+
+  int ncoeff;    // set by compute_ncoeff()
+
+  inline int vector_width() const { return SVW; }    // SVW is #defined as 8 above
+
+  // functions for bispectrum coefficients
+
+  void compute_ui(const SNA_IVEC &, const SNA_IVEC &, const int max_jnum);
+  template <int> void compute_zi_or_yi(const SNA_DVEC *);    // explicitly instantiated for 0 and 1 in sna_intel.cpp
+  void compute_yi_from_zi(const SNA_DVEC *);
+  void compute_yterm(int, int, int, const double *);
+  void compute_bi(const SNA_IVEC &);
+
+  // functions for derivatives
+
+  void compute_duidrj(const int, const SNA_IVEC &);
+  void compute_deidrj_e(const int, const SNA_IVEC &, SNA_DVEC *);
+  void compute_deidrj(const int, const SNA_IVEC &, SNA_DVEC *);
+  double compute_sfac(double, double, double, double);    // scalar switching factor: (r, rcut, sinner, dinner)
+  SNA_DVEC compute_sfac(const SNA_DVEC &, const SNA_DVEC &, const SNA_DVEC &,
+                        const SNA_DVEC &);    // per-lane version, same argument order
+  inline SNA_DVEC compute_sfac_dsfac(const SNA_DVEC &, const SNA_DVEC &,    // NOTE(review): declared inline but defined in sna_intel.cpp - confirm no other translation unit calls it
+                                     const SNA_DVEC &, const SNA_DVEC &,
+                                     SNA_DVEC &);    // last argument receives dsfac
+
+  // public bispectrum data
+
+  int twojmax;
+  SNA_DVEC *blist;
+  double **dblist;
+
+  // short neighbor list data
+
+  void grow_rij(int);
+  int nmax;    // allocated size of short lists
+
+  SNA_DVEC **rij;      // short rij list
+  SNA_IVEC *inside;       // short neighbor list
+  SNA_DVEC *wj;        // short weight list
+  SNA_DVEC *rcutij;    // short cutoff list
+
+  // only allocated for switch_inner_flag=1
+
+  SNA_DVEC *sinnerij;    // short inner cutoff midpoint list
+  SNA_DVEC *dinnerij;    // short inner half-width list
+
+  // only allocated for chem_flag=1
+
+  SNA_IVEC *element;    // short element list [0,nelements)
+
+ private:
+  double rmin0, rfac0;    // rmin0 is the lower bound used by the switching functions
+
+  // data for bispectrum coefficients
+
+  SNA_ZINDICES *idxz;
+  SNA_BINDICES *idxb;
+
+  double **rootpqarray;    // sqrt(p/q) table filled by init_rootpqarray()
+  double *cglist;          // Clebsch-Gordan values, indexed through idxcg_block
+  int ***idxcg_block;      // [j1][j2][j] -> first index into cglist
+
+  SNA_DVEC *ulisttot_r, *ulisttot_i;
+  SNA_DVEC **ulist_r_ij, **ulist_i_ij;    // short u list
+  int *idxu_block;
+
+  SNA_DVEC *zlist_r, *zlist_i;
+  int ***idxz_block;
+
+  int ***idxb_block;
+
+  SNA_DVEC **dulist_r, **dulist_i;
+
+  SNA_DVEC *ylist_r, *ylist_i;
+  int idxcg_max, idxu_max, idxz_max, idxb_max;
+
+  void create_twojmax_arrays();
+  void destroy_twojmax_arrays();
+  void init_clebsch_gordan();
+  void print_clebsch_gordan();
+  void init_rootpqarray();
+  void zero_uarraytot(const SNA_IVEC &);
+  void add_uarraytot(const SNA_DVEC &, const int, const SNA_IVEC &);
+  void compute_uarray(const SNA_DVEC &, const SNA_DVEC &, const SNA_DVEC &,
+                      const SNA_DVEC &, const SNA_DVEC &, const int,
+                      const SNA_IVEC &);
+  double deltacg(int, int, int);
+  void compute_ncoeff();
+  void compute_duarray(const SNA_DVEC &, const SNA_DVEC &, const SNA_DVEC &,
+                       const SNA_DVEC &, const SNA_DVEC &, const SNA_DVEC &,
+                       const SNA_DVEC &, const SNA_DVEC &, int,
+                       const SNA_IVEC &);
+  inline double choose_beta(const int, const int, const int,
+                            const int, const int, const int,  int &);
+
+  // Sets the style for the switching function
+  // 0 = none
+  // 1 = cosine
+  int switch_flag;
+
+  // Sets the style for the inner switching function
+  // 0 = none
+  // 1 = cosine
+  int switch_inner_flag;
+
+  // Self-weight
+  double wself;
+
+  int bzero_flag;       // 1 if bzero subtracted from barray
+  double *bzero;        // array of B values for isolated atoms
+  int bnorm_flag;       // 1 if barray divided by j+1
+  int chem_flag;        // 1 for multi-element bispectrum components
+  int wselfall_flag;    // 1 for adding wself to all element labelings
+  int nelements;        // number of elements
+  int ndoubles;         // number of multi-element pairs
+  int ntriples;         // number of multi-element triplets
+};
+
+}    // namespace LAMMPS_NS
+
+#endif
+#endif
+
+#endif
diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh
index d44ed1c981..489efc55a0 100755
--- a/src/KOKKOS/Install.sh
+++ b/src/KOKKOS/Install.sh
@@ -129,6 +129,8 @@ action fix_dt_reset_kokkos.cpp
 action fix_dt_reset_kokkos.h
 action fix_enforce2d_kokkos.cpp
 action fix_enforce2d_kokkos.h
+action fix_efield_kokkos.cpp
+action fix_efield_kokkos.h
 action fix_eos_table_rx_kokkos.cpp fix_eos_table_rx.cpp
 action fix_eos_table_rx_kokkos.h fix_eos_table_rx.h
 action fix_freeze_kokkos.cpp fix_freeze.cpp
@@ -173,6 +175,8 @@ action fix_shake_kokkos.cpp fix_shake.cpp
 action fix_shake_kokkos.h fix_shake.h
 action fix_shardlow_kokkos.cpp fix_shardlow.cpp
 action fix_shardlow_kokkos.h fix_shardlow.h
+action fix_spring_self_kokkos.cpp
+action fix_spring_self_kokkos.h
 action fix_viscous_kokkos.cpp
 action fix_viscous_kokkos.h
 action fix_wall_gran_kokkos.cpp fix_wall_gran.cpp
@@ -363,6 +367,8 @@ action pair_vashishta_kokkos.cpp pair_vashishta.cpp
 action pair_vashishta_kokkos.h pair_vashishta.h
 action pair_yukawa_kokkos.cpp
 action pair_yukawa_kokkos.h
+action pair_yukawa_colloid_kokkos.cpp pair_yukawa_colloid.cpp
+action pair_yukawa_colloid_kokkos.h pair_yukawa_colloid.h
 action pair_zbl_kokkos.cpp
 action pair_zbl_kokkos.h
 action pppm_kokkos.cpp pppm.cpp
diff --git a/src/KOKKOS/atom_kokkos.cpp b/src/KOKKOS/atom_kokkos.cpp
index 03537e7b88..bc393b29d8 100644
--- a/src/KOKKOS/atom_kokkos.cpp
+++ b/src/KOKKOS/atom_kokkos.cpp
@@ -44,6 +44,9 @@ AtomKokkos::AtomKokkos(LAMMPS *lmp) : Atom(lmp)
 
   h_tag_min = Kokkos::subview(h_tag_min_max,0);
   h_tag_max = Kokkos::subview(h_tag_min_max,1);
+
+  nprop_atom = 0;
+  fix_prop_atom = nullptr;
 }
 
 /* ---------------------------------------------------------------------- */
@@ -112,6 +115,7 @@ AtomKokkos::~AtomKokkos()
 
   memoryKK->destroy_kokkos(k_dvector, dvector);
   dvector = nullptr;
+  delete [] fix_prop_atom;
 }
 
 /* ---------------------------------------------------------------------- */
@@ -125,11 +129,37 @@ void AtomKokkos::init()
 
 /* ---------------------------------------------------------------------- */
 
+void AtomKokkos::update_property_atom()
+{
+  // gather all fixes matching "^property/atom"; a non-Kokkos one is reported via error->all
+  std::vector<Fix *> matches;
+  nprop_atom = 0;
+  for (auto &candidate : modify->get_fix_by_style("^property/atom")) {
+    if (!candidate->kokkosable)
+      error->all(FLERR, "KOKKOS package requires a Kokkos-enabled version of fix property/atom");
+    nprop_atom++;
+    matches.push_back(candidate);
+  }
+
+  // drop the old pointer array and rebuild it with one slot per matching fix
+  delete[] fix_prop_atom;
+  fix_prop_atom = new FixPropertyAtomKokkos *[nprop_atom];
+
+  for (int slot = 0; slot < nprop_atom; ++slot)
+    fix_prop_atom[slot] = dynamic_cast<FixPropertyAtomKokkos *>(matches[slot]);
+}
+
+/* ---------------------------------------------------------------------- */
+
 void AtomKokkos::sync(const ExecutionSpace space, unsigned int mask)
 {
-  if (space == Device && lmp->kokkos->auto_sync) avecKK->modified(Host, mask);
+  if (space == Device && lmp->kokkos->auto_sync) {
+    avecKK->modified(Host, mask);
+    for (int n = 0; n < nprop_atom; n++) fix_prop_atom[n]->modified(Host, mask);
+  }
 
   avecKK->sync(space, mask);
+  for (int n = 0; n < nprop_atom; n++) fix_prop_atom[n]->sync(space, mask);
 }
 
 /* ---------------------------------------------------------------------- */
@@ -137,13 +167,20 @@ void AtomKokkos::sync(const ExecutionSpace space, unsigned int mask)
 void AtomKokkos::modified(const ExecutionSpace space, unsigned int mask)
 {
   avecKK->modified(space, mask);
+  for (int n = 0; n < nprop_atom; n++) fix_prop_atom[n]->modified(space, mask);
 
-  if (space == Device && lmp->kokkos->auto_sync) avecKK->sync(Host, mask);
+  if (space == Device && lmp->kokkos->auto_sync) {
+    avecKK->sync(Host, mask);
+    for (int n = 0; n < nprop_atom; n++) fix_prop_atom[n]->sync(Host, mask);
+  }
 }
 
+/* ---------------------------------------------------------------------- */
+
 void AtomKokkos::sync_overlapping_device(const ExecutionSpace space, unsigned int mask)
 {
   avecKK->sync_overlapping_device(space, mask);
+  for (int n = 0; n < nprop_atom; n++) fix_prop_atom[n]->sync_overlapping_device(space, mask);
 }
 /* ---------------------------------------------------------------------- */
 
@@ -375,7 +412,7 @@ AtomVec *AtomKokkos::new_avec(const std::string &style, int trysuffix, int &sfla
   int hybrid_substyle_flag = (avec != nullptr);
 
   AtomVec *avec = Atom::new_avec(style, trysuffix, sflag);
-  if (!avec->kokkosable) error->all(FLERR, "KOKKOS package requires a kokkos enabled atom_style");
+  if (!avec->kokkosable) error->all(FLERR, "KOKKOS package requires a Kokkos-enabled atom_style");
 
   if (!hybrid_substyle_flag)
     avecKK = dynamic_cast<AtomVecKokkos*>(avec);
diff --git a/src/KOKKOS/atom_kokkos.h b/src/KOKKOS/atom_kokkos.h
index f8b00f21f2..21a9aeebbd 100644
--- a/src/KOKKOS/atom_kokkos.h
+++ b/src/KOKKOS/atom_kokkos.h
@@ -14,6 +14,7 @@
 
 #include "atom.h"               // IWYU pragma: export
 #include "kokkos_type.h"
+#include "fix_property_atom_kokkos.h"
 
 #include <Kokkos_Sort.hpp>
 
@@ -25,6 +26,8 @@ namespace LAMMPS_NS {
 class AtomKokkos : public Atom {
  public:
   bool sort_classic;
+  int nprop_atom;
+  FixPropertyAtomKokkos** fix_prop_atom;
 
   DAT::tdual_tagint_1d k_tag;
   DAT::tdual_int_1d k_type, k_mask;
@@ -144,6 +147,7 @@ class AtomKokkos : public Atom {
   }
 
   void init() override;
+  void update_property_atom();
   void allocate_type_arrays() override;
   void sync(const ExecutionSpace space, unsigned int mask);
   void modified(const ExecutionSpace space, unsigned int mask);
diff --git a/src/KOKKOS/atom_vec_dpd_kokkos.cpp b/src/KOKKOS/atom_vec_dpd_kokkos.cpp
index a8ce29f666..c3430b9f6e 100644
--- a/src/KOKKOS/atom_vec_dpd_kokkos.cpp
+++ b/src/KOKKOS/atom_vec_dpd_kokkos.cpp
@@ -963,7 +963,6 @@ void AtomVecDPDKokkos::sync(ExecutionSpace space, unsigned int mask)
     if (mask & UCG_MASK) atomKK->k_uCG.sync<LMPDeviceType>();
     if (mask & UCGNEW_MASK) atomKK->k_uCGnew.sync<LMPDeviceType>();
     if (mask & DUCHEM_MASK) atomKK->k_duChem.sync<LMPDeviceType>();
-    if (mask & DVECTOR_MASK) atomKK->k_dvector.sync<LMPDeviceType>();
   } else {
     if (mask & X_MASK) atomKK->k_x.sync<LMPHostType>();
     if (mask & V_MASK) atomKK->k_v.sync<LMPHostType>();
@@ -980,7 +979,6 @@ void AtomVecDPDKokkos::sync(ExecutionSpace space, unsigned int mask)
     if (mask & UCG_MASK) atomKK->k_uCG.sync<LMPHostType>();
     if (mask & UCGNEW_MASK) atomKK->k_uCGnew.sync<LMPHostType>();
     if (mask & DUCHEM_MASK) atomKK->k_duChem.sync<LMPHostType>();
-    if (mask & DVECTOR_MASK) atomKK->k_dvector.sync<LMPHostType>();
   }
 }
 
@@ -1019,8 +1017,6 @@ void AtomVecDPDKokkos::sync_overlapping_device(ExecutionSpace space, unsigned in
       perform_async_copy<DAT::tdual_efloat_1d>(atomKK->k_uCGnew,space);
     if ((mask & DUCHEM_MASK) && atomKK->k_duChem.need_sync<LMPDeviceType>())
       perform_async_copy<DAT::tdual_efloat_1d>(atomKK->k_duChem,space);
-    if ((mask & DVECTOR_MASK) && atomKK->k_dvector.need_sync<LMPDeviceType>())
-      perform_async_copy<DAT::tdual_float_2d>(atomKK->k_dvector,space);
   } else {
     if ((mask & X_MASK) && atomKK->k_x.need_sync<LMPHostType>())
       perform_async_copy<DAT::tdual_x_array>(atomKK->k_x,space);
@@ -1052,8 +1048,6 @@ void AtomVecDPDKokkos::sync_overlapping_device(ExecutionSpace space, unsigned in
       perform_async_copy<DAT::tdual_efloat_1d>(atomKK->k_uCGnew,space);
     if ((mask & DUCHEM_MASK) && atomKK->k_duChem.need_sync<LMPHostType>())
       perform_async_copy<DAT::tdual_efloat_1d>(atomKK->k_duChem,space);
-    if ((mask & DVECTOR_MASK) && atomKK->k_dvector.need_sync<LMPHostType>())
-      perform_async_copy<DAT::tdual_float_2d>(atomKK->k_dvector,space);
   }
 }
 
@@ -1077,7 +1071,6 @@ void AtomVecDPDKokkos::modified(ExecutionSpace space, unsigned int mask)
     if (mask & UCG_MASK) atomKK->k_uCG.modify<LMPDeviceType>();
     if (mask & UCGNEW_MASK) atomKK->k_uCGnew.modify<LMPDeviceType>();
     if (mask & DUCHEM_MASK) atomKK->k_duChem.modify<LMPDeviceType>();
-    if (mask & DVECTOR_MASK) atomKK->k_dvector.modify<LMPDeviceType>();
   } else {
     if (mask & X_MASK) atomKK->k_x.modify<LMPHostType>();
     if (mask & V_MASK) atomKK->k_v.modify<LMPHostType>();
@@ -1094,6 +1087,5 @@ void AtomVecDPDKokkos::modified(ExecutionSpace space, unsigned int mask)
     if (mask & UCG_MASK) atomKK->k_uCG.modify<LMPHostType>();
     if (mask & UCGNEW_MASK) atomKK->k_uCGnew.modify<LMPHostType>();
     if (mask & DUCHEM_MASK) atomKK->k_duChem.modify<LMPHostType>();
-    if (mask & DVECTOR_MASK) atomKK->k_dvector.modify<LMPHostType>();
   }
 }
diff --git a/src/KOKKOS/atom_vec_kokkos.h b/src/KOKKOS/atom_vec_kokkos.h
index d3b2578b68..c10ff5b40a 100644
--- a/src/KOKKOS/atom_vec_kokkos.h
+++ b/src/KOKKOS/atom_vec_kokkos.h
@@ -139,6 +139,8 @@ class AtomVecKokkos : virtual public AtomVec {
 
   DAT::tdual_int_1d k_count;
 
+ public:
+
   #ifdef LMP_KOKKOS_GPU
   template<class ViewType>
   Kokkos::View<typename ViewType::data_type,
diff --git a/src/KOKKOS/fix_dt_reset_kokkos.cpp b/src/KOKKOS/fix_dt_reset_kokkos.cpp
index f3435e711e..4c7545cee0 100644
--- a/src/KOKKOS/fix_dt_reset_kokkos.cpp
+++ b/src/KOKKOS/fix_dt_reset_kokkos.cpp
@@ -113,7 +113,7 @@ void FixDtResetKokkos<DeviceType>::end_of_step()
    update->dt = dt;
    update->dt_default = 0;
    if (force->pair) force->pair->reset_dt();
-   for (int i = 0; i < modify->nfix; i++) modify->fix[i]->reset_dt();
+   for (auto &ifix : modify->get_fix_list()) ifix->reset_dt();
    output->reset_dt();
 
 }
diff --git a/src/KOKKOS/fix_efield_kokkos.cpp b/src/KOKKOS/fix_efield_kokkos.cpp
new file mode 100644
index 0000000000..ffe1c34e97
--- /dev/null
+++ b/src/KOKKOS/fix_efield_kokkos.cpp
@@ -0,0 +1,316 @@
+// clang-format off
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Trung Nguyen (U Chicago)
+------------------------------------------------------------------------- */
+
+#include "fix_efield_kokkos.h"
+
+#include "atom_kokkos.h"
+#include "update.h"
+#include "modify.h"
+#include "domain_kokkos.h"
+#include "region.h"
+#include "input.h"
+#include "variable.h"
+#include "memory_kokkos.h"
+#include "error.h"
+#include "atom_masks.h"
+#include "kokkos_base.h"
+
+#include <cstring>
+
+using namespace LAMMPS_NS;
+using namespace FixConst;
+
+enum{NONE,CONSTANT,EQUAL,ATOM};
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+FixEfieldKokkos<DeviceType>::FixEfieldKokkos(LAMMPS *lmp, int narg, char **arg) :
+  FixEfield(lmp, narg, arg)
+{
+  // Kokkos bookkeeping: execution space from DeviceType, empty data masks
+  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
+  datamask_modify = datamask_read = EMPTY_MASK;
+  atomKK = static_cast<AtomKokkos *>(atom);
+  kokkosable = 1;
+  // release the efield left by the base constructor and re-create it via memoryKK (maxatom x 4)
+  memory->destroy(efield);
+  memoryKK->create_kokkos(k_efield,efield,maxatom,4,"efield:efield");
+  d_efield = k_efield.view<DeviceType>();
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+FixEfieldKokkos<DeviceType>::~FixEfieldKokkos()
+{
+  if (!copymode) {    // nothing is released while copymode is set
+    memoryKK->destroy_kokkos(k_efield,efield);
+    efield = nullptr;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void FixEfieldKokkos<DeviceType>::init()
+{
+  // base-class setup first, then reject integrate styles starting with "respa"
+  FixEfield::init();
+  const bool uses_respa = utils::strmatch(update->integrate_style,"^respa");
+  if (uses_respa) error->all(FLERR,"Cannot (yet) use respa with Kokkos");
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void FixEfieldKokkos<DeviceType>::post_force(int /*vflag*/)
+{
+  // Adds q*E to the force on every local atom that is in the fix group and,
+  // when a region is set, flagged in d_match.  fsum[0] accumulates
+  // -(force . unwrapped position); fsum[1..3] accumulate the force components.
+
+  atomKK->sync(execution_space, X_MASK | F_MASK | Q_MASK | IMAGE_MASK | MASK_MASK);
+
+  x = atomKK->k_x.view<DeviceType>();
+  f = atomKK->k_f.view<DeviceType>();
+  q = atomKK->k_q.view<DeviceType>();
+  image = atomKK->k_image.view<DeviceType>();
+  mask = atomKK->k_mask.view<DeviceType>();
+
+  int nlocal = atom->nlocal;
+
+  // update region if necessary
+  if (region) {
+    if (!utils::strmatch(region->style, "^block"))
+      error->all(FLERR,"Cannot (yet) use {}-style region with fix efield/kk",region->style);
+    region->prematch();
+    DAT::tdual_int_1d k_match = DAT::tdual_int_1d("efield:k_match",nlocal);
+    KokkosBase* regionKKBase = dynamic_cast<KokkosBase*>(region);
+    regionKKBase->match_all_kokkos(groupbit,k_match);
+    k_match.template sync<DeviceType>();
+    d_match = k_match.template view<DeviceType>();
+  }
+
+  // copies for lambda capture: the kernels below must skip atoms outside the region
+  const bool l_has_region = (region != nullptr);
+  auto l_match = d_match;
+
+  // reallocate efield array if necessary
+  if (varflag == ATOM && atom->nmax > maxatom) {
+    maxatom = atom->nmax;
+    memoryKK->destroy_kokkos(k_efield,efield);
+    memoryKK->create_kokkos(k_efield,efield,maxatom,4,"efield:efield");
+    d_efield = k_efield.view<DeviceType>();
+  }
+
+  fsum[0] = fsum[1] = fsum[2] = fsum[3] = 0.0;
+  double_4 fsum_kk;
+  force_flag = 0;
+
+  if (varflag == CONSTANT) {
+    copymode = 1;
+    // It would be more concise to use the operators below, but there is still an issue with unwrap (TODO below)
+    //Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagFixEfieldConstant>(0,nlocal),*this,fsum_kk);
+    {
+    // local variables for lambda capture
+    auto prd = Few<double,3>(domain->prd);
+    auto h = Few<double,6>(domain->h);
+    auto triclinic = domain->triclinic;
+    auto l_ex = ex;
+    auto l_ey = ey;
+    auto l_ez = ez;
+    auto l_x = x;
+    auto l_q = q;
+    auto l_f = f;
+    auto l_mask = mask;
+    auto l_image = image;
+    auto l_groupbit = groupbit;
+
+    Kokkos::parallel_reduce(nlocal, LAMMPS_LAMBDA(const int& i, double_4& fsum_kk) {
+      if ((l_mask[i] & l_groupbit) && (!l_has_region || l_match[i])) {
+        Few<double,3> x_i;
+        x_i[0] = l_x(i,0);
+        x_i[1] = l_x(i,1);
+        x_i[2] = l_x(i,2);
+        auto unwrap = DomainKokkos::unmap(prd,h,triclinic,x_i,l_image(i));
+        auto qtmp = l_q(i);
+        auto fx = qtmp * l_ex;
+        auto fy = qtmp * l_ey;
+        auto fz = qtmp * l_ez;
+        l_f(i,0) += fx;
+        l_f(i,1) += fy;
+        l_f(i,2) += fz;
+        fsum_kk.d0 -= fx * unwrap[0] + fy * unwrap[1] + fz * unwrap[2];
+        fsum_kk.d1 += fx;
+        fsum_kk.d2 += fy;
+        fsum_kk.d3 += fz;
+      }
+    },fsum_kk);
+    }
+
+    copymode = 0;
+
+  // variable force, wrap with clear/add
+  } else {
+    atomKK->sync(Host,ALL_MASK); // this can be removed when variable class is ported to Kokkos
+
+    modify->clearstep_compute();
+
+    if (xstyle == EQUAL) ex = input->variable->compute_equal(xvar);
+    else if (xstyle == ATOM)
+      input->variable->compute_atom(xvar,igroup,&efield[0][0],4,0);
+    if (ystyle == EQUAL) ey = input->variable->compute_equal(yvar);
+    else if (ystyle == ATOM)
+      input->variable->compute_atom(yvar,igroup,&efield[0][1],4,0);
+    if (zstyle == EQUAL) ez = input->variable->compute_equal(zvar);
+    else if (zstyle == ATOM)
+      input->variable->compute_atom(zvar,igroup,&efield[0][2],4,0);
+
+    modify->addstep_compute(update->ntimestep + 1);
+
+    if (varflag == ATOM) {  // this can be removed when variable class is ported to Kokkos
+      k_efield.modify<LMPHostType>();
+      k_efield.sync<DeviceType>();
+    }
+
+    copymode = 1;
+    // It would be more concise to use the operators below, but there is still an issue with unwrap (TODO below)
+    //Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagFixEfieldNonConstant>(0,nlocal),*this,fsum_kk);
+    {
+    // local variables for lambda capture
+    auto prd = Few<double,3>(domain->prd);
+    auto h = Few<double,6>(domain->h);
+    auto triclinic = domain->triclinic;
+    auto l_ex = ex;
+    auto l_ey = ey;
+    auto l_ez = ez;
+    auto l_d_efield = d_efield;
+    auto l_x = x;
+    auto l_q = q;
+    auto l_f = f;
+    auto l_mask = mask;
+    auto l_image = image;
+    auto l_groupbit = groupbit;
+    auto l_xstyle = xstyle;
+    auto l_ystyle = ystyle;
+    auto l_zstyle = zstyle;
+
+    Kokkos::parallel_reduce(nlocal, LAMMPS_LAMBDA(const int& i, double_4& fsum_kk) {
+      if ((l_mask[i] & l_groupbit) && (!l_has_region || l_match[i])) {
+        Few<double,3> x_i;
+        x_i[0] = l_x(i,0);
+        x_i[1] = l_x(i,1);
+        x_i[2] = l_x(i,2);
+        auto unwrap = DomainKokkos::unmap(prd,h,triclinic,x_i,l_image(i));
+        auto qtmp = l_q(i);
+        auto fx = qtmp * l_ex;
+        auto fy = qtmp * l_ey;
+        auto fz = qtmp * l_ez;
+        if (l_xstyle == ATOM) l_f(i,0) += qtmp * l_d_efield(i,0);
+        else if (l_xstyle) l_f(i,0) += fx;
+        if (l_ystyle == ATOM) l_f(i,1) += qtmp * l_d_efield(i,1);
+        else if (l_ystyle) l_f(i,1) += fy;
+        if (l_zstyle == ATOM) l_f(i,2) += qtmp * l_d_efield(i,2);
+        else if (l_zstyle) l_f(i,2) += fz;
+        fsum_kk.d0 -= fx * unwrap[0] + fy * unwrap[1] + fz * unwrap[2];
+        fsum_kk.d1 += fx;
+        fsum_kk.d2 += fy;
+        fsum_kk.d3 += fz;
+      }
+    },fsum_kk);
+    }
+
+    copymode = 0;
+  }
+
+  atomKK->modified(execution_space, F_MASK);
+
+  fsum[0] = fsum_kk.d0;
+  fsum[1] = fsum_kk.d1;
+  fsum[2] = fsum_kk.d2;
+  fsum[3] = fsum_kk.d3;
+}
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+void FixEfieldKokkos<DeviceType>::operator()(TagFixEfieldConstant, const int &i, double_4& fsum_kk) const {    // tagged reduction body for the constant-field case; its only visible launch in post_force() is commented out
+  if (mask[i] & groupbit) {
+    if (region && !d_match[i]) return;    // skip atoms not flagged in d_match when a region is set
+
+    auto prd = Few<double,3>(domain->prd);    // NOTE(review): dereferences the domain pointer inside the kernel body - possibly related to the TODO below; confirm
+    auto h = Few<double,6>(domain->h);
+    auto triclinic = domain->triclinic;
+    Few<double,3> x_i;
+    x_i[0] = x(i,0);
+    x_i[1] = x(i,1);
+    x_i[2] = x(i,2);
+    auto unwrap = DomainKokkos::unmap(prd,h,triclinic,x_i,image(i));
+    const F_FLOAT qtmp = q(i);
+    const F_FLOAT fx = qtmp * ex;    // force on atom i is charge times field component
+    const F_FLOAT fy = qtmp * ey;
+    const F_FLOAT fz = qtmp * ez;
+    f(i,0) += fx;
+    f(i,1) += fy;
+    f(i,2) += fz;
+    // TODO: access to unwrap below crashes
+    fsum_kk.d0 -= fx * unwrap[0] + fy * unwrap[1] + fz * unwrap[2];    // d0: minus force dotted with the unmapped position
+    fsum_kk.d1 += fx;    // d1..d3: summed force components
+    fsum_kk.d2 += fy;
+    fsum_kk.d3 += fz;
+  }
+}
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+void FixEfieldKokkos<DeviceType>::operator()(TagFixEfieldNonConstant, const int &i, double_4& fsum_kk) const {    // tagged reduction body for variable fields; its only visible launch in post_force() is commented out
+  auto prd = Few<double,3>(domain->prd);    // NOTE(review): dereferences the domain pointer inside the kernel body - possibly related to the TODO below; confirm
+  auto h = Few<double,6>(domain->h);
+  auto triclinic = domain->triclinic;
+  if (mask[i] & groupbit) {
+    if (region && !d_match[i]) return;    // skip atoms not flagged in d_match when a region is set
+    Few<double,3> x_i;
+    x_i[0] = x(i,0);
+    x_i[1] = x(i,1);
+    x_i[2] = x(i,2);
+    auto unwrap = DomainKokkos::unmap(prd,h,triclinic,x_i,image(i));
+    const F_FLOAT qtmp = q[i];
+    const F_FLOAT fx = qtmp * ex;
+    const F_FLOAT fy = qtmp * ey;
+    const F_FLOAT fz = qtmp * ez;
+    if (xstyle == ATOM) f(i,0) += qtmp * d_efield(i,0);    // per-atom field times charge, matching the lambda kernel in post_force()
+    else if (xstyle) f(i,0) += fx;
+    if (ystyle == ATOM) f(i,1) += qtmp * d_efield(i,1);
+    else if (ystyle) f(i,1) += fy;
+    if (zstyle == ATOM) f(i,2) += qtmp * d_efield(i,2);
+    else if (zstyle) f(i,2) += fz;
+    // TODO: access to unwrap below crashes
+    fsum_kk.d0 -= fx * unwrap[0] + fy * unwrap[1] + fz * unwrap[2];    // d0: minus (fx,fy,fz) dotted with the unmapped position
+    fsum_kk.d1 += fx;    // d1..d3 sum fx, fy, fz (computed from ex, ey, ez) for every style
+    fsum_kk.d2 += fy;
+    fsum_kk.d3 += fz;
+  }
+}
+
+namespace LAMMPS_NS {
+template class FixEfieldKokkos<LMPDeviceType>;
+#ifdef LMP_KOKKOS_GPU
+template class FixEfieldKokkos<LMPHostType>;
+#endif
+}
+
diff --git a/src/KOKKOS/fix_efield_kokkos.h b/src/KOKKOS/fix_efield_kokkos.h
new file mode 100644
index 0000000000..d159473d1d
--- /dev/null
+++ b/src/KOKKOS/fix_efield_kokkos.h
@@ -0,0 +1,86 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef FIX_CLASS
+// clang-format off
+FixStyle(efield/kk,FixEfieldKokkos<LMPDeviceType>);
+FixStyle(efield/kk/device,FixEfieldKokkos<LMPDeviceType>);
+FixStyle(efield/kk/host,FixEfieldKokkos<LMPHostType>);
+// clang-format on
+#else
+
+// clang-format off
+#ifndef LMP_FIX_EFIELD_KOKKOS_H
+#define LMP_FIX_EFIELD_KOKKOS_H
+
+#include "fix_efield.h"
+#include "kokkos_type.h"
+
+namespace LAMMPS_NS {
+
+// four-component double accumulator; double_4 is the value_type of FixEfieldKokkos
+struct e_double_4 {
+  double d0, d1, d2, d3;
+  // every component starts at zero
+  KOKKOS_INLINE_FUNCTION
+  e_double_4() : d0(0.0), d1(0.0), d2(0.0), d3(0.0) {}
+
+  // component-wise accumulation
+  KOKKOS_INLINE_FUNCTION
+  e_double_4& operator+=(const e_double_4 &other) {
+    d0 += other.d0; d1 += other.d1;
+    d2 += other.d2; d3 += other.d3;
+    return *this;
+  }
+};
+typedef e_double_4 double_4;
+
+struct TagFixEfieldConstant{};      // empty tag type selecting the TagFixEfieldConstant operator() overload
+
+struct TagFixEfieldNonConstant{};   // empty tag type selecting the TagFixEfieldNonConstant operator() overload
+
+template<class DeviceType>
+class FixEfieldKokkos : public FixEfield {   // FixEfield subclass templated on the Kokkos device type
+ public:
+  typedef DeviceType device_type;
+  typedef double_4 value_type;      // same type as the accumulator argument of both operator() overloads
+  typedef ArrayTypes<DeviceType> AT;
+
+  FixEfieldKokkos(class LAMMPS *, int, char **);
+  ~FixEfieldKokkos() override;
+  void init() override;
+  void post_force(int) override;
+
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagFixEfieldConstant, const int&, double_4&) const;      // args: tag, atom index, accumulator
+
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagFixEfieldNonConstant, const int&, double_4&) const;   // args: tag, atom index, accumulator
+
+ private:
+  DAT::tdual_ffloat_2d k_efield;
+  typename AT::t_ffloat_2d_randomread d_efield;   // per-atom values added to f for ATOM-style components
+  typename AT::t_int_1d d_match;                  // atoms with a zero entry are skipped when a region is set
+
+  typename AT::t_x_array_randomread x;            // read as x(i,0..2) and passed to DomainKokkos::unmap
+  typename AT::t_float_1d_randomread q;           // multiplied by ex/ey/ez to form fx/fy/fz
+  typename AT::t_f_array f;                       // incremented in place by the operator() overloads
+  typename AT::t_imageint_1d_randomread image;    // image(i) is passed to DomainKokkos::unmap
+  typename AT::t_int_1d_randomread mask;          // tested against groupbit
+};
+
+}
+
+#endif
+#endif
+
diff --git a/src/KOKKOS/fix_property_atom_kokkos.cpp b/src/KOKKOS/fix_property_atom_kokkos.cpp
index 1de07b39dc..dcd943cac6 100644
--- a/src/KOKKOS/fix_property_atom_kokkos.cpp
+++ b/src/KOKKOS/fix_property_atom_kokkos.cpp
@@ -30,7 +30,46 @@ FixPropertyAtomKokkos::FixPropertyAtomKokkos(LAMMPS *lmp, int narg, char **arg)
   FixPropertyAtom(lmp, narg, arg)
 {
   atomKK = (AtomKokkos *) atom;
-  grow_arrays(atom->nmax);
+  kokkosable = 1;
+
+  dvector_flag = 0;
+  for (int nv = 0; nv < nvalue; nv++)
+    if (styles[nv] == DVEC) dvector_flag = 1;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixPropertyAtomKokkos::post_constructor()
+{
+  atomKK->update_property_atom();   // runs before the base-class hook; NOTE(review): presumably refreshes AtomKokkos' view of property/atom fixes - confirm
+
+  FixPropertyAtom::post_constructor();   // then defer to the base-class implementation
+}
+
+/* ---------------------------------------------------------------------- */
+
+FixPropertyAtomKokkos::~FixPropertyAtomKokkos()
+{
+  // deallocate per-atom vectors in Atom class
+  // set ptrs to a null pointer, so they no longer exist for Atom class
+
+  for (int nv = 0; nv < nvalue; nv++) {   // only MOLECULE, CHARGE and RMASS styles are handled in this loop
+    if (styles[nv] == MOLECULE) {
+      atom->molecule_flag = 0;            // clear the flag, then free the dual view together with the raw pointer
+      memoryKK->destroy_kokkos(atomKK->k_molecule,atom->molecule);
+      atom->molecule = nullptr;
+    } else if (styles[nv] == CHARGE) {
+      atom->q_flag = 0;
+      memoryKK->destroy_kokkos(atomKK->k_q,atom->q);
+      atom->q = nullptr;
+    } else if (styles[nv] == RMASS) {
+      atom->rmass_flag = 0;
+      memoryKK->destroy_kokkos(atomKK->k_rmass,atom->rmass);
+      atom->rmass = nullptr;
+    }
+  }
+
+  atomKK->update_property_atom();   // same call as in post_constructor(), issued after the arrays are released
 }
 
 /* ----------------------------------------------------------------------
@@ -44,17 +83,17 @@ void FixPropertyAtomKokkos::grow_arrays(int nmax)
 {
   for (int nv = 0; nv < nvalue; nv++) {
     if (styles[nv] == MOLECULE) {
-      memory->grow(atom->molecule,nmax,"atom:molecule");
-      size_t nbytes = (nmax-nmax_old) * sizeof(tagint);
-      memset(&atom->molecule[nmax_old],0,nbytes);
+      atomKK->sync(Device,MOLECULE_MASK);
+      memoryKK->grow_kokkos(atomKK->k_molecule,atom->molecule,nmax,"atom:molecule");
+      atomKK->modified(Device,MOLECULE_MASK);
     } else if (styles[nv] == CHARGE) {
-      memory->grow(atom->q,nmax,"atom:q");
-      size_t nbytes = (nmax-nmax_old) * sizeof(double);
-      memset(&atom->q[nmax_old],0,nbytes);
+      atomKK->sync(Device,Q_MASK);
+      memoryKK->grow_kokkos(atomKK->k_q,atom->q,nmax,"atom:q");
+      atomKK->modified(Device,Q_MASK);
     } else if (styles[nv] == RMASS) {
-      memory->grow(atom->rmass,nmax,"atom:rmass");
-      size_t nbytes = (nmax-nmax_old) * sizeof(double);
-      memset(&atom->rmass[nmax_old],0,nbytes);
+      atomKK->sync(Device,RMASS_MASK);
+      memoryKK->grow_kokkos(atomKK->k_rmass,atom->rmass,nmax,"atom:rmass");
+      atomKK->modified(Device,RMASS_MASK);
     } else if (styles[nv] == TEMPERATURE) {
       memory->grow(atom->temperature, nmax, "atom:temperature");
       size_t nbytes = (nmax - nmax_old) * sizeof(double);
@@ -69,7 +108,7 @@ void FixPropertyAtomKokkos::grow_arrays(int nmax)
       memset(&atom->ivector[index[nv]][nmax_old],0,nbytes);
     } else if (styles[nv] == DVEC) {
       atomKK->sync(Device,DVECTOR_MASK);
-      memoryKK->grow_kokkos(atomKK->k_dvector,atomKK->dvector,atomKK->k_dvector.extent(0),nmax,
+      memoryKK->grow_kokkos(atomKK->k_dvector,atom->dvector,atomKK->k_dvector.extent(0),nmax,
                           "atom:dvector");
       atomKK->modified(Device,DVECTOR_MASK);
     } else if (styles[nv] == IARRAY) {
@@ -84,3 +123,62 @@ void FixPropertyAtomKokkos::grow_arrays(int nmax)
   }
   nmax_old = nmax;
 }
+
+/* ---------------------------------------------------------------------- */
+
+void FixPropertyAtomKokkos::sync(ExecutionSpace space, unsigned int mask)
+{
+  if (space != Device) {  // any non-Device space: sync each enabled view whose bit is set in mask to LMPHostType
+    if (molecule_flag && (mask & MOLECULE_MASK)) atomKK->k_molecule.sync<LMPHostType>();
+    if (q_flag && (mask & Q_MASK)) atomKK->k_q.sync<LMPHostType>();
+    if (rmass_flag && (mask & RMASS_MASK)) atomKK->k_rmass.sync<LMPHostType>();
+    if (dvector_flag && (mask & DVECTOR_MASK)) atomKK->k_dvector.sync<LMPHostType>();
+    return;
+  }
+  if (molecule_flag && (mask & MOLECULE_MASK)) atomKK->k_molecule.sync<LMPDeviceType>();  // Device: same selection
+  if (q_flag && (mask & Q_MASK)) atomKK->k_q.sync<LMPDeviceType>();
+  if (rmass_flag && (mask & RMASS_MASK)) atomKK->k_rmass.sync<LMPDeviceType>();
+  if (dvector_flag && (mask & DVECTOR_MASK)) atomKK->k_dvector.sync<LMPDeviceType>();
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixPropertyAtomKokkos::sync_overlapping_device(ExecutionSpace space, unsigned int mask)
+{
+  if (space != Device) {  // non-Device target: async-copy each requested view that reports need_sync<LMPHostType>()
+    if ((mask & MOLECULE_MASK) && atomKK->k_molecule.need_sync<LMPHostType>())
+      atomKK->avecKK->perform_async_copy<DAT::tdual_tagint_1d>(atomKK->k_molecule,space);
+    if ((mask & Q_MASK) && atomKK->k_q.need_sync<LMPHostType>())
+      atomKK->avecKK->perform_async_copy<DAT::tdual_float_1d>(atomKK->k_q,space);
+    if ((mask & RMASS_MASK) && atomKK->k_rmass.need_sync<LMPHostType>())
+      atomKK->avecKK->perform_async_copy<DAT::tdual_float_1d>(atomKK->k_rmass,space);
+    if ((mask & DVECTOR_MASK) && atomKK->k_dvector.need_sync<LMPHostType>())
+      atomKK->avecKK->perform_async_copy<DAT::tdual_float_2d>(atomKK->k_dvector,space);
+    return;
+  }
+  if ((mask & MOLECULE_MASK) && atomKK->k_molecule.need_sync<LMPDeviceType>())
+    atomKK->avecKK->perform_async_copy<DAT::tdual_tagint_1d>(atomKK->k_molecule,space);
+  if ((mask & Q_MASK) && atomKK->k_q.need_sync<LMPDeviceType>())
+    atomKK->avecKK->perform_async_copy<DAT::tdual_float_1d>(atomKK->k_q,space);
+  if ((mask & RMASS_MASK) && atomKK->k_rmass.need_sync<LMPDeviceType>())
+    atomKK->avecKK->perform_async_copy<DAT::tdual_float_1d>(atomKK->k_rmass,space);
+  if ((mask & DVECTOR_MASK) && atomKK->k_dvector.need_sync<LMPDeviceType>())
+    atomKK->avecKK->perform_async_copy<DAT::tdual_float_2d>(atomKK->k_dvector,space);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixPropertyAtomKokkos::modified(ExecutionSpace space, unsigned int mask)
+{
+  if (space != Device) {  // any non-Device space: flag each enabled view whose bit is set in mask as modified on LMPHostType
+    if (molecule_flag && (mask & MOLECULE_MASK)) atomKK->k_molecule.modify<LMPHostType>();
+    if (q_flag && (mask & Q_MASK)) atomKK->k_q.modify<LMPHostType>();
+    if (rmass_flag && (mask & RMASS_MASK)) atomKK->k_rmass.modify<LMPHostType>();
+    if (dvector_flag && (mask & DVECTOR_MASK)) atomKK->k_dvector.modify<LMPHostType>();
+    return;
+  }
+  if (molecule_flag && (mask & MOLECULE_MASK)) atomKK->k_molecule.modify<LMPDeviceType>();  // Device: same selection
+  if (q_flag && (mask & Q_MASK)) atomKK->k_q.modify<LMPDeviceType>();
+  if (rmass_flag && (mask & RMASS_MASK)) atomKK->k_rmass.modify<LMPDeviceType>();
+  if (dvector_flag && (mask & DVECTOR_MASK)) atomKK->k_dvector.modify<LMPDeviceType>();
+}
diff --git a/src/KOKKOS/fix_property_atom_kokkos.h b/src/KOKKOS/fix_property_atom_kokkos.h
index 90eddc98e0..adbe6ab20b 100644
--- a/src/KOKKOS/fix_property_atom_kokkos.h
+++ b/src/KOKKOS/fix_property_atom_kokkos.h
@@ -22,14 +22,23 @@ FixStyle(property/atom/kk,FixPropertyAtomKokkos);
 #define LMP_FIX_PROPERTY_ATOM_KOKKOS_H
 
 #include "fix_property_atom.h"
+#include "atom_vec_kokkos.h"
 
 namespace LAMMPS_NS {
 
 class FixPropertyAtomKokkos : public FixPropertyAtom {
  public:
   FixPropertyAtomKokkos(class LAMMPS *, int, char **);
-
+  void post_constructor() override;
+  ~FixPropertyAtomKokkos() override;
   void grow_arrays(int) override;
+
+  void sync(ExecutionSpace space, unsigned int mask);
+  void modified(ExecutionSpace space, unsigned int mask);
+  void sync_overlapping_device(ExecutionSpace space, unsigned int mask);
+
+ private:
+  int dvector_flag;
 };
 
 }
diff --git a/src/KOKKOS/fix_spring_self_kokkos.cpp b/src/KOKKOS/fix_spring_self_kokkos.cpp
new file mode 100644
index 0000000000..efd8a652ff
--- /dev/null
+++ b/src/KOKKOS/fix_spring_self_kokkos.cpp
@@ -0,0 +1,332 @@
+// clang-format off
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Trung Nguyen (U Chicago)
+------------------------------------------------------------------------- */
+
+#include "fix_spring_self_kokkos.h"
+
+#include "atom_kokkos.h"
+#include "update.h"
+#include "modify.h"
+#include "domain_kokkos.h"
+#include "region.h"
+#include "input.h"
+#include "variable.h"
+#include "memory_kokkos.h"
+#include "error.h"
+#include "atom_masks.h"
+#include "kokkos_base.h"
+
+#include <cstring>
+
+using namespace LAMMPS_NS;
+using namespace FixConst;
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+FixSpringSelfKokkos<DeviceType>::FixSpringSelfKokkos(LAMMPS *lmp, int narg, char **arg) :
+  FixSpringSelf(lmp, narg, arg)
+{
+  kokkosable = 1;
+  exchange_comm_device = 1;
+  atomKK = (AtomKokkos *) atom;
+  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
+  datamask_read = EMPTY_MASK;
+  datamask_modify = EMPTY_MASK;
+
+  xoriginal_tmp = xoriginal;   // hold on to the existing array so its values can be copied below
+  xoriginal = nullptr;         // grow_arrays() then allocates xoriginal afresh together with k_xoriginal
+
+  int nmax = atom->nmax;
+  grow_arrays(nmax);
+
+  for (int i = 0; i < atom->nlocal; i++) {   // copy the saved coordinates of the local atoms into the host view
+    k_xoriginal.h_view(i,0) = xoriginal_tmp[i][0];
+    k_xoriginal.h_view(i,1) = xoriginal_tmp[i][1];
+    k_xoriginal.h_view(i,2) = xoriginal_tmp[i][2];
+  }
+
+  k_xoriginal.modify_host();   // the host view was just written
+
+  d_count = typename AT::t_int_scalar("spring/self:count");   // scalar written by pack_exchange_item
+  h_count = Kokkos::create_mirror_view(d_count);              // mirror read back in pack_exchange_kokkos
+
+  memory->destroy(xoriginal_tmp);   // the saved array is no longer needed
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+FixSpringSelfKokkos<DeviceType>::~FixSpringSelfKokkos()
+{
+  if (copymode) return;   // no cleanup while copymode is set (it is set to 1 around the kernel launches in this file)
+
+  memoryKK->destroy_kokkos(k_xoriginal,xoriginal);   // release the dual view and the raw pointer together
+  xoriginal = nullptr;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void FixSpringSelfKokkos<DeviceType>::init()
+{
+  FixSpringSelf::init();
+  // this Kokkos variant rejects any integrate style beginning with "respa"
+  const bool uses_respa = utils::strmatch(update->integrate_style,"^respa");
+  if (uses_respa) error->all(FLERR,"Cannot (yet) use respa with Kokkos");
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void FixSpringSelfKokkos<DeviceType>::post_force(int /*vflag*/)
+{
+  atomKK->sync(execution_space, X_MASK | F_MASK | IMAGE_MASK | MASK_MASK);   // x, f, image and mask are used below
+
+  x = atomKK->k_x.view<DeviceType>();
+  f = atomKK->k_f.view<DeviceType>();
+  image = atomKK->k_image.view<DeviceType>();
+  mask = atomKK->k_mask.view<DeviceType>();
+  int nlocal = atom->nlocal;
+
+  double espring_kk;   // receives the parallel_reduce result below
+
+  k_xoriginal.modify<LMPHostType>();   // NOTE(review): flags the host copy as modified on every call, before syncing
+  k_xoriginal.sync<DeviceType>();      // NOTE(review): confirm device-side edits from the exchange kernels cannot be lost here
+
+  copymode = 1;
+
+  {
+  // local variables for lambda capture
+  auto prd = Few<double,3>(domain->prd);
+  auto h = Few<double,6>(domain->h);
+  auto triclinic = domain->triclinic;
+  auto l_k = k;
+  auto l_xoriginal = d_xoriginal;
+
+  auto l_x = x;
+  auto l_f = f;
+  auto l_mask = mask;
+  auto l_image = image;
+  auto l_groupbit = groupbit;
+  auto l_xflag = xflag;
+  auto l_yflag = yflag;
+  auto l_zflag = zflag;
+
+  Kokkos::parallel_reduce(nlocal, LAMMPS_LAMBDA(const int& i, double& espring_kk) {
+    if (l_mask[i] & l_groupbit) {   // only atoms in the fix group
+      Few<double,3> x_i;
+      x_i[0] = l_x(i,0);
+      x_i[1] = l_x(i,1);
+      x_i[2] = l_x(i,2);
+      auto unwrap = DomainKokkos::unmap(prd,h,triclinic,x_i,l_image(i));
+      auto dx = unwrap[0] - l_xoriginal(i, 0);   // displacement of the unmapped position from the stored one
+      auto dy = unwrap[1] - l_xoriginal(i, 1);
+      auto dz = unwrap[2] - l_xoriginal(i, 2);
+      if (!l_xflag) dx = 0.0;                    // components whose flag is zero contribute nothing
+      if (!l_yflag) dy = 0.0;
+      if (!l_zflag) dz = 0.0;
+      l_f(i,0) -= l_k*dx;                        // force -k * displacement
+      l_f(i,1) -= l_k*dy;
+      l_f(i,2) -= l_k*dz;
+      espring_kk += l_k * (dx*dx + dy*dy + dz*dz);   // sums k*dr^2; the factor 0.5 is applied after the reduction
+    }
+  },espring_kk);
+  }
+
+  copymode = 0;
+
+  atomKK->modified(execution_space, F_MASK);   // only f was written
+
+  espring = 0.5*espring_kk;
+}
+
+/* ----------------------------------------------------------------------
+   allocate local atom-based arrays
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void FixSpringSelfKokkos<DeviceType>::grow_arrays(int nmax)
+{
+  memoryKK->grow_kokkos(k_xoriginal,xoriginal,nmax,"spring/self:xoriginal");   // grows the dual view and the raw xoriginal pointer together
+  d_xoriginal = k_xoriginal.view<DeviceType>();                                // re-fetch the DeviceType view after the grow
+}
+
+/* ----------------------------------------------------------------------
+   copy values within local atom-based arrays
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void FixSpringSelfKokkos<DeviceType>::copy_arrays(int i, int j, int delflag)
+{
+  k_xoriginal.sync_host();                    // sync the host copy before the base-class routine runs
+
+  FixSpringSelf::copy_arrays(i,j,delflag);    // the copy itself is done by the base class
+
+  k_xoriginal.modify_host();                  // flag the host copy as modified afterwards
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+void FixSpringSelfKokkos<DeviceType>::pack_exchange_item(const int &mysend, int &offset, const bool &final) const
+{
+  // parallel_scan body: offset must advance by 3 on every pass, but d_buf and
+  // d_xoriginal may only be touched on the final pass, when offset is the true
+  // prefix sum and d_xoriginal(i,*) has not yet been overwritten by the copy below
+  if (final) {
+    const int i = d_exchange_sendlist(mysend);
+    int m = nsend + offset;
+    d_buf[mysend] = m;                      // header slot: start of this item's data in d_buf
+    for (int d = 0; d < 3; d++) d_buf[m++] = d_xoriginal(i,d);
+    if (mysend == nsend-1) d_count() = m;   // the last item records the end position
+
+    // when the copy list names a source row j (j > -1), move its values into row i
+    const int j = d_copylist(mysend);
+    if (j > -1)
+      for (int d = 0; d < 3; d++) d_xoriginal(i,d) = d_xoriginal(j,d);
+  }
+  offset += 3;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+int FixSpringSelfKokkos<DeviceType>::pack_exchange_kokkos(
+   const int &nsend, DAT::tdual_xfloat_2d &k_buf,
+   DAT::tdual_int_1d k_exchange_sendlist, DAT::tdual_int_1d k_copylist,
+   ExecutionSpace space)
+{
+
+  k_buf.sync<DeviceType>();                 // all three inputs are read or written through their DeviceType views
+  k_copylist.sync<DeviceType>();
+  k_exchange_sendlist.sync<DeviceType>();
+
+  d_buf = typename ArrayTypes<DeviceType>::t_xfloat_1d_um(   // 1d view over the 2d buffer's storage
+    k_buf.template view<DeviceType>().data(),
+    k_buf.extent(0)*k_buf.extent(1));
+  d_copylist = k_copylist.view<DeviceType>();
+  d_exchange_sendlist = k_exchange_sendlist.view<DeviceType>();
+  this->nsend = nsend;                      // stored in the member read by pack_exchange_item
+
+
+  k_xoriginal.template sync<DeviceType>();
+
+  Kokkos::deep_copy(d_count,0);             // stays 0 unless pack_exchange_item writes it
+
+  copymode = 1;
+
+  FixSpringSelfKokkosPackExchangeFunctor<DeviceType> pack_exchange_functor(this);   // functor holds a copy of *this
+  Kokkos::parallel_scan(nsend,pack_exchange_functor);
+
+  copymode = 0;
+
+  k_buf.modify<DeviceType>();               // the scan wrote d_buf
+
+  if (space == Host) k_buf.sync<LMPHostType>();   // sync the buffer to the space the caller asked for
+  else k_buf.sync<LMPDeviceType>();
+
+  k_xoriginal.template modify<DeviceType>();      // pack_exchange_item may overwrite rows from the copy list
+
+  Kokkos::deep_copy(h_count,d_count);
+
+  return h_count();                         // value last stored in d_count
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+void FixSpringSelfKokkos<DeviceType>::operator()(TagFixSpringSelfUnpackExchange, const int &i) const
+{
+  // d_indices(i) is the destination row for received item i; items with index <= -1 are skipped
+  const int index = d_indices(i);
+  if (index > -1) {
+    int m = static_cast<int>(d_buf[i]);   // header slot holds the start of this item's data
+    // store the coordinates as floating point: an integer cast would truncate them
+    d_xoriginal(index,0) = d_buf[m++];
+    d_xoriginal(index,1) = d_buf[m++];
+    d_xoriginal(index,2) = d_buf[m++];
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+template <class DeviceType>
+void FixSpringSelfKokkos<DeviceType>::unpack_exchange_kokkos(
+  DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d &k_indices, int nrecv,
+  ExecutionSpace /*space*/)
+{
+  k_buf.sync<DeviceType>();        // both inputs are read through their DeviceType views
+  k_indices.sync<DeviceType>();
+
+  d_buf = typename ArrayTypes<DeviceType>::t_xfloat_1d_um(   // 1d view over the 2d buffer's storage
+    k_buf.template view<DeviceType>().data(),
+    k_buf.extent(0)*k_buf.extent(1));
+  d_indices = k_indices.view<DeviceType>();
+
+  k_xoriginal.template sync<DeviceType>();
+
+  copymode = 1;
+
+  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagFixSpringSelfUnpackExchange>(0,nrecv),*this);   // one call of the tagged operator() per received item
+
+  copymode = 0;
+
+  k_xoriginal.template modify<DeviceType>();   // the kernel wrote d_xoriginal
+}
+
+/* ----------------------------------------------------------------------
+   pack values in local atom-based arrays for exchange with another proc
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+int FixSpringSelfKokkos<DeviceType>::pack_exchange(int i, double *buf)
+{
+  // sync the host copy before the base-class routine runs
+  k_xoriginal.sync_host();
+  const int result = FixSpringSelf::pack_exchange(i,buf);
+
+  // flag the host copy as modified afterwards and hand back the base-class result
+  k_xoriginal.modify_host();
+  return result;
+}
+
+/* ----------------------------------------------------------------------
+   unpack values in local atom-based arrays from exchange with another proc
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+int FixSpringSelfKokkos<DeviceType>::unpack_exchange(int nlocal, double *buf)
+{
+  // sync the host copy before the base-class routine runs
+  k_xoriginal.sync_host();
+  const int result = FixSpringSelf::unpack_exchange(nlocal,buf);
+
+  // flag the host copy as modified afterwards and hand back the base-class result
+  k_xoriginal.modify_host();
+  return result;
+}
+
+namespace LAMMPS_NS {
+template class FixSpringSelfKokkos<LMPDeviceType>;   // explicit instantiation for LMPDeviceType
+#ifdef LMP_KOKKOS_GPU
+template class FixSpringSelfKokkos<LMPHostType>;     // LMPHostType variant is instantiated only when LMP_KOKKOS_GPU is defined
+#endif
+}
+
diff --git a/src/KOKKOS/fix_spring_self_kokkos.h b/src/KOKKOS/fix_spring_self_kokkos.h
new file mode 100644
index 0000000000..b23e92249b
--- /dev/null
+++ b/src/KOKKOS/fix_spring_self_kokkos.h
@@ -0,0 +1,108 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef FIX_CLASS
+// clang-format off
+FixStyle(spring/self/kk,FixSpringSelfKokkos<LMPDeviceType>);
+FixStyle(spring/self/kk/device,FixSpringSelfKokkos<LMPDeviceType>);
+FixStyle(spring/self/kk/host,FixSpringSelfKokkos<LMPHostType>);
+// clang-format on
+#else
+
+// clang-format off
+#ifndef LMP_FIX_SPRING_SELF_KOKKOS_H
+#define LMP_FIX_SPRING_SELF_KOKKOS_H
+
+#include "fix_spring_self.h"
+#include "kokkos_type.h"
+#include "kokkos_base.h"
+
+namespace LAMMPS_NS {
+
+struct TagFixSpringSelfUnpackExchange{};   // empty tag type selecting the unpack operator() overload
+
+template<class DeviceType>
+class FixSpringSelfKokkos : public FixSpringSelf, public KokkosBase {   // FixSpringSelf variant templated on the Kokkos device type
+ public:
+  typedef DeviceType device_type;
+  typedef double value_type;
+  typedef ArrayTypes<DeviceType> AT;
+
+  FixSpringSelfKokkos(class LAMMPS *, int, char **);
+  ~FixSpringSelfKokkos() override;
+  void init() override;
+  void grow_arrays(int) override;
+  void copy_arrays(int, int, int) override;
+  void post_force(int) override;
+
+  KOKKOS_INLINE_FUNCTION
+  void pack_exchange_item(const int&, int &, const bool &) const;      // args: send-item index, scan offset, final-pass flag
+
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagFixSpringSelfUnpackExchange, const int&) const;   // args: tag, received-item index
+
+  int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
+                           DAT::tdual_int_1d k_sendlist,
+                           DAT::tdual_int_1d k_copylist,
+                           ExecutionSpace space) override;
+
+  void unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,
+                              DAT::tdual_int_1d &indices,int nrecv,
+                              ExecutionSpace space) override;
+
+
+  int pack_exchange(int, double *) override;      // host wrappers around the FixSpringSelf versions
+  int unpack_exchange(int, double *) override;
+
+ protected:
+  DAT::tdual_x_array k_xoriginal;                 // dual view of the stored coordinates
+  typename AT::t_x_array d_xoriginal;             // DeviceType view of k_xoriginal, refreshed in grow_arrays()
+
+  typename AT::t_x_array_randomread x;            // atom views fetched in post_force()
+  typename AT::t_f_array f;
+  typename AT::t_imageint_1d_randomread image;
+  typename AT::t_int_1d_randomread mask;
+
+  int nsend;                                      // set in pack_exchange_kokkos(), read in pack_exchange_item()
+
+  typename AT::t_int_2d d_sendlist;               // NOTE(review): not referenced in the accompanying .cpp - confirm it is needed
+  typename AT::t_xfloat_1d_um d_buf;              // 1d view over the exchange buffer
+
+  typename AT::t_int_1d d_exchange_sendlist;      // row index of each item to pack
+  typename AT::t_int_1d d_copylist;               // source row copied over a packed row, or -1
+  typename AT::t_int_1d d_indices;                // destination row of each received item; rows <= -1 are skipped
+
+  typename AT::t_int_scalar d_count;              // end position written by the last packed item
+  HAT::t_int_scalar h_count;                      // host mirror of d_count
+
+  double **xoriginal_tmp;    // original coords of atoms
+
+};
+
+template <class DeviceType>
+struct FixSpringSelfKokkosPackExchangeFunctor {   // scan functor forwarding each item to pack_exchange_item()
+  using device_type = DeviceType;
+  using value_type = int;
+  FixSpringSelfKokkos<DeviceType> c;              // copy of the fix taken from *fix_ptr at construction
+  FixSpringSelfKokkosPackExchangeFunctor(FixSpringSelfKokkos<DeviceType> *fix_ptr) : c(*fix_ptr) {}
+  KOKKOS_INLINE_FUNCTION
+  void operator()(const int &mysend, int &offset, const bool &final) const {
+    c.pack_exchange_item(mysend, offset, final);
+  }
+};
+
+}
+
+#endif
+#endif
+
diff --git a/src/KOKKOS/kokkos.cpp b/src/KOKKOS/kokkos.cpp
index 91ea6d37ac..84a8f59dd0 100644
--- a/src/KOKKOS/kokkos.cpp
+++ b/src/KOKKOS/kokkos.cpp
@@ -137,13 +137,13 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
 
       int set_flag = 0;
       char *str;
-      if ((str = getenv("SLURM_LOCALID"))) {
+      if (str = getenv("SLURM_LOCALID")) {
         int local_rank = atoi(str);
         device = local_rank % ngpus;
         if (device >= skip_gpu) device++;
         set_flag = 1;
       }
-      if ((str = getenv("MPT_LRANK"))) {
+      if (str = getenv("FLUX_TASK_LOCAL_ID")) {
         if (ngpus > 0) {
           int local_rank = atoi(str);
           device = local_rank % ngpus;
@@ -151,7 +151,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
           set_flag = 1;
         }
       }
-      if ((str = getenv("MV2_COMM_WORLD_LOCAL_RANK"))) {
+      if (str = getenv("MPT_LRANK")) {
         if (ngpus > 0) {
           int local_rank = atoi(str);
           device = local_rank % ngpus;
@@ -159,7 +159,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
           set_flag = 1;
         }
       }
-      if ((str = getenv("OMPI_COMM_WORLD_LOCAL_RANK"))) {
+      if (str = getenv("MV2_COMM_WORLD_LOCAL_RANK")) {
         if (ngpus > 0) {
           int local_rank = atoi(str);
           device = local_rank % ngpus;
@@ -167,7 +167,15 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
           set_flag = 1;
         }
       }
-      if ((str = getenv("PMI_LOCAL_RANK"))) {
+      if (str = getenv("OMPI_COMM_WORLD_LOCAL_RANK")) {
+        if (ngpus > 0) {
+          int local_rank = atoi(str);
+          device = local_rank % ngpus;
+          if (device >= skip_gpu) device++;
+          set_flag = 1;
+        }
+      }
+      if (str = getenv("PMI_LOCAL_RANK")) {
         if (ngpus > 0) {
           int local_rank = atoi(str);
           device = local_rank % ngpus;
diff --git a/src/KOKKOS/kokkos_base.h b/src/KOKKOS/kokkos_base.h
index 7d9ecb5d80..1e22a38657 100644
--- a/src/KOKKOS/kokkos_base.h
+++ b/src/KOKKOS/kokkos_base.h
@@ -41,11 +41,6 @@ class KokkosBase {
                                            int, int *) {return 0;};
   virtual void unpack_forward_comm_fix_kokkos(int, int, DAT::tdual_xfloat_1d &) {}
 
-
-  // Region
-  virtual void match_all_kokkos(int, DAT::tdual_int_1d) {}
-
-  // Fix
   virtual int pack_exchange_kokkos(const int & /*nsend*/, DAT::tdual_xfloat_2d & /*k_buf*/,
                                    DAT::tdual_int_1d /*k_sendlist*/,
                                    DAT::tdual_int_1d /*k_copylist*/,
@@ -54,6 +49,9 @@ class KokkosBase {
                                       DAT::tdual_int_1d & /*indices*/, int /*nrecv*/,
                                       ExecutionSpace /*space*/) {}
 
+  // Region
+  virtual void match_all_kokkos(int, DAT::tdual_int_1d) {}
+
   using KeyViewType = DAT::t_x_array;
   using BinOp = BinOp3DLAMMPS<KeyViewType>;
   virtual void
diff --git a/src/KOKKOS/min_kokkos.cpp b/src/KOKKOS/min_kokkos.cpp
index 4e1c3967ff..bbb9a0bd6e 100644
--- a/src/KOKKOS/min_kokkos.cpp
+++ b/src/KOKKOS/min_kokkos.cpp
@@ -59,6 +59,9 @@ void MinKokkos::init()
 {
   Min::init();
 
+  if (!fix_minimize->kokkosable)
+    error->all(FLERR,"KOKKOS package requires fix minimize/kk");
+
   fix_minimize_kk = (FixMinimizeKokkos*) fix_minimize;
 }
 
diff --git a/src/KOKKOS/modify_kokkos.cpp b/src/KOKKOS/modify_kokkos.cpp
index 0b81a1cabb..8d8ffca671 100644
--- a/src/KOKKOS/modify_kokkos.cpp
+++ b/src/KOKKOS/modify_kokkos.cpp
@@ -362,6 +362,17 @@ void ModifyKokkos::pre_reverse(int eflag, int vflag)
 
 void ModifyKokkos::post_force(int vflag)
 {
+  for (int i = 0; i < n_post_force_group; i++) {
+    atomKK->sync(fix[list_post_force_group[i]]->execution_space,
+                 fix[list_post_force_group[i]]->datamask_read);
+    int prev_auto_sync = lmp->kokkos->auto_sync;
+    if (!fix[list_post_force_group[i]]->kokkosable) lmp->kokkos->auto_sync = 1;
+    fix[list_post_force_group[i]]->post_force(vflag);
+    lmp->kokkos->auto_sync = prev_auto_sync;
+    atomKK->modified(fix[list_post_force_group[i]]->execution_space,
+                     fix[list_post_force_group[i]]->datamask_modify);
+  }
+
   for (int i = 0; i < n_post_force; i++) {
     atomKK->sync(fix[list_post_force[i]]->execution_space,
                  fix[list_post_force[i]]->datamask_read);
diff --git a/src/KOKKOS/neigh_bond_kokkos.cpp b/src/KOKKOS/neigh_bond_kokkos.cpp
index 4cfe440b1f..b749590779 100644
--- a/src/KOKKOS/neigh_bond_kokkos.cpp
+++ b/src/KOKKOS/neigh_bond_kokkos.cpp
@@ -112,9 +112,8 @@ void NeighBondKokkos<DeviceType>::init_topology_kk() {
   int i,m;
   int bond_off = 0;
   int angle_off = 0;
-  for (i = 0; i < modify->nfix; i++)
-    if ((strcmp(modify->fix[i]->style,"shake") == 0)
-        || (strcmp(modify->fix[i]->style,"rattle") == 0))
+  for (const auto &ifix : modify->get_fix_list())
+    if (utils::strmatch(ifix->style,"^shake") || utils::strmatch(ifix->style,"^rattle"))
       bond_off = angle_off = 1;
   if (force->bond && force->bond_match("quartic")) bond_off = 1;
 
diff --git a/src/KOKKOS/neighbor_kokkos.cpp b/src/KOKKOS/neighbor_kokkos.cpp
index 0b40bce841..efb1247560 100644
--- a/src/KOKKOS/neighbor_kokkos.cpp
+++ b/src/KOKKOS/neighbor_kokkos.cpp
@@ -308,7 +308,8 @@ void NeighborKokkos::build_kokkos(int topoflag)
   for (i = 0; i < npair_perpetual; i++) {
     m = plist[i];
     if (!lists[m]->kokkos) atomKK->sync(Host,ALL_MASK);
-    if (!lists[m]->copy) lists[m]->grow(nlocal,nall);
+    if (!lists[m]->copy || lists[m]->trim || lists[m]->kk2cpu)
+      lists[m]->grow(nlocal,nall);
     neigh_pair[m]->build_setup();
     neigh_pair[m]->build(lists[m]);
   }
diff --git a/src/KOKKOS/npair_halffull_kokkos.cpp b/src/KOKKOS/npair_halffull_kokkos.cpp
index ec17cec844..c8c4d57fc9 100644
--- a/src/KOKKOS/npair_halffull_kokkos.cpp
+++ b/src/KOKKOS/npair_halffull_kokkos.cpp
@@ -18,6 +18,7 @@
 #include "atom_masks.h"
 #include "atom_vec.h"
 #include "domain.h"
+#include "force.h"
 #include "neigh_list_kokkos.h"
 
 #include <cmath>
@@ -26,8 +27,8 @@ using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-template<class DeviceType, int NEWTON, int TRIM>
-NPairHalffullKokkos<DeviceType,NEWTON,TRIM>::NPairHalffullKokkos(LAMMPS *lmp) : NPair(lmp) {
+template<class DeviceType, int NEWTON, int TRI, int TRIM>
+NPairHalffullKokkos<DeviceType,NEWTON,TRI,TRIM>::NPairHalffullKokkos(LAMMPS *lmp) : NPair(lmp) {
   atomKK = (AtomKokkos *) atom;
   execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
 }
@@ -41,13 +42,14 @@ NPairHalffullKokkos<DeviceType,NEWTON,TRIM>::NPairHalffullKokkos(LAMMPS *lmp) :
    if ghost, also store neighbors of ghost atoms & set inum,gnum correctly
 ------------------------------------------------------------------------- */
 
-template<class DeviceType, int NEWTON, int TRIM>
-void NPairHalffullKokkos<DeviceType,NEWTON,TRIM>::build(NeighList *list)
+template<class DeviceType, int NEWTON, int TRI, int TRIM>
+void NPairHalffullKokkos<DeviceType,NEWTON,TRI,TRIM>::build(NeighList *list)
 {
   if (NEWTON || TRIM) {
     x = atomKK->k_x.view<DeviceType>();
     atomKK->sync(execution_space,X_MASK);
   }
+
   nlocal = atom->nlocal;
 
   cutsq_custom = cutoff_custom*cutoff_custom;
@@ -66,6 +68,8 @@ void NPairHalffullKokkos<DeviceType,NEWTON,TRIM>::build(NeighList *list)
   d_numneigh = k_list->d_numneigh;
   d_neighbors = k_list->d_neighbors;
 
+  delta = 0.01 * force->angstrom;
+
   // loop over parent full list
 
   copymode = 1;
@@ -78,9 +82,9 @@ void NPairHalffullKokkos<DeviceType,NEWTON,TRIM>::build(NeighList *list)
   k_list->k_ilist.template modify<DeviceType>();
 }
 
-template<class DeviceType, int NEWTON, int TRIM>
+template<class DeviceType, int NEWTON, int TRI, int TRIM>
 KOKKOS_INLINE_FUNCTION
-void NPairHalffullKokkos<DeviceType,NEWTON,TRIM>::operator()(TagNPairHalffullCompute, const int &ii) const {
+void NPairHalffullKokkos<DeviceType,NEWTON,TRI,TRIM>::operator()(TagNPairHalffullCompute, const int &ii) const {
   int n = 0;
 
   const int i = d_ilist_full(ii);
@@ -92,6 +96,11 @@ void NPairHalffullKokkos<DeviceType,NEWTON,TRIM>::operator()(TagNPairHalffullCom
   }
 
   // loop over full neighbor list
+  // use i < j < nlocal to eliminate half the local/local interactions
+  // for triclinic, must use delta to eliminate half the local/ghost interactions
+  // cannot use I/J exact coord comparison as for orthog
+  //   b/c transforming orthog -> lambda -> orthog for ghost atoms
+  //   with an added PBC offset can shift all 3 coords by epsilon
 
   const int jnum = d_numneigh_full(i);
   const AtomNeighbors neighbors_i = AtomNeighbors(&d_neighbors(i,0),d_numneigh(i),
@@ -103,6 +112,14 @@ void NPairHalffullKokkos<DeviceType,NEWTON,TRIM>::operator()(TagNPairHalffullCom
     if (NEWTON) {
       if (j < nlocal) {
         if (i > j) continue;
+      } else if (TRI) {
+        if (fabs(x(j,2)-ztmp) > delta) {
+          if (x(j,2) < ztmp) continue;
+        } else if (fabs(x(j,1)-ytmp) > delta) {
+          if (x(j,1) < ytmp) continue;
+        } else {
+          if (x(j,0) < xtmp) continue;
+        }
       } else {
         if (x(j,2) < ztmp) continue;
         if (x(j,2) == ztmp) {
@@ -141,14 +158,18 @@ void NPairHalffullKokkos<DeviceType,NEWTON,TRIM>::operator()(TagNPairHalffullCom
 }
 
 namespace LAMMPS_NS {
-template class NPairHalffullKokkos<LMPDeviceType,0,0>;
-template class NPairHalffullKokkos<LMPDeviceType,0,1>;
-template class NPairHalffullKokkos<LMPDeviceType,1,0>;
-template class NPairHalffullKokkos<LMPDeviceType,1,1>;
+template class NPairHalffullKokkos<LMPDeviceType,0,0,0>;
+template class NPairHalffullKokkos<LMPDeviceType,0,0,1>;
+template class NPairHalffullKokkos<LMPDeviceType,1,0,0>;
+template class NPairHalffullKokkos<LMPDeviceType,1,0,1>;
+template class NPairHalffullKokkos<LMPDeviceType,1,1,0>;
+template class NPairHalffullKokkos<LMPDeviceType,1,1,1>;
 #ifdef LMP_KOKKOS_GPU
-template class NPairHalffullKokkos<LMPHostType,0,0>;
-template class NPairHalffullKokkos<LMPHostType,0,1>;
-template class NPairHalffullKokkos<LMPHostType,1,0>;
-template class NPairHalffullKokkos<LMPHostType,1,1>;
+template class NPairHalffullKokkos<LMPHostType,0,0,0>;
+template class NPairHalffullKokkos<LMPHostType,0,0,1>;
+template class NPairHalffullKokkos<LMPHostType,1,0,0>;
+template class NPairHalffullKokkos<LMPHostType,1,0,1>;
+template class NPairHalffullKokkos<LMPHostType,1,1,0>;
+template class NPairHalffullKokkos<LMPHostType,1,1,1>;
 #endif
 }
diff --git a/src/KOKKOS/npair_halffull_kokkos.h b/src/KOKKOS/npair_halffull_kokkos.h
index c5a09f0b62..3eee19b8c3 100644
--- a/src/KOKKOS/npair_halffull_kokkos.h
+++ b/src/KOKKOS/npair_halffull_kokkos.h
@@ -16,53 +16,79 @@
 
 // Trim off
 
-// Newton
+// Newton, no triclinic
 
-typedef NPairHalffullKokkos<LMPDeviceType,1,0> NPairKokkosHalffullNewtonDevice;
+typedef NPairHalffullKokkos<LMPDeviceType,1,0,0> NPairKokkosHalffullNewtonDevice;
 NPairStyle(halffull/newton/kk/device,
            NPairKokkosHalffullNewtonDevice,
            NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
-           NP_ORTHO | NP_TRI | NP_KOKKOS_DEVICE);
+           NP_ORTHO | NP_KOKKOS_DEVICE);
 
-typedef NPairHalffullKokkos<LMPHostType,1,0> NPairKokkosHalffullNewtonHost;
+typedef NPairHalffullKokkos<LMPHostType,1,0,0> NPairKokkosHalffullNewtonHost;
 NPairStyle(halffull/newton/kk/host,
            NPairKokkosHalffullNewtonHost,
            NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
-           NP_ORTHO | NP_TRI | NP_KOKKOS_HOST);
+           NP_ORTHO | NP_KOKKOS_HOST);
 
-typedef NPairHalffullKokkos<LMPDeviceType,1,0> NPairKokkosHalffullNewtonDevice;
+typedef NPairHalffullKokkos<LMPDeviceType,1,0,0> NPairKokkosHalffullNewtonDevice;
 NPairStyle(halffull/newton/skip/kk/device,
            NPairKokkosHalffullNewtonDevice,
            NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
-           NP_ORTHO | NP_TRI | NP_SKIP | NP_KOKKOS_DEVICE);
+           NP_ORTHO | NP_SKIP | NP_KOKKOS_DEVICE);
 
-typedef NPairHalffullKokkos<LMPHostType,1,0> NPairKokkosHalffullNewtonHost;
+typedef NPairHalffullKokkos<LMPHostType,1,0,0> NPairKokkosHalffullNewtonHost;
 NPairStyle(halffull/newton/skip/kk/host,
            NPairKokkosHalffullNewtonHost,
            NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
+           NP_ORTHO | NP_SKIP | NP_KOKKOS_HOST);
+
+// Newton, triclinic
+
+typedef NPairHalffullKokkos<LMPDeviceType,1,1,0> NPairKokkosHalffullNewtonTriDevice;
+NPairStyle(halffull/newton/tri/kk/device,
+           NPairKokkosHalffullNewtonTriDevice,
+           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
+           NP_ORTHO | NP_TRI | NP_KOKKOS_DEVICE);
+
+typedef NPairHalffullKokkos<LMPHostType,1,1,0> NPairKokkosHalffullNewtonTriHost;
+NPairStyle(halffull/newton/tri/kk/host,
+           NPairKokkosHalffullNewtonTriHost,
+           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
+           NP_ORTHO | NP_TRI | NP_KOKKOS_HOST);
+
+typedef NPairHalffullKokkos<LMPDeviceType,1,1,0> NPairKokkosHalffullNewtonTriDevice;
+NPairStyle(halffull/newton/tri/skip/kk/device,
+           NPairKokkosHalffullNewtonTriDevice,
+           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
+           NP_ORTHO | NP_TRI | NP_SKIP | NP_KOKKOS_DEVICE);
+
+typedef NPairHalffullKokkos<LMPHostType,1,1,0> NPairKokkosHalffullNewtonTriHost;
+NPairStyle(halffull/newton/tri/skip/kk/host,
+           NPairKokkosHalffullNewtonTriHost,
+           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
            NP_ORTHO | NP_TRI | NP_SKIP | NP_KOKKOS_HOST);
 
-// Newtoff
+// Newtoff (can be triclinic but template param always set to 0)
 
-typedef NPairHalffullKokkos<LMPDeviceType,0,0> NPairKokkosHalffullNewtoffDevice;
+typedef NPairHalffullKokkos<LMPDeviceType,0,0,0> NPairKokkosHalffullNewtoffDevice;
 NPairStyle(halffull/newtoff/kk/device,
            NPairKokkosHalffullNewtoffDevice,
            NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
            NP_ORTHO | NP_TRI | NP_KOKKOS_DEVICE);
 
-typedef NPairHalffullKokkos<LMPHostType,0,0> NPairKokkosHalffullNewtoffHost;
+typedef NPairHalffullKokkos<LMPHostType,0,0,0> NPairKokkosHalffullNewtoffHost;
 NPairStyle(halffull/newtoff/kk/host,
            NPairKokkosHalffullNewtoffHost,
            NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
            NP_ORTHO | NP_TRI | NP_KOKKOS_HOST);
 
-typedef NPairHalffullKokkos<LMPDeviceType,0,0> NPairKokkosHalffullNewtoffDevice;
+typedef NPairHalffullKokkos<LMPDeviceType,0,0,0> NPairKokkosHalffullNewtoffDevice;
 NPairStyle(halffull/newtoff/skip/kk/device,
            NPairKokkosHalffullNewtoffDevice,
            NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
            NP_ORTHO | NP_TRI | NP_SKIP | NP_KOKKOS_DEVICE);
 
-typedef NPairHalffullKokkos<LMPHostType,0,0> NPairKokkosHalffullNewtoffHost;
+typedef NPairHalffullKokkos<LMPHostType,0,0,0> NPairKokkosHalffullNewtoffHost;
 NPairStyle(halffull/newtoff/skip/kk/host,
            NPairKokkosHalffullNewtoffHost,
            NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
@@ -70,166 +96,244 @@ NPairStyle(halffull/newtoff/skip/kk/host,
 
 //************ Ghost **************
 
-// Newton
+// Newton, no triclinic
 
-typedef NPairHalffullKokkos<LMPDeviceType,1,0> NPairKokkosHalffullNewtonGhostDevice;
+typedef NPairHalffullKokkos<LMPDeviceType,1,0,0> NPairKokkosHalffullNewtonDevice;
 NPairStyle(halffull/newton/ghost/kk/device,
-           NPairKokkosHalffullNewtonGhostDevice,
+           NPairKokkosHalffullNewtonDevice,
            NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
-           NP_ORTHO | NP_TRI | NP_GHOST | NP_KOKKOS_DEVICE);
+           NP_ORTHO | NP_GHOST | NP_KOKKOS_DEVICE);
 
-typedef NPairHalffullKokkos<LMPHostType,1,0> NPairKokkosHalffullNewtonHost;
+typedef NPairHalffullKokkos<LMPHostType,1,0,0> NPairKokkosHalffullNewtonHost;
 NPairStyle(halffull/newton/ghost/kk/host,
            NPairKokkosHalffullNewtonHost,
            NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
-           NP_ORTHO | NP_TRI | NP_GHOST | NP_KOKKOS_HOST);
+           NP_ORTHO | NP_GHOST | NP_KOKKOS_HOST);
 
-typedef NPairHalffullKokkos<LMPDeviceType,1,0> NPairKokkosHalffullNewtonGhostDevice;
+typedef NPairHalffullKokkos<LMPDeviceType,1,0,0> NPairKokkosHalffullNewtonDevice;
 NPairStyle(halffull/newton/skip/ghost/kk/device,
-           NPairKokkosHalffullNewtonGhostDevice,
+           NPairKokkosHalffullNewtonDevice,
            NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
-           NP_ORTHO | NP_TRI | NP_GHOST | NP_SKIP | NP_KOKKOS_DEVICE);
+           NP_ORTHO | NP_GHOST | NP_SKIP | NP_KOKKOS_DEVICE);
 
-typedef NPairHalffullKokkos<LMPHostType,1,0> NPairKokkosHalffullNewtonHost;
+typedef NPairHalffullKokkos<LMPHostType,1,0,0> NPairKokkosHalffullNewtonHost;
 NPairStyle(halffull/newton/skip/ghost/kk/host,
            NPairKokkosHalffullNewtonHost,
            NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
+           NP_ORTHO | NP_GHOST | NP_SKIP | NP_KOKKOS_HOST);
+
+// Newton, triclinic
+
+typedef NPairHalffullKokkos<LMPDeviceType,1,1,0> NPairKokkosHalffullNewtonTriDevice;
+NPairStyle(halffull/newton/tri/ghost/kk/device,
+           NPairKokkosHalffullNewtonTriDevice,
+           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
+           NP_ORTHO | NP_TRI | NP_GHOST | NP_KOKKOS_DEVICE);
+
+typedef NPairHalffullKokkos<LMPHostType,1,1,0> NPairKokkosHalffullNewtonTriHost;
+NPairStyle(halffull/newton/tri/ghost/kk/host,
+           NPairKokkosHalffullNewtonTriHost,
+           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
+           NP_ORTHO | NP_TRI | NP_GHOST | NP_KOKKOS_HOST);
+
+typedef NPairHalffullKokkos<LMPDeviceType,1,1,0> NPairKokkosHalffullNewtonTriDevice;
+NPairStyle(halffull/newton/tri/skip/ghost/kk/device,
+           NPairKokkosHalffullNewtonTriDevice,
+           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
+           NP_ORTHO | NP_TRI | NP_GHOST | NP_SKIP | NP_KOKKOS_DEVICE);
+
+typedef NPairHalffullKokkos<LMPHostType,1,1,0> NPairKokkosHalffullNewtonTriHost;
+NPairStyle(halffull/newton/tri/skip/ghost/kk/host,
+           NPairKokkosHalffullNewtonTriHost,
+           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
            NP_ORTHO | NP_TRI | NP_GHOST | NP_SKIP | NP_KOKKOS_HOST);
 
-// Newtoff
+// Newtoff (can be triclinic but template param always set to 0)
 
-typedef NPairHalffullKokkos<LMPDeviceType,0,0> NPairKokkosHalffullNewtoffGhostDevice;
+typedef NPairHalffullKokkos<LMPDeviceType,0,0,0> NPairKokkosHalffullNewtoffDevice;
 NPairStyle(halffull/newtoff/ghost/kk/device,
-           NPairKokkosHalffullNewtoffGhostDevice,
+           NPairKokkosHalffullNewtoffDevice,
            NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
            NP_ORTHO | NP_TRI | NP_GHOST | NP_KOKKOS_DEVICE);
 
-typedef NPairHalffullKokkos<LMPHostType,0,0> NPairKokkosHalffullNewtoffHost;
+typedef NPairHalffullKokkos<LMPHostType,0,0,0> NPairKokkosHalffullNewtoffHost;
 NPairStyle(halffull/newtoff/ghost/kk/host,
            NPairKokkosHalffullNewtoffHost,
            NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
            NP_ORTHO | NP_TRI | NP_GHOST | NP_KOKKOS_HOST);
 
-typedef NPairHalffullKokkos<LMPDeviceType,0,0> NPairKokkosHalffullNewtoffGhostDevice;
+typedef NPairHalffullKokkos<LMPDeviceType,0,0,0> NPairKokkosHalffullNewtoffDevice;
 NPairStyle(halffull/newtoff/skip/ghost/kk/device,
-           NPairKokkosHalffullNewtoffGhostDevice,
+           NPairKokkosHalffullNewtoffDevice,
            NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
            NP_ORTHO | NP_TRI | NP_GHOST | NP_SKIP | NP_KOKKOS_DEVICE);
 
-typedef NPairHalffullKokkos<LMPHostType,0,0> NPairKokkosHalffullNewtoffHost;
+typedef NPairHalffullKokkos<LMPHostType,0,0,0> NPairKokkosHalffullNewtoffHost;
 NPairStyle(halffull/newtoff/skip/ghost/kk/host,
            NPairKokkosHalffullNewtoffHost,
            NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
            NP_ORTHO | NP_TRI | NP_GHOST | NP_SKIP | NP_KOKKOS_HOST);
 
-
 //************ Trim **************
 
-// Newton
+// Newton, no triclinic
 
-typedef NPairHalffullKokkos<LMPDeviceType,1,1> NPairKokkosHalffullNewtonTrimDevice;
+typedef NPairHalffullKokkos<LMPDeviceType,1,0,1> NPairKokkosHalffullNewtonTrimDevice;
 NPairStyle(halffull/newton/trim/kk/device,
            NPairKokkosHalffullNewtonTrimDevice,
            NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
-           NP_ORTHO | NP_TRI | NP_TRIM | NP_KOKKOS_DEVICE);
+           NP_ORTHO | NP_TRIM | NP_KOKKOS_DEVICE);
 
-typedef NPairHalffullKokkos<LMPHostType,1,1> NPairKokkosHalffullNewtonTrimHost;
+typedef NPairHalffullKokkos<LMPHostType,1,0,1> NPairKokkosHalffullNewtonTrimHost;
 NPairStyle(halffull/newton/trim/kk/host,
            NPairKokkosHalffullNewtonTrimHost,
            NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
+           NP_ORTHO | NP_TRIM | NP_KOKKOS_HOST);
+
+typedef NPairHalffullKokkos<LMPDeviceType,1,0,1> NPairKokkosHalffullNewtonTrimDevice;
+NPairStyle(halffull/newton/trim/skip/kk/device,
+           NPairKokkosHalffullNewtonTrimDevice,
+           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
+           NP_ORTHO | NP_SKIP | NP_TRIM | NP_KOKKOS_DEVICE);
+
+typedef NPairHalffullKokkos<LMPHostType,1,0,1> NPairKokkosHalffullNewtonTrimHost;
+NPairStyle(halffull/newton/trim/skip/kk/host,
+           NPairKokkosHalffullNewtonTrimHost,
+           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
+           NP_ORTHO | NP_SKIP | NP_TRIM | NP_KOKKOS_HOST);
+
+// Newton, triclinic
+
+typedef NPairHalffullKokkos<LMPDeviceType,1,1,1> NPairKokkosHalffullNewtonTriTrimDevice;
+NPairStyle(halffull/newton/tri/trim/kk/device,
+           NPairKokkosHalffullNewtonTriTrimDevice,
+           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
+           NP_ORTHO | NP_TRI | NP_TRIM | NP_KOKKOS_DEVICE);
+
+typedef NPairHalffullKokkos<LMPHostType,1,1,1> NPairKokkosHalffullNewtonTriTrimHost;
+NPairStyle(halffull/newton/tri/trim/kk/host,
+           NPairKokkosHalffullNewtonTriTrimHost,
+           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
            NP_ORTHO | NP_TRI | NP_TRIM | NP_KOKKOS_HOST);
 
-typedef NPairHalffullKokkos<LMPDeviceType,1,1> NPairKokkosHalffullNewtonTrimDevice;
-NPairStyle(halffull/newton/skip/trim/kk/device,
+typedef NPairHalffullKokkos<LMPDeviceType,1,1,1> NPairKokkosHalffullNewtonTriTrimDevice;
+NPairStyle(halffull/newton/tri/trim/skip/kk/device,
-           NPairKokkosHalffullNewtonTrimDevice,
+           NPairKokkosHalffullNewtonTriTrimDevice,
            NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
            NP_ORTHO | NP_TRI | NP_SKIP | NP_TRIM | NP_KOKKOS_DEVICE);
 
-typedef NPairHalffullKokkos<LMPHostType,1,1> NPairKokkosHalffullNewtonTrimHost;
-NPairStyle(halffull/newton/skip/trim/kk/host,
-           NPairKokkosHalffullNewtonTrimHost,
+typedef NPairHalffullKokkos<LMPHostType,1,1,1> NPairKokkosHalffullNewtonTriTrimHost;
+NPairStyle(halffull/newton/tri/trim/skip/kk/host,
+           NPairKokkosHalffullNewtonTriTrimHost,
            NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
            NP_ORTHO | NP_TRI | NP_SKIP | NP_TRIM | NP_KOKKOS_HOST);
 
-// Newtoff
+// Newtoff (can be triclinic but template param always set to 0)
 
-typedef NPairHalffullKokkos<LMPDeviceType,0,1> NPairKokkosHalffullNewtoffTrimDevice;
+typedef NPairHalffullKokkos<LMPDeviceType,0,0,1> NPairKokkosHalffullNewtoffTrimDevice;
 NPairStyle(halffull/newtoff/trim/kk/device,
            NPairKokkosHalffullNewtoffTrimDevice,
            NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
            NP_ORTHO | NP_TRI | NP_TRIM | NP_KOKKOS_DEVICE);
 
-typedef NPairHalffullKokkos<LMPHostType,0,1> NPairKokkosHalffullNewtoffTrimHost;
+typedef NPairHalffullKokkos<LMPHostType,0,0,1> NPairKokkosHalffullNewtoffTrimHost;
 NPairStyle(halffull/newtoff/trim/kk/host,
            NPairKokkosHalffullNewtoffTrimHost,
            NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
            NP_ORTHO | NP_TRI | NP_TRIM | NP_KOKKOS_HOST);
 
-typedef NPairHalffullKokkos<LMPDeviceType,0,1> NPairKokkosHalffullNewtoffTrimDevice;
-NPairStyle(halffull/newtoff/skip/trim/kk/device,
+typedef NPairHalffullKokkos<LMPDeviceType,0,0,1> NPairKokkosHalffullNewtoffTrimDevice;
+NPairStyle(halffull/newtoff/trim/skip/kk/device,
            NPairKokkosHalffullNewtoffTrimDevice,
            NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
            NP_ORTHO | NP_TRI | NP_SKIP | NP_TRIM | NP_KOKKOS_DEVICE);
 
-typedef NPairHalffullKokkos<LMPHostType,0,1> NPairKokkosHalffullNewtoffTrimHost;
-NPairStyle(halffull/newtoff/skip/trim/kk/host,
+typedef NPairHalffullKokkos<LMPHostType,0,0,1> NPairKokkosHalffullNewtoffTrimHost;
+NPairStyle(halffull/newtoff/trim/skip/kk/host,
            NPairKokkosHalffullNewtoffTrimHost,
            NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
            NP_ORTHO | NP_TRI | NP_SKIP |  NP_TRIM | NP_KOKKOS_HOST);
 
 //************ Ghost **************
 
-// Newton
+// Newton, no triclinic
 
-typedef NPairHalffullKokkos<LMPDeviceType,1,1> NPairKokkosHalffullNewtonGhostTrimDevice;
-NPairStyle(halffull/newton/ghost/trim/kk/device,
-           NPairKokkosHalffullNewtonGhostTrimDevice,
+typedef NPairHalffullKokkos<LMPDeviceType,1,0,1> NPairKokkosHalffullNewtonTrimDevice;
+NPairStyle(halffull/newton/trim/ghost/kk/device,
+           NPairKokkosHalffullNewtonTrimDevice,
+           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
+           NP_ORTHO | NP_GHOST | NP_TRIM | NP_KOKKOS_DEVICE);
+
+typedef NPairHalffullKokkos<LMPHostType,1,0,1> NPairKokkosHalffullNewtonTrimHost;
+NPairStyle(halffull/newton/trim/ghost/kk/host,
+           NPairKokkosHalffullNewtonTrimHost,
+           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
+           NP_ORTHO | NP_GHOST | NP_TRIM | NP_KOKKOS_HOST);
+
+typedef NPairHalffullKokkos<LMPDeviceType,1,0,1> NPairKokkosHalffullNewtonTrimDevice;
+NPairStyle(halffull/newton/trim/skip/ghost/kk/device,
+           NPairKokkosHalffullNewtonTrimDevice,
+           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
+           NP_ORTHO | NP_GHOST | NP_SKIP | NP_TRIM | NP_KOKKOS_DEVICE);
+
+typedef NPairHalffullKokkos<LMPHostType,1,0,1> NPairKokkosHalffullNewtonTrimHost;
+NPairStyle(halffull/newton/trim/skip/ghost/kk/host,
+           NPairKokkosHalffullNewtonTrimHost,
+           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
+           NP_ORTHO | NP_GHOST | NP_SKIP | NP_TRIM | NP_KOKKOS_HOST);
+
+// Newton, triclinic
+
+typedef NPairHalffullKokkos<LMPDeviceType,1,1,1> NPairKokkosHalffullNewtonTriTrimDevice;
+NPairStyle(halffull/newton/tri/trim/ghost/kk/device,
+           NPairKokkosHalffullNewtonTriTrimDevice,
            NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
            NP_ORTHO | NP_TRI | NP_GHOST | NP_TRIM | NP_KOKKOS_DEVICE);
 
-typedef NPairHalffullKokkos<LMPHostType,1,1> NPairKokkosHalffullNewtonTrimHost;
-NPairStyle(halffull/newton/ghost/trim/kk/host,
-           NPairKokkosHalffullNewtonTrimHost,
+typedef NPairHalffullKokkos<LMPHostType,1,1,1> NPairKokkosHalffullNewtonTriTrimHost;
+NPairStyle(halffull/newton/tri/trim/ghost/kk/host,
+           NPairKokkosHalffullNewtonTriTrimHost,
            NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
            NP_ORTHO | NP_TRI | NP_GHOST | NP_TRIM | NP_KOKKOS_HOST);
 
-typedef NPairHalffullKokkos<LMPDeviceType,1,1> NPairKokkosHalffullNewtonGhostTrimDevice;
-NPairStyle(halffull/newton/skip/ghost/trim/kk/device,
-           NPairKokkosHalffullNewtonGhostTrimDevice,
+typedef NPairHalffullKokkos<LMPDeviceType,1,1,1> NPairKokkosHalffullNewtonTriTrimDevice;
+NPairStyle(halffull/newton/tri/trim/skip/ghost/kk/device,
+           NPairKokkosHalffullNewtonTriTrimDevice,
            NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
            NP_ORTHO | NP_TRI | NP_GHOST | NP_SKIP | NP_TRIM | NP_KOKKOS_DEVICE);
 
-typedef NPairHalffullKokkos<LMPHostType,1,1> NPairKokkosHalffullNewtonTrimHost;
-NPairStyle(halffull/newton/skip/ghost/trim/kk/host,
-           NPairKokkosHalffullNewtonTrimHost,
+typedef NPairHalffullKokkos<LMPHostType,1,1,1> NPairKokkosHalffullNewtonTriTrimHost;
+NPairStyle(halffull/newton/tri/trim/skip/ghost/kk/host,
+           NPairKokkosHalffullNewtonTriTrimHost,
            NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
            NP_ORTHO | NP_TRI | NP_GHOST | NP_SKIP | NP_TRIM | NP_KOKKOS_HOST);
 
-// Newtoff
+// Newtoff (can be triclinic but template param always set to 0)
 
-typedef NPairHalffullKokkos<LMPDeviceType,0,1> NPairKokkosHalffullNewtoffGhostTrimDevice;
-NPairStyle(halffull/newtoff/ghost/trim/kk/device,
-           NPairKokkosHalffullNewtoffGhostTrimDevice,
+typedef NPairHalffullKokkos<LMPDeviceType,0,0,1> NPairKokkosHalffullNewtoffTrimDevice;
+NPairStyle(halffull/newtoff/trim/ghost/kk/device,
+           NPairKokkosHalffullNewtoffTrimDevice,
            NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
            NP_ORTHO | NP_TRI | NP_GHOST | NP_TRIM | NP_KOKKOS_DEVICE);
 
-typedef NPairHalffullKokkos<LMPHostType,0,1> NPairKokkosHalffullNewtoffTrimHost;
-NPairStyle(halffull/newtoff/ghost/trim/kk/host,
+typedef NPairHalffullKokkos<LMPHostType,0,0,1> NPairKokkosHalffullNewtoffTrimHost;
+NPairStyle(halffull/newtoff/trim/ghost/kk/host,
            NPairKokkosHalffullNewtoffTrimHost,
            NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
            NP_ORTHO | NP_TRI | NP_GHOST | NP_TRIM | NP_KOKKOS_HOST);
 
-typedef NPairHalffullKokkos<LMPDeviceType,0,1> NPairKokkosHalffullNewtoffGhostTrimDevice;
-NPairStyle(halffull/newtoff/skip/ghost/trim/kk/device,
-           NPairKokkosHalffullNewtoffGhostTrimDevice,
+typedef NPairHalffullKokkos<LMPDeviceType,0,0,1> NPairKokkosHalffullNewtoffTrimDevice;
+NPairStyle(halffull/newtoff/trim/skip/ghost/kk/device,
+           NPairKokkosHalffullNewtoffTrimDevice,
            NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
            NP_ORTHO | NP_TRI | NP_GHOST | NP_SKIP | NP_TRIM | NP_KOKKOS_DEVICE);
 
-typedef NPairHalffullKokkos<LMPHostType,0,1> NPairKokkosHalffullNewtoffTrimHost;
-NPairStyle(halffull/newtoff/skip/ghost/trim/kk/host,
+typedef NPairHalffullKokkos<LMPHostType,0,0,1> NPairKokkosHalffullNewtoffTrimHost;
+NPairStyle(halffull/newtoff/trim/skip/ghost/kk/host,
            NPairKokkosHalffullNewtoffTrimHost,
            NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
            NP_ORTHO | NP_TRI | NP_GHOST | NP_SKIP | NP_TRIM | NP_KOKKOS_HOST);
+
 // clang-format on
 #else
 
@@ -244,7 +348,7 @@ namespace LAMMPS_NS {
 
 struct TagNPairHalffullCompute{};
 
-template<class DeviceType, int NEWTON, int TRIM>
+template<class DeviceType, int NEWTON, int TRI, int TRIM>
 class NPairHalffullKokkos : public NPair {
  public:
   typedef DeviceType device_type;
@@ -257,8 +361,8 @@ class NPairHalffullKokkos : public NPair {
   void operator()(TagNPairHalffullCompute, const int&) const;
 
  private:
-  int nlocal;
-  double cutsq_custom;
+  int nlocal,triclinic;
+  double cutsq_custom,delta;
 
   typename AT::t_x_array_randomread x;
 
diff --git a/src/KOKKOS/npair_kokkos.cpp b/src/KOKKOS/npair_kokkos.cpp
index 06567cbeb6..45ec83e90e 100644
--- a/src/KOKKOS/npair_kokkos.cpp
+++ b/src/KOKKOS/npair_kokkos.cpp
@@ -155,6 +155,8 @@ void NPairKokkos<DeviceType,HALF,NEWTON,GHOST,TRI,SIZE>::build(NeighList *list_)
 
   list->grow(nall);
 
+  const double delta = 0.01 * force->angstrom;
+
   NeighborKokkosExecute<DeviceType>
     data(*list,
          k_cutneighsq.view<DeviceType>(),
@@ -176,7 +178,7 @@ void NPairKokkos<DeviceType,HALF,NEWTON,GHOST,TRI,SIZE>::build(NeighList *list_)
          atomKK->molecular,
          nbinx,nbiny,nbinz,mbinx,mbiny,mbinz,mbinxlo,mbinylo,mbinzlo,
          bininvx,bininvy,bininvz,
-         exclude, nex_type,
+         delta, exclude, nex_type,
          k_ex1_type.view<DeviceType>(),
          k_ex2_type.view<DeviceType>(),
          k_ex_type.view<DeviceType>(),
@@ -217,6 +219,8 @@ void NPairKokkos<DeviceType,HALF,NEWTON,GHOST,TRI,SIZE>::build(NeighList *list_)
       atomKK->sync(Device,X_MASK|RADIUS_MASK|TYPE_MASK);
   }
 
+  if (HALF && NEWTON && TRI) atomKK->sync(Device,TAG_MASK);
+
   data.special_flag[0] = special_flag[0];
   data.special_flag[1] = special_flag[1];
   data.special_flag[2] = special_flag[2];
@@ -261,7 +265,7 @@ void NPairKokkos<DeviceType,HALF,NEWTON,GHOST,TRI,SIZE>::build(NeighList *list_)
 //#endif
     } else {
       if (SIZE) {
-        NPairKokkosBuildFunctorSize<DeviceType,HALF,NEWTON,TRI> f(data,atoms_per_bin * 6 * sizeof(X_FLOAT) * factor);
+        NPairKokkosBuildFunctorSize<DeviceType,HALF,NEWTON,TRI> f(data,atoms_per_bin * 7 * sizeof(X_FLOAT) * factor);
 #ifdef LMP_KOKKOS_GPU
         if (ExecutionSpaceFromDevice<DeviceType>::space == Device) {
           int team_size = atoms_per_bin*factor;
@@ -279,7 +283,7 @@ void NPairKokkos<DeviceType,HALF,NEWTON,GHOST,TRI,SIZE>::build(NeighList *list_)
         Kokkos::parallel_for(nall, f);
 #endif
       } else {
-        NPairKokkosBuildFunctor<DeviceType,HALF,NEWTON,TRI> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
+        NPairKokkosBuildFunctor<DeviceType,HALF,NEWTON,TRI> f(data,atoms_per_bin * 6 * sizeof(X_FLOAT) * factor);
 #ifdef LMP_KOKKOS_GPU
         if (ExecutionSpaceFromDevice<DeviceType>::space == Device) {
           int team_size = atoms_per_bin*factor;
@@ -414,6 +418,8 @@ void NeighborKokkosExecute<DeviceType>::
   const X_FLOAT ytmp = x(i, 1);
   const X_FLOAT ztmp = x(i, 2);
   const int itype = type(i);
+  tagint itag;
+  if (HalfNeigh && Newton && Tri) itag = tag(i);
 
   const int ibin = c_atom2bin(i);
 
@@ -484,13 +490,29 @@ void NeighborKokkosExecute<DeviceType>::
 
         if (HalfNeigh && !Newton && j <= i) continue;
         if (!HalfNeigh && j == i) continue;
+
+        // for triclinic, bin stencil is full in all 3 dims
+        // must use itag/jtag to eliminate half the I/J interactions
+        // cannot use I/J exact coord comparison
+        //   b/c transforming orthog -> lambda -> orthog for ghost atoms
+        //   with an added PBC offset can shift all 3 coords by epsilon
+
         if (HalfNeigh && Newton && Tri) {
-          if (x(j,2) < ztmp) continue;
-          if (x(j,2) == ztmp) {
-            if (x(j,1) < ytmp) continue;
-            if (x(j,1) == ytmp) {
-              if (x(j,0) < xtmp) continue;
-              if (x(j,0) == xtmp && j <= i) continue;
+          if (j <= i) continue;
+          if (j >= nlocal) {
+            const tagint jtag = tag(j);
+            if (itag > jtag) {
+              if ((itag+jtag) % 2 == 0) continue;
+            } else if (itag < jtag) {
+              if ((itag+jtag) % 2 == 1) continue;
+            } else {
+              if (fabs(x(j,2)-ztmp) > delta) {
+                if (x(j,2) < ztmp) continue;
+              } else if (fabs(x(j,1)-ytmp) > delta) {
+                if (x(j,1) < ytmp) continue;
+              } else {
+                if (x(j,0) < xtmp) continue;
+              }
             }
           }
         }
@@ -568,8 +590,9 @@ void NeighborKokkosExecute<DeviceType>::build_ItemGPU(typename Kokkos::TeamPolic
                                                       size_t sharedsize) const
 {
   auto* sharedmem = static_cast<X_FLOAT *>(dev.team_shmem().get_shmem(sharedsize));
-  /* loop over atoms in i's bin,
-  */
+
+  // loop over atoms in i's bin
+
   const int atoms_per_bin = c_bins.extent(1);
   const int BINS_PER_TEAM = dev.team_size()/atoms_per_bin <1?1:dev.team_size()/atoms_per_bin;
   const int TEAMS_PER_BIN = atoms_per_bin/dev.team_size()<1?1:atoms_per_bin/dev.team_size();
@@ -579,15 +602,14 @@ void NeighborKokkosExecute<DeviceType>::build_ItemGPU(typename Kokkos::TeamPolic
 
   if (ibin >= mbins) return;
 
-  X_FLOAT* other_x = sharedmem + 5*atoms_per_bin*MY_BIN;
-  int* other_id = (int*) &other_x[4 * atoms_per_bin];
+  X_FLOAT* other_x = sharedmem + 6*atoms_per_bin*MY_BIN;
+  int* other_id = (int*) &other_x[5 * atoms_per_bin];
 
   int bincount_current = c_bincount[ibin];
 
   for (int kk = 0; kk < TEAMS_PER_BIN; kk++) {
     const int MY_II = dev.team_rank()%atoms_per_bin+kk*dev.team_size();
     const int i = MY_II < bincount_current ? c_bins(ibin, MY_II) : -1;
-    /* if necessary, goto next page and add pages */
 
     int n = 0;
 
@@ -595,6 +617,7 @@ void NeighborKokkosExecute<DeviceType>::build_ItemGPU(typename Kokkos::TeamPolic
     X_FLOAT ytmp;
     X_FLOAT ztmp;
     int itype;
+    tagint itag;
     const int index = (i >= 0 && i < nlocal) ? i : 0;
     const AtomNeighbors neighbors_i = neigh_transpose ?
     neigh_list.get_neighbors_transpose(index) : neigh_list.get_neighbors(index);
@@ -608,6 +631,10 @@ void NeighborKokkosExecute<DeviceType>::build_ItemGPU(typename Kokkos::TeamPolic
       other_x[MY_II + atoms_per_bin] = ytmp;
       other_x[MY_II + 2 * atoms_per_bin] = ztmp;
       other_x[MY_II + 3 * atoms_per_bin] = itype;
+      if (HalfNeigh && Newton && Tri) {
+        itag = tag(i);
+        other_x[MY_II + 4 * atoms_per_bin] = itag;
+      }
     }
     other_id[MY_II] = i;
 
@@ -695,6 +722,8 @@ void NeighborKokkosExecute<DeviceType>::build_ItemGPU(typename Kokkos::TeamPolic
         other_x[MY_II + atoms_per_bin] = x(j, 1);
         other_x[MY_II + 2 * atoms_per_bin] = x(j, 2);
         other_x[MY_II + 3 * atoms_per_bin] = type(j);
+        if (HalfNeigh && Newton && Tri)
+          other_x[MY_II + 4 * atoms_per_bin] = tag(j);
       }
 
       other_id[MY_II] = j;
@@ -708,13 +737,29 @@ void NeighborKokkosExecute<DeviceType>::build_ItemGPU(typename Kokkos::TeamPolic
 
           if (HalfNeigh && !Newton && j <= i) continue;
           if (!HalfNeigh && j == i) continue;
+
+          // for triclinic, bin stencil is full in all 3 dims
+          // must use itag/jtag to eliminate half the I/J interactions
+          // cannot use I/J exact coord comparison
+          //   b/c transforming orthog -> lambda -> orthog for ghost atoms
+          //   with an added PBC offset can shift all 3 coords by epsilon
+
           if (HalfNeigh && Newton && Tri) {
-            if (x(j,2) < ztmp) continue;
-            if (x(j,2) == ztmp) {
-              if (x(j,1) < ytmp) continue;
-              if (x(j,1) == ytmp) {
-                if (x(j,0) < xtmp) continue;
-                if (x(j,0) == xtmp && j <= i) continue;
+            if (j <= i) continue;
+            if (j >= nlocal) {
+              const tagint jtag = other_x[m + 4 * atoms_per_bin];
+              if (itag > jtag) {
+                if ((itag+jtag) % 2 == 0) continue;
+              } else if (itag < jtag) {
+                if ((itag+jtag) % 2 == 1) continue;
+              } else {
+                if (fabs(x(j,2)-ztmp) > delta) {
+                  if (x(j,2) < ztmp) continue;
+                } else if (fabs(x(j,1)-ytmp) > delta) {
+                  if (x(j,1) < ytmp) continue;
+                } else {
+                  if (x(j,0) < xtmp) continue;
+                }
               }
             }
           }
@@ -905,6 +950,7 @@ void NeighborKokkosExecute<DeviceType>::build_ItemGhostGPU(typename Kokkos::Team
                                                       size_t sharedsize) const
 {
   auto* sharedmem = static_cast<X_FLOAT *>(dev.team_shmem().get_shmem(sharedsize));
+
   // loop over atoms in i's bin
 
   const int atoms_per_bin = c_bins.extent(1);
@@ -1084,6 +1130,8 @@ void NeighborKokkosExecute<DeviceType>::
   const X_FLOAT ztmp = x(i, 2);
   const X_FLOAT radi = radius(i);
   const int itype = type(i);
+  tagint itag;
+  if (HalfNeigh && Newton && Tri) itag = tag(i);
 
   const int ibin = c_atom2bin(i);
 
@@ -1167,13 +1215,29 @@ void NeighborKokkosExecute<DeviceType>::
 
       if (HalfNeigh && !Newton && j <= i) continue;
       if (!HalfNeigh && j == i) continue;
+
+      // for triclinic, bin stencil is full in all 3 dims
+      // must use itag/jtag to eliminate half the I/J interactions
+      // cannot use I/J exact coord comparison
+      //   b/c transforming orthog -> lambda -> orthog for ghost atoms
+      //   with an added PBC offset can shift all 3 coords by epsilon
+
       if (HalfNeigh && Newton && Tri) {
-        if (x(j,2) < ztmp) continue;
-        if (x(j,2) == ztmp) {
-          if (x(j,1) < ytmp) continue;
-          if (x(j,1) == ytmp) {
-            if (x(j,0) < xtmp) continue;
-            if (x(j,0) == xtmp && j <= i) continue;
+        if (j <= i) continue;
+        if (j >= nlocal) {
+          const tagint jtag = tag(j);
+          if (itag > jtag) {
+            if ((itag+jtag) % 2 == 0) continue;
+          } else if (itag < jtag) {
+            if ((itag+jtag) % 2 == 1) continue;
+          } else {
+            if (fabs(x(j,2)-ztmp) > delta) {
+              if (x(j,2) < ztmp) continue;
+            } else if (fabs(x(j,1)-ytmp) > delta) {
+              if (x(j,1) < ytmp) continue;
+            } else {
+              if (x(j,0) < xtmp) continue;
+            }
           }
         }
       }
@@ -1245,8 +1309,9 @@ void NeighborKokkosExecute<DeviceType>::build_ItemSizeGPU(typename Kokkos::TeamP
                                                           size_t sharedsize) const
 {
   auto* sharedmem = static_cast<X_FLOAT *>(dev.team_shmem().get_shmem(sharedsize));
-  /* loop over atoms in i's bin,
-   */
+
+  // loop over atoms in i's bin
+
   const int atoms_per_bin = c_bins.extent(1);
   const int BINS_PER_TEAM = dev.team_size()/atoms_per_bin <1?1:dev.team_size()/atoms_per_bin;
   const int TEAMS_PER_BIN = atoms_per_bin/dev.team_size()<1?1:atoms_per_bin/dev.team_size();
@@ -1256,15 +1321,14 @@ void NeighborKokkosExecute<DeviceType>::build_ItemSizeGPU(typename Kokkos::TeamP
 
   if (ibin >= mbins) return;
 
-  X_FLOAT* other_x = sharedmem + 6*atoms_per_bin*MY_BIN;
-  int* other_id = (int*) &other_x[5 * atoms_per_bin];
+  X_FLOAT* other_x = sharedmem + 7*atoms_per_bin*MY_BIN;
+  int* other_id = (int*) &other_x[6 * atoms_per_bin];
 
   int bincount_current = c_bincount[ibin];
 
   for (int kk = 0; kk < TEAMS_PER_BIN; kk++) {
     const int MY_II = dev.team_rank()%atoms_per_bin+kk*dev.team_size();
     const int i = MY_II < bincount_current ? c_bins(ibin, MY_II) : -1;
-    /* if necessary, goto next page and add pages */
 
     int n = 0;
 
@@ -1273,6 +1337,7 @@ void NeighborKokkosExecute<DeviceType>::build_ItemSizeGPU(typename Kokkos::TeamP
     X_FLOAT ztmp;
     X_FLOAT radi;
     int itype;
+    tagint itag;
     const int index = (i >= 0 && i < nlocal) ? i : 0;
     const AtomNeighbors neighbors_i = neigh_transpose ?
     neigh_list.get_neighbors_transpose(index) : neigh_list.get_neighbors(index);
@@ -1289,6 +1354,10 @@ void NeighborKokkosExecute<DeviceType>::build_ItemSizeGPU(typename Kokkos::TeamP
       other_x[MY_II + 2 * atoms_per_bin] = ztmp;
       other_x[MY_II + 3 * atoms_per_bin] = itype;
       other_x[MY_II + 4 * atoms_per_bin] = radi;
+      if (HalfNeigh && Newton && Tri) {
+        itag = tag(i);
+        other_x[MY_II + 5 * atoms_per_bin] = itag;
+      }
     }
     other_id[MY_II] = i;
 #if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP)
@@ -1381,6 +1450,8 @@ void NeighborKokkosExecute<DeviceType>::build_ItemSizeGPU(typename Kokkos::TeamP
         other_x[MY_II + 2 * atoms_per_bin] = x(j, 2);
         other_x[MY_II + 3 * atoms_per_bin] = type(j);
         other_x[MY_II + 4 * atoms_per_bin] = radius(j);
+        if (HalfNeigh && Newton && Tri)
+          other_x[MY_II + 5 * atoms_per_bin] = tag(j);
       }
 
       other_id[MY_II] = j;
@@ -1394,13 +1465,29 @@ void NeighborKokkosExecute<DeviceType>::build_ItemSizeGPU(typename Kokkos::TeamP
 
           if (HalfNeigh && !Newton && j <= i) continue;
           if (!HalfNeigh && j == i) continue;
+
+          // for triclinic, bin stencil is full in all 3 dims
+          // must use itag/jtag to eliminate half the I/J interactions
+          // cannot use I/J exact coord comparison
+          //   b/c transforming orthog -> lambda -> orthog for ghost atoms
+          //   with an added PBC offset can shift all 3 coords by epsilon
+
           if (HalfNeigh && Newton && Tri) {
-            if (x(j,2) < ztmp) continue;
-            if (x(j,2) == ztmp) {
-              if (x(j,1) < ytmp) continue;
-              if (x(j,1) == ytmp) {
-                if (x(j,0) < xtmp) continue;
-                if (x(j,0) == xtmp && j <= i) continue;
+            if (j <= i) continue;
+            if (j >= nlocal) {
+              const tagint jtag = other_x[m + 5 * atoms_per_bin];
+              if (itag > jtag) {
+                if ((itag+jtag) % 2 == 0) continue;
+              } else if (itag < jtag) {
+                if ((itag+jtag) % 2 == 1) continue;
+              } else {
+                if (fabs(x(j,2)-ztmp) > delta) {
+                  if (x(j,2) < ztmp) continue;
+                } else if (fabs(x(j,1)-ytmp) > delta) {
+                  if (x(j,1) < ytmp) continue;
+                } else {
+                  if (x(j,0) < xtmp) continue;
+                }
               }
             }
           }
diff --git a/src/KOKKOS/npair_kokkos.h b/src/KOKKOS/npair_kokkos.h
index 4427012926..fe5484a771 100644
--- a/src/KOKKOS/npair_kokkos.h
+++ b/src/KOKKOS/npair_kokkos.h
@@ -189,6 +189,8 @@ class NeighborKokkosExecute
  public:
   NeighListKokkos<DeviceType> neigh_list;
 
+  const double delta;
+
   // data from Neighbor class
 
   const typename AT::t_xfloat_2d_randomread cutneighsq;
@@ -282,7 +284,7 @@ class NeighborKokkosExecute
                         const int & _mbinx,const int & _mbiny,const int & _mbinz,
                         const int & _mbinxlo,const int & _mbinylo,const int & _mbinzlo,
                         const X_FLOAT &_bininvx,const X_FLOAT &_bininvy,const X_FLOAT &_bininvz,
-                        const int & _exclude,const int & _nex_type,
+                        const double &_delta,const int & _exclude,const int & _nex_type,
                         const typename AT::t_int_1d_const & _ex1_type,
                         const typename AT::t_int_1d_const & _ex2_type,
                         const typename AT::t_int_2d_const & _ex_type,
@@ -301,7 +303,7 @@ class NeighborKokkosExecute
                         const typename ArrayTypes<LMPHostType>::t_int_scalar _h_resize,
                         const typename AT::t_int_scalar _new_maxneighs,
                         const typename ArrayTypes<LMPHostType>::t_int_scalar _h_new_maxneighs):
-    neigh_list(_neigh_list), cutneighsq(_cutneighsq),exclude(_exclude),
+    neigh_list(_neigh_list), cutneighsq(_cutneighsq),delta(_delta),exclude(_exclude),
     nex_type(_nex_type),ex1_type(_ex1_type),ex2_type(_ex2_type),
     ex_type(_ex_type),nex_group(_nex_group),
     ex1_bit(_ex1_bit),ex2_bit(_ex2_bit),
diff --git a/src/KOKKOS/npair_trim_kokkos.cpp b/src/KOKKOS/npair_trim_kokkos.cpp
index 97931bf250..d04d8676d7 100644
--- a/src/KOKKOS/npair_trim_kokkos.cpp
+++ b/src/KOKKOS/npair_trim_kokkos.cpp
@@ -62,8 +62,8 @@ void NPairTrimKokkos<DeviceType>::trim_to_kokkos(NeighList *list)
   d_ilist_copy = k_list_copy->d_ilist;
   d_numneigh_copy = k_list_copy->d_numneigh;
   d_neighbors_copy = k_list_copy->d_neighbors;
-  int inum_copy = list->listcopy->inum;
-  if (list->ghost) inum_copy += list->listcopy->gnum;
+  int inum_trim = list->listcopy->inum;
+  if (list->ghost) inum_trim += list->listcopy->gnum;
 
   NeighListKokkos<DeviceType>* k_list = static_cast<NeighListKokkos<DeviceType>*>(list);
   k_list->maxneighs = k_list_copy->maxneighs; // simple, but could be made more memory efficient
@@ -75,7 +75,7 @@ void NPairTrimKokkos<DeviceType>::trim_to_kokkos(NeighList *list)
   // loop over parent list and trim
 
   copymode = 1;
-  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagNPairTrim>(0,inum_copy),*this);
+  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagNPairTrim>(0,inum_trim),*this);
   copymode = 0;
 
   list->inum = k_list_copy->inum;
@@ -132,8 +132,8 @@ void NPairTrimKokkos<DeviceType>::trim_to_cpu(NeighList *list)
 
   int inum = listcopy->inum;
   int gnum = listcopy->gnum;
-  int inum_all = inum;
-  if (list->ghost) inum_all += gnum;
+  int inum_trim = inum;
+  if (list->ghost) inum_trim += gnum;
   auto h_ilist = listcopy_kk->k_ilist.h_view;
   auto h_numneigh = Kokkos::create_mirror_view_and_copy(LMPHostType(),listcopy_kk->d_numneigh);
   auto h_neighbors = Kokkos::create_mirror_view_and_copy(LMPHostType(),listcopy_kk->d_neighbors);
@@ -151,7 +151,7 @@ void NPairTrimKokkos<DeviceType>::trim_to_cpu(NeighList *list)
   MyPage<int> *ipage = list->ipage;
   ipage->reset();
 
-  for (int ii = 0; ii < inum_all; ii++) {
+  for (int ii = 0; ii < inum_trim; ii++) {
     int n = 0;
     neighptr = ipage->vget();
 
diff --git a/src/KOKKOS/pair_buck_coul_cut_kokkos.h b/src/KOKKOS/pair_buck_coul_cut_kokkos.h
index b91348d557..9b6cc31898 100644
--- a/src/KOKKOS/pair_buck_coul_cut_kokkos.h
+++ b/src/KOKKOS/pair_buck_coul_cut_kokkos.h
@@ -112,15 +112,18 @@ class PairBuckCoulCutKokkos : public PairBuckCoulCut {
 
   void allocate() override;
 
-  friend struct PairComputeFunctor<PairBuckCoulCutKokkos,FULL,true>;
+  friend struct PairComputeFunctor<PairBuckCoulCutKokkos,FULL,true,0>;
+  friend struct PairComputeFunctor<PairBuckCoulCutKokkos,FULL,true,1>;
   friend struct PairComputeFunctor<PairBuckCoulCutKokkos,HALF,true>;
   friend struct PairComputeFunctor<PairBuckCoulCutKokkos,HALFTHREAD,true>;
-  friend struct PairComputeFunctor<PairBuckCoulCutKokkos,FULL,false>;
+  friend struct PairComputeFunctor<PairBuckCoulCutKokkos,FULL,false,0>;
+  friend struct PairComputeFunctor<PairBuckCoulCutKokkos,FULL,false,1>;
   friend struct PairComputeFunctor<PairBuckCoulCutKokkos,HALF,false>;
   friend struct PairComputeFunctor<PairBuckCoulCutKokkos,HALFTHREAD,false>;
-  friend EV_FLOAT pair_compute_neighlist<PairBuckCoulCutKokkos,FULL,void>(PairBuckCoulCutKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairBuckCoulCutKokkos,HALF,void>(PairBuckCoulCutKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairBuckCoulCutKokkos,HALFTHREAD,void>(PairBuckCoulCutKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairBuckCoulCutKokkos,FULL,0>(PairBuckCoulCutKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairBuckCoulCutKokkos,FULL,1>(PairBuckCoulCutKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairBuckCoulCutKokkos,HALF>(PairBuckCoulCutKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairBuckCoulCutKokkos,HALFTHREAD>(PairBuckCoulCutKokkos*,NeighListKokkos<DeviceType>*);
   friend EV_FLOAT pair_compute<PairBuckCoulCutKokkos,void>(PairBuckCoulCutKokkos*,
                                                             NeighListKokkos<DeviceType>*);
   friend void pair_virial_fdotr_compute<PairBuckCoulCutKokkos>(PairBuckCoulCutKokkos*);
diff --git a/src/KOKKOS/pair_buck_coul_long_kokkos.h b/src/KOKKOS/pair_buck_coul_long_kokkos.h
index b776a84e3c..bed9b0d0f8 100644
--- a/src/KOKKOS/pair_buck_coul_long_kokkos.h
+++ b/src/KOKKOS/pair_buck_coul_long_kokkos.h
@@ -115,27 +115,33 @@ class PairBuckCoulLongKokkos : public PairBuckCoulLong {
 
   void allocate() override;
 
-  friend struct PairComputeFunctor<PairBuckCoulLongKokkos,FULL,true,CoulLongTable<1> >;
-  friend struct PairComputeFunctor<PairBuckCoulLongKokkos,HALF,true,CoulLongTable<1> >;
-  friend struct PairComputeFunctor<PairBuckCoulLongKokkos,HALFTHREAD,true,CoulLongTable<1> >;
-  friend struct PairComputeFunctor<PairBuckCoulLongKokkos,FULL,false,CoulLongTable<1> >;
-  friend struct PairComputeFunctor<PairBuckCoulLongKokkos,HALF,false,CoulLongTable<1> >;
-  friend struct PairComputeFunctor<PairBuckCoulLongKokkos,HALFTHREAD,false,CoulLongTable<1> >;
-  friend EV_FLOAT pair_compute_neighlist<PairBuckCoulLongKokkos,FULL,CoulLongTable<1> >(PairBuckCoulLongKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairBuckCoulLongKokkos,HALF,CoulLongTable<1> >(PairBuckCoulLongKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairBuckCoulLongKokkos,HALFTHREAD,CoulLongTable<1> >(PairBuckCoulLongKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute<PairBuckCoulLongKokkos,CoulLongTable<1> >(PairBuckCoulLongKokkos*,
+  friend struct PairComputeFunctor<PairBuckCoulLongKokkos,FULL,true,0,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairBuckCoulLongKokkos,FULL,true,1,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairBuckCoulLongKokkos,HALF,true,0,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairBuckCoulLongKokkos,HALFTHREAD,true,0,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairBuckCoulLongKokkos,FULL,false,0,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairBuckCoulLongKokkos,FULL,false,1,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairBuckCoulLongKokkos,HALF,false,0,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairBuckCoulLongKokkos,HALFTHREAD,false,0,CoulLongTable<1>>;
+  friend EV_FLOAT pair_compute_neighlist<PairBuckCoulLongKokkos,FULL,0,CoulLongTable<1>>(PairBuckCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairBuckCoulLongKokkos,FULL,1,CoulLongTable<1>>(PairBuckCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairBuckCoulLongKokkos,HALF,0,CoulLongTable<1>>(PairBuckCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairBuckCoulLongKokkos,HALFTHREAD,0,CoulLongTable<1>>(PairBuckCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute<PairBuckCoulLongKokkos,CoulLongTable<1>>(PairBuckCoulLongKokkos*,
                                                             NeighListKokkos<DeviceType>*);
-  friend struct PairComputeFunctor<PairBuckCoulLongKokkos,FULL,true,CoulLongTable<0> >;
-  friend struct PairComputeFunctor<PairBuckCoulLongKokkos,HALF,true,CoulLongTable<0> >;
-  friend struct PairComputeFunctor<PairBuckCoulLongKokkos,HALFTHREAD,true,CoulLongTable<0> >;
-  friend struct PairComputeFunctor<PairBuckCoulLongKokkos,FULL,false,CoulLongTable<0> >;
-  friend struct PairComputeFunctor<PairBuckCoulLongKokkos,HALF,false,CoulLongTable<0> >;
-  friend struct PairComputeFunctor<PairBuckCoulLongKokkos,HALFTHREAD,false,CoulLongTable<0> >;
-  friend EV_FLOAT pair_compute_neighlist<PairBuckCoulLongKokkos,FULL,CoulLongTable<0> >(PairBuckCoulLongKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairBuckCoulLongKokkos,HALF,CoulLongTable<0> >(PairBuckCoulLongKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairBuckCoulLongKokkos,HALFTHREAD,CoulLongTable<0> >(PairBuckCoulLongKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute<PairBuckCoulLongKokkos,CoulLongTable<0> >(PairBuckCoulLongKokkos*,
+  friend struct PairComputeFunctor<PairBuckCoulLongKokkos,FULL,true,0,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairBuckCoulLongKokkos,FULL,true,1,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairBuckCoulLongKokkos,HALF,true,0,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairBuckCoulLongKokkos,HALFTHREAD,true,0,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairBuckCoulLongKokkos,FULL,false,0,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairBuckCoulLongKokkos,FULL,false,1,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairBuckCoulLongKokkos,HALF,false,0,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairBuckCoulLongKokkos,HALFTHREAD,false,0,CoulLongTable<0>>;
+  friend EV_FLOAT pair_compute_neighlist<PairBuckCoulLongKokkos,FULL,0,CoulLongTable<0>>(PairBuckCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairBuckCoulLongKokkos,FULL,1,CoulLongTable<0>>(PairBuckCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairBuckCoulLongKokkos,HALF,0,CoulLongTable<0>>(PairBuckCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairBuckCoulLongKokkos,HALFTHREAD,0,CoulLongTable<0>>(PairBuckCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute<PairBuckCoulLongKokkos,CoulLongTable<0>>(PairBuckCoulLongKokkos*,
                                                             NeighListKokkos<DeviceType>*);
   friend void pair_virial_fdotr_compute<PairBuckCoulLongKokkos>(PairBuckCoulLongKokkos*);
 
diff --git a/src/KOKKOS/pair_buck_kokkos.h b/src/KOKKOS/pair_buck_kokkos.h
index 364716453b..15325cd56a 100644
--- a/src/KOKKOS/pair_buck_kokkos.h
+++ b/src/KOKKOS/pair_buck_kokkos.h
@@ -91,16 +91,19 @@ class PairBuckKokkos : public PairBuck {
   int nlocal,nall,eflag,vflag;
 
   void allocate() override;
-  friend struct PairComputeFunctor<PairBuckKokkos,FULL,true>;
+  friend struct PairComputeFunctor<PairBuckKokkos,FULL,true,0>;
+  friend struct PairComputeFunctor<PairBuckKokkos,FULL,true,1>;
   friend struct PairComputeFunctor<PairBuckKokkos,HALF,true>;
   friend struct PairComputeFunctor<PairBuckKokkos,HALFTHREAD,true>;
-  friend struct PairComputeFunctor<PairBuckKokkos,FULL,false>;
+  friend struct PairComputeFunctor<PairBuckKokkos,FULL,false,0>;
+  friend struct PairComputeFunctor<PairBuckKokkos,FULL,false,1>;
   friend struct PairComputeFunctor<PairBuckKokkos,HALF,false>;
   friend struct PairComputeFunctor<PairBuckKokkos,HALFTHREAD,false>;
-  friend EV_FLOAT pair_compute_neighlist<PairBuckKokkos,FULL,void>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairBuckKokkos,HALF,void>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairBuckKokkos,HALFTHREAD,void>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute<PairBuckKokkos,void>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairBuckKokkos,FULL,0>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairBuckKokkos,FULL,1>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairBuckKokkos,HALF>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairBuckKokkos,HALFTHREAD>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute<PairBuckKokkos>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
   friend void pair_virial_fdotr_compute<PairBuckKokkos>(PairBuckKokkos*);
 };
 
diff --git a/src/KOKKOS/pair_coul_cut_kokkos.h b/src/KOKKOS/pair_coul_cut_kokkos.h
index 6626889660..3e0501edd9 100644
--- a/src/KOKKOS/pair_coul_cut_kokkos.h
+++ b/src/KOKKOS/pair_coul_cut_kokkos.h
@@ -112,15 +112,18 @@ class PairCoulCutKokkos : public PairCoulCut {
   double qqrd2e;
 
   void allocate() override;
-  friend struct PairComputeFunctor<PairCoulCutKokkos,FULL,true>;
+  friend struct PairComputeFunctor<PairCoulCutKokkos,FULL,true,0>;
+  friend struct PairComputeFunctor<PairCoulCutKokkos,FULL,true,1>;
   friend struct PairComputeFunctor<PairCoulCutKokkos,HALF,true>;
   friend struct PairComputeFunctor<PairCoulCutKokkos,HALFTHREAD,true>;
-  friend struct PairComputeFunctor<PairCoulCutKokkos,FULL,false>;
+  friend struct PairComputeFunctor<PairCoulCutKokkos,FULL,false,0>;
+  friend struct PairComputeFunctor<PairCoulCutKokkos,FULL,false,1>;
   friend struct PairComputeFunctor<PairCoulCutKokkos,HALF,false>;
   friend struct PairComputeFunctor<PairCoulCutKokkos,HALFTHREAD,false>;
-  friend EV_FLOAT pair_compute_neighlist<PairCoulCutKokkos,FULL,void>(PairCoulCutKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairCoulCutKokkos,HALF,void>(PairCoulCutKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairCoulCutKokkos,HALFTHREAD,void>(PairCoulCutKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairCoulCutKokkos,FULL,0>(PairCoulCutKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairCoulCutKokkos,FULL,1>(PairCoulCutKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairCoulCutKokkos,HALF>(PairCoulCutKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairCoulCutKokkos,HALFTHREAD>(PairCoulCutKokkos*,NeighListKokkos<DeviceType>*);
   friend EV_FLOAT pair_compute<PairCoulCutKokkos,void>(PairCoulCutKokkos*,
                                                        NeighListKokkos<DeviceType>*);
   friend void pair_virial_fdotr_compute<PairCoulCutKokkos>(PairCoulCutKokkos*);
diff --git a/src/KOKKOS/pair_coul_debye_kokkos.h b/src/KOKKOS/pair_coul_debye_kokkos.h
index b6bed9d557..d239291a25 100644
--- a/src/KOKKOS/pair_coul_debye_kokkos.h
+++ b/src/KOKKOS/pair_coul_debye_kokkos.h
@@ -112,15 +112,18 @@ class PairCoulDebyeKokkos : public PairCoulDebye {
   double qqrd2e;
 
   void allocate() override;
-  friend struct PairComputeFunctor<PairCoulDebyeKokkos,FULL,true>;
+  friend struct PairComputeFunctor<PairCoulDebyeKokkos,FULL,true,0>;
+  friend struct PairComputeFunctor<PairCoulDebyeKokkos,FULL,true,1>;
   friend struct PairComputeFunctor<PairCoulDebyeKokkos,HALF,true>;
   friend struct PairComputeFunctor<PairCoulDebyeKokkos,HALFTHREAD,true>;
-  friend struct PairComputeFunctor<PairCoulDebyeKokkos,FULL,false>;
+  friend struct PairComputeFunctor<PairCoulDebyeKokkos,FULL,false,0>;
+  friend struct PairComputeFunctor<PairCoulDebyeKokkos,FULL,false,1>;
   friend struct PairComputeFunctor<PairCoulDebyeKokkos,HALF,false>;
   friend struct PairComputeFunctor<PairCoulDebyeKokkos,HALFTHREAD,false>;
-  friend EV_FLOAT pair_compute_neighlist<PairCoulDebyeKokkos,FULL,void>(PairCoulDebyeKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairCoulDebyeKokkos,HALF,void>(PairCoulDebyeKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairCoulDebyeKokkos,HALFTHREAD,void>(PairCoulDebyeKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairCoulDebyeKokkos,FULL,0>(PairCoulDebyeKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairCoulDebyeKokkos,FULL,1>(PairCoulDebyeKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairCoulDebyeKokkos,HALF>(PairCoulDebyeKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairCoulDebyeKokkos,HALFTHREAD>(PairCoulDebyeKokkos*,NeighListKokkos<DeviceType>*);
   friend EV_FLOAT pair_compute<PairCoulDebyeKokkos,void>(PairCoulDebyeKokkos*,
                                                             NeighListKokkos<DeviceType>*);
   friend void pair_virial_fdotr_compute<PairCoulDebyeKokkos>(PairCoulDebyeKokkos*);
diff --git a/src/KOKKOS/pair_coul_long_kokkos.h b/src/KOKKOS/pair_coul_long_kokkos.h
index fcb1402028..232cdbb6df 100644
--- a/src/KOKKOS/pair_coul_long_kokkos.h
+++ b/src/KOKKOS/pair_coul_long_kokkos.h
@@ -114,27 +114,33 @@ class PairCoulLongKokkos : public PairCoulLong {
 
   void allocate() override;
 
-  friend struct PairComputeFunctor<PairCoulLongKokkos,FULL,true,CoulLongTable<1> >;
-  friend struct PairComputeFunctor<PairCoulLongKokkos,HALF,true,CoulLongTable<1> >;
-  friend struct PairComputeFunctor<PairCoulLongKokkos,HALFTHREAD,true,CoulLongTable<1> >;
-  friend struct PairComputeFunctor<PairCoulLongKokkos,FULL,false,CoulLongTable<1> >;
-  friend struct PairComputeFunctor<PairCoulLongKokkos,HALF,false,CoulLongTable<1> >;
-  friend struct PairComputeFunctor<PairCoulLongKokkos,HALFTHREAD,false,CoulLongTable<1> >;
-  friend EV_FLOAT pair_compute_neighlist<PairCoulLongKokkos,FULL,CoulLongTable<1> >(PairCoulLongKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairCoulLongKokkos,HALF,CoulLongTable<1> >(PairCoulLongKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairCoulLongKokkos,HALFTHREAD,CoulLongTable<1> >(PairCoulLongKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute<PairCoulLongKokkos,CoulLongTable<1> >(PairCoulLongKokkos*,
+  friend struct PairComputeFunctor<PairCoulLongKokkos,FULL,true,0,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairCoulLongKokkos,FULL,true,1,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairCoulLongKokkos,HALF,true,0,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairCoulLongKokkos,HALFTHREAD,true,0,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairCoulLongKokkos,FULL,false,0,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairCoulLongKokkos,FULL,false,1,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairCoulLongKokkos,HALF,false,0,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairCoulLongKokkos,HALFTHREAD,false,0,CoulLongTable<1>>;
+  friend EV_FLOAT pair_compute_neighlist<PairCoulLongKokkos,FULL,0,CoulLongTable<1>>(PairCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairCoulLongKokkos,FULL,1,CoulLongTable<1>>(PairCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairCoulLongKokkos,HALF,0,CoulLongTable<1>>(PairCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairCoulLongKokkos,HALFTHREAD,0,CoulLongTable<1>>(PairCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute<PairCoulLongKokkos,CoulLongTable<1>>(PairCoulLongKokkos*,
                                                             NeighListKokkos<DeviceType>*);
-  friend struct PairComputeFunctor<PairCoulLongKokkos,FULL,true,CoulLongTable<0> >;
-  friend struct PairComputeFunctor<PairCoulLongKokkos,HALF,true,CoulLongTable<0> >;
-  friend struct PairComputeFunctor<PairCoulLongKokkos,HALFTHREAD,true,CoulLongTable<0> >;
-  friend struct PairComputeFunctor<PairCoulLongKokkos,FULL,false,CoulLongTable<0> >;
-  friend struct PairComputeFunctor<PairCoulLongKokkos,HALF,false,CoulLongTable<0> >;
-  friend struct PairComputeFunctor<PairCoulLongKokkos,HALFTHREAD,false,CoulLongTable<0> >;
-  friend EV_FLOAT pair_compute_neighlist<PairCoulLongKokkos,FULL,CoulLongTable<0> >(PairCoulLongKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairCoulLongKokkos,HALF,CoulLongTable<0> >(PairCoulLongKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairCoulLongKokkos,HALFTHREAD,CoulLongTable<0> >(PairCoulLongKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute<PairCoulLongKokkos,CoulLongTable<0> >(PairCoulLongKokkos*,
+  friend struct PairComputeFunctor<PairCoulLongKokkos,FULL,true,0,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairCoulLongKokkos,FULL,true,1,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairCoulLongKokkos,HALF,true,0,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairCoulLongKokkos,HALFTHREAD,true,0,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairCoulLongKokkos,FULL,false,0,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairCoulLongKokkos,FULL,false,1,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairCoulLongKokkos,HALF,false,0,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairCoulLongKokkos,HALFTHREAD,false,0,CoulLongTable<0>>;
+  friend EV_FLOAT pair_compute_neighlist<PairCoulLongKokkos,FULL,0,CoulLongTable<0>>(PairCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairCoulLongKokkos,FULL,1,CoulLongTable<0>>(PairCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairCoulLongKokkos,HALF,0,CoulLongTable<0>>(PairCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairCoulLongKokkos,HALFTHREAD,0,CoulLongTable<0>>(PairCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute<PairCoulLongKokkos,CoulLongTable<0>>(PairCoulLongKokkos*,
                                                             NeighListKokkos<DeviceType>*);
   friend void pair_virial_fdotr_compute<PairCoulLongKokkos>(PairCoulLongKokkos*);
 
diff --git a/src/KOKKOS/pair_eam_alloy_kokkos.cpp b/src/KOKKOS/pair_eam_alloy_kokkos.cpp
index 5cc6fa9443..0dfe56c365 100644
--- a/src/KOKKOS/pair_eam_alloy_kokkos.cpp
+++ b/src/KOKKOS/pair_eam_alloy_kokkos.cpp
@@ -1477,7 +1477,7 @@ void PairEAMAlloyKokkos<DeviceType>::file2array_alloy()
 template<typename DeviceType>
 template<class TAG>
 struct PairEAMAlloyKokkos<DeviceType>::policyInstance {
-  KOKKOS_INLINE_FUNCTION
+
   static auto get(int inum) {
     auto policy = Kokkos::RangePolicy<DeviceType, TAG>(0,inum);
     return policy;
@@ -1488,7 +1488,7 @@ struct PairEAMAlloyKokkos<DeviceType>::policyInstance {
 template<>
 template<class TAG>
 struct PairEAMAlloyKokkos<Kokkos::Experimental::HIP>::policyInstance {
-  KOKKOS_INLINE_FUNCTION
+
   static auto get(int inum) {
     static_assert(t_ffloat_2d_n7::static_extent(2) == 7,
                   "Breaking assumption of spline dim for KernelAB and KernelC scratch caching");
diff --git a/src/KOKKOS/pair_eam_fs_kokkos.cpp b/src/KOKKOS/pair_eam_fs_kokkos.cpp
index 8e895dfeac..58ff615c04 100644
--- a/src/KOKKOS/pair_eam_fs_kokkos.cpp
+++ b/src/KOKKOS/pair_eam_fs_kokkos.cpp
@@ -1487,7 +1487,7 @@ void PairEAMFSKokkos<DeviceType>::file2array_fs()
 template<typename DeviceType>
 template<class TAG>
 struct PairEAMFSKokkos<DeviceType>::policyInstance {
-  KOKKOS_INLINE_FUNCTION
+
   static auto get(int inum) {
     auto policy = Kokkos::RangePolicy<DeviceType, TAG>(0,inum);
     return policy;
@@ -1498,7 +1498,7 @@ struct PairEAMFSKokkos<DeviceType>::policyInstance {
 template<>
 template<class TAG>
 struct PairEAMFSKokkos<Kokkos::Experimental::HIP>::policyInstance {
-  KOKKOS_INLINE_FUNCTION
+
   static auto get(int inum) {
     static_assert(t_ffloat_2d_n7::static_extent(2) == 7,
                   "Breaking assumption of spline dim for KernelAB and KernelC scratch caching");
diff --git a/src/KOKKOS/pair_eam_kokkos.cpp b/src/KOKKOS/pair_eam_kokkos.cpp
index a3bc463bbf..864f736066 100644
--- a/src/KOKKOS/pair_eam_kokkos.cpp
+++ b/src/KOKKOS/pair_eam_kokkos.cpp
@@ -1162,7 +1162,7 @@ void PairEAMKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int &i, const int &
 template<typename DeviceType>
 template<class TAG>
 struct PairEAMKokkos<DeviceType>::policyInstance {
-  KOKKOS_INLINE_FUNCTION
+
   static auto get(int inum) {
     auto policy = Kokkos::RangePolicy<DeviceType, TAG>(0,inum);
     return policy;
@@ -1173,7 +1173,7 @@ struct PairEAMKokkos<DeviceType>::policyInstance {
 template<>
 template<class TAG>
 struct PairEAMKokkos<Kokkos::Experimental::HIP>::policyInstance {
-  KOKKOS_INLINE_FUNCTION
+
   static auto get(int inum) {
     static_assert(t_ffloat_2d_n7::static_extent(2) == 7,
                   "Breaking assumption of spline dim for KernelAB and KernelC scratch caching");
diff --git a/src/KOKKOS/pair_kokkos.h b/src/KOKKOS/pair_kokkos.h
index 2c2a622791..d3c766f5ae 100644
--- a/src/KOKKOS/pair_kokkos.h
+++ b/src/KOKKOS/pair_kokkos.h
@@ -50,7 +50,7 @@ struct DoCoul<1> {
 
 
 //Specialisation for Neighborlist types Half, HalfThread, Full
-template <class PairStyle, int NEIGHFLAG, bool STACKPARAMS, class Specialisation = void>
+template <class PairStyle, int NEIGHFLAG, bool STACKPARAMS, int ZEROFLAG = 0, class Specialisation = void>
 struct PairComputeFunctor  {
   typedef typename PairStyle::device_type device_type ;
   typedef ArrayTypes<device_type> AT;
@@ -137,7 +137,7 @@ struct PairComputeFunctor  {
     F_FLOAT fytmp = 0.0;
     F_FLOAT fztmp = 0.0;
 
-    if (NEIGHFLAG == FULL) {
+    if (NEIGHFLAG == FULL && ZEROFLAG) {
       f(i,0) = 0.0;
       f(i,1) = 0.0;
       f(i,2) = 0.0;
@@ -211,7 +211,7 @@ struct PairComputeFunctor  {
     F_FLOAT fytmp = 0.0;
     F_FLOAT fztmp = 0.0;
 
-    if (NEIGHFLAG == FULL) {
+    if (NEIGHFLAG == FULL && ZEROFLAG) {
       f(i,0) = 0.0;
       f(i,1) = 0.0;
       f(i,2) = 0.0;
@@ -292,11 +292,13 @@ struct PairComputeFunctor  {
       const X_FLOAT ztmp = c.x(i,2);
       const int itype = c.type(i);
 
-      Kokkos::single(Kokkos::PerThread(team), [&] (){
-        f(i,0) = 0.0;
-        f(i,1) = 0.0;
-        f(i,2) = 0.0;
-      });
+      if (ZEROFLAG) {
+        Kokkos::single(Kokkos::PerThread(team), [&] (){
+          f(i,0) = 0.0;
+          f(i,1) = 0.0;
+          f(i,2) = 0.0;
+        });
+      }
 
       const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i);
       const int jnum = list.d_numneigh[i];
@@ -355,11 +357,13 @@ struct PairComputeFunctor  {
       const int itype = c.type(i);
       const F_FLOAT qtmp = c.q(i);
 
-      Kokkos::single(Kokkos::PerThread(team), [&] (){
-        f(i,0) = 0.0;
-        f(i,1) = 0.0;
-        f(i,2) = 0.0;
-      });
+      if (ZEROFLAG) {
+        Kokkos::single(Kokkos::PerThread(team), [&] (){
+          f(i,0) = 0.0;
+          f(i,1) = 0.0;
+          f(i,2) = 0.0;
+        });
+      }
 
       const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i);
       const int jnum = list.d_numneigh[i];
@@ -423,11 +427,13 @@ struct PairComputeFunctor  {
       const X_FLOAT ztmp = c.x(i,2);
       const int itype = c.type(i);
 
-      Kokkos::single(Kokkos::PerThread(team), [&] (){
-        f(i,0) = 0.0;
-        f(i,1) = 0.0;
-        f(i,2) = 0.0;
-      });
+      if (ZEROFLAG) {
+        Kokkos::single(Kokkos::PerThread(team), [&] (){
+          f(i,0) = 0.0;
+          f(i,1) = 0.0;
+          f(i,2) = 0.0;
+        });
+      }
 
       const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i);
       const int jnum = list.d_numneigh[i];
@@ -525,11 +531,13 @@ struct PairComputeFunctor  {
       const int itype = c.type(i);
       const F_FLOAT qtmp = c.q(i);
 
-      Kokkos::single(Kokkos::PerThread(team), [&] (){
-        f(i,0) = 0.0;
-        f(i,1) = 0.0;
-        f(i,2) = 0.0;
-      });
+      if (ZEROFLAG) {
+        Kokkos::single(Kokkos::PerThread(team), [&] (){
+          f(i,0) = 0.0;
+          f(i,1) = 0.0;
+          f(i,2) = 0.0;
+        });
+      }
 
       const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i);
       const int jnum = list.d_numneigh[i];
@@ -740,7 +748,7 @@ struct PairComputeFunctor  {
 // By having the enable_if with a ! and without it, exactly one of the functions
 // pair_compute_neighlist will match - either the dummy version
 // or the real one further below.
-template<class PairStyle, unsigned NEIGHFLAG, class Specialisation>
+template<class PairStyle, unsigned NEIGHFLAG, int ZEROFLAG = 0, class Specialisation = void>
 EV_FLOAT pair_compute_neighlist (PairStyle* fpair, std::enable_if_t<!((NEIGHFLAG&PairStyle::EnabledNeighFlags) != 0), NeighListKokkos<typename PairStyle::device_type>*> list) {
   EV_FLOAT ev;
   (void) fpair;
@@ -770,7 +778,7 @@ int GetTeamSize(FunctorStyle& KOKKOS_GPU_ARG(functor), int KOKKOS_GPU_ARG(inum),
 }
 
 // Submit ParallelFor for NEIGHFLAG=HALF,HALFTHREAD,FULL
-template<class PairStyle, unsigned NEIGHFLAG, class Specialisation>
+template<class PairStyle, unsigned NEIGHFLAG, int ZEROFLAG = 0, class Specialisation = void>
 EV_FLOAT pair_compute_neighlist (PairStyle* fpair, std::enable_if_t<(NEIGHFLAG&PairStyle::EnabledNeighFlags) != 0, NeighListKokkos<typename PairStyle::device_type>*> list) {
   EV_FLOAT ev;
 
@@ -784,13 +792,13 @@ EV_FLOAT pair_compute_neighlist (PairStyle* fpair, std::enable_if_t<(NEIGHFLAG&P
     int atoms_per_team = 32;
 
     if (fpair->atom->ntypes > MAX_TYPES_STACKPARAMS) {
-      PairComputeFunctor<PairStyle,NEIGHFLAG,false,Specialisation > ff(fpair,list);
+      PairComputeFunctor<PairStyle,NEIGHFLAG,false,ZEROFLAG,Specialisation > ff(fpair,list);
       atoms_per_team = GetTeamSize<typename PairStyle::device_type>(ff, list->inum, (fpair->eflag || fpair->vflag), atoms_per_team, vector_length);
       Kokkos::TeamPolicy<typename PairStyle::device_type,Kokkos::IndexType<int> > policy(list->inum,atoms_per_team,vector_length);
       if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(policy,ff,ev);
       else                              Kokkos::parallel_for(policy,ff);
     } else {
-      PairComputeFunctor<PairStyle,NEIGHFLAG,true,Specialisation > ff(fpair,list);
+      PairComputeFunctor<PairStyle,NEIGHFLAG,true,ZEROFLAG,Specialisation > ff(fpair,list);
       atoms_per_team = GetTeamSize<typename PairStyle::device_type>(ff, list->inum, (fpair->eflag || fpair->vflag), atoms_per_team, vector_length);
       Kokkos::TeamPolicy<typename PairStyle::device_type,Kokkos::IndexType<int> > policy(list->inum,atoms_per_team,vector_length);
       if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(policy,ff,ev);
@@ -798,12 +806,12 @@ EV_FLOAT pair_compute_neighlist (PairStyle* fpair, std::enable_if_t<(NEIGHFLAG&P
     }
   } else {
     if (fpair->atom->ntypes > MAX_TYPES_STACKPARAMS) {
-      PairComputeFunctor<PairStyle,NEIGHFLAG,false,Specialisation > ff(fpair,list);
+      PairComputeFunctor<PairStyle,NEIGHFLAG,false,ZEROFLAG,Specialisation > ff(fpair,list);
       if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
       else                              Kokkos::parallel_for(list->inum,ff);
       ff.contribute();
     } else {
-      PairComputeFunctor<PairStyle,NEIGHFLAG,true,Specialisation > ff(fpair,list);
+      PairComputeFunctor<PairStyle,NEIGHFLAG,true,ZEROFLAG,Specialisation > ff(fpair,list);
       if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
       else                              Kokkos::parallel_for(list->inum,ff);
       ff.contribute();
@@ -812,16 +820,21 @@ EV_FLOAT pair_compute_neighlist (PairStyle* fpair, std::enable_if_t<(NEIGHFLAG&P
   return ev;
 }
 
-template<class PairStyle, class Specialisation>
+template<class PairStyle, class Specialisation = void>
 EV_FLOAT pair_compute (PairStyle* fpair, NeighListKokkos<typename PairStyle::device_type>* list) {
   EV_FLOAT ev;
   if (fpair->neighflag == FULL) {
-    fpair->fuse_force_clear_flag = 1;
-    ev = pair_compute_neighlist<PairStyle,FULL,Specialisation> (fpair,list);
+    if (utils::strmatch(fpair->lmp->force->pair_style,"^hybrid/overlay")) {
+      fpair->fuse_force_clear_flag = 0;
+      ev = pair_compute_neighlist<PairStyle,FULL,0,Specialisation> (fpair,list);
+    } else {
+      fpair->fuse_force_clear_flag = 1;
+      ev = pair_compute_neighlist<PairStyle,FULL,1,Specialisation> (fpair,list);
+    }
   } else if (fpair->neighflag == HALFTHREAD) {
-    ev = pair_compute_neighlist<PairStyle,HALFTHREAD,Specialisation> (fpair,list);
+    ev = pair_compute_neighlist<PairStyle,HALFTHREAD,0,Specialisation> (fpair,list);
   } else if (fpair->neighflag == HALF) {
-    ev = pair_compute_neighlist<PairStyle,HALF,Specialisation> (fpair,list);
+    ev = pair_compute_neighlist<PairStyle,HALF,0,Specialisation> (fpair,list);
   }
   return ev;
 }
diff --git a/src/KOKKOS/pair_lj_charmm_coul_charmm_implicit_kokkos.h b/src/KOKKOS/pair_lj_charmm_coul_charmm_implicit_kokkos.h
index ae27ee68ab..7e21676fd5 100644
--- a/src/KOKKOS/pair_lj_charmm_coul_charmm_implicit_kokkos.h
+++ b/src/KOKKOS/pair_lj_charmm_coul_charmm_implicit_kokkos.h
@@ -110,27 +110,33 @@ class PairLJCharmmCoulCharmmImplicitKokkos : public PairLJCharmmCoulCharmmImplic
 
   void allocate() override;
 
-  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,FULL,true,CoulLongTable<1> >;
-  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,HALF,true,CoulLongTable<1> >;
-  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,HALFTHREAD,true,CoulLongTable<1> >;
-  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,FULL,false,CoulLongTable<1> >;
-  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,HALF,false,CoulLongTable<1> >;
-  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,HALFTHREAD,false,CoulLongTable<1> >;
-  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmImplicitKokkos,FULL,CoulLongTable<1> >(PairLJCharmmCoulCharmmImplicitKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmImplicitKokkos,HALF,CoulLongTable<1> >(PairLJCharmmCoulCharmmImplicitKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmImplicitKokkos,HALFTHREAD,CoulLongTable<1> >(PairLJCharmmCoulCharmmImplicitKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute<PairLJCharmmCoulCharmmImplicitKokkos,CoulLongTable<1> >(PairLJCharmmCoulCharmmImplicitKokkos*,
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,FULL,true,0,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,FULL,true,1,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,HALF,true,0,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,HALFTHREAD,true,0,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,FULL,false,0,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,FULL,false,1,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,HALF,false,0,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,HALFTHREAD,false,0,CoulLongTable<1>>;
+  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmImplicitKokkos,FULL,0,CoulLongTable<1>>(PairLJCharmmCoulCharmmImplicitKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmImplicitKokkos,FULL,1,CoulLongTable<1>>(PairLJCharmmCoulCharmmImplicitKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmImplicitKokkos,HALF,0,CoulLongTable<1>>(PairLJCharmmCoulCharmmImplicitKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmImplicitKokkos,HALFTHREAD,0,CoulLongTable<1>>(PairLJCharmmCoulCharmmImplicitKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute<PairLJCharmmCoulCharmmImplicitKokkos,CoulLongTable<1>>(PairLJCharmmCoulCharmmImplicitKokkos*,
                                                             NeighListKokkos<DeviceType>*);
-  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,FULL,true,CoulLongTable<0> >;
-  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,HALF,true,CoulLongTable<0> >;
-  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,HALFTHREAD,true,CoulLongTable<0> >;
-  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,FULL,false,CoulLongTable<0> >;
-  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,HALF,false,CoulLongTable<0> >;
-  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,HALFTHREAD,false,CoulLongTable<0> >;
-  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmImplicitKokkos,FULL,CoulLongTable<0> >(PairLJCharmmCoulCharmmImplicitKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmImplicitKokkos,HALF,CoulLongTable<0> >(PairLJCharmmCoulCharmmImplicitKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmImplicitKokkos,HALFTHREAD,CoulLongTable<0> >(PairLJCharmmCoulCharmmImplicitKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute<PairLJCharmmCoulCharmmImplicitKokkos,CoulLongTable<0> >(PairLJCharmmCoulCharmmImplicitKokkos*,
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,FULL,true,0,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,FULL,true,1,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,HALF,true,0,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,HALFTHREAD,true,0,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,FULL,false,0,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,FULL,false,1,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,HALF,false,0,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,HALFTHREAD,false,0,CoulLongTable<0>>;
+  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmImplicitKokkos,FULL,0,CoulLongTable<0>>(PairLJCharmmCoulCharmmImplicitKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmImplicitKokkos,FULL,1,CoulLongTable<0>>(PairLJCharmmCoulCharmmImplicitKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmImplicitKokkos,HALF,0,CoulLongTable<0>>(PairLJCharmmCoulCharmmImplicitKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmImplicitKokkos,HALFTHREAD,0,CoulLongTable<0>>(PairLJCharmmCoulCharmmImplicitKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute<PairLJCharmmCoulCharmmImplicitKokkos,CoulLongTable<0>>(PairLJCharmmCoulCharmmImplicitKokkos*,
                                                             NeighListKokkos<DeviceType>*);
   friend void pair_virial_fdotr_compute<PairLJCharmmCoulCharmmImplicitKokkos>(PairLJCharmmCoulCharmmImplicitKokkos*);
 
diff --git a/src/KOKKOS/pair_lj_charmm_coul_charmm_kokkos.h b/src/KOKKOS/pair_lj_charmm_coul_charmm_kokkos.h
index 912ad573c6..1f26242ded 100644
--- a/src/KOKKOS/pair_lj_charmm_coul_charmm_kokkos.h
+++ b/src/KOKKOS/pair_lj_charmm_coul_charmm_kokkos.h
@@ -108,27 +108,33 @@ class PairLJCharmmCoulCharmmKokkos : public PairLJCharmmCoulCharmm {
 
   void allocate() override;
 
-  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,FULL,true,CoulLongTable<1> >;
-  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,HALF,true,CoulLongTable<1> >;
-  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,HALFTHREAD,true,CoulLongTable<1> >;
-  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,FULL,false,CoulLongTable<1> >;
-  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,HALF,false,CoulLongTable<1> >;
-  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,HALFTHREAD,false,CoulLongTable<1> >;
-  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmKokkos,FULL,CoulLongTable<1> >(PairLJCharmmCoulCharmmKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmKokkos,HALF,CoulLongTable<1> >(PairLJCharmmCoulCharmmKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmKokkos,HALFTHREAD,CoulLongTable<1> >(PairLJCharmmCoulCharmmKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute<PairLJCharmmCoulCharmmKokkos,CoulLongTable<1> >(PairLJCharmmCoulCharmmKokkos*,
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,FULL,true,0,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,FULL,true,1,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,HALF,true,0,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,HALFTHREAD,true,0,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,FULL,false,0,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,FULL,false,1,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,HALF,false,0,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,HALFTHREAD,false,0,CoulLongTable<1>>;
+  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmKokkos,FULL,0,CoulLongTable<1>>(PairLJCharmmCoulCharmmKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmKokkos,FULL,1,CoulLongTable<1>>(PairLJCharmmCoulCharmmKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmKokkos,HALF,0,CoulLongTable<1>>(PairLJCharmmCoulCharmmKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmKokkos,HALFTHREAD,0,CoulLongTable<1>>(PairLJCharmmCoulCharmmKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute<PairLJCharmmCoulCharmmKokkos,CoulLongTable<1>>(PairLJCharmmCoulCharmmKokkos*,
                                                             NeighListKokkos<DeviceType>*);
-  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,FULL,true,CoulLongTable<0> >;
-  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,HALF,true,CoulLongTable<0> >;
-  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,HALFTHREAD,true,CoulLongTable<0> >;
-  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,FULL,false,CoulLongTable<0> >;
-  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,HALF,false,CoulLongTable<0> >;
-  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,HALFTHREAD,false,CoulLongTable<0> >;
-  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmKokkos,FULL,CoulLongTable<0> >(PairLJCharmmCoulCharmmKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmKokkos,HALF,CoulLongTable<0> >(PairLJCharmmCoulCharmmKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmKokkos,HALFTHREAD,CoulLongTable<0> >(PairLJCharmmCoulCharmmKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute<PairLJCharmmCoulCharmmKokkos,CoulLongTable<0> >(PairLJCharmmCoulCharmmKokkos*,
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,FULL,true,0,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,FULL,true,1,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,HALF,true,0,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,HALFTHREAD,true,0,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,FULL,false,0,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,FULL,false,1,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,HALF,false,0,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,HALFTHREAD,false,0,CoulLongTable<0>>;
+  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmKokkos,FULL,0,CoulLongTable<0>>(PairLJCharmmCoulCharmmKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmKokkos,FULL,1,CoulLongTable<0>>(PairLJCharmmCoulCharmmKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmKokkos,HALF,0,CoulLongTable<0>>(PairLJCharmmCoulCharmmKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmKokkos,HALFTHREAD,0,CoulLongTable<0>>(PairLJCharmmCoulCharmmKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute<PairLJCharmmCoulCharmmKokkos,CoulLongTable<0>>(PairLJCharmmCoulCharmmKokkos*,
                                                             NeighListKokkos<DeviceType>*);
   friend void pair_virial_fdotr_compute<PairLJCharmmCoulCharmmKokkos>(PairLJCharmmCoulCharmmKokkos*);
 
diff --git a/src/KOKKOS/pair_lj_charmm_coul_long_kokkos.h b/src/KOKKOS/pair_lj_charmm_coul_long_kokkos.h
index 4ae8a12944..c6c80e76dc 100644
--- a/src/KOKKOS/pair_lj_charmm_coul_long_kokkos.h
+++ b/src/KOKKOS/pair_lj_charmm_coul_long_kokkos.h
@@ -106,27 +106,33 @@ class PairLJCharmmCoulLongKokkos : public PairLJCharmmCoulLong {
 
   void allocate() override;
 
-  friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,FULL,true,CoulLongTable<1> >;
-  friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,HALF,true,CoulLongTable<1> >;
-  friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,HALFTHREAD,true,CoulLongTable<1> >;
-  friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,FULL,false,CoulLongTable<1> >;
-  friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,HALF,false,CoulLongTable<1> >;
-  friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,HALFTHREAD,false,CoulLongTable<1> >;
-  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulLongKokkos,FULL,CoulLongTable<1> >(PairLJCharmmCoulLongKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulLongKokkos,HALF,CoulLongTable<1> >(PairLJCharmmCoulLongKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulLongKokkos,HALFTHREAD,CoulLongTable<1> >(PairLJCharmmCoulLongKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute<PairLJCharmmCoulLongKokkos,CoulLongTable<1> >(PairLJCharmmCoulLongKokkos*,
+  friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,FULL,true,0,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,FULL,true,1,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,HALF,true,0,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,HALFTHREAD,true,0,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,FULL,false,0,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,FULL,false,1,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,HALF,false,0,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,HALFTHREAD,false,0,CoulLongTable<1>>;
+  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulLongKokkos,FULL,0,CoulLongTable<1>>(PairLJCharmmCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulLongKokkos,FULL,1,CoulLongTable<1>>(PairLJCharmmCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulLongKokkos,HALF,0,CoulLongTable<1>>(PairLJCharmmCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulLongKokkos,HALFTHREAD,0,CoulLongTable<1>>(PairLJCharmmCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute<PairLJCharmmCoulLongKokkos,CoulLongTable<1>>(PairLJCharmmCoulLongKokkos*,
                                                             NeighListKokkos<DeviceType>*);
-  friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,FULL,true,CoulLongTable<0> >;
-  friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,HALF,true,CoulLongTable<0> >;
-  friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,HALFTHREAD,true,CoulLongTable<0> >;
-  friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,FULL,false,CoulLongTable<0> >;
-  friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,HALF,false,CoulLongTable<0> >;
-  friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,HALFTHREAD,false,CoulLongTable<0> >;
-  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulLongKokkos,FULL,CoulLongTable<0> >(PairLJCharmmCoulLongKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulLongKokkos,HALF,CoulLongTable<0> >(PairLJCharmmCoulLongKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulLongKokkos,HALFTHREAD,CoulLongTable<0> >(PairLJCharmmCoulLongKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute<PairLJCharmmCoulLongKokkos,CoulLongTable<0> >(PairLJCharmmCoulLongKokkos*,
+  friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,FULL,true,0,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,FULL,true,1,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,HALF,true,0,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,HALFTHREAD,true,0,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,FULL,false,0,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,FULL,false,1,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,HALF,false,0,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,HALFTHREAD,false,0,CoulLongTable<0>>;
+  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulLongKokkos,FULL,0,CoulLongTable<0>>(PairLJCharmmCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulLongKokkos,FULL,1,CoulLongTable<0>>(PairLJCharmmCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulLongKokkos,HALF,0,CoulLongTable<0>>(PairLJCharmmCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulLongKokkos,HALFTHREAD,0,CoulLongTable<0>>(PairLJCharmmCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute<PairLJCharmmCoulLongKokkos,CoulLongTable<0>>(PairLJCharmmCoulLongKokkos*,
                                                             NeighListKokkos<DeviceType>*);
   friend void pair_virial_fdotr_compute<PairLJCharmmCoulLongKokkos>(PairLJCharmmCoulLongKokkos*);
 
diff --git a/src/KOKKOS/pair_lj_class2_coul_cut_kokkos.h b/src/KOKKOS/pair_lj_class2_coul_cut_kokkos.h
index 5ca276c28e..9399345458 100644
--- a/src/KOKKOS/pair_lj_class2_coul_cut_kokkos.h
+++ b/src/KOKKOS/pair_lj_class2_coul_cut_kokkos.h
@@ -104,15 +104,18 @@ class PairLJClass2CoulCutKokkos : public PairLJClass2CoulCut {
   double qqrd2e;
 
   void allocate() override;
-  friend struct PairComputeFunctor<PairLJClass2CoulCutKokkos,FULL,true>;
+  friend struct PairComputeFunctor<PairLJClass2CoulCutKokkos,FULL,true,0>;
+  friend struct PairComputeFunctor<PairLJClass2CoulCutKokkos,FULL,true,1>;
   friend struct PairComputeFunctor<PairLJClass2CoulCutKokkos,HALF,true>;
   friend struct PairComputeFunctor<PairLJClass2CoulCutKokkos,HALFTHREAD,true>;
-  friend struct PairComputeFunctor<PairLJClass2CoulCutKokkos,FULL,false>;
+  friend struct PairComputeFunctor<PairLJClass2CoulCutKokkos,FULL,false,0>;
+  friend struct PairComputeFunctor<PairLJClass2CoulCutKokkos,FULL,false,1>;
   friend struct PairComputeFunctor<PairLJClass2CoulCutKokkos,HALF,false>;
   friend struct PairComputeFunctor<PairLJClass2CoulCutKokkos,HALFTHREAD,false>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulCutKokkos,FULL,void>(PairLJClass2CoulCutKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulCutKokkos,HALF,void>(PairLJClass2CoulCutKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulCutKokkos,HALFTHREAD,void>(PairLJClass2CoulCutKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulCutKokkos,FULL,0>(PairLJClass2CoulCutKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulCutKokkos,FULL,1>(PairLJClass2CoulCutKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulCutKokkos,HALF>(PairLJClass2CoulCutKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulCutKokkos,HALFTHREAD>(PairLJClass2CoulCutKokkos*,NeighListKokkos<DeviceType>*);
   friend EV_FLOAT pair_compute<PairLJClass2CoulCutKokkos,void>(PairLJClass2CoulCutKokkos*,
                                                             NeighListKokkos<DeviceType>*);
   friend void pair_virial_fdotr_compute<PairLJClass2CoulCutKokkos>(PairLJClass2CoulCutKokkos*);
diff --git a/src/KOKKOS/pair_lj_class2_coul_long_kokkos.h b/src/KOKKOS/pair_lj_class2_coul_long_kokkos.h
index 599cc2a83c..1cf6590855 100644
--- a/src/KOKKOS/pair_lj_class2_coul_long_kokkos.h
+++ b/src/KOKKOS/pair_lj_class2_coul_long_kokkos.h
@@ -107,27 +107,33 @@ class PairLJClass2CoulLongKokkos : public PairLJClass2CoulLong {
   double qqrd2e;
 
   void allocate() override;
-  friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,FULL,true,CoulLongTable<1> >;
-  friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,HALF,true,CoulLongTable<1> >;
-  friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,HALFTHREAD,true,CoulLongTable<1> >;
-  friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,FULL,false,CoulLongTable<1> >;
-  friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,HALF,false,CoulLongTable<1> >;
-  friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,HALFTHREAD,false,CoulLongTable<1> >;
-  friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulLongKokkos,FULL,CoulLongTable<1> >(PairLJClass2CoulLongKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulLongKokkos,HALF,CoulLongTable<1> >(PairLJClass2CoulLongKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulLongKokkos,HALFTHREAD,CoulLongTable<1> >(PairLJClass2CoulLongKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute<PairLJClass2CoulLongKokkos,CoulLongTable<1> >(PairLJClass2CoulLongKokkos*,
+  friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,FULL,true,0,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,FULL,true,1,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,HALF,true,0,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,HALFTHREAD,true,0,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,FULL,false,0,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,FULL,false,1,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,HALF,false,0,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,HALFTHREAD,false,0,CoulLongTable<1>>;
+  friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulLongKokkos,FULL,0,CoulLongTable<1>>(PairLJClass2CoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulLongKokkos,FULL,1,CoulLongTable<1>>(PairLJClass2CoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulLongKokkos,HALF,0,CoulLongTable<1>>(PairLJClass2CoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulLongKokkos,HALFTHREAD,0,CoulLongTable<1>>(PairLJClass2CoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute<PairLJClass2CoulLongKokkos,CoulLongTable<1>>(PairLJClass2CoulLongKokkos*,
                                                             NeighListKokkos<DeviceType>*);
-  friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,FULL,true,CoulLongTable<0> >;
-  friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,HALF,true,CoulLongTable<0> >;
-  friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,HALFTHREAD,true,CoulLongTable<0> >;
-  friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,FULL,false,CoulLongTable<0> >;
-  friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,HALF,false,CoulLongTable<0> >;
-  friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,HALFTHREAD,false,CoulLongTable<0> >;
-  friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulLongKokkos,FULL,CoulLongTable<0> >(PairLJClass2CoulLongKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulLongKokkos,HALF,CoulLongTable<0> >(PairLJClass2CoulLongKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulLongKokkos,HALFTHREAD,CoulLongTable<0> >(PairLJClass2CoulLongKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute<PairLJClass2CoulLongKokkos,CoulLongTable<0> >(PairLJClass2CoulLongKokkos*,
+  friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,FULL,true,0,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,FULL,true,1,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,HALF,true,0,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,HALFTHREAD,true,0,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,FULL,false,0,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,FULL,false,1,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,HALF,false,0,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,HALFTHREAD,false,0,CoulLongTable<0>>;
+  friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulLongKokkos,FULL,0,CoulLongTable<0>>(PairLJClass2CoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulLongKokkos,FULL,1,CoulLongTable<0>>(PairLJClass2CoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulLongKokkos,HALF,0,CoulLongTable<0>>(PairLJClass2CoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulLongKokkos,HALFTHREAD,0,CoulLongTable<0>>(PairLJClass2CoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute<PairLJClass2CoulLongKokkos,CoulLongTable<0>>(PairLJClass2CoulLongKokkos*,
                                                             NeighListKokkos<DeviceType>*);
   friend void pair_virial_fdotr_compute<PairLJClass2CoulLongKokkos>(PairLJClass2CoulLongKokkos*);
 
diff --git a/src/KOKKOS/pair_lj_class2_kokkos.h b/src/KOKKOS/pair_lj_class2_kokkos.h
index 0936399ca8..5594680929 100644
--- a/src/KOKKOS/pair_lj_class2_kokkos.h
+++ b/src/KOKKOS/pair_lj_class2_kokkos.h
@@ -96,16 +96,19 @@ class PairLJClass2Kokkos : public PairLJClass2 {
   int nlocal,nall,eflag,vflag;
 
   void allocate() override;
-  friend struct PairComputeFunctor<PairLJClass2Kokkos,FULL,true>;
+  friend struct PairComputeFunctor<PairLJClass2Kokkos,FULL,true,0>;
+  friend struct PairComputeFunctor<PairLJClass2Kokkos,FULL,true,1>;
   friend struct PairComputeFunctor<PairLJClass2Kokkos,HALF,true>;
   friend struct PairComputeFunctor<PairLJClass2Kokkos,HALFTHREAD,true>;
-  friend struct PairComputeFunctor<PairLJClass2Kokkos,FULL,false>;
+  friend struct PairComputeFunctor<PairLJClass2Kokkos,FULL,false,0>;
+  friend struct PairComputeFunctor<PairLJClass2Kokkos,FULL,false,1>;
   friend struct PairComputeFunctor<PairLJClass2Kokkos,HALF,false>;
   friend struct PairComputeFunctor<PairLJClass2Kokkos,HALFTHREAD,false>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJClass2Kokkos,FULL,void>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJClass2Kokkos,HALF,void>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJClass2Kokkos,HALFTHREAD,void>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute<PairLJClass2Kokkos,void>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJClass2Kokkos,FULL,0>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJClass2Kokkos,FULL,1>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJClass2Kokkos,HALF>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJClass2Kokkos,HALFTHREAD>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute<PairLJClass2Kokkos>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
   friend void pair_virial_fdotr_compute<PairLJClass2Kokkos>(PairLJClass2Kokkos*);
 };
 
diff --git a/src/KOKKOS/pair_lj_cut_coul_cut_kokkos.h b/src/KOKKOS/pair_lj_cut_coul_cut_kokkos.h
index 87464b37dc..affc67bf16 100644
--- a/src/KOKKOS/pair_lj_cut_coul_cut_kokkos.h
+++ b/src/KOKKOS/pair_lj_cut_coul_cut_kokkos.h
@@ -104,15 +104,18 @@ class PairLJCutCoulCutKokkos : public PairLJCutCoulCut {
   double qqrd2e;
 
   void allocate() override;
-  friend struct PairComputeFunctor<PairLJCutCoulCutKokkos,FULL,true>;
+  friend struct PairComputeFunctor<PairLJCutCoulCutKokkos,FULL,true,0>;
+  friend struct PairComputeFunctor<PairLJCutCoulCutKokkos,FULL,true,1>;
   friend struct PairComputeFunctor<PairLJCutCoulCutKokkos,HALF,true>;
   friend struct PairComputeFunctor<PairLJCutCoulCutKokkos,HALFTHREAD,true>;
-  friend struct PairComputeFunctor<PairLJCutCoulCutKokkos,FULL,false>;
+  friend struct PairComputeFunctor<PairLJCutCoulCutKokkos,FULL,false,0>;
+  friend struct PairComputeFunctor<PairLJCutCoulCutKokkos,FULL,false,1>;
   friend struct PairComputeFunctor<PairLJCutCoulCutKokkos,HALF,false>;
   friend struct PairComputeFunctor<PairLJCutCoulCutKokkos,HALFTHREAD,false>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulCutKokkos,FULL,void>(PairLJCutCoulCutKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulCutKokkos,HALF,void>(PairLJCutCoulCutKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulCutKokkos,HALFTHREAD,void>(PairLJCutCoulCutKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulCutKokkos,FULL,0>(PairLJCutCoulCutKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulCutKokkos,FULL,1>(PairLJCutCoulCutKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulCutKokkos,HALF>(PairLJCutCoulCutKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulCutKokkos,HALFTHREAD>(PairLJCutCoulCutKokkos*,NeighListKokkos<DeviceType>*);
   friend EV_FLOAT pair_compute<PairLJCutCoulCutKokkos,void>(PairLJCutCoulCutKokkos*,
                                                             NeighListKokkos<DeviceType>*);
   friend void pair_virial_fdotr_compute<PairLJCutCoulCutKokkos>(PairLJCutCoulCutKokkos*);
diff --git a/src/KOKKOS/pair_lj_cut_coul_debye_kokkos.h b/src/KOKKOS/pair_lj_cut_coul_debye_kokkos.h
index ea0b401959..eeed483b76 100644
--- a/src/KOKKOS/pair_lj_cut_coul_debye_kokkos.h
+++ b/src/KOKKOS/pair_lj_cut_coul_debye_kokkos.h
@@ -104,15 +104,18 @@ class PairLJCutCoulDebyeKokkos : public PairLJCutCoulDebye {
   double qqrd2e;
 
   void allocate() override;
-  friend struct PairComputeFunctor<PairLJCutCoulDebyeKokkos,FULL,true>;
+  friend struct PairComputeFunctor<PairLJCutCoulDebyeKokkos,FULL,true,0>;
+  friend struct PairComputeFunctor<PairLJCutCoulDebyeKokkos,FULL,true,1>;
   friend struct PairComputeFunctor<PairLJCutCoulDebyeKokkos,HALF,true>;
   friend struct PairComputeFunctor<PairLJCutCoulDebyeKokkos,HALFTHREAD,true>;
-  friend struct PairComputeFunctor<PairLJCutCoulDebyeKokkos,FULL,false>;
+  friend struct PairComputeFunctor<PairLJCutCoulDebyeKokkos,FULL,false,0>;
+  friend struct PairComputeFunctor<PairLJCutCoulDebyeKokkos,FULL,false,1>;
   friend struct PairComputeFunctor<PairLJCutCoulDebyeKokkos,HALF,false>;
   friend struct PairComputeFunctor<PairLJCutCoulDebyeKokkos,HALFTHREAD,false>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulDebyeKokkos,FULL,void>(PairLJCutCoulDebyeKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulDebyeKokkos,HALF,void>(PairLJCutCoulDebyeKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulDebyeKokkos,HALFTHREAD,void>(PairLJCutCoulDebyeKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulDebyeKokkos,FULL,0>(PairLJCutCoulDebyeKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulDebyeKokkos,FULL,1>(PairLJCutCoulDebyeKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulDebyeKokkos,HALF>(PairLJCutCoulDebyeKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulDebyeKokkos,HALFTHREAD>(PairLJCutCoulDebyeKokkos*,NeighListKokkos<DeviceType>*);
   friend EV_FLOAT pair_compute<PairLJCutCoulDebyeKokkos,void>(PairLJCutCoulDebyeKokkos*,
                                                             NeighListKokkos<DeviceType>*);
   friend void pair_virial_fdotr_compute<PairLJCutCoulDebyeKokkos>(PairLJCutCoulDebyeKokkos*);
diff --git a/src/KOKKOS/pair_lj_cut_coul_dsf_kokkos.h b/src/KOKKOS/pair_lj_cut_coul_dsf_kokkos.h
index e420bd22a9..d9e5fcfe49 100644
--- a/src/KOKKOS/pair_lj_cut_coul_dsf_kokkos.h
+++ b/src/KOKKOS/pair_lj_cut_coul_dsf_kokkos.h
@@ -101,15 +101,18 @@ class PairLJCutCoulDSFKokkos : public PairLJCutCoulDSF {
   double qqrd2e;
 
   void allocate() override;
-  friend struct PairComputeFunctor<PairLJCutCoulDSFKokkos,FULL,true>;
+  friend struct PairComputeFunctor<PairLJCutCoulDSFKokkos,FULL,true,0>;
+  friend struct PairComputeFunctor<PairLJCutCoulDSFKokkos,FULL,true,1>;
   friend struct PairComputeFunctor<PairLJCutCoulDSFKokkos,HALF,true>;
   friend struct PairComputeFunctor<PairLJCutCoulDSFKokkos,HALFTHREAD,true>;
-  friend struct PairComputeFunctor<PairLJCutCoulDSFKokkos,FULL,false>;
+  friend struct PairComputeFunctor<PairLJCutCoulDSFKokkos,FULL,false,0>;
+  friend struct PairComputeFunctor<PairLJCutCoulDSFKokkos,FULL,false,1>;
   friend struct PairComputeFunctor<PairLJCutCoulDSFKokkos,HALF,false>;
   friend struct PairComputeFunctor<PairLJCutCoulDSFKokkos,HALFTHREAD,false>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulDSFKokkos,FULL,void>(PairLJCutCoulDSFKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulDSFKokkos,HALF,void>(PairLJCutCoulDSFKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulDSFKokkos,HALFTHREAD,void>(PairLJCutCoulDSFKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulDSFKokkos,FULL,0>(PairLJCutCoulDSFKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulDSFKokkos,FULL,1>(PairLJCutCoulDSFKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulDSFKokkos,HALF>(PairLJCutCoulDSFKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulDSFKokkos,HALFTHREAD>(PairLJCutCoulDSFKokkos*,NeighListKokkos<DeviceType>*);
   friend EV_FLOAT pair_compute<PairLJCutCoulDSFKokkos,void>(PairLJCutCoulDSFKokkos*,
                                                             NeighListKokkos<DeviceType>*);
   friend void pair_virial_fdotr_compute<PairLJCutCoulDSFKokkos>(PairLJCutCoulDSFKokkos*);
diff --git a/src/KOKKOS/pair_lj_cut_coul_long_kokkos.h b/src/KOKKOS/pair_lj_cut_coul_long_kokkos.h
index bcb97a59cd..ec6e2db176 100644
--- a/src/KOKKOS/pair_lj_cut_coul_long_kokkos.h
+++ b/src/KOKKOS/pair_lj_cut_coul_long_kokkos.h
@@ -107,27 +107,33 @@ class PairLJCutCoulLongKokkos : public PairLJCutCoulLong {
   double qqrd2e;
 
   void allocate() override;
-  friend struct PairComputeFunctor<PairLJCutCoulLongKokkos,FULL,true,CoulLongTable<1> >;
-  friend struct PairComputeFunctor<PairLJCutCoulLongKokkos,HALF,true,CoulLongTable<1> >;
-  friend struct PairComputeFunctor<PairLJCutCoulLongKokkos,HALFTHREAD,true,CoulLongTable<1> >;
-  friend struct PairComputeFunctor<PairLJCutCoulLongKokkos,FULL,false,CoulLongTable<1> >;
-  friend struct PairComputeFunctor<PairLJCutCoulLongKokkos,HALF,false,CoulLongTable<1> >;
-  friend struct PairComputeFunctor<PairLJCutCoulLongKokkos,HALFTHREAD,false,CoulLongTable<1> >;
-  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulLongKokkos,FULL,CoulLongTable<1> >(PairLJCutCoulLongKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulLongKokkos,HALF,CoulLongTable<1> >(PairLJCutCoulLongKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulLongKokkos,HALFTHREAD,CoulLongTable<1> >(PairLJCutCoulLongKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute<PairLJCutCoulLongKokkos,CoulLongTable<1> >(PairLJCutCoulLongKokkos*,
+  friend struct PairComputeFunctor<PairLJCutCoulLongKokkos,FULL,true,0,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJCutCoulLongKokkos,FULL,true,1,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJCutCoulLongKokkos,HALF,true,0,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJCutCoulLongKokkos,HALFTHREAD,true,0,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJCutCoulLongKokkos,FULL,false,0,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJCutCoulLongKokkos,FULL,false,1,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJCutCoulLongKokkos,HALF,false,0,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJCutCoulLongKokkos,HALFTHREAD,false,0,CoulLongTable<1>>;
+  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulLongKokkos,FULL,0,CoulLongTable<1>>(PairLJCutCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulLongKokkos,FULL,1,CoulLongTable<1>>(PairLJCutCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulLongKokkos,HALF,0,CoulLongTable<1>>(PairLJCutCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulLongKokkos,HALFTHREAD,0,CoulLongTable<1>>(PairLJCutCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute<PairLJCutCoulLongKokkos,CoulLongTable<1>>(PairLJCutCoulLongKokkos*,
                                                             NeighListKokkos<DeviceType>*);
-  friend struct PairComputeFunctor<PairLJCutCoulLongKokkos,FULL,true,CoulLongTable<0> >;
-  friend struct PairComputeFunctor<PairLJCutCoulLongKokkos,HALF,true,CoulLongTable<0> >;
-  friend struct PairComputeFunctor<PairLJCutCoulLongKokkos,HALFTHREAD,true,CoulLongTable<0> >;
-  friend struct PairComputeFunctor<PairLJCutCoulLongKokkos,FULL,false,CoulLongTable<0> >;
-  friend struct PairComputeFunctor<PairLJCutCoulLongKokkos,HALF,false,CoulLongTable<0> >;
-  friend struct PairComputeFunctor<PairLJCutCoulLongKokkos,HALFTHREAD,false,CoulLongTable<0> >;
-  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulLongKokkos,FULL,CoulLongTable<0> >(PairLJCutCoulLongKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulLongKokkos,HALF,CoulLongTable<0> >(PairLJCutCoulLongKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulLongKokkos,HALFTHREAD,CoulLongTable<0> >(PairLJCutCoulLongKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute<PairLJCutCoulLongKokkos,CoulLongTable<0> >(PairLJCutCoulLongKokkos*,
+  friend struct PairComputeFunctor<PairLJCutCoulLongKokkos,FULL,true,0,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJCutCoulLongKokkos,FULL,true,1,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJCutCoulLongKokkos,HALF,true,0,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJCutCoulLongKokkos,HALFTHREAD,true,0,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJCutCoulLongKokkos,FULL,false,0,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJCutCoulLongKokkos,FULL,false,1,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJCutCoulLongKokkos,HALF,false,0,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJCutCoulLongKokkos,HALFTHREAD,false,0,CoulLongTable<0>>;
+  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulLongKokkos,FULL,0,CoulLongTable<0>>(PairLJCutCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulLongKokkos,FULL,1,CoulLongTable<0>>(PairLJCutCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulLongKokkos,HALF,0,CoulLongTable<0>>(PairLJCutCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulLongKokkos,HALFTHREAD,0,CoulLongTable<0>>(PairLJCutCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute<PairLJCutCoulLongKokkos,CoulLongTable<0>>(PairLJCutCoulLongKokkos*,
                                                             NeighListKokkos<DeviceType>*);
   friend void pair_virial_fdotr_compute<PairLJCutCoulLongKokkos>(PairLJCutCoulLongKokkos*);
 
diff --git a/src/KOKKOS/pair_lj_cut_kokkos.h b/src/KOKKOS/pair_lj_cut_kokkos.h
index 106f1a9048..b44c1aa6fe 100644
--- a/src/KOKKOS/pair_lj_cut_kokkos.h
+++ b/src/KOKKOS/pair_lj_cut_kokkos.h
@@ -92,16 +92,19 @@ class PairLJCutKokkos : public PairLJCut {
   int nlocal,nall,eflag,vflag;
 
   void allocate() override;
-  friend struct PairComputeFunctor<PairLJCutKokkos,FULL,true>;
+  friend struct PairComputeFunctor<PairLJCutKokkos,FULL,true,0>;
+  friend struct PairComputeFunctor<PairLJCutKokkos,FULL,true,1>;
   friend struct PairComputeFunctor<PairLJCutKokkos,HALF,true>;
   friend struct PairComputeFunctor<PairLJCutKokkos,HALFTHREAD,true>;
-  friend struct PairComputeFunctor<PairLJCutKokkos,FULL,false>;
+  friend struct PairComputeFunctor<PairLJCutKokkos,FULL,false,0>;
+  friend struct PairComputeFunctor<PairLJCutKokkos,FULL,false,1>;
   friend struct PairComputeFunctor<PairLJCutKokkos,HALF,false>;
   friend struct PairComputeFunctor<PairLJCutKokkos,HALFTHREAD,false>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJCutKokkos,FULL,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJCutKokkos,HALF,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJCutKokkos,HALFTHREAD,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute<PairLJCutKokkos,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCutKokkos,FULL,0>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCutKokkos,FULL,1>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCutKokkos,HALF>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCutKokkos,HALFTHREAD>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute<PairLJCutKokkos>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
   friend void pair_virial_fdotr_compute<PairLJCutKokkos>(PairLJCutKokkos*);
 };
 
diff --git a/src/KOKKOS/pair_lj_expand_coul_long_kokkos.h b/src/KOKKOS/pair_lj_expand_coul_long_kokkos.h
index 09a694a122..30e82b7dab 100644
--- a/src/KOKKOS/pair_lj_expand_coul_long_kokkos.h
+++ b/src/KOKKOS/pair_lj_expand_coul_long_kokkos.h
@@ -116,27 +116,33 @@ class PairLJExpandCoulLongKokkos : public PairLJExpandCoulLong {
   double qqrd2e;
 
   void allocate() override;
-  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,FULL,true,CoulLongTable<1> >;
-  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,HALF,true,CoulLongTable<1> >;
-  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,HALFTHREAD,true,CoulLongTable<1> >;
-  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,FULL,false,CoulLongTable<1> >;
-  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,HALF,false,CoulLongTable<1> >;
-  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,HALFTHREAD,false,CoulLongTable<1> >;
-  friend EV_FLOAT pair_compute_neighlist<PairLJExpandCoulLongKokkos,FULL,CoulLongTable<1> >(PairLJExpandCoulLongKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJExpandCoulLongKokkos,HALF,CoulLongTable<1> >(PairLJExpandCoulLongKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJExpandCoulLongKokkos,HALFTHREAD,CoulLongTable<1> >(PairLJExpandCoulLongKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute<PairLJExpandCoulLongKokkos,CoulLongTable<1> >(PairLJExpandCoulLongKokkos*,
+  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,FULL,true,0,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,FULL,true,1,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,HALF,true,0,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,HALFTHREAD,true,0,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,FULL,false,0,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,FULL,false,1,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,HALF,false,0,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,HALFTHREAD,false,0,CoulLongTable<1>>;
+  friend EV_FLOAT pair_compute_neighlist<PairLJExpandCoulLongKokkos,FULL,0,CoulLongTable<1>>(PairLJExpandCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJExpandCoulLongKokkos,FULL,1,CoulLongTable<1>>(PairLJExpandCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJExpandCoulLongKokkos,HALF,0,CoulLongTable<1>>(PairLJExpandCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJExpandCoulLongKokkos,HALFTHREAD,0,CoulLongTable<1>>(PairLJExpandCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute<PairLJExpandCoulLongKokkos,CoulLongTable<1>>(PairLJExpandCoulLongKokkos*,
                                                             NeighListKokkos<DeviceType>*);
-  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,FULL,true,CoulLongTable<0> >;
-  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,HALF,true,CoulLongTable<0> >;
-  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,HALFTHREAD,true,CoulLongTable<0> >;
-  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,FULL,false,CoulLongTable<0> >;
-  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,HALF,false,CoulLongTable<0> >;
-  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,HALFTHREAD,false,CoulLongTable<0> >;
-  friend EV_FLOAT pair_compute_neighlist<PairLJExpandCoulLongKokkos,FULL,CoulLongTable<0> >(PairLJExpandCoulLongKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJExpandCoulLongKokkos,HALF,CoulLongTable<0> >(PairLJExpandCoulLongKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJExpandCoulLongKokkos,HALFTHREAD,CoulLongTable<0> >(PairLJExpandCoulLongKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute<PairLJExpandCoulLongKokkos,CoulLongTable<0> >(PairLJExpandCoulLongKokkos*,
+  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,FULL,true,0,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,FULL,true,1,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,HALF,true,0,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,HALFTHREAD,true,0,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,FULL,false,0,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,FULL,false,1,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,HALF,false,0,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,HALFTHREAD,false,0,CoulLongTable<0>>;
+  friend EV_FLOAT pair_compute_neighlist<PairLJExpandCoulLongKokkos,FULL,0,CoulLongTable<0>>(PairLJExpandCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJExpandCoulLongKokkos,FULL,1,CoulLongTable<0>>(PairLJExpandCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJExpandCoulLongKokkos,HALF,0,CoulLongTable<0>>(PairLJExpandCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJExpandCoulLongKokkos,HALFTHREAD,0,CoulLongTable<0>>(PairLJExpandCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute<PairLJExpandCoulLongKokkos,CoulLongTable<0>>(PairLJExpandCoulLongKokkos*,
                                                             NeighListKokkos<DeviceType>*);
   friend void pair_virial_fdotr_compute<PairLJExpandCoulLongKokkos>(PairLJExpandCoulLongKokkos*);
 };
diff --git a/src/KOKKOS/pair_lj_expand_kokkos.h b/src/KOKKOS/pair_lj_expand_kokkos.h
index 0df0a6f8f8..64fe7d8b8e 100644
--- a/src/KOKKOS/pair_lj_expand_kokkos.h
+++ b/src/KOKKOS/pair_lj_expand_kokkos.h
@@ -97,16 +97,19 @@ class PairLJExpandKokkos : public PairLJExpand {
   int nlocal,nall,eflag,vflag;
 
   void allocate() override;
-  friend struct PairComputeFunctor<PairLJExpandKokkos,FULL,true>;
+  friend struct PairComputeFunctor<PairLJExpandKokkos,FULL,true,0>;
+  friend struct PairComputeFunctor<PairLJExpandKokkos,FULL,true,1>;
   friend struct PairComputeFunctor<PairLJExpandKokkos,HALF,true>;
   friend struct PairComputeFunctor<PairLJExpandKokkos,HALFTHREAD,true>;
-  friend struct PairComputeFunctor<PairLJExpandKokkos,FULL,false>;
+  friend struct PairComputeFunctor<PairLJExpandKokkos,FULL,false,0>;
+  friend struct PairComputeFunctor<PairLJExpandKokkos,FULL,false,1>;
   friend struct PairComputeFunctor<PairLJExpandKokkos,HALF,false>;
   friend struct PairComputeFunctor<PairLJExpandKokkos,HALFTHREAD,false>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJExpandKokkos,FULL,void>(PairLJExpandKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJExpandKokkos,HALF,void>(PairLJExpandKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJExpandKokkos,HALFTHREAD,void>(PairLJExpandKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute<PairLJExpandKokkos,void>(PairLJExpandKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJExpandKokkos,FULL,0>(PairLJExpandKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJExpandKokkos,FULL,1>(PairLJExpandKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJExpandKokkos,HALF>(PairLJExpandKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJExpandKokkos,HALFTHREAD>(PairLJExpandKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute<PairLJExpandKokkos>(PairLJExpandKokkos*,NeighListKokkos<DeviceType>*);
   friend void pair_virial_fdotr_compute<PairLJExpandKokkos>(PairLJExpandKokkos*);
 };
 
diff --git a/src/KOKKOS/pair_lj_gromacs_coul_gromacs_kokkos.h b/src/KOKKOS/pair_lj_gromacs_coul_gromacs_kokkos.h
index 359c4a1229..020b621e33 100644
--- a/src/KOKKOS/pair_lj_gromacs_coul_gromacs_kokkos.h
+++ b/src/KOKKOS/pair_lj_gromacs_coul_gromacs_kokkos.h
@@ -115,27 +115,33 @@ class PairLJGromacsCoulGromacsKokkos : public PairLJGromacsCoulGromacs {
 
   void allocate() override;
 
-  friend struct PairComputeFunctor<PairLJGromacsCoulGromacsKokkos,FULL,true,CoulLongTable<1> >;
-  friend struct PairComputeFunctor<PairLJGromacsCoulGromacsKokkos,HALF,true,CoulLongTable<1> >;
-  friend struct PairComputeFunctor<PairLJGromacsCoulGromacsKokkos,HALFTHREAD,true,CoulLongTable<1> >;
-  friend struct PairComputeFunctor<PairLJGromacsCoulGromacsKokkos,FULL,false,CoulLongTable<1> >;
-  friend struct PairComputeFunctor<PairLJGromacsCoulGromacsKokkos,HALF,false,CoulLongTable<1> >;
-  friend struct PairComputeFunctor<PairLJGromacsCoulGromacsKokkos,HALFTHREAD,false,CoulLongTable<1> >;
-  friend EV_FLOAT pair_compute_neighlist<PairLJGromacsCoulGromacsKokkos,FULL,CoulLongTable<1> >(PairLJGromacsCoulGromacsKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJGromacsCoulGromacsKokkos,HALF,CoulLongTable<1> >(PairLJGromacsCoulGromacsKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJGromacsCoulGromacsKokkos,HALFTHREAD,CoulLongTable<1> >(PairLJGromacsCoulGromacsKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute<PairLJGromacsCoulGromacsKokkos,CoulLongTable<1> >(PairLJGromacsCoulGromacsKokkos*,
+  friend struct PairComputeFunctor<PairLJGromacsCoulGromacsKokkos,FULL,true,0,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJGromacsCoulGromacsKokkos,FULL,true,1,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJGromacsCoulGromacsKokkos,HALF,true,0,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJGromacsCoulGromacsKokkos,HALFTHREAD,true,0,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJGromacsCoulGromacsKokkos,FULL,false,0,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJGromacsCoulGromacsKokkos,FULL,false,1,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJGromacsCoulGromacsKokkos,HALF,false,0,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJGromacsCoulGromacsKokkos,HALFTHREAD,false,0,CoulLongTable<1>>;
+  friend EV_FLOAT pair_compute_neighlist<PairLJGromacsCoulGromacsKokkos,FULL,0,CoulLongTable<1>>(PairLJGromacsCoulGromacsKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJGromacsCoulGromacsKokkos,FULL,1,CoulLongTable<1>>(PairLJGromacsCoulGromacsKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJGromacsCoulGromacsKokkos,HALF,0,CoulLongTable<1>>(PairLJGromacsCoulGromacsKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJGromacsCoulGromacsKokkos,HALFTHREAD,0,CoulLongTable<1>>(PairLJGromacsCoulGromacsKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute<PairLJGromacsCoulGromacsKokkos,CoulLongTable<1>>(PairLJGromacsCoulGromacsKokkos*,
                                                             NeighListKokkos<DeviceType>*);
-  friend struct PairComputeFunctor<PairLJGromacsCoulGromacsKokkos,FULL,true,CoulLongTable<0> >;
-  friend struct PairComputeFunctor<PairLJGromacsCoulGromacsKokkos,HALF,true,CoulLongTable<0> >;
-  friend struct PairComputeFunctor<PairLJGromacsCoulGromacsKokkos,HALFTHREAD,true,CoulLongTable<0> >;
-  friend struct PairComputeFunctor<PairLJGromacsCoulGromacsKokkos,FULL,false,CoulLongTable<0> >;
-  friend struct PairComputeFunctor<PairLJGromacsCoulGromacsKokkos,HALF,false,CoulLongTable<0> >;
-  friend struct PairComputeFunctor<PairLJGromacsCoulGromacsKokkos,HALFTHREAD,false,CoulLongTable<0> >;
-  friend EV_FLOAT pair_compute_neighlist<PairLJGromacsCoulGromacsKokkos,FULL,CoulLongTable<0> >(PairLJGromacsCoulGromacsKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJGromacsCoulGromacsKokkos,HALF,CoulLongTable<0> >(PairLJGromacsCoulGromacsKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJGromacsCoulGromacsKokkos,HALFTHREAD,CoulLongTable<0> >(PairLJGromacsCoulGromacsKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute<PairLJGromacsCoulGromacsKokkos,CoulLongTable<0> >(PairLJGromacsCoulGromacsKokkos*,
+  friend struct PairComputeFunctor<PairLJGromacsCoulGromacsKokkos,FULL,true,0,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJGromacsCoulGromacsKokkos,FULL,true,1,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJGromacsCoulGromacsKokkos,HALF,true,0,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJGromacsCoulGromacsKokkos,HALFTHREAD,true,0,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJGromacsCoulGromacsKokkos,FULL,false,0,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJGromacsCoulGromacsKokkos,FULL,false,1,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJGromacsCoulGromacsKokkos,HALF,false,0,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJGromacsCoulGromacsKokkos,HALFTHREAD,false,0,CoulLongTable<0>>;
+  friend EV_FLOAT pair_compute_neighlist<PairLJGromacsCoulGromacsKokkos,FULL,0,CoulLongTable<0>>(PairLJGromacsCoulGromacsKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJGromacsCoulGromacsKokkos,FULL,1,CoulLongTable<0>>(PairLJGromacsCoulGromacsKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJGromacsCoulGromacsKokkos,HALF,0,CoulLongTable<0>>(PairLJGromacsCoulGromacsKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJGromacsCoulGromacsKokkos,HALFTHREAD,0,CoulLongTable<0>>(PairLJGromacsCoulGromacsKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute<PairLJGromacsCoulGromacsKokkos,CoulLongTable<0>>(PairLJGromacsCoulGromacsKokkos*,
                                                             NeighListKokkos<DeviceType>*);
   friend void pair_virial_fdotr_compute<PairLJGromacsCoulGromacsKokkos>(PairLJGromacsCoulGromacsKokkos*);
 
diff --git a/src/KOKKOS/pair_lj_gromacs_kokkos.h b/src/KOKKOS/pair_lj_gromacs_kokkos.h
index 95c600a415..ad41ca5120 100644
--- a/src/KOKKOS/pair_lj_gromacs_kokkos.h
+++ b/src/KOKKOS/pair_lj_gromacs_kokkos.h
@@ -115,27 +115,33 @@ class PairLJGromacsKokkos : public PairLJGromacs {
 
   void allocate() override;
 
-  friend struct PairComputeFunctor<PairLJGromacsKokkos,FULL,true,CoulLongTable<1> >;
-  friend struct PairComputeFunctor<PairLJGromacsKokkos,HALF,true,CoulLongTable<1> >;
-  friend struct PairComputeFunctor<PairLJGromacsKokkos,HALFTHREAD,true,CoulLongTable<1> >;
-  friend struct PairComputeFunctor<PairLJGromacsKokkos,FULL,false,CoulLongTable<1> >;
-  friend struct PairComputeFunctor<PairLJGromacsKokkos,HALF,false,CoulLongTable<1> >;
-  friend struct PairComputeFunctor<PairLJGromacsKokkos,HALFTHREAD,false,CoulLongTable<1> >;
-  friend EV_FLOAT pair_compute_neighlist<PairLJGromacsKokkos,FULL,CoulLongTable<1> >(PairLJGromacsKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJGromacsKokkos,HALF,CoulLongTable<1> >(PairLJGromacsKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJGromacsKokkos,HALFTHREAD,CoulLongTable<1> >(PairLJGromacsKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute<PairLJGromacsKokkos,CoulLongTable<1> >(PairLJGromacsKokkos*,
+  friend struct PairComputeFunctor<PairLJGromacsKokkos,FULL,true,0,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJGromacsKokkos,FULL,true,1,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJGromacsKokkos,HALF,true,0,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJGromacsKokkos,HALFTHREAD,true,0,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJGromacsKokkos,FULL,false,0,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJGromacsKokkos,FULL,false,1,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJGromacsKokkos,HALF,false,0,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJGromacsKokkos,HALFTHREAD,false,0,CoulLongTable<1>>;
+  friend EV_FLOAT pair_compute_neighlist<PairLJGromacsKokkos,FULL,0,CoulLongTable<1>>(PairLJGromacsKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJGromacsKokkos,FULL,1,CoulLongTable<1>>(PairLJGromacsKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJGromacsKokkos,HALF,0,CoulLongTable<1>>(PairLJGromacsKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJGromacsKokkos,HALFTHREAD,0,CoulLongTable<1>>(PairLJGromacsKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute<PairLJGromacsKokkos,CoulLongTable<1>>(PairLJGromacsKokkos*,
                                                             NeighListKokkos<DeviceType>*);
-  friend struct PairComputeFunctor<PairLJGromacsKokkos,FULL,true,CoulLongTable<0> >;
-  friend struct PairComputeFunctor<PairLJGromacsKokkos,HALF,true,CoulLongTable<0> >;
-  friend struct PairComputeFunctor<PairLJGromacsKokkos,HALFTHREAD,true,CoulLongTable<0> >;
-  friend struct PairComputeFunctor<PairLJGromacsKokkos,FULL,false,CoulLongTable<0> >;
-  friend struct PairComputeFunctor<PairLJGromacsKokkos,HALF,false,CoulLongTable<0> >;
-  friend struct PairComputeFunctor<PairLJGromacsKokkos,HALFTHREAD,false,CoulLongTable<0> >;
-  friend EV_FLOAT pair_compute_neighlist<PairLJGromacsKokkos,FULL,CoulLongTable<0> >(PairLJGromacsKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJGromacsKokkos,HALF,CoulLongTable<0> >(PairLJGromacsKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJGromacsKokkos,HALFTHREAD,CoulLongTable<0> >(PairLJGromacsKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute<PairLJGromacsKokkos,CoulLongTable<0> >(PairLJGromacsKokkos*,
+  friend struct PairComputeFunctor<PairLJGromacsKokkos,FULL,true,0,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJGromacsKokkos,FULL,true,1,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJGromacsKokkos,HALF,true,0,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJGromacsKokkos,HALFTHREAD,true,0,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJGromacsKokkos,FULL,false,0,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJGromacsKokkos,FULL,false,1,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJGromacsKokkos,HALF,false,0,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJGromacsKokkos,HALFTHREAD,false,0,CoulLongTable<0>>;
+  friend EV_FLOAT pair_compute_neighlist<PairLJGromacsKokkos,FULL,0,CoulLongTable<0>>(PairLJGromacsKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJGromacsKokkos,FULL,1,CoulLongTable<0>>(PairLJGromacsKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJGromacsKokkos,HALF,0,CoulLongTable<0>>(PairLJGromacsKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJGromacsKokkos,HALFTHREAD,0,CoulLongTable<0>>(PairLJGromacsKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute<PairLJGromacsKokkos,CoulLongTable<0>>(PairLJGromacsKokkos*,
                                                             NeighListKokkos<DeviceType>*);
   friend void pair_virial_fdotr_compute<PairLJGromacsKokkos>(PairLJGromacsKokkos*);
 
diff --git a/src/KOKKOS/pair_lj_spica_kokkos.h b/src/KOKKOS/pair_lj_spica_kokkos.h
index b330af4bfd..06c70ebd3e 100644
--- a/src/KOKKOS/pair_lj_spica_kokkos.h
+++ b/src/KOKKOS/pair_lj_spica_kokkos.h
@@ -97,16 +97,19 @@ class PairLJSPICAKokkos : public PairLJSPICA {
   int nlocal,nall,eflag,vflag;
 
   void allocate() override;
-  friend struct PairComputeFunctor<PairLJSPICAKokkos,FULL,true>;
+  friend struct PairComputeFunctor<PairLJSPICAKokkos,FULL,true,0>;
+  friend struct PairComputeFunctor<PairLJSPICAKokkos,FULL,true,1>;
   friend struct PairComputeFunctor<PairLJSPICAKokkos,HALF,true>;
   friend struct PairComputeFunctor<PairLJSPICAKokkos,HALFTHREAD,true>;
-  friend struct PairComputeFunctor<PairLJSPICAKokkos,FULL,false>;
+  friend struct PairComputeFunctor<PairLJSPICAKokkos,FULL,false,0>;
+  friend struct PairComputeFunctor<PairLJSPICAKokkos,FULL,false,1>;
   friend struct PairComputeFunctor<PairLJSPICAKokkos,HALF,false>;
   friend struct PairComputeFunctor<PairLJSPICAKokkos,HALFTHREAD,false>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJSPICAKokkos,FULL,void>(PairLJSPICAKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJSPICAKokkos,HALF,void>(PairLJSPICAKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJSPICAKokkos,HALFTHREAD,void>(PairLJSPICAKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute<PairLJSPICAKokkos,void>(PairLJSPICAKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJSPICAKokkos,FULL,0>(PairLJSPICAKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJSPICAKokkos,FULL,1>(PairLJSPICAKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJSPICAKokkos,HALF>(PairLJSPICAKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJSPICAKokkos,HALFTHREAD>(PairLJSPICAKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute<PairLJSPICAKokkos>(PairLJSPICAKokkos*,NeighListKokkos<DeviceType>*);
   friend void pair_virial_fdotr_compute<PairLJSPICAKokkos>(PairLJSPICAKokkos*);
 };
 
diff --git a/src/KOKKOS/pair_morse_kokkos.h b/src/KOKKOS/pair_morse_kokkos.h
index d06cf2deb1..ccf27b018b 100644
--- a/src/KOKKOS/pair_morse_kokkos.h
+++ b/src/KOKKOS/pair_morse_kokkos.h
@@ -92,16 +92,19 @@ class PairMorseKokkos : public PairMorse {
   int nlocal,nall,eflag,vflag;
 
   void allocate() override;
-  friend struct PairComputeFunctor<PairMorseKokkos,FULL,true>;
+  friend struct PairComputeFunctor<PairMorseKokkos,FULL,true,0>;
+  friend struct PairComputeFunctor<PairMorseKokkos,FULL,true,1>;
   friend struct PairComputeFunctor<PairMorseKokkos,HALF,true>;
   friend struct PairComputeFunctor<PairMorseKokkos,HALFTHREAD,true>;
-  friend struct PairComputeFunctor<PairMorseKokkos,FULL,false>;
+  friend struct PairComputeFunctor<PairMorseKokkos,FULL,false,0>;
+  friend struct PairComputeFunctor<PairMorseKokkos,FULL,false,1>;
   friend struct PairComputeFunctor<PairMorseKokkos,HALF,false>;
   friend struct PairComputeFunctor<PairMorseKokkos,HALFTHREAD,false>;
-  friend EV_FLOAT pair_compute_neighlist<PairMorseKokkos,FULL,void>(PairMorseKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairMorseKokkos,HALF,void>(PairMorseKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairMorseKokkos,HALFTHREAD,void>(PairMorseKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute<PairMorseKokkos,void>(PairMorseKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairMorseKokkos,FULL,0>(PairMorseKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairMorseKokkos,FULL,1>(PairMorseKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairMorseKokkos,HALF>(PairMorseKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairMorseKokkos,HALFTHREAD>(PairMorseKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute<PairMorseKokkos>(PairMorseKokkos*,NeighListKokkos<DeviceType>*);
   friend void pair_virial_fdotr_compute<PairMorseKokkos>(PairMorseKokkos*);
 };
 
diff --git a/src/KOKKOS/pair_pace_kokkos.cpp b/src/KOKKOS/pair_pace_kokkos.cpp
index 56a6656d78..153a6d0333 100644
--- a/src/KOKKOS/pair_pace_kokkos.cpp
+++ b/src/KOKKOS/pair_pace_kokkos.cpp
@@ -237,6 +237,9 @@ void PairPACEKokkos<DeviceType>::copy_splines()
 
   ACERadialFunctions* radial_functions = dynamic_cast<ACERadialFunctions*>(basis_set->radial_functions);
 
+  if (radial_functions == nullptr)
+    error->all(FLERR,"Chosen radial basis style not supported by pair style pace/kk");
+
   for (int i = 0; i < nelements; i++) {
     for (int j = 0; j < nelements; j++) {
       k_splines_gk.h_view(i, j) = radial_functions->splines_gk(i, j);
diff --git a/src/KOKKOS/pair_snap_kokkos_impl.h b/src/KOKKOS/pair_snap_kokkos_impl.h
index 91f432dbaf..7b9fda60db 100644
--- a/src/KOKKOS/pair_snap_kokkos_impl.h
+++ b/src/KOKKOS/pair_snap_kokkos_impl.h
@@ -63,10 +63,6 @@ PairSNAPKokkos<DeviceType, real_type, vector_length>::PairSNAPKokkos(LAMMPS *lmp
   datamask_read = EMPTY_MASK;
   datamask_modify = EMPTY_MASK;
 
-  k_cutsq = tdual_fparams("PairSNAPKokkos::cutsq",atom->ntypes+1,atom->ntypes+1);
-  auto d_cutsq = k_cutsq.template view<DeviceType>();
-  rnd_cutsq = d_cutsq;
-
   host_flag = (execution_space == Host);
 }
 
@@ -546,6 +542,9 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::allocate()
 
   int n = atom->ntypes;
   MemKK::realloc_kokkos(d_map,"PairSNAPKokkos::map",n+1);
+
+  MemKK::realloc_kokkos(k_cutsq,"PairSNAPKokkos::cutsq",n+1,n+1);
+  rnd_cutsq = k_cutsq.template view<DeviceType>();
 }
 
 
diff --git a/src/KOKKOS/pair_table_kokkos.cpp b/src/KOKKOS/pair_table_kokkos.cpp
index 83bd74d4af..99d01be4a5 100644
--- a/src/KOKKOS/pair_table_kokkos.cpp
+++ b/src/KOKKOS/pair_table_kokkos.cpp
@@ -133,19 +133,19 @@ void PairTableKokkos<DeviceType>::compute_style(int eflag_in, int vflag_in)
   EV_FLOAT ev;
   if (atom->ntypes > MAX_TYPES_STACKPARAMS) {
     if (neighflag == FULL) {
-      PairComputeFunctor<PairTableKokkos<DeviceType>,FULL,false,S_TableCompute<DeviceType,TABSTYLE> >
+      PairComputeFunctor<PairTableKokkos<DeviceType>,FULL,false,0,S_TableCompute<DeviceType,TABSTYLE> >
         ff(this,(NeighListKokkos<DeviceType>*) list);
       if (eflag || vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
       else Kokkos::parallel_for(list->inum,ff);
       ff.contribute();
     } else if (neighflag == HALFTHREAD) {
-      PairComputeFunctor<PairTableKokkos<DeviceType>,HALFTHREAD,false,S_TableCompute<DeviceType,TABSTYLE> >
+      PairComputeFunctor<PairTableKokkos<DeviceType>,HALFTHREAD,false,0,S_TableCompute<DeviceType,TABSTYLE> >
         ff(this,(NeighListKokkos<DeviceType>*) list);
       if (eflag || vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
       else Kokkos::parallel_for(list->inum,ff);
       ff.contribute();
     } else if (neighflag == HALF) {
-      PairComputeFunctor<PairTableKokkos<DeviceType>,HALF,false,S_TableCompute<DeviceType,TABSTYLE> >
+      PairComputeFunctor<PairTableKokkos<DeviceType>,HALF,false,0,S_TableCompute<DeviceType,TABSTYLE> >
         f(this,(NeighListKokkos<DeviceType>*) list);
       if (eflag || vflag) Kokkos::parallel_reduce(list->inum,f,ev);
       else Kokkos::parallel_for(list->inum,f);
@@ -153,19 +153,19 @@ void PairTableKokkos<DeviceType>::compute_style(int eflag_in, int vflag_in)
     }
   } else {
     if (neighflag == FULL) {
-      PairComputeFunctor<PairTableKokkos<DeviceType>,FULL,true,S_TableCompute<DeviceType,TABSTYLE> >
+      PairComputeFunctor<PairTableKokkos<DeviceType>,FULL,true,0,S_TableCompute<DeviceType,TABSTYLE> >
         f(this,(NeighListKokkos<DeviceType>*) list);
       if (eflag || vflag) Kokkos::parallel_reduce(list->inum,f,ev);
       else Kokkos::parallel_for(list->inum,f);
       f.contribute();
     } else if (neighflag == HALFTHREAD) {
-      PairComputeFunctor<PairTableKokkos<DeviceType>,HALFTHREAD,true,S_TableCompute<DeviceType,TABSTYLE> >
+      PairComputeFunctor<PairTableKokkos<DeviceType>,HALFTHREAD,true,0,S_TableCompute<DeviceType,TABSTYLE> >
         f(this,(NeighListKokkos<DeviceType>*) list);
       if (eflag || vflag) Kokkos::parallel_reduce(list->inum,f,ev);
       else Kokkos::parallel_for(list->inum,f);
       f.contribute();
     } else if (neighflag == HALF) {
-      PairComputeFunctor<PairTableKokkos<DeviceType>,HALF,true,S_TableCompute<DeviceType,TABSTYLE> >
+      PairComputeFunctor<PairTableKokkos<DeviceType>,HALF,true,0,S_TableCompute<DeviceType,TABSTYLE> >
         f(this,(NeighListKokkos<DeviceType>*) list);
       if (eflag || vflag) Kokkos::parallel_reduce(list->inum,f,ev);
       else Kokkos::parallel_for(list->inum,f);
diff --git a/src/KOKKOS/pair_table_kokkos.h b/src/KOKKOS/pair_table_kokkos.h
index 80226d3770..18112e4c18 100644
--- a/src/KOKKOS/pair_table_kokkos.h
+++ b/src/KOKKOS/pair_table_kokkos.h
@@ -35,9 +35,6 @@ struct S_TableCompute {
   static constexpr int TabStyle = TABSTYLE;
 };
 
-template <class DeviceType, int NEIGHFLAG, int TABSTYLE>
-struct PairTableComputeFunctor;
-
 template<class DeviceType>
 class PairTableKokkos : public PairTable {
  public:
@@ -135,33 +132,33 @@ class PairTableKokkos : public PairTable {
   F_FLOAT compute_ecoul(const F_FLOAT& /*rsq*/, const int& /*i*/, const int& /*j*/,
                         const int& /*itype*/, const int& /*jtype*/) const { return 0; }
 
-  friend struct PairComputeFunctor<PairTableKokkos,FULL,true,S_TableCompute<DeviceType,LOOKUP> >;
-  friend struct PairComputeFunctor<PairTableKokkos,HALF,true,S_TableCompute<DeviceType,LOOKUP> >;
-  friend struct PairComputeFunctor<PairTableKokkos,HALFTHREAD,true,S_TableCompute<DeviceType,LOOKUP> >;
-  friend struct PairComputeFunctor<PairTableKokkos,FULL,false,S_TableCompute<DeviceType,LOOKUP> >;
-  friend struct PairComputeFunctor<PairTableKokkos,HALF,false,S_TableCompute<DeviceType,LOOKUP> >;
-  friend struct PairComputeFunctor<PairTableKokkos,HALFTHREAD,false,S_TableCompute<DeviceType,LOOKUP> >;
+  friend struct PairComputeFunctor<PairTableKokkos,FULL,true,0,S_TableCompute<DeviceType,LOOKUP> >;
+  friend struct PairComputeFunctor<PairTableKokkos,HALF,true,0,S_TableCompute<DeviceType,LOOKUP> >;
+  friend struct PairComputeFunctor<PairTableKokkos,HALFTHREAD,true,0,S_TableCompute<DeviceType,LOOKUP> >;
+  friend struct PairComputeFunctor<PairTableKokkos,FULL,false,0,S_TableCompute<DeviceType,LOOKUP> >;
+  friend struct PairComputeFunctor<PairTableKokkos,HALF,false,0,S_TableCompute<DeviceType,LOOKUP> >;
+  friend struct PairComputeFunctor<PairTableKokkos,HALFTHREAD,false,0,S_TableCompute<DeviceType,LOOKUP> >;
 
-  friend struct PairComputeFunctor<PairTableKokkos,FULL,true,S_TableCompute<DeviceType,LINEAR> >;
-  friend struct PairComputeFunctor<PairTableKokkos,HALF,true,S_TableCompute<DeviceType,LINEAR> >;
-  friend struct PairComputeFunctor<PairTableKokkos,HALFTHREAD,true,S_TableCompute<DeviceType,LINEAR> >;
-  friend struct PairComputeFunctor<PairTableKokkos,FULL,false,S_TableCompute<DeviceType,LINEAR> >;
-  friend struct PairComputeFunctor<PairTableKokkos,HALF,false,S_TableCompute<DeviceType,LINEAR> >;
-  friend struct PairComputeFunctor<PairTableKokkos,HALFTHREAD,false,S_TableCompute<DeviceType,LINEAR> >;
+  friend struct PairComputeFunctor<PairTableKokkos,FULL,true,0,S_TableCompute<DeviceType,LINEAR> >;
+  friend struct PairComputeFunctor<PairTableKokkos,HALF,true,0,S_TableCompute<DeviceType,LINEAR> >;
+  friend struct PairComputeFunctor<PairTableKokkos,HALFTHREAD,true,0,S_TableCompute<DeviceType,LINEAR> >;
+  friend struct PairComputeFunctor<PairTableKokkos,FULL,false,0,S_TableCompute<DeviceType,LINEAR> >;
+  friend struct PairComputeFunctor<PairTableKokkos,HALF,false,0,S_TableCompute<DeviceType,LINEAR> >;
+  friend struct PairComputeFunctor<PairTableKokkos,HALFTHREAD,false,0,S_TableCompute<DeviceType,LINEAR> >;
 
-  friend struct PairComputeFunctor<PairTableKokkos,FULL,true,S_TableCompute<DeviceType,SPLINE> >;
-  friend struct PairComputeFunctor<PairTableKokkos,HALF,true,S_TableCompute<DeviceType,SPLINE> >;
-  friend struct PairComputeFunctor<PairTableKokkos,HALFTHREAD,true,S_TableCompute<DeviceType,SPLINE> >;
-  friend struct PairComputeFunctor<PairTableKokkos,FULL,false,S_TableCompute<DeviceType,SPLINE> >;
-  friend struct PairComputeFunctor<PairTableKokkos,HALF,false,S_TableCompute<DeviceType,SPLINE> >;
-  friend struct PairComputeFunctor<PairTableKokkos,HALFTHREAD,false,S_TableCompute<DeviceType,SPLINE> >;
+  friend struct PairComputeFunctor<PairTableKokkos,FULL,true,0,S_TableCompute<DeviceType,SPLINE> >;
+  friend struct PairComputeFunctor<PairTableKokkos,HALF,true,0,S_TableCompute<DeviceType,SPLINE> >;
+  friend struct PairComputeFunctor<PairTableKokkos,HALFTHREAD,true,0,S_TableCompute<DeviceType,SPLINE> >;
+  friend struct PairComputeFunctor<PairTableKokkos,FULL,false,0,S_TableCompute<DeviceType,SPLINE> >;
+  friend struct PairComputeFunctor<PairTableKokkos,HALF,false,0,S_TableCompute<DeviceType,SPLINE> >;
+  friend struct PairComputeFunctor<PairTableKokkos,HALFTHREAD,false,0,S_TableCompute<DeviceType,SPLINE> >;
 
-  friend struct PairComputeFunctor<PairTableKokkos,FULL,true,S_TableCompute<DeviceType,BITMAP> >;
-  friend struct PairComputeFunctor<PairTableKokkos,HALF,true,S_TableCompute<DeviceType,BITMAP> >;
-  friend struct PairComputeFunctor<PairTableKokkos,HALFTHREAD,true,S_TableCompute<DeviceType,BITMAP> >;
-  friend struct PairComputeFunctor<PairTableKokkos,FULL,false,S_TableCompute<DeviceType,BITMAP> >;
-  friend struct PairComputeFunctor<PairTableKokkos,HALF,false,S_TableCompute<DeviceType,BITMAP> >;
-  friend struct PairComputeFunctor<PairTableKokkos,HALFTHREAD,false,S_TableCompute<DeviceType,BITMAP> >;
+  friend struct PairComputeFunctor<PairTableKokkos,FULL,true,0,S_TableCompute<DeviceType,BITMAP> >;
+  friend struct PairComputeFunctor<PairTableKokkos,HALF,true,0,S_TableCompute<DeviceType,BITMAP> >;
+  friend struct PairComputeFunctor<PairTableKokkos,HALFTHREAD,true,0,S_TableCompute<DeviceType,BITMAP> >;
+  friend struct PairComputeFunctor<PairTableKokkos,FULL,false,0,S_TableCompute<DeviceType,BITMAP> >;
+  friend struct PairComputeFunctor<PairTableKokkos,HALF,false,0,S_TableCompute<DeviceType,BITMAP> >;
+  friend struct PairComputeFunctor<PairTableKokkos,HALFTHREAD,false,0,S_TableCompute<DeviceType,BITMAP> >;
 
   friend void pair_virial_fdotr_compute<PairTableKokkos>(PairTableKokkos*);
 };
diff --git a/src/KOKKOS/pair_yukawa_colloid_kokkos.cpp b/src/KOKKOS/pair_yukawa_colloid_kokkos.cpp
new file mode 100644
index 0000000000..04eb5ab657
--- /dev/null
+++ b/src/KOKKOS/pair_yukawa_colloid_kokkos.cpp
@@ -0,0 +1,270 @@
+// clang-format off
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Trung Nguyen (U Chicago)
+------------------------------------------------------------------------- */
+
+#include "pair_yukawa_colloid_kokkos.h"
+
+#include "atom_kokkos.h"
+#include "atom_masks.h"
+#include "error.h"
+#include "force.h"
+#include "kokkos.h"
+#include "memory_kokkos.h"
+#include "neigh_list.h"
+#include "neigh_request.h"
+#include "neighbor.h"
+#include "respa.h"
+#include "update.h"
+
+#include <cmath>
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+PairYukawaColloidKokkos<DeviceType>::PairYukawaColloidKokkos(LAMMPS *lmp) : PairYukawaColloid(lmp)
+{
+  kokkosable = 1;
+  respa_enable = 0;
+  atomKK = (AtomKokkos *) atom;
+  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
+  // the read mask is the modify mask plus positions, types and radii
+  datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK;
+  datamask_read = X_MASK | TYPE_MASK | RADIUS_MASK | datamask_modify;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+PairYukawaColloidKokkos<DeviceType>::~PairYukawaColloidKokkos()
+{
+  // skip cleanup when copymode is set or when nothing was allocated
+  if (copymode || !allocated) return;
+
+  // release each dual view together with its paired array pointer
+  memoryKK->destroy_kokkos(k_eatom,eatom);
+  memoryKK->destroy_kokkos(k_vatom,vatom);
+  memoryKK->destroy_kokkos(k_cutsq,cutsq);
+}
+
+/* ----------------------------------------------------------------------
+   allocate all arrays
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairYukawaColloidKokkos<DeviceType>::allocate()
+{
+  PairYukawaColloid::allocate();
+
+  const int ntp1 = atom->ntypes + 1;
+  // drop the existing cutsq array and recreate it through memoryKK with k_cutsq
+  memory->destroy(cutsq);
+  memoryKK->create_kokkos(k_cutsq,cutsq,ntp1,ntp1,"pair:cutsq");
+  d_cutsq = k_cutsq.template view<DeviceType>();
+
+  // (ntypes+1) x (ntypes+1) parameter table and its DeviceType view
+  k_params = decltype(k_params)("PairYukawaColloid::params",ntp1,ntp1);
+  params = k_params.template view<DeviceType>();
+}
+
+/* ----------------------------------------------------------------------
+   init specific to this pair style
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairYukawaColloidKokkos<DeviceType>::init_style()
+{
+  PairYukawaColloid::init_style();
+
+  // rRESPA is rejected when an inner or middle level is defined
+  const bool use_respa = (update->whichflag == 1) &&
+                         utils::strmatch(update->integrate_style,"^respa");
+  if (use_respa) {
+    auto *respa = (Respa *) update->integrate;
+    if (respa->level_inner >= 0 || respa->level_middle >= 0)
+      error->all(FLERR,"Cannot use Kokkos pair style with rRESPA inner/middle");
+  }
+
+  // adjust neighbor list request for KOKKOS
+  constexpr bool on_device = std::is_same<DeviceType,LMPDeviceType>::value;
+  constexpr bool on_host = std::is_same<DeviceType,LMPHostType>::value && !on_device;
+
+  neighflag = lmp->kokkos->neighflag;
+  auto request = neighbor->find_request(this);
+  request->set_kokkos_host(on_host);
+  request->set_kokkos_device(on_device);
+  if (neighflag == FULL) request->enable_full();
+}
+
+/* ----------------------------------------------------------------------
+   init for one type pair i,j and corresponding j,i
+------------------------------------------------------------------------- */
+// copies the i,j coefficients into the host parameter tables and returns the cutoff
+template<class DeviceType>
+double PairYukawaColloidKokkos<DeviceType>::init_one(int i, int j)
+{
+  const double rcut   = PairYukawaColloid::init_one(i,j);
+  const double rcutsq = rcut*rcut;
+  auto &pij  = k_params.h_view(i,j);
+  pij.a      = a[i][j];
+  pij.offset = offset[i][j];
+  pij.cutsq  = rcutsq;
+  k_params.h_view(j,i) = pij;
+
+  if (i <= MAX_TYPES_STACKPARAMS && j <= MAX_TYPES_STACKPARAMS) {
+    m_params[i][j] = m_params[j][i] = pij;
+    m_cutsq[i][j] = m_cutsq[j][i] = rcutsq;
+  }
+
+  k_cutsq.h_view(i,j) = k_cutsq.h_view(j,i) = rcutsq;
+  k_cutsq.template modify<LMPHostType>();
+  k_params.template modify<LMPHostType>();
+  return rcut;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairYukawaColloidKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
+{
+  eflag = eflag_in;
+  vflag = vflag_in;
+
+  if (neighflag == FULL) no_virial_fdotr_compute = 1;
+
+  ev_init(eflag,vflag,0);
+
+  // recreate the per-atom energy/virial dual views when they are requested
+
+  if (eflag_atom) {
+    memoryKK->destroy_kokkos(k_eatom,eatom);
+    memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom");
+    d_eatom = k_eatom.view<DeviceType>();
+  }
+  if (vflag_atom) {
+    memoryKK->destroy_kokkos(k_vatom,vatom);
+    memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"pair:vatom");
+    d_vatom = k_vatom.view<DeviceType>();
+  }
+
+  // sync the data that is read and flag the data that gets modified
+
+  atomKK->sync(execution_space,datamask_read);
+  k_cutsq.template sync<DeviceType>();
+  k_params.template sync<DeviceType>();
+  atomKK->modified(execution_space,(eflag || vflag) ? datamask_modify : F_MASK);
+
+  // DeviceType views of the per-atom arrays
+
+  x = atomKK->k_x.view<DeviceType>();
+  c_x = atomKK->k_x.view<DeviceType>();
+  f = atomKK->k_f.view<DeviceType>();
+  type = atomKK->k_type.view<DeviceType>();
+  radius = atomKK->k_radius.view<DeviceType>();
+
+  // atom counts and settings copied from atom and force
+
+  nlocal = atom->nlocal;
+  nall = nlocal + atom->nghost;
+  newton_pair = force->newton_pair;
+  for (int k = 0; k < 4; k++) special_lj[k] = force->special_lj[k];
+
+  // loop over neighbors of my atoms
+
+  auto *klist = (NeighListKokkos<DeviceType>*) list;
+  EV_FLOAT ev = pair_compute<PairYukawaColloidKokkos<DeviceType>,void >(this,klist);
+
+  // add the reduced energy and virial to the global accumulators
+
+  if (eflag_global) eng_vdwl += ev.evdwl;
+  if (vflag_global)
+    for (int k = 0; k < 6; k++) virial[k] += ev.v[k];
+
+  if (vflag_fdotr) pair_virial_fdotr_compute(this);
+
+  // mark per-atom results as modified in DeviceType space and sync them to the host
+
+  if (eflag_atom) {
+    k_eatom.template modify<DeviceType>();
+    k_eatom.template sync<LMPHostType>();
+  }
+
+  if (vflag_atom) {
+    k_vatom.template modify<DeviceType>();
+    k_vatom.template sync<LMPHostType>();
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+template<bool STACKPARAMS, class Specialisation>
+KOKKOS_INLINE_FUNCTION
+F_FLOAT PairYukawaColloidKokkos<DeviceType>::
+compute_fpair(const F_FLOAT& rsq, const int& i, const int&j,
+              const int& itype, const int& jtype) const {
+  // Returns the pair force divided by r for atoms i and j at squared
+  // distance rsq. The prefactor a is read from the fixed-size m_params
+  // table when STACKPARAMS is true, otherwise from the params view.
+  const F_FLOAT aa     = STACKPARAMS ? m_params[itype][jtype].a
+                                     : params(itype,jtype).a;
+  const F_FLOAT radi   = radius[i];
+  const F_FLOAT radj   = radius[j];
+  const F_FLOAT rr     = sqrt(rsq);
+
+  // U   = a * exp(-kappa*(r-(radi+radj))) / kappa
+  // f   = -dU/dr = a * exp(-kappa*(r-(radi+radj)))
+  // f/r = a * exp(-kappa*(r-(radi+radj))) / r
+  const F_FLOAT rinv = 1.0 / rr;
+  const F_FLOAT screening = exp(-kappa*(rr-(radi+radj)));
+  const F_FLOAT forceyukawa = aa * screening;
+  const F_FLOAT fpair = forceyukawa * rinv;
+
+  return fpair;
+}
+
+template<class DeviceType>
+template<bool STACKPARAMS, class Specialisation>
+KOKKOS_INLINE_FUNCTION
+F_FLOAT PairYukawaColloidKokkos<DeviceType>::
+compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j,
+              const int& itype, const int& jtype) const {
+  // Returns the pair energy for atoms i and j at squared distance rsq,
+  // with the per-type-pair offset subtracted.
+  const F_FLOAT radi   = radius[i];
+  const F_FLOAT radj   = radius[j];
+  const F_FLOAT rr     = sqrt(rsq);
+  const F_FLOAT aa     = STACKPARAMS ? m_params[itype][jtype].a
+                                     : params(itype,jtype).a;
+  const F_FLOAT offset = STACKPARAMS ? m_params[itype][jtype].offset
+                                     : params(itype,jtype).offset;
+
+  // U   = a * exp(-kappa*(r-(radi+radj))) / kappa - offset
+  // (the energy needs no 1/r factor, so none is computed here)
+  const F_FLOAT screening = exp(-kappa*(rr-(radi+radj)));
+
+  return aa / kappa * screening - offset;
+}
+
+
+namespace LAMMPS_NS {
+template class PairYukawaColloidKokkos<LMPDeviceType>;  // explicit instantiation for LMPDeviceType
+#ifdef LMP_KOKKOS_GPU
+template class PairYukawaColloidKokkos<LMPHostType>;    // extra instantiation, compiled only if LMP_KOKKOS_GPU is defined
+#endif
+}  // namespace LAMMPS_NS
diff --git a/src/KOKKOS/pair_yukawa_colloid_kokkos.h b/src/KOKKOS/pair_yukawa_colloid_kokkos.h
new file mode 100644
index 0000000000..83ce58e898
--- /dev/null
+++ b/src/KOKKOS/pair_yukawa_colloid_kokkos.h
@@ -0,0 +1,123 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+// clang-format off
+PairStyle(yukawa/colloid/kk,PairYukawaColloidKokkos<LMPDeviceType>);
+PairStyle(yukawa/colloid/kk/device,PairYukawaColloidKokkos<LMPDeviceType>);
+PairStyle(yukawa/colloid/kk/host,PairYukawaColloidKokkos<LMPHostType>);
+// clang-format on
+#else
+
+// clang-format off
+#ifndef LMP_PAIR_YUKAWA_COLLOID_KOKKOS_H
+#define LMP_PAIR_YUKAWA_COLLOID_KOKKOS_H
+
+#include "pair_kokkos.h"
+#include "pair_yukawa_colloid.h"
+#include "neigh_list_kokkos.h"
+
+namespace LAMMPS_NS {
+
+template<class DeviceType>
+class PairYukawaColloidKokkos : public PairYukawaColloid {  // Kokkos variant of PairYukawaColloid
+ public:
+  enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF};
+  enum {COUL_FLAG=0};
+  typedef DeviceType device_type;
+  typedef ArrayTypes<DeviceType> AT;
+
+  PairYukawaColloidKokkos(class LAMMPS *);
+  ~PairYukawaColloidKokkos() override;
+
+  void compute(int, int) override;
+  void init_style() override;
+  double init_one(int,int) override;
+
+  struct params_yukawa {  // coefficients for one type pair, zero-initialized
+    KOKKOS_INLINE_FUNCTION
+    params_yukawa() : cutsq(0), a(0), offset(0) {}
+    KOKKOS_INLINE_FUNCTION
+    params_yukawa(int /*i*/) : cutsq(0), a(0), offset(0) {}  // argument is ignored
+    F_FLOAT cutsq, a, offset;
+  };
+
+
+ protected:
+  template<bool STACKPARAMS, class Specialisation>  // pair force divided by r
+  KOKKOS_INLINE_FUNCTION
+  F_FLOAT compute_fpair(const F_FLOAT& rsq, const int& i, const int&j,
+                        const int& itype, const int& jtype) const;
+
+  template<bool STACKPARAMS, class Specialisation>  // pair energy minus offset
+  KOKKOS_INLINE_FUNCTION
+  F_FLOAT compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j,
+                        const int& itype, const int& jtype) const;
+
+  template<bool STACKPARAMS, class Specialisation>  // no Coulomb term: always 0
+  KOKKOS_INLINE_FUNCTION
+  F_FLOAT compute_ecoul(const F_FLOAT& /*rsq*/, const int& /*i*/, const int& /*j*/,
+                        const int& /*itype*/, const int& /*jtype*/) const { return 0; }
+
+  // parameter tables (dual view, its DeviceType view, fixed-size copies) and per-atom views
+  Kokkos::DualView<params_yukawa**,Kokkos::LayoutRight,DeviceType> k_params;
+  typename Kokkos::DualView<params_yukawa**,Kokkos::LayoutRight,DeviceType>::t_dev_const_um params;
+  params_yukawa m_params[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
+  F_FLOAT m_cutsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
+  typename AT::t_x_array_randomread x;
+  typename AT::t_x_array c_x;
+  typename AT::t_f_array f;
+  typename AT::t_int_1d_randomread type;
+  typename AT::t_float_1d_randomread radius;
+
+  DAT::tdual_efloat_1d k_eatom;
+  DAT::tdual_virial_array k_vatom;
+  typename AT::t_efloat_1d d_eatom;
+  typename AT::t_virial_array d_vatom;
+
+  int newton_pair;
+  double special_lj[4];
+
+  typename AT::tdual_ffloat_2d k_cutsq;
+  typename AT::t_ffloat_2d d_cutsq;
+
+  // neighbor list flag (set in init_style) and values refreshed by each compute() call
+  int neighflag;
+  int nlocal,nall,eflag,vflag;
+
+  void allocate() override;
+  friend struct PairComputeFunctor<PairYukawaColloidKokkos,FULL,true,0>;
+  friend struct PairComputeFunctor<PairYukawaColloidKokkos,FULL,true,1>;
+  friend struct PairComputeFunctor<PairYukawaColloidKokkos,HALF,true>;
+  friend struct PairComputeFunctor<PairYukawaColloidKokkos,HALFTHREAD,true>;
+  friend struct PairComputeFunctor<PairYukawaColloidKokkos,FULL,false,0>;
+  friend struct PairComputeFunctor<PairYukawaColloidKokkos,FULL,false,1>;
+  friend struct PairComputeFunctor<PairYukawaColloidKokkos,HALF,false>;
+  friend struct PairComputeFunctor<PairYukawaColloidKokkos,HALFTHREAD,false>;
+  friend EV_FLOAT pair_compute_neighlist<PairYukawaColloidKokkos,FULL,0>(PairYukawaColloidKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairYukawaColloidKokkos,FULL,1>(PairYukawaColloidKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairYukawaColloidKokkos,HALF>(
+    PairYukawaColloidKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairYukawaColloidKokkos,HALFTHREAD>(
+    PairYukawaColloidKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute<PairYukawaColloidKokkos>(
+    PairYukawaColloidKokkos*,NeighListKokkos<DeviceType>*);
+  friend void pair_virial_fdotr_compute<PairYukawaColloidKokkos>(PairYukawaColloidKokkos*);
+
+};
+
+}
+
+#endif
+#endif
+
diff --git a/src/KOKKOS/pair_yukawa_kokkos.h b/src/KOKKOS/pair_yukawa_kokkos.h
index e04f65264b..dc93e83aea 100644
--- a/src/KOKKOS/pair_yukawa_kokkos.h
+++ b/src/KOKKOS/pair_yukawa_kokkos.h
@@ -95,20 +95,19 @@ class PairYukawaKokkos : public PairYukawa {
   int nlocal,nall,eflag,vflag;
 
   void allocate() override;
-  friend struct PairComputeFunctor<PairYukawaKokkos,FULL,true>;
+  friend struct PairComputeFunctor<PairYukawaKokkos,FULL,true,0>;
+  friend struct PairComputeFunctor<PairYukawaKokkos,FULL,true,1>;
   friend struct PairComputeFunctor<PairYukawaKokkos,HALF,true>;
   friend struct PairComputeFunctor<PairYukawaKokkos,HALFTHREAD,true>;
-  friend struct PairComputeFunctor<PairYukawaKokkos,FULL,false>;
+  friend struct PairComputeFunctor<PairYukawaKokkos,FULL,false,0>;
+  friend struct PairComputeFunctor<PairYukawaKokkos,FULL,false,1>;
   friend struct PairComputeFunctor<PairYukawaKokkos,HALF,false>;
   friend struct PairComputeFunctor<PairYukawaKokkos,HALFTHREAD,false>;
-  friend EV_FLOAT pair_compute_neighlist<PairYukawaKokkos,FULL,void>(
-    PairYukawaKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairYukawaKokkos,HALF,void>(
-    PairYukawaKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairYukawaKokkos,HALFTHREAD,void>(
-    PairYukawaKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute<PairYukawaKokkos,void>(
-    PairYukawaKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairYukawaKokkos,FULL,0>(PairYukawaKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairYukawaKokkos,FULL,1>(PairYukawaKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairYukawaKokkos,HALF>(PairYukawaKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairYukawaKokkos,HALFTHREAD>(PairYukawaKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute<PairYukawaKokkos,void>(PairYukawaKokkos*,NeighListKokkos<DeviceType>*);
   friend void pair_virial_fdotr_compute<PairYukawaKokkos>(PairYukawaKokkos*);
 
 };
diff --git a/src/KOKKOS/pair_zbl_kokkos.h b/src/KOKKOS/pair_zbl_kokkos.h
index bd33cdb5e0..b7638a25e0 100644
--- a/src/KOKKOS/pair_zbl_kokkos.h
+++ b/src/KOKKOS/pair_zbl_kokkos.h
@@ -89,16 +89,19 @@ class PairZBLKokkos : public PairZBL {
 
   void allocate() override;
 
-  friend struct PairComputeFunctor<PairZBLKokkos,FULL,true>;
+  friend struct PairComputeFunctor<PairZBLKokkos,FULL,true,0>;
+  friend struct PairComputeFunctor<PairZBLKokkos,FULL,true,1>;
   friend struct PairComputeFunctor<PairZBLKokkos,HALF,true>;
   friend struct PairComputeFunctor<PairZBLKokkos,HALFTHREAD,true>;
-  friend struct PairComputeFunctor<PairZBLKokkos,FULL,false>;
+  friend struct PairComputeFunctor<PairZBLKokkos,FULL,false,0>;
+  friend struct PairComputeFunctor<PairZBLKokkos,FULL,false,1>;
   friend struct PairComputeFunctor<PairZBLKokkos,HALF,false>;
   friend struct PairComputeFunctor<PairZBLKokkos,HALFTHREAD,false>;
-  friend EV_FLOAT pair_compute_neighlist<PairZBLKokkos,FULL,void>(PairZBLKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairZBLKokkos,HALF,void>(PairZBLKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairZBLKokkos,HALFTHREAD,void>(PairZBLKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute<PairZBLKokkos,void>(PairZBLKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairZBLKokkos,FULL,0>(PairZBLKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairZBLKokkos,FULL,1>(PairZBLKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairZBLKokkos,HALF>(PairZBLKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairZBLKokkos,HALFTHREAD>(PairZBLKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute<PairZBLKokkos>(PairZBLKokkos*,NeighListKokkos<DeviceType>*);
   friend void pair_virial_fdotr_compute<PairZBLKokkos>(PairZBLKokkos*);
 };
 
diff --git a/src/MANYBODY/pair_airebo.cpp b/src/MANYBODY/pair_airebo.cpp
index e34283f71c..129b9d2218 100644
--- a/src/MANYBODY/pair_airebo.cpp
+++ b/src/MANYBODY/pair_airebo.cpp
@@ -59,7 +59,6 @@ PairAIREBO::PairAIREBO(LAMMPS *lmp)
   nextra = 3;
   pvector = new double[nextra];
 
-  trim_flag = 0; // workaround
   maxlocal = 0;
   REBO_numneigh = nullptr;
   REBO_firstneigh = nullptr;
diff --git a/src/OPENMP/npair_omp.h b/src/OPENMP/npair_omp.h
index 318fddfd54..7249c59406 100644
--- a/src/OPENMP/npair_omp.h
+++ b/src/OPENMP/npair_omp.h
@@ -32,6 +32,7 @@ namespace LAMMPS_NS {
 // get access to number of threads and per-thread data structures via FixOMP
 #define NPAIR_OMP_INIT                 \
   const int nthreads = comm->nthreads; \
+  omp_set_num_threads(nthreads); \
   const int ifix = modify->find_fix("package_omp")
 
 // get thread id and then assign each thread a fixed chunk of atoms
diff --git a/src/OPENMP/thr_omp.cpp b/src/OPENMP/thr_omp.cpp
index c27a99028d..88344a900d 100644
--- a/src/OPENMP/thr_omp.cpp
+++ b/src/OPENMP/thr_omp.cpp
@@ -34,6 +34,10 @@
 
 #include <cstring>
 
+#if defined(_OPENMP)
+#include <omp.h>
+#endif
+
 using namespace LAMMPS_NS;
 using MathConst::THIRD;
 
@@ -44,6 +48,9 @@ ThrOMP::ThrOMP(LAMMPS *ptr, int style) : lmp(ptr), fix(nullptr), thr_style(style
   // register fix omp with this class
   fix = static_cast<FixOMP *>(lmp->modify->get_fix_by_id("package_omp"));
   if (!fix) lmp->error->all(FLERR, "The 'package omp' command is required for /omp styles");
+#if defined(_OPENMP)
+  omp_set_num_threads(lmp->comm->nthreads);
+#endif
 }
 
 // clang-format off
diff --git a/src/REPLICA/fix_pimd_langevin.cpp b/src/REPLICA/fix_pimd_langevin.cpp
index c091075f7c..cffaf327e4 100644
--- a/src/REPLICA/fix_pimd_langevin.cpp
+++ b/src/REPLICA/fix_pimd_langevin.cpp
@@ -1347,7 +1347,7 @@ void FixPIMDLangevin::compute_tote()
 
 void FixPIMDLangevin::compute_t_prim()
 {
-  t_prim = 1.5 * atom->natoms * np * force->boltz * temp - total_spring_energy;
+  t_prim = 1.5 * atom->natoms * np * force->boltz * temp - total_spring_energy * inverse_np;
 }
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/VORONOI/compute_voronoi_atom.cpp b/src/VORONOI/compute_voronoi_atom.cpp
index 28bab271a2..b4f1aa3055 100644
--- a/src/VORONOI/compute_voronoi_atom.cpp
+++ b/src/VORONOI/compute_voronoi_atom.cpp
@@ -111,12 +111,7 @@ ComputeVoronoi::ComputeVoronoi(LAMMPS *lmp, int narg, char **arg) :
       if (iarg + 2 > narg) error->all(FLERR,"Illegal compute voronoi/atom command");
       faces_flag = utils::logical(FLERR,arg[iarg+1],false,lmp);
       iarg += 2;
-    } else if (strcmp(arg[iarg], "peratom") == 0) {
-      if (iarg + 2 > narg) error->all(FLERR,"Illegal compute voronoi/atom command");
-      peratom_flag = utils::logical(FLERR,arg[iarg+1],false,lmp);
-      iarg += 2;
-    }
-    else error->all(FLERR,"Illegal compute voronoi/atom command");
+    } else error->all(FLERR,"Illegal compute voronoi/atom command");
   }
 
   if (occupation && ( surface!=VOROSURF_NONE || maxedge>0 ) )
@@ -394,27 +389,29 @@ void ComputeVoronoi::checkOccupation()
   // clear occupation vector
   memset(occvec, 0, oldnatoms*sizeof(*occvec));
 
-  int i, j, k,
-      nlocal = atom->nlocal,
-      nall = atom->nghost + nlocal;
-  double rx, ry, rz,
-         **x = atom->x;
+  int i, j, k;
+  double rx, ry, rz;
+
+  int nlocal = atom->nlocal;
+  int nall = atom->nghost + nlocal;
+  double **x = atom->x;
 
   // prepare destination buffer for variable evaluation
+
   if (atom->nmax > lmax) {
     memory->destroy(lnext);
     lmax = atom->nmax;
     memory->create(lnext,lmax,"voronoi/atom:lnext");
   }
 
-  // clear lroot
-  for (i=0; i<oldnall; ++i) lroot[i] = -1;
+  // clear lroot and lnext
 
-  // clear lnext
+  for (i=0; i<oldnall; ++i) lroot[i] = -1;
   for (i=0; i<nall; ++i) lnext[i] = -1;
 
   // loop over all local atoms and find out in which of the local first frame voronoi cells the are in
   // (need to loop over ghosts, too, to get correct occupation numbers for the second column)
+
   for (i=0; i<nall; ++i) {
     // again: find_voronoi_cell() should be in the common base class. Why it is not, I don't know. Ask the voro++ author.
     if ((  radstr && con_poly->find_voronoi_cell(x[i][0], x[i][1], x[i][2], rx, ry, rz, k)) ||
@@ -435,6 +432,7 @@ void ComputeVoronoi::checkOccupation()
   }
 
   // MPI sum occupation
+
 #ifdef NOTINPLACE
   memcpy(sendocc, occvec, oldnatoms*sizeof(*occvec));
   MPI_Allreduce(sendocc, occvec, oldnatoms, MPI_INT, MPI_SUM, world);
@@ -443,6 +441,7 @@ void ComputeVoronoi::checkOccupation()
 #endif
 
   // determine the total number of atoms in this atom's currently occupied cell
+
   int c;
   for (i=0; i<oldnall; i++) { // loop over lroot (old voronoi cells)
     // count
@@ -461,11 +460,12 @@ void ComputeVoronoi::checkOccupation()
   }
 
   // cherry pick currently owned atoms
+  // set the new atom count in the atom's first frame voronoi cell
+  // but take into account that new atoms might have been added to
+  // the system, so we can only look up occupancy for tags that are
+  // smaller or equal to the recorded largest tag.
+
   for (i=0; i<nlocal; i++) {
-    // set the new atom count in the atom's first frame voronoi cell
-    // but take into account that new atoms might have been added to
-    // the system, so we can only look up occupancy for tags that are
-    // smaller or equal to the recorded largest tag.
     tagint mytag = atom->tag[i];
     if (mytag > oldmaxtag)
       voro[i][0] = 0;
@@ -479,6 +479,7 @@ void ComputeVoronoi::checkOccupation()
 void ComputeVoronoi::loopCells()
 {
   // invoke voro++ and fetch results for owned atoms in group
+
   voronoicell_neighbor c;
   int i;
   if (faces_flag) nfaces = 0;
diff --git a/src/balance.cpp b/src/balance.cpp
index 3bd083e2b9..6f28081f13 100644
--- a/src/balance.cpp
+++ b/src/balance.cpp
@@ -473,7 +473,7 @@ void Balance::options(int iarg, int narg, char **arg, int sortflag_default)
       }
       iarg += 2+nopt;
 
-    } else if (strcmp(arg[iarg+1],"sort") == 0) {
+    } else if (strcmp(arg[iarg],"sort") == 0) {
       if (iarg+2 > narg) utils::missing_cmd_args(FLERR, "balance sort", error);
       sortflag = utils::logical(FLERR,arg[iarg+1],false,lmp);
       iarg += 2;
diff --git a/src/compute_msd_chunk.cpp b/src/compute_msd_chunk.cpp
index 07234ecfdb..6e7436d6ad 100644
--- a/src/compute_msd_chunk.cpp
+++ b/src/compute_msd_chunk.cpp
@@ -27,8 +27,8 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 ComputeMSDChunk::ComputeMSDChunk(LAMMPS *lmp, int narg, char **arg) :
-    ComputeChunk(lmp, narg, arg), id_fix(nullptr), massproc(nullptr), masstotal(nullptr),
-    com(nullptr), comall(nullptr), msd(nullptr)
+    ComputeChunk(lmp, narg, arg), id_fix(nullptr), fix(nullptr), massproc(nullptr),
+    masstotal(nullptr), com(nullptr), comall(nullptr), msd(nullptr)
 {
   if (narg != 4) error->all(FLERR, "Illegal compute msd/chunk command");
 
@@ -196,6 +196,12 @@ void ComputeMSDChunk::compute_array()
 void ComputeMSDChunk::allocate()
 {
   ComputeChunk::allocate();
+  memory->destroy(massproc);
+  memory->destroy(masstotal);
+  memory->destroy(com);
+  memory->destroy(comall);
+  memory->destroy(msd);
+
   memory->create(massproc, nchunk, "msd/chunk:massproc");
   memory->create(masstotal, nchunk, "msd/chunk:masstotal");
   memory->create(com, nchunk, 3, "msd/chunk:com");
diff --git a/src/compute_property_local.cpp b/src/compute_property_local.cpp
index d0523a1bec..87517a3e05 100644
--- a/src/compute_property_local.cpp
+++ b/src/compute_property_local.cpp
@@ -405,6 +405,7 @@ int ComputePropertyLocal::count_pairs(int allflag, int forceflag)
       if (!(mask[j] & groupbit)) continue;
 
       // itag = jtag is possible for long cutoffs that include images of self
+      // do not need triclinic logic here b/c neighbor list itself is correct
 
       if (newton_pair == 0 && j >= nlocal) {
         jtag = tag[j];
diff --git a/src/compute_reduce.cpp b/src/compute_reduce.cpp
index 6b27498eb7..3feabf2ec3 100644
--- a/src/compute_reduce.cpp
+++ b/src/compute_reduce.cpp
@@ -34,9 +34,11 @@ using namespace LAMMPS_NS;
 #define BIG 1.0e20
 
 //----------------------------------------------------------------
+
 void abs_max(void *in, void *inout, int * /*len*/, MPI_Datatype * /*type*/)
 {
   // r is the already reduced value, n is the new value
+
   double n = std::fabs(*(double *) in), r = *(double *) inout;
   double m;
 
@@ -47,9 +49,11 @@ void abs_max(void *in, void *inout, int * /*len*/, MPI_Datatype * /*type*/)
   }
   *(double *) inout = m;
 }
+
 void abs_min(void *in, void *inout, int * /*len*/, MPI_Datatype * /*type*/)
 {
   // r is the already reduced value, n is the new value
+
   double n = std::fabs(*(double *) in), r = *(double *) inout;
   double m;
 
@@ -68,6 +72,7 @@ ComputeReduce::ComputeReduce(LAMMPS *lmp, int narg, char **arg) :
     owner(nullptr), idregion(nullptr), region(nullptr), varatom(nullptr)
 {
   int iarg = 0;
+
   if (strcmp(style, "reduce") == 0) {
     if (narg < 5) utils::missing_cmd_args(FLERR, "compute reduce", error);
     iarg = 3;
@@ -134,7 +139,6 @@ ComputeReduce::ComputeReduce(LAMMPS *lmp, int narg, char **arg) :
     value_t val;
 
     val.id = "";
-    val.flavor = 0;
     val.val.c = nullptr;
 
     if (strcmp(arg[iarg], "x") == 0) {
@@ -188,6 +192,7 @@ ComputeReduce::ComputeReduce(LAMMPS *lmp, int narg, char **arg) :
   nvalues = values.size();
   replace = new int[nvalues];
   for (int i = 0; i < nvalues; ++i) replace[i] = -1;
+  input_mode = PERATOM;
   std::string mycmd = "compute ";
   mycmd += style;
 
@@ -207,6 +212,11 @@ ComputeReduce::ComputeReduce(LAMMPS *lmp, int narg, char **arg) :
         error->all(FLERR, "Compute {} replace column already used for another replacement");
       replace[col1] = col2;
       iarg += 2;
+    } else if (strcmp(arg[iarg], "inputs") == 0) {
+      if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, mycmd + " inputs", error);
+      if (strcmp(arg[iarg+1], "peratom") == 0) input_mode = PERATOM;
+      else if (strcmp(arg[iarg+1], "local") == 0) input_mode = LOCAL;
+      iarg += 2;
     } else
       error->all(FLERR, "Unknown compute {} keyword: {}", style, arg[iarg]);
   }
@@ -231,66 +241,67 @@ ComputeReduce::ComputeReduce(LAMMPS *lmp, int narg, char **arg) :
   // setup and error check
 
   for (auto &val : values) {
-    if (val.which == ArgInfo::X || val.which == ArgInfo::V || val.which == ArgInfo::F)
-      val.flavor = PERATOM;
+    if (val.which == ArgInfo::X || val.which == ArgInfo::V || val.which == ArgInfo::F) {
+      if (input_mode == LOCAL) error->all(FLERR,"Compute {} inputs must be all local", style);
 
-    else if (val.which == ArgInfo::COMPUTE) {
+    } else if (val.which == ArgInfo::COMPUTE) {
       val.val.c = modify->get_compute_by_id(val.id);
       if (!val.val.c)
         error->all(FLERR, "Compute ID {} for compute {} does not exist", val.id, style);
-      if (val.val.c->peratom_flag) {
-        val.flavor = PERATOM;
+
+      if (input_mode == PERATOM) {
+        if (!val.val.c->peratom_flag)
+          error->all(FLERR, "Compute {} compute {} does not calculate per-atom values", style, val.id);
         if (val.argindex == 0 && val.val.c->size_peratom_cols != 0)
-          error->all(FLERR, "Compute {} compute {} does not calculate a per-atom vector", style,
-                     val.id);
+          error->all(FLERR, "Compute {} compute {} does not calculate a per-atom vector", style, val.id);
         if (val.argindex && val.val.c->size_peratom_cols == 0)
-          error->all(FLERR, "Compute {} compute {} does not calculate a per-atom array", style,
-                     val.id);
+          error->all(FLERR, "Compute {} compute {} does not calculate a per-atom array", style, val.id);
         if (val.argindex && val.argindex > val.val.c->size_peratom_cols)
           error->all(FLERR, "Compute {} compute {} array is accessed out-of-range", style, val.id);
-      } else if (val.val.c->local_flag) {
-        val.flavor = LOCAL;
+
+      } else if (input_mode == LOCAL) {
+        if (!val.val.c->local_flag)    // LOCAL mode requires a compute that produces local data
+          error->all(FLERR, "Compute {} compute {} does not calculate local values", style, val.id);
         if (val.argindex == 0 && val.val.c->size_local_cols != 0)
-          error->all(FLERR, "Compute {} compute {} does not calculate a local vector", style,
-                     val.id);
+          error->all(FLERR, "Compute {} compute {} does not calculate a local vector", style, val.id);
         if (val.argindex && val.val.c->size_local_cols == 0)
-          error->all(FLERR, "Compute {} compute {} does not calculate a local array", style,
-                     val.id);
+          error->all(FLERR, "Compute {} compute {} does not calculate a local array", style, val.id);
         if (val.argindex && val.argindex > val.val.c->size_local_cols)
           error->all(FLERR, "Compute {} compute {} array is accessed out-of-range", style, val.id);
-      } else
-        error->all(FLERR, "Compute {} compute {} calculates global values", style, val.id);
+      }
 
     } else if (val.which == ArgInfo::FIX) {
       val.val.f = modify->get_fix_by_id(val.id);
       if (!val.val.f) error->all(FLERR, "Fix ID {} for compute {} does not exist", val.id, style);
-      if (val.val.f->peratom_flag) {
-        val.flavor = PERATOM;
+
+      if (input_mode == PERATOM) {
+        if (!val.val.f->peratom_flag)
+          error->all(FLERR, "Compute {} fix {} does not calculate per-atom values", style, val.id);
         if (val.argindex == 0 && (val.val.f->size_peratom_cols != 0))
-          error->all(FLERR, "Compute {} fix {} does not calculate a per-atom vector", style,
-                     val.id);
+          error->all(FLERR, "Compute {} fix {} does not calculate a per-atom vector", style, val.id);
         if (val.argindex && (val.val.f->size_peratom_cols == 0))
           error->all(FLERR, "Compute {} fix {} does not calculate a per-atom array", style, val.id);
         if (val.argindex && (val.argindex > val.val.f->size_peratom_cols))
           error->all(FLERR, "Compute {} fix {} array is accessed out-of-range", style, val.id);
-      } else if (val.val.f->local_flag) {
-        val.flavor = LOCAL;
+
+      } else if (input_mode == LOCAL) {
+        if (!val.val.f->local_flag)
+          error->all(FLERR, "Compute {} fix {} does not calculate local values", style, val.id);
         if (val.argindex == 0 && (val.val.f->size_local_cols != 0))
           error->all(FLERR, "Compute {} fix {} does not calculate a local vector", style, val.id);
         if (val.argindex && (val.val.f->size_local_cols == 0))
           error->all(FLERR, "Compute {} fix {} does not calculate a local array", style, val.id);
         if (val.argindex && (val.argindex > val.val.f->size_local_cols))
           error->all(FLERR, "Compute {} fix {} array is accessed out-of-range", style, val.id);
-      } else
-        error->all(FLERR, "Compute {} fix {} calculates global values", style, val.id);
+      }
 
     } else if (val.which == ArgInfo::VARIABLE) {
+      if (input_mode == LOCAL) error->all(FLERR,"Compute {} inputs must be all local", style);
       val.val.v = input->variable->find(val.id.c_str());
       if (val.val.v < 0)
         error->all(FLERR, "Variable name {} for compute {} does not exist", val.id, style);
       if (input->variable->atomstyle(val.val.v) == 0)
         error->all(FLERR, "Compute {} variable {} is not atom-style variable", style, val.id);
-      val.flavor = PERATOM;
     }
   }
 
@@ -512,7 +523,7 @@ double ComputeReduce::compute_one(int m, int flag)
 
   } else if (val.which == ArgInfo::COMPUTE) {
 
-    if (val.flavor == PERATOM) {
+    if (input_mode == PERATOM) {
       if (!(val.val.c->invoked_flag & Compute::INVOKED_PERATOM)) {
         val.val.c->compute_peratom();
         val.val.c->invoked_flag |= Compute::INVOKED_PERATOM;
@@ -537,7 +548,7 @@ double ComputeReduce::compute_one(int m, int flag)
           one = carray_atom[flag][aidxm1];
       }
 
-    } else if (val.flavor == LOCAL) {
+    } else if (input_mode == LOCAL) {
       if (!(val.val.c->invoked_flag & Compute::INVOKED_LOCAL)) {
         val.val.c->compute_local();
         val.val.c->invoked_flag |= Compute::INVOKED_LOCAL;
@@ -567,7 +578,7 @@ double ComputeReduce::compute_one(int m, int flag)
     if (update->ntimestep % val.val.f->peratom_freq)
       error->all(FLERR, "Fix {} used in compute {} not computed at compatible time", val.id, style);
 
-    if (val.flavor == PERATOM) {
+    if (input_mode == PERATOM) {
       if (aidx == 0) {
         double *fix_vector = val.val.f->vector_atom;
         if (flag < 0) {
@@ -585,7 +596,7 @@ double ComputeReduce::compute_one(int m, int flag)
           one = fix_array[flag][aidxm1];
       }
 
-    } else if (val.flavor == LOCAL) {
+    } else if (input_mode == LOCAL) {
       if (aidx == 0) {
         double *fix_vector = val.val.f->vector_local;
         int n = val.val.f->size_local_rows;
@@ -632,18 +643,18 @@ bigint ComputeReduce::count(int m)
   if ((val.which == ArgInfo::X) || (val.which == ArgInfo::V) || (val.which == ArgInfo::F))
     return group->count(igroup);
   else if (val.which == ArgInfo::COMPUTE) {
-    if (val.flavor == PERATOM) {
+    if (input_mode == PERATOM) {
       return group->count(igroup);
-    } else if (val.flavor == LOCAL) {
+    } else if (input_mode == LOCAL) {
       bigint ncount = val.val.c->size_local_rows;
       bigint ncountall;
       MPI_Allreduce(&ncount, &ncountall, 1, MPI_LMP_BIGINT, MPI_SUM, world);
       return ncountall;
     }
   } else if (val.which == ArgInfo::FIX) {
-    if (val.flavor == PERATOM) {
+    if (input_mode == PERATOM) {
       return group->count(igroup);
-    } else if (val.flavor == LOCAL) {
+    } else if (input_mode == LOCAL) {
       bigint ncount = val.val.f->size_local_rows;
       bigint ncountall;
       MPI_Allreduce(&ncount, &ncountall, 1, MPI_LMP_BIGINT, MPI_SUM, world);
diff --git a/src/compute_reduce.h b/src/compute_reduce.h
index f8f73cb17a..f8b652e00c 100644
--- a/src/compute_reduce.h
+++ b/src/compute_reduce.h
@@ -37,12 +37,11 @@ class ComputeReduce : public Compute {
   double memory_usage() override;
 
  protected:
-  int mode, nvalues;
+  int mode, nvalues, input_mode;
   struct value_t {
     int which;
     int argindex;
     std::string id;
-    int flavor;
     union {
       class Compute *c;
       class Fix *f;
diff --git a/src/compute_reduce_region.cpp b/src/compute_reduce_region.cpp
index efce00ff66..bd850e902c 100644
--- a/src/compute_reduce_region.cpp
+++ b/src/compute_reduce_region.cpp
@@ -33,13 +33,15 @@ static constexpr double BIG = 1.0e20;
 ComputeReduceRegion::ComputeReduceRegion(LAMMPS *lmp, int narg, char **arg) :
     ComputeReduce(lmp, narg, arg)
 {
+  if (input_mode == LOCAL)
+    error->all(FLERR,"Compute reduce/region cannot use local data as input");
 }
 
 /* ----------------------------------------------------------------------
    calculate reduced value for one input M and return it
    if flag = -1:
      sum/min/max/ave all values in vector
-     for per-atom quantities, limit to atoms in group and region
+     limit to atoms in group and region
      if mode = MIN or MAX, also set index to which vector value wins
    if flag >= 0: simply return vector[flag]
 ------------------------------------------------------------------------- */
@@ -57,6 +59,7 @@ double ComputeReduceRegion::compute_one(int m, int flag)
 
   // initialization in case it has not yet been run, e.g. when
   // the compute was invoked right after it has been created
+
   if ((val.which == ArgInfo::COMPUTE) || (val.which == ArgInfo::FIX)) {
     if (val.val.c == nullptr) init();
   }
@@ -97,52 +100,29 @@ double ComputeReduceRegion::compute_one(int m, int flag)
     // invoke compute if not previously invoked
 
   } else if (val.which == ArgInfo::COMPUTE) {
-    if (val.flavor == PERATOM) {
-      if (!(val.val.c->invoked_flag & Compute::INVOKED_PERATOM)) {
-        val.val.c->compute_peratom();
-        val.val.c->invoked_flag |= Compute::INVOKED_PERATOM;
-      }
 
-      if (aidx == 0) {
-        double *compute_vector = val.val.c->vector_atom;
-        if (flag < 0) {
-          for (int i = 0; i < nlocal; i++)
-            if (mask[i] & groupbit && region->match(x[i][0], x[i][1], x[i][2]))
-              combine(one, compute_vector[i], i);
-        } else
-          one = compute_vector[flag];
-      } else {
-        double **compute_array = val.val.c->array_atom;
-        int aidxm1 = aidx - 1;
-        if (flag < 0) {
-          for (int i = 0; i < nlocal; i++)
-            if (mask[i] & groupbit && region->match(x[i][0], x[i][1], x[i][2]))
-              combine(one, compute_array[i][aidxm1], i);
-        } else
-          one = compute_array[flag][aidxm1];
-      }
+    if (!(val.val.c->invoked_flag & Compute::INVOKED_PERATOM)) {
+      val.val.c->compute_peratom();
+      val.val.c->invoked_flag |= Compute::INVOKED_PERATOM;
+    }
 
-    } else if (val.flavor == LOCAL) {
-      if (!(val.val.c->invoked_flag & Compute::INVOKED_LOCAL)) {
-        val.val.c->compute_local();
-        val.val.c->invoked_flag |= Compute::INVOKED_LOCAL;
-      }
-
-      if (aidx == 0) {
-        double *compute_vector = val.val.c->vector_local;
-        if (flag < 0)
-          for (int i = 0; i < val.val.c->size_local_rows; i++) combine(one, compute_vector[i], i);
-        else
-          one = compute_vector[flag];
-      } else {
-        double **compute_array = val.val.c->array_local;
-        int aidxm1 = aidx - 1;
-        if (flag < 0)
-          for (int i = 0; i < val.val.c->size_local_rows; i++)
+    if (aidx == 0) {
+      double *compute_vector = val.val.c->vector_atom;
+      if (flag < 0) {
+        for (int i = 0; i < nlocal; i++)
+          if (mask[i] & groupbit && region->match(x[i][0], x[i][1], x[i][2]))
+            combine(one, compute_vector[i], i);
+      } else
+        one = compute_vector[flag];
+    } else {
+      double **compute_array = val.val.c->array_atom;
+      int aidxm1 = aidx - 1;
+      if (flag < 0) {
+        for (int i = 0; i < nlocal; i++)
+          if (mask[i] & groupbit && region->match(x[i][0], x[i][1], x[i][2]))
             combine(one, compute_array[i][aidxm1], i);
-        else
-          one = compute_array[flag][aidxm1];
-      }
+      } else
+        one = compute_array[flag][aidxm1];
     }
 
     // check if fix frequency is a match
@@ -151,45 +131,26 @@ double ComputeReduceRegion::compute_one(int m, int flag)
     if (update->ntimestep % val.val.f->peratom_freq)
       error->all(FLERR, "Fix {} used in compute {} not computed at compatible time", val.id, style);
 
-    if (val.flavor == PERATOM) {
-      if (aidx == 0) {
-        double *fix_vector = val.val.f->vector_atom;
-        if (flag < 0) {
-          for (int i = 0; i < nlocal; i++)
-            if (mask[i] & groupbit && region->match(x[i][0], x[i][1], x[i][2]))
-              combine(one, fix_vector[i], i);
-        } else
-          one = fix_vector[flag];
-      } else {
-        double **fix_array = val.val.f->array_atom;
-        int aidxm1 = aidx - 1;
-        if (flag < 0) {
-          for (int i = 0; i < nlocal; i++)
-            if (mask[i] & groupbit && region->match(x[i][0], x[i][1], x[i][2]))
-              combine(one, fix_array[i][aidxm1], i);
-        } else
-          one = fix_array[flag][aidxm1];
-      }
-
-    } else if (val.flavor == LOCAL) {
-      if (aidx == 0) {
-        double *fix_vector = val.val.f->vector_local;
-        if (flag < 0)
-          for (int i = 0; i < val.val.f->size_local_rows; i++) combine(one, fix_vector[i], i);
-        else
-          one = fix_vector[flag];
-      } else {
-        double **fix_array = val.val.f->array_local;
-        int aidxm1 = aidx - 1;
-        if (flag < 0)
-          for (int i = 0; i < val.val.f->size_local_rows; i++)
+    if (aidx == 0) {
+      double *fix_vector = val.val.f->vector_atom;
+      if (flag < 0) {
+        for (int i = 0; i < nlocal; i++)
+          if (mask[i] & groupbit && region->match(x[i][0], x[i][1], x[i][2]))
+            combine(one, fix_vector[i], i);
+      } else
+        one = fix_vector[flag];
+    } else {
+      double **fix_array = val.val.f->array_atom;
+      int aidxm1 = aidx - 1;
+      if (flag < 0) {
+        for (int i = 0; i < nlocal; i++)
+          if (mask[i] & groupbit && region->match(x[i][0], x[i][1], x[i][2]))
             combine(one, fix_array[i][aidxm1], i);
-        else
-          one = fix_array[flag][aidxm1];
-      }
+      } else
+        one = fix_array[flag][aidxm1];
     }
 
-    // evaluate atom-style variable
+  // evaluate atom-style variable
 
   } else if (val.which == ArgInfo::VARIABLE) {
     if (atom->nmax > maxatom) {
@@ -218,25 +179,11 @@ bigint ComputeReduceRegion::count(int m)
 
   if (val.which == ArgInfo::X || val.which == ArgInfo::V || val.which == ArgInfo::F)
     return group->count(igroup, region);
-  else if (val.which == ArgInfo::COMPUTE) {
-    if (val.flavor == PERATOM) {
-      return group->count(igroup, region);
-    } else if (val.flavor == LOCAL) {
-      bigint ncount = val.val.c->size_local_rows;
-      bigint ncountall;
-      MPI_Allreduce(&ncount, &ncountall, 1, MPI_DOUBLE, MPI_SUM, world);
-      return ncountall;
-    }
-  } else if (val.which == ArgInfo::FIX) {
-    if (val.flavor == PERATOM) {
-      return group->count(igroup, region);
-    } else if (val.flavor == LOCAL) {
-      bigint ncount = val.val.f->size_local_rows;
-      bigint ncountall;
-      MPI_Allreduce(&ncount, &ncountall, 1, MPI_DOUBLE, MPI_SUM, world);
-      return ncountall;
-    }
-  } else if (val.which == ArgInfo::VARIABLE)
+  else if (val.which == ArgInfo::COMPUTE)
+    return group->count(igroup, region);
+  else if (val.which == ArgInfo::FIX)
+    return group->count(igroup, region);
+  else if (val.which == ArgInfo::VARIABLE)
     return group->count(igroup, region);
 
   bigint dummy = 0;
diff --git a/src/fix_ave_histo.cpp b/src/fix_ave_histo.cpp
index 0a2975bb2e..4503ad56f4 100644
--- a/src/fix_ave_histo.cpp
+++ b/src/fix_ave_histo.cpp
@@ -164,7 +164,7 @@ FixAveHisto::FixAveHisto(LAMMPS *lmp, int narg, char **arg) :
   }
 
   // check input args for kind consistency
-  // all inputs must all be global, per-atom, or local
+  // inputs must all be either global, per-atom, or local
 
   if (nevery <= 0)
     error->all(FLERR,"Illegal {} nevery value: {}", mycmd, nevery);
diff --git a/src/fix_efield.cpp b/src/fix_efield.cpp
index 9132904b80..236395093c 100644
--- a/src/fix_efield.cpp
+++ b/src/fix_efield.cpp
@@ -129,6 +129,8 @@ FixEfield::FixEfield(LAMMPS *lmp, int narg, char **arg) :
 
 FixEfield::~FixEfield()
 {
+  if (copymode) return;
+
   delete[] xstr;
   delete[] ystr;
   delete[] zstr;
diff --git a/src/fix_press_berendsen.cpp b/src/fix_press_berendsen.cpp
index e27a4560f0..05e523abae 100644
--- a/src/fix_press_berendsen.cpp
+++ b/src/fix_press_berendsen.cpp
@@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    https://www.lammps.org/, Sandia National Laboratories
@@ -32,17 +31,16 @@
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
-enum{NOBIAS,BIAS};
-enum{NONE,XYZ,XY,YZ,XZ};
-enum{ISO,ANISO};
+enum { NOBIAS, BIAS };
+enum { NONE, XYZ, XY, YZ, XZ };
+enum { ISO, ANISO };
 
 /* ---------------------------------------------------------------------- */
 
 FixPressBerendsen::FixPressBerendsen(LAMMPS *lmp, int narg, char **arg) :
-  Fix(lmp, narg, arg),
-  id_temp(nullptr), id_press(nullptr), tflag(0), pflag(0)
+    Fix(lmp, narg, arg), id_temp(nullptr), id_press(nullptr), tflag(0), pflag(0)
 {
-  if (narg < 5) error->all(FLERR,"Illegal fix press/berendsen command");
+  if (narg < 5) error->all(FLERR, "Illegal fix press/berendsen command");
 
   // Berendsen barostat applied every step
 
@@ -67,26 +65,24 @@ FixPressBerendsen::FixPressBerendsen(LAMMPS *lmp, int narg, char **arg) :
   int iarg = 3;
 
   while (iarg < narg) {
-    if (strcmp(arg[iarg],"iso") == 0) {
-      if (iarg+4 > narg)
-        error->all(FLERR,"Illegal fix press/berendsen command");
+    if (strcmp(arg[iarg], "iso") == 0) {
+      if (iarg + 4 > narg) error->all(FLERR, "Illegal fix press/berendsen command");
       pcouple = XYZ;
-      p_start[0] = p_start[1] = p_start[2] = utils::numeric(FLERR,arg[iarg+1],false,lmp);
-      p_stop[0] = p_stop[1] = p_stop[2] = utils::numeric(FLERR,arg[iarg+2],false,lmp);
-      p_period[0] = p_period[1] = p_period[2] = utils::numeric(FLERR,arg[iarg+3],false,lmp);
+      p_start[0] = p_start[1] = p_start[2] = utils::numeric(FLERR, arg[iarg + 1], false, lmp);
+      p_stop[0] = p_stop[1] = p_stop[2] = utils::numeric(FLERR, arg[iarg + 2], false, lmp);
+      p_period[0] = p_period[1] = p_period[2] = utils::numeric(FLERR, arg[iarg + 3], false, lmp);
       p_flag[0] = p_flag[1] = p_flag[2] = 1;
       if (dimension == 2) {
         p_start[2] = p_stop[2] = p_period[2] = 0.0;
         p_flag[2] = 0;
       }
       iarg += 4;
-    } else if (strcmp(arg[iarg],"aniso") == 0) {
-      if (iarg+4 > narg)
-        error->all(FLERR,"Illegal fix press/berendsen command");
+    } else if (strcmp(arg[iarg], "aniso") == 0) {
+      if (iarg + 4 > narg) error->all(FLERR, "Illegal fix press/berendsen command");
       pcouple = NONE;
-      p_start[0] = p_start[1] = p_start[2] = utils::numeric(FLERR,arg[iarg+1],false,lmp);
-      p_stop[0] = p_stop[1] = p_stop[2] = utils::numeric(FLERR,arg[iarg+2],false,lmp);
-      p_period[0] = p_period[1] = p_period[2] = utils::numeric(FLERR,arg[iarg+3],false,lmp);
+      p_start[0] = p_start[1] = p_start[2] = utils::numeric(FLERR, arg[iarg + 1], false, lmp);
+      p_stop[0] = p_stop[1] = p_stop[2] = utils::numeric(FLERR, arg[iarg + 2], false, lmp);
+      p_period[0] = p_period[1] = p_period[2] = utils::numeric(FLERR, arg[iarg + 3], false, lmp);
       p_flag[0] = p_flag[1] = p_flag[2] = 1;
       if (dimension == 2) {
         p_start[2] = p_stop[2] = p_period[2] = 0.0;
@@ -94,59 +90,61 @@ FixPressBerendsen::FixPressBerendsen(LAMMPS *lmp, int narg, char **arg) :
       }
       iarg += 4;
 
-    } else if (strcmp(arg[iarg],"x") == 0) {
-      if (iarg+4 > narg)
-        error->all(FLERR,"Illegal fix press/berendsen command");
-      p_start[0] = utils::numeric(FLERR,arg[iarg+1],false,lmp);
-      p_stop[0] = utils::numeric(FLERR,arg[iarg+2],false,lmp);
-      p_period[0] = utils::numeric(FLERR,arg[iarg+3],false,lmp);
+    } else if (strcmp(arg[iarg], "x") == 0) {
+      if (iarg + 4 > narg) error->all(FLERR, "Illegal fix press/berendsen command");
+      p_start[0] = utils::numeric(FLERR, arg[iarg + 1], false, lmp);
+      p_stop[0] = utils::numeric(FLERR, arg[iarg + 2], false, lmp);
+      p_period[0] = utils::numeric(FLERR, arg[iarg + 3], false, lmp);
       p_flag[0] = 1;
       iarg += 4;
-    } else if (strcmp(arg[iarg],"y") == 0) {
-      if (iarg+4 > narg)
-        error->all(FLERR,"Illegal fix press/berendsen command");
-      p_start[1] = utils::numeric(FLERR,arg[iarg+1],false,lmp);
-      p_stop[1] = utils::numeric(FLERR,arg[iarg+2],false,lmp);
-      p_period[1] = utils::numeric(FLERR,arg[iarg+3],false,lmp);
+    } else if (strcmp(arg[iarg], "y") == 0) {
+      if (iarg + 4 > narg) error->all(FLERR, "Illegal fix press/berendsen command");
+      p_start[1] = utils::numeric(FLERR, arg[iarg + 1], false, lmp);
+      p_stop[1] = utils::numeric(FLERR, arg[iarg + 2], false, lmp);
+      p_period[1] = utils::numeric(FLERR, arg[iarg + 3], false, lmp);
       p_flag[1] = 1;
       iarg += 4;
-    } else if (strcmp(arg[iarg],"z") == 0) {
-      if (iarg+4 > narg)
-        error->all(FLERR,"Illegal fix press/berendsen command");
-      p_start[2] = utils::numeric(FLERR,arg[iarg+1],false,lmp);
-      p_stop[2] = utils::numeric(FLERR,arg[iarg+2],false,lmp);
-      p_period[2] = utils::numeric(FLERR,arg[iarg+3],false,lmp);
+    } else if (strcmp(arg[iarg], "z") == 0) {
+      if (iarg + 4 > narg) error->all(FLERR, "Illegal fix press/berendsen command");
+      p_start[2] = utils::numeric(FLERR, arg[iarg + 1], false, lmp);
+      p_stop[2] = utils::numeric(FLERR, arg[iarg + 2], false, lmp);
+      p_period[2] = utils::numeric(FLERR, arg[iarg + 3], false, lmp);
       p_flag[2] = 1;
       iarg += 4;
-      if (dimension == 2)
-        error->all(FLERR,"Invalid fix press/berendsen for a 2d simulation");
+      if (dimension == 2) error->all(FLERR, "Invalid fix press/berendsen for a 2d simulation");
 
-    } else if (strcmp(arg[iarg],"couple") == 0) {
-      if (iarg+2 > narg)
-        error->all(FLERR,"Illegal fix press/berendsen command");
-      if (strcmp(arg[iarg+1],"xyz") == 0) pcouple = XYZ;
-      else if (strcmp(arg[iarg+1],"xy") == 0) pcouple = XY;
-      else if (strcmp(arg[iarg+1],"yz") == 0) pcouple = YZ;
-      else if (strcmp(arg[iarg+1],"xz") == 0) pcouple = XZ;
-      else if (strcmp(arg[iarg+1],"none") == 0) pcouple = NONE;
-      else error->all(FLERR,"Illegal fix press/berendsen command");
+    } else if (strcmp(arg[iarg], "couple") == 0) {
+      if (iarg + 2 > narg) error->all(FLERR, "Illegal fix press/berendsen command");
+      if (strcmp(arg[iarg + 1], "xyz") == 0)
+        pcouple = XYZ;
+      else if (strcmp(arg[iarg + 1], "xy") == 0)
+        pcouple = XY;
+      else if (strcmp(arg[iarg + 1], "yz") == 0)
+        pcouple = YZ;
+      else if (strcmp(arg[iarg + 1], "xz") == 0)
+        pcouple = XZ;
+      else if (strcmp(arg[iarg + 1], "none") == 0)
+        pcouple = NONE;
+      else
+        error->all(FLERR, "Illegal fix press/berendsen command");
       iarg += 2;
 
-    } else if (strcmp(arg[iarg],"modulus") == 0) {
-      if (iarg+2 > narg)
-        error->all(FLERR,"Illegal fix press/berendsen command");
-      bulkmodulus = utils::numeric(FLERR,arg[iarg+1],false,lmp);
-      if (bulkmodulus <= 0.0)
-        error->all(FLERR,"Illegal fix press/berendsen command");
+    } else if (strcmp(arg[iarg], "modulus") == 0) {
+      if (iarg + 2 > narg) error->all(FLERR, "Illegal fix press/berendsen command");
+      bulkmodulus = utils::numeric(FLERR, arg[iarg + 1], false, lmp);
+      if (bulkmodulus <= 0.0) error->all(FLERR, "Illegal fix press/berendsen command");
       iarg += 2;
-    } else if (strcmp(arg[iarg],"dilate") == 0) {
-      if (iarg+2 > narg)
-        error->all(FLERR,"Illegal fix press/berendsen command");
-      if (strcmp(arg[iarg+1],"all") == 0) allremap = 1;
-      else if (strcmp(arg[iarg+1],"partial") == 0) allremap = 0;
-      else error->all(FLERR,"Illegal fix press/berendsen command");
+    } else if (strcmp(arg[iarg], "dilate") == 0) {
+      if (iarg + 2 > narg) error->all(FLERR, "Illegal fix press/berendsen command");
+      if (strcmp(arg[iarg + 1], "all") == 0)
+        allremap = 1;
+      else if (strcmp(arg[iarg + 1], "partial") == 0)
+        allremap = 0;
+      else
+        error->all(FLERR, "Illegal fix press/berendsen command");
       iarg += 2;
-    } else error->all(FLERR,"Illegal fix press/berendsen command");
+    } else
+      error->all(FLERR, "Illegal fix press/berendsen command");
   }
 
   if (allremap == 0) restart_pbc = 1;
@@ -154,57 +152,48 @@ FixPressBerendsen::FixPressBerendsen(LAMMPS *lmp, int narg, char **arg) :
   // error checks
 
   if (dimension == 2 && p_flag[2])
-    error->all(FLERR,"Invalid fix press/berendsen for a 2d simulation");
+    error->all(FLERR, "Invalid fix press/berendsen for a 2d simulation");
   if (dimension == 2 && (pcouple == YZ || pcouple == XZ))
-    error->all(FLERR,"Invalid fix press/berendsen for a 2d simulation");
+    error->all(FLERR, "Invalid fix press/berendsen for a 2d simulation");
 
   if (pcouple == XYZ && (p_flag[0] == 0 || p_flag[1] == 0))
-    error->all(FLERR,"Invalid fix press/berendsen pressure settings");
+    error->all(FLERR, "Invalid fix press/berendsen pressure settings");
   if (pcouple == XYZ && dimension == 3 && p_flag[2] == 0)
-    error->all(FLERR,"Invalid fix press/berendsen pressure settings");
+    error->all(FLERR, "Invalid fix press/berendsen pressure settings");
   if (pcouple == XY && (p_flag[0] == 0 || p_flag[1] == 0))
-    error->all(FLERR,"Invalid fix press/berendsen pressure settings");
+    error->all(FLERR, "Invalid fix press/berendsen pressure settings");
   if (pcouple == YZ && (p_flag[1] == 0 || p_flag[2] == 0))
-    error->all(FLERR,"Invalid fix press/berendsen pressure settings");
+    error->all(FLERR, "Invalid fix press/berendsen pressure settings");
   if (pcouple == XZ && (p_flag[0] == 0 || p_flag[2] == 0))
-    error->all(FLERR,"Invalid fix press/berendsen pressure settings");
+    error->all(FLERR, "Invalid fix press/berendsen pressure settings");
 
   if (p_flag[0] && domain->xperiodic == 0)
-    error->all(FLERR,
-               "Cannot use fix press/berendsen on a non-periodic dimension");
+    error->all(FLERR, "Cannot use fix press/berendsen on a non-periodic dimension");
   if (p_flag[1] && domain->yperiodic == 0)
-    error->all(FLERR,
-               "Cannot use fix press/berendsen on a non-periodic dimension");
+    error->all(FLERR, "Cannot use fix press/berendsen on a non-periodic dimension");
   if (p_flag[2] && domain->zperiodic == 0)
-    error->all(FLERR,
-               "Cannot use fix press/berendsen on a non-periodic dimension");
+    error->all(FLERR, "Cannot use fix press/berendsen on a non-periodic dimension");
 
   if (pcouple == XYZ && dimension == 3 &&
-      (p_start[0] != p_start[1] || p_start[0] != p_start[2] ||
-       p_stop[0] != p_stop[1] || p_stop[0] != p_stop[2] ||
-       p_period[0] != p_period[1] || p_period[0] != p_period[2]))
-    error->all(FLERR,"Invalid fix press/berendsen pressure settings");
+      (p_start[0] != p_start[1] || p_start[0] != p_start[2] || p_stop[0] != p_stop[1] ||
+       p_stop[0] != p_stop[2] || p_period[0] != p_period[1] || p_period[0] != p_period[2]))
+    error->all(FLERR, "Invalid fix press/berendsen pressure settings");
   if (pcouple == XYZ && dimension == 2 &&
-      (p_start[0] != p_start[1] || p_stop[0] != p_stop[1] ||
-       p_period[0] != p_period[1]))
-    error->all(FLERR,"Invalid fix press/berendsen pressure settings");
+      (p_start[0] != p_start[1] || p_stop[0] != p_stop[1] || p_period[0] != p_period[1]))
+    error->all(FLERR, "Invalid fix press/berendsen pressure settings");
   if (pcouple == XY &&
-      (p_start[0] != p_start[1] || p_stop[0] != p_stop[1] ||
-       p_period[0] != p_period[1]))
-    error->all(FLERR,"Invalid fix press/berendsen pressure settings");
+      (p_start[0] != p_start[1] || p_stop[0] != p_stop[1] || p_period[0] != p_period[1]))
+    error->all(FLERR, "Invalid fix press/berendsen pressure settings");
   if (pcouple == YZ &&
-      (p_start[1] != p_start[2] || p_stop[1] != p_stop[2] ||
-       p_period[1] != p_period[2]))
-    error->all(FLERR,"Invalid fix press/berendsen pressure settings");
+      (p_start[1] != p_start[2] || p_stop[1] != p_stop[2] || p_period[1] != p_period[2]))
+    error->all(FLERR, "Invalid fix press/berendsen pressure settings");
   if (pcouple == XZ &&
-      (p_start[0] != p_start[2] || p_stop[0] != p_stop[2] ||
-       p_period[0] != p_period[2]))
-    error->all(FLERR,"Invalid fix press/berendsen pressure settings");
+      (p_start[0] != p_start[2] || p_stop[0] != p_stop[2] || p_period[0] != p_period[2]))
+    error->all(FLERR, "Invalid fix press/berendsen pressure settings");
 
-  if ((p_flag[0] && p_period[0] <= 0.0) ||
-      (p_flag[1] && p_period[1] <= 0.0) ||
+  if ((p_flag[0] && p_period[0] <= 0.0) || (p_flag[1] && p_period[1] <= 0.0) ||
       (p_flag[2] && p_period[2] <= 0.0))
-    error->all(FLERR,"Fix press/berendsen damping parameters must be > 0.0");
+    error->all(FLERR, "Fix press/berendsen damping parameters must be > 0.0");
 
   if (p_flag[0]) box_change |= BOX_CHANGE_X;
   if (p_flag[1]) box_change |= BOX_CHANGE_Y;
@@ -213,8 +202,10 @@ FixPressBerendsen::FixPressBerendsen(LAMMPS *lmp, int narg, char **arg) :
   // pstyle = ISO if XYZ coupling or XY coupling in 2d -> 1 dof
   // else pstyle = ANISO -> 3 dof
 
-  if (pcouple == XYZ || (dimension == 2 && pcouple == XY)) pstyle = ISO;
-  else pstyle = ANISO;
+  if (pcouple == XYZ || (dimension == 2 && pcouple == XY))
+    pstyle = ISO;
+  else
+    pstyle = ANISO;
 
   // create a new compute temp style
   // id = fix-ID + temp
@@ -222,7 +213,7 @@ FixPressBerendsen::FixPressBerendsen(LAMMPS *lmp, int narg, char **arg) :
   //   and thus its KE/temperature contribution should use group all
 
   id_temp = utils::strdup(std::string(id) + "_temp");
-  modify->add_compute(fmt::format("{} all temp",id_temp));
+  modify->add_compute(fmt::format("{} all temp", id_temp));
   tflag = 1;
 
   // create a new compute pressure style
@@ -230,7 +221,7 @@ FixPressBerendsen::FixPressBerendsen(LAMMPS *lmp, int narg, char **arg) :
   // pass id_temp as 4th arg to pressure constructor
 
   id_press = utils::strdup(std::string(id) + "_press");
-  modify->add_compute(fmt::format("{} all pressure {}",id_press, id_temp));
+  modify->add_compute(fmt::format("{} all pressure {}", id_press, id_temp));
   pflag = 1;
 
   nrigid = 0;
@@ -264,19 +255,18 @@ int FixPressBerendsen::setmask()
 
 void FixPressBerendsen::init()
 {
-  if (domain->triclinic)
-    error->all(FLERR,"Cannot use fix press/berendsen with triclinic box");
+  if (domain->triclinic) error->all(FLERR, "Cannot use fix press/berendsen with triclinic box");
 
   // ensure no conflict with fix deform
 
-  for (const auto &ifix : modify->get_fix_list())
-    if (strcmp(ifix->style, "^deform") == 0) {
-      int *dimflag = static_cast<FixDeform *>(ifix)->dimflag;
-      if ((p_flag[0] && dimflag[0]) || (p_flag[1] && dimflag[1]) ||
-          (p_flag[2] && dimflag[2]))
-        error->all(FLERR,"Cannot use fix press/berendsen and "
-                   "fix deform on same component of stress tensor");
-    }
+  for (const auto &ifix : modify->get_fix_by_style("^deform")) {
+    int *dimflag = static_cast<FixDeform *>(ifix)->dimflag;
+    if (!dimflag) continue;
+    if ((p_flag[0] && dimflag[0]) || (p_flag[1] && dimflag[1]) || (p_flag[2] && dimflag[2]))
+      error->all(FLERR,
+                 "Cannot use fix press/berendsen and "
+                 "fix deform on same component of stress tensor");
+  }
 
   // set temperature and pressure ptrs
 
@@ -284,8 +274,10 @@ void FixPressBerendsen::init()
   if (!temperature)
     error->all(FLERR, "Temperature compute ID {} for fix press/berendsen does not exist", id_temp);
 
-  if (temperature->tempbias) which = BIAS;
-  else which = NOBIAS;
+  if (temperature->tempbias)
+    which = BIAS;
+  else
+    which = NOBIAS;
 
   pressure = modify->get_compute_by_id(id_press);
   if (!pressure)
@@ -293,8 +285,10 @@ void FixPressBerendsen::init()
 
   // Kspace setting
 
-  if (force->kspace) kspace_flag = 1;
-  else kspace_flag = 0;
+  if (force->kspace)
+    kspace_flag = 1;
+  else
+    kspace_flag = 0;
 
   // detect if any rigid fixes exist so rigid bodies move when box is remapped
   // rfix[] = indices to each fix rigid
@@ -303,13 +297,13 @@ void FixPressBerendsen::init()
   nrigid = 0;
   rfix = nullptr;
 
-  for (int i = 0; i < modify->nfix; i++)
-    if (modify->fix[i]->rigid_flag) nrigid++;
+  for (const auto &ifix : modify->get_fix_list())
+    if (ifix->rigid_flag) nrigid++;
   if (nrigid > 0) {
-    rfix = new int[nrigid];
+    rfix = new Fix *[nrigid];
     nrigid = 0;
-    for (int i = 0; i < modify->nfix; i++)
-      if (modify->fix[i]->rigid_flag) rfix[nrigid++] = i;
+    for (auto &ifix : modify->get_fix_list())
+      if (ifix->rigid_flag) rfix[nrigid++] = ifix;
   }
 }
 
@@ -321,7 +315,7 @@ void FixPressBerendsen::setup(int /*vflag*/)
 {
   // trigger virial computation on next timestep
 
-  pressure->addstep(update->ntimestep+1);
+  pressure->addstep(update->ntimestep + 1);
 }
 
 /* ---------------------------------------------------------------------- */
@@ -344,10 +338,9 @@ void FixPressBerendsen::end_of_step()
 
   for (int i = 0; i < 3; i++) {
     if (p_flag[i]) {
-      p_target[i] = p_start[i] + delta * (p_stop[i]-p_start[i]);
-      dilation[i] =
-        pow(1.0 - update->dt/p_period[i] *
-            (p_target[i]-p_current[i])/bulkmodulus,1.0/3.0);
+      p_target[i] = p_start[i] + delta * (p_stop[i] - p_start[i]);
+      dilation[i] = pow(1.0 - update->dt / p_period[i] * (p_target[i] - p_current[i]) / bulkmodulus,
+                        1.0 / 3.0);
     }
   }
 
@@ -359,7 +352,7 @@ void FixPressBerendsen::end_of_step()
 
   // trigger virial computation on next timestep
 
-  pressure->addstep(update->ntimestep+1);
+  pressure->addstep(update->ntimestep + 1);
 }
 
 /* ---------------------------------------------------------------------- */
@@ -371,7 +364,7 @@ void FixPressBerendsen::couple()
   if (pstyle == ISO)
     p_current[0] = p_current[1] = p_current[2] = pressure->scalar;
   else if (pcouple == XYZ) {
-    double ave = 1.0/3.0 * (tensor[0] + tensor[1] + tensor[2]);
+    double ave = 1.0 / 3.0 * (tensor[0] + tensor[1] + tensor[2]);
     p_current[0] = p_current[1] = p_current[2] = ave;
   } else if (pcouple == XY) {
     double ave = 0.5 * (tensor[0] + tensor[1]);
@@ -401,7 +394,7 @@ void FixPressBerendsen::couple()
 void FixPressBerendsen::remap()
 {
   int i;
-  double oldlo,oldhi,ctr;
+  double oldlo, oldhi, ctr;
 
   double **x = atom->x;
   int *mask = atom->mask;
@@ -409,16 +402,15 @@ void FixPressBerendsen::remap()
 
   // convert pertinent atoms and rigid bodies to lamda coords
 
-  if (allremap) domain->x2lamda(nlocal);
+  if (allremap)
+    domain->x2lamda(nlocal);
   else {
     for (i = 0; i < nlocal; i++)
-      if (mask[i] & groupbit)
-        domain->x2lamda(x[i],x[i]);
+      if (mask[i] & groupbit) domain->x2lamda(x[i], x[i]);
   }
 
   if (nrigid)
-    for (i = 0; i < nrigid; i++)
-      modify->fix[rfix[i]]->deform(0);
+    for (i = 0; i < nrigid; i++) rfix[i]->deform(0);
 
   // reset global and local box to new size/shape
 
@@ -427,8 +419,8 @@ void FixPressBerendsen::remap()
       oldlo = domain->boxlo[i];
       oldhi = domain->boxhi[i];
       ctr = 0.5 * (oldlo + oldhi);
-      domain->boxlo[i] = (oldlo-ctr)*dilation[i] + ctr;
-      domain->boxhi[i] = (oldhi-ctr)*dilation[i] + ctr;
+      domain->boxlo[i] = (oldlo - ctr) * dilation[i] + ctr;
+      domain->boxhi[i] = (oldhi - ctr) * dilation[i] + ctr;
     }
   }
 
@@ -437,24 +429,23 @@ void FixPressBerendsen::remap()
 
   // convert pertinent atoms and rigid bodies back to box coords
 
-  if (allremap) domain->lamda2x(nlocal);
+  if (allremap)
+    domain->lamda2x(nlocal);
   else {
     for (i = 0; i < nlocal; i++)
-      if (mask[i] & groupbit)
-        domain->lamda2x(x[i],x[i]);
+      if (mask[i] & groupbit) domain->lamda2x(x[i], x[i]);
   }
 
   if (nrigid)
-    for (i = 0; i < nrigid; i++)
-      modify->fix[rfix[i]]->deform(1);
+    for (i = 0; i < nrigid; i++) rfix[i]->deform(1);
 }
 
 /* ---------------------------------------------------------------------- */
 
 int FixPressBerendsen::modify_param(int narg, char **arg)
 {
-  if (strcmp(arg[0],"temp") == 0) {
-    if (narg < 2) error->all(FLERR,"Illegal fix_modify command");
+  if (strcmp(arg[0], "temp") == 0) {
+    if (narg < 2) error->all(FLERR, "Illegal fix_modify command");
     if (tflag) {
       modify->delete_compute(id_temp);
       tflag = 0;
@@ -464,25 +455,25 @@ int FixPressBerendsen::modify_param(int narg, char **arg)
 
     temperature = modify->get_compute_by_id(arg[1]);
     if (!temperature)
-      error->all(FLERR,"Could not find fix_modify temperature compute ID: ", arg[1]);
+      error->all(FLERR, "Could not find fix_modify temperature compute ID: ", arg[1]);
 
     if (temperature->tempflag == 0)
-      error->all(FLERR,"Fix_modify temperature compute {} does not compute temperature", arg[1]);
+      error->all(FLERR, "Fix_modify temperature compute {} does not compute temperature", arg[1]);
     if (temperature->igroup != 0 && comm->me == 0)
-      error->warning(FLERR,"Temperature compute {} for fix {} is not for group all: {}",
-                     arg[1], style, group->names[temperature->igroup]);
+      error->warning(FLERR, "Temperature compute {} for fix {} is not for group all: {}", arg[1],
+                     style, group->names[temperature->igroup]);
 
     // reset id_temp of pressure to new temperature ID
 
     auto icompute = modify->get_compute_by_id(id_press);
     if (!icompute)
-      error->all(FLERR,"Pressure compute ID {} for fix {} does not exist", id_press, style);
+      error->all(FLERR, "Pressure compute ID {} for fix {} does not exist", id_press, style);
     icompute->reset_extra_compute_fix(id_temp);
 
     return 2;
 
-  } else if (strcmp(arg[0],"press") == 0) {
-    if (narg < 2) error->all(FLERR,"Illegal fix_modify command");
+  } else if (strcmp(arg[0], "press") == 0) {
+    if (narg < 2) error->all(FLERR, "Illegal fix_modify command");
     if (pflag) {
       modify->delete_compute(id_press);
       pflag = 0;
@@ -491,9 +482,9 @@ int FixPressBerendsen::modify_param(int narg, char **arg)
     id_press = utils::strdup(arg[1]);
 
     pressure = modify->get_compute_by_id(arg[1]);
-    if (!pressure) error->all(FLERR,"Could not find fix_modify pressure compute ID: {}", arg[1]);
+    if (!pressure) error->all(FLERR, "Could not find fix_modify pressure compute ID: {}", arg[1]);
     if (pressure->pressflag == 0)
-      error->all(FLERR,"Fix_modify pressure compute {} does not compute pressure", arg[1]);
+      error->all(FLERR, "Fix_modify pressure compute {} does not compute pressure", arg[1]);
     return 2;
   }
   return 0;
diff --git a/src/fix_press_berendsen.h b/src/fix_press_berendsen.h
index 85a4895901..9e83533746 100644
--- a/src/fix_press_berendsen.h
+++ b/src/fix_press_berendsen.h
@@ -44,9 +44,9 @@ class FixPressBerendsen : public Fix {
   double p_period[3], p_target[3];
   double p_current[3], dilation[3];
   double factor[3];
-  int kspace_flag;    // 1 if KSpace invoked, 0 if not
-  int nrigid;         // number of rigid fixes
-  int *rfix;          // indices of rigid fixes
+  int kspace_flag;     // 1 if KSpace invoked, 0 if not
+  int nrigid;          // number of rigid fixes
+  class Fix **rfix;    // pointers to rigid fixes
 
   char *id_temp, *id_press;
   class Compute *temperature, *pressure;
diff --git a/src/fix_press_langevin.cpp b/src/fix_press_langevin.cpp
new file mode 100644
index 0000000000..2f6e765cd5
--- /dev/null
+++ b/src/fix_press_langevin.cpp
@@ -0,0 +1,836 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Germain Clavier (TUe)
+------------------------------------------------------------------------- */
+
+#include "fix_press_langevin.h"
+
+#include "atom.h"
+#include "comm.h"
+#include "compute.h"
+#include "domain.h"
+#include "error.h"
+#include "fix_deform.h"
+#include "force.h"
+#include "group.h"
+#include "irregular.h"
+#include "kspace.h"
+#include "modify.h"
+#include "random_mars.h"
+#include "update.h"
+
+#include <cmath>
+#include <cstring>
+
+using namespace LAMMPS_NS;
+using namespace FixConst;
+
+#define DELTAFLIP 0.1
+#define TILTMAX 1.5
+
+enum { NONE, XYZ, XY, YZ, XZ };
+enum { ISO, ANISO, TRICLINIC };
+
+/* ---------------------------------------------------------------------- */
+
+FixPressLangevin::FixPressLangevin(LAMMPS *lmp, int narg, char **arg) :
+    Fix(lmp, narg, arg), id_temp(nullptr), id_press(nullptr), temperature(nullptr),
+    pressure(nullptr), irregular(nullptr), random(nullptr)
+{
+  if (narg < 5) utils::missing_cmd_args(FLERR, "fix press/langevin", error);
+
+  // Langevin barostat applied every step
+  // For details on the equations of motion see:
+  // Gronbech-Jensen & Farago J. Chem. Phys. 141 194108 (2014)
+
+  nevery = 1;
+
+  // default values
+
+  pcouple = NONE;
+  allremap = 1;
+  pre_exchange_flag = 0;
+  flipflag = 1;
+  seed = 111111;
+  pflag = 0;
+  kspace_flag = 0;
+
+  p_ltime = 0.0;
+
+  // target temperature
+
+  t_start = t_stop = t_target = 0.0;
+
+  for (int i = 0; i < 6; i++) {
+
+    // pressure and pistons period
+
+    p_start[i] = p_stop[i] = p_period[i] = 0.0;
+    p_flag[i] = 0;
+    p_alpha[i] = 0;
+
+    p_mass[i] = 0.;
+
+    // pistons coordinates derivative V
+
+    p_deriv[i] = 0.0;
+
+    // a and b values for each piston
+
+    gjfa[i] = 0.0;
+    gjfb[i] = 0.0;
+
+    // random value for each piston
+
+    fran[i] = 0.0;
+    f_piston[i] = 0.0;
+    dilation[i] = 0.0;
+  }
+
+  // process keywords
+
+  dimension = domain->dimension;
+
+  int iarg = 3;
+
+  while (iarg < narg) {
+    if (strcmp(arg[iarg], "iso") == 0) {
+      if (iarg + 4 > narg) utils::missing_cmd_args(FLERR, "fix press/langevin iso", error);
+      pcouple = XYZ;
+      p_start[0] = p_start[1] = p_start[2] = utils::numeric(FLERR, arg[iarg + 1], false, lmp);
+      p_stop[0] = p_stop[1] = p_stop[2] = utils::numeric(FLERR, arg[iarg + 2], false, lmp);
+      p_period[0] = p_period[1] = p_period[2] = utils::numeric(FLERR, arg[iarg + 3], false, lmp);
+      p_flag[0] = p_flag[1] = p_flag[2] = 1;
+      if (dimension == 2) {
+        p_start[2] = p_stop[2] = p_period[2] = 0.0;
+        p_flag[2] = 0;
+      }
+      iarg += 4;
+    } else if (strcmp(arg[iarg], "aniso") == 0) {
+      if (iarg + 4 > narg) utils::missing_cmd_args(FLERR, "fix press/langevin aniso", error);
+      pcouple = NONE;
+      p_start[0] = p_start[1] = p_start[2] = utils::numeric(FLERR, arg[iarg + 1], false, lmp);
+      p_stop[0] = p_stop[1] = p_stop[2] = utils::numeric(FLERR, arg[iarg + 2], false, lmp);
+      p_period[0] = p_period[1] = p_period[2] = utils::numeric(FLERR, arg[iarg + 3], false, lmp);
+      p_flag[0] = p_flag[1] = p_flag[2] = 1;
+      if (dimension == 2) {
+        p_start[2] = p_stop[2] = p_period[2] = 0.0;
+        p_flag[2] = 0;
+      }
+      iarg += 4;
+    } else if (strcmp(arg[iarg], "tri") == 0) {
+      if (iarg + 4 > narg) utils::missing_cmd_args(FLERR, "fix press/langevin tri", error);
+      pcouple = NONE;
+      p_start[0] = p_start[1] = p_start[2] = utils::numeric(FLERR, arg[iarg + 1], false, lmp);
+      p_stop[0] = p_stop[1] = p_stop[2] = utils::numeric(FLERR, arg[iarg + 2], false, lmp);
+      p_period[0] = p_period[1] = p_period[2] = utils::numeric(FLERR, arg[iarg + 3], false, lmp);
+      p_flag[0] = p_flag[1] = p_flag[2] = 1;
+      p_start[3] = p_start[4] = p_start[5] = 0.0;
+      p_stop[3] = p_stop[4] = p_stop[5] = 0.0;
+      p_period[3] = p_period[4] = p_period[5] = utils::numeric(FLERR, arg[iarg + 3], false, lmp);
+      p_flag[3] = p_flag[4] = p_flag[5] = 1;
+      if (dimension == 2) {
+        p_start[2] = p_stop[2] = p_period[2] = 0.0;
+        p_flag[2] = 0;
+        p_start[3] = p_stop[3] = p_period[3] = 0.0;
+        p_flag[3] = 0;
+        p_start[4] = p_stop[4] = p_period[4] = 0.0;
+        p_flag[4] = 0;
+      }
+      iarg += 4;
+    } else if (strcmp(arg[iarg], "x") == 0) {
+      if (iarg + 4 > narg) utils::missing_cmd_args(FLERR, "fix press/langevin tri", error);
+      if (iarg + 4 > narg) error->all(FLERR, "Illegal fix press/langevin command");
+      p_start[0] = utils::numeric(FLERR, arg[iarg + 1], false, lmp);
+      p_stop[0] = utils::numeric(FLERR, arg[iarg + 2], false, lmp);
+      p_period[0] = utils::numeric(FLERR, arg[iarg + 3], false, lmp);
+      p_flag[0] = 1;
+      iarg += 4;
+    } else if (strcmp(arg[iarg], "y") == 0) {
+      if (iarg + 4 > narg) utils::missing_cmd_args(FLERR, "fix press/langevin y", error);
+      p_start[1] = utils::numeric(FLERR, arg[iarg + 1], false, lmp);
+      p_stop[1] = utils::numeric(FLERR, arg[iarg + 2], false, lmp);
+      p_period[1] = utils::numeric(FLERR, arg[iarg + 3], false, lmp);
+      p_flag[1] = 1;
+      iarg += 4;
+    } else if (strcmp(arg[iarg], "z") == 0) {
+      if (iarg + 4 > narg) utils::missing_cmd_args(FLERR, "fix press/langevin z", error);
+      p_start[2] = utils::numeric(FLERR, arg[iarg + 1], false, lmp);
+      p_stop[2] = utils::numeric(FLERR, arg[iarg + 2], false, lmp);
+      p_period[2] = utils::numeric(FLERR, arg[iarg + 3], false, lmp);
+      p_flag[2] = 1;
+      iarg += 4;
+      if (dimension == 2)
+        error->all(FLERR, "Fix press/langevin z option not allowed for a 2d simulation");
+    } else if (strcmp(arg[iarg], "xy") == 0) {
+      if (iarg + 4 > narg) utils::missing_cmd_args(FLERR, "fix press/langevin yz", error);
+      p_start[3] = utils::numeric(FLERR, arg[iarg + 1], false, lmp);
+      p_stop[3] = utils::numeric(FLERR, arg[iarg + 2], false, lmp);
+      p_period[3] = utils::numeric(FLERR, arg[iarg + 3], false, lmp);
+      p_flag[3] = 1;
+      iarg += 4;
+      if (dimension == 2)
+        error->all(FLERR, "Fix press/langevin yz option not allowed for a 2d simulation");
+
+    } else if (strcmp(arg[iarg], "xz") == 0) {
+      if (iarg + 4 > narg) utils::missing_cmd_args(FLERR, "fix press/langevin xz", error);
+      p_start[4] = utils::numeric(FLERR, arg[iarg + 1], false, lmp);
+      p_stop[4] = utils::numeric(FLERR, arg[iarg + 2], false, lmp);
+      p_period[4] = utils::numeric(FLERR, arg[iarg + 3], false, lmp);
+      p_flag[4] = 1;
+      iarg += 4;
+      if (dimension == 2)
+        error->all(FLERR, "Fix press/langevin zz option not allowed for a 2d simulation");
+
+    } else if (strcmp(arg[iarg], "yz") == 0) {
+      if (iarg + 4 > narg) utils::missing_cmd_args(FLERR, "fix press/langevin xy", error);
+      p_start[5] = utils::numeric(FLERR, arg[iarg + 1], false, lmp);
+      p_stop[5] = utils::numeric(FLERR, arg[iarg + 2], false, lmp);
+      p_period[5] = utils::numeric(FLERR, arg[iarg + 3], false, lmp);
+      p_flag[5] = 1;
+      iarg += 4;
+      if (dimension == 2) error->all(FLERR, "Invalid fix {} command for a 2d simulation", style);
+
+    } else if (strcmp(arg[iarg], "flip") == 0) {
+      if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, "fix press/langevin flip", error);
+      flipflag = utils::logical(FLERR, arg[iarg + 1], false, lmp);
+      iarg += 2;
+
+    } else if (strcmp(arg[iarg], "couple") == 0) {
+      if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, "fix press/langevin couple", error);
+      if (strcmp(arg[iarg + 1], "xyz") == 0)
+        pcouple = XYZ;
+      else if (strcmp(arg[iarg + 1], "xy") == 0)
+        pcouple = XY;
+      else if (strcmp(arg[iarg + 1], "yz") == 0)
+        pcouple = YZ;
+      else if (strcmp(arg[iarg + 1], "xz") == 0)
+        pcouple = XZ;
+      else if (strcmp(arg[iarg + 1], "none") == 0)
+        pcouple = NONE;
+      else
+        error->all(FLERR, "Unknown fix press/langevin couple option: {}", arg[iarg + 1]);
+      iarg += 2;
+
+    } else if (strcmp(arg[iarg], "friction") == 0) {
+      if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, "fix press/langevin friction", error);
+      p_ltime = utils::numeric(FLERR, arg[iarg + 1], false, lmp);
+      if (p_ltime <= 0.0) error->all(FLERR, "Fix press/langevin friction value must be > 0");
+      iarg += 2;
+    } else if (strcmp(arg[iarg], "dilate") == 0) {
+      if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, "fix press/langevin dilate", error);
+      if (strcmp(arg[iarg + 1], "all") == 0)
+        allremap = 1;
+      else if (strcmp(arg[iarg + 1], "partial") == 0)
+        allremap = 0;
+      else
+        error->all(FLERR, "Unknown fix press/langevin dilate option: {}", arg[iarg + 1]);
+      iarg += 2;
+    } else if (strcmp(arg[iarg], "temp") == 0) {
+      if (iarg + 4 > narg) utils::missing_cmd_args(FLERR, "fix press/langevin temp", error);
+      t_start = utils::numeric(FLERR, arg[iarg + 1], false, lmp);
+      t_stop = utils::numeric(FLERR, arg[iarg + 2], false, lmp);
+      seed = utils::numeric(FLERR, arg[iarg + 3], false, lmp);
+      if (seed <= 0) error->all(FLERR, "Fix press/langevin temp seed must be > 0");
+      iarg += 4;
+    }
+
+    else
+      error->all(FLERR, "Unknown fix press/langevin keyword: {}", arg[iarg]);
+  }
+
+  if (allremap == 0) restart_pbc = 1;
+
+  random = new RanMars(lmp, seed);
+
+  // error checks
+
+  if (dimension == 2 && p_flag[2])
+    error->all(FLERR, "Invalid fix press/langevin for a 2d simulation");
+  if (dimension == 2 && (pcouple == YZ || pcouple == XZ))
+    error->all(FLERR, "Invalid fix press/langevin for a 2d simulation");
+
+  if (pcouple == XYZ && (p_flag[0] == 0 || p_flag[1] == 0))
+    error->all(FLERR, "Invalid fix press/langevin pressure settings");
+  if (pcouple == XYZ && dimension == 3 && p_flag[2] == 0)
+    error->all(FLERR, "Invalid fix press/langevin pressure settings");
+  if (pcouple == XY && (p_flag[0] == 0 || p_flag[1] == 0))
+    error->all(FLERR, "Invalid fix press/langevin pressure settings");
+  if (pcouple == YZ && (p_flag[1] == 0 || p_flag[2] == 0))
+    error->all(FLERR, "Invalid fix press/langevin pressure settings");
+  if (pcouple == XZ && (p_flag[0] == 0 || p_flag[2] == 0))
+    error->all(FLERR, "Invalid fix press/langevin pressure settings");
+
+  if (p_flag[0] && domain->xperiodic == 0)
+    error->all(FLERR, "Cannot use fix press/langevin on a non-periodic dimension");
+  if (p_flag[1] && domain->yperiodic == 0)
+    error->all(FLERR, "Cannot use fix press/langevin on a non-periodic dimension");
+  if (p_flag[2] && domain->zperiodic == 0)
+    error->all(FLERR, "Cannot use fix press/langevin on a non-periodic dimension");
+
+  // require periodicity in 2nd dim of off-diagonal tilt component
+
+  if (p_flag[3] && domain->zperiodic == 0)
+    error->all(FLERR, "Cannot use fix {} on a 2nd non-periodic dimension", style);
+  if (p_flag[4] && domain->zperiodic == 0)
+    error->all(FLERR, "Cannot use fix {} on a 2nd non-periodic dimension", style);
+  if (p_flag[5] && domain->yperiodic == 0)
+    error->all(FLERR, "Cannot use fix {} on a 2nd non-periodic dimension", style);
+  if (!domain->triclinic && (p_flag[3] || p_flag[4] || p_flag[5]))
+    error->all(FLERR, "Can not specify Pxy/Pxz/Pyz in fix {} with non-triclinic box", style);
+
+  if (pcouple == XYZ && dimension == 3 &&
+      (p_start[0] != p_start[1] || p_start[0] != p_start[2] || p_stop[0] != p_stop[1] ||
+       p_stop[0] != p_stop[2] || p_period[0] != p_period[1] || p_period[0] != p_period[2]))
+    error->all(FLERR, "Invalid fix press/langevin pressure settings");
+  if (pcouple == XYZ && dimension == 2 &&
+      (p_start[0] != p_start[1] || p_stop[0] != p_stop[1] || p_period[0] != p_period[1]))
+    error->all(FLERR, "Invalid fix press/langevin pressure settings");
+  if (pcouple == XY &&
+      (p_start[0] != p_start[1] || p_stop[0] != p_stop[1] || p_period[0] != p_period[1]))
+    error->all(FLERR, "Invalid fix press/langevin pressure settings");
+  if (pcouple == YZ &&
+      (p_start[1] != p_start[2] || p_stop[1] != p_stop[2] || p_period[1] != p_period[2]))
+    error->all(FLERR, "Invalid fix press/langevin pressure settings");
+  if (pcouple == XZ &&
+      (p_start[0] != p_start[2] || p_stop[0] != p_stop[2] || p_period[0] != p_period[2]))
+    error->all(FLERR, "Invalid fix press/langevin pressure settings");
+
+  if (t_start < 0.0 || t_stop < 0.0)
+    error->all(FLERR, "Fix press/langevin temperature parameters must be >= 0.0");
+
+  if ((p_flag[0] && p_period[0] <= 0.0) || (p_flag[1] && p_period[1] <= 0.0) ||
+      (p_flag[2] && p_period[2] <= 0.0) || (p_flag[3] && p_period[3] <= 0.0) ||
+      (p_flag[4] && p_period[4] <= 0.0) || (p_flag[5] && p_period[5] <= 0.0))
+    error->all(FLERR, "Fix press/langevin damping parameters must be > 0.0");
+
+  if (p_flag[0]) box_change |= BOX_CHANGE_X;
+  if (p_flag[1]) box_change |= BOX_CHANGE_Y;
+  if (p_flag[2]) box_change |= BOX_CHANGE_Z;
+  if (p_flag[3]) box_change |= BOX_CHANGE_YZ;
+  if (p_flag[4]) box_change |= BOX_CHANGE_XZ;
+  if (p_flag[5]) box_change |= BOX_CHANGE_XY;
+
+  // pstyle = TRICLINIC if any off-diagonal component is controlled, else ISO if
+  // XYZ coupling or XY coupling in 2d -> 1 dof, else pstyle = ANISO -> 3 dof
+
+  if (p_flag[3] || p_flag[4] || p_flag[5])
+    pstyle = TRICLINIC;
+  else if (pcouple == XYZ || (dimension == 2 && pcouple == XY))
+    pstyle = ISO;
+  else
+    pstyle = ANISO;
+
+  // pre_exchange only required if flips can occur due to shape changes
+
+  if (flipflag && (p_flag[3] || p_flag[4] || p_flag[5]))
+    pre_exchange_flag = pre_exchange_migrate = 1;
+  if (flipflag && (domain->yz != 0.0 || domain->xz != 0.0 || domain->xy != 0.0))
+    pre_exchange_flag = pre_exchange_migrate = 1;
+
+  if (pre_exchange_flag)
+    irregular = new Irregular(lmp);
+  else
+    irregular = nullptr;
+
+  // Langevin GJF dynamics does NOT need a temperature compute
+  // This is stated explicitly in their paper.
+  // The temperature used for the pressure is NkT/V on purpose.
+
+  // For this reason, the compute must use the virial pressure
+  // Kinetic contribution will be added by the fix style
+
+  id_press = utils::strdup(std::string(id) + "_press");
+  pressure = modify->add_compute(fmt::format("{} all pressure NULL virial", id_press));
+  pflag = 1;
+
+  // p_fric is alpha coeff from GJF
+  // with alpha = Q/p_period
+  // similar to fix_langevin formalism
+
+  double kt = force->boltz * t_start;
+  double nkt = (atom->natoms + 1) * kt;
+  for (int i = 0; i < 6; i++) {
+    if (p_ltime > 0.0)
+      p_fric[i] = p_ltime;
+    else
+      p_fric[i] = p_period[i];
+  }
+
+  for (int i = 0; i < 6; i++) {
+    p_mass[i] = nkt * p_period[i] * p_period[i];
+    p_alpha[i] = p_mass[i] * p_fric[i];
+    gjfa[i] = (1.0 - p_alpha[i] * update->dt / 2.0 / p_mass[i]) /
+        (1.0 + p_alpha[i] * update->dt / 2.0 / p_mass[i]);
+    gjfb[i] = 1. / (1.0 + p_alpha[i] * update->dt / 2.0 / p_mass[i]);
+  }
+
+  nrigid = 0;
+  rfix = nullptr;
+}
+
+/* ---------------------------------------------------------------------- */
+
+FixPressLangevin::~FixPressLangevin()
+{
+  // remove the pressure compute only while this fix still owns it (pflag set)
+  if (pflag) modify->delete_compute(id_press);
+  delete[] id_press;
+
+  // release the helper objects allocated by this fix
+  delete irregular;
+  delete[] rfix;
+  delete random;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int FixPressLangevin::setmask()
+{
+  // integration hooks that are always active
+  int mask = INITIAL_INTEGRATE | POST_INTEGRATE | POST_FORCE | END_OF_STEP;
+
+  // pre_exchange() is only requested when pre_exchange_flag is set
+
+  if (pre_exchange_flag) mask |= PRE_EXCHANGE;
+  return mask;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixPressLangevin::init()
+{
+  // a box component may not be controlled by this fix and by a fix deform
+  // at the same time
+
+  for (const auto &fix : modify->get_fix_by_style("^deform")) {
+    const int *deform_dims = static_cast<FixDeform *>(fix)->dimflag;
+    if (!deform_dims) continue;
+    bool clash = false;
+    for (int k = 0; k < 6; k++) clash = clash || (p_flag[k] && deform_dims[k]);
+    if (clash)
+      error->all(FLERR,
+                 "Cannot use fix press/langevin and fix deform on same component of stress tensor");
+  }
+
+  // refresh the pointer to the pressure compute, which must still exist
+
+  pressure = modify->get_compute_by_id(id_press);
+  if (pressure == nullptr)
+    error->all(FLERR, "Pressure compute ID {} for fix press/langevin does not exist", id_press);
+
+  // remember whether a KSpace style is defined (1) or not (0)
+
+  kspace_flag = (force->kspace != nullptr) ? 1 : 0;
+
+  // collect every fix with rigid_flag set so rigid bodies move when box is remapped
+  // rfix[] = pointers to those fixes, nrigid = number of entries
+
+  delete[] rfix;
+  rfix = nullptr;
+  nrigid = 0;
+
+  int count = 0;
+  for (const auto &fix : modify->get_fix_list())
+    if (fix->rigid_flag) ++count;
+  if (count > 0) {
+    rfix = new Fix *[count];
+    for (auto &fix : modify->get_fix_list())
+      if (fix->rigid_flag) rfix[nrigid++] = fix;
+  }
+
+  // zero the piston velocities and box dilations so that values left over
+  // from a previous run are not integrated at the start of this one
+
+  for (int k = 0; k < 6; k++) {
+    dilation[k] = 0.0;
+    p_deriv[k] = 0.0;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   schedule the pressure compute for the next timestep
+------------------------------------------------------------------------- */
+
+void FixPressLangevin::setup(int /*vflag*/)
+{
+  // request the virial computation on the timestep after the current one
+  const auto next_step = update->ntimestep + 1;
+  pressure->addstep(next_step);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixPressLangevin::initial_integrate(int /* vflag */)
+{
+  // fraction of the run already completed (0 on the first step)
+
+  double frac = update->ntimestep - update->beginstep;
+  if (frac != 0.0) frac /= update->endstep - update->beginstep;
+
+  // ramp the target temperature and draw new random forces on the pistons
+
+  t_target = t_start + frac * (t_stop - t_start);
+  couple_beta();
+
+  // displacement of each controlled piston over this step (see equation 13)
+  // components 0-2: dilation[] holds the ratio of new to old box length
+  // components 3-5: dilation[] holds the displacement itself
+
+  const double dt = update->dt;
+  for (int k = 0; k < 6; k++) {
+    if (!p_flag[k]) continue;
+
+    double shift = dt * p_deriv[k] * gjfb[k];
+    shift += 0.5 * dt * dt * f_piston[k] * gjfb[k] / p_mass[k];
+    shift += 0.5 * dt * fran[k] * gjfb[k] / p_mass[k];
+
+    if (k >= 3) {
+      dilation[k] = shift;
+    } else {
+      const double len = domain->boxhi[k] - domain->boxlo[k];
+      dilation[k] = (len + shift) / len;
+    }
+  }
+}
+
+void FixPressLangevin::post_integrate()
+{
+  // apply the dilation computed in initial_integrate() to box and atoms
+  remap();
+  // the volume has changed, so KSpace coefficients must be set up again
+  if (!kspace_flag) return;
+  force->kspace->setup();
+}
+
+/* ---------------------------------------------------------------------- */
+void FixPressLangevin::post_force(int /*vflag*/)
+{
+  // fraction of the run already completed, used to ramp the target pressure
+
+  double frac = update->ntimestep - update->beginstep;
+  if (frac != 0.0) frac /= update->endstep - update->beginstep;
+
+  // evaluate the pressure compute: scalar for ISO, vector otherwise
+
+  if (pstyle != ISO)
+    pressure->compute_vector();
+  else
+    pressure->compute_scalar();
+
+  // fill p_current[] from the compute, then add the kinetic term
+
+  couple_pressure();
+  couple_kinetic();
+
+  // new piston force = current pressure - target pressure, old force is kept
+  for (int k = 0; k < 6; k++) {
+    if (!p_flag[k]) continue;
+    f_old_piston[k] = f_piston[k];
+    p_target[k] = p_start[k] + frac * (p_stop[k] - p_start[k]);
+    f_piston[k] = p_current[k] - p_target[k];
+  }
+
+  // trigger virial computation on next timestep
+  pressure->addstep(update->ntimestep + 1);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixPressLangevin::end_of_step()
+{
+  // advance the velocity of every controlled piston, using the previous
+  // and the current piston force plus the random force of this step
+
+  const double dt = update->dt;
+
+  for (int k = 0; k < 6; k++) {
+    if (!p_flag[k]) continue;
+    double vel = p_deriv[k] * gjfa[k];
+    vel += 0.5 * dt * (gjfa[k] * f_old_piston[k] + f_piston[k]) / p_mass[k];
+    vel += fran[k] * gjfb[k] / p_mass[k];
+    p_deriv[k] = vel;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixPressLangevin::couple_pressure()
+{
+  const double *vec = pressure->vector;
+
+  // off-diagonal components are always copied from the pressure vector
+
+  for (int k = 3; k < 6; k++) p_current[k] = vec[k];
+
+  // ISO: all three diagonal components share the scalar pressure
+
+  if (pstyle == ISO) {
+    p_current[0] = p_current[1] = p_current[2] = pressure->scalar;
+    return;
+  }
+
+  // otherwise copy the diagonal, then replace coupled entries by their mean
+
+  for (int k = 0; k < 3; k++) p_current[k] = vec[k];
+
+  if (pcouple == XYZ) {
+    const double mean = 1.0 / 3.0 * (vec[0] + vec[1] + vec[2]);
+    p_current[0] = p_current[1] = p_current[2] = mean;
+  } else if (pcouple == XY) {
+    p_current[0] = p_current[1] = 0.5 * (vec[0] + vec[1]);
+  } else if (pcouple == YZ) {
+    p_current[1] = p_current[2] = 0.5 * (vec[1] + vec[2]);
+  } else if (pcouple == XZ) {
+    p_current[0] = p_current[2] = 0.5 * (vec[0] + vec[2]);
+  }
+}
+/* ---------------------------------------------------------------------- */
+
+void FixPressLangevin::couple_kinetic()
+{
+  // box volume, or box area for a 2d system
+  // (xprd * yprd, multiplied by zprd only when dimension == 3)
+
+  double volume = domain->xprd * domain->yprd;
+  if (dimension == 3) volume *= domain->zprd;
+
+  // kinetic pressure natoms * boltz * t_target / volume, scaled by nktv2p
+
+  double pk = atom->natoms * force->boltz * t_target / volume;
+  pk *= force->nktv2p;
+
+  // add it to the diagonal components present in this dimensionality
+
+  const int ndiag = (dimension == 3) ? 3 : 2;
+  for (int k = 0; k < ndiag; k++) p_current[k] += pk;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixPressLangevin::couple_beta()
+{
+  // amplitude of the random force on each piston at the current t_target
+
+  double amp[6];
+  for (int k = 0; k < 6; k++) {
+    amp[k] = sqrt(2.0 * p_fric[k] * force->boltz * update->dt * t_target);
+    fran[k] = 0.0;
+  }
+
+  // only rank 0 draws random numbers, the result is broadcast below
+  // coupled dimensions share a single draw, scaled with the amplitude
+  // of the first coupled dimension
+
+  if (comm->me == 0) {
+    if (pstyle == ISO || pcouple == XYZ) {
+      fran[0] = fran[1] = fran[2] = amp[0] * random->gaussian();
+    } else if (pcouple == XY) {
+      fran[0] = fran[1] = amp[0] * random->gaussian();
+      fran[2] = amp[2] * random->gaussian();
+    } else if (pcouple == YZ) {
+      fran[1] = fran[2] = amp[1] * random->gaussian();
+      fran[0] = amp[0] * random->gaussian();
+    } else if (pcouple == XZ) {
+      fran[0] = fran[2] = amp[0] * random->gaussian();
+      fran[1] = amp[1] * random->gaussian();
+    } else {
+      for (int k = 0; k < 3; k++) fran[k] = amp[k] * random->gaussian();
+    }
+    // components 3-5 are never coupled: one draw each
+    for (int k = 3; k < 6; k++) fran[k] = amp[k] * random->gaussian();
+  }
+
+  MPI_Bcast(fran, 6, MPI_DOUBLE, 0, world);
+}
+
+/* ----------------------------------------------------------------------
+   change box size
+   remap all atoms or fix group atoms depending on allremap flag
+   if rigid bodies exist, scale rigid body centers-of-mass
+------------------------------------------------------------------------- */
+
+void FixPressLangevin::remap()
+{
+  int i;
+  double oldlo, oldhi, ctr;
+
+  double **x = atom->x;
+  int *mask = atom->mask;
+  int nlocal = atom->nlocal;
+
+  // convert pertinent atoms and rigid bodies to lamda coords
+  // allremap set: all local atoms, otherwise only atoms in the fix group
+  if (allremap)
+    domain->x2lamda(nlocal);
+  else {
+    for (i = 0; i < nlocal; i++)
+      if (mask[i] & groupbit) domain->x2lamda(x[i], x[i]);
+  }
+
+  if (nrigid)
+    for (i = 0; i < nrigid; i++) rfix[i]->deform(0);
+
+  // reset global and local box to new size/shape
+  // controlled box lengths are scaled by dilation[] about the box center
+  for (i = 0; i < 3; i++) {
+    if (p_flag[i]) {
+      oldlo = domain->boxlo[i];
+      oldhi = domain->boxhi[i];
+      ctr = 0.5 * (oldlo + oldhi);
+      domain->boxlo[i] = (oldlo - ctr) * dilation[i] + ctr;
+      domain->boxhi[i] = (oldhi - ctr) * dilation[i] + ctr;
+    }
+  }
+  // controlled tilt factors are shifted by dilation[]: 3 -> xy, 4 -> xz, 5 -> yz
+  if (p_flag[3]) domain->xy += dilation[3];
+  if (p_flag[4]) domain->xz += dilation[4];
+  if (p_flag[5]) domain->yz += dilation[5];
+  // stop if yz exceeds TILTMAX * yprd or xz/xy exceed TILTMAX * xprd in magnitude
+  if (domain->yz < -TILTMAX * domain->yprd || domain->yz > TILTMAX * domain->yprd ||
+      domain->xz < -TILTMAX * domain->xprd || domain->xz > TILTMAX * domain->xprd ||
+      domain->xy < -TILTMAX * domain->xprd || domain->xy > TILTMAX * domain->xprd)
+    error->all(FLERR,
+               "Fix {} has tilted box too far in one step - "
+               "periodic cell is too far from equilibrium state",
+               style);
+
+  domain->set_global_box();
+  domain->set_local_box();
+
+  // convert pertinent atoms and rigid bodies back to box coords
+  // same atom selection as for the conversion to lamda coords above
+  if (allremap)
+    domain->lamda2x(nlocal);
+  else {
+    for (i = 0; i < nlocal; i++)
+      if (mask[i] & groupbit) domain->lamda2x(x[i], x[i]);
+  }
+
+  if (nrigid)
+    for (i = 0; i < nrigid; i++) rfix[i]->deform(1);
+}
+
+/* ----------------------------------------------------------------------
+  if any tilt ratios exceed limits, set flip = 1 and compute new tilt values
+  do not flip in x or y if non-periodic (can tilt but not flip)
+    this is b/c the box length would be changed (dramatically) by flip
+  if yz tilt exceeded, adjust C vector by one B vector
+  if xz tilt exceeded, adjust C vector by one A vector
+  if xy tilt exceeded, adjust B vector by one A vector
+  check yz first since it may change xz, then xz check comes after
+  if any flip occurs, create new box in domain
+  image_flip() adjusts image flags due to box shape change induced by flip
+  remap() puts atoms outside the new box back into the new box
+  perform irregular on atoms in lamda coords to migrate atoms to new procs
+  important that image_flip comes before remap, since remap may change
+    image flags to new values, making eqs in doc of Domain:image_flip incorrect
+------------------------------------------------------------------------- */
+
+void FixPressLangevin::pre_exchange()
+{
+  double xprd = domain->xprd;
+  double yprd = domain->yprd;
+
+  // flip is only triggered when tilt exceeds 0.5 by DELTAFLIP
+  // this avoids immediate re-flipping due to tilt oscillations
+
+  double xtiltmax = (0.5 + DELTAFLIP) * xprd;
+  double ytiltmax = (0.5 + DELTAFLIP) * yprd;
+
+  int flipxy, flipxz, flipyz;    // +1/-1 if tilt was raised/lowered by one box length, else 0
+  flipxy = flipxz = flipyz = 0;
+  // yz flip (only if y is periodic), which also shifts xz by xy
+  if (domain->yperiodic) {
+    if (domain->yz < -ytiltmax) {
+      domain->yz += yprd;
+      domain->xz += domain->xy;
+      flipyz = 1;
+    } else if (domain->yz >= ytiltmax) {
+      domain->yz -= yprd;
+      domain->xz -= domain->xy;
+      flipyz = -1;
+    }
+  }
+  // xz and xy flips (only if x is periodic); xz is tested after the yz update
+  if (domain->xperiodic) {
+    if (domain->xz < -xtiltmax) {
+      domain->xz += xprd;
+      flipxz = 1;
+    } else if (domain->xz >= xtiltmax) {
+      domain->xz -= xprd;
+      flipxz = -1;
+    }
+    if (domain->xy < -xtiltmax) {
+      domain->xy += xprd;
+      flipxy = 1;
+    } else if (domain->xy >= xtiltmax) {
+      domain->xy -= xprd;
+      flipxy = -1;
+    }
+  }
+
+  int flip = 0;
+  if (flipxy || flipxz || flipyz) flip = 1;
+  // on any flip: rebuild the box, fix image flags, remap and migrate atoms
+  if (flip) {
+    domain->set_global_box();
+    domain->set_local_box();
+    // image flags must be adjusted before atoms are remapped into the new box
+    domain->image_flip(flipxy, flipxz, flipyz);
+
+    double **x = atom->x;
+    imageint *image = atom->image;
+    int nlocal = atom->nlocal;
+    for (int i = 0; i < nlocal; i++) domain->remap(x[i], image[i]);
+    // atom migration is performed in lamda coords
+    domain->x2lamda(atom->nlocal);
+    irregular->migrate_atoms();
+    domain->lamda2x(atom->nlocal);
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+int FixPressLangevin::modify_param(int narg, char **arg)
+{
+  // only the "press" keyword is handled here, anything else consumes no args
+  if (strcmp(arg[0], "press") != 0) return 0;
+  if (narg < 2) utils::missing_cmd_args(FLERR, "fix_modify press", error);
+
+  // drop the compute created by this fix before switching to the new ID
+  if (pflag) {
+    modify->delete_compute(id_press);
+    pflag = 0;
+  }
+  delete[] id_press;
+  id_press = utils::strdup(arg[1]);
+
+  pressure = modify->get_compute_by_id(arg[1]);
+  if (!pressure) error->all(FLERR, "Could not find fix_modify pressure compute ID: {}", arg[1]);
+  if (pressure->pressflag == 0)
+    error->all(FLERR, "Fix_modify pressure compute {} does not compute pressure", arg[1]);
+  return 2;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixPressLangevin::reset_dt()
+{
+  for (int k = 0; k < 6; k++) {
+    const double half = p_alpha[k] * update->dt / 2.0 / p_mass[k];    // alpha * dt / (2 * mass)
+    gjfa[k] = (1.0 - half) / (1.0 + half);
+    gjfb[k] = 1. / (1.0 + half);
+  }
+}
diff --git a/src/fix_press_langevin.h b/src/fix_press_langevin.h
new file mode 100644
index 0000000000..868993b1f4
--- /dev/null
+++ b/src/fix_press_langevin.h
@@ -0,0 +1,78 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef FIX_CLASS
+// clang-format off
+FixStyle(press/langevin,FixPressLangevin);
+// clang-format on
+#else
+
+#ifndef LMP_FIX_PRESS_LANGEVIN_H
+#define LMP_FIX_PRESS_LANGEVIN_H
+
+#include "fix.h"
+
+namespace LAMMPS_NS {
+
+class FixPressLangevin : public Fix {
+ public:
+  FixPressLangevin(class LAMMPS *, int, char **);
+  ~FixPressLangevin() override;
+  int setmask() override;
+  void init() override;
+  void setup(int) override;
+  void pre_exchange() override;             // flips the box when a tilt factor grows too large
+  void initial_integrate(int) override;     // computes dilation[] for this step
+  void post_integrate() override;           // applies dilation[] through remap()
+  void post_force(int) override;            // updates piston forces from the pressure
+  void end_of_step() override;              // updates piston velocities
+  void reset_dt() override;                 // recomputes gjfa[] and gjfb[]
+  int modify_param(int, char **) override;  // handles the fix_modify "press" keyword
+
+ protected:
+  int dimension;    // dimensionality of the system (tested against 2 and 3)
+  int pstyle, pcouple, allremap;    // pressure style, coupling mode, 1 = remap all atoms
+  int p_flag[6];    // 1 if control P on this dim, 0 if not
+  double t_start, t_stop, t_target;    // temperature at start/end of run and current target
+  double p_fric[6], p_ltime;    // Friction and Langevin charac. time
+  double p_alpha[6];    // p_mass * p_fric
+  double p_start[6], p_stop[6], p_period[6];    // values given with the pressure keywords
+  double p_mass[6], p_target[6], p_current[6];    // piston mass, target and current pressure
+  double p_deriv[6], dilation[6];    // piston velocity, box change applied by remap()
+  double f_piston[6], f_old_piston[6];    // p_current - p_target, now and on the previous step
+  double gjfa[6], gjfb[6], fran[6];    // GJF coefficients and random force on each piston
+  int kspace_flag;    // 1 if KSpace invoked, 0 if not
+  int nrigid;         // number of rigid fixes
+  class Fix **rfix;    // list of rigid fixes
+
+  char *id_temp, *id_press;    // NOTE(review): id_temp is not used by the methods seen here
+  class Compute *temperature, *pressure;    // NOTE(review): temperature looks unused - confirm
+  int pflag;    // 1 if this fix created the compute id_press and must delete it
+
+  int flipflag;    // value of the flip keyword
+  int pre_exchange_flag;         // set if pre_exchange needed for box flips
+  class Irregular *irregular;    // for migrating atoms after box flips
+
+  class RanMars *random;    // random number generator for the piston noise
+  int seed;                 // seed passed to the RanMars constructor
+
+  void couple_pressure();    // fills p_current[] from the pressure compute
+  void couple_kinetic();     // adds the kinetic term to the diagonal of p_current[]
+  void couple_beta();        // draws fran[] on rank 0 and broadcasts it
+  void remap();              // changes the box and remaps atoms
+};
+
+}    // namespace LAMMPS_NS
+
+#endif
+#endif
diff --git a/src/fix_property_atom.cpp b/src/fix_property_atom.cpp
index 994b4f0f19..9613523059 100644
--- a/src/fix_property_atom.cpp
+++ b/src/fix_property_atom.cpp
@@ -46,6 +46,7 @@ FixPropertyAtom::FixPropertyAtom(LAMMPS *lmp, int narg, char **arg) :
   rmass_flag = 0;
   temperature_flag = 0;
   heatflow_flag = 0;
+  nmax_old = 0;
 
   nvalue = 0;
   values_peratom = 0;
@@ -198,16 +199,23 @@ FixPropertyAtom::FixPropertyAtom(LAMMPS *lmp, int narg, char **arg) :
 
   astyle = utils::strdup(atom->atom_style);
 
-  // perform initial allocation of atom-based array
   // register with Atom class
 
-  nmax_old = 0;
-  if (!lmp->kokkos) FixPropertyAtom::grow_arrays(atom->nmax);
   atom->add_callback(Atom::GROW);
   atom->add_callback(Atom::RESTART);
   if (border) atom->add_callback(Atom::BORDER);
 }
 
+
+/* ---------------------------------------------------------------------- */
+
+void FixPropertyAtom::post_constructor()
+{
+  // initial allocation of the atom-based array, sized to the current atom->nmax
+  const int nmax = atom->nmax;
+  grow_arrays(nmax);
+}
+
 /* ---------------------------------------------------------------------- */
 
 FixPropertyAtom::~FixPropertyAtom()
diff --git a/src/fix_property_atom.h b/src/fix_property_atom.h
index 92497d6188..c50b6049dc 100644
--- a/src/fix_property_atom.h
+++ b/src/fix_property_atom.h
@@ -27,6 +27,7 @@ namespace LAMMPS_NS {
 class FixPropertyAtom : public Fix {
  public:
   FixPropertyAtom(class LAMMPS *, int, char **);
+  void post_constructor() override;
   ~FixPropertyAtom() override;
   int setmask() override;
   void init() override;
diff --git a/src/fix_spring_self.cpp b/src/fix_spring_self.cpp
index 550b3afc4d..df00a2ba8c 100644
--- a/src/fix_spring_self.cpp
+++ b/src/fix_spring_self.cpp
@@ -96,6 +96,8 @@ FixSpringSelf::FixSpringSelf(LAMMPS *lmp, int narg, char **arg) :
 
 FixSpringSelf::~FixSpringSelf()
 {
+  if (copymode) return;
+
   // unregister callbacks to this fix from Atom class
 
   atom->delete_callback(id,Atom::GROW);
diff --git a/src/fix_spring_self.h b/src/fix_spring_self.h
index 59dba78e43..f13f2be918 100644
--- a/src/fix_spring_self.h
+++ b/src/fix_spring_self.h
@@ -47,7 +47,7 @@ class FixSpringSelf : public Fix {
   int size_restart(int) override;
   int maxsize_restart() override;
 
- private:
+ protected:
   double k, espring;
   double **xoriginal;    // original coords of atoms
   int xflag, yflag, zflag;
diff --git a/src/image.cpp b/src/image.cpp
index 27079eee58..3133723b32 100644
--- a/src/image.cpp
+++ b/src/image.cpp
@@ -395,12 +395,12 @@ void Image::merge()
   if (fsaa) {
     for (int h=0; h < height; h += 2) {
       for (int w=0; w < width; w +=2) {
-        int idx1 = 3*height*h + 3*w;
-        int idx2 = 3*height*h + 3*(w+1);
-        int idx3 = 3*height*(h+1) + 3*w;
-        int idx4 = 3*height*(h+1) + 3*(w+1);
+        int idx1 = 3*width*h + 3*w;
+        int idx2 = 3*width*h + 3*(w+1);
+        int idx3 = 3*width*(h+1) + 3*w;
+        int idx4 = 3*width*(h+1) + 3*(w+1);
 
-        int out = 3*(height/2)*(h/2) + 3*(w/2);
+        int out = 3*(width/2)*(h/2) + 3*(w/2);
         for (int i=0; i < 3; ++i) {
           writeBuffer[out+i] = (unsigned char) (0.25*((int)writeBuffer[idx1+i]
                                                       +(int)writeBuffer[idx2+i]
diff --git a/src/library.cpp b/src/library.cpp
index f89fdaebf0..7b0d8ef91b 100644
--- a/src/library.cpp
+++ b/src/library.cpp
@@ -858,14 +858,17 @@ void *lammps_last_thermo(void *handle, const char *what, int index)
 {
   auto lmp = (LAMMPS *) handle;
   void *val = nullptr;
+
+  if (!lmp->output) return val;
   Thermo *th = lmp->output->thermo;
-  if (!th) return nullptr;
+  if (!th) return val;
   const int nfield = *th->get_nfield();
 
   BEGIN_CAPTURE
   {
     if (strcmp(what, "setup") == 0) {
-      val = (void *) &lmp->update->setupflag;
+      if (lmp->update)
+        val = (void *) &lmp->update->setupflag;
 
     } else if (strcmp(what, "line") == 0) {
       val = (void *) th->get_line();
diff --git a/src/min.cpp b/src/min.cpp
index 5a469a788b..acc7d17654 100644
--- a/src/min.cpp
+++ b/src/min.cpp
@@ -215,6 +215,9 @@ void Min::setup(int flag)
   }
   update->setupflag = 1;
 
+  if (lmp->kokkos)
+    error->all(FLERR,"KOKKOS package requires Kokkos-enabled min_style");
+
   // setup extra global dof due to fixes
   // cannot be done in init() b/c update init() is before modify init()
 
diff --git a/src/neighbor.cpp b/src/neighbor.cpp
index 4b0f39fc3b..a4b7b7796d 100644
--- a/src/neighbor.cpp
+++ b/src/neighbor.cpp
@@ -313,7 +313,10 @@ void Neighbor::init()
   triclinic = domain->triclinic;
   newton_pair = force->newton_pair;
 
-  // error check
+  // error checks
+
+  if (triclinic && atom->tag_enable == 0)
+    error->all(FLERR, "Cannot build triclinic neighbor lists unless atoms have IDs");
 
   if (delay > 0 && (delay % every) != 0)
     error->all(FLERR,"Neighbor delay must be 0 or multiple of every setting");
diff --git a/src/npair_respa_nsq.cpp b/src/npair_respa_nsq.cpp
index 55c6b7dd61..7f70addbc9 100644
--- a/src/npair_respa_nsq.cpp
+++ b/src/npair_respa_nsq.cpp
@@ -130,6 +130,12 @@ void NPairRespaNsq<NEWTON, TRI>::build(NeighList *list)
     }
 
     // loop over remaining atoms, owned and ghost
+    // use itag/jtag comparison to eliminate half the interactions
+    // itag = jtag is possible for long cutoffs that include images of self
+    // for triclinic, must use delta to eliminate half the I/J interactions
+    // cannot use I/J exact coord comparison as for orthog
+    //   b/c transforming orthog -> lambda -> orthog for ghost atoms
+    //   with an added PBC offset can shift all 3 coords by epsilon
 
     for (j = i + 1; j < nall; j++) {
       if (includegroup && !(mask[j] & bitmask)) continue;
diff --git a/src/npair_trim.cpp b/src/npair_trim.cpp
index 5751cbc330..1ff7826227 100644
--- a/src/npair_trim.cpp
+++ b/src/npair_trim.cpp
@@ -51,11 +51,15 @@ void NPairTrim::build(NeighList *list)
   int *numneigh_copy = listcopy->numneigh;
   int **firstneigh_copy = listcopy->firstneigh;
   int inum = listcopy->inum;
+  int gnum = listcopy->gnum;
 
   list->inum = inum;
-  list->gnum = listcopy->gnum;
+  list->gnum = gnum;
 
-  for (ii = 0; ii < inum; ii++) {
+  int inum_trim = inum;
+  if (list->ghost) inum_trim += gnum;
+
+  for (ii = 0; ii < inum_trim; ii++) {
     n = 0;
     neighptr = ipage->vget();
 
diff --git a/src/region_ellipsoid.cpp b/src/region_ellipsoid.cpp
index 3520b55813..daabd621c8 100644
--- a/src/region_ellipsoid.cpp
+++ b/src/region_ellipsoid.cpp
@@ -25,6 +25,18 @@ using namespace LAMMPS_NS;
 
 enum { CONSTANT, VARIABLE };
 
+static double GetRoot2D(double r0, double z0, double z1, double g);
+static double GetRoot3D(double r0, double r1, double z0, double z1, double z2, double g);
+// callers store the return value as contact[0].r and read x0/x1/x2 afterwards
+static double DistancePointEllipse(double e0, double e1, double y0, double y1, double &x0,
+                                   double &x1);
+static double DistancePointEllipsoid(double e0, double e1, double e2, double y0, double y1,
+                                     double y2, double &x0, double &x1, double &x2);
+// iteration limit derived from the double format, tolerance = twice machine epsilon
+static constexpr int maxIterations =
+    std::numeric_limits<double>::digits - std::numeric_limits<double>::min_exponent;
+static constexpr double EPSILON = std::numeric_limits<double>::epsilon() * 2.0;
+
 /* ---------------------------------------------------------------------- */
 
 RegEllipsoid::RegEllipsoid(LAMMPS *lmp, int narg, char **arg) :
@@ -190,8 +202,8 @@ int RegEllipsoid::surface_interior(double *x, double cutoff)
     double b_r = b - cutoff;
     double c_r = c - cutoff;
     double delx_r = b_r * c_r * (x[0] - xc);
-    double dely_r = a_r * c_r * (x[1] - xc);
-    double delz_r = a_r * b_r * (x[2] - xc);
+    double dely_r = a_r * c_r * (x[1] - yc);
+    double delz_r = a_r * b_r * (x[2] - zc);
     double r_r = delx_r * delx_r + dely_r * dely_r + delz_r * delz_r;
     double rc_r = a_r * a_r * b_r * b_r * c_r * c_r;
 
@@ -217,9 +229,9 @@ int RegEllipsoid::surface_interior(double *x, double cutoff)
       contact[0].r = DistancePointEllipsoid(
           axes[sorting[2]], axes[sorting[1]], axes[sorting[0]], coords[sorting[2]],
           coords[sorting[1]], coords[sorting[0]], x0[sorting[2]], x0[sorting[1]], x0[sorting[0]]);
-      contact[0].delx = x[0] - (copysign(x0[sorting[2]], x[0] - xc) + xc);
-      contact[0].dely = x[1] - (copysign(x0[sorting[1]], x[1] - yc) + yc);
-      contact[0].delz = x[2] - (copysign(x0[sorting[0]], x[2] - zc) + zc);
+      contact[0].delx = x[0] - (copysign(x0[0], x[0] - xc) + xc);
+      contact[0].dely = x[1] - (copysign(x0[1], x[1] - yc) + yc);
+      contact[0].delz = x[2] - (copysign(x0[2], x[2] - zc) + zc);
       //      contact[0].radius = -radius;
       contact[0].iwall = 0;
       contact[0].varflag = 1;
@@ -236,7 +248,7 @@ int RegEllipsoid::surface_interior(double *x, double cutoff)
     double a_r = a - cutoff;
     double b_r = b - cutoff;
     double delx_r = b_r * (x[0] - xc);
-    double dely_r = a_r * (x[1] - xc);
+    double dely_r = a_r * (x[1] - yc);
     double r_r = delx_r * delx_r + dely_r * dely_r;
     double rc_r = a_r * a_r * b_r * b_r;
 
@@ -281,8 +293,8 @@ int RegEllipsoid::surface_exterior(double *x, double cutoff)
     double b_r = b + cutoff;
     double c_r = c + cutoff;
     double delx_r = b_r * c_r * (x[0] - xc);
-    double dely_r = a_r * c_r * (x[1] - xc);
-    double delz_r = a_r * b_r * (x[2] - xc);
+    double dely_r = a_r * c_r * (x[1] - yc);
+    double delz_r = a_r * b_r * (x[2] - zc);
     double r_r = delx_r * delx_r + dely_r * dely_r + delz_r * delz_r;
     double rc_r = a_r * a_r * b_r * b_r * c_r * c_r;
 
@@ -308,9 +320,9 @@ int RegEllipsoid::surface_exterior(double *x, double cutoff)
       contact[0].r = DistancePointEllipsoid(
           axes[sorting[2]], axes[sorting[1]], axes[sorting[0]], coords[sorting[2]],
           coords[sorting[1]], coords[sorting[0]], x0[sorting[2]], x0[sorting[1]], x0[sorting[0]]);
-      contact[0].delx = x[0] - (copysign(x0[sorting[2]], x[0] - xc) + xc);
-      contact[0].dely = x[1] - (copysign(x0[sorting[1]], x[1] - yc) + yc);
-      contact[0].delz = x[2] - (copysign(x0[sorting[0]], x[2] - zc) + zc);
+      contact[0].delx = x[0] - (copysign(x0[0], x[0] - xc) + xc);
+      contact[0].dely = x[1] - (copysign(x0[1], x[1] - yc) + yc);
+      contact[0].delz = x[2] - (copysign(x0[2], x[2] - zc) + zc);
       //      contact[0].radius = radius;
       contact[0].iwall = 0;
       contact[0].varflag = 1;
@@ -327,7 +339,7 @@ int RegEllipsoid::surface_exterior(double *x, double cutoff)
     double a_r = a + cutoff;
     double b_r = b + cutoff;
     double delx_r = b_r * (x[0] - xc);
-    double dely_r = a_r * (x[1] - xc);
+    double dely_r = a_r * (x[1] - yc);
     double r_r = delx_r * delx_r + dely_r * dely_r;
     double rc_r = a_r * a_r * b_r * b_r;
 
@@ -436,26 +448,24 @@ void RegEllipsoid::variable_check()
 // ------------------------------------------------------------------
 
 /* ----------------------------------------------------------------------
-   functions for the 2D case
+   static helper functions for the 2D case
 ------------------------------------------------------------------------- */
 
-double RegEllipsoid::GetRoot2D(double r0, double z0, double z1, double g)
+double GetRoot2D(double r0, double z0, double z1, double g)
 {
-  int maxIterations =
-      std::numeric_limits<double>::digits - std::numeric_limits<double>::min_exponent;
-  double n0 = r0 * z0;
-  double s0 = z1 - 1;
-  double s1 = (g < 0 ? 0 : sqrt(n0 * n0 + z1 * z1) - 1);
-  double s = 0;
+  const double n0 = r0 * z0;
+  double s0 = z1 - 1.0;
+  double s1 = (g < 0.0 ? 0.0 : sqrt(n0 * n0 + z1 * z1) - 1.0);
+  double s = 0.0;
   for (int i = 0; i < maxIterations; ++i) {
-    s = (s0 + s1) / 2;
+    s = (s0 + s1) / 2.0;
     if (s == s0 || s == s1) { break; }
-    double ratio0 = n0 / (s + r0);
-    double ratio1 = z1 / (s + 1);
-    g = ratio0 * ratio0 + ratio1 * ratio1 - 1;
-    if (g > 0) {
+    const double ratio0 = n0 / (s + r0);
+    const double ratio1 = z1 / (s + 1.0);
+    g = ratio0 * ratio0 + ratio1 * ratio1 - 1.0;
+    if ((g > 0.0) && (g > EPSILON)) {
       s0 = s;
-    } else if (g < 0) {
+    } else if ((g < 0.0) && (g < -EPSILON)) {
       s1 = s;
     } else {
       break;
@@ -464,28 +474,27 @@ double RegEllipsoid::GetRoot2D(double r0, double z0, double z1, double g)
   return s;
 }
 
-double RegEllipsoid::DistancePointEllipse(double e0, double e1, double y0, double y1, double &x0,
-                                          double &x1)
+double DistancePointEllipse(double e0, double e1, double y0, double y1, double &x0, double &x1)
 {
   double distance;
-  if (y1 > 0) {
-    if (y0 > 0) {
+  if (y1 > 0.0) {
+    if (y0 > 0.0) {
       double z0 = y0 / e0;
       double z1 = y1 / e1;
-      double g = z0 * z0 + z1 * z1 - 1;
-      if (g != 0) {
+      double g = z0 * z0 + z1 * z1 - 1.0;
+      if (g != 0.0) {
         double r0 = (e0 * e0) / (e1 * e1);
         double sbar = GetRoot2D(r0, z0, z1, g);
         x0 = r0 * y0 / (sbar + r0);
-        x1 = y1 / (sbar + 1);
+        x1 = y1 / (sbar + 1.0);
         distance = sqrt((x0 - y0) * (x0 - y0) + (x1 - y1) * (x1 - y1));
       } else {
         x0 = y0;
         x1 = y1;
-        distance = 0;
+        distance = 0.0;
       }
     } else {
-      x0 = 0;
+      x0 = 0.0;
       x1 = e1;
       distance = fabs(y1 - e1);
     }
@@ -499,7 +508,7 @@ double RegEllipsoid::DistancePointEllipse(double e0, double e1, double y0, doubl
       distance = sqrt((x0 - y0) * (x0 - y0) + x1 * x1);
     } else {
       x0 = e0;
-      x1 = 0;
+      x1 = 0.0;
       distance = fabs(y0 - e0);
     }
   }
@@ -507,28 +516,26 @@ double RegEllipsoid::DistancePointEllipse(double e0, double e1, double y0, doubl
 }
 
 /* ----------------------------------------------------------------------
-   functions for the 3D case
+   static helper functions for the 3D case
 ------------------------------------------------------------------------- */
 
-double RegEllipsoid::GetRoot3D(double r0, double r1, double z0, double z1, double z2, double g)
+double GetRoot3D(double r0, double r1, double z0, double z1, double z2, double g)
 {
-  int maxIterations =
-      std::numeric_limits<double>::digits - std::numeric_limits<double>::min_exponent;
-  double n0 = r0 * z0;
-  double n1 = r1 * z1;
-  double s0 = z2 - 1;
-  double s1 = (g < 0 ? 0 : sqrt(n0 * n0 + n1 * n1 + z2 * z2) - 1);
-  double s = 0;
+  const double n0 = r0 * z0;
+  const double n1 = r1 * z1;
+  double s0 = z2 - 1.0;
+  double s1 = (g < 0.0 ? 0.0 : sqrt(n0 * n0 + n1 * n1 + z2 * z2) - 1.0);
+  double s = 0.0;
   for (int i = 0; i < maxIterations; ++i) {
-    s = (s0 + s1) / 2;
+    s = (s0 + s1) / 2.0;
     if (s == s0 || s == s1) { break; }
-    double ratio0 = n0 / (s + r0);
-    double ratio1 = n1 / (s + r1);
-    double ratio2 = z2 / (s + 1);
-    g = ratio0 * ratio0 + ratio1 * ratio1 + ratio2 * ratio2 - 1;
-    if (g > 0) {
+    const double ratio0 = n0 / (s + r0);
+    const double ratio1 = n1 / (s + r1);
+    const double ratio2 = z2 / (s + 1.0);
+    g = ratio0 * ratio0 + ratio1 * ratio1 + ratio2 * ratio2 - 1.0;
+    if ((g > 0.0) && (g > EPSILON)) {
       s0 = s;
-    } else if (g < 0) {
+    } else if ((g < 0.0) && (g < -EPSILON)) {
       s1 = s;
     } else {
       break;
@@ -537,42 +544,42 @@ double RegEllipsoid::GetRoot3D(double r0, double r1, double z0, double z1, doubl
   return s;
 }
 
-double RegEllipsoid::DistancePointEllipsoid(double e0, double e1, double e2, double y0, double y1,
-                                            double y2, double &x0, double &x1, double &x2)
+double DistancePointEllipsoid(double e0, double e1, double e2, double y0, double y1, double y2,
+                              double &x0, double &x1, double &x2)
 {
   double distance;
-  if (y2 > 0) {
-    if (y1 > 0) {
-      if (y0 > 0) {
+  if (y2 > 0.0) {
+    if (y1 > 0.0) {
+      if (y0 > 0.0) {
         double z0 = y0 / e0;
         double z1 = y1 / e1;
         double z2 = y2 / e2;
-        double g = z0 * z0 + z1 * z1 + z2 * z2 - 1;
-        if (g != 0) {
+        double g = z0 * z0 + z1 * z1 + z2 * z2 - 1.0;
+        if (g != 0.0) {
           double r0 = e0 * e0 / (e2 * e2);
           double r1 = e1 * e1 / (e2 * e2);
           double sbar = GetRoot3D(r0, r1, z0, z1, z2, g);
           x0 = r0 * y0 / (sbar + r0);
           x1 = r1 * y1 / (sbar + r1);
-          x2 = y2 / (sbar + 1);
+          x2 = y2 / (sbar + 1.0);
           distance = sqrt((x0 - y0) * (x0 - y0) + (x1 - y1) * (x1 - y1) + (x2 - y2) * (x2 - y2));
         } else {
           x0 = y0;
           x1 = y1;
           x2 = y2;
-          distance = 0;
+          distance = 0.0;
         }
       } else {
-        x0 = 0;
+        x0 = 0.0;
         distance = DistancePointEllipse(e1, e2, y1, y2, x1, x2);
       }
     } else {
-      if (y0 > 0) {
-        x1 = 0;
+      if (y0 > 0.0) {
+        x1 = 0.0;
         distance = DistancePointEllipse(e0, e2, y0, y2, x0, x2);
       } else {
-        x0 = 0;
-        x1 = 0;
+        x0 = 0.0;
+        x1 = 0.0;
         x2 = e2;
         distance = fabs(y2 - e2);
       }
@@ -588,8 +595,8 @@ double RegEllipsoid::DistancePointEllipsoid(double e0, double e1, double e2, dou
       double xde1 = numer1 / denom1;
       double xde0sqr = xde0 * xde0;
       double xde1sqr = xde1 * xde1;
-      double discr = 1 - xde0sqr - xde1sqr;
-      if (discr > 0) {
+      double discr = 1.0 - xde0sqr - xde1sqr;
+      if (discr > 0.0) {
         x0 = e0 * xde0;
         x1 = e1 * xde1;
         x2 = e2 * sqrt(discr);
@@ -598,7 +605,7 @@ double RegEllipsoid::DistancePointEllipsoid(double e0, double e1, double e2, dou
       }
     }
     if (!computed) {
-      x2 = 0;
+      x2 = 0.0;
       distance = DistancePointEllipse(e0, e1, y0, y1, x0, x1);
     }
   }
diff --git a/src/region_ellipsoid.h b/src/region_ellipsoid.h
index 033d30234f..59944f093e 100644
--- a/src/region_ellipsoid.h
+++ b/src/region_ellipsoid.h
@@ -47,12 +47,6 @@ class RegEllipsoid : public Region {
   char *astr, *bstr, *cstr;
 
   void variable_check();
-
-  double GetRoot2D(double r0, double z0, double z1, double g);
-  double GetRoot3D(double r0, double r1, double z0, double z1, double z2, double g);
-  double DistancePointEllipse(double e0, double e1, double y0, double y1, double &x0, double &x1);
-  double DistancePointEllipsoid(double e0, double e1, double e2, double y0, double y1, double y2,
-                                double &x0, double &x1, double &x2);
 };
 
 }    // namespace LAMMPS_NS
diff --git a/src/thermo.cpp b/src/thermo.cpp
index 009c9cdf27..1c4e25dcb0 100644
--- a/src/thermo.cpp
+++ b/src/thermo.cpp
@@ -111,6 +111,7 @@ Thermo::Thermo(LAMMPS *_lmp, int narg, char **arg) :
   lostflag = lostbond = Thermo::ERROR;
   lostbefore = warnbefore = 0;
   flushflag = 0;
+  firststep = 0;
   ntimestep = -1;
   nline = -1;
   image_fname.clear();
diff --git a/src/update.cpp b/src/update.cpp
index 1074d8dcf6..04d3f7e1e6 100644
--- a/src/update.cpp
+++ b/src/update.cpp
@@ -396,7 +396,7 @@ void Update::new_integrate(char *style, int narg, char **arg, int trysuffix, int
 
 void Update::create_minimize(int narg, char **arg, int trysuffix)
 {
-  if (narg < 1) error->all(FLERR, "Illegal run_style command");
+  if (narg < 1) error->all(FLERR, "Illegal minimize_style command");
 
   delete[] minimize_style;
   delete minimize;
diff --git a/src/variable.cpp b/src/variable.cpp
index cf2e5c3b6f..264dcf6258 100644
--- a/src/variable.cpp
+++ b/src/variable.cpp
@@ -1469,8 +1469,7 @@ double Variable::evaluate(char *str, Tree **tree, int ivar)
         if (domain->box_exist == 0)
           print_var_error(FLERR,"Variable evaluation before simulation box is defined",ivar);
 
-        // uppercase used to force access of
-        // global vector vs global scalar, and global array vs global vector
+        // uppercase is used to access per-atom data by an equal-style variable
 
         int lowercase = 1;
         if (word[0] == 'C') lowercase = 0;
@@ -1479,7 +1478,6 @@ double Variable::evaluate(char *str, Tree **tree, int ivar)
         if (!compute)
           print_var_error(FLERR,fmt::format("Invalid compute ID '{}' in variable formula", word+2),ivar);
 
-
         // parse zero or one or two trailing brackets
         // point i beyond last bracket
         // nbracket = # of bracket pairs
@@ -1501,218 +1499,234 @@ double Variable::evaluate(char *str, Tree **tree, int ivar)
           }
         }
 
-        // c_ID = scalar from global scalar, must be lowercase
+        // equal-style variable is being evaluated
 
-        if (nbracket == 0 && compute->scalar_flag && lowercase) {
+        if (style[ivar] == EQUAL) {
 
-          if (!compute->is_initialized())
-            print_var_error(FLERR,"Variable formula compute cannot be invoked before "
-                            "initialization by a run",ivar);
-          if (!(compute->invoked_flag & Compute::INVOKED_SCALAR)) {
-            compute->compute_scalar();
-            compute->invoked_flag |= Compute::INVOKED_SCALAR;
-          }
+          // c_ID = scalar from global scalar
 
-          value1 = compute->scalar;
-          if (tree) {
-            auto newtree = new Tree();
-            newtree->type = VALUE;
-            newtree->value = value1;
-            treestack[ntreestack++] = newtree;
-          } else argstack[nargstack++] = value1;
+          if (lowercase && nbracket == 0) {
 
-        // c_ID[i] = scalar from global vector, must be lowercase
+            if (!compute->scalar_flag)
+              print_var_error(FLERR,"Mismatched compute in variable formula",ivar);
+            if (!compute->is_initialized())
+              print_var_error(FLERR,"Variable formula compute cannot be invoked before "
+                              "initialization by a run",ivar);
+            if (!(compute->invoked_flag & Compute::INVOKED_SCALAR)) {
+              compute->compute_scalar();
+              compute->invoked_flag |= Compute::INVOKED_SCALAR;
+            }
 
-        } else if (nbracket == 1 && compute->vector_flag && lowercase) {
+            value1 = compute->scalar;
+            argstack[nargstack++] = value1;
 
-          if (index1 > compute->size_vector &&
-              compute->size_vector_variable == 0)
-            print_var_error(FLERR,"Variable formula compute vector is accessed out-of-range",ivar,0);
-          if (!compute->is_initialized())
-            print_var_error(FLERR,"Variable formula compute cannot be invoked before "
-                            "initialization by a run",ivar);
-          if (!(compute->invoked_flag & Compute::INVOKED_VECTOR)) {
-            compute->compute_vector();
-            compute->invoked_flag |= Compute::INVOKED_VECTOR;
-          }
+          // c_ID[i] = scalar from global vector
+
+          } else if (lowercase && nbracket == 1) {
+
+            if (!compute->vector_flag)
+              print_var_error(FLERR,"Mismatched compute in variable formula",ivar);
+            if (index1 > compute->size_vector &&
+                compute->size_vector_variable == 0)
+              print_var_error(FLERR,"Variable formula compute vector is accessed out-of-range",ivar,0);
+            if (!compute->is_initialized())
+              print_var_error(FLERR,"Variable formula compute cannot be invoked before "
+                              "initialization by a run",ivar);
+            if (!(compute->invoked_flag & Compute::INVOKED_VECTOR)) {
+              compute->compute_vector();
+              compute->invoked_flag |= Compute::INVOKED_VECTOR;
+            }
 
           if (compute->size_vector_variable &&
               index1 > compute->size_vector) value1 = 0.0;
           else value1 = compute->vector[index1-1];
-          if (tree) {
+          argstack[nargstack++] = value1;
+
+          // c_ID[i][j] = scalar from global array
+
+          } else if (lowercase && nbracket == 2) {
+
+            if (!compute->array_flag)
+              print_var_error(FLERR,"Mismatched compute in variable formula",ivar);
+            if (index1 > compute->size_array_rows &&
+                compute->size_array_rows_variable == 0)
+              print_var_error(FLERR,"Variable formula compute array is accessed out-of-range",ivar,0);
+            if (index2 > compute->size_array_cols)
+              print_var_error(FLERR,"Variable formula compute array is accessed out-of-range",ivar,0);
+            if (!compute->is_initialized())
+              print_var_error(FLERR,"Variable formula compute cannot be invoked before "
+                              "initialization by a run",ivar);
+            if (!(compute->invoked_flag & Compute::INVOKED_ARRAY)) {
+              compute->compute_array();
+              compute->invoked_flag |= Compute::INVOKED_ARRAY;
+            }
+
+            if (compute->size_array_rows_variable &&
+                index1 > compute->size_array_rows) value1 = 0.0;
+            else value1 = compute->array[index1-1][index2-1];
+            argstack[nargstack++] = value1;
+
+          // C_ID[i] = scalar element of per-atom vector, note uppercase "C"
+
+          } else if (!lowercase && nbracket == 1) {
+
+            if (!compute->peratom_flag)
+              print_var_error(FLERR,"Mismatched compute in variable formula",ivar);
+            if (compute->size_peratom_cols)
+              print_var_error(FLERR,"Mismatched compute in variable formula",ivar);
+            if (!compute->is_initialized())
+              print_var_error(FLERR,"Variable formula compute cannot be invoked before "
+                              "initialization by a run",ivar);
+            if (!(compute->invoked_flag & Compute::INVOKED_PERATOM)) {
+              compute->compute_peratom();
+              compute->invoked_flag |= Compute::INVOKED_PERATOM;
+            }
+
+            peratom2global(1,nullptr,compute->vector_atom,1,index1,tree,
+                           treestack,ntreestack,argstack,nargstack);
+
+          // C_ID[i][j] = scalar element of per-atom array, note uppercase "C"
+
+          } else if (!lowercase && nbracket == 2) {
+
+            if (!compute->peratom_flag)
+              print_var_error(FLERR,"Mismatched compute in variable formula",ivar);
+            if (!compute->size_peratom_cols)
+              print_var_error(FLERR,"Mismatched compute in variable formula",ivar);
+            if (index2 > compute->size_peratom_cols)
+              print_var_error(FLERR,"Variable formula compute array is accessed out-of-range",ivar,0);
+            if (!compute->is_initialized())
+              print_var_error(FLERR,"Variable formula compute cannot be invoked before "
+                              "initialization by a run",ivar);
+            if (!(compute->invoked_flag & Compute::INVOKED_PERATOM)) {
+              compute->compute_peratom();
+              compute->invoked_flag |= Compute::INVOKED_PERATOM;
+            }
+
+            if (compute->array_atom)
+              peratom2global(1,nullptr,&compute->array_atom[0][index2-1],
+                             compute->size_peratom_cols,index1,
+                             tree,treestack,ntreestack,argstack,nargstack);
+            else
+              peratom2global(1,nullptr,nullptr,compute->size_peratom_cols,index1,
+                             tree,treestack,ntreestack,argstack,nargstack);
+
+          // no other possibilities for equal-style variable, so error
+
+          } else print_var_error(FLERR,"Mismatched compute in variable formula",ivar);
+
+        // vector-style variable is being evaluated
+
+        } else if (style[ivar] == VECTOR) {
+
+          // c_ID = vector from global vector
+
+          if (lowercase && nbracket == 0) {
+
+            if (!compute->vector_flag)
+              print_var_error(FLERR,"Mismatched compute in variable formula",ivar);
+            if (compute->size_vector == 0)
+              print_var_error(FLERR,"Variable formula compute vector is zero length",ivar);
+            if (!compute->is_initialized())
+              print_var_error(FLERR,"Variable formula compute cannot be invoked before "
+                              "initialization by a run",ivar);
+            if (!(compute->invoked_flag & Compute::INVOKED_VECTOR)) {
+              compute->compute_vector();
+              compute->invoked_flag |= Compute::INVOKED_VECTOR;
+            }
+
             auto newtree = new Tree();
-            newtree->type = VALUE;
-            newtree->value = value1;
+            newtree->type = VECTORARRAY;
+            newtree->array = compute->vector;
+            newtree->nvector = compute->size_vector;
+            newtree->nstride = 1;
             treestack[ntreestack++] = newtree;
-          } else argstack[nargstack++] = value1;
 
-        // c_ID[i][j] = scalar from global array, must be lowercase
+          // c_ID[i] = vector from global array
 
-        } else if (nbracket == 2 && compute->array_flag && lowercase) {
+          } else if (lowercase && nbracket == 1) {
 
-          if (index1 > compute->size_array_rows &&
-              compute->size_array_rows_variable == 0)
-            print_var_error(FLERR,"Variable formula compute array is accessed out-of-range",ivar,0);
-          if (index2 > compute->size_array_cols)
-            print_var_error(FLERR,"Variable formula compute array is accessed out-of-range",ivar,0);
-          if (!compute->is_initialized())
-            print_var_error(FLERR,"Variable formula compute cannot be invoked before "
-                            "initialization by a run",ivar);
-          if (!(compute->invoked_flag & Compute::INVOKED_ARRAY)) {
-            compute->compute_array();
-            compute->invoked_flag |= Compute::INVOKED_ARRAY;
-          }
+            if (!compute->array_flag)
+              print_var_error(FLERR,"Mismatched compute in variable formula",ivar);
+            if (compute->size_array_rows == 0)
+              print_var_error(FLERR,"Variable formula compute array is zero length",ivar);
+            if (index1 > compute->size_array_cols)
+              print_var_error(FLERR,"Variable formula compute array is accessed out-of-range",ivar,0);
+            if (!compute->is_initialized())
+              print_var_error(FLERR,"Variable formula compute cannot be invoked before "
+                              "initialization by a run",ivar);
+            if (!(compute->invoked_flag & Compute::INVOKED_ARRAY)) {
+              compute->compute_array();
+              compute->invoked_flag |= Compute::INVOKED_ARRAY;
+            }
 
-          if (compute->size_array_rows_variable &&
-              index1 > compute->size_array_rows) value1 = 0.0;
-          else value1 = compute->array[index1-1][index2-1];
-          if (tree) {
             auto newtree = new Tree();
-            newtree->type = VALUE;
-            newtree->value = value1;
+            newtree->type = VECTORARRAY;
+            newtree->array = &compute->array[0][index1-1];
+            newtree->nvector = compute->size_array_rows;
+            newtree->nstride = compute->size_array_cols;
             treestack[ntreestack++] = newtree;
-          } else argstack[nargstack++] = value1;
 
-        // c_ID = vector from global vector, lowercase or uppercase
+          // no other possibilities for vector-style variable, so error
 
-        } else if (nbracket == 0 && compute->vector_flag) {
+          } else print_var_error(FLERR,"Mismatched compute in variable formula",ivar);
 
-          if (tree == nullptr)
-            print_var_error(FLERR,"Compute global vector in equal-style variable formula",ivar);
-          if (treetype == ATOM)
-            print_var_error(FLERR,"Compute global vector in atom-style variable formula",ivar);
-          if (compute->size_vector == 0)
-            print_var_error(FLERR,"Variable formula compute vector is zero length",ivar);
-          if (!compute->is_initialized())
-            print_var_error(FLERR,"Variable formula compute cannot be invoked before "
-                            "initialization by a run",ivar);
-          if (!(compute->invoked_flag & Compute::INVOKED_VECTOR)) {
-            compute->compute_vector();
-            compute->invoked_flag |= Compute::INVOKED_VECTOR;
-          }
+        // atom-style variable is being evaluated
 
-          auto newtree = new Tree();
-          newtree->type = VECTORARRAY;
-          newtree->array = compute->vector;
-          newtree->nvector = compute->size_vector;
-          newtree->nstride = 1;
-          treestack[ntreestack++] = newtree;
+        } else if (style[ivar] == ATOM) {
 
-        // c_ID[i] = vector from global array, lowercase or uppercase
+          // c_ID = vector from per-atom vector
 
-        } else if (nbracket == 1 && compute->array_flag) {
+          if (lowercase && nbracket == 0) {
 
-          if (tree == nullptr)
-            print_var_error(FLERR,"Compute global vector in equal-style variable formula",ivar);
-          if (treetype == ATOM)
-            print_var_error(FLERR,"Compute global vector in atom-style variable formula",ivar);
-          if (compute->size_array_rows == 0)
-            print_var_error(FLERR,"Variable formula compute array is zero length",ivar);
-          if (!compute->is_initialized())
-            print_var_error(FLERR,"Variable formula compute cannot be invoked before "
-                            "initialization by a run",ivar);
-          if (!(compute->invoked_flag & Compute::INVOKED_ARRAY)) {
-            compute->compute_array();
-            compute->invoked_flag |= Compute::INVOKED_ARRAY;
-          }
+            if (!compute->peratom_flag)
+              print_var_error(FLERR,"Mismatched compute in variable formula",ivar);
+            if (compute->size_peratom_cols)
+              print_var_error(FLERR,"Mismatched compute in variable formula",ivar);
+            if (!compute->is_initialized())
+              print_var_error(FLERR,"Variable formula compute cannot be invoked before "
+                              "initialization by a run",ivar);
+            if (!(compute->invoked_flag & Compute::INVOKED_PERATOM)) {
+              compute->compute_peratom();
+              compute->invoked_flag |= Compute::INVOKED_PERATOM;
+            }
 
-          auto newtree = new Tree();
-          newtree->type = VECTORARRAY;
-          newtree->array = &compute->array[0][index1-1];
-          newtree->nvector = compute->size_array_rows;
-          newtree->nstride = compute->size_array_cols;
-          treestack[ntreestack++] = newtree;
+            auto newtree = new Tree();
+            newtree->type = ATOMARRAY;
+            newtree->array = compute->vector_atom;
+            newtree->nstride = 1;
+            treestack[ntreestack++] = newtree;
 
-        // c_ID[i] = scalar from per-atom vector
+          // c_ID[i] = vector from per-atom array
 
-        } else if (nbracket == 1 && compute->peratom_flag &&
-                   compute->size_peratom_cols == 0) {
+          } else if (lowercase && nbracket == 1) {
 
-          if (!compute->is_initialized())
-            print_var_error(FLERR,"Variable formula compute cannot be invoked before "
-                            "initialization by a run",ivar);
-          if (!(compute->invoked_flag & Compute::INVOKED_PERATOM)) {
-            compute->compute_peratom();
-            compute->invoked_flag |= Compute::INVOKED_PERATOM;
-          }
+            if (!compute->peratom_flag)
+              print_var_error(FLERR,"Mismatched compute in variable formula",ivar);
+            if (!compute->size_peratom_cols)
+              print_var_error(FLERR,"Mismatched compute in variable formula",ivar);
+            if (index1 > compute->size_peratom_cols)
+              print_var_error(FLERR,"Variable formula compute array is accessed out-of-range",ivar,0);
+            if (!compute->is_initialized())
+              print_var_error(FLERR,"Variable formula compute cannot be invoked before "
+                              "initialization by a run",ivar);
+            if (!(compute->invoked_flag & Compute::INVOKED_PERATOM)) {
+              compute->compute_peratom();
+              compute->invoked_flag |= Compute::INVOKED_PERATOM;
+            }
 
-          peratom2global(1,nullptr,compute->vector_atom,1,index1,tree,
-                         treestack,ntreestack,argstack,nargstack);
+            auto newtree = new Tree();
+            newtree->type = ATOMARRAY;
+            newtree->array = nullptr;
+            if (compute->array_atom)
+              newtree->array = &compute->array_atom[0][index1-1];
+            newtree->nstride = compute->size_peratom_cols;
+            treestack[ntreestack++] = newtree;
 
-        // c_ID[i][j] = scalar from per-atom array
+          // no other possibilities for atom-style variable, so error
 
-        } else if (nbracket == 2 && compute->peratom_flag &&
-                   compute->size_peratom_cols > 0) {
-
-          if (index2 > compute->size_peratom_cols)
-            print_var_error(FLERR,"Variable formula compute array is accessed out-of-range",ivar,0);
-          if (!compute->is_initialized())
-            print_var_error(FLERR,"Variable formula compute cannot be invoked before "
-                            "initialization by a run",ivar);
-          if (!(compute->invoked_flag & Compute::INVOKED_PERATOM)) {
-            compute->compute_peratom();
-            compute->invoked_flag |= Compute::INVOKED_PERATOM;
-          }
-
-          if (compute->array_atom)
-            peratom2global(1,nullptr,&compute->array_atom[0][index2-1],compute->size_peratom_cols,index1,
-                           tree,treestack,ntreestack,argstack,nargstack);
-          else
-            peratom2global(1,nullptr,nullptr,compute->size_peratom_cols,index1,
-                           tree,treestack,ntreestack,argstack,nargstack);
-
-        // c_ID = vector from per-atom vector
-
-        } else if (nbracket == 0 && compute->peratom_flag &&
-                   compute->size_peratom_cols == 0) {
-
-          if (tree == nullptr)
-            print_var_error(FLERR,"Per-atom compute in equal-style variable formula",ivar);
-          if (treetype == VECTOR)
-            print_var_error(FLERR,"Per-atom compute in vector-style variable formula",ivar);
-          if (!compute->is_initialized())
-            print_var_error(FLERR,"Variable formula compute cannot be invoked before "
-                            "initialization by a run",ivar);
-          if (!(compute->invoked_flag & Compute::INVOKED_PERATOM)) {
-            compute->compute_peratom();
-            compute->invoked_flag |= Compute::INVOKED_PERATOM;
-          }
-
-          auto newtree = new Tree();
-          newtree->type = ATOMARRAY;
-          newtree->array = compute->vector_atom;
-          newtree->nstride = 1;
-          treestack[ntreestack++] = newtree;
-
-        // c_ID[i] = vector from per-atom array
-
-        } else if (nbracket == 1 && compute->peratom_flag &&
-                   compute->size_peratom_cols > 0) {
-
-          if (tree == nullptr)
-            print_var_error(FLERR,"Per-atom compute in equal-style variable formula",ivar);
-          if (treetype == VECTOR)
-            print_var_error(FLERR,"Per-atom compute in vector-style variable formula",ivar);
-          if (index1 > compute->size_peratom_cols)
-            print_var_error(FLERR,"Variable formula compute array is accessed out-of-range",ivar,0);
-          if (!compute->is_initialized())
-            print_var_error(FLERR,"Variable formula compute cannot be invoked before "
-                            "initialization by a run",ivar);
-          if (!(compute->invoked_flag & Compute::INVOKED_PERATOM)) {
-            compute->compute_peratom();
-            compute->invoked_flag |= Compute::INVOKED_PERATOM;
-          }
-
-          auto newtree = new Tree();
-          newtree->type = ATOMARRAY;
-          if (compute->array_atom)
-            newtree->array = &compute->array_atom[0][index1-1];
-          newtree->nstride = compute->size_peratom_cols;
-          treestack[ntreestack++] = newtree;
-
-        } else if (nbracket == 1 && compute->local_flag) {
-          print_var_error(FLERR,"Cannot access local data via indexing",ivar);
-        } else print_var_error(FLERR,"Mismatched compute in variable formula",ivar);
+          } else print_var_error(FLERR,"Mismatched compute in variable formula",ivar);
+        }
 
       // ----------------
       // fix
@@ -1732,7 +1746,6 @@ double Variable::evaluate(char *str, Tree **tree, int ivar)
         if (!fix)
           print_var_error(FLERR,fmt::format("Invalid fix ID '{}' in variable formula",word+2),ivar);
 
-
         // parse zero or one or two trailing brackets
         // point i beyond last bracket
         // nbracket = # of bracket pairs
@@ -1754,181 +1767,200 @@ double Variable::evaluate(char *str, Tree **tree, int ivar)
           }
         }
 
-        // f_ID = scalar from global scalar, must be lowercase
+        // equal-style variable is being evaluated
 
-        if (nbracket == 0 && fix->scalar_flag && lowercase) {
+        if (style[ivar] == EQUAL) {
 
-          if (update->whichflag > 0 && update->ntimestep % fix->global_freq)
-            print_var_error(FLERR,"Fix in variable not computed at a compatible time",ivar);
+          // f_ID = scalar from global scalar
+
+          if (lowercase && nbracket == 0) {
+
+            if (!fix->scalar_flag)
+              print_var_error(FLERR,"Mismatched fix in variable formula",ivar);
+            if (update->whichflag > 0 && update->ntimestep % fix->global_freq)
+              print_var_error(FLERR,"Fix in variable not computed at a compatible time",ivar);
+
+            value1 = fix->compute_scalar();
+            argstack[nargstack++] = value1;
+
+          // f_ID[i] = scalar from global vector
+
+          } else if (lowercase && nbracket == 1) {
+
+            if (!fix->vector_flag)
+              print_var_error(FLERR,"Mismatched fix in variable formula",ivar);
+            if (index1 > fix->size_vector &&
+                fix->size_vector_variable == 0)
+              print_var_error(FLERR,"Variable formula fix vector is accessed out-of-range",ivar,0);
+            if (update->whichflag > 0 && update->ntimestep % fix->global_freq)
+              print_var_error(FLERR,"Fix in variable not computed at a compatible time",ivar);
+
+            value1 = fix->compute_vector(index1-1);
+            argstack[nargstack++] = value1;
+
+          // f_ID[i][j] = scalar from global array
+
+          } else if (lowercase && nbracket == 2) {
+
+            if (!fix->array_flag)
+              print_var_error(FLERR,"Mismatched fix in variable formula",ivar);
+            if (index1 > fix->size_array_rows &&
+                fix->size_array_rows_variable == 0)
+              print_var_error(FLERR,"Variable formula fix array is accessed out-of-range",ivar,0);
+            if (index2 > fix->size_array_cols)
+              print_var_error(FLERR,"Variable formula fix array is accessed out-of-range",ivar,0);
+            if (update->whichflag > 0 && update->ntimestep % fix->global_freq)
+              print_var_error(FLERR,"Fix in variable not computed at a compatible time",ivar);
+
+            value1 = fix->compute_array(index1-1,index2-1);
+            argstack[nargstack++] = value1;
+
+          // F_ID[i] = scalar element of per-atom vector, note uppercase "F"
+
+          } else if (!lowercase && nbracket == 1) {
+
+            if (!fix->peratom_flag)
+              print_var_error(FLERR,"Mismatched fix in variable formula",ivar);
+            if (fix->size_peratom_cols)
+              print_var_error(FLERR,"Mismatched fix in variable formula",ivar);
+            if (update->whichflag > 0 &&
+                update->ntimestep % fix->peratom_freq)
+              print_var_error(FLERR,"Fix in variable not computed at a compatible time",ivar);
+
+            peratom2global(1,nullptr,fix->vector_atom,1,index1,tree,
+                           treestack,ntreestack,argstack,nargstack);
+
+          // F_ID[i][j] = scalar element of per-atom array, note uppercase "F"
+
+          } else if (!lowercase && nbracket == 2) {
+
+            if (!fix->peratom_flag)
+              print_var_error(FLERR,"Mismatched fix in variable formula",ivar);
+            if (!fix->size_peratom_cols)
+              print_var_error(FLERR,"Mismatched fix in variable formula",ivar);
+            if (index2 > fix->size_peratom_cols)
+              print_var_error(FLERR,"Variable formula fix array is accessed out-of-range",ivar,0);
+            if (update->whichflag > 0 && update->ntimestep % fix->peratom_freq)
+              print_var_error(FLERR,"Fix in variable not computed at a compatible time",ivar);
+
+            if (fix->array_atom)
+              peratom2global(1,nullptr,&fix->array_atom[0][index2-1],
+                             fix->size_peratom_cols,index1,
+                             tree,treestack,ntreestack,argstack,nargstack);
+            else
+              peratom2global(1,nullptr,nullptr,fix->size_peratom_cols,index1,
+                             tree,treestack,ntreestack,argstack,nargstack);
+
+          // no other possibilities for equal-style variable, so error
+
+          } else print_var_error(FLERR,"Mismatched fix in variable formula",ivar);
+
+        // vector-style variable is being evaluated
+
+        } else if (style[ivar] == VECTOR) {
+
+          // f_ID = vector from global vector
+
+          if (lowercase && nbracket == 0) {
+
+            if (!fix->vector_flag)
+              print_var_error(FLERR,"Mismatched fix in variable formula",ivar);
+            if (fix->size_vector == 0)
+              print_var_error(FLERR,"Variable formula fix vector is zero length",ivar);
+            if (update->whichflag > 0 && update->ntimestep % fix->global_freq)
+              print_var_error(FLERR,"Fix in variable not computed at compatible time",ivar);
+
+            int nvec = fix->size_vector;
+            double *vec;
+            memory->create(vec,nvec,"variable:values");
+            for (int m = 0; m < nvec; m++)
+              vec[m] = fix->compute_vector(m);
 
-          value1 = fix->compute_scalar();
-          if (tree) {
             auto newtree = new Tree();
-            newtree->type = VALUE;
-            newtree->value = value1;
+            newtree->type = VECTORARRAY;
+            newtree->array = vec;
+            newtree->nvector = nvec;
+            newtree->nstride = 1;
+            newtree->selfalloc = 1;
             treestack[ntreestack++] = newtree;
-          } else argstack[nargstack++] = value1;
 
-        // f_ID[i] = scalar from global vector, must be lowercase
+          // f_ID[i] = vector from global array
 
-        } else if (nbracket == 1 && fix->vector_flag && lowercase) {
+          } else if (lowercase && nbracket == 1) {
 
-          if (index1 > fix->size_vector &&
-              fix->size_vector_variable == 0)
-            print_var_error(FLERR,"Variable formula fix vector is accessed out-of-range",ivar,0);
-          if (update->whichflag > 0 && update->ntimestep % fix->global_freq)
-            print_var_error(FLERR,"Fix in variable not computed at a compatible time",ivar);
+            if (!fix->array_flag)
+              print_var_error(FLERR,"Mismatched fix in variable formula",ivar);
+            if (fix->size_array_rows == 0)
+              print_var_error(FLERR,"Variable formula fix array is zero length",ivar);
+            if (index1 > fix->size_array_cols)
+              print_var_error(FLERR,"Variable formula fix array is accessed out-of-range",ivar,0);
+            if (update->whichflag > 0 && update->ntimestep % fix->global_freq)
+              print_var_error(FLERR,"Fix in variable not computed at a compatible time",ivar);
+
+            int nvec = fix->size_array_rows;
+            double *vec;
+            memory->create(vec,nvec,"variable:values");
+            for (int m = 0; m < nvec; m++)
+              vec[m] = fix->compute_array(m,index1-1);
 
-          value1 = fix->compute_vector(index1-1);
-          if (tree) {
             auto newtree = new Tree();
-            newtree->type = VALUE;
-            newtree->value = value1;
+            newtree->type = VECTORARRAY;
+            newtree->array = vec;
+            newtree->nvector = nvec;
+            newtree->nstride = 1;
+            newtree->selfalloc = 1;
             treestack[ntreestack++] = newtree;
-          } else argstack[nargstack++] = value1;
 
-        // f_ID[i][j] = scalar from global array, must be lowercase
+          // no other possibilities for vector-style variable, so error
 
-        } else if (nbracket == 2 && fix->array_flag && lowercase) {
+          } else print_var_error(FLERR,"Mismatched fix in variable formula",ivar);
 
-          if (index1 > fix->size_array_rows &&
-              fix->size_array_rows_variable == 0)
-            print_var_error(FLERR,"Variable formula fix array is accessed out-of-range",ivar,0);
-          if (index2 > fix->size_array_cols)
-            print_var_error(FLERR,"Variable formula fix array is accessed out-of-range",ivar,0);
-          if (update->whichflag > 0 && update->ntimestep % fix->global_freq)
-            print_var_error(FLERR,"Fix in variable not computed at a compatible time",ivar);
+        // atom-style variable is being evaluated
+
+        } else if (style[ivar] == ATOM) {
+
+          // f_ID = vector from per-atom vector
+
+          if (lowercase && nbracket == 0) {
+
+            if (!fix->peratom_flag)
+              print_var_error(FLERR,"Mismatched fix in variable formula",ivar);
+            if (fix->size_peratom_cols)
+              print_var_error(FLERR,"Mismatched fix in variable formula",ivar);
+            if (update->whichflag > 0 && update->ntimestep % fix->peratom_freq)
+              print_var_error(FLERR,"Fix in variable not computed at compatible time",ivar);
 
-          value1 = fix->compute_array(index1-1,index2-1);
-          if (tree) {
             auto newtree = new Tree();
-            newtree->type = VALUE;
-            newtree->value = value1;
+            newtree->type = ATOMARRAY;
+            newtree->array = fix->vector_atom;
+            newtree->nstride = 1;
             treestack[ntreestack++] = newtree;
-          } else argstack[nargstack++] = value1;
 
-        // f_ID = vector from global vector, lowercase or uppercase
+          // f_ID[i] = vector from per-atom array
 
-        } else if (nbracket == 0 && fix->vector_flag) {
+          } else if (lowercase && nbracket == 1) {
 
-          if (update->whichflag > 0 && update->ntimestep % fix->global_freq)
-            print_var_error(FLERR,"Fix in variable not computed at compatible time",ivar);
-          if (tree == nullptr)
-            print_var_error(FLERR,"Fix global vector in equal-style variable formula",ivar);
-          if (treetype == ATOM)
-            print_var_error(FLERR,"Fix global vector in atom-style variable formula",ivar);
-          if (fix->size_vector == 0)
-            print_var_error(FLERR,"Variable formula fix vector is zero length",ivar);
+            if (!fix->peratom_flag)
+              print_var_error(FLERR,"Mismatched fix in variable formula",ivar);
+            if (!fix->size_peratom_cols)
+              print_var_error(FLERR,"Mismatched fix in variable formula",ivar);
+            if (index1 > fix->size_peratom_cols)
+              print_var_error(FLERR,"Variable formula fix array is accessed out-of-range",ivar,0);
+            if (update->whichflag > 0 && update->ntimestep % fix->peratom_freq)
+              print_var_error(FLERR,"Fix in variable not computed at compatible time",ivar);
 
-          int nvec = fix->size_vector;
-          double *vec;
-          memory->create(vec,nvec,"variable:values");
-          for (int m = 0; m < nvec; m++)
-            vec[m] = fix->compute_vector(m);
+            auto newtree = new Tree();
+            newtree->type = ATOMARRAY;
+            newtree->array = nullptr;
+            if (fix->array_atom)
+              newtree->array = &fix->array_atom[0][index1-1];
+            newtree->nstride = fix->size_peratom_cols;
+            treestack[ntreestack++] = newtree;
 
-          auto newtree = new Tree();
-          newtree->type = VECTORARRAY;
-          newtree->array = vec;
-          newtree->nvector = nvec;
-          newtree->nstride = 1;
-          newtree->selfalloc = 1;
-          treestack[ntreestack++] = newtree;
+          // no other possibilities for atom-style variable, so error
 
-        // f_ID[i] = vector from global array, lowercase or uppercase
-
-        } else if (nbracket == 1 && fix->array_flag) {
-
-          if (update->whichflag > 0 && update->ntimestep % fix->global_freq)
-            print_var_error(FLERR,"Fix in variable not computed at a compatible time",ivar);
-          if (tree == nullptr)
-            print_var_error(FLERR,"Fix global vector in equal-style variable formula",ivar);
-          if (treetype == ATOM)
-            print_var_error(FLERR,"Fix global vector in atom-style variable formula",ivar);
-          if (fix->size_array_rows == 0)
-            print_var_error(FLERR,"Variable formula fix array is zero length",ivar);
-
-          int nvec = fix->size_array_rows;
-          double *vec;
-          memory->create(vec,nvec,"variable:values");
-          for (int m = 0; m < nvec; m++)
-            vec[m] = fix->compute_array(m,index1-1);
-
-          auto newtree = new Tree();
-          newtree->type = VECTORARRAY;
-          newtree->array = vec;
-          newtree->nvector = nvec;
-          newtree->nstride = 1;
-          newtree->selfalloc = 1;
-          treestack[ntreestack++] = newtree;
-
-        // f_ID[i] = scalar from per-atom vector
-
-        } else if (nbracket == 1 && fix->peratom_flag &&
-                   fix->size_peratom_cols == 0) {
-
-          if (update->whichflag > 0 &&
-              update->ntimestep % fix->peratom_freq)
-            print_var_error(FLERR,"Fix in variable not computed at a compatible time",ivar);
-
-          peratom2global(1,nullptr,fix->vector_atom,1,index1,
-                         tree,treestack,ntreestack,argstack,nargstack);
-
-        // f_ID[i][j] = scalar from per-atom array
-
-        } else if (nbracket == 2 && fix->peratom_flag &&
-                   fix->size_peratom_cols > 0) {
-
-          if (index2 > fix->size_peratom_cols)
-            print_var_error(FLERR,"Variable formula fix array is accessed out-of-range",ivar,0);
-          if (update->whichflag > 0 &&
-              update->ntimestep % fix->peratom_freq)
-            print_var_error(FLERR,"Fix in variable not computed at a compatible time",ivar);
-
-          if (fix->array_atom)
-            peratom2global(1,nullptr,&fix->array_atom[0][index2-1],fix->size_peratom_cols,index1,
-                           tree,treestack,ntreestack,argstack,nargstack);
-          else
-            peratom2global(1,nullptr,nullptr,fix->size_peratom_cols,index1,
-                           tree,treestack,ntreestack,argstack,nargstack);
-
-        // f_ID = vector from per-atom vector
-
-        } else if (nbracket == 0 && fix->peratom_flag &&
-                   fix->size_peratom_cols == 0) {
-
-          if (tree == nullptr)
-            print_var_error(FLERR,"Per-atom fix in equal-style variable formula",ivar);
-          if (update->whichflag > 0 &&
-              update->ntimestep % fix->peratom_freq)
-            print_var_error(FLERR,"Fix in variable not computed at compatible time",ivar);
-
-          auto newtree = new Tree();
-          newtree->type = ATOMARRAY;
-          newtree->array = fix->vector_atom;
-          newtree->nstride = 1;
-          treestack[ntreestack++] = newtree;
-
-        // f_ID[i] = vector from per-atom array
-
-        } else if (nbracket == 1 && fix->peratom_flag &&
-                   fix->size_peratom_cols > 0) {
-
-          if (tree == nullptr)
-            print_var_error(FLERR,"Per-atom fix in equal-style variable formula",ivar);
-          if (index1 > fix->size_peratom_cols)
-            print_var_error(FLERR,"Variable formula fix array is accessed out-of-range",ivar,0);
-          if (update->whichflag > 0 &&
-              update->ntimestep % fix->peratom_freq)
-            print_var_error(FLERR,"Fix in variable not computed at compatible time",ivar);
-
-          auto newtree = new Tree();
-          newtree->type = ATOMARRAY;
-          if (fix->array_atom)
-            newtree->array = &fix->array_atom[0][index1-1];
-          newtree->nstride = fix->size_peratom_cols;
-          treestack[ntreestack++] = newtree;
-
-        } else print_var_error(FLERR,"Mismatched fix in variable formula",ivar);
+          } else print_var_error(FLERR,"Mismatched fix in variable formula",ivar);
+        }
 
       // ----------------
       // variable
@@ -1958,124 +1990,140 @@ double Variable::evaluate(char *str, Tree **tree, int ivar)
           i = ptr-str+1;
         }
 
-        // v_name = scalar from internal-style variable
-        // access value directly
+        // vname with no bracket
 
-        if (nbracket == 0 && style[ivar] == INTERNAL) {
+        if (nbracket == 0) {
 
-          value1 = dvalue[ivar];
-          if (tree) {
-            auto newtree = new Tree();
-            newtree->type = VALUE;
-            newtree->value = value1;
-            treestack[ntreestack++] = newtree;
-          } else argstack[nargstack++] = value1;
+          // scalar from internal-style variable
+          // access value directly
 
-        // v_name = scalar from non atom/atomfile & non vector-style variable
-        // access value via retrieve()
+          if (style[ivar] == INTERNAL) {
 
-        } else if (nbracket == 0 && style[ivar] != ATOM &&
-                   style[ivar] != ATOMFILE && style[ivar] != VECTOR) {
+            value1 = dvalue[ivar];
+            if (tree) {
+              auto newtree = new Tree();
+              newtree->type = VALUE;
+              newtree->value = value1;
+              treestack[ntreestack++] = newtree;
+            } else argstack[nargstack++] = value1;
 
-          char *var = retrieve(word+2);
-          if (var == nullptr)
-            print_var_error(FLERR,"Invalid variable evaluation in variable formula",ivar);
-          if (utils::is_double(var)) {
+            // scalar from any style variable except VECTOR, ATOM, ATOMFILE
+            // access value via retrieve()
+
+          } else if (style[ivar] != ATOM && style[ivar] != ATOMFILE && style[ivar] != VECTOR) {
+
+            char *var = retrieve(word+2);
+            if (var == nullptr)
+              print_var_error(FLERR,"Invalid variable evaluation in variable formula",ivar);
+            if (!utils::is_double(var))
+              print_var_error(FLERR,"Non-numeric variable value in variable formula",ivar);
             if (tree) {
               auto newtree = new Tree();
               newtree->type = VALUE;
               newtree->value = atof(var);
               treestack[ntreestack++] = newtree;
             } else argstack[nargstack++] = atof(var);
-          } else print_var_error(FLERR,"Non-numeric variable value in variable formula",ivar);
 
-        // v_name = per-atom vector from atom-style variable
-        // evaluate the atom-style variable as newtree
+          // vector from vector-style variable
+          // evaluate the vector-style variable, put result in newtree
 
-        } else if (nbracket == 0 && style[ivar] == ATOM) {
+          } else if (style[ivar] == VECTOR) {
 
-          if (tree == nullptr)
-            print_var_error(FLERR,"Atom-style variable in equal-style variable formula",ivar);
-          if (treetype == VECTOR)
-            print_var_error(FLERR,"Atom-style variable in vector-style variable formula",ivar);
+            if (tree == nullptr)
+              print_var_error(FLERR,"Vector-style variable in equal-style variable formula",ivar);
+            if (treetype == ATOM)
+              print_var_error(FLERR,"Vector-style variable in atom-style variable formula",ivar);
 
-          Tree *newtree = nullptr;
-          evaluate(data[ivar][0],&newtree,ivar);
-          treestack[ntreestack++] = newtree;
+            double *vec;
+            int nvec = compute_vector(ivar,&vec);
 
-        // v_name = per-atom vector from atomfile-style variable
-
-        } else if (nbracket == 0 && style[ivar] == ATOMFILE) {
-
-          if (tree == nullptr)
-            print_var_error(FLERR,"Atomfile-style variable in equal-style variable formula",ivar);
-          if (treetype == VECTOR)
-            print_var_error(FLERR,"Atomfile-style variable in vector-style variable formula",ivar);
-
-          auto newtree = new Tree();
-          newtree->type = ATOMARRAY;
-          newtree->array = reader[ivar]->fixstore->vstore;
-          newtree->nstride = 1;
-          treestack[ntreestack++] = newtree;
-
-        // v_name = vector from vector-style variable
-        // evaluate the vector-style variable, put result in newtree
-
-        } else if (nbracket == 0 && style[ivar] == VECTOR) {
-
-          if (tree == nullptr)
-            print_var_error(FLERR,"Vector-style variable in equal-style variable formula",ivar);
-          if (treetype == ATOM)
-            print_var_error(FLERR,"Vector-style variable in atom-style variable formula",ivar);
-
-          double *vec;
-          int nvec = compute_vector(ivar,&vec);
-
-          auto newtree = new Tree();
-          newtree->type = VECTORARRAY;
-          newtree->array = vec;
-          newtree->nvector = nvec;
-          newtree->nstride = 1;
-          treestack[ntreestack++] = newtree;
-
-        // v_name[N] = scalar from atom-style variable
-        // compute the per-atom variable in result
-        // use peratom2global to extract single value from result
-
-        } else if (nbracket && style[ivar] == ATOM) {
-
-          double *result;
-          memory->create(result,atom->nlocal,"variable:result");
-          compute_atom(ivar,0,result,1,0);
-          peratom2global(1,nullptr,result,1,index,tree,treestack,ntreestack,argstack,nargstack);
-          memory->destroy(result);
-
-        // v_name[N] = scalar from atomfile-style variable
-
-        } else if (nbracket && style[ivar] == ATOMFILE) {
-
-          peratom2global(1,nullptr,reader[ivar]->fixstore->vstore,1,index,
-                         tree,treestack,ntreestack,argstack,nargstack);
-
-        // v_name[N] = scalar from vector-style variable
-        // compute the vector-style variable, extract single value
-
-        } else if (nbracket && style[ivar] == VECTOR) {
-
-          double *vec;
-          int nvec = compute_vector(ivar,&vec);
-          if (index <= 0 || index > nvec)
-            print_var_error(FLERR,"Invalid index into vector-style variable",ivar);
-          int m = index;   // convert from tagint to int
-
-          if (tree) {
             auto newtree = new Tree();
-            newtree->type = VALUE;
-            newtree->value = vec[m-1];
+            newtree->type = VECTORARRAY;
+            newtree->array = vec;
+            newtree->nvector = nvec;
+            newtree->nstride = 1;
             treestack[ntreestack++] = newtree;
-          } else argstack[nargstack++] = vec[m-1];
 
-        } else print_var_error(FLERR,"Mismatched variable in variable formula",ivar);
+          // vector from atom-style variable
+          // evaluate the atom-style variable as newtree
+
+          } else if (style[ivar] == ATOM) {
+
+            if (tree == nullptr)
+              print_var_error(FLERR,"Atom-style variable in equal-style variable formula",ivar);
+            if (treetype == VECTOR)
+              print_var_error(FLERR,"Atom-style variable in vector-style variable formula",ivar);
+
+            Tree *newtree = nullptr;
+            evaluate(data[ivar][0],&newtree,ivar);
+            treestack[ntreestack++] = newtree;
+
+          // vector from atomfile-style variable
+          // point to the values in FixStore instance
+
+          } else if (style[ivar] == ATOMFILE) {
+
+            if (tree == nullptr)
+              print_var_error(FLERR,"Atomfile-style variable in equal-style variable formula",ivar);
+            if (treetype == VECTOR)
+              print_var_error(FLERR,"Atomfile-style variable in vector-style variable formula",ivar);
+
+            auto newtree = new Tree();
+            newtree->type = ATOMARRAY;
+            newtree->array = reader[ivar]->fixstore->vstore;
+            newtree->nstride = 1;
+            treestack[ntreestack++] = newtree;
+
+          // no other possibilities for variable with no bracket
+
+          } else print_var_error(FLERR,"Mismatched variable in variable formula",ivar);
+
+        // vname[i] with one bracket
+
+        } else if (nbracket == 1) {
+
+          // scalar from vector-style variable
+          // compute the vector-style variable, extract single value
+
+          if (style[ivar] == VECTOR) {
+
+            double *vec;
+            int nvec = compute_vector(ivar,&vec);
+            if (index <= 0 || index > nvec)
+              print_var_error(FLERR,"Invalid index into vector-style variable",ivar);
+            int m = index;   // convert from tagint to int
+
+            if (tree) {
+              auto newtree = new Tree();
+              newtree->type = VALUE;
+              newtree->value = vec[m-1];
+              treestack[ntreestack++] = newtree;
+            } else argstack[nargstack++] = vec[m-1];
+
+          // scalar from atom-style variable
+          // compute the per-atom variable in result
+          // use peratom2global to extract single value from result
+
+          } else if (style[ivar] == ATOM) {
+
+            double *result;
+            memory->create(result,atom->nlocal,"variable:result");
+            compute_atom(ivar,0,result,1,0);
+            peratom2global(1,nullptr,result,1,index,tree,treestack,ntreestack,argstack,nargstack);
+            memory->destroy(result);
+
+          // scalar from atomfile-style variable
+          // use peratom2global to extract single value from FixStore instance
+
+          } else if (style[ivar] == ATOMFILE) {
+
+            peratom2global(1,nullptr,reader[ivar]->fixstore->vstore,1,index,
+                           tree,treestack,ntreestack,argstack,nargstack);
+
+          // no other possibilities for variable with one bracket
+
+          } else print_var_error(FLERR,"Mismatched variable in variable formula",ivar);
+        }
 
       // ----------------
       // math/group/special/labelmap function or atom value/vector or
diff --git a/src/version.h b/src/version.h
index 572a274053..35780aa785 100644
--- a/src/version.h
+++ b/src/version.h
@@ -1,2 +1,2 @@
-#define LAMMPS_VERSION "2 Aug 2023"
+#define LAMMPS_VERSION "3 Aug 2023"
 #define LAMMPS_UPDATE "Development"
diff --git a/tools/lammps-gui/CMakeLists.txt b/tools/lammps-gui/CMakeLists.txt
index 8c59ec7dc2..e83db05fdd 100644
--- a/tools/lammps-gui/CMakeLists.txt
+++ b/tools/lammps-gui/CMakeLists.txt
@@ -1,6 +1,6 @@
 cmake_minimum_required(VERSION 3.16)
 
-project(lammps-gui VERSION 1.5.4 LANGUAGES CXX)
+project(lammps-gui VERSION 1.5.9 LANGUAGES CXX)
 
 set(CMAKE_AUTOUIC ON)
 set(CMAKE_AUTOMOC ON)
@@ -11,6 +11,42 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
 
 option(LAMMPS_GUI_USE_PLUGIN "Load LAMMPS library dynamically at runtime" OFF)
 mark_as_advanced(LAMMPS_GUI_USE_PLUGIN)
+option(LAMMPS_GUI_USE_QT5 "Prefer using Qt5 over Qt6" OFF)
+
+include(CheckIncludeFileCXX)
+# helper function: check for a usable omp.h header using the detected OpenMP C++ settings; sets HAVE_OMP_H_INCLUDE in the caller's scope (FALSE if OpenMP for C++ is not found)
+function(check_omp_h_include)
+  find_package(OpenMP COMPONENTS CXX QUIET)
+  if(OpenMP_CXX_FOUND)
+    set(CMAKE_REQUIRED_FLAGS ${OpenMP_CXX_FLAGS})
+    set(CMAKE_REQUIRED_INCLUDES ${OpenMP_CXX_INCLUDE_DIRS})
+    set(CMAKE_REQUIRED_LINK_OPTIONS ${OpenMP_CXX_FLAGS})
+    set(CMAKE_REQUIRED_LIBRARIES ${OpenMP_CXX_LIBRARIES})
+    check_include_file_cxx(omp.h _have_omp_h)
+  else()
+    set(_have_omp_h FALSE)
+  endif()
+  set(HAVE_OMP_H_INCLUDE ${_have_omp_h} PARENT_SCOPE)
+endfunction()
+
+# detect if we may enable OpenMP support by default
+set(BUILD_OMP_DEFAULT OFF)
+find_package(OpenMP COMPONENTS CXX QUIET)
+if(OpenMP_CXX_FOUND)
+  check_omp_h_include()
+  if(HAVE_OMP_H_INCLUDE)
+    set(BUILD_OMP_DEFAULT ON)
+  endif()
+endif()
+
+option(BUILD_OMP "Build with OpenMP support" ${BUILD_OMP_DEFAULT})
+if(BUILD_OMP)
+  find_package(OpenMP COMPONENTS CXX REQUIRED)
+  check_omp_h_include()
+  if(NOT HAVE_OMP_H_INCLUDE)
+    message(FATAL_ERROR "Cannot find the 'omp.h' header file required for full OpenMP support")
+  endif()
+endif()
 
 # checks
 # when this file is included as subdirectory in the LAMMPS build, many settings are directly imported
@@ -73,7 +109,16 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Windows")
 endif()
 
 # we require Qt 5 and at least version 5.12 at that.
-find_package(Qt5 5.12 REQUIRED COMPONENTS Widgets Charts)
+if(NOT LAMMPS_GUI_USE_QT5)
+  find_package(Qt6 6.2 QUIET COMPONENTS Widgets Charts)
+endif()
+if(NOT Qt6_FOUND)
+  find_package(Qt5 5.12 REQUIRED COMPONENTS Widgets Charts)
+  set(QT_VERSION_MAJOR 5)
+else()
+  set(QT_VERSION_MAJOR 6)
+endif()
+message(STATUS "Using Qt version ${Qt${QT_VERSION_MAJOR}_VERSION} for LAMMPS GUI")
 
 set(PROJECT_SOURCES
   main.cpp
@@ -84,6 +129,7 @@ set(PROJECT_SOURCES
   chartviewer.h
   codeeditor.cpp
   codeeditor.h
+  helpers.cpp
   highlighter.cpp
   highlighter.h
   imageviewer.cpp
@@ -104,7 +150,11 @@ set(PROJECT_SOURCES
   ${PLUGIN_LOADER_SRC}
   ${ICON_RC_FILE}
 )
-qt5_add_resources(PROJECT_SOURCES lammpsgui.qrc)
+if(QT_VERSION_MAJOR EQUAL 6)
+  qt6_add_resources(PROJECT_SOURCES lammpsgui.qrc)
+else()
+  qt5_add_resources(PROJECT_SOURCES lammpsgui.qrc)
+endif()
 
 if(APPLE)
   set(MACOSX_ICON_FILE ${LAMMPS_DIR}/cmake/packaging/lammps.icns)
@@ -112,10 +162,22 @@ if(APPLE)
   set(MACOSX_BACKGROUND_FILE ${LAMMPS_DIR}/cmake/packaging/LAMMPS_DMG_Background.png)
 endif()
 
-add_executable(lammps-gui
-  ${MACOSX_ICON_FILE}
-  ${PROJECT_SOURCES}
-)
+if(QT_VERSION_MAJOR EQUAL 6)
+  qt_add_executable(lammps-gui
+    MANUAL_FINALIZATION
+    ${MACOSX_ICON_FILE}
+    ${PROJECT_SOURCES}
+  )
+else()
+  add_executable(lammps-gui
+    ${MACOSX_ICON_FILE}
+    ${PROJECT_SOURCES}
+  )
+endif()
+
+if(QT_VERSION_MAJOR EQUAL 6)
+    qt_finalize_executable(lammps-gui)
+endif()
 
 # compilation settings
 if(LAMMPS_GUI_USE_PLUGIN)
@@ -127,7 +189,7 @@ else()
 endif()
 target_include_directories(lammps-gui PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
 target_compile_definitions(lammps-gui PRIVATE LAMMPS_GUI_VERSION="${PROJECT_VERSION}")
-target_link_libraries(lammps-gui PRIVATE Qt5::Widgets Qt5::Charts)
+target_link_libraries(lammps-gui PRIVATE Qt${QT_VERSION_MAJOR}::Widgets Qt${QT_VERSION_MAJOR}::Charts)
 if(BUILD_OMP)
   find_package(OpenMP COMPONENTS CXX REQUIRED)
   target_link_libraries(lammps-gui PRIVATE OpenMP::OpenMP_CXX)
@@ -165,13 +227,21 @@ if(APPLE)
     COMMAND ${CMAKE_COMMAND} -E copy_if_different ${LAMMPS_DIR}/doc/lammps.1 ${APP_CONTENTS}/share/lammps/man/man1/
     COMMAND ${CMAKE_COMMAND} -E create_symlink lammps.1 ${APP_CONTENTS}/share/lammps/man/man1/lmp.1
     COMMAND ${CMAKE_COMMAND} -E copy_if_different ${LAMMPS_DIR}/doc/msi2lmp.1 ${APP_CONTENTS}/share/lammps/man/man1
-    DEPENDS lammps-gui lammps
+    DEPENDS lammps-gui lammps lmp binary2txt stl_bin2txt msi2lmp phana
     COMMENT "Copying additional files into macOS app bundle tree"
     WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
   )
+  if(FFMPEG_EXECUTABLE)
+    add_custom_target(copy-ffmpeg
+      COMMAND ${CMAKE_COMMAND} -E copy_if_different ${FFMPEG_EXECUTABLE} ${APP_CONTENTS}/bin/
+      COMMENT "Copying FFMpeg into macOS app bundle tree"
+      DEPENDS complete-bundle
+    )
+    set(FFMPEG_TARGET copy-ffmpeg)
+  endif()
   add_custom_target(dmg
     COMMAND ${LAMMPS_DIR}/cmake/packaging/build_macos_dmg.sh
-    DEPENDS complete-bundle
+    DEPENDS complete-bundle ${FFMPEG_TARGET}
     COMMENT "Create Drag-n-Drop installer disk image from app bundle"
     BYPRODUCT LAMMPS-macOS-multiarch.dmg
     WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
@@ -200,7 +270,7 @@ elseif((CMAKE_SYSTEM_NAME STREQUAL "Windows") AND CMAKE_CROSSCOMPILING)
     COMMENT "Create zip file with windows binaries"
     BYPRODUCT LAMMPS-Win10-amd64.zip
     WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
-elseif(CMAKE_SYSTEM_NAME STREQUAL "Linux")
+elseif((CMAKE_SYSTEM_NAME STREQUAL "Linux") AND NOT LAMMPS_GUI_USE_PLUGIN)
   install(TARGETS lammps-gui DESTINATION ${CMAKE_INSTALL_BINDIR})
   install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/lammps-gui.desktop DESTINATION ${CMAKE_INSTALL_DATADIR}/applications/)
   install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/lammps-input.xml DESTINATION ${CMAKE_INSTALL_DATADIR}/mime/packages/)
diff --git a/tools/lammps-gui/TODO.md b/tools/lammps-gui/TODO.md
index e2ff9cb42f..ee05e67225 100644
--- a/tools/lammps-gui/TODO.md
+++ b/tools/lammps-gui/TODO.md
@@ -1,15 +1,29 @@
 LAMMPS-GUI TODO list:
 
-# Short term goals
+# Short term goals (v1.x)
 
-- add "syntax check" with enabled "-skiprun" flag
-- implement "static" completion for fix/compute/styles/region etc...
-- implement "dynamic" completion for variable names, group names, molecule names, compute/dump/fix/region/group IDs
 - implement indenting regions for (nested) loops?
+- implement data file manager GUI with the following features:
+   - import coordinates and topology via VMD molfile plugins
+   - import coordinates and topology from intermol
+   - import coordinates and topology from OpenBabel
+   - store data internally in a generalized YAML format
+   - add/remove columns to per-atom data
+   - change atom style for export to data file
+   - merge one system to another
+   - edit mapping between numeric and symbolic types. create labelmaps.
+   - import/export LAMMPS data and molecule files
+   - store coordinates internally as unwrapped coordinates
+   - recenter coordinates
+   - edit box boundaries
+   - readjust box to extent of atoms (with or without estimated radius)
+   - call to LAMMPS to create geometries from lattices (with/without molecule files) and STL files
+   - call to LAMMPS to generate visualizations of geometries
+   - edit force field parameters, e.g. apply charmm
 
-# Long term ideas
+# Long term ideas (v2.x)
 - rewrite entire application to build the App and its layout manually
-- port to Qt6
 - also a rewrite should establish consistent naming conventions. now we have a mix of LAMMPS style, Qt style, and others.
 - add option to attach a debugger to the running program (highly non-portable, need customization support in preferences)
 - write a "wizard" dialog that can be used for beginners to create an input file template for a few typical use scenarios
+  (could perhaps use some LLM-based AI to look up suggestions for answers?).
diff --git a/tools/lammps-gui/chartviewer.cpp b/tools/lammps-gui/chartviewer.cpp
index 7ea8a77385..fbd888f1cd 100644
--- a/tools/lammps-gui/chartviewer.cpp
+++ b/tools/lammps-gui/chartviewer.cpp
@@ -13,41 +13,70 @@
 
 #include "chartviewer.h"
 
+#include "lammpsgui.h"
+
+#include <QAction>
+#include <QApplication>
+#include <QFileDialog>
 #include <QHBoxLayout>
+#include <QKeySequence>
+#include <QLabel>
+#include <QLayout>
 #include <QLineSeries>
+#include <QMenu>
+#include <QMenuBar>
+#include <QPushButton>
 #include <QSettings>
 #include <QSpacerItem>
+#include <QTextStream>
 #include <QVBoxLayout>
 
 using namespace QtCharts;
 
 ChartWindow::ChartWindow(const QString &_filename, QWidget *parent) :
-    QWidget(parent), menu(new QMenuBar), file(new QMenu("&File")), active_chart(-1),
-    filename(_filename)
+    QWidget(parent), menu(new QMenuBar), file(new QMenu("&File")), filename(_filename)
 {
     auto *top = new QHBoxLayout;
     menu->addMenu(file);
     menu->setSizePolicy(QSizePolicy::Minimum, QSizePolicy::Preferred);
 
+    // workaround for incorrect highlight bug on macOS
+    auto *dummy = new QPushButton(QIcon(), "");
+    dummy->hide();
+    auto *normal = new QPushButton(QIcon(":/icons/gtk-zoom-fit.png"), "");
+    normal->setToolTip("Reset zoom to normal");
+
     columns = new QComboBox;
     top->addWidget(menu);
     top->addSpacerItem(new QSpacerItem(1, 1, QSizePolicy::Expanding, QSizePolicy::Minimum));
+    top->addWidget(dummy);
+    top->addWidget(normal);
     top->addWidget(new QLabel("Select data:"));
     top->addWidget(columns);
     saveAsAct = file->addAction("&Save Graph As...", this, &ChartWindow::saveAs);
-    saveAsAct->setIcon(QIcon(":/document-save-as.png"));
+    saveAsAct->setIcon(QIcon(":/icons/document-save-as.png"));
     exportCsvAct = file->addAction("&Export data to CSV...", this, &ChartWindow::exportCsv);
-    exportCsvAct->setIcon(QIcon(":/application-calc.png"));
+    exportCsvAct->setIcon(QIcon(":/icons/application-calc.png"));
     exportDatAct = file->addAction("Export data to &Gnuplot...", this, &ChartWindow::exportDat);
-    exportDatAct->setIcon(QIcon(":/application-plot.png"));
+    exportDatAct->setIcon(QIcon(":/icons/application-plot.png"));
     file->addSeparator();
+    stopAct = file->addAction("Stop &Run", this, &ChartWindow::stop_run);
+    stopAct->setIcon(QIcon(":/icons/process-stop.png"));
+    stopAct->setShortcut(QKeySequence(Qt::CTRL | Qt::Key_Slash));
     closeAct = file->addAction("&Close", this, &QWidget::close);
-    closeAct->setIcon(QIcon(":/window-close.png"));
+    closeAct->setIcon(QIcon(":/icons/window-close.png"));
+    closeAct->setShortcut(QKeySequence(Qt::CTRL | Qt::Key_W));
+    quitAct = file->addAction("&Quit", this, &ChartWindow::quit);
+    quitAct->setIcon(QIcon(":/icons/application-exit.png"));
+    quitAct->setShortcut(QKeySequence(Qt::CTRL | Qt::Key_Q));
     auto *layout = new QVBoxLayout;
     layout->addLayout(top);
     setLayout(layout);
 
+    connect(normal, &QPushButton::released, this, &ChartWindow::reset_zoom);
     connect(columns, SIGNAL(currentIndexChanged(int)), this, SLOT(change_chart(int)));
+    installEventFilter(this);
+
     QSettings settings;
     resize(settings.value("chartx", 500).toInt(), settings.value("charty", 320).toInt());
 }
@@ -56,7 +85,10 @@ int ChartWindow::get_step() const
 {
     if (charts.size() > 0) {
         auto *v = charts[0];
-        return (int)v->get_step(v->get_count() - 1);
+        if (v)
+          return (int)v->get_step(v->get_count() - 1);
+        else
+          return -1;
     } else {
         return -1;
     }
@@ -74,7 +106,6 @@ void ChartWindow::reset_charts()
     }
     charts.clear();
     columns->clear();
-    active_chart = 0;
 }
 
 void ChartWindow::add_chart(const QString &title, int index)
@@ -86,7 +117,6 @@ void ChartWindow::add_chart(const QString &title, int index)
     // hide all but the first chart added
     if (charts.size() > 0) chart->hide();
     charts.append(chart);
-    active_chart = 0;
 }
 
 void ChartWindow::add_data(int step, double data, int index)
@@ -95,21 +125,45 @@ void ChartWindow::add_data(int step, double data, int index)
         if (c->get_index() == index) c->add_data(step, data);
 }
 
+void ChartWindow::quit()
+{
+    LammpsGui *gui = nullptr; // main window; last top-level widget named "LammpsGui" wins
+    for (auto *w : QApplication::topLevelWidgets())
+        gui = (w->objectName() == "LammpsGui") ? dynamic_cast<LammpsGui *>(w) : gui;
+    if (gui != nullptr) gui->quit();
+}
+
+void ChartWindow::reset_zoom()
+{
+    const int choice = columns->currentData().toInt(); // column index, not position in 'charts'
+    for (auto &c : charts) if (choice == c->get_index()) c->reset_zoom();
+}
+
+void ChartWindow::stop_run()
+{
+    LammpsGui *gui = nullptr; // main window; last top-level widget named "LammpsGui" wins
+    for (auto *w : QApplication::topLevelWidgets())
+        gui = (w->objectName() == "LammpsGui") ? dynamic_cast<LammpsGui *>(w) : gui;
+    if (gui != nullptr) gui->stop_run();
+}
+
 void ChartWindow::saveAs()
 {
-    if (charts.empty() || (active_chart < 0)) return;
+    if (charts.empty()) return;
     QString defaultname = filename + "." + columns->currentText() + ".png";
     if (filename.isEmpty()) defaultname = columns->currentText() + ".png";
     QString fileName = QFileDialog::getSaveFileName(this, "Save Chart as Image", defaultname,
                                                     "Image Files (*.jpg *.png *.bmp *.ppm)");
     if (!fileName.isEmpty()) {
-        charts[active_chart]->grab().save(fileName);
+        int choice = columns->currentData().toInt();
+        for (auto &c : charts)
+            if (choice == c->get_index()) c->grab().save(fileName);
     }
 }
 
 void ChartWindow::exportDat()
 {
-    if (charts.empty() || (active_chart < 0)) return;
+    if (charts.empty()) return;
     QString defaultname = filename + ".dat";
     if (filename.isEmpty()) defaultname = "lammpsdata.dat";
     QString fileName = QFileDialog::getSaveFileName(this, "Save Chart as Gnuplot data", defaultname,
@@ -117,25 +171,24 @@ void ChartWindow::exportDat()
     if (!fileName.isEmpty()) {
         QFile file(fileName);
         if (file.open(QIODevice::WriteOnly | QIODevice::Text)) {
+            QTextStream out(&file);
+            constexpr int fw = 16;
+            out.setFieldAlignment(QTextStream::AlignRight);
+            out.setRealNumberPrecision(8);
 
-            file.write("# Thermodynamic data from ");
-            file.write(filename.toLocal8Bit());
-            file.write("\n# Columns:");
-            for (auto &c : charts) {
-                file.write(" ");
-                file.write(c->get_title());
-            }
-            file.write("\n");
+            out << "# Thermodynamic data from " << filename << "\n";
+            out << "#          Step";
+            for (auto &c : charts)
+                out << qSetFieldWidth(0) << ' ' << qSetFieldWidth(fw) << c->get_title();
+            out << qSetFieldWidth(0) << '\n';
 
             int lines = charts[0]->get_count();
             for (int i = 0; i < lines; ++i) {
                 // timestep
-                file.write(QString::number(charts[0]->get_step(i)).toLocal8Bit());
-                for (auto &c : charts) {
-                    file.write(" ");
-                    file.write(QString::number(c->get_data(i)).toLocal8Bit());
-                }
-                file.write("\n");
+                out << qSetFieldWidth(0) << ' ' << qSetFieldWidth(fw) << charts[0]->get_step(i);
+                for (auto &c : charts)
+                    out << qSetFieldWidth(0) << ' ' << qSetFieldWidth(fw) << c->get_data(i);
+                out << qSetFieldWidth(0) << '\n';
             }
             file.close();
         }
@@ -144,7 +197,7 @@ void ChartWindow::exportDat()
 
 void ChartWindow::exportCsv()
 {
-    if (charts.empty() || (active_chart < 0)) return;
+    if (charts.empty()) return;
     QString defaultname = filename + ".csv";
     if (filename.isEmpty()) defaultname = "lammpsdata.csv";
     QString fileName = QFileDialog::getSaveFileName(this, "Save Chart as CSV data", defaultname,
@@ -152,30 +205,28 @@ void ChartWindow::exportCsv()
     if (!fileName.isEmpty()) {
         QFile file(fileName);
         if (file.open(QIODevice::WriteOnly | QIODevice::Text)) {
+            QTextStream out(&file);
+            out.setRealNumberPrecision(8);
 
-            file.write("Step");
-            for (auto &c : charts) {
-                file.write(",");
-                file.write(c->get_title());
-            }
-            file.write("\n");
+            out << "Step";
+            for (auto &c : charts)
+                out << ',' << c->get_title();
+            out << '\n';
 
             int lines = charts[0]->get_count();
             for (int i = 0; i < lines; ++i) {
                 // timestep
-                file.write(QString::number(charts[0]->get_step(i)).toLocal8Bit());
-                for (auto &c : charts) {
-                    file.write(",");
-                    file.write(QString::number(c->get_data(i)).toLocal8Bit());
-                }
-                file.write("\n");
+                out << charts[0]->get_step(i);
+                for (auto &c : charts)
+                    out << ',' << c->get_data(i);
+                out << '\n';
             }
             file.close();
         }
     }
 }
 
-void ChartWindow::change_chart(int index)
+void ChartWindow::change_chart(int)
 {
     int choice = columns->currentData().toInt();
     for (auto &c : charts) {
@@ -196,6 +247,26 @@ void ChartWindow::closeEvent(QCloseEvent *event)
     QWidget::closeEvent(event);
 }
 
+// Event filter to handle "Ambiguous shortcut override" issues: Ctrl-/ and Ctrl-W are
+// processed right here at the ShortcutOverride stage, all else goes to the base class.
+bool ChartWindow::eventFilter(QObject *watched, QEvent *event)
+{
+    QKeyEvent *pressed = nullptr;
+    if (event->type() == QEvent::ShortcutOverride) pressed = dynamic_cast<QKeyEvent *>(event);
+
+    // not a (Ctrl-modified) key event -> nothing for us to do
+    if (!pressed || !pressed->modifiers().testFlag(Qt::ControlModifier))
+        return QWidget::eventFilter(watched, event);
+
+    switch (pressed->key()) {
+        case '/': stop_run(); break;
+        case 'W': close(); break;
+        default: return QWidget::eventFilter(watched, event);
+    }
+    event->accept();
+    return true;
+}
+
 /* -------------------------------------------------------------------- */
 
 ChartViewer::ChartViewer(const QString &title, int _index, QWidget *parent) :
@@ -229,7 +300,7 @@ void ChartViewer::add_data(int step, double data)
     if (last_step < step) {
         last_step = step;
         series->append(step, data);
-        auto points = series->pointsVector();
+        auto points = series->points();
 
         qreal xmin = 1.0e100;
         qreal xmax = -1.0e100;
@@ -246,6 +317,26 @@ void ChartViewer::add_data(int step, double data)
     }
 }
 
+/* -------------------------------------------------------------------- */
+
+void ChartViewer::reset_zoom()
+{
+    // compute the extent of all points in the series, starting from inverted sentinels
+    constexpr qreal sentinel = 1.0e100;
+    qreal xlo = sentinel, xhi = -sentinel;
+    qreal ylo = sentinel, yhi = -sentinel;
+    const auto data = series->points();
+    for (const auto &pt : data) {
+        xlo = qMin(xlo, pt.x());
+        xhi = qMax(xhi, pt.x());
+        ylo = qMin(ylo, pt.y());
+        yhi = qMax(yhi, pt.y());
+    }
+    // then pass that extent to both axes as their new range
+    xaxis->setRange(xlo, xhi);
+    yaxis->setRange(ylo, yhi);
+}
+
 // Local Variables:
 // c-basic-offset: 4
 // End:
diff --git a/tools/lammps-gui/chartviewer.h b/tools/lammps-gui/chartviewer.h
index 22f52a82d6..da0468eaf8 100644
--- a/tools/lammps-gui/chartviewer.h
+++ b/tools/lammps-gui/chartviewer.h
@@ -14,16 +14,17 @@
 #ifndef CHARTVIEWER_H
 #define CHARTVIEWER_H
 
+#include <QComboBox>
 #include <QList>
 #include <QString>
 #include <QWidget>
-#include <QtCharts>
 
 class QAction;
 class QMenuBar;
 class QMenu;
-class QComboBox;
+namespace QtCharts {
 class ChartViewer;
+}
 
 class ChartWindow : public QWidget {
     Q_OBJECT
@@ -42,6 +43,10 @@ public:
     void add_data(int step, double data, int index);
 
 private slots:
+    void quit();
+    void reset_zoom();
+    void stop_run();
+
     void saveAs();
     void exportDat();
     void exportCsv();
@@ -50,43 +55,50 @@ private slots:
 
 protected:
     void closeEvent(QCloseEvent *event) override;
+    bool eventFilter(QObject *watched, QEvent *event) override;
 
 private:
     QMenuBar *menu;
     QMenu *file;
     QComboBox *columns;
-    QAction *saveAsAct;
-    QAction *exportCsvAct;
-    QAction *exportDatAct;
-    QAction *closeAct;
+    QAction *saveAsAct, *exportCsvAct, *exportDatAct;
+    QAction *closeAct, *stopAct, *quitAct;
 
     QString filename;
-    int active_chart;
-    QList<ChartViewer *> charts;
+    QList<QtCharts::ChartViewer *> charts;
 };
 
 /* -------------------------------------------------------------------- */
 
-class ChartViewer : public QtCharts::QChartView {
+#include <QChart>
+#include <QChartView>
+#include <QLineSeries>
+#include <QValueAxis>
+
+namespace QtCharts {
+class ChartViewer : public QChartView {
     Q_OBJECT
 
 public:
     explicit ChartViewer(const QString &title, int index, QWidget *parent = nullptr);
 
     void add_data(int step, double data);
+    void reset_zoom();
+
     int get_index() const { return index; };
     int get_count() const { return series->count(); }
     const char *get_title() const { return series->name().toLocal8Bit(); }
-    double get_step(int index) const { return series->at(index).x(); }
-    double get_data(int index) const { return series->at(index).y(); }
+    double get_step(int index) const { return (index < 0) ? 0.0 : series->at(index).x(); }
+    double get_data(int index) const { return (index < 0) ? 0.0 : series->at(index).y(); }
 
 private:
     int last_step, index;
-    QtCharts::QChart *chart;
-    QtCharts::QLineSeries *series;
-    QtCharts::QValueAxis *xaxis;
-    QtCharts::QValueAxis *yaxis;
+    QChart *chart;
+    QLineSeries *series;
+    QValueAxis *xaxis;
+    QValueAxis *yaxis;
 };
+} // namespace QtCharts
 #endif
 
 // Local Variables:
diff --git a/tools/lammps-gui/codeeditor.cpp b/tools/lammps-gui/codeeditor.cpp
index 6c7ae33283..e95f576be0 100644
--- a/tools/lammps-gui/codeeditor.cpp
+++ b/tools/lammps-gui/codeeditor.cpp
@@ -19,8 +19,10 @@
 #include <QAction>
 #include <QCompleter>
 #include <QDesktopServices>
+#include <QDir>
 #include <QDragEnterEvent>
 #include <QDropEvent>
+#include <QFileInfo>
 #include <QIcon>
 #include <QKeySequence>
 #include <QMenu>
@@ -131,14 +133,14 @@ CodeEditor::CodeEditor(QWidget *parent) :
     minimize_comp(new QCompleter(this)), variable_comp(new QCompleter(this)),
     units_comp(new QCompleter(this)), group_comp(new QCompleter(this)),
     varname_comp(new QCompleter(this)), fixid_comp(new QCompleter(this)),
-    compid_comp(new QCompleter(this)), highlight(NO_HIGHLIGHT)
+    compid_comp(new QCompleter(this)), file_comp(new QCompleter(this)), highlight(NO_HIGHLIGHT)
 {
     help_action = new QShortcut(QKeySequence::fromString("Ctrl+?"), parent);
     connect(help_action, &QShortcut::activated, this, &CodeEditor::get_help);
 
     // set up completer class (without a model currently)
 #define COMPLETER_SETUP(completer)                                                            \
-    completer->setCompletionMode(QCompleter::PopupCompletion);                                \
+    completer->setCompletionMode(QCompleter::UnfilteredPopupCompletion);                      \
     completer->setModelSorting(QCompleter::CaseInsensitivelySortedModel);                     \
     completer->setWidget(this);                                                               \
     completer->setMaxVisibleItems(16);                                                        \
@@ -166,6 +168,7 @@ CodeEditor::CodeEditor(QWidget *parent) :
     COMPLETER_SETUP(varname_comp);
     COMPLETER_SETUP(fixid_comp);
     COMPLETER_SETUP(compid_comp);
+    COMPLETER_SETUP(file_comp);
 #undef COMPLETER_SETUP
 
     // initialize help system
@@ -173,7 +176,7 @@ CodeEditor::CodeEditor(QWidget *parent) :
     if (help_index.open(QIODevice::ReadOnly | QIODevice::Text)) {
         while (!help_index.atEnd()) {
             auto line  = QString(help_index.readLine());
-            auto words = line.trimmed().split(' ');
+            auto words = line.trimmed().replace('\t', ' ').split(' ');
             if (words.size() > 2) {
                 if (words.at(1) == "pair_style") {
                     pair_map[words.at(2)] = words.at(0);
@@ -233,6 +236,7 @@ CodeEditor::~CodeEditor()
     delete varname_comp;
     delete fixid_comp;
     delete compid_comp;
+    delete file_comp;
 }
 
 int CodeEditor::lineNumberAreaWidth()
@@ -319,7 +323,7 @@ QString CodeEditor::reformatLine(const QString &line)
         }
 
         // append remaining words with just a single blank added.
-        for (int i = 1; i < words.size(); ++i) {
+        for (std::size_t i = 1; i < words.size(); ++i) {
             newtext += ' ';
             newtext += words[i].c_str();
 
@@ -389,15 +393,19 @@ COMPLETER_INIT_FUNC(units, Units)
 void CodeEditor::setGroupList()
 {
     QStringList groups;
+#if QT_VERSION < QT_VERSION_CHECK(5, 15, 0)
+    QRegExp groupcmd(QStringLiteral("^\\s*group\\s+(\\S+)(\\s+|$)"));
+#else
     QRegularExpression groupcmd(QStringLiteral("^\\s*group\\s+(\\S+)(\\s+|$)"));
+#endif
     auto saved = textCursor();
     // reposition cursor to beginning of text and search for group commands
     auto cursor = textCursor();
     cursor.movePosition(QTextCursor::Start);
     setTextCursor(cursor);
     while (find(groupcmd)) {
-        auto words = textCursor().block().text().replace('\t', ' ').split(' ', Qt::SkipEmptyParts);
-        if ((words.size() > 1) && !groups.contains(words[1])) groups << words[1];
+        auto words = split_line(textCursor().block().text().replace('\t', ' ').toStdString());
+        if ((words.size() > 1) && !groups.contains(words[1].c_str())) groups << words[1].c_str();
     }
     groups.sort();
     groups.prepend(QStringLiteral("all"));
@@ -409,6 +417,11 @@ void CodeEditor::setGroupList()
 void CodeEditor::setVarNameList()
 {
     QStringList vars;
+
+    // variable "gui_run" is always defined by LAMMPS GUI
+    vars << QString("${gui_run}");
+    vars << QString("v_gui_run");
+
     LammpsWrapper *lammps = &qobject_cast<LammpsGui *>(parent())->lammps;
     int nvar              = lammps->id_count("variable");
     char buffer[200];
@@ -419,20 +432,24 @@ void CodeEditor::setVarNameList()
         vars << QString("v_%1").arg(buffer);
     }
 
+#if QT_VERSION < QT_VERSION_CHECK(5, 15, 0)
+    QRegExp varcmd(QStringLiteral("^\\s*variable\\s+(\\S+)(\\s+|$)"));
+#else
     QRegularExpression varcmd(QStringLiteral("^\\s*variable\\s+(\\S+)(\\s+|$)"));
+#endif
     auto saved = textCursor();
     // reposition cursor to beginning of text and search for group commands
     auto cursor = textCursor();
     cursor.movePosition(QTextCursor::Start);
     setTextCursor(cursor);
     while (find(varcmd)) {
-        auto words = textCursor().block().text().replace('\t', ' ').split(' ', Qt::SkipEmptyParts);
+        auto words = split_line(textCursor().block().text().replace('\t', ' ').toStdString());
         if ((words.size() > 1)) {
-            QString w = QString("$%1").arg(words[1]);
+            QString w = QString("$%1").arg(words[1].c_str());
             if ((words[1].size() == 1) && !vars.contains(w)) vars << w;
-            w = QString("${%1}").arg(words[1]);
+            w = QString("${%1}").arg(words[1].c_str());
             if (!vars.contains(w)) vars << w;
-            w = QString("v_%1").arg(words[1]);
+            w = QString("v_%1").arg(words[1].c_str());
             if (!vars.contains(w)) vars << w;
         }
     }
@@ -445,18 +462,22 @@ void CodeEditor::setVarNameList()
 void CodeEditor::setComputeIDList()
 {
     QStringList compid;
+#if QT_VERSION < QT_VERSION_CHECK(5, 15, 0)
+    QRegExp compcmd(QStringLiteral("^\\s*compute\\s+(\\S+)\\s+"));
+#else
     QRegularExpression compcmd(QStringLiteral("^\\s*compute\\s+(\\S+)\\s+"));
+#endif
     auto saved = textCursor();
     // reposition cursor to beginning of text and search for group commands
     auto cursor = textCursor();
     cursor.movePosition(QTextCursor::Start);
     setTextCursor(cursor);
     while (find(compcmd)) {
-        auto words = textCursor().block().text().replace('\t', ' ').split(' ', Qt::SkipEmptyParts);
+        auto words = split_line(textCursor().block().text().replace('\t', ' ').toStdString());
         if ((words.size() > 1)) {
-            QString w = QString("c_%1").arg(words[1]);
+            QString w = QString("c_%1").arg(words[1].c_str());
             if (!compid.contains(w)) compid << w;
-            w = QString("C_%1").arg(words[1]);
+            w = QString("C_%1").arg(words[1].c_str());
             if (!compid.contains(w)) compid << w;
         }
     }
@@ -469,18 +490,22 @@ void CodeEditor::setComputeIDList()
 void CodeEditor::setFixIDList()
 {
     QStringList fixid;
+#if QT_VERSION < QT_VERSION_CHECK(5, 15, 0)
+    QRegExp fixcmd(QStringLiteral("^\\s*fix\\s+(\\S+)\\s+"));
+#else
     QRegularExpression fixcmd(QStringLiteral("^\\s*fix\\s+(\\S+)\\s+"));
+#endif
     auto saved = textCursor();
     // reposition cursor to beginning of text and search for group commands
     auto cursor = textCursor();
     cursor.movePosition(QTextCursor::Start);
     setTextCursor(cursor);
     while (find(fixcmd)) {
-        auto words = textCursor().block().text().replace('\t', ' ').split(' ', Qt::SkipEmptyParts);
+        auto words = split_line(textCursor().block().text().replace('\t', ' ').toStdString());
         if ((words.size() > 1)) {
-            QString w = QString("f_%1").arg(words[1]);
+            QString w = QString("f_%1").arg(words[1].c_str());
             if (!fixid.contains(w)) fixid << w;
-            w = QString("F_%1").arg(words[1]);
+            w = QString("F_%1").arg(words[1].c_str());
             if (!fixid.contains(w)) fixid << w;
         }
     }
@@ -490,6 +515,16 @@ void CodeEditor::setFixIDList()
     fixid_comp->setModel(new QStringListModel(fixid, fixid_comp));
 }
 
+void CodeEditor::setFileList()
+{
+    // offer the names of the files (QDir::Files filter) found in the current directory
+    const QDir cwd(".");
+    QStringList names;
+    for (const auto &entry : cwd.entryInfoList(QDir::Files)) names << entry.fileName();
+    names.sort();
+    file_comp->setModel(new QStringListModel(names, file_comp));
+}
+
 void CodeEditor::keyPressEvent(QKeyEvent *event)
 {
     const auto key = event->key();
@@ -522,26 +557,32 @@ void CodeEditor::keyPressEvent(QKeyEvent *event)
     }
 
     // automatically reformat when hitting the return or enter key
-    if (reformat_on_return && (key == Qt::Key_Return) || (key == Qt::Key_Enter)) {
+    if (reformat_on_return && ((key == Qt::Key_Return) || (key == Qt::Key_Enter))) {
         reformatCurrentLine();
     }
 
     // process key event in parent class
     QPlainTextEdit::keyPressEvent(event);
 
-    // if enabled, try pop up completion automatically after 3 characters
+    // if enabled, try pop up completion automatically after 2 characters
     if (automatic_completion) {
         auto cursor = textCursor();
         auto line   = cursor.block().text();
+        if (line.isEmpty()) return;
 
         // QTextCursor::WordUnderCursor is unusable here since recognizes '/' as word boundary.
         // Work around it by manually searching for the location of the beginning of the word.
-        int begin = cursor.positionInBlock();
+        int begin = qMin(cursor.positionInBlock(), line.length() - 1);
+
         while (begin >= 0) {
             if (line[begin].isSpace()) break;
             --begin;
         }
         if (((cursor.positionInBlock() - begin) > 2) || (line[begin + 1] == '$')) runCompletion();
+        if (current_comp && current_comp->popup()->isVisible() &&
+            ((cursor.positionInBlock() - begin) < 2)) {
+            current_comp->popup()->hide();
+        }
     }
 }
 
@@ -633,7 +674,9 @@ void CodeEditor::lineNumberAreaPaintEvent(QPaintEvent *event)
 
 void CodeEditor::contextMenuEvent(QContextMenuEvent *event)
 {
-    // reposition the cursor here?
+    // reposition the cursor here, but only if there is no active selection
+    if (!textCursor().hasSelection()) setTextCursor(cursorForPosition(event->pos()));
+
     QString page, help;
     find_help(page, help);
 
@@ -641,18 +684,18 @@ void CodeEditor::contextMenuEvent(QContextMenuEvent *event)
     auto *menu = createStandardContextMenu();
     menu->addSeparator();
     auto action = menu->addAction(QString("Display available completions for '%1'").arg(help));
-    action->setIcon(QIcon(":/expand-text.png"));
+    action->setIcon(QIcon(":/icons/expand-text.png"));
     connect(action, &QAction::triggered, this, &CodeEditor::runCompletion);
 
     if (!page.isEmpty()) {
         menu->addSeparator();
         action = menu->addAction(QString("Reformat '%1' command").arg(help));
-        action->setIcon(QIcon(":/format-indent-less-3.png"));
+        action->setIcon(QIcon(":/icons/format-indent-less-3.png"));
         connect(action, &QAction::triggered, this, &CodeEditor::reformatCurrentLine);
 
         menu->addSeparator();
         action = menu->addAction(QString("View Documentation for '%1'").arg(help));
-        action->setIcon(QIcon(":/system-help.png"));
+        action->setIcon(QIcon(":/icons/system-help.png"));
         action->setData(page);
         connect(action, &QAction::triggered, this, &CodeEditor::open_help);
         // if we link to help with specific styles (fix, compute, pair, bond, ...)
@@ -663,13 +706,13 @@ void CodeEditor::contextMenuEvent(QContextMenuEvent *event)
             page = words.at(0);
             page += ".html";
             auto action2 = menu->addAction(QString("View Documentation for '%1'").arg(help));
-            action2->setIcon(QIcon(":/system-help.png"));
+            action2->setIcon(QIcon(":/icons/system-help.png"));
             action2->setData(page);
             connect(action2, &QAction::triggered, this, &CodeEditor::open_help);
         }
     }
     auto action3 = menu->addAction(QString("LAMMPS Manual"));
-    action3->setIcon(QIcon(":/help-browser.png"));
+    action3->setIcon(QIcon(":/icons/help-browser.png"));
     action3->setData(QString());
     connect(action3, &QAction::triggered, this, &CodeEditor::open_help);
 
@@ -695,6 +738,9 @@ void CodeEditor::reformatCurrentLine()
 
 void CodeEditor::runCompletion()
 {
+    QAbstractItemView *popup = nullptr;
+    if (current_comp) popup = current_comp->popup();
+
     auto cursor = textCursor();
     auto line   = cursor.block().text().trimmed();
     // no completion possible on empty lines
@@ -704,7 +750,7 @@ void CodeEditor::runCompletion()
     // QTextCursor::WordUnderCursor is unusable here since it recognizes '/' as word boundary.
     // Work around it by manually searching for the beginning and end position of the word
     // under the cursor and then using that substring.
-    int begin = cursor.positionInBlock();
+    int begin = qMin(cursor.positionInBlock(), line.length() - 1);
     line      = cursor.block().text();
     while (begin >= 0) {
         if (line[begin].isSpace()) break;
@@ -721,7 +767,8 @@ void CodeEditor::runCompletion()
     if (selected.startsWith("$")) {
         current_comp = varname_comp;
         current_comp->setCompletionPrefix(selected);
-        auto popup = current_comp->popup();
+        if (popup && (popup != current_comp->popup())) popup->hide();
+        popup = current_comp->popup();
         // if the command is already a complete command, remove existing popup
         if (selected == current_comp->currentCompletion()) {
             if (popup->isVisible()) {
@@ -742,7 +789,8 @@ void CodeEditor::runCompletion()
 
         current_comp = command_comp;
         current_comp->setCompletionPrefix(words[0].c_str());
-        auto popup = current_comp->popup();
+        if (popup && (popup != current_comp->popup())) popup->hide();
+        popup = current_comp->popup();
         // if the command is already a complete command, remove existing popup
         if (words[0] == current_comp->currentCompletion().toStdString()) {
             if (popup->isVisible()) {
@@ -785,7 +833,14 @@ void CodeEditor::runCompletion()
         else if ((words[0] == "change_box") || (words[0] == "displace_atoms") ||
                  (words[0] == "velocity") || (words[0] == "write_dump"))
             current_comp = group_comp;
-        else if (selected.startsWith("v_"))
+        else if ((words[0] == "fitpod") || (words[0] == "include") || (words[0] == "ndx2group") ||
+                 (words[0] == "read_data") || (words[0] == "read_dump") ||
+                 (words[0] == "read_restart") || (words[0] == "rerun")) {
+            if (selected.contains('/')) {
+                if (popup && popup->isVisible()) popup->hide();
+            } else
+                current_comp = file_comp;
+        } else if (selected.startsWith("v_"))
             current_comp = varname_comp;
         else if (selected.startsWith("c_"))
             current_comp = compid_comp;
@@ -798,7 +853,8 @@ void CodeEditor::runCompletion()
 
         if (current_comp) {
             current_comp->setCompletionPrefix(words[1].c_str());
-            auto popup = current_comp->popup();
+            if (popup && (popup != current_comp->popup())) popup->hide();
+            popup = current_comp->popup();
             // if the command is already a complete command, remove existing popup
             if (words[1] == current_comp->currentCompletion().toStdString()) {
                 if (popup->isVisible()) popup->hide();
@@ -836,10 +892,16 @@ void CodeEditor::runCompletion()
             current_comp = fixid_comp;
         else if (selected.startsWith("F_"))
             current_comp = fixid_comp;
-
+        else if ((words[0] == "fitpod") || (words[0] == "molecule")) {
+            if (selected.contains('/')) {
+                if (popup && popup->isVisible()) popup->hide();
+            } else
+                current_comp = file_comp;
+        }
         if (current_comp) {
             current_comp->setCompletionPrefix(words[2].c_str());
-            auto popup = current_comp->popup();
+            if (popup && (popup != current_comp->popup())) popup->hide();
+            popup = current_comp->popup();
             // if the command is already a complete command, remove existing popup
             if (words[2] == current_comp->currentCompletion().toStdString()) {
                 if (popup->isVisible()) popup->hide();
@@ -863,7 +925,12 @@ void CodeEditor::runCompletion()
             current_comp = compute_comp;
         else if (words[0] == "dump")
             current_comp = dump_comp;
-        else if (selected.startsWith("v_"))
+        else if ((words[0] == "pair_coeff") && (words[1] == "*") && (words[2] == "*")) {
+            if (selected.contains('/')) {
+                if (popup && popup->isVisible()) popup->hide();
+            } else
+                current_comp = file_comp;
+        } else if (selected.startsWith("v_"))
             current_comp = varname_comp;
         else if (selected.startsWith("c_"))
             current_comp = compid_comp;
@@ -876,7 +943,8 @@ void CodeEditor::runCompletion()
 
         if (current_comp) {
             current_comp->setCompletionPrefix(words[3].c_str());
-            auto popup = current_comp->popup();
+            if (popup && (popup != current_comp->popup())) popup->hide();
+            popup = current_comp->popup();
             // if the command is already a complete command, remove existing popup
             if (words[3] == current_comp->currentCompletion().toStdString()) {
                 if (popup->isVisible()) popup->hide();
@@ -904,7 +972,8 @@ void CodeEditor::runCompletion()
 
         if (current_comp) {
             current_comp->setCompletionPrefix(selected);
-            auto popup = current_comp->popup();
+            if (popup && (popup != current_comp->popup())) popup->hide();
+            popup = current_comp->popup();
             // if the command is already a complete command, remove existing popup
             if (selected == current_comp->currentCompletion()) {
                 if (popup->isVisible()) popup->hide();
@@ -923,11 +992,27 @@ void CodeEditor::insertCompletedCommand(const QString &completion)
 {
     auto *completer = qobject_cast<QCompleter *>(sender());
     if (completer->widget() != this) return;
+
+    // select the entire word (non-space text) under the cursor. we need to do it in this
+    // complicated way, since QTextCursor does not recognize special characters as part of a
+    // word. the start index is clamped, as the cursor may sit past the last character.
     auto cursor = textCursor();
-    int extra   = completion.length() - completer->completionPrefix().length();
-    cursor.movePosition(QTextCursor::Left);
-    cursor.movePosition(QTextCursor::EndOfWord);
-    cursor.insertText(completion.right(extra));
+    auto line   = cursor.block().text();
+    int begin   = qMin(cursor.positionInBlock(), static_cast<int>(line.length()) - 1);
+    while (begin >= 0) { // walk left to the whitespace before the word (or to -1)
+        if (line[begin].isSpace()) break;
+        --begin;
+    }
+
+    int end = begin + 1;
+    while (end < line.length()) { // walk right to the whitespace after the word (or to the end)
+        if (line[end].isSpace()) break;
+        ++end;
+    }
+
+    cursor.setPosition(cursor.position() - cursor.positionInBlock() + begin + 1);
+    cursor.movePosition(QTextCursor::NextCharacter, QTextCursor::KeepAnchor, end - begin - 1);
+    cursor.insertText(completion);
     setTextCursor(cursor);
 }
 
diff --git a/tools/lammps-gui/codeeditor.h b/tools/lammps-gui/codeeditor.h
index 5f802d329d..703b2ffd3c 100644
--- a/tools/lammps-gui/codeeditor.h
+++ b/tools/lammps-gui/codeeditor.h
@@ -60,6 +60,7 @@ public:
     void setVarNameList();
     void setComputeIDList();
     void setFixIDList();
+    void setFileList();
 
     static constexpr int NO_HIGHLIGHT = 1 << 30;
 
@@ -87,7 +88,7 @@ private:
     QCompleter *current_comp, *command_comp, *fix_comp, *compute_comp, *dump_comp, *atom_comp,
         *pair_comp, *bond_comp, *angle_comp, *dihedral_comp, *improper_comp, *kspace_comp,
         *region_comp, *integrate_comp, *minimize_comp, *variable_comp, *units_comp, *group_comp,
-        *varname_comp, *fixid_comp, *compid_comp;
+        *varname_comp, *fixid_comp, *compid_comp, *file_comp;
 
     int highlight;
     bool reformat_on_return;
diff --git a/tools/lammps-gui/help_index.table b/tools/lammps-gui/help_index.table
index d32483760a..5ce4ae6203 100644
--- a/tools/lammps-gui/help_index.table
+++ b/tools/lammps-gui/help_index.table
@@ -196,6 +196,8 @@ compute_cluster_atom.html compute fragment/atom
 compute_cna_atom.html compute cna/atom
 compute_cnp_atom.html compute cnp/atom
 compute_com_chunk.html compute com/chunk
+compute_composition_atom.html compute composition/atom
+compute_composition_atom.html compute composition/atom/kk
 compute_com.html compute com
 compute_contact_atom.html compute contact/atom
 compute_coord_atom.html compute coord/atom
@@ -242,8 +244,6 @@ compute_ke_atom.html compute ke/atom
 compute_ke_eff.html compute ke/eff
 compute_ke_rigid.html compute ke/rigid
 compute_ke.html compute ke
-compute_local_comp_atom.html compute local/comp/atom
-compute_local_comp_atom.html compute local/comp/atom/kk
 compute_mliap.html compute mliap
 compute_modify.html compute_modify
 compute_momentum.html compute momentum
@@ -409,15 +409,12 @@ dump_netcdf.html dump netcdf/mpiio
 dump.html dump
 dump.html dump atom
 dump.html dump atom/gz
-dump.html dump atom/mpiio
 dump.html dump atom/zstd
 dump.html dump cfg
 dump.html dump cfg/gz
-dump.html dump cfg/mpiio
 dump.html dump cfg/zstd
 dump.html dump custom
 dump.html dump custom/gz
-dump.html dump custom/mpiio
 dump.html dump custom/zstd
 dump.html dump dcd
 dump.html dump grid
@@ -428,7 +425,6 @@ dump.html dump local/zstd
 dump.html dump xtc
 dump.html dump xyz
 dump.html dump xyz/gz
-dump.html dump xyz/mpiio
 dump.html dump xyz/zstd
 dump.html dump yaml
 dump_vtk.html dump vtk
diff --git a/tools/lammps-gui/helpers.cpp b/tools/lammps-gui/helpers.cpp
new file mode 100644
index 0000000000..8db7cd0d68
--- /dev/null
+++ b/tools/lammps-gui/helpers.cpp
@@ -0,0 +1,71 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "helpers.h"
+
+#include <QFile>
+#include <QFileInfo>
+#include <QProcess>
+#include <QStringList>
+
+// duplicate string, STL version: returns a new[]-allocated copy including the terminating NUL
+char *mystrdup(const std::string &text)
+{
+    auto tmp = new char[text.size() + 1];
+    memcpy(tmp, text.c_str(), text.size() + 1);
+    return tmp;
+}
+
+// duplicate string, pointer version: text is converted to std::string, so it must not be null
+char *mystrdup(const char *text)
+{
+    return mystrdup(std::string(text));
+}
+
+// duplicate string, Qt version: duplicates the result of text.toStdString()
+char *mystrdup(const QString &text)
+{
+    return mystrdup(text.toStdString());
+}
+
+// find if executable is in path by asking "where" (Windows) or "which" (elsewhere);
+// returns true only if the first reported match is an existing file
+// https://stackoverflow.com/a/51041497
+bool has_exe(const QString &exe)
+{
+    QProcess findProcess;
+    QStringList arguments;
+    arguments << exe;
+#if defined(_WIN32)
+    findProcess.start("where", arguments);
+#else
+    findProcess.start("which", arguments);
+#endif
+    findProcess.setReadChannel(QProcess::ProcessChannel::StandardOutput);
+
+    if (!findProcess.waitForFinished()) return false; // lookup tool failed to run or timed out
+
+    // the lookup tool may print several matches, one per line (e.g. "where" on Windows);
+    // only a single line is a path that can be checked, so keep just the first one
+    QString retStr(findProcess.readAll());
+    retStr = retStr.trimmed().section(QLatin1Char('\n'), 0, 0).trimmed();
+
+    QFile file(retStr);
+    QFileInfo check_file(file);
+    // empty output (nothing found) yields an empty path, which does not pass this check
+    return check_file.exists() && check_file.isFile();
+}
+
+// Local Variables:
+// c-basic-offset: 4
+// End:
diff --git a/tools/lammps-gui/helpers.h b/tools/lammps-gui/helpers.h
new file mode 100644
index 0000000000..a88233b0f3
--- /dev/null
+++ b/tools/lammps-gui/helpers.h
@@ -0,0 +1,31 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifndef HELPERS_H
+#define HELPERS_H
+
+#include <QString>
+#include <string>
+
+// duplicate string
+extern char *mystrdup(const std::string &text);
+extern char *mystrdup(const char *text);
+extern char *mystrdup(const QString &text);
+
+// find if executable is in path
+extern bool has_exe(const QString &exe);
+
+#endif
+// Local Variables:
+// c-basic-offset: 4
+// End:
diff --git a/tools/lammps-gui/highlighter.h b/tools/lammps-gui/highlighter.h
index 6edbc0ca96..03b4355e19 100644
--- a/tools/lammps-gui/highlighter.h
+++ b/tools/lammps-gui/highlighter.h
@@ -32,7 +32,7 @@ private:
     QRegularExpression isOutput1, isOutput2, isRead;
     QTextCharFormat formatOutput, formatRead, formatLattice, formatSetup;
     QRegularExpression isStyle, isForce, isDefine, isUndo;
-    QRegularExpression isParticle, isSetup, isSetup1, isRun;
+    QRegularExpression isParticle, isRun, isSetup, isSetup1;
     QTextCharFormat formatParticle, formatRun, formatDefine;
     QRegularExpression isVariable, isReference;
     QTextCharFormat formatVariable;
diff --git a/tools/lammps-gui/antialias.png b/tools/lammps-gui/icons/antialias.png
similarity index 100%
rename from tools/lammps-gui/antialias.png
rename to tools/lammps-gui/icons/antialias.png
diff --git a/tools/lammps-gui/application-calc.png b/tools/lammps-gui/icons/application-calc.png
similarity index 100%
rename from tools/lammps-gui/application-calc.png
rename to tools/lammps-gui/icons/application-calc.png
diff --git a/tools/lammps-gui/application-exit.png b/tools/lammps-gui/icons/application-exit.png
similarity index 100%
rename from tools/lammps-gui/application-exit.png
rename to tools/lammps-gui/icons/application-exit.png
diff --git a/tools/lammps-gui/application-plot.png b/tools/lammps-gui/icons/application-plot.png
similarity index 100%
rename from tools/lammps-gui/application-plot.png
rename to tools/lammps-gui/icons/application-plot.png
diff --git a/tools/lammps-gui/axes-img.png b/tools/lammps-gui/icons/axes-img.png
similarity index 100%
rename from tools/lammps-gui/axes-img.png
rename to tools/lammps-gui/icons/axes-img.png
diff --git a/tools/lammps-gui/document-new.png b/tools/lammps-gui/icons/document-new.png
similarity index 100%
rename from tools/lammps-gui/document-new.png
rename to tools/lammps-gui/icons/document-new.png
diff --git a/tools/lammps-gui/document-open-recent.png b/tools/lammps-gui/icons/document-open-recent.png
similarity index 100%
rename from tools/lammps-gui/document-open-recent.png
rename to tools/lammps-gui/icons/document-open-recent.png
diff --git a/tools/lammps-gui/document-open.png b/tools/lammps-gui/icons/document-open.png
similarity index 100%
rename from tools/lammps-gui/document-open.png
rename to tools/lammps-gui/icons/document-open.png
diff --git a/tools/lammps-gui/document-revert.png b/tools/lammps-gui/icons/document-revert.png
similarity index 100%
rename from tools/lammps-gui/document-revert.png
rename to tools/lammps-gui/icons/document-revert.png
diff --git a/tools/lammps-gui/document-save-as.png b/tools/lammps-gui/icons/document-save-as.png
similarity index 100%
rename from tools/lammps-gui/document-save-as.png
rename to tools/lammps-gui/icons/document-save-as.png
diff --git a/tools/lammps-gui/document-save.png b/tools/lammps-gui/icons/document-save.png
similarity index 100%
rename from tools/lammps-gui/document-save.png
rename to tools/lammps-gui/icons/document-save.png
diff --git a/tools/lammps-gui/edit-copy.png b/tools/lammps-gui/icons/edit-copy.png
similarity index 100%
rename from tools/lammps-gui/edit-copy.png
rename to tools/lammps-gui/icons/edit-copy.png
diff --git a/tools/lammps-gui/edit-cut.png b/tools/lammps-gui/icons/edit-cut.png
similarity index 100%
rename from tools/lammps-gui/edit-cut.png
rename to tools/lammps-gui/icons/edit-cut.png
diff --git a/tools/lammps-gui/edit-delete.png b/tools/lammps-gui/icons/edit-delete.png
similarity index 100%
rename from tools/lammps-gui/edit-delete.png
rename to tools/lammps-gui/icons/edit-delete.png
diff --git a/tools/lammps-gui/edit-paste.png b/tools/lammps-gui/icons/edit-paste.png
similarity index 100%
rename from tools/lammps-gui/edit-paste.png
rename to tools/lammps-gui/icons/edit-paste.png
diff --git a/tools/lammps-gui/edit-redo.png b/tools/lammps-gui/icons/edit-redo.png
similarity index 100%
rename from tools/lammps-gui/edit-redo.png
rename to tools/lammps-gui/icons/edit-redo.png
diff --git a/tools/lammps-gui/edit-undo.png b/tools/lammps-gui/icons/edit-undo.png
similarity index 100%
rename from tools/lammps-gui/edit-undo.png
rename to tools/lammps-gui/icons/edit-undo.png
diff --git a/tools/lammps-gui/emblem-photos.png b/tools/lammps-gui/icons/emblem-photos.png
similarity index 100%
rename from tools/lammps-gui/emblem-photos.png
rename to tools/lammps-gui/icons/emblem-photos.png
diff --git a/tools/lammps-gui/expand-text.png b/tools/lammps-gui/icons/expand-text.png
similarity index 100%
rename from tools/lammps-gui/expand-text.png
rename to tools/lammps-gui/icons/expand-text.png
diff --git a/tools/lammps-gui/icons/export-movie.png b/tools/lammps-gui/icons/export-movie.png
new file mode 100644
index 0000000000..f5ab58b0b5
Binary files /dev/null and b/tools/lammps-gui/icons/export-movie.png differ
diff --git a/tools/lammps-gui/format-indent-less-3.png b/tools/lammps-gui/icons/format-indent-less-3.png
similarity index 100%
rename from tools/lammps-gui/format-indent-less-3.png
rename to tools/lammps-gui/icons/format-indent-less-3.png
diff --git a/tools/lammps-gui/go-first.png b/tools/lammps-gui/icons/go-first.png
similarity index 100%
rename from tools/lammps-gui/go-first.png
rename to tools/lammps-gui/icons/go-first.png
diff --git a/tools/lammps-gui/go-last.png b/tools/lammps-gui/icons/go-last.png
similarity index 100%
rename from tools/lammps-gui/go-last.png
rename to tools/lammps-gui/icons/go-last.png
diff --git a/tools/lammps-gui/go-next-2.png b/tools/lammps-gui/icons/go-next-2.png
similarity index 100%
rename from tools/lammps-gui/go-next-2.png
rename to tools/lammps-gui/icons/go-next-2.png
diff --git a/tools/lammps-gui/go-previous-2.png b/tools/lammps-gui/icons/go-previous-2.png
similarity index 100%
rename from tools/lammps-gui/go-previous-2.png
rename to tools/lammps-gui/icons/go-previous-2.png
diff --git a/tools/lammps-gui/gtk-go-down.png b/tools/lammps-gui/icons/gtk-go-down.png
similarity index 100%
rename from tools/lammps-gui/gtk-go-down.png
rename to tools/lammps-gui/icons/gtk-go-down.png
diff --git a/tools/lammps-gui/gtk-go-up.png b/tools/lammps-gui/icons/gtk-go-up.png
similarity index 100%
rename from tools/lammps-gui/gtk-go-up.png
rename to tools/lammps-gui/icons/gtk-go-up.png
diff --git a/tools/lammps-gui/gtk-zoom-fit.png b/tools/lammps-gui/icons/gtk-zoom-fit.png
similarity index 100%
rename from tools/lammps-gui/gtk-zoom-fit.png
rename to tools/lammps-gui/icons/gtk-zoom-fit.png
diff --git a/tools/lammps-gui/gtk-zoom-in.png b/tools/lammps-gui/icons/gtk-zoom-in.png
similarity index 100%
rename from tools/lammps-gui/gtk-zoom-in.png
rename to tools/lammps-gui/icons/gtk-zoom-in.png
diff --git a/tools/lammps-gui/gtk-zoom-out.png b/tools/lammps-gui/icons/gtk-zoom-out.png
similarity index 100%
rename from tools/lammps-gui/gtk-zoom-out.png
rename to tools/lammps-gui/icons/gtk-zoom-out.png
diff --git a/tools/lammps-gui/hd-img.png b/tools/lammps-gui/icons/hd-img.png
similarity index 100%
rename from tools/lammps-gui/hd-img.png
rename to tools/lammps-gui/icons/hd-img.png
diff --git a/tools/lammps-gui/help-about.png b/tools/lammps-gui/icons/help-about.png
similarity index 100%
rename from tools/lammps-gui/help-about.png
rename to tools/lammps-gui/icons/help-about.png
diff --git a/tools/lammps-gui/help-browser.png b/tools/lammps-gui/icons/help-browser.png
similarity index 100%
rename from tools/lammps-gui/help-browser.png
rename to tools/lammps-gui/icons/help-browser.png
diff --git a/tools/lammps-gui/help-faq.png b/tools/lammps-gui/icons/help-faq.png
similarity index 100%
rename from tools/lammps-gui/help-faq.png
rename to tools/lammps-gui/icons/help-faq.png
diff --git a/tools/lammps-gui/image-x-generic.png b/tools/lammps-gui/icons/image-x-generic.png
similarity index 100%
rename from tools/lammps-gui/image-x-generic.png
rename to tools/lammps-gui/icons/image-x-generic.png
diff --git a/tools/lammps-gui/lammps-icon-128x128.png b/tools/lammps-gui/icons/lammps-icon-128x128.png
similarity index 100%
rename from tools/lammps-gui/lammps-icon-128x128.png
rename to tools/lammps-gui/icons/lammps-icon-128x128.png
diff --git a/tools/lammps-gui/media-playback-start-2.png b/tools/lammps-gui/icons/media-playback-start-2.png
similarity index 100%
rename from tools/lammps-gui/media-playback-start-2.png
rename to tools/lammps-gui/icons/media-playback-start-2.png
diff --git a/tools/lammps-gui/media-playlist-repeat.png b/tools/lammps-gui/icons/media-playlist-repeat.png
similarity index 100%
rename from tools/lammps-gui/media-playlist-repeat.png
rename to tools/lammps-gui/icons/media-playlist-repeat.png
diff --git a/tools/lammps-gui/object-rotate-left.png b/tools/lammps-gui/icons/object-rotate-left.png
similarity index 100%
rename from tools/lammps-gui/object-rotate-left.png
rename to tools/lammps-gui/icons/object-rotate-left.png
diff --git a/tools/lammps-gui/object-rotate-right.png b/tools/lammps-gui/icons/object-rotate-right.png
similarity index 100%
rename from tools/lammps-gui/object-rotate-right.png
rename to tools/lammps-gui/icons/object-rotate-right.png
diff --git a/tools/lammps-gui/ovito.png b/tools/lammps-gui/icons/ovito.png
similarity index 100%
rename from tools/lammps-gui/ovito.png
rename to tools/lammps-gui/icons/ovito.png
diff --git a/tools/lammps-gui/preferences-desktop-font.png b/tools/lammps-gui/icons/preferences-desktop-font.png
similarity index 100%
rename from tools/lammps-gui/preferences-desktop-font.png
rename to tools/lammps-gui/icons/preferences-desktop-font.png
diff --git a/tools/lammps-gui/preferences-desktop-personal.png b/tools/lammps-gui/icons/preferences-desktop-personal.png
similarity index 100%
rename from tools/lammps-gui/preferences-desktop-personal.png
rename to tools/lammps-gui/icons/preferences-desktop-personal.png
diff --git a/tools/lammps-gui/preferences-desktop.png b/tools/lammps-gui/icons/preferences-desktop.png
similarity index 100%
rename from tools/lammps-gui/preferences-desktop.png
rename to tools/lammps-gui/icons/preferences-desktop.png
diff --git a/tools/lammps-gui/process-stop.png b/tools/lammps-gui/icons/process-stop.png
similarity index 100%
rename from tools/lammps-gui/process-stop.png
rename to tools/lammps-gui/icons/process-stop.png
diff --git a/tools/lammps-gui/run-file.png b/tools/lammps-gui/icons/run-file.png
similarity index 100%
rename from tools/lammps-gui/run-file.png
rename to tools/lammps-gui/icons/run-file.png
diff --git a/tools/lammps-gui/system-box.png b/tools/lammps-gui/icons/system-box.png
similarity index 100%
rename from tools/lammps-gui/system-box.png
rename to tools/lammps-gui/icons/system-box.png
diff --git a/tools/lammps-gui/system-help.png b/tools/lammps-gui/icons/system-help.png
similarity index 100%
rename from tools/lammps-gui/system-help.png
rename to tools/lammps-gui/icons/system-help.png
diff --git a/tools/lammps-gui/system-run.png b/tools/lammps-gui/icons/system-run.png
similarity index 100%
rename from tools/lammps-gui/system-run.png
rename to tools/lammps-gui/icons/system-run.png
diff --git a/tools/lammps-gui/utilities-terminal.png b/tools/lammps-gui/icons/utilities-terminal.png
similarity index 100%
rename from tools/lammps-gui/utilities-terminal.png
rename to tools/lammps-gui/icons/utilities-terminal.png
diff --git a/tools/lammps-gui/vdw-style.png b/tools/lammps-gui/icons/vdw-style.png
similarity index 100%
rename from tools/lammps-gui/vdw-style.png
rename to tools/lammps-gui/icons/vdw-style.png
diff --git a/tools/lammps-gui/vmd.png b/tools/lammps-gui/icons/vmd.png
similarity index 100%
rename from tools/lammps-gui/vmd.png
rename to tools/lammps-gui/icons/vmd.png
diff --git a/tools/lammps-gui/window-close.png b/tools/lammps-gui/icons/window-close.png
similarity index 100%
rename from tools/lammps-gui/window-close.png
rename to tools/lammps-gui/icons/window-close.png
diff --git a/tools/lammps-gui/x-office-drawing.png b/tools/lammps-gui/icons/x-office-drawing.png
similarity index 100%
rename from tools/lammps-gui/x-office-drawing.png
rename to tools/lammps-gui/icons/x-office-drawing.png
diff --git a/tools/lammps-gui/imageviewer.cpp b/tools/lammps-gui/imageviewer.cpp
index 139d73cb38..00b08f3f47 100644
--- a/tools/lammps-gui/imageviewer.cpp
+++ b/tools/lammps-gui/imageviewer.cpp
@@ -12,15 +12,18 @@
 ------------------------------------------------------------------------- */
 
 #include "imageviewer.h"
+
+#include "lammpsgui.h"
 #include "lammpswrapper.h"
 
 #include <QAction>
-#include <QDialogButtonBox>
+#include <QApplication>
 #include <QDir>
 #include <QFileDialog>
 #include <QGuiApplication>
 #include <QImage>
 #include <QImageReader>
+#include <QKeySequence>
 #include <QLabel>
 #include <QLineEdit>
 #include <QMenuBar>
@@ -40,8 +43,80 @@
 
 #include <cmath>
 
-extern "C" {
-#include "periodic_table.h"
+// clang-format off
+/* periodic table of elements for translation of ordinal to atom type */
+static const char *pte_label[] = {
+    "X",  "H",  "He", "Li", "Be", "B",  "C",  "N",  "O",  "F",  "Ne",
+    "Na", "Mg", "Al", "Si", "P" , "S",  "Cl", "Ar", "K",  "Ca", "Sc",
+    "Ti", "V",  "Cr", "Mn", "Fe", "Co", "Ni", "Cu", "Zn", "Ga", "Ge",
+    "As", "Se", "Br", "Kr", "Rb", "Sr", "Y",  "Zr", "Nb", "Mo", "Tc",
+    "Ru", "Rh", "Pd", "Ag", "Cd", "In", "Sn", "Sb", "Te", "I",  "Xe",
+    "Cs", "Ba", "La", "Ce", "Pr", "Nd", "Pm", "Sm", "Eu", "Gd", "Tb",
+    "Dy", "Ho", "Er", "Tm", "Yb", "Lu", "Hf", "Ta", "W",  "Re", "Os",
+    "Ir", "Pt", "Au", "Hg", "Tl", "Pb", "Bi", "Po", "At", "Rn", "Fr",
+    "Ra", "Ac", "Th", "Pa", "U",  "Np", "Pu", "Am", "Cm", "Bk", "Cf",
+    "Es", "Fm", "Md", "No", "Lr", "Rf", "Db", "Sg", "Bh", "Hs", "Mt",
+    "Ds", "Rg"
+};
+static constexpr int nr_pte_entries = sizeof(pte_label) / sizeof(char *);
+
+/* corresponding table of masses. */
+static constexpr double pte_mass[] = {
+    /* X  */ 0.00000, 1.00794, 4.00260, 6.941, 9.012182, 10.811,
+    /* C  */ 12.0107, 14.0067, 15.9994, 18.9984032, 20.1797,
+    /* Na */ 22.989770, 24.3050, 26.981538, 28.0855, 30.973761,
+    /* S  */ 32.065, 35.453, 39.948, 39.0983, 40.078, 44.955910,
+    /* Ti */ 47.867, 50.9415, 51.9961, 54.938049, 55.845, 58.9332,
+    /* Ni */ 58.6934, 63.546, 65.409, 69.723, 72.64, 74.92160,
+    /* Se */ 78.96, 79.904, 83.798, 85.4678, 87.62, 88.90585,
+    /* Zr */ 91.224, 92.90638, 95.94, 98.0, 101.07, 102.90550,
+    /* Pd */ 106.42, 107.8682, 112.411, 114.818, 118.710, 121.760,
+    /* Te */ 127.60, 126.90447, 131.293, 132.90545, 137.327,
+    /* La */ 138.9055, 140.116, 140.90765, 144.24, 145.0, 150.36,
+    /* Eu */ 151.964, 157.25, 158.92534, 162.500, 164.93032,
+    /* Er */ 167.259, 168.93421, 173.04, 174.967, 178.49, 180.9479,
+    /* W  */ 183.84, 186.207, 190.23, 192.217, 195.078, 196.96655,
+    /* Hg */ 200.59, 204.3833, 207.2, 208.98038, 209.0, 210.0, 222.0,
+    /* Fr */ 223.0, 226.0, 227.0, 232.0381, 231.03588, 238.02891,
+    /* Np */ 237.0, 244.0, 243.0, 247.0, 247.0, 251.0, 252.0, 257.0,
+    /* Md */ 258.0, 259.0, 262.0, 261.0, 262.0, 266.0, 264.0, 269.0,
+    /* Mt */ 268.0, 271.0, 272.0
+};
+
+/*
+ * corresponding table of VDW radii.
+ * van der Waals radii are taken from A. Bondi,
+ * J. Phys. Chem., 68, 441 - 452, 1964,
+ * except the value for H, which is taken from R.S. Rowland & R. Taylor,
+ * J.Phys.Chem., 100, 7384 - 7391, 1996. Radii that are not available in
+ * either of these publications have RvdW = 2.00 \AA
+ * The radii for Ions (Na, K, Cl, Ca, Mg, and Cs are based on the CHARMM27
+ * Rmin/2 parameters for (SOD, POT, CLA, CAL, MG, CES) by default.
+ */
+static constexpr double pte_vdw_radius[] = {
+    /* X  */ 1.5, 1.2, 1.4, 1.82, 2.0, 2.0,
+    /* C  */ 1.7, 1.55, 1.52, 1.47, 1.54,
+    /* Na */ 1.36, 1.18, 2.0, 2.1, 1.8,
+    /* S  */ 1.8, 2.27, 1.88, 1.76, 1.37, 2.0,
+    /* Ti */ 2.0, 2.0, 2.0, 2.0, 2.0, 2.0,
+    /* Ni */ 1.63, 1.4, 1.39, 1.07, 2.0, 1.85,
+    /* Se */ 1.9, 1.85, 2.02, 2.0, 2.0, 2.0,
+    /* Zr */ 2.0, 2.0, 2.0, 2.0, 2.0, 2.0,
+    /* Pd */ 1.63, 1.72, 1.58, 1.93, 2.17, 2.0,
+    /* Te */ 2.06, 1.98, 2.16, 2.1, 2.0,
+    /* La */ 2.0, 2.0, 2.0, 2.0, 2.0, 2.0,
+    /* Eu */ 2.0, 2.0, 2.0, 2.0, 2.0,
+    /* Er */ 2.0, 2.0, 2.0, 2.0, 2.0, 2.0,
+    /* W  */ 2.0, 2.0, 2.0, 2.0, 1.72, 1.66,
+    /* Hg */ 1.55, 1.96, 2.02, 2.0, 2.0, 2.0, 2.0,
+    /* Fr */ 2.0, 2.0, 2.0, 2.0, 2.0, 1.86,
+    /* Np */ 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0,
+    /* Md */ 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0,
+    /* Mt */ 2.0, 2.0, 2.0
+};
+
+// clang-format on
+
 static int get_pte_from_mass(double mass)
 {
     int idx = 0;
@@ -52,13 +127,12 @@ static int get_pte_from_mass(double mass)
     if ((mass < 61.24) && (mass > 58.8133)) idx = 27;
     return idx;
 }
-}
 
 static const QString blank(" ");
 
 ImageViewer::ImageViewer(const QString &fileName, LammpsWrapper *_lammps, QWidget *parent) :
-    QDialog(parent), imageLabel(new QLabel), scrollArea(new QScrollArea), menuBar(new QMenuBar),
-    lammps(_lammps), group("all"), filename(fileName), useelements(false)
+    QDialog(parent), menuBar(new QMenuBar), imageLabel(new QLabel), scrollArea(new QScrollArea),
+    lammps(_lammps), group("all"), filename(fileName), useelements(false), usediameter(false)
 {
     imageLabel->setBackgroundRole(QPalette::Base);
     imageLabel->setSizePolicy(QSizePolicy::Ignored, QSizePolicy::Ignored);
@@ -69,17 +143,12 @@ ImageViewer::ImageViewer(const QString &fileName, LammpsWrapper *_lammps, QWidge
     scrollArea->setWidget(imageLabel);
     scrollArea->setVisible(false);
 
-    buttonBox = new QDialogButtonBox(QDialogButtonBox::Close);
-
-    connect(buttonBox, &QDialogButtonBox::accepted, this, &QDialog::accept);
-    connect(buttonBox, &QDialogButtonBox::rejected, this, &QDialog::reject);
-
     QVBoxLayout *mainLayout = new QVBoxLayout;
 
     QSettings settings;
 
     vdwfactor = 0.5;
-    auto pix  = QPixmap(":/emblem-photos.png");
+    auto pix  = QPixmap(":/icons/emblem-photos.png");
 
     auto *renderstatus = new QLabel(QString());
     renderstatus->setPixmap(pix.scaled(22, 22, Qt::KeepAspectRatio));
@@ -107,39 +176,39 @@ ImageViewer::ImageViewer(const QString &fileName, LammpsWrapper *_lammps, QWidge
     auto *dummy = new QPushButton(QIcon(), "");
     dummy->hide();
 
-    auto *dossao = new QPushButton(QIcon(":/hd-img.png"), "");
+    auto *dossao = new QPushButton(QIcon(":/icons/hd-img.png"), "");
     dossao->setCheckable(true);
     dossao->setToolTip("Toggle SSAO rendering");
     dossao->setObjectName("ssao");
-    auto *doanti = new QPushButton(QIcon(":/antialias.png"), "");
+    auto *doanti = new QPushButton(QIcon(":/icons/antialias.png"), "");
     doanti->setCheckable(true);
     doanti->setToolTip("Toggle anti-aliasing");
     doanti->setObjectName("antialias");
-    auto *dovdw = new QPushButton(QIcon(":/vdw-style.png"), "");
+    auto *dovdw = new QPushButton(QIcon(":/icons/vdw-style.png"), "");
     dovdw->setCheckable(true);
     dovdw->setToolTip("Toggle VDW style representation");
     dovdw->setObjectName("vdw");
-    auto *dobox = new QPushButton(QIcon(":/system-box.png"), "");
+    auto *dobox = new QPushButton(QIcon(":/icons/system-box.png"), "");
     dobox->setCheckable(true);
     dobox->setToolTip("Toggle displaying box");
     dobox->setObjectName("box");
-    auto *doaxes = new QPushButton(QIcon(":/axes-img.png"), "");
+    auto *doaxes = new QPushButton(QIcon(":/icons/axes-img.png"), "");
     doaxes->setCheckable(true);
     doaxes->setToolTip("Toggle displaying axes");
     doaxes->setObjectName("axes");
-    auto *zoomin = new QPushButton(QIcon(":/gtk-zoom-in.png"), "");
+    auto *zoomin = new QPushButton(QIcon(":/icons/gtk-zoom-in.png"), "");
     zoomin->setToolTip("Zoom in by 10 percent");
-    auto *zoomout = new QPushButton(QIcon(":/gtk-zoom-out.png"), "");
+    auto *zoomout = new QPushButton(QIcon(":/icons/gtk-zoom-out.png"), "");
     zoomout->setToolTip("Zoom out by 10 percent");
-    auto *rotleft = new QPushButton(QIcon(":/object-rotate-left.png"), "");
+    auto *rotleft = new QPushButton(QIcon(":/icons/object-rotate-left.png"), "");
     rotleft->setToolTip("Rotate left by 15 degrees");
-    auto *rotright = new QPushButton(QIcon(":/object-rotate-right.png"), "");
+    auto *rotright = new QPushButton(QIcon(":/icons/object-rotate-right.png"), "");
     rotright->setToolTip("Rotate right by 15 degrees");
-    auto *rotup = new QPushButton(QIcon(":/gtk-go-up.png"), "");
+    auto *rotup = new QPushButton(QIcon(":/icons/gtk-go-up.png"), "");
     rotup->setToolTip("Rotate up by 15 degrees");
-    auto *rotdown = new QPushButton(QIcon(":/gtk-go-down.png"), "");
+    auto *rotdown = new QPushButton(QIcon(":/icons/gtk-go-down.png"), "");
     rotdown->setToolTip("Rotate down by 15 degrees");
-    auto *reset = new QPushButton(QIcon(":/gtk-zoom-fit.png"), "");
+    auto *reset = new QPushButton(QIcon(":/icons/gtk-zoom-fit.png"), "");
     reset->setToolTip("Reset view to defaults");
     auto *combo = new QComboBox;
     combo->setObjectName("group");
@@ -191,8 +260,7 @@ ImageViewer::ImageViewer(const QString &fileName, LammpsWrapper *_lammps, QWidge
 
     mainLayout->addLayout(menuLayout);
     mainLayout->addWidget(scrollArea);
-    mainLayout->addWidget(buttonBox);
-    setWindowIcon(QIcon(":/lammps-icon-128x128.png"));
+    setWindowIcon(QIcon(":/icons/lammps-icon-128x128.png"));
     setWindowTitle(QString("Image Viewer: ") + QFileInfo(fileName).fileName());
     createActions();
 
@@ -201,13 +269,13 @@ ImageViewer::ImageViewer(const QString &fileName, LammpsWrapper *_lammps, QWidge
     // properties directly since lookup in reset_view() will have failed
     dobox->setChecked(showbox);
     dovdw->setChecked(vdwfactor > 1.0);
-    dovdw->setEnabled(useelements);
+    dovdw->setEnabled(useelements || usediameter);
     doaxes->setChecked(showaxes);
     dossao->setChecked(usessao);
     doanti->setChecked(antialias);
 
     scaleFactor = 1.0;
-    resize(image.width() + 20, image.height() + 50);
+    resize(image.width() + 20, image.height() + 75);
 
     scrollArea->setVisible(true);
     updateActions();
@@ -348,7 +416,7 @@ void ImageViewer::do_rot_up()
     createImage();
 }
 
-void ImageViewer::change_group(int idx)
+void ImageViewer::change_group(int)
 {
     QComboBox *box = findChild<QComboBox *>("group");
     if (box) group = box->currentText();
@@ -368,10 +436,7 @@ void ImageViewer::createImage()
     dumpcmd += "'" + dumpfile.fileName() + "'";
 
     settings.beginGroup("snapshot");
-    int aa       = antialias ? 2 : 1;
-    int tmpxsize = xsize * aa;
-    int tmpysize = ysize * aa;
-    int hhrot    = (hrot > 180) ? 360 - hrot : hrot;
+    int hhrot = (hrot > 180) ? 360 - hrot : hrot;
 
     // determine elements from masses and set their covalent radii
     int ntypes       = lammps->extract_setting("ntypes");
@@ -390,9 +455,10 @@ void ImageViewer::createImage()
             adiams += QString("adiam %1 %2 ").arg(i).arg(vdwfactor * pte_vdw_radius[idx]);
         }
     }
+    usediameter = lammps->extract_setting("radius_flag") != 0;
 
     // adjust pushbutton state and clear adiams string to disable VDW display, if needed
-    if (useelements) {
+    if (useelements || usediameter) {
         auto *button = findChild<QPushButton *>("vdw");
         if (button) button->setEnabled(true);
     } else {
@@ -405,10 +471,14 @@ void ImageViewer::createImage()
         dumpcmd += blank + "element";
     else
         dumpcmd += blank + settings.value("color", "type").toString();
-    dumpcmd += blank + settings.value("diameter", "type").toString();
-    dumpcmd += QString(" size ") + QString::number(tmpxsize) + blank + QString::number(tmpysize);
-    dumpcmd += QString(" zoom ") + QString::number(zoom);
+    if (usediameter && (vdwfactor > 1.0))
+        dumpcmd += blank + "diameter";
+    else
+        dumpcmd += blank + settings.value("diameter", "type").toString();
+    dumpcmd += QString(" size %1 %2").arg(xsize).arg(ysize);
+    dumpcmd += QString(" zoom %1").arg(zoom);
     dumpcmd += " shiny 0.5 ";
+    dumpcmd += QString(" fsaa %1").arg(antialias ? "yes" : "no");
     if (nbondtypes > 0) {
         if (vdwfactor > 1.0)
             dumpcmd += " bond none none ";
@@ -416,18 +486,18 @@ void ImageViewer::createImage()
             dumpcmd += " bond atom 0.5 ";
     }
     if (lammps->extract_setting("dimension") == 3) {
-        dumpcmd += QString(" view ") + QString::number(hhrot) + blank + QString::number(vrot);
+        dumpcmd += QString(" view %1 %2").arg(hhrot).arg(vrot);
     }
-    if (usessao) dumpcmd += QString(" ssao yes 453983 0.75");
+    if (usessao) dumpcmd += " ssao yes 453983 0.75";
     if (showbox)
-        dumpcmd += QString(" box yes 0.025");
+        dumpcmd += " box yes 0.025";
     else
-        dumpcmd += QString(" box no 0.0");
+        dumpcmd += " box no 0.0";
 
     if (showaxes)
-        dumpcmd += QString(" axes yes 0.5 0.025");
+        dumpcmd += " axes yes 0.5 0.025";
     else
-        dumpcmd += QString(" axes no 0.0 0.0");
+        dumpcmd += " axes no 0.0 0.0";
 
     dumpcmd += " modify boxcolor " + settings.value("boxcolor", "yellow").toString();
     dumpcmd += " backcolor " + settings.value("background", "black").toString();
@@ -439,17 +509,13 @@ void ImageViewer::createImage()
     QImageReader reader(dumpfile.fileName());
     reader.setAutoTransform(true);
     const QImage newImage = reader.read();
-
-    if (newImage.isNull()) {
-        QMessageBox::warning(
-            this, QGuiApplication::applicationDisplayName(),
-            QString("Cannot load %1: %2").arg(dumpfile.fileName(), reader.errorString()));
-        return;
-    }
     dumpfile.remove();
 
+    // read of new image failed. Keep the current image and don't try to load it.
+    if (newImage.isNull()) return;
+
     // scale back to achieve antialiasing
-    image = newImage.scaled(xsize, ysize, Qt::IgnoreAspectRatio, Qt::SmoothTransformation);
+    image = newImage;
     imageLabel->setPixmap(QPixmap::fromImage(image));
     imageLabel->adjustSize();
     if (renderstatus) renderstatus->setEnabled(false);
@@ -465,6 +531,14 @@ void ImageViewer::saveAs()
 
 void ImageViewer::copy() {}
 
+void ImageViewer::quit()
+{
+    LammpsGui *main = nullptr; // stays null unless a matching top-level widget is found
+    for (QWidget *widget : QApplication::topLevelWidgets())
+        if (widget->objectName() == "LammpsGui") main = dynamic_cast<LammpsGui *>(widget);
+    if (main) main->quit(); // forward the quit request to the LammpsGui window
+}
+
 void ImageViewer::saveFile(const QString &fileName)
 {
     if (!fileName.isEmpty()) image.save(fileName);
@@ -475,17 +549,20 @@ void ImageViewer::createActions()
     QMenu *fileMenu = menuBar->addMenu("&File");
 
     saveAsAct = fileMenu->addAction("&Save As...", this, &ImageViewer::saveAs);
-    saveAsAct->setIcon(QIcon(":/document-save-as.png"));
+    saveAsAct->setIcon(QIcon(":/icons/document-save-as.png"));
     saveAsAct->setEnabled(false);
     fileMenu->addSeparator();
     copyAct = fileMenu->addAction("&Copy", this, &ImageViewer::copy);
-    copyAct->setIcon(QIcon(":/edit-copy.png"));
+    copyAct->setIcon(QIcon(":/icons/edit-copy.png"));
     copyAct->setShortcut(QKeySequence::Copy);
     copyAct->setEnabled(false);
     fileMenu->addSeparator();
     QAction *exitAct = fileMenu->addAction("&Close", this, &QWidget::close);
-    exitAct->setIcon(QIcon(":/window-close.png"));
-    exitAct->setShortcut(QKeySequence::fromString("Ctrl+W"));
+    exitAct->setIcon(QIcon(":/icons/window-close.png"));
+    exitAct->setShortcut(QKeySequence(Qt::CTRL | Qt::Key_W));
+    QAction *quitAct = fileMenu->addAction("&Quit", this, &ImageViewer::quit);
+    quitAct->setIcon(QIcon(":/icons/application-exit.png"));
+    quitAct->setShortcut(QKeySequence(Qt::CTRL | Qt::Key_Q));
 }
 
 void ImageViewer::updateActions()
diff --git a/tools/lammps-gui/imageviewer.h b/tools/lammps-gui/imageviewer.h
index 013a90249f..1be7790666 100644
--- a/tools/lammps-gui/imageviewer.h
+++ b/tools/lammps-gui/imageviewer.h
@@ -40,6 +40,7 @@ public:
 private slots:
     void saveAs();
     void copy();
+    void quit();
 
     void edit_size();
     void reset_view();
@@ -87,7 +88,7 @@ private:
     int xsize, ysize;
     int hrot, vrot;
     double zoom, vdwfactor;
-    bool showbox, showaxes, antialias, usessao, useelements;
+    bool showbox, showaxes, antialias, usessao, useelements, usediameter;
 };
 #endif
 
diff --git a/tools/lammps-gui/lammpsgui.cpp b/tools/lammps-gui/lammpsgui.cpp
index 1f997f53b8..37b8aebf88 100644
--- a/tools/lammps-gui/lammpsgui.cpp
+++ b/tools/lammps-gui/lammpsgui.cpp
@@ -14,6 +14,7 @@
 #include "lammpsgui.h"
 
 #include "chartviewer.h"
+#include "helpers.h"
 #include "highlighter.h"
 #include "imageviewer.h"
 #include "lammpsrunner.h"
@@ -34,6 +35,7 @@
 #include <QLabel>
 #include <QLocale>
 #include <QMessageBox>
+#include <QMetaType>
 #include <QPlainTextEdit>
 #include <QProcess>
 #include <QProgressBar>
@@ -56,56 +58,22 @@
 #endif
 
 static const QString blank(" ");
-static constexpr int MAXRECENT = 5;
-static constexpr int BUFLEN    = 128;
-
-// duplicate string
-static char *mystrdup(const std::string &text)
-{
-    auto tmp = new char[text.size() + 1];
-    memcpy(tmp, text.c_str(), text.size() + 1);
-    return tmp;
-}
-
-// find if executable is in path
-// https://stackoverflow.com/a/51041497
-
-static bool has_exe(const QString &exe)
-{
-    QProcess findProcess;
-    QStringList arguments;
-    arguments << exe;
-#if defined(_WIN32)
-    findProcess.start("where", arguments);
-#else
-    findProcess.start("which", arguments);
-#endif
-    findProcess.setReadChannel(QProcess::ProcessChannel::StandardOutput);
-
-    if (!findProcess.waitForFinished()) return false; // Not found or which does not work
-
-    QString retStr(findProcess.readAll());
-    retStr = retStr.trimmed();
-
-    QFile file(retStr);
-    QFileInfo check_file(file);
-    if (check_file.exists() && check_file.isFile())
-        return true; // Found!
-    else
-        return false; // Not found!
-}
+static constexpr int BUFLEN = 128;
 
 LammpsGui::LammpsGui(QWidget *parent, const char *filename) :
     QMainWindow(parent), ui(new Ui::LammpsGui), highlighter(nullptr), capturer(nullptr),
     status(nullptr), logwindow(nullptr), imagewindow(nullptr), chartwindow(nullptr),
     slideshow(nullptr), logupdater(nullptr), dirstatus(nullptr), progress(nullptr),
-    prefdialog(nullptr), lammpsstatus(nullptr), varwindow(nullptr)
+    prefdialog(nullptr), lammpsstatus(nullptr), varwindow(nullptr), runner(nullptr),
+    is_running(false), run_counter(0)
 {
     // enforce using the plain ASCII C locale within the GUI.
     QLocale::setDefault(QLocale("C"));
 
-    // register QList<QString>
+#if QT_VERSION < QT_VERSION_CHECK(6, 0, 0)
+    // registering stream operators for QList<QString> is only needed for Qt5
     qRegisterMetaTypeStreamOperators<QList<QString>>("QList<QString>");
+#endif
 
     ui->setupUi(this);
     this->setCentralWidget(ui->textEdit);
@@ -116,9 +84,13 @@ LammpsGui::LammpsGui(QWidget *parent, const char *filename) :
     // use $HOME if we get dropped to "/" like on macOS
     if (current_dir == "/") current_dir = QDir::homePath();
 
+#define stringify(x) myxstr(x)
+#define myxstr(x) #x
     QCoreApplication::setOrganizationName("The LAMMPS Developers");
     QCoreApplication::setOrganizationDomain("lammps.org");
-    QCoreApplication::setApplicationName("LAMMPS GUI");
+    QCoreApplication::setApplicationName("LAMMPS GUI - QT" stringify(QT_VERSION_MAJOR));
+#undef stringify
+#undef myxstr
 
     // restore and initialize settings
     QSettings settings;
@@ -181,7 +153,7 @@ LammpsGui::LammpsGui(QWidget *parent, const char *filename) :
     lammps_args.push_back(mystrdup("-log"));
     lammps_args.push_back(mystrdup("none"));
 
-    setWindowIcon(QIcon(":/lammps-icon-128x128.png"));
+    setWindowIcon(QIcon(":/icons/lammps-icon-128x128.png"));
 
     QFont all_font("Arial", -1);
     all_font.setStyleHint(QFont::SansSerif, QFont::PreferOutline);
@@ -198,7 +170,7 @@ LammpsGui::LammpsGui(QWidget *parent, const char *filename) :
 
     varwindow = new QLabel(QString());
     varwindow->setWindowTitle("LAMMPS-GUI - Current Variables:");
-    varwindow->setWindowIcon(QIcon(":/lammps-icon-128x128.png"));
+    varwindow->setWindowIcon(QIcon(":/icons/lammps-icon-128x128.png"));
     varwindow->setMinimumSize(100, 50);
     varwindow->setText("(none)");
     varwindow->setFont(text_font);
@@ -211,7 +183,7 @@ LammpsGui::LammpsGui(QWidget *parent, const char *filename) :
 
     update_recents();
 
-    // check if we have OVITO and VMD installed and deacivate actions if not
+    // check if we have OVITO and VMD installed and deactivate actions if not
     ui->actionView_in_OVITO->setEnabled(has_exe("ovito"));
     ui->actionView_in_OVITO->setData("ovito");
     ui->actionView_in_VMD->setEnabled(has_exe("vmd"));
@@ -261,15 +233,15 @@ LammpsGui::LammpsGui(QWidget *parent, const char *filename) :
 #endif
 
     lammpsstatus = new QLabel(QString());
-    auto pix     = QPixmap(":/lammps-icon-128x128.png");
+    auto pix     = QPixmap(":/icons/lammps-icon-128x128.png");
     lammpsstatus->setPixmap(pix.scaled(22, 22, Qt::KeepAspectRatio));
     ui->statusbar->addWidget(lammpsstatus);
     lammpsstatus->setToolTip("LAMMPS instance is active");
     lammpsstatus->hide();
 
-    auto *lammpsrun   = new QPushButton(QIcon(":/system-run.png"), "");
-    auto *lammpsstop  = new QPushButton(QIcon(":/process-stop.png"), "");
-    auto *lammpsimage = new QPushButton(QIcon(":/emblem-photos.png"), "");
+    auto *lammpsrun   = new QPushButton(QIcon(":/icons/system-run.png"), "");
+    auto *lammpsstop  = new QPushButton(QIcon(":/icons/process-stop.png"), "");
+    auto *lammpsimage = new QPushButton(QIcon(":/icons/emblem-photos.png"), "");
     lammpsrun->setToolTip("Run LAMMPS on input");
     lammpsstop->setToolTip("Stop LAMMPS");
     lammpsimage->setToolTip("Create snapshot image");
@@ -340,6 +312,8 @@ LammpsGui::LammpsGui(QWidget *parent, const char *filename) :
     style_list.sort();
     ui->textEdit->setUnitsList(style_list);
 
+    ui->textEdit->setFileList();
+
 #define ADD_STYLES(keyword, Type)                                                              \
     style_list.clear();                                                                        \
     if ((std::string(#keyword) == "pair") || (std::string(#keyword) == "bond") ||              \
@@ -407,6 +381,7 @@ void LammpsGui::new_document()
     lammps.close();
     lammpsstatus->hide();
     setWindowTitle(QString("LAMMPS-GUI - *unknown*"));
+    run_counter = 0;
 }
 
 void LammpsGui::open()
@@ -528,7 +503,7 @@ void LammpsGui::update_recents(const QString &filename)
 
 void LammpsGui::update_variables()
 {
-    const auto doc = ui->textEdit->toPlainText().split('\n');
+    const auto doc = ui->textEdit->toPlainText().replace('\t', ' ').split('\n');
     QStringList known;
     QRegularExpression indexvar("^\\s*variable\\s+(\\w+)\\s+index\\s+(.*)");
     QRegularExpression anyvar("^\\s*variable\\s+(\\w+)\\s+(\\w+)\\s+(.*)");
@@ -540,6 +515,8 @@ void LammpsGui::update_variables()
 
     for (const auto &line : doc) {
 
+        if (line.isEmpty()) continue;
+
         // first find variable definitions.
         // index variables are special since they can be overridden from the command line
         auto index = indexvar.match(line);
@@ -618,20 +595,24 @@ void LammpsGui::open_file(const QString &fileName)
     if (!file.open(QIODevice::ReadOnly | QFile::Text)) {
         QMessageBox::warning(this, "Warning",
                              "Cannot open file " + path.absoluteFilePath() + ": " +
-                                 file.errorString());
-        return;
+                                 file.errorString() +
+                                 ".\nWill create new file on saving editor buffer.");
+        ui->textEdit->document()->setPlainText(QString());
+    } else {
+        QTextStream in(&file);
+        QString text = in.readAll();
+        ui->textEdit->document()->setPlainText(text);
+        ui->textEdit->moveCursor(QTextCursor::Start, QTextCursor::MoveAnchor);
+        file.close();
     }
     setWindowTitle(QString("LAMMPS-GUI - " + current_file));
-    QTextStream in(&file);
-    QString text = in.readAll();
-    ui->textEdit->document()->setPlainText(text);
-    ui->textEdit->moveCursor(QTextCursor::Start, QTextCursor::MoveAnchor);
+    run_counter = 0;
     ui->textEdit->document()->setModified(false);
     ui->textEdit->setGroupList();
     ui->textEdit->setVarNameList();
     ui->textEdit->setComputeIDList();
     ui->textEdit->setFixIDList();
-    file.close();
+    ui->textEdit->setFileList();
     dirstatus->setText(QString(" Directory: ") + current_dir);
     status->setText("Ready.");
 
@@ -655,6 +636,8 @@ void LammpsGui::open_file(const QString &fileName)
     lammps.close();
 }
 
+// write file and update CWD to its folder
+
 void LammpsGui::write_file(const QString &fileName)
 {
     QFileInfo path(fileName);
@@ -667,6 +650,7 @@ void LammpsGui::write_file(const QString &fileName)
         return;
     }
     setWindowTitle(QString("LAMMPS-GUI - " + current_file));
+    QDir::setCurrent(current_dir);
 
     update_recents(path.absoluteFilePath());
 
@@ -869,7 +853,7 @@ void LammpsGui::logupdate()
 
             for (int i = 0; i < ncols; ++i) {
                 int datatype = *(int *)lammps.last_thermo("type", i);
-                double data;
+                double data  = 0.0;
                 if (datatype == 0) // int
                     data = *(int *)lammps.last_thermo("data", i);
                 else if (datatype == 2) // double
@@ -892,7 +876,9 @@ void LammpsGui::logupdate()
             else
                 slideshow->hide();
         } else {
-            slideshow->setWindowTitle(QString("LAMMPS-GUI - Slide Show: ") + current_file);
+            slideshow->setWindowTitle(
+                QString("LAMMPS-GUI - Slide Show: %1 - Run %2").arg(current_file).arg(run_counter));
+            if (QSettings().value("viewslide", true).toBool()) slideshow->show();
         }
         slideshow->add_image(imagefile);
     }
@@ -938,7 +924,7 @@ void LammpsGui::run_done()
                     chartwindow->add_chart(label, i);
                 }
                 int datatype = *(int *)lammps.last_thermo("type", i);
-                double data;
+                double data  = 0.0;
                 if (datatype == 0) // int
                     data = *(int *)lammps.last_thermo("data", i);
                 else if (datatype == 2) // double
@@ -972,6 +958,7 @@ void LammpsGui::run_done()
                               QString("Error running LAMMPS:\n\n") + errorbuf);
     }
     ui->textEdit->setCursor(nline);
+    ui->textEdit->setFileList();
     progress->hide();
     dirstatus->show();
 }
@@ -1008,6 +995,7 @@ void LammpsGui::do_run(bool use_buffer)
     progress->setValue(0);
     dirstatus->hide();
     progress->show();
+
     int nthreads = settings.value("nthreads", 1).toInt();
     int accel    = settings.value("accelerator", AcceleratorTab::None).toInt();
     if ((accel != AcceleratorTab::OpenMP) && (accel != AcceleratorTab::Intel) &&
@@ -1024,12 +1012,17 @@ void LammpsGui::do_run(bool use_buffer)
 
     runner     = new LammpsRunner(this);
     is_running = true;
+    ++run_counter;
+
+    // define "gui_run" variable set to run_counter value
+    lammps.command("variable gui_run delete");
+    lammps.command(std::string("variable gui_run index " + std::to_string(run_counter)).c_str());
     if (use_buffer) {
-        // always add final newline since the text edit widget does not
-        char *input = mystrdup(ui->textEdit->toPlainText().toStdString() + "\n");
+        // always add final newline since the text edit widget does not do it
+        char *input = mystrdup(ui->textEdit->toPlainText() + "\n");
         runner->setup_run(&lammps, input, nullptr);
     } else {
-        char *fname = mystrdup(current_file.toStdString());
+        char *fname = mystrdup(current_file);
         runner->setup_run(&lammps, nullptr, fname);
     }
 
@@ -1038,26 +1031,25 @@ void LammpsGui::do_run(bool use_buffer)
     runner->start();
 
     // if configured, delete old log window before opening new one
-    if (settings.value("logreplace", false).toBool()) delete logwindow;
-    logwindow = new LogWindow();
+    if (settings.value("logreplace", true).toBool()) delete logwindow;
+    logwindow = new LogWindow(current_file);
     logwindow->setReadOnly(true);
     logwindow->setCenterOnScroll(true);
     logwindow->moveCursor(QTextCursor::End);
-    if (use_buffer)
-        logwindow->setWindowTitle("LAMMPS-GUI - Output from running LAMMPS on buffer - " +
-                                  current_file);
-    else
-        logwindow->setWindowTitle("LAMMPS-GUI - Output from running LAMMPS on file - " +
-                                  current_file);
-    logwindow->setWindowIcon(QIcon(":/lammps-icon-128x128.png"));
+    logwindow->setWindowTitle(
+        QString("LAMMPS-GUI - Output from running LAMMPS on %1 - %2 - Run  %3")
+            .arg(use_buffer ? "buffer" : "file")
+            .arg(current_file)
+            .arg(run_counter));
+    logwindow->setWindowIcon(QIcon(":/icons/lammps-icon-128x128.png"));
     QFont text_font;
     text_font.fromString(settings.value("textfont", text_font.toString()).toString());
     logwindow->document()->setDefaultFont(text_font);
     logwindow->setLineWrapMode(LogWindow::NoWrap);
     logwindow->setMinimumSize(400, 300);
-    QShortcut *shortcut = new QShortcut(QKeySequence(Qt::CTRL + Qt::Key_W), logwindow);
+    QShortcut *shortcut = new QShortcut(QKeySequence(Qt::CTRL | Qt::Key_W), logwindow);
     QObject::connect(shortcut, &QShortcut::activated, logwindow, &LogWindow::close);
-    shortcut = new QShortcut(QKeySequence(Qt::CTRL + Qt::Key_Slash), logwindow);
+    shortcut = new QShortcut(QKeySequence(Qt::CTRL | Qt::Key_Slash), logwindow);
     QObject::connect(shortcut, &QShortcut::activated, this, &LammpsGui::stop_run);
     if (settings.value("viewlog", true).toBool())
         logwindow->show();
@@ -1065,19 +1057,18 @@ void LammpsGui::do_run(bool use_buffer)
         logwindow->hide();
 
     // if configured, delete old log window before opening new one
-    if (settings.value("chartreplace", false).toBool()) delete chartwindow;
+    if (settings.value("chartreplace", true).toBool()) delete chartwindow;
     chartwindow = new ChartWindow(current_file);
-    if (use_buffer)
-        chartwindow->setWindowTitle("LAMMPS-GUI - Thermo charts from running LAMMPS on buffer - " +
-                                    current_file);
-    else
-        chartwindow->setWindowTitle("LAMMPS-GUI - Thermo charts from running LAMMPS on file - " +
-                                    current_file);
-    chartwindow->setWindowIcon(QIcon(":/lammps-icon-128x128.png"));
+    chartwindow->setWindowTitle(
+        QString("LAMMPS-GUI - Thermo charts from running LAMMPS on %1 - %2 - Run  %3")
+            .arg(use_buffer ? "buffer" : "file")
+            .arg(current_file)
+            .arg(run_counter));
+    chartwindow->setWindowIcon(QIcon(":/icons/lammps-icon-128x128.png"));
     chartwindow->setMinimumSize(400, 300);
-    shortcut = new QShortcut(QKeySequence(Qt::CTRL + Qt::Key_W), chartwindow);
+    shortcut = new QShortcut(QKeySequence(Qt::CTRL | Qt::Key_W), chartwindow);
     QObject::connect(shortcut, &QShortcut::activated, chartwindow, &ChartWindow::close);
-    shortcut = new QShortcut(QKeySequence(Qt::CTRL + Qt::Key_Slash), chartwindow);
+    shortcut = new QShortcut(QKeySequence(Qt::CTRL | Qt::Key_Slash), chartwindow);
     QObject::connect(shortcut, &QShortcut::activated, this, &LammpsGui::stop_run);
     if (settings.value("viewchart", true).toBool())
         chartwindow->show();
@@ -1106,13 +1097,17 @@ void LammpsGui::render_image()
             // add a run 0 and thus create the state of the initial system without running.
             // this will allow us to create a snapshot image.
             auto saved = ui->textEdit->textCursor();
+#if QT_VERSION < QT_VERSION_CHECK(5, 15, 0)
+            if (ui->textEdit->find(QRegExp(QStringLiteral("^\\s*(run|minimize)\\s+")))) {
+#else
             if (ui->textEdit->find(QRegularExpression(QStringLiteral("^\\s*(run|minimize)\\s+")))) {
+#endif
                 auto cursor = ui->textEdit->textCursor();
                 cursor.movePosition(QTextCursor::PreviousBlock);
                 cursor.movePosition(QTextCursor::EndOfLine);
                 cursor.movePosition(QTextCursor::Start, QTextCursor::KeepAnchor);
                 auto selection = cursor.selectedText().replace(QChar(0x2029), '\n');
-                selection += "run 0 pre yes post no";
+                selection += "\nrun 0 pre yes post no";
                 ui->textEdit->setTextCursor(saved);
                 lammps.command("clear");
                 lammps.commands_string(selection.toStdString().c_str());
@@ -1125,9 +1120,10 @@ void LammpsGui::render_image()
                                      "Cannot create snapshot image without a system box");
                 return;
             }
+            ui->textEdit->setTextCursor(saved);
         }
         // if configured, delete old image window before opening new one
-        if (QSettings().value("imagereplace", false).toBool()) delete imagewindow;
+        if (QSettings().value("imagereplace", true).toBool()) delete imagewindow;
         imagewindow = new ImageViewer(current_file, &lammps);
     } else {
         QMessageBox::warning(this, "ImageViewer Error",
@@ -1199,14 +1195,15 @@ void LammpsGui::view_variables()
 void LammpsGui::about()
 {
     std::string version = "This is LAMMPS-GUI version " LAMMPS_GUI_VERSION;
+    version += " using Qt version " QT_VERSION_STR "\n";
     if (lammps.has_plugin()) {
-        version += " - LAMMPS library loaded as plugin";
+        version += "LAMMPS library loaded as plugin";
         if (!plugin_path.empty()) {
             version += " from file ";
             version += plugin_path;
         }
     } else {
-        version += " - LAMMPS library linked to executable";
+        version += "LAMMPS library linked to executable";
     }
 
     QString to_clipboard(version.c_str());
@@ -1232,10 +1229,10 @@ void LammpsGui::about()
 
     QMessageBox msg;
     msg.setWindowTitle("About LAMMPS");
-    msg.setWindowIcon(QIcon(":/lammps-icon-128x128.png"));
+    msg.setWindowIcon(QIcon(":/icons/lammps-icon-128x128.png"));
     msg.setText(version.c_str());
     msg.setInformativeText(info.c_str());
-    msg.setIconPixmap(QPixmap(":/lammps-icon-128x128.png").scaled(64, 64));
+    msg.setIconPixmap(QPixmap(":/icons/lammps-icon-128x128.png").scaled(64, 64));
     msg.setStandardButtons(QMessageBox::Close);
     QFont font;
     font.setPointSizeF(font.pointSizeF() * 0.75);
@@ -1252,11 +1249,14 @@ void LammpsGui::help()
 {
     QMessageBox msg;
     msg.setWindowTitle("LAMMPS-GUI Quick Help");
-    msg.setWindowIcon(QIcon(":/lammps-icon-128x128.png"));
+    msg.setWindowIcon(QIcon(":/icons/lammps-icon-128x128.png"));
     msg.setText("<div>This is LAMMPS-GUI version " LAMMPS_GUI_VERSION "</div>");
-    msg.setInformativeText("<p>LAMMPS GUI is a graphical text editor that is linked to the LAMMPS "
+    msg.setInformativeText("<p>LAMMPS GUI is a graphical text editor that is customized for "
+                           "editing LAMMPS input files and linked to the LAMMPS "
                            "library and thus can run LAMMPS directly using the contents of the "
-                           "text buffer as input through the LAMMPS C-library interface. </p>"
+                           "text buffer as input. It can retrieve and display information from "
+                           "LAMMPS while it is running and  display visualizations created "
+                           "with the dump image command.</p>"
                            "<p>The main window of the LAMMPS GUI is a text editor window with "
                            "LAMMPS specific syntax highlighting. When typing <b>Ctrl-Enter</b> "
                            "or clicking on 'Run LAMMMPS' in the 'Run' menu, LAMMPS will be run "
@@ -1265,26 +1265,29 @@ void LammpsGui::help()
                            "is displayed in a chart window. Both are updated regularly during the "
                            "run, as is a progress bar in the main window. The running simulation "
                            "can be stopped cleanly by typing <b>Ctrl-/</b> or by clicking on "
-                           "'Stop LAMMPS' in the 'Run' menu. After the simulation is finished "
-                           "or stopped, an image of the simulated system can be shown in an image "
+                           "'Stop LAMMPS' in the 'Run' menu. While LAMMPS is not running, "
+                           "an image of the simulated system can be created and shown in an image "
                            "viewer window by typing <b>Ctrl-i</b> or by clicking on 'View Image' "
                            "in the 'Run' menu. Multiple image settings can be changed through the "
-                           "buttons in the menu bar and the image will be re-renderd.</p>"
+                           "buttons in the menu bar and the image will be re-rendered.  In case "
+                           "an input file contains a dump image command, LAMMPS GUI will load "
+                           "the images as they are created and display them in a slide show. </p>"
                            "<p>When opening a file, the editor will determine the directory "
                            "where the input file resides and switch its current working directory "
                            "to that same folder and thus enabling the run to read other files in "
-                           "that folder, e.g. a data file .The GUI will show its current working "
+                           "that folder, e.g. a data file. The GUI will show its current working "
                            "directory in the status bar. In addition to using the menu, the "
                            "editor window can also receive files as the first command line "
                            "argument or via drag-n-drop from a graphical file manager or a "
                            "desktop environment.</p>"
-                           "<p>Almost all commands are accessible via hotkeys. Which those "
-                           "hotkeys are, is typically shown next to their entries in the menus. "
+                           "<p>Almost all commands are accessible via keyboard shortcuts. Which "
+                           "those shortcuts are, is typically shown next to their entries in the "
+                           "menus. "
                            "In addition, the documentation for the command in the current line "
                            "can be viewed by typing <b>Ctrl-?</b> or by choosing the respective "
                            "entry in the context menu, available by right-clicking the mouse. "
-                           "Log, chart, and image windows can be closed with <b>Ctrl-W</b>, the "
-                           "main window with <b>Ctrl-Q</b>.</p>"
+                           "Log, chart, slide show, and image windows can be closed with "
+                           "<b>Ctrl-W</b> and the application terminated with <b>Ctrl-Q</b>.</p>"
                            "<p>The 'About LAMMPS' dialog will show the LAMMPS version and the "
                            "features included into the LAMMPS library linked to the LAMMPS GUI. "
                            "A number of settings can be adjusted in the 'Preferences' dialog (in "
@@ -1292,7 +1295,7 @@ void LammpsGui::help()
                            "accelerator packages and number of OpenMP threads. Due to its nature "
                            "as a graphical application, it is <b>not</b> possible to use the "
                            "LAMMPS GUI in parallel with MPI.</p>");
-    msg.setIconPixmap(QPixmap(":/lammps-icon-128x128.png").scaled(64, 64));
+    msg.setIconPixmap(QPixmap(":/icons/lammps-icon-128x128.png").scaled(64, 64));
     msg.setStandardButtons(QMessageBox::Close);
     msg.exec();
 }
@@ -1343,9 +1346,9 @@ void LammpsGui::preferences()
         // must delete LAMMPS instance after preferences have changed that require
         // using different command line flags when creating the LAMMPS instance like
         // suffixes or package commands
+        int newthreads = settings.value("nthreads", 1).toInt();
         if ((oldaccel != settings.value("accelerator", AcceleratorTab::None).toInt()) ||
-            (oldthreads != settings.value("nthreads", 1).toInt()) ||
-            (oldecho != settings.value("echo", false).toBool()) ||
+            (oldthreads != newthreads) || (oldecho != settings.value("echo", false).toBool()) ||
             (oldcite != settings.value("cite", false).toBool())) {
             if (lammps.is_running()) {
                 stop_run();
@@ -1354,6 +1357,10 @@ void LammpsGui::preferences()
             }
             lammps.close();
             lammpsstatus->hide();
+#if defined(_OPENMP)
+            qputenv("OMP_NUM_THREADS", std::to_string(newthreads).c_str());
+            omp_set_num_threads(newthreads);
+#endif
         }
         if (imagewindow) imagewindow->createImage();
         settings.beginGroup("reformat");
@@ -1414,9 +1421,9 @@ void LammpsGui::start_lammps()
         QString value = var.second;
         if (!name.isEmpty() && !value.isEmpty()) {
             lammps_args.push_back(mystrdup("-var"));
-            lammps_args.push_back(mystrdup(name.toStdString()));
+            lammps_args.push_back(mystrdup(name));
             for (const auto &v : value.split(' '))
-                lammps_args.push_back(mystrdup(v.toStdString()));
+                lammps_args.push_back(mystrdup(v));
         }
     }
 
@@ -1425,9 +1432,18 @@ void LammpsGui::start_lammps()
     lammps.open(narg, args);
     lammpsstatus->show();
 
+    // must have a version newer than the 2 August 2023 release of LAMMPS
+    // TODO: must update this check before next feature release
+    if (lammps.version() <= 20230802) {
+        QMessageBox::critical(this, "Incompatible LAMMPS Version",
+                              "LAMMPS-GUI version " LAMMPS_GUI_VERSION " requires\n"
+                              "a LAMMPS version more recent than 2 August 2023");
+        exit(1);
+    }
+
     // delete additional arguments again (3 were there initially
-    while (lammps_args.size() > initial_narg) {
-        delete lammps_args.back();
+    while ((int)lammps_args.size() > initial_narg) {
+        delete[] lammps_args.back();
         lammps_args.pop_back();
     }
 
diff --git a/tools/lammps-gui/lammpsgui.h b/tools/lammps-gui/lammpsgui.h
index 6d80b0aee6..0dd34f2c49 100644
--- a/tools/lammps-gui/lammpsgui.h
+++ b/tools/lammps-gui/lammpsgui.h
@@ -16,8 +16,10 @@
 
 #include <QMainWindow>
 
+#include <QGridLayout>
 #include <QList>
 #include <QPair>
+#include <QSpacerItem>
 #include <QString>
 #include <vector>
 
@@ -66,6 +68,10 @@ protected:
     void start_lammps();
     void run_done();
 
+public slots:
+    void quit();
+    void stop_run();
+
 private slots:
     void new_document();
     void open();
@@ -73,7 +79,6 @@ private slots:
     void start_exe();
     void save();
     void save_as();
-    void quit();
     void copy();
     void cut();
     void paste();
@@ -82,7 +87,6 @@ private slots:
     void run_buffer() { do_run(true); }
     void run_file() { do_run(false); }
 
-    void stop_run();
     void edit_variables();
     void render_image();
     void view_slides();
@@ -126,6 +130,7 @@ private:
     LammpsRunner *runner;
     std::string plugin_path;
     bool is_running;
+    int run_counter;
     std::vector<char *> lammps_args;
 };
 #endif // LAMMPSGUI_H
diff --git a/tools/lammps-gui/lammpsgui.qrc b/tools/lammps-gui/lammpsgui.qrc
index bb23d559ee..6405d66d7a 100644
--- a/tools/lammps-gui/lammpsgui.qrc
+++ b/tools/lammps-gui/lammpsgui.qrc
@@ -1,61 +1,62 @@
 <!-- -*- xml -*- -->
 <RCC>
   <qresource prefix="/">
-    <file>lammps-icon-128x128.png</file>
+    <file>icons/lammps-icon-128x128.png</file>
     <file>help_index.table</file>
     <!-- This file is updated with: grep 'mycmd ==' ../../src/input.cpp | sed -e 's/^.*mycmd == "\(.*\)".*$/\1/' > lammps_internal_commands.txt -->
     <file>lammps_internal_commands.txt</file>
-    <file>antialias.png</file>
-    <file>application-calc.png</file>
-    <file>application-exit.png</file>
-    <file>application-plot.png</file>
-    <file>axes-img.png</file>
-    <file>document-new.png</file>
-    <file>document-open-recent.png</file>
-    <file>document-open.png</file>
-    <file>document-revert.png</file>
-    <file>document-save-as.png</file>
-    <file>document-save.png</file>
-    <file>edit-copy.png</file>
-    <file>edit-cut.png</file>
-    <file>edit-delete.png</file>
-    <file>edit-paste.png</file>
-    <file>edit-redo.png</file>
-    <file>edit-undo.png</file>
-    <file>emblem-photos.png</file>
-    <file>expand-text.png</file>
-    <file>format-indent-less-3.png</file>
-    <file>go-first.png</file>
-    <file>go-last.png</file>
-    <file>go-next-2.png</file>
-    <file>go-previous-2.png</file>
-    <file>gtk-go-down.png</file>
-    <file>gtk-go-up.png</file>
-    <file>gtk-zoom-fit.png</file>
-    <file>gtk-zoom-in.png</file>
-    <file>gtk-zoom-out.png</file>
-    <file>hd-img.png</file>
-    <file>help-about.png</file>
-    <file>help-browser.png</file>
-    <file>help-faq.png</file>
-    <file>image-x-generic.png</file>
-    <file>media-playback-start-2.png</file>
-    <file>media-playlist-repeat.png</file>
-    <file>object-rotate-left.png</file>
-    <file>object-rotate-right.png</file>
-    <file>ovito.png</file>
-    <file>preferences-desktop-font.png</file>
-    <file>preferences-desktop-personal.png</file>
-    <file>preferences-desktop.png</file>
-    <file>process-stop.png</file>
-    <file>run-file.png</file>
-    <file>system-box.png</file>
-    <file>system-help.png</file>
-    <file>system-run.png</file>
-    <file>utilities-terminal.png</file>
-    <file>vdw-style.png</file>
-    <file>vmd.png</file>
-    <file>window-close.png</file>
-    <file>x-office-drawing.png</file>
+    <file>icons/antialias.png</file>
+    <file>icons/application-calc.png</file>
+    <file>icons/application-exit.png</file>
+    <file>icons/application-plot.png</file>
+    <file>icons/axes-img.png</file>
+    <file>icons/document-new.png</file>
+    <file>icons/document-open-recent.png</file>
+    <file>icons/document-open.png</file>
+    <file>icons/document-revert.png</file>
+    <file>icons/document-save-as.png</file>
+    <file>icons/document-save.png</file>
+    <file>icons/edit-copy.png</file>
+    <file>icons/edit-cut.png</file>
+    <file>icons/edit-delete.png</file>
+    <file>icons/edit-paste.png</file>
+    <file>icons/edit-redo.png</file>
+    <file>icons/edit-undo.png</file>
+    <file>icons/emblem-photos.png</file>
+    <file>icons/expand-text.png</file>
+    <file>icons/export-movie.png</file>
+    <file>icons/format-indent-less-3.png</file>
+    <file>icons/go-first.png</file>
+    <file>icons/go-last.png</file>
+    <file>icons/go-next-2.png</file>
+    <file>icons/go-previous-2.png</file>
+    <file>icons/gtk-go-down.png</file>
+    <file>icons/gtk-go-up.png</file>
+    <file>icons/gtk-zoom-fit.png</file>
+    <file>icons/gtk-zoom-in.png</file>
+    <file>icons/gtk-zoom-out.png</file>
+    <file>icons/hd-img.png</file>
+    <file>icons/help-about.png</file>
+    <file>icons/help-browser.png</file>
+    <file>icons/help-faq.png</file>
+    <file>icons/image-x-generic.png</file>
+    <file>icons/media-playback-start-2.png</file>
+    <file>icons/media-playlist-repeat.png</file>
+    <file>icons/object-rotate-left.png</file>
+    <file>icons/object-rotate-right.png</file>
+    <file>icons/ovito.png</file>
+    <file>icons/preferences-desktop-font.png</file>
+    <file>icons/preferences-desktop-personal.png</file>
+    <file>icons/preferences-desktop.png</file>
+    <file>icons/process-stop.png</file>
+    <file>icons/run-file.png</file>
+    <file>icons/system-box.png</file>
+    <file>icons/system-help.png</file>
+    <file>icons/system-run.png</file>
+    <file>icons/utilities-terminal.png</file>
+    <file>icons/vdw-style.png</file>
+    <file>icons/vmd.png</file>
+    <file>icons/window-close.png</file>
+    <file>icons/x-office-drawing.png</file>
   </qresource>
 </RCC>
diff --git a/tools/lammps-gui/lammpsgui.ui b/tools/lammps-gui/lammpsgui.ui
index 77257b23c2..bb9af2e17e 100644
--- a/tools/lammps-gui/lammpsgui.ui
+++ b/tools/lammps-gui/lammpsgui.ui
@@ -105,7 +105,7 @@
   <widget class="QStatusBar" name="statusbar"/>
   <action name="actionNew">
    <property name="icon">
-    <iconset theme=":/document-new.png"/>
+    <iconset theme=":/icons/document-new.png"/>
    </property>
    <property name="text">
     <string>&amp;New</string>
@@ -116,7 +116,7 @@
   </action>
   <action name="actionOpen">
    <property name="icon">
-    <iconset theme=":/document-open.png"/>
+    <iconset theme=":/icons/document-open.png"/>
    </property>
    <property name="text">
     <string>&amp;Open</string>
@@ -127,7 +127,7 @@
   </action>
   <action name="actionSave">
    <property name="icon">
-    <iconset theme=":/document-save.png"/>
+    <iconset theme=":/icons/document-save.png"/>
    </property>
    <property name="text">
     <string>&amp;Save</string>
@@ -138,7 +138,7 @@
   </action>
   <action name="actionSave_As">
    <property name="icon">
-    <iconset theme=":/document-save-as.png"/>
+    <iconset theme=":/icons/document-save-as.png"/>
    </property>
    <property name="text">
     <string>Save &amp;As</string>
@@ -149,7 +149,7 @@
   </action>
   <action name="actionQuit">
    <property name="icon">
-    <iconset theme=":/application-exit.png"/>
+    <iconset theme=":/icons/application-exit.png"/>
    </property>
    <property name="text">
     <string>&amp;Quit</string>
@@ -160,7 +160,7 @@
   </action>
   <action name="actionCut">
    <property name="icon">
-    <iconset theme=":/edit-cut.png"/>
+    <iconset theme=":/icons/edit-cut.png"/>
    </property>
    <property name="text">
     <string>Cu&amp;t</string>
@@ -171,7 +171,7 @@
   </action>
   <action name="actionCopy">
    <property name="icon">
-    <iconset theme=":/edit-copy.png"/>
+    <iconset theme=":/icons/edit-copy.png"/>
    </property>
    <property name="text">
     <string>&amp;Copy</string>
@@ -182,7 +182,7 @@
   </action>
   <action name="actionPaste">
    <property name="icon">
-    <iconset theme=":/edit-paste.png"/>
+    <iconset theme=":/icons/edit-paste.png"/>
    </property>
    <property name="text">
     <string>&amp;Paste</string>
@@ -193,7 +193,7 @@
   </action>
   <action name="actionUndo">
    <property name="icon">
-    <iconset theme=":/edit-undo.png"/>
+    <iconset theme=":/icons/edit-undo.png"/>
    </property>
    <property name="text">
     <string>&amp;Undo</string>
@@ -204,7 +204,7 @@
   </action>
   <action name="actionRedo">
    <property name="icon">
-    <iconset theme=":/edit-redo.png"/>
+    <iconset theme=":/icons/edit-redo.png"/>
    </property>
    <property name="text">
     <string>&amp;Redo</string>
@@ -215,7 +215,7 @@
   </action>
   <action name="actionRun_Buffer">
    <property name="icon">
-    <iconset theme=":/system-run.png"/>
+    <iconset theme=":/icons/system-run.png"/>
    </property>
    <property name="text">
     <string>&amp;Run LAMMPS from Editor Buffer</string>
@@ -226,7 +226,7 @@
   </action>
   <action name="actionRun_File">
    <property name="icon">
-    <iconset theme=":/run-file.png"/>
+    <iconset theme=":/icons/run-file.png"/>
    </property>
    <property name="text">
     <string>&amp;Run LAMMPS from File</string>
@@ -237,7 +237,7 @@
   </action>
   <action name="actionStop_LAMMPS">
    <property name="icon">
-    <iconset theme=":/process-stop.png"/>
+    <iconset theme=":/icons/process-stop.png"/>
    </property>
    <property name="text">
     <string>&amp;Stop LAMMPS</string>
@@ -248,7 +248,7 @@
   </action>
   <action name="actionImage">
    <property name="icon">
-    <iconset theme=":/emblem-photos.png"/>
+    <iconset theme=":/icons/emblem-photos.png"/>
    </property>
    <property name="text">
     <string>Create &amp;Image</string>
@@ -259,7 +259,7 @@
   </action>
   <action name="actionAbout_LAMMPS_GUI">
    <property name="icon">
-    <iconset theme=":/help-about.png"/>
+    <iconset theme=":/icons/help-about.png"/>
    </property>
    <property name="text">
     <string>&amp;About LAMMPS</string>
@@ -270,7 +270,7 @@
   </action>
   <action name="action_Help">
    <property name="icon">
-    <iconset theme=":/help-faq.png"/>
+    <iconset theme=":/icons/help-faq.png"/>
    </property>
    <property name="text">
     <string>Quick &amp;Help</string>
@@ -281,7 +281,7 @@
   </action>
   <action name="actionPreferences">
    <property name="icon">
-    <iconset theme=":/preferences-desktop.png"/>
+    <iconset theme=":/icons/preferences-desktop.png"/>
    </property>
    <property name="text">
     <string>Pre&amp;ferences...</string>
@@ -292,7 +292,7 @@
   </action>
   <action name="actionLAMMPS_Manual">
    <property name="icon">
-    <iconset theme=":/help-browser.png"/>
+    <iconset theme=":/icons/help-browser.png"/>
    </property>
    <property name="text">
     <string>LAMMPS &amp;Manual</string>
@@ -303,7 +303,7 @@
   </action>
   <action name="actionDefaults">
    <property name="icon">
-    <iconset theme=":/document-revert.png"/>
+    <iconset theme=":/icons/document-revert.png"/>
    </property>
    <property name="text">
     <string>Reset to &amp;Defaults</string>
@@ -311,7 +311,7 @@
   </action>
   <action name="actionView_in_OVITO">
    <property name="icon">
-    <iconset theme=":/ovito.png"/>
+    <iconset theme=":/icons/ovito.png"/>
    </property>
    <property name="text">
     <string>View in &amp;OVITO</string>
@@ -322,7 +322,7 @@
   </action>
   <action name="actionView_in_VMD">
    <property name="icon">
-    <iconset theme=":/vmd.png"/>
+    <iconset theme=":/icons/vmd.png"/>
    </property>
    <property name="text">
     <string>View in VM&amp;D</string>
@@ -333,7 +333,7 @@
   </action>
   <action name="actionView_Log_Window">
    <property name="icon">
-    <iconset theme=":/utilities-terminal.png"/>
+    <iconset theme=":/icons/utilities-terminal.png"/>
    </property>
    <property name="text">
     <string>&amp;Log Window</string>
@@ -344,7 +344,7 @@
   </action>
   <action name="actionView_Graph_Window">
    <property name="icon">
-    <iconset theme=":/x-office-drawing.png"/>
+    <iconset theme=":/icons/x-office-drawing.png"/>
    </property>
    <property name="text">
     <string>&amp;Chart Window</string>
@@ -355,7 +355,7 @@
   </action>
   <action name="actionView_Slide_Show">
    <property name="icon">
-    <iconset theme=":/image-x-generic.png"/>
+    <iconset theme=":/icons/image-x-generic.png"/>
    </property>
    <property name="text">
     <string>&amp;Slide Show Window</string>
@@ -366,7 +366,7 @@
   </action>
   <action name="action_1">
    <property name="icon">
-    <iconset theme=":/document-open-recent.png"/>
+    <iconset theme=":/icons/document-open-recent.png"/>
    </property>
    <property name="text">
     <string>&amp;1.</string>
@@ -374,7 +374,7 @@
   </action>
   <action name="action_2">
    <property name="icon">
-    <iconset theme=":/document-open-recent.png"/>
+    <iconset theme=":/icons/document-open-recent.png"/>
    </property>
    <property name="text">
     <string>&amp;2.</string>
@@ -382,7 +382,7 @@
   </action>
   <action name="action_3">
    <property name="icon">
-    <iconset theme=":/document-open-recent.png"/>
+    <iconset theme=":/icons/document-open-recent.png"/>
    </property>
    <property name="text">
     <string>&amp;3.</string>
@@ -390,7 +390,7 @@
   </action>
   <action name="action_4">
    <property name="icon">
-    <iconset theme=":/document-open-recent.png"/>
+    <iconset theme=":/icons/document-open-recent.png"/>
    </property>
    <property name="text">
     <string>&amp;4.</string>
@@ -398,7 +398,7 @@
   </action>
   <action name="action_5">
    <property name="icon">
-    <iconset theme=":/document-open-recent.png"/>
+    <iconset theme=":/icons/document-open-recent.png"/>
    </property>
    <property name="text">
     <string>&amp;5.</string>
@@ -406,7 +406,7 @@
   </action>
   <action name="actionView_Image_Window">
    <property name="icon">
-    <iconset theme=":/emblem-photos.png"/>
+    <iconset theme=":/icons/emblem-photos.png"/>
    </property>
    <property name="text">
     <string>&amp;Image Window</string>
@@ -417,7 +417,7 @@
   </action>
   <action name="actionSet_Variables">
    <property name="icon">
-    <iconset theme=":/preferences-desktop-personal.png"/>
+    <iconset theme=":/icons/preferences-desktop-personal.png"/>
    </property>
    <property name="text">
     <string>Set &amp;Variables...</string>
@@ -428,7 +428,7 @@
   </action>
   <action name="actionView_Variable_Window">
    <property name="icon">
-    <iconset theme=":/preferences-desktop-personal.png"/>
+    <iconset theme=":/icons/preferences-desktop-personal.png"/>
    </property>
    <property name="text">
     <string>&amp;Variables Window</string>
@@ -439,7 +439,7 @@
   </action>
   <action name="actionLAMMPS_GUI_Howto">
    <property name="icon">
-    <iconset theme=":/system-help.png"/>
+    <iconset theme=":/icons/system-help.png"/>
    </property>
    <property name="text">
     <string>LAMMPS GUI Howto</string>
diff --git a/tools/lammps-gui/lammpswrapper.cpp b/tools/lammps-gui/lammpswrapper.cpp
index 024aae5301..f74a1c6575 100644
--- a/tools/lammps-gui/lammpswrapper.cpp
+++ b/tools/lammps-gui/lammpswrapper.cpp
@@ -19,7 +19,12 @@
 #include "library.h"
 #endif
 
-LammpsWrapper::LammpsWrapper() : lammps_handle(nullptr), plugin_handle(nullptr) {}
+LammpsWrapper::LammpsWrapper() : lammps_handle(nullptr)
+{
+#if defined(LAMMPS_GUI_USE_PLUGIN)
+    plugin_handle = nullptr; // this member is only declared when building in plugin mode
+#endif
+}
 
 void LammpsWrapper::open(int narg, char **args)
 {
@@ -32,6 +37,19 @@ void LammpsWrapper::open(int narg, char **args)
 #endif
 }
 
+// Query the LAMMPS version through the library interface.
+// Returns 0 as long as no LAMMPS handle exists.
+int LammpsWrapper::version()
+{
+    if (!lammps_handle) return 0;
+
+#if defined(LAMMPS_GUI_USE_PLUGIN)
+    return ((liblammpsplugin_t *)plugin_handle)->version(lammps_handle);
+#else
+    return lammps_version(lammps_handle);
+#endif
+}
+
 int LammpsWrapper::extract_setting(const char *keyword)
 {
     int val = 0;
diff --git a/tools/lammps-gui/lammpswrapper.h b/tools/lammps-gui/lammpswrapper.h
index f5d73a0d87..1d024a94e7 100644
--- a/tools/lammps-gui/lammpswrapper.h
+++ b/tools/lammps-gui/lammpswrapper.h
@@ -29,6 +29,7 @@ public:
 
     void force_timeout();
 
+    int version();
     int extract_setting(const char *keyword);
     void *extract_global(const char *keyword);
     void *extract_atom(const char *keyword);
@@ -57,7 +58,9 @@ public:
 
 private:
     void *lammps_handle;
+#if defined(LAMMPS_GUI_USE_PLUGIN)
     void *plugin_handle;
+#endif
 };
 #endif
 
diff --git a/tools/lammps-gui/logwindow.cpp b/tools/lammps-gui/logwindow.cpp
index 7091b34f43..73ec81d06c 100644
--- a/tools/lammps-gui/logwindow.cpp
+++ b/tools/lammps-gui/logwindow.cpp
@@ -12,12 +12,37 @@
 ------------------------------------------------------------------------- */
 
 #include "logwindow.h"
-#include <QSettings>
 
-LogWindow::LogWindow(QWidget *parent) : QPlainTextEdit(parent)
+#include "lammpsgui.h"
+
+#include <QAction>
+#include <QApplication>
+#include <QDir>
+#include <QFile>
+#include <QFileDialog>
+#include <QIcon>
+#include <QKeySequence>
+#include <QMenu>
+#include <QMessageBox>
+#include <QSettings>
+#include <QShortcut>
+#include <QString>
+#include <QTextStream>
+
+LogWindow::LogWindow(const QString &_filename, QWidget *parent) :
+    QPlainTextEdit(parent), filename(_filename)
 {
     QSettings settings;
     resize(settings.value("logx", 500).toInt(), settings.value("logy", 320).toInt());
+
+    auto action = new QShortcut(QKeySequence(Qt::CTRL | Qt::Key_S), this);
+    connect(action, &QShortcut::activated, this, &LogWindow::save_as); // Ctrl-S: save log
+    action = new QShortcut(QKeySequence(Qt::CTRL | Qt::Key_Q), this);
+    connect(action, &QShortcut::activated, this, &LogWindow::quit); // Ctrl-Q: quit
+    action = new QShortcut(QKeySequence(Qt::CTRL | Qt::Key_Slash), this);
+    connect(action, &QShortcut::activated, this, &LogWindow::stop_run); // Ctrl-/: stop run
+
+    installEventFilter(this); // pass this widget's own events through eventFilter()
 }
 
 void LogWindow::closeEvent(QCloseEvent *event)
@@ -30,6 +55,81 @@ void LogWindow::closeEvent(QCloseEvent *event)
     QPlainTextEdit::closeEvent(event);
 }
 
+void LogWindow::quit()
+{
+    LammpsGui *gui = nullptr; // main window, looked up by its object name
+    for (auto *toplevel : QApplication::topLevelWidgets())
+        if (toplevel->objectName() == "LammpsGui") gui = dynamic_cast<LammpsGui *>(toplevel);
+    if (gui) gui->quit();
+}
+
+void LogWindow::stop_run()
+{
+    LammpsGui *gui = nullptr; // main window, looked up by its object name
+    for (auto *toplevel : QApplication::topLevelWidgets())
+        if (toplevel->objectName() == "LammpsGui") gui = dynamic_cast<LammpsGui *>(toplevel);
+    if (gui) gui->stop_run();
+}
+
+// Ask for a file name and write the current text of the log window to that file.
+void LogWindow::save_as()
+{
+    QString defaultname = filename + ".log";
+    if (filename.isEmpty()) defaultname = "lammps.log";
+    QString logFileName = QFileDialog::getSaveFileName(this, "Save Log to File", defaultname,
+                                                       "Log files (*.log *.out *.txt)");
+    if (logFileName.isEmpty()) return; // no file name was selected
+
+    QFileInfo path(logFileName);
+    QFile file(path.absoluteFilePath());
+    if (!file.open(QIODevice::WriteOnly | QFile::Text)) {
+        QMessageBox::warning(this, "Warning", "Cannot save file: " + file.errorString());
+        return;
+    }
+
+    // back() must not be called on an empty string, so check for an empty log first
+    QTextStream out(&file);
+    QString text = toPlainText();
+    out << text;
+    if (!text.isEmpty() && (text.back().toLatin1() != '\n')) out << "\n"; // ensure final newline
+    file.close();
+}
+
+void LogWindow::contextMenuEvent(QContextMenuEvent *event)
+{
+    // standard context menu extended by entries for saving the log and closing the window
+    auto *popup = createStandardContextMenu();
+    popup->addSeparator();
+    auto *saveEntry = popup->addAction(QString("Save Log to File ..."));
+    saveEntry->setIcon(QIcon(":/icons/document-save-as.png"));
+    saveEntry->setShortcut(QKeySequence(Qt::CTRL | Qt::Key_S));
+    connect(saveEntry, &QAction::triggered, this, &LogWindow::save_as);
+    auto *closeEntry = popup->addAction("&Close Window", this, &QWidget::close);
+    closeEntry->setIcon(QIcon(":/icons/window-close.png"));
+    closeEntry->setShortcut(QKeySequence(Qt::CTRL | Qt::Key_W));
+    popup->exec(event->globalPos());
+    delete popup; // the menu is discarded once it has been shown
+}
+
+// event filter to handle "Ambiguous shortcut override" issues (Ctrl-/ and Ctrl-W)
+bool LogWindow::eventFilter(QObject *watched, QEvent *event)
+{
+    if (event->type() == QEvent::ShortcutOverride) {
+        auto *keyEvent = dynamic_cast<QKeyEvent *>(event);
+        const bool ctrl = keyEvent && keyEvent->modifiers().testFlag(Qt::ControlModifier);
+        if (ctrl && (keyEvent->key() == Qt::Key_Slash)) {
+            stop_run();
+            event->accept();
+            return true;
+        }
+        if (ctrl && (keyEvent->key() == Qt::Key_W)) {
+            close();
+            event->accept();
+            return true;
+        }
+    }
+    return QPlainTextEdit::eventFilter(watched, event); // direct base class, as in closeEvent()
+}
+
 // Local Variables:
 // c-basic-offset: 4
 // End:
diff --git a/tools/lammps-gui/logwindow.h b/tools/lammps-gui/logwindow.h
index 8807ef5cee..ad0691d0cc 100644
--- a/tools/lammps-gui/logwindow.h
+++ b/tools/lammps-gui/logwindow.h
@@ -20,10 +20,20 @@ class LogWindow : public QPlainTextEdit {
     Q_OBJECT
 
 public:
-    LogWindow(QWidget *parent = nullptr);
+    LogWindow(const QString &filename, QWidget *parent = nullptr);
+
+private slots:
+    void quit();
+    void save_as();
+    void stop_run();
 
 protected:
     void closeEvent(QCloseEvent *event) override;
+    void contextMenuEvent(QContextMenuEvent *event) override;
+    bool eventFilter(QObject *watched, QEvent *event) override;
+
+private:
+    QString filename;
 };
 
 #endif
diff --git a/tools/lammps-gui/main.cpp b/tools/lammps-gui/main.cpp
index bdd3885492..cf09fbb892 100644
--- a/tools/lammps-gui/main.cpp
+++ b/tools/lammps-gui/main.cpp
@@ -14,13 +14,25 @@
 #include "lammpsgui.h"
 
 #include <QApplication>
+#include <QFileInfo>
+
+#include <cstdio>
+#include <cstring>
 
 int main(int argc, char *argv[])
 {
     QApplication a(argc, argv);
 
     const char *infile = nullptr;
-    if (argc > 1) infile = argv[1];
+    if (argc > 1) {
+        infile = argv[1];
+        if ((strcmp(infile, "-help") == 0) || (strcmp(infile, "-h") == 0)) {
+            printf("This is LAMMPS-GUI version " LAMMPS_GUI_VERSION
+                   " using Qt version " QT_VERSION_STR "\n");
+            printf("Usage: %s [-h|-help|<inputfile>]\n", argv[0]);
+            return 1;
+        }
+    }
 
     LammpsGui w(nullptr, infile);
     w.show();
diff --git a/tools/lammps-gui/periodic_table.h b/tools/lammps-gui/periodic_table.h
deleted file mode 100644
index 70721dee95..0000000000
--- a/tools/lammps-gui/periodic_table.h
+++ /dev/null
@@ -1,206 +0,0 @@
-// clang-format off
-/***************************************************************************
- * RCS INFORMATION:
- *
- *      $RCSfile: periodic_table.h,v $
- *      $Author: johns $       $Locker:  $             $State: Exp $
- *      $Revision: 1.12 $       $Date: 2009/01/21 17:45:41 $
- *
- ***************************************************************************/
-
-/*
- * periodic table of elements and helper functions to convert
- * ordinal numbers to labels and back.
- * all tables and functions are declared static, so that it
- * can be safely included by all plugins that may need it.
- *
- * 2002-2009 akohlmey@cmm.chem.upenn.edu, vmd@ks.uiuc.edu
- */
-
-#include <string.h>
-#include <ctype.h>
-
-/* periodic table of elements for translation of ordinal to atom type */
-static const char *pte_label[] = {
-    "X",  "H",  "He", "Li", "Be", "B",  "C",  "N",  "O",  "F",  "Ne",
-    "Na", "Mg", "Al", "Si", "P" , "S",  "Cl", "Ar", "K",  "Ca", "Sc",
-    "Ti", "V",  "Cr", "Mn", "Fe", "Co", "Ni", "Cu", "Zn", "Ga", "Ge",
-    "As", "Se", "Br", "Kr", "Rb", "Sr", "Y",  "Zr", "Nb", "Mo", "Tc",
-    "Ru", "Rh", "Pd", "Ag", "Cd", "In", "Sn", "Sb", "Te", "I",  "Xe",
-    "Cs", "Ba", "La", "Ce", "Pr", "Nd", "Pm", "Sm", "Eu", "Gd", "Tb",
-    "Dy", "Ho", "Er", "Tm", "Yb", "Lu", "Hf", "Ta", "W",  "Re", "Os",
-    "Ir", "Pt", "Au", "Hg", "Tl", "Pb", "Bi", "Po", "At", "Rn", "Fr",
-    "Ra", "Ac", "Th", "Pa", "U",  "Np", "Pu", "Am", "Cm", "Bk", "Cf",
-    "Es", "Fm", "Md", "No", "Lr", "Rf", "Db", "Sg", "Bh", "Hs", "Mt",
-    "Ds", "Rg"
-};
-static const int nr_pte_entries = sizeof(pte_label) / sizeof(char *);
-
-/* corresponding table of masses. */
-static const double pte_mass[] = {
-    /* X  */ 0.00000, 1.00794, 4.00260, 6.941, 9.012182, 10.811,
-    /* C  */ 12.0107, 14.0067, 15.9994, 18.9984032, 20.1797,
-    /* Na */ 22.989770, 24.3050, 26.981538, 28.0855, 30.973761,
-    /* S  */ 32.065, 35.453, 39.948, 39.0983, 40.078, 44.955910,
-    /* Ti */ 47.867, 50.9415, 51.9961, 54.938049, 55.845, 58.9332,
-    /* Ni */ 58.6934, 63.546, 65.409, 69.723, 72.64, 74.92160,
-    /* Se */ 78.96, 79.904, 83.798, 85.4678, 87.62, 88.90585,
-    /* Zr */ 91.224, 92.90638, 95.94, 98.0, 101.07, 102.90550,
-    /* Pd */ 106.42, 107.8682, 112.411, 114.818, 118.710, 121.760,
-    /* Te */ 127.60, 126.90447, 131.293, 132.90545, 137.327,
-    /* La */ 138.9055, 140.116, 140.90765, 144.24, 145.0, 150.36,
-    /* Eu */ 151.964, 157.25, 158.92534, 162.500, 164.93032,
-    /* Er */ 167.259, 168.93421, 173.04, 174.967, 178.49, 180.9479,
-    /* W  */ 183.84, 186.207, 190.23, 192.217, 195.078, 196.96655,
-    /* Hg */ 200.59, 204.3833, 207.2, 208.98038, 209.0, 210.0, 222.0,
-    /* Fr */ 223.0, 226.0, 227.0, 232.0381, 231.03588, 238.02891,
-    /* Np */ 237.0, 244.0, 243.0, 247.0, 247.0, 251.0, 252.0, 257.0,
-    /* Md */ 258.0, 259.0, 262.0, 261.0, 262.0, 266.0, 264.0, 269.0,
-    /* Mt */ 268.0, 271.0, 272.0
-};
-
-/*
- * corresponding table of VDW radii.
- * van der Waals radii are taken from A. Bondi,
- * J. Phys. Chem., 68, 441 - 452, 1964,
- * except the value for H, which is taken from R.S. Rowland & R. Taylor,
- * J.Phys.Chem., 100, 7384 - 7391, 1996. Radii that are not available in
- * either of these publications have RvdW = 2.00 \AA
- * The radii for Ions (Na, K, Cl, Ca, Mg, and Cs are based on the CHARMM27
- * Rmin/2 parameters for (SOD, POT, CLA, CAL, MG, CES) by default.
- */
-static const double pte_vdw_radius[] = {
-    /* X  */ 1.5, 1.2, 1.4, 1.82, 2.0, 2.0,
-    /* C  */ 1.7, 1.55, 1.52, 1.47, 1.54,
-    /* Na */ 1.36, 1.18, 2.0, 2.1, 1.8,
-    /* S  */ 1.8, 2.27, 1.88, 1.76, 1.37, 2.0,
-    /* Ti */ 2.0, 2.0, 2.0, 2.0, 2.0, 2.0,
-    /* Ni */ 1.63, 1.4, 1.39, 1.07, 2.0, 1.85,
-    /* Se */ 1.9, 1.85, 2.02, 2.0, 2.0, 2.0,
-    /* Zr */ 2.0, 2.0, 2.0, 2.0, 2.0, 2.0,
-    /* Pd */ 1.63, 1.72, 1.58, 1.93, 2.17, 2.0,
-    /* Te */ 2.06, 1.98, 2.16, 2.1, 2.0,
-    /* La */ 2.0, 2.0, 2.0, 2.0, 2.0, 2.0,
-    /* Eu */ 2.0, 2.0, 2.0, 2.0, 2.0,
-    /* Er */ 2.0, 2.0, 2.0, 2.0, 2.0, 2.0,
-    /* W  */ 2.0, 2.0, 2.0, 2.0, 1.72, 1.66,
-    /* Hg */ 1.55, 1.96, 2.02, 2.0, 2.0, 2.0, 2.0,
-    /* Fr */ 2.0, 2.0, 2.0, 2.0, 2.0, 1.86,
-    /* Np */ 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0,
-    /* Md */ 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0,
-    /* Mt */ 2.0, 2.0, 2.0
-};
-
-/* lookup functions */
-
-static const char *get_pte_label(const int idx)
-{
-    if ((idx < 1) || (idx >= nr_pte_entries)) return pte_label[0];
-
-    return pte_label[idx];
-}
-
-static double get_pte_mass(const int idx)
-{
-    if ((idx < 1) || (idx >= nr_pte_entries)) return pte_mass[0];
-
-    return pte_mass[idx];
-}
-
-static double get_pte_vdw_radius(const int idx)
-{
-    if ((idx < 1) || (idx >= nr_pte_entries)) return pte_vdw_radius[0];
-
-#if 1
-    /* Replace with Hydrogen radius with an "all-atom" radius */
-    if (idx == 1)
-      return 1.0;    /* H  */
-#else
-    /* Replace with old VMD atom radii values */
-    switch (idx) {
-      case  1: return 1.0;    /* H  */
-      case  6: return 1.5;    /* C  */
-      case  7: return 1.4;    /* N  */
-      case  8: return 1.3;    /* O  */
-      case  9: return 1.2;    /* F  */
-      case 15: return 1.5;    /* P  */
-      case 16: return 1.9;    /* S  */
-    }
-#endif
-
-    return pte_vdw_radius[idx];
-}
-
-static int get_pte_idx(const char *label)
-{
-    int i;
-    char atom[3];
-
-    /* zap string */
-    atom[0] = (char) 0;
-    atom[1] = (char) 0;
-    atom[2] = (char) 0;
-    /* if we don't have a null-pointer, there must be at least two
-     * chars, which is all we need. we convert to the capitalization
-     * convention of the table above during assignment. */
-    if (label != NULL) {
-        atom[0] = (char) toupper((int) label[0]);
-        atom[1] = (char) tolower((int) label[1]);
-    }
-    /* discard numbers in atom label */
-    if (isdigit(atom[1])) atom[1] = (char) 0;
-
-    for (i=0; i < nr_pte_entries; ++i) {
-        if ( (pte_label[i][0] == atom[0])
-             && (pte_label[i][1] == atom[1]) ) return i;
-    }
-
-    return 0;
-}
-
-static int get_pte_idx_from_string(const char *label) {
-  int i, ind;
-  char atom[3];
-
-  if (label != NULL) {
-    /* zap string */
-    atom[0] = atom[1] = atom[2] = '\0';
-
-    for (ind=0,i=0; (ind<2) && (label[i]!='\0'); i++) {
-      if (label[i] != ' ') {
-        atom[ind] = toupper(label[i]);
-        ind++;
-      }
-    }
-
-    if (ind < 1)
-      return 0; /* no non-whitespace characters */
-
-    for (i=0; i < nr_pte_entries; ++i) {
-      if ((toupper(pte_label[i][0]) == atom[0]) && (toupper(pte_label[i][1]) == atom[1]))
-        return i;
-    }
-  }
-
-  return 0;
-}
-
-#if 0
-#include <stdio.h>
-
-int main() {
-  int i;
-
-  printf("Periodic table check/dump\n");
-  printf("  Table contains data for %d elements\n", nr_pte_entries);
-  printf("   Mass table size check: %d\n", sizeof(pte_mass) / sizeof(double));
-  printf("    VDW table size check: %d\n", sizeof(pte_vdw_radius) / sizeof(double));
-  printf("\n");
-  printf("Symbol Num    Mass   rVDW\n");
-  for (i=0; i<nr_pte_entries; i++) {
-    printf("   %-2s  %3d  %6.2f  %4.2f\n",
-      get_pte_label(i), i, get_pte_mass(i), get_pte_vdw_radius(i));
-  }
-  return 0;
-}
-#endif
diff --git a/tools/lammps-gui/preferences.cpp b/tools/lammps-gui/preferences.cpp
index c4711dac30..fd01bb5046 100644
--- a/tools/lammps-gui/preferences.cpp
+++ b/tools/lammps-gui/preferences.cpp
@@ -13,6 +13,7 @@
 
 #include "preferences.h"
 
+#include "helpers.h"
 #include "lammpsgui.h"
 #include "lammpswrapper.h"
 #include "ui_lammpsgui.h"
@@ -39,6 +40,7 @@
 #include <QSpacerItem>
 #include <QSpinBox>
 #include <QTabWidget>
+#include <QThread>
 #include <QVBoxLayout>
 
 #if defined(_OPENMP)
@@ -55,18 +57,10 @@
 #include <unistd.h>
 #endif
 
-// duplicate string
-static char *mystrdup(const std::string &text)
-{
-    auto tmp = new char[text.size() + 1];
-    memcpy(tmp, text.c_str(), text.size() + 1);
-    return tmp;
-}
-
 Preferences::Preferences(LammpsWrapper *_lammps, QWidget *parent) :
-    QDialog(parent), tabWidget(new QTabWidget),
+    QDialog(parent), need_relaunch(false), tabWidget(new QTabWidget),
     buttonBox(new QDialogButtonBox(QDialogButtonBox::Ok | QDialogButtonBox::Cancel)),
-    settings(new QSettings), lammps(_lammps), need_relaunch(false)
+    settings(new QSettings), lammps(_lammps)
 {
     tabWidget->addTab(new GeneralTab(settings, lammps), "&General Settings");
     tabWidget->addTab(new AcceleratorTab(settings, lammps), "&Accelerators");
@@ -80,7 +74,7 @@ Preferences::Preferences(LammpsWrapper *_lammps, QWidget *parent) :
     layout->addWidget(tabWidget);
     layout->addWidget(buttonBox);
     setLayout(layout);
-    setWindowIcon(QIcon(":/lammps-icon-128x128.png"));
+    setWindowIcon(QIcon(":/icons/lammps-icon-128x128.png"));
     setWindowTitle("LAMMPS-GUI - Preferences");
     resize(600, 450);
 }
@@ -116,10 +110,15 @@ void Preferences::accept()
         }
     }
 
-    // store number of threads
+    // store number of threads, reset to 1 for "None" and "Opt" settings
     QLineEdit *field = tabWidget->findChild<QLineEdit *>("nthreads");
-    if (field)
-        if (field->hasAcceptableInput()) settings->setValue("nthreads", field->text());
+    if (field) {
+        int accel = settings->value("accelerator", AcceleratorTab::None).toInt();
+        if ((accel == AcceleratorTab::None) || (accel == AcceleratorTab::Opt))
+            settings->setValue("nthreads", 1);
+        else if (field->hasAcceptableInput())
+            settings->setValue("nthreads", field->text());
+    }
 
     // store image width, height, zoom, and rendering settings
 
@@ -176,8 +175,8 @@ void Preferences::accept()
                                 "LAMMPS-GUI must be relaunched."),
                         QMessageBox::Ok);
         msg.exec();
-        const char *path = mystrdup(QCoreApplication::applicationFilePath().toStdString());
-        const char *arg0 = mystrdup(QCoreApplication::arguments().at(0).toStdString());
+        const char *path = mystrdup(QCoreApplication::applicationFilePath());
+        const char *arg0 = mystrdup(QCoreApplication::arguments().at(0));
         execl(path, arg0, (char *)NULL);
     }
 
@@ -206,7 +205,7 @@ GeneralTab::GeneralTab(QSettings *_settings, LammpsWrapper *_lammps, QWidget *pa
 {
     auto *layout = new QVBoxLayout;
 
-    auto *echo = new QCheckBox("Echo input to log");
+    auto *echo = new QCheckBox("Echo input to output buffer");
     echo->setObjectName("echo");
     echo->setCheckState(settings->value("echo", false).toBool() ? Qt::Checked : Qt::Unchecked);
     auto *cite = new QCheckBox("Include citation details");
@@ -223,16 +222,15 @@ GeneralTab::GeneralTab(QSettings *_settings, LammpsWrapper *_lammps, QWidget *pa
     sldv->setCheckState(settings->value("viewslide", true).toBool() ? Qt::Checked : Qt::Unchecked);
     auto *logr = new QCheckBox("Replace log window on new run");
     logr->setObjectName("logreplace");
-    logr->setCheckState(settings->value("logreplace", false).toBool() ? Qt::Checked
-                                                                      : Qt::Unchecked);
+    logr->setCheckState(settings->value("logreplace", true).toBool() ? Qt::Checked : Qt::Unchecked);
     auto *imgr = new QCheckBox("Replace image window on new render");
     imgr->setObjectName("imagereplace");
-    imgr->setCheckState(settings->value("imagereplace", false).toBool() ? Qt::Checked
-                                                                        : Qt::Unchecked);
+    imgr->setCheckState(settings->value("imagereplace", true).toBool() ? Qt::Checked
+                                                                       : Qt::Unchecked);
     auto *pltr = new QCheckBox("Replace chart window on new run");
     pltr->setObjectName("chartreplace");
-    pltr->setCheckState(settings->value("chartreplace", false).toBool() ? Qt::Checked
-                                                                        : Qt::Unchecked);
+    pltr->setCheckState(settings->value("chartreplace", true).toBool() ? Qt::Checked
+                                                                       : Qt::Unchecked);
 
 #if defined(LAMMPS_GUI_USE_PLUGIN)
     auto *pluginlabel = new QLabel("Path to LAMMPS Shared Library File:");
@@ -249,17 +247,17 @@ GeneralTab::GeneralTab(QSettings *_settings, LammpsWrapper *_lammps, QWidget *pa
 
     auto *fontlayout = new QHBoxLayout;
     auto *getallfont =
-        new QPushButton(QIcon(":/preferences-desktop-font.png"), "Select Default Font...");
+        new QPushButton(QIcon(":/icons/preferences-desktop-font.png"), "Select Default Font...");
     auto *gettextfont =
-        new QPushButton(QIcon(":/preferences-desktop-font.png"), "Select Text Font...");
+        new QPushButton(QIcon(":/icons/preferences-desktop-font.png"), "Select Text Font...");
     fontlayout->addWidget(getallfont);
     fontlayout->addWidget(gettextfont);
     connect(getallfont, &QPushButton::released, this, &GeneralTab::newallfont);
     connect(gettextfont, &QPushButton::released, this, &GeneralTab::newtextfont);
 
     auto *freqlayout = new QHBoxLayout;
-    auto *freqlabel = new QLabel("GUI update interval (ms)");
-    auto *freqval  = new QSpinBox;
+    auto *freqlabel  = new QLabel("GUI update interval (ms)");
+    auto *freqval    = new QSpinBox;
     freqval->setRange(1, 1000);
     freqval->setStepType(QAbstractSpinBox::AdaptiveDecimalStepType);
     freqval->setValue(settings->value("updfreq", "100").toInt());
@@ -288,12 +286,12 @@ GeneralTab::GeneralTab(QSettings *_settings, LammpsWrapper *_lammps, QWidget *pa
 
 void GeneralTab::updatefonts(const QFont &all, const QFont &text)
 {
-    LammpsGui *main;
+    LammpsGui *main = nullptr; // stays null if no top-level "LammpsGui" widget exists
     for (QWidget *widget : QApplication::topLevelWidgets())
         if (widget->objectName() == "LammpsGui") main = dynamic_cast<LammpsGui *>(widget);
 
     QApplication::setFont(all);
-    main->ui->textEdit->document()->setDefaultFont(text);
+    if (main) main->ui->textEdit->document()->setDefaultFont(text); // needs the main window
 }
 
 void GeneralTab::newallfont()
@@ -408,15 +406,23 @@ AcceleratorTab::AcceleratorTab(QSettings *_settings, LammpsWrapper *_lammps, QWi
 
     int maxthreads = 1;
 #if defined(_OPENMP)
-    maxthreads = omp_get_max_threads();
+    maxthreads = QThread::idealThreadCount();
 #endif
     auto *choices      = new QFrame;
     auto *choiceLayout = new QVBoxLayout;
-    auto *ntlabel      = new QLabel("Number of threads:");
+#if defined(_OPENMP)
+    auto *ntlabel      = new QLabel(QString("Number of threads (max %1):").arg(maxthreads));
     auto *ntchoice     = new QLineEdit(settings->value("nthreads", maxthreads).toString());
+#else
+    auto *ntlabel      = new QLabel(QString("Number of threads (OpenMP not available):"));
+    auto *ntchoice     = new QLineEdit("1");
+#endif
     auto *intval       = new QIntValidator(1, maxthreads, this);
     ntchoice->setValidator(intval);
     ntchoice->setObjectName("nthreads");
+#if !defined(_OPENMP)
+    ntchoice->setEnabled(false);
+#endif
 
     choiceLayout->addWidget(ntlabel);
     choiceLayout->addWidget(ntchoice);
diff --git a/tools/lammps-gui/setvariables.cpp b/tools/lammps-gui/setvariables.cpp
index fbbacb70bd..db5eb1cdea 100644
--- a/tools/lammps-gui/setvariables.cpp
+++ b/tools/lammps-gui/setvariables.cpp
@@ -32,7 +32,7 @@ SetVariables::SetVariables(QList<QPair<QString, QString>> &_vars, QWidget *paren
         auto *row  = new QHBoxLayout;
         auto *name = new QLineEdit(v.first);
         auto *val  = new QLineEdit(v.second);
-        auto *del  = new QPushButton(QIcon(":/edit-delete.png"), "");
+        auto *del  = new QPushButton(QIcon(":/icons/edit-delete.png"), "");
         name->setObjectName("varname");
         val->setObjectName("varval");
         del->setObjectName(QString::number(i));
@@ -55,7 +55,7 @@ SetVariables::SetVariables(QList<QPair<QString, QString>> &_vars, QWidget *paren
 
     layout->addWidget(buttonBox);
     setLayout(layout);
-    setWindowIcon(QIcon(":/lammps-icon-128x128.png"));
+    setWindowIcon(QIcon(":/icons/lammps-icon-128x128.png"));
     setWindowTitle("LAMMPS-GUI - Set Variables");
     resize(300, 200);
 }
@@ -81,7 +81,7 @@ void SetVariables::add_row()
     auto *row  = new QHBoxLayout;
     auto *name = new QLineEdit(QString());
     auto *val  = new QLineEdit(QString());
-    auto *del  = new QPushButton(QIcon(":/edit-delete.png"), "");
+    auto *del  = new QPushButton(QIcon(":/icons/edit-delete.png"), "");
     name->setObjectName("varname");
     val->setObjectName("varval");
     del->setObjectName(QString::number(nrows - 2));
diff --git a/tools/lammps-gui/slideshow.cpp b/tools/lammps-gui/slideshow.cpp
index a42a67f8a2..140c703ca3 100644
--- a/tools/lammps-gui/slideshow.cpp
+++ b/tools/lammps-gui/slideshow.cpp
@@ -13,19 +13,28 @@
 
 #include "slideshow.h"
 
+#include "helpers.h"
+#include "lammpsgui.h"
+
+#include <QApplication>
 #include <QDialogButtonBox>
+#include <QDir>
+#include <QFileDialog>
 #include <QFileInfo>
 #include <QGuiApplication>
 #include <QHBoxLayout>
 #include <QImage>
 #include <QImageReader>
+#include <QKeySequence>
 #include <QLabel>
 #include <QPalette>
+#include <QProcess>
 #include <QPushButton>
 #include <QScreen>
 #include <QSettings>
 #include <QShortcut>
 #include <QSpacerItem>
+#include <QTemporaryFile>
 #include <QTimer>
 #include <QVBoxLayout>
 
@@ -42,8 +51,12 @@ SlideShow::SlideShow(const QString &fileName, QWidget *parent) :
     imageName->setAlignment(Qt::AlignCenter);
     imageName->setSizePolicy(QSizePolicy::Expanding, QSizePolicy::Expanding);
 
-    auto *shortcut = new QShortcut(QKeySequence(Qt::CTRL + Qt::Key_W), this);
-    QObject::connect(shortcut, &QShortcut::activated, this, &SlideShow::close);
+    auto *shortcut = new QShortcut(QKeySequence(Qt::CTRL | Qt::Key_W), this);
+    QObject::connect(shortcut, &QShortcut::activated, this, &QWidget::close);
+    shortcut = new QShortcut(QKeySequence(Qt::CTRL | Qt::Key_Slash), this);
+    QObject::connect(shortcut, &QShortcut::activated, this, &SlideShow::stop_run);
+    shortcut = new QShortcut(QKeySequence(Qt::CTRL | Qt::Key_Q), this);
+    QObject::connect(shortcut, &QShortcut::activated, this, &SlideShow::quit);
 
     buttonBox = new QDialogButtonBox(QDialogButtonBox::Close);
 
@@ -58,31 +71,36 @@ SlideShow::SlideShow(const QString &fileName, QWidget *parent) :
     auto *dummy = new QPushButton(QIcon(), "");
     dummy->hide();
 
-    auto *gofirst = new QPushButton(QIcon(":/go-first.png"), "");
+    auto *tomovie = new QPushButton(QIcon(":/icons/export-movie.png"), "");
+    tomovie->setToolTip("Export to movie file");
+    tomovie->setEnabled(has_exe("ffmpeg"));
+
+    auto *gofirst = new QPushButton(QIcon(":/icons/go-first.png"), "");
     gofirst->setToolTip("Go to first Image");
-    auto *goprev = new QPushButton(QIcon(":/go-previous-2.png"), "");
+    auto *goprev = new QPushButton(QIcon(":/icons/go-previous-2.png"), "");
     goprev->setToolTip("Go to previous Image");
-    auto *goplay = new QPushButton(QIcon(":/media-playback-start-2.png"), "");
+    auto *goplay = new QPushButton(QIcon(":/icons/media-playback-start-2.png"), "");
     goplay->setToolTip("Play animation");
     goplay->setCheckable(true);
     goplay->setChecked(playtimer);
     goplay->setObjectName("play");
-    auto *gonext = new QPushButton(QIcon(":/go-next-2.png"), "");
+    auto *gonext = new QPushButton(QIcon(":/icons/go-next-2.png"), "");
     gonext->setToolTip("Go to next Image");
-    auto *golast = new QPushButton(QIcon(":/go-last.png"), "");
+    auto *golast = new QPushButton(QIcon(":/icons/go-last.png"), "");
     golast->setToolTip("Go to last Image");
-    auto *goloop = new QPushButton(QIcon(":/media-playlist-repeat.png"), "");
+    auto *goloop = new QPushButton(QIcon(":/icons/media-playlist-repeat.png"), "");
     goloop->setToolTip("Loop animation");
     goloop->setCheckable(true);
     goloop->setChecked(do_loop);
 
-    auto *zoomin = new QPushButton(QIcon(":/gtk-zoom-in.png"), "");
+    auto *zoomin = new QPushButton(QIcon(":/icons/gtk-zoom-in.png"), "");
     zoomin->setToolTip("Zoom in by 10 percent");
-    auto *zoomout = new QPushButton(QIcon(":/gtk-zoom-out.png"), "");
+    auto *zoomout = new QPushButton(QIcon(":/icons/gtk-zoom-out.png"), "");
     zoomout->setToolTip("Zoom out by 10 percent");
-    auto *normal = new QPushButton(QIcon(":/gtk-zoom-fit.png"), "");
+    auto *normal = new QPushButton(QIcon(":/icons/gtk-zoom-fit.png"), "");
     normal->setToolTip("Reset zoom to normal");
 
+    connect(tomovie, &QPushButton::released, this, &SlideShow::movie);
     connect(gofirst, &QPushButton::released, this, &SlideShow::first);
     connect(goprev, &QPushButton::released, this, &SlideShow::prev);
     connect(goplay, &QPushButton::released, this, &SlideShow::play);
@@ -96,6 +114,7 @@ SlideShow::SlideShow(const QString &fileName, QWidget *parent) :
 
     navLayout->addSpacerItem(new QSpacerItem(10, 10, QSizePolicy::Expanding, QSizePolicy::Minimum));
     navLayout->addWidget(dummy);
+    navLayout->addWidget(tomovie);
     navLayout->addWidget(gofirst);
     navLayout->addWidget(goprev);
     navLayout->addWidget(goplay);
@@ -115,7 +134,7 @@ SlideShow::SlideShow(const QString &fileName, QWidget *parent) :
     botLayout->setStretch(0, 3);
     mainLayout->addLayout(botLayout);
 
-    setWindowIcon(QIcon(":/lammps-icon-128x128.png"));
+    setWindowIcon(QIcon(":/icons/lammps-icon-128x128.png"));
     setWindowTitle(QString("LAMMPS-GUI - Slide Show: ") + QFileInfo(fileName).fileName());
 
     imagefiles.clear();
@@ -178,6 +197,62 @@ void SlideShow::loadImage(int idx)
     } while (idx >= 0);
 }
 
+void SlideShow::quit()
+{
+    LammpsGui *gui = nullptr; // remains null without a top-level "LammpsGui" widget
+    for (QWidget *w : QApplication::topLevelWidgets())
+        if (w->objectName() == "LammpsGui") gui = dynamic_cast<LammpsGui *>(w);
+    if (gui != nullptr) gui->quit(); // forward the request to the LammpsGui instance
+}
+
+void SlideShow::stop_run()
+{
+    LammpsGui *gui = nullptr; // remains null without a top-level "LammpsGui" widget
+    for (QWidget *w : QApplication::topLevelWidgets())
+        if (w->objectName() == "LammpsGui") gui = dynamic_cast<LammpsGui *>(w);
+    if (gui != nullptr) gui->stop_run(); // forward the request to the LammpsGui instance
+}
+
+// Export the loaded images as a movie: ask for an output file name, write an ffmpeg
+// concat list of all frames to a temporary file, and run ffmpeg on it until it exits.
+void SlideShow::movie()
+{
+    QString fileName = QFileDialog::getSaveFileName(this, "Export to Movie File", ".",
+                                                    "Movie Files (*.mpg *.mp4 *.mkv *.avi *.mpeg)");
+    if (fileName.isEmpty()) return;
+
+    // one "file '<path>'" entry per image in imagefiles
+    QDir curdir(".");
+    QTemporaryFile concatfile;
+    if (!concatfile.open()) return; // without a list file there is nothing to pass to ffmpeg
+    for (const auto &image : imagefiles) {
+        // inside the quoted path a single quote must be written as '\'' for ffmpeg
+        QString path = curdir.absoluteFilePath(image);
+        path.replace(QLatin1String("'"), QLatin1String("'\\''"));
+        concatfile.write("file '");
+        concatfile.write(path.toLocal8Bit());
+        concatfile.write("'\n");
+    }
+    concatfile.close();
+
+    // "-r 10" in front of "-i" is the input frame rate, "-r 24" behind it the output rate
+    QStringList args = {"-y", "-safe", "0", "-r", "10", "-f", "concat", "-i",
+                        concatfile.fileName()};
+    if (scaleFactor != 1.0) {
+        args << "-vf" << QString("scale=iw*%1:-1").arg(scaleFactor);
+    }
+    args << "-b:v"
+         << "2000k";
+    args << "-r"
+         << "24";
+    args << fileName;
+
+    // run ffmpeg and wait (without timeout) for it to finish
+    QProcess ffmpeg;
+    ffmpeg.start("ffmpeg", args);
+    ffmpeg.waitForFinished(-1);
+}
+
 void SlideShow::first()
 {
     current = 0;
diff --git a/tools/lammps-gui/slideshow.h b/tools/lammps-gui/slideshow.h
index fe357ec564..1b5e977643 100644
--- a/tools/lammps-gui/slideshow.h
+++ b/tools/lammps-gui/slideshow.h
@@ -32,6 +32,9 @@ public:
     void clear();
 
 private slots:
+    void quit();
+    void stop_run();
+    void movie();
     void first();
     void last();
     void next();
diff --git a/tools/lammps-gui/stdcapture.cpp b/tools/lammps-gui/stdcapture.cpp
index 428277cc10..b09aebf053 100644
--- a/tools/lammps-gui/stdcapture.cpp
+++ b/tools/lammps-gui/stdcapture.cpp
@@ -77,6 +77,7 @@ bool StdCapture::EndCapture()
 
     int bytesRead;
     bool fd_blocked;
+    int maxwait = 100;
 
     do {
         bytesRead  = 0;
@@ -93,9 +94,10 @@ bool StdCapture::EndCapture()
             buf[bytesRead] = 0;
             m_captured += buf;
         } else if (bytesRead < 0) {
-            fd_blocked = ((errno == EAGAIN) || (errno == EWOULDBLOCK) || (errno == EINTR));
+            fd_blocked = ((errno == EAGAIN) || (errno == EWOULDBLOCK) || (errno == EINTR)) && (maxwait > 0);
 
             if (fd_blocked) std::this_thread::sleep_for(std::chrono::milliseconds(10));
+            --maxwait;
         }
     } while (fd_blocked || (bytesRead == (bufSize - 1)));
     m_capturing = false;
diff --git a/tools/lammps-gui/stdcapture.h b/tools/lammps-gui/stdcapture.h
index ee8bb44dd3..3df6835aa9 100644
--- a/tools/lammps-gui/stdcapture.h
+++ b/tools/lammps-gui/stdcapture.h
@@ -31,7 +31,6 @@ private:
     int m_pipe[2];
     int m_oldStdOut;
     bool m_capturing;
-    bool m_init;
     std::string m_captured;
 
     static constexpr int bufSize = 1025;
diff --git a/tools/lammps-gui/update-help-index.sh b/tools/lammps-gui/update-help-index.sh
index 5bb57c7148..77b924d38d 100755
--- a/tools/lammps-gui/update-help-index.sh
+++ b/tools/lammps-gui/update-help-index.sh
@@ -1,4 +1,6 @@
 #!/bin/sh
 # this updates the help index table
 
+mv help_index.table help_index.oldtable # keep the previous table for the comparison below
 grep '\.\. index::' ../../doc/src/*.rst | sort  | sed -e 's/^.*src\/\([^/]\+\)\.rst:/\1.html /' -e 's/\.\. \+index:: \+//' > help_index.table
+cmp help_index.table help_index.oldtable > /dev/null || touch lammpsgui.qrc # table changed or not comparable
diff --git a/unittest/formats/CMakeLists.txt b/unittest/formats/CMakeLists.txt
index 93ea2f3b32..58c797b6e6 100644
--- a/unittest/formats/CMakeLists.txt
+++ b/unittest/formats/CMakeLists.txt
@@ -41,6 +41,8 @@ set_tests_properties(TextFileReader PROPERTIES ENVIRONMENT "LAMMPS_POTENTIALS=${
 add_executable(test_file_operations test_file_operations.cpp)
 target_link_libraries(test_file_operations PRIVATE lammps GTest::GMock)
 add_test(NAME FileOperations COMMAND test_file_operations)
+# try to mitigate possible OpenMPI bug (not on TextFileReader: that would replace its LAMMPS_POTENTIALS)
+set_tests_properties(FileOperations PROPERTIES ENVIRONMENT "OMPI_MCA_sharedfp=\"^sm\"")
 
 add_executable(test_dump_atom test_dump_atom.cpp)
 target_link_libraries(test_dump_atom PRIVATE lammps GTest::GMock)