Merge remote-tracking branch 'upstream/develop' into compute-pace-update

2023-10-19 12:22:10 -06:00
parent 2ac32872e2 f641d88f86
commit ac59f47ea0
155 changed files with 4614 additions and 2097 deletions
--- a/cmake/Modules/Packages/ML-PACE.cmake
+++ b/cmake/Modules/Packages/ML-PACE.cmake
@ -1,6 +1,6 @@
-set(PACELIB_URL "https://github.com/ICAMS/lammps-user-pace/archive/refs/tags/v.2023.10.04.pre.tar.gz" CACHE STRING "URL for PACE evaluator library sources")
+set(PACELIB_URL "https://github.com/ICAMS/lammps-user-pace/archive/refs/tags/v.2023.10.04.tar.gz" CACHE STRING "URL for PACE evaluator library sources")
-set(PACELIB_MD5 "61ba11a37ee00de8365b18b521d394a6" CACHE STRING "MD5 checksum of PACE evaluator library tarball")
+set(PACELIB_MD5 "70ff79f4e59af175e55d24f3243ad1ff" CACHE STRING "MD5 checksum of PACE evaluator library tarball")
 mark_as_advanced(PACELIB_URL)
 mark_as_advanced(PACELIB_MD5)
 GetFallbackURL(PACELIB_URL PACELIB_FALLBACK)
--- a/doc/src/Commands_fix.rst
+++ b/doc/src/Commands_fix.rst
@ -69,7 +69,7 @@ OPT.
   * :doc:`drude/transform/inverse <fix_drude_transform>`
   * :doc:`dt/reset (k) <fix_dt_reset>`
   * :doc:`edpd/source <fix_dpd_source>`
-   * :doc:`efield <fix_efield>`
+   * :doc:`efield (k) <fix_efield>`
   * :doc:`efield/tip4p <fix_efield>`
   * :doc:`ehex <fix_ehex>`
   * :doc:`electrode/conp (i) <fix_electrode>`
@ -233,7 +233,7 @@ OPT.
   * :doc:`spring <fix_spring>`
   * :doc:`spring/chunk <fix_spring_chunk>`
   * :doc:`spring/rg <fix_spring_rg>`
-   * :doc:`spring/self <fix_spring_self>`
+   * :doc:`spring/self (k) <fix_spring_self>`
   * :doc:`srd <fix_srd>`
   * :doc:`store/force <fix_store_force>`
   * :doc:`store/state <fix_store_state>`
--- a/doc/src/Commands_pair.rst
+++ b/doc/src/Commands_pair.rst
@ -305,5 +305,5 @@ OPT.
   * :doc:`wf/cut <pair_wf_cut>`
   * :doc:`ylz <pair_ylz>`
   * :doc:`yukawa (gko) <pair_yukawa>`
-   * :doc:`yukawa/colloid (go) <pair_yukawa_colloid>`
+   * :doc:`yukawa/colloid (gko) <pair_yukawa_colloid>`
   * :doc:`zbl (gko) <pair_zbl>`
--- a/doc/src/Howto_output.rst
+++ b/doc/src/Howto_output.rst
@ -1,7 +1,7 @@
 Output from LAMMPS (thermo, dumps, computes, fixes, variables)
 ==============================================================
-There are four basic kinds of LAMMPS output:
+There are four basic forms of LAMMPS output:
 * :doc:`Thermodynamic output <thermo_style>`, which is a list of
  quantities printed every few timesteps to the screen and logfile.
@ -20,18 +20,17 @@ output files, depending on what :doc:`dump <dump>` and :doc:`fix <fix>`
 commands you specify.
 As discussed below, LAMMPS gives you a variety of ways to determine
-what quantities are computed and printed when the thermodynamics,
+what quantities are calculated and printed when the thermodynamics,
 dump, or fix commands listed above perform output.  Throughout this
 discussion, note that users can also :doc:`add their own computes and
-fixes to LAMMPS <Modify>` which can then generate values that can then
+fixes to LAMMPS <Modify>` which can generate values that can then be
-be output with these commands.
+output with these commands.
 The following subsections discuss different LAMMPS commands related
 to output and the kind of data they operate on and produce:
 * :ref:`Global/per-atom/local/per-grid data <global>`
 * :ref:`Scalar/vector/array data <scalar>`
 * :ref:`Per-grid data <grid>`
 * :ref:`Disambiguation <disambiguation>`
 * :ref:`Thermodynamic output <thermo>`
 * :ref:`Dump file output <dump>`
@ -48,34 +47,65 @@ to output and the kind of data they operate on and produce:
 Global/per-atom/local/per-grid data
 -----------------------------------
-Various output-related commands work with four different styles of
+Various output-related commands work with four different "styles" of
 data: global, per-atom, local, and per-grid.  A global datum is one or
 more system-wide values, e.g. the temperature of the system.  A
 per-atom datum is one or more values per atom, e.g. the kinetic energy
 of each atom.  Local datums are calculated by each processor based on
-the atoms it owns, but there may be zero or more per atom, e.g. a list
+the atoms it owns, and there may be zero or more per atom, e.g. a list
 of bond distances.
 A per-grid datum is one or more values per grid cell, for a grid which
-overlays the simulation domain.  The grid cells and the data they
+overlays the simulation domain.  Similar to atoms and per-atom data,
-store are distributed across processors; each processor owns the grid
+the grid cells and the data they store are distributed across
-cells whose center point falls within its subdomain.
+processors; each processor owns the grid cells whose center points
 fall within its subdomain.
 .. _scalar:
 Scalar/vector/array data
 ------------------------
-Global, per-atom, and local datums can come in three kinds: a single
+Global, per-atom, local, and per-grid datums can come in three
-scalar value, a vector of values, or a 2d array of values.  The doc
+"kinds": a single scalar value, a vector of values, or a 2d array of
-page for a "compute" or "fix" or "variable" that generates data will
+values.  More specifically these are the valid kinds for each style:
 specify both the style and kind of data it produces, e.g. a per-atom
 vector.
-When a quantity is accessed, as in many of the output commands
+* global scalar
-discussed below, it can be referenced via the following bracket
+* global vector
-notation, where ID in this case is the ID of a compute.  The leading
+* global array
-"c\_" would be replaced by "f\_" for a fix, or "v\_" for a variable:
+* per-atom vector
 * per-atom array
 * local vector
 * local array
 * per-grid vector
 * per-grid array
 A per-atom vector means a single value per atom; the "vector" is the
 length of the number of atoms.  A per-atom array means multiple values
 per atom.  Similarly a local vector or array means one or multiple
 values per entity (e.g. per bond in the system).  And a per-grid
 vector or array means one or multiple values per grid cell.
 The doc page for a compute or fix or variable that generates data will
 specify both the styles and kinds of data it produces, e.g. a per-atom
 vector.  Note that a compute or fix may generate multiple styles and
 kinds of output.  However, for per-atom data only a vector or array is
 output, never both.  Likewise for local and per-grid data.  An
 example of a fix which generates multiple styles and kinds of data is
 the :doc:`fix mdi/qm <fix_mdi_qm>` command.  It outputs a global
 scalar, global vector, and per-atom array for the quantum mechanical
 energy and virial of the system and forces on each atom.
 By contrast, different variable styles generate only a single kind of
 data: a global scalar for an equal-style variable, global vector for a
 vector-style variable, and a per-atom vector for an atom-style
 variable.
 When data is accessed by another command, as in many of the output
 commands discussed below, it can be referenced via the following
 bracket notation, where ID in this case is the ID of a compute.  The
 leading "c\_" would be replaced by "f\_" for a fix, or "v\_" for a
 variable (and ID would be the name of the variable):
 +-------------+--------------------------------------------+
 | c_ID        | entire scalar, vector, or array            |
@ -85,40 +115,56 @@ notation, where ID in this case is the ID of a compute.  The leading
 | c_ID[I][J]  | one element of array                       |
 +-------------+--------------------------------------------+
-In other words, using one bracket reduces the dimension of the data
+Note that using one bracket reduces the dimension of the data once
-once (vector -> scalar, array -> vector).  Using two brackets reduces
+(vector -> scalar, array -> vector).  Using two brackets reduces the
-the dimension twice (array -> scalar).  Thus a command that uses
+dimension twice (array -> scalar).  Thus a command that uses scalar
-scalar values as input can typically also process elements of a vector
+values as input can also conceptually operate on an element of a
-or array.
+vector or array.
-.. _grid:
+Per-grid vectors or arrays are accessed similarly, except that the ID
-
+for the compute or fix includes a grid name and a data name.  This is
-Per-grid data
+because a fix or compute can create multiple grids (of different
------------------------
+sizes) and multiple sets of data (for each grid).  The fix or compute
-
+defines names for each grid and for each data set, so that all of them
-Per-grid data can come in two kinds: a vector of values (one per grid
+can be accessed by other commands.  See the :doc:`Howto grid
-cekk), or a 2d array of values (multiple values per grid ckk).  The
+<Howto_grid>` doc page for more details.
 doc page for a "compute" or "fix" that generates data will specify
 names for both the grid(s) and datum(s) it produces, e.g. per-grid
 vectors or arrays, which can be referenced by other commands.  See the
 :doc:`Howto grid <Howto_grid>` doc page for more details.
 .. _disambiguation:
 Disambiguation
 --------------
-Some computes and fixes produce data in multiple styles, e.g. a global
+When a compute or fix produces data in multiple styles, e.g. global
-scalar and a per-atom vector. Usually the context in which the input
+and per-atom, a reference to the data can sometimes be ambiguous.
-script references the data determines which style is meant. Example:
+Usually the context in which the input script references the data
-if a compute provides both a global scalar and a per-atom vector, the
+determines which style is meant.
-former will be accessed by using ``c_ID`` in an equal-style variable,
+
-while the latter will be accessed by using ``c_ID`` in an atom-style
+For example, if a compute outputs a global vector and a per-atom
-variable.  Note that atom-style variable formulas can also access
+array, an element of the global vector will be accessed by using
-global scalars, but in this case it is not possible to do this
+``c_ID[I]`` in :doc:`thermodynamic output <thermo_style>`, while a
-directly because of the ambiguity.  Instead, an equal-style variable
+column of the per-atom array will be accessed by using ``c_ID[I]`` in
-can be defined which accesses the global scalar, and that variable can
+a :doc:`dump custom <dump>` command.
-be used in the atom-style variable formula in place of ``c_ID``.
+
 However, if an :doc:`atom-style variable <variable>` references
 ``c_ID[I]``, then it could be intended to refer to a single element of
 the global vector or a column of the per-atom array.  The doc page for
 any command that has a potential ambiguity (variables are the most
 common) will explain how to resolve the ambiguity.
 In this case, an atom-style variable references per-atom data if it
 exists.  If access to an element of a global vector is needed (as in
 this example), an equal-style variable which references the value can
 be defined and used in the atom-style variable formula instead.
 Similarly, :doc:`thermodynamic output <thermo_style>` can only
 reference global data from a compute or fix.  But you can indirectly
 access per-atom data as follows.  The reference ``c_ID[245][2]`` for
 the ID of a :doc:`compute displace/atom <compute_displace_atom>`
 command, refers to the y-component of displacement for the atom with
 ID 245.  While you cannot use that reference directly in the
 :doc:`thermo_style <thermo_style>` command, you can use it in an
 equal-style variable formula, and then reference the variable in
 thermodynamic output.
 .. _thermo:
@ -389,7 +435,7 @@ output and input data types must match, e.g. global/per-atom/local
 data and scalar/vector/array data.
 Also note that, as described above, when a command takes a scalar as
-input, that could be an element of a vector or array.  Likewise a
+input, that could also be an element of a vector or array.  Likewise a
 vector input could be a column of an array.
 +--------------------------------------------------------+----------------------------------------------+----------------------------------------------------+
--- a/doc/src/Howto_triclinic.rst
+++ b/doc/src/Howto_triclinic.rst
@ -12,7 +12,8 @@ is created, e.g. by the :doc:`create_box <create_box>` or
 :doc:`read_data <read_data>` or :doc:`read_restart <read_restart>`
 commands.  Additionally, LAMMPS defines box size parameters lx,ly,lz
 where lx = xhi-xlo, and similarly in the y and z dimensions.  The 6
-parameters, as well as lx,ly,lz, can be output via the :doc:`thermo_style custom <thermo_style>` command.
+parameters, as well as lx,ly,lz, can be output via the
 :doc:`thermo_style custom <thermo_style>` command.
 LAMMPS also allows simulations to be performed in triclinic
 (non-orthogonal) simulation boxes shaped as a parallelepiped with
--- a/doc/src/Tools.rst
+++ b/doc/src/Tools.rst
@ -702,11 +702,15 @@ Prerequisites and portability
 LAMMPS GUI is programmed in C++ based on the C++11 standard and using
 the `Qt GUI framework <https://www.qt.io/product/framework>`_.
 Currently, Qt version 5.12 or later is required; Qt 5.15LTS is
-recommended; Qt 6.x not (yet) supported.  Building LAMMPS with CMake is
+recommended; support for Qt version 6.x is under active development and
-required.  The LAMMPS GUI has been successfully compiled and tested on:
+thus far only tested with Qt 6.5LTS on Linux.  Building LAMMPS with
 CMake is required.
 The LAMMPS GUI has been successfully compiled and tested on:
 - Ubuntu Linux 20.04LTS x86_64 using GCC 9, Qt version 5.12
 - Fedora Linux 38 x86\_64 using GCC 13 and Clang 16, Qt version 5.15LTS
 - Fedora Linux 38 x86\_64 using GCC 13, Qt version 6.5LTS
 - Apple macOS 12 (Monterey) and macOS 13 (Ventura) with Xcode on arm64 and x86\_64, Qt version 5.15LTS
 - Windows 10 and 11 x86_64 with Visual Studio 2022 and Visual C++ 14.36, Qt version 5.15LTS
 - Windows 10 and 11 x86_64 with MinGW / GCC 10.0 cross-compiler on Fedora 38, Qt version 5.15LTS
@ -717,7 +721,7 @@ required.  The LAMMPS GUI has been successfully compiled and tested on:
 Pre-compiled executables
 ^^^^^^^^^^^^^^^^^^^^^^^^
-Pre-compiled LAMMPS executables including the GUI are currently
+Pre-compiled LAMMPS executable packages that include the GUI are currently
 available from https://download.lammps.org/static or
 https://github.com/lammps/lammps/releases.  You can unpack the archives
 (or mount the macOS disk image) and run the GUI directly in place. The
@ -742,7 +746,10 @@ stored in a location where CMake can find them without additional help.
 Otherwise, the location of the Qt library installation must be indicated
 by setting ``-D Qt5_DIR=/path/to/qt5/lib/cmake/Qt5``, which is a path to
 a folder inside the Qt installation that contains the file
-``Qt5Config.cmake``.
+``Qt5Config.cmake``. Similarly, for Qt6 the location of the Qt library
 installation can be indicated by setting ``-D Qt6_DIR=/path/to/qt6/lib/cmake/Qt6``,
 if necessary.  When both Qt5 and Qt6 are available, Qt6 will be preferred
 unless ``-D LAMMPS_GUI_USE_QT5=yes`` is set.
 It should be possible to build the LAMMPS GUI as a standalone
 compilation (e.g. when LAMMPS has been compiled with traditional make),
--- a/doc/src/atom_modify.rst
+++ b/doc/src/atom_modify.rst
@ -65,6 +65,11 @@ switch.  This is described on the :doc:`Build_settings <Build_settings>`
 doc page.  If atom IDs are not used, they must be specified as 0 for
 all atoms, e.g. in a data or restart file.
 .. note::
   If a :doc:`triclinic simulation box <Howto_triclinic>` is used,
   atom IDs are required, due to how neighbor lists are built.
 The *map* keyword determines how atoms with specific IDs are found
 when required.  An example are the bond (angle, etc) methods which
 need to find the local index of an atom with a specific global ID
--- a/doc/src/compute.rst
+++ b/doc/src/compute.rst
@ -27,58 +27,62 @@ Examples
 Description
 """""""""""
-Define a computation that will be performed on a group of atoms.
+Define a diagnostic computation that will be performed on a group of
-Quantities calculated by a compute are instantaneous values, meaning
+atoms.  Quantities calculated by a compute are instantaneous values,
-they are calculated from information about atoms on the current
+meaning they are calculated from information about atoms on the
-timestep or iteration, though a compute may internally store some
+current timestep or iteration, though internally a compute may store
-information about a previous state of the system.  Defining a compute
+some information about a previous state of the system.  Defining a
-does not perform a computation.  Instead computes are invoked by other
+compute does not perform the computation.  Instead computes are
-LAMMPS commands as needed (e.g., to calculate a temperature needed for
+invoked by other LAMMPS commands as needed (e.g., to calculate a
-a thermostat fix or to generate thermodynamic or dump file output).
+temperature needed for a thermostat fix or to generate thermodynamic
-See the :doc:`Howto output <Howto_output>` page for a summary of
+or dump file output).  See the :doc:`Howto output <Howto_output>` page
-various LAMMPS output options, many of which involve computes.
+for a summary of various LAMMPS output options, many of which involve
 computes.
 The ID of a compute can only contain alphanumeric characters and
 underscores.
 ----------
-Computes calculate one or more of four styles of quantities: global,
+Computes calculate and store any of four *styles* of quantities:
-per-atom, local, or per-atom.  A global quantity is one or more
+global, per-atom, local, or per-grid.
 system-wide values, e.g. the temperature of the system.  A per-atom
 quantity is one or more values per atom, e.g. the kinetic energy of
 each atom.  Per-atom values are set to 0.0 for atoms not in the
 specified compute group.  Local quantities are calculated by each
 processor based on the atoms it owns, but there may be zero or more
 per atom, e.g. a list of bond distances.  Per-grid quantities are
 calculated on a regular 2d or 3d grid which overlays a 2d or 3d
 simulation domain.  The grid points and the data they store are
 distributed across processors; each processor owns the grid points
 which fall within its subdomain.
-Computes that produce per-atom quantities have the word "atom" at the
+A global quantity is one or more system-wide values, e.g. the
-end of their style, e.g. *ke/atom*\ .  Computes that produce local
+temperature of the system.  A per-atom quantity is one or more values
-quantities have the word "local" at the end of their style,
+per atom, e.g. the kinetic energy of each atom.  Per-atom values are
-e.g. *bond/local*\ .  Computes that produce per-grid quantities have
+set to 0.0 for atoms not in the specified compute group.  Local
-the word "grid" at the end of their style, e.g. *property/grid*\ .
+quantities are calculated by each processor based on the atoms it
-Styles with neither "atom" or "local" or "grid" at the end of their
+owns, but there may be zero or more per atom, e.g. a list of bond
-style name produce global quantities.
+distances.  Per-grid quantities are calculated on a regular 2d or 3d
 grid which overlays a 2d or 3d simulation domain.  The grid points and
 the data they store are distributed across processors; each processor
 owns the grid points which fall within its subdomain.
-Note that a single compute typically produces either global or
+As a general rule of thumb, computes that produce per-atom quantities
-per-atom or local or per-grid values.  It does not compute both global
+have the word "atom" at the end of their style, e.g. *ke/atom*\ .
-and per-atom values.  It can produce local values or per-grid values
+Computes that produce local quantities have the word "local" at the
-in tandem with global or per-atom quantities.  The compute doc page
+end of their style, e.g. *bond/local*\ .  Computes that produce
-will explain the details.
+per-grid quantities have the word "grid" at the end of their style,
 e.g. *property/grid*\ .  And styles with neither "atom" nor "local" nor
 "grid" at the end of their style name produce global quantities.
-Global, per-atom, local, and per-grid quantities come in three kinds:
+Global, per-atom, local, and per-grid quantities can also be of three
-a single scalar value, a vector of values, or a 2d array of values.
+*kinds*: a single scalar value (global only), a vector of values, or a
-The doc page for each compute describes the style and kind of values
+2d array of values.  For per-atom, local, and per-grid quantities, a
-it produces, e.g. a per-atom vector.  Some computes produce more than
+"vector" means a single value for each atom, each local entity
-one kind of a single style, e.g. a global scalar and a global vector.
+(e.g. bond), or grid cell.  Likewise an "array" means multiple values
 for each atom, each local entity, or each grid cell.
-When a compute quantity is accessed, as in many of the output commands
+Note that a single compute can produce any combination of global,
-discussed below, it can be referenced via the following bracket
+per-atom, local, or per-grid values.  Likewise it can produce any
-notation, where ID is the ID of the compute:
+combination of scalar, vector, or array output for each style.  The
 exception is that for per-atom, local, and per-grid output, either a
 vector or array can be produced, but not both.  The doc page for each
 compute explains the values it produces.
 When a compute output is accessed by another input script command it
 is referenced via the following bracket notation, where ID is the ID
 of the compute:
 +-------------+--------------------------------------------+
 | c_ID        | entire scalar, vector, or array            |
@ -89,17 +93,23 @@ notation, where ID is the ID of the compute:
 +-------------+--------------------------------------------+
 In other words, using one bracket reduces the dimension of the
-quantity once (vector :math:`\to` scalar, array :math:`\to` vector).  Using two
+quantity once (vector :math:`\to` scalar, array :math:`\to` vector).
-brackets reduces the dimension twice (array :math:`\to` scalar).  Thus a
+Using two brackets reduces the dimension twice (array :math:`\to`
-command that uses scalar compute values as input can also process elements of a
+scalar).  Thus, for example, a command that uses global scalar compute
-vector or array.
+values as input can also process elements of a vector or array.
 Depending on the command, this can either be done directly using the
 syntax in the table, or by first defining a :doc:`variable <variable>`
 of the appropriate style to store the quantity, then using the
 variable as an input to the command.
-Note that commands and :doc:`variables <variable>` which use compute
+Note that commands and :doc:`variables <variable>` which take compute
-quantities typically do not allow for all kinds (e.g., a command may
+outputs as input typically do not allow for all styles and kinds of
-require a vector of values, not a scalar).  This means there is no
+data (e.g., a command may require global but not per-atom values, or
-ambiguity about referring to a compute quantity as c_ID even if it
+it may require a vector of values, not a scalar).  This means there is
-produces, for example, both a scalar and vector.  The doc pages for
+typically no ambiguity about referring to a compute output as c_ID
-various commands explain the details.
+even if it produces, for example, both a scalar and vector.  The doc
 pages for various commands explain the details, including how any
 ambiguities are resolved.
 ----------
--- a/doc/src/compute_reduce.rst
+++ b/doc/src/compute_reduce.rst
@ -37,13 +37,16 @@ Syntax
       v_name = per-atom vector calculated by an atom-style variable with name
 * zero or more keyword/args pairs may be appended
-* keyword = *replace*
+* keyword = *replace* or *inputs*
  .. parsed-literal::
       *replace* args = vec1 vec2
         vec1 = reduced value from this input vector will be replaced
         vec2 = replace it with vec1[N] where N is index of max/min value from vec2
       *inputs* arg = peratom or local
         peratom = all inputs are per-atom quantities (default)
         local = all inputs are local quantities
 Examples
 """"""""
@ -60,38 +63,44 @@ Description
 """""""""""
 Define a calculation that "reduces" one or more vector inputs into
-scalar values, one per listed input.  The inputs can be per-atom or
+scalar values, one per listed input.  For the compute reduce command,
-local quantities; they cannot be global quantities.  Atom attributes
+the inputs can be either per-atom or local quantities and must all be
-are per-atom quantities, :doc:`computes <compute>` and :doc:`fixes <fix>`
+of the same kind (per-atom or local); see discussion of the optional
-may generate any of the three kinds of quantities, and :doc:`atom-style variables <variable>` generate per-atom quantities.  See the
+*inputs* keyword below.  The compute reduce/region command can only be
-:doc:`variable <variable>` command and its special functions which can
+used with per-atom inputs.
-perform the same operations as the compute reduce command on global
+
-vectors.
+Atom attributes are per-atom quantities, :doc:`computes <compute>` and
 :doc:`fixes <fix>` can generate either per-atom or local quantities,
 and :doc:`atom-style variables <variable>` generate per-atom
 quantities.  See the :doc:`variable <variable>` command and its
 special functions which can perform the same reduction operations as
 the compute reduce command on global vectors.
 The reduction operation is specified by the *mode* setting.  The *sum*
 option adds the values in the vector into a global total.  The *min*
 or *max* options find the minimum or maximum value across all vector
 values.  The *minabs* or *maxabs* options find the minimum or maximum
 value across all absolute vector values.  The *ave* setting adds the
-vector values into a global total, then divides by the number of values
+vector values into a global total, then divides by the number of
-in the vector.  The *sumsq* option sums the square of the values in the
+values in the vector.  The *sumsq* option sums the square of the
-vector into a global total.  The *avesq* setting does the same as *sumsq*,
+values in the vector into a global total.  The *avesq* setting does
-then divides the sum of squares by the number of values.  The last two options
+the same as *sumsq*, then divides the sum of squares by the number of
-can be useful for calculating the variance of some quantity (e.g., variance =
+values.  The last two options can be useful for calculating the
-sumsq :math:`-` ave\ :math:`^2`).  The *sumabs* option sums the absolute
+variance of some quantity (e.g., variance = sumsq :math:`-` ave\
-values in the vector into a global total.  The *aveabs* setting does the same
+:math:`^2`).  The *sumabs* option sums the absolute values in the
-as *sumabs*, then divides the sum of absolute values by the number of
+vector into a global total.  The *aveabs* setting does the same as
 *sumabs*, then divides the sum of absolute values by the number of
 values.
 Each listed input is operated on independently.  For per-atom inputs,
 the group specified with this command means only atoms within the
-group contribute to the result.  For per-atom inputs, if the compute
+group contribute to the result.  Likewise for per-atom inputs, if the
-reduce/region command is used, the atoms must also currently be within
+compute reduce/region command is used, the atoms must also currently
-the region.  Note that an input that produces per-atom quantities may
+be within the region.  Note that an input that produces per-atom
-define its own group which affects the quantities it returns.  For
+quantities may define its own group which affects the quantities it
-example, if a compute is used as an input which generates a per-atom
+returns.  For example, if a compute is used as an input which
-vector, it will generate values of 0.0 for atoms that are not in the
+generates a per-atom vector, it will generate values of 0.0 for atoms
-group specified for that compute.
+that are not in the group specified for that compute.
 Each listed input can be an atom attribute (position, velocity, force
 component) or can be the result of a :doc:`compute <compute>` or
@ -123,52 +132,54 @@ array with six columns:
 ----------
-The atom attribute values (*x*, *y*, *z*, *vx*, *vy*, *vz*, *fx*, *fy*, and
+The atom attribute values (*x*, *y*, *z*, *vx*, *vy*, *vz*, *fx*,
-*fz*) are self-explanatory.  Note that other atom attributes can be used as
+*fy*, and *fz*) are self-explanatory.  Note that other atom attributes
-inputs to this fix by using the
+can be used as inputs to this compute by using the :doc:`compute
-:doc:`compute property/atom <compute_property_atom>` command and then specifying
+property/atom <compute_property_atom>` command and then specifying an
-an input value from that compute.
+input value from that compute.
 If a value begins with "c\_", a compute ID must follow which has been
-previously defined in the input script.  Computes can generate
+previously defined in the input script.  Valid computes can generate
-per-atom or local quantities.  See the individual
+per-atom or local quantities.  See the individual :doc:`compute
-:doc:`compute <compute>` page for details.  If no bracketed integer
+<compute>` page for details.  If no bracketed integer is appended, the
-is appended, the vector calculated by the compute is used.  If a
+vector calculated by the compute is used.  If a bracketed integer is
-bracketed integer is appended, the Ith column of the array calculated
+appended, the Ith column of the array calculated by the compute is
-by the compute is used.  Users can also write code for their own
+used.  Users can also write code for their own compute styles and
-compute styles and :doc:`add them to LAMMPS <Modify>`.  See the
+:doc:`add them to LAMMPS <Modify>`.  See the discussion above for how
-discussion above for how :math:`I` can be specified with a wildcard asterisk
+:math:`I` can be specified with a wildcard asterisk to effectively
-to effectively specify multiple values.
+specify multiple values.
 If a value begins with "f\_", a fix ID must follow which has been
-previously defined in the input script.  Fixes can generate per-atom
+previously defined in the input script.  Valid fixes can generate
-or local quantities.  See the individual :doc:`fix <fix>` page for
+per-atom or local quantities.  See the individual :doc:`fix <fix>`
-details.  Note that some fixes only produce their values on certain
+page for details.  Note that some fixes only produce their values on
-timesteps, which must be compatible with when compute reduce
+certain timesteps, which must be compatible with when compute reduce
 references the values, else an error results.  If no bracketed integer
 is appended, the vector calculated by the fix is used.  If a bracketed
 integer is appended, the Ith column of the array calculated by the fix
 is used.  Users can also write code for their own fix styles and
 :doc:`add them to LAMMPS <Modify>`.  See the discussion above for how
-:math:`I` can be specified with a wildcard asterisk to effectively specify
+:math:`I` can be specified with a wildcard asterisk to effectively
-multiple values.
+specify multiple values.
 If a value begins with "v\_", a variable name must follow which has
 been previously defined in the input script.  It must be an
 :doc:`atom-style variable <variable>`.  Atom-style variables can
 reference thermodynamic keywords and various per-atom attributes, or
 invoke other computes, fixes, or variables when they are evaluated, so
-this is a very general means of generating per-atom quantities to reduce.
+this is a very general means of generating per-atom quantities to
 reduce.
 ----------
 If the *replace* keyword is used, two indices *vec1* and *vec2* are
-specified, where each index ranges from 1 to the number of input values.
+specified, where each index ranges from 1 to the number of input
-The replace keyword can only be used if the *mode* is *min* or *max*\ .
+values.  The replace keyword can only be used if the *mode* is *min*
-It works as follows.  A min/max is computed as usual on the *vec2*
+or *max*\ .  It works as follows.  A min/max is computed as usual on
-input vector.  The index :math:`N` of that value within *vec2* is also stored.
+the *vec2* input vector.  The index :math:`N` of that value within
-Then, instead of performing a min/max on the *vec1* input vector, the
+*vec2* is also stored.  Then, instead of performing a min/max on the
-stored index is used to select the :math:`N`\ th element of the *vec1* vector.
+*vec1* input vector, the stored index is used to select the :math:`N`\
 th element of the *vec1* vector.
 Thus, for example, if you wish to use this compute to find the bond
 with maximum stretch, you can do it as follows:
@ -190,6 +201,16 @@ information in this context, the *replace* keywords will extract the
 atom IDs for the two atoms in the bond of maximum stretch.  These atom
 IDs and the bond stretch will be printed with thermodynamic output.
 .. versionadded:: TBD
 The *inputs* keyword allows selection of whether all the inputs are
 per-atom or local quantities.  As noted above, all the inputs must be
 the same kind (per-atom or local).  Per-atom is the default setting.
 If a compute or fix is specified as an input, it must produce per-atom
 or local data to match this setting.  If it produces both, e.g. for
 the :doc:`compute voronoi/atom <compute_voronoi_atom>` command, then
 this keyword selects between them.
 ----------
 If a single input is specified this compute produces a global scalar
@ -197,38 +218,41 @@ value.  If multiple inputs are specified, this compute produces a
 global vector of values, the length of which is equal to the number of
 inputs specified.
-As discussed below, for the *sum*, *sumabs*, and *sumsq* modes, the value(s)
+As discussed below, for the *sum*, *sumabs*, and *sumsq* modes, the
-produced by this compute are all "extensive", meaning their value
+value(s) produced by this compute are all "extensive", meaning their
-scales linearly with the number of atoms involved.  If normalized
+value scales linearly with the number of atoms involved.  If
-values are desired, this compute can be accessed by the
+normalized values are desired, this compute can be accessed by the
 :doc:`thermo_style custom <thermo_style>` command with
-:doc:`thermo_modify norm yes <thermo_modify>` set as an option.
+:doc:`thermo_modify norm yes <thermo_modify>` set as an option.  Or it
-Or it can be accessed by a
+can be accessed by a :doc:`variable <variable>` that divides by the
-:doc:`variable <variable>` that divides by the appropriate atom count.
+appropriate atom count.
 ----------
 Output info
 """""""""""
-This compute calculates a global scalar if a single input value is specified
+This compute calculates a global scalar if a single input value is
-or a global vector of length :math:`N`, where :math:`N` is the number of
+specified or a global vector of length :math:`N`, where :math:`N` is
-inputs, and which can be accessed by indices 1 to :math:`N`.  These values can
+the number of inputs, and which can be accessed by indices 1 to
-be used by any command that uses global scalar or vector values from a
+:math:`N`.  These values can be used by any command that uses global
-compute as input.  See the :doc:`Howto output <Howto_output>` doc page
+scalar or vector values from a compute as input.  See the :doc:`Howto
-for an overview of LAMMPS output options.
+output <Howto_output>` doc page for an overview of LAMMPS output
 options.
 All the scalar or vector values calculated by this compute are
 "intensive", except when the *sum*, *sumabs*, or *sumsq* modes are used on
 per-atom or local vectors, in which case the calculated values are
 "extensive".
-The scalar or vector values will be in whatever :doc:`units <units>` the
+The scalar or vector values will be in whatever :doc:`units <units>`
-quantities being reduced are in.
+the quantities being reduced are in.
 Restrictions
 """"""""""""
- none
+
 As noted above, the compute reduce/region command can only be used
 with per-atom inputs.
 Related commands
 """"""""""""""""
@ -238,4 +262,4 @@ Related commands
 Default
 """""""
-none
+The default value for the *inputs* keyword is peratom.
--- a/doc/src/compute_voronoi_atom.rst
+++ b/doc/src/compute_voronoi_atom.rst
@ -13,7 +13,7 @@ Syntax
 * ID, group-ID are documented in :doc:`compute <compute>` command
 * voronoi/atom = style name of this compute command
 * zero or more keyword/value pairs may be appended
-* keyword = *only_group* or *occupation* or *surface* or *radius* or *edge_histo* or *edge_threshold* or *face_threshold* or *neighbors* or *peratom*
+* keyword = *only_group* or *occupation* or *surface* or *radius* or *edge_histo* or *edge_threshold* or *face_threshold* or *neighbors*
  .. parsed-literal::
@ -31,7 +31,6 @@ Syntax
       *face_threshold* arg = minarea
         minarea = minimum area for a face to be counted
       *neighbors* value = *yes* or *no* = store list of all neighbors or no
       *peratom* value = *yes* or *no* = per-atom quantities accessible or no
 Examples
 """"""""
@ -53,14 +52,12 @@ atoms in the simulation box.  The tessellation is calculated using all
 atoms in the simulation, but non-zero values are only stored for atoms
 in the group.
-By default two per-atom quantities are calculated by this compute.
+Two per-atom quantities are calculated by this compute.  The first is
-The first is the volume of the Voronoi cell around each atom.  Any
+the volume of the Voronoi cell around each atom.  Any point in an
-point in an atom's Voronoi cell is closer to that atom than any other.
+atom's Voronoi cell is closer to that atom than any other.  The second
-The second is the number of faces of the Voronoi cell. This is
+is the number of faces of the Voronoi cell. This is equal to the
-equal to the number of nearest neighbors of the central atom,
+number of nearest neighbors of the central atom, plus any exterior
-plus any exterior faces (see note below). If the *peratom* keyword
+faces (see note below).
 is set to "no", the per-atom quantities are still calculated,
 but they are not accessible.
 ----------
@ -97,13 +94,13 @@ present in atom_style sphere for granular models.
 The *edge_histo* keyword activates the compilation of a histogram of
 number of edges on the faces of the Voronoi cells in the compute
-group. The argument *maxedge* of the this keyword is the largest number
+group. The argument *maxedge* of this keyword is the largest
-of edges on a single Voronoi cell face expected to occur in the
+number of edges on a single Voronoi cell face expected to occur in the
-sample. This keyword adds the generation of a global vector with
+sample. This keyword generates output of a global vector by this
-*maxedge*\ +1 entries. The last entry in the vector contains the number of
+compute with *maxedge*\ +1 entries. The last entry in the vector
-faces with more than *maxedge* edges. Since the polygon with the
+contains the number of faces with more than *maxedge* edges. Since the
-smallest amount of edges is a triangle, entries 1 and 2 of the vector
+polygon with the smallest number of edges is a triangle, entries 1 and
-will always be zero.
+2 of the vector will always be zero.
 The *edge_threshold* and *face_threshold* keywords allow the
 suppression of edges below a given minimum length and faces below a
@ -127,8 +124,8 @@ to locate vacancies (the coordinates are given by the atom coordinates
 at the time step when the compute was first invoked), while column two
 data can be used to identify interstitial atoms.
-If the *neighbors* value is set to yes, then this compute creates a
+If the *neighbors* value is set to yes, then this compute also creates
-local array with 3 columns. There is one row for each face of each
+a local array with 3 columns. There is one row for each face of each
 Voronoi cell. The 3 columns are the atom ID of the atom that owns the
 cell, the atom ID of the atom in the neighboring cell (or zero if the
 face is external), and the area of the face.  The array can be
@ -143,8 +140,8 @@ containing all the Voronoi neighbors in a system:
   compute 6 all voronoi/atom neighbors yes
   dump d2 all local 1 dump.neighbors index c_6[1] c_6[2] c_6[3]
-If the *face_threshold* keyword is used, then only faces
+If the *face_threshold* keyword is used, then only faces with areas
-with areas greater than the threshold are stored.
+greater than the threshold are stored.
 ----------
@ -158,48 +155,52 @@ Voro++ software in the src/VORONOI/README file.
 .. note::
-   The calculation of Voronoi volumes is performed by each processor for
+   The calculation of Voronoi volumes is performed by each processor
-   the atoms it owns, and includes the effect of ghost atoms stored by
+   for the atoms it owns, and includes the effect of ghost atoms
-   the processor.  This assumes that the Voronoi cells of owned atoms
+   stored by the processor.  This assumes that the Voronoi cells of
-   are not affected by atoms beyond the ghost atom cut-off distance.
+   owned atoms are not affected by atoms beyond the ghost atom cut-off
-   This is usually a good assumption for liquid and solid systems, but
+   distance.  This is usually a good assumption for liquid and solid
-   may lead to underestimation of Voronoi volumes in low density
+   systems, but may lead to underestimation of Voronoi volumes in low
-   systems.  By default, the set of ghost atoms stored by each processor
+   density systems.  By default, the set of ghost atoms stored by each
-   is determined by the cutoff used for :doc:`pair_style <pair_style>`
+   processor is determined by the cutoff used for :doc:`pair_style
-   interactions.  The cutoff can be set explicitly via the
+   <pair_style>` interactions.  The cutoff can be set explicitly via
-   :doc:`comm_modify cutoff <comm_modify>` command.  The Voronoi cells
+   the :doc:`comm_modify cutoff <comm_modify>` command.  The Voronoi
-   for atoms adjacent to empty regions will extend into those regions up
+   cells for atoms adjacent to empty regions will extend into those
-   to the communication cutoff in :math:`x`, :math:`y`, or :math:`z`.
+   regions up to the communication cutoff in :math:`x`, :math:`y`, or
-   In that situation, an exterior face is created at the cutoff distance
+   :math:`z`.  In that situation, an exterior face is created at the
-   normal to the :math:`x`, :math:`y`, or :math:`z` direction.  For
+   cutoff distance normal to the :math:`x`, :math:`y`, or :math:`z`
-   triclinic systems, the exterior face is parallel to the corresponding
+   direction.  For triclinic systems, the exterior face is parallel to
-   reciprocal lattice vector.
+   the corresponding reciprocal lattice vector.
 .. note::
-   The Voro++ package performs its calculation in 3d.  This will
+   The Voro++ package performs its calculation in 3d.  This will still
-   still work for a 2d LAMMPS simulation, provided all the atoms have the
+   work for a 2d LAMMPS simulation, provided all the atoms have the
-   same :math:`z`-coordinate. The Voronoi cell of each atom will be a columnar
+   same :math:`z`-coordinate. The Voronoi cell of each atom will be a
-   polyhedron with constant cross-sectional area along the :math:`z`-direction
+   columnar polyhedron with constant cross-sectional area along the
-   and two exterior faces at the top and bottom of the simulation box. If
+   :math:`z`-direction and two exterior faces at the top and bottom of
-   the atoms do not all have the same :math:`z`-coordinate, then the columnar
+   the simulation box. If the atoms do not all have the same
-   cells will be accordingly distorted. The cross-sectional area of each
+   :math:`z`-coordinate, then the columnar cells will be accordingly
-   Voronoi cell can be obtained by dividing its volume by the :math:`z` extent
+   distorted. The cross-sectional area of each Voronoi cell can be
-   of the simulation box.  Note that you define the :math:`z` extent of the
+   obtained by dividing its volume by the :math:`z` extent of the
-   simulation box for 2d simulations when using the
+   simulation box.  Note that you define the :math:`z` extent of the
-   :doc:`create_box <create_box>` or :doc:`read_data <read_data>` commands.
+   simulation box for 2d simulations when using the :doc:`create_box
   <create_box>` or :doc:`read_data <read_data>` commands.
 Output info
 """""""""""
-By default, this compute calculates a per-atom array with two
+.. deprecated:: TBD
-columns. In regular dynamic tessellation mode the first column is the
+
-Voronoi volume, the second is the neighbor count, as described above
+   The *peratom* keyword was removed as it is no longer required.
-(read above for the output data in case the *occupation* keyword is
+
-specified).  These values can be accessed by any command that uses
+This compute calculates a per-atom array with two columns. In regular
-per-atom values from a compute as input.  See the :doc:`Howto output <Howto_output>` page for an overview of LAMMPS output
+dynamic tessellation mode the first column is the Voronoi volume, the
-options. If the *peratom* keyword is set to "no", the per-atom array
+second is the neighbor count, as described above (read above for the
-is still created, but it is not accessible.
+output data in case the *occupation* keyword is specified).  These
 values can be accessed by any command that uses per-atom values from a
 compute as input.  See the :doc:`Howto output <Howto_output>` page for
 an overview of LAMMPS output options.
 If the *edge_histo* keyword is used, then this compute generates a
 global vector of length *maxedge*\ +1, containing a histogram of the
@ -209,17 +210,6 @@ If the *neighbors* value is set to *yes*, then this compute calculates a
 local array with three columns. There is one row for each face of each
 Voronoi cell.
 .. note::
   Some LAMMPS commands such as the :doc:`compute reduce <compute_reduce>`
   command can accept either a per-atom or local quantity. If this compute
   produces both quantities, the command
   may access the per-atom quantity, even if you want to access the local
   quantity.  This effect can be eliminated by using the *peratom*
   keyword to turn off the production of the per-atom quantities.  For
   the default value *yes* both quantities are produced.  For the value
   *no*, only the local array is produced.
 The Voronoi cell volume will be in distance :doc:`units <units>` cubed.
 The Voronoi face area will be in distance :doc:`units <units>` squared.
@ -227,7 +217,8 @@ Restrictions
 """"""""""""
 This compute is part of the VORONOI package.  It is only enabled if
-LAMMPS was built with that package.  See the :doc:`Build package <Build_package>` page for more info.
+LAMMPS was built with that package.  See the :doc:`Build package
 <Build_package>` page for more info.
 It also requires you have a copy of the Voro++ library built and
 installed on your system.  See instructions on obtaining and
@ -241,5 +232,4 @@ Related commands
 Default
 """""""
-*neighbors* no, *peratom* yes
+The default for the *neighbors* keyword is no.
--- a/doc/src/fix.rst
+++ b/doc/src/fix.rst
@ -77,35 +77,44 @@ for individual fixes for info on which ones can be restarted.
 ----------
-Some fixes calculate one or more of four styles of quantities: global,
+Some fixes calculate and store any of four *styles* of quantities:
-per-atom, local, or per-grid, which can be used by other commands or
+global, per-atom, local, or per-grid.
 output as described below.  A global quantity is one or more
 system-wide values, e.g. the energy of a wall interacting with
 particles.  A per-atom quantity is one or more values per atom,
 e.g. the displacement vector for each atom since time 0.  Per-atom
 values are set to 0.0 for atoms not in the specified fix group.  Local
 quantities are calculated by each processor based on the atoms it
 owns, but there may be zero or more per atom.  Per-grid quantities
 are calculated on a regular 2d or 3d grid which overlays a 2d or 3d
 simulation domain.  The grid points and the data they store are
 distributed across processors; each processor owns the grid points
 which fall within its subdomain.
-Note that a single fix typically produces either global or per-atom or
+A global quantity is one or more system-wide values, e.g. the energy
-local or per-grid values (or none at all).  It does not produce both
+of a wall interacting with particles.  A per-atom quantity is one or
-global and per-atom.  It can produce local or per-grid values in
+more values per atom, e.g. the original coordinates of each atom at
-tandem with global or per-atom values.  The fix doc page will explain
+time 0.  Per-atom values are set to 0.0 for atoms not in the specified
-the details.
+fix group.  Local quantities are calculated by each processor based on
 the atoms it owns, but there may be zero or more per atom, e.g. values
 for each bond.  Per-grid quantities are calculated on a regular 2d or
 3d grid which overlays a 2d or 3d simulation domain.  The grid points
 and the data they store are distributed across processors; each
 processor owns the grid points which fall within its subdomain.
-Global, per-atom, local, and per-grid quantities come in three kinds:
+As a general rule of thumb, fixes that produce per-atom quantities
-a single scalar value, a vector of values, or a 2d array of values.
+have the word "atom" at the end of their style, e.g. *ave/atom*\ .
-The doc page for each fix describes the style and kind of values it
+Fixes that produce local quantities have the word "local" at the end
-produces, e.g. a per-atom vector.  Some fixes produce more than one
+of their style, e.g. *store/local*\ .  Fixes that produce per-grid
-kind of a single style, e.g. a global scalar and a global vector.
+quantities have the word "grid" at the end of their style,
 e.g. *ave/grid*\ .
-When a fix quantity is accessed, as in many of the output commands
+Global, per-atom, local, and per-grid quantities can also be of three
-discussed below, it can be referenced via the following bracket
+*kinds*: a single scalar value (global only), a vector of values, or a
-notation, where ID is the ID of the fix:
+2d array of values.  For per-atom, local, and per-grid quantities, a
 "vector" means a single value for each atom, each local entity
 (e.g. bond), or grid cell.  Likewise an "array", means multiple values
 for each atom, each local entity, or each grid cell.
 Note that a single fix can produce any combination of global,
 per-atom, local, or per-grid values.  Likewise it can produce any
 combination of scalar, vector, or array output for each style.  The
 exception is that for per-atom, local, and per-grid output, either a
 vector or array can be produced, but not both.  The doc page for each
 fix explains the values it produces, if any.
 When a fix output is accessed by another input script command it is
 referenced via the following bracket notation, where ID is the ID of
 the fix:
 +-------------+--------------------------------------------+
 | f_ID        | entire scalar, vector, or array            |
@ -116,19 +125,23 @@ notation, where ID is the ID of the fix:
 +-------------+--------------------------------------------+
 In other words, using one bracket reduces the dimension of the
-quantity once (vector :math:`\to` scalar, array :math:`\to` vector).  Using two
+quantity once (vector :math:`\to` scalar, array :math:`\to` vector).
-brackets reduces the dimension twice (array :math:`\to` scalar).  Thus, a
+Using two brackets reduces the dimension twice (array :math:`\to`
-command that uses scalar fix values as input can also process elements of a
+scalar).  Thus, for example, a command that uses global scalar fix
-vector or array.
+values as input can also process elements of a vector or array.
 Depending on the command, this can either be done directly using the
 syntax in the table, or by first defining a :doc:`variable <variable>`
 of the appropriate style to store the quantity, then using the
 variable as an input to the command.
-Note that commands and :doc:`variables <variable>` that use fix
+Note that commands and :doc:`variables <variable>` which take fix
-quantities typically do not allow for all kinds (e.g., a command may
+outputs as input typically do not allow for all styles and kinds of
-require a vector of values, not a scalar), and even if they do, the context
+data (e.g., a command may require global but not per-atom values, or
-in which they are called can be used to resolve which output is being
+it may require a vector of values, not a scalar).  This means there is
-requested.  This means there is no
+typically no ambiguity about referring to a fix output as f_ID even if
-ambiguity about referring to a fix quantity as f_ID even if it
+it produces, for example, both a scalar and vector.  The doc pages for
-produces, for example, both a scalar and vector.  The doc pages for
+various commands explain the details, including how any ambiguities
-various commands explain the details.
+are resolved.
 ----------
--- a/doc/src/fix_ave_histo.rst
+++ b/doc/src/fix_ave_histo.rst
@ -79,9 +79,10 @@ Description
 Use one or more values as inputs every few timesteps to create a
 single histogram.  The histogram can then be averaged over longer
-timescales.  The resulting histogram can be used by other :doc:`output commands <Howto_output>`, and can also be written to a file.  The
+timescales.  The resulting histogram can be used by other :doc:`output
-fix ave/histo/weight command has identical syntax to fix ave/histo,
+commands <Howto_output>`, and can also be written to a file.  The fix
-except that exactly two values must be specified.  See details below.
+ave/histo/weight command has identical syntax to fix ave/histo, except
 that exactly two values must be specified.  See details below.
 The group specified with this command is ignored for global and local
 input values.  For per-atom input values, only atoms in the group
@ -96,14 +97,18 @@ different ways; see the discussion of the *beyond* keyword below.
 Each input value can be an atom attribute (position, velocity, force
 component) or can be the result of a :doc:`compute <compute>` or
-:doc:`fix <fix>` or the evaluation of an equal-style or vector-style or
+:doc:`fix <fix>` or the evaluation of an equal-style or vector-style
-atom-style :doc:`variable <variable>`.  The set of input values can be
+or atom-style :doc:`variable <variable>`.  The set of input values can
-either all global, all per-atom, or all local quantities.  Inputs of
+be either all global, all per-atom, or all local quantities.  Inputs
-different kinds (e.g. global and per-atom) cannot be mixed.  Atom
+of different kinds (e.g. global and per-atom) cannot be mixed.  Atom
-attributes are per-atom vector values.  See the page for
+attributes are per-atom vector values.  See the page for individual
-individual "compute" and "fix" commands to see what kinds of
+"compute" and "fix" commands to see what kinds of quantities they
-quantities they generate.  See the optional *kind* keyword below for
+generate.
-how to force the fix ave/histo command to disambiguate if necessary.
+
 Note that a compute or fix can produce multiple kinds of data (global,
 per-atom, local).  If LAMMPS cannot unambiguously determine which kind
 of data to use, the optional *kind* keyword discussed below can force
 the desired disambiguation.
 Note that the output of this command is a single histogram for all
 input values combined together, not one histogram per input value.
@ -258,13 +263,14 @@ keyword is set to *vector*, then all input values must be global or
 per-atom or local vectors, or columns of global or per-atom or local
 arrays.
-The *kind* keyword only needs to be set if a compute or fix produces
+The *kind* keyword only needs to be used if any of the specified input
-more than one kind of output (global, per-atom, local).  If this is
+computes or fixes produce more than one kind of output (global,
-not the case, then LAMMPS will determine what kind of input is
+per-atom, local).  If not, LAMMPS will determine the kind of data all
-provided and whether all the input arguments are consistent.  If a
+the inputs produce and verify it is all the same kind.  If not, an
-compute or fix produces more than one kind of output, the *kind*
+error will be triggered.  If a compute or fix produces more than one
-keyword should be used to specify which output will be used.  The
+kind of output, the *kind* keyword should be used to specify which
-remaining input arguments must still be consistent.
+output will be used.  The other input arguments must still be
 consistent.
 The *beyond* keyword determines how input values that fall outside the
 *lo* to *hi* bounds are treated.  Values such that *lo* :math:`\le` value
--- a/doc/src/fix_efield.rst
+++ b/doc/src/fix_efield.rst
@ -1,4 +1,5 @@
 .. index:: fix efield
 .. index:: fix efield/kk
 .. index:: fix efield/tip4p
 fix efield command
@ -210,6 +211,12 @@ the iteration count during the minimization.
   system (the quantity being minimized), you MUST enable the
   :doc:`fix_modify <fix_modify>` *energy* option for this fix.
 ----------
 .. include:: accel_styles.rst
 ----------
 Restrictions
 """"""""""""
--- a/doc/src/fix_rigid.rst
+++ b/doc/src/fix_rigid.rst
@ -843,7 +843,7 @@ stress/atom <compute_stress_atom>` commands.  The former can be
 accessed by :doc:`thermodynamic output <thermo_style>`.  The default
 setting for this fix is :doc:`fix_modify virial yes <fix_modify>`.
-All of the *rigid* styles (not the *rigid/small* styles) compute a
+All of the *rigid* styles (but not the *rigid/small* styles) compute a
 global array of values which can be accessed by various :doc:`output
 commands <Howto_output>`.  Similar information about the bodies
 defined by the *rigid/small* styles can be accessed via the
@ -887,7 +887,8 @@ Restrictions
 """"""""""""
 These fixes are all part of the RIGID package.  It is only enabled if
-LAMMPS was built with that package.  See the :doc:`Build package <Build_package>` page for more info.
+LAMMPS was built with that package.  See the :doc:`Build package
 <Build_package>` page for more info.
 Assigning a temperature via the :doc:`velocity create <velocity>`
 command to a system with :doc:`rigid bodies <fix_rigid>` may not have
--- a/doc/src/fix_spring_self.rst
+++ b/doc/src/fix_spring_self.rst
@ -1,4 +1,5 @@
 .. index:: fix spring/self
 .. index:: fix spring/self/kk
 fix spring/self command
 =======================
@ -80,6 +81,12 @@ invoked by the :doc:`minimize <minimize>` command.
   you MUST enable the :doc:`fix_modify <fix_modify>` *energy* option for
   this fix.
 ----------
 .. include:: accel_styles.rst
 ----------
 Restrictions
 """"""""""""
 none
--- a/doc/src/fix_srd.rst
+++ b/doc/src/fix_srd.rst
@ -71,14 +71,15 @@ imbue the SRD particles with fluid-like properties, including an
 effective viscosity.  Thus simulations with large solute particles can
 be run more quickly, to measure solute properties like diffusivity
 and viscosity in a background fluid.  The usual LAMMPS fixes for such
-simulations, such as :doc:`fix deform <fix_deform>`, :doc:`fix viscosity <fix_viscosity>`, and :doc:`fix nvt/sllod <fix_nvt_sllod>`,
+simulations, such as :doc:`fix deform <fix_deform>`,
 :doc:`fix viscosity <fix_viscosity>`, and :doc:`fix nvt/sllod <fix_nvt_sllod>`,
 can be used in conjunction with the SRD model.
-For more details on how the SRD model is implemented in LAMMPS, :ref:`this paper <Petersen1>` describes the implementation and usage of pure SRD
+For more details on how the SRD model is implemented in LAMMPS,
-fluids.  :ref:`This paper <Lechman>`, which is nearly complete, describes
+:ref:`(Petersen) <Petersen1>` describes the implementation and usage of
-the implementation and usage of mixture systems (solute particles in
+pure SRD fluids.  See the ``examples/srd`` directory for sample input
-an SRD fluid).  See the examples/srd directory for sample input
+scripts using SRD particles for that and for mixture systems (solute
-scripts using SRD particles in both settings.
+particles in an SRD fluid).
 This fix does two things:
@ -357,28 +358,28 @@ These are the 12 quantities.  All are values for the current timestep,
 except for quantity 5 and the last three, each of which are
 cumulative quantities since the beginning of the run.
-* (1) # of SRD/big collision checks performed
+(1) # of SRD/big collision checks performed
-* (2) # of SRDs which had a collision
+(2) # of SRDs which had a collision
-* (3) # of SRD/big collisions (including multiple bounces)
+(3) # of SRD/big collisions (including multiple bounces)
-* (4) # of SRD particles inside a big particle
+(4) # of SRD particles inside a big particle
-* (5) # of SRD particles whose velocity was rescaled to be < Vmax
+(5) # of SRD particles whose velocity was rescaled to be < Vmax
-* (6) # of bins for collision searching
+(6) # of bins for collision searching
-* (7) # of bins for SRD velocity rotation
+(7) # of bins for SRD velocity rotation
-* (8) # of bins in which SRD temperature was computed
+(8) # of bins in which SRD temperature was computed
-* (9) SRD temperature
+(9) SRD temperature
-* (10) # of SRD particles which have undergone max # of bounces
+(10) # of SRD particles which have undergone max # of bounces
-* (11) max # of bounces any SRD particle has had in a single step
+(11) max # of bounces any SRD particle has had in a single step
-* (12) # of reneighborings due to SRD particles moving too far
+(12) # of reneighborings due to SRD particles moving too far
 No parameter of this fix can be used with the *start/stop* keywords of
-the :doc:`run <run>` command.  This fix is not invoked during :doc:`energy minimization <minimize>`.
+the :doc:`run <run>` command.  This fix is not invoked during
 :doc:`energy minimization <minimize>`.
 Restrictions
 """"""""""""
-This command can only be used if LAMMPS was built with the SRD
+This command can only be used if LAMMPS was built with the SRD package.
-package.  See the :doc:`Build package <Build_package>` doc
+See the :doc:`Build package <Build_package>` doc page for more info.
 page for more info.
 Related commands
 """"""""""""""""
@ -403,7 +404,3 @@ no, and rescale = yes.
 **(Petersen)** Petersen, Lechman, Plimpton, Grest, in 't Veld, Schunk, J
 Chem Phys, 132, 174106 (2010).
 .. _Lechman:
 **(Lechman)** Lechman, et al, in preparation (2010).
--- a/doc/src/pair_ilp_tmd.rst
+++ b/doc/src/pair_ilp_tmd.rst
@ -22,12 +22,12 @@ Examples
 .. code-block:: LAMMPS
   pair_style  hybrid/overlay ilp/tmd 16.0 1
-   pair_coeff  * * ilp/tmd  TMD.ILP Mo S S
+   pair_coeff  * * ilp/tmd  MoS2.ILP Mo S S
   pair_style  hybrid/overlay sw/mod sw/mod ilp/tmd 16.0
   pair_coeff  * * sw/mod 1  tmd.sw.mod Mo S S NULL NULL NULL
   pair_coeff  * * sw/mod 2  tmd.sw.mod NULL NULL NULL Mo S S
-   pair_coeff  * * ilp/tmd   TMD.ILP    Mo S S Mo S S
+   pair_coeff  * * ilp/tmd   MoS2.ILP   Mo S S Mo S S
 Description
 """""""""""
@ -69,7 +69,7 @@ calculating the normals.
   each atom `i`, its six nearest neighboring atoms belonging to the same
   sub-layer are chosen to define the normal vector `{\bf n}_i`.
-The parameter file (e.g. TMD.ILP), is intended for use with *metal*
+The parameter file (e.g. MoS2.ILP), is intended for use with *metal*
 :doc:`units <units>`, with energies in meV. Two additional parameters,
 *S*, and *rcut* are included in the parameter file. *S* is designed to
 facilitate scaling of energies. *rcut* is designed to build the neighbor
@ -77,7 +77,7 @@ list for calculating the normals for each atom pair.
 .. note::
-   The parameters presented in the parameter file (e.g. TMD.ILP),
+   The parameters presented in the parameter file (e.g. MoS2.ILP),
   are fitted with taper function by setting the cutoff equal to 16.0
   Angstrom.  Take care when using a different cutoff or taper function.
   These parameters provide a good description in both short- and long-range
@ -133,10 +133,10 @@ if LAMMPS was built with that package.  See the :doc:`Build package
 This pair style requires the newton setting to be *on* for pair
 interactions.
-The TMD.ILP potential file provided with LAMMPS (see the potentials
+The MoS2.ILP potential file provided with LAMMPS (see the potentials
 directory) is parameterized for *metal* units.  You can use this
 potential with any LAMMPS units, but you would need to create your own
-custom TMD.ILP potential file with coefficients listed in the appropriate
+custom MoS2.ILP potential file with coefficients listed in the appropriate
 units, if your simulation does not use *metal* units.
 Related commands
--- a/doc/src/pair_reaxff.rst
+++ b/doc/src/pair_reaxff.rst
@ -43,18 +43,18 @@ Examples
 Description
 """""""""""
-Style *reaxff* computes the ReaxFF potential of van Duin, Goddard and
+Pair style *reaxff* computes the ReaxFF potential of van Duin, Goddard
-co-workers.  ReaxFF uses distance-dependent bond-order functions to
+and co-workers.  ReaxFF uses distance-dependent bond-order functions to
 represent the contributions of chemical bonding to the potential
 energy.  There is more than one version of ReaxFF.  The version
 implemented in LAMMPS uses the functional forms documented in the
 supplemental information of the following paper:
-:ref:`(Chenoweth et al., 2008) <Chenoweth_20082>`.  The version integrated
+:ref:`(Chenoweth et al., 2008) <Chenoweth_20082>` and matches the
-into LAMMPS matches the version of ReaxFF From Summer 2010.  For more
+version of the reference ReaxFF implementation from Summer 2010.  For
-technical details about the pair reaxff implementation of ReaxFF, see
+more technical details about the implementation of ReaxFF in pair style
-the :ref:`(Aktulga) <Aktulga>` paper. The *reaxff* style was initially
+*reaxff*, see the :ref:`(Aktulga) <Aktulga>` paper. The *reaxff* style
-implemented as a stand-alone C code and is now converted to C++ and
+was initially implemented as a stand-alone C code and is now converted
-integrated into LAMMPS as a package.
+to C++ and integrated into LAMMPS as a package.
 The *reaxff/kk* style is a Kokkos version of the ReaxFF potential that
 is derived from the *reaxff* style.  The Kokkos version can run on GPUs
--- a/doc/src/pair_yukawa_colloid.rst
+++ b/doc/src/pair_yukawa_colloid.rst
@ -1,11 +1,12 @@
 .. index:: pair_style yukawa/colloid
 .. index:: pair_style yukawa/colloid/gpu
 .. index:: pair_style yukawa/colloid/kk
 .. index:: pair_style yukawa/colloid/omp
 pair_style yukawa/colloid command
 =================================
-Accelerator Variants: *yukawa/colloid/gpu*, *yukawa/colloid/omp*
+Accelerator Variants: *yukawa/colloid/gpu*, *yukawa/colloid/kk*, *yukawa/colloid/omp*
 Syntax
 """"""
@ -131,6 +132,12 @@ per-type polydispersity is allowed.  This means all particles of the
 same type must have the same diameter.  Each type can have a different
 diameter.
 ----------
 .. include:: accel_styles.rst
 ----------
 Related commands
 """"""""""""""""
--- a/doc/src/thermo_style.rst
+++ b/doc/src/thermo_style.rst
@ -385,19 +385,20 @@ creates a global vector with 6 values.
 The *c_ID* and *c_ID[I]* and *c_ID[I][J]* keywords allow global values
 calculated by a compute to be output.  As discussed on the
 :doc:`compute <compute>` doc page, computes can calculate global,
-per-atom, or local values.  Only global values can be referenced by
+per-atom, local, and per-grid values.  Only global values can be
-this command.  However, per-atom compute values for an individual atom
+referenced by this command.  However, per-atom compute values for an
-can be referenced in a :doc:`variable <variable>` and the variable
+individual atom can be referenced in an :doc:`equal-style variable
-referenced by thermo_style custom, as discussed below.  See the
+<variable>` and the variable referenced by thermo_style custom, as
-discussion above for how the I in *c_ID[I]* can be specified with a
+discussed below.  See the discussion above for how the I in *c_ID[I]*
-wildcard asterisk to effectively specify multiple values from a global
+can be specified with a wildcard asterisk to effectively specify
-compute vector.
+multiple values from a global compute vector.
 The ID in the keyword should be replaced by the actual ID of a compute
 that has been defined elsewhere in the input script.  See the
-:doc:`compute <compute>` command for details.  If the compute calculates
+:doc:`compute <compute>` command for details.  If the compute
-a global scalar, vector, or array, then the keyword formats with 0, 1,
+calculates a global scalar, vector, or array, then the keyword formats
-or 2 brackets will reference a scalar value from the compute.
+with 0, 1, or 2 brackets will reference a scalar value from the
 compute.
 Note that some computes calculate "intensive" global quantities like
 temperature; others calculate "extensive" global quantities like
@ -410,13 +411,14 @@ norm <thermo_modify>` option being used.
 The *f_ID* and *f_ID[I]* and *f_ID[I][J]* keywords allow global values
 calculated by a fix to be output.  As discussed on the :doc:`fix
-<fix>` doc page, fixes can calculate global, per-atom, or local
+<fix>` doc page, fixes can calculate global, per-atom, local, and
-values.  Only global values can be referenced by this command.
+per-grid values.  Only global values can be referenced by this
-However, per-atom fix values can be referenced for an individual atom
+command.  However, per-atom fix values can be referenced for an
-in a :doc:`variable <variable>` and the variable referenced by
+individual atom in an :doc:`equal-style variable <variable>` and the
-thermo_style custom, as discussed below.  See the discussion above for
+variable referenced by thermo_style custom, as discussed below.  See
-how the I in *f_ID[I]* can be specified with a wildcard asterisk to
+the discussion above for how the I in *f_ID[I]* can be specified with
-effectively specify multiple values from a global fix vector.
+a wildcard asterisk to effectively specify multiple values from a
 global fix vector.
 The ID in the keyword should be replaced by the actual ID of a fix
 that has been defined elsewhere in the input script.  See the
@ -438,14 +440,15 @@ output.  The name in the keyword should be replaced by the variable
 name that has been defined elsewhere in the input script.  Only
 equal-style and vector-style variables can be referenced; the latter
 requires a bracketed term to specify the Ith element of the vector
-calculated by the variable.  However, an atom-style variable can be
+calculated by the variable.  However, an equal-style variable can use
-referenced for an individual atom by an equal-style variable and that
+an atom-style variable in its formula indexed by the ID of an
-variable referenced.  See the :doc:`variable <variable>` command for
+individual atom.  This is a way to output a specific atom's per-atom
-details.  Variables of style *equal* and *vector* and *atom* define a
+coordinates or other per-atom properties in thermo output.  See the
-formula which can reference per-atom properties or thermodynamic
+:doc:`variable <variable>` command for details.  Note that variables
-keywords, or they can invoke other computes, fixes, or variables when
+of style *equal* and *vector* and *atom* define a formula which can
-evaluated, so this is a very general means of creating thermodynamic
+reference per-atom properties or thermodynamic keywords, or they can
-output.
+invoke other computes, fixes, or variables when evaluated, so this is
 a very general means of creating thermodynamic output.
 Note that equal-style and vector-style variables are assumed to
 produce "intensive" global quantities, which are thus printed as-is,
--- a/doc/src/variable.rst
+++ b/doc/src/variable.rst
@ -550,12 +550,11 @@ variables.
 Most of the formula elements produce a scalar value.  Some produce a
 global or per-atom vector of values.  Global vectors can be produced
 by computes or fixes or by other vector-style variables.  Per-atom
-vectors are produced by atom vectors, compute references that
+vectors are produced by atom vectors, computes or fixes which output a
-represent a per-atom vector, fix references that represent a per-atom
+per-atom vector or array, and variables that are atom-style variables.
-vector, and variables that are atom-style variables.  Math functions
+Math functions that operate on scalar values produce a scalar value;
-that operate on scalar values produce a scalar value; math function
+math functions that operate on global or per-atom vectors do so
-that operate on global or per-atom vectors do so element-by-element
+element-by-element and produce a global or per-atom vector.
 and produce a global or per-atom vector.
 A formula for equal-style variables cannot use any formula element
 that produces a global or per-atom vector.  A formula for a
@ -564,12 +563,13 @@ scalar value or a global vector value, but cannot use a formula
 element that produces a per-atom vector.  A formula for an atom-style
 variable can use formula elements that produce either a scalar value
 or a per-atom vector, but not one that produces a global vector.
 Atom-style variables are evaluated by other commands that define a
-:doc:`group <group>` on which they operate, e.g. a :doc:`dump <dump>` or
+:doc:`group <group>` on which they operate, e.g. a :doc:`dump <dump>`
-:doc:`compute <compute>` or :doc:`fix <fix>` command.  When they invoke
+or :doc:`compute <compute>` or :doc:`fix <fix>` command.  When they
-the atom-style variable, only atoms in the group are included in the
+invoke the atom-style variable, only atoms in the group are included
-formula evaluation.  The variable evaluates to 0.0 for atoms not in
+in the formula evaluation.  The variable evaluates to 0.0 for atoms
-the group.
+not in the group.
 ----------
@ -1138,69 +1138,74 @@ only defined if an :doc:`atom_style <atom_style>` is being used that
 defines molecule IDs.
 Note that many other atom attributes can be used as inputs to a
-variable by using the :doc:`compute property/atom <compute_property_atom>` command and then specifying
+variable by using the :doc:`compute property/atom
-a quantity from that compute.
+<compute_property_atom>` command and then specifying a quantity from
 that compute.
 ----------
 Compute References
 ------------------
-Compute references access quantities calculated by a
+Compute references access quantities calculated by a :doc:`compute
-:doc:`compute <compute>`.  The ID in the reference should be replaced by
+<compute>`.  The ID in the reference should be replaced by the ID of a
-the ID of a compute defined elsewhere in the input script.  As
+compute defined elsewhere in the input script.
 discussed in the page for the :doc:`compute <compute>` command,
 computes can produce global, per-atom, or local values.  Only global
 and per-atom values can be used in a variable.  Computes can also
 produce a scalar, vector, or array.
-An equal-style variable can only use scalar values, which means a
+As discussed on the page for the :doc:`compute <compute>` command,
-global scalar, or an element of a global or per-atom vector or array.
+computes can produce global, per-atom, local, and per-grid values.
-A vector-style variable can use scalar values or a global vector of
+Only global and per-atom values can be used in a variable.  Computes
-values, or a column of a global array of values.  Atom-style variables
+can also produce scalars (global only), vectors, and arrays.  See the
-can use global scalar values.  They can also use per-atom vector
+doc pages for individual computes to see what different kinds of data
-values, or a column of a per-atom array.  See the doc pages for
+they produce.
 individual computes to see what kind of values they produce.
-Examples of different kinds of compute references are as follows.
+An equal-style variable can only use scalar values, either from global
-There is typically no ambiguity (see exception below) as to what a
+or per-atom data.  In the case of per-atom data, this would be a value
-reference means, since computes only produce either global or per-atom
+for a specific atom.
 quantities, never both.
-+-------------+-------------------------------------------------------------------------------------------------------+
+A vector-style variable can use scalar values (same as for equal-style
-| c_ID       | global scalar, or per-atom vector                                                                      |
+variables), or global vectors of values.  The latter can also be a
-+-------------+-------------------------------------------------------------------------------------------------------+
+column of a global array.
 | c_ID[I]    | Ith element of global vector, or atom I's value in per-atom vector, or Ith column from per-atom array  |
 +-------------+-------------------------------------------------------------------------------------------------------+
 | c_ID[I][J] | I,J element of global array, or atom I's Jth value in per-atom array                                   |
 +-------------+-------------------------------------------------------------------------------------------------------+
-For I and J indices, integers can be specified or a variable name,
+Atom-style variables can use scalar values (same as for equal-style
-specified as v_name, where name is the name of the variable.  The
+variables), or per-atom vectors of values.  The latter can also be a
-rules for this syntax are the same as for the "Atom Values and
+column of a per-atom array.
 Vectors" discussion above.
-One source of ambiguity for compute references is when a vector-style
+The various allowed compute references in the variable formulas for
-variable refers to a compute that produces both a global scalar and a
+equal-, vector-, and atom-style variables are listed in the following
-global vector.  Consider a compute with ID "foo" that does this,
+table:
 referenced as follows by variable "a", where "myVec" is another
 vector-style variable:
-.. code-block:: LAMMPS
+--------+------------+------------------------------------------+
 | equal  | c_ID       | global scalar                            |
 | equal  | c_ID[I]    | element of global vector                 |
 | equal  | c_ID[I][J] | element of global array                  |
 | equal  | C_ID[I]    | element of per-atom vector (I = atom ID) |
 | equal  | C_ID[I][J] | element of per-atom array (I = atom ID)  |
 +--------+------------+------------------------------------------+
 | vector | c_ID       | global vector                            |
 | vector | c_ID[I]    | column of global array                   |
 +--------+------------+------------------------------------------+
 | atom   | c_ID       | per-atom vector                          |
 | atom   | c_ID[I]    | column of per-atom array                 |
 +--------+------------+------------------------------------------+
-   variable a vector c_foo*v_myVec
+Note that if an equal-style variable formula wishes to access per-atom
 data from a compute, it must use capital "C" as the ID prefix and not
 lower-case "c".
-The reference "c_foo" could refer to either the global scalar or
+Also note that if a vector- or atom-style variable formula needs to
-global vector produced by compute "foo".  In this case, "c_foo" will
+access a scalar value from a compute (i.e. the 5 kinds of values in
-always refer to the global scalar, and "C_foo" can be used to
+the first 5 lines of the table), it can not do so directly.  Instead,
-reference the global vector.  Similarly if the compute produces both a
+it can use a reference to an equal-style variable which stores the
-global vector and global array, then "c_foo[I]" will always refer to
+scalar value from the compute.
 an element of the global vector, and "C_foo[I]" can be used to
 reference the Ith column of the global array.
-Note that if a variable containing a compute is evaluated directly in
+The I and J indices in these compute references can be integers or can
-an input script (not during a run), then the values accessed by the
+be a variable name, specified as v_name, where name is the name of the
-compute must be current.  See the discussion below about "Variable
+variable.  The rules for this syntax are the same as for indices in
 the "Atom Values and Vectors" discussion above.
 If a variable containing a compute is evaluated directly in an input
 script (not during a run), then the values accessed by the compute
 should be current.  See the discussion below about "Variable
 Accuracy".
 ----------
@ -1208,51 +1213,59 @@ Accuracy".
 Fix References
 --------------
-Fix references access quantities calculated by a :doc:`fix <compute>`.
+Fix references access quantities calculated by a :doc:`fix <fix>`.
 The ID in the reference should be replaced by the ID of a fix defined
-elsewhere in the input script.  As discussed in the page for the
+elsewhere in the input script.
 :doc:`fix <fix>` command, fixes can produce global, per-atom, or local
 values.  Only global and per-atom values can be used in a variable.
 Fixes can also produce a scalar, vector, or array.  An equal-style
 variable can only use scalar values, which means a global scalar, or
 an element of a global or per-atom vector or array.  Atom-style
 variables can use the same scalar values.  They can also use per-atom
 vector values.  A vector value can be a per-atom vector itself, or a
 column of a per-atom array.  See the doc pages for individual fixes
 to see what kind of values they produce.
-The different kinds of fix references are exactly the same as the
+As discussed on the page for the :doc:`fix <fix>` command, fixes can
-compute references listed in the above table, where "c\_" is replaced
+produce global, per-atom, local, and per-grid values.  Only global and
-by "f\_".  Again, there is typically no ambiguity (see exception below)
+per-atom values can be used in a variable.  Fixes can also produce
-as to what a reference means, since fixes only produce either global
+scalars (global only), vectors, and arrays.  See the doc pages for
-or per-atom quantities, never both.
+individual fixes to see what different kinds of data they produce.
-+-------------+-------------------------------------------------------------------------------------------------------+
+An equal-style variable can only use scalar values, either from global
-| f_ID       | global scalar, or per-atom vector                                                                      |
+or per-atom data.  In the case of per-atom data, this would be a value
-+-------------+-------------------------------------------------------------------------------------------------------+
+for a specific atom.
 | f_ID[I]    | Ith element of global vector, or atom I's value in per-atom vector, or Ith column from per-atom array  |
 +-------------+-------------------------------------------------------------------------------------------------------+
 | f_ID[I][J] | I,J element of global array, or atom I's Jth value in per-atom array                                   |
 +-------------+-------------------------------------------------------------------------------------------------------+
-For I and J indices, integers can be specified or a variable name,
+A vector-style variable can use scalar values (same as for equal-style
-specified as v_name, where name is the name of the variable.  The
+variables), or global vectors of values.  The latter can also be a
-rules for this syntax are the same as for the "Atom Values and
+column of a global array.
 Vectors" discussion above.
-One source of ambiguity for fix references is the same ambiguity
+Atom-style variables can use scalar values (same as for equal-style
-discussed for compute references above.  Namely when a vector-style
+variables), or per-atom vectors of values.  The latter can also be a
-variable refers to a fix that produces both a global scalar and a
+column of a per-atom array.
 global vector.  The solution is the same as for compute references.
 For a fix with ID "foo", "f_foo" will always refer to the global
 scalar, and "F_foo" can be used to reference the global vector.  And
 similarly for distinguishing between a fix's global vector versus
 global array with "f_foo[I]" versus "F_foo[I]".
-Note that if a variable containing a fix is evaluated directly in an
+The allowed fix references in variable formulas for equal-, vector-,
-input script (not during a run), then the values accessed by the fix
+and atom-style variables are listed in the following table:
-should be current.  See the discussion below about "Variable
+
-Accuracy".
+--------+------------+------------------------------------------+
 | equal  | f_ID       | global scalar                            |
 | equal  | f_ID[I]    | element of global vector                 |
 | equal  | f_ID[I][J] | element of global array                  |
 | equal  | F_ID[I]    | element of per-atom vector (I = atom ID) |
 | equal  | F_ID[I][J] | element of per-atom array (I = atom ID)  |
 +--------+------------+------------------------------------------+
 | vector | f_ID       | global vector                            |
 | vector | f_ID[I]    | column of global array                   |
 +--------+------------+------------------------------------------+
 | atom   | f_ID       | per-atom vector                          |
 | atom   | f_ID[I]    | column of per-atom array                 |
 +--------+------------+------------------------------------------+
 Note that if an equal-style variable formula wishes to access per-atom
 data from a fix, it must use capital "F" as the ID prefix and not
 lower-case "f".
 Also note that if a vector- or atom-style variable formula needs to
 access a scalar value from a fix (i.e. the 5 kinds of values in the
 first 5 lines of the table), it can not do so directly.  Instead, it
 can use a reference to an equal-style variable which stores the scalar
 value from the fix.
 The I and J indices in these fix references can be integers or can be
 a variable name, specified as v_name, where name is the name of the
 variable.  The rules for this syntax are the same as for indices in
 the "Atom Values and Vectors" discussion above.
 Note that some fixes only generate quantities on certain timesteps.
 If a variable attempts to access the fix on non-allowed timesteps, an
@ -1260,6 +1273,10 @@ error is generated.  For example, the :doc:`fix ave/time <fix_ave_time>`
 command may only generate averaged quantities every 100 steps.  See
 the doc pages for individual fix commands for details.
 If a variable containing a fix is evaluated directly in an input
 script (not during a run), then the values accessed by the fix should
 be current.  See the discussion below about "Variable Accuracy".
 ----------
 Variable References
@ -1294,26 +1311,32 @@ including other atom-style or atomfile-style variables.  If it uses a
 vector-style variable, a subscript must be used to access a single
 value from the vector-style variable.
-Examples of different kinds of variable references are as follows.
+The allowed variable references in variable formulas for equal-,
-There is no ambiguity as to what a reference means, since variables
+vector-, and atom-style variables are listed in the following table.
-produce only a global scalar or global vector or per-atom vector.
+Note that there is no ambiguity as to what a reference means, since
 referenced variables produce only a global scalar or global vector or
 per-atom vector.
-+------------+----------------------------------------------------------------------+
+--------+-----------+-----------------------------------------------------------------------------------+
-| v_name    | global scalar from equal-style variable                               |
+| equal  | v_name    | global scalar from an equal-style variable                                        |
-+------------+----------------------------------------------------------------------+
+| equal  | v_name[I] | element of global vector from a vector-style variable                             |
-| v_name    | global vector from vector-style variable                              |
+| equal  | v_name[I] | element of per-atom vector (I = atom ID) from an atom- or atomfile-style variable |
-+------------+----------------------------------------------------------------------+
+--------+-----------+-----------------------------------------------------------------------------------+
-| v_name    | per-atom vector from atom-style or atomfile-style variable            |
+| vector | v_name    | global scalar from an equal-style variable                                        |
-+------------+----------------------------------------------------------------------+
+| vector | v_name    | global vector from a vector-style variable                                        |
-| v_name[I] | Ith element of a global vector from vector-style variable             |
+| vector | v_name[I] | element of global vector from a vector-style variable                             |
-+------------+----------------------------------------------------------------------+
+| vector | v_name[I] | element of per-atom vector (I = atom ID) from an atom- or atomfile-style variable |
-| v_name[I] | value of atom with ID = I from atom-style or atomfile-style variable  |
+--------+-----------+-----------------------------------------------------------------------------------+
-+------------+----------------------------------------------------------------------+
+| atom   | v_name    | global scalar from an equal-style variable                                        |
 | atom   | v_name    | per-atom vector from an atom-style or atomfile-style variable                     |
 | atom   | v_name[I] | element of global vector from a vector-style variable                             |
 | atom   | v_name[I] | element of per-atom vector (I = atom ID) from an atom- or atomfile-style variable |
 +--------+-----------+-----------------------------------------------------------------------------------+
 For the I index, an integer can be specified or a variable name,
 specified as v_name, where name is the name of the variable.  The
-rules for this syntax are the same as for the "Atom Values and
+rules for this syntax are the same as for indices in the "Atom Values
-Vectors" discussion above.
+and Vectors" discussion above.
 ----------
--- a/examples/mliap/in.mliap.quadratic.compute
+++ b/examples/mliap/in.mliap.quadratic.compute
@ -65,7 +65,7 @@ compute         bsum2 snapgroup2 reduce sum c_b[*]
 # fix 		bsum2 all ave/time 1 1 1 c_bsum2 file bsum2.dat mode vector
 compute		vbsum all reduce sum c_vb[*]
 # fix 		vbsum all ave/time 1 1 1 c_vbsum file vbsum.dat mode vector
-variable	db_2_100 equal c_db[2][100]
+variable	db_2_100 equal C_db[2][100]
 # test output:   1: total potential energy
 #                2: xy component of stress tensor
--- a/examples/mliap/in.mliap.snap.compute
+++ b/examples/mliap/in.mliap.snap.compute
@ -65,7 +65,7 @@ compute         bsum2 snapgroup2 reduce sum c_b[*]
 # fix 		bsum2 all ave/time 1 1 1 c_bsum2 file bsum2.dat mode vector
 compute		vbsum all reduce sum c_vb[*]
 # fix 		vbsum all ave/time 1 1 1 c_vbsum file vbsum.dat mode vector
-variable	db_2_25 equal c_db[2][25]
+variable	db_2_25 equal C_db[2][25]
 thermo 		100
--- a/examples/snap/in.grid.snap
+++ b/examples/snap/in.grid.snap
@ -67,18 +67,18 @@ compute 	mygridlocal all sna/grid/local grid ${ngrid} ${ngrid} ${ngrid} &
 # define output
-variable	B5atom equal c_b[2][5]
+variable	B5atom equal C_b[2][5]
 variable	B5grid equal c_mygrid[8][8]
 variable	rmse_global equal "sqrt(   &
 	 (c_mygrid[8][1] - x[2])^2 +      &
 	 (c_mygrid[8][2] - y[2])^2 +      &
 	 (c_mygrid[8][3] - z[2])^2 +      &
-	 (c_mygrid[8][4] - c_b[2][1])^2 + &
+	 (c_mygrid[8][4] - C_b[2][1])^2 + &
-	 (c_mygrid[8][5] - c_b[2][2])^2 + &
+	 (c_mygrid[8][5] - C_b[2][2])^2 + &
-	 (c_mygrid[8][6] - c_b[2][3])^2 + &
+	 (c_mygrid[8][6] - C_b[2][3])^2 + &
-	 (c_mygrid[8][7] - c_b[2][4])^2 + &
+	 (c_mygrid[8][7] - C_b[2][4])^2 + &
-	 (c_mygrid[8][8] - c_b[2][5])^2   &
+	 (c_mygrid[8][8] - C_b[2][5])^2   &
 	 )"
 thermo_style	custom step v_B5atom v_B5grid v_rmse_global
--- a/examples/snap/in.grid.tri
+++ b/examples/snap/in.grid.tri
@ -87,18 +87,18 @@ compute 	mygridlocal all sna/grid/local grid ${ngridx} ${ngridy} ${ngridz} &
 # define output
-variable	B5atom equal c_b[7][5]
+variable	B5atom equal C_b[7][5]
 variable	B5grid equal c_mygrid[13][8]
 # do not compare x,y,z because assignment of ids
 # to atoms is not unique for different processor grids
 variable	rmse_global equal "sqrt(    &
-	 (c_mygrid[13][4] - c_b[7][1])^2 + &
+	 (c_mygrid[13][4] - C_b[7][1])^2 + &
-	 (c_mygrid[13][5] - c_b[7][2])^2 + &
+	 (c_mygrid[13][5] - C_b[7][2])^2 + &
-	 (c_mygrid[13][6] - c_b[7][3])^2 + &
+	 (c_mygrid[13][6] - C_b[7][3])^2 + &
-	 (c_mygrid[13][7] - c_b[7][4])^2 + &
+	 (c_mygrid[13][7] - C_b[7][4])^2 + &
-	 (c_mygrid[13][8] - c_b[7][5])^2   &
+	 (c_mygrid[13][8] - C_b[7][5])^2   &
 	 )"
 thermo_style	custom step v_B5atom v_B5grid v_rmse_global
--- a/examples/snap/in.snap.compute
+++ b/examples/snap/in.snap.compute
@ -70,7 +70,7 @@ compute         bsum2 snapgroup2 reduce sum c_b[*]
 # fix 		bsum2 all ave/time 1 1 1 c_bsum2 file bsum2.dat mode vector
 compute		vbsum all reduce sum c_vb[*]
 # fix 		vbsum all ave/time 1 1 1 c_vbsum file vbsum.dat mode vector
-variable	db_2_25 equal c_db[2][25]
+variable	db_2_25 equal C_db[2][25]
 # set up compute snap generating global array
--- a/examples/snap/in.snap.compute.quadratic
+++ b/examples/snap/in.snap.compute.quadratic
@ -70,7 +70,7 @@ compute         bsum2 snapgroup2 reduce sum c_b[*]
 # fix 		bsum2 all ave/time 1 1 1 c_bsum2 file bsum2.dat mode vector
 compute		vbsum all reduce sum c_vb[*]
 # fix 		vbsum all ave/time 1 1 1 c_vbsum file vbsum.dat mode vector
-variable	db_2_100 equal c_db[2][100]
+variable	db_2_100 equal C_db[2][100]
 # set up compute snap generating global array
--- a/examples/voronoi/in.voronoi
+++ b/examples/voronoi/in.voronoi
@ -146,10 +146,10 @@ variable i2 equal 257
 compute v1 all voronoi/atom occupation
 compute r0 all   reduce sum c_v1[1]
 compute r1 all   reduce sum c_v1[2]
-variable d5a equal c_v1[${i1}][1]
+variable d5a equal C_v1[${i1}][1]
-variable d5b equal c_v1[${i2}][1]
+variable d5b equal C_v1[${i2}][1]
-variable d5c equal c_v1[${i1}][2]
+variable d5c equal C_v1[${i1}][2]
-variable d5d equal c_v1[${i2}][2]
+variable d5d equal C_v1[${i2}][2]
 thermo_style custom c_r0 c_r1 v_d5a v_d5b v_d5c v_d5d
 run 0
--- a/examples/voronoi/in.voronoi.data
+++ b/examples/voronoi/in.voronoi.data
@ -63,11 +63,9 @@ undump          dlocal
 # TEST 2: 
 #
-# This compute voronoi generates  
+# This compute voronoi generates peratom and local and global quantities
 # local and global quantities, but
 # not per-atom quantities
-compute 	v2 all voronoi/atom neighbors yes edge_histo 6 peratom no
+compute 	v2 all voronoi/atom neighbors yes edge_histo 6
 # write voronoi local quantities to a file
@ -75,7 +73,7 @@ dump            d2 all local  1 dump.neighbors2 index c_v2[1] c_v2[2] c_v2[3]
 # sum up a voronoi local quantity
-compute 	sumarea all reduce sum c_v2[3]
+compute 	sumarea all reduce sum c_v2[3] inputs local
 # output voronoi global quantities
@ -83,6 +81,3 @@ thermo_style 	custom c_sumarea c_v2[3] c_v2[4] c_v2[5] c_v2[6] c_v2[7]
 thermo 		1
 run  		0
--- a/lib/pace/Install.py
+++ b/lib/pace/Install.py
@ -18,11 +18,11 @@ from install_helpers import fullpath, geturl, checkmd5sum, getfallback
 # settings
 thisdir = fullpath('.')
-version ='v.2023.10.04.pre'
+version ='v.2023.10.04'
 # known checksums for different PACE versions. used to validate the download.
 checksums = { \
-    'v.2023.10.04.pre': '61ba11a37ee00de8365b18b521d394a6'
+    'v.2023.10.04': '70ff79f4e59af175e55d24f3243ad1ff'
 }
 parser = ArgumentParser(prog='Install.py', description="LAMMPS library build wrapper script")
--- a/src/BOCS/fix_bocs.cpp
+++ b/src/BOCS/fix_bocs.cpp
@ -1024,7 +1024,10 @@ void FixBocs::final_integrate()
  if (pstat_flag) {
    if (pstyle == ISO) pressure->compute_scalar();
-    else pressure->compute_vector();
+    else {
      temperature->compute_vector();
      pressure->compute_vector();
    }
    couple();
    pressure->addstep(update->ntimestep+1);
  }
@ -1961,6 +1964,7 @@ void FixBocs::nhc_press_integrate()
  int ich,i,pdof;
  double expfac,factor_etap,kecurrent;
  double kt = boltz * t_target;
  double lkt_press;
  // Update masses, to preserve initial freq, if flag set
@ -2006,7 +2010,8 @@ void FixBocs::nhc_press_integrate()
    }
  }
-  double lkt_press = pdof * kt;
+  if (pstyle == ISO) lkt_press = kt;
  else lkt_press = pdof * kt;
  etap_dotdot[0] = (kecurrent - lkt_press)/etap_mass[0];
  double ncfac = 1.0/nc_pchain;
--- a/src/Depend.sh
+++ b/src/Depend.sh
@ -64,6 +64,7 @@ fi
 if (test $1 = "COLLOID") then
  depend GPU
  depend KOKKOS
  depend OPENMP
 fi
--- a/src/INTEL/fix_intel.cpp
+++ b/src/INTEL/fix_intel.cpp
@ -20,6 +20,7 @@
 #include "fix_intel.h"
 #include "comm.h"
 #include "domain.h"
 #include "error.h"
 #include "force.h"
 #include "neighbor.h"
@ -470,6 +471,7 @@ void FixIntel::pair_init_check(const bool cdmessage)
  int need_tag = 0;
  if (atom->molecular != Atom::ATOMIC || three_body_neighbor()) need_tag = 1;
  if (domain->triclinic && force->newton_pair) need_tag = 1;
  // Clear buffers used for pair style
  char kmode[80];
--- a/src/INTEL/npair_halffull_newton_intel.cpp
+++ b/src/INTEL/npair_halffull_newton_intel.cpp
@ -20,7 +20,9 @@
 #include "atom.h"
 #include "comm.h"
 #include "domain.h"
 #include "error.h"
 #include "force.h"
 #include "modify.h"
 #include "my_page.h"
 #include "neigh_list.h"
@ -56,6 +58,9 @@ void NPairHalffullNewtonIntel::build_t(NeighList *list,
  const int * _noalias const numneigh_full = list->listfull->numneigh;
  const int ** _noalias const firstneigh_full = (const int ** const)list->listfull->firstneigh;  // NOLINT
  const double delta = 0.01 * force->angstrom;
  const int triclinic = domain->triclinic;
  #if defined(_OPENMP)
  #pragma omp parallel
  #endif
@ -82,6 +87,7 @@ void NPairHalffullNewtonIntel::build_t(NeighList *list,
      const int * _noalias const jlist = firstneigh_full[i];
      const int jnum = numneigh_full[i];
      if (!triclinic) {
        #if defined(LMP_SIMD_COMPILER)
        #pragma vector aligned
        #pragma ivdep
@ -102,6 +108,30 @@ void NPairHalffullNewtonIntel::build_t(NeighList *list,
          if (addme)
            neighptr[n++] = joriginal;
        }
      } else {
        #if defined(LMP_SIMD_COMPILER)
        #pragma vector aligned
        #pragma ivdep
        #endif
        for (int jj = 0; jj < jnum; jj++) {
          const int joriginal = jlist[jj];
          const int j = joriginal & NEIGHMASK;
          int addme = 1;
          if (j < nlocal) {
            if (i > j) addme = 0;
          } else {
            if (fabs(x[j].z-ztmp) > delta) {
              if (x[j].z < ztmp) addme = 0;
            } else if (fabs(x[j].y-ytmp) > delta) {
              if (x[j].y < ytmp) addme = 0;
            } else {
              if (x[j].x < xtmp) addme = 0;
            }
          }
          if (addme)
            neighptr[n++] = joriginal;
        }
      }
      ilist[ii] = i;
      firstneigh[i] = neighptr;
@ -203,7 +233,7 @@ void NPairHalffullNewtonIntel::build_t3(NeighList *list, int *numhalf)
 void NPairHalffullNewtonIntel::build(NeighList *list)
 {
-  if (_fix->three_body_neighbor() == 0) {
+  if (_fix->three_body_neighbor() == 0 || domain->triclinic) {
    if (_fix->precision() == FixIntel::PREC_MODE_MIXED)
      build_t(list, _fix->get_mixed_buffers());
    else if (_fix->precision() == FixIntel::PREC_MODE_DOUBLE)
--- a/src/INTEL/npair_halffull_newton_trim_intel.cpp
+++ b/src/INTEL/npair_halffull_newton_trim_intel.cpp
@ -20,7 +20,9 @@
 #include "atom.h"
 #include "comm.h"
 #include "domain.h"
 #include "error.h"
 #include "force.h"
 #include "modify.h"
 #include "my_page.h"
 #include "neigh_list.h"
@ -57,6 +59,8 @@ void NPairHalffullNewtonTrimIntel::build_t(NeighList *list,
  const int ** _noalias const firstneigh_full = (const int ** const)list->listfull->firstneigh;  // NOLINT
  const flt_t cutsq_custom = cutoff_custom * cutoff_custom;
  const double delta = 0.01 * force->angstrom;
  const int triclinic = domain->triclinic;
  #if defined(_OPENMP)
  #pragma omp parallel
@ -84,6 +88,7 @@ void NPairHalffullNewtonTrimIntel::build_t(NeighList *list,
      const int * _noalias const jlist = firstneigh_full[i];
      const int jnum = numneigh_full[i];
      if (!triclinic) {
        #if defined(LMP_SIMD_COMPILER)
        #pragma vector aligned
        #pragma ivdep
@ -114,6 +119,40 @@ void NPairHalffullNewtonTrimIntel::build_t(NeighList *list,
          if (addme)
            neighptr[n++] = joriginal;
        }
      } else {
        #if defined(LMP_SIMD_COMPILER)
        #pragma vector aligned
        #pragma ivdep
        #endif
        for (int jj = 0; jj < jnum; jj++) {
          const int joriginal = jlist[jj];
          const int j = joriginal & NEIGHMASK;
          int addme = 1;
          if (j < nlocal) {
            if (i > j) addme = 0;
          } else {
            if (fabs(x[j].z-ztmp) > delta) {
              if (x[j].z < ztmp) addme = 0;
            } else if (fabs(x[j].y-ytmp) > delta) {
              if (x[j].y < ytmp) addme = 0;
            } else {
              if (x[j].x < xtmp) addme = 0;
            }
          }
          // trim to shorter cutoff
          const flt_t delx = xtmp - x[j].x;
          const flt_t dely = ytmp - x[j].y;
          const flt_t delz = ztmp - x[j].z;
          const flt_t rsq = delx * delx + dely * dely + delz * delz;
          if (rsq > cutsq_custom) addme = 0;
          if (addme)
            neighptr[n++] = joriginal;
        }
      }
      ilist[ii] = i;
      firstneigh[i] = neighptr;
@ -235,7 +274,7 @@ void NPairHalffullNewtonTrimIntel::build_t3(NeighList *list, int *numhalf,
 void NPairHalffullNewtonTrimIntel::build(NeighList *list)
 {
-  if (_fix->three_body_neighbor() == 0) {
+  if (_fix->three_body_neighbor() == 0 || domain->triclinic) {
    if (_fix->precision() == FixIntel::PREC_MODE_MIXED)
      build_t(list, _fix->get_mixed_buffers());
    else if (_fix->precision() == FixIntel::PREC_MODE_DOUBLE)
--- a/src/INTEL/npair_intel.cpp
+++ b/src/INTEL/npair_intel.cpp
@ -204,6 +204,8 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
  }
  const int special_bound = sb;
  const double delta = 0.01 * force->angstrom;
  #ifdef _LMP_INTEL_OFFLOAD
  const int * _noalias const binhead = this->binhead;
  const int * _noalias const bins = this->bins;
@ -229,7 +231,7 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
    in(ncache_stride,maxnbors,nthreads,maxspecial,nstencil,e_nall,offload) \
    in(offload_end,separate_buffers,astart,aend,nlocal,molecular) \
    in(ntypes,xperiodic,yperiodic,zperiodic,xprd_half,yprd_half,zprd_half) \
-    in(pack_width,special_bound)                                        \
+    in(pack_width,special_bound,delta)                                  \
    out(overflow:length(5) alloc_if(0) free_if(0)) \
    out(timer_compute:length(1) alloc_if(0) free_if(0)) \
    signal(tag)
@ -331,7 +333,7 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
        const flt_t ztmp = x[i].z;
        const int itype = x[i].w;
        tagint itag;
-        if (THREE) itag = tag[i];
+        if (THREE || (TRI && !FULL)) itag = tag[i];
        const int ioffset = ntypes * itype;
        const int ibin = atombin[i];
@ -365,7 +367,7 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
            ty[u] = x[j].y;
            tz[u] = x[j].z;
            tjtype[u] = x[j].w;
-            if (THREE) ttag[u] = tag[j];
+            if (THREE || (TRI && !FULL)) ttag[u] = tag[j];
          }
          if (FULL == 0 && TRI != 1) {
@ -486,6 +488,7 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
          // Triclinic
          if (TRI) {
            if (FULL) {
              if (tz[u] < ztmp) addme = 0;
              if (tz[u] == ztmp) {
                if (ty[u] < ytmp) addme = 0;
@ -494,6 +497,25 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
                  if (tx[u] == xtmp && j <= i) addme = 0;
                }
              }
            } else {
              if (j <= i) addme = 0;
              if (j >= nlocal) {
                const tagint jtag = ttag[u];
                if (itag > jtag) {
                  if ((itag+jtag) % 2 == 0) addme = 0;
                } else if (itag < jtag) {
                  if ((itag+jtag) % 2 == 1) addme = 0;
                } else {
                  if (fabs(tz[u]-ztmp) > delta) {
                    if (tz[u] < ztmp) addme = 0;
                  } else if (fabs(ty[u]-ytmp) > delta) {
                    if (ty[u] < ytmp) addme = 0;
                  } else {
                    if (tx[u] < xtmp) addme = 0;
                  }
                }
              }
            }
          }
          // offload ghost check
--- a/src/KOKKOS/Install.sh
+++ b/src/KOKKOS/Install.sh
@ -129,6 +129,8 @@ action fix_dt_reset_kokkos.cpp
 action fix_dt_reset_kokkos.h
 action fix_enforce2d_kokkos.cpp
 action fix_enforce2d_kokkos.h
 action fix_efield_kokkos.cpp
 action fix_efield_kokkos.h
 action fix_eos_table_rx_kokkos.cpp fix_eos_table_rx.cpp
 action fix_eos_table_rx_kokkos.h fix_eos_table_rx.h
 action fix_freeze_kokkos.cpp fix_freeze.cpp
@ -173,6 +175,8 @@ action fix_shake_kokkos.cpp fix_shake.cpp
 action fix_shake_kokkos.h fix_shake.h
 action fix_shardlow_kokkos.cpp fix_shardlow.cpp
 action fix_shardlow_kokkos.h fix_shardlow.h
 action fix_spring_self_kokkos.cpp
 action fix_spring_self_kokkos.h
 action fix_viscous_kokkos.cpp
 action fix_viscous_kokkos.h
 action fix_wall_gran_kokkos.cpp fix_wall_gran.cpp
@ -363,6 +367,8 @@ action pair_vashishta_kokkos.cpp pair_vashishta.cpp
 action pair_vashishta_kokkos.h pair_vashishta.h
 action pair_yukawa_kokkos.cpp
 action pair_yukawa_kokkos.h
 action pair_yukawa_colloid_kokkos.cpp pair_yukawa_colloid.cpp
 action pair_yukawa_colloid_kokkos.h pair_yukawa_colloid.h
 action pair_zbl_kokkos.cpp
 action pair_zbl_kokkos.h
 action pppm_kokkos.cpp pppm.cpp
--- a/src/KOKKOS/atom_kokkos.cpp
+++ b/src/KOKKOS/atom_kokkos.cpp
@ -44,6 +44,9 @@ AtomKokkos::AtomKokkos(LAMMPS *lmp) : Atom(lmp)
  h_tag_min = Kokkos::subview(h_tag_min_max,0);
  h_tag_max = Kokkos::subview(h_tag_min_max,1);
  nprop_atom = 0;
  fix_prop_atom = nullptr;
 }
 /* ---------------------------------------------------------------------- */
@ -112,6 +115,7 @@ AtomKokkos::~AtomKokkos()
  memoryKK->destroy_kokkos(k_dvector, dvector);
  dvector = nullptr;
  delete [] fix_prop_atom;
 }
 /* ---------------------------------------------------------------------- */
@ -125,11 +129,37 @@ void AtomKokkos::init()
 /* ---------------------------------------------------------------------- */
 void AtomKokkos::update_property_atom()
 {
  nprop_atom = 0;
  std::vector<Fix *> prop_atom_fixes;
  for (auto &ifix : modify->get_fix_by_style("^property/atom")) {
    if (!ifix->kokkosable)
      error->all(FLERR, "KOKKOS package requires a Kokkos-enabled version of fix property/atom");
    ++nprop_atom;
    prop_atom_fixes.push_back(ifix);
  }
  delete[] fix_prop_atom;
  fix_prop_atom = new FixPropertyAtomKokkos *[nprop_atom];
  int n = 0;
  for (auto &ifix : prop_atom_fixes)
    fix_prop_atom[n++] = dynamic_cast<FixPropertyAtomKokkos *>(ifix);
 }
 /* ---------------------------------------------------------------------- */
 void AtomKokkos::sync(const ExecutionSpace space, unsigned int mask)
 {
-  if (space == Device && lmp->kokkos->auto_sync) avecKK->modified(Host, mask);
+  if (space == Device && lmp->kokkos->auto_sync) {
    avecKK->modified(Host, mask);
    for (int n = 0; n < nprop_atom; n++) fix_prop_atom[n]->modified(Host, mask);
  }
  avecKK->sync(space, mask);
  for (int n = 0; n < nprop_atom; n++) fix_prop_atom[n]->sync(space, mask);
 }
 /* ---------------------------------------------------------------------- */
@ -137,13 +167,20 @@ void AtomKokkos::sync(const ExecutionSpace space, unsigned int mask)
 void AtomKokkos::modified(const ExecutionSpace space, unsigned int mask)
 {
  avecKK->modified(space, mask);
  for (int n = 0; n < nprop_atom; n++) fix_prop_atom[n]->modified(space, mask);
-  if (space == Device && lmp->kokkos->auto_sync) avecKK->sync(Host, mask);
+  if (space == Device && lmp->kokkos->auto_sync) {
    avecKK->sync(Host, mask);
    for (int n = 0; n < nprop_atom; n++) fix_prop_atom[n]->sync(Host, mask);
  }
 }
 /* ---------------------------------------------------------------------- */
 void AtomKokkos::sync_overlapping_device(const ExecutionSpace space, unsigned int mask)
 {
  avecKK->sync_overlapping_device(space, mask);
  for (int n = 0; n < nprop_atom; n++) fix_prop_atom[n]->sync_overlapping_device(space, mask);
 }
 /* ---------------------------------------------------------------------- */
@ -375,7 +412,7 @@ AtomVec *AtomKokkos::new_avec(const std::string &style, int trysuffix, int &sfla
  int hybrid_substyle_flag = (avec != nullptr);
  AtomVec *avec = Atom::new_avec(style, trysuffix, sflag);
-  if (!avec->kokkosable) error->all(FLERR, "KOKKOS package requires a kokkos enabled atom_style");
+  if (!avec->kokkosable) error->all(FLERR, "KOKKOS package requires a Kokkos-enabled atom_style");
  if (!hybrid_substyle_flag)
    avecKK = dynamic_cast<AtomVecKokkos*>(avec);
--- a/src/KOKKOS/atom_kokkos.h
+++ b/src/KOKKOS/atom_kokkos.h
@ -14,6 +14,7 @@
 #include "atom.h"               // IWYU pragma: export
 #include "kokkos_type.h"
 #include "fix_property_atom_kokkos.h"
 #include <Kokkos_Sort.hpp>
@ -25,6 +26,8 @@ namespace LAMMPS_NS {
 class AtomKokkos : public Atom {
 public:
  bool sort_classic;
  int nprop_atom;
  FixPropertyAtomKokkos** fix_prop_atom;
  DAT::tdual_tagint_1d k_tag;
  DAT::tdual_int_1d k_type, k_mask;
@ -144,6 +147,7 @@ class AtomKokkos : public Atom {
  }
  void init() override;
  void update_property_atom();
  void allocate_type_arrays() override;
  void sync(const ExecutionSpace space, unsigned int mask);
  void modified(const ExecutionSpace space, unsigned int mask);
--- a/src/KOKKOS/atom_vec_dpd_kokkos.cpp
+++ b/src/KOKKOS/atom_vec_dpd_kokkos.cpp
@ -963,7 +963,6 @@ void AtomVecDPDKokkos::sync(ExecutionSpace space, unsigned int mask)
    if (mask & UCG_MASK) atomKK->k_uCG.sync<LMPDeviceType>();
    if (mask & UCGNEW_MASK) atomKK->k_uCGnew.sync<LMPDeviceType>();
    if (mask & DUCHEM_MASK) atomKK->k_duChem.sync<LMPDeviceType>();
    if (mask & DVECTOR_MASK) atomKK->k_dvector.sync<LMPDeviceType>();
  } else {
    if (mask & X_MASK) atomKK->k_x.sync<LMPHostType>();
    if (mask & V_MASK) atomKK->k_v.sync<LMPHostType>();
@ -980,7 +979,6 @@ void AtomVecDPDKokkos::sync(ExecutionSpace space, unsigned int mask)
    if (mask & UCG_MASK) atomKK->k_uCG.sync<LMPHostType>();
    if (mask & UCGNEW_MASK) atomKK->k_uCGnew.sync<LMPHostType>();
    if (mask & DUCHEM_MASK) atomKK->k_duChem.sync<LMPHostType>();
    if (mask & DVECTOR_MASK) atomKK->k_dvector.sync<LMPHostType>();
  }
 }
@ -1019,8 +1017,6 @@ void AtomVecDPDKokkos::sync_overlapping_device(ExecutionSpace space, unsigned in
      perform_async_copy<DAT::tdual_efloat_1d>(atomKK->k_uCGnew,space);
    if ((mask & DUCHEM_MASK) && atomKK->k_duChem.need_sync<LMPDeviceType>())
      perform_async_copy<DAT::tdual_efloat_1d>(atomKK->k_duChem,space);
    if ((mask & DVECTOR_MASK) && atomKK->k_dvector.need_sync<LMPDeviceType>())
      perform_async_copy<DAT::tdual_float_2d>(atomKK->k_dvector,space);
  } else {
    if ((mask & X_MASK) && atomKK->k_x.need_sync<LMPHostType>())
      perform_async_copy<DAT::tdual_x_array>(atomKK->k_x,space);
@ -1052,8 +1048,6 @@ void AtomVecDPDKokkos::sync_overlapping_device(ExecutionSpace space, unsigned in
      perform_async_copy<DAT::tdual_efloat_1d>(atomKK->k_uCGnew,space);
    if ((mask & DUCHEM_MASK) && atomKK->k_duChem.need_sync<LMPHostType>())
      perform_async_copy<DAT::tdual_efloat_1d>(atomKK->k_duChem,space);
    if ((mask & DVECTOR_MASK) && atomKK->k_dvector.need_sync<LMPHostType>())
      perform_async_copy<DAT::tdual_float_2d>(atomKK->k_dvector,space);
  }
 }
@ -1077,7 +1071,6 @@ void AtomVecDPDKokkos::modified(ExecutionSpace space, unsigned int mask)
    if (mask & UCG_MASK) atomKK->k_uCG.modify<LMPDeviceType>();
    if (mask & UCGNEW_MASK) atomKK->k_uCGnew.modify<LMPDeviceType>();
    if (mask & DUCHEM_MASK) atomKK->k_duChem.modify<LMPDeviceType>();
    if (mask & DVECTOR_MASK) atomKK->k_dvector.modify<LMPDeviceType>();
  } else {
    if (mask & X_MASK) atomKK->k_x.modify<LMPHostType>();
    if (mask & V_MASK) atomKK->k_v.modify<LMPHostType>();
@ -1094,6 +1087,5 @@ void AtomVecDPDKokkos::modified(ExecutionSpace space, unsigned int mask)
    if (mask & UCG_MASK) atomKK->k_uCG.modify<LMPHostType>();
    if (mask & UCGNEW_MASK) atomKK->k_uCGnew.modify<LMPHostType>();
    if (mask & DUCHEM_MASK) atomKK->k_duChem.modify<LMPHostType>();
    if (mask & DVECTOR_MASK) atomKK->k_dvector.modify<LMPHostType>();
  }
 }
--- a/src/KOKKOS/atom_vec_kokkos.h
+++ b/src/KOKKOS/atom_vec_kokkos.h
@ -139,6 +139,8 @@ class AtomVecKokkos : virtual public AtomVec {
  DAT::tdual_int_1d k_count;
 public:
  #ifdef LMP_KOKKOS_GPU
  template<class ViewType>
  Kokkos::View<typename ViewType::data_type,
--- a/src/KOKKOS/fix_dt_reset_kokkos.cpp
+++ b/src/KOKKOS/fix_dt_reset_kokkos.cpp
@ -113,7 +113,7 @@ void FixDtResetKokkos<DeviceType>::end_of_step()
   update->dt = dt;
   update->dt_default = 0;
   if (force->pair) force->pair->reset_dt();
-   for (int i = 0; i < modify->nfix; i++) modify->fix[i]->reset_dt();
+   for (auto &ifix : modify->get_fix_list()) ifix->reset_dt();
   output->reset_dt();
 }
--- a/src/KOKKOS/fix_efield_kokkos.cpp
+++ b/src/KOKKOS/fix_efield_kokkos.cpp
@ -0,0 +1,316 @@
 // clang-format off
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
   LAMMPS development team: developers@lammps.org
   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.
   See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 /* ----------------------------------------------------------------------
   Contributing author: Trung Nguyen (U Chicago)
 ------------------------------------------------------------------------- */
 #include "fix_efield_kokkos.h"
 #include "atom_kokkos.h"
 #include "update.h"
 #include "modify.h"
 #include "domain_kokkos.h"
 #include "region.h"
 #include "input.h"
 #include "variable.h"
 #include "memory_kokkos.h"
 #include "error.h"
 #include "atom_masks.h"
 #include "kokkos_base.h"
 #include <cstring>
 using namespace LAMMPS_NS;
 using namespace FixConst;
 enum{NONE,CONSTANT,EQUAL,ATOM};
 /* ---------------------------------------------------------------------- */
 template<class DeviceType>
 FixEfieldKokkos<DeviceType>::FixEfieldKokkos(LAMMPS *lmp, int narg, char **arg) :
  FixEfield(lmp, narg, arg)
 {
  // mark the fix as Kokkos-capable and record the space its kernels run in
  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
  atomKK = static_cast<AtomKokkos *>(atom);
  kokkosable = 1;
  datamask_modify = EMPTY_MASK;
  datamask_read = EMPTY_MASK;
  // drop the current efield storage and recreate it through memoryKK so that
  // k_efield and the plain efield pointer are set up together
  memory->destroy(efield);
  memoryKK->create_kokkos(k_efield,efield,maxatom,4,"efield:efield");
  d_efield = k_efield.view<DeviceType>();
 }
 /* ---------------------------------------------------------------------- */
 template<class DeviceType>
 FixEfieldKokkos<DeviceType>::~FixEfieldKokkos()
 {
  // nothing is released while copymode is set
  // NOTE(review): presumably the object is then a functor copy made for a kernel launch - confirm
  if (!copymode) {
    memoryKK->destroy_kokkos(k_efield,efield);
    efield = nullptr;
  }
 }
 /* ---------------------------------------------------------------------- */
 template<class DeviceType>
 void FixEfieldKokkos<DeviceType>::init()
 {
  // base-class setup first, then reject respa integrators
  FixEfield::init();
  const bool respa_requested = utils::strmatch(update->integrate_style,"^respa");
  if (respa_requested) error->all(FLERR,"Cannot (yet) use respa with Kokkos");
 }
 /* ---------------------------------------------------------------------- */
 /* ----------------------------------------------------------------------
    add the force q*E to every owned atom that is in the fix group and,
    when a region is set, also matched by that region
    fsum[0] accumulates -(F . unwrapped position), fsum[1..3] the force sums
    only block-style regions are accepted; other styles raise an error
 ------------------------------------------------------------------------- */
 template<class DeviceType>
 void FixEfieldKokkos<DeviceType>::post_force(int /*vflag*/)
 {
  atomKK->sync(execution_space, X_MASK | F_MASK | Q_MASK | IMAGE_MASK | MASK_MASK);
  x = atomKK->k_x.view<DeviceType>();
  f = atomKK->k_f.view<DeviceType>();
  q = atomKK->k_q.view<DeviceType>();
  image = atomKK->k_image.view<DeviceType>();
  mask = atomKK->k_mask.view<DeviceType>();
  int nlocal = atom->nlocal;
  // update region if necessary
  if (region) {
    if (!utils::strmatch(region->style, "^block"))
      error->all(FLERR,"Cannot (yet) use {}-style region with fix efield/kk",region->style);
    region->prematch();
    DAT::tdual_int_1d k_match = DAT::tdual_int_1d("efield:k_match",nlocal);
    KokkosBase* regionKKBase = dynamic_cast<KokkosBase*>(region);
    regionKKBase->match_all_kokkos(groupbit,k_match);
    k_match.template sync<DeviceType>();
    d_match = k_match.template view<DeviceType>();
  }
  // the kernels below must not touch the region pointer itself, so the
  // "is a region active" decision is taken here and captured by value
  const int region_flag = region ? 1 : 0;
  // reallocate efield array if necessary
  if (varflag == ATOM && atom->nmax > maxatom) {
    maxatom = atom->nmax;
    memoryKK->destroy_kokkos(k_efield,efield);
    memoryKK->create_kokkos(k_efield,efield,maxatom,4,"efield:efield");
    d_efield = k_efield.view<DeviceType>();
  }
  fsum[0] = fsum[1] = fsum[2] = fsum[3] = 0.0;
  double_4 fsum_kk;
  force_flag = 0;
  if (varflag == CONSTANT) {
    copymode = 1;
    // It would be more concise to use the operators below, but there is still an issue with unwrap (TODO below)
    //Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagFixEfieldConstant>(0,nlocal),*this,fsum_kk);
    {
    // local variables for lambda capture
    auto prd = Few<double,3>(domain->prd);
    auto h = Few<double,6>(domain->h);
    auto triclinic = domain->triclinic;
    auto l_ex = ex;
    auto l_ey = ey;
    auto l_ez = ez;
    auto l_x = x;
    auto l_q = q;
    auto l_f = f;
    auto l_mask = mask;
    auto l_image = image;
    auto l_groupbit = groupbit;
    auto l_region_flag = region_flag;
    auto l_d_match = d_match;
    Kokkos::parallel_reduce(nlocal, LAMMPS_LAMBDA(const int& i, double_4& fsum_kk) {
      if (l_mask[i] & l_groupbit) {
        // skip atoms outside the region, as the tagged operators do
        if (l_region_flag && !l_d_match[i]) return;
        Few<double,3> x_i;
        x_i[0] = l_x(i,0);
        x_i[1] = l_x(i,1);
        x_i[2] = l_x(i,2);
        auto unwrap = DomainKokkos::unmap(prd,h,triclinic,x_i,l_image(i));
        auto qtmp = l_q(i);
        auto fx = qtmp * l_ex;
        auto fy = qtmp * l_ey;
        auto fz = qtmp * l_ez;
        l_f(i,0) += fx;
        l_f(i,1) += fy;
        l_f(i,2) += fz;
        fsum_kk.d0 -= fx * unwrap[0] + fy * unwrap[1] + fz * unwrap[2];
        fsum_kk.d1 += fx;
        fsum_kk.d2 += fy;
        fsum_kk.d3 += fz;
      }
    },fsum_kk);
    }
    copymode = 0;
  // variable force, wrap with clear/add
  } else {
    atomKK->sync(Host,ALL_MASK); // this can be removed when variable class is ported to Kokkos
    modify->clearstep_compute();
    if (xstyle == EQUAL) ex = input->variable->compute_equal(xvar);
    else if (xstyle == ATOM)
      input->variable->compute_atom(xvar,igroup,&efield[0][0],4,0);
    if (ystyle == EQUAL) ey = input->variable->compute_equal(yvar);
    else if (ystyle == ATOM)
      input->variable->compute_atom(yvar,igroup,&efield[0][1],4,0);
    if (zstyle == EQUAL) ez = input->variable->compute_equal(zvar);
    else if (zstyle == ATOM)
      input->variable->compute_atom(zvar,igroup,&efield[0][2],4,0);
    modify->addstep_compute(update->ntimestep + 1);
    if (varflag == ATOM) {  // this can be removed when variable class is ported to Kokkos
      k_efield.modify<LMPHostType>();
      k_efield.sync<DeviceType>();
    }
    copymode = 1;
    // It would be more concise to use the operators below, but there is still an issue with unwrap (TODO below)
    //Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagFixEfieldNonConstant>(0,nlocal),*this,fsum_kk);
    {
    // local variables for lambda capture
    auto prd = Few<double,3>(domain->prd);
    auto h = Few<double,6>(domain->h);
    auto triclinic = domain->triclinic;
    auto l_ex = ex;
    auto l_ey = ey;
    auto l_ez = ez;
    auto l_d_efield = d_efield;
    auto l_x = x;
    auto l_q = q;
    auto l_f = f;
    auto l_mask = mask;
    auto l_image = image;
    auto l_groupbit = groupbit;
    auto l_xstyle = xstyle;
    auto l_ystyle = ystyle;
    auto l_zstyle = zstyle;
    auto l_region_flag = region_flag;
    auto l_d_match = d_match;
    Kokkos::parallel_reduce(nlocal, LAMMPS_LAMBDA(const int& i, double_4& fsum_kk) {
      if (l_mask[i] & l_groupbit) {
        // skip atoms outside the region, as the tagged operators do
        if (l_region_flag && !l_d_match[i]) return;
        Few<double,3> x_i;
        x_i[0] = l_x(i,0);
        x_i[1] = l_x(i,1);
        x_i[2] = l_x(i,2);
        auto unwrap = DomainKokkos::unmap(prd,h,triclinic,x_i,l_image(i));
        auto qtmp = l_q(i);
        auto fx = qtmp * l_ex;
        auto fy = qtmp * l_ey;
        auto fz = qtmp * l_ez;
        if (l_xstyle == ATOM) l_f(i,0) += qtmp * l_d_efield(i,0);
        else if (l_xstyle) l_f(i,0) += fx;
        if (l_ystyle == ATOM) l_f(i,1) += qtmp * l_d_efield(i,1);
        else if (l_ystyle) l_f(i,1) += fy;
        if (l_zstyle == ATOM) l_f(i,2) += qtmp * l_d_efield(i,2);
        else if (l_zstyle) l_f(i,2) += fz;
        // NOTE(review): the sums below always use q*ex/ey/ez, also for
        // ATOM-style components - confirm against FixEfield::post_force
        fsum_kk.d0 -= fx * unwrap[0] + fy * unwrap[1] + fz * unwrap[2];
        fsum_kk.d1 += fx;
        fsum_kk.d2 += fy;
        fsum_kk.d3 += fz;
      }
    },fsum_kk);
    }
    copymode = 0;
  }
  atomKK->modified(execution_space, F_MASK);
  fsum[0] = fsum_kk.d0;
  fsum[1] = fsum_kk.d1;
  fsum[2] = fsum_kk.d2;
  fsum[3] = fsum_kk.d3;
 }
 // per-atom kernel for a constant field; post_force currently runs an
 // equivalent lambda and keeps the launch of this operator commented out
 template<class DeviceType>
 KOKKOS_INLINE_FUNCTION
 void FixEfieldKokkos<DeviceType>::operator()(TagFixEfieldConstant, const int &i, double_4& fsum_kk) const {
  // only atoms in the fix group and, if a region is set, inside it
  if (!(mask[i] & groupbit)) return;
  if (region && !d_match[i]) return;
  auto triclinic = domain->triclinic;
  auto h = Few<double,6>(domain->h);
  auto prd = Few<double,3>(domain->prd);
  Few<double,3> x_i;
  for (int k = 0; k < 3; k++) x_i[k] = x(i,k);
  auto unwrap = DomainKokkos::unmap(prd,h,triclinic,x_i,image(i));
  const F_FLOAT qtmp = q(i);
  const F_FLOAT fx = qtmp * ex;
  const F_FLOAT fy = qtmp * ey;
  const F_FLOAT fz = qtmp * ez;
  f(i,0) += fx;
  f(i,1) += fy;
  f(i,2) += fz;
  // TODO: access to unwrap below crashes
  fsum_kk.d0 -= fx * unwrap[0] + fy * unwrap[1] + fz * unwrap[2];
  fsum_kk.d1 += fx;
  fsum_kk.d2 += fy;
  fsum_kk.d3 += fz;
 }
 // per-atom kernel for equal-/atom-style variable fields; post_force currently
 // runs an equivalent lambda and keeps the launch of this operator commented out
 // i = index of the owned atom, fsum_kk = thread-local reduction value
 template<class DeviceType>
 KOKKOS_INLINE_FUNCTION
 void FixEfieldKokkos<DeviceType>::operator()(TagFixEfieldNonConstant, const int &i, double_4& fsum_kk) const {
  auto prd = Few<double,3>(domain->prd);
  auto h = Few<double,6>(domain->h);
  auto triclinic = domain->triclinic;
  if (mask[i] & groupbit) {
    if (region && !d_match[i]) return;
    Few<double,3> x_i;
    x_i[0] = x(i,0);
    x_i[1] = x(i,1);
    x_i[2] = x(i,2);
    auto unwrap = DomainKokkos::unmap(prd,h,triclinic,x_i,image(i));
    const F_FLOAT qtmp = q[i];
    const F_FLOAT fx = qtmp * ex;
    const F_FLOAT fy = qtmp * ey;
    const F_FLOAT fz = qtmp * ez;
    // d_efield holds the per-atom field, so the force needs the charge factor,
    // exactly as in the lambda kernel used by post_force
    if (xstyle == ATOM) f(i,0) += qtmp * d_efield(i,0);
    else if (xstyle) f(i,0) += fx;
    if (ystyle == ATOM) f(i,1) += qtmp * d_efield(i,1);
    else if (ystyle) f(i,1) += fy;
    if (zstyle == ATOM) f(i,2) += qtmp * d_efield(i,2);
    else if (zstyle) f(i,2) += fz;
    // TODO: access to unwrap below crashes
    fsum_kk.d0 -= fx * unwrap[0] + fy * unwrap[1] + fz * unwrap[2];
    fsum_kk.d1 += fx;
    fsum_kk.d2 += fy;
    fsum_kk.d3 += fz;
  }
 }
 // explicit instantiations of the fix for the device type and, only when
 // LMP_KOKKOS_GPU is defined, additionally for the host type
 // NOTE(review): presumably both types coincide in non-GPU builds - confirm
 namespace LAMMPS_NS {
 template class FixEfieldKokkos<LMPDeviceType>;
 #ifdef LMP_KOKKOS_GPU
 template class FixEfieldKokkos<LMPHostType>;
 #endif
 }
--- a/src/KOKKOS/fix_efield_kokkos.h
+++ b/src/KOKKOS/fix_efield_kokkos.h
@ -0,0 +1,86 @@
 /* -*- c++ -*- ----------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
   LAMMPS development team: developers@lammps.org
   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.
   See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 #ifdef FIX_CLASS
 // clang-format off
 FixStyle(efield/kk,FixEfieldKokkos<LMPDeviceType>);
 FixStyle(efield/kk/device,FixEfieldKokkos<LMPDeviceType>);
 FixStyle(efield/kk/host,FixEfieldKokkos<LMPHostType>);
 // clang-format on
 #else
 // clang-format off
 #ifndef LMP_FIX_EFIELD_KOKKOS_H
 #define LMP_FIX_EFIELD_KOKKOS_H
 #include "fix_efield.h"
 #include "kokkos_type.h"
 namespace LAMMPS_NS {
 // four doubles that start at zero and add component-wise; used as the
 // reduction value of the fix efield/kk kernels
 struct e_double_4 {
  double d0, d1, d2, d3;
  KOKKOS_INLINE_FUNCTION
  e_double_4() : d0(0.0), d1(0.0), d2(0.0), d3(0.0) {}
  KOKKOS_INLINE_FUNCTION
  e_double_4& operator+=(const e_double_4 &other) {
    d0 = d0 + other.d0;
    d1 = d1 + other.d1;
    d2 = d2 + other.d2;
    d3 = d3 + other.d3;
    return *this;
  }
 };
 // short alias used in the kernel signatures of FixEfieldKokkos
 typedef e_double_4 double_4;
 // empty tag types that select the matching FixEfieldKokkos::operator() overload
 struct TagFixEfieldConstant{};
 struct TagFixEfieldNonConstant{};
 // Kokkos variant of fix efield, templated on the device type its kernels use
 template<class DeviceType>
 class FixEfieldKokkos : public FixEfield {
 public:
  using device_type = DeviceType;
  using value_type = double_4;    // reduction value type of the kernels
  using AT = ArrayTypes<DeviceType>;
  FixEfieldKokkos(class LAMMPS *, int, char **);
  ~FixEfieldKokkos() override;
  void init() override;
  void post_force(int) override;
  // per-atom kernels: atom index in, partial sums accumulated into the double_4
  KOKKOS_INLINE_FUNCTION
  void operator()(TagFixEfieldConstant, const int&, double_4&) const;
  KOKKOS_INLINE_FUNCTION
  void operator()(TagFixEfieldNonConstant, const int&, double_4&) const;
 private:
  // storage for atom-style variable field values and its view for DeviceType
  DAT::tdual_ffloat_2d k_efield;
  typename AT::t_ffloat_2d_randomread d_efield;
  // per-atom region match flags, filled in post_force when a region is set
  typename AT::t_int_1d d_match;
  // views of the atom data, refreshed at the start of every post_force call
  typename AT::t_x_array_randomread x;
  typename AT::t_float_1d_randomread q;
  typename AT::t_f_array f;
  typename AT::t_imageint_1d_randomread image;
  typename AT::t_int_1d_randomread mask;
 };
 }
 #endif
 #endif
--- a/src/KOKKOS/fix_property_atom_kokkos.cpp
+++ b/src/KOKKOS/fix_property_atom_kokkos.cpp
@ -30,7 +30,46 @@ FixPropertyAtomKokkos::FixPropertyAtomKokkos(LAMMPS *lmp, int narg, char **arg)
  FixPropertyAtom(lmp, narg, arg)
 {
  atomKK = (AtomKokkos *) atom;
-  grow_arrays(atom->nmax);
+  kokkosable = 1;
  dvector_flag = 0;
  for (int nv = 0; nv < nvalue; nv++)
    if (styles[nv] == DVEC) dvector_flag = 1;
 }
 /* ---------------------------------------------------------------------- */
 // refresh the list of property/atom fixes cached in AtomKokkos, then run the
 // base-class post-constructor step
 void FixPropertyAtomKokkos::post_constructor()
 {
  atomKK->update_property_atom();
  FixPropertyAtom::post_constructor();
 }
 /* ---------------------------------------------------------------------- */
 FixPropertyAtomKokkos::~FixPropertyAtomKokkos()
 {
  // free the molecule, charge and rmass vectors this fix provided:
  // clear the Atom flag, destroy the Kokkos storage, null the raw pointer
  for (int iv = 0; iv < nvalue; ++iv) {
    const auto style = styles[iv];
    if (style == MOLECULE) {
      atom->molecule_flag = 0;
      memoryKK->destroy_kokkos(atomKK->k_molecule,atom->molecule);
      atom->molecule = nullptr;
      continue;
    }
    if (style == CHARGE) {
      atom->q_flag = 0;
      memoryKK->destroy_kokkos(atomKK->k_q,atom->q);
      atom->q = nullptr;
      continue;
    }
    if (style == RMASS) {
      atom->rmass_flag = 0;
      memoryKK->destroy_kokkos(atomKK->k_rmass,atom->rmass);
      atom->rmass = nullptr;
    }
  }
  // let AtomKokkos rebuild its list of property/atom fixes
  atomKK->update_property_atom();
 }
 /* ----------------------------------------------------------------------
@ -44,17 +83,17 @@ void FixPropertyAtomKokkos::grow_arrays(int nmax)
 {
  for (int nv = 0; nv < nvalue; nv++) {
    if (styles[nv] == MOLECULE) {
-      memory->grow(atom->molecule,nmax,"atom:molecule");
+      atomKK->sync(Device,MOLECULE_MASK);
-      size_t nbytes = (nmax-nmax_old) * sizeof(tagint);
+      memoryKK->grow_kokkos(atomKK->k_molecule,atom->molecule,nmax,"atom:molecule");
-      memset(&atom->molecule[nmax_old],0,nbytes);
+      atomKK->modified(Device,MOLECULE_MASK);
    } else if (styles[nv] == CHARGE) {
-      memory->grow(atom->q,nmax,"atom:q");
+      atomKK->sync(Device,Q_MASK);
-      size_t nbytes = (nmax-nmax_old) * sizeof(double);
+      memoryKK->grow_kokkos(atomKK->k_q,atom->q,nmax,"atom:q");
-      memset(&atom->q[nmax_old],0,nbytes);
+      atomKK->modified(Device,Q_MASK);
    } else if (styles[nv] == RMASS) {
-      memory->grow(atom->rmass,nmax,"atom:rmass");
+      atomKK->sync(Device,RMASS_MASK);
-      size_t nbytes = (nmax-nmax_old) * sizeof(double);
+      memoryKK->grow_kokkos(atomKK->k_rmass,atom->rmass,nmax,"atom:rmass");
-      memset(&atom->rmass[nmax_old],0,nbytes);
+      atomKK->modified(Device,RMASS_MASK);
    } else if (styles[nv] == TEMPERATURE) {
      memory->grow(atom->temperature, nmax, "atom:temperature");
      size_t nbytes = (nmax - nmax_old) * sizeof(double);
@ -69,7 +108,7 @@ void FixPropertyAtomKokkos::grow_arrays(int nmax)
      memset(&atom->ivector[index[nv]][nmax_old],0,nbytes);
    } else if (styles[nv] == DVEC) {
      atomKK->sync(Device,DVECTOR_MASK);
-      memoryKK->grow_kokkos(atomKK->k_dvector,atomKK->dvector,atomKK->k_dvector.extent(0),nmax,
+      memoryKK->grow_kokkos(atomKK->k_dvector,atom->dvector,atomKK->k_dvector.extent(0),nmax,
                          "atom:dvector");
      atomKK->modified(Device,DVECTOR_MASK);
    } else if (styles[nv] == IARRAY) {
@ -84,3 +123,62 @@ void FixPropertyAtomKokkos::grow_arrays(int nmax)
  }
  nmax_old = nmax;
 }
 /* ---------------------------------------------------------------------- */
 // Sync the molecule/q/rmass/dvector dual views to the requested execution
 // space. A view is touched only when its *_flag member is set AND the
 // matching bit is present in mask.
 void FixPropertyAtomKokkos::sync(ExecutionSpace space, unsigned int mask)
 {
  const bool want_molecule = molecule_flag && (mask & MOLECULE_MASK);
  const bool want_q = q_flag && (mask & Q_MASK);
  const bool want_rmass = rmass_flag && (mask & RMASS_MASK);
  const bool want_dvector = dvector_flag && (mask & DVECTOR_MASK);
  // any space other than Device is handled as the host
  if (space != Device) {
    if (want_molecule) atomKK->k_molecule.sync<LMPHostType>();
    if (want_q) atomKK->k_q.sync<LMPHostType>();
    if (want_rmass) atomKK->k_rmass.sync<LMPHostType>();
    if (want_dvector) atomKK->k_dvector.sync<LMPHostType>();
    return;
  }
  if (want_molecule) atomKK->k_molecule.sync<LMPDeviceType>();
  if (want_q) atomKK->k_q.sync<LMPDeviceType>();
  if (want_rmass) atomKK->k_rmass.sync<LMPDeviceType>();
  if (want_dvector) atomKK->k_dvector.sync<LMPDeviceType>();
 }
 /* ---------------------------------------------------------------------- */
 // For every property whose bit is set in mask and whose dual view reports
 // need_sync for the target space, hand the view to the atom style's
 // perform_async_copy. Unlike sync()/modified(), the *_flag members are not
 // consulted here.
 void FixPropertyAtomKokkos::sync_overlapping_device(ExecutionSpace space, unsigned int mask)
 {
  const bool req_molecule = (mask & MOLECULE_MASK) != 0;
  const bool req_q = (mask & Q_MASK) != 0;
  const bool req_rmass = (mask & RMASS_MASK) != 0;
  const bool req_dvector = (mask & DVECTOR_MASK) != 0;
  auto *avec = atomKK->avecKK;
  if (space == Device) {
    if (req_molecule && atomKK->k_molecule.need_sync<LMPDeviceType>())
      avec->perform_async_copy<DAT::tdual_tagint_1d>(atomKK->k_molecule,space);
    if (req_q && atomKK->k_q.need_sync<LMPDeviceType>())
      avec->perform_async_copy<DAT::tdual_float_1d>(atomKK->k_q,space);
    if (req_rmass && atomKK->k_rmass.need_sync<LMPDeviceType>())
      avec->perform_async_copy<DAT::tdual_float_1d>(atomKK->k_rmass,space);
    if (req_dvector && atomKK->k_dvector.need_sync<LMPDeviceType>())
      avec->perform_async_copy<DAT::tdual_float_2d>(atomKK->k_dvector,space);
  } else {
    if (req_molecule && atomKK->k_molecule.need_sync<LMPHostType>())
      avec->perform_async_copy<DAT::tdual_tagint_1d>(atomKK->k_molecule,space);
    if (req_q && atomKK->k_q.need_sync<LMPHostType>())
      avec->perform_async_copy<DAT::tdual_float_1d>(atomKK->k_q,space);
    if (req_rmass && atomKK->k_rmass.need_sync<LMPHostType>())
      avec->perform_async_copy<DAT::tdual_float_1d>(atomKK->k_rmass,space);
    if (req_dvector && atomKK->k_dvector.need_sync<LMPHostType>())
      avec->perform_async_copy<DAT::tdual_float_2d>(atomKK->k_dvector,space);
  }
 }
 /* ---------------------------------------------------------------------- */
 // Mark the molecule/q/rmass/dvector dual views as modified in the given
 // execution space. A view is marked only when its *_flag member is set AND
 // the matching bit is present in mask.
 void FixPropertyAtomKokkos::modified(ExecutionSpace space, unsigned int mask)
 {
  const bool mark_molecule = molecule_flag && (mask & MOLECULE_MASK);
  const bool mark_q = q_flag && (mask & Q_MASK);
  const bool mark_rmass = rmass_flag && (mask & RMASS_MASK);
  const bool mark_dvector = dvector_flag && (mask & DVECTOR_MASK);
  // any space other than Device is handled as the host
  if (space != Device) {
    if (mark_molecule) atomKK->k_molecule.modify<LMPHostType>();
    if (mark_q) atomKK->k_q.modify<LMPHostType>();
    if (mark_rmass) atomKK->k_rmass.modify<LMPHostType>();
    if (mark_dvector) atomKK->k_dvector.modify<LMPHostType>();
    return;
  }
  if (mark_molecule) atomKK->k_molecule.modify<LMPDeviceType>();
  if (mark_q) atomKK->k_q.modify<LMPDeviceType>();
  if (mark_rmass) atomKK->k_rmass.modify<LMPDeviceType>();
  if (mark_dvector) atomKK->k_dvector.modify<LMPDeviceType>();
 }
--- a/src/KOKKOS/fix_property_atom_kokkos.h
+++ b/src/KOKKOS/fix_property_atom_kokkos.h
@ -22,14 +22,23 @@ FixStyle(property/atom/kk,FixPropertyAtomKokkos);
 #define LMP_FIX_PROPERTY_ATOM_KOKKOS_H
 #include "fix_property_atom.h"
 #include "atom_vec_kokkos.h"
 namespace LAMMPS_NS {
 // Kokkos variant of FixPropertyAtom: the per-atom molecule, q, rmass and
 // dvector properties are accessed through the dual views held by AtomKokkos.
 class FixPropertyAtomKokkos : public FixPropertyAtom {
  public:
   FixPropertyAtomKokkos(class LAMMPS *, int, char **);
-
   // NOTE(review): implementation not shown here; presumably finishes setup
   // that cannot be done inside the constructor -- confirm in the .cpp
+  void post_constructor() override;
   ~FixPropertyAtomKokkos() override;
   // grow the per-atom storage of the managed properties to the given length
   void grow_arrays(int) override;
   // sync the dual views selected by mask to the given execution space
   void sync(ExecutionSpace space, unsigned int mask);
   // mark the dual views selected by mask as modified in the given space
   void modified(ExecutionSpace space, unsigned int mask);
   // issue perform_async_copy for each selected view that reports need_sync
   void sync_overlapping_device(ExecutionSpace space, unsigned int mask);
  private:
   // gates sync()/modified() of the dvector dual view
   int dvector_flag;
  };
 }
--- a/src/KOKKOS/fix_spring_self_kokkos.cpp
+++ b/src/KOKKOS/fix_spring_self_kokkos.cpp
@ -0,0 +1,332 @@
 // clang-format off
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
   LAMMPS development team: developers@lammps.org
   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.
   See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 /* ----------------------------------------------------------------------
   Contributing author: Trung Nguyen (U Chicago)
 ------------------------------------------------------------------------- */
 #include "fix_spring_self_kokkos.h"
 #include "atom_kokkos.h"
 #include "update.h"
 #include "modify.h"
 #include "domain_kokkos.h"
 #include "region.h"
 #include "input.h"
 #include "variable.h"
 #include "memory_kokkos.h"
 #include "error.h"
 #include "atom_masks.h"
 #include "kokkos_base.h"
 #include <cstring>
 using namespace LAMMPS_NS;
 using namespace FixConst;
 /* ---------------------------------------------------------------------- */
 // Construct the Kokkos fix on top of FixSpringSelf: the xoriginal array
 // filled by the base class is moved into the k_xoriginal dual view, and the
 // scalar views used to count packed exchange values are created.
 template<class DeviceType>
 FixSpringSelfKokkos<DeviceType>::FixSpringSelfKokkos(LAMMPS *lmp, int narg, char **arg) :
  FixSpringSelf(lmp, narg, arg)
 {
  kokkosable = 1;
  exchange_comm_device = 1;
  atomKK = (AtomKokkos *) atom;
  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
  datamask_read = EMPTY_MASK;
  datamask_modify = EMPTY_MASK;
  // keep the base-class allocation aside and re-allocate as a dual view
  xoriginal_tmp = xoriginal;
  xoriginal = nullptr;
  grow_arrays(atom->nmax);
  // transfer the reference positions of the local atoms on the host side
  const int nlocal = atom->nlocal;
  for (int iatom = 0; iatom < nlocal; ++iatom)
    for (int dim = 0; dim < 3; ++dim)
      k_xoriginal.h_view(iatom,dim) = xoriginal_tmp[iatom][dim];
  k_xoriginal.modify_host();
  d_count = typename AT::t_int_scalar("spring/self:count");
  h_count = Kokkos::create_mirror_view(d_count);
  // the temporary base-class array is no longer needed
  memory->destroy(xoriginal_tmp);
 }
 /* ---------------------------------------------------------------------- */
 // Release the xoriginal dual view, unless this object has copymode set
 // (copymode is raised around kernel launches in this class, where the
 // object itself is copied into a functor).
 template<class DeviceType>
 FixSpringSelfKokkos<DeviceType>::~FixSpringSelfKokkos()
 {
  if (!copymode) {
    memoryKK->destroy_kokkos(k_xoriginal,xoriginal);
    xoriginal = nullptr;
  }
 }
 /* ---------------------------------------------------------------------- */
 // Run the base-class init, then reject rRESPA integration, which this
 // Kokkos version does not support.
 template<class DeviceType>
 void FixSpringSelfKokkos<DeviceType>::init()
 {
  FixSpringSelf::init();
  const bool using_respa = utils::strmatch(update->integrate_style,"^respa");
  if (using_respa) {
    error->all(FLERR,"Cannot (yet) use respa with Kokkos");
  }
 }
 /* ---------------------------------------------------------------------- */
 // Apply the self-spring force on the device: for every local atom in the
 // fix group, subtract k*(unmapped position - xoriginal) from the force in
 // the enabled dimensions and reduce k*|d|^2 over atoms; espring is set to
 // half of that sum.
 template<class DeviceType>
 void FixSpringSelfKokkos<DeviceType>::post_force(int /*vflag*/)
 {
  // make positions, forces, image flags and group masks current in this space
  atomKK->sync(execution_space, X_MASK | F_MASK | IMAGE_MASK | MASK_MASK);
  x = atomKK->k_x.view<DeviceType>();
  f = atomKK->k_f.view<DeviceType>();
  image = atomKK->k_image.view<DeviceType>();
  mask = atomKK->k_mask.view<DeviceType>();
  int nlocal = atom->nlocal;
  // receives the result of the parallel_reduce below
  double espring_kk;
  // NOTE(review): the host copy is marked modified unconditionally before
  // syncing to DeviceType -- confirm this cannot overwrite device-side
  // updates made by pack_exchange_kokkos()/unpack_exchange_kokkos()
  k_xoriginal.modify<LMPHostType>();
  k_xoriginal.sync<DeviceType>();
  // copymode stays set while the kernel runs (the destructor returns early
  // when copymode is set)
  copymode = 1;
  {
  // local variables for lambda capture
  auto prd = Few<double,3>(domain->prd);
  auto h = Few<double,6>(domain->h);
  auto triclinic = domain->triclinic;
  auto l_k = k;
  auto l_xoriginal = d_xoriginal;
  auto l_x = x;
  auto l_f = f;
  auto l_mask = mask;
  auto l_image = image;
  auto l_groupbit = groupbit;
  auto l_xflag = xflag;
  auto l_yflag = yflag;
  auto l_zflag = zflag;
  // the lambda's espring_kk parameter is the per-thread reduction value and
  // shadows the outer variable of the same name
  Kokkos::parallel_reduce(nlocal, LAMMPS_LAMBDA(const int& i, double& espring_kk) {
    if (l_mask[i] & l_groupbit) {
      Few<double,3> x_i;
      x_i[0] = l_x(i,0);
      x_i[1] = l_x(i,1);
      x_i[2] = l_x(i,2);
      // presumably the image-unwrapped coordinates of atom i -- see DomainKokkos::unmap
      auto unwrap = DomainKokkos::unmap(prd,h,triclinic,x_i,l_image(i));
      auto dx = unwrap[0] - l_xoriginal(i, 0);
      auto dy = unwrap[1] - l_xoriginal(i, 1);
      auto dz = unwrap[2] - l_xoriginal(i, 2);
      // dimensions that are switched off contribute neither force nor energy
      if (!l_xflag) dx = 0.0;
      if (!l_yflag) dy = 0.0;
      if (!l_zflag) dz = 0.0;
      l_f(i,0) -= l_k*dx;
      l_f(i,1) -= l_k*dy;
      l_f(i,2) -= l_k*dz;
      espring_kk += l_k * (dx*dx + dy*dy + dz*dz);
    }
  },espring_kk);
  }
  copymode = 0;
  // forces were changed in this execution space
  atomKK->modified(execution_space, F_MASK);
  espring = 0.5*espring_kk;
 }
 /* ----------------------------------------------------------------------
    (re)size the per-atom xoriginal dual view to hold nmax atoms
 ------------------------------------------------------------------------- */
 template<class DeviceType>
 void FixSpringSelfKokkos<DeviceType>::grow_arrays(int nmax)
 {
  memoryKK->grow_kokkos(k_xoriginal, xoriginal, nmax, "spring/self:xoriginal");
  // the cached view must be refreshed after the dual view was resized
  d_xoriginal = k_xoriginal.view<DeviceType>();
 }
 /* ----------------------------------------------------------------------
    copy values within the per-atom arrays by delegating to the host-side
    FixSpringSelf::copy_arrays()
 ------------------------------------------------------------------------- */
 template<class DeviceType>
 void FixSpringSelfKokkos<DeviceType>::copy_arrays(int i, int j, int delflag)
 {
  // bring the host copy up to date before the base class touches it
  k_xoriginal.sync_host();
  FixSpringSelf::copy_arrays(i, j, delflag);
  // record that the host copy is now the most recent one
  k_xoriginal.modify_host();
 }
 /* ---------------------------------------------------------------------- */
 // Scan body used by pack_exchange_kokkos(): packs the three xoriginal
 // values of the atom at position mysend of the exchange send list.
 //   mysend : index into d_exchange_sendlist / d_copylist
 //   offset : running number of values packed by preceding items
 //   final  : true only in the pass where offset holds the final prefix sum
 // Kokkos::parallel_scan may invoke the body more than once per index, so
 // all writes (buffer, counter, in-place copy) are done in the final pass
 // only. Otherwise an earlier pass would write with partial offsets and
 // overwrite d_xoriginal(i,:) before it is packed.
 template<class DeviceType>
 KOKKOS_INLINE_FUNCTION
 void FixSpringSelfKokkos<DeviceType>::pack_exchange_item(const int &mysend, int &offset, const bool &final) const
 {
  if (!final) {
    // only accumulate this item's contribution: 3 values per atom
    offset += 3;
    return;
  }
  const int i = d_exchange_sendlist(mysend);
  // the first nsend buffer slots store, per item, where its data starts
  int m = nsend + offset;
  d_buf[mysend] = m;
  d_buf[m++] = d_xoriginal(i,0);
  d_buf[m++] = d_xoriginal(i,1);
  d_buf[m++] = d_xoriginal(i,2);
  // the last item knows the total number of buffer entries used
  if (mysend == nsend-1) d_count() = m;
  offset = m - nsend;
  // fill the slot of the departing atom with the atom designated by copylist
  const int j = d_copylist(mysend);
  if (j > -1) {
    d_xoriginal(i,0) = d_xoriginal(j,0);
    d_xoriginal(i,1) = d_xoriginal(j,1);
    d_xoriginal(i,2) = d_xoriginal(j,2);
  }
 }
 /* ---------------------------------------------------------------------- */
 // Pack the xoriginal values of all atoms in k_exchange_sendlist into k_buf
 // using a parallel scan over pack_exchange_item().
 //   nsend               : number of atoms to pack
 //   k_buf               : exchange buffer, accessed as a flat 1-d view
 //   k_exchange_sendlist : indices of the atoms to pack
 //   k_copylist          : per item, index of the atom copied into the
 //                         vacated slot (values <= -1 mean no copy)
 //   space               : space in which k_buf is synced before returning
 // Returns the value of d_count written by the scan.
 template<class DeviceType>
 int FixSpringSelfKokkos<DeviceType>::pack_exchange_kokkos(
   const int &nsend, DAT::tdual_xfloat_2d &k_buf,
   DAT::tdual_int_1d k_exchange_sendlist, DAT::tdual_int_1d k_copylist,
   ExecutionSpace space)
 {
  k_buf.sync<DeviceType>();
  k_copylist.sync<DeviceType>();
  k_exchange_sendlist.sync<DeviceType>();
  // view the 2-d buffer as one flat array of extent(0)*extent(1) values
  d_buf = typename ArrayTypes<DeviceType>::t_xfloat_1d_um(
    k_buf.template view<DeviceType>().data(),
    k_buf.extent(0)*k_buf.extent(1));
  d_copylist = k_copylist.view<DeviceType>();
  d_exchange_sendlist = k_exchange_sendlist.view<DeviceType>();
  this->nsend = nsend;
  k_xoriginal.template sync<DeviceType>();
  Kokkos::deep_copy(d_count,0);
  // the functor holds a copy of *this; copymode makes the destructor of
  // that copy return early
  copymode = 1;
  FixSpringSelfKokkosPackExchangeFunctor<DeviceType> pack_exchange_functor(this);
  Kokkos::parallel_scan(nsend,pack_exchange_functor);
  copymode = 0;
  k_buf.modify<DeviceType>();
  if (space == Host) k_buf.sync<LMPHostType>();
  else k_buf.sync<LMPDeviceType>();
  // the scan also rewrote xoriginal entries of vacated slots
  k_xoriginal.template modify<DeviceType>();
  Kokkos::deep_copy(h_count,d_count);
  return h_count();
 }
 /* ---------------------------------------------------------------------- */
 // Kernel body for unpack_exchange_kokkos(): restore the three xoriginal
 // values of received item i into local atom d_indices(i). Items whose
 // index is <= -1 are skipped. d_buf[i] holds the buffer position at which
 // the data of item i starts.
 // The values are floating-point coordinates and are stored unchanged
 // (they must not be converted to an integer type).
 template<class DeviceType>
 KOKKOS_INLINE_FUNCTION
 void FixSpringSelfKokkos<DeviceType>::operator()(TagFixSpringSelfUnpackExchange, const int &i) const
 {
  const int index = d_indices(i);
  if (index > -1) {
    int m = static_cast<int>(d_buf[i]);
    d_xoriginal(index,0) = d_buf[m++];
    d_xoriginal(index,1) = d_buf[m++];
    d_xoriginal(index,2) = d_buf[m++];
  }
 }
 /* ---------------------------------------------------------------------- */
 // Unpack xoriginal values of nrecv received items from k_buf by launching
 // the TagFixSpringSelfUnpackExchange kernel.
 //   k_buf     : exchange buffer, accessed as a flat 1-d view
 //   k_indices : per item, the local atom index to store into
 //   nrecv     : number of received items
 // The ExecutionSpace argument is not used.
 template <class DeviceType>
 void FixSpringSelfKokkos<DeviceType>::unpack_exchange_kokkos(
  DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d &k_indices, int nrecv,
  ExecutionSpace /*space*/)
 {
  k_buf.sync<DeviceType>();
  k_indices.sync<DeviceType>();
  // view the 2-d buffer as one flat array of extent(0)*extent(1) values
  d_buf = typename ArrayTypes<DeviceType>::t_xfloat_1d_um(
    k_buf.template view<DeviceType>().data(),
    k_buf.extent(0)*k_buf.extent(1));
  d_indices = k_indices.view<DeviceType>();
  k_xoriginal.template sync<DeviceType>();
  // *this is passed as the functor; copymode makes the destructor of the
  // copy return early
  copymode = 1;
  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagFixSpringSelfUnpackExchange>(0,nrecv),*this);
  copymode = 0;
  // xoriginal was written in this execution space
  k_xoriginal.template modify<DeviceType>();
 }
 /* ----------------------------------------------------------------------
    host-side packing of per-atom values for exchange with another proc;
    delegates to FixSpringSelf::pack_exchange() and returns its result
 ------------------------------------------------------------------------- */
 template<class DeviceType>
 int FixSpringSelfKokkos<DeviceType>::pack_exchange(int i, double *buf)
 {
  // the base class reads the host copy, so make it current first
  k_xoriginal.sync_host();
  const int npacked = FixSpringSelf::pack_exchange(i, buf);
  k_xoriginal.modify_host();
  return npacked;
 }
 /* ----------------------------------------------------------------------
    host-side unpacking of per-atom values received from another proc;
    delegates to FixSpringSelf::unpack_exchange() and returns its result
 ------------------------------------------------------------------------- */
 template<class DeviceType>
 int FixSpringSelfKokkos<DeviceType>::unpack_exchange(int nlocal, double *buf)
 {
  // the base class works on the host copy, so make it current first
  k_xoriginal.sync_host();
  const int nunpacked = FixSpringSelf::unpack_exchange(nlocal, buf);
  // the host copy now holds the newest data
  k_xoriginal.modify_host();
  return nunpacked;
 }
 namespace LAMMPS_NS {
 // explicit instantiations of the fix; the LMPHostType instantiation is only
 // compiled when LMP_KOKKOS_GPU is defined (presumably because the host and
 // device types coincide otherwise -- confirm against kokkos_type.h)
 template class FixSpringSelfKokkos<LMPDeviceType>;
 #ifdef LMP_KOKKOS_GPU
 template class FixSpringSelfKokkos<LMPHostType>;
 #endif
 }
--- a/src/KOKKOS/fix_spring_self_kokkos.h
+++ b/src/KOKKOS/fix_spring_self_kokkos.h
@ -0,0 +1,108 @@
 /* -*- c++ -*- ----------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
   LAMMPS development team: developers@lammps.org
   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.
   See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 #ifdef FIX_CLASS
 // clang-format off
 FixStyle(spring/self/kk,FixSpringSelfKokkos<LMPDeviceType>);
 FixStyle(spring/self/kk/device,FixSpringSelfKokkos<LMPDeviceType>);
 FixStyle(spring/self/kk/host,FixSpringSelfKokkos<LMPHostType>);
 // clang-format on
 #else
 // clang-format off
 #ifndef LMP_FIX_SPRING_SELF_KOKKOS_H
 #define LMP_FIX_SPRING_SELF_KOKKOS_H
 #include "fix_spring_self.h"
 #include "kokkos_type.h"
 #include "kokkos_base.h"
 namespace LAMMPS_NS {
 // dispatch tag selecting the unpack-exchange operator() of the fix
 struct TagFixSpringSelfUnpackExchange{};
 // Kokkos version of fix spring/self: keeps the per-atom reference
 // positions (xoriginal) in a dual view, applies the spring force in a
 // parallel kernel, and provides Kokkos pack/unpack routines for atom
 // exchange.
 template<class DeviceType>
 class FixSpringSelfKokkos : public FixSpringSelf, public KokkosBase {
  public:
   typedef DeviceType device_type;
   typedef double value_type;
   typedef ArrayTypes<DeviceType> AT;
   FixSpringSelfKokkos(class LAMMPS *, int, char **);
   ~FixSpringSelfKokkos() override;
   void init() override;
   void grow_arrays(int) override;
   void copy_arrays(int, int, int) override;
   void post_force(int) override;
   // per-item body of the parallel scan run by pack_exchange_kokkos()
   KOKKOS_INLINE_FUNCTION
   void pack_exchange_item(const int&, int &, const bool &) const;
   // per-item body of the kernel run by unpack_exchange_kokkos()
   KOKKOS_INLINE_FUNCTION
   void operator()(TagFixSpringSelfUnpackExchange, const int&) const;
   // Kokkos pack/unpack of xoriginal for atoms that change processor
   int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
                            DAT::tdual_int_1d k_sendlist,
                            DAT::tdual_int_1d k_copylist,
                            ExecutionSpace space) override;
   void unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,
                               DAT::tdual_int_1d &indices,int nrecv,
                               ExecutionSpace space) override;
   // host-side pack/unpack; these wrap the FixSpringSelf implementations
   int pack_exchange(int, double *) override;
   int unpack_exchange(int, double *) override;
  protected:
   // reference positions: dual view plus its cached DeviceType view
   DAT::tdual_x_array k_xoriginal;
   typename AT::t_x_array d_xoriginal;
   // atom data views, refreshed at the start of post_force()
   typename AT::t_x_array_randomread x;
   typename AT::t_f_array f;
   typename AT::t_imageint_1d_randomread image;
   typename AT::t_int_1d_randomread mask;
   // state shared with the pack/unpack kernels
   int nsend;
   // NOTE(review): d_sendlist is not referenced by the pack/unpack code -- confirm it is needed
   typename AT::t_int_2d d_sendlist;
   typename AT::t_xfloat_1d_um d_buf;
   typename AT::t_int_1d d_exchange_sendlist;
   typename AT::t_int_1d d_copylist;
   typename AT::t_int_1d d_indices;
   // number of buffer entries written by the pack scan (device / host mirror)
   typename AT::t_int_scalar d_count;
   HAT::t_int_scalar h_count;
   double **xoriginal_tmp;    // original coords of atoms
  };
 // Scan functor for the exchange packing: stores a copy of the fix object
 // and forwards every scan item to its pack_exchange_item() method.
 template <class DeviceType>
 struct FixSpringSelfKokkosPackExchangeFunctor {
  using device_type = DeviceType;
  using value_type = int;
  FixSpringSelfKokkos<DeviceType> c;
  FixSpringSelfKokkosPackExchangeFunctor(FixSpringSelfKokkos<DeviceType> *fix) : c(*fix) {}
  KOKKOS_INLINE_FUNCTION
  void operator()(const int &isend, int &offset, const bool &final) const
  {
    c.pack_exchange_item(isend, offset, final);
  }
 };
 }
 #endif
 #endif
--- a/src/KOKKOS/kokkos.cpp
+++ b/src/KOKKOS/kokkos.cpp
@ -137,13 +137,13 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
      int set_flag = 0;
      char *str;
-      if ((str = getenv("SLURM_LOCALID"))) {
+      if (str = getenv("SLURM_LOCALID")) {
        int local_rank = atoi(str);
        device = local_rank % ngpus;
        if (device >= skip_gpu) device++;
        set_flag = 1;
      }
-      if ((str = getenv("MPT_LRANK"))) {
+      if (str = getenv("FLUX_TASK_LOCAL_ID")) {
        if (ngpus > 0) {
          int local_rank = atoi(str);
          device = local_rank % ngpus;
@ -151,7 +151,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
          set_flag = 1;
        }
      }
-      if ((str = getenv("MV2_COMM_WORLD_LOCAL_RANK"))) {
+      if (str = getenv("MPT_LRANK")) {
        if (ngpus > 0) {
          int local_rank = atoi(str);
          device = local_rank % ngpus;
@ -159,7 +159,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
          set_flag = 1;
        }
      }
-      if ((str = getenv("OMPI_COMM_WORLD_LOCAL_RANK"))) {
+      if (str = getenv("MV2_COMM_WORLD_LOCAL_RANK")) {
        if (ngpus > 0) {
          int local_rank = atoi(str);
          device = local_rank % ngpus;
@ -167,7 +167,15 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
          set_flag = 1;
        }
      }
-      if ((str = getenv("PMI_LOCAL_RANK"))) {
+      if (str = getenv("OMPI_COMM_WORLD_LOCAL_RANK")) {
        if (ngpus > 0) {
          int local_rank = atoi(str);
          device = local_rank % ngpus;
          if (device >= skip_gpu) device++;
          set_flag = 1;
        }
      }
      if (str = getenv("PMI_LOCAL_RANK")) {
        if (ngpus > 0) {
          int local_rank = atoi(str);
          device = local_rank % ngpus;
--- a/src/KOKKOS/kokkos_base.h
+++ b/src/KOKKOS/kokkos_base.h
@ -41,11 +41,6 @@ class KokkosBase {
                                           int, int *) {return 0;};
  virtual void unpack_forward_comm_fix_kokkos(int, int, DAT::tdual_xfloat_1d &) {}
  // Region
  virtual void match_all_kokkos(int, DAT::tdual_int_1d) {}
  // Fix
  virtual int pack_exchange_kokkos(const int & /*nsend*/, DAT::tdual_xfloat_2d & /*k_buf*/,
                                   DAT::tdual_int_1d /*k_sendlist*/,
                                   DAT::tdual_int_1d /*k_copylist*/,
@ -54,6 +49,9 @@ class KokkosBase {
                                      DAT::tdual_int_1d & /*indices*/, int /*nrecv*/,
                                      ExecutionSpace /*space*/) {}
  // Region
  virtual void match_all_kokkos(int, DAT::tdual_int_1d) {}
  using KeyViewType = DAT::t_x_array;
  using BinOp = BinOp3DLAMMPS<KeyViewType>;
  virtual void
--- a/src/KOKKOS/modify_kokkos.cpp
+++ b/src/KOKKOS/modify_kokkos.cpp
@ -362,6 +362,17 @@ void ModifyKokkos::pre_reverse(int eflag, int vflag)
 void ModifyKokkos::post_force(int vflag)
 {
  for (int i = 0; i < n_post_force_group; i++) {
    atomKK->sync(fix[list_post_force_group[i]]->execution_space,
                 fix[list_post_force_group[i]]->datamask_read);
    int prev_auto_sync = lmp->kokkos->auto_sync;
    if (!fix[list_post_force_group[i]]->kokkosable) lmp->kokkos->auto_sync = 1;
    fix[list_post_force_group[i]]->post_force(vflag);
    lmp->kokkos->auto_sync = prev_auto_sync;
    atomKK->modified(fix[list_post_force_group[i]]->execution_space,
                     fix[list_post_force_group[i]]->datamask_modify);
  }
  for (int i = 0; i < n_post_force; i++) {
    atomKK->sync(fix[list_post_force[i]]->execution_space,
                 fix[list_post_force[i]]->datamask_read);
--- a/src/KOKKOS/neigh_bond_kokkos.cpp
+++ b/src/KOKKOS/neigh_bond_kokkos.cpp
@ -112,9 +112,8 @@ void NeighBondKokkos<DeviceType>::init_topology_kk() {
  int i,m;
  int bond_off = 0;
  int angle_off = 0;
-  for (i = 0; i < modify->nfix; i++)
+  for (const auto &ifix : modify->get_fix_list())
-    if ((strcmp(modify->fix[i]->style,"shake") == 0)
+    if (utils::strmatch(ifix->style,"^shake") || utils::strmatch(ifix->style,"^rattle"))
        || (strcmp(modify->fix[i]->style,"rattle") == 0))
      bond_off = angle_off = 1;
  if (force->bond && force->bond_match("quartic")) bond_off = 1;
--- a/src/KOKKOS/neighbor_kokkos.cpp
+++ b/src/KOKKOS/neighbor_kokkos.cpp
@ -308,7 +308,8 @@ void NeighborKokkos::build_kokkos(int topoflag)
  for (i = 0; i < npair_perpetual; i++) {
    m = plist[i];
    if (!lists[m]->kokkos) atomKK->sync(Host,ALL_MASK);
-    if (!lists[m]->copy) lists[m]->grow(nlocal,nall);
+    if (!lists[m]->copy || lists[m]->trim || lists[m]->kk2cpu)
      lists[m]->grow(nlocal,nall);
    neigh_pair[m]->build_setup();
    neigh_pair[m]->build(lists[m]);
  }
--- a/src/KOKKOS/npair_halffull_kokkos.cpp
+++ b/src/KOKKOS/npair_halffull_kokkos.cpp
@ -18,6 +18,7 @@
 #include "atom_masks.h"
 #include "atom_vec.h"
 #include "domain.h"
 #include "force.h"
 #include "neigh_list_kokkos.h"
 #include <cmath>
@ -26,8 +27,8 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
-template<class DeviceType, int NEWTON, int TRIM>
+template<class DeviceType, int NEWTON, int TRI, int TRIM>
-NPairHalffullKokkos<DeviceType,NEWTON,TRIM>::NPairHalffullKokkos(LAMMPS *lmp) : NPair(lmp) {
+NPairHalffullKokkos<DeviceType,NEWTON,TRI,TRIM>::NPairHalffullKokkos(LAMMPS *lmp) : NPair(lmp) {
  atomKK = (AtomKokkos *) atom;
  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
 }
@ -41,13 +42,14 @@ NPairHalffullKokkos<DeviceType,NEWTON,TRIM>::NPairHalffullKokkos(LAMMPS *lmp) :
   if ghost, also store neighbors of ghost atoms & set inum,gnum correctly
 ------------------------------------------------------------------------- */
-template<class DeviceType, int NEWTON, int TRIM>
+template<class DeviceType, int NEWTON, int TRI, int TRIM>
-void NPairHalffullKokkos<DeviceType,NEWTON,TRIM>::build(NeighList *list)
+void NPairHalffullKokkos<DeviceType,NEWTON,TRI,TRIM>::build(NeighList *list)
 {
  if (NEWTON || TRIM) {
    x = atomKK->k_x.view<DeviceType>();
    atomKK->sync(execution_space,X_MASK);
  }
  nlocal = atom->nlocal;
  cutsq_custom = cutoff_custom*cutoff_custom;
@ -66,6 +68,8 @@ void NPairHalffullKokkos<DeviceType,NEWTON,TRIM>::build(NeighList *list)
  d_numneigh = k_list->d_numneigh;
  d_neighbors = k_list->d_neighbors;
  delta = 0.01 * force->angstrom;
  // loop over parent full list
  copymode = 1;
@ -78,9 +82,9 @@ void NPairHalffullKokkos<DeviceType,NEWTON,TRIM>::build(NeighList *list)
  k_list->k_ilist.template modify<DeviceType>();
 }
-template<class DeviceType, int NEWTON, int TRIM>
+template<class DeviceType, int NEWTON, int TRI, int TRIM>
 KOKKOS_INLINE_FUNCTION
-void NPairHalffullKokkos<DeviceType,NEWTON,TRIM>::operator()(TagNPairHalffullCompute, const int &ii) const {
+void NPairHalffullKokkos<DeviceType,NEWTON,TRI,TRIM>::operator()(TagNPairHalffullCompute, const int &ii) const {
  int n = 0;
  const int i = d_ilist_full(ii);
@ -92,6 +96,11 @@ void NPairHalffullKokkos<DeviceType,NEWTON,TRIM>::operator()(TagNPairHalffullCom
  }
  // loop over full neighbor list
  // use i < j < nlocal to eliminate half the local/local interactions
  // for triclinic, must use delta to eliminate half the local/ghost interactions
   // cannot use I/J exact coord comparison as for orthog
  //   b/c transforming orthog -> lambda -> orthog for ghost atoms
  //   with an added PBC offset can shift all 3 coords by epsilon
  const int jnum = d_numneigh_full(i);
  const AtomNeighbors neighbors_i = AtomNeighbors(&d_neighbors(i,0),d_numneigh(i),
@ -103,6 +112,14 @@ void NPairHalffullKokkos<DeviceType,NEWTON,TRIM>::operator()(TagNPairHalffullCom
    if (NEWTON) {
      if (j < nlocal) {
        if (i > j) continue;
      } else if (TRI) {
        if (fabs(x(j,2)-ztmp) > delta) {
          if (x(j,2) < ztmp) continue;
        } else if (fabs(x(j,1)-ytmp) > delta) {
          if (x(j,1) < ytmp) continue;
        } else {
          if (x(j,0) < xtmp) continue;
        }
      } else {
        if (x(j,2) < ztmp) continue;
        if (x(j,2) == ztmp) {
@ -141,14 +158,18 @@ void NPairHalffullKokkos<DeviceType,NEWTON,TRIM>::operator()(TagNPairHalffullCom
 }
 namespace LAMMPS_NS {
-template class NPairHalffullKokkos<LMPDeviceType,0,0>;
+template class NPairHalffullKokkos<LMPDeviceType,0,0,0>;
-template class NPairHalffullKokkos<LMPDeviceType,0,1>;
+template class NPairHalffullKokkos<LMPDeviceType,0,0,1>;
-template class NPairHalffullKokkos<LMPDeviceType,1,0>;
+template class NPairHalffullKokkos<LMPDeviceType,1,0,0>;
-template class NPairHalffullKokkos<LMPDeviceType,1,1>;
+template class NPairHalffullKokkos<LMPDeviceType,1,0,1>;
 template class NPairHalffullKokkos<LMPDeviceType,1,1,0>;
 template class NPairHalffullKokkos<LMPDeviceType,1,1,1>;
 #ifdef LMP_KOKKOS_GPU
-template class NPairHalffullKokkos<LMPHostType,0,0>;
+template class NPairHalffullKokkos<LMPHostType,0,0,0>;
-template class NPairHalffullKokkos<LMPHostType,0,1>;
+template class NPairHalffullKokkos<LMPHostType,0,0,1>;
-template class NPairHalffullKokkos<LMPHostType,1,0>;
+template class NPairHalffullKokkos<LMPHostType,1,0,0>;
-template class NPairHalffullKokkos<LMPHostType,1,1>;
+template class NPairHalffullKokkos<LMPHostType,1,0,1>;
 template class NPairHalffullKokkos<LMPHostType,1,1,0>;
 template class NPairHalffullKokkos<LMPHostType,1,1,1>;
 #endif
 }
--- a/src/KOKKOS/npair_halffull_kokkos.h
+++ b/src/KOKKOS/npair_halffull_kokkos.h
@ -16,53 +16,79 @@
 // Trim off
-// Newton
+// Newton, no triclinic 
-typedef NPairHalffullKokkos<LMPDeviceType,1,0> NPairKokkosHalffullNewtonDevice;
+typedef NPairHalffullKokkos<LMPDeviceType,1,0,0> NPairKokkosHalffullNewtonDevice;
 NPairStyle(halffull/newton/kk/device,
           NPairKokkosHalffullNewtonDevice,
           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
-           NP_ORTHO | NP_TRI | NP_KOKKOS_DEVICE);
+           NP_ORTHO | NP_KOKKOS_DEVICE);
-typedef NPairHalffullKokkos<LMPHostType,1,0> NPairKokkosHalffullNewtonHost;
+typedef NPairHalffullKokkos<LMPHostType,1,0,0> NPairKokkosHalffullNewtonHost;
 NPairStyle(halffull/newton/kk/host,
           NPairKokkosHalffullNewtonHost,
           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
-           NP_ORTHO | NP_TRI | NP_KOKKOS_HOST);
+           NP_ORTHO | NP_KOKKOS_HOST);
-typedef NPairHalffullKokkos<LMPDeviceType,1,0> NPairKokkosHalffullNewtonDevice;
+typedef NPairHalffullKokkos<LMPDeviceType,1,0,0> NPairKokkosHalffullNewtonDevice;
 NPairStyle(halffull/newton/skip/kk/device,
           NPairKokkosHalffullNewtonDevice,
           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
-           NP_ORTHO | NP_TRI | NP_SKIP | NP_KOKKOS_DEVICE);
+           NP_ORTHO | NP_SKIP | NP_KOKKOS_DEVICE);
-typedef NPairHalffullKokkos<LMPHostType,1,0> NPairKokkosHalffullNewtonHost;
+typedef NPairHalffullKokkos<LMPHostType,1,0,0> NPairKokkosHalffullNewtonHost;
 NPairStyle(halffull/newton/skip/kk/host,
           NPairKokkosHalffullNewtonHost,
           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
           NP_ORTHO | NP_SKIP | NP_KOKKOS_HOST);
 // Newton, triclinic
 typedef NPairHalffullKokkos<LMPDeviceType,1,1,0> NPairKokkosHalffullNewtonTriDevice;
 NPairStyle(halffull/newton/tri/kk/device,
           NPairKokkosHalffullNewtonTriDevice,
           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
           NP_ORTHO | NP_TRI | NP_KOKKOS_DEVICE);
 typedef NPairHalffullKokkos<LMPHostType,1,1,0> NPairKokkosHalffullNewtonTriHost;
 NPairStyle(halffull/newton/tri/kk/host,
           NPairKokkosHalffullNewtonTriHost,
           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
           NP_ORTHO | NP_TRI | NP_KOKKOS_HOST);
 typedef NPairHalffullKokkos<LMPDeviceType,1,1,0> NPairKokkosHalffullNewtonTriDevice;
 NPairStyle(halffull/newton/tri/skip/kk/device,
           NPairKokkosHalffullNewtonTriDevice,
           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
           NP_ORTHO | NP_TRI | NP_SKIP | NP_KOKKOS_DEVICE);
 typedef NPairHalffullKokkos<LMPHostType,1,1,0> NPairKokkosHalffullNewtonTriHost;
 NPairStyle(halffull/newton/tri/skip/kk/host,
           NPairKokkosHalffullNewtonTriHost,
           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
           NP_ORTHO | NP_TRI | NP_SKIP | NP_KOKKOS_HOST);
-// Newtoff
+// Newtoff (can be triclinic but template param always set to 0)
-typedef NPairHalffullKokkos<LMPDeviceType,0,0> NPairKokkosHalffullNewtoffDevice;
+typedef NPairHalffullKokkos<LMPDeviceType,0,0,0> NPairKokkosHalffullNewtoffDevice;
 NPairStyle(halffull/newtoff/kk/device,
           NPairKokkosHalffullNewtoffDevice,
           NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
           NP_ORTHO | NP_TRI | NP_KOKKOS_DEVICE);
-typedef NPairHalffullKokkos<LMPHostType,0,0> NPairKokkosHalffullNewtoffHost;
+typedef NPairHalffullKokkos<LMPHostType,0,0,0> NPairKokkosHalffullNewtoffHost;
 NPairStyle(halffull/newtoff/kk/host,
           NPairKokkosHalffullNewtoffHost,
           NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
           NP_ORTHO | NP_TRI | NP_KOKKOS_HOST);
-typedef NPairHalffullKokkos<LMPDeviceType,0,0> NPairKokkosHalffullNewtoffDevice;
+typedef NPairHalffullKokkos<LMPDeviceType,0,0,0> NPairKokkosHalffullNewtoffDevice;
 NPairStyle(halffull/newtoff/skip/kk/device,
           NPairKokkosHalffullNewtoffDevice,
           NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
           NP_ORTHO | NP_TRI | NP_SKIP | NP_KOKKOS_DEVICE);
-typedef NPairHalffullKokkos<LMPHostType,0,0> NPairKokkosHalffullNewtoffHost;
+typedef NPairHalffullKokkos<LMPHostType,0,0,0> NPairKokkosHalffullNewtoffHost;
 NPairStyle(halffull/newtoff/skip/kk/host,
           NPairKokkosHalffullNewtoffHost,
           NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
@ -70,166 +96,244 @@ NPairStyle(halffull/newtoff/skip/kk/host,
 //************ Ghost **************
-// Newton
+// Newton, no triclinic
-typedef NPairHalffullKokkos<LMPDeviceType,1,0> NPairKokkosHalffullNewtonGhostDevice;
+typedef NPairHalffullKokkos<LMPDeviceType,1,0,0> NPairKokkosHalffullNewtonDevice;
 NPairStyle(halffull/newton/ghost/kk/device,
-           NPairKokkosHalffullNewtonGhostDevice,
+           NPairKokkosHalffullNewtonDevice,
           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
-           NP_ORTHO | NP_TRI | NP_GHOST | NP_KOKKOS_DEVICE);
+           NP_ORTHO | NP_GHOST | NP_KOKKOS_DEVICE);
-typedef NPairHalffullKokkos<LMPHostType,1,0> NPairKokkosHalffullNewtonHost;
+typedef NPairHalffullKokkos<LMPHostType,1,0,0> NPairKokkosHalffullNewtonHost;
 NPairStyle(halffull/newton/ghost/kk/host,
           NPairKokkosHalffullNewtonHost,
           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
-           NP_ORTHO | NP_TRI | NP_GHOST | NP_KOKKOS_HOST);
+           NP_ORTHO | NP_GHOST | NP_KOKKOS_HOST);
-typedef NPairHalffullKokkos<LMPDeviceType,1,0> NPairKokkosHalffullNewtonGhostDevice;
+typedef NPairHalffullKokkos<LMPDeviceType,1,0,0> NPairKokkosHalffullNewtonDevice;
 NPairStyle(halffull/newton/skip/ghost/kk/device,
-           NPairKokkosHalffullNewtonGhostDevice,
+           NPairKokkosHalffullNewtonDevice,
           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
-           NP_ORTHO | NP_TRI | NP_GHOST | NP_SKIP | NP_KOKKOS_DEVICE);
+           NP_ORTHO | NP_GHOST | NP_SKIP | NP_KOKKOS_DEVICE);
-typedef NPairHalffullKokkos<LMPHostType,1,0> NPairKokkosHalffullNewtonHost;
+typedef NPairHalffullKokkos<LMPHostType,1,0,0> NPairKokkosHalffullNewtonHost;
 NPairStyle(halffull/newton/skip/ghost/kk/host,
           NPairKokkosHalffullNewtonHost,
           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
           NP_ORTHO | NP_GHOST | NP_SKIP | NP_KOKKOS_HOST);
 // Newton, triclinic
 typedef NPairHalffullKokkos<LMPDeviceType,1,1,0> NPairKokkosHalffullNewtonTriDevice;
 NPairStyle(halffull/newton/tri/ghost/kk/device,
           NPairKokkosHalffullNewtonTriDevice,
           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
           NP_ORTHO | NP_TRI | NP_GHOST | NP_KOKKOS_DEVICE);
 typedef NPairHalffullKokkos<LMPHostType,1,1,0> NPairKokkosHalffullNewtonTriHost;
 NPairStyle(halffull/newton/tri/ghost/kk/host,
           NPairKokkosHalffullNewtonTriHost,
           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
           NP_ORTHO | NP_TRI | NP_GHOST | NP_KOKKOS_HOST);
 typedef NPairHalffullKokkos<LMPDeviceType,1,1,0> NPairKokkosHalffullNewtonTriDevice;
 NPairStyle(halffull/newton/tri/skip/ghost/kk/device,
           NPairKokkosHalffullNewtonTriDevice,
           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
           NP_ORTHO | NP_TRI | NP_GHOST | NP_SKIP | NP_KOKKOS_DEVICE);
 typedef NPairHalffullKokkos<LMPHostType,1,1,0> NPairKokkosHalffullNewtonTriHost;
 NPairStyle(halffull/newton/tri/skip/ghost/kk/host,
           NPairKokkosHalffullNewtonTriHost,
           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
           NP_ORTHO | NP_TRI | NP_GHOST | NP_SKIP | NP_KOKKOS_HOST);
-// Newtoff
+// Newtoff (can be triclinic but template param always set to 0)
-typedef NPairHalffullKokkos<LMPDeviceType,0,0> NPairKokkosHalffullNewtoffGhostDevice;
+typedef NPairHalffullKokkos<LMPDeviceType,0,0,0> NPairKokkosHalffullNewtoffDevice;
 NPairStyle(halffull/newtoff/ghost/kk/device,
-           NPairKokkosHalffullNewtoffGhostDevice,
+           NPairKokkosHalffullNewtoffDevice,
           NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
           NP_ORTHO | NP_TRI | NP_GHOST | NP_KOKKOS_DEVICE);
-typedef NPairHalffullKokkos<LMPHostType,0,0> NPairKokkosHalffullNewtoffHost;
+typedef NPairHalffullKokkos<LMPHostType,0,0,0> NPairKokkosHalffullNewtoffHost;
 NPairStyle(halffull/newtoff/ghost/kk/host,
           NPairKokkosHalffullNewtoffHost,
           NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
           NP_ORTHO | NP_TRI | NP_GHOST | NP_KOKKOS_HOST);
-typedef NPairHalffullKokkos<LMPDeviceType,0,0> NPairKokkosHalffullNewtoffGhostDevice;
+typedef NPairHalffullKokkos<LMPDeviceType,0,0,0> NPairKokkosHalffullNewtoffDevice;
 NPairStyle(halffull/newtoff/skip/ghost/kk/device,
-           NPairKokkosHalffullNewtoffGhostDevice,
+           NPairKokkosHalffullNewtoffDevice,
           NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
           NP_ORTHO | NP_TRI | NP_GHOST | NP_SKIP | NP_KOKKOS_DEVICE);
-typedef NPairHalffullKokkos<LMPHostType,0,0> NPairKokkosHalffullNewtoffHost;
+typedef NPairHalffullKokkos<LMPHostType,0,0,0> NPairKokkosHalffullNewtoffHost;
 NPairStyle(halffull/newtoff/skip/ghost/kk/host,
           NPairKokkosHalffullNewtoffHost,
           NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
           NP_ORTHO | NP_TRI | NP_GHOST | NP_SKIP | NP_KOKKOS_HOST);
 //************ Trim **************
-// Newton
+// Newton, no triclinic
-typedef NPairHalffullKokkos<LMPDeviceType,1,1> NPairKokkosHalffullNewtonTrimDevice;
+typedef NPairHalffullKokkos<LMPDeviceType,1,0,1> NPairKokkosHalffullNewtonTrimDevice;
 NPairStyle(halffull/newton/trim/kk/device,
           NPairKokkosHalffullNewtonTrimDevice,
           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
-           NP_ORTHO | NP_TRI | NP_TRIM | NP_KOKKOS_DEVICE);
+           NP_ORTHO | NP_TRIM | NP_KOKKOS_DEVICE);
-typedef NPairHalffullKokkos<LMPHostType,1,1> NPairKokkosHalffullNewtonTrimHost;
+typedef NPairHalffullKokkos<LMPHostType,1,0,1> NPairKokkosHalffullNewtonTrimHost;
 NPairStyle(halffull/newton/trim/kk/host,
           NPairKokkosHalffullNewtonTrimHost,
           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
           NP_ORTHO | NP_TRIM | NP_KOKKOS_HOST);
 typedef NPairHalffullKokkos<LMPDeviceType,1,0,1> NPairKokkosHalffullNewtonTrimDevice;
 NPairStyle(halffull/newton/trim/skip/kk/device,
           NPairKokkosHalffullNewtonTrimDevice,
           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
           NP_ORTHO | NP_SKIP | NP_TRIM | NP_KOKKOS_DEVICE);
 typedef NPairHalffullKokkos<LMPHostType,1,0,1> NPairKokkosHalffullNewtonTrimHost;
 NPairStyle(halffull/newton/trim/skip/kk/host,
           NPairKokkosHalffullNewtonTrimHost,
           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
           NP_ORTHO | NP_SKIP | NP_TRIM | NP_KOKKOS_HOST);
 // Newton, triclinic
 typedef NPairHalffullKokkos<LMPDeviceType,1,1,1> NPairKokkosHalffullNewtonTriTrimDevice;
 NPairStyle(halffull/newton/tri/trim/kk/device,
           NPairKokkosHalffullNewtonTriTrimDevice,
           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
           NP_ORTHO | NP_TRI | NP_TRIM | NP_KOKKOS_DEVICE);
 typedef NPairHalffullKokkos<LMPHostType,1,1,1> NPairKokkosHalffullNewtonTriTrimHost;
 NPairStyle(halffull/newton/tri/trim/kk/host,
           NPairKokkosHalffullNewtonTriTrimHost,
           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
           NP_ORTHO | NP_TRI | NP_TRIM | NP_KOKKOS_HOST);
-typedef NPairHalffullKokkos<LMPDeviceType,1,1> NPairKokkosHalffullNewtonTrimDevice;
+typedef NPairHalffullKokkos<LMPDeviceType,1,1,1> NPairKokkosHalffullNewtonTriTrimDevice;
-NPairStyle(halffull/newton/skip/trim/kk/device,
+NPairStyle(halffull/newton/tri/trim/skip/kk/device,
-           NPairKokkosHalffullNewtonTrimDevice,
+           NPairKokkosHalffullNewtonTriTrimDevice,
           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
           NP_ORTHO | NP_TRI | NP_SKIP | NP_TRIM | NP_KOKKOS_DEVICE);
-typedef NPairHalffullKokkos<LMPHostType,1,1> NPairKokkosHalffullNewtonTrimHost;
+typedef NPairHalffullKokkos<LMPHostType,1,1,1> NPairKokkosHalffullNewtonTriTrimHost;
-NPairStyle(halffull/newton/skip/trim/kk/host,
+NPairStyle(halffull/newton/tri/trim/skip/kk/host,
-           NPairKokkosHalffullNewtonTrimHost,
+           NPairKokkosHalffullNewtonTriTrimHost,
           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
           NP_ORTHO | NP_TRI | NP_SKIP | NP_TRIM | NP_KOKKOS_HOST);
-// Newtoff
+// Newtoff (can be triclinic but template param always set to 0)
-typedef NPairHalffullKokkos<LMPDeviceType,0,1> NPairKokkosHalffullNewtoffTrimDevice;
+typedef NPairHalffullKokkos<LMPDeviceType,0,0,1> NPairKokkosHalffullNewtoffTrimDevice;
 NPairStyle(halffull/newtoff/trim/kk/device,
           NPairKokkosHalffullNewtoffTrimDevice,
           NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
           NP_ORTHO | NP_TRI | NP_TRIM | NP_KOKKOS_DEVICE);
-typedef NPairHalffullKokkos<LMPHostType,0,1> NPairKokkosHalffullNewtoffTrimHost;
+typedef NPairHalffullKokkos<LMPHostType,0,0,1> NPairKokkosHalffullNewtoffTrimHost;
 NPairStyle(halffull/newtoff/trim/kk/host,
           NPairKokkosHalffullNewtoffTrimHost,
           NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
           NP_ORTHO | NP_TRI | NP_TRIM | NP_KOKKOS_HOST);
-typedef NPairHalffullKokkos<LMPDeviceType,0,1> NPairKokkosHalffullNewtoffTrimDevice;
+typedef NPairHalffullKokkos<LMPDeviceType,0,0,1> NPairKokkosHalffullNewtoffTrimDevice;
-NPairStyle(halffull/newtoff/skip/trim/kk/device,
+NPairStyle(halffull/newtoff/trim/skip/kk/device,
           NPairKokkosHalffullNewtoffTrimDevice,
           NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
           NP_ORTHO | NP_TRI | NP_SKIP | NP_TRIM | NP_KOKKOS_DEVICE);
-typedef NPairHalffullKokkos<LMPHostType,0,1> NPairKokkosHalffullNewtoffTrimHost;
+typedef NPairHalffullKokkos<LMPHostType,0,0,1> NPairKokkosHalffullNewtoffTrimHost;
-NPairStyle(halffull/newtoff/skip/trim/kk/host,
+NPairStyle(halffull/newtoff/trim/skip/kk/host,
           NPairKokkosHalffullNewtoffTrimHost,
           NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
           NP_ORTHO | NP_TRI | NP_SKIP |  NP_TRIM | NP_KOKKOS_HOST);
 //************ Trim Ghost **************
-// Newton
+// Newton, no triclinic
-typedef NPairHalffullKokkos<LMPDeviceType,1,1> NPairKokkosHalffullNewtonGhostTrimDevice;
+typedef NPairHalffullKokkos<LMPDeviceType,1,0,1> NPairKokkosHalffullNewtonTrimDevice;
-NPairStyle(halffull/newton/ghost/trim/kk/device,
+NPairStyle(halffull/newton/trim/ghost/kk/device,
-           NPairKokkosHalffullNewtonGhostTrimDevice,
+           NPairKokkosHalffullNewtonTrimDevice,
           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
           NP_ORTHO | NP_GHOST | NP_TRIM | NP_KOKKOS_DEVICE);
 typedef NPairHalffullKokkos<LMPHostType,1,0,1> NPairKokkosHalffullNewtonTrimHost;
 NPairStyle(halffull/newton/trim/ghost/kk/host,
           NPairKokkosHalffullNewtonTrimHost,
           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
           NP_ORTHO | NP_GHOST | NP_TRIM | NP_KOKKOS_HOST);
 typedef NPairHalffullKokkos<LMPDeviceType,1,0,1> NPairKokkosHalffullNewtonTrimDevice;
 NPairStyle(halffull/newton/trim/skip/ghost/kk/device,
           NPairKokkosHalffullNewtonTrimDevice,
           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
           NP_ORTHO | NP_GHOST | NP_SKIP | NP_TRIM | NP_KOKKOS_DEVICE);
 typedef NPairHalffullKokkos<LMPHostType,1,0,1> NPairKokkosHalffullNewtonTrimHost;
 NPairStyle(halffull/newton/trim/skip/ghost/kk/host,
           NPairKokkosHalffullNewtonTrimHost,
           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
           NP_ORTHO | NP_GHOST | NP_SKIP | NP_TRIM | NP_KOKKOS_HOST);
 // Newton, triclinic
 typedef NPairHalffullKokkos<LMPDeviceType,1,1,1> NPairKokkosHalffullNewtonTriTrimDevice;
 NPairStyle(halffull/newton/tri/trim/ghost/kk/device,
           NPairKokkosHalffullNewtonTriTrimDevice,
           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
           NP_ORTHO | NP_TRI | NP_GHOST | NP_TRIM | NP_KOKKOS_DEVICE);
-typedef NPairHalffullKokkos<LMPHostType,1,1> NPairKokkosHalffullNewtonTrimHost;
+typedef NPairHalffullKokkos<LMPHostType,1,1,1> NPairKokkosHalffullNewtonTriTrimHost;
-NPairStyle(halffull/newton/ghost/trim/kk/host,
+NPairStyle(halffull/newton/tri/trim/ghost/kk/host,
-           NPairKokkosHalffullNewtonTrimHost,
+           NPairKokkosHalffullNewtonTriTrimHost,
           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
           NP_ORTHO | NP_TRI | NP_GHOST | NP_TRIM | NP_KOKKOS_HOST);
-typedef NPairHalffullKokkos<LMPDeviceType,1,1> NPairKokkosHalffullNewtonGhostTrimDevice;
+typedef NPairHalffullKokkos<LMPDeviceType,1,1,1> NPairKokkosHalffullNewtonTriTrimDevice;
-NPairStyle(halffull/newton/skip/ghost/trim/kk/device,
+NPairStyle(halffull/newton/tri/trim/skip/ghost/kk/device,
-           NPairKokkosHalffullNewtonGhostTrimDevice,
+           NPairKokkosHalffullNewtonTriTrimDevice,
           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
           NP_ORTHO | NP_TRI | NP_GHOST | NP_SKIP | NP_TRIM | NP_KOKKOS_DEVICE);
-typedef NPairHalffullKokkos<LMPHostType,1,1> NPairKokkosHalffullNewtonTrimHost;
+typedef NPairHalffullKokkos<LMPHostType,1,1,1> NPairKokkosHalffullNewtonTriTrimHost;
-NPairStyle(halffull/newton/skip/ghost/trim/kk/host,
+NPairStyle(halffull/newton/tri/trim/skip/ghost/kk/host,
-           NPairKokkosHalffullNewtonTrimHost,
+           NPairKokkosHalffullNewtonTriTrimHost,
           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
           NP_ORTHO | NP_TRI | NP_GHOST | NP_SKIP | NP_TRIM | NP_KOKKOS_HOST);
-// Newtoff
+// Newtoff (can be triclinic but template param always set to 0)
-typedef NPairHalffullKokkos<LMPDeviceType,0,1> NPairKokkosHalffullNewtoffGhostTrimDevice;
+typedef NPairHalffullKokkos<LMPDeviceType,0,0,1> NPairKokkosHalffullNewtoffTrimDevice;
-NPairStyle(halffull/newtoff/ghost/trim/kk/device,
+NPairStyle(halffull/newtoff/trim/ghost/kk/device,
-           NPairKokkosHalffullNewtoffGhostTrimDevice,
+           NPairKokkosHalffullNewtoffTrimDevice,
           NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
           NP_ORTHO | NP_TRI | NP_GHOST | NP_TRIM | NP_KOKKOS_DEVICE);
-typedef NPairHalffullKokkos<LMPHostType,0,1> NPairKokkosHalffullNewtoffTrimHost;
+typedef NPairHalffullKokkos<LMPHostType,0,0,1> NPairKokkosHalffullNewtoffTrimHost;
-NPairStyle(halffull/newtoff/ghost/trim/kk/host,
+NPairStyle(halffull/newtoff/trim/ghost/kk/host,
           NPairKokkosHalffullNewtoffTrimHost,
           NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
           NP_ORTHO | NP_TRI | NP_GHOST | NP_TRIM | NP_KOKKOS_HOST);
-typedef NPairHalffullKokkos<LMPDeviceType,0,1> NPairKokkosHalffullNewtoffGhostTrimDevice;
+typedef NPairHalffullKokkos<LMPDeviceType,0,0,1> NPairKokkosHalffullNewtoffTrimDevice;
-NPairStyle(halffull/newtoff/skip/ghost/trim/kk/device,
+NPairStyle(halffull/newtoff/trim/skip/ghost/kk/device,
-           NPairKokkosHalffullNewtoffGhostTrimDevice,
+           NPairKokkosHalffullNewtoffTrimDevice,
           NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
           NP_ORTHO | NP_TRI | NP_GHOST | NP_SKIP | NP_TRIM | NP_KOKKOS_DEVICE);
-typedef NPairHalffullKokkos<LMPHostType,0,1> NPairKokkosHalffullNewtoffTrimHost;
+typedef NPairHalffullKokkos<LMPHostType,0,0,1> NPairKokkosHalffullNewtoffTrimHost;
-NPairStyle(halffull/newtoff/skip/ghost/trim/kk/host,
+NPairStyle(halffull/newtoff/trim/skip/ghost/kk/host,
           NPairKokkosHalffullNewtoffTrimHost,
           NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
           NP_ORTHO | NP_TRI | NP_GHOST | NP_SKIP | NP_TRIM | NP_KOKKOS_HOST);
 // clang-format on
 #else
@ -244,7 +348,7 @@ namespace LAMMPS_NS {
 struct TagNPairHalffullCompute{};
-template<class DeviceType, int NEWTON, int TRIM>
+template<class DeviceType, int NEWTON, int TRI, int TRIM>
 class NPairHalffullKokkos : public NPair {
 public:
  typedef DeviceType device_type;
@ -257,8 +361,8 @@ class NPairHalffullKokkos : public NPair {
  void operator()(TagNPairHalffullCompute, const int&) const;
 private:
-  int nlocal;
+  int nlocal,triclinic;
-  double cutsq_custom;
+  double cutsq_custom,delta;
  typename AT::t_x_array_randomread x;
--- a/src/KOKKOS/npair_kokkos.cpp
+++ b/src/KOKKOS/npair_kokkos.cpp
@ -155,6 +155,8 @@ void NPairKokkos<DeviceType,HALF,NEWTON,GHOST,TRI,SIZE>::build(NeighList *list_)
  list->grow(nall);
  const double delta = 0.01 * force->angstrom;
  NeighborKokkosExecute<DeviceType>
    data(*list,
         k_cutneighsq.view<DeviceType>(),
@ -176,7 +178,7 @@ void NPairKokkos<DeviceType,HALF,NEWTON,GHOST,TRI,SIZE>::build(NeighList *list_)
         atomKK->molecular,
         nbinx,nbiny,nbinz,mbinx,mbiny,mbinz,mbinxlo,mbinylo,mbinzlo,
         bininvx,bininvy,bininvz,
-         exclude, nex_type,
+         delta, exclude, nex_type,
         k_ex1_type.view<DeviceType>(),
         k_ex2_type.view<DeviceType>(),
         k_ex_type.view<DeviceType>(),
@ -217,6 +219,8 @@ void NPairKokkos<DeviceType,HALF,NEWTON,GHOST,TRI,SIZE>::build(NeighList *list_)
      atomKK->sync(Device,X_MASK|RADIUS_MASK|TYPE_MASK);
  }
  if (HALF && NEWTON && TRI) atomKK->sync(Device,TAG_MASK);
  data.special_flag[0] = special_flag[0];
  data.special_flag[1] = special_flag[1];
  data.special_flag[2] = special_flag[2];
@ -261,7 +265,7 @@ void NPairKokkos<DeviceType,HALF,NEWTON,GHOST,TRI,SIZE>::build(NeighList *list_)
 //#endif
    } else {
      if (SIZE) {
-        NPairKokkosBuildFunctorSize<DeviceType,HALF,NEWTON,TRI> f(data,atoms_per_bin * 6 * sizeof(X_FLOAT) * factor);
+        NPairKokkosBuildFunctorSize<DeviceType,HALF,NEWTON,TRI> f(data,atoms_per_bin * 7 * sizeof(X_FLOAT) * factor);
 #ifdef LMP_KOKKOS_GPU
        if (ExecutionSpaceFromDevice<DeviceType>::space == Device) {
          int team_size = atoms_per_bin*factor;
@ -279,7 +283,7 @@ void NPairKokkos<DeviceType,HALF,NEWTON,GHOST,TRI,SIZE>::build(NeighList *list_)
        Kokkos::parallel_for(nall, f);
 #endif
      } else {
-        NPairKokkosBuildFunctor<DeviceType,HALF,NEWTON,TRI> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
+        NPairKokkosBuildFunctor<DeviceType,HALF,NEWTON,TRI> f(data,atoms_per_bin * 6 * sizeof(X_FLOAT) * factor);
 #ifdef LMP_KOKKOS_GPU
        if (ExecutionSpaceFromDevice<DeviceType>::space == Device) {
          int team_size = atoms_per_bin*factor;
@ -414,6 +418,8 @@ void NeighborKokkosExecute<DeviceType>::
  const X_FLOAT ytmp = x(i, 1);
  const X_FLOAT ztmp = x(i, 2);
  const int itype = type(i);
  tagint itag;
  if (HalfNeigh && Newton && Tri) itag = tag(i);
  const int ibin = c_atom2bin(i);
@ -484,13 +490,29 @@ void NeighborKokkosExecute<DeviceType>::
        if (HalfNeigh && !Newton && j <= i) continue;
        if (!HalfNeigh && j == i) continue;
        // for triclinic, bin stencil is full in all 3 dims
        // must use itag/jtag to eliminate half the I/J interactions
        // cannot use I/J exact coord comparison
        //   b/c transforming orthog -> lambda -> orthog for ghost atoms
        //   with an added PBC offset can shift all 3 coords by epsilon
        if (HalfNeigh && Newton && Tri) {
          if (j <= i) continue;
          if (j >= nlocal) {
            const tagint jtag = tag(j);
            if (itag > jtag) {
              if ((itag+jtag) % 2 == 0) continue;
            } else if (itag < jtag) {
              if ((itag+jtag) % 2 == 1) continue;
            } else {
              if (fabs(x(j,2)-ztmp) > delta) {
                if (x(j,2) < ztmp) continue;
-          if (x(j,2) == ztmp) {
+              } else if (fabs(x(j,1)-ytmp) > delta) {
                if (x(j,1) < ytmp) continue;
-            if (x(j,1) == ytmp) {
+              } else {
                if (x(j,0) < xtmp) continue;
-              if (x(j,0) == xtmp && j <= i) continue;
+              }
            }
          }
        }
@ -568,8 +590,9 @@ void NeighborKokkosExecute<DeviceType>::build_ItemGPU(typename Kokkos::TeamPolic
                                                      size_t sharedsize) const
 {
  auto* sharedmem = static_cast<X_FLOAT *>(dev.team_shmem().get_shmem(sharedsize));
-  /* loop over atoms in i's bin,
+
-  */
+  // loop over atoms in i's bin
  const int atoms_per_bin = c_bins.extent(1);
  const int BINS_PER_TEAM = dev.team_size()/atoms_per_bin <1?1:dev.team_size()/atoms_per_bin;
  const int TEAMS_PER_BIN = atoms_per_bin/dev.team_size()<1?1:atoms_per_bin/dev.team_size();
@ -579,15 +602,14 @@ void NeighborKokkosExecute<DeviceType>::build_ItemGPU(typename Kokkos::TeamPolic
  if (ibin >= mbins) return;
-  X_FLOAT* other_x = sharedmem + 5*atoms_per_bin*MY_BIN;
+  X_FLOAT* other_x = sharedmem + 6*atoms_per_bin*MY_BIN;
-  int* other_id = (int*) &other_x[4 * atoms_per_bin];
+  int* other_id = (int*) &other_x[5 * atoms_per_bin];
  int bincount_current = c_bincount[ibin];
  for (int kk = 0; kk < TEAMS_PER_BIN; kk++) {
    const int MY_II = dev.team_rank()%atoms_per_bin+kk*dev.team_size();
    const int i = MY_II < bincount_current ? c_bins(ibin, MY_II) : -1;
    /* if necessary, goto next page and add pages */
    int n = 0;
@ -595,6 +617,7 @@ void NeighborKokkosExecute<DeviceType>::build_ItemGPU(typename Kokkos::TeamPolic
    X_FLOAT ytmp;
    X_FLOAT ztmp;
    int itype;
    tagint itag;
    const int index = (i >= 0 && i < nlocal) ? i : 0;
    const AtomNeighbors neighbors_i = neigh_transpose ?
    neigh_list.get_neighbors_transpose(index) : neigh_list.get_neighbors(index);
@ -608,6 +631,10 @@ void NeighborKokkosExecute<DeviceType>::build_ItemGPU(typename Kokkos::TeamPolic
      other_x[MY_II + atoms_per_bin] = ytmp;
      other_x[MY_II + 2 * atoms_per_bin] = ztmp;
      other_x[MY_II + 3 * atoms_per_bin] = itype;
      if (HalfNeigh && Newton && Tri) {
        itag = tag(i);
        other_x[MY_II + 4 * atoms_per_bin] = itag;
      }
    }
    other_id[MY_II] = i;
@ -695,6 +722,8 @@ void NeighborKokkosExecute<DeviceType>::build_ItemGPU(typename Kokkos::TeamPolic
        other_x[MY_II + atoms_per_bin] = x(j, 1);
        other_x[MY_II + 2 * atoms_per_bin] = x(j, 2);
        other_x[MY_II + 3 * atoms_per_bin] = type(j);
        if (HalfNeigh && Newton && Tri)
          other_x[MY_II + 4 * atoms_per_bin] = tag(j);
      }
      other_id[MY_II] = j;
@ -708,13 +737,29 @@ void NeighborKokkosExecute<DeviceType>::build_ItemGPU(typename Kokkos::TeamPolic
          if (HalfNeigh && !Newton && j <= i) continue;
          if (!HalfNeigh && j == i) continue;
          // for triclinic, bin stencil is full in all 3 dims
          // must use itag/jtag to eliminate half the I/J interactions
          // cannot use I/J exact coord comparison
          //   b/c transforming orthog -> lambda -> orthog for ghost atoms
          //   with an added PBC offset can shift all 3 coords by epsilon
          if (HalfNeigh && Newton && Tri) {
            if (j <= i) continue;
            if (j >= nlocal) {
              const tagint jtag = other_x[m + 4 * atoms_per_bin];
              if (itag > jtag) {
                if ((itag+jtag) % 2 == 0) continue;
              } else if (itag < jtag) {
                if ((itag+jtag) % 2 == 1) continue;
              } else {
                if (fabs(x(j,2)-ztmp) > delta) {
                  if (x(j,2) < ztmp) continue;
-            if (x(j,2) == ztmp) {
+                } else if (fabs(x(j,1)-ytmp) > delta) {
                  if (x(j,1) < ytmp) continue;
-              if (x(j,1) == ytmp) {
+                } else {
                  if (x(j,0) < xtmp) continue;
-                if (x(j,0) == xtmp && j <= i) continue;
+                }
              }
            }
          }
@ -905,6 +950,7 @@ void NeighborKokkosExecute<DeviceType>::build_ItemGhostGPU(typename Kokkos::Team
                                                      size_t sharedsize) const
 {
  auto* sharedmem = static_cast<X_FLOAT *>(dev.team_shmem().get_shmem(sharedsize));
  // loop over atoms in i's bin
  const int atoms_per_bin = c_bins.extent(1);
@ -1084,6 +1130,8 @@ void NeighborKokkosExecute<DeviceType>::
  const X_FLOAT ztmp = x(i, 2);
  const X_FLOAT radi = radius(i);
  const int itype = type(i);
  tagint itag;
  if (HalfNeigh && Newton && Tri) itag = tag(i);
  const int ibin = c_atom2bin(i);
@ -1167,13 +1215,29 @@ void NeighborKokkosExecute<DeviceType>::
      if (HalfNeigh && !Newton && j <= i) continue;
      if (!HalfNeigh && j == i) continue;
      // for triclinic, bin stencil is full in all 3 dims
      // must use itag/jtag to eliminate half the I/J interactions
      // cannot use I/J exact coord comparison
      //   b/c transforming orthog -> lambda -> orthog for ghost atoms
      //   with an added PBC offset can shift all 3 coords by epsilon
      if (HalfNeigh && Newton && Tri) {
        if (j <= i) continue;
        if (j >= nlocal) {
          const tagint jtag = tag(j);
          if (itag > jtag) {
            if ((itag+jtag) % 2 == 0) continue;
          } else if (itag < jtag) {
            if ((itag+jtag) % 2 == 1) continue;
          } else {
            if (fabs(x(j,2)-ztmp) > delta) {
              if (x(j,2) < ztmp) continue;
-        if (x(j,2) == ztmp) {
+            } else if (fabs(x(j,1)-ytmp) > delta) {
              if (x(j,1) < ytmp) continue;
-          if (x(j,1) == ytmp) {
+            } else {
              if (x(j,0) < xtmp) continue;
-            if (x(j,0) == xtmp && j <= i) continue;
+            }
          }
        }
      }
@ -1245,8 +1309,9 @@ void NeighborKokkosExecute<DeviceType>::build_ItemSizeGPU(typename Kokkos::TeamP
                                                          size_t sharedsize) const
 {
  auto* sharedmem = static_cast<X_FLOAT *>(dev.team_shmem().get_shmem(sharedsize));
-  /* loop over atoms in i's bin,
+
-   */
+  // loop over atoms in i's bin
  const int atoms_per_bin = c_bins.extent(1);
  const int BINS_PER_TEAM = dev.team_size()/atoms_per_bin <1?1:dev.team_size()/atoms_per_bin;
  const int TEAMS_PER_BIN = atoms_per_bin/dev.team_size()<1?1:atoms_per_bin/dev.team_size();
@ -1256,15 +1321,14 @@ void NeighborKokkosExecute<DeviceType>::build_ItemSizeGPU(typename Kokkos::TeamP
  if (ibin >= mbins) return;
-  X_FLOAT* other_x = sharedmem + 6*atoms_per_bin*MY_BIN;
+  X_FLOAT* other_x = sharedmem + 7*atoms_per_bin*MY_BIN;
-  int* other_id = (int*) &other_x[5 * atoms_per_bin];
+  int* other_id = (int*) &other_x[6 * atoms_per_bin];
  int bincount_current = c_bincount[ibin];
  for (int kk = 0; kk < TEAMS_PER_BIN; kk++) {
    const int MY_II = dev.team_rank()%atoms_per_bin+kk*dev.team_size();
    const int i = MY_II < bincount_current ? c_bins(ibin, MY_II) : -1;
    /* if necessary, goto next page and add pages */
    int n = 0;
@ -1273,6 +1337,7 @@ void NeighborKokkosExecute<DeviceType>::build_ItemSizeGPU(typename Kokkos::TeamP
    X_FLOAT ztmp;
    X_FLOAT radi;
    int itype;
    tagint itag;
    const int index = (i >= 0 && i < nlocal) ? i : 0;
    const AtomNeighbors neighbors_i = neigh_transpose ?
    neigh_list.get_neighbors_transpose(index) : neigh_list.get_neighbors(index);
@ -1289,6 +1354,10 @@ void NeighborKokkosExecute<DeviceType>::build_ItemSizeGPU(typename Kokkos::TeamP
      other_x[MY_II + 2 * atoms_per_bin] = ztmp;
      other_x[MY_II + 3 * atoms_per_bin] = itype;
      other_x[MY_II + 4 * atoms_per_bin] = radi;
      if (HalfNeigh && Newton && Tri) { 
        itag = tag(i);
        other_x[MY_II + 5 * atoms_per_bin] = itag;
      }
    }
    other_id[MY_II] = i;
 #if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP)
@ -1381,6 +1450,8 @@ void NeighborKokkosExecute<DeviceType>::build_ItemSizeGPU(typename Kokkos::TeamP
        other_x[MY_II + 2 * atoms_per_bin] = x(j, 2);
        other_x[MY_II + 3 * atoms_per_bin] = type(j);
        other_x[MY_II + 4 * atoms_per_bin] = radius(j);
        if (HalfNeigh && Newton && Tri)
          other_x[MY_II + 5 * atoms_per_bin] = tag(j);
      }
      other_id[MY_II] = j;
@ -1394,13 +1465,29 @@ void NeighborKokkosExecute<DeviceType>::build_ItemSizeGPU(typename Kokkos::TeamP
          if (HalfNeigh && !Newton && j <= i) continue;
          if (!HalfNeigh && j == i) continue;
          // for triclinic, bin stencil is full in all 3 dims
          // must use itag/jtag to eliminate half the I/J interactions
          // cannot use I/J exact coord comparison
          //   b/c transforming orthog -> lambda -> orthog for ghost atoms
          //   with an added PBC offset can shift all 3 coords by epsilon
          if (HalfNeigh && Newton && Tri) {
            if (j <= i) continue;
            if (j >= nlocal) {
              const tagint jtag = other_x[m + 5 * atoms_per_bin];
              if (itag > jtag) {
                if ((itag+jtag) % 2 == 0) continue;
              } else if (itag < jtag) {
                if ((itag+jtag) % 2 == 1) continue;
              } else {
                if (fabs(x(j,2)-ztmp) > delta) {
                  if (x(j,2) < ztmp) continue;
-            if (x(j,2) == ztmp) {
+                } else if (fabs(x(j,1)-ytmp) > delta) {
                  if (x(j,1) < ytmp) continue;
-              if (x(j,1) == ytmp) {
+                } else {
                  if (x(j,0) < xtmp) continue;
-                if (x(j,0) == xtmp && j <= i) continue;
+                }
              }
            }
          }
--- a/src/KOKKOS/npair_kokkos.h
+++ b/src/KOKKOS/npair_kokkos.h
@ -189,6 +189,8 @@ class NeighborKokkosExecute
 public:
  NeighListKokkos<DeviceType> neigh_list;
  const double delta;
  // data from Neighbor class
  const typename AT::t_xfloat_2d_randomread cutneighsq;
@ -282,7 +284,7 @@ class NeighborKokkosExecute
                        const int & _mbinx,const int & _mbiny,const int & _mbinz,
                        const int & _mbinxlo,const int & _mbinylo,const int & _mbinzlo,
                        const X_FLOAT &_bininvx,const X_FLOAT &_bininvy,const X_FLOAT &_bininvz,
-                        const int & _exclude,const int & _nex_type,
+                        const double &_delta,const int & _exclude,const int & _nex_type,
                        const typename AT::t_int_1d_const & _ex1_type,
                        const typename AT::t_int_1d_const & _ex2_type,
                        const typename AT::t_int_2d_const & _ex_type,
@ -301,7 +303,7 @@ class NeighborKokkosExecute
                        const typename ArrayTypes<LMPHostType>::t_int_scalar _h_resize,
                        const typename AT::t_int_scalar _new_maxneighs,
                        const typename ArrayTypes<LMPHostType>::t_int_scalar _h_new_maxneighs):
-    neigh_list(_neigh_list), cutneighsq(_cutneighsq),exclude(_exclude),
+    neigh_list(_neigh_list), cutneighsq(_cutneighsq),delta(_delta),exclude(_exclude),
    nex_type(_nex_type),ex1_type(_ex1_type),ex2_type(_ex2_type),
    ex_type(_ex_type),nex_group(_nex_group),
    ex1_bit(_ex1_bit),ex2_bit(_ex2_bit),
--- a/src/KOKKOS/npair_trim_kokkos.cpp
+++ b/src/KOKKOS/npair_trim_kokkos.cpp
@ -62,8 +62,8 @@ void NPairTrimKokkos<DeviceType>::trim_to_kokkos(NeighList *list)
  d_ilist_copy = k_list_copy->d_ilist;
  d_numneigh_copy = k_list_copy->d_numneigh;
  d_neighbors_copy = k_list_copy->d_neighbors;
-  int inum_copy = list->listcopy->inum;
+  int inum_trim = list->listcopy->inum;
-  if (list->ghost) inum_copy += list->listcopy->gnum;
+  if (list->ghost) inum_trim += list->listcopy->gnum;
  NeighListKokkos<DeviceType>* k_list = static_cast<NeighListKokkos<DeviceType>*>(list);
  k_list->maxneighs = k_list_copy->maxneighs; // simple, but could be made more memory efficient
@ -75,7 +75,7 @@ void NPairTrimKokkos<DeviceType>::trim_to_kokkos(NeighList *list)
  // loop over parent list and trim
  copymode = 1;
-  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagNPairTrim>(0,inum_copy),*this);
+  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagNPairTrim>(0,inum_trim),*this);
  copymode = 0;
  list->inum = k_list_copy->inum;
@ -132,8 +132,8 @@ void NPairTrimKokkos<DeviceType>::trim_to_cpu(NeighList *list)
  int inum = listcopy->inum;
  int gnum = listcopy->gnum;
-  int inum_all = inum;
+  int inum_trim = inum;
-  if (list->ghost) inum_all += gnum;
+  if (list->ghost) inum_trim += gnum;
  auto h_ilist = listcopy_kk->k_ilist.h_view;
  auto h_numneigh = Kokkos::create_mirror_view_and_copy(LMPHostType(),listcopy_kk->d_numneigh);
  auto h_neighbors = Kokkos::create_mirror_view_and_copy(LMPHostType(),listcopy_kk->d_neighbors);
@ -151,7 +151,7 @@ void NPairTrimKokkos<DeviceType>::trim_to_cpu(NeighList *list)
  MyPage<int> *ipage = list->ipage;
  ipage->reset();
-  for (int ii = 0; ii < inum_all; ii++) {
+  for (int ii = 0; ii < inum_trim; ii++) {
    int n = 0;
    neighptr = ipage->vget();
--- a/src/KOKKOS/pair_buck_coul_cut_kokkos.h
+++ b/src/KOKKOS/pair_buck_coul_cut_kokkos.h
@ -112,15 +112,18 @@ class PairBuckCoulCutKokkos : public PairBuckCoulCut {
  void allocate() override;
-  friend struct PairComputeFunctor<PairBuckCoulCutKokkos,FULL,true>;
+  friend struct PairComputeFunctor<PairBuckCoulCutKokkos,FULL,true,0>;
  friend struct PairComputeFunctor<PairBuckCoulCutKokkos,FULL,true,1>;
  friend struct PairComputeFunctor<PairBuckCoulCutKokkos,HALF,true>;
  friend struct PairComputeFunctor<PairBuckCoulCutKokkos,HALFTHREAD,true>;
-  friend struct PairComputeFunctor<PairBuckCoulCutKokkos,FULL,false>;
+  friend struct PairComputeFunctor<PairBuckCoulCutKokkos,FULL,false,0>;
  friend struct PairComputeFunctor<PairBuckCoulCutKokkos,FULL,false,1>;
  friend struct PairComputeFunctor<PairBuckCoulCutKokkos,HALF,false>;
  friend struct PairComputeFunctor<PairBuckCoulCutKokkos,HALFTHREAD,false>;
-  friend EV_FLOAT pair_compute_neighlist<PairBuckCoulCutKokkos,FULL,void>(PairBuckCoulCutKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairBuckCoulCutKokkos,FULL,0>(PairBuckCoulCutKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairBuckCoulCutKokkos,HALF,void>(PairBuckCoulCutKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairBuckCoulCutKokkos,FULL,1>(PairBuckCoulCutKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairBuckCoulCutKokkos,HALFTHREAD,void>(PairBuckCoulCutKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairBuckCoulCutKokkos,HALF>(PairBuckCoulCutKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute_neighlist<PairBuckCoulCutKokkos,HALFTHREAD>(PairBuckCoulCutKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute<PairBuckCoulCutKokkos,void>(PairBuckCoulCutKokkos*,
                                                            NeighListKokkos<DeviceType>*);
  friend void pair_virial_fdotr_compute<PairBuckCoulCutKokkos>(PairBuckCoulCutKokkos*);
--- a/src/KOKKOS/pair_buck_coul_long_kokkos.h
+++ b/src/KOKKOS/pair_buck_coul_long_kokkos.h
@ -115,27 +115,33 @@ class PairBuckCoulLongKokkos : public PairBuckCoulLong {
  void allocate() override;
-  friend struct PairComputeFunctor<PairBuckCoulLongKokkos,FULL,true,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairBuckCoulLongKokkos,FULL,true,0,CoulLongTable<1>>;
-  friend struct PairComputeFunctor<PairBuckCoulLongKokkos,HALF,true,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairBuckCoulLongKokkos,FULL,true,1,CoulLongTable<1>>;
-  friend struct PairComputeFunctor<PairBuckCoulLongKokkos,HALFTHREAD,true,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairBuckCoulLongKokkos,HALF,true,0,CoulLongTable<1>>;
-  friend struct PairComputeFunctor<PairBuckCoulLongKokkos,FULL,false,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairBuckCoulLongKokkos,HALFTHREAD,true,0,CoulLongTable<1>>;
-  friend struct PairComputeFunctor<PairBuckCoulLongKokkos,HALF,false,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairBuckCoulLongKokkos,FULL,false,0,CoulLongTable<1>>;
-  friend struct PairComputeFunctor<PairBuckCoulLongKokkos,HALFTHREAD,false,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairBuckCoulLongKokkos,FULL,false,1,CoulLongTable<1>>;
-  friend EV_FLOAT pair_compute_neighlist<PairBuckCoulLongKokkos,FULL,CoulLongTable<1> >(PairBuckCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend struct PairComputeFunctor<PairBuckCoulLongKokkos,HALF,false,0,CoulLongTable<1>>;
-  friend EV_FLOAT pair_compute_neighlist<PairBuckCoulLongKokkos,HALF,CoulLongTable<1> >(PairBuckCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend struct PairComputeFunctor<PairBuckCoulLongKokkos,HALFTHREAD,false,0,CoulLongTable<1>>;
-  friend EV_FLOAT pair_compute_neighlist<PairBuckCoulLongKokkos,HALFTHREAD,CoulLongTable<1> >(PairBuckCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairBuckCoulLongKokkos,FULL,0,CoulLongTable<1>>(PairBuckCoulLongKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute<PairBuckCoulLongKokkos,CoulLongTable<1> >(PairBuckCoulLongKokkos*,
+  friend EV_FLOAT pair_compute_neighlist<PairBuckCoulLongKokkos,FULL,1,CoulLongTable<1>>(PairBuckCoulLongKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute_neighlist<PairBuckCoulLongKokkos,HALF,0,CoulLongTable<1>>(PairBuckCoulLongKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute_neighlist<PairBuckCoulLongKokkos,HALFTHREAD,0,CoulLongTable<1>>(PairBuckCoulLongKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute<PairBuckCoulLongKokkos,CoulLongTable<1>>(PairBuckCoulLongKokkos*,
                                                            NeighListKokkos<DeviceType>*);
-  friend struct PairComputeFunctor<PairBuckCoulLongKokkos,FULL,true,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairBuckCoulLongKokkos,FULL,true,0,CoulLongTable<0>>;
-  friend struct PairComputeFunctor<PairBuckCoulLongKokkos,HALF,true,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairBuckCoulLongKokkos,FULL,true,1,CoulLongTable<0>>;
-  friend struct PairComputeFunctor<PairBuckCoulLongKokkos,HALFTHREAD,true,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairBuckCoulLongKokkos,HALF,true,0,CoulLongTable<0>>;
-  friend struct PairComputeFunctor<PairBuckCoulLongKokkos,FULL,false,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairBuckCoulLongKokkos,HALFTHREAD,true,0,CoulLongTable<0>>;
-  friend struct PairComputeFunctor<PairBuckCoulLongKokkos,HALF,false,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairBuckCoulLongKokkos,FULL,false,0,CoulLongTable<0>>;
-  friend struct PairComputeFunctor<PairBuckCoulLongKokkos,HALFTHREAD,false,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairBuckCoulLongKokkos,FULL,false,1,CoulLongTable<0>>;
-  friend EV_FLOAT pair_compute_neighlist<PairBuckCoulLongKokkos,FULL,CoulLongTable<0> >(PairBuckCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend struct PairComputeFunctor<PairBuckCoulLongKokkos,HALF,false,0,CoulLongTable<0>>;
-  friend EV_FLOAT pair_compute_neighlist<PairBuckCoulLongKokkos,HALF,CoulLongTable<0> >(PairBuckCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend struct PairComputeFunctor<PairBuckCoulLongKokkos,HALFTHREAD,false,0,CoulLongTable<0>>;
-  friend EV_FLOAT pair_compute_neighlist<PairBuckCoulLongKokkos,HALFTHREAD,CoulLongTable<0> >(PairBuckCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairBuckCoulLongKokkos,FULL,0,CoulLongTable<0>>(PairBuckCoulLongKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute<PairBuckCoulLongKokkos,CoulLongTable<0> >(PairBuckCoulLongKokkos*,
+  friend EV_FLOAT pair_compute_neighlist<PairBuckCoulLongKokkos,FULL,1,CoulLongTable<0>>(PairBuckCoulLongKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute_neighlist<PairBuckCoulLongKokkos,HALF,0,CoulLongTable<0>>(PairBuckCoulLongKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute_neighlist<PairBuckCoulLongKokkos,HALFTHREAD,0,CoulLongTable<0>>(PairBuckCoulLongKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute<PairBuckCoulLongKokkos,CoulLongTable<0>>(PairBuckCoulLongKokkos*,
                                                            NeighListKokkos<DeviceType>*);
  friend void pair_virial_fdotr_compute<PairBuckCoulLongKokkos>(PairBuckCoulLongKokkos*);
--- a/src/KOKKOS/pair_buck_kokkos.h
+++ b/src/KOKKOS/pair_buck_kokkos.h
@ -91,16 +91,19 @@ class PairBuckKokkos : public PairBuck {
  int nlocal,nall,eflag,vflag;
  void allocate() override;
-  friend struct PairComputeFunctor<PairBuckKokkos,FULL,true>;
+  friend struct PairComputeFunctor<PairBuckKokkos,FULL,true,0>;
  friend struct PairComputeFunctor<PairBuckKokkos,FULL,true,1>;
  friend struct PairComputeFunctor<PairBuckKokkos,HALF,true>;
  friend struct PairComputeFunctor<PairBuckKokkos,HALFTHREAD,true>;
-  friend struct PairComputeFunctor<PairBuckKokkos,FULL,false>;
+  friend struct PairComputeFunctor<PairBuckKokkos,FULL,false,0>;
  friend struct PairComputeFunctor<PairBuckKokkos,FULL,false,1>;
  friend struct PairComputeFunctor<PairBuckKokkos,HALF,false>;
  friend struct PairComputeFunctor<PairBuckKokkos,HALFTHREAD,false>;
-  friend EV_FLOAT pair_compute_neighlist<PairBuckKokkos,FULL,void>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairBuckKokkos,FULL,0>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairBuckKokkos,HALF,void>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairBuckKokkos,FULL,1>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairBuckKokkos,HALFTHREAD,void>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairBuckKokkos,HALF>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute<PairBuckKokkos,void>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairBuckKokkos,HALFTHREAD>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute<PairBuckKokkos>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
  friend void pair_virial_fdotr_compute<PairBuckKokkos>(PairBuckKokkos*);
 };
--- a/src/KOKKOS/pair_coul_cut_kokkos.h
+++ b/src/KOKKOS/pair_coul_cut_kokkos.h
@ -112,15 +112,18 @@ class PairCoulCutKokkos : public PairCoulCut {
  double qqrd2e;
  void allocate() override;
-  friend struct PairComputeFunctor<PairCoulCutKokkos,FULL,true>;
+  friend struct PairComputeFunctor<PairCoulCutKokkos,FULL,true,0>;
  friend struct PairComputeFunctor<PairCoulCutKokkos,FULL,true,1>;
  friend struct PairComputeFunctor<PairCoulCutKokkos,HALF,true>;
  friend struct PairComputeFunctor<PairCoulCutKokkos,HALFTHREAD,true>;
-  friend struct PairComputeFunctor<PairCoulCutKokkos,FULL,false>;
+  friend struct PairComputeFunctor<PairCoulCutKokkos,FULL,false,0>;
  friend struct PairComputeFunctor<PairCoulCutKokkos,FULL,false,1>;
  friend struct PairComputeFunctor<PairCoulCutKokkos,HALF,false>;
  friend struct PairComputeFunctor<PairCoulCutKokkos,HALFTHREAD,false>;
-  friend EV_FLOAT pair_compute_neighlist<PairCoulCutKokkos,FULL,void>(PairCoulCutKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairCoulCutKokkos,FULL,0>(PairCoulCutKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairCoulCutKokkos,HALF,void>(PairCoulCutKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairCoulCutKokkos,FULL,1>(PairCoulCutKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairCoulCutKokkos,HALFTHREAD,void>(PairCoulCutKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairCoulCutKokkos,HALF>(PairCoulCutKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute_neighlist<PairCoulCutKokkos,HALFTHREAD>(PairCoulCutKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute<PairCoulCutKokkos,void>(PairCoulCutKokkos*,
                                                       NeighListKokkos<DeviceType>*);
  friend void pair_virial_fdotr_compute<PairCoulCutKokkos>(PairCoulCutKokkos*);
--- a/src/KOKKOS/pair_coul_debye_kokkos.h
+++ b/src/KOKKOS/pair_coul_debye_kokkos.h
@ -112,15 +112,18 @@ class PairCoulDebyeKokkos : public PairCoulDebye {
  double qqrd2e;
  void allocate() override;
-  friend struct PairComputeFunctor<PairCoulDebyeKokkos,FULL,true>;
+  friend struct PairComputeFunctor<PairCoulDebyeKokkos,FULL,true,0>;
  friend struct PairComputeFunctor<PairCoulDebyeKokkos,FULL,true,1>;
  friend struct PairComputeFunctor<PairCoulDebyeKokkos,HALF,true>;
  friend struct PairComputeFunctor<PairCoulDebyeKokkos,HALFTHREAD,true>;
-  friend struct PairComputeFunctor<PairCoulDebyeKokkos,FULL,false>;
+  friend struct PairComputeFunctor<PairCoulDebyeKokkos,FULL,false,0>;
  friend struct PairComputeFunctor<PairCoulDebyeKokkos,FULL,false,1>;
  friend struct PairComputeFunctor<PairCoulDebyeKokkos,HALF,false>;
  friend struct PairComputeFunctor<PairCoulDebyeKokkos,HALFTHREAD,false>;
-  friend EV_FLOAT pair_compute_neighlist<PairCoulDebyeKokkos,FULL,void>(PairCoulDebyeKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairCoulDebyeKokkos,FULL,0>(PairCoulDebyeKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairCoulDebyeKokkos,HALF,void>(PairCoulDebyeKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairCoulDebyeKokkos,FULL,1>(PairCoulDebyeKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairCoulDebyeKokkos,HALFTHREAD,void>(PairCoulDebyeKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairCoulDebyeKokkos,HALF>(PairCoulDebyeKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute_neighlist<PairCoulDebyeKokkos,HALFTHREAD>(PairCoulDebyeKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute<PairCoulDebyeKokkos,void>(PairCoulDebyeKokkos*,
                                                            NeighListKokkos<DeviceType>*);
  friend void pair_virial_fdotr_compute<PairCoulDebyeKokkos>(PairCoulDebyeKokkos*);
--- a/src/KOKKOS/pair_coul_long_kokkos.h
+++ b/src/KOKKOS/pair_coul_long_kokkos.h
@ -114,27 +114,33 @@ class PairCoulLongKokkos : public PairCoulLong {
  void allocate() override;
-  friend struct PairComputeFunctor<PairCoulLongKokkos,FULL,true,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairCoulLongKokkos,FULL,true,0,CoulLongTable<1>>;
-  friend struct PairComputeFunctor<PairCoulLongKokkos,HALF,true,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairCoulLongKokkos,FULL,true,1,CoulLongTable<1>>;
-  friend struct PairComputeFunctor<PairCoulLongKokkos,HALFTHREAD,true,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairCoulLongKokkos,HALF,true,0,CoulLongTable<1>>;
-  friend struct PairComputeFunctor<PairCoulLongKokkos,FULL,false,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairCoulLongKokkos,HALFTHREAD,true,0,CoulLongTable<1>>;
-  friend struct PairComputeFunctor<PairCoulLongKokkos,HALF,false,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairCoulLongKokkos,FULL,false,0,CoulLongTable<1>>;
-  friend struct PairComputeFunctor<PairCoulLongKokkos,HALFTHREAD,false,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairCoulLongKokkos,FULL,false,1,CoulLongTable<1>>;
-  friend EV_FLOAT pair_compute_neighlist<PairCoulLongKokkos,FULL,CoulLongTable<1> >(PairCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend struct PairComputeFunctor<PairCoulLongKokkos,HALF,false,0,CoulLongTable<1>>;
-  friend EV_FLOAT pair_compute_neighlist<PairCoulLongKokkos,HALF,CoulLongTable<1> >(PairCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend struct PairComputeFunctor<PairCoulLongKokkos,HALFTHREAD,false,0,CoulLongTable<1>>;
-  friend EV_FLOAT pair_compute_neighlist<PairCoulLongKokkos,HALFTHREAD,CoulLongTable<1> >(PairCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairCoulLongKokkos,FULL,0,CoulLongTable<1>>(PairCoulLongKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute<PairCoulLongKokkos,CoulLongTable<1> >(PairCoulLongKokkos*,
+  friend EV_FLOAT pair_compute_neighlist<PairCoulLongKokkos,FULL,1,CoulLongTable<1>>(PairCoulLongKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute_neighlist<PairCoulLongKokkos,HALF,0,CoulLongTable<1>>(PairCoulLongKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute_neighlist<PairCoulLongKokkos,HALFTHREAD,0,CoulLongTable<1>>(PairCoulLongKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute<PairCoulLongKokkos,CoulLongTable<1>>(PairCoulLongKokkos*,
                                                            NeighListKokkos<DeviceType>*);
-  friend struct PairComputeFunctor<PairCoulLongKokkos,FULL,true,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairCoulLongKokkos,FULL,true,0,CoulLongTable<0>>;
-  friend struct PairComputeFunctor<PairCoulLongKokkos,HALF,true,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairCoulLongKokkos,FULL,true,1,CoulLongTable<0>>;
-  friend struct PairComputeFunctor<PairCoulLongKokkos,HALFTHREAD,true,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairCoulLongKokkos,HALF,true,0,CoulLongTable<0>>;
-  friend struct PairComputeFunctor<PairCoulLongKokkos,FULL,false,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairCoulLongKokkos,HALFTHREAD,true,0,CoulLongTable<0>>;
-  friend struct PairComputeFunctor<PairCoulLongKokkos,HALF,false,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairCoulLongKokkos,FULL,false,0,CoulLongTable<0>>;
-  friend struct PairComputeFunctor<PairCoulLongKokkos,HALFTHREAD,false,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairCoulLongKokkos,FULL,false,1,CoulLongTable<0>>;
-  friend EV_FLOAT pair_compute_neighlist<PairCoulLongKokkos,FULL,CoulLongTable<0> >(PairCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend struct PairComputeFunctor<PairCoulLongKokkos,HALF,false,0,CoulLongTable<0>>;
-  friend EV_FLOAT pair_compute_neighlist<PairCoulLongKokkos,HALF,CoulLongTable<0> >(PairCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend struct PairComputeFunctor<PairCoulLongKokkos,HALFTHREAD,false,0,CoulLongTable<0>>;
-  friend EV_FLOAT pair_compute_neighlist<PairCoulLongKokkos,HALFTHREAD,CoulLongTable<0> >(PairCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairCoulLongKokkos,FULL,0,CoulLongTable<0>>(PairCoulLongKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute<PairCoulLongKokkos,CoulLongTable<0> >(PairCoulLongKokkos*,
+  friend EV_FLOAT pair_compute_neighlist<PairCoulLongKokkos,FULL,1,CoulLongTable<0>>(PairCoulLongKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute_neighlist<PairCoulLongKokkos,HALF,0,CoulLongTable<0>>(PairCoulLongKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute_neighlist<PairCoulLongKokkos,HALFTHREAD,0,CoulLongTable<0>>(PairCoulLongKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute<PairCoulLongKokkos,CoulLongTable<0>>(PairCoulLongKokkos*,
                                                            NeighListKokkos<DeviceType>*);
  friend void pair_virial_fdotr_compute<PairCoulLongKokkos>(PairCoulLongKokkos*);
--- a/src/KOKKOS/pair_eam_alloy_kokkos.cpp
+++ b/src/KOKKOS/pair_eam_alloy_kokkos.cpp
@ -1477,7 +1477,7 @@ void PairEAMAlloyKokkos<DeviceType>::file2array_alloy()
 template<typename DeviceType>
 template<class TAG>
 struct PairEAMAlloyKokkos<DeviceType>::policyInstance {
-  KOKKOS_INLINE_FUNCTION
+
  static auto get(int inum) {
    auto policy = Kokkos::RangePolicy<DeviceType, TAG>(0,inum);
    return policy;
@ -1488,7 +1488,7 @@ struct PairEAMAlloyKokkos<DeviceType>::policyInstance {
 template<>
 template<class TAG>
 struct PairEAMAlloyKokkos<Kokkos::Experimental::HIP>::policyInstance {
-  KOKKOS_INLINE_FUNCTION
+
  static auto get(int inum) {
    static_assert(t_ffloat_2d_n7::static_extent(2) == 7,
                  "Breaking assumption of spline dim for KernelAB and KernelC scratch caching");
--- a/src/KOKKOS/pair_eam_fs_kokkos.cpp
+++ b/src/KOKKOS/pair_eam_fs_kokkos.cpp
@ -1487,7 +1487,7 @@ void PairEAMFSKokkos<DeviceType>::file2array_fs()
 template<typename DeviceType>
 template<class TAG>
 struct PairEAMFSKokkos<DeviceType>::policyInstance {
-  KOKKOS_INLINE_FUNCTION
+
  static auto get(int inum) {
    auto policy = Kokkos::RangePolicy<DeviceType, TAG>(0,inum);
    return policy;
@ -1498,7 +1498,7 @@ struct PairEAMFSKokkos<DeviceType>::policyInstance {
 template<>
 template<class TAG>
 struct PairEAMFSKokkos<Kokkos::Experimental::HIP>::policyInstance {
-  KOKKOS_INLINE_FUNCTION
+
  static auto get(int inum) {
    static_assert(t_ffloat_2d_n7::static_extent(2) == 7,
                  "Breaking assumption of spline dim for KernelAB and KernelC scratch caching");
--- a/src/KOKKOS/pair_eam_kokkos.cpp
+++ b/src/KOKKOS/pair_eam_kokkos.cpp
@ -1162,7 +1162,7 @@ void PairEAMKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int &i, const int &
 template<typename DeviceType>
 template<class TAG>
 struct PairEAMKokkos<DeviceType>::policyInstance {
-  KOKKOS_INLINE_FUNCTION
+
  static auto get(int inum) {
    auto policy = Kokkos::RangePolicy<DeviceType, TAG>(0,inum);
    return policy;
@ -1173,7 +1173,7 @@ struct PairEAMKokkos<DeviceType>::policyInstance {
 template<>
 template<class TAG>
 struct PairEAMKokkos<Kokkos::Experimental::HIP>::policyInstance {
-  KOKKOS_INLINE_FUNCTION
+
  static auto get(int inum) {
    static_assert(t_ffloat_2d_n7::static_extent(2) == 7,
                  "Breaking assumption of spline dim for KernelAB and KernelC scratch caching");
--- a/src/KOKKOS/pair_kokkos.h
+++ b/src/KOKKOS/pair_kokkos.h
@ -50,7 +50,7 @@ struct DoCoul<1> {
 //Specialisation for Neighborlist types Half, HalfThread, Full
-template <class PairStyle, int NEIGHFLAG, bool STACKPARAMS, class Specialisation = void>
+template <class PairStyle, int NEIGHFLAG, bool STACKPARAMS, int ZEROFLAG = 0, class Specialisation = void>
 struct PairComputeFunctor  {
  typedef typename PairStyle::device_type device_type ;
  typedef ArrayTypes<device_type> AT;
@ -137,7 +137,7 @@ struct PairComputeFunctor  {
    F_FLOAT fytmp = 0.0;
    F_FLOAT fztmp = 0.0;
-    if (NEIGHFLAG == FULL) {
+    if (NEIGHFLAG == FULL && ZEROFLAG) {
      f(i,0) = 0.0;
      f(i,1) = 0.0;
      f(i,2) = 0.0;
@ -211,7 +211,7 @@ struct PairComputeFunctor  {
    F_FLOAT fytmp = 0.0;
    F_FLOAT fztmp = 0.0;
-    if (NEIGHFLAG == FULL) {
+    if (NEIGHFLAG == FULL && ZEROFLAG) {
      f(i,0) = 0.0;
      f(i,1) = 0.0;
      f(i,2) = 0.0;
@ -292,11 +292,13 @@ struct PairComputeFunctor  {
      const X_FLOAT ztmp = c.x(i,2);
      const int itype = c.type(i);
      if (ZEROFLAG) {
        Kokkos::single(Kokkos::PerThread(team), [&] (){
          f(i,0) = 0.0;
          f(i,1) = 0.0;
          f(i,2) = 0.0;
        });
      }
      const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i);
      const int jnum = list.d_numneigh[i];
@ -355,11 +357,13 @@ struct PairComputeFunctor  {
      const int itype = c.type(i);
      const F_FLOAT qtmp = c.q(i);
      if (ZEROFLAG) {
        Kokkos::single(Kokkos::PerThread(team), [&] (){
          f(i,0) = 0.0;
          f(i,1) = 0.0;
          f(i,2) = 0.0;
        });
      }
      const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i);
      const int jnum = list.d_numneigh[i];
@ -423,11 +427,13 @@ struct PairComputeFunctor  {
      const X_FLOAT ztmp = c.x(i,2);
      const int itype = c.type(i);
      if (ZEROFLAG) {
        Kokkos::single(Kokkos::PerThread(team), [&] (){
          f(i,0) = 0.0;
          f(i,1) = 0.0;
          f(i,2) = 0.0;
        });
      }
      const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i);
      const int jnum = list.d_numneigh[i];
@ -525,11 +531,13 @@ struct PairComputeFunctor  {
      const int itype = c.type(i);
      const F_FLOAT qtmp = c.q(i);
      if (ZEROFLAG) {
        Kokkos::single(Kokkos::PerThread(team), [&] (){
          f(i,0) = 0.0;
          f(i,1) = 0.0;
          f(i,2) = 0.0;
        });
      }
      const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i);
      const int jnum = list.d_numneigh[i];
@ -740,7 +748,7 @@ struct PairComputeFunctor  {
 // By having the enable_if with a ! and without it, exactly one of the functions
 // pair_compute_neighlist will match - either the dummy version
 // or the real one further below.
-template<class PairStyle, unsigned NEIGHFLAG, class Specialisation>
+template<class PairStyle, unsigned NEIGHFLAG, int ZEROFLAG = 0, class Specialisation = void>
 EV_FLOAT pair_compute_neighlist (PairStyle* fpair, std::enable_if_t<!((NEIGHFLAG&PairStyle::EnabledNeighFlags) != 0), NeighListKokkos<typename PairStyle::device_type>*> list) {
  EV_FLOAT ev;
  (void) fpair;
@ -770,7 +778,7 @@ int GetTeamSize(FunctorStyle& KOKKOS_GPU_ARG(functor), int KOKKOS_GPU_ARG(inum),
 }
 // Submit ParallelFor for NEIGHFLAG=HALF,HALFTHREAD,FULL
-template<class PairStyle, unsigned NEIGHFLAG, class Specialisation>
+template<class PairStyle, unsigned NEIGHFLAG, int ZEROFLAG = 0, class Specialisation = void>
 EV_FLOAT pair_compute_neighlist (PairStyle* fpair, std::enable_if_t<(NEIGHFLAG&PairStyle::EnabledNeighFlags) != 0, NeighListKokkos<typename PairStyle::device_type>*> list) {
  EV_FLOAT ev;
@ -784,13 +792,13 @@ EV_FLOAT pair_compute_neighlist (PairStyle* fpair, std::enable_if_t<(NEIGHFLAG&P
    int atoms_per_team = 32;
    if (fpair->atom->ntypes > MAX_TYPES_STACKPARAMS) {
-      PairComputeFunctor<PairStyle,NEIGHFLAG,false,Specialisation > ff(fpair,list);
+      PairComputeFunctor<PairStyle,NEIGHFLAG,false,ZEROFLAG,Specialisation > ff(fpair,list);
      atoms_per_team = GetTeamSize<typename PairStyle::device_type>(ff, list->inum, (fpair->eflag || fpair->vflag), atoms_per_team, vector_length);
      Kokkos::TeamPolicy<typename PairStyle::device_type,Kokkos::IndexType<int> > policy(list->inum,atoms_per_team,vector_length);
      if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(policy,ff,ev);
      else                              Kokkos::parallel_for(policy,ff);
    } else {
-      PairComputeFunctor<PairStyle,NEIGHFLAG,true,Specialisation > ff(fpair,list);
+      PairComputeFunctor<PairStyle,NEIGHFLAG,true,ZEROFLAG,Specialisation > ff(fpair,list);
      atoms_per_team = GetTeamSize<typename PairStyle::device_type>(ff, list->inum, (fpair->eflag || fpair->vflag), atoms_per_team, vector_length);
      Kokkos::TeamPolicy<typename PairStyle::device_type,Kokkos::IndexType<int> > policy(list->inum,atoms_per_team,vector_length);
      if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(policy,ff,ev);
@ -798,12 +806,12 @@ EV_FLOAT pair_compute_neighlist (PairStyle* fpair, std::enable_if_t<(NEIGHFLAG&P
    }
  } else {
    if (fpair->atom->ntypes > MAX_TYPES_STACKPARAMS) {
-      PairComputeFunctor<PairStyle,NEIGHFLAG,false,Specialisation > ff(fpair,list);
+      PairComputeFunctor<PairStyle,NEIGHFLAG,false,ZEROFLAG,Specialisation > ff(fpair,list);
      if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
      else                              Kokkos::parallel_for(list->inum,ff);
      ff.contribute();
    } else {
-      PairComputeFunctor<PairStyle,NEIGHFLAG,true,Specialisation > ff(fpair,list);
+      PairComputeFunctor<PairStyle,NEIGHFLAG,true,ZEROFLAG,Specialisation > ff(fpair,list);
      if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
      else                              Kokkos::parallel_for(list->inum,ff);
      ff.contribute();
@ -812,16 +820,21 @@ EV_FLOAT pair_compute_neighlist (PairStyle* fpair, std::enable_if_t<(NEIGHFLAG&P
  return ev;
 }
-template<class PairStyle, class Specialisation>
+template<class PairStyle, class Specialisation = void>
 EV_FLOAT pair_compute (PairStyle* fpair, NeighListKokkos<typename PairStyle::device_type>* list) {
  // Selects the pair_compute_neighlist instantiation that matches the runtime
  // value of fpair->neighflag and returns its EV_FLOAT result. If neighflag is
  // none of FULL, HALFTHREAD or HALF, ev is returned as default-constructed.
  EV_FLOAT ev;
  if (fpair->neighflag == FULL) {
    // The third template argument is ZEROFLAG. PairComputeFunctor only zeroes
    // f(i,0..2) itself when NEIGHFLAG == FULL and ZEROFLAG is non-zero.
    if (utils::strmatch(fpair->lmp->force->pair_style,"^hybrid/overlay")) {
      // Pair style name matches "^hybrid/overlay": run with ZEROFLAG = 0 so the
      // kernel does not reset f(i,:).
      // NOTE(review): presumably because other sub-styles also add into f;
      // confirm against pair hybrid/overlay.
      fpair->fuse_force_clear_flag = 0;
      ev = pair_compute_neighlist<PairStyle,FULL,0,Specialisation> (fpair,list);
    } else {
      // Any other pair style: run with ZEROFLAG = 1 and record that choice in
      // fuse_force_clear_flag.
      // NOTE(review): the consumer of this flag is not visible here; verify.
      fpair->fuse_force_clear_flag = 1;
-    ev = pair_compute_neighlist<PairStyle,FULL,Specialisation> (fpair,list);
+      ev = pair_compute_neighlist<PairStyle,FULL,1,Specialisation> (fpair,list);
    }
  } else if (fpair->neighflag == HALFTHREAD) {
    // Half neighbor lists are always dispatched with ZEROFLAG = 0.
-    ev = pair_compute_neighlist<PairStyle,HALFTHREAD,Specialisation> (fpair,list);
+    ev = pair_compute_neighlist<PairStyle,HALFTHREAD,0,Specialisation> (fpair,list);
  } else if (fpair->neighflag == HALF) {
-    ev = pair_compute_neighlist<PairStyle,HALF,Specialisation> (fpair,list);
+    ev = pair_compute_neighlist<PairStyle,HALF,0,Specialisation> (fpair,list);
  }
  return ev;
 }
--- a/src/KOKKOS/pair_lj_charmm_coul_charmm_implicit_kokkos.h
+++ b/src/KOKKOS/pair_lj_charmm_coul_charmm_implicit_kokkos.h
@ -110,27 +110,33 @@ class PairLJCharmmCoulCharmmImplicitKokkos : public PairLJCharmmCoulCharmmImplic
  void allocate() override;
-  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,FULL,true,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,FULL,true,0,CoulLongTable<1>>;
-  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,HALF,true,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,FULL,true,1,CoulLongTable<1>>;
-  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,HALFTHREAD,true,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,HALF,true,0,CoulLongTable<1>>;
-  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,FULL,false,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,HALFTHREAD,true,0,CoulLongTable<1>>;
-  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,HALF,false,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,FULL,false,0,CoulLongTable<1>>;
-  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,HALFTHREAD,false,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,FULL,false,1,CoulLongTable<1>>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmImplicitKokkos,FULL,CoulLongTable<1> >(PairLJCharmmCoulCharmmImplicitKokkos*,NeighListKokkos<DeviceType>*);
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,HALF,false,0,CoulLongTable<1>>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmImplicitKokkos,HALF,CoulLongTable<1> >(PairLJCharmmCoulCharmmImplicitKokkos*,NeighListKokkos<DeviceType>*);
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,HALFTHREAD,false,0,CoulLongTable<1>>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmImplicitKokkos,HALFTHREAD,CoulLongTable<1> >(PairLJCharmmCoulCharmmImplicitKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmImplicitKokkos,FULL,0,CoulLongTable<1>>(PairLJCharmmCoulCharmmImplicitKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute<PairLJCharmmCoulCharmmImplicitKokkos,CoulLongTable<1> >(PairLJCharmmCoulCharmmImplicitKokkos*,
+  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmImplicitKokkos,FULL,1,CoulLongTable<1>>(PairLJCharmmCoulCharmmImplicitKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmImplicitKokkos,HALF,0,CoulLongTable<1>>(PairLJCharmmCoulCharmmImplicitKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmImplicitKokkos,HALFTHREAD,0,CoulLongTable<1>>(PairLJCharmmCoulCharmmImplicitKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute<PairLJCharmmCoulCharmmImplicitKokkos,CoulLongTable<1>>(PairLJCharmmCoulCharmmImplicitKokkos*,
                                                            NeighListKokkos<DeviceType>*);
-  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,FULL,true,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,FULL,true,0,CoulLongTable<0>>;
-  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,HALF,true,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,FULL,true,1,CoulLongTable<0>>;
-  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,HALFTHREAD,true,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,HALF,true,0,CoulLongTable<0>>;
-  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,FULL,false,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,HALFTHREAD,true,0,CoulLongTable<0>>;
-  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,HALF,false,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,FULL,false,0,CoulLongTable<0>>;
-  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,HALFTHREAD,false,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,FULL,false,1,CoulLongTable<0>>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmImplicitKokkos,FULL,CoulLongTable<0> >(PairLJCharmmCoulCharmmImplicitKokkos*,NeighListKokkos<DeviceType>*);
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,HALF,false,0,CoulLongTable<0>>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmImplicitKokkos,HALF,CoulLongTable<0> >(PairLJCharmmCoulCharmmImplicitKokkos*,NeighListKokkos<DeviceType>*);
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,HALFTHREAD,false,0,CoulLongTable<0>>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmImplicitKokkos,HALFTHREAD,CoulLongTable<0> >(PairLJCharmmCoulCharmmImplicitKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmImplicitKokkos,FULL,0,CoulLongTable<0>>(PairLJCharmmCoulCharmmImplicitKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute<PairLJCharmmCoulCharmmImplicitKokkos,CoulLongTable<0> >(PairLJCharmmCoulCharmmImplicitKokkos*,
+  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmImplicitKokkos,FULL,1,CoulLongTable<0>>(PairLJCharmmCoulCharmmImplicitKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmImplicitKokkos,HALF,0,CoulLongTable<0>>(PairLJCharmmCoulCharmmImplicitKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmImplicitKokkos,HALFTHREAD,0,CoulLongTable<0>>(PairLJCharmmCoulCharmmImplicitKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute<PairLJCharmmCoulCharmmImplicitKokkos,CoulLongTable<0>>(PairLJCharmmCoulCharmmImplicitKokkos*,
                                                            NeighListKokkos<DeviceType>*);
  friend void pair_virial_fdotr_compute<PairLJCharmmCoulCharmmImplicitKokkos>(PairLJCharmmCoulCharmmImplicitKokkos*);
--- a/src/KOKKOS/pair_lj_charmm_coul_charmm_kokkos.h
+++ b/src/KOKKOS/pair_lj_charmm_coul_charmm_kokkos.h
@ -108,27 +108,33 @@ class PairLJCharmmCoulCharmmKokkos : public PairLJCharmmCoulCharmm {
  void allocate() override;
-  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,FULL,true,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,FULL,true,0,CoulLongTable<1>>;
-  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,HALF,true,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,FULL,true,1,CoulLongTable<1>>;
-  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,HALFTHREAD,true,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,HALF,true,0,CoulLongTable<1>>;
-  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,FULL,false,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,HALFTHREAD,true,0,CoulLongTable<1>>;
-  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,HALF,false,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,FULL,false,0,CoulLongTable<1>>;
-  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,HALFTHREAD,false,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,FULL,false,1,CoulLongTable<1>>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmKokkos,FULL,CoulLongTable<1> >(PairLJCharmmCoulCharmmKokkos*,NeighListKokkos<DeviceType>*);
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,HALF,false,0,CoulLongTable<1>>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmKokkos,HALF,CoulLongTable<1> >(PairLJCharmmCoulCharmmKokkos*,NeighListKokkos<DeviceType>*);
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,HALFTHREAD,false,0,CoulLongTable<1>>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmKokkos,HALFTHREAD,CoulLongTable<1> >(PairLJCharmmCoulCharmmKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmKokkos,FULL,0,CoulLongTable<1>>(PairLJCharmmCoulCharmmKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute<PairLJCharmmCoulCharmmKokkos,CoulLongTable<1> >(PairLJCharmmCoulCharmmKokkos*,
+  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmKokkos,FULL,1,CoulLongTable<1>>(PairLJCharmmCoulCharmmKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmKokkos,HALF,0,CoulLongTable<1>>(PairLJCharmmCoulCharmmKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmKokkos,HALFTHREAD,0,CoulLongTable<1>>(PairLJCharmmCoulCharmmKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute<PairLJCharmmCoulCharmmKokkos,CoulLongTable<1>>(PairLJCharmmCoulCharmmKokkos*,
                                                            NeighListKokkos<DeviceType>*);
-  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,FULL,true,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,FULL,true,0,CoulLongTable<0>>;
-  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,HALF,true,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,FULL,true,1,CoulLongTable<0>>;
-  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,HALFTHREAD,true,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,HALF,true,0,CoulLongTable<0>>;
-  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,FULL,false,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,HALFTHREAD,true,0,CoulLongTable<0>>;
-  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,HALF,false,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,FULL,false,0,CoulLongTable<0>>;
-  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,HALFTHREAD,false,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,FULL,false,1,CoulLongTable<0>>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmKokkos,FULL,CoulLongTable<0> >(PairLJCharmmCoulCharmmKokkos*,NeighListKokkos<DeviceType>*);
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,HALF,false,0,CoulLongTable<0>>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmKokkos,HALF,CoulLongTable<0> >(PairLJCharmmCoulCharmmKokkos*,NeighListKokkos<DeviceType>*);
+  friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,HALFTHREAD,false,0,CoulLongTable<0>>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmKokkos,HALFTHREAD,CoulLongTable<0> >(PairLJCharmmCoulCharmmKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmKokkos,FULL,0,CoulLongTable<0>>(PairLJCharmmCoulCharmmKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute<PairLJCharmmCoulCharmmKokkos,CoulLongTable<0> >(PairLJCharmmCoulCharmmKokkos*,
+  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmKokkos,FULL,1,CoulLongTable<0>>(PairLJCharmmCoulCharmmKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmKokkos,HALF,0,CoulLongTable<0>>(PairLJCharmmCoulCharmmKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmKokkos,HALFTHREAD,0,CoulLongTable<0>>(PairLJCharmmCoulCharmmKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute<PairLJCharmmCoulCharmmKokkos,CoulLongTable<0>>(PairLJCharmmCoulCharmmKokkos*,
                                                            NeighListKokkos<DeviceType>*);
  friend void pair_virial_fdotr_compute<PairLJCharmmCoulCharmmKokkos>(PairLJCharmmCoulCharmmKokkos*);
--- a/src/KOKKOS/pair_lj_charmm_coul_long_kokkos.h
+++ b/src/KOKKOS/pair_lj_charmm_coul_long_kokkos.h
@ -106,27 +106,33 @@ class PairLJCharmmCoulLongKokkos : public PairLJCharmmCoulLong {
  void allocate() override;
-  friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,FULL,true,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,FULL,true,0,CoulLongTable<1>>;
-  friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,HALF,true,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,FULL,true,1,CoulLongTable<1>>;
-  friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,HALFTHREAD,true,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,HALF,true,0,CoulLongTable<1>>;
-  friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,FULL,false,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,HALFTHREAD,true,0,CoulLongTable<1>>;
-  friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,HALF,false,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,FULL,false,0,CoulLongTable<1>>;
-  friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,HALFTHREAD,false,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,FULL,false,1,CoulLongTable<1>>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulLongKokkos,FULL,CoulLongTable<1> >(PairLJCharmmCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,HALF,false,0,CoulLongTable<1>>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulLongKokkos,HALF,CoulLongTable<1> >(PairLJCharmmCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,HALFTHREAD,false,0,CoulLongTable<1>>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulLongKokkos,HALFTHREAD,CoulLongTable<1> >(PairLJCharmmCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulLongKokkos,FULL,0,CoulLongTable<1>>(PairLJCharmmCoulLongKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute<PairLJCharmmCoulLongKokkos,CoulLongTable<1> >(PairLJCharmmCoulLongKokkos*,
+  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulLongKokkos,FULL,1,CoulLongTable<1>>(PairLJCharmmCoulLongKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulLongKokkos,HALF,0,CoulLongTable<1>>(PairLJCharmmCoulLongKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulLongKokkos,HALFTHREAD,0,CoulLongTable<1>>(PairLJCharmmCoulLongKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute<PairLJCharmmCoulLongKokkos,CoulLongTable<1>>(PairLJCharmmCoulLongKokkos*,
                                                            NeighListKokkos<DeviceType>*);
-  friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,FULL,true,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,FULL,true,0,CoulLongTable<0>>;
-  friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,HALF,true,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,FULL,true,1,CoulLongTable<0>>;
-  friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,HALFTHREAD,true,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,HALF,true,0,CoulLongTable<0>>;
-  friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,FULL,false,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,HALFTHREAD,true,0,CoulLongTable<0>>;
-  friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,HALF,false,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,FULL,false,0,CoulLongTable<0>>;
-  friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,HALFTHREAD,false,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,FULL,false,1,CoulLongTable<0>>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulLongKokkos,FULL,CoulLongTable<0> >(PairLJCharmmCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,HALF,false,0,CoulLongTable<0>>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulLongKokkos,HALF,CoulLongTable<0> >(PairLJCharmmCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,HALFTHREAD,false,0,CoulLongTable<0>>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulLongKokkos,HALFTHREAD,CoulLongTable<0> >(PairLJCharmmCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulLongKokkos,FULL,0,CoulLongTable<0>>(PairLJCharmmCoulLongKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute<PairLJCharmmCoulLongKokkos,CoulLongTable<0> >(PairLJCharmmCoulLongKokkos*,
+  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulLongKokkos,FULL,1,CoulLongTable<0>>(PairLJCharmmCoulLongKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulLongKokkos,HALF,0,CoulLongTable<0>>(PairLJCharmmCoulLongKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulLongKokkos,HALFTHREAD,0,CoulLongTable<0>>(PairLJCharmmCoulLongKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute<PairLJCharmmCoulLongKokkos,CoulLongTable<0>>(PairLJCharmmCoulLongKokkos*,
                                                            NeighListKokkos<DeviceType>*);
  friend void pair_virial_fdotr_compute<PairLJCharmmCoulLongKokkos>(PairLJCharmmCoulLongKokkos*);
--- a/src/KOKKOS/pair_lj_class2_coul_cut_kokkos.h
+++ b/src/KOKKOS/pair_lj_class2_coul_cut_kokkos.h
@ -104,15 +104,18 @@ class PairLJClass2CoulCutKokkos : public PairLJClass2CoulCut {
  double qqrd2e;
  void allocate() override;
-  friend struct PairComputeFunctor<PairLJClass2CoulCutKokkos,FULL,true>;
+  friend struct PairComputeFunctor<PairLJClass2CoulCutKokkos,FULL,true,0>;
  friend struct PairComputeFunctor<PairLJClass2CoulCutKokkos,FULL,true,1>;
  friend struct PairComputeFunctor<PairLJClass2CoulCutKokkos,HALF,true>;
  friend struct PairComputeFunctor<PairLJClass2CoulCutKokkos,HALFTHREAD,true>;
-  friend struct PairComputeFunctor<PairLJClass2CoulCutKokkos,FULL,false>;
+  friend struct PairComputeFunctor<PairLJClass2CoulCutKokkos,FULL,false,0>;
  friend struct PairComputeFunctor<PairLJClass2CoulCutKokkos,FULL,false,1>;
  friend struct PairComputeFunctor<PairLJClass2CoulCutKokkos,HALF,false>;
  friend struct PairComputeFunctor<PairLJClass2CoulCutKokkos,HALFTHREAD,false>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulCutKokkos,FULL,void>(PairLJClass2CoulCutKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulCutKokkos,FULL,0>(PairLJClass2CoulCutKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulCutKokkos,HALF,void>(PairLJClass2CoulCutKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulCutKokkos,FULL,1>(PairLJClass2CoulCutKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulCutKokkos,HALFTHREAD,void>(PairLJClass2CoulCutKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulCutKokkos,HALF>(PairLJClass2CoulCutKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulCutKokkos,HALFTHREAD>(PairLJClass2CoulCutKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute<PairLJClass2CoulCutKokkos,void>(PairLJClass2CoulCutKokkos*,
                                                            NeighListKokkos<DeviceType>*);
  friend void pair_virial_fdotr_compute<PairLJClass2CoulCutKokkos>(PairLJClass2CoulCutKokkos*);
--- a/src/KOKKOS/pair_lj_class2_coul_long_kokkos.h
+++ b/src/KOKKOS/pair_lj_class2_coul_long_kokkos.h
@ -107,27 +107,33 @@ class PairLJClass2CoulLongKokkos : public PairLJClass2CoulLong {
  double qqrd2e;
  void allocate() override;
-  friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,FULL,true,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,FULL,true,0,CoulLongTable<1>>;
-  friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,HALF,true,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,FULL,true,1,CoulLongTable<1>>;
-  friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,HALFTHREAD,true,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,HALF,true,0,CoulLongTable<1>>;
-  friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,FULL,false,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,HALFTHREAD,true,0,CoulLongTable<1>>;
-  friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,HALF,false,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,FULL,false,0,CoulLongTable<1>>;
-  friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,HALFTHREAD,false,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,FULL,false,1,CoulLongTable<1>>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulLongKokkos,FULL,CoulLongTable<1> >(PairLJClass2CoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,HALF,false,0,CoulLongTable<1>>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulLongKokkos,HALF,CoulLongTable<1> >(PairLJClass2CoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,HALFTHREAD,false,0,CoulLongTable<1>>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulLongKokkos,HALFTHREAD,CoulLongTable<1> >(PairLJClass2CoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulLongKokkos,FULL,0,CoulLongTable<1>>(PairLJClass2CoulLongKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute<PairLJClass2CoulLongKokkos,CoulLongTable<1> >(PairLJClass2CoulLongKokkos*,
+  friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulLongKokkos,FULL,1,CoulLongTable<1>>(PairLJClass2CoulLongKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulLongKokkos,HALF,0,CoulLongTable<1>>(PairLJClass2CoulLongKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulLongKokkos,HALFTHREAD,0,CoulLongTable<1>>(PairLJClass2CoulLongKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute<PairLJClass2CoulLongKokkos,CoulLongTable<1>>(PairLJClass2CoulLongKokkos*,
                                                            NeighListKokkos<DeviceType>*);
-  friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,FULL,true,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,FULL,true,0,CoulLongTable<0>>;
-  friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,HALF,true,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,FULL,true,1,CoulLongTable<0>>;
-  friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,HALFTHREAD,true,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,HALF,true,0,CoulLongTable<0>>;
-  friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,FULL,false,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,HALFTHREAD,true,0,CoulLongTable<0>>;
-  friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,HALF,false,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,FULL,false,0,CoulLongTable<0>>;
-  friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,HALFTHREAD,false,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,FULL,false,1,CoulLongTable<0>>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulLongKokkos,FULL,CoulLongTable<0> >(PairLJClass2CoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,HALF,false,0,CoulLongTable<0>>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulLongKokkos,HALF,CoulLongTable<0> >(PairLJClass2CoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,HALFTHREAD,false,0,CoulLongTable<0>>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulLongKokkos,HALFTHREAD,CoulLongTable<0> >(PairLJClass2CoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulLongKokkos,FULL,0,CoulLongTable<0>>(PairLJClass2CoulLongKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute<PairLJClass2CoulLongKokkos,CoulLongTable<0> >(PairLJClass2CoulLongKokkos*,
+  friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulLongKokkos,FULL,1,CoulLongTable<0>>(PairLJClass2CoulLongKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulLongKokkos,HALF,0,CoulLongTable<0>>(PairLJClass2CoulLongKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulLongKokkos,HALFTHREAD,0,CoulLongTable<0>>(PairLJClass2CoulLongKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute<PairLJClass2CoulLongKokkos,CoulLongTable<0>>(PairLJClass2CoulLongKokkos*,
                                                            NeighListKokkos<DeviceType>*);
  friend void pair_virial_fdotr_compute<PairLJClass2CoulLongKokkos>(PairLJClass2CoulLongKokkos*);
--- a/src/KOKKOS/pair_lj_class2_kokkos.h
+++ b/src/KOKKOS/pair_lj_class2_kokkos.h
@ -96,16 +96,19 @@ class PairLJClass2Kokkos : public PairLJClass2 {
  int nlocal,nall,eflag,vflag;
  void allocate() override;
-  friend struct PairComputeFunctor<PairLJClass2Kokkos,FULL,true>;
+  friend struct PairComputeFunctor<PairLJClass2Kokkos,FULL,true,0>;
  friend struct PairComputeFunctor<PairLJClass2Kokkos,FULL,true,1>;
  friend struct PairComputeFunctor<PairLJClass2Kokkos,HALF,true>;
  friend struct PairComputeFunctor<PairLJClass2Kokkos,HALFTHREAD,true>;
-  friend struct PairComputeFunctor<PairLJClass2Kokkos,FULL,false>;
+  friend struct PairComputeFunctor<PairLJClass2Kokkos,FULL,false,0>;
  friend struct PairComputeFunctor<PairLJClass2Kokkos,FULL,false,1>;
  friend struct PairComputeFunctor<PairLJClass2Kokkos,HALF,false>;
  friend struct PairComputeFunctor<PairLJClass2Kokkos,HALFTHREAD,false>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJClass2Kokkos,FULL,void>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJClass2Kokkos,FULL,0>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJClass2Kokkos,HALF,void>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJClass2Kokkos,FULL,1>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJClass2Kokkos,HALFTHREAD,void>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJClass2Kokkos,HALF>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute<PairLJClass2Kokkos,void>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJClass2Kokkos,HALFTHREAD>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute<PairLJClass2Kokkos>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
  friend void pair_virial_fdotr_compute<PairLJClass2Kokkos>(PairLJClass2Kokkos*);
 };
--- a/src/KOKKOS/pair_lj_cut_coul_cut_kokkos.h
+++ b/src/KOKKOS/pair_lj_cut_coul_cut_kokkos.h
@ -104,15 +104,18 @@ class PairLJCutCoulCutKokkos : public PairLJCutCoulCut {
  double qqrd2e;
  void allocate() override;
-  friend struct PairComputeFunctor<PairLJCutCoulCutKokkos,FULL,true>;
+  friend struct PairComputeFunctor<PairLJCutCoulCutKokkos,FULL,true,0>;
  friend struct PairComputeFunctor<PairLJCutCoulCutKokkos,FULL,true,1>;
  friend struct PairComputeFunctor<PairLJCutCoulCutKokkos,HALF,true>;
  friend struct PairComputeFunctor<PairLJCutCoulCutKokkos,HALFTHREAD,true>;
-  friend struct PairComputeFunctor<PairLJCutCoulCutKokkos,FULL,false>;
+  friend struct PairComputeFunctor<PairLJCutCoulCutKokkos,FULL,false,0>;
  friend struct PairComputeFunctor<PairLJCutCoulCutKokkos,FULL,false,1>;
  friend struct PairComputeFunctor<PairLJCutCoulCutKokkos,HALF,false>;
  friend struct PairComputeFunctor<PairLJCutCoulCutKokkos,HALFTHREAD,false>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulCutKokkos,FULL,void>(PairLJCutCoulCutKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulCutKokkos,FULL,0>(PairLJCutCoulCutKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulCutKokkos,HALF,void>(PairLJCutCoulCutKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulCutKokkos,FULL,1>(PairLJCutCoulCutKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulCutKokkos,HALFTHREAD,void>(PairLJCutCoulCutKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulCutKokkos,HALF>(PairLJCutCoulCutKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulCutKokkos,HALFTHREAD>(PairLJCutCoulCutKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute<PairLJCutCoulCutKokkos,void>(PairLJCutCoulCutKokkos*,
                                                            NeighListKokkos<DeviceType>*);
  friend void pair_virial_fdotr_compute<PairLJCutCoulCutKokkos>(PairLJCutCoulCutKokkos*);
--- a/src/KOKKOS/pair_lj_cut_coul_debye_kokkos.h
+++ b/src/KOKKOS/pair_lj_cut_coul_debye_kokkos.h
@ -104,15 +104,18 @@ class PairLJCutCoulDebyeKokkos : public PairLJCutCoulDebye {
  double qqrd2e;
  void allocate() override;
-  friend struct PairComputeFunctor<PairLJCutCoulDebyeKokkos,FULL,true>;
+  friend struct PairComputeFunctor<PairLJCutCoulDebyeKokkos,FULL,true,0>;
  friend struct PairComputeFunctor<PairLJCutCoulDebyeKokkos,FULL,true,1>;
  friend struct PairComputeFunctor<PairLJCutCoulDebyeKokkos,HALF,true>;
  friend struct PairComputeFunctor<PairLJCutCoulDebyeKokkos,HALFTHREAD,true>;
-  friend struct PairComputeFunctor<PairLJCutCoulDebyeKokkos,FULL,false>;
+  friend struct PairComputeFunctor<PairLJCutCoulDebyeKokkos,FULL,false,0>;
  friend struct PairComputeFunctor<PairLJCutCoulDebyeKokkos,FULL,false,1>;
  friend struct PairComputeFunctor<PairLJCutCoulDebyeKokkos,HALF,false>;
  friend struct PairComputeFunctor<PairLJCutCoulDebyeKokkos,HALFTHREAD,false>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulDebyeKokkos,FULL,void>(PairLJCutCoulDebyeKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulDebyeKokkos,FULL,0>(PairLJCutCoulDebyeKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulDebyeKokkos,HALF,void>(PairLJCutCoulDebyeKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulDebyeKokkos,FULL,1>(PairLJCutCoulDebyeKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulDebyeKokkos,HALFTHREAD,void>(PairLJCutCoulDebyeKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulDebyeKokkos,HALF>(PairLJCutCoulDebyeKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulDebyeKokkos,HALFTHREAD>(PairLJCutCoulDebyeKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute<PairLJCutCoulDebyeKokkos,void>(PairLJCutCoulDebyeKokkos*,
                                                            NeighListKokkos<DeviceType>*);
  friend void pair_virial_fdotr_compute<PairLJCutCoulDebyeKokkos>(PairLJCutCoulDebyeKokkos*);
--- a/src/KOKKOS/pair_lj_cut_coul_dsf_kokkos.h
+++ b/src/KOKKOS/pair_lj_cut_coul_dsf_kokkos.h
@ -101,15 +101,18 @@ class PairLJCutCoulDSFKokkos : public PairLJCutCoulDSF {
  double qqrd2e;
  void allocate() override;
-  friend struct PairComputeFunctor<PairLJCutCoulDSFKokkos,FULL,true>;
+  friend struct PairComputeFunctor<PairLJCutCoulDSFKokkos,FULL,true,0>;
  friend struct PairComputeFunctor<PairLJCutCoulDSFKokkos,FULL,true,1>;
  friend struct PairComputeFunctor<PairLJCutCoulDSFKokkos,HALF,true>;
  friend struct PairComputeFunctor<PairLJCutCoulDSFKokkos,HALFTHREAD,true>;
-  friend struct PairComputeFunctor<PairLJCutCoulDSFKokkos,FULL,false>;
+  friend struct PairComputeFunctor<PairLJCutCoulDSFKokkos,FULL,false,0>;
  friend struct PairComputeFunctor<PairLJCutCoulDSFKokkos,FULL,false,1>;
  friend struct PairComputeFunctor<PairLJCutCoulDSFKokkos,HALF,false>;
  friend struct PairComputeFunctor<PairLJCutCoulDSFKokkos,HALFTHREAD,false>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulDSFKokkos,FULL,void>(PairLJCutCoulDSFKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulDSFKokkos,FULL,0>(PairLJCutCoulDSFKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulDSFKokkos,HALF,void>(PairLJCutCoulDSFKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulDSFKokkos,FULL,1>(PairLJCutCoulDSFKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulDSFKokkos,HALFTHREAD,void>(PairLJCutCoulDSFKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulDSFKokkos,HALF>(PairLJCutCoulDSFKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulDSFKokkos,HALFTHREAD>(PairLJCutCoulDSFKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute<PairLJCutCoulDSFKokkos,void>(PairLJCutCoulDSFKokkos*,
                                                            NeighListKokkos<DeviceType>*);
  friend void pair_virial_fdotr_compute<PairLJCutCoulDSFKokkos>(PairLJCutCoulDSFKokkos*);
--- a/src/KOKKOS/pair_lj_cut_coul_long_kokkos.h
+++ b/src/KOKKOS/pair_lj_cut_coul_long_kokkos.h
@ -107,27 +107,33 @@ class PairLJCutCoulLongKokkos : public PairLJCutCoulLong {
  double qqrd2e;
  void allocate() override;
-  friend struct PairComputeFunctor<PairLJCutCoulLongKokkos,FULL,true,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairLJCutCoulLongKokkos,FULL,true,0,CoulLongTable<1>>;
-  friend struct PairComputeFunctor<PairLJCutCoulLongKokkos,HALF,true,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairLJCutCoulLongKokkos,FULL,true,1,CoulLongTable<1>>;
-  friend struct PairComputeFunctor<PairLJCutCoulLongKokkos,HALFTHREAD,true,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairLJCutCoulLongKokkos,HALF,true,0,CoulLongTable<1>>;
-  friend struct PairComputeFunctor<PairLJCutCoulLongKokkos,FULL,false,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairLJCutCoulLongKokkos,HALFTHREAD,true,0,CoulLongTable<1>>;
-  friend struct PairComputeFunctor<PairLJCutCoulLongKokkos,HALF,false,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairLJCutCoulLongKokkos,FULL,false,0,CoulLongTable<1>>;
-  friend struct PairComputeFunctor<PairLJCutCoulLongKokkos,HALFTHREAD,false,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairLJCutCoulLongKokkos,FULL,false,1,CoulLongTable<1>>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulLongKokkos,FULL,CoulLongTable<1> >(PairLJCutCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend struct PairComputeFunctor<PairLJCutCoulLongKokkos,HALF,false,0,CoulLongTable<1>>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulLongKokkos,HALF,CoulLongTable<1> >(PairLJCutCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend struct PairComputeFunctor<PairLJCutCoulLongKokkos,HALFTHREAD,false,0,CoulLongTable<1>>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulLongKokkos,HALFTHREAD,CoulLongTable<1> >(PairLJCutCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulLongKokkos,FULL,0,CoulLongTable<1>>(PairLJCutCoulLongKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute<PairLJCutCoulLongKokkos,CoulLongTable<1> >(PairLJCutCoulLongKokkos*,
+  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulLongKokkos,FULL,1,CoulLongTable<1>>(PairLJCutCoulLongKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulLongKokkos,HALF,0,CoulLongTable<1>>(PairLJCutCoulLongKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulLongKokkos,HALFTHREAD,0,CoulLongTable<1>>(PairLJCutCoulLongKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute<PairLJCutCoulLongKokkos,CoulLongTable<1>>(PairLJCutCoulLongKokkos*,
                                                            NeighListKokkos<DeviceType>*);
-  friend struct PairComputeFunctor<PairLJCutCoulLongKokkos,FULL,true,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairLJCutCoulLongKokkos,FULL,true,0,CoulLongTable<0>>;
-  friend struct PairComputeFunctor<PairLJCutCoulLongKokkos,HALF,true,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairLJCutCoulLongKokkos,FULL,true,1,CoulLongTable<0>>;
-  friend struct PairComputeFunctor<PairLJCutCoulLongKokkos,HALFTHREAD,true,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairLJCutCoulLongKokkos,HALF,true,0,CoulLongTable<0>>;
-  friend struct PairComputeFunctor<PairLJCutCoulLongKokkos,FULL,false,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairLJCutCoulLongKokkos,HALFTHREAD,true,0,CoulLongTable<0>>;
-  friend struct PairComputeFunctor<PairLJCutCoulLongKokkos,HALF,false,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairLJCutCoulLongKokkos,FULL,false,0,CoulLongTable<0>>;
-  friend struct PairComputeFunctor<PairLJCutCoulLongKokkos,HALFTHREAD,false,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairLJCutCoulLongKokkos,FULL,false,1,CoulLongTable<0>>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulLongKokkos,FULL,CoulLongTable<0> >(PairLJCutCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend struct PairComputeFunctor<PairLJCutCoulLongKokkos,HALF,false,0,CoulLongTable<0>>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulLongKokkos,HALF,CoulLongTable<0> >(PairLJCutCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend struct PairComputeFunctor<PairLJCutCoulLongKokkos,HALFTHREAD,false,0,CoulLongTable<0>>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulLongKokkos,HALFTHREAD,CoulLongTable<0> >(PairLJCutCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulLongKokkos,FULL,0,CoulLongTable<0>>(PairLJCutCoulLongKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute<PairLJCutCoulLongKokkos,CoulLongTable<0> >(PairLJCutCoulLongKokkos*,
+  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulLongKokkos,FULL,1,CoulLongTable<0>>(PairLJCutCoulLongKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulLongKokkos,HALF,0,CoulLongTable<0>>(PairLJCutCoulLongKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute_neighlist<PairLJCutCoulLongKokkos,HALFTHREAD,0,CoulLongTable<0>>(PairLJCutCoulLongKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute<PairLJCutCoulLongKokkos,CoulLongTable<0>>(PairLJCutCoulLongKokkos*,
                                                            NeighListKokkos<DeviceType>*);
  friend void pair_virial_fdotr_compute<PairLJCutCoulLongKokkos>(PairLJCutCoulLongKokkos*);
--- a/src/KOKKOS/pair_lj_cut_kokkos.h
+++ b/src/KOKKOS/pair_lj_cut_kokkos.h
@ -92,16 +92,19 @@ class PairLJCutKokkos : public PairLJCut {
  int nlocal,nall,eflag,vflag;
  void allocate() override;
-  friend struct PairComputeFunctor<PairLJCutKokkos,FULL,true>;
+  friend struct PairComputeFunctor<PairLJCutKokkos,FULL,true,0>;
  friend struct PairComputeFunctor<PairLJCutKokkos,FULL,true,1>;
  friend struct PairComputeFunctor<PairLJCutKokkos,HALF,true>;
  friend struct PairComputeFunctor<PairLJCutKokkos,HALFTHREAD,true>;
-  friend struct PairComputeFunctor<PairLJCutKokkos,FULL,false>;
+  friend struct PairComputeFunctor<PairLJCutKokkos,FULL,false,0>;
  friend struct PairComputeFunctor<PairLJCutKokkos,FULL,false,1>;
  friend struct PairComputeFunctor<PairLJCutKokkos,HALF,false>;
  friend struct PairComputeFunctor<PairLJCutKokkos,HALFTHREAD,false>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJCutKokkos,FULL,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCutKokkos,FULL,0>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJCutKokkos,HALF,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCutKokkos,FULL,1>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJCutKokkos,HALFTHREAD,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCutKokkos,HALF>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute<PairLJCutKokkos,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCutKokkos,HALFTHREAD>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute<PairLJCutKokkos>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
  friend void pair_virial_fdotr_compute<PairLJCutKokkos>(PairLJCutKokkos*);
 };
--- a/src/KOKKOS/pair_lj_expand_coul_long_kokkos.h
+++ b/src/KOKKOS/pair_lj_expand_coul_long_kokkos.h
@ -116,27 +116,33 @@ class PairLJExpandCoulLongKokkos : public PairLJExpandCoulLong {
  double qqrd2e;
  void allocate() override;
-  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,FULL,true,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,FULL,true,0,CoulLongTable<1>>;
-  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,HALF,true,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,FULL,true,1,CoulLongTable<1>>;
-  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,HALFTHREAD,true,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,HALF,true,0,CoulLongTable<1>>;
-  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,FULL,false,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,HALFTHREAD,true,0,CoulLongTable<1>>;
-  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,HALF,false,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,FULL,false,0,CoulLongTable<1>>;
-  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,HALFTHREAD,false,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,FULL,false,1,CoulLongTable<1>>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJExpandCoulLongKokkos,FULL,CoulLongTable<1> >(PairLJExpandCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,HALF,false,0,CoulLongTable<1>>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJExpandCoulLongKokkos,HALF,CoulLongTable<1> >(PairLJExpandCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,HALFTHREAD,false,0,CoulLongTable<1>>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJExpandCoulLongKokkos,HALFTHREAD,CoulLongTable<1> >(PairLJExpandCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJExpandCoulLongKokkos,FULL,0,CoulLongTable<1>>(PairLJExpandCoulLongKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute<PairLJExpandCoulLongKokkos,CoulLongTable<1> >(PairLJExpandCoulLongKokkos*,
+  friend EV_FLOAT pair_compute_neighlist<PairLJExpandCoulLongKokkos,FULL,1,CoulLongTable<1>>(PairLJExpandCoulLongKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute_neighlist<PairLJExpandCoulLongKokkos,HALF,0,CoulLongTable<1>>(PairLJExpandCoulLongKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute_neighlist<PairLJExpandCoulLongKokkos,HALFTHREAD,0,CoulLongTable<1>>(PairLJExpandCoulLongKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute<PairLJExpandCoulLongKokkos,CoulLongTable<1>>(PairLJExpandCoulLongKokkos*,
                                                            NeighListKokkos<DeviceType>*);
-  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,FULL,true,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,FULL,true,0,CoulLongTable<0>>;
-  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,HALF,true,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,FULL,true,1,CoulLongTable<0>>;
-  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,HALFTHREAD,true,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,HALF,true,0,CoulLongTable<0>>;
-  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,FULL,false,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,HALFTHREAD,true,0,CoulLongTable<0>>;
-  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,HALF,false,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,FULL,false,0,CoulLongTable<0>>;
-  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,HALFTHREAD,false,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,FULL,false,1,CoulLongTable<0>>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJExpandCoulLongKokkos,FULL,CoulLongTable<0> >(PairLJExpandCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,HALF,false,0,CoulLongTable<0>>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJExpandCoulLongKokkos,HALF,CoulLongTable<0> >(PairLJExpandCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend struct PairComputeFunctor<PairLJExpandCoulLongKokkos,HALFTHREAD,false,0,CoulLongTable<0>>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJExpandCoulLongKokkos,HALFTHREAD,CoulLongTable<0> >(PairLJExpandCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJExpandCoulLongKokkos,FULL,0,CoulLongTable<0>>(PairLJExpandCoulLongKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute<PairLJExpandCoulLongKokkos,CoulLongTable<0> >(PairLJExpandCoulLongKokkos*,
+  friend EV_FLOAT pair_compute_neighlist<PairLJExpandCoulLongKokkos,FULL,1,CoulLongTable<0>>(PairLJExpandCoulLongKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute_neighlist<PairLJExpandCoulLongKokkos,HALF,0,CoulLongTable<0>>(PairLJExpandCoulLongKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute_neighlist<PairLJExpandCoulLongKokkos,HALFTHREAD,0,CoulLongTable<0>>(PairLJExpandCoulLongKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute<PairLJExpandCoulLongKokkos,CoulLongTable<0>>(PairLJExpandCoulLongKokkos*,
                                                            NeighListKokkos<DeviceType>*);
  friend void pair_virial_fdotr_compute<PairLJExpandCoulLongKokkos>(PairLJExpandCoulLongKokkos*);
 };
--- a/src/KOKKOS/pair_lj_expand_kokkos.h
+++ b/src/KOKKOS/pair_lj_expand_kokkos.h
@ -97,16 +97,19 @@ class PairLJExpandKokkos : public PairLJExpand {
  int nlocal,nall,eflag,vflag;
  void allocate() override;
-  friend struct PairComputeFunctor<PairLJExpandKokkos,FULL,true>;
+  friend struct PairComputeFunctor<PairLJExpandKokkos,FULL,true,0>;
  friend struct PairComputeFunctor<PairLJExpandKokkos,FULL,true,1>;
  friend struct PairComputeFunctor<PairLJExpandKokkos,HALF,true>;
  friend struct PairComputeFunctor<PairLJExpandKokkos,HALFTHREAD,true>;
-  friend struct PairComputeFunctor<PairLJExpandKokkos,FULL,false>;
+  friend struct PairComputeFunctor<PairLJExpandKokkos,FULL,false,0>;
  friend struct PairComputeFunctor<PairLJExpandKokkos,FULL,false,1>;
  friend struct PairComputeFunctor<PairLJExpandKokkos,HALF,false>;
  friend struct PairComputeFunctor<PairLJExpandKokkos,HALFTHREAD,false>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJExpandKokkos,FULL,void>(PairLJExpandKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJExpandKokkos,FULL,0>(PairLJExpandKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJExpandKokkos,HALF,void>(PairLJExpandKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJExpandKokkos,FULL,1>(PairLJExpandKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJExpandKokkos,HALFTHREAD,void>(PairLJExpandKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJExpandKokkos,HALF>(PairLJExpandKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute<PairLJExpandKokkos,void>(PairLJExpandKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJExpandKokkos,HALFTHREAD>(PairLJExpandKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute<PairLJExpandKokkos>(PairLJExpandKokkos*,NeighListKokkos<DeviceType>*);
  friend void pair_virial_fdotr_compute<PairLJExpandKokkos>(PairLJExpandKokkos*);
 };
--- a/src/KOKKOS/pair_lj_gromacs_coul_gromacs_kokkos.h
+++ b/src/KOKKOS/pair_lj_gromacs_coul_gromacs_kokkos.h
@ -115,27 +115,33 @@ class PairLJGromacsCoulGromacsKokkos : public PairLJGromacsCoulGromacs {
  void allocate() override;
-  friend struct PairComputeFunctor<PairLJGromacsCoulGromacsKokkos,FULL,true,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairLJGromacsCoulGromacsKokkos,FULL,true,0,CoulLongTable<1>>;
-  friend struct PairComputeFunctor<PairLJGromacsCoulGromacsKokkos,HALF,true,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairLJGromacsCoulGromacsKokkos,FULL,true,1,CoulLongTable<1>>;
-  friend struct PairComputeFunctor<PairLJGromacsCoulGromacsKokkos,HALFTHREAD,true,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairLJGromacsCoulGromacsKokkos,HALF,true,0,CoulLongTable<1>>;
-  friend struct PairComputeFunctor<PairLJGromacsCoulGromacsKokkos,FULL,false,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairLJGromacsCoulGromacsKokkos,HALFTHREAD,true,0,CoulLongTable<1>>;
-  friend struct PairComputeFunctor<PairLJGromacsCoulGromacsKokkos,HALF,false,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairLJGromacsCoulGromacsKokkos,FULL,false,0,CoulLongTable<1>>;
-  friend struct PairComputeFunctor<PairLJGromacsCoulGromacsKokkos,HALFTHREAD,false,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairLJGromacsCoulGromacsKokkos,FULL,false,1,CoulLongTable<1>>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJGromacsCoulGromacsKokkos,FULL,CoulLongTable<1> >(PairLJGromacsCoulGromacsKokkos*,NeighListKokkos<DeviceType>*);
+  friend struct PairComputeFunctor<PairLJGromacsCoulGromacsKokkos,HALF,false,0,CoulLongTable<1>>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJGromacsCoulGromacsKokkos,HALF,CoulLongTable<1> >(PairLJGromacsCoulGromacsKokkos*,NeighListKokkos<DeviceType>*);
+  friend struct PairComputeFunctor<PairLJGromacsCoulGromacsKokkos,HALFTHREAD,false,0,CoulLongTable<1>>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJGromacsCoulGromacsKokkos,HALFTHREAD,CoulLongTable<1> >(PairLJGromacsCoulGromacsKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJGromacsCoulGromacsKokkos,FULL,0,CoulLongTable<1>>(PairLJGromacsCoulGromacsKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute<PairLJGromacsCoulGromacsKokkos,CoulLongTable<1> >(PairLJGromacsCoulGromacsKokkos*,
+  friend EV_FLOAT pair_compute_neighlist<PairLJGromacsCoulGromacsKokkos,FULL,1,CoulLongTable<1>>(PairLJGromacsCoulGromacsKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute_neighlist<PairLJGromacsCoulGromacsKokkos,HALF,0,CoulLongTable<1>>(PairLJGromacsCoulGromacsKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute_neighlist<PairLJGromacsCoulGromacsKokkos,HALFTHREAD,0,CoulLongTable<1>>(PairLJGromacsCoulGromacsKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute<PairLJGromacsCoulGromacsKokkos,CoulLongTable<1>>(PairLJGromacsCoulGromacsKokkos*,
                                                            NeighListKokkos<DeviceType>*);
-  friend struct PairComputeFunctor<PairLJGromacsCoulGromacsKokkos,FULL,true,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairLJGromacsCoulGromacsKokkos,FULL,true,0,CoulLongTable<0>>;
-  friend struct PairComputeFunctor<PairLJGromacsCoulGromacsKokkos,HALF,true,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairLJGromacsCoulGromacsKokkos,FULL,true,1,CoulLongTable<0>>;
-  friend struct PairComputeFunctor<PairLJGromacsCoulGromacsKokkos,HALFTHREAD,true,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairLJGromacsCoulGromacsKokkos,HALF,true,0,CoulLongTable<0>>;
-  friend struct PairComputeFunctor<PairLJGromacsCoulGromacsKokkos,FULL,false,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairLJGromacsCoulGromacsKokkos,HALFTHREAD,true,0,CoulLongTable<0>>;
-  friend struct PairComputeFunctor<PairLJGromacsCoulGromacsKokkos,HALF,false,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairLJGromacsCoulGromacsKokkos,FULL,false,0,CoulLongTable<0>>;
-  friend struct PairComputeFunctor<PairLJGromacsCoulGromacsKokkos,HALFTHREAD,false,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairLJGromacsCoulGromacsKokkos,FULL,false,1,CoulLongTable<0>>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJGromacsCoulGromacsKokkos,FULL,CoulLongTable<0> >(PairLJGromacsCoulGromacsKokkos*,NeighListKokkos<DeviceType>*);
+  friend struct PairComputeFunctor<PairLJGromacsCoulGromacsKokkos,HALF,false,0,CoulLongTable<0>>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJGromacsCoulGromacsKokkos,HALF,CoulLongTable<0> >(PairLJGromacsCoulGromacsKokkos*,NeighListKokkos<DeviceType>*);
+  friend struct PairComputeFunctor<PairLJGromacsCoulGromacsKokkos,HALFTHREAD,false,0,CoulLongTable<0>>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJGromacsCoulGromacsKokkos,HALFTHREAD,CoulLongTable<0> >(PairLJGromacsCoulGromacsKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJGromacsCoulGromacsKokkos,FULL,0,CoulLongTable<0>>(PairLJGromacsCoulGromacsKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute<PairLJGromacsCoulGromacsKokkos,CoulLongTable<0> >(PairLJGromacsCoulGromacsKokkos*,
+  friend EV_FLOAT pair_compute_neighlist<PairLJGromacsCoulGromacsKokkos,FULL,1,CoulLongTable<0>>(PairLJGromacsCoulGromacsKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute_neighlist<PairLJGromacsCoulGromacsKokkos,HALF,0,CoulLongTable<0>>(PairLJGromacsCoulGromacsKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute_neighlist<PairLJGromacsCoulGromacsKokkos,HALFTHREAD,0,CoulLongTable<0>>(PairLJGromacsCoulGromacsKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute<PairLJGromacsCoulGromacsKokkos,CoulLongTable<0>>(PairLJGromacsCoulGromacsKokkos*,
                                                            NeighListKokkos<DeviceType>*);
  friend void pair_virial_fdotr_compute<PairLJGromacsCoulGromacsKokkos>(PairLJGromacsCoulGromacsKokkos*);
--- a/src/KOKKOS/pair_lj_gromacs_kokkos.h
+++ b/src/KOKKOS/pair_lj_gromacs_kokkos.h
@ -115,27 +115,33 @@ class PairLJGromacsKokkos : public PairLJGromacs {
  void allocate() override;
-  friend struct PairComputeFunctor<PairLJGromacsKokkos,FULL,true,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairLJGromacsKokkos,FULL,true,0,CoulLongTable<1>>;
-  friend struct PairComputeFunctor<PairLJGromacsKokkos,HALF,true,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairLJGromacsKokkos,FULL,true,1,CoulLongTable<1>>;
-  friend struct PairComputeFunctor<PairLJGromacsKokkos,HALFTHREAD,true,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairLJGromacsKokkos,HALF,true,0,CoulLongTable<1>>;
-  friend struct PairComputeFunctor<PairLJGromacsKokkos,FULL,false,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairLJGromacsKokkos,HALFTHREAD,true,0,CoulLongTable<1>>;
-  friend struct PairComputeFunctor<PairLJGromacsKokkos,HALF,false,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairLJGromacsKokkos,FULL,false,0,CoulLongTable<1>>;
-  friend struct PairComputeFunctor<PairLJGromacsKokkos,HALFTHREAD,false,CoulLongTable<1> >;
+  friend struct PairComputeFunctor<PairLJGromacsKokkos,FULL,false,1,CoulLongTable<1>>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJGromacsKokkos,FULL,CoulLongTable<1> >(PairLJGromacsKokkos*,NeighListKokkos<DeviceType>*);
+  friend struct PairComputeFunctor<PairLJGromacsKokkos,HALF,false,0,CoulLongTable<1>>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJGromacsKokkos,HALF,CoulLongTable<1> >(PairLJGromacsKokkos*,NeighListKokkos<DeviceType>*);
+  friend struct PairComputeFunctor<PairLJGromacsKokkos,HALFTHREAD,false,0,CoulLongTable<1>>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJGromacsKokkos,HALFTHREAD,CoulLongTable<1> >(PairLJGromacsKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJGromacsKokkos,FULL,0,CoulLongTable<1>>(PairLJGromacsKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute<PairLJGromacsKokkos,CoulLongTable<1> >(PairLJGromacsKokkos*,
+  friend EV_FLOAT pair_compute_neighlist<PairLJGromacsKokkos,FULL,1,CoulLongTable<1>>(PairLJGromacsKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute_neighlist<PairLJGromacsKokkos,HALF,0,CoulLongTable<1>>(PairLJGromacsKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute_neighlist<PairLJGromacsKokkos,HALFTHREAD,0,CoulLongTable<1>>(PairLJGromacsKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute<PairLJGromacsKokkos,CoulLongTable<1>>(PairLJGromacsKokkos*,
                                                            NeighListKokkos<DeviceType>*);
-  friend struct PairComputeFunctor<PairLJGromacsKokkos,FULL,true,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairLJGromacsKokkos,FULL,true,0,CoulLongTable<0>>;
-  friend struct PairComputeFunctor<PairLJGromacsKokkos,HALF,true,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairLJGromacsKokkos,FULL,true,1,CoulLongTable<0>>;
-  friend struct PairComputeFunctor<PairLJGromacsKokkos,HALFTHREAD,true,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairLJGromacsKokkos,HALF,true,0,CoulLongTable<0>>;
-  friend struct PairComputeFunctor<PairLJGromacsKokkos,FULL,false,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairLJGromacsKokkos,HALFTHREAD,true,0,CoulLongTable<0>>;
-  friend struct PairComputeFunctor<PairLJGromacsKokkos,HALF,false,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairLJGromacsKokkos,FULL,false,0,CoulLongTable<0>>;
-  friend struct PairComputeFunctor<PairLJGromacsKokkos,HALFTHREAD,false,CoulLongTable<0> >;
+  friend struct PairComputeFunctor<PairLJGromacsKokkos,FULL,false,1,CoulLongTable<0>>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJGromacsKokkos,FULL,CoulLongTable<0> >(PairLJGromacsKokkos*,NeighListKokkos<DeviceType>*);
+  friend struct PairComputeFunctor<PairLJGromacsKokkos,HALF,false,0,CoulLongTable<0>>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJGromacsKokkos,HALF,CoulLongTable<0> >(PairLJGromacsKokkos*,NeighListKokkos<DeviceType>*);
+  friend struct PairComputeFunctor<PairLJGromacsKokkos,HALFTHREAD,false,0,CoulLongTable<0>>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJGromacsKokkos,HALFTHREAD,CoulLongTable<0> >(PairLJGromacsKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJGromacsKokkos,FULL,0,CoulLongTable<0>>(PairLJGromacsKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute<PairLJGromacsKokkos,CoulLongTable<0> >(PairLJGromacsKokkos*,
+  friend EV_FLOAT pair_compute_neighlist<PairLJGromacsKokkos,FULL,1,CoulLongTable<0>>(PairLJGromacsKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute_neighlist<PairLJGromacsKokkos,HALF,0,CoulLongTable<0>>(PairLJGromacsKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute_neighlist<PairLJGromacsKokkos,HALFTHREAD,0,CoulLongTable<0>>(PairLJGromacsKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute<PairLJGromacsKokkos,CoulLongTable<0>>(PairLJGromacsKokkos*,
                                                            NeighListKokkos<DeviceType>*);
  friend void pair_virial_fdotr_compute<PairLJGromacsKokkos>(PairLJGromacsKokkos*);
--- a/src/KOKKOS/pair_lj_spica_kokkos.h
+++ b/src/KOKKOS/pair_lj_spica_kokkos.h
@ -97,16 +97,19 @@ class PairLJSPICAKokkos : public PairLJSPICA {
  int nlocal,nall,eflag,vflag;
  void allocate() override;
-  friend struct PairComputeFunctor<PairLJSPICAKokkos,FULL,true>;
+  friend struct PairComputeFunctor<PairLJSPICAKokkos,FULL,true,0>;
  friend struct PairComputeFunctor<PairLJSPICAKokkos,FULL,true,1>;
  friend struct PairComputeFunctor<PairLJSPICAKokkos,HALF,true>;
  friend struct PairComputeFunctor<PairLJSPICAKokkos,HALFTHREAD,true>;
-  friend struct PairComputeFunctor<PairLJSPICAKokkos,FULL,false>;
+  friend struct PairComputeFunctor<PairLJSPICAKokkos,FULL,false,0>;
  friend struct PairComputeFunctor<PairLJSPICAKokkos,FULL,false,1>;
  friend struct PairComputeFunctor<PairLJSPICAKokkos,HALF,false>;
  friend struct PairComputeFunctor<PairLJSPICAKokkos,HALFTHREAD,false>;
-  friend EV_FLOAT pair_compute_neighlist<PairLJSPICAKokkos,FULL,void>(PairLJSPICAKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJSPICAKokkos,FULL,0>(PairLJSPICAKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJSPICAKokkos,HALF,void>(PairLJSPICAKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJSPICAKokkos,FULL,1>(PairLJSPICAKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairLJSPICAKokkos,HALFTHREAD,void>(PairLJSPICAKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJSPICAKokkos,HALF>(PairLJSPICAKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute<PairLJSPICAKokkos,void>(PairLJSPICAKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJSPICAKokkos,HALFTHREAD>(PairLJSPICAKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute<PairLJSPICAKokkos>(PairLJSPICAKokkos*,NeighListKokkos<DeviceType>*);
  friend void pair_virial_fdotr_compute<PairLJSPICAKokkos>(PairLJSPICAKokkos*);
 };
--- a/src/KOKKOS/pair_morse_kokkos.h
+++ b/src/KOKKOS/pair_morse_kokkos.h
@ -92,16 +92,19 @@ class PairMorseKokkos : public PairMorse {
  int nlocal,nall,eflag,vflag;
  void allocate() override;
-  friend struct PairComputeFunctor<PairMorseKokkos,FULL,true>;
+  friend struct PairComputeFunctor<PairMorseKokkos,FULL,true,0>;
  friend struct PairComputeFunctor<PairMorseKokkos,FULL,true,1>;
  friend struct PairComputeFunctor<PairMorseKokkos,HALF,true>;
  friend struct PairComputeFunctor<PairMorseKokkos,HALFTHREAD,true>;
-  friend struct PairComputeFunctor<PairMorseKokkos,FULL,false>;
+  friend struct PairComputeFunctor<PairMorseKokkos,FULL,false,0>;
  friend struct PairComputeFunctor<PairMorseKokkos,FULL,false,1>;
  friend struct PairComputeFunctor<PairMorseKokkos,HALF,false>;
  friend struct PairComputeFunctor<PairMorseKokkos,HALFTHREAD,false>;
-  friend EV_FLOAT pair_compute_neighlist<PairMorseKokkos,FULL,void>(PairMorseKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairMorseKokkos,FULL,0>(PairMorseKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairMorseKokkos,HALF,void>(PairMorseKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairMorseKokkos,FULL,1>(PairMorseKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairMorseKokkos,HALFTHREAD,void>(PairMorseKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairMorseKokkos,HALF>(PairMorseKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute<PairMorseKokkos,void>(PairMorseKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairMorseKokkos,HALFTHREAD>(PairMorseKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute<PairMorseKokkos>(PairMorseKokkos*,NeighListKokkos<DeviceType>*);
  friend void pair_virial_fdotr_compute<PairMorseKokkos>(PairMorseKokkos*);
 };
--- a/src/KOKKOS/pair_pace_kokkos.cpp
+++ b/src/KOKKOS/pair_pace_kokkos.cpp
@ -237,6 +237,9 @@ void PairPACEKokkos<DeviceType>::copy_splines()
  ACERadialFunctions* radial_functions = dynamic_cast<ACERadialFunctions*>(basis_set->radial_functions);
  if (radial_functions == nullptr)
    error->all(FLERR,"Chosen radial basis style not supported by pair style pace/kk");
  for (int i = 0; i < nelements; i++) {
    for (int j = 0; j < nelements; j++) {
      k_splines_gk.h_view(i, j) = radial_functions->splines_gk(i, j);
--- a/src/KOKKOS/pair_table_kokkos.cpp
+++ b/src/KOKKOS/pair_table_kokkos.cpp
@ -133,19 +133,19 @@ void PairTableKokkos<DeviceType>::compute_style(int eflag_in, int vflag_in)
  EV_FLOAT ev;
  if (atom->ntypes > MAX_TYPES_STACKPARAMS) {
    if (neighflag == FULL) {
-      PairComputeFunctor<PairTableKokkos<DeviceType>,FULL,false,S_TableCompute<DeviceType,TABSTYLE> >
+      PairComputeFunctor<PairTableKokkos<DeviceType>,FULL,false,0,S_TableCompute<DeviceType,TABSTYLE> >
        ff(this,(NeighListKokkos<DeviceType>*) list);
      if (eflag || vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
      else Kokkos::parallel_for(list->inum,ff);
      ff.contribute();
    } else if (neighflag == HALFTHREAD) {
-      PairComputeFunctor<PairTableKokkos<DeviceType>,HALFTHREAD,false,S_TableCompute<DeviceType,TABSTYLE> >
+      PairComputeFunctor<PairTableKokkos<DeviceType>,HALFTHREAD,false,0,S_TableCompute<DeviceType,TABSTYLE> >
        ff(this,(NeighListKokkos<DeviceType>*) list);
      if (eflag || vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
      else Kokkos::parallel_for(list->inum,ff);
      ff.contribute();
    } else if (neighflag == HALF) {
-      PairComputeFunctor<PairTableKokkos<DeviceType>,HALF,false,S_TableCompute<DeviceType,TABSTYLE> >
+      PairComputeFunctor<PairTableKokkos<DeviceType>,HALF,false,0,S_TableCompute<DeviceType,TABSTYLE> >
        f(this,(NeighListKokkos<DeviceType>*) list);
      if (eflag || vflag) Kokkos::parallel_reduce(list->inum,f,ev);
      else Kokkos::parallel_for(list->inum,f);
@ -153,19 +153,19 @@ void PairTableKokkos<DeviceType>::compute_style(int eflag_in, int vflag_in)
    }
  } else {
    if (neighflag == FULL) {
-      PairComputeFunctor<PairTableKokkos<DeviceType>,FULL,true,S_TableCompute<DeviceType,TABSTYLE> >
+      PairComputeFunctor<PairTableKokkos<DeviceType>,FULL,true,0,S_TableCompute<DeviceType,TABSTYLE> >
        f(this,(NeighListKokkos<DeviceType>*) list);
      if (eflag || vflag) Kokkos::parallel_reduce(list->inum,f,ev);
      else Kokkos::parallel_for(list->inum,f);
      f.contribute();
    } else if (neighflag == HALFTHREAD) {
-      PairComputeFunctor<PairTableKokkos<DeviceType>,HALFTHREAD,true,S_TableCompute<DeviceType,TABSTYLE> >
+      PairComputeFunctor<PairTableKokkos<DeviceType>,HALFTHREAD,true,0,S_TableCompute<DeviceType,TABSTYLE> >
        f(this,(NeighListKokkos<DeviceType>*) list);
      if (eflag || vflag) Kokkos::parallel_reduce(list->inum,f,ev);
      else Kokkos::parallel_for(list->inum,f);
      f.contribute();
    } else if (neighflag == HALF) {
-      PairComputeFunctor<PairTableKokkos<DeviceType>,HALF,true,S_TableCompute<DeviceType,TABSTYLE> >
+      PairComputeFunctor<PairTableKokkos<DeviceType>,HALF,true,0,S_TableCompute<DeviceType,TABSTYLE> >
        f(this,(NeighListKokkos<DeviceType>*) list);
      if (eflag || vflag) Kokkos::parallel_reduce(list->inum,f,ev);
      else Kokkos::parallel_for(list->inum,f);
--- a/src/KOKKOS/pair_table_kokkos.h
+++ b/src/KOKKOS/pair_table_kokkos.h
@ -35,9 +35,6 @@ struct S_TableCompute {
  static constexpr int TabStyle = TABSTYLE;
 };
 template <class DeviceType, int NEIGHFLAG, int TABSTYLE>
 struct PairTableComputeFunctor;
 template<class DeviceType>
 class PairTableKokkos : public PairTable {
 public:
@ -135,33 +132,33 @@ class PairTableKokkos : public PairTable {
  F_FLOAT compute_ecoul(const F_FLOAT& /*rsq*/, const int& /*i*/, const int& /*j*/,
                        const int& /*itype*/, const int& /*jtype*/) const { return 0; }
-  friend struct PairComputeFunctor<PairTableKokkos,FULL,true,S_TableCompute<DeviceType,LOOKUP> >;
+  friend struct PairComputeFunctor<PairTableKokkos,FULL,true,0,S_TableCompute<DeviceType,LOOKUP> >;
-  friend struct PairComputeFunctor<PairTableKokkos,HALF,true,S_TableCompute<DeviceType,LOOKUP> >;
+  friend struct PairComputeFunctor<PairTableKokkos,HALF,true,0,S_TableCompute<DeviceType,LOOKUP> >;
-  friend struct PairComputeFunctor<PairTableKokkos,HALFTHREAD,true,S_TableCompute<DeviceType,LOOKUP> >;
+  friend struct PairComputeFunctor<PairTableKokkos,HALFTHREAD,true,0,S_TableCompute<DeviceType,LOOKUP> >;
-  friend struct PairComputeFunctor<PairTableKokkos,FULL,false,S_TableCompute<DeviceType,LOOKUP> >;
+  friend struct PairComputeFunctor<PairTableKokkos,FULL,false,0,S_TableCompute<DeviceType,LOOKUP> >;
-  friend struct PairComputeFunctor<PairTableKokkos,HALF,false,S_TableCompute<DeviceType,LOOKUP> >;
+  friend struct PairComputeFunctor<PairTableKokkos,HALF,false,0,S_TableCompute<DeviceType,LOOKUP> >;
-  friend struct PairComputeFunctor<PairTableKokkos,HALFTHREAD,false,S_TableCompute<DeviceType,LOOKUP> >;
+  friend struct PairComputeFunctor<PairTableKokkos,HALFTHREAD,false,0,S_TableCompute<DeviceType,LOOKUP> >;
-  friend struct PairComputeFunctor<PairTableKokkos,FULL,true,S_TableCompute<DeviceType,LINEAR> >;
+  friend struct PairComputeFunctor<PairTableKokkos,FULL,true,0,S_TableCompute<DeviceType,LINEAR> >;
-  friend struct PairComputeFunctor<PairTableKokkos,HALF,true,S_TableCompute<DeviceType,LINEAR> >;
+  friend struct PairComputeFunctor<PairTableKokkos,HALF,true,0,S_TableCompute<DeviceType,LINEAR> >;
-  friend struct PairComputeFunctor<PairTableKokkos,HALFTHREAD,true,S_TableCompute<DeviceType,LINEAR> >;
+  friend struct PairComputeFunctor<PairTableKokkos,HALFTHREAD,true,0,S_TableCompute<DeviceType,LINEAR> >;
-  friend struct PairComputeFunctor<PairTableKokkos,FULL,false,S_TableCompute<DeviceType,LINEAR> >;
+  friend struct PairComputeFunctor<PairTableKokkos,FULL,false,0,S_TableCompute<DeviceType,LINEAR> >;
-  friend struct PairComputeFunctor<PairTableKokkos,HALF,false,S_TableCompute<DeviceType,LINEAR> >;
+  friend struct PairComputeFunctor<PairTableKokkos,HALF,false,0,S_TableCompute<DeviceType,LINEAR> >;
-  friend struct PairComputeFunctor<PairTableKokkos,HALFTHREAD,false,S_TableCompute<DeviceType,LINEAR> >;
+  friend struct PairComputeFunctor<PairTableKokkos,HALFTHREAD,false,0,S_TableCompute<DeviceType,LINEAR> >;
-  friend struct PairComputeFunctor<PairTableKokkos,FULL,true,S_TableCompute<DeviceType,SPLINE> >;
+  friend struct PairComputeFunctor<PairTableKokkos,FULL,true,0,S_TableCompute<DeviceType,SPLINE> >;
-  friend struct PairComputeFunctor<PairTableKokkos,HALF,true,S_TableCompute<DeviceType,SPLINE> >;
+  friend struct PairComputeFunctor<PairTableKokkos,HALF,true,0,S_TableCompute<DeviceType,SPLINE> >;
-  friend struct PairComputeFunctor<PairTableKokkos,HALFTHREAD,true,S_TableCompute<DeviceType,SPLINE> >;
+  friend struct PairComputeFunctor<PairTableKokkos,HALFTHREAD,true,0,S_TableCompute<DeviceType,SPLINE> >;
-  friend struct PairComputeFunctor<PairTableKokkos,FULL,false,S_TableCompute<DeviceType,SPLINE> >;
+  friend struct PairComputeFunctor<PairTableKokkos,FULL,false,0,S_TableCompute<DeviceType,SPLINE> >;
-  friend struct PairComputeFunctor<PairTableKokkos,HALF,false,S_TableCompute<DeviceType,SPLINE> >;
+  friend struct PairComputeFunctor<PairTableKokkos,HALF,false,0,S_TableCompute<DeviceType,SPLINE> >;
-  friend struct PairComputeFunctor<PairTableKokkos,HALFTHREAD,false,S_TableCompute<DeviceType,SPLINE> >;
+  friend struct PairComputeFunctor<PairTableKokkos,HALFTHREAD,false,0,S_TableCompute<DeviceType,SPLINE> >;
-  friend struct PairComputeFunctor<PairTableKokkos,FULL,true,S_TableCompute<DeviceType,BITMAP> >;
+  friend struct PairComputeFunctor<PairTableKokkos,FULL,true,0,S_TableCompute<DeviceType,BITMAP> >;
-  friend struct PairComputeFunctor<PairTableKokkos,HALF,true,S_TableCompute<DeviceType,BITMAP> >;
+  friend struct PairComputeFunctor<PairTableKokkos,HALF,true,0,S_TableCompute<DeviceType,BITMAP> >;
-  friend struct PairComputeFunctor<PairTableKokkos,HALFTHREAD,true,S_TableCompute<DeviceType,BITMAP> >;
+  friend struct PairComputeFunctor<PairTableKokkos,HALFTHREAD,true,0,S_TableCompute<DeviceType,BITMAP> >;
-  friend struct PairComputeFunctor<PairTableKokkos,FULL,false,S_TableCompute<DeviceType,BITMAP> >;
+  friend struct PairComputeFunctor<PairTableKokkos,FULL,false,0,S_TableCompute<DeviceType,BITMAP> >;
-  friend struct PairComputeFunctor<PairTableKokkos,HALF,false,S_TableCompute<DeviceType,BITMAP> >;
+  friend struct PairComputeFunctor<PairTableKokkos,HALF,false,0,S_TableCompute<DeviceType,BITMAP> >;
-  friend struct PairComputeFunctor<PairTableKokkos,HALFTHREAD,false,S_TableCompute<DeviceType,BITMAP> >;
+  friend struct PairComputeFunctor<PairTableKokkos,HALFTHREAD,false,0,S_TableCompute<DeviceType,BITMAP> >;
  friend void pair_virial_fdotr_compute<PairTableKokkos>(PairTableKokkos*);
 };
--- a/src/KOKKOS/pair_yukawa_colloid_kokkos.cpp
+++ b/src/KOKKOS/pair_yukawa_colloid_kokkos.cpp
@ -0,0 +1,270 @@
 // clang-format off
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
   LAMMPS development team: developers@lammps.org
   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.
   See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 /* ----------------------------------------------------------------------
   Contributing author: Trung Nguyen (U Chicago)
 ------------------------------------------------------------------------- */
 #include "pair_yukawa_colloid_kokkos.h"
 #include "atom_kokkos.h"
 #include "atom_masks.h"
 #include "error.h"
 #include "force.h"
 #include "kokkos.h"
 #include "memory_kokkos.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "neighbor.h"
 #include "respa.h"
 #include "update.h"
 #include <cmath>
 using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 template<class DeviceType>
 PairYukawaColloidKokkos<DeviceType>::PairYukawaColloidKokkos(LAMMPS *lmp)
  : PairYukawaColloid(lmp)
 {
  // style flags: Kokkos-capable, rRESPA flag cleared
  kokkosable = 1;
  respa_enable = 0;
  // Kokkos atom container and the execution space implied by DeviceType
  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
  atomKK = (AtomKokkos *) atom;
  // modify mask covers forces, energy and virial; the read mask is the same
  // set extended by positions, types and radii
  datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK;
  datamask_read = datamask_modify | X_MASK | TYPE_MASK | RADIUS_MASK;
 }
 /* ---------------------------------------------------------------------- */
 template<class DeviceType>
 PairYukawaColloidKokkos<DeviceType>::~PairYukawaColloidKokkos()
 {
  // nothing to release in copy mode or while the allocated flag is unset
  if (copymode || !allocated) return;
  // destroy the dual views together with their paired raw arrays
  memoryKK->destroy_kokkos(k_eatom,eatom);
  memoryKK->destroy_kokkos(k_vatom,vatom);
  memoryKK->destroy_kokkos(k_cutsq,cutsq);
 }
 /* ----------------------------------------------------------------------
   allocate all arrays
 ------------------------------------------------------------------------- */
 template<class DeviceType>
 void PairYukawaColloidKokkos<DeviceType>::allocate()
 {
  using params_dual_t = Kokkos::DualView<params_yukawa**,Kokkos::LayoutRight,DeviceType>;
  // let the base class allocate its own arrays first
  PairYukawaColloid::allocate();
  // every table below is (ntypes+1) x (ntypes+1)
  const int dim = atom->ntypes + 1;
  // replace the cutsq array with a Kokkos-managed one and grab its device view
  memory->destroy(cutsq);
  memoryKK->create_kokkos(k_cutsq,cutsq,dim,dim,"pair:cutsq");
  d_cutsq = k_cutsq.template view<DeviceType>();
  // per-type-pair coefficient table and its device view
  k_params = params_dual_t("PairYukawaColloid::params",dim,dim);
  params = k_params.template view<DeviceType>();
 }
 /* ----------------------------------------------------------------------
   init specific to this pair style
 ------------------------------------------------------------------------- */
 template<class DeviceType>
 void PairYukawaColloidKokkos<DeviceType>::init_style()
 {
  PairYukawaColloid::init_style();
  // refuse rRESPA integration when an inner or middle level is defined
  const bool respa_run = (update->whichflag == 1) &&
    utils::strmatch(update->integrate_style,"^respa");
  if (respa_run) {
    const Respa *integrator = (Respa *) update->integrate;
    if (integrator->level_inner >= 0 || integrator->level_middle >= 0)
      error->all(FLERR,"Cannot use Kokkos pair style with rRESPA inner/middle");
  }
  // tailor the neighbor list request to the Kokkos device type
  constexpr bool on_device = std::is_same<DeviceType,LMPDeviceType>::value;
  constexpr bool on_host = std::is_same<DeviceType,LMPHostType>::value && !on_device;
  neighflag = lmp->kokkos->neighflag;
  auto *nl_request = neighbor->find_request(this);
  nl_request->set_kokkos_host(on_host);
  nl_request->set_kokkos_device(on_device);
  if (neighflag == FULL) nl_request->enable_full();
 }
 /* ----------------------------------------------------------------------
   init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 // Rewrite this.
 template<class DeviceType>
 double PairYukawaColloidKokkos<DeviceType>::init_one(int i, int j)
 {
  // delegate to the base class; its return value is used as the pair cutoff
  const double cutone = PairYukawaColloid::init_one(i,j);
  const double cutone_sq = cutone*cutone;
  // fill the host-side coefficient entry and copy it to the (j,i) slot
  auto &entry_ij = k_params.h_view(i,j);
  entry_ij.a      = a[i][j];
  entry_ij.offset = offset[i][j];
  entry_ij.cutsq  = cutone_sq;
  k_params.h_view(j,i) = entry_ij;
  // also fill the fixed-size member tables when both type indices fit
  if (i <= MAX_TYPES_STACKPARAMS && j <= MAX_TYPES_STACKPARAMS) {
    m_params[i][j] = m_params[j][i] = entry_ij;
    m_cutsq[j][i] = m_cutsq[i][j] = cutone_sq;
  }
  k_cutsq.h_view(i,j) = k_cutsq.h_view(j,i) = cutone_sq;
  // mark the host copies of both dual views as modified
  k_cutsq.template modify<LMPHostType>();
  k_params.template modify<LMPHostType>();
  return cutone;
 }
 /* ---------------------------------------------------------------------- */
 // Evaluate the pair interaction for the current step.
 //   eflag_in, vflag_in: energy / virial request flags, stored in the members
 //   eflag and vflag and handed to ev_init().
 // The work is done by pair_compute(); afterwards the accumulated energy and
 // virial are added to eng_vdwl / virial and per-atom arrays are synced to the host.
 template<class DeviceType>
 void PairYukawaColloidKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
 {
  eflag = eflag_in;
  vflag = vflag_in;
  // with a full neighbor list the fdotr virial path is switched off
  if (neighflag == FULL) no_virial_fdotr_compute = 1;
  ev_init(eflag,vflag,0);
  // recreate the per-atom energy/virial dual views (sized maxeatom / maxvatom)
  // whenever per-atom tallies are requested
  if (eflag_atom) {
    memoryKK->destroy_kokkos(k_eatom,eatom);
    memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom");
    d_eatom = k_eatom.view<DeviceType>();
  }
  if (vflag_atom) {
    memoryKK->destroy_kokkos(k_vatom,vatom);
    memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"pair:vatom");
    d_vatom = k_vatom.view<DeviceType>();
  }
  // sync the atom data named in datamask_read and the coefficient tables
  // to the execution space
  atomKK->sync(execution_space,datamask_read);
  k_cutsq.template sync<DeviceType>();
  k_params.template sync<DeviceType>();
  // declare what will be modified: the full modify mask when energy or virial
  // is requested, otherwise forces only
  if (eflag || vflag) atomKK->modified(execution_space,datamask_modify);
  else atomKK->modified(execution_space,F_MASK);
  // refresh the views of per-atom data used by the force functions
  x = atomKK->k_x.view<DeviceType>();
  c_x = atomKK->k_x.view<DeviceType>();
  f = atomKK->k_f.view<DeviceType>();
  type = atomKK->k_type.view<DeviceType>();
  radius = atomKK->k_radius.view<DeviceType>();
  nlocal = atom->nlocal;
  nall = atom->nlocal + atom->nghost;
  newton_pair = force->newton_pair;
  // local copy of the four special_lj factors from force
  special_lj[0] = force->special_lj[0];
  special_lj[1] = force->special_lj[1];
  special_lj[2] = force->special_lj[2];
  special_lj[3] = force->special_lj[3];
  // loop over neighbors of my atoms
  EV_FLOAT ev = pair_compute<PairYukawaColloidKokkos<DeviceType>,void >(
    this,(NeighListKokkos<DeviceType>*)list);
  // add the returned energy and virial to the global accumulators
  if (eflag_global) eng_vdwl += ev.evdwl;
  if (vflag_global) {
    virial[0] += ev.v[0];
    virial[1] += ev.v[1];
    virial[2] += ev.v[2];
    virial[3] += ev.v[3];
    virial[4] += ev.v[4];
    virial[5] += ev.v[5];
  }
  if (vflag_fdotr) pair_virial_fdotr_compute(this);
  // per-atom tallies were written on DeviceType; mark them modified there
  // and sync them to the host
  if (eflag_atom) {
    k_eatom.template modify<DeviceType>();
    k_eatom.template sync<LMPHostType>();
  }
  if (vflag_atom) {
    k_vatom.template modify<DeviceType>();
    k_vatom.template sync<LMPHostType>();
  }
 }
 /* ---------------------------------------------------------------------- */
 // Force magnitude divided by distance (f/r) for the pair (i,j).
 //   rsq          squared separation of the two atoms
 //   i, j         atom indices, used to look up the radii in `radius`
 //   itype, jtype atom types, used to look up the prefactor a
 // STACKPARAMS selects the member table m_params instead of the params view.
 template<class DeviceType>
 template<bool STACKPARAMS, class Specialisation>
 KOKKOS_INLINE_FUNCTION
 F_FLOAT PairYukawaColloidKokkos<DeviceType>::
 compute_fpair(const F_FLOAT& rsq, const int& i, const int&j,
               const int& itype, const int& jtype) const {
  const F_FLOAT radi   = radius[i];
  const F_FLOAT radj   = radius[j];
  const F_FLOAT rr     = sqrt(rsq);
  // prefactor a comes from m_params when STACKPARAMS is set, else from params
  const F_FLOAT aa     = STACKPARAMS ? m_params[itype][jtype].a
                                     : params(itype,jtype).a;
  // U   = a * exp(-kappa*(r-(radi+radj))) / kappa
  // f   = -dU/dr = a * exp(-kappa*(r-(radi+radj)))
  // f/r = a * exp(-kappa*(r-(radi+radj))) / r
  const F_FLOAT rinv = 1.0 / rr;
  const F_FLOAT screening = exp(-kappa*(rr-(radi+radj)));
  const F_FLOAT forceyukawa = aa * screening;
  return forceyukawa * rinv;
 }
 // Pair energy for atoms i and j, shifted by the stored per-type offset.
 //   rsq          squared separation of the two atoms
 //   i, j         atom indices, used to look up the radii in `radius`
 //   itype, jtype atom types, used to look up a and offset
 // STACKPARAMS selects the member table m_params instead of the params view.
 template<class DeviceType>
 template<bool STACKPARAMS, class Specialisation>
 KOKKOS_INLINE_FUNCTION
 F_FLOAT PairYukawaColloidKokkos<DeviceType>::
 compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j,
               const int& itype, const int& jtype) const {
  const F_FLOAT radi   = radius[i];
  const F_FLOAT radj   = radius[j];
  const F_FLOAT rr     = sqrt(rsq);
  const F_FLOAT aa     = STACKPARAMS ? m_params[itype][jtype].a
                                     : params(itype,jtype).a;
  const F_FLOAT offset = STACKPARAMS ? m_params[itype][jtype].offset
                                     : params(itype,jtype).offset;
  // U   = a * exp(-kappa*(r-(radi+radj))) / kappa - offset
  const F_FLOAT screening = exp(-kappa*(rr-(radi+radj)));
  return aa / kappa * screening - offset;
 }
 // Explicit instantiations of the pair style: always for LMPDeviceType, and
 // additionally for LMPHostType when LMP_KOKKOS_GPU is defined.
 namespace LAMMPS_NS {
 template class PairYukawaColloidKokkos<LMPDeviceType>;
 #ifdef LMP_KOKKOS_GPU
 template class PairYukawaColloidKokkos<LMPHostType>;
 #endif
 }
--- a/src/KOKKOS/pair_yukawa_colloid_kokkos.h
+++ b/src/KOKKOS/pair_yukawa_colloid_kokkos.h
@ -0,0 +1,123 @@
 /* -*- c++ -*- ----------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
   LAMMPS development team: developers@lammps.org
   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.
   See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 #ifdef PAIR_CLASS
 // clang-format off
 PairStyle(yukawa/colloid/kk,PairYukawaColloidKokkos<LMPDeviceType>);
 PairStyle(yukawa/colloid/kk/device,PairYukawaColloidKokkos<LMPDeviceType>);
 PairStyle(yukawa/colloid/kk/host,PairYukawaColloidKokkos<LMPHostType>);
 // clang-format on
 #else
 // clang-format off
 #ifndef LMP_PAIR_YUKAWA_COLLOID_KOKKOS_H
 #define LMP_PAIR_YUKAWA_COLLOID_KOKKOS_H
 #include "pair_kokkos.h"
 #include "pair_yukawa_colloid.h"
 #include "neigh_list_kokkos.h"
 namespace LAMMPS_NS {
 // Kokkos variant of PairYukawaColloid, templated on the Kokkos device type.
 // It overrides compute(), init_style(), init_one() and allocate(), and exposes
 // compute_fpair/compute_evdwl/compute_ecoul to the befriended
 // PairComputeFunctor / pair_compute templates.
 template<class DeviceType>
 class PairYukawaColloidKokkos : public PairYukawaColloid {
 public:
  enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF};
  enum {COUL_FLAG=0};
  typedef DeviceType device_type;
  typedef ArrayTypes<DeviceType> AT;
  PairYukawaColloidKokkos(class LAMMPS *);
  ~PairYukawaColloidKokkos() override;
  void compute(int, int) override;
  void init_style() override;
  double init_one(int,int) override;
  // per-type-pair coefficients: squared cutoff, prefactor a, energy offset
  struct params_yukawa {
    KOKKOS_INLINE_FUNCTION
    params_yukawa() { cutsq = 0; a = 0; offset = 0; }
    // the int argument is ignored; all fields are zeroed as in the default ctor
    KOKKOS_INLINE_FUNCTION
    params_yukawa(int /*i*/) { cutsq = 0; a = 0; offset = 0; }
    F_FLOAT cutsq, a, offset;
  };
 protected:
  // force/r for pair (i,j); defined in the .cpp file
  template<bool STACKPARAMS, class Specialisation>
  KOKKOS_INLINE_FUNCTION
  F_FLOAT compute_fpair(const F_FLOAT& rsq, const int& i, const int&j,
                        const int& itype, const int& jtype) const;
  // pair energy for (i,j); defined in the .cpp file
  template<bool STACKPARAMS, class Specialisation>
  KOKKOS_INLINE_FUNCTION
  F_FLOAT compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j,
                        const int& itype, const int& jtype) const;
  // this style has no Coulomb energy term (COUL_FLAG is 0): always returns 0
  template<bool STACKPARAMS, class Specialisation>
  KOKKOS_INLINE_FUNCTION
  F_FLOAT compute_ecoul(const F_FLOAT& /*rsq*/, const int& /*i*/, const int& /*j*/,
                        const int& /*itype*/, const int& /*jtype*/) const { return 0; }
  // coefficient table as a dual view, plus its const device view
  Kokkos::DualView<params_yukawa**,Kokkos::LayoutRight,DeviceType> k_params;
  typename Kokkos::DualView<params_yukawa**,Kokkos::LayoutRight,DeviceType>::t_dev_const_um params;
  // fixed-size copies of the coefficients and squared cutoffs, indexed by type
  params_yukawa m_params[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
  F_FLOAT m_cutsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
  // views of per-atom data (positions, forces, types, radii)
  typename AT::t_x_array_randomread x;
  typename AT::t_x_array c_x;
  typename AT::t_f_array f;
  typename AT::t_int_1d_randomread type;
  typename AT::t_float_1d_randomread radius;
  // per-atom energy / virial: dual views and their DeviceType views
  DAT::tdual_efloat_1d k_eatom;
  DAT::tdual_virial_array k_vatom;
  typename AT::t_efloat_1d d_eatom;
  typename AT::t_virial_array d_vatom;
  int newton_pair;
  double special_lj[4];
  // squared cutoffs per type pair: dual view and its DeviceType view
  typename AT::tdual_ffloat_2d k_cutsq;
  typename AT::t_ffloat_2d d_cutsq;
  int neighflag;
  int nlocal,nall,eflag,vflag;
  void allocate() override;
  // the generic Kokkos pair kernels need access to the protected members above
  friend struct PairComputeFunctor<PairYukawaColloidKokkos,FULL,true,0>;
  friend struct PairComputeFunctor<PairYukawaColloidKokkos,FULL,true,1>;
  friend struct PairComputeFunctor<PairYukawaColloidKokkos,HALF,true>;
  friend struct PairComputeFunctor<PairYukawaColloidKokkos,HALFTHREAD,true>;
  friend struct PairComputeFunctor<PairYukawaColloidKokkos,FULL,false,0>;
  friend struct PairComputeFunctor<PairYukawaColloidKokkos,FULL,false,1>;
  friend struct PairComputeFunctor<PairYukawaColloidKokkos,HALF,false>;
  friend struct PairComputeFunctor<PairYukawaColloidKokkos,HALFTHREAD,false>;
  friend EV_FLOAT pair_compute_neighlist<PairYukawaColloidKokkos,FULL,0>(PairYukawaColloidKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute_neighlist<PairYukawaColloidKokkos,FULL,1>(PairYukawaColloidKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute_neighlist<PairYukawaColloidKokkos,HALF>(
    PairYukawaColloidKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute_neighlist<PairYukawaColloidKokkos,HALFTHREAD>(
    PairYukawaColloidKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute<PairYukawaColloidKokkos>(
    PairYukawaColloidKokkos*,NeighListKokkos<DeviceType>*);
  friend void pair_virial_fdotr_compute<PairYukawaColloidKokkos>(PairYukawaColloidKokkos*);
 };
 }
 #endif
 #endif
--- a/src/KOKKOS/pair_yukawa_kokkos.h
+++ b/src/KOKKOS/pair_yukawa_kokkos.h
@ -95,20 +95,19 @@ class PairYukawaKokkos : public PairYukawa {
  int nlocal,nall,eflag,vflag;
  void allocate() override;
-  friend struct PairComputeFunctor<PairYukawaKokkos,FULL,true>;
+  friend struct PairComputeFunctor<PairYukawaKokkos,FULL,true,0>;
  friend struct PairComputeFunctor<PairYukawaKokkos,FULL,true,1>;
  friend struct PairComputeFunctor<PairYukawaKokkos,HALF,true>;
  friend struct PairComputeFunctor<PairYukawaKokkos,HALFTHREAD,true>;
-  friend struct PairComputeFunctor<PairYukawaKokkos,FULL,false>;
+  friend struct PairComputeFunctor<PairYukawaKokkos,FULL,false,0>;
  friend struct PairComputeFunctor<PairYukawaKokkos,FULL,false,1>;
  friend struct PairComputeFunctor<PairYukawaKokkos,HALF,false>;
  friend struct PairComputeFunctor<PairYukawaKokkos,HALFTHREAD,false>;
-  friend EV_FLOAT pair_compute_neighlist<PairYukawaKokkos,FULL,void>(
+  friend EV_FLOAT pair_compute_neighlist<PairYukawaKokkos,FULL,0>(PairYukawaKokkos*,NeighListKokkos<DeviceType>*);
-    PairYukawaKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairYukawaKokkos,FULL,1>(PairYukawaKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairYukawaKokkos,HALF,void>(
+  friend EV_FLOAT pair_compute_neighlist<PairYukawaKokkos,HALF>(PairYukawaKokkos*,NeighListKokkos<DeviceType>*);
-    PairYukawaKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairYukawaKokkos,HALFTHREAD>(PairYukawaKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairYukawaKokkos,HALFTHREAD,void>(
+  friend EV_FLOAT pair_compute<PairYukawaKokkos,void>(PairYukawaKokkos*,NeighListKokkos<DeviceType>*);
    PairYukawaKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute<PairYukawaKokkos,void>(
    PairYukawaKokkos*,NeighListKokkos<DeviceType>*);
  friend void pair_virial_fdotr_compute<PairYukawaKokkos>(PairYukawaKokkos*);
 };
--- a/src/KOKKOS/pair_zbl_kokkos.h
+++ b/src/KOKKOS/pair_zbl_kokkos.h
@ -89,16 +89,19 @@ class PairZBLKokkos : public PairZBL {
  void allocate() override;
-  friend struct PairComputeFunctor<PairZBLKokkos,FULL,true>;
+  friend struct PairComputeFunctor<PairZBLKokkos,FULL,true,0>;
  friend struct PairComputeFunctor<PairZBLKokkos,FULL,true,1>;
  friend struct PairComputeFunctor<PairZBLKokkos,HALF,true>;
  friend struct PairComputeFunctor<PairZBLKokkos,HALFTHREAD,true>;
-  friend struct PairComputeFunctor<PairZBLKokkos,FULL,false>;
+  friend struct PairComputeFunctor<PairZBLKokkos,FULL,false,0>;
  friend struct PairComputeFunctor<PairZBLKokkos,FULL,false,1>;
  friend struct PairComputeFunctor<PairZBLKokkos,HALF,false>;
  friend struct PairComputeFunctor<PairZBLKokkos,HALFTHREAD,false>;
-  friend EV_FLOAT pair_compute_neighlist<PairZBLKokkos,FULL,void>(PairZBLKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairZBLKokkos,FULL,0>(PairZBLKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairZBLKokkos,HALF,void>(PairZBLKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairZBLKokkos,FULL,1>(PairZBLKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute_neighlist<PairZBLKokkos,HALFTHREAD,void>(PairZBLKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairZBLKokkos,HALF>(PairZBLKokkos*,NeighListKokkos<DeviceType>*);
-  friend EV_FLOAT pair_compute<PairZBLKokkos,void>(PairZBLKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairZBLKokkos,HALFTHREAD>(PairZBLKokkos*,NeighListKokkos<DeviceType>*);
  friend EV_FLOAT pair_compute<PairZBLKokkos>(PairZBLKokkos*,NeighListKokkos<DeviceType>*);
  friend void pair_virial_fdotr_compute<PairZBLKokkos>(PairZBLKokkos*);
 };
--- a/src/MANYBODY/pair_airebo.cpp
+++ b/src/MANYBODY/pair_airebo.cpp
@ -59,7 +59,6 @@ PairAIREBO::PairAIREBO(LAMMPS *lmp)
  nextra = 3;
  pvector = new double[nextra];
  trim_flag = 0; // workaround
  maxlocal = 0;
  REBO_numneigh = nullptr;
  REBO_firstneigh = nullptr;
--- a/src/OPENMP/npair_half_bin_newton_tri_omp.cpp
+++ b/src/OPENMP/npair_half_bin_newton_tri_omp.cpp
@ -12,16 +12,18 @@
   See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 #include "omp_compat.h"
 #include "npair_half_bin_newton_tri_omp.h"
 #include "npair_omp.h"
-#include "neigh_list.h"
+#include "omp_compat.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "molecule.h"
 #include "domain.h"
 #include "my_page.h"
 #include "error.h"
 #include "force.h"
 #include "molecule.h"
 #include "my_page.h"
 #include "neigh_list.h"
 using namespace LAMMPS_NS;
@ -40,6 +42,7 @@ void NPairHalfBinNewtonTriOmp::build(NeighList *list)
  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
  const int molecular = atom->molecular;
  const int moltemplate = (molecular == Atom::TEMPLATE) ? 1 : 0;
  const double delta = 0.01 * force->angstrom;
  NPAIR_OMP_INIT;
 #if defined(_OPENMP)
@ -48,12 +51,10 @@ void NPairHalfBinNewtonTriOmp::build(NeighList *list)
  NPAIR_OMP_SETUP(nlocal);
  int i,j,k,n,itype,jtype,ibin,which,imol,iatom;
-  tagint tagprev;
+  tagint itag,jtag,tagprev;
  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
  int *neighptr;
  // loop over each atom, storing neighbors
  double **x = atom->x;
  int *type = atom->type;
  int *mask = atom->mask;
@ -79,6 +80,7 @@ void NPairHalfBinNewtonTriOmp::build(NeighList *list)
    n = 0;
    neighptr = ipage.vget();
    itag = tag[i];
    itype = type[i];
    xtmp = x[i][0];
    ytmp = x[i][1];
@ -90,20 +92,31 @@ void NPairHalfBinNewtonTriOmp::build(NeighList *list)
    }
    // loop over all atoms in bins in stencil
-    // pairs for atoms j "below" i are excluded
+    // for triclinic, bin stencil is full in all 3 dims
-    // below = lower z or (equal z and lower y) or (equal zy and lower x)
+    // must use itag/jtag to eliminate half the I/J interactions
-    //         (equal zyx and j <= i)
+    // cannot use I/J exact coord comparison
-    // latter excludes self-self interaction but allows superposed atoms
+    //   b/c transforming orthog -> lambda -> orthog for ghost atoms
    //   with an added PBC offset can shift all 3 coords by epsilon
    ibin = atom2bin[i];
    for (k = 0; k < nstencil; k++) {
      for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
        if (j <= i) continue;
        if (j >= nlocal) {
          jtag = tag[j];
          if (itag > jtag) {
            if ((itag+jtag) % 2 == 0) continue;
          } else if (itag < jtag) {
            if ((itag+jtag) % 2 == 1) continue;
          } else {
            if (fabs(x[j][2]-ztmp) > delta) {
              if (x[j][2] < ztmp) continue;
-        if (x[j][2] == ztmp) {
+            } else if (fabs(x[j][1]-ytmp) > delta) {
              if (x[j][1] < ytmp) continue;
-          if (x[j][1] == ytmp) {
+            } else {
              if (x[j][0] < xtmp) continue;
-            if (x[j][0] == xtmp && j <= i) continue;
+            }
          }
        }
--- a/src/OPENMP/npair_half_multi_newton_tri_omp.cpp
+++ b/src/OPENMP/npair_half_multi_newton_tri_omp.cpp
@ -12,17 +12,19 @@
   See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 #include "omp_compat.h"
 #include "npair_half_multi_newton_tri_omp.h"
-#include "npair_omp.h"
+
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "molecule.h"
 #include "domain.h"
 #include "my_page.h"
 #include "error.h"
 #include "force.h"
 #include "molecule.h"
 #include "my_page.h"
 #include "neigh_list.h"
 #include "neighbor.h"
 #include "npair_omp.h"
 #include "omp_compat.h"
 using namespace LAMMPS_NS;
@ -43,6 +45,7 @@ void NPairHalfMultiNewtonTriOmp::build(NeighList *list)
  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
  const int molecular = atom->molecular;
  const int moltemplate = (molecular == Atom::TEMPLATE) ? 1 : 0;
  const double delta = 0.01 * force->angstrom;
  NPAIR_OMP_INIT;
 #if defined(_OPENMP)
@ -51,13 +54,11 @@ void NPairHalfMultiNewtonTriOmp::build(NeighList *list)
  NPAIR_OMP_SETUP(nlocal);
  int i,j,k,n,itype,jtype,ibin,jbin,icollection,jcollection,which,ns,imol,iatom;
-  tagint tagprev;
+  tagint itag,jtag,tagprev;
  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
  int *neighptr,*s;
  int js;
  // loop over each atom, storing neighbors
  int *collection = neighbor->collection;
  double **x = atom->x;
  int *type = atom->type;
@ -84,6 +85,7 @@ void NPairHalfMultiNewtonTriOmp::build(NeighList *list)
    n = 0;
    neighptr = ipage.vget();
    itag = tag[i];
    itype = type[i];
    icollection = collection[i];
    xtmp = x[i][0];
@ -98,20 +100,24 @@ void NPairHalfMultiNewtonTriOmp::build(NeighList *list)
    ibin = atom2bin[i];
    // loop through stencils for all collections
    for (jcollection = 0; jcollection < ncollections; jcollection++) {
      // if same collection use own bin
      if (icollection == jcollection) jbin = ibin;
      else jbin = coord2bin(x[i], jcollection);
      // loop over all atoms in bins in stencil
      // for triclinic:
      //   stencil is empty if i larger than j
      // stencil is half if i same size as j
      //   stencil is full if i smaller than j
-      // if half: pairs for atoms j "below" i are excluded
+      //   stencil is full if i same size as j
-      // below = lower z or (equal z and lower y) or (equal zy and lower x)
+      // for i smaller than j:
-      //         (equal zyx and j <= i)
+      //   must use itag/jtag to eliminate half the I/J interactions
-      // latter excludes self-self interaction but allows superposed atoms
+      //   cannot use I/J exact coord comparison
      //     b/c transforming orthog -> lambda -> orthog for ghost atoms
      //     with an added PBC offset can shift all 3 coords by epsilon
      s = stencil_multi[icollection][jcollection];
      ns = nstencil_multi[icollection][jcollection];
@ -120,14 +126,25 @@ void NPairHalfMultiNewtonTriOmp::build(NeighList *list)
        js = binhead_multi[jcollection][jbin + s[k]];
        for (j = js; j >= 0; j = bins[j]) {
-          // if same size (same collection), use half stencil
+          // if same size (same collection), exclude half of interactions
-          if (cutcollectionsq[icollection][icollection] == cutcollectionsq[jcollection][jcollection]){
+
          if (cutcollectionsq[icollection][icollection] ==
              cutcollectionsq[jcollection][jcollection]) {
            if (j <= i) continue;
            if (j >= nlocal) {
              jtag = tag[j];
              if (itag > jtag) {
                if ((itag+jtag) % 2 == 0) continue;
              } else if (itag < jtag) {
                if ((itag+jtag) % 2 == 1) continue;
              } else {
                if (fabs(x[j][2]-ztmp) > delta) {
                  if (x[j][2] < ztmp) continue;
-            if (x[j][2] == ztmp) {
+                } else if (fabs(x[j][1]-ytmp) > delta) {
                  if (x[j][1] < ytmp) continue;
-              if (x[j][1] == ytmp) {
+                } else {
                  if (x[j][0] < xtmp) continue;
-                if (x[j][0] == xtmp && j <= i) continue;
+                }
              }
            }
          }
--- a/src/OPENMP/npair_half_multi_old_newton_tri_omp.cpp
+++ b/src/OPENMP/npair_half_multi_old_newton_tri_omp.cpp
@ -15,13 +15,15 @@
 #include "omp_compat.h"
 #include "npair_half_multi_old_newton_tri_omp.h"
 #include "npair_omp.h"
-#include "neigh_list.h"
+
 #include "atom.h"
 #include "atom_vec.h"
 #include "molecule.h"
 #include "domain.h"
 #include "my_page.h"
 #include "error.h"
 #include "force.h"
 #include "molecule.h"
 #include "my_page.h"
 #include "neigh_list.h"
 using namespace LAMMPS_NS;
@ -42,6 +44,7 @@ void NPairHalfMultiOldNewtonTriOmp::build(NeighList *list)
  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
  const int molecular = atom->molecular;
  const int moltemplate = (molecular == Atom::TEMPLATE) ? 1 : 0;
  const double delta = 0.01 * force->angstrom;
  NPAIR_OMP_INIT;
 #if defined(_OPENMP)
@ -50,13 +53,11 @@ void NPairHalfMultiOldNewtonTriOmp::build(NeighList *list)
  NPAIR_OMP_SETUP(nlocal);
  int i,j,k,n,itype,jtype,ibin,which,ns,imol,iatom;
-  tagint tagprev;
+  tagint itag,jtag,tagprev;
  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
  int *neighptr,*s;
  double *cutsq,*distsq;
  // loop over each atom, storing neighbors
  double **x = atom->x;
  int *type = atom->type;
  int *mask = atom->mask;
@ -82,6 +83,7 @@ void NPairHalfMultiOldNewtonTriOmp::build(NeighList *list)
    n = 0;
    neighptr = ipage.vget();
    itag = tag[i];
    itype = type[i];
    xtmp = x[i][0];
    ytmp = x[i][1];
@ -92,13 +94,12 @@ void NPairHalfMultiOldNewtonTriOmp::build(NeighList *list)
      tagprev = tag[i] - iatom - 1;
    }
-    // loop over all atoms in bins, including self, in stencil
+    // loop over all atoms in bins in stencil
-    // skip if i,j neighbor cutoff is less than bin distance
+    // for triclinic, bin stencil is full in all 3 dims
-    // bins below self are excluded from stencil
+    // must use itag/jtag to eliminate half the I/J interactions
-    // pairs for atoms j "below" i are excluded
+    // cannot use I/J exact coord comparison
-    // below = lower z or (equal z and lower y) or (equal zy and lower x)
+    //   b/c transforming orthog -> lambda -> orthog for ghost atoms
-    //         (equal zyx and j <= i)
+    //   with an added PBC offset can shift all 3 coords by epsilon
    // latter excludes self-self interaction but allows superposed atoms
    ibin = atom2bin[i];
    s = stencil_multi_old[itype];
@ -109,12 +110,21 @@ void NPairHalfMultiOldNewtonTriOmp::build(NeighList *list)
      for (j = binhead[ibin+s[k]]; j >= 0; j = bins[j]) {
        jtype = type[j];
        if (cutsq[jtype] < distsq[k]) continue;
        if (j >= nlocal) {
          jtag = tag[j];
          if (itag > jtag) {
            if ((itag+jtag) % 2 == 0) continue;
          } else if (itag < jtag) {
            if ((itag+jtag) % 2 == 1) continue;
          } else {
            if (fabs(x[j][2]-ztmp) > delta) {
              if (x[j][2] < ztmp) continue;
-        if (x[j][2] == ztmp) {
+            } else if (fabs(x[j][1]-ytmp) > delta) {
              if (x[j][1] < ytmp) continue;
-          if (x[j][1] == ytmp) {
+            } else {
              if (x[j][0] < xtmp) continue;
-            if (x[j][0] == xtmp && j <= i) continue;
+            }
          }
        }
--- a/src/OPENMP/npair_half_nsq_newton_omp.cpp
+++ b/src/OPENMP/npair_half_nsq_newton_omp.cpp
@ -15,14 +15,16 @@
 #include "omp_compat.h"
 #include "npair_half_nsq_newton_omp.h"
 #include "npair_omp.h"
-#include "neigh_list.h"
+
 #include "atom.h"
 #include "atom_vec.h"
 #include "domain.h"
 #include "error.h"
 #include "force.h"
 #include "group.h"
 #include "molecule.h"
 #include "domain.h"
 #include "my_page.h"
-#include "error.h"
+#include "neigh_list.h"
 using namespace LAMMPS_NS;
@ -42,6 +44,8 @@ void NPairHalfNsqNewtonOmp::build(NeighList *list)
  const int bitmask = (includegroup) ? group->bitmask[includegroup] : 0;
  const int molecular = atom->molecular;
  const int moltemplate = (molecular == Atom::TEMPLATE) ? 1 : 0;
  const double delta = 0.01 * force->angstrom;
  const int triclinic = domain->triclinic;
  NPAIR_OMP_INIT;
 #if defined(_OPENMP)
@ -54,8 +58,6 @@ void NPairHalfNsqNewtonOmp::build(NeighList *list)
  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
  int *neighptr;
  // loop over each atom, storing neighbors
  double **x = atom->x;
  int *type = atom->type;
  int *mask = atom->mask;
@ -95,7 +97,12 @@ void NPairHalfNsqNewtonOmp::build(NeighList *list)
    }
    // loop over remaining atoms, owned and ghost
    // use itag/jtag comparison to eliminate half the interactions
    // itag = jtag is possible for long cutoffs that include images of self
    // for triclinic, must use delta to eliminate half the I/J interactions
    // cannot use I/J exact coord comparison as for orthog
    //   b/c transforming orthog -> lambda -> orthog for ghost atoms
    //   with an added PBC offset can shift all 3 coords by epsilon
    for (j = i+1; j < nall; j++) {
      if (includegroup && !(mask[j] & bitmask)) continue;
@ -106,6 +113,14 @@ void NPairHalfNsqNewtonOmp::build(NeighList *list)
          if ((itag+jtag) % 2 == 0) continue;
        } else if (itag < jtag) {
          if ((itag+jtag) % 2 == 1) continue;
        } else if (triclinic) {
          if (fabs(x[j][2]-ztmp) > delta) {
            if (x[j][2] < ztmp) continue;
          } else if (fabs(x[j][1]-ytmp) > delta) {
            if (x[j][1] < ytmp) continue;
          } else {
            if (x[j][0] < xtmp) continue;
          }
        } else {
          if (x[j][2] < ztmp) continue;
          if (x[j][2] == ztmp) {
--- a/src/OPENMP/npair_half_respa_bin_newton_tri_omp.cpp
+++ b/src/OPENMP/npair_half_respa_bin_newton_tri_omp.cpp
@ -15,13 +15,15 @@
 #include "omp_compat.h"
 #include "npair_half_respa_bin_newton_tri_omp.h"
 #include "npair_omp.h"
-#include "neigh_list.h"
+
 #include "atom.h"
 #include "atom_vec.h"
 #include "molecule.h"
 #include "domain.h"
 #include "my_page.h"
 #include "error.h"
 #include "force.h"
 #include "molecule.h"
 #include "my_page.h"
 #include "neigh_list.h"
 using namespace LAMMPS_NS;
@ -42,6 +44,7 @@ void NPairHalfRespaBinNewtonTriOmp::build(NeighList *list)
  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
  const int molecular = atom->molecular;
  const int moltemplate = (molecular == Atom::TEMPLATE) ? 1 : 0;
  const double delta = 0.01 * force->angstrom;
  NPAIR_OMP_INIT;
@ -53,12 +56,10 @@ void NPairHalfRespaBinNewtonTriOmp::build(NeighList *list)
  NPAIR_OMP_SETUP(nlocal);
  int i,j,k,n,itype,jtype,ibin,n_inner,n_middle,imol,iatom;
-  tagint tagprev;
+  tagint itag,jtag,tagprev;
  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
  int *neighptr,*neighptr_inner,*neighptr_middle;
  // loop over each atom, storing neighbors
  double **x = atom->x;
  int *type = atom->type;
  int *mask = atom->mask;
@ -111,6 +112,7 @@ void NPairHalfRespaBinNewtonTriOmp::build(NeighList *list)
      neighptr_middle = ipage_middle->vget();
    }
    itag = tag[i];
    itype = type[i];
    xtmp = x[i][0];
    ytmp = x[i][1];
@ -122,20 +124,31 @@ void NPairHalfRespaBinNewtonTriOmp::build(NeighList *list)
    }
    // loop over all atoms in bins in stencil
-    // pairs for atoms j "below" i are excluded
+    // for triclinic, bin stencil is full in all 3 dims
-    // below = lower z or (equal z and lower y) or (equal zy and lower x)
+    // must use itag/jtag to eliminate half the I/J interactions
-    //         (equal zyx and j <= i)
+    // cannot use I/J exact coord comparison
-    // latter excludes self-self interaction but allows superposed atoms
+    //   b/c transforming orthog -> lambda -> orthog for ghost atoms
    //   with an added PBC offset can shift all 3 coords by epsilon
    ibin = atom2bin[i];
    for (k = 0; k < nstencil; k++) {
      for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
        if (j <= i) continue;
        if (j >= nlocal) {
          jtag = tag[j];
          if (itag > jtag) {
            if ((itag+jtag) % 2 == 0) continue;
          } else if (itag < jtag) {
            if ((itag+jtag) % 2 == 1) continue;
          } else {
            if (fabs(x[j][2]-ztmp) > delta) {
              if (x[j][2] < ztmp) continue;
-        if (x[j][2] == ztmp) {
+            } else if (fabs(x[j][1]-ytmp) > delta) {
              if (x[j][1] < ytmp) continue;
-          if (x[j][1] == ytmp) {
+            } else {
              if (x[j][0] < xtmp) continue;
-            if (x[j][0] == xtmp && j <= i) continue;
+            }
          }
        }
--- a/src/OPENMP/npair_half_respa_nsq_newton_omp.cpp
+++ b/src/OPENMP/npair_half_respa_nsq_newton_omp.cpp
@ -15,21 +15,22 @@
 #include "omp_compat.h"
 #include "npair_half_respa_nsq_newton_omp.h"
 #include "npair_omp.h"
-#include "neigh_list.h"
+
 #include "atom.h"
 #include "atom_vec.h"
 #include "domain.h"
 #include "error.h"
 #include "force.h"
 #include "group.h"
 #include "molecule.h"
 #include "domain.h"
 #include "my_page.h"
-#include "error.h"
+#include "neigh_list.h"
 using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
-NPairHalfRespaNsqNewtonOmp::NPairHalfRespaNsqNewtonOmp(LAMMPS *lmp) :
+NPairHalfRespaNsqNewtonOmp::NPairHalfRespaNsqNewtonOmp(LAMMPS *lmp) : NPair(lmp) {}
  NPair(lmp) {}
 /* ----------------------------------------------------------------------
   multiple respa lists
@ -45,6 +46,8 @@ void NPairHalfRespaNsqNewtonOmp::build(NeighList *list)
  const int bitmask = (includegroup) ? group->bitmask[includegroup] : 0;
  const int molecular = atom->molecular;
  const int moltemplate = (molecular == Atom::TEMPLATE) ? 1 : 0;
  const double delta = 0.01 * force->angstrom;
  const int triclinic = domain->triclinic;
  NPAIR_OMP_INIT;
@ -60,8 +63,6 @@ void NPairHalfRespaNsqNewtonOmp::build(NeighList *list)
  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
  int *neighptr,*neighptr_inner,*neighptr_middle;
  // loop over each atom, storing neighbors
  double **x = atom->x;
  int *type = atom->type;
  int *mask = atom->mask;
@ -128,6 +129,12 @@ void NPairHalfRespaNsqNewtonOmp::build(NeighList *list)
    }
    // loop over remaining atoms, owned and ghost
    // use itag/jtag comparison to eliminate half the interactions
    // itag = jtag is possible for long cutoffs that include images of self
    // for triclinic, must use delta to eliminate half the I/J interactions
    // cannot use I/J exact coord comparison as for orthog
    //   b/c transforming orthog -> lambda -> orthog for ghost atoms
    //   with an added PBC offset can shift all 3 coords by epsilon
    for (j = i+1; j < nall; j++) {
      if (includegroup && !(mask[j] & bitmask)) continue;
@ -138,6 +145,14 @@ void NPairHalfRespaNsqNewtonOmp::build(NeighList *list)
          if ((itag+jtag) % 2 == 0) continue;
        } else if (itag < jtag) {
          if ((itag+jtag) % 2 == 1) continue;
        } else if (triclinic) {
          if (fabs(x[j][2]-ztmp) > delta) {
            if (x[j][2] < ztmp) continue;
          } else if (fabs(x[j][1]-ytmp) > delta) {
            if (x[j][1] < ytmp) continue;
          } else {
            if (x[j][0] < xtmp) continue;
          }
        } else {
          if (x[j][2] < ztmp) continue;
          if (x[j][2] == ztmp) {
--- a/src/OPENMP/npair_half_size_bin_newton_tri_omp.cpp
+++ b/src/OPENMP/npair_half_size_bin_newton_tri_omp.cpp
@ -18,6 +18,7 @@
 #include "atom_vec.h"
 #include "domain.h"
 #include "error.h"
 #include "force.h"
 #include "molecule.h"
 #include "my_page.h"
 #include "neigh_list.h"
@ -46,6 +47,7 @@ void NPairHalfSizeBinNewtonTriOmp::build(NeighList *list)
  const int moltemplate = (molecular == Atom::TEMPLATE) ? 1 : 0;
  const int history = list->history;
  const int mask_history = 1 << HISTBITS;
  const double delta = 0.01 * force->angstrom;
  NPAIR_OMP_INIT;
 #if defined(_OPENMP)
@ -54,13 +56,11 @@ void NPairHalfSizeBinNewtonTriOmp::build(NeighList *list)
  NPAIR_OMP_SETUP(nlocal);
  int i,j,jh,k,n,ibin,which,imol,iatom;
-  tagint tagprev;
+  tagint itag,jtag,tagprev;
  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
  double radi,radsum,cutsq;
  int *neighptr;
  // loop over each atom, storing neighbors
  double **x = atom->x;
  double *radius = atom->radius;
  int *type = atom->type;
@ -87,6 +87,7 @@ void NPairHalfSizeBinNewtonTriOmp::build(NeighList *list)
    n = 0;
    neighptr = ipage.vget();
    itag = tag[i];
    xtmp = x[i][0];
    ytmp = x[i][1];
    ztmp = x[i][2];
@ -98,20 +99,31 @@ void NPairHalfSizeBinNewtonTriOmp::build(NeighList *list)
    }
    // loop over all atoms in bins in stencil
-    // pairs for atoms j "below" i are excluded
+    // for triclinic, bin stencil is full in all 3 dims
-    // below = lower z or (equal z and lower y) or (equal zy and lower x)
+    // must use itag/jtag to eliminate half the I/J interactions
-    //         (equal zyx and j <= i)
+    // cannot use I/J exact coord comparison
-    // latter excludes self-self interaction but allows superposed atoms
+    //   b/c transforming orthog -> lambda -> orthog for ghost atoms
    //   with an added PBC offset can shift all 3 coords by epsilon
    ibin = atom2bin[i];
    for (k = 0; k < nstencil; k++) {
      for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
        if (j <= i) continue;
        if (j >= nlocal) {
          jtag = tag[j];
          if (itag > jtag) {
            if ((itag+jtag) % 2 == 0) continue;
          } else if (itag < jtag) {
            if ((itag+jtag) % 2 == 1) continue;
          } else {
            if (fabs(x[j][2]-ztmp) > delta) {
              if (x[j][2] < ztmp) continue;
-        if (x[j][2] == ztmp) {
+            } else if (fabs(x[j][1]-ytmp) > delta) {
              if (x[j][1] < ytmp) continue;
-          if (x[j][1] == ytmp) {
+            } else {
              if (x[j][0] < xtmp) continue;
-            if (x[j][0] == xtmp && j <= i) continue;
+            }
          }
        }
--- a/Show More
+++ b/Show More