Merge branch 'develop' into group-bitmap-accessor

2024-07-30 20:30:37 -04:00
parent 25a4117e67 a54e67bf1c
commit d8d3ddc140
211 changed files with 5877 additions and 1352 deletions
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@ -50,6 +50,7 @@ src/PTM/*             @pmla
 src/QMMM/*            @akohlmey
 src/REACTION/*        @jrgissing
 src/REAXFF/*          @hasanmetin @stanmoore1
+src/RHEO/*            @jtclemm
 src/SCAFACOS/*        @rhalver
 src/SNAP/*            @athomps
 src/SPIN/*            @julient31
--- a/cmake/Modules/LAMMPSUtils.cmake
+++ b/cmake/Modules/LAMMPSUtils.cmake
@ -32,7 +32,13 @@ function(check_omp_h_include)
    set(CMAKE_REQUIRED_INCLUDES ${OpenMP_CXX_INCLUDE_DIRS})
    set(CMAKE_REQUIRED_LINK_OPTIONS ${OpenMP_CXX_FLAGS})
    set(CMAKE_REQUIRED_LIBRARIES ${OpenMP_CXX_LIBRARIES})
-    check_include_file_cxx(omp.h _have_omp_h)
+    # there are all kinds of problems with finding omp.h
+    # for Clang and derived compilers so we pretend it is there.
+    if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
+      set(_have_omp_h TRUE)
+    else()
+      check_include_file_cxx(omp.h _have_omp_h)
+    endif()
  else()
    set(_have_omp_h FALSE)
  endif()
--- a/cmake/Modules/Packages/RHEO.cmake
+++ b/cmake/Modules/Packages/RHEO.cmake
@ -1,2 +1,2 @@
-find_package(GSL 2.7 REQUIRED)
+find_package(GSL 2.6 REQUIRED)
 target_link_libraries(lammps PRIVATE GSL::gsl)
--- a/doc/src/Build_extras.rst
+++ b/doc/src/Build_extras.rst
@ -639,6 +639,9 @@ They must be specified in uppercase.
   *  - AMD_GFX1100
      - GPU
      - AMD GPU RX7900XTX
+   *  - AMD_GFX1103
+      - GPU
+      - AMD Phoenix APU with Radeon 740M/760M/780M/880M/890M
   *  - INTEL_GEN
      - GPU
      - SPIR64-based devices, e.g. Intel GPUs, using JIT
--- a/doc/src/Build_settings.rst
+++ b/doc/src/Build_settings.rst
@ -414,8 +414,8 @@ Read or write compressed files
 If this option is enabled, large files can be read or written with
 compression by ``gzip`` or similar tools by several LAMMPS commands,
 including :doc:`read_data <read_data>`, :doc:`rerun <rerun>`, and
-:doc:`dump <dump>`.  Supported compression tools are currently
-``gzip``, ``bzip2``, ``zstd``, and ``lzma``.
+:doc:`dump <dump>`.  Supported compression tools and algorithms are currently
+``gzip``, ``bzip2``, ``zstd``, ``xz``, ``lz4``, and ``lzma`` (via xz).

 .. tabs::

--- a/doc/src/Commands_bond.rst
+++ b/doc/src/Commands_bond.rst
@ -73,7 +73,7 @@ OPT.

   * :doc:`none <angle_none>`
   * :doc:`zero <angle_zero>`
-   * :doc:`hybrid <angle_hybrid>`
+   * :doc:`hybrid (k) <angle_hybrid>`
   *
   *
   *
@ -101,7 +101,7 @@ OPT.
   * :doc:`mesocnt <angle_mesocnt>`
   * :doc:`mm3 <angle_mm3>`
   * :doc:`quartic (o) <angle_quartic>`
-   * :doc:`spica (o) <angle_spica>`
+   * :doc:`spica (ko) <angle_spica>`
   * :doc:`table (o) <angle_table>`

 .. _dihedral:
@ -119,7 +119,7 @@ OPT.

   * :doc:`none <dihedral_none>`
   * :doc:`zero <dihedral_zero>`
-   * :doc:`hybrid <dihedral_hybrid>`
+   * :doc:`hybrid (k) <dihedral_hybrid>`
   *
   *
   *
@ -157,7 +157,7 @@ OPT.

   * :doc:`none <improper_none>`
   * :doc:`zero <improper_zero>`
-   * :doc:`hybrid <improper_hybrid>`
+   * :doc:`hybrid (k) <improper_hybrid>`
   *
   *
   *
--- a/doc/src/Commands_pair.rst
+++ b/doc/src/Commands_pair.rst
@ -195,7 +195,7 @@ OPT.
   * :doc:`lj/mdf <pair_mdf>`
   * :doc:`lj/relres (o) <pair_lj_relres>`
   * :doc:`lj/spica (gko) <pair_spica>`
-   * :doc:`lj/spica/coul/long (go) <pair_spica>`
+   * :doc:`lj/spica/coul/long (gko) <pair_spica>`
   * :doc:`lj/spica/coul/msm (o) <pair_spica>`
   * :doc:`lj/sf/dipole/sf (go) <pair_dipole>`
   * :doc:`lj/smooth (go) <pair_lj_smooth>`
--- a/doc/src/Howto_lammps_gui.rst
+++ b/doc/src/Howto_lammps_gui.rst
@ -1,5 +1,5 @@
-Using the LAMMPS-GUI
-====================
+Using LAMMPS-GUI
+================

 This document describes **LAMMPS-GUI version 1.6**.

@ -16,54 +16,101 @@ to the online LAMMPS documentation for known LAMMPS commands and styles.

 .. note::

-   Pre-compiled, ready-to-use LAMMPS-GUI executables for Linux (Ubuntu
-   20.04LTS or later and compatible), macOS (version 11 aka Big Sur or
-   later), and Windows (version 10 or later) :ref:`are available
-   <lammps_gui_install>` for download.  None-MPI LAMMPS executables of
-   the same LAMMPS version are included in these packages as well.  The
-   source code for the LAMMPS-GUI is included in the LAMMPS source code
+   Pre-compiled, ready-to-use LAMMPS-GUI executables for Linux x86\_64
+   (Ubuntu 20.04LTS or later and compatible), macOS (version 11 aka Big
+   Sur or later), and Windows (version 10 or later) :ref:`are available
+   <lammps_gui_install>` for download.  Non-MPI LAMMPS executables for
+   running LAMMPS from the command line and :doc:`some LAMMPS tools <Tools>`
+   are also included.
+
+   The source code for LAMMPS-GUI is included in the LAMMPS source code
   distribution and can be found in the ``tools/lammps-gui`` folder.  It
   can be compiled alongside LAMMPS when :doc:`compiling with CMake
   <Build_cmake>`.

 LAMMPS-GUI tries to provide an experience similar to what people
-traditionally would do to run LAMMPS using a command line window
-but just rolled into a single executable:
+traditionally would have when running LAMMPS using a command line window
+and the console LAMMPS executable, but rolled into a single executable:

- editing LAMMPS input files with a text editor
+- writing & editing LAMMPS input files with a text editor
 - run LAMMPS on those input files with selected command line flags
 - use or extract data from the created files and visualize it with
  either a molecular visualization program or a plotting program

-
 That procedure is quite effective for people proficient in using the
 command line, as that allows them to use tools for the individual steps
 that they are most comfortable with.  It is often *required* to adopt
 this workflow when running LAMMPS simulations on high-performance
 computing facilities.

-The main benefit of using the LAMMPS-GUI application instead is that
-many basic tasks can be done directly from the GUI without switching to
-a text console window or using external programs, let alone writing
-scripts to extract data from the generated output.  It also integrates
-well with graphical desktop environments.
+The main benefit of using LAMMPS-GUI is that many basic tasks can be
+done directly from the GUI without switching to a text console window or
+using external programs, let alone writing scripts to extract data from
+the generated output.  It also integrates well with graphical desktop
+environments where the ``.lmp`` filename extension can be registered with
+LAMMPS-GUI as the executable to launch when double clicking on such
+files.  Also, LAMMPS-GUI has support for drag-n-drop, i.e., an input
+file can be selected and then moved and dropped on the LAMMPS-GUI
+executable, and LAMMPS-GUI will launch and read the file into its
+buffer.

 LAMMPS-GUI thus makes it easier for beginners to get started running
 simple LAMMPS simulations.  It is very suitable for tutorials on LAMMPS
 since you only need to learn how to use a single program for most tasks
-and thus time can be saved and people can focus on learning LAMMPS.  It
-is also designed to keep the barrier low when you decide to switch to a
-full featured, standalone programming editor and more sophisticated
-visualization and analysis tools, and run LAMMPS from the command line
-or a batch script.
+and thus time can be saved and people can focus on learning LAMMPS.
+The tutorials at https://lammpstutorials.github.io/ were specifically
+updated for use with LAMMPS-GUI.
+
+Another design goal is to keep the barrier low when replacing part of
+the functionality of LAMMPS-GUI with external tools.

 The following text provides a detailed tour of the features and
-functionality of the LAMMPS-GUI.  Suggestions for new features and
+functionality of LAMMPS-GUI.  Suggestions for new features and
 reports of bugs are always welcome.  You can use :doc:`the same
 channels as for LAMMPS itself <Errors_bugs>` for that purpose.

 -----

+Installing Pre-compiled LAMMPS-GUI Packages
+-------------------------------------------
+
+LAMMPS-GUI is available as pre-compiled binary packages for Linux
+x86\_64, macOS 11 and later, and Windows 10 and later.  Alternately, it
+can be compiled from source.
+
+Windows 10 and later
+^^^^^^^^^^^^^^^^^^^^
+
+After downloading the ``LAMMPS-Win10-64bit-GUI-<version>.exe`` installer
+package, you need to execute it, and start the installation process.
+Since those packages are currently unsigned, you have to enable "Developer Mode"
+in the Windows System Settings to run the installer.
+
+MacOS 11 and later
+^^^^^^^^^^^^^^^^^^
+
+After downloading the ``LAMMPS-macOS-multiarch-GUI-<version>.dmg``
+installer package, you need to double-click it and then, in the window
+that opens, drag the app bundle as indicated into the "Applications"
+folder.  Then follow the instructions in the "README.txt" file to
+get access to the other included executables.
+
+Linux on x86\_64
+^^^^^^^^^^^^^^^^
+
+After downloading and unpacking the
+``LAMMPS-Linux-x86_64-GUI-<version>.tar.gz`` package, you can switch
+into the "LAMMPS_GUI" folder and execute "./lammps-gui" directly.
+
+Compiling from Source
+^^^^^^^^^^^^^^^^^^^^^
+
+There also are instructions for :ref:`compiling LAMMPS-GUI from source
+code <lammps_gui_compilation>` available elsewhere in the manual.
+Compilation from source *requires* using CMake.
+
+-----
+
 Starting LAMMPS-GUI
 -------------------

@ -88,17 +135,24 @@ window is stored when exiting and restored when starting again.
 Opening Files
 ^^^^^^^^^^^^^

-The LAMMPS-GUI application tries to open the first command line argument
-as a LAMMPS input script, further arguments are ignored.  When no
-argument is given, LAMMPS-GUI starts with an empty buffer.  Files can
-also be opened via the ``File`` menu or by drag-and-drop of a file from
-a graphical file manager into the editor window.  Only one file can be
-edited at a time, so opening a new file with a filled buffer closes that
-buffer.  If the buffer has unsaved modifications, you are asked to
-either cancel the operation, discard the changes, or save them.  A
-buffer with modifications can be saved any time from the "File" menu, by
-the keyboard shortcut `Ctrl-S` (`Command-S` on macOS), or by clicking on
-the "Save" button at the very left in the status bar.
+The LAMMPS-GUI application can be launched without command line arguments
+and then starts with an empty buffer in the *Editor* window.  If arguments
+are given, LAMMPS-GUI will use the first command line argument as the file
+name for the *Editor* buffer and read its contents into the buffer, if the
+file exists.  All further arguments are ignored.  Files can also be opened via
+the ``File`` menu, the `Ctrl-O` (`Command-O` on macOS) keyboard shortcut
+or by drag-and-drop of a file from a graphical file manager into the editor
+window.  If a file extension (e.g. ``.lmp``) has been registered with the
+graphical environment to launch LAMMPS-GUI, an existing input file can
+be launched with LAMMPS-GUI through double clicking.
+
+Only one file can be edited at a time, so opening a new file with a
+filled buffer closes that buffer.  If the buffer has unsaved
+modifications, you are asked to either cancel the operation, discard the
+changes, or save them.  A buffer with modifications can be saved any
+time from the "File" menu, by the keyboard shortcut `Ctrl-S`
+(`Command-S` on macOS), or by clicking on the "Save" button at the very
+left in the status bar.

 Running LAMMPS
 ^^^^^^^^^^^^^^
@ -235,20 +289,30 @@ run number that this chart window corresponds to.  Same as for the
 *Output* window, the chart window is replaced on each new run, but the
 behavior can be changed in the preferences dialog.

+.. versionadded:: 1.6
+
+   Support for YAML export added
+
 From the ``File`` menu on the top left, it is possible to save an image
 of the currently displayed plot or export the data in either plain text
 columns (for use by plotting tools like `gnuplot
 <http://www.gnuplot.info/>`_ or `grace
-<https://plasma-gate.weizmann.ac.il/Grace/>`_), or as CSV data which can
-be imported for further processing with Microsoft Excel or `pandas
-<https://pandas.pydata.org/>`_
+<https://plasma-gate.weizmann.ac.il/Grace/>`_), as CSV data which can be
+imported for further processing with Microsoft Excel, `LibreOffice Calc
+<https://www.libreoffice.org/>`_, or with Python via `pandas
+<https://pandas.pydata.org/>`_, or as YAML which can be imported into
+Python with `PyYAML <https://pyyaml.org/>`_ or pandas.

 Thermo output data from successive run commands in the input script is
 combined into a single data set unless the format, number, or names of
 output columns are changed with a :doc:`thermo_style <thermo_style>` or
 a :doc:`thermo_modify <thermo_modify>` command, or the current time step
 is reset with :doc:`reset_timestep <reset_timestep>`, or if a
-:doc:`clear <clear>` command is issued.
+:doc:`clear <clear>` command is issued.  This is where the YAML export
+from the *Charts* window differs from that of the *Output* window:
+here you get the compounded data set starting with the last change of
+output fields or timestep setting, while the export from the log will
+contain *all* YAML output but *segmented* into individual runs.

 Image Slide Show
 ----------------
@ -347,15 +411,16 @@ actual image size, high-quality (SSAO) rendering, anti-aliasing, view
 style, display of box or axes, zoom factor.  The view of the system can
 be rotated horizontally and vertically.  It is also possible to only
 display the atoms within a group defined in the input script (default is
-"all").  After each change, the image is rendered again and the display
-updated.  The small palette icon on the top left is colored while LAMMPS
-is running to render the new image; it is grayed out when LAMMPS is
-finished.  When there are many atoms to render and high quality images
-with anti-aliasing are requested, re-rendering may take several seconds.
-From the ``File`` menu of the image window, the current image can be
-saved to a file (keyboard shortcut `Ctrl-S`) or copied to the clipboard
-(keyboard shortcut `Ctrl-C`) for pasting the image into another
-application.
+"all").  The image can also be re-centered on the center of mass of the
+selected group.  After each change, the image is rendered again and the
+display updated.  The small palette icon on the top left is colored
+while LAMMPS is running to render the new image; it is grayed out when
+LAMMPS is finished.  When there are many atoms to render and high
+quality images with anti-aliasing are requested, re-rendering may take
+several seconds.  From the ``File`` menu of the image window, the
+current image can be saved to a file (keyboard shortcut `Ctrl-S`) or
+copied to the clipboard (keyboard shortcut `Ctrl-C`) for pasting the
+image into another application.

 .. versionadded:: 1.6

@ -427,7 +492,7 @@ Context Specific Help

 |gui-popup1|  |gui-popup2|

-A unique feature of the LAMMPS-GUI is the option to look up the
+A unique feature of LAMMPS-GUI is the option to look up the LAMMPS
 documentation for the command in the current line.  This can be done by
 either clicking the right mouse button or by using the `Ctrl-?` keyboard
 shortcut.  When using the mouse, there are additional entries in the
@ -435,10 +500,16 @@ context menu that open the corresponding documentation page in the
 online LAMMPS documentation in a web browser window.  When using the
 keyboard, the first of those entries is chosen.

+.. versionadded:: 1.6
+
 If the word under the cursor is a file, then additionally the context
 menu has an entry to open the file in a read-only text viewer window.
 This is a convenient way to view the contents of files that are
-referenced in the input.
+referenced in the input.  The file viewer also supports on-the-fly
+decompression based on the file name suffix in a :ref:`similar fashion
+as available with LAMMPS <gzip>`.  If the necessary decompression
+program is missing or the file cannot be decompressed, the viewer window
+will contain a corresponding message.

 Menu
 ----
@ -458,7 +529,7 @@ The ``File`` menu offers the usual options:

 - ``New`` clears the current buffer and resets the file name to ``*unknown*``
 - ``Open`` opens a dialog to select a new file for editing in the *Editor*
- ``View`` opens a dialog to select a file for viewing in a *separate* window (read-only)
+- ``View`` opens a dialog to select a file for viewing in a *separate* window (read-only) with support for on-the-fly decompression as explained above.
 - ``Save`` saves the current file; if the file name is ``*unknown*``
  a dialog will open to select a new file name
 - ``Save As`` opens a dialog to select a new file name (and folder, if
@ -531,12 +602,12 @@ in an ``Image Viewer`` window.

 The ``View in OVITO`` entry will launch `OVITO <https://ovito.org>`_
 with a :doc:`data file <write_data>` containing the current state of
-the system.  This option is only available if the LAMMPS-GUI can find
+the system.  This option is only available if LAMMPS-GUI can find
 the OVITO executable in the system path.

 The ``View in VMD`` entry will launch VMD with a :doc:`data file
 <write_data>` containing the current state of the system.  This option
-is only available if the LAMMPS-GUI can find the VMD executable in the
+is only available if LAMMPS-GUI can find the VMD executable in the
 system path.

 View
@ -559,6 +630,9 @@ a minimal description of LAMMPS-GUI.  The ``LAMMPS-GUI Howto`` entry
 will open this documentation page from the online documentation in a web
 browser window.  The ``LAMMPS Manual`` entry will open the main page of
 the LAMMPS online documentation in a web browser window.
+The ``LAMMPS Tutorial`` entry will open the main page of the set of
+LAMMPS tutorials authored and maintained by Simon Gravelle at
+https://lammpstutorials.github.io/ in a web browser window.

 -----

@ -566,8 +640,8 @@ Preferences
 -----------

 The ``Preferences`` dialog allows customization of the behavior and
-look of the LAMMPS-GUI application.  The settings are grouped and each
-group is displayed within a tab.
+look of LAMMPS-GUI.  The settings are grouped and each group is
+displayed within a tab.

 .. |guiprefs1| image:: JPG/lammps-gui-prefs-general.png
   :width: 24%
@ -744,12 +818,12 @@ available (On macOS use the Command key instead of Ctrl/Control).
     - Reformat line
     - Shift+TAB
     - Show Completions
-   * - Ctrl+Shift+Enter
+   * - Ctrl+Shift+T
+     - LAMMPS Tutorial
+     - Ctrl+Shift+Enter
     - Run File
     -
     -
-     -
-     -

 Further editing keybindings `are documented with the Qt documentation
 <https://doc.qt.io/qt-5/qplaintextedit.html#editing-key-bindings>`_.  In
--- a/doc/src/Howto_pylammps.rst
+++ b/doc/src/Howto_pylammps.rst
@ -6,19 +6,22 @@ PyLammps Tutorial
 Overview
 --------

-``PyLammps`` is a Python wrapper class for LAMMPS which can be created
-on its own or use an existing lammps Python object.  It creates a simpler,
+:py:class:`PyLammps <lammps.PyLammps>` is a Python wrapper class for
+LAMMPS which can be created on its own or use an existing
+:py:class:`lammps Python <lammps.lammps>` object.  It creates a simpler,
 more "pythonic" interface to common LAMMPS functionality, in contrast to
-the ``lammps`` wrapper for the C-style LAMMPS library interface which
-is written using `Python ctypes <ctypes_>`_.  The ``lammps`` wrapper
-is discussed on the :doc:`Python_head` doc page.
+the :py:class:`lammps <lammps.lammps>` wrapper for the LAMMPS :ref:`C
+language library interface API <lammps_c_api>` which is written using
+`Python ctypes <ctypes_>`_.  The :py:class:`lammps <lammps.lammps>`
+wrapper is discussed on the :doc:`Python_head` doc page.

-Unlike the flat ``ctypes`` interface, PyLammps exposes a discoverable
-API.  It no longer requires knowledge of the underlying C++ code
-implementation.  Finally, the ``IPyLammps`` wrapper builds on top of
-``PyLammps`` and adds some additional features for
-`IPython integration <ipython_>`_ into `Jupyter notebooks <jupyter_>`_,
-e.g. for embedded visualization output from :doc:`dump style image <dump_image>`.
+Unlike the flat `ctypes <ctypes_>`_ interface, PyLammps exposes a
+discoverable API.  It no longer requires knowledge of the underlying C++
+code implementation.  Finally, the :py:class:`IPyLammps
+<lammps.IPyLammps>` wrapper builds on top of :py:class:`PyLammps
+<lammps.PyLammps>` and adds some additional features for `IPython
+integration <ipython_>`_ into `Jupyter notebooks <jupyter_>`_, e.g. for
+embedded visualization output from :doc:`dump style image <dump_image>`.

 .. _ctypes: https://docs.python.org/3/library/ctypes.html
 .. _ipython: https://ipython.org/
@ -30,19 +33,22 @@ Comparison of lammps and PyLammps interfaces
 lammps.lammps
 """""""""""""

-* uses ``ctypes``
-* direct memory access to native C++ data
+* uses `ctypes <ctypes_>`_
+* direct memory access to native C++ data with optional support for NumPy arrays
 * provides functions to send and receive data to LAMMPS
+* interface modeled after the LAMMPS :ref:`C language library interface API <lammps_c_api>`
 * requires knowledge of how LAMMPS internally works (C pointers, etc)
+* full support for running Python with MPI using `mpi4py <https://mpi4py.readthedocs.io>`_

 lammps.PyLammps
 """""""""""""""

-* higher-level abstraction built on top of original ctypes interface
+* higher-level abstraction built on *top* of original :py:class:`ctypes based interface <lammps.lammps>`
 * manipulation of Python objects
 * communication with LAMMPS is hidden from API user
 * shorter, more concise Python
 * better IPython integration, designed for quick prototyping
+* designed for serial execution

 Quick Start
 -----------
@ -506,14 +512,26 @@ inside of the IPython notebook.
 Using PyLammps and mpi4py (Experimental)
 ----------------------------------------

-PyLammps can be run in parallel using mpi4py. This python package can be installed using
+PyLammps can be run in parallel using `mpi4py
+<https://mpi4py.readthedocs.io>`_. This python package can be installed
+using

 .. code-block:: bash

   pip install mpi4py

-The following is a short example which reads in an existing LAMMPS input file and
-executes it in parallel.  You can find in.melt in the examples/melt folder.
+.. warning::
+
+   Usually, any :py:class:`PyLammps <lammps.PyLammps>` command must be
+   executed by *all* MPI processes. However, evaluations and querying
+   the system state are only available on MPI rank 0.  Using these
+   functions from other MPI ranks will raise an exception.
+
+The following is a short example which reads in an existing LAMMPS input
+file and executes it in parallel.  You can find in.melt in the
+examples/melt folder.  Please note that
+:py:meth:`PyLammps.eval() <lammps.PyLammps.eval>` is called only from
+MPI rank 0.

 .. code-block:: python

@ -535,10 +553,6 @@ following mpirun command:

   mpirun -np 4 python melt.py

-.. warning::
-
-   Any command must be executed by all MPI processes. However, evaluations and querying the system state is only available on rank 0.
-
 Feedback and Contributing
 -------------------------

--- a/doc/src/JPG/lammps-gui-image.png
+++ b/doc/src/JPG/lammps-gui-image.png
--- a/doc/src/Tools.rst
+++ b/doc/src/Tools.rst
@ -484,23 +484,22 @@ are in the :doc:`Howto_lammps_gui` tutorial Howto page.

 Here are a few highlights of LAMMPS-GUI

- Text editor with syntax highlighting customized for LAMMPS
- Text editor features command completion for known commands and styles
- Text editor will switch working directory to folder of file in buffer
- Text editor will remember up to 5 recent files
+- Text editor with line numbers and syntax highlighting customized for LAMMPS
+- Text editor features command completion and auto-indentation for known commands and styles
+- Text editor will switch its working directory to folder of file in buffer
+- Many adjustable settings and preferences that are persistent including the 5 most recent files
 - Context specific LAMMPS command help via online documentation
 - LAMMPS is running in a concurrent thread, so the GUI remains responsive
- Support for most accelerator packages
 - Progress bar indicates how far a run command is completed
- LAMMPS can be started and stopped with a hotkey
- Screen output is captured in a Log Window
- Thermodynamic output is captured and displayed as line graph in a Chart Window
+- LAMMPS can be started and stopped with a mouse click or a hotkey
+- Screen output is captured in an *Output* Window
+- Thermodynamic output is captured and displayed as line graph in a *Chart* Window
 - Indicator for currently executed command
 - Indicator for line that caused an error
 - Visualization of current state in Image Viewer (via calling :doc:`write_dump image <dump_image>`)
 - Capture of images created via :doc:`dump image <dump_image>` in Slide show window
- Many adjustable settings and preferences that are persistent
 - Dialog to set variables, similar to the LAMMPS command line flag '-v' / '-var'
+- Support for GPU, INTEL, KOKKOS/OpenMP, OPENMP, and OPT accelerator packages

 Parallelization
 ^^^^^^^^^^^^^^^
@ -542,6 +541,8 @@ variable so the executables will be found automatically.  The LAMMPS-GUI
 executable is called ``lammps-gui`` and either takes no arguments or
 attempts to load the first argument as LAMMPS input file.

+.. _lammps_gui_compilation:
+
 Compilation
 ^^^^^^^^^^^

--- a/doc/src/angle_hybrid.rst
+++ b/doc/src/angle_hybrid.rst
@ -1,8 +1,11 @@
 .. index:: angle_style hybrid
+.. index:: angle_style hybrid/kk

 angle_style hybrid command
 ==========================

+Accelerator Variants: *hybrid/kk*
+
 Syntax
 """"""

@ -79,6 +82,10 @@ for specific angle types.

 ----------

+.. include:: accel_styles.rst
+
+----------
+
 Restrictions
 """"""""""""

@ -87,8 +94,9 @@ MOLECULE package.  See the :doc:`Build package <Build_package>` doc page
 for more info.

 Unlike other angle styles, the hybrid angle style does not store angle
-coefficient info for individual sub-styles in a :doc:`binary restart files <restart>`.  Thus when restarting a simulation from a restart
-file, you need to re-specify :doc:`angle_coeff <angle_coeff>` commands.
+coefficient info for individual sub-styles in :doc:`binary restart files
+<restart>` or :doc:`data files <write_data>`.  Thus when restarting a
+simulation, you need to re-specify the angle_coeff commands.

 Related commands
 """"""""""""""""
--- a/doc/src/angle_spica.rst
+++ b/doc/src/angle_spica.rst
@ -1,10 +1,11 @@
 .. index:: angle_style spica
 .. index:: angle_style spica/omp
+.. index:: angle_style spica/kk

 angle_style spica command
 =========================

-Accelerator Variants: *spica/omp*
+Accelerator Variants: *spica/omp*, *spica/kk*

 Syntax
 """"""
--- a/doc/src/bond_hybrid.rst
+++ b/doc/src/bond_hybrid.rst
@ -75,8 +75,9 @@ package.  See the :doc:`Build package <Build_package>` page for more
 info.

 Unlike other bond styles, the hybrid bond style does not store bond
-coefficient info for individual sub-styles in a :doc:`binary restart files <restart>`.  Thus when restarting a simulation from a restart
-file, you need to re-specify bond_coeff commands.
+coefficient info for individual sub-styles in :doc:`binary restart files
+<restart>` or :doc:`data files <write_data>`.  Thus when restarting a
+simulation, you need to re-specify the bond_coeff commands.

 Related commands
 """"""""""""""""
--- a/doc/src/dihedral_hybrid.rst
+++ b/doc/src/dihedral_hybrid.rst
@ -1,8 +1,11 @@
 .. index:: dihedral_style hybrid
+.. index:: dihedral_style hybrid/kk

 dihedral_style hybrid command
 =============================

+Accelerator Variants: *hybrid/kk*
+
 Syntax
 """"""

@ -80,6 +83,10 @@ for specific dihedral types.

 ----------

+.. include:: accel_styles.rst
+
+----------
+
 Restrictions
 """"""""""""

@ -88,8 +95,10 @@ MOLECULE package.  See the :doc:`Build package <Build_package>` doc page
 for more info.

 Unlike other dihedral styles, the hybrid dihedral style does not store
-dihedral coefficient info for individual sub-styles in a :doc:`binary restart files <restart>`.  Thus when restarting a simulation from a
-restart file, you need to re-specify dihedral_coeff commands.
+dihedral coefficient info for individual sub-styles in :doc:`binary
+restart files <restart>` or :doc:`data files <write_data>`.  Thus when
+restarting a simulation, you need to re-specify the dihedral_coeff
+commands.

 Related commands
 """"""""""""""""
--- a/doc/src/fix_meso_move.rst
+++ b/doc/src/fix_meso_move.rst
@ -247,6 +247,11 @@ defined by the :doc:`atom_style sph <atom_style>` command.

 All particles in the group must be mesoscopic SPH/SDPD particles.

+.. versionchanged:: TBD
+
+This fix is incompatible with deformation controls that remap velocity,
+for instance the *remap v* option of :doc:`fix deform <fix_deform>`.
+
 Related commands
 """"""""""""""""

--- a/doc/src/fix_mvv_dpd.rst
+++ b/doc/src/fix_mvv_dpd.rst
@ -97,6 +97,11 @@ These fixes are part of the DPD-MESO package. They are only enabled if
 LAMMPS was built with that package. See the :doc:`Build package
 <Build_package>` page for more info.

+.. versionchanged:: TBD
+
+This fix is incompatible with deformation controls that remap velocity,
+for instance the *remap v* option of :doc:`fix deform <fix_deform>`.
+
 Related commands
 """"""""""""""""

--- a/doc/src/fix_pimd.rst
+++ b/doc/src/fix_pimd.rst
@ -236,7 +236,7 @@ The keyword *fixcom* specifies whether the center-of-mass of the extended ring-p
 Once *fixcom* is set to be *yes*, the center-of-mass velocity will be subtracted from the centroid-mode velocities in each step.

 The keyword *lj* should be used if :doc:`lj units <units>` is used for *fix pimd/langevin*. Typically one may want to use
-reduced units to run the simulation, and then convert the results into some physical units (for example, :doc:`metal units <units>`). In this case, the 5 quantities in the physical mass units are needed: epsilon (energy scale), sigma (length scale), mass, Planck's constant, mvv2e (mass * velocity^2 to energy conversion factor). Planck's constant and mvv2e can be found in src/update.cpp. If there is no need to convert reduced units to physical units, set all these five value to 1.
+reduced units to run the simulation, and then convert the results into some physical units (for example, :doc:`metal units <units>`). In this case, the 5 quantities in the physical mass units are needed: epsilon (energy scale), sigma (length scale), mass, Planck's constant, mvv2e (mass * velocity^2 to energy conversion factor). Planck's constant and mvv2e can be found in src/update.cpp. If there is no need to convert reduced units to physical units, you can omit the keyword *lj* and these five values will be set to 1.

 The PIMD algorithm in LAMMPS is implemented as a hyper-parallel scheme
 as described in :ref:`Calhoun <Calhoun>`.  In LAMMPS this is done by using
--- a/doc/src/fix_rigid_meso.rst
+++ b/doc/src/fix_rigid_meso.rst
@ -353,6 +353,11 @@ defined by the :doc:`atom_style sph <atom_style>` command.

 All particles in the group must be mesoscopic SPH/SDPD particles.

+.. versionchanged:: TBD
+
+This fix is incompatible with deformation controls that remap velocity,
+for instance the *remap v* option of :doc:`fix deform <fix_deform>`.
+
 Related commands
 """"""""""""""""

--- a/doc/src/fix_shake.rst
+++ b/doc/src/fix_shake.rst
@ -27,9 +27,9 @@ Syntax

  .. parsed-literal::

-       *b* values = one or more bond types
-       *a* values = one or more angle types
-       *t* values = one or more atom types
+       *b* values = one or more bond types (may use type labels)
+       *a* values = one or more angle types  (may use type labels)
+       *t* values = one or more atom types (may use type labels)
       *m* value = one or more mass values

 * zero or more keyword/value pairs may be appended
@ -137,7 +137,17 @@ constrained (within a fudge factor of MASSDELTA specified in
 both bonds in the angle are constrained then the angle will also be
 constrained if its type is in the list.

-For all constraints, a particular bond is only constrained if both
+.. versionchanged:: TBD
+
+The types may be given as type labels *only* if there is no atom, bond,
+or angle type label named *b*, *a*, *t*, or *m* defined in the
+simulation.  If such a type label does exist, type labels cannot be used
+as constraint types with these two fixes, because the type labels
+would be incorrectly treated as a new type of constraint instead.
+In that case, LAMMPS will print a warning, type label handling is
+disabled, and numeric types must be used.
+
+For all constraints, a particular bond is only constrained if *both*
 atoms in the bond are in the group specified with the SHAKE fix.

 The degrees-of-freedom removed by SHAKE bonds and angles are accounted
--- a/doc/src/fix_smd_integrate_tlsph.rst
+++ b/doc/src/fix_smd_integrate_tlsph.rst
@ -53,6 +53,11 @@ Restrictions
 This fix is part of the MACHDYN package.  It is only enabled if
 LAMMPS was built with that package.  See the :doc:`Build package <Build_package>` page for more info.

+.. versionchanged:: TBD
+
+This fix is incompatible with deformation controls that remap velocity,
+for instance the *remap v* option of :doc:`fix deform <fix_deform>`.
+
 Related commands
 """"""""""""""""

--- a/doc/src/fix_smd_integrate_ulsph.rst
+++ b/doc/src/fix_smd_integrate_ulsph.rst
@ -61,6 +61,11 @@ Restrictions
 This fix is part of the MACHDYN package.  It is only enabled if
 LAMMPS was built with that package.  See the :doc:`Build package <Build_package>` page for more info.

+.. versionchanged:: TBD
+
+This fix is incompatible with deformation controls that remap velocity,
+for instance the *remap v* option of :doc:`fix deform <fix_deform>`.
+
 Related commands
 """"""""""""""""

--- a/doc/src/improper_hybrid.rst
+++ b/doc/src/improper_hybrid.rst
@ -1,8 +1,11 @@
 .. index:: improper_style hybrid
+.. index:: improper_style hybrid/kk

 improper_style hybrid command
 =============================

+Accelerator Variants: *hybrid/kk*
+
 Syntax
 """"""

@ -79,6 +82,10 @@ types.

 ----------

+.. include:: accel_styles.rst
+
+----------
+
 Restrictions
 """"""""""""

@ -87,9 +94,10 @@ MOLECULE package.  See the :doc:`Build package <Build_package>` doc page
 for more info.

 Unlike other improper styles, the hybrid improper style does not store
-improper coefficient info for individual sub-styles in a :doc:`binary restart files <restart>`.
-Thus when restarting a simulation from a
-restart file, you need to re-specify improper_coeff commands.
+improper coefficient info for individual sub-styles in :doc:`binary
+restart files <restart>` or :doc:`data files <write_data>`.  Thus when
+restarting a simulation, you need to re-specify the improper_coeff
+commands.

 Related commands
 """"""""""""""""
--- a/doc/src/neigh_modify.rst
+++ b/doc/src/neigh_modify.rst
@ -32,7 +32,7 @@ Syntax
         group-ID = only build pair neighbor lists for atoms in this group
       *exclude* values:
         *type* M N
-           M,N = exclude if one atom in pair is type M, other is type N
+           M,N = exclude if one atom in pair is type M, other is type N (M and N may be type labels)
         *group* group1-ID group2-ID
           group1-ID,group2-ID = exclude if one atom is in 1st group, other in 2nd
         *molecule/intra* group-ID
@ -159,15 +159,19 @@ sample scenarios where this is useful:
 * When one or more rigid bodies are specified, interactions within each
  body can be turned off to save needless computation.  See the :doc:`fix rigid <fix_rigid>` command for more details.

-The *exclude type* option turns off the pairwise interaction if one
-atom is of type M and the other of type N.  M can equal N.  The
-*exclude group* option turns off the interaction if one atom is in the
-first group and the other is the second.  Group1-ID can equal
-group2-ID.  The *exclude molecule/intra* option turns off the
-interaction if both atoms are in the specified group and in the same
-molecule, as determined by their molecule ID.  The *exclude
-molecule/inter* turns off the interaction between pairs of atoms that
-have different molecule IDs and are both in the specified group.
+.. versionchanged:: TBD
+
+   Support for type labels was added.
+
+The *exclude type* option turns off the pairwise interaction if one atom
+is of type M and the other of type N.  M can equal N.  The *exclude
+group* option turns off the interaction if one atom is in the first
+group and the other is in the second.  Group1-ID can equal group2-ID.  The
+*exclude molecule/intra* option turns off the interaction if both atoms
+are in the specified group and in the same molecule, as determined by
+their molecule ID.  The *exclude molecule/inter* option turns off the
+interaction between pairs of atoms that have different molecule IDs and
+are both in the specified group.

 Each of the exclude options can be specified multiple times.  The
 *exclude type* option is the most efficient option to use; it requires
@ -219,34 +223,34 @@ atom can have.
 The *binsize* option allows you to specify what size of bins will be
 used in neighbor list construction to sort and find neighboring atoms.
 By default, for :doc:`neighbor style bin <neighbor>`, LAMMPS uses bins
-that are 1/2 the size of the maximum pair cutoff.  For :doc:`neighbor style multi <neighbor>`,
-the bins are 1/2 the size of the collection interaction cutoff.
-Typically these are good values for minimizing the time for
-neighbor list construction.  This setting overrides the default.
-If you make it too big, there is little overhead due to
+that are 1/2 the size of the maximum pair cutoff.  For :doc:`neighbor
+style multi <neighbor>`, the bins are 1/2 the size of the collection
+interaction cutoff.  Typically these are good values for minimizing the
+time for neighbor list construction.  This setting overrides the
+default.  If you make it too big, there is little overhead due to
 looping over bins, but more atoms are checked.  If you make it too
-small, the optimal number of atoms is checked, but bin overhead goes
-up.  If you set the binsize to 0.0, LAMMPS will use the default
-binsize of 1/2 the cutoff.
+small, the optimal number of atoms is checked, but bin overhead goes up.
+If you set the binsize to 0.0, LAMMPS will use the default binsize of
+1/2 the cutoff.

 The *collection/type* option allows you to define collections of atom
-types, used by the *multi* neighbor mode. By grouping atom types with
-similar physical size or interaction cutoff lengths, one may be able
-to improve performance by reducing
-overhead. You must first specify the number of collections N to be
-defined followed by N lists of types. Each list consists of a series of type
-ranges separated by commas. The range can be specified as a
-single numeric value, or a wildcard asterisk can be used to specify a range
-of values.  This takes the form "\*" or "\*n" or "n\*" or "m\*n".  For
-example, if M = the number of atom types, then an asterisk with no numeric
-values means all types from 1 to M.  A leading asterisk means all types
-from 1 to n (inclusive).  A trailing asterisk means all types from n to M
-(inclusive).  A middle asterisk means all types from m to n (inclusive).
-Note that all atom types must be included in exactly one of the N collections.
+types, used by the *multi* neighbor mode.  By grouping atom types with
+similar physical size or interaction cutoff lengths, one may be able to
+improve performance by reducing overhead.  You must first specify the
+number of collections N to be defined followed by N lists of types.
+Each list consists of a series of type ranges separated by commas. The
+range can be specified as a single numeric value, or a wildcard asterisk
+can be used to specify a range of values.  This takes the form "\*" or
+"\*n" or "n\*" or "m\*n".  For example, if M = the number of atom types,
+then an asterisk with no numeric values means all types from 1 to M.  A
+leading asterisk means all types from 1 to n (inclusive).  A trailing
+asterisk means all types from n to M (inclusive).  A middle asterisk
+means all types from m to n (inclusive).  Note that all atom types must
+be included in exactly one of the N collections.

 The *collection/interval* option provides a similar capability.  This
 command allows a user to define collections by specifying a series of
-cutoff intervals. LAMMPS will automatically sort atoms into these
+cutoff intervals.  LAMMPS will automatically sort atoms into these
 intervals based on their type-dependent cutoffs or their finite size.
 You must first specify the number of collections N to be defined
 followed by N values representing the upper cutoff of each interval.
--- a/doc/src/pair_born_gauss.rst
+++ b/doc/src/pair_born_gauss.rst
@ -19,7 +19,7 @@ Examples
 .. code-block:: LAMMPS

   pair_style born/gauss 10.0
-   pair_coeff 1 1 1 1 8.2464e13 12.48 0.042644277 0.44 3.56
+   pair_coeff 1 1 8.2464e13 12.48 0.042644277 0.44 3.56

 Description
 """""""""""
--- a/doc/src/pair_hybrid.rst
+++ b/doc/src/pair_hybrid.rst
@ -479,11 +479,12 @@ For the hybrid pair styles, the list of sub-styles and their respective
 settings are written to :doc:`binary restart files <restart>`, so a
 :doc:`pair_style <pair_style>` command does not need to specified in an
 input script that reads a restart file.  However, the coefficient
-information is not stored in the restart file.  Thus, pair_coeff
-commands need to be re-specified in the restart input script.  For pair
-style *hybrid/scaled* also the names of any variables used as scale
-factors are restored, but not the variables themselves, so those may
-need to be redefined when continuing from a restart.
+information is not stored in the restart file.  The same is true for
+:doc:`data files <write_data>`.  Thus, pair_coeff commands need to be
+re-specified in the restart input script.  For pair style
+*hybrid/scaled* also the names of any variables used as scale factors
+are restored, but not the variables themselves, so those may need to be
+redefined when continuing from a restart.

 These pair styles support the use of the *inner*, *middle*, and
 *outer* keywords of the :doc:`run_style respa <run_style>` command, if
--- a/doc/src/pair_spica.rst
+++ b/doc/src/pair_spica.rst
@ -5,6 +5,7 @@
 .. index:: pair_style lj/spica/coul/long
 .. index:: pair_style lj/spica/coul/long/gpu
 .. index:: pair_style lj/spica/coul/long/omp
+.. index:: pair_style lj/spica/coul/long/kk
 .. index:: pair_style lj/spica/coul/msm
 .. index:: pair_style lj/spica/coul/msm/omp

@ -16,7 +17,7 @@ Accelerator Variants: *lj/spica/gpu*, *lj/spica/kk*, *lj/spica/omp*
 pair_style lj/spica/coul/long command
 =====================================

-Accelerator Variants: *lj/spica/coul/long/gpu*, *lj/spica/coul/long/omp*
+Accelerator Variants: *lj/spica/coul/long/gpu*, *lj/spica/coul/long/omp*, *lj/spica/coul/long/kk*

 pair_style lj/spica/coul/msm command
 ====================================
--- a/doc/src/write_data.rst
+++ b/doc/src/write_data.rst
@ -51,10 +51,12 @@ value.

   The write_data command may not always write all coefficient settings
   to the corresponding Coeff sections of the data file.  This can have
-   one of multiple reasons. 1) A few styles may be missing the code that
-   would write those sections (if you come across one, please notify
-   the LAMMPS developers). 2) Some pair styles require a single pair_coeff
-   statement and those are not compatible with data files. 3) The
+   one of multiple reasons.  1) The style may be a hybrid style. In that
+   case *no* coeff information is written.  2) A few styles may be
+   missing the code that would write those sections (this is rare these
+   days, but if you come across one, please notify the LAMMPS
+   developers).  3) Some pair styles require a single pair_coeff
+   statement and those are not compatible with data files.  4) The
   default for write_data is to write a PairCoeff section, which has
   only entries for atom types i == j. The remaining coefficients would
   be inferred through the currently selected mixing rule.  If there has
--- a/doc/utils/sphinx-config/false_positives.txt
+++ b/doc/utils/sphinx-config/false_positives.txt
@ -1355,6 +1355,7 @@ Grama
 grana
 granregion
 graphene
+Gravelle
 Greathouse
 greenyellow
 Greffet
@ -3080,6 +3081,7 @@ qx
 qy
 qz
 Rackers
+Radeon
 radi
 radialscreened
 radialscreenedspin
--- a/examples/PACKAGES/cgspica/sds-monolayer/in.sds-regular
+++ b/examples/PACKAGES/cgspica/sds-monolayer/in.sds-regular
@ -5,9 +5,9 @@ dimension       3
 atom_style      full
 processors  * * 1

-pair_style lj/sdk/coul/long 15.0 # compatible with "lj/spica/coul/long"
+pair_style      lj/spica/coul/long 15.0
 bond_style      harmonic
-angle_style     sdk # compatible with "spica"
+angle_style     spica 
 special_bonds   lj/coul 0.0 0.0 1.0

 read_data       data.sds.gz
--- a/examples/charmmfsw/charmmff.cmap
+++ b/examples/charmmfsw/charmmff.cmap
@ -0,0 +1,340 @@
+#CMAP for C NH1 CT1 C NH1 CT1 C NH1; id=1
+#phi = -180.000000
+     0.130000     0.770000     0.970000     1.250000     2.120000
+     2.720000     2.090000     1.790000     0.780000    -0.690000
+     1.000000    -2.200000    -4.830000    -4.820000    -4.910000
+    -3.590000    -2.770000    -2.780000    -2.450000    -2.350000
+    -2.340000    -1.520000    -0.950000    -0.040000
+
+#phi = -165.000000
+    -0.130000     1.380000     1.580000     1.870000     2.400000
+     2.490000     2.440000     1.930000     1.090000     0.640000
+     0.260000    -2.800000    -4.010000    -4.140000    -3.420000
+    -2.600000    -2.300000    -1.500000    -1.100000    -0.860000
+    -0.640000    -0.210000    -1.080000    -1.120000
+
+#phi = -150.000000
+     0.080000     1.420000     1.620000     2.050000     2.650000
+     2.720000     2.320000     1.990000     1.560000     2.460000
+    -0.230000    -1.820000    -2.580000    -3.010000    -2.550000
+    -1.890000    -1.350000    -0.730000     0.070000    -0.230000
+    -0.770000    -1.280000    -1.290000    -0.820000
+
+#phi = -135.000000
+     0.930000     1.520000     2.240000     2.550000     3.110000
+     2.920000     2.460000     2.190000     2.060000     1.850000
+     0.120000    -1.180000    -2.000000    -2.280000    -1.960000
+    -1.340000    -0.930000     0.020000     0.310000    -0.520000
+    -1.150000    -0.980000    -0.570000    -0.440000
+
+#phi = -120.000000
+     1.360000     1.960000     2.700000     3.040000     3.700000
+     3.560000     2.640000     2.770000     2.720000     1.630000
+     0.710000    -0.790000    -2.120000    -2.630000    -1.800000
+    -0.430000    -0.060000     0.440000     0.910000    -0.550000
+    -0.970000    -0.860000    -0.250000     0.450000
+
+#phi = -105.000000
+     2.050000     2.540000     2.820000     3.090000     3.370000
+     3.550000     3.070000     2.900000     2.960000     2.120000
+     0.910000    -0.820000    -2.090000    -2.240000    -1.460000
+     0.210000     0.080000     0.770000     1.040000    -0.120000
+    -0.320000    -0.160000     0.310000     0.730000
+
+#phi = -90.000000
+     1.450000     2.750000     2.740000     3.160000     3.450000
+     3.340000     3.180000     3.900000     3.340000     2.440000
+     0.910000    -0.610000    -1.510000    -1.620000    -0.960000
+    -0.020000     0.420000     0.910000     0.460000     0.150000
+    -0.070000     0.020000     0.280000     0.750000
+
+#phi = -75.000000
+     1.380000     3.350000     2.350000     3.060000     3.810000
+     3.700000     3.580000     4.210000     3.540000     1.690000
+     0.100000    -0.680000    -0.120000    -0.430000    -0.600000
+     0.230000     0.420000     0.300000     0.550000     0.190000
+    -0.250000    -0.190000    -0.250000     0.470000
+
+#phi = -60.000000
+     0.240000     1.230000     1.720000     3.170000     4.210000
+     4.390000     4.280000     3.670000     2.270000    -0.480000
+    -0.410000    -0.040000    -0.360000    -0.820000    -0.170000
+     0.140000     0.270000     0.320000     0.310000    -0.670000
+    -0.950000    -1.530000    -1.480000    -0.200000
+
+#phi = -45.000000
+    -1.180000     0.080000     2.350000     4.210000     5.380000
+     5.390000     4.380000     2.460000     1.120000     0.110000
+     0.010000    -0.150000    -0.800000    -0.580000     0.080000
+     0.270000    -0.050000     0.380000     0.250000    -0.890000
+    -1.580000    -1.950000    -1.980000    -2.000000
+
+#phi = -30.000000
+    -1.170000     1.070000     4.180000     6.740000     6.070000
+     4.810000     2.780000     1.320000     0.770000    -0.010000
+     0.280000    -0.710000     1.310000     1.520000     1.920000
+     2.220000     0.190000     0.530000     0.330000    -1.600000
+    -2.850000    -3.550000    -3.280000    -2.660000
+
+#phi = -15.000000
+     0.290000     5.590000     3.730000     3.220000     3.270000
+     2.520000     1.590000     1.380000     0.860000     0.660000
+     1.620000     0.850000     0.510000     0.740000     1.020000
+     1.620000    -0.340000     0.180000    -0.610000    -2.560000
+    -3.790000    -3.810000    -3.160000    -1.750000
+
+#phi = 0.000000
+     2.830000     0.790000     0.320000     0.480000     0.630000
+     0.980000     1.240000     1.670000     1.650000     2.520000
+     1.610000     0.780000     0.120000     0.070000     0.120000
+    -1.570000    -1.210000    -1.930000    -2.600000    -3.790000
+    -3.930000    -3.620000    -2.680000    -0.920000
+
+#phi = 15.000000
+    -0.780000    -1.910000    -2.050000    -1.850000    -1.050000
+     0.180000     1.680000     2.220000     1.360000     2.450000
+     1.440000     0.680000    -0.240000    -0.540000    -0.790000
+    -2.180000    -3.210000    -4.350000    -3.940000    -3.910000
+    -3.460000    -2.770000     1.760000     0.310000
+
+#phi = 30.000000
+    -2.960000    -3.480000    -3.440000    -2.400000    -1.130000
+     0.340000     1.430000     1.390000     0.970000     2.460000
+     1.520000     0.550000    -0.410000    -1.480000    -3.580000
+    -4.130000    -4.560000    -4.440000    -3.580000    -2.960000
+    -1.960000    -1.070000    -1.600000    -2.450000
+
+#phi = 45.000000
+    -4.020000    -3.840000    -3.370000    -2.330000    -0.980000
+     0.360000     0.810000     0.750000     0.500000     1.900000
+     0.770000    -0.420000    -3.290000    -3.910000    -4.520000
+    -4.890000    -3.850000    -4.150000    -2.670000    -2.370000
+    -2.860000    -3.420000    -3.670000    -3.600000
+
+#phi = 60.000000
+    -3.350000    -2.980000    -2.320000    -1.240000    -0.260000
+     0.720000     0.670000     0.440000     2.400000     1.630000
+    -2.010000    -3.310000    -3.990000    -4.530000    -4.850000
+    -3.770000    -3.940000    -3.890000    -2.610000    -3.510000
+    -3.760000    -3.640000    -3.450000    -3.340000
+
+#phi = 75.000000
+    -2.250000    -1.640000    -1.010000     0.040000     0.640000
+     0.820000     0.520000    -0.010000    -0.370000    -1.190000
+    -2.390000    -3.380000    -4.500000    -5.590000    -5.510000
+    -4.940000    -3.830000    -3.840000    -3.700000    -4.150000
+    -4.170000    -3.730000    -3.740000    -2.620000
+
+#phi = 90.000000
+    -1.720000    -1.180000    -0.430000     0.280000     0.810000
+     0.800000     0.480000    -0.340000    -0.790000    -1.770000
+    -2.810000    -3.800000    -5.220000    -6.280000    -6.580000
+    -5.640000    -5.060000    -4.020000    -4.150000    -4.470000
+    -4.100000    -3.770000    -3.160000    -2.650000
+
+#phi = 105.000000
+    -1.850000    -1.090000    -0.450000     0.130000     1.010000
+     0.880000     0.490000    -0.220000    -0.860000    -1.680000
+    -3.010000    -4.130000    -5.990000    -6.860000    -6.830000
+    -5.850000    -3.860000    -4.860000    -4.910000    -4.720000
+    -4.600000    -4.090000    -3.270000    -2.410000
+
+#phi = 120.000000
+    -1.970000    -1.120000    -0.540000    -0.150000     0.760000
+     1.040000     0.760000     0.310000    -0.330000    -1.870000
+    -3.370000    -5.010000    -6.120000    -7.050000    -6.980000
+    -3.700000    -4.510000    -5.090000    -5.420000    -4.850000
+    -4.440000    -4.000000    -3.420000    -2.750000
+
+#phi = 135.000000
+    -2.110000    -1.170000    -0.320000    -0.010000     0.320000
+     1.090000     0.940000     0.630000    -0.170000    -1.830000
+    -3.470000    -4.950000    -6.110000    -1.920000    -4.050000
+    -5.000000    -5.000000    -4.840000    -4.890000    -4.300000
+    -4.490000    -4.440000    -4.160000    -3.180000
+
+#phi = 150.000000
+    -1.760000    -0.400000     0.020000     0.360000     0.630000
+     1.260000     1.360000     0.950000    -0.070000    -1.480000
+    -3.150000     1.840000    -1.760000    -5.090000    -5.740000
+    -5.390000    -4.780000    -4.190000    -4.120000    -4.040000
+    -4.130000    -4.030000    -4.030000    -2.940000
+
+#phi = 165.000000
+    -0.810000    -0.070000     0.380000     0.540000     1.280000
+     1.640000     1.700000     1.520000     0.630000    -1.090000
+    -2.740000    -0.740000    -4.560000    -6.410000    -5.890000
+    -5.140000    -4.190000    -3.670000    -3.840000    -3.560000
+    -3.550000    -3.250000    -2.750000    -1.810000
+
+
+#CMAP for C NH1 CT2 C NH1 CT2 C NH1; id=2
+#phi = -180.000000
+     0.235350     0.182300     0.177200     0.396800     0.859400
+     1.489700     2.092500     2.297700     1.808600     0.696200
+    -0.563300    -1.432700    -1.015100     1.426300    -0.564300
+     0.696200     1.808200     2.301700     2.092600     1.489100
+     0.859500     0.396900     0.176900     0.182400
+
+#phi = -165.000000
+     0.020100    -0.203800    -0.269700     0.014200     0.620800
+     1.392400     2.046200     2.188200     1.683900     0.688500
+    -0.373700    -0.703500     0.837800     3.704000    -0.730100
+     0.594100     1.713100     2.205800     2.026400     1.529800
+     1.027400     0.623800     0.348400     0.182800
+
+#phi = -150.000000
+    -0.533600    -0.807400    -0.804600    -0.379800     0.365300
+     1.168000     1.641000     1.618100     1.302200     0.615100
+     0.065700     0.738500     2.959500    -2.036600    -0.934600
+     0.407900     1.517000     1.984800     1.833100     1.435200
+     0.995600     0.562200     0.150600    -0.209000
+
+#phi = -135.000000
+    -1.208500    -1.429400    -1.319200    -0.817500    -0.112400
+     0.454400     0.737600     0.879300     0.850100     0.670300
+     0.943500    -2.651200    -2.829400    -2.199100    -1.065700
+     0.279600     1.322000     1.668300     1.521300     1.193900
+     0.765300     0.246000    -0.315500    -0.823200
+
+#phi = -120.000000
+    -1.789100    -1.965500    -1.860700    -1.447900    -0.896500
+    -0.401000    -0.015100     0.321300     0.634600     0.976300
+    -1.977500    -2.883200    -2.848500    -2.137900    -0.960300
+     0.308700     1.098100     1.245300     1.133600     0.881800
+     0.448200    -0.153900    -0.823700    -1.404300
+
+#phi = -105.000000
+    -2.246700    -2.487000    -2.473700    -2.135600    -1.577700
+    -0.980600    -0.429100     0.144700     0.734000    -0.918300
+    -2.299200    -2.882200    -2.668600    -1.847100    -0.719800
+     0.107000     0.496000     0.553500     0.584300     0.494000
+     0.098300    -0.529800    -1.237900    -1.840100
+
+#phi = -90.000000
+    -2.851100    -3.181100    -3.199500    -2.785300    -2.054300
+    -1.242900    -0.476500     0.288100    -0.045300    -1.470600
+    -2.558800    -2.869400    -2.450300    -1.582200    -0.930800
+    -0.426400    -0.022700     0.000000    -0.097400    -0.136100
+    -0.439600    -1.038600    -1.741000    -2.373200
+
+#phi = -75.000000
+    -3.961800    -4.268200    -4.109000    -3.364700    -2.252200
+    -1.140400    -0.209800     0.487300    -0.746200    -2.127700
+    -2.932100    -2.898500    -2.247900    -1.730400    -1.177200
+    -0.448200     0.034900    -0.073300    -0.531600    -0.933300
+    -1.360700    -2.009200    -2.745700    -3.424900
+
+#phi = -60.000000
+    -5.408000    -5.355100    -4.640100    -3.283200    -1.710200
+    -0.423800     0.354400    -0.103700    -1.577700    -2.828300
+    -3.151200    -2.649200    -2.183000    -1.761200    -0.981700
+    -0.174700     0.262600     0.039200    -0.663000    -1.530700
+    -2.478200    -3.465600    -4.334200    -5.011200
+
+#phi = -45.000000
+    -6.093200    -5.298400    -3.816620    -1.922530    -0.196160
+     0.768200     0.568500    -0.831300    -2.343900    -3.037100
+    -2.663700    -2.191100    -2.022900    -1.438500    -0.649000
+     0.077000     0.441500     0.257500    -0.491100    -1.820600
+    -3.473100    -4.895200    -5.790700    -6.205900
+
+#phi = -30.000000
+    -5.258225    -3.675795    -1.631110     0.430085     1.496470
+     0.318200    -0.555100    -1.695500    -2.434200    -2.192600
+    -1.691300    -1.890000    -1.708500    -1.206300    -0.567400
+     0.054300     0.497200     0.599600    -0.171000    -2.137600
+    -4.237000    -5.584100    -6.135100    -6.067000
+
+#phi = -15.000000
+    -3.161820    -0.902080     1.432450    -1.452885    -1.560780
+    -1.665600    -1.783100    -1.755100    -1.329300    -0.731100
+    -1.317000    -1.662800    -1.601200    -1.294900    -0.817300
+    -0.197100     0.549500     0.850400    -0.689700    -2.819900
+    -4.393000    -5.111500    -5.205690    -4.654785
+
+#phi = 0.000000
+     0.034035    -2.349860    -3.412065    -3.620070    -3.450950
+    -2.875650    -1.787800    -0.541250     0.410450    -0.372500
+    -1.126850    -1.498450    -1.608700    -1.498450    -1.126850
+    -0.372500     0.410450    -0.541250    -1.787800    -2.875650
+    -3.450950    -3.620070    -3.412065    -2.349860
+
+#phi = 15.000000
+    -3.162345    -4.654785    -5.205690    -5.111500    -4.393000
+    -2.819900    -0.689700     0.850400     0.549500    -0.197100
+    -0.817300    -1.294900    -1.601200    -1.662800    -1.317000
+    -0.731100    -1.329300    -1.755100    -1.783100    -1.665600
+    -1.560780    -1.452885     1.432450    -0.902080
+
+#phi = 30.000000
+    -5.258220    -6.067000    -6.135100    -5.584100    -4.237000
+    -2.137600    -0.171000     0.599600     0.497200     0.054300
+    -0.567400    -1.206300    -1.708500    -1.890000    -1.691300
+    -2.192600    -2.434200    -1.695500    -0.555100     0.318200
+     1.496470     0.430085    -1.631110    -3.675795
+
+#phi = 45.000000
+    -6.093300    -6.205900    -5.790700    -4.895200    -3.473100
+    -1.820600    -0.491100     0.257500     0.441500     0.077000
+    -0.649000    -1.438500    -2.022900    -2.191100    -2.663700
+    -3.037100    -2.343900    -0.831300     0.568500     0.768200
+    -0.196160    -1.922530    -3.816620    -5.298400
+
+#phi = 60.000000
+    -5.407500    -5.011200    -4.334200    -3.465600    -2.478200
+    -1.530700    -0.663000     0.039200     0.262600    -0.174700
+    -0.981700    -1.761200    -2.183000    -2.649200    -3.151200
+    -2.828300    -1.577700    -0.103700     0.354400    -0.423800
+    -1.710200    -3.283200    -4.640100    -5.355100
+
+#phi = 75.000000
+    -3.961900    -3.424900    -2.745700    -2.009200    -1.360700
+    -0.933300    -0.531600    -0.073300     0.034900    -0.448200
+    -1.177200    -1.730400    -2.247900    -2.898500    -2.932100
+    -2.127700    -0.746200     0.487300    -0.209800    -1.140400
+    -2.252200    -3.364700    -4.109000    -4.268200
+
+#phi = 90.000000
+    -2.854500    -2.373200    -1.741000    -1.038600    -0.439600
+    -0.136100    -0.097400     0.000000    -0.022700    -0.426400
+    -0.930800    -1.582200    -2.450300    -2.869400    -2.558800
+    -1.470600    -0.045300     0.288100    -0.476500    -1.242900
+    -2.054300    -2.785300    -3.199500    -3.181100
+
+#phi = 105.000000
+    -2.246400    -1.840100    -1.237900    -0.529800     0.098300
+     0.494000     0.584300     0.553500     0.496000     0.107000
+    -0.719800    -1.847100    -2.668600    -2.882200    -2.299200
+    -0.918300     0.734000     0.144700    -0.429100    -0.980600
+    -1.577700    -2.135600    -2.473700    -2.487000
+
+#phi = 120.000000
+    -1.788800    -1.404300    -0.823700    -0.153900     0.448200
+     0.881800     1.133600     1.245300     1.098100     0.308700
+    -0.960300    -2.137900    -2.848500    -2.883200    -1.977500
+     0.976300     0.634600     0.321300    -0.015100    -0.401000
+    -0.896500    -1.447900    -1.860700    -1.965500
+
+#phi = 135.000000
+    -1.208900    -0.823200    -0.315500     0.246000     0.765300
+     1.193900     1.521300     1.668300     1.322000     0.279600
+    -1.065700    -2.199100    -2.829400    -2.651200     0.943500
+     0.670300     0.850100     0.879300     0.737600     0.454400
+    -0.112400    -0.817500    -1.319200    -1.429400
+
+#phi = 150.000000
+    -0.533400    -0.209000     0.150600     0.562200     0.995600
+     1.435200     1.833100     1.984800     1.517000     0.407900
+    -0.934600    -2.036600     2.959500     0.738500     0.065700
+     0.615100     1.302200     1.618100     1.641000     1.168000
+     0.365300    -0.379800    -0.804600    -0.807400
+
+#phi = 165.000000
+     0.019900     0.182800     0.348400     0.623800     1.027400
+     1.529800     2.026400     2.205800     1.713100     0.594100
+    -0.730100     3.704000     0.837800    -0.703500    -0.373700
+     0.688500     1.683900     2.188200     2.046200     1.392400
+     0.620800     0.014200    -0.269700    -0.203800
+
+
--- a/examples/charmmfsw/data.charmmfsw.gz
+++ b/examples/charmmfsw/data.charmmfsw.gz
--- a/examples/charmmfsw/in.charmmfsw
+++ b/examples/charmmfsw/in.charmmfsw
@ -0,0 +1,46 @@
+# charmmfsw example generated by https://charmm-gui.org/
+# from PDB structure 1HVN (https://www.rcsb.org/structure/1HVN)
+#
+# Dependencies: packages MOLECULE / KSPACE / RIGID
+# To test with KOKKOS: lmp -k on g 1 -sf kk -pk kokkos neigh half -in in.charmmfsw
+
+units           real
+boundary        p p p
+
+newton          off
+pair_style      lj/charmmfsw/coul/long 10 12
+pair_modify     mix arithmetic
+kspace_style    pppm 1e-6
+
+atom_style      full
+bond_style      harmonic
+angle_style     charmm
+dihedral_style  charmmfsw
+special_bonds   charmm
+improper_style  harmonic
+timestep        2
+
+fix             cmap all cmap charmmff.cmap
+fix_modify      cmap energy yes
+read_data       data.charmmfsw.gz fix cmap crossterm CMAP
+
+neighbor        2 bin
+neigh_modify    delay 2 every 1
+
+fix             1 all shake 1e-6 100 100 m 1.008 a 142
+fix             2        all      nvt temp 303.15 303.15 100.0
+
+# for visualization with LAMMPS-GUI
+group           water type 18 60
+group           nowater subtract all water
+group           ions type 63 64
+group           other subtract all water ions
+
+# dump            viz      other    image 10 myimage-*.ppm element type size 800 800 zoom 2.82954 shiny 0.5 fsaa yes bond none none view 20 10 box no 0.0 axes no 0.0 0.0 center s 0.521318 0.489856 0.489856
+# dump_modify viz pad 9 boxcolor darkblue backcolor darkgray element H H H H H H H H H H H H H H H H H H C C C C C C C C C C C C C C C C C C C N N N N N N N N N N N N N N O O O O O O O O O P S Cl K  adiam 1 1.92 adiam 2 1.92 adiam 3 1.92 adiam 4 1.92 adiam 5 1.92 adiam 6 1.92 adiam 7 1.92 adiam 8 1.92 adiam 9 1.92 adiam 10 1.92 adiam 11 1.92 adiam 12 1.92 adiam 13 1.92 adiam 14 1.92 adiam 15 1.92 adiam 16 1.92 adiam 17 1.92 adiam 18 1.92 adiam 19 2.72 adiam 20 2.72 adiam 21 2.72 adiam 22 2.72 adiam 23 2.72 adiam 24 2.72 adiam 25 2.72 adiam 26 2.72 adiam 27 2.72 adiam 28 2.72 adiam 29 2.72 adiam 30 2.72 adiam 31 2.72 adiam 32 2.72 adiam 33 2.72 adiam 34 2.72 adiam 35 2.72 adiam 36 2.72 adiam 37 2.72 adiam 38 2.48 adiam 39 2.48 adiam 40 2.48 adiam 41 2.48 adiam 42 2.48 adiam 43 2.48 adiam 44 2.48 adiam 45 2.48 adiam 46 2.48 adiam 47 2.48 adiam 48 2.48 adiam 49 2.48 adiam 50 2.48 adiam 51 2.48 adiam 52 2.432 adiam 53 2.432 adiam 54 2.432 adiam 55 2.432 adiam 56 2.432 adiam 57 2.432 adiam 58 2.432 adiam 59 2.432 adiam 60 2.432 adiam 61 2.88 adiam 62 2.88 adiam 63 3.632 adiam 64 2.816  
+
+thermo          10
+thermo_style    custom step etotal evdwl ecoul elong edihed pe ke temp press
+
+run             100
+
--- a/examples/charmmfsw/log.26Jul24.charmmfsw.g++.1
+++ b/examples/charmmfsw/log.26Jul24.charmmfsw.g++.1
@ -0,0 +1,221 @@
+LAMMPS (27 Jun 2024)
+OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98)
+  using 1 OpenMP thread(s) per MPI task
+# charmmfsw example generated by https://charmm-gui.org/
+# from PDB structure 1HVN (https://www.rcsb.org/structure/1HVN)
+#
+# Dependencies: packages MOLECULE / KSPACE / RIGID
+# To test with KOKKOS: lmp -k on g 1 -sf kk -pk kokkos neigh half -in in.charmmfsw
+
+units           real
+boundary        p p p
+
+newton          off
+pair_style      lj/charmmfsw/coul/long 10 12
+Switching to CHARMM coulomb energy conversion constant (src/KSPACE/pair_lj_charmmfsw_coul_long.cpp:63)
+pair_modify     mix arithmetic
+kspace_style    pppm 1e-6
+
+atom_style      full
+bond_style      harmonic
+angle_style     charmm
+dihedral_style  charmmfsw
+special_bonds   charmm
+improper_style  harmonic
+timestep        2
+
+fix             cmap all cmap charmmff.cmap
+Reading CMAP parameters from: charmmff.cmap
+  Read in CMAP data for 2 crossterm types
+fix_modify      cmap energy yes
+read_data       data.charmmfsw.gz fix cmap crossterm CMAP
+Reading data file ...
+  orthogonal box = (-24 -24 -24) to (24 24 24)
+  1 by 1 by 1 MPI processor grid
+  reading atoms ...
+  10245 atoms
+  reading velocities ...
+  10245 velocities
+  scanning bonds ...
+  4 = max bonds/atom
+  scanning angles ...
+  15 = max angles/atom
+  scanning dihedrals ...
+  48 = max dihedrals/atom
+  scanning impropers ...
+  4 = max impropers/atom
+  orthogonal box = (-24 -24 -24) to (24 24 24)
+  1 by 1 by 1 MPI processor grid
+  reading bonds ...
+  6973 bonds
+  reading angles ...
+  4057 angles
+  reading dihedrals ...
+  1363 dihedrals
+  reading impropers ...
+  70 impropers
+Finding 1-2 1-3 1-4 neighbors ...
+  special bond factors lj:    0        0        0       
+  special bond factors coul:  0        0        0       
+     4 = max # of 1-2 neighbors
+     9 = max # of 1-3 neighbors
+    17 = max # of 1-4 neighbors
+    21 = max # of special neighbors
+  special bonds CPU = 0.002 seconds
+  read_data CPU = 0.072 seconds
+
+neighbor        2 bin
+neigh_modify    delay 2 every 1
+
+fix             1 all shake 1e-6 100 100 m 1.008 a 142
+Finding SHAKE clusters ...
+      75 = # of size 2 clusters
+      47 = # of size 3 clusters
+       9 = # of size 4 clusters
+    3265 = # of frozen angles
+  find clusters CPU = 0.003 seconds
+fix             2 all nvt temp 303.15 303.15 100.0
+
+thermo          10
+thermo_style    custom step etotal evdwl ecoul elong edihed pe ke temp press
+
+run             100
+
+CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
+
+Your simulation uses code contributions which should be cited:
+
+- Type Label Framework: https://doi.org/10.1021/acs.jpcb.3c08419
+
+@Article{Gissinger24,
+ author = {Jacob R. Gissinger, Ilia Nikiforov, Yaser Afshar, Brendon Waters, Moon-ki Choi, Daniel S. Karls, Alexander Stukowski, Wonpil Im, Hendrik Heinz, Axel Kohlmeyer, and Ellad B. Tadmor},
+ title = {Type Label Framework for Bonded Force Fields in LAMMPS},
+ journal = {J. Phys. Chem. B},
+ year =    2024,
+ volume =  128,
+ number =  13,
+ pages =   {3282–-3297}
+}
+
+CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
+
+PPPM initialization ...
+  using 12-bit tables for long-range coulomb (src/kspace.cpp:342)
+  G vector (1/distance) = 0.27938162
+  grid = 54 54 54
+  stencil order = 5
+  estimated absolute RMS force accuracy = 0.00036407395
+  estimated relative force accuracy = 1.0963718e-06
+  using double precision FFTW3
+  3d grid and FFT values/proc = 226981 157464
+Generated 2016 of 2016 mixed pair_coeff terms from arithmetic mixing rule
+Neighbor list info ...
+  update: every = 1 steps, delay = 2 steps, check = yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 14
+  ghost atom cutoff = 14
+  binsize = 7, bins = 7 7 7
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair lj/charmmfsw/coul/long, perpetual
+      attributes: half, newton off
+      pair build: half/bin/newtoff
+      stencil: full/bin/3d
+      bin: standard
+SHAKE stats (type/ave/delta/count) on step 0
+Bond:    16   1.09      1.38032e-07        6
+Bond:    18   1.09      1.00046e-07        3
+Bond:    34   1.111     1.11388e-06       10
+Bond:    39   1.111     4.83041e-08        5
+Bond:    43   1.111     1.97842e-07       10
+Bond:    44   1.111     1.71815e-06       10
+Bond:    59   1.111     8.42509e-08        2
+Bond:    62   1.111     2.84854e-08        2
+Bond:    63   1.111     2.14153e-07       46
+Bond:    64   1.111     1.59305e-07       18
+Bond:    65   1.08      5.67061e-07       16
+Bond:    66   1.08      1.43965e-06        4
+Bond:    67   1         1.81926e-07       10
+Bond:    68   1.01      0                  1
+Bond:    69   1.08      1.34571e-07        5
+Bond:    70   1.09      0                  1
+Bond:    71   1.083     0                  1
+Bond:    72   0.9572    2.71955e-07     6530
+Bond:    75   1         1.46045e-07       10
+Bond:    79   0.997     5.24499e-07       17
+Bond:    81   1         1.32984e-07        4
+Bond:    84   1.04      7.65389e-07        9
+Bond:    87   1         0                  1
+Bond:    95   0.96      5.75241e-07        2
+Bond:    97   1.325     4.3613e-08         3
+Angle:  142   104.52    2.67611e-05     3265
+Per MPI rank memory allocation (min/avg/max) = 143.6 | 143.6 | 143.6 Mbytes
+   Step         TotEng         E_vdwl         E_coul         E_long        E_dihed         PotEng         KinEng          Temp          Press     
+         0  -27877.652      3447.5013      144035.68     -182420.51      343.05623     -34213.5        6335.8474      307.44113     -989.27065    
+        10  -27879.086      3334.4154      144205.4      -182416.19      348.14696     -34133.566      6254.4808      303.49289     -1211.2863    
+        20  -27882.193      3293.7931      144272.04     -182415.87      333.20456     -34116.91       6234.7164      302.53384     -1041.5231    
+        30  -27886.779      3177.7183      144344.61     -182409.28      340.77044     -34166.241      6279.462       304.70508     -1538.0247    
+        40  -27892.698      3186.4294      144409.85     -182417.01      337.80177     -34097.62       6204.9214      301.08807     -1516.1201    
+        50  -27898.215      3198.5531      144426.3      -182405.24      336.58074     -34049.909      6151.6947      298.50529     -1349.4431    
+        60  -27900.589      3163.4592      144400.32     -182414.85      341.17705     -34110.926      6210.3369      301.35085     -1695.3697    
+        70  -27900.487      3223.7183      144242.71     -182409.21      341.09496     -34188.493      6288.0059      305.11967     -1493.2031    
+        80  -27901.07       3274.244       144265.07     -182417.68      344.0409      -34177.343      6276.2725      304.55032     -1273.0263    
+        90  -27905.672      3237.6056      144288.71     -182418.22      342.15013     -34187.814      6282.1417      304.83511     -1268.0436    
+SHAKE stats (type/ave/delta/count) on step 100
+Bond:    16   1.09      3.78281e-07        6
+Bond:    18   1.09      1.12288e-07        3
+Bond:    34   1.111     7.60709e-07       10
+Bond:    39   1.111     2.37855e-07        5
+Bond:    43   1.111     6.00872e-07       10
+Bond:    44   1.111     3.75324e-07       10
+Bond:    59   1.111     1.12311e-07        2
+Bond:    62   1.111     2.99471e-07        2
+Bond:    63   1.111     6.10589e-07       46
+Bond:    64   1.111     4.50733e-07       18
+Bond:    65   1.08      2.90668e-07       16
+Bond:    66   1.08      1.61592e-07        4
+Bond:    67   1         5.4508e-07        10
+Bond:    68   1.01      0                  1
+Bond:    69   1.08      4.1398e-07         5
+Bond:    70   1.09      0                  1
+Bond:    71   1.083     0                  1
+Bond:    72   0.9572    1.76706e-06     6530
+Bond:    75   1         3.96686e-07       10
+Bond:    79   0.997     7.72922e-07       17
+Bond:    81   1         1.30673e-07        4
+Bond:    84   1.04      1.44551e-07        9
+Bond:    87   1         0                  1
+Bond:    95   0.96      1.03526e-07        2
+Bond:    97   1.325     3.64689e-08        3
+Angle:  142   104.52    0.000130126     3265
+       100  -27913.241      3159.0677      144299.1      -182414.94      336.48839     -34254.412      6341.1706      307.69943     -1421.2905    
+Loop time of 11.5304 on 1 procs for 100 steps with 10245 atoms
+
+Performance: 1.499 ns/day, 16.014 hours/ns, 8.673 timesteps/s, 88.852 katom-step/s
+99.7% CPU use with 1 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 8.6772     | 8.6772     | 8.6772     |   0.0 | 75.25
+Bond    | 0.012444   | 0.012444   | 0.012444   |   0.0 |  0.11
+Kspace  | 1.2286     | 1.2286     | 1.2286     |   0.0 | 10.66
+Neigh   | 1.5276     | 1.5276     | 1.5276     |   0.0 | 13.25
+Comm    | 0.010441   | 0.010441   | 0.010441   |   0.0 |  0.09
+Output  | 0.00055001 | 0.00055001 | 0.00055001 |   0.0 |  0.00
+Modify  | 0.07101    | 0.07101    | 0.07101    |   0.0 |  0.62
+Other   |            | 0.002628   |            |       |  0.02
+
+Nlocal:          10245 ave       10245 max       10245 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost:          30479 ave       30479 max       30479 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs:    7.05928e+06 ave 7.05928e+06 max 7.05928e+06 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 7059275
+Ave neighs/atom = 689.04588
+Ave special neighs/atom = 2.3664226
+Neighbor list builds = 10
+Dangerous builds = 0
+
+Total wall time: 0:00:11
--- a/examples/charmmfsw/log.26Jul24.charmmfsw.g++.4
+++ b/examples/charmmfsw/log.26Jul24.charmmfsw.g++.4
@ -0,0 +1,221 @@
+LAMMPS (27 Jun 2024)
+OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98)
+  using 1 OpenMP thread(s) per MPI task
+# charmmfsw example generated by https://charmm-gui.org/
+# from PDB structure 1HVN (https://www.rcsb.org/structure/1HVN)
+#
+# Dependencies: packages MOLECULE / KSPACE / RIGID
+# To test with KOKKOS: lmp -k on g 1 -sf kk -pk kokkos neigh half -in in.charmmfsw
+
+units           real
+boundary        p p p
+
+newton          off
+pair_style      lj/charmmfsw/coul/long 10 12
+Switching to CHARMM coulomb energy conversion constant (src/KSPACE/pair_lj_charmmfsw_coul_long.cpp:63)
+pair_modify     mix arithmetic
+kspace_style    pppm 1e-6
+
+atom_style      full
+bond_style      harmonic
+angle_style     charmm
+dihedral_style  charmmfsw
+special_bonds   charmm
+improper_style  harmonic
+timestep        2
+
+fix             cmap all cmap charmmff.cmap
+Reading CMAP parameters from: charmmff.cmap
+  Read in CMAP data for 2 crossterm types
+fix_modify      cmap energy yes
+read_data       data.charmmfsw.gz fix cmap crossterm CMAP
+Reading data file ...
+  orthogonal box = (-24 -24 -24) to (24 24 24)
+  1 by 2 by 2 MPI processor grid
+  reading atoms ...
+  10245 atoms
+  reading velocities ...
+  10245 velocities
+  scanning bonds ...
+  4 = max bonds/atom
+  scanning angles ...
+  15 = max angles/atom
+  scanning dihedrals ...
+  48 = max dihedrals/atom
+  scanning impropers ...
+  4 = max impropers/atom
+  orthogonal box = (-24 -24 -24) to (24 24 24)
+  1 by 2 by 2 MPI processor grid
+  reading bonds ...
+  6973 bonds
+  reading angles ...
+  4057 angles
+  reading dihedrals ...
+  1363 dihedrals
+  reading impropers ...
+  70 impropers
+Finding 1-2 1-3 1-4 neighbors ...
+  special bond factors lj:    0        0        0       
+  special bond factors coul:  0        0        0       
+     4 = max # of 1-2 neighbors
+     9 = max # of 1-3 neighbors
+    17 = max # of 1-4 neighbors
+    21 = max # of special neighbors
+  special bonds CPU = 0.001 seconds
+  read_data CPU = 0.068 seconds
+
+neighbor        2 bin
+neigh_modify    delay 2 every 1
+
+fix             1 all shake 1e-6 100 100 m 1.008 a 142
+Finding SHAKE clusters ...
+      75 = # of size 2 clusters
+      47 = # of size 3 clusters
+       9 = # of size 4 clusters
+    3265 = # of frozen angles
+  find clusters CPU = 0.001 seconds
+fix             2 all nvt temp 303.15 303.15 100.0
+
+thermo          10
+thermo_style    custom step etotal evdwl ecoul elong edihed pe ke temp press
+
+run             100
+
+CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
+
+Your simulation uses code contributions which should be cited:
+
+- Type Label Framework: https://doi.org/10.1021/acs.jpcb.3c08419
+
+@Article{Gissinger24,
+ author = {Jacob R. Gissinger, Ilia Nikiforov, Yaser Afshar, Brendon Waters, Moon-ki Choi, Daniel S. Karls, Alexander Stukowski, Wonpil Im, Hendrik Heinz, Axel Kohlmeyer, and Ellad B. Tadmor},
+ title = {Type Label Framework for Bonded Force Fields in LAMMPS},
+ journal = {J. Phys. Chem. B},
+ year =    2024,
+ volume =  128,
+ number =  13,
+ pages =   {3282–-3297}
+}
+
+CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
+
+PPPM initialization ...
+  using 12-bit tables for long-range coulomb (src/kspace.cpp:342)
+  G vector (1/distance) = 0.27938162
+  grid = 54 54 54
+  stencil order = 5
+  estimated absolute RMS force accuracy = 0.00036407395
+  estimated relative force accuracy = 1.0963718e-06
+  using double precision FFTW3
+  3d grid and FFT values/proc = 70516 40824
+Generated 2016 of 2016 mixed pair_coeff terms from arithmetic mixing rule
+Neighbor list info ...
+  update: every = 1 steps, delay = 2 steps, check = yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 14
+  ghost atom cutoff = 14
+  binsize = 7, bins = 7 7 7
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair lj/charmmfsw/coul/long, perpetual
+      attributes: half, newton off
+      pair build: half/bin/newtoff
+      stencil: full/bin/3d
+      bin: standard
+SHAKE stats (type/ave/delta/count) on step 0
+Bond:    16   1.09      1.38032e-07        6
+Bond:    18   1.09      1.00046e-07        3
+Bond:    34   1.111     1.11388e-06       10
+Bond:    39   1.111     4.83041e-08        5
+Bond:    43   1.111     1.97842e-07       10
+Bond:    44   1.111     1.71815e-06       10
+Bond:    59   1.111     8.42509e-08        2
+Bond:    62   1.111     2.84854e-08        2
+Bond:    63   1.111     2.14153e-07       46
+Bond:    64   1.111     1.59305e-07       18
+Bond:    65   1.08      5.67061e-07       16
+Bond:    66   1.08      1.43965e-06        4
+Bond:    67   1         1.81926e-07       10
+Bond:    68   1.01      0                  1
+Bond:    69   1.08      1.34571e-07        5
+Bond:    70   1.09      0                  1
+Bond:    71   1.083     0                  1
+Bond:    72   0.9572    2.71955e-07     6530
+Bond:    75   1         1.46045e-07       10
+Bond:    79   0.997     5.24499e-07       17
+Bond:    81   1         1.32984e-07        4
+Bond:    84   1.04      7.65389e-07        9
+Bond:    87   1         0                  1
+Bond:    95   0.96      5.75241e-07        2
+Bond:    97   1.325     4.3613e-08         3
+Angle:  142   104.52    2.67611e-05     3265
+Per MPI rank memory allocation (min/avg/max) = 76.88 | 77.06 | 77.25 Mbytes
+   Step         TotEng         E_vdwl         E_coul         E_long        E_dihed         PotEng         KinEng          Temp          Press     
+         0  -27877.652      3447.5013      144035.68     -182420.51      343.05623     -34213.5        6335.8474      307.44113     -989.27065    
+        10  -27879.086      3334.4154      144205.4      -182416.19      348.14696     -34133.566      6254.4808      303.49289     -1211.2863    
+        20  -27882.193      3293.7931      144272.04     -182415.87      333.20456     -34116.91       6234.7164      302.53384     -1041.5231    
+        30  -27886.779      3177.7183      144344.61     -182409.28      340.77044     -34166.241      6279.462       304.70508     -1538.0247    
+        40  -27892.698      3186.4294      144409.85     -182417.01      337.80177     -34097.62       6204.9214      301.08807     -1516.1201    
+        50  -27898.215      3198.5531      144426.3      -182405.24      336.58074     -34049.909      6151.6947      298.50529     -1349.4431    
+        60  -27900.589      3163.4592      144400.32     -182414.85      341.17705     -34110.926      6210.3369      301.35085     -1695.3697    
+        70  -27900.487      3223.7183      144242.71     -182409.21      341.09496     -34188.493      6288.0059      305.11967     -1493.2032    
+        80  -27901.07       3274.244       144265.07     -182417.68      344.0409      -34177.343      6276.2725      304.55032     -1273.0263    
+        90  -27905.672      3237.6056      144288.71     -182418.22      342.15013     -34187.814      6282.1417      304.83511     -1268.0436    
+SHAKE stats (type/ave/delta/count) on step 100
+Bond:    16   1.09      3.78281e-07        6
+Bond:    18   1.09      1.12288e-07        3
+Bond:    34   1.111     7.60709e-07       10
+Bond:    39   1.111     2.37855e-07        5
+Bond:    43   1.111     6.00872e-07       10
+Bond:    44   1.111     3.75324e-07       10
+Bond:    59   1.111     1.12311e-07        2
+Bond:    62   1.111     2.99471e-07        2
+Bond:    63   1.111     6.10589e-07       46
+Bond:    64   1.111     4.50733e-07       18
+Bond:    65   1.08      2.90668e-07       16
+Bond:    66   1.08      1.61592e-07        4
+Bond:    67   1         5.4508e-07        10
+Bond:    68   1.01      0                  1
+Bond:    69   1.08      4.1398e-07         5
+Bond:    70   1.09      0                  1
+Bond:    71   1.083     0                  1
+Bond:    72   0.9572    1.76706e-06     6530
+Bond:    75   1         3.96686e-07       10
+Bond:    79   0.997     7.72922e-07       17
+Bond:    81   1         1.30673e-07        4
+Bond:    84   1.04      1.44551e-07        9
+Bond:    87   1         0                  1
+Bond:    95   0.96      1.03526e-07        2
+Bond:    97   1.325     3.64689e-08        3
+Angle:  142   104.52    0.000130126     3265
+       100  -27913.241      3159.0676      144299.1      -182414.94      336.48839     -34254.412      6341.1706      307.69943     -1421.2905    
+Loop time of 3.49837 on 4 procs for 100 steps with 10245 atoms
+
+Performance: 4.939 ns/day, 4.859 hours/ns, 28.585 timesteps/s, 292.851 katom-step/s
+99.4% CPU use with 4 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 2.4572     | 2.5133     | 2.5634     |   2.6 | 71.84
+Bond    | 0.0040264  | 0.0050718  | 0.0069286  |   1.6 |  0.14
+Kspace  | 0.45979    | 0.50903    | 0.56364    |   5.6 | 14.55
+Neigh   | 0.42029    | 0.42034    | 0.42036    |   0.0 | 12.02
+Comm    | 0.013207   | 0.013292   | 0.013404   |   0.1 |  0.38
+Output  | 0.00026525 | 0.00029549 | 0.00038249 |   0.0 |  0.01
+Modify  | 0.035024   | 0.035546   | 0.03621    |   0.3 |  1.02
+Other   |            | 0.001504   |            |       |  0.04
+
+Nlocal:        2561.25 ave        2599 max        2520 min
+Histogram: 1 1 0 0 0 0 0 0 0 2
+Nghost:        16491.5 ave       16541 max       16442 min
+Histogram: 2 0 0 0 0 0 0 0 0 2
+Neighs:    1.99855e+06 ave 2.04035e+06 max 1.95468e+06 min
+Histogram: 1 1 0 0 0 0 0 0 0 2
+
+Total # of neighbors = 7994217
+Ave neighs/atom = 780.30425
+Ave special neighs/atom = 2.3664226
+Neighbor list builds = 10
+Dangerous builds = 0
+
+Total wall time: 0:00:03
--- a/lib/gpu/lal_dpd_coul_slater_long.cu
+++ b/lib/gpu/lal_dpd_coul_slater_long.cu
@ -186,15 +186,13 @@ __kernel void k_dpd_coul_slater_long(const __global numtyp4 *restrict x_,
  atom_info(t_per_atom,ii,tid,offset);

  __local numtyp sp_cl[4];
-  ///local_allocate_store_charge();
-
  sp_cl[0]=sp_cl_in[0];
  sp_cl[1]=sp_cl_in[1];
  sp_cl[2]=sp_cl_in[2];
  sp_cl[3]=sp_cl_in[3];

  int n_stride;
-  local_allocate_store_pair();
+  local_allocate_store_charge();

  acctyp3 f;
  f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
@ -332,8 +330,7 @@ __kernel void k_dpd_coul_slater_long(const __global numtyp4 *restrict x_,

    } // for nbor
  } // if ii
-  store_answers_q(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag,
-                ans,engv);
+  store_answers_q(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag,ans,engv);
 }

 __kernel void k_dpd_coul_slater_long_fast(const __global numtyp4 *restrict x_,
@ -378,7 +375,7 @@ __kernel void k_dpd_coul_slater_long_fast(const __global numtyp4 *restrict x_,
  

  int n_stride;
-  local_allocate_store_pair();
+  local_allocate_store_charge();

  acctyp3 f;
  f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
@ -517,7 +514,6 @@ __kernel void k_dpd_coul_slater_long_fast(const __global numtyp4 *restrict x_,

    } // for nbor
  } // if ii
-  store_answers_q(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag,
-                ans,engv);
+  store_answers_q(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag,ans,engv);
 }

--- a/lib/gpu/lal_sw.cpp
+++ b/lib/gpu/lal_sw.cpp
@ -52,12 +52,12 @@ int SWT::init(const int ntypes, const int nlocal, const int nall,
              double ***costheta, const int *map, int ***e2param) {
  _lj_types=ntypes;

-  int oldparam=-1;
  int onetype=-1;
  int onetype3=0;
  int spq=1;
-  int mtypes=0;
  #ifdef USE_OPENCL
+  int oldparam=-1;
+  int mtypes=0;
  for (int ii=1; ii<ntypes; ii++) {
    int i=map[ii];
    if (i<0) continue;
--- a/lib/gpu/lal_tersoff.cpp
+++ b/lib/gpu/lal_tersoff.cpp
@ -52,12 +52,12 @@ int TersoffT::init(const int ntypes, const int nlocal, const int nall, const int
                   const double* c, const double* d, const double* h, const double* gamma,
                   const double* beta, const double* powern, const double* host_cutsq)
 {
-  int oldparam=-1;
  int onetype=-1;
  int onetype3=0;
  int spq=0;
-  int mtypes=0;
  #ifdef USE_OPENCL
+  int oldparam=-1;
+  int mtypes=0;
  for (int ii=1; ii<ntypes; ii++) {
    const int i=host_map[ii];
    for (int jj=1; jj<ntypes; jj++) {
--- a/lib/gpu/lal_tersoff_mod.cpp
+++ b/lib/gpu/lal_tersoff_mod.cpp
@ -52,11 +52,11 @@ int TersoffMT::init(const int ntypes, const int nlocal, const int nall, const in
                   const double* c5, const double* h, const double* beta, const double* powern,
                   const double* powern_del, const double* ca1, const double* host_cutsq)
 {
-  int oldparam=-1;
  int onetype=-1;
  int onetype3=0;
-  int mtypes=0;
  #ifdef USE_OPENCL
+  int oldparam=-1;
+  int mtypes=0;
  for (int ii=1; ii<ntypes; ii++) {
    const int i=host_map[ii];
    for (int jj=1; jj<ntypes; jj++) {
--- a/lib/gpu/lal_tersoff_zbl.cpp
+++ b/lib/gpu/lal_tersoff_zbl.cpp
@ -59,11 +59,11 @@ int TersoffZT::init(const int ntypes, const int nlocal, const int nall,
                    const double global_a_0, const double global_epsilon_0,
                    const double* host_cutsq)
 {
-  int oldparam=-1;
  int onetype=-1;
  int onetype3=0;
-  int mtypes=0;
  #ifdef USE_OPENCL
+  int oldparam=-1;
+  int mtypes=0;
  for (int ii=1; ii<ntypes; ii++) {
    const int i=host_map[ii];
    for (int jj=1; jj<ntypes; jj++) {
--- a/lib/kokkos/Makefile.kokkos
+++ b/lib/kokkos/Makefile.kokkos
@ -23,7 +23,7 @@ KOKKOS_DEVICES ?= "OpenMP"
 # NVIDIA:   Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75,Ampere80,Ampere86,Ada89,Hopper90
 # ARM:      ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX
 # IBM:      Power8,Power9
-# AMD-GPUS: AMD_GFX906,AMD_GFX908,AMD_GFX90A,AMD_GFX940,AMD_GFX942,AMD_GFX1030,AMD_GFX1100
+# AMD-GPUS: AMD_GFX906,AMD_GFX908,AMD_GFX90A,AMD_GFX940,AMD_GFX942,AMD_GFX1030,AMD_GFX1100,AMD_GFX1103
 # AMD-CPUS: AMDAVX,Zen,Zen2,Zen3
 # Intel-GPUs: Gen9,Gen11,Gen12LP,DG1,XeHP,PVC
 KOKKOS_ARCH ?= ""
@ -433,6 +433,7 @@ KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100 := $(call kokkos_has_string,$(KOKKOS_ARCH),
 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100), 0)
  KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100 := $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1100)
 endif
+KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1103 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1103)

 # Any AVX?
 KOKKOS_INTERNAL_USE_ARCH_AVX        := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_AMDAVX))
@ -1118,6 +1119,11 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100), 1)
  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU")
  KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx1100
 endif
+ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1103), 1)
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX1103")
+  tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU")
+  KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx1103
+endif


 ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
--- a/lib/kokkos/cmake/KokkosCore_config.h.in
+++ b/lib/kokkos/cmake/KokkosCore_config.h.in
@ -116,6 +116,7 @@
 #cmakedefine KOKKOS_ARCH_AMD_GFX942
 #cmakedefine KOKKOS_ARCH_AMD_GFX1030
 #cmakedefine KOKKOS_ARCH_AMD_GFX1100
+#cmakedefine KOKKOS_ARCH_AMD_GFX1103
 #cmakedefine KOKKOS_ARCH_AMD_GPU
 #cmakedefine KOKKOS_ARCH_VEGA // deprecated
 #cmakedefine KOKKOS_ARCH_VEGA906 // deprecated
--- a/lib/kokkos/cmake/kokkos_arch.cmake
+++ b/lib/kokkos/cmake/kokkos_arch.cmake
@ -101,9 +101,9 @@ LIST(APPEND CORRESPONDING_AMD_FLAGS  gfx90a   gfx90a      gfx908   gfx908)
 LIST(APPEND SUPPORTED_AMD_GPUS       MI50/60  MI50/60)
 LIST(APPEND SUPPORTED_AMD_ARCHS      VEGA906  AMD_GFX906)
 LIST(APPEND CORRESPONDING_AMD_FLAGS  gfx906   gfx906)
-LIST(APPEND SUPPORTED_AMD_GPUS       RX7900XTX  RX7900XTX    V620/W6800  V620/W6800)
-LIST(APPEND SUPPORTED_AMD_ARCHS      NAVI1100   AMD_GFX1100  NAVI1030    AMD_GFX1030)
-LIST(APPEND CORRESPONDING_AMD_FLAGS  gfx1100    gfx1100      gfx1030     gfx1030)
+LIST(APPEND SUPPORTED_AMD_GPUS       PHOENIX      RX7900XTX    V620/W6800  V620/W6800)
+LIST(APPEND SUPPORTED_AMD_ARCHS      AMD_GFX1103  AMD_GFX1100  NAVI1030    AMD_GFX1030)
+LIST(APPEND CORRESPONDING_AMD_FLAGS  gfx1103    gfx1100      gfx1030     gfx1030)

 #FIXME CAN BE REPLACED WITH LIST_ZIP IN CMAKE 3.17
 FOREACH(ARCH IN LISTS SUPPORTED_AMD_ARCHS)
--- a/lib/kokkos/core/src/HIP/Kokkos_HIP_Instance.hpp
+++ b/lib/kokkos/core/src/HIP/Kokkos_HIP_Instance.hpp
@ -35,7 +35,8 @@ struct HIPTraits {
  static constexpr int WarpSize       = 64;
  static constexpr int WarpIndexMask  = 0x003f; /* hexadecimal for 63 */
  static constexpr int WarpIndexShift = 6;      /* WarpSize == 1 << WarpShift*/
-#elif defined(KOKKOS_ARCH_AMD_GFX1030) || defined(KOKKOS_ARCH_AMD_GFX1100)
+#elif defined(KOKKOS_ARCH_AMD_GFX1030) || defined(KOKKOS_ARCH_AMD_GFX1100) || \
+    defined(KOKKOS_ARCH_AMD_GFX1103)
  static constexpr int WarpSize       = 32;
  static constexpr int WarpIndexMask  = 0x001f; /* hexadecimal for 31 */
  static constexpr int WarpIndexShift = 5;      /* WarpSize == 1 << WarpShift*/
--- a/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_ParallelScan_Range.hpp
+++ b/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_ParallelScan_Range.hpp
@ -143,7 +143,7 @@ class ParallelScan<FunctorType, Kokkos::RangePolicy<Traits...>,
            local_offset_value = element_values(team_id, i - 1);
            // FIXME_OPENMPTARGET We seem to access memory illegally on AMD GPUs
 #if defined(KOKKOS_ARCH_AMD_GPU) && !defined(KOKKOS_ARCH_AMD_GFX1030) && \
-    !defined(KOKKOS_ARCH_AMD_GFX1100)
+    !defined(KOKKOS_ARCH_AMD_GFX1100) && !defined(KOKKOS_ARCH_AMD_GFX1103)
            if constexpr (Analysis::Reducer::has_join_member_function()) {
              if constexpr (std::is_void_v<WorkTag>)
                a_functor_reducer.get_functor().join(local_offset_value,
--- a/lib/kokkos/core/src/impl/Kokkos_Core.cpp
+++ b/lib/kokkos/core/src/impl/Kokkos_Core.cpp
@ -750,6 +750,9 @@ void pre_initialize_internal(const Kokkos::InitializationSettings& settings) {
 #elif defined(KOKKOS_ARCH_AMD_GFX1100)
  declare_configuration_metadata("architecture", "GPU architecture",
                                 "AMD_GFX1100");
+#elif defined(KOKKOS_ARCH_AMD_GFX1103)
+  declare_configuration_metadata("architecture", "GPU architecture",
+                                 "AMD_GFX1103");

 #else
  declare_configuration_metadata("architecture", "GPU architecture", "none");
--- a/lib/kokkos/generate_makefile.bash
+++ b/lib/kokkos/generate_makefile.bash
@ -164,6 +164,7 @@ display_help_text() {
      echo "                 AMD_GFX942      = AMD GPU MI300 GFX942"
      echo "                 AMD_GFX1030     = AMD GPU V620/W6800 GFX1030"
      echo "                 AMD_GFX1100     = AMD GPU RX 7900 XT(X) GFX1100"
+      echo "                 AMD_GFX1103     = AMD APU Radeon 740M/760M/780M/880M/890M GFX1103"
      echo "               [ARM]"
      echo "                 ARMV80          = ARMv8.0 Compatible CPU"
      echo "                 ARMV81          = ARMv8.1 Compatible CPU"
--- a/python/lammps/core.py
+++ b/python/lammps/core.py
@ -992,7 +992,7 @@ class lammps(object):
      return None

    dim = self.extract_pair_dimension(name)
-    if dim == None:
+    if dim is None:
      return None
    elif dim == 0:
      self.lib.lammps_extract_pair.restype = POINTER(c_double)
--- a/python/lammps/pylammps.py
+++ b/python/lammps/pylammps.py
@ -463,13 +463,19 @@ class PyLammps(object):
        self.lmp = lammps(name=name,cmdargs=cmdargs,ptr=ptr,comm=comm)
    else:
      self.lmp = lammps(name=name,cmdargs=cmdargs,ptr=None,comm=comm)
-    print("LAMMPS output is captured by PyLammps wrapper")
+    self.comm_nprocs = self.lmp.extract_setting("world_size")
+    self.comm_me = self.lmp.extract_setting("world_rank")
+    if self.comm_me == 0:
+      print("LAMMPS output is captured by PyLammps wrapper")
+      if self.comm_nprocs > 1:
+        print("WARNING: Using PyLammps with multiple MPI ranks is experimental. Not all functionality is supported.")
    self._cmd_history = []
    self._enable_cmd_history = False
    self.runs = []

    if not self.lmp.has_package("PYTHON"):
-      print("WARNING: run thermo data not captured since PYTHON LAMMPS package is not enabled")
+      if self.comm_me == 0:
+        print("WARNING: run thermo data not captured since PYTHON LAMMPS package is not enabled")

  def __enter__(self):
    return self
@ -727,7 +733,15 @@ class PyLammps(object):

  def eval(self, expr):
    """
-    Evaluate expression
+    Evaluate LAMMPS input file expression.
+
+    This is equivalent to using immediate variable expressions in the format "$(...)"
+    in the LAMMPS input and will return the result of that expression.
+
+    .. warning::
+
+       This function is only supported on MPI rank 0.  Calling it from a different
+       MPI rank will raise an exception.

    :param expr: the expression string that should be evaluated inside of LAMMPS
    :type expr: string
@ -735,6 +749,9 @@ class PyLammps(object):
    :return: the value of the evaluated expression
    :rtype: float if numeric, string otherwise
    """
+    if self.comm_me > 0:
+      raise Exception("PyLammps.eval() may only be used on MPI rank 0")
+
    value = self.lmp_print('"$(%s)"' % expr).strip()
    try:
      return float(value)
--- a/src/CG-SPICA/angle_spica.cpp
+++ b/src/CG-SPICA/angle_spica.cpp
@ -54,7 +54,7 @@ AngleSPICA::AngleSPICA(LAMMPS *lmp) :

 AngleSPICA::~AngleSPICA()
 {
-  if (allocated) {
+  if (allocated && !copymode) {
    memory->destroy(setflag);
    memory->destroy(k);
    memory->destroy(theta0);
--- a/src/CG-SPICA/angle_spica.h
+++ b/src/CG-SPICA/angle_spica.h
@ -52,7 +52,7 @@ class AngleSPICA : public Angle {

  void ev_tally13(int, int, int, int, double, double, double, double, double);

-  void allocate();
+  virtual void allocate();
 };

 }    // namespace LAMMPS_NS
--- a/src/CG-SPICA/pair_lj_spica_coul_long.cpp
+++ b/src/CG-SPICA/pair_lj_spica_coul_long.cpp
@ -55,6 +55,8 @@ PairLJSPICACoulLong::PairLJSPICACoulLong(LAMMPS *lmp) :

 PairLJSPICACoulLong::~PairLJSPICACoulLong()
 {
+  if (copymode) return;
+
  if (allocated) {
    memory->destroy(setflag);
    memory->destroy(lj_type);
@ -356,7 +358,7 @@ void PairLJSPICACoulLong::coeff(int narg, char **arg)

 void PairLJSPICACoulLong::init_style()
 {
-  if (!atom->q_flag) error->all(FLERR, "Pair style lj/cut/coul/long requires atom attribute q");
+  if (!atom->q_flag) error->all(FLERR, "Pair style lj/spica/coul/long requires atom attribute q");

  neighbor->add_request(this);

@ -385,7 +387,8 @@ double PairLJSPICACoulLong::init_one(int i, int j)

  const int ljt = lj_type[i][j];

-  if (ljt == LJ_NOT_SET) error->all(FLERR, "unrecognized LJ parameter flag");
+  if (ljt == LJ_NOT_SET)
+    error->all(FLERR,"unrecognized LJ parameter flag");

  double cut = MAX(cut_lj[i][j], cut_coul);
  cut_ljsq[i][j] = cut_lj[i][j] * cut_lj[i][j];
--- a/src/CG-SPICA/pair_lj_spica_coul_long.h
+++ b/src/CG-SPICA/pair_lj_spica_coul_long.h
@ -64,7 +64,7 @@ class PairLJSPICACoulLong : public Pair {
  double cut_lj_global;
  double g_ewald;

-  void allocate();
+  virtual void allocate();

 private:
  template <int EVFLAG, int EFLAG, int NEWTON_PAIR> void eval();
--- a/src/DPD-MESO/fix_mvv_dpd.cpp
+++ b/src/DPD-MESO/fix_mvv_dpd.cpp
@ -24,6 +24,7 @@
 #include "fix_mvv_dpd.h"

 #include "atom.h"
+#include "domain.h"
 #include "error.h"
 #include "force.h"
 #include "update.h"
@ -65,6 +66,11 @@ void FixMvvDPD::init()
  if (!atom->vest_flag)
    error->all(FLERR,"Fix mvv/dpd requires atom attribute vest e.g. from atom style mdpd");

+  // Cannot use vremap since its effects aren't propagated to vest
+  //   see RHEO or SPH packages for examples of patches
+  if (domain->deform_vremap)
+    error->all(FLERR, "Fix mvv/dpd cannot be used with velocity remapping");
+
  if (!force->pair_match("^mdpd",0) && !force->pair_match("^dpd",0)) {
    if (force->pair_match("^hybrid",0)) {
      if (!(force->pair_match("^mdpd",0,1) || force->pair_match("^dpd",0),1)) {
--- a/src/DPD-MESO/fix_mvv_edpd.cpp
+++ b/src/DPD-MESO/fix_mvv_edpd.cpp
@ -33,6 +33,7 @@
 #include "fix_mvv_edpd.h"

 #include "atom.h"
+#include "domain.h"
 #include "error.h"
 #include "force.h"
 #include "update.h"
@ -73,6 +74,11 @@ void FixMvvEDPD::init()
 {
  if (!atom->edpd_flag) error->all(FLERR,"Fix mvv/edpd requires atom style edpd");

+  // Cannot use vremap since its effects aren't propagated to vest
+  //   see RHEO or SPH packages for examples of patches
+  if (domain->deform_vremap)
+    error->all(FLERR, "Fix mvv/edpd cannot be used with velocity remapping");
+
  if (!force->pair_match("^edpd",0)) {
    if (force->pair_match("^hybrid",0)) {
      if (!force->pair_match("^edpd",0,1)) {
--- a/src/DPD-MESO/fix_mvv_tdpd.cpp
+++ b/src/DPD-MESO/fix_mvv_tdpd.cpp
@ -29,6 +29,7 @@
 #include "fix_mvv_tdpd.h"

 #include "atom.h"
+#include "domain.h"
 #include "error.h"
 #include "force.h"
 #include "update.h"
@ -71,6 +72,11 @@ void FixMvvTDPD::init()
 {
  if (!atom->tdpd_flag) error->all(FLERR,"Fix mvv/tdpd requires atom style tdpd");

+  // Cannot use vremap since its effects aren't propagated to vest
+  //   see RHEO or SPH packages for examples of patches
+  if (domain->deform_vremap)
+    error->all(FLERR, "Fix mvv/tdpd cannot be used with velocity remapping");
+
  if (!force->pair_match("^tdpd",0)) {
    if (force->pair_match("^hybrid",0)) {
      if (!force->pair_match("^tdpd",0,1)) {
--- a/src/DPD-SMOOTH/fix_meso_move.cpp
+++ b/src/DPD-SMOOTH/fix_meso_move.cpp
@ -350,7 +350,14 @@ void FixMesoMove::init () {
 }

 void FixMesoMove::setup_pre_force (int /*vflag*/) {
+
+  // Cannot use vremap since its effects aren't propagated to vest
+  //   see RHEO or SPH packages for examples of patches
+  if (domain->deform_vremap)
+    error->all(FLERR, "Fix meso/move cannot be used with velocity remapping");
+
  // set vest equal to v
+
  double **v = atom->v;
  double **vest = atom->vest;
  int *mask = atom->mask;
--- a/src/DPD-SMOOTH/fix_rigid_meso.cpp
+++ b/src/DPD-SMOOTH/fix_rigid_meso.cpp
@ -29,11 +29,12 @@
 ------------------------------------------------------------------------- */

 #include "fix_rigid_meso.h"
-#include "math_extra.h"
+
 #include "atom.h"
 #include "domain.h"
-#include "memory.h"
 #include "error.h"
+#include "math_extra.h"
+#include "memory.h"

 using namespace LAMMPS_NS;
 using namespace FixConst;
@ -92,6 +93,11 @@ void FixRigidMeso::setup (int vflag) {
    conjqm[ibody][2] *= 2.0;
    conjqm[ibody][3] *= 2.0;
  }
+
+  // Cannot use vremap since its effects aren't propagated to vest
+  //   see RHEO or SPH packages for examples of patches
+  if (domain->deform_vremap)
+    error->all(FLERR, "Fix rigid/meso cannot be used with velocity remapping");
 }

 /* ----------------------------------------------------------------------
--- a/src/Depend.sh
+++ b/src/Depend.sh
@ -195,6 +195,10 @@ if (test $1 = "ML-SNAP") then
  depend INTEL
 fi

+if (test $1 = "ML-UF3") then
+  depend KOKKOS
+fi
+
 if (test $1 = "CG-SPICA") then
  depend GPU
  depend KOKKOS
--- a/src/KOKKOS/Install.sh
+++ b/src/KOKKOS/Install.sh
@ -53,6 +53,10 @@ action angle_cosine_kokkos.cpp angle_cosine.cpp
 action angle_cosine_kokkos.h angle_cosine.h
 action angle_harmonic_kokkos.cpp angle_harmonic.cpp
 action angle_harmonic_kokkos.h angle_harmonic.h
+action angle_hybrid_kokkos.cpp angle_hybrid.cpp
+action angle_hybrid_kokkos.h angle_hybrid.h
+action angle_spica_kokkos.cpp angle_spica.cpp
+action angle_spica_kokkos.h angle_spica.h
 action atom_kokkos.cpp
 action atom_kokkos.h
 action atom_map_kokkos.cpp
@ -116,6 +120,8 @@ action dihedral_harmonic_kokkos.cpp dihedral_harmonic.cpp
 action dihedral_harmonic_kokkos.h dihedral_harmonic.h
 action dihedral_opls_kokkos.cpp dihedral_opls.cpp
 action dihedral_opls_kokkos.h dihedral_opls.h
+action dihedral_hybrid_kokkos.cpp dihedral_hybrid.cpp
+action dihedral_hybrid_kokkos.h dihedral_hybrid.h
 action domain_kokkos.cpp
 action domain_kokkos.h
 action dynamical_matrix_kokkos.cpp dynamical_matrix.cpp
@ -205,6 +211,8 @@ action improper_class2_kokkos.cpp improper_class2.cpp
 action improper_class2_kokkos.h improper_class2.h
 action improper_harmonic_kokkos.cpp improper_harmonic.cpp
 action improper_harmonic_kokkos.h improper_harmonic.h
+action improper_hybrid_kokkos.cpp improper_hybrid.cpp
+action improper_hybrid_kokkos.h improper_hybrid.h
 action kissfft_kokkos.h kissfft.h
 action kokkos_base_fft.h fft3d.h
 action kokkos_base.h
@ -344,6 +352,8 @@ action pair_lj_gromacs_coul_gromacs_kokkos.cpp pair_lj_gromacs_coul_gromacs.cpp
 action pair_lj_gromacs_coul_gromacs_kokkos.h pair_lj_gromacs_coul_gromacs.h
 action pair_lj_gromacs_kokkos.cpp pair_lj_gromacs.cpp
 action pair_lj_gromacs_kokkos.h pair_lj_gromacs.h
+action pair_lj_spica_coul_long_kokkos.cpp pair_lj_spica_coul_long.cpp
+action pair_lj_spica_coul_long_kokkos.h pair_lj_spica_coul_long.h
 action pair_lj_spica_kokkos.cpp pair_lj_spica.cpp
 action pair_lj_spica_kokkos.h pair_lj_spica.h
 action pair_meam_kokkos.cpp pair_meam.cpp
@ -381,6 +391,8 @@ action pair_tersoff_mod_kokkos.cpp pair_tersoff_mod.cpp
 action pair_tersoff_mod_kokkos.h pair_tersoff_mod.h
 action pair_tersoff_zbl_kokkos.cpp pair_tersoff_zbl.cpp
 action pair_tersoff_zbl_kokkos.h pair_tersoff_zbl.h
+action pair_uf3_kokkos.cpp pair_uf3.cpp
+action pair_uf3_kokkos.h pair_uf3.h
 action pair_vashishta_kokkos.cpp pair_vashishta.cpp
 action pair_vashishta_kokkos.h pair_vashishta.h
 action pair_yukawa_kokkos.cpp
--- a/src/KOKKOS/angle_charmm_kokkos.cpp
+++ b/src/KOKKOS/angle_charmm_kokkos.cpp
@ -38,6 +38,7 @@ static constexpr double SMALL = 0.001;
 template<class DeviceType>
 AngleCharmmKokkos<DeviceType>::AngleCharmmKokkos(LAMMPS *lmp) : AngleCharmm(lmp)
 {
+  kokkosable = 1;
  atomKK = (AtomKokkos *) atom;
  neighborKK = (NeighborKokkos *) neighbor;
  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
@ -125,12 +126,12 @@ void AngleCharmmKokkos<DeviceType>::compute(int eflag_in, int vflag_in)

  if (eflag_atom) {
    k_eatom.template modify<DeviceType>();
-    k_eatom.template sync<LMPHostType>();
+    k_eatom.sync_host();
  }

  if (vflag_atom) {
    k_vatom.template modify<DeviceType>();
-    k_vatom.template sync<LMPHostType>();
+    k_vatom.sync_host();
  }

  copymode = 0;
@ -284,10 +285,10 @@ void AngleCharmmKokkos<DeviceType>::coeff(int narg, char **arg)
    k_r_ub.h_view[i] = r_ub[i];
  }

-  k_k.template modify<LMPHostType>();
-  k_theta0.template modify<LMPHostType>();
-  k_k_ub.template modify<LMPHostType>();
-  k_r_ub.template modify<LMPHostType>();
+  k_k.modify_host();
+  k_theta0.modify_host();
+  k_k_ub.modify_host();
+  k_r_ub.modify_host();

  k_k.template sync<DeviceType>();
  k_theta0.template sync<DeviceType>();
@ -322,10 +323,10 @@ void AngleCharmmKokkos<DeviceType>::read_restart(FILE *fp)
    k_r_ub.h_view[i] = r_ub[i];
  }

-  k_k.template modify<LMPHostType>();
-  k_theta0.template modify<LMPHostType>();
-  k_k_ub.template modify<LMPHostType>();
-  k_r_ub.template modify<LMPHostType>();
+  k_k.modify_host();
+  k_theta0.modify_host();
+  k_k_ub.modify_host();
+  k_r_ub.modify_host();

  k_k.template sync<DeviceType>();
  k_theta0.template sync<DeviceType>();
--- a/src/KOKKOS/angle_charmm_kokkos.h
+++ b/src/KOKKOS/angle_charmm_kokkos.h
@ -58,19 +58,18 @@ class AngleCharmmKokkos : public AngleCharmm {
                     const F_FLOAT &delx1, const F_FLOAT &dely1, const F_FLOAT &delz1,
                     const F_FLOAT &delx2, const F_FLOAT &dely2, const F_FLOAT &delz2) const;

+  using KKDeviceType = typename KKDevice<DeviceType>::value;
+  Kokkos::DualView<E_FLOAT*,Kokkos::LayoutRight,KKDeviceType> k_eatom;
+  Kokkos::DualView<F_FLOAT*[6],Kokkos::LayoutRight,KKDeviceType> k_vatom;
+
 protected:

  class NeighborKokkos *neighborKK;

  typedef ArrayTypes<DeviceType> AT;
  typename AT::t_x_array_randomread x;
-
-  using KKDeviceType = typename KKDevice<DeviceType>::value;
  typename Kokkos::View<double*[3],typename AT::t_f_array::array_layout,KKDeviceType,Kokkos::MemoryTraits<Kokkos::Atomic> > f;
  typename AT::t_int_2d anglelist;
-
-  Kokkos::DualView<E_FLOAT*,Kokkos::LayoutRight,KKDeviceType> k_eatom;
-  Kokkos::DualView<F_FLOAT*[6],Kokkos::LayoutRight,KKDeviceType> k_vatom;
  Kokkos::View<E_FLOAT*,Kokkos::LayoutRight,KKDeviceType,Kokkos::MemoryTraits<Kokkos::Atomic>> d_eatom;
  Kokkos::View<F_FLOAT*[6],Kokkos::LayoutRight,KKDeviceType,Kokkos::MemoryTraits<Kokkos::Atomic>> d_vatom;

--- a/src/KOKKOS/angle_class2_kokkos.cpp
+++ b/src/KOKKOS/angle_class2_kokkos.cpp
@ -38,6 +38,7 @@ static constexpr double SMALL = 0.001;
 template<class DeviceType>
 AngleClass2Kokkos<DeviceType>::AngleClass2Kokkos(LAMMPS *lmp) : AngleClass2(lmp)
 {
+  kokkosable = 1;
  atomKK = (AtomKokkos *) atom;
  neighborKK = (NeighborKokkos *) neighbor;
  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
@ -141,12 +142,12 @@ void AngleClass2Kokkos<DeviceType>::compute(int eflag_in, int vflag_in)

  if (eflag_atom) {
    k_eatom.template modify<DeviceType>();
-    k_eatom.template sync<LMPHostType>();
+    k_eatom.sync_host();
  }

  if (vflag_atom) {
    k_vatom.template modify<DeviceType>();
-    k_vatom.template sync<LMPHostType>();
+    k_vatom.sync_host();
  }

  copymode = 0;
@ -386,21 +387,21 @@ void AngleClass2Kokkos<DeviceType>::coeff(int narg, char **arg)
    k_theta0.h_view[i] = theta0[i];
  }

-  k_k2.template modify<LMPHostType>();
-  k_k3.template modify<LMPHostType>();
-  k_k4.template modify<LMPHostType>();
-  k_bb_k.template modify<LMPHostType>();
-  k_bb_r1.template modify<LMPHostType>();
-  k_bb_r2.template modify<LMPHostType>();
-  k_ba_k1.template modify<LMPHostType>();
-  k_ba_k2.template modify<LMPHostType>();
-  k_ba_r1.template modify<LMPHostType>();
-  k_ba_r2.template modify<LMPHostType>();
-  k_setflag.template modify<LMPHostType>();
-  k_setflag_a.template modify<LMPHostType>();
-  k_setflag_bb.template modify<LMPHostType>();
-  k_setflag_ba.template modify<LMPHostType>();
-  k_theta0.template modify<LMPHostType>();
+  k_k2.modify_host();
+  k_k3.modify_host();
+  k_k4.modify_host();
+  k_bb_k.modify_host();
+  k_bb_r1.modify_host();
+  k_bb_r2.modify_host();
+  k_ba_k1.modify_host();
+  k_ba_k2.modify_host();
+  k_ba_r1.modify_host();
+  k_ba_r2.modify_host();
+  k_setflag.modify_host();
+  k_setflag_a.modify_host();
+  k_setflag_bb.modify_host();
+  k_setflag_ba.modify_host();
+  k_theta0.modify_host();
 }

 /* ----------------------------------------------------------------------
@ -465,21 +466,21 @@ void AngleClass2Kokkos<DeviceType>::read_restart(FILE *fp)
    k_theta0.h_view[i] = theta0[i];
  }

-  k_k2.template modify<LMPHostType>();
-  k_k3.template modify<LMPHostType>();
-  k_k4.template modify<LMPHostType>();
-  k_bb_k.template modify<LMPHostType>();
-  k_bb_r1.template modify<LMPHostType>();
-  k_bb_r2.template modify<LMPHostType>();
-  k_ba_k1.template modify<LMPHostType>();
-  k_ba_k2.template modify<LMPHostType>();
-  k_ba_r1.template modify<LMPHostType>();
-  k_ba_r2.template modify<LMPHostType>();
-  k_setflag.template modify<LMPHostType>();
-  k_setflag_a.template modify<LMPHostType>();
-  k_setflag_bb.template modify<LMPHostType>();
-  k_setflag_ba.template modify<LMPHostType>();
-  k_theta0.template modify<LMPHostType>();
+  k_k2.modify_host();
+  k_k3.modify_host();
+  k_k4.modify_host();
+  k_bb_k.modify_host();
+  k_bb_r1.modify_host();
+  k_bb_r2.modify_host();
+  k_ba_k1.modify_host();
+  k_ba_k2.modify_host();
+  k_ba_r1.modify_host();
+  k_ba_r2.modify_host();
+  k_setflag.modify_host();
+  k_setflag_a.modify_host();
+  k_setflag_bb.modify_host();
+  k_setflag_ba.modify_host();
+  k_theta0.modify_host();
 }

 /* ----------------------------------------------------------------------
--- a/src/KOKKOS/angle_class2_kokkos.h
+++ b/src/KOKKOS/angle_class2_kokkos.h
@ -36,8 +36,8 @@ class AngleClass2Kokkos : public AngleClass2 {

 public:
  typedef DeviceType device_type;
-  typedef ArrayTypes<DeviceType> AT;
  typedef EV_FLOAT value_type;
+  typedef ArrayTypes<DeviceType> AT;

  AngleClass2Kokkos(class LAMMPS *);
  ~AngleClass2Kokkos() override;
@ -60,6 +60,9 @@ class AngleClass2Kokkos : public AngleClass2 {
                     const F_FLOAT &delx1, const F_FLOAT &dely1, const F_FLOAT &delz1,
                     const F_FLOAT &delx2, const F_FLOAT &dely2, const F_FLOAT &delz2) const;

+  typename AT::tdual_efloat_1d k_eatom;
+  typename AT::tdual_virial_array k_vatom;
+
 protected:

  class NeighborKokkos *neighborKK;
@ -67,9 +70,6 @@ class AngleClass2Kokkos : public AngleClass2 {
  typename AT::t_x_array_randomread x;
  typename AT::t_f_array f;
  typename AT::t_int_2d anglelist;
-
-  typename AT::tdual_efloat_1d k_eatom;
-  typename AT::tdual_virial_array k_vatom;
  typename AT::t_efloat_1d d_eatom;
  typename AT::t_virial_array d_vatom;

--- a/src/KOKKOS/angle_cosine_kokkos.cpp
+++ b/src/KOKKOS/angle_cosine_kokkos.cpp
@ -36,6 +36,7 @@ using namespace MathConst;
 template<class DeviceType>
 AngleCosineKokkos<DeviceType>::AngleCosineKokkos(LAMMPS *lmp) : AngleCosine(lmp)
 {
+  kokkosable = 1;
  atomKK = (AtomKokkos *) atom;
  neighborKK = (NeighborKokkos *) neighbor;
  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
@ -124,12 +125,12 @@ void AngleCosineKokkos<DeviceType>::compute(int eflag_in, int vflag_in)

  if (eflag_atom) {
    k_eatom.template modify<DeviceType>();
-    k_eatom.template sync<LMPHostType>();
+    k_eatom.sync_host();
  }

  if (vflag_atom) {
    k_vatom.template modify<DeviceType>();
-    k_vatom.template sync<LMPHostType>();
+    k_vatom.sync_host();
  }

  copymode = 0;
@ -254,7 +255,7 @@ void AngleCosineKokkos<DeviceType>::coeff(int narg, char **arg)
  for (int i = 1; i <= n; i++)
    k_k.h_view[i] = k[i];

-  k_k.template modify<LMPHostType>();
+  k_k.modify_host();
 }

 /* ----------------------------------------------------------------------
@ -270,7 +271,7 @@ void AngleCosineKokkos<DeviceType>::read_restart(FILE *fp)
  for (int i = 1; i <= n; i++)
    k_k.h_view[i] = k[i];

-  k_k.template modify<LMPHostType>();
+  k_k.modify_host();
 }

 /* ----------------------------------------------------------------------
--- a/src/KOKKOS/angle_cosine_kokkos.h
+++ b/src/KOKKOS/angle_cosine_kokkos.h
@ -37,6 +37,7 @@ class AngleCosineKokkos : public AngleCosine {
 public:
  typedef DeviceType device_type;
  typedef EV_FLOAT value_type;
+  typedef ArrayTypes<DeviceType> AT;

  AngleCosineKokkos(class LAMMPS *);
  ~AngleCosineKokkos() override;
@ -59,6 +60,9 @@ class AngleCosineKokkos : public AngleCosine {
                     const F_FLOAT &delx1, const F_FLOAT &dely1, const F_FLOAT &delz1,
                     const F_FLOAT &delx2, const F_FLOAT &dely2, const F_FLOAT &delz2) const;

+  typename AT::tdual_efloat_1d k_eatom;
+  typename AT::tdual_virial_array k_vatom;
+
 protected:

  class NeighborKokkos *neighborKK;
@ -66,9 +70,6 @@ class AngleCosineKokkos : public AngleCosine {
  typename ArrayTypes<DeviceType>::t_x_array_randomread x;
  typename ArrayTypes<DeviceType>::t_f_array f;
  typename ArrayTypes<DeviceType>::t_int_2d anglelist;
-
-  typename ArrayTypes<DeviceType>::tdual_efloat_1d k_eatom;
-  typename ArrayTypes<DeviceType>::tdual_virial_array k_vatom;
  typename ArrayTypes<DeviceType>::t_efloat_1d d_eatom;
  typename ArrayTypes<DeviceType>::t_virial_array d_vatom;

--- a/src/KOKKOS/angle_harmonic_kokkos.cpp
+++ b/src/KOKKOS/angle_harmonic_kokkos.cpp
@ -38,6 +38,7 @@ static constexpr double SMALL = 0.001;
 template<class DeviceType>
 AngleHarmonicKokkos<DeviceType>::AngleHarmonicKokkos(LAMMPS *lmp) : AngleHarmonic(lmp)
 {
+  kokkosable = 1;
  atomKK = (AtomKokkos *) atom;
  neighborKK = (NeighborKokkos *) neighbor;
  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
@ -71,14 +72,18 @@ void AngleHarmonicKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
  // reallocate per-atom arrays if necessary

  if (eflag_atom) {
+    if(k_eatom.extent(0) < maxeatom) {
    memoryKK->destroy_kokkos(k_eatom,eatom);
    memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"angle:eatom");
    d_eatom = k_eatom.template view<DeviceType>();
+    } else Kokkos::deep_copy(d_eatom,0.0);
  }
  if (vflag_atom) {
+    if(k_vatom.extent(0) < maxvatom) {
    memoryKK->destroy_kokkos(k_vatom,vatom);
    memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"angle:vatom");
    d_vatom = k_vatom.template view<DeviceType>();
+    } else Kokkos::deep_copy(d_vatom,0.0);
  }

  //atomKK->sync(execution_space,datamask_read);
@ -127,12 +132,12 @@ void AngleHarmonicKokkos<DeviceType>::compute(int eflag_in, int vflag_in)

  if (eflag_atom) {
    k_eatom.template modify<DeviceType>();
-    k_eatom.template sync<LMPHostType>();
+    k_eatom.sync_host();
  }

  if (vflag_atom) {
    k_vatom.template modify<DeviceType>();
-    k_vatom.template sync<LMPHostType>();
+    k_vatom.sync_host();
  }

  copymode = 0;
@ -264,8 +269,8 @@ void AngleHarmonicKokkos<DeviceType>::coeff(int narg, char **arg)
    k_theta0.h_view[i] = theta0[i];
  }

-  k_k.template modify<LMPHostType>();
-  k_theta0.template modify<LMPHostType>();
+  k_k.modify_host();
+  k_theta0.modify_host();
 }

 /* ----------------------------------------------------------------------
@ -283,8 +288,8 @@ void AngleHarmonicKokkos<DeviceType>::read_restart(FILE *fp)
    k_theta0.h_view[i] = theta0[i];
  }

-  k_k.template modify<LMPHostType>();
-  k_theta0.template modify<LMPHostType>();
+  k_k.modify_host();
+  k_theta0.modify_host();
 }

 /* ----------------------------------------------------------------------
--- a/src/KOKKOS/angle_harmonic_kokkos.h
+++ b/src/KOKKOS/angle_harmonic_kokkos.h
@ -37,6 +37,7 @@ class AngleHarmonicKokkos : public AngleHarmonic {
 public:
  typedef DeviceType device_type;
  typedef EV_FLOAT value_type;
+  typedef ArrayTypes<DeviceType> AT;

  AngleHarmonicKokkos(class LAMMPS *);
  ~AngleHarmonicKokkos() override;
@ -59,6 +60,9 @@ class AngleHarmonicKokkos : public AngleHarmonic {
                     const F_FLOAT &delx1, const F_FLOAT &dely1, const F_FLOAT &delz1,
                     const F_FLOAT &delx2, const F_FLOAT &dely2, const F_FLOAT &delz2) const;

+  typename AT::tdual_efloat_1d k_eatom;
+  typename AT::tdual_virial_array k_vatom;
+
 protected:

  class NeighborKokkos *neighborKK;
@ -66,9 +70,6 @@ class AngleHarmonicKokkos : public AngleHarmonic {
  typename ArrayTypes<DeviceType>::t_x_array_randomread x;
  typename ArrayTypes<DeviceType>::t_f_array f;
  typename ArrayTypes<DeviceType>::t_int_2d anglelist;
-
-  typename ArrayTypes<DeviceType>::tdual_efloat_1d k_eatom;
-  typename ArrayTypes<DeviceType>::tdual_virial_array k_vatom;
  typename ArrayTypes<DeviceType>::t_efloat_1d d_eatom;
  typename ArrayTypes<DeviceType>::t_virial_array d_vatom;

--- a/src/KOKKOS/angle_hybrid_kokkos.cpp
+++ b/src/KOKKOS/angle_hybrid_kokkos.cpp
@ -0,0 +1,224 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "angle_hybrid_kokkos.h"
+
+#include "atom_kokkos.h"
+#include "atom_masks.h"
+#include "comm.h"
+#include "error.h"
+#include "force.h"
+#include "kokkos.h"
+#include "memory_kokkos.h"
+#include "neighbor_kokkos.h"
+
+#include <cstring>
+
+using namespace LAMMPS_NS;
+
+#define EXTRA 1000
+
+/* ---------------------------------------------------------------------- */
+
+// Construct the Kokkos-enabled hybrid angle style.
+// Flags the style as kokkosable, stores downcast pointers to the atom and
+// neighbor objects, selects the device execution space, and declares empty
+// read/modify data masks for the hybrid wrapper itself.
+AngleHybridKokkos::AngleHybridKokkos(LAMMPS *lmp) : AngleHybrid(lmp)
+{
+  kokkosable = 1;
+
+  atomKK = (AtomKokkos *) atom;
+  neighborKK = (NeighborKokkos *) neighbor;
+
+  execution_space = Device;
+
+  datamask_read = EMPTY_MASK;
+  datamask_modify = EMPTY_MASK;
+
+  // memory_usage() reads maxangle_all, but compute() only assigns it on a
+  // re-neighbor step, so give it a defined value up front
+  maxangle_all = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Release the storage owned by this class through deallocate(), which frees
+// setflag and the map DualView and resets 'allocated' to 0.
+AngleHybridKokkos::~AngleHybridKokkos()
+{
+  deallocate();
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Compute all angle interactions by splitting the neighbor angle list into
+// one list per sub-style and calling each sub-style's compute() on its share.
+//   eflag, vflag : energy/virial flags, passed to ev_init() and forwarded
+//                  unchanged to every sub-style
+// The neighbor class' angle list and count are temporarily replaced while the
+// sub-styles run and are restored before returning.
+void AngleHybridKokkos::compute(int eflag, int vflag)
+{
+  // save ptrs to original anglelist
+
+  int nanglelist_orig = neighbor->nanglelist;
+  neighborKK->k_anglelist.sync_device();
+  auto k_anglelist_orig = neighborKK->k_anglelist;
+  auto d_anglelist_orig = k_anglelist_orig.d_view;
+  auto d_nanglelist = k_nanglelist.d_view;
+  auto h_nanglelist = k_nanglelist.h_view;
+
+  // if this is re-neighbor step, create sub-style anglelists
+  // nanglelist[] = length of each sub-style list
+  // realloc sub-style anglelist if necessary
+  // load sub-style anglelist with 3 values from original anglelist
+
+  if (neighbor->ago == 0) {
+    Kokkos::deep_copy(d_nanglelist,0);
+
+    k_map.sync_device();
+    auto d_map = k_map.d_view;
+
+    // pass 1: count the angles belonging to each sub-style
+    // column 3 of the original list indexes the map; a negative map entry
+    // means the angle is not counted for any sub-style
+
+    Kokkos::parallel_for(nanglelist_orig,LAMMPS_LAMBDA(int i) {
+      const int m = d_map[d_anglelist_orig(i,3)];
+      if (m >= 0) Kokkos::atomic_increment(&d_nanglelist[m]);
+    });
+
+    k_nanglelist.modify_device();
+    k_nanglelist.sync_host();
+
+    // size the shared second dimension from the per-style counts, with
+    // EXTRA entries of headroom added when a count raises the maximum
+
+    maxangle_all = 0;
+    for (int m = 0; m < nstyles; m++)
+      if (h_nanglelist[m] > maxangle_all)
+        maxangle_all = h_nanglelist[m] + EXTRA;
+
+    if (k_anglelist.d_view.extent(1) < maxangle_all)
+      MemKK::realloc_kokkos(k_anglelist, "angle_hybrid:anglelist", nstyles, maxangle_all, 4);
+    auto d_anglelist = k_anglelist.d_view;
+
+    // pass 2: reset the counters and reuse them as per-style insert cursors;
+    // atomic_fetch_add hands each angle a unique slot n in its style's list
+
+    Kokkos::deep_copy(d_nanglelist,0);
+
+    Kokkos::parallel_for(nanglelist_orig,LAMMPS_LAMBDA(int i) {
+      const int m = d_map[d_anglelist_orig(i,3)];
+      if (m < 0) return;
+      const int n = Kokkos::atomic_fetch_add(&d_nanglelist[m],1);
+      d_anglelist(m,n,0) = d_anglelist_orig(i,0);
+      d_anglelist(m,n,1) = d_anglelist_orig(i,1);
+      d_anglelist(m,n,2) = d_anglelist_orig(i,2);
+      d_anglelist(m,n,3) = d_anglelist_orig(i,3);
+    });
+  }
+
+  // call each sub-style's compute function
+  // set neighbor->anglelist to sub-style anglelist before call
+  // accumulate sub-style global/peratom energy/virial in hybrid
+
+  ev_init(eflag, vflag);
+
+  // make the per-style counts written on the device visible on the host
+
+  k_nanglelist.modify_device();
+  k_nanglelist.sync_host();
+
+  for (int m = 0; m < nstyles; m++) {
+    // hand sub-style m its slice of the 3d list as the neighbor angle list
+    neighbor->nanglelist = h_nanglelist[m];
+    auto k_anglelist_m = Kokkos::subview(k_anglelist,m,Kokkos::ALL,Kokkos::ALL);
+    k_anglelist_m.modify_device();
+    neighborKK->k_anglelist = k_anglelist_m;
+
+    // sync/mark atom data with the sub-style's own masks and execution space
+    auto style = styles[m];
+    atomKK->sync(style->execution_space,style->datamask_read);
+    style->compute(eflag, vflag);
+    atomKK->modified(style->execution_space,style->datamask_modify);
+
+    if (eflag_global) energy += style->energy;
+    if (vflag_global)
+      for (int n = 0; n < 6; n++) virial[n] += style->virial[n];
+
+    // per-atom tallies are summed from the sub-style's host arrays;
+    // ghost atoms are included only when newton_bond is set
+
+    if (eflag_atom) {
+      int n = atom->nlocal;
+      if (force->newton_bond) n += atom->nghost;
+      double *eatom_substyle = styles[m]->eatom;
+      for (int i = 0; i < n; i++) eatom[i] += eatom_substyle[i];
+    }
+    if (vflag_atom) {
+      int n = atom->nlocal;
+      if (force->newton_bond) n += atom->nghost;
+      double **vatom_substyle = styles[m]->vatom;
+      for (int i = 0; i < n; i++)
+        for (int j = 0; j < 6; j++) vatom[i][j] += vatom_substyle[i][j];
+    }
+    if (cvflag_atom) {
+      int n = atom->nlocal;
+      if (force->newton_bond) n += atom->nghost;
+      double **cvatom_substyle = styles[m]->cvatom;
+      for (int i = 0; i < n; i++)
+        for (int j = 0; j < 9; j++) cvatom[i][j] += cvatom_substyle[i][j];
+    }
+  }
+
+  // restore ptrs to original anglelist
+
+  neighbor->nanglelist = nanglelist_orig;
+  neighborKK->k_anglelist = k_anglelist_orig;
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Allocate the per-type arrays (map as a DualView, setflag as a plain array)
+// and the per-sub-style angle counter, and mark the style as allocated.
+void AngleHybridKokkos::allocate()
+{
+  allocated = 1;
+
+  // angle types are indexed from 1, so arrays need nangletypes + 1 entries
+  const int ntypes_plus_one = atom->nangletypes + 1;
+
+  memoryKK->create_kokkos(k_map, map, ntypes_plus_one, "angle:map");
+  memory->create(setflag, ntypes_plus_one, "angle:setflag");
+
+  // no coefficients have been set yet for any type
+  for (int itype = 1; itype < ntypes_plus_one; ++itype) {
+    setflag[itype] = 0;
+  }
+
+  // one counter per sub-style
+  k_nanglelist = DAT::tdual_int_1d("angle:nanglelist", nstyles);
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Free setflag and the map DualView if they were allocated, and clear the
+// 'allocated' flag so a repeated call is a no-op.
+void AngleHybridKokkos::deallocate()
+{
+  if (allocated) {
+    allocated = 0;
+
+    memory->destroy(setflag);
+    memoryKK->destroy_kokkos(k_map,map);
+  }
+}
+
+/* ----------------------------------------------------------------------
+   set coeffs for one type
+---------------------------------------------------------------------- */
+
+void AngleHybridKokkos::coeff(int narg, char **arg)
+{
+  // argument handling is delegated entirely to the base class
+  AngleHybrid::coeff(narg,arg);
+
+  // mark the host side of k_map as modified (presumably the base-class call
+  // updated map[] - confirm); compute() calls k_map.sync_device() before
+  // reading it in a device kernel
+  k_map.modify_host();
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Run the base-class init_style(), then check every sub-style: each one must
+// be Kokkos-enabled, and any sub-style using the Host execution space turns
+// off the Kokkos allow_overlap setting.
+void AngleHybridKokkos::init_style()
+{
+  AngleHybrid::init_style();
+
+  for (int istyle = 0; istyle < nstyles; ++istyle) {
+    auto *substyle = styles[istyle];
+
+    if (!substyle->kokkosable)
+      error->all(FLERR,"Must use only Kokkos-enabled angle styles with angle_style hybrid/kk");
+
+    if (substyle->execution_space == Host) {
+      lmp->kokkos->allow_overlap = 0;
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   memory usage
+------------------------------------------------------------------------- */
+
+// Return the estimated memory use in bytes: the hybrid style's per-atom
+// energy/virial/centroid-virial arrays, the per-sub-style angle lists, and
+// whatever each existing sub-style reports for itself.
+double AngleHybridKokkos::memory_usage()
+{
+  // per-atom accumulators: 1, 6 and 9 doubles per entry
+  double total = static_cast<double>(maxeatom) * sizeof(double);
+  total += static_cast<double>(maxvatom) * 6 * sizeof(double);
+  total += static_cast<double>(maxcvatom) * 9 * sizeof(double);
+
+  // each sub-style is charged maxangle_all rows of 4 ints
+  for (int istyle = 0; istyle < nstyles; ++istyle) {
+    total += static_cast<double>(maxangle_all) * 4 * sizeof(int);
+  }
+
+  // add the sub-styles' own estimates, skipping empty slots
+  for (int istyle = 0; istyle < nstyles; ++istyle) {
+    if (styles[istyle]) total += styles[istyle]->memory_usage();
+  }
+
+  return total;
+}
--- a/src/KOKKOS/angle_hybrid_kokkos.h
+++ b/src/KOKKOS/angle_hybrid_kokkos.h
@ -0,0 +1,58 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef ANGLE_CLASS
+// clang-format off
+AngleStyle(hybrid/kk,AngleHybridKokkos);
+AngleStyle(hybrid/kk/device,AngleHybridKokkos);
+AngleStyle(hybrid/kk/host,AngleHybridKokkos);
+// clang-format on
+#else
+
+// clang-format off
+#ifndef LMP_ANGLE_HYBRID_KOKKOS_H
+#define LMP_ANGLE_HYBRID_KOKKOS_H
+
+#include "angle_hybrid.h"
+#include "kokkos_type.h"
+
+namespace LAMMPS_NS {
+
+// Kokkos variant of the hybrid angle style. It overrides compute(), coeff(),
+// init_style(), memory_usage(), allocate() and deallocate() of AngleHybrid
+// and keeps the type-to-style map and the per-sub-style angle lists in
+// Kokkos DualViews.
+class AngleHybridKokkos : public AngleHybrid {
+  friend class Force;
+
+ public:
+  AngleHybridKokkos(class LAMMPS *);
+  ~AngleHybridKokkos() override;
+  void compute(int, int) override;
+  void coeff(int, char **) override;
+  void init_style() override;
+  double memory_usage() override;
+
+ private:
+  // second extent requested for k_anglelist; recomputed by compute() on
+  // re-neighbor steps and read by memory_usage()
+  int maxangle_all;
+
+  class NeighborKokkos *neighborKK;
+
+  DAT::tdual_int_1d k_map;       // which style each angle type points to
+  DAT::tdual_int_1d k_nanglelist; // # of angles in sub-style anglelists
+  DAT::tdual_int_3d k_anglelist;  // anglelist for each sub-style
+
+  void allocate() override;
+  void deallocate() override;
+};
+
+}    // namespace LAMMPS_NS
+
+#endif
+#endif
--- a/src/KOKKOS/angle_spica_kokkos.cpp
+++ b/src/KOKKOS/angle_spica_kokkos.cpp
@ -0,0 +1,656 @@
+// clang-format off
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Mitch Murphy (alphataubio@gmail.com)
+------------------------------------------------------------------------- */
+
+#include "angle_spica_kokkos.h"
+
+#include "atom_kokkos.h"
+#include "atom_masks.h"
+#include "comm.h"
+#include "error.h"
+#include "force.h"
+#include "math_const.h"
+#include "memory_kokkos.h"
+#include "neighbor_kokkos.h"
+#include "respa.h"
+#include "update.h"
+
+#include "lj_spica_common.h"
+
+#include <cmath>
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+using namespace LJSPICAParms;
+
+static constexpr double SMALL = 0.001;    // floor applied to sin(theta) in the compute kernel before it is inverted
+
+/* ----------------------------------------------------------------------
+   constructor: cache the Kokkos atom and neighbor objects, record the
+   execution space for DeviceType, and set the read/modify data masks
+   NOTE(review): other Kokkos topology styles touched by this change set
+   kokkosable = 1 in their constructors; confirm whether this style
+   needs the same
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+AngleSPICAKokkos<DeviceType>::AngleSPICAKokkos(LAMMPS *lmp) : AngleSPICA(lmp)
+{
+  atomKK = (AtomKokkos *) atom;
+  neighborKK = (NeighborKokkos *) neighbor;
+  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
+  datamask_read = X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK;    // masks are set here because compute() does not call atomKK->sync/modified itself
+  datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK;
+
+  centroidstressflag = CENTROID_NOTAVAIL;
+}
+
+/* ----------------------------------------------------------------------
+   destructor: release the per-atom energy and virial arrays, but only
+   when copymode is not set
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+AngleSPICAKokkos<DeviceType>::~AngleSPICAKokkos()
+{
+  if (!copymode) {    // compute() sets copymode to 1 before launching its kernels with *this and resets it to 0 afterwards
+    memoryKK->destroy_kokkos(k_eatom,eatom);
+    memoryKK->destroy_kokkos(k_vatom,vatom);
+  }
+}
+
+/* ----------------------------------------------------------------------
+   compute forces, and optionally energy and virial, for every angle in
+   the Kokkos neighbor angle list with a single kernel launch
+   eflag_in, vflag_in = energy/virial request flags, handed to ev_init()
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void AngleSPICAKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
+{
+  eflag = eflag_in;
+  vflag = vflag_in;
+
+  ev_init(eflag,vflag,0);
+
+  // recreate the per-atom energy/virial arrays whenever they are requested
+
+  if (eflag_atom) {
+    memoryKK->destroy_kokkos(k_eatom,eatom);
+    memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"angle:eatom");
+    d_eatom = k_eatom.template view<DeviceType>();
+  }
+  if (vflag_atom) {
+    memoryKK->destroy_kokkos(k_vatom,vatom);
+    memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"angle:vatom");
+    d_vatom = k_vatom.template view<DeviceType>();
+  }
+
+  k_k.template sync<DeviceType>();    // coeff(), read_restart() and init_style() only mark these views as modified on the host
+  k_theta0.template sync<DeviceType>();
+  k_repscale.template sync<DeviceType>();
+  k_lj_type.template sync<DeviceType>();
+  k_lj1.template sync<DeviceType>();
+  k_lj2.template sync<DeviceType>();
+  k_lj3.template sync<DeviceType>();
+  k_lj4.template sync<DeviceType>();
+  k_rminsq.template sync<DeviceType>();
+  k_emin.template sync<DeviceType>();
+
+  // Per-atom data is deliberately not synced or marked as modified here. Quoting the Kokkos developers:
+  // "It has to do with overlapping host/device in verlet_kokkos.cpp. For this reason, all topology styles (bond, angle, etc.) must set DATAMASK_READ, DATAMASK_MODIFY in the constructor and must not use atomKK->sync/modified. This is a gotcha that needed to be better documented."
+  // https://matsci.org/t/a-few-kokkos-development-questions/56598
+  // For that reason the following calls stay disabled:
+  // atomKK->sync(execution_space,datamask_read);
+  // if (eflag || vflag) atomKK->modified(execution_space,datamask_modify);
+  // else atomKK->modified(execution_space,F_MASK);
+  //atomKK->k_type.template sync<DeviceType>();
+
+  x = atomKK->k_x.template view<DeviceType>();
+  f = atomKK->k_f.template view<DeviceType>();
+  neighborKK->k_anglelist.template sync<DeviceType>();
+  anglelist = neighborKK->k_anglelist.template view<DeviceType>();
+  int nanglelist = neighborKK->nanglelist;
+  d_type = atomKK->k_type.template view<DeviceType>();
+  nlocal = atom->nlocal;
+  newton_bond = force->newton_bond;
+
+  copymode = 1;    // reset to 0 at the end of this function; the destructor frees nothing while it is set
+
+  // one work item per angle; a reduction is used only when energy/virial is tallied (evflag)
+
+  EV_FLOAT ev;
+
+  if (evflag) {
+    if (newton_bond) {
+      Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagAngleSPICACompute<1,1> >(0,nanglelist),*this,ev);
+    } else {
+      Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagAngleSPICACompute<0,1> >(0,nanglelist),*this,ev);
+    }
+  } else {
+    if (newton_bond) {
+      Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagAngleSPICACompute<1,0> >(0,nanglelist),*this);
+    } else {
+      Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagAngleSPICACompute<0,0> >(0,nanglelist),*this);
+    }
+  }
+
+  if (eflag_global) energy += ev.evdwl;    // fold the reduced totals into the global accumulators
+  if (vflag_global) {
+    virial[0] += ev.v[0];
+    virial[1] += ev.v[1];
+    virial[2] += ev.v[2];
+    virial[3] += ev.v[3];
+    virial[4] += ev.v[4];
+    virial[5] += ev.v[5];
+  }
+
+  if (eflag_atom) {    // per-atom results were written in the execution space; copy them back to the host
+    k_eatom.template modify<DeviceType>();
+    k_eatom.template sync<LMPHostType>();
+  }
+
+  if (vflag_atom) {
+    k_vatom.template modify<DeviceType>();
+    k_vatom.template sync<LMPHostType>();
+  }
+
+  copymode = 0;
+}
+/* ----------------------------------------------------------------------
+   kernel body for angle n, reduction variant
+   harmonic angle term E = k (theta - theta0)^2 plus, when repflag is
+   set, a 1-3 LJ term between the two end atoms inside rminsq
+   NEWTON_BOND = 0 restricts force updates to atoms with index < nlocal
+   EVFLAG = 1 additionally tallies energy/virial into ev
+------------------------------------------------------------------------- */
+template<class DeviceType>
+template<int NEWTON_BOND, int EVFLAG>
+KOKKOS_INLINE_FUNCTION
+void AngleSPICAKokkos<DeviceType>::operator()(TagAngleSPICACompute<NEWTON_BOND,EVFLAG>, const int &n, EV_FLOAT& ev) const {
+
+  // all force updates below go through this atomic, unmanaged alias of f
+  Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,typename KKDevice<DeviceType>::value,Kokkos::MemoryTraits<Kokkos::Atomic|Kokkos::Unmanaged> > a_f = f;
+
+  const int i1 = anglelist(n,0);    // columns 0-2: the three atom indices, i2 is the central atom
+  const int i2 = anglelist(n,1);
+  const int i3 = anglelist(n,2);
+  const int type = anglelist(n,3);    // column 3: angle type, used to index d_k and d_theta0
+
+  // 1st bond
+
+  const F_FLOAT delx1 = x(i1,0) - x(i2,0);
+  const F_FLOAT dely1 = x(i1,1) - x(i2,1);
+  const F_FLOAT delz1 = x(i1,2) - x(i2,2);
+
+  const F_FLOAT rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1;
+  const F_FLOAT r1 = sqrt(rsq1);
+
+  // 2nd bond
+
+  const F_FLOAT delx2 = x(i3,0) - x(i2,0);
+  const F_FLOAT dely2 = x(i3,1) - x(i2,1);
+  const F_FLOAT delz2 = x(i3,2) - x(i2,2);
+
+  const F_FLOAT rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
+  const F_FLOAT r2 = sqrt(rsq2);
+
+  // angle (cos and sin)
+
+  F_FLOAT c = delx1*delx2 + dely1*dely2 + delz1*delz2;
+  c /= r1*r2;
+
+  if (c > 1.0) c = 1.0;    // clamp the cosine into [-1,1] before sqrt() and acos()
+  if (c < -1.0) c = -1.0;
+
+  F_FLOAT s = sqrt(1.0 - c*c);
+  if (s < SMALL) s = SMALL;
+  s = 1.0/s;    // from here on s holds 1/sin(theta), with sin(theta) floored at SMALL
+
+  // 1-3 LJ interaction.
+  // we only want to use the repulsive part,
+  // and it can be scaled (or off).
+  // so this has to be done here and not in the
+  // general non-bonded code.
+
+  F_FLOAT f13, e13, delx3, dely3, delz3;
+  f13 = e13 = delx3 = dely3 = delz3 = 0.0;
+
+  if (repflag) {
+
+    delx3 = x(i1,0) - x(i3,0);
+    dely3 = x(i1,1) - x(i3,1);
+    delz3 = x(i1,2) - x(i3,2);
+    const F_FLOAT rsq3 = delx3*delx3 + dely3*dely3 + delz3*delz3;
+
+    const int type1 = d_type[i1];
+    const int type3 = d_type[i3];
+
+    f13=0.0;    // both are already zero from the initialization above
+    e13=0.0;
+
+    if (rsq3 < d_rminsq(type1,type3)) {    // the 1-3 term is skipped at or beyond rminsq for this type pair
+      const int ljt = d_lj_type(type1,type3);
+      const double r2inv = 1.0/rsq3;
+
+      if (ljt == LJ12_4) {
+        const double r4inv=r2inv*r2inv;
+
+        f13 = r4inv*(d_lj1(type1,type3)*r4inv*r4inv - d_lj2(type1,type3));
+        if (eflag) e13 = r4inv*(d_lj3(type1,type3)*r4inv*r4inv - d_lj4(type1,type3));
+
+      } else if (ljt == LJ9_6) {
+        const double r3inv = r2inv*sqrt(r2inv);
+        const double r6inv = r3inv*r3inv;
+
+        f13 = r6inv*(d_lj1(type1,type3)*r3inv - d_lj2(type1,type3));
+        if (eflag) e13 = r6inv*(d_lj3(type1,type3)*r3inv - d_lj4(type1,type3));
+
+      } else if (ljt == LJ12_6) {
+        const double r6inv = r2inv*r2inv*r2inv;
+
+        f13 = r6inv*(d_lj1(type1,type3)*r6inv - d_lj2(type1,type3));
+        if (eflag) e13 = r6inv*(d_lj3(type1,type3)*r6inv - d_lj4(type1,type3));
+
+      } else if (ljt == LJ12_5) {
+        const double r5inv = r2inv*r2inv*sqrt(r2inv);
+        const double r7inv = r5inv*r2inv;
+
+        f13 = r5inv*(d_lj1(type1,type3)*r7inv - d_lj2(type1,type3));
+        if (eflag) e13 = r5inv*(d_lj3(type1,type3)*r7inv - d_lj4(type1,type3));
+      }
+
+      // make sure energy is 0.0 at the cutoff.
+      if (eflag) e13 -= d_emin(type1,type3);
+
+      f13 *= r2inv;    // f13 is multiplied by the del3 components when the force is applied below
+    }
+  }
+
+  // force & energy
+
+  const F_FLOAT dtheta = acos(c) - d_theta0[type];
+  const F_FLOAT tk = d_k[type] * dtheta;
+
+  F_FLOAT eangle = 0.0;
+  if (eflag) eangle = tk*dtheta;    // = k * (theta - theta0)^2
+
+  const F_FLOAT a = -2.0 * tk * s;
+  const F_FLOAT a11 = a*c / rsq1;
+  const F_FLOAT a12 = -a / (r1*r2);
+  const F_FLOAT a22 = a*c / rsq2;
+
+  F_FLOAT f1[3],f3[3];    // angle force on the end atoms i1 and i3; the central atom i2 receives -(f1+f3)
+  f1[0] = a11*delx1 + a12*delx2;
+  f1[1] = a11*dely1 + a12*dely2;
+  f1[2] = a11*delz1 + a12*delz2;
+  f3[0] = a22*delx2 + a12*delx1;
+  f3[1] = a22*dely2 + a12*dely1;
+  f3[2] = a22*delz2 + a12*delz1;
+
+  // apply force to each of 3 atoms
+
+  if (NEWTON_BOND || i1 < nlocal) {
+    a_f(i1,0) += f1[0] + f13*delx3;
+    a_f(i1,1) += f1[1] + f13*dely3;
+    a_f(i1,2) += f1[2] + f13*delz3;
+  }
+
+  if (NEWTON_BOND || i2 < nlocal) {
+    a_f(i2,0) -= f1[0] + f3[0];
+    a_f(i2,1) -= f1[1] + f3[1];
+    a_f(i2,2) -= f1[2] + f3[2];
+  }
+
+  if (NEWTON_BOND || i3 < nlocal) {
+    a_f(i3,0) += f3[0] - f13*delx3;
+    a_f(i3,1) += f3[1] - f13*dely3;
+    a_f(i3,2) += f3[2] - f13*delz3;
+  }
+
+  if (EVFLAG) {
+    ev_tally(ev,i1,i2,i3,eangle,f1,f3,delx1,dely1,delz1,delx2,dely2,delz2);
+
+    if (repflag)    // the 1-3 contribution is tallied separately as a pair-like term between i1 and i3
+      ev_tally13(ev,i1,i3,e13,f13,delx3,dely3,delz3);
+  }
+}
+/* ----------------------------------------------------------------------
+   kernel body for angle n, parallel_for variant
+   forwards to the reduction variant with a local EV_FLOAT whose
+   contents are discarded
+------------------------------------------------------------------------- */
+template<class DeviceType>
+template<int NEWTON_BOND, int EVFLAG>
+KOKKOS_INLINE_FUNCTION
+void AngleSPICAKokkos<DeviceType>::operator()(TagAngleSPICACompute<NEWTON_BOND,EVFLAG>, const int &n) const {
+  EV_FLOAT ev;
+  this->template operator()<NEWTON_BOND,EVFLAG>(TagAngleSPICACompute<NEWTON_BOND,EVFLAG>(), n, ev);
+}
+
+/* ----------------------------------------------------------------------
+   allocate the base-class arrays, then the DualViews that mirror them:
+   per-angle-type views have nangletypes+1 entries, per-atom-type-pair
+   views are (ntypes+1) x (ntypes+1); the DeviceType view of each
+   DualView is cached in the matching d_* member
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void AngleSPICAKokkos<DeviceType>::allocate()
+{
+  AngleSPICA::allocate();
+
+  int nangletypes = atom->nangletypes;
+  k_k = typename ArrayTypes<DeviceType>::tdual_ffloat_1d("AngleSPICA::k",nangletypes+1);
+  k_theta0 = typename ArrayTypes<DeviceType>::tdual_ffloat_1d("AngleSPICA::theta0",nangletypes+1);
+  k_repscale = typename ArrayTypes<DeviceType>::tdual_ffloat_1d("AngleSPICA::repscale",nangletypes+1);
+  k_setflag = typename ArrayTypes<DeviceType>::tdual_int_1d("AngleSPICA::setflag",nangletypes+1);
+
+  d_k = k_k.template view<DeviceType>();
+  d_theta0 = k_theta0.template view<DeviceType>();
+  d_repscale = k_repscale.template view<DeviceType>();    // d_repscale and d_setflag are not read by the kernels in this file
+  d_setflag = k_setflag.template view<DeviceType>();
+
+  int ntypes = atom->ntypes;
+  k_lj_type = typename ArrayTypes<DeviceType>::tdual_int_2d("AngleSPICA::lj_type",ntypes+1,ntypes+1);
+  k_lj1 = typename ArrayTypes<DeviceType>::tdual_ffloat_2d("AngleSPICA::lj1",ntypes+1,ntypes+1);
+  k_lj2 = typename ArrayTypes<DeviceType>::tdual_ffloat_2d("AngleSPICA::lj2",ntypes+1,ntypes+1);
+  k_lj3 = typename ArrayTypes<DeviceType>::tdual_ffloat_2d("AngleSPICA::lj3",ntypes+1,ntypes+1);
+  k_lj4 = typename ArrayTypes<DeviceType>::tdual_ffloat_2d("AngleSPICA::lj4",ntypes+1,ntypes+1);
+  k_rminsq = typename ArrayTypes<DeviceType>::tdual_ffloat_2d("AngleSPICA::rminsq",ntypes+1,ntypes+1);
+  k_emin = typename ArrayTypes<DeviceType>::tdual_ffloat_2d("AngleSPICA::emin",ntypes+1,ntypes+1);
+
+  d_lj_type = k_lj_type.template view<DeviceType>();
+  d_lj1 = k_lj1.template view<DeviceType>();
+  d_lj2 = k_lj2.template view<DeviceType>();
+  d_lj3 = k_lj3.template view<DeviceType>();
+  d_lj4 = k_lj4.template view<DeviceType>();
+  d_rminsq = k_rminsq.template view<DeviceType>();
+  d_emin = k_emin.template view<DeviceType>();
+}
+
+/* ----------------------------------------------------------------------
+   init specific to this angle style
+   runs the base-class init, rejects rRESPA runs that use inner or
+   middle levels, then copies the per-atom-type-pair LJ tables into the
+   host side of the DualViews and marks them as modified on the host
+   (compute() syncs them to the execution space)
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void AngleSPICAKokkos<DeviceType>::init_style()
+{
+  AngleSPICA::init_style();
+
+  // error if rRESPA with inner or middle levels
+  // the message names the angle style, since that is what the user has to change
+
+  if (update->whichflag == 1 && utils::strmatch(update->integrate_style,"^respa")) {
+    int respa = 0;
+    if (((Respa *) update->integrate)->level_inner >= 0) respa = 1;
+    if (((Respa *) update->integrate)->level_middle >= 0) respa = 2;
+    if (respa)
+      error->all(FLERR,"Cannot use Kokkos angle style with rRESPA inner/middle");
+  }
+
+  // mirror the base-class tables; index 0 is unused, types run from 1 to ntypes
+
+  int ntypes = atom->ntypes;
+  for (int i = 1; i <= ntypes; i++) {
+    for (int j = 1; j <= ntypes; j++) {
+      k_lj_type.h_view(i,j) = lj_type[i][j];
+      k_lj1.h_view(i,j) = lj1[i][j];
+      k_lj2.h_view(i,j) = lj2[i][j];
+      k_lj3.h_view(i,j) = lj3[i][j];
+      k_lj4.h_view(i,j) = lj4[i][j];
+      k_rminsq.h_view(i,j) = rminsq[i][j];
+      k_emin.h_view(i,j) = emin[i][j];
+    }
+  }
+
+  k_lj_type.template modify<LMPHostType>();
+  k_lj1.template modify<LMPHostType>();
+  k_lj2.template modify<LMPHostType>();
+  k_lj3.template modify<LMPHostType>();
+  k_lj4.template modify<LMPHostType>();
+  k_rminsq.template modify<LMPHostType>();
+  k_emin.template modify<LMPHostType>();
+}
+
+/* ----------------------------------------------------------------------
+   set coeffs for one or more types
+   the arguments are handed to AngleSPICA::coeff(); afterwards the
+   values of all angle types are copied into the host views, which are
+   then marked as modified on the host
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void AngleSPICAKokkos<DeviceType>::coeff(int narg, char **arg)
+{
+  AngleSPICA::coeff(narg, arg);
+
+  int n = atom->nangletypes;
+  for (int i = 1; i <= n; i++) {    // every type is re-copied, not only the ones named in arg
+    k_k.h_view[i] = k[i];
+    k_theta0.h_view[i] = theta0[i];
+    k_repscale.h_view[i] = repscale[i];
+    k_setflag.h_view[i] = setflag[i];
+  }
+
+  k_k.template modify<LMPHostType>();    // compute() syncs k_k, k_theta0 and k_repscale to the execution space
+  k_theta0.template modify<LMPHostType>();
+  k_repscale.template modify<LMPHostType>();
+  k_setflag.template modify<LMPHostType>();
+}
+
+/* ----------------------------------------------------------------------
+   read coeffs from a restart file via AngleSPICA::read_restart(), then
+   copy the values of all angle types into the host views and mark them
+   as modified on the host (same mirroring as in coeff())
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void AngleSPICAKokkos<DeviceType>::read_restart(FILE *fp)
+{
+  AngleSPICA::read_restart(fp);
+
+  int n = atom->nangletypes;
+  for (int i = 1; i <= n; i++) {
+    k_k.h_view[i] = k[i];
+    k_theta0.h_view[i] = theta0[i];
+    k_repscale.h_view[i] = repscale[i];
+    k_setflag.h_view[i] = setflag[i];
+  }
+
+  k_k.template modify<LMPHostType>();
+  k_theta0.template modify<LMPHostType>();
+  k_repscale.template modify<LMPHostType>();
+  k_setflag.template modify<LMPHostType>();
+}
+
+/* ----------------------------------------------------------------------
+   tally energy and virial into global and per-atom accumulators
+   virial = r1F1 + r2F2 + r3F3 = (r1-r2) F1 + (r3-r2) F3 = del1*f1 + del2*f3
+   i,j,k = the three atoms of the angle (j is the central atom)
+   global terms go into ev; with newton_bond off each atom with
+   index < nlocal contributes one third
+   per-atom terms always assign one third to each eligible atom
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+//template<int NEWTON_BOND>
+KOKKOS_INLINE_FUNCTION
+void AngleSPICAKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int i, const int j, const int k,
+                     F_FLOAT &eangle, F_FLOAT *f1, F_FLOAT *f3,
+                     const F_FLOAT &delx1, const F_FLOAT &dely1, const F_FLOAT &delz1,
+                     const F_FLOAT &delx2, const F_FLOAT &dely2, const F_FLOAT &delz2) const
+{
+  E_FLOAT eanglethird;
+  F_FLOAT v[6];
+
+  // The eatom and vatom arrays are atomic
+  Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,typename KKDevice<DeviceType>::value,Kokkos::MemoryTraits<Kokkos::Atomic|Kokkos::Unmanaged> > v_eatom = k_eatom.template view<DeviceType>();
+  Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,typename KKDevice<DeviceType>::value,Kokkos::MemoryTraits<Kokkos::Atomic|Kokkos::Unmanaged> > v_vatom = k_vatom.template view<DeviceType>();
+
+  if (eflag_either) {
+    if (eflag_global) {
+      if (newton_bond) ev.evdwl += eangle;
+      else {
+        eanglethird = THIRD*eangle;
+
+        if (i < nlocal) ev.evdwl += eanglethird;
+        if (j < nlocal) ev.evdwl += eanglethird;
+        if (k < nlocal) ev.evdwl += eanglethird;
+      }
+    }
+    if (eflag_atom) {
+      eanglethird = THIRD*eangle;
+
+      if (newton_bond || i < nlocal) v_eatom[i] += eanglethird;
+      if (newton_bond || j < nlocal) v_eatom[j] += eanglethird;
+      if (newton_bond || k < nlocal) v_eatom[k] += eanglethird;
+    }
+  }
+
+  if (vflag_either) {
+    v[0] = delx1*f1[0] + delx2*f3[0];    // component order: xx, yy, zz, xy, xz, yz
+    v[1] = dely1*f1[1] + dely2*f3[1];
+    v[2] = delz1*f1[2] + delz2*f3[2];
+    v[3] = delx1*f1[1] + delx2*f3[1];
+    v[4] = delx1*f1[2] + delx2*f3[2];
+    v[5] = dely1*f1[2] + dely2*f3[2];
+
+    if (vflag_global) {
+      if (newton_bond) {
+        ev.v[0] += v[0];
+        ev.v[1] += v[1];
+        ev.v[2] += v[2];
+        ev.v[3] += v[3];
+        ev.v[4] += v[4];
+        ev.v[5] += v[5];
+      } else {
+        if (i < nlocal) {
+          ev.v[0] += THIRD*v[0];
+          ev.v[1] += THIRD*v[1];
+          ev.v[2] += THIRD*v[2];
+          ev.v[3] += THIRD*v[3];
+          ev.v[4] += THIRD*v[4];
+          ev.v[5] += THIRD*v[5];
+        }
+        if (j < nlocal) {
+          ev.v[0] += THIRD*v[0];
+          ev.v[1] += THIRD*v[1];
+          ev.v[2] += THIRD*v[2];
+          ev.v[3] += THIRD*v[3];
+          ev.v[4] += THIRD*v[4];
+          ev.v[5] += THIRD*v[5];
+        }
+        if (k < nlocal) {
+          ev.v[0] += THIRD*v[0];
+
+          ev.v[1] += THIRD*v[1];
+          ev.v[2] += THIRD*v[2];
+          ev.v[3] += THIRD*v[3];
+          ev.v[4] += THIRD*v[4];
+          ev.v[5] += THIRD*v[5];
+        }
+      }
+    }
+
+    if (vflag_atom) {
+      if (newton_bond || i < nlocal) {
+        v_vatom(i,0) += THIRD*v[0];
+        v_vatom(i,1) += THIRD*v[1];
+        v_vatom(i,2) += THIRD*v[2];
+        v_vatom(i,3) += THIRD*v[3];
+        v_vatom(i,4) += THIRD*v[4];
+        v_vatom(i,5) += THIRD*v[5];
+      }
+      if (newton_bond || j < nlocal) {
+        v_vatom(j,0) += THIRD*v[0];
+        v_vatom(j,1) += THIRD*v[1];
+        v_vatom(j,2) += THIRD*v[2];
+        v_vatom(j,3) += THIRD*v[3];
+        v_vatom(j,4) += THIRD*v[4];
+        v_vatom(j,5) += THIRD*v[5];
+      }
+      if (newton_bond || k < nlocal) {
+        v_vatom(k,0) += THIRD*v[0];
+        v_vatom(k,1) += THIRD*v[1];
+        v_vatom(k,2) += THIRD*v[2];
+        v_vatom(k,3) += THIRD*v[3];
+        v_vatom(k,4) += THIRD*v[4];
+        v_vatom(k,5) += THIRD*v[5];
+
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   tally energy and virial of the 1-3 term between atoms i and j
+   evdwl = pair energy, fpair = force divided by distance (multiplied
+   here by del components), delx/dely/delz = x_i - x_j as passed by the
+   compute kernel
+   with newton_bond off each atom with index < nlocal contributes one
+   half to the global terms; per-atom terms always assign one half to
+   each eligible atom
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+void AngleSPICAKokkos<DeviceType>::ev_tally13(EV_FLOAT &ev, const int i, const int j,
+                     const F_FLOAT &evdwl, const F_FLOAT &fpair,
+                     const F_FLOAT &delx, const F_FLOAT &dely, const F_FLOAT &delz) const
+{
+  double v[6];
+
+  // The eatom and vatom arrays are atomic
+  Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,typename KKDevice<DeviceType>::value,Kokkos::MemoryTraits<Kokkos::Atomic|Kokkos::Unmanaged> > v_eatom = k_eatom.template view<DeviceType>();
+  Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,typename KKDevice<DeviceType>::value,Kokkos::MemoryTraits<Kokkos::Atomic|Kokkos::Unmanaged> > v_vatom = k_vatom.template view<DeviceType>();
+
+  if (eflag_either) {
+    if (eflag_global) {
+      if (newton_bond) {
+        ev.evdwl += evdwl;
+      } else {
+        if (i < nlocal)
+          ev.evdwl += 0.5*evdwl;
+        if (j < nlocal)
+          ev.evdwl += 0.5*evdwl;
+      }
+    }
+    if (eflag_atom) {
+      if (newton_bond || i < nlocal) v_eatom[i] += 0.5*evdwl;
+      if (newton_bond || j < nlocal) v_eatom[j] += 0.5*evdwl;
+    }
+  }
+
+  if (vflag_either) {
+    v[0] = delx*delx*fpair;    // component order: xx, yy, zz, xy, xz, yz
+    v[1] = dely*dely*fpair;
+    v[2] = delz*delz*fpair;
+    v[3] = delx*dely*fpair;
+    v[4] = delx*delz*fpair;
+    v[5] = dely*delz*fpair;
+
+    if (vflag_global) {
+      if (newton_bond) {
+        ev.v[0] += v[0];
+        ev.v[1] += v[1];
+        ev.v[2] += v[2];
+        ev.v[3] += v[3];
+        ev.v[4] += v[4];
+        ev.v[5] += v[5];
+      } else {
+        if (i < nlocal) {
+          ev.v[0] += 0.5*v[0];
+          ev.v[1] += 0.5*v[1];
+          ev.v[2] += 0.5*v[2];
+          ev.v[3] += 0.5*v[3];
+          ev.v[4] += 0.5*v[4];
+          ev.v[5] += 0.5*v[5];
+        }
+        if (j < nlocal) {
+          ev.v[0] += 0.5*v[0];
+          ev.v[1] += 0.5*v[1];
+          ev.v[2] += 0.5*v[2];
+          ev.v[3] += 0.5*v[3];
+          ev.v[4] += 0.5*v[4];
+          ev.v[5] += 0.5*v[5];
+        }
+      }
+    }
+
+    if (vflag_atom) {
+      if (newton_bond || i < nlocal) {
+        v_vatom(i,0) += 0.5*v[0];
+        v_vatom(i,1) += 0.5*v[1];
+        v_vatom(i,2) += 0.5*v[2];
+        v_vatom(i,3) += 0.5*v[3];
+        v_vatom(i,4) += 0.5*v[4];
+        v_vatom(i,5) += 0.5*v[5];
+      }
+      if (newton_bond || j < nlocal) {
+        v_vatom(j,0) += 0.5*v[0];
+        v_vatom(j,1) += 0.5*v[1];
+        v_vatom(j,2) += 0.5*v[2];
+        v_vatom(j,3) += 0.5*v[3];
+        v_vatom(j,4) += 0.5*v[4];
+        v_vatom(j,5) += 0.5*v[5];
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   explicit template instantiations: always for LMPDeviceType, and
+   additionally for LMPHostType when LMP_KOKKOS_GPU is defined
+------------------------------------------------------------------------- */
+
+namespace LAMMPS_NS {
+template class AngleSPICAKokkos<LMPDeviceType>;
+#ifdef LMP_KOKKOS_GPU
+template class AngleSPICAKokkos<LMPHostType>;
+#endif
+}
+
--- a/src/KOKKOS/angle_spica_kokkos.h
+++ b/src/KOKKOS/angle_spica_kokkos.h
@ -0,0 +1,106 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef ANGLE_CLASS
+// clang-format off
+AngleStyle(spica/kk,AngleSPICAKokkos<LMPDeviceType>);           // the plain /kk and /kk/device names use LMPDeviceType
+AngleStyle(spica/kk/device,AngleSPICAKokkos<LMPDeviceType>);
+AngleStyle(spica/kk/host,AngleSPICAKokkos<LMPHostType>);        // the /kk/host name uses LMPHostType
+AngleStyle(sdk/kk,AngleSPICAKokkos<LMPDeviceType>);             // the sdk/* names repeat the same mapping as spica/*
+AngleStyle(sdk/kk/device,AngleSPICAKokkos<LMPDeviceType>);
+AngleStyle(sdk/kk/host,AngleSPICAKokkos<LMPHostType>);
+// clang-format on
+#else
+
+// clang-format off
+#ifndef LMP_ANGLE_SPICA_KOKKOS_H
+#define LMP_ANGLE_SPICA_KOKKOS_H
+
+#include "angle_spica.h"
+#include "kokkos_type.h"
+
+namespace LAMMPS_NS {
+
+template<int NEWTON_BOND, int EVFLAG>
+struct TagAngleSPICACompute{};    // empty tag type; selects the AngleSPICAKokkos::operator() instantiation used by a RangePolicy
+
+template<class DeviceType>
+class AngleSPICAKokkos : public AngleSPICA {    // Kokkos version of AngleSPICA; the object itself is passed to Kokkos as the kernel functor
+
+ public:
+  typedef DeviceType device_type;    // NOTE(review): device_type/value_type are presumably the member typedefs Kokkos looks up on a functor -- confirm
+  typedef EV_FLOAT value_type;
+
+  AngleSPICAKokkos(class LAMMPS *);
+  ~AngleSPICAKokkos() override;
+  void compute(int, int) override;
+  void coeff(int, char **) override;
+  void init_style() override;
+  void read_restart(FILE *) override;
+
+  template<int NEWTON_BOND, int EVFLAG>
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagAngleSPICACompute<NEWTON_BOND,EVFLAG>, const int&, EV_FLOAT&) const;    // reduction variant, used with parallel_reduce
+
+  template<int NEWTON_BOND, int EVFLAG>
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagAngleSPICACompute<NEWTON_BOND,EVFLAG>, const int&) const;    // parallel_for variant, forwards to the one above
+
+  // tally the angle term for atoms i,j,k; reads the newton_bond member instead of a template parameter
+  KOKKOS_INLINE_FUNCTION
+  void ev_tally(EV_FLOAT &ev, const int i, const int j, const int k,
+                     F_FLOAT &eangle, F_FLOAT *f1, F_FLOAT *f3,
+                     const F_FLOAT &delx1, const F_FLOAT &dely1, const F_FLOAT &delz1,
+                     const F_FLOAT &delx2, const F_FLOAT &dely2, const F_FLOAT &delz2) const;
+
+  KOKKOS_INLINE_FUNCTION
+  void ev_tally13(EV_FLOAT &ev, const int i, const int j,
+                     const F_FLOAT &evdwl, const F_FLOAT &fpair,
+                     const F_FLOAT &delx, const F_FLOAT &dely, const F_FLOAT &delz) const;    // tally the 1-3 term between atoms i and j
+
+ protected:
+
+  class NeighborKokkos *neighborKK;
+
+  typename ArrayTypes<DeviceType>::t_x_array_randomread x;    // x, f and anglelist are re-assigned at the start of every compute()
+  typename ArrayTypes<DeviceType>::t_f_array f;
+  typename ArrayTypes<DeviceType>::t_int_2d anglelist;
+
+  typename ArrayTypes<DeviceType>::tdual_efloat_1d k_eatom;    // per-atom energy/virial, recreated in compute() when requested
+  typename ArrayTypes<DeviceType>::tdual_virial_array k_vatom;
+  typename ArrayTypes<DeviceType>::t_efloat_1d d_eatom;
+  typename ArrayTypes<DeviceType>::t_virial_array d_vatom;
+
+  int nlocal,newton_bond;    // copied from atom->nlocal and force->newton_bond in compute()
+  int eflag,vflag;
+
+  typename ArrayTypes<DeviceType>::tdual_int_1d k_setflag;
+  typename ArrayTypes<DeviceType>::t_int_1d d_setflag, d_type;
+
+  typename ArrayTypes<DeviceType>::tdual_ffloat_1d k_k, k_theta0, k_repscale;    // per-angle-type coefficients, nangletypes+1 entries
+  typename ArrayTypes<DeviceType>::t_ffloat_1d d_k, d_theta0, d_repscale;
+
+  typename ArrayTypes<DeviceType>::tdual_int_2d k_lj_type;    // per-atom-type-pair tables for the 1-3 LJ term, (ntypes+1) x (ntypes+1)
+  typename ArrayTypes<DeviceType>::t_int_2d d_lj_type;
+
+  typename ArrayTypes<DeviceType>::tdual_ffloat_2d k_lj1, k_lj2, k_lj3, k_lj4, k_rminsq, k_emin;
+  typename ArrayTypes<DeviceType>::t_ffloat_2d d_lj1, d_lj2, d_lj3, d_lj4, d_rminsq, d_emin;
+
+  void allocate() override;
+};
+
+}
+
+#endif
+#endif
+
--- a/src/KOKKOS/atom_kokkos.cpp
+++ b/src/KOKKOS/atom_kokkos.cpp
@ -276,22 +276,6 @@ void AtomKokkos::sort_device()
 if (domain->triclinic) domain->x2lamda(nlocal);
 }

-/* ----------------------------------------------------------------------
-   reallocate memory to the pointer selected by the mask
------------------------------------------------------------------------- */
-
-void AtomKokkos::grow(unsigned int mask)
-{
-  if (mask & SPECIAL_MASK) {
-    memoryKK->destroy_kokkos(k_special, special);
-    sync(Device, mask);
-    modified(Device, mask);
-    memoryKK->grow_kokkos(k_special, special, nmax, maxspecial, "atom:special");
-    avec->grow_pointers();
-    sync(Host, mask);
-  }
-}
-
 /* ----------------------------------------------------------------------
   add a custom variable with name of type flag = 0/1 for int/double
   assumes name does not already exist
--- a/src/KOKKOS/atom_kokkos.h
+++ b/src/KOKKOS/atom_kokkos.h
@ -165,7 +165,6 @@ class AtomKokkos : public Atom {
  void modified(const ExecutionSpace space, unsigned int mask);
  void sync_overlapping_device(const ExecutionSpace space, unsigned int mask);
  void sort() override;
-  virtual void grow(unsigned int mask);
  int add_custom(const char *, int, int, int border = 0) override;
  void remove_custom(int, int, int) override;
  void deallocate_topology() override;
--- a/src/KOKKOS/bond_hybrid_kokkos.cpp
+++ b/src/KOKKOS/bond_hybrid_kokkos.cpp
@ -153,11 +153,11 @@ void BondHybridKokkos::compute(int eflag, int vflag)
 void BondHybridKokkos::allocate()
 {
  allocated = 1;
-  int n = atom->nbondtypes;
+  int np1 = atom->nbondtypes + 1;

-  memoryKK->create_kokkos(k_map, map, n + 1, "bond:map");
-  memory->create(setflag, n + 1, "bond:setflag");
-  for (int i = 1; i <= n; i++) setflag[i] = 0;
+  memoryKK->create_kokkos(k_map, map, np1, "bond:map");
+  memory->create(setflag, np1, "bond:setflag");
+  for (int i = 1; i < np1; i++) setflag[i] = 0;

  k_nbondlist = DAT::tdual_int_1d("bond:nbondlist", nstyles);
 }
--- a/src/KOKKOS/dihedral_charmm_kokkos.cpp
+++ b/src/KOKKOS/dihedral_charmm_kokkos.cpp
@ -40,6 +40,7 @@ static constexpr double TOLERANCE = 0.05;
 template<class DeviceType>
 DihedralCharmmKokkos<DeviceType>::DihedralCharmmKokkos(LAMMPS *lmp) : DihedralCharmm(lmp)
 {
+  kokkosable = 1;
  atomKK = (AtomKokkos *) atom;
  neighborKK = (NeighborKokkos *) neighbor;
  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
@ -115,7 +116,7 @@ void DihedralCharmmKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
  qqrd2e = force->qqrd2e;

  h_warning_flag() = 0;
-  k_warning_flag.template modify<LMPHostType>();
+  k_warning_flag.modify_host();
  k_warning_flag.template sync<DeviceType>();

  copymode = 1;
@ -141,7 +142,7 @@ void DihedralCharmmKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
  // error check

  k_warning_flag.template modify<DeviceType>();
-  k_warning_flag.template sync<LMPHostType>();
+  k_warning_flag.sync_host();
  if (h_warning_flag())
    error->warning(FLERR,"Dihedral problem");

@ -174,20 +175,20 @@ void DihedralCharmmKokkos<DeviceType>::compute(int eflag_in, int vflag_in)

  if (eflag_atom) {
    k_eatom.template modify<DeviceType>();
-    k_eatom.template sync<LMPHostType>();
+    k_eatom.sync_host();

    k_eatom_pair.template modify<DeviceType>();
-    k_eatom_pair.template sync<LMPHostType>();
+    k_eatom_pair.sync_host();
    for (int i = 0; i < n; i++)
      force->pair->eatom[i] += k_eatom_pair.h_view(i);
  }

  if (vflag_atom) {
    k_vatom.template modify<DeviceType>();
-    k_vatom.template sync<LMPHostType>();
+    k_vatom.sync_host();

    k_vatom_pair.template modify<DeviceType>();
-    k_vatom_pair.template sync<LMPHostType>();
+    k_vatom_pair.sync_host();
    for (int i = 0; i < n; i++) {
      force->pair->vatom[i][0] += k_vatom_pair.h_view(i,0);
      force->pair->vatom[i][1] += k_vatom_pair.h_view(i,1);
@ -454,12 +455,12 @@ void DihedralCharmmKokkos<DeviceType>::coeff(int narg, char **arg)
    k_weight.h_view[i] = weight[i];
  }

-  k_k.template modify<LMPHostType>();
-  k_multiplicity.template modify<LMPHostType>();
-  k_shift.template modify<LMPHostType>();
-  k_cos_shift.template modify<LMPHostType>();
-  k_sin_shift.template modify<LMPHostType>();
-  k_weight.template modify<LMPHostType>();
+  k_k.modify_host();
+  k_multiplicity.modify_host();
+  k_shift.modify_host();
+  k_cos_shift.modify_host();
+  k_sin_shift.modify_host();
+  k_weight.modify_host();

  k_k.template sync<DeviceType>();
  k_multiplicity.template sync<DeviceType>();
@ -502,10 +503,10 @@ void DihedralCharmmKokkos<DeviceType>::init_style()
    }
  }

-  k_lj14_1.template modify<LMPHostType>();
-  k_lj14_2.template modify<LMPHostType>();
-  k_lj14_3.template modify<LMPHostType>();
-  k_lj14_4.template modify<LMPHostType>();
+  k_lj14_1.modify_host();
+  k_lj14_2.modify_host();
+  k_lj14_3.modify_host();
+  k_lj14_4.modify_host();

  k_lj14_1.template sync<DeviceType>();
  k_lj14_2.template sync<DeviceType>();
@ -547,12 +548,12 @@ void DihedralCharmmKokkos<DeviceType>::read_restart(FILE *fp)
    k_weight.h_view[i] = weight[i];
  }

-  k_k.template modify<LMPHostType>();
-  k_multiplicity.template modify<LMPHostType>();
-  k_shift.template modify<LMPHostType>();
-  k_cos_shift.template modify<LMPHostType>();
-  k_sin_shift.template modify<LMPHostType>();
-  k_weight.template modify<LMPHostType>();
+  k_k.modify_host();
+  k_multiplicity.modify_host();
+  k_shift.modify_host();
+  k_cos_shift.modify_host();
+  k_sin_shift.modify_host();
+  k_weight.modify_host();

  k_k.template sync<DeviceType>();
  k_multiplicity.template sync<DeviceType>();
--- a/src/KOKKOS/dihedral_charmm_kokkos.h
+++ b/src/KOKKOS/dihedral_charmm_kokkos.h
@ -104,6 +104,10 @@ class DihedralCharmmKokkos : public DihedralCharmm {
        const F_FLOAT &evdwl, const F_FLOAT &ecoul, const F_FLOAT &fpair, const F_FLOAT &delx,
                const F_FLOAT &dely, const F_FLOAT &delz) const;

+  typedef typename KKDevice<DeviceType>::value KKDeviceType;
+  Kokkos::DualView<E_FLOAT*,Kokkos::LayoutRight,KKDeviceType> k_eatom;
+  Kokkos::DualView<F_FLOAT*[6],Kokkos::LayoutRight,KKDeviceType> k_vatom;
+
 protected:

  class NeighborKokkos *neighborKK;
@ -114,9 +118,6 @@ class DihedralCharmmKokkos : public DihedralCharmm {
  typename AT::t_f_array f;
  typename AT::t_int_2d dihedrallist;

-  typedef typename KKDevice<DeviceType>::value KKDeviceType;
-  Kokkos::DualView<E_FLOAT*,Kokkos::LayoutRight,KKDeviceType> k_eatom;
-  Kokkos::DualView<F_FLOAT*[6],Kokkos::LayoutRight,KKDeviceType> k_vatom;
  Kokkos::View<E_FLOAT*,Kokkos::LayoutRight,KKDeviceType,Kokkos::MemoryTraits<Kokkos::Atomic> > d_eatom;
  Kokkos::View<F_FLOAT*[6],Kokkos::LayoutRight,KKDeviceType,Kokkos::MemoryTraits<Kokkos::Atomic> > d_vatom;

--- a/src/KOKKOS/dihedral_charmmfsw_kokkos.cpp
+++ b/src/KOKKOS/dihedral_charmmfsw_kokkos.cpp
@ -47,6 +47,7 @@ static constexpr double TOLERANCE = 0.05;
 template<class DeviceType>
 DihedralCharmmfswKokkos<DeviceType>::DihedralCharmmfswKokkos(LAMMPS *lmp) : DihedralCharmmfsw(lmp)
 {
+  kokkosable = 1;
  atomKK = (AtomKokkos *) atom;
  neighborKK = (NeighborKokkos *) neighbor;
  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
@ -122,7 +123,7 @@ void DihedralCharmmfswKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
  qqrd2e = force->qqrd2e;

  h_warning_flag() = 0;
-  k_warning_flag.template modify<LMPHostType>();
+  k_warning_flag.modify_host();
  k_warning_flag.template sync<DeviceType>();

  copymode = 1;
@ -148,7 +149,7 @@ void DihedralCharmmfswKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
  // error check

  k_warning_flag.template modify<DeviceType>();
-  k_warning_flag.template sync<LMPHostType>();
+  k_warning_flag.sync_host();
  if (h_warning_flag())
    error->warning(FLERR,"Dihedral problem");

@ -181,20 +182,20 @@ void DihedralCharmmfswKokkos<DeviceType>::compute(int eflag_in, int vflag_in)

  if (eflag_atom) {
    k_eatom.template modify<DeviceType>();
-    k_eatom.template sync<LMPHostType>();
+    k_eatom.sync_host();

    k_eatom_pair.template modify<DeviceType>();
-    k_eatom_pair.template sync<LMPHostType>();
+    k_eatom_pair.sync_host();
    for (int i = 0; i < n; i++)
      force->pair->eatom[i] += k_eatom_pair.h_view(i);
  }

  if (vflag_atom) {
    k_vatom.template modify<DeviceType>();
-    k_vatom.template sync<LMPHostType>();
+    k_vatom.sync_host();

    k_vatom_pair.template modify<DeviceType>();
-    k_vatom_pair.template sync<LMPHostType>();
+    k_vatom_pair.sync_host();
    for (int i = 0; i < n; i++) {
      force->pair->vatom[i][0] += k_vatom_pair.h_view(i,0);
      force->pair->vatom[i][1] += k_vatom_pair.h_view(i,1);
@ -379,16 +380,17 @@ void DihedralCharmmfswKokkos<DeviceType>::operator()(TagDihedralCharmmfswCompute
    const F_FLOAT dely = x(i1,1) - x(i4,1);
    const F_FLOAT delz = x(i1,2) - x(i4,2);
    const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
+    const F_FLOAT r = sqrt(rsq);
    const F_FLOAT r2inv = 1.0/rsq;
    const F_FLOAT r6inv = r2inv*r2inv*r2inv;

    F_FLOAT forcecoul;
    if (implicit) forcecoul = qqrd2e * q[i1]*q[i4]*r2inv;
-    else forcecoul = qqrd2e * q[i1]*q[i4]*sqrt(r2inv);
+    else if (dihedflag) forcecoul = qqrd2e * q[i1]*q[i4]*sqrt(r2inv);
+    else forcecoul = qqrd2e * q[i1]*q[i4]*(sqrt(r2inv) - r*cut_coulinv14*cut_coulinv14);
    const F_FLOAT forcelj = r6inv * (d_lj14_1(itype,jtype)*r6inv - d_lj14_2(itype,jtype));
    const F_FLOAT fpair = d_weight[type] * (forcelj+forcecoul)*r2inv;

-    const F_FLOAT r = sqrt(rsq);
    F_FLOAT ecoul = 0.0;
    F_FLOAT evdwl = 0.0;
    F_FLOAT evdwl14_12, evdwl14_6;
@ -471,12 +473,12 @@ void DihedralCharmmfswKokkos<DeviceType>::coeff(int narg, char **arg)
    k_weight.h_view[i] = weight[i];
  }

-  k_k.template modify<LMPHostType>();
-  k_multiplicity.template modify<LMPHostType>();
-  k_shift.template modify<LMPHostType>();
-  k_cos_shift.template modify<LMPHostType>();
-  k_sin_shift.template modify<LMPHostType>();
-  k_weight.template modify<LMPHostType>();
+  k_k.modify_host();
+  k_multiplicity.modify_host();
+  k_shift.modify_host();
+  k_cos_shift.modify_host();
+  k_sin_shift.modify_host();
+  k_weight.modify_host();

  k_k.template sync<DeviceType>();
  k_multiplicity.template sync<DeviceType>();
@ -519,10 +521,10 @@ void DihedralCharmmfswKokkos<DeviceType>::init_style()
    }
  }

-  k_lj14_1.template modify<LMPHostType>();
-  k_lj14_2.template modify<LMPHostType>();
-  k_lj14_3.template modify<LMPHostType>();
-  k_lj14_4.template modify<LMPHostType>();
+  k_lj14_1.modify_host();
+  k_lj14_2.modify_host();
+  k_lj14_3.modify_host();
+  k_lj14_4.modify_host();

  k_lj14_1.template sync<DeviceType>();
  k_lj14_2.template sync<DeviceType>();
@ -564,12 +566,12 @@ void DihedralCharmmfswKokkos<DeviceType>::read_restart(FILE *fp)
    k_weight.h_view[i] = weight[i];
  }

-  k_k.template modify<LMPHostType>();
-  k_multiplicity.template modify<LMPHostType>();
-  k_shift.template modify<LMPHostType>();
-  k_cos_shift.template modify<LMPHostType>();
-  k_sin_shift.template modify<LMPHostType>();
-  k_weight.template modify<LMPHostType>();
+  k_k.modify_host();
+  k_multiplicity.modify_host();
+  k_shift.modify_host();
+  k_cos_shift.modify_host();
+  k_sin_shift.modify_host();
+  k_weight.modify_host();

  k_k.template sync<DeviceType>();
  k_multiplicity.template sync<DeviceType>();
--- a/src/KOKKOS/dihedral_charmmfsw_kokkos.h
+++ b/src/KOKKOS/dihedral_charmmfsw_kokkos.h
@ -67,6 +67,10 @@ class DihedralCharmmfswKokkos : public DihedralCharmmfsw {
        const F_FLOAT &evdwl, const F_FLOAT &ecoul, const F_FLOAT &fpair, const F_FLOAT &delx,
                const F_FLOAT &dely, const F_FLOAT &delz) const;

+  typedef typename KKDevice<DeviceType>::value KKDeviceType;
+  Kokkos::DualView<E_FLOAT*,Kokkos::LayoutRight,KKDeviceType> k_eatom;
+  Kokkos::DualView<F_FLOAT*[6],Kokkos::LayoutRight,KKDeviceType> k_vatom;
+
 protected:

  class NeighborKokkos *neighborKK;
@ -76,10 +80,6 @@ class DihedralCharmmfswKokkos : public DihedralCharmmfsw {
  typename AT::t_ffloat_1d_randomread q;
  typename AT::t_f_array f;
  typename AT::t_int_2d dihedrallist;
-
-  typedef typename KKDevice<DeviceType>::value KKDeviceType;
-  Kokkos::DualView<E_FLOAT*,Kokkos::LayoutRight,KKDeviceType> k_eatom;
-  Kokkos::DualView<F_FLOAT*[6],Kokkos::LayoutRight,KKDeviceType> k_vatom;
  Kokkos::View<E_FLOAT*,Kokkos::LayoutRight,KKDeviceType,Kokkos::MemoryTraits<Kokkos::Atomic> > d_eatom;
  Kokkos::View<F_FLOAT*[6],Kokkos::LayoutRight,KKDeviceType,Kokkos::MemoryTraits<Kokkos::Atomic> > d_vatom;

--- a/src/KOKKOS/dihedral_class2_kokkos.cpp
+++ b/src/KOKKOS/dihedral_class2_kokkos.cpp
@ -38,6 +38,7 @@ static constexpr double SMALL =     0.001;
 template<class DeviceType>
 DihedralClass2Kokkos<DeviceType>::DihedralClass2Kokkos(LAMMPS *lmp) : DihedralClass2(lmp)
 {
+  kokkosable = 1;
  atomKK = (AtomKokkos *) atom;
  neighborKK = (NeighborKokkos *) neighbor;
  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
@ -137,7 +138,7 @@ void DihedralClass2Kokkos<DeviceType>::compute(int eflag_in, int vflag_in)
  newton_bond = force->newton_bond;

  h_warning_flag() = 0;
-  k_warning_flag.template modify<LMPHostType>();
+  k_warning_flag.modify_host();
  k_warning_flag.template sync<DeviceType>();

  copymode = 1;
@ -163,7 +164,7 @@ void DihedralClass2Kokkos<DeviceType>::compute(int eflag_in, int vflag_in)
  // error check

  k_warning_flag.template modify<DeviceType>();
-  k_warning_flag.template sync<LMPHostType>();
+  k_warning_flag.sync_host();
  if (h_warning_flag())
    error->warning(FLERR,"Dihedral problem");

@ -179,12 +180,12 @@ void DihedralClass2Kokkos<DeviceType>::compute(int eflag_in, int vflag_in)

  if (eflag_atom) {
    k_eatom.template modify<DeviceType>();
-    k_eatom.template sync<LMPHostType>();
+    k_eatom.sync_host();
  }

  if (vflag_atom) {
    k_vatom.template modify<DeviceType>();
-    k_vatom.template sync<LMPHostType>();
+    k_vatom.sync_host();
  }

  copymode = 0;
@ -786,44 +787,44 @@ void DihedralClass2Kokkos<DeviceType>::coeff(int narg, char **arg)
    k_setflag_bb13t.h_view[i] = setflag_bb13t[i];
  }

-  k_k1.template modify<LMPHostType>();
-  k_k2.template modify<LMPHostType>();
-  k_k3.template modify<LMPHostType>();
-  k_phi1.template modify<LMPHostType>();
-  k_phi2.template modify<LMPHostType>();
-  k_phi3.template modify<LMPHostType>();
-  k_mbt_f1.template modify<LMPHostType>();
-  k_mbt_f2.template modify<LMPHostType>();
-  k_mbt_f3.template modify<LMPHostType>();
-  k_mbt_r0.template modify<LMPHostType>();
-  k_ebt_f1_1.template modify<LMPHostType>();
-  k_ebt_f2_1.template modify<LMPHostType>();
-  k_ebt_f3_1.template modify<LMPHostType>();
-  k_ebt_r0_1.template modify<LMPHostType>();
-  k_ebt_f1_2.template modify<LMPHostType>();
-  k_ebt_f2_2.template modify<LMPHostType>();
-  k_ebt_f3_2.template modify<LMPHostType>();
-  k_ebt_r0_2.template modify<LMPHostType>();
-  k_at_f1_1.template modify<LMPHostType>();
-  k_at_f2_1.template modify<LMPHostType>();
-  k_at_f3_1.template modify<LMPHostType>();
-  k_at_f1_2.template modify<LMPHostType>();
-  k_at_f2_2.template modify<LMPHostType>();
-  k_at_f3_2.template modify<LMPHostType>();
-  k_at_theta0_1.template modify<LMPHostType>();
-  k_at_theta0_2.template modify<LMPHostType>();
-  k_aat_k.template modify<LMPHostType>();
-  k_aat_theta0_1.template modify<LMPHostType>();
-  k_aat_theta0_2.template modify<LMPHostType>();
-  k_bb13t_k.template modify<LMPHostType>();
-  k_bb13t_r10.template modify<LMPHostType>();
-  k_bb13t_r30.template modify<LMPHostType>();
-  k_setflag_d.template modify<LMPHostType>();
-  k_setflag_mbt.template modify<LMPHostType>();
-  k_setflag_ebt.template modify<LMPHostType>();
-  k_setflag_at.template modify<LMPHostType>();
-  k_setflag_aat.template modify<LMPHostType>();
-  k_setflag_bb13t.template modify<LMPHostType>();
+  k_k1.modify_host();
+  k_k2.modify_host();
+  k_k3.modify_host();
+  k_phi1.modify_host();
+  k_phi2.modify_host();
+  k_phi3.modify_host();
+  k_mbt_f1.modify_host();
+  k_mbt_f2.modify_host();
+  k_mbt_f3.modify_host();
+  k_mbt_r0.modify_host();
+  k_ebt_f1_1.modify_host();
+  k_ebt_f2_1.modify_host();
+  k_ebt_f3_1.modify_host();
+  k_ebt_r0_1.modify_host();
+  k_ebt_f1_2.modify_host();
+  k_ebt_f2_2.modify_host();
+  k_ebt_f3_2.modify_host();
+  k_ebt_r0_2.modify_host();
+  k_at_f1_1.modify_host();
+  k_at_f2_1.modify_host();
+  k_at_f3_1.modify_host();
+  k_at_f1_2.modify_host();
+  k_at_f2_2.modify_host();
+  k_at_f3_2.modify_host();
+  k_at_theta0_1.modify_host();
+  k_at_theta0_2.modify_host();
+  k_aat_k.modify_host();
+  k_aat_theta0_1.modify_host();
+  k_aat_theta0_2.modify_host();
+  k_bb13t_k.modify_host();
+  k_bb13t_r10.modify_host();
+  k_bb13t_r30.modify_host();
+  k_setflag_d.modify_host();
+  k_setflag_mbt.modify_host();
+  k_setflag_ebt.modify_host();
+  k_setflag_at.modify_host();
+  k_setflag_aat.modify_host();
+  k_setflag_bb13t.modify_host();
 }


@ -956,44 +957,44 @@ void DihedralClass2Kokkos<DeviceType>::read_restart(FILE *fp)
    k_setflag_bb13t.h_view[i] = setflag_bb13t[i];
  }

-  k_k1.template modify<LMPHostType>();
-  k_k2.template modify<LMPHostType>();
-  k_k3.template modify<LMPHostType>();
-  k_phi1.template modify<LMPHostType>();
-  k_phi2.template modify<LMPHostType>();
-  k_phi3.template modify<LMPHostType>();
-  k_mbt_f1.template modify<LMPHostType>();
-  k_mbt_f2.template modify<LMPHostType>();
-  k_mbt_f3.template modify<LMPHostType>();
-  k_mbt_r0.template modify<LMPHostType>();
-  k_ebt_f1_1.template modify<LMPHostType>();
-  k_ebt_f2_1.template modify<LMPHostType>();
-  k_ebt_f3_1.template modify<LMPHostType>();
-  k_ebt_r0_1.template modify<LMPHostType>();
-  k_ebt_f1_2.template modify<LMPHostType>();
-  k_ebt_f2_2.template modify<LMPHostType>();
-  k_ebt_f3_2.template modify<LMPHostType>();
-  k_ebt_r0_2.template modify<LMPHostType>();
-  k_at_f1_1.template modify<LMPHostType>();
-  k_at_f2_1.template modify<LMPHostType>();
-  k_at_f3_1.template modify<LMPHostType>();
-  k_at_f1_2.template modify<LMPHostType>();
-  k_at_f2_2.template modify<LMPHostType>();
-  k_at_f3_2.template modify<LMPHostType>();
-  k_at_theta0_1.template modify<LMPHostType>();
-  k_at_theta0_2.template modify<LMPHostType>();
-  k_aat_k.template modify<LMPHostType>();
-  k_aat_theta0_1.template modify<LMPHostType>();
-  k_aat_theta0_2.template modify<LMPHostType>();
-  k_bb13t_k.template modify<LMPHostType>();
-  k_bb13t_r10.template modify<LMPHostType>();
-  k_bb13t_r30.template modify<LMPHostType>();
-  k_setflag_d.template modify<LMPHostType>();
-  k_setflag_mbt.template modify<LMPHostType>();
-  k_setflag_ebt.template modify<LMPHostType>();
-  k_setflag_at.template modify<LMPHostType>();
-  k_setflag_aat.template modify<LMPHostType>();
-  k_setflag_bb13t.template modify<LMPHostType>();
+  k_k1.modify_host();
+  k_k2.modify_host();
+  k_k3.modify_host();
+  k_phi1.modify_host();
+  k_phi2.modify_host();
+  k_phi3.modify_host();
+  k_mbt_f1.modify_host();
+  k_mbt_f2.modify_host();
+  k_mbt_f3.modify_host();
+  k_mbt_r0.modify_host();
+  k_ebt_f1_1.modify_host();
+  k_ebt_f2_1.modify_host();
+  k_ebt_f3_1.modify_host();
+  k_ebt_r0_1.modify_host();
+  k_ebt_f1_2.modify_host();
+  k_ebt_f2_2.modify_host();
+  k_ebt_f3_2.modify_host();
+  k_ebt_r0_2.modify_host();
+  k_at_f1_1.modify_host();
+  k_at_f2_1.modify_host();
+  k_at_f3_1.modify_host();
+  k_at_f1_2.modify_host();
+  k_at_f2_2.modify_host();
+  k_at_f3_2.modify_host();
+  k_at_theta0_1.modify_host();
+  k_at_theta0_2.modify_host();
+  k_aat_k.modify_host();
+  k_aat_theta0_1.modify_host();
+  k_aat_theta0_2.modify_host();
+  k_bb13t_k.modify_host();
+  k_bb13t_r10.modify_host();
+  k_bb13t_r30.modify_host();
+  k_setflag_d.modify_host();
+  k_setflag_mbt.modify_host();
+  k_setflag_ebt.modify_host();
+  k_setflag_at.modify_host();
+  k_setflag_aat.modify_host();
+  k_setflag_bb13t.modify_host();
 }

 /* ----------------------------------------------------------------------
--- a/src/KOKKOS/dihedral_class2_kokkos.h
+++ b/src/KOKKOS/dihedral_class2_kokkos.h
@ -60,6 +60,9 @@ class DihedralClass2Kokkos : public DihedralClass2 {
                          const F_FLOAT &vb2x, const F_FLOAT &vb2y, const F_FLOAT &vb2z,
                          const F_FLOAT &vb3x, const F_FLOAT &vb3y, const F_FLOAT &vb3z) const;

+  DAT::tdual_efloat_1d k_eatom;
+  DAT::tdual_virial_array k_vatom;
+
 protected:

  class NeighborKokkos *neighborKK;
@ -67,9 +70,6 @@ class DihedralClass2Kokkos : public DihedralClass2 {
  typename AT::t_x_array_randomread x;
  typename AT::t_f_array f;
  typename AT::t_int_2d dihedrallist;
-
-  DAT::tdual_efloat_1d k_eatom;
-  DAT::tdual_virial_array k_vatom;
  typename AT::t_efloat_1d d_eatom;
  typename AT::t_virial_array d_vatom;

--- a/src/KOKKOS/dihedral_harmonic_kokkos.cpp
+++ b/src/KOKKOS/dihedral_harmonic_kokkos.cpp
@ -37,6 +37,7 @@ static constexpr double TOLERANCE = 0.05;
 template<class DeviceType>
 DihedralHarmonicKokkos<DeviceType>::DihedralHarmonicKokkos(LAMMPS *lmp) : DihedralHarmonic(lmp)
 {
+  kokkosable = 1;
  atomKK = (AtomKokkos *) atom;
  neighborKK = (NeighborKokkos *) neighbor;
  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
@ -74,14 +75,18 @@ void DihedralHarmonicKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
  // reallocate per-atom arrays if necessary

  if (eflag_atom) {
+    if(k_eatom.extent(0) < maxeatom) {
    memoryKK->destroy_kokkos(k_eatom,eatom);
    memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"dihedral:eatom");
    d_eatom = k_eatom.view<DeviceType>();
+    } else Kokkos::deep_copy(d_eatom,0.0);
  }
  if (vflag_atom) {
+    if(k_vatom.extent(0) < maxvatom) {
    memoryKK->destroy_kokkos(k_vatom,vatom);
    memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"dihedral:vatom");
    d_vatom = k_vatom.view<DeviceType>();
+    } else Kokkos::deep_copy(d_vatom,0.0);
  }

  k_k.template sync<DeviceType>();
@ -99,7 +104,7 @@ void DihedralHarmonicKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
  newton_bond = force->newton_bond;

  h_warning_flag() = 0;
-  k_warning_flag.template modify<LMPHostType>();
+  k_warning_flag.modify_host();
  k_warning_flag.template sync<DeviceType>();

  copymode = 1;
@ -125,7 +130,7 @@ void DihedralHarmonicKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
  // error check

  k_warning_flag.template modify<DeviceType>();
-  k_warning_flag.template sync<LMPHostType>();
+  k_warning_flag.sync_host();
  if (h_warning_flag())
    error->warning(FLERR,"Dihedral problem");

@ -141,12 +146,12 @@ void DihedralHarmonicKokkos<DeviceType>::compute(int eflag_in, int vflag_in)

  if (eflag_atom) {
    k_eatom.template modify<DeviceType>();
-    k_eatom.template sync<LMPHostType>();
+    k_eatom.sync_host();
  }

  if (vflag_atom) {
    k_vatom.template modify<DeviceType>();
-    k_vatom.template sync<LMPHostType>();
+    k_vatom.sync_host();
  }

  copymode = 0;
@ -362,11 +367,11 @@ void DihedralHarmonicKokkos<DeviceType>::coeff(int narg, char **arg)
    k_multiplicity.h_view[i] = multiplicity[i];
  }

-  k_k.template modify<LMPHostType>();
-  k_cos_shift.template modify<LMPHostType>();
-  k_sin_shift.template modify<LMPHostType>();
-  k_sign.template modify<LMPHostType>();
-  k_multiplicity.template modify<LMPHostType>();
+  k_k.modify_host();
+  k_cos_shift.modify_host();
+  k_sin_shift.modify_host();
+  k_sign.modify_host();
+  k_multiplicity.modify_host();
 }

 /* ----------------------------------------------------------------------
@ -387,11 +392,11 @@ void DihedralHarmonicKokkos<DeviceType>::read_restart(FILE *fp)
    k_multiplicity.h_view[i] = multiplicity[i];
  }

-  k_k.template modify<LMPHostType>();
-  k_cos_shift.template modify<LMPHostType>();
-  k_sin_shift.template modify<LMPHostType>();
-  k_sign.template modify<LMPHostType>();
-  k_multiplicity.template modify<LMPHostType>();
+  k_k.modify_host();
+  k_cos_shift.modify_host();
+  k_sin_shift.modify_host();
+  k_sign.modify_host();
+  k_multiplicity.modify_host();
 }

 /* ----------------------------------------------------------------------
--- a/src/KOKKOS/dihedral_harmonic_kokkos.h
+++ b/src/KOKKOS/dihedral_harmonic_kokkos.h
@ -60,6 +60,9 @@ class DihedralHarmonicKokkos : public DihedralHarmonic {
                          const F_FLOAT &vb2x, const F_FLOAT &vb2y, const F_FLOAT &vb2z,
                          const F_FLOAT &vb3x, const F_FLOAT &vb3y, const F_FLOAT &vb3z) const;

+  DAT::tdual_efloat_1d k_eatom;
+  DAT::tdual_virial_array k_vatom;
+
 protected:

  class NeighborKokkos *neighborKK;
@ -67,9 +70,6 @@ class DihedralHarmonicKokkos : public DihedralHarmonic {
  typename AT::t_x_array_randomread x;
  typename AT::t_f_array f;
  typename AT::t_int_2d dihedrallist;
-
-  DAT::tdual_efloat_1d k_eatom;
-  DAT::tdual_virial_array k_vatom;
  typename ArrayTypes<DeviceType>::t_efloat_1d d_eatom;
  typename ArrayTypes<DeviceType>::t_virial_array d_vatom;

--- a/src/KOKKOS/dihedral_hybrid_kokkos.cpp
+++ b/src/KOKKOS/dihedral_hybrid_kokkos.cpp
@ -0,0 +1,225 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "dihedral_hybrid_kokkos.h"
+
+#include "atom_kokkos.h"
+#include "atom_masks.h"
+#include "comm.h"
+#include "error.h"
+#include "force.h"
+#include "kokkos.h"
+#include "memory_kokkos.h"
+#include "neighbor_kokkos.h"
+
+#include <cstring>
+
+using namespace LAMMPS_NS;
+
+#define EXTRA 1000
+
+/* ---------------------------------------------------------------------- */
+
+DihedralHybridKokkos::DihedralHybridKokkos(LAMMPS *lmp) : DihedralHybrid(lmp)
+{
+  kokkosable = 1;
+
+  atomKK = (AtomKokkos *) atom;
+  neighborKK = (NeighborKokkos *) neighbor;
+
+  execution_space = Device;
+
+  datamask_read = EMPTY_MASK;
+  datamask_modify = EMPTY_MASK;
+}
+
+/* ---------------------------------------------------------------------- */
+
+DihedralHybridKokkos::~DihedralHybridKokkos()
+{
+  deallocate();
+}
+
+/* ---------------------------------------------------------------------- */
+
+void DihedralHybridKokkos::compute(int eflag, int vflag)
+{
+  // save ptrs to original dihedrallist
+
+  int ndihedrallist_orig = neighbor->ndihedrallist;
+  neighborKK->k_dihedrallist.sync_device();
+  auto k_dihedrallist_orig = neighborKK->k_dihedrallist;
+  auto d_dihedrallist_orig = k_dihedrallist_orig.d_view;
+  auto d_ndihedrallist = k_ndihedrallist.d_view;
+  auto h_ndihedrallist = k_ndihedrallist.h_view;
+
+  // if this is re-neighbor step, create sub-style dihedrallists
+  // ndihedrallist[] = length of each sub-style list
+  // realloc sub-style dihedrallist if necessary
+  // load sub-style dihedrallist with 5 values from original dihedrallist
+
+  if (neighbor->ago == 0) {
+    Kokkos::deep_copy(d_ndihedrallist,0);
+
+    k_map.sync_device();
+    auto d_map = k_map.d_view;
+
+    Kokkos::parallel_for(ndihedrallist_orig,LAMMPS_LAMBDA(int i) {
+      const int m = d_map[d_dihedrallist_orig(i,4)];
+      if (m >= 0) Kokkos::atomic_increment(&d_ndihedrallist[m]);
+    });
+
+    k_ndihedrallist.modify_device();
+    k_ndihedrallist.sync_host();
+
+    maxdihedral_all = 0;
+    for (int m = 0; m < nstyles; m++)
+      if (h_ndihedrallist[m] > maxdihedral_all)
+        maxdihedral_all = h_ndihedrallist[m] + EXTRA;
+
+    if (k_dihedrallist.d_view.extent(1) < maxdihedral_all)
+      MemKK::realloc_kokkos(k_dihedrallist, "dihedral_hybrid:dihedrallist", nstyles, maxdihedral_all, 5);
+    auto d_dihedrallist = k_dihedrallist.d_view;
+
+    Kokkos::deep_copy(d_ndihedrallist,0);
+
+    Kokkos::parallel_for(ndihedrallist_orig,LAMMPS_LAMBDA(int i) {
+      const int m = d_map[d_dihedrallist_orig(i,4)];
+      if (m < 0) return;
+      const int n = Kokkos::atomic_fetch_add(&d_ndihedrallist[m],1);
+      d_dihedrallist(m,n,0) = d_dihedrallist_orig(i,0);
+      d_dihedrallist(m,n,1) = d_dihedrallist_orig(i,1);
+      d_dihedrallist(m,n,2) = d_dihedrallist_orig(i,2);
+      d_dihedrallist(m,n,3) = d_dihedrallist_orig(i,3);
+      d_dihedrallist(m,n,4) = d_dihedrallist_orig(i,4);
+    });
+  }
+
+  // call each sub-style's compute function
+  // set neighbor->ndihedrallist and neighborKK->k_dihedrallist to sub-style values before call
+  // accumulate sub-style global/peratom energy/virial in hybrid
+
+  ev_init(eflag, vflag);
+
+  k_ndihedrallist.modify_device();
+  k_ndihedrallist.sync_host();
+
+  for (int m = 0; m < nstyles; m++) {
+    neighbor->ndihedrallist = h_ndihedrallist[m];
+    auto k_dihedrallist_m = Kokkos::subview(k_dihedrallist,m,Kokkos::ALL,Kokkos::ALL);
+    k_dihedrallist_m.modify_device();
+    neighborKK->k_dihedrallist = k_dihedrallist_m;
+
+    auto style = styles[m];
+    atomKK->sync(style->execution_space,style->datamask_read);
+    style->compute(eflag, vflag);
+    atomKK->modified(style->execution_space,style->datamask_modify);
+
+    if (eflag_global) energy += style->energy;
+    if (vflag_global)
+      for (int n = 0; n < 6; n++) virial[n] += style->virial[n];
+
+    if (eflag_atom) {
+      int n = atom->nlocal;
+      if (force->newton_bond) n += atom->nghost;
+      double *eatom_substyle = styles[m]->eatom;
+      for (int i = 0; i < n; i++) eatom[i] += eatom_substyle[i];
+    }
+    if (vflag_atom) {
+      int n = atom->nlocal;
+      if (force->newton_bond) n += atom->nghost;
+      double **vatom_substyle = styles[m]->vatom;
+      for (int i = 0; i < n; i++)
+        for (int j = 0; j < 6; j++) vatom[i][j] += vatom_substyle[i][j];
+    }
+    if (cvflag_atom) {
+      int n = atom->nlocal;
+      if (force->newton_bond) n += atom->nghost;
+      double **cvatom_substyle = styles[m]->cvatom;
+      for (int i = 0; i < n; i++)
+        for (int j = 0; j < 9; j++) cvatom[i][j] += cvatom_substyle[i][j];
+    }
+  }
+
+  // restore ptrs to original dihedrallist
+
+  neighbor->ndihedrallist = ndihedrallist_orig;
+  neighborKK->k_dihedrallist = k_dihedrallist_orig;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void DihedralHybridKokkos::allocate()
+{
+  allocated = 1;
+  int np1 = atom->ndihedraltypes + 1;
+
+  memoryKK->create_kokkos(k_map, map, np1, "dihedral:map");
+  memory->create(setflag, np1, "dihedral:setflag");
+  for (int i = 1; i < np1; i++) setflag[i] = 0;
+
+  k_ndihedrallist = DAT::tdual_int_1d("dihedral:ndihedrallist", nstyles);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void DihedralHybridKokkos::deallocate()
+{
+  if (!allocated) return;
+
+  allocated = 0;
+
+  memory->destroy(setflag);
+  memoryKK->destroy_kokkos(k_map,map);
+}
+
+/* ----------------------------------------------------------------------
+   set coeffs for one type
+---------------------------------------------------------------------- */
+
+void DihedralHybridKokkos::coeff(int narg, char **arg)
+{
+  DihedralHybrid::coeff(narg,arg);
+
+  k_map.modify_host();
+}
+
+/* ---------------------------------------------------------------------- */
+
+void DihedralHybridKokkos::init_style()
+{
+  DihedralHybrid::init_style();
+
+  for (int m = 0; m < nstyles; m++) {
+    if (!styles[m]->kokkosable)
+      error->all(FLERR,"Must use only Kokkos-enabled dihedral styles with dihedral_style hybrid/kk");
+
+    if (styles[m]->execution_space == Host)
+      lmp->kokkos->allow_overlap = 0;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   memory usage
+------------------------------------------------------------------------- */
+
+double DihedralHybridKokkos::memory_usage()
+{
+  double bytes = (double) maxeatom * sizeof(double);
+  bytes += (double) maxvatom * 6 * sizeof(double);
+  bytes += (double) maxcvatom * 9 * sizeof(double);
+  for (int m = 0; m < nstyles; m++) bytes += (double) maxdihedral_all * 5 * sizeof(int);
+  for (int m = 0; m < nstyles; m++)
+    if (styles[m]) bytes += styles[m]->memory_usage();
+  return bytes;
+}
--- a/src/KOKKOS/dihedral_hybrid_kokkos.h
+++ b/src/KOKKOS/dihedral_hybrid_kokkos.h
@ -0,0 +1,58 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef DIHEDRAL_CLASS
+// clang-format off
+DihedralStyle(hybrid/kk,DihedralHybridKokkos);
+DihedralStyle(hybrid/kk/device,DihedralHybridKokkos);
+DihedralStyle(hybrid/kk/host,DihedralHybridKokkos);
+// clang-format on
+#else
+
+// clang-format off
+#ifndef LMP_DIHEDRAL_HYBRID_KOKKOS_H
+#define LMP_DIHEDRAL_HYBRID_KOKKOS_H
+
+#include "dihedral_hybrid.h"
+#include "kokkos_type.h"
+
+namespace LAMMPS_NS {
+
+class DihedralHybridKokkos : public DihedralHybrid {
+  friend class Force;
+
+ public:
+  DihedralHybridKokkos(class LAMMPS *);
+  ~DihedralHybridKokkos() override;
+  void compute(int, int) override;
+  void coeff(int, char **) override;
+  void init_style() override;
+  double memory_usage() override;
+
+ private:
+  int maxdihedral_all;
+
+  class NeighborKokkos *neighborKK;
+
+  DAT::tdual_int_1d k_map;       // which style each dihedral type points to
+  DAT::tdual_int_1d k_ndihedrallist; // # of dihedrals in sub-style dihedrallists
+  DAT::tdual_int_3d k_dihedrallist;  // dihedrallist for each sub-style
+
+  void allocate() override;
+  void deallocate() override;
+};
+
+}    // namespace LAMMPS_NS
+
+#endif
+#endif
--- a/src/KOKKOS/dihedral_opls_kokkos.cpp
+++ b/src/KOKKOS/dihedral_opls_kokkos.cpp
@ -39,6 +39,7 @@ static constexpr double SMALLER =   0.00001;
 template<class DeviceType>
 DihedralOPLSKokkos<DeviceType>::DihedralOPLSKokkos(LAMMPS *lmp) : DihedralOPLS(lmp)
 {
+  kokkosable = 1;
  atomKK = (AtomKokkos *) atom;
  neighborKK = (NeighborKokkos *) neighbor;
  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
@ -100,7 +101,7 @@ void DihedralOPLSKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
  newton_bond = force->newton_bond;

  h_warning_flag() = 0;
-  k_warning_flag.template modify<LMPHostType>();
+  k_warning_flag.modify_host();
  k_warning_flag.template sync<DeviceType>();

  copymode = 1;
@ -126,7 +127,7 @@ void DihedralOPLSKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
  // error check

  k_warning_flag.template modify<DeviceType>();
-  k_warning_flag.template sync<LMPHostType>();
+  k_warning_flag.sync_host();
  if (h_warning_flag())
    error->warning(FLERR,"Dihedral problem");

@ -142,12 +143,12 @@ void DihedralOPLSKokkos<DeviceType>::compute(int eflag_in, int vflag_in)

  if (eflag_atom) {
    k_eatom.template modify<DeviceType>();
-    k_eatom.template sync<LMPHostType>();
+    k_eatom.sync_host();
  }

  if (vflag_atom) {
    k_vatom.template modify<DeviceType>();
-    k_vatom.template sync<LMPHostType>();
+    k_vatom.sync_host();
  }

  copymode = 0;
@ -372,10 +373,10 @@ void DihedralOPLSKokkos<DeviceType>::coeff(int narg, char **arg)
    k_k4.h_view[i] = k4[i];
  }

-  k_k1.template modify<LMPHostType>();
-  k_k2.template modify<LMPHostType>();
-  k_k3.template modify<LMPHostType>();
-  k_k4.template modify<LMPHostType>();
+  k_k1.modify_host();
+  k_k2.modify_host();
+  k_k3.modify_host();
+  k_k4.modify_host();
 }

 /* ----------------------------------------------------------------------
@ -395,10 +396,10 @@ void DihedralOPLSKokkos<DeviceType>::read_restart(FILE *fp)
    k_k4.h_view[i] = k4[i];
  }

-  k_k1.template modify<LMPHostType>();
-  k_k2.template modify<LMPHostType>();
-  k_k3.template modify<LMPHostType>();
-  k_k4.template modify<LMPHostType>();
+  k_k1.modify_host();
+  k_k2.modify_host();
+  k_k3.modify_host();
+  k_k4.modify_host();
 }

 /* ----------------------------------------------------------------------
--- a/src/KOKKOS/dihedral_opls_kokkos.h
+++ b/src/KOKKOS/dihedral_opls_kokkos.h
@ -60,16 +60,15 @@ class DihedralOPLSKokkos : public DihedralOPLS {
                          const F_FLOAT &vb2x, const F_FLOAT &vb2y, const F_FLOAT &vb2z,
                          const F_FLOAT &vb3x, const F_FLOAT &vb3y, const F_FLOAT &vb3z) const;

+  DAT::tdual_efloat_1d k_eatom;
+  DAT::tdual_virial_array k_vatom;
+
 protected:

  class NeighborKokkos *neighborKK;
-
  typename AT::t_x_array_randomread x;
  typename AT::t_f_array f;
  typename AT::t_int_2d dihedrallist;
-
-  DAT::tdual_efloat_1d k_eatom;
-  DAT::tdual_virial_array k_vatom;
  typename ArrayTypes<DeviceType>::t_efloat_1d d_eatom;
  typename ArrayTypes<DeviceType>::t_virial_array d_vatom;

--- a/src/KOKKOS/improper_class2_kokkos.cpp
+++ b/src/KOKKOS/improper_class2_kokkos.cpp
@ -34,6 +34,7 @@ static constexpr double SMALL =     0.001;
 template<class DeviceType>
 ImproperClass2Kokkos<DeviceType>::ImproperClass2Kokkos(LAMMPS *lmp) : ImproperClass2(lmp)
 {
+  kokkosable = 1;
  atomKK = (AtomKokkos *) atom;
  neighborKK = (NeighborKokkos *) neighbor;
  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
@ -110,7 +111,7 @@ void ImproperClass2Kokkos<DeviceType>::compute(int eflag_in, int vflag_in)
  newton_bond = force->newton_bond;

  h_warning_flag() = 0;
-  k_warning_flag.template modify<LMPHostType>();
+  k_warning_flag.modify_host();
  k_warning_flag.template sync<DeviceType>();

  copymode = 1;
@ -139,7 +140,7 @@ void ImproperClass2Kokkos<DeviceType>::compute(int eflag_in, int vflag_in)
  // error check

  k_warning_flag.template modify<DeviceType>();
-  k_warning_flag.template sync<LMPHostType>();
+  k_warning_flag.sync_host();
  if (h_warning_flag())
    error->warning(FLERR,"Improper problem");

@ -171,12 +172,12 @@ void ImproperClass2Kokkos<DeviceType>::compute(int eflag_in, int vflag_in)

  if (eflag_atom) {
    k_eatom.template modify<DeviceType>();
-    k_eatom.template sync<LMPHostType>();
+    k_eatom.sync_host();
  }

  if (vflag_atom) {
    k_vatom.template modify<DeviceType>();
-    k_vatom.template sync<LMPHostType>();
+    k_vatom.sync_host();
  }

  copymode = 0;
@ -918,17 +919,17 @@ void ImproperClass2Kokkos<DeviceType>::coeff(int narg, char **arg)
    k_setflag_aa.h_view[i] = setflag_aa[i];
  }

-  k_k0.template modify<LMPHostType>();
-  k_chi0.template modify<LMPHostType>();
-  k_aa_k1.template modify<LMPHostType>();
-  k_aa_k2.template modify<LMPHostType>();
-  k_aa_k3.template modify<LMPHostType>();
-  k_aa_theta0_1.template modify<LMPHostType>();
-  k_aa_theta0_2.template modify<LMPHostType>();
-  k_aa_theta0_3 .template modify<LMPHostType>();
-  k_setflag.template modify<LMPHostType>();
-  k_setflag_i.template modify<LMPHostType>();
-  k_setflag_aa.template modify<LMPHostType>();
+  k_k0.modify_host();
+  k_chi0.modify_host();
+  k_aa_k1.modify_host();
+  k_aa_k2.modify_host();
+  k_aa_k3.modify_host();
+  k_aa_theta0_1.modify_host();
+  k_aa_theta0_2.modify_host();
+  k_aa_theta0_3 .modify_host();
+  k_setflag.modify_host();
+  k_setflag_i.modify_host();
+  k_setflag_aa.modify_host();
 }

 /* ----------------------------------------------------------------------
@ -979,17 +980,17 @@ void ImproperClass2Kokkos<DeviceType>::read_restart(FILE *fp)
    k_setflag_aa.h_view[i] = setflag_aa[i];
  }

-  k_k0.template modify<LMPHostType>();
-  k_chi0.template modify<LMPHostType>();
-  k_aa_k1.template modify<LMPHostType>();
-  k_aa_k2.template modify<LMPHostType>();
-  k_aa_k3.template modify<LMPHostType>();
-  k_aa_theta0_1.template modify<LMPHostType>();
-  k_aa_theta0_2.template modify<LMPHostType>();
-  k_aa_theta0_3 .template modify<LMPHostType>();
-  k_setflag.template modify<LMPHostType>();
-  k_setflag_i.template modify<LMPHostType>();
-  k_setflag_aa.template modify<LMPHostType>();
+  k_k0.modify_host();
+  k_chi0.modify_host();
+  k_aa_k1.modify_host();
+  k_aa_k2.modify_host();
+  k_aa_k3.modify_host();
+  k_aa_theta0_1.modify_host();
+  k_aa_theta0_2.modify_host();
+  k_aa_theta0_3 .modify_host();
+  k_setflag.modify_host();
+  k_setflag_i.modify_host();
+  k_setflag_aa.modify_host();
 }

 /* ----------------------------------------------------------------------
--- a/src/KOKKOS/improper_class2_kokkos.h
+++ b/src/KOKKOS/improper_class2_kokkos.h
@ -71,6 +71,9 @@ class ImproperClass2Kokkos : public ImproperClass2 {
                          const F_FLOAT &vb2x, const F_FLOAT &vb2y, const F_FLOAT &vb2z,
                          const F_FLOAT &vb3x, const F_FLOAT &vb3y, const F_FLOAT &vb3z) const;

+  DAT::tdual_efloat_1d k_eatom;
+  DAT::tdual_virial_array k_vatom;
+
 protected:

  class NeighborKokkos *neighborKK;
@ -78,9 +81,6 @@ class ImproperClass2Kokkos : public ImproperClass2 {
  typename AT::t_x_array_randomread x;
  typename Kokkos::View<double*[3],typename AT::t_f_array::array_layout,typename KKDevice<DeviceType>::value,Kokkos::MemoryTraits<Kokkos::Atomic> > f;
  typename AT::t_int_2d improperlist;
-
-  DAT::tdual_efloat_1d k_eatom;
-  DAT::tdual_virial_array k_vatom;
  typename AT::t_efloat_1d d_eatom;
  typename AT::t_virial_array d_vatom;

--- a/src/KOKKOS/improper_harmonic_kokkos.cpp
+++ b/src/KOKKOS/improper_harmonic_kokkos.cpp
@ -36,6 +36,7 @@ static constexpr double SMALL =     0.001;
 template<class DeviceType>
 ImproperHarmonicKokkos<DeviceType>::ImproperHarmonicKokkos(LAMMPS *lmp) : ImproperHarmonic(lmp)
 {
+  kokkosable = 1;
  atomKK = (AtomKokkos *) atom;
  neighborKK = (NeighborKokkos *) neighbor;
  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
@ -73,18 +74,18 @@ void ImproperHarmonicKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
  // reallocate per-atom arrays if necessary

  if (eflag_atom) {
-    //if(k_eatom.extent(0)<maxeatom) { // won't work without adding zero functor
+    if(k_eatom.extent(0) < maxeatom) {
      memoryKK->destroy_kokkos(k_eatom,eatom);
      memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"improper:eatom");
      d_eatom = k_eatom.template view<KKDeviceType>();
-    //}
+    } else Kokkos::deep_copy(d_eatom,0.0);
  }
  if (vflag_atom) {
-    //if(k_vatom.extent(0)<maxvatom) { // won't work without adding zero functor
+    if(k_vatom.extent(0) < maxvatom) {
      memoryKK->destroy_kokkos(k_vatom,vatom);
      memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"improper:vatom");
      d_vatom = k_vatom.template view<KKDeviceType>();
-    //}
+    } else Kokkos::deep_copy(d_vatom,0.0);
  }

  //atomKK->sync(execution_space,datamask_read);
@ -102,7 +103,7 @@ void ImproperHarmonicKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
  newton_bond = force->newton_bond;

  h_warning_flag() = 0;
-  k_warning_flag.template modify<LMPHostType>();
+  k_warning_flag.modify_host();
  k_warning_flag.template sync<DeviceType>();

  copymode = 1;
@ -128,7 +129,7 @@ void ImproperHarmonicKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
  // error check

  k_warning_flag.template modify<DeviceType>();
-  k_warning_flag.template sync<LMPHostType>();
+  k_warning_flag.sync_host();
  if (h_warning_flag())
    error->warning(FLERR,"Dihedral problem");

@ -144,12 +145,12 @@ void ImproperHarmonicKokkos<DeviceType>::compute(int eflag_in, int vflag_in)

  if (eflag_atom) {
    k_eatom.template modify<DeviceType>();
-    k_eatom.template sync<LMPHostType>();
+    k_eatom.sync_host();
  }

  if (vflag_atom) {
    k_vatom.template modify<DeviceType>();
-    k_vatom.template sync<LMPHostType>();
+    k_vatom.sync_host();
  }

  copymode = 0;
@ -324,8 +325,8 @@ void ImproperHarmonicKokkos<DeviceType>::coeff(int narg, char **arg)
    k_chi.h_view[i] = chi[i];
  }

-  k_k.template modify<LMPHostType>();
-  k_chi.template modify<LMPHostType>();
+  k_k.modify_host();
+  k_chi.modify_host();
 }

 /* ----------------------------------------------------------------------
@ -343,8 +344,8 @@ void ImproperHarmonicKokkos<DeviceType>::read_restart(FILE *fp)
    k_chi.h_view[i] = chi[i];
  }

-  k_k.template modify<LMPHostType>();
-  k_chi.template modify<LMPHostType>();
+  k_k.modify_host();
+  k_chi.modify_host();
 }

 /* ----------------------------------------------------------------------
--- a/src/KOKKOS/improper_harmonic_kokkos.h
+++ b/src/KOKKOS/improper_harmonic_kokkos.h
@ -60,17 +60,17 @@ class ImproperHarmonicKokkos : public ImproperHarmonic {
                          const F_FLOAT &vb2x, const F_FLOAT &vb2y, const F_FLOAT &vb2z,
                          const F_FLOAT &vb3x, const F_FLOAT &vb3y, const F_FLOAT &vb3z) const;

+  typedef typename KKDevice<DeviceType>::value KKDeviceType;
+  Kokkos::DualView<E_FLOAT*,Kokkos::LayoutRight,KKDeviceType> k_eatom;
+  Kokkos::DualView<F_FLOAT*[6],Kokkos::LayoutRight,KKDeviceType> k_vatom;
+
 protected:

  class NeighborKokkos *neighborKK;

-  typedef typename KKDevice<DeviceType>::value KKDeviceType;
  typename AT::t_x_array_randomread x;
  typename Kokkos::View<double*[3],typename AT::t_f_array::array_layout,KKDeviceType,Kokkos::MemoryTraits<Kokkos::Atomic> > f;
  typename AT::t_int_2d improperlist;
-
-  Kokkos::DualView<E_FLOAT*,Kokkos::LayoutRight,KKDeviceType> k_eatom;
-  Kokkos::DualView<F_FLOAT*[6],Kokkos::LayoutRight,KKDeviceType> k_vatom;
  Kokkos::View<E_FLOAT*,Kokkos::LayoutRight,KKDeviceType,Kokkos::MemoryTraits<Kokkos::Atomic> > d_eatom;
  Kokkos::View<F_FLOAT*[6],Kokkos::LayoutRight,KKDeviceType,Kokkos::MemoryTraits<Kokkos::Atomic> > d_vatom;

--- a/src/KOKKOS/improper_hybrid_kokkos.cpp
+++ b/src/KOKKOS/improper_hybrid_kokkos.cpp
@ -0,0 +1,226 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "improper_hybrid_kokkos.h"
+
+#include "atom_kokkos.h"
+#include "atom_masks.h"
+#include "comm.h"
+#include "error.h"
+#include "force.h"
+#include "kokkos.h"
+#include "memory_kokkos.h"
+#include "neighbor_kokkos.h"
+
+#include <cstring>
+
+using namespace LAMMPS_NS;
+
+#define EXTRA 1000
+
+/* ---------------------------------------------------------------------- */
+
+ImproperHybridKokkos::ImproperHybridKokkos(LAMMPS *lmp) : ImproperHybrid(lmp)
+{
+  // mark this style as usable from improper_style hybrid/kk
+
+  kokkosable = 1;
+
+  atomKK = (AtomKokkos *) atom;
+  neighborKK = (NeighborKokkos *) neighbor;
+
+  execution_space = Device;
+
+  // the hybrid style itself touches no per-atom data;
+  // compute() syncs with each sub-style's own masks instead
+
+  datamask_read = EMPTY_MASK;
+  datamask_modify = EMPTY_MASK;
+
+  // compute() only assigns maximproper_all on a re-neighbor step;
+  // start from zero so memory_usage() never reads an indeterminate
+  // value when it is called before that
+
+  maximproper_all = 0;
+}
+
+/* ----------------------------------------------------------------------
+   destructor
+   hands cleanup to deallocate(), which is a no-op unless allocate() ran
+------------------------------------------------------------------------- */
+
+ImproperHybridKokkos::~ImproperHybridKokkos()
+{
+  deallocate();
+}
+
+/* ----------------------------------------------------------------------
+   split the neighbor improperlist into one device list per sub-style,
+   run every sub-style's compute() on its own list, and add the
+   sub-style global/per-atom energy and virial tallies into this style
+   eflag, vflag = tally flags, passed to ev_init() and to each sub-style
+------------------------------------------------------------------------- */
+
+void ImproperHybridKokkos::compute(int eflag, int vflag)
+{
+
+  // save ptrs to original improperlist
+
+  int nimproperlist_orig = neighbor->nimproperlist;
+  neighborKK->k_improperlist.sync_device();
+  auto k_improperlist_orig = neighborKK->k_improperlist;
+  auto d_improperlist_orig = k_improperlist_orig.d_view;
+  auto d_nimproperlist = k_nimproperlist.d_view;
+  auto h_nimproperlist = k_nimproperlist.h_view;
+
+  // if this is re-neighbor step, create sub-style improperlists
+  // nimproperlist[] = length of each sub-style list
+  // realloc sub-style improperlist if necessary
+  // load sub-style improperlist with all 5 columns of each original improperlist entry
+
+  if (neighbor->ago == 0) {
+    Kokkos::deep_copy(d_nimproperlist,0);  // clear per-style counters before the counting pass
+
+    k_map.sync_device();
+    auto d_map = k_map.d_view;
+
+    Kokkos::parallel_for(nimproperlist_orig,LAMMPS_LAMBDA(int i) {  // pass 1: count entries per sub-style
+      const int m = d_map[d_improperlist_orig(i,4)];  // column 4 is the index into the type -> sub-style map
+      if (m >= 0) Kokkos::atomic_increment(&d_nimproperlist[m]);  // entries with m < 0 are not counted
+    });
+
+    k_nimproperlist.modify_device();
+    k_nimproperlist.sync_host();
+
+    maximproper_all = 0;
+    for (int m = 0; m < nstyles; m++)
+      if (h_nimproperlist[m] > maximproper_all)
+        maximproper_all = h_nimproperlist[m] + EXTRA;  // pad by EXTRA whenever a count exceeds the running value
+
+    if (k_improperlist.d_view.extent(1) < maximproper_all)  // extent(1) = entries available per sub-style
+      MemKK::realloc_kokkos(k_improperlist, "improper_hybrid:improperlist", nstyles, maximproper_all, 5);
+    auto d_improperlist = k_improperlist.d_view;
+
+    Kokkos::deep_copy(d_nimproperlist,0);  // counters are reused as insertion positions in pass 2
+
+    Kokkos::parallel_for(nimproperlist_orig,LAMMPS_LAMBDA(int i) {  // pass 2: copy entries into the per-style lists
+      const int m = d_map[d_improperlist_orig(i,4)];
+      if (m < 0) return;
+      const int n = Kokkos::atomic_fetch_add(&d_nimproperlist[m],1);  // n = slot claimed in sub-style m's list
+      d_improperlist(m,n,0) = d_improperlist_orig(i,0);
+      d_improperlist(m,n,1) = d_improperlist_orig(i,1);
+      d_improperlist(m,n,2) = d_improperlist_orig(i,2);
+      d_improperlist(m,n,3) = d_improperlist_orig(i,3);
+      d_improperlist(m,n,4) = d_improperlist_orig(i,4);
+    });
+  }
+
+  // call each sub-style's compute function
+  // set neighbor->improperlist to sub-style improperlist before call
+  // accumulate sub-style global/peratom energy/virial in hybrid
+
+  ev_init(eflag, vflag);
+
+  k_nimproperlist.modify_device();
+  k_nimproperlist.sync_host();  // per-style counts are read on the host in the loop below
+
+  for (int m = 0; m < nstyles; m++) {
+    neighbor->nimproperlist = h_nimproperlist[m];
+    auto k_improperlist_m = Kokkos::subview(k_improperlist,m,Kokkos::ALL,Kokkos::ALL);  // 2d slice holding sub-style m's entries
+    k_improperlist_m.modify_device();
+    neighborKK->k_improperlist = k_improperlist_m;
+
+    auto style = styles[m];
+    atomKK->sync(style->execution_space,style->datamask_read);
+    style->compute(eflag, vflag);
+    atomKK->modified(style->execution_space,style->datamask_modify);
+
+    if (eflag_global) energy += style->energy;
+    if (vflag_global)
+      for (int n = 0; n < 6; n++) virial[n] += style->virial[n];
+
+    if (eflag_atom) {
+      int n = atom->nlocal;
+      if (force->newton_bond) n += atom->nghost;
+      double *eatom_substyle = styles[m]->eatom;  // host pointer of the sub-style tally
+      for (int i = 0; i < n; i++) eatom[i] += eatom_substyle[i];
+    }
+    if (vflag_atom) {
+      int n = atom->nlocal;
+      if (force->newton_bond) n += atom->nghost;
+      double **vatom_substyle = styles[m]->vatom;
+      for (int i = 0; i < n; i++)
+        for (int j = 0; j < 6; j++) vatom[i][j] += vatom_substyle[i][j];
+    }
+    if (cvflag_atom) {
+      int n = atom->nlocal;
+      if (force->newton_bond) n += atom->nghost;
+      double **cvatom_substyle = styles[m]->cvatom;
+      for (int i = 0; i < n; i++)
+        for (int j = 0; j < 9; j++) cvatom[i][j] += cvatom_substyle[i][j];
+    }
+  }
+
+  // restore ptrs to original improperlist
+
+  neighbor->nimproperlist = nimproperlist_orig;
+  neighborKK->k_improperlist = k_improperlist_orig;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void ImproperHybridKokkos::allocate()
+{
+  allocated = 1;
+
+  // per-type arrays get one slot more than the number of improper types
+
+  const int ntypes_p1 = atom->nimpropertypes + 1;
+
+  // type -> sub-style map is created through memoryKK together with its DualView
+
+  memoryKK->create_kokkos(k_map, map, ntypes_p1, "improper:map");
+
+  // setflag is a plain host array; entries 1..nimpropertypes start out cleared
+
+  memory->create(setflag, ntypes_p1, "improper:setflag");
+  for (int itype = ntypes_p1 - 1; itype >= 1; --itype) setflag[itype] = 0;
+
+  // one list-length counter per sub-style
+
+  k_nimproperlist = DAT::tdual_int_1d("improper:nimproperlist", nstyles);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void ImproperHybridKokkos::deallocate()
+{
+  // only release storage that allocate() actually created
+
+  if (allocated) {
+    allocated = 0;
+
+    memory->destroy(setflag);
+    memoryKK->destroy_kokkos(k_map,map);
+  }
+}
+
+/* ----------------------------------------------------------------------
+   set coeffs for one type
+   forwards narg/arg unchanged to ImproperHybrid::coeff() and then marks
+   the host side of k_map as modified
+---------------------------------------------------------------------- */
+
+void ImproperHybridKokkos::coeff(int narg, char **arg)
+{
+  ImproperHybrid::coeff(narg,arg);
+
+  k_map.modify_host();  // presumably the base class just updated map[] on the host (confirm); compute() syncs k_map to the device before reading it
+}
+
+/* ---------------------------------------------------------------------- */
+
+void ImproperHybridKokkos::init_style()
+{
+  ImproperHybrid::init_style();
+
+  // every sub-style must be Kokkos-enabled
+  // a sub-style that executes on the host switches off allow_overlap
+
+  for (int istyle = 0; istyle < nstyles; ++istyle) {
+    auto substyle = styles[istyle];
+
+    if (substyle->kokkosable == 0)
+      error->all(FLERR,"Must use only Kokkos-enabled improper styles with improper_style hybrid/kk");
+
+    if (substyle->execution_space != Host) continue;
+    lmp->kokkos->allow_overlap = 0;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   memory usage in bytes: per-atom tally arrays of this style, the
+   per-sub-style improper lists, and whatever each sub-style reports
+------------------------------------------------------------------------- */
+
+double ImproperHybridKokkos::memory_usage()
+{
+  // per-atom energy (1), virial (6) and centroid virial (9) doubles
+
+  double total = (double) maxeatom * sizeof(double);
+  total += (double) maxvatom * 6 * sizeof(double);
+  total += (double) maxcvatom * 9 * sizeof(double);
+
+  // each sub-style list holds maximproper_all entries of 5 ints
+
+  const double list_bytes = (double) maximproper_all * 5 * sizeof(int);
+  for (int istyle = 0; istyle < nstyles; ++istyle) total += list_bytes;
+
+  // sub-styles account for their own storage
+
+  for (int istyle = 0; istyle < nstyles; ++istyle) {
+    if (styles[istyle] == nullptr) continue;
+    total += styles[istyle]->memory_usage();
+  }
+
+  return total;
+}
--- a/src/KOKKOS/improper_hybrid_kokkos.h
+++ b/src/KOKKOS/improper_hybrid_kokkos.h
@ -0,0 +1,58 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef IMPROPER_CLASS
+// clang-format off
+ImproperStyle(hybrid/kk,ImproperHybridKokkos);
+ImproperStyle(hybrid/kk/device,ImproperHybridKokkos);
+ImproperStyle(hybrid/kk/host,ImproperHybridKokkos);
+// clang-format on
+#else
+
+// clang-format off
+#ifndef LMP_IMPROPER_HYBRID_KOKKOS_H
+#define LMP_IMPROPER_HYBRID_KOKKOS_H
+
+#include "improper_hybrid.h"
+#include "kokkos_type.h"
+
+namespace LAMMPS_NS {
+
+class ImproperHybridKokkos : public ImproperHybrid {  // registered above as hybrid/kk, hybrid/kk/device and hybrid/kk/host
+  friend class Force;
+
+ public:
+  ImproperHybridKokkos(class LAMMPS *);
+  ~ImproperHybridKokkos() override;
+  void compute(int, int) override;      // (eflag, vflag): runs every sub-style on its own improper list
+  void coeff(int, char **) override;    // (narg, arg): base-class coeff, then flags k_map as host-modified
+  void init_style() override;           // errors out if any sub-style is not Kokkos-enabled
+  double memory_usage() override;       // bytes for tallies, sub-style lists and sub-styles
+
+ private:
+  int maximproper_all;                  // entries per sub-style requested for k_improperlist; assigned in compute() on re-neighbor steps
+
+  class NeighborKokkos *neighborKK;     // neighbor pointer cast to its Kokkos class in the constructor
+
+  DAT::tdual_int_1d k_map;       // which style each improper type points to
+  DAT::tdual_int_1d k_nimproperlist; // # of impropers in sub-style improperlists
+  DAT::tdual_int_3d k_improperlist;  // improperlist for each sub-style, indexed (style, entry, column) with 5 columns
+
+  void allocate() override;             // creates k_map/map, setflag and k_nimproperlist
+  void deallocate() override;           // frees setflag and k_map/map if allocated
+};
+
+}    // namespace LAMMPS_NS
+
+#endif
+#endif
--- a/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.cpp
+++ b/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.cpp
@ -13,7 +13,7 @@
 ------------------------------------------------------------------------- */

 /* ----------------------------------------------------------------------
-   Contributing author: Mitch Murphy (alphataubio)
+   Contributing author: Mitch Murphy (alphataubio@gmail.com)

   Based on serial kspace lj-fsw sections (force-switched) provided by
   Robert Meissner and Lucio Colombi Ciacchi of Bremen University, Germany,
@ -463,7 +463,6 @@ double PairLJCharmmfswCoulLongKokkos<DeviceType>::init_one(int i, int j)
  k_params.h_view(i,j).lj2 = lj2[i][j];
  k_params.h_view(i,j).lj3 = lj3[i][j];
  k_params.h_view(i,j).lj4 = lj4[i][j];
-  //k_params.h_view(i,j).offset = offset[i][j];
  k_params.h_view(i,j).cut_ljsq = cut_ljsq;
  k_params.h_view(i,j).cut_coulsq = cut_coulsq;

--- a/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.h
+++ b/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.h
@ -48,30 +48,25 @@ class PairLJCharmmfswCoulLongKokkos : public PairLJCharmmfswCoulLong {
 protected:
  template<bool STACKPARAMS, class Specialisation>
  KOKKOS_INLINE_FUNCTION
-  F_FLOAT compute_fpair(const F_FLOAT& rsq, const int& i, const int&j,
-                        const int& itype, const int& jtype) const;
+  F_FLOAT compute_fpair(const F_FLOAT& rsq, const int& i, const int& j, const int& itype, const int& jtype) const;

  template<bool STACKPARAMS, class Specialisation>
  KOKKOS_INLINE_FUNCTION
-  F_FLOAT compute_fcoul(const F_FLOAT& rsq, const int& i, const int&j, const int& itype,
+  F_FLOAT compute_evdwl(const F_FLOAT& rsq, const int& i, const int& j, const int& itype, const int& jtype) const;
+
+  template<bool STACKPARAMS, class Specialisation>
+  KOKKOS_INLINE_FUNCTION
+  F_FLOAT compute_fcoul(const F_FLOAT& rsq, const int& i, const int& j, const int& itype,
                        const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const;

  template<bool STACKPARAMS, class Specialisation>
  KOKKOS_INLINE_FUNCTION
-  F_FLOAT compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j,
-                        const int& itype, const int& jtype) const;
-
-  template<bool STACKPARAMS, class Specialisation>
-  KOKKOS_INLINE_FUNCTION
-  F_FLOAT compute_ecoul(const F_FLOAT& rsq, const int& i, const int&j,
-                        const int& itype, const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const;
+  F_FLOAT compute_ecoul(const F_FLOAT& rsq, const int& i, const int& j, const int& itype,
+                        const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const;

  Kokkos::DualView<params_lj_coul**,Kokkos::LayoutRight,DeviceType> k_params;
-  typename Kokkos::DualView<params_lj_coul**,
-    Kokkos::LayoutRight,DeviceType>::t_dev_const_um params;
-  // hardwired to space for 12 atom types
-  params_lj_coul m_params[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
-
+  typename Kokkos::DualView<params_lj_coul**,Kokkos::LayoutRight,DeviceType>::t_dev_const_um params;
+  params_lj_coul m_params[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];  // hardwired to space for 12 atom types
  F_FLOAT m_cutsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
  F_FLOAT m_cut_ljsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
  F_FLOAT m_cut_coulsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
@ -100,8 +95,8 @@ class PairLJCharmmfswCoulLongKokkos : public PairLJCharmmfswCoulLong {
  int neighflag;
  int nlocal,nall,eflag,vflag;

-  double special_coul[4];
  double special_lj[4];
+  double special_coul[4];
  double qqrd2e;

  void allocate() override;
--- a/src/KOKKOS/pair_lj_cut_coul_long_kokkos.cpp
+++ b/src/KOKKOS/pair_lj_cut_coul_long_kokkos.cpp
@ -76,7 +76,6 @@ void PairLJCutCoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in)

  ev_init(eflag,vflag,0);

-
  // reallocate per-atom arrays if necessary

  if (eflag_atom) {
@ -125,11 +124,11 @@ void PairLJCutCoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
    ev = pair_compute<PairLJCutCoulLongKokkos<DeviceType>,CoulLongTable<0> >
      (this,(NeighListKokkos<DeviceType>*)list);

-
  if (eflag) {
    eng_vdwl += ev.evdwl;
    eng_coul += ev.ecoul;
  }
+
  if (vflag_global) {
    virial[0] += ev.v[0];
    virial[1] += ev.v[1];
--- a/src/KOKKOS/pair_lj_spica_coul_long_kokkos.cpp
+++ b/src/KOKKOS/pair_lj_spica_coul_long_kokkos.cpp
@ -0,0 +1,503 @@
+// clang-format off
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Mitch Murphy (alphataubio@gmail.com)
+------------------------------------------------------------------------- */
+
+#include "pair_lj_spica_coul_long_kokkos.h"
+
+#include "atom_kokkos.h"
+#include "atom_masks.h"
+#include "error.h"
+#include "ewald_const.h"
+#include "force.h"
+#include "kokkos.h"
+#include "memory_kokkos.h"
+#include "neigh_list.h"
+#include "neigh_request.h"
+#include "neighbor.h"
+#include "respa.h"
+#include "update.h"
+
+#include "lj_spica_common.h"
+
+#include <cmath>
+#include <cstring>
+
+using namespace LAMMPS_NS;
+using namespace LJSPICAParms;
+using namespace EwaldConst;
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+PairLJSPICACoulLongKokkos<DeviceType>::PairLJSPICACoulLongKokkos(LAMMPS *lmp) :
+  PairLJSPICACoulLong(lmp)
+{
+  // Kokkos bookkeeping: flag the style and record where it executes
+
+  kokkosable = 1;
+  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
+  atomKK = (AtomKokkos *) atom;
+
+  // atom data marked as modified by compute(), and the wider set it syncs before running
+
+  datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK;
+  datamask_read = datamask_modify | X_MASK | TYPE_MASK | Q_MASK;
+
+  // rRESPA support is switched off for this style
+
+  respa_enable = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+PairLJSPICACoulLongKokkos<DeviceType>::~PairLJSPICACoulLongKokkos()
+{
+  // nothing is released while copymode is set (compute() raises it around
+  // pair_compute) or when allocate() never ran
+
+  if (!copymode && allocated) {
+    memoryKK->destroy_kokkos(k_eatom,eatom);
+    memoryKK->destroy_kokkos(k_vatom,vatom);
+    memoryKK->destroy_kokkos(k_cutsq,cutsq);
+    memoryKK->destroy_kokkos(k_cut_ljsq,cut_ljsq);
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairLJSPICACoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
+{
+  eflag = eflag_in;
+  vflag = vflag_in;
+
+  // with a full neighbor list the fdotr virial is flagged as unavailable
+
+  if (neighflag == FULL) no_virial_fdotr_compute = 1;
+
+  ev_init(eflag,vflag,0);
+
+  // per-atom tally arrays are recreated at their current max size whenever requested
+
+  if (eflag_atom) {
+    memoryKK->destroy_kokkos(k_eatom,eatom);
+    memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom");
+    d_eatom = k_eatom.view<DeviceType>();
+  }
+  if (vflag_atom) {
+    memoryKK->destroy_kokkos(k_vatom,vatom);
+    memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"pair:vatom");
+    d_vatom = k_vatom.view<DeviceType>();
+  }
+
+  // bring atom data and coefficient tables to the execution space,
+  // then declare what this call is going to modify there
+
+  atomKK->sync(execution_space,datamask_read);
+  k_cutsq.template sync<DeviceType>();
+  k_cut_ljsq.template sync<DeviceType>();
+  k_params.template sync<DeviceType>();
+  atomKK->modified(execution_space,(eflag || vflag) ? datamask_modify : F_MASK);
+
+  // views and scalars read by the force kernels
+
+  x = atomKK->k_x.view<DeviceType>();
+  c_x = atomKK->k_x.view<DeviceType>();
+  f = atomKK->k_f.view<DeviceType>();
+  q = atomKK->k_q.view<DeviceType>();
+  type = atomKK->k_type.view<DeviceType>();
+  nlocal = atom->nlocal;
+  nall = atom->nlocal + atom->nghost;
+  for (int k = 0; k < 4; ++k) special_lj[k] = force->special_lj[k];
+  for (int k = 0; k < 4; ++k) special_coul[k] = force->special_coul[k];
+  qqrd2e = force->qqrd2e;
+  newton_pair = force->newton_pair;
+
+  // loop over neighbors of my atoms
+  // the coulomb table specialisation is picked from ncoultablebits
+
+  copymode = 1;
+
+  auto kk_list = (NeighListKokkos<DeviceType>*)list;
+
+  EV_FLOAT ev;
+  if (ncoultablebits) {
+    ev = pair_compute<PairLJSPICACoulLongKokkos<DeviceType>,CoulLongTable<1> >(this,kk_list);
+  } else {
+    ev = pair_compute<PairLJSPICACoulLongKokkos<DeviceType>,CoulLongTable<0> >(this,kk_list);
+  }
+
+  // fold the kernel results into the global accumulators
+
+  if (eflag) {
+    eng_vdwl += ev.evdwl;
+    eng_coul += ev.ecoul;
+  }
+
+  if (vflag_global) {
+    for (int k = 0; k < 6; ++k) virial[k] += ev.v[k];
+  }
+
+  // per-atom tallies were written on the device; copy them back to the host
+
+  if (eflag_atom) {
+    k_eatom.template modify<DeviceType>();
+    k_eatom.template sync<LMPHostType>();
+  }
+
+  if (vflag_atom) {
+    k_vatom.template modify<DeviceType>();
+    k_vatom.template sync<LMPHostType>();
+  }
+
+  if (vflag_fdotr) pair_virial_fdotr_compute(this);
+
+  copymode = 0;
+}
+
+/* ----------------------------------------------------------------------
+   LJ pair force term between atoms i and j
+   depends only on rsq and on the coefficients of the (itype,jtype) pair;
+   the atom indices themselves are unused
+   ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+template<bool STACKPARAMS, class Specialisation>
+KOKKOS_INLINE_FUNCTION
+F_FLOAT PairLJSPICACoulLongKokkos<DeviceType>::
+compute_fpair(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const {
+  (void) i;
+  (void) j;
+
+  // coefficients come from the stack copy or from the device view
+
+  int ljt;
+  F_FLOAT lj_1, lj_2;
+  if (STACKPARAMS) {
+    ljt = m_params[itype][jtype].lj_type;
+    lj_1 = m_params[itype][jtype].lj1;
+    lj_2 = m_params[itype][jtype].lj2;
+  } else {
+    ljt = params(itype,jtype).lj_type;
+    lj_1 = params(itype,jtype).lj1;
+    lj_2 = params(itype,jtype).lj2;
+  }
+
+  const F_FLOAT r2inv = 1.0/rsq;
+  const F_FLOAT r4inv=r2inv*r2inv;
+  const F_FLOAT r6inv=r2inv*r4inv;
+
+  // a and b select the powers of 1/r for the lj_type at hand;
+  // any type not listed below uses a = r6inv, b = r2inv
+
+  F_FLOAT a = r6inv;
+  F_FLOAT b = r2inv;
+  if (ljt == LJ12_4) {
+    a = r4inv;
+    b = r4inv;
+  } else if (ljt == LJ12_5) {
+    a = r4inv*sqrt(r2inv);
+    b = r2inv*sqrt(r2inv);
+  } else if (ljt == LJ9_6) {
+    b = 1.0/sqrt(r2inv);
+  }
+
+  return a* ( lj_1*r6inv*b - lj_2 * r2inv);
+}
+
+/* ----------------------------------------------------------------------
+   LJ pair energy between atoms i and j
+   evaluates the form selected by lj_type of the (itype,jtype) pair and
+   subtracts its offset; an unrecognised lj_type yields 0.0
+   ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+template<bool STACKPARAMS, class Specialisation>
+KOKKOS_INLINE_FUNCTION
+F_FLOAT PairLJSPICACoulLongKokkos<DeviceType>::
+compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const {
+  (void) i;
+  (void) j;
+
+  // coefficients come from the stack copy or from the device view
+
+  int ljt;
+  F_FLOAT lj_3, lj_4, offset;
+  if (STACKPARAMS) {
+    ljt = m_params[itype][jtype].lj_type;
+    lj_3 = m_params[itype][jtype].lj3;
+    lj_4 = m_params[itype][jtype].lj4;
+    offset = m_params[itype][jtype].offset;
+  } else {
+    ljt = params(itype,jtype).lj_type;
+    lj_3 = params(itype,jtype).lj3;
+    lj_4 = params(itype,jtype).lj4;
+    offset = params(itype,jtype).offset;
+  }
+
+  const F_FLOAT r2inv = 1.0/rsq;
+
+  F_FLOAT evdwl = 0.0;
+  switch (ljt) {
+    case LJ12_4: {
+      const F_FLOAT r4inv=r2inv*r2inv;
+      evdwl = r4inv*(lj_3*r4inv*r4inv - lj_4) - offset;
+      break;
+    }
+    case LJ9_6: {
+      const F_FLOAT r3inv = r2inv*sqrt(r2inv);
+      const F_FLOAT r6inv = r3inv*r3inv;
+      evdwl = r6inv*(lj_3*r3inv - lj_4) - offset;
+      break;
+    }
+    case LJ12_6: {
+      const double r6inv = r2inv*r2inv*r2inv;
+      evdwl = r6inv*(lj_3*r6inv - lj_4) - offset;
+      break;
+    }
+    case LJ12_5: {
+      const F_FLOAT r5inv = r2inv*r2inv*sqrt(r2inv);
+      const F_FLOAT r7inv = r5inv*r2inv;
+      evdwl = r5inv*(lj_3*r7inv - lj_4) - offset;
+      break;
+    }
+    default:
+      break;
+  }
+  return evdwl;
+}
+
+/* ----------------------------------------------------------------------
+   coulomb pair force term between atoms i and j
+   uses the tabulated values when the table specialisation is active and
+   rsq lies above tabinnersq, the analytic expression otherwise
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+template<bool STACKPARAMS,  class Specialisation>
+KOKKOS_INLINE_FUNCTION
+F_FLOAT PairLJSPICACoulLongKokkos<DeviceType>::
+compute_fcoul(const F_FLOAT& rsq, const int& /*i*/, const int&j,
+              const int& /*itype*/, const int& /*jtype*/,
+              const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const {
+
+  const bool use_table = Specialisation::DoTable && rsq > tabinnersq;
+
+  if (!use_table) {
+    // analytic branch: polynomial-in-t approximation of erfc(grij)
+    const F_FLOAT dist = sqrt(rsq);
+    const F_FLOAT dist_inv = 1.0/dist;
+    const F_FLOAT grij = g_ewald * dist;
+    const F_FLOAT gauss = exp(-grij*grij);
+    const F_FLOAT t = 1.0 / (1.0 + EWALD_P*grij);
+    const F_FLOAT erfc_val = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * gauss;
+    const F_FLOAT pref = qqrd2e * qtmp*q[j]*dist_inv;
+
+    F_FLOAT fcoul = pref * (erfc_val + EWALD_F*grij*gauss);
+    if (factor_coul < 1.0) fcoul -= (1.0-factor_coul)*pref;
+
+    return fcoul*dist_inv*dist_inv;
+  }
+
+  // table branch: the bit pattern of rsq selects the table entry,
+  // followed by linear interpolation inside that entry
+  union_int_float_t lookup;
+  lookup.f = rsq;
+  const int idx = (lookup.i & ncoulmask) >> ncoulshiftbits;
+  const F_FLOAT frac = (lookup.f - d_rtable[idx]) * d_drtable[idx];
+  const F_FLOAT ftab = d_ftable[idx] + frac*d_dftable[idx];
+
+  F_FLOAT fcoul = qtmp*q[j] * ftab;
+  if (factor_coul < 1.0) {
+    // scaled (special) pairs: remove the excluded fraction via the correction table
+    const F_FLOAT ctab = d_ctable[idx] + frac*d_dctable[idx];
+    const F_FLOAT pref = qtmp*q[j] * ctab;
+    fcoul -= (1.0-factor_coul)*pref;
+  }
+  return fcoul/rsq;
+}
+
+/* ----------------------------------------------------------------------
+   coulomb pair energy between atoms i and j
+   uses the tabulated values when the table specialisation is active and
+   rsq lies above tabinnersq, the analytic expression otherwise
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+template<bool STACKPARAMS, class Specialisation>
+KOKKOS_INLINE_FUNCTION
+F_FLOAT PairLJSPICACoulLongKokkos<DeviceType>::
+compute_ecoul(const F_FLOAT& rsq, const int& /*i*/, const int&j,
+              const int& /*itype*/, const int& /*jtype*/, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const {
+
+  const bool use_table = Specialisation::DoTable && rsq > tabinnersq;
+
+  if (!use_table) {
+    // analytic branch: polynomial-in-t approximation of erfc(grij)
+    const F_FLOAT dist = sqrt(rsq);
+    const F_FLOAT grij = g_ewald * dist;
+    const F_FLOAT gauss = exp(-grij*grij);
+    const F_FLOAT t = 1.0 / (1.0 + EWALD_P*grij);
+    const F_FLOAT erfc_val = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * gauss;
+    const F_FLOAT pref = qqrd2e * qtmp*q[j]/dist;
+
+    F_FLOAT energy_coul = pref * erfc_val;
+    if (factor_coul < 1.0) energy_coul -= (1.0-factor_coul)*pref;
+    return energy_coul;
+  }
+
+  // table branch: the bit pattern of rsq selects the table entry,
+  // followed by linear interpolation inside that entry
+  union_int_float_t lookup;
+  lookup.f = rsq;
+  const int idx = (lookup.i & ncoulmask) >> ncoulshiftbits;
+  const F_FLOAT frac = (lookup.f - d_rtable[idx]) * d_drtable[idx];
+  const F_FLOAT etab = d_etable[idx] + frac*d_detable[idx];
+
+  F_FLOAT energy_coul = qtmp*q[j] * etab;
+  if (factor_coul < 1.0) {
+    // scaled (special) pairs: remove the excluded fraction via the correction table
+    const F_FLOAT ctab = d_ctable[idx] + frac*d_dctable[idx];
+    const F_FLOAT pref = qtmp*q[j] * ctab;
+    energy_coul -= (1.0-factor_coul)*pref;
+  }
+  return energy_coul;
+}
+
+/* ----------------------------------------------------------------------
+   allocate all arrays
+   runs the base class allocation, then destroys cutsq and cut_ljsq and
+   recreates them through memoryKK->create_kokkos() together with their
+   dual views; all views are sized (ntypes+1) x (ntypes+1)
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairLJSPICACoulLongKokkos<DeviceType>::allocate()
+{
+  PairLJSPICACoulLong::allocate();
+
+  const int dim = atom->ntypes + 1;
+
+  // total cutoff
+
+  memory->destroy(cutsq);
+  memoryKK->create_kokkos(k_cutsq,cutsq,dim,dim,"pair:cutsq");
+  d_cutsq = k_cutsq.template view<DeviceType>();
+
+  // LJ cutoff
+
+  memory->destroy(cut_ljsq);
+  memoryKK->create_kokkos(k_cut_ljsq,cut_ljsq,dim,dim,"pair:cut_ljsq");
+  d_cut_ljsq = k_cut_ljsq.template view<DeviceType>();
+
+  // Coulomb cutoff view and per type-pair parameters
+
+  d_cut_coulsq = typename AT::t_ffloat_2d("pair:cut_coulsq",dim,dim);
+
+  k_params = decltype(k_params)("PairLJSPICACoulLong::params",dim,dim);
+  params = k_params.template view<DeviceType>();
+}
+
+/* ----------------------------------------------------------------------
+   init tables
+   after Pair::init_tables() has run, each of the eight host tables
+   (rtable, drtable, ftable, dftable, ctable, dctable, etable, detable)
+   is staged in a host view, deep-copied into a new device view, and
+   that device view is assigned to the matching d_<name> member
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairLJSPICACoulLongKokkos<DeviceType>::init_tables(double cut_coul, double *cut_respa)
+{
+  Pair::init_tables(cut_coul,cut_respa);
+
+  typedef typename ArrayTypes<DeviceType>::t_ffloat_1d table_type;
+  typedef typename ArrayTypes<LMPHostType>::t_ffloat_1d host_table_type;
+
+  // number of table entries is 2^ncoultablebits
+
+  int ntable = 1;
+  int nbits = ncoultablebits;
+  while (nbits-- > 0) ntable *= 2;
+
+  // stage one table on the host, copy it over, and publish the device view
+
+  auto upload = [ntable](const auto *src, auto &dst) {
+    host_table_type h_table("HostTable",ntable);
+    table_type d_table("DeviceTable",ntable);
+    for (int k = 0; k < ntable; k++) h_table(k) = src[k];
+    Kokkos::deep_copy(d_table,h_table);
+    dst = d_table;
+  };
+
+  // Copy rtable and drtable
+
+  upload(rtable,d_rtable);
+  upload(drtable,d_drtable);
+
+  // Copy ftable and dftable
+
+  upload(ftable,d_ftable);
+  upload(dftable,d_dftable);
+
+  // Copy ctable and dctable
+
+  upload(ctable,d_ctable);
+  upload(dctable,d_dctable);
+
+  // Copy etable and detable
+
+  upload(etable,d_etable);
+  upload(detable,d_detable);
+}
+
+/* ----------------------------------------------------------------------
+   init specific to this pair style
+   base class init, deep_copy of cut_coulsq into d_cut_coulsq, error
+   for rRESPA runs with an inner or middle level, and adjustment of the
+   neighbor list request to the host or device variant of this style
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairLJSPICACoulLongKokkos<DeviceType>::init_style()
+{
+  PairLJSPICACoulLong::init_style();
+
+  Kokkos::deep_copy(d_cut_coulsq,cut_coulsq);
+
+  // error if rRESPA with inner levels
+
+  const bool run_with_respa =
+    (update->whichflag == 1) && utils::strmatch(update->integrate_style,"^respa");
+  if (run_with_respa) {
+    auto *respa_integrator = (Respa *) update->integrate;
+    const bool has_inner = respa_integrator->level_inner >= 0;
+    const bool has_middle = respa_integrator->level_middle >= 0;
+    if (has_inner || has_middle)
+      error->all(FLERR,"Cannot use Kokkos pair style with rRESPA inner/middle");
+  }
+
+  // adjust neighbor list request for KOKKOS
+
+  constexpr bool on_device = std::is_same_v<DeviceType,LMPDeviceType>;
+  constexpr bool on_host = std::is_same_v<DeviceType,LMPHostType> && !on_device;
+
+  neighflag = lmp->kokkos->neighflag;
+  auto request = neighbor->find_request(this);
+  request->set_kokkos_host(on_host);
+  request->set_kokkos_device(on_device);
+  if (neighflag == FULL) request->enable_full();
+}
+
+/* ----------------------------------------------------------------------
+   init for one type pair i,j and corresponding j,i
+   copies the coefficients produced by the base class init_one() into
+   the host side of k_params, k_cutsq and k_cut_ljsq, and, when both
+   type indices are at most MAX_TYPES_STACKPARAMS, into the fixed-size
+   m_params, m_cutsq, m_cut_ljsq and m_cut_coulsq arrays
+   returns the cutoff reported by the base class
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+double PairLJSPICACoulLongKokkos<DeviceType>::init_one(int i, int j)
+{
+  const double cutone = PairLJSPICACoulLong::init_one(i,j);
+  const double cutone_sq = cutone*cutone;
+
+  // fill the (i,j) host entry, then duplicate it for (j,i)
+
+  auto &pij = k_params.h_view(i,j);
+  pij.lj1 = lj1[i][j];
+  pij.lj2 = lj2[i][j];
+  pij.lj3 = lj3[i][j];
+  pij.lj4 = lj4[i][j];
+  pij.offset = offset[i][j];
+  pij.cut_ljsq = cut_ljsq[i][j];
+  pij.cut_coulsq = cut_coulsq;
+  pij.lj_type = lj_type[i][j];
+  k_params.h_view(j,i) = pij;
+
+  // fixed-size copies only exist for type indices up to MAX_TYPES_STACKPARAMS
+
+  if (i <= MAX_TYPES_STACKPARAMS && j <= MAX_TYPES_STACKPARAMS) {
+    m_params[j][i] = pij;
+    m_params[i][j] = pij;
+    m_cutsq[i][j] = cutone_sq;
+    m_cutsq[j][i] = m_cutsq[i][j];
+    m_cut_ljsq[i][j] = cut_ljsq[i][j];
+    m_cut_ljsq[j][i] = m_cut_ljsq[i][j];
+    m_cut_coulsq[i][j] = cut_coulsq;
+    m_cut_coulsq[j][i] = m_cut_coulsq[i][j];
+  }
+
+  k_cutsq.h_view(j,i) = cutone_sq;
+  k_cutsq.h_view(i,j) = k_cutsq.h_view(j,i);
+  k_cut_ljsq.h_view(j,i) = cut_ljsq[i][j];
+  k_cut_ljsq.h_view(i,j) = k_cut_ljsq.h_view(j,i);
+
+  // mark the host copies as modified
+
+  k_cutsq.template modify<LMPHostType>();
+  k_cut_ljsq.template modify<LMPHostType>();
+  k_params.template modify<LMPHostType>();
+
+  return cutone;
+}
+
+namespace LAMMPS_NS {
+template class PairLJSPICACoulLongKokkos<LMPDeviceType>;  // explicit instantiation for the device type
+#ifdef LMP_KOKKOS_GPU
+template class PairLJSPICACoulLongKokkos<LMPHostType>;  // additional host instantiation, compiled only when LMP_KOKKOS_GPU is defined
+#endif  // LMP_KOKKOS_GPU
+}  // namespace LAMMPS_NS
+
--- a/src/KOKKOS/pair_lj_spica_coul_long_kokkos.h
+++ b/src/KOKKOS/pair_lj_spica_coul_long_kokkos.h
@ -0,0 +1,148 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+// clang-format off
+PairStyle(lj/spica/coul/long/kk,PairLJSPICACoulLongKokkos<LMPDeviceType>);
+PairStyle(lj/spica/coul/long/kk/device,PairLJSPICACoulLongKokkos<LMPDeviceType>);
+PairStyle(lj/spica/coul/long/kk/host,PairLJSPICACoulLongKokkos<LMPHostType>);
+PairStyle(lj/sdk/coul/long/kk,PairLJSPICACoulLongKokkos<LMPDeviceType>);
+PairStyle(lj/sdk/coul/long/kk/device,PairLJSPICACoulLongKokkos<LMPDeviceType>);
+PairStyle(lj/sdk/coul/long/kk/host,PairLJSPICACoulLongKokkos<LMPHostType>);
+// clang-format on
+#else
+
+// clang-format off
+#ifndef LMP_PAIR_LJ_SPICA_COUL_LONG_KOKKOS_H
+#define LMP_PAIR_LJ_SPICA_COUL_LONG_KOKKOS_H
+
+#include "pair_kokkos.h"
+#include "pair_lj_spica_coul_long.h"
+#include "neigh_list_kokkos.h"
+
+namespace LAMMPS_NS {
+
+template<class DeviceType>
+class PairLJSPICACoulLongKokkos : public PairLJSPICACoulLong {  // Kokkos version of PairLJSPICACoulLong, templated on the Kokkos device type
+ public:
+  enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF};  // bitmask built from the FULL, HALFTHREAD and HALF neighbor flags
+  enum {COUL_FLAG=1};  // NOTE(review): presumably tells the shared pair kernels that a Coulomb term exists - confirm in pair_kokkos.h
+  typedef DeviceType device_type;
+  typedef ArrayTypes<DeviceType> AT;
+  PairLJSPICACoulLongKokkos(class LAMMPS *);
+  ~PairLJSPICACoulLongKokkos() override;
+
+  void compute(int, int) override;
+
+  void init_tables(double cut_coul, double *cut_respa) override;  // builds the base class tables, then copies them into the d_*table views
+  void init_style() override;  // base init, rRESPA inner/middle check, neighbor list request setup
+  double init_one(int, int) override;  // fills k_params, the m_* arrays and the cutoff views for one type pair
+
+  struct params_lj_spica_coul {  // coefficients stored per pair of atom types
+    KOKKOS_INLINE_FUNCTION
+    params_lj_spica_coul() {cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;lj_type=0;};  // zero every field
+    KOKKOS_INLINE_FUNCTION
+    params_lj_spica_coul(int /*i*/) {cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;lj_type=0;};  // same; the int argument is unused
+    F_FLOAT cut_ljsq,cut_coulsq,lj1,lj2,lj3,lj4,offset;
+    int lj_type;
+  };
+
+ protected:
+  template<bool STACKPARAMS, class Specialisation>
+  KOKKOS_INLINE_FUNCTION
+  F_FLOAT compute_fpair(const F_FLOAT& rsq, const int& i, const int& j, const int& itype, const int& jtype) const;
+
+  template<bool STACKPARAMS, class Specialisation>
+  KOKKOS_INLINE_FUNCTION
+  F_FLOAT compute_evdwl(const F_FLOAT& rsq, const int& i, const int& j, const int& itype, const int& jtype) const;
+
+  template<bool STACKPARAMS, class Specialisation>
+  KOKKOS_INLINE_FUNCTION
+  F_FLOAT compute_fcoul(const F_FLOAT& rsq, const int& i, const int& j, const int& itype,
+                        const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const;
+
+  template<bool STACKPARAMS, class Specialisation>
+  KOKKOS_INLINE_FUNCTION
+  F_FLOAT compute_ecoul(const F_FLOAT& rsq, const int& i, const int& j, const int& itype,  // Coulomb energy of one pair, tabulated or analytic
+                        const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const;
+
+  Kokkos::DualView<params_lj_spica_coul**,Kokkos::LayoutRight,DeviceType> k_params;  // per type-pair parameters; host side is filled in init_one()
+  typename Kokkos::DualView<params_lj_spica_coul**,Kokkos::LayoutRight,DeviceType>::t_dev_const_um params;  // view of k_params, assigned in allocate()
+  params_lj_spica_coul m_params[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];  // fixed-size copy for type indices <= MAX_TYPES_STACKPARAMS (original note: space for 12 atom types - confirm against the macro)
+  F_FLOAT m_cutsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
+  F_FLOAT m_cut_ljsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
+  F_FLOAT m_cut_coulsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
+  typename AT::t_x_array_randomread x;
+  typename AT::t_x_array c_x;
+  typename AT::t_f_array f;
+  typename AT::t_int_1d_randomread type;
+  typename AT::t_float_1d_randomread q;  // per-atom charges, read as q[j] in the Coulomb kernels
+
+  DAT::tdual_efloat_1d k_eatom;
+  DAT::tdual_virial_array k_vatom;
+  typename AT::t_efloat_1d d_eatom;
+  typename AT::t_virial_array d_vatom;
+
+  int newton_pair;
+
+  typename AT::tdual_ffloat_2d k_cutsq, k_cut_ljsq;  // dual views created in allocate()
+  typename AT::t_ffloat_2d d_cutsq, d_cut_ljsq, d_cut_coulsq;
+
+  typename AT::t_ffloat_1d_randomread  // copies of the Coulomb tables, assigned in init_tables()
+    d_rtable, d_drtable, d_ftable, d_dftable,
+    d_ctable, d_dctable, d_etable, d_detable;
+
+  int neighflag;  // set from lmp->kokkos->neighflag in init_style()
+  int nlocal,nall,eflag,vflag;
+
+  double special_lj[4];
+  double special_coul[4];
+  double qqrd2e;
+
+  void allocate() override;
+  friend struct PairComputeFunctor<PairLJSPICACoulLongKokkos,FULL,true,0,CoulLongTable<1>>;  // friends for the CoulLongTable<1> specialisation
+  friend struct PairComputeFunctor<PairLJSPICACoulLongKokkos,FULL,true,1,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJSPICACoulLongKokkos,HALF,true,0,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJSPICACoulLongKokkos,HALFTHREAD,true,0,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJSPICACoulLongKokkos,FULL,false,0,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJSPICACoulLongKokkos,FULL,false,1,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJSPICACoulLongKokkos,HALF,false,0,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJSPICACoulLongKokkos,HALFTHREAD,false,0,CoulLongTable<1>>;
+  friend EV_FLOAT pair_compute_neighlist<PairLJSPICACoulLongKokkos,FULL,0,CoulLongTable<1>>(PairLJSPICACoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJSPICACoulLongKokkos,FULL,1,CoulLongTable<1>>(PairLJSPICACoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJSPICACoulLongKokkos,HALF,0,CoulLongTable<1>>(PairLJSPICACoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJSPICACoulLongKokkos,HALFTHREAD,0,CoulLongTable<1>>(PairLJSPICACoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute<PairLJSPICACoulLongKokkos,CoulLongTable<1>>(PairLJSPICACoulLongKokkos*,NeighListKokkos<DeviceType>*);
+
+  friend struct PairComputeFunctor<PairLJSPICACoulLongKokkos,FULL,true,0,CoulLongTable<0>>;  // friends for the CoulLongTable<0> specialisation
+  friend struct PairComputeFunctor<PairLJSPICACoulLongKokkos,FULL,true,1,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJSPICACoulLongKokkos,HALF,true,0,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJSPICACoulLongKokkos,HALFTHREAD,true,0,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJSPICACoulLongKokkos,FULL,false,0,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJSPICACoulLongKokkos,FULL,false,1,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJSPICACoulLongKokkos,HALF,false,0,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJSPICACoulLongKokkos,HALFTHREAD,false,0,CoulLongTable<0>>;
+  friend EV_FLOAT pair_compute_neighlist<PairLJSPICACoulLongKokkos,FULL,0,CoulLongTable<0>>(PairLJSPICACoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJSPICACoulLongKokkos,FULL,1,CoulLongTable<0>>(PairLJSPICACoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJSPICACoulLongKokkos,HALF,0,CoulLongTable<0>>(PairLJSPICACoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJSPICACoulLongKokkos,HALFTHREAD,0,CoulLongTable<0>>(PairLJSPICACoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute<PairLJSPICACoulLongKokkos,CoulLongTable<0>>(PairLJSPICACoulLongKokkos*,NeighListKokkos<DeviceType>*);
+
+  friend void pair_virial_fdotr_compute<PairLJSPICACoulLongKokkos>(PairLJSPICACoulLongKokkos*);
+};
+
+}
+
+#endif
+#endif
+
--- a/src/KOKKOS/pair_lj_spica_kokkos.cpp
+++ b/src/KOKKOS/pair_lj_spica_kokkos.cpp
@ -70,7 +70,6 @@ void PairLJSPICAKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
  eflag = eflag_in;
  vflag = vflag_in;

-
  if (neighflag == FULL) no_virial_fdotr_compute = 1;

  ev_init(eflag,vflag,0);
@ -108,6 +107,8 @@ void PairLJSPICAKokkos<DeviceType>::compute(int eflag_in, int vflag_in)

  // loop over neighbors of my atoms

+  copymode = 1;
+
  EV_FLOAT ev = pair_compute<PairLJSPICAKokkos<DeviceType>,void >(this,(NeighListKokkos<DeviceType>*)list);

  if (eflag) eng_vdwl += ev.evdwl;
@ -132,8 +133,13 @@ void PairLJSPICAKokkos<DeviceType>::compute(int eflag_in, int vflag_in)

  if (vflag_fdotr) pair_virial_fdotr_compute(this);

+  copymode = 0;
 }

+/* ----------------------------------------------------------------------
+   compute pair force between atoms i and j
+   ---------------------------------------------------------------------- */
+
 template<class DeviceType>
 template<bool STACKPARAMS, class Specialisation>
 KOKKOS_INLINE_FUNCTION
@ -152,6 +158,10 @@ compute_fpair(const F_FLOAT &rsq, const int &, const int &, const int &itype, co
  return a* ( lj_1*r6inv*b - lj_2 * r2inv);
 }

+/* ----------------------------------------------------------------------
+   compute pair potential energy between atoms i and j
+   ---------------------------------------------------------------------- */
+
 template<class DeviceType>
 template<bool STACKPARAMS, class Specialisation>
 KOKKOS_INLINE_FUNCTION
@ -166,18 +176,14 @@ compute_evdwl(const F_FLOAT &rsq, const int &, const int &, const int &itype, co

  if (ljt == LJ12_4) {
    const F_FLOAT r4inv=r2inv*r2inv;
-
    return r4inv*(lj_3*r4inv*r4inv - lj_4) - offset;
-
  } else if (ljt == LJ9_6) {
    const F_FLOAT r3inv = r2inv*sqrt(r2inv);
    const F_FLOAT r6inv = r3inv*r3inv;
    return r6inv*(lj_3*r3inv - lj_4) - offset;
-
  } else if (ljt == LJ12_6) {
    const double r6inv = r2inv*r2inv*r2inv;
    return r6inv*(lj_3*r6inv - lj_4) - offset;
-
  } else if (ljt == LJ12_5) {
    const F_FLOAT r5inv = r2inv*r2inv*sqrt(r2inv);
    const F_FLOAT r7inv = r5inv*r2inv;
@ -273,8 +279,6 @@ double PairLJSPICAKokkos<DeviceType>::init_one(int i, int j)
  return cutone;
 }

-
-
 namespace LAMMPS_NS {
 template class PairLJSPICAKokkos<LMPDeviceType>;
 #ifdef LMP_KOKKOS_GPU
--- a/src/KOKKOS/pair_uf3_kokkos.cpp
+++ b/src/KOKKOS/pair_uf3_kokkos.cpp
@ -1655,7 +1655,7 @@ double PairUF3Kokkos<DeviceType>::single(int /*i*/, int /*j*/, int itype, int jt

 namespace LAMMPS_NS {
 template class PairUF3Kokkos<LMPDeviceType>;
-#ifdef KOKKOS_ENABLE_CUDA
+#ifdef LMP_KOKKOS_GPU
 template class PairUF3Kokkos<LMPHostType>;
 #endif
 }    // namespace LAMMPS_NS
--- a/Show More
+++ b/Show More