Merge branch 'master' into citation-refactor

2021-01-08 18:32:30 -05:00
parent 6428e542db 102a6eba79
commit 9ce477dd3f
546 changed files with 9286 additions and 7269 deletions
--- a/cmake/CMakeLists.txt
+++ b/cmake/CMakeLists.txt
@ -25,7 +25,7 @@ set(LAMMPS_POTENTIALS_DIR ${LAMMPS_DIR}/potentials)
 find_package(Git)

 # by default, install into $HOME/.local (not /usr/local), so that no root access (and sudo!!) is needed
-if (CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
+if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
  set(CMAKE_INSTALL_PREFIX "$ENV{HOME}/.local" CACHE PATH "default install path" FORCE )
 endif()

@ -33,7 +33,7 @@ endif()
 set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/Modules)

 # make sure LIBRARY_PATH is set if environment variable is set
-if (DEFINED ENV{LIBRARY_PATH})
+if(DEFINED ENV{LIBRARY_PATH})
  list(APPEND CMAKE_LIBRARY_PATH "$ENV{LIBRARY_PATH}")
  message(STATUS "Appending $ENV{LIBRARY_PATH} to CMAKE_LIBRARY_PATH: ${CMAKE_LIBRARY_PATH}")
 endif()
@ -373,7 +373,7 @@ else()
  set(CUDA_REQUEST_PIC)
 endif()

-foreach(PKG_WITH_INCL KSPACE PYTHON VORONOI USER-COLVARS USER-MOLFILE USER-NETCDF USER-PLUMED USER-QMMM
+foreach(PKG_WITH_INCL KSPACE PYTHON MLIAP VORONOI USER-COLVARS USER-MOLFILE USER-NETCDF USER-PLUMED USER-QMMM
        USER-QUIP USER-SCAFACOS USER-SMD USER-VTK KIM LATTE MESSAGE MSCG COMPRESS)
  if(PKG_${PKG_WITH_INCL})
    include(Packages/${PKG_WITH_INCL})
@ -580,7 +580,7 @@ add_dependencies(lammps gitversion)
 ############################################
 get_property(LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES)
 list (FIND LANGUAGES "Fortran" _index)
-if (${_index} GREATER -1)
+if(${_index} GREATER -1)
  target_link_libraries(lammps PRIVATE ${CMAKE_Fortran_IMPLICIT_LINK_LIBRARIES})
 endif()
 set(LAMMPS_CXX_HEADERS angle.h atom.h bond.h citeme.h comm.h compute.h dihedral.h domain.h error.h fix.h force.h group.h improper.h
@ -737,14 +737,14 @@ if(OPTIONS)
 endif()
 get_property(LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES)
 list (FIND LANGUAGES "Fortran" _index)
-if (${_index} GREATER -1)
+if(${_index} GREATER -1)
  message(STATUS "Fortran Compiler: ${CMAKE_Fortran_COMPILER}
      Type:          ${CMAKE_Fortran_COMPILER_ID}
      Version:       ${CMAKE_Fortran_COMPILER_VERSION}
      Fortran Flags:${CMAKE_Fortran_FLAGS} ${CMAKE_Fortran_FLAGS_${BTYPE}}")
 endif()
 list (FIND LANGUAGES "C" _index)
-if (${_index} GREATER -1)
+if(${_index} GREATER -1)
  message(STATUS "C compiler:       ${CMAKE_C_COMPILER}
      Type:          ${CMAKE_C_COMPILER_ID}
      Version:       ${CMAKE_C_COMPILER_VERSION}
--- a/cmake/Modules/CodingStandard.cmake
+++ b/cmake/Modules/CodingStandard.cmake
@ -8,7 +8,7 @@ else()
    find_package(Python3 COMPONENTS Interpreter QUIET)
 endif()

-if (Python3_EXECUTABLE)
+if(Python3_EXECUTABLE)
    if(Python3_VERSION VERSION_GREATER_EQUAL 3.5)
        add_custom_target(
          check-whitespace
--- a/cmake/Modules/FindCythonize.cmake
+++ b/cmake/Modules/FindCythonize.cmake
@ -0,0 +1,30 @@
+# FindCythonize
+# -------------
+# Locate the ``cythonize`` command line tool.
+#
+# Result variables:
+#
+#  Cythonize_EXECUTABLE - location of the cythonize program (filled in by find_program())
+#  Cythonize_FOUND      - set by find_package_handle_standard_args() below
+#
+# adapted from https://github.com/cmarshall108/cython-cmake-example/blob/master/cmake/FindCython.cmake
+#=============================================================================
+
+# A Python 3.6+ interpreter is looked up first, because its directory is used
+# as search hint below.  FindPython3 is used with CMake 3.12 and later, older
+# versions fall back to FindPythonInterp and copy its result variable.
+if(NOT CMAKE_VERSION VERSION_LESS 3.12)
+  find_package(Python3 3.6 COMPONENTS Interpreter QUIET)
+else()
+  find_package(PythonInterp 3.6 QUIET) # Deprecated since version 3.12
+  if(PYTHONINTERP_FOUND)
+    set(Python3_EXECUTABLE ${PYTHON_EXECUTABLE})
+  endif()
+endif()
+
+# The search is only done when an interpreter is known; the folder holding
+# the interpreter is passed as a hint so that a cythonize executable
+# installed next to it can be picked up.
+if(Python3_EXECUTABLE)
+  get_filename_component(_cythonize_hint_dir "${Python3_EXECUTABLE}" DIRECTORY)
+  find_program(Cythonize_EXECUTABLE
+    NAMES cythonize3 cythonize cythonize.bat
+    HINTS "${_cythonize_hint_dir}")
+endif()
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(Cythonize REQUIRED_VARS Cythonize_EXECUTABLE)
+mark_as_advanced(Cythonize_EXECUTABLE)
--- a/cmake/Modules/LAMMPSUtils.cmake
+++ b/cmake/Modules/LAMMPSUtils.cmake
@ -50,6 +50,7 @@ function(check_for_autogen_files source_dir)
    file(GLOB SRC_AUTOGEN_FILES ${source_dir}/style_*.h)
    file(GLOB SRC_AUTOGEN_PACKAGES ${source_dir}/packages_*.h)
    list(APPEND SRC_AUTOGEN_FILES ${SRC_AUTOGEN_PACKAGES} ${source_dir}/lmpinstalledpkgs.h ${source_dir}/lmpgitversion.h)
+    list(APPEND SRC_AUTOGEN_FILES ${SRC_AUTOGEN_PACKAGES} ${source_dir}/mliap_model_python_couple.h ${source_dir}/mliap_model_python_couple.cpp)
    foreach(_SRC ${SRC_AUTOGEN_FILES})
      get_filename_component(FILENAME "${_SRC}" NAME)
      if(EXISTS ${source_dir}/${FILENAME})
--- a/cmake/Modules/MPI4WIN.cmake
+++ b/cmake/Modules/MPI4WIN.cmake
@ -1,7 +1,7 @@
 # Download and configure custom MPICH files for Windows
 message(STATUS "Downloading and configuring MPICH-1.4.1 for Windows")
 include(ExternalProject)
-if (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+if(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
  ExternalProject_Add(mpi4win_build
    URL https://download.lammps.org/thirdparty/mpich2-win64-devel.tar.gz
    URL_MD5 4939fdb59d13182fd5dd65211e469f14
--- a/cmake/Modules/Packages/MLIAP.cmake
+++ b/cmake/Modules/Packages/MLIAP.cmake
@ -0,0 +1,31 @@
+# MLIAP package: optional Python support (used by the "mliappy" model).
+#
+# The Python coupling is generated from a Cython source file with the
+# cythonize tool.  MLIAP_ENABLE_PYTHON defaults to ON only if the PYTHON
+# package is included and cythonize could be found, otherwise to OFF.
+set(MLIAP_ENABLE_PYTHON_DEFAULT OFF)
+if(PKG_PYTHON)
+  find_package(Cythonize)
+  if(Cythonize_FOUND)
+    set(MLIAP_ENABLE_PYTHON_DEFAULT ON)
+  endif()
+endif()
+
+option(MLIAP_ENABLE_PYTHON "Build MLIAP package with Python support" ${MLIAP_ENABLE_PYTHON_DEFAULT})
+
+if(MLIAP_ENABLE_PYTHON)
+  # check the package prerequisite first, so that a missing PYTHON package is
+  # reported as such and not hidden behind a failed search for cythonize
+  if(NOT PKG_PYTHON)
+    message(FATAL_ERROR "Must enable PYTHON package for including Python support in MLIAP")
+  endif()
+  find_package(Cythonize REQUIRED)
+
+  # the .pyx file is copied into a folder in the build tree and cythonize is
+  # run on that copy; the generated .cpp file is then compiled into lammps
+  set(MLIAP_BINARY_DIR ${CMAKE_BINARY_DIR}/cython)
+  set(MLIAP_CYTHON_SRC ${LAMMPS_SOURCE_DIR}/MLIAP/mliap_model_python_couple.pyx)
+  get_filename_component(MLIAP_CYTHON_BASE ${MLIAP_CYTHON_SRC} NAME_WE)
+  file(MAKE_DIRECTORY ${MLIAP_BINARY_DIR})
+  # VERBATIM makes the escaping of the command arguments independent of the platform
+  add_custom_command(OUTPUT  ${MLIAP_BINARY_DIR}/${MLIAP_CYTHON_BASE}.cpp ${MLIAP_BINARY_DIR}/${MLIAP_CYTHON_BASE}.h
+          COMMAND            ${CMAKE_COMMAND} -E copy_if_different ${MLIAP_CYTHON_SRC} ${MLIAP_BINARY_DIR}/${MLIAP_CYTHON_BASE}.pyx
+          COMMAND            ${Cythonize_EXECUTABLE} -3 ${MLIAP_BINARY_DIR}/${MLIAP_CYTHON_BASE}.pyx
+          WORKING_DIRECTORY  ${MLIAP_BINARY_DIR}
+          MAIN_DEPENDENCY    ${MLIAP_CYTHON_SRC}
+          COMMENT "Generating C++ sources with cythonize..."
+          VERBATIM)
+  # MLIAP_PYTHON is the define that switches on the Python support in the sources
+  target_compile_definitions(lammps PRIVATE -DMLIAP_PYTHON)
+  target_sources(lammps PRIVATE ${MLIAP_BINARY_DIR}/${MLIAP_CYTHON_BASE}.cpp)
+  # the generated header is located in the build tree as well
+  target_include_directories(lammps PRIVATE ${MLIAP_BINARY_DIR})
+endif()
--- a/doc/src/Build_extras.rst
+++ b/doc/src/Build_extras.rst
@ -37,6 +37,7 @@ This is the list of packages that may require additional steps.
   * :ref:`KOKKOS <kokkos>`
   * :ref:`LATTE <latte>`
   * :ref:`MESSAGE <message>`
+   * :ref:`MLIAP <mliap>`
   * :ref:`MSCG <mscg>`
   * :ref:`OPT <opt>`
   * :ref:`POEMS <poems>`
@ -770,6 +771,54 @@ be installed on your system.

 ----------

+.. _mliap:
+
+MLIAP package
+---------------------------
+
+Building the MLIAP package requires including the :ref:`SNAP <PKG-SNAP>`
+package.  There will be an error message if this requirement is not satisfied.
+Using the *mliappy* model also requires enabling Python support, which
+in turn requires the :ref:`PYTHON <PKG-PYTHON>`
+package **and** requires you have the `cython <https://cython.org>`_ software
+installed and with it a working ``cythonize`` command.  This feature requires
+compiling LAMMPS with Python version 3.6 or later.
+
+.. tabs::
+
+   .. tab:: CMake build
+
+      .. code-block:: bash
+
+         -D MLIAP_ENABLE_PYTHON=value   # enable mliappy model (default is autodetect)
+
+      Without this setting, CMake will check whether it can find a
+      suitable Python version and the ``cythonize`` command and choose
+      the default accordingly.  During the build procedure the provided
+      .pyx file(s) will be automatically translated to C++ code and compiled.
+      Please do **not** run ``cythonize`` manually in the ``src/MLIAP`` folder,
+      as that can lead to compilation errors if Python support is not enabled.
+      If you did by accident, please remove the generated .cpp and .h files.
+
+   .. tab:: Traditional make
+
+      The build uses the ``lib/python/Makefile.mliap_python`` file in the
+      compile/link process to add a rule to update the files generated by
+      the ``cythonize`` command in case the corresponding .pyx file(s) were
+      modified.  You may need to modify ``lib/python/Makefile.lammps``
+      if the LAMMPS build fails.
+      To manually enforce building MLIAP with Python support enabled,
+      you can add
+      ``-DMLIAP_PYTHON`` to the ``LMP_INC`` variable in your machine makefile.
+      You may have to manually run the ``cythonize`` command on .pyx file(s)
+      in the ``src`` folder, if this is not automatically done during
+      installing the MLIAP package.  Please do **not** run ``cythonize``
+      in the ``src/MLIAP`` folder, as that can lead to compilation errors
+      if Python support is not enabled.
+      If you did by accident, please remove the generated .cpp and .h files.
+
+----------
+
 .. _mscg:

 MSCG package
--- a/doc/src/Build_package.rst
+++ b/doc/src/Build_package.rst
@ -1,5 +1,4 @@
 Include packages in build
-
 =========================

 In LAMMPS, a package is a group of files that enable a specific set of
--- a/doc/src/Packages_details.rst
+++ b/doc/src/Packages_details.rst
@ -662,19 +662,31 @@ MLIAP package

 **Contents:**

-A general interface for machine-learning interatomic potentials.
+A general interface for machine-learning interatomic potentials, including PyTorch.

 **Install:**

-To use this package, also the :ref:`SNAP package <PKG-SNAP>` needs to be installed.
+To use this package, also the :ref:`SNAP package <PKG-SNAP>` needs
+to be installed.  To make the *mliappy* model available, also the
+:ref:`PYTHON package <PKG-PYTHON>` needs to be installed, the version of
+Python must be 3.6 or later, and the `cython <https://cython.org/>`_ software
+must be installed.

-**Author:** Aidan Thompson (Sandia).
+**Author:** Aidan Thompson (Sandia), Nicholas Lubbers (LANL).

 **Supporting info:**

 * src/MLIAP: filenames -> commands
+* src/MLIAP/README
 * :doc:`pair_style mliap <pair_mliap>`
-* examples/mliap
+* :doc:`compute_style mliap <compute_mliap>`
+* examples/mliap (see README)
+
+When built with the *mliappy* model this package includes an extension for
+coupling with Python models, including PyTorch. In this case, the Python
+interpreter linked to LAMMPS will need the ``cython`` and ``numpy`` modules
+installed.  The provided examples build models with PyTorch, which
+therefore also needs to be installed to run those examples.

 ----------

--- a/doc/src/Speed_kokkos.rst
+++ b/doc/src/Speed_kokkos.rst
@ -38,14 +38,14 @@ produce an executable compatible with a specific hardware.
   :class: note

   Kokkos with CUDA currently implicitly assumes that the MPI library is
-   CUDA-aware. This is not always the case, especially when using
+   GPU-aware. This is not always the case, especially when using
   pre-compiled MPI libraries provided by a Linux distribution. This is
   not a problem when using only a single GPU with a single MPI
   rank. When running with multiple MPI ranks, you may see segmentation
-   faults without CUDA-aware MPI support. These can be avoided by adding
-   the flags :doc:`-pk kokkos cuda/aware off <Run_options>` to the
+   faults without GPU-aware MPI support. These can be avoided by adding
+   the flags :doc:`-pk kokkos gpu/aware off <Run_options>` to the
   LAMMPS command line or by using the command :doc:`package kokkos
-   cuda/aware off <package>` in the input file.
+   gpu/aware off <package>` in the input file.

 .. admonition:: AMD GPU support
   :class: note
@ -242,8 +242,8 @@ case, also packing/unpacking communication buffers on the host may give
 speedup (see the KOKKOS :doc:`package <package>` command). Using CUDA MPS
 is recommended in this scenario.

-Using a CUDA-aware MPI library is highly recommended. CUDA-aware MPI use can be
-avoided by using :doc:`-pk kokkos cuda/aware no <package>`. As above for
+Using a GPU-aware MPI library is highly recommended. GPU-aware MPI use can be
+avoided by using :doc:`-pk kokkos gpu/aware off <package>`. As above for
 multi-core CPUs (and no GPU), if N is the number of physical cores/node,
 then the number of MPI tasks/node should not exceed N.

--- a/doc/src/compute_mliap.rst
+++ b/doc/src/compute_mliap.rst
@ -18,7 +18,7 @@ Syntax
  .. parsed-literal::

       *model* values = style
-         style = *linear* or *quadratic*
+         style = *linear* or *quadratic* or *mliappy*
       *descriptor* values = style filename
         style = *sna*
         filename = name of file containing descriptor definitions
@ -56,13 +56,15 @@ and it is also straightforward to add new descriptor styles.
 The compute *mliap* command must be followed by two keywords
 *model* and *descriptor* in either order.

-The *model* keyword is followed by a model style, currently limited to
-either *linear* or *quadratic*.
+The *model* keyword is followed by the model style (*linear*, *quadratic* or *mliappy*).
+The *mliappy* model is only available
+if LAMMPS is built with the MLIAP package and Python support enabled.

 The *descriptor* keyword is followed by a descriptor style, and additional arguments.
-Currently the only descriptor style is *sna*, indicating the bispectrum component
-descriptors used by the Spectral Neighbor Analysis Potential (SNAP) potentials of
-:doc:`pair_style snap <pair_snap>`.
+The compute currently supports just one descriptor style, but it is
+straightforward to add new descriptor styles.
+The SNAP descriptor style *sna* is the same as that used by :doc:`pair_style snap <pair_snap>`,
+including the linear, quadratic, and chem variants.
 A single additional argument specifies the descriptor filename
 containing the parameters and setting used by the SNAP descriptor.
 The descriptor filename usually ends in the *.mliap.descriptor* extension.
@ -162,9 +164,10 @@ potentials, see the examples in `FitSNAP <https://github.com/FitSNAP/FitSNAP>`_.
 Restrictions
 """"""""""""

-This compute is part of the MLIAP package.  It is only enabled if
-LAMMPS was built with that package.  In addition, building LAMMPS with the MLIAP package
+This compute is part of the MLIAP package.  It is only enabled if LAMMPS
+was built with that package. In addition, building LAMMPS with the MLIAP package
 requires building LAMMPS with the SNAP package.
+The *mliappy* model requires building LAMMPS with the PYTHON package.
 See the :doc:`Build package <Build_package>` doc page for more info.

 Related commands
--- a/doc/src/compute_orientorder_atom.rst
+++ b/doc/src/compute_orientorder_atom.rst
@ -115,8 +115,8 @@ The optional keyword *chunksize* is only applicable when using the
 the KOKKOS package and is ignored otherwise. This keyword controls
 the number of atoms in each pass used to compute the bond-orientational
 order parameters and is used to avoid running out of memory. For example
-if there are 4000 atoms in the simulation and the *chunksize*
-is set to 2000, the parameter calculation will be broken up
+if there are 32768 atoms in the simulation and the *chunksize*
+is set to 16384, the parameter calculation will be broken up
 into two passes.

 The value of :math:`Q_l` is set to zero for atoms not in the
@ -193,7 +193,7 @@ Default

 The option defaults are *cutoff* = pair style cutoff, *nnn* = 12,
 *degrees* = 5 4 6 8 10 12 i.e. :math:`Q_4`, :math:`Q_6`, :math:`Q_8`, :math:`Q_{10}`, and :math:`Q_{12}`,
-*wl* = no, *wl/hat* = no, *components* off, and *chunksize* = 2000
+*wl* = no, *wl/hat* = no, *components* off, and *chunksize* = 16384

 ----------

--- a/doc/src/fix_ave_correlate.rst
+++ b/doc/src/fix_ave_correlate.rst
@ -93,7 +93,7 @@ from a compute, fix, or variable, then see the :doc:`fix ave/chunk <fix_ave_chun
 :doc:`fix ave/histo <fix_ave_histo>` commands.  If you wish to convert a
 per-atom quantity into a single global value, see the :doc:`compute reduce <compute_reduce>` command.

-The input values must either be all scalars.  What kinds of
+The input values must be all scalars.  What kinds of
 correlations between input values are calculated is determined by the
 *type* keyword as discussed below.

--- a/doc/src/package.rst
+++ b/doc/src/package.rst
@ -68,7 +68,7 @@ Syntax
           *no_affinity* values = none
       *kokkos* args = keyword value ...
         zero or more keyword/value pairs may be appended
-         keywords = *neigh* or *neigh/qeq* or *neigh/thread* or *newton* or *binsize* or *comm* or *comm/exchange* or *comm/forward* or *comm/reverse* or *cuda/aware* or *pair/only*
+         keywords = *neigh* or *neigh/qeq* or *neigh/thread* or *newton* or *binsize* or *comm* or *comm/exchange* or *comm/forward* or *pair/comm/forward* or *fix/comm/forward* or *comm/reverse* or *gpu/aware* or *pair/only*
           *neigh* value = *full* or *half*
             full = full neighbor list
             half = half neighbor list built in thread-safe manner
@ -84,16 +84,18 @@ Syntax
           *binsize* value = size
             size = bin size for neighbor list construction (distance units)
           *comm* value = *no* or *host* or *device*
-             use value for comm/exchange and comm/forward and comm/reverse
+             use value for comm/exchange and comm/forward and pair/comm/forward and fix/comm/forward and comm/reverse
           *comm/exchange* value = *no* or *host* or *device*
           *comm/forward* value = *no* or *host* or *device*
+           *pair/comm/forward* value = *no* or *device*
+           *fix/comm/forward* value = *no* or *device*
           *comm/reverse* value = *no* or *host* or *device*
             no = perform communication pack/unpack in non-KOKKOS mode
             host = perform pack/unpack on host (e.g. with OpenMP threading)
             device = perform pack/unpack on device (e.g. on GPU)
-           *cuda/aware* = *off* or *on*
-             off = do not use CUDA-aware MPI
-             on = use CUDA-aware MPI (default)
+           *gpu/aware* = *off* or *on*
+             off = do not use GPU-aware MPI
+             on = use GPU-aware MPI (default)
           *pair/only* = *off* or *on*
             off = use device acceleration (e.g. GPU) for all available styles in the KOKKOS package (default)
             on  = use device acceleration only for pair styles (and host acceleration for others)
@ -498,7 +500,8 @@ because the GPU is faster at performing pairwise interactions, then this
 rule of thumb may give too large a binsize and the default should be
 overridden with a smaller value.

-The *comm* and *comm/exchange* and *comm/forward* and *comm/reverse*
+The *comm* and *comm/exchange* and *comm/forward* and *pair/comm/forward*
+and *fix/comm/forward* and *comm/reverse*
 keywords determine whether the host or device performs the packing and
 unpacking of data when communicating per-atom data between processors.
 "Exchange" communication happens only on timesteps that neighbor lists
@ -506,18 +509,22 @@ are rebuilt. The data is only for atoms that migrate to new processors.
 "Forward" communication happens every timestep. "Reverse" communication
 happens every timestep if the *newton* option is on. The data is for
 atom coordinates and any other atom properties that needs to be updated
-for ghost atoms owned by each processor.
+for ghost atoms owned by each processor. "Pair/comm" controls additional
+communication in pair styles, such as pair_style EAM. "Fix/comm" controls
+additional communication in fixes, such as fix SHAKE.

-The *comm* keyword is simply a short-cut to set the same value for both
-the *comm/exchange* and *comm/forward* and *comm/reverse* keywords.
+The *comm* keyword is simply a short-cut to set the same value for all
+the comm keywords.

-The value options for all 3 keywords are *no* or *host* or *device*\ . A
+The value options for the keywords are *no* or *host* or *device*\ . A
 value of *no* means to use the standard non-KOKKOS method of
 packing/unpacking data for the communication. A value of *host* means to
 use the host, typically a multi-core CPU, and perform the
 packing/unpacking in parallel with threads. A value of *device* means to
 use the device, typically a GPU, to perform the packing/unpacking
-operation.
+operation. If a value of *host* is used for the *pair/comm/forward* or
+*fix/comm/forward* keyword, it will automatically be changed to *no*
+since these keywords don't support *host* mode.

 The optimal choice for these keywords depends on the input script and
 the hardware used. The *no* value is useful for verifying that the
@ -538,18 +545,18 @@ pack/unpack communicated data. When running small systems on a GPU,
 performing the exchange pack/unpack on the host CPU can give speedup
 since it reduces the number of CUDA kernel launches.

-The *cuda/aware* keyword chooses whether CUDA-aware MPI will be used. When
+The *gpu/aware* keyword chooses whether GPU-aware MPI will be used. When
 this keyword is set to *on*\ , buffers in GPU memory are passed directly
 through MPI send/receive calls. This reduces overhead of first copying
-the data to the host CPU. However CUDA-aware MPI is not supported on all
+the data to the host CPU. However GPU-aware MPI is not supported on all
 systems, which can lead to segmentation faults and would require using a
-value of *off*\ . If LAMMPS can safely detect that CUDA-aware MPI is not
+value of *off*\ . If LAMMPS can safely detect that GPU-aware MPI is not
 available (currently only possible with OpenMPI v2.0.0 or later), then
-the *cuda/aware* keyword is automatically set to *off* by default. When
-the *cuda/aware* keyword is set to *off* while any of the *comm*
+the *gpu/aware* keyword is automatically set to *off* by default. When
+the *gpu/aware* keyword is set to *off* while any of the *comm*
 keywords are set to *device*\ , the value for these *comm* keywords will
 be automatically changed to *no*\ . This setting has no effect if not
-running on GPUs or if using only one MPI rank. CUDA-aware MPI is available
+running on GPUs or if using only one MPI rank. GPU-aware MPI is available
 for OpenMPI 1.8 (or later versions), Mvapich2 1.9 (or later) when the
 "MV2_USE_CUDA" environment variable is set to "1", CrayMPI, and IBM
 Spectrum MPI when the "-gpu" flag is used.
@ -558,7 +565,7 @@ The *pair/only* keyword can change how the KOKKOS suffix "kk" is applied
 when using an accelerator device.  By default device acceleration is
 always used for all available styles.  With *pair/only* set to *on* the
 suffix setting will choose device acceleration only for pair styles and
-run all other force computations concurrently on the host CPU.
+run all other force computations on the host CPU.
 The *comm* flags will also automatically be changed to *no*\ . This can
 result in better performance for certain configurations and system sizes.

@ -671,8 +678,8 @@ script or via the "-pk intel" :doc:`command-line switch <Run_options>`.

 For the KOKKOS package, the option defaults for GPUs are neigh = full,
 neigh/qeq = full, newton = off, binsize for GPUs = 2x LAMMPS default
-value, comm = device, cuda/aware = on. When LAMMPS can safely detect
-that CUDA-aware MPI is not available, the default value of cuda/aware
+value, comm = device, gpu/aware = on. When LAMMPS can safely detect
+that GPU-aware MPI is not available, the default value of gpu/aware
 becomes "off". For CPUs or Xeon Phis, the option defaults are neigh =
 half, neigh/qeq = half, newton = on, binsize = 0.0, and comm = no. The
 option neigh/thread = on when there are 16K atoms or less on an MPI
--- a/doc/src/pair_mliap.rst
+++ b/doc/src/pair_mliap.rst
@ -16,7 +16,7 @@ Syntax
  .. parsed-literal::

       *model* values = style filename
-         style = *linear* or *quadratic*
+         style = *linear* or *quadratic* or *mliappy*
         filename = name of file containing model definitions
       *descriptor* values = style filename
         style = *sna*
@ -40,12 +40,15 @@ definitions of the interatomic potential functional form (*model*)
 and the geometric quantities that characterize the atomic positions
 (*descriptor*). By defining *model* and *descriptor* separately,
 it is possible to use many different models with a given descriptor,
-or many different descriptors with a given model. Currently, the pair_style
-supports just two models, *linear* and *quadratic*,
-and one descriptor, *sna*, the SNAP descriptor used by :doc:`pair_style snap <pair_snap>`, including the linear, quadratic,
-and chem variants. Work is currently underway to extend
-the interface to handle neural network energy models,
-and it is also straightforward to add new descriptor styles.
+or many different descriptors with a given model. The
+pair style currently supports just one descriptor style, but it is
+straightforward to add new descriptor styles.
+The SNAP descriptor style *sna* is the same as that used by :doc:`pair_style snap <pair_snap>`,
+including the linear, quadratic, and chem variants.
+The available models are *linear*, *quadratic*, and *mliappy*.
+The *mliappy* style can be used to couple python models,
+e.g. PyTorch neural network energy models, and requires building
+LAMMPS with the PYTHON package (see below).
 In order to train a model, it is useful to know the gradient or derivative
 of energy, force, and stress w.r.t. model parameters. This information
 can be accessed using the related :doc:`compute mliap <compute_mliap>` command.
@ -59,9 +62,8 @@ that specify the mapping of MLIAP
 element names to LAMMPS atom types,
 where N is the number of LAMMPS atom types.

-The *model* keyword is followed by a model style, currently limited to
-either *linear* or *quadratic*. In both cases,
-this is followed by a single argument specifying the model filename containing the
+The *model* keyword is followed by the model style. This is followed
+by a single argument specifying the model filename containing the
 parameters for a set of elements.
 The model filename usually ends in the *.mliap.model* extension.
 It may contain parameters for many elements. The only requirement is that it
@ -82,6 +84,16 @@ for the :doc:`pair_style snap <pair_snap>` coefficient file.
 Specifically, the line containing the element weight and radius is omitted,
 since these are handled by the *descriptor*.

+Notes on mliappy models:
+When the *model* keyword is *mliappy*, the filename should end in '.pt' or
+'.pth' for pytorch models, or be a pickle file. To load a model from
+memory (i.e. an existing python object), specify the filename as
+"LATER", and then call ``lammps.mliap.load_model(model)`` from python
+before using the pair style. When using LAMMPS via the library mode, you will need to call
+``lammps.mliappy.activate_mliappy(lmp)`` on the active LAMMPS object
+before the pair style is defined. This call locates and loads the mliap-specific
+python module that is built into LAMMPS.
+
 The *descriptor* keyword is followed by a descriptor style, and additional arguments.
 Currently the only descriptor style is *sna*, indicating the bispectrum component
 descriptors used by the Spectral Neighbor Analysis Potential (SNAP) potentials of
@ -138,11 +150,13 @@ This pair style can only be used via the *pair* keyword of the
 Restrictions
 """"""""""""

-This style is part of the MLIAP package.  It is only enabled if LAMMPS
+This pair style is part of the MLIAP package.  It is only enabled if LAMMPS
 was built with that package. In addition, building LAMMPS with the MLIAP package
 requires building LAMMPS with the SNAP package.
+The *mliappy* model requires building LAMMPS with the PYTHON package.
 See the :doc:`Build package <Build_package>` doc page for more info.

+
 Related commands
 """"""""""""""""

--- a/doc/src/pair_snap.rst
+++ b/doc/src/pair_snap.rst
@ -152,7 +152,7 @@ The default values for these keywords are
 * *chemflag* = 0
 * *bnormflag* = 0
 * *wselfallflag* = 0
-* *chunksize* = 2000
+* *chunksize* = 4096

 If *quadraticflag* is set to 1, then the SNAP energy expression includes additional quadratic terms
 that have been shown to increase the overall accuracy of the potential without much increase
@ -189,8 +189,8 @@ pair style *snap* with the KOKKOS package and is ignored otherwise.
 This keyword controls
 the number of atoms in each pass used to compute the bispectrum
 components and is used to avoid running out of memory. For example
-if there are 4000 atoms in the simulation and the *chunksize*
-is set to 2000, the bispectrum calculation will be broken up
+if there are 8192 atoms in the simulation and the *chunksize*
+is set to 4096, the bispectrum calculation will be broken up
 into two passes.

 Detailed definitions for all the other keywords
--- a/doc/utils/sphinx-config/false_positives.txt
+++ b/doc/utils/sphinx-config/false_positives.txt
@ -558,6 +558,7 @@ Cygwin
 cylindrically
 Cyrot
 cyrstals
+cython
 Daivis
 Dammak
 dampflag
@ -1918,6 +1919,7 @@ mK
 mkdir
 mkv
 mliap
+mliappy
 mlparks
 Mniszewski
 mnt
@ -2508,6 +2510,7 @@ Pstart
 Pstop
 pstyle
 Ptarget
+pth
 pthread
 pthreads
 ptm
@ -2536,6 +2539,7 @@ pymodule
 pymol
 pypar
 pythonic
+pytorch
 Pyy
 pz
 Pz
--- a/examples/mliap/README
+++ b/examples/mliap/README
@ -0,0 +1,103 @@
+This directory contains multiple examples of 
+machine-learning potentials defined using the 
+MLIAP package in LAMMPS. The input files
+are described below.
+
+in.mliap.snap.Ta06A
+-------------------
+Run linear SNAP, equivalent to examples/snap/in.snap.Ta06A
+
+in.mliap.snap.WBe.PRB2019
+-------------------------
+Run linear SNAP, equivalent to examples/snap/in.snap.WBe.PRB2019
+
+in.mliap.snap.quadratic
+-----------------------
+Run quadratic SNAP
+
+in.mliap.snap.chem
+------------------
+Run EME-SNAP, equivalent to examples/snap/in.snap.InP.JCPA2020
+
+in.mliap.snap.compute
+---------------------
+Generate the A matrix, the gradients (w.r.t. coefficients) 
+of total potential energy, forces, and stress tensor for 
+linear SNAP, equivalent to in.snap.compute
+
+in.mliap.quadratic.compute
+--------------------------
+Generate the A matrix, the gradients (w.r.t. coefficients) 
+of total potential energy, forces, and stress tensor for 
+quadratic SNAP, equivalent to in.snap.compute.quadratic
+
+in.mliap.pytorch.Ta06A
+-----------------------
+This reproduces the output of in.mliap.snap.Ta06A above,
+but using the Python coupling to PyTorch.
+
+This example can be run in two different ways:
+
+1: Running a LAMMPS executable: in.mliap.pytorch.Ta06A
+
+First run ``python convert_mliap_Ta06A.py``. It creates
+a PyTorch energy model that replicates the 
+SNAP Ta06A potential and saves it in the file 
+"Ta06A.mliap.pytorch.model.pt".
+
+You can then run the example as follows
+
+`lmp -in in.mliap.pytorch.Ta06A -echo both`
+
+The resultant log.lammps output should be identical to that generated
+by in.mliap.snap.Ta06A.
+
+If this fails, see the instructions for building the MLIAP package
+with Python support enabled. Also, confirm that the
+Python interpreter embedded in LAMMPS is
+working by running ../examples/in.python.
+
+2: Running a Python script: mliap_pytorch_Ta06A.py
+
+Before testing this, ensure that the previous method
+(running a LAMMPS executable) works.
+
+You can run the example in serial: 
+
+`python mliap_pytorch_Ta06A.py`
+
+or in parallel:
+
+`mpirun -np 4 python mliap_pytorch_Ta06A.py`
+
+The resultant log.lammps output should be identical to that generated
+by in.mliap.snap.Ta06A and in.mliap.pytorch.Ta06A.
+
+Not all Python installations support this mode of operation.
+It requires that the Python interpreter be initialized. If not,
+the script will exit with an error message.
+
+in.mliap.pytorch.relu1hidden
+----------------------------
+This example demonstrates a simple neural network potential
+using PyTorch and SNAP descriptors. 
+
+`lmp -in in.mliap.pytorch.relu1hidden -echo both`
+
+It was trained on just the energy component (no forces) of 
+the data used in the original SNAP Ta06A potential for 
+tantalum (Thompson, Swiler, Trott, Foiles, Tucker, 
+J Comp Phys, 285, 316 (2015)). Because of the very small amount
+of energy training data, it uses just 1 hidden layer with 
+a ReLU activation function. It is not expected to be 
+very accurate for forces. 
+
+NOTE: Unlike the previous example, this example uses 
+a pre-built PyTorch file `Ta06A.mliap.pytorch.model.pt`. 
+It is read using `torch.load`,
+which implicitly uses the Python `pickle` module.
+This is known to be insecure. It is possible to construct malicious 
+pickle data that will execute arbitrary code during unpickling. Never 
+load data that could have come from an untrusted source, or that 
+could have been tampered with. Only load data you trust.
+
--- a/examples/mliap/Ta06A.mliap.pytorch
+++ b/examples/mliap/Ta06A.mliap.pytorch
@ -0,0 +1,18 @@
+# DATE: 2014-09-05 UNITS: metal CONTRIBUTOR: Aidan Thompson athomps@sandia.gov CITATION: Thompson, Swiler, Trott, Foiles and Tucker, arxiv.org, 1409.3880 (2014) 
+
+# Definition of SNAP potential Ta_Cand06A
+# Assumes 1 LAMMPS atom type
+ 
+variable zblcutinner equal 4
+variable zblcutouter equal 4.8
+variable zblz equal 73
+
+# Specify hybrid with SNAP, ZBL
+
+pair_style hybrid/overlay &
+zbl ${zblcutinner} ${zblcutouter} &
+mliap model mliappy Ta06A.mliap.pytorch.model.pt &
+descriptor sna Ta06A.mliap.descriptor
+pair_coeff 1 1 zbl ${zblz} ${zblz}
+pair_coeff * * mliap Ta
+
--- a/examples/mliap/convert_mliap_Ta06A.py
+++ b/examples/mliap/convert_mliap_Ta06A.py
@ -0,0 +1,26 @@
+import sys
+import numpy as np
+import torch
+
+# torch.nn.modules useful for defining a MLIAPPY model.
+from lammps.mliap.pytorch import TorchWrapper, IgnoreElems
+
+# Convert the coefficients stored in Ta06A.mliap.model into a PyTorch
+# linear model and save it as Ta06A.mliap.pytorch.model.pt.
+
+# Read coefficients
+# skip_header=6 ignores the first six lines of the file
+# (presumably comments/metadata -- confirm against the model file).
+coeffs = np.genfromtxt("Ta06A.mliap.model",skip_header=6)
+
+# Write coefficients to a pytorch linear model
+# The first value is used as the bias, all remaining values as weights.
+bias = coeffs[0]
+weights = coeffs[1:]
+lin = torch.nn.Linear(weights.shape[0],1)
+lin.to(torch.float64)
+# Overwrite the layer's parameters in place without tracking gradients.
+with torch.autograd.no_grad():
+    lin.weight.set_(torch.from_numpy(weights).unsqueeze(0))
+    lin.bias.set_(torch.as_tensor(bias,dtype=torch.float64).unsqueeze(0))
+
+# Wrap the pytorch model for usage with mliappy coupling.
+model = IgnoreElems(lin) # The linear module does not use the types.
+n_descriptors = lin.weight.shape[1]
+n_elements = 1
+linked_model = TorchWrapper(model,n_descriptors=n_descriptors,n_elements=n_elements)
+
+# torch.save serializes the whole wrapped module object to disk.
+torch.save(linked_model,"Ta06A.mliap.pytorch.model.pt")
--- a/examples/mliap/in.mliap.pytorch.Ta06A
+++ b/examples/mliap/in.mliap.pytorch.Ta06A
@ -0,0 +1,53 @@
+# Demonstrate MLIAP/PyTorch interface to linear SNAP potential
+
+# Initialize simulation
+
+variable nsteps index 100
+variable nrep equal 4
+variable a equal 3.316
+units           metal
+
+# generate the box and atom positions using a BCC lattice
+
+variable nx equal ${nrep}
+variable ny equal ${nrep}
+variable nz equal ${nrep}
+
+boundary        p p p
+
+lattice         bcc $a
+region          box block 0 ${nx} 0 ${ny} 0 ${nz}
+create_box      1 box
+create_atoms    1 box
+
+mass 1 180.88
+
+# choose potential
+
+include Ta06A.mliap.pytorch
+
+# Setup output
+
+compute  eatom all pe/atom
+compute  energy all reduce sum c_eatom
+
+compute  satom all stress/atom NULL
+compute  str all reduce sum c_satom[1] c_satom[2] c_satom[3]
+variable press equal (c_str[1]+c_str[2]+c_str[3])/(3*vol)
+
+thermo_style    custom step temp epair c_energy etotal press v_press
+thermo          10
+thermo_modify norm yes
+
+# Set up NVE run
+
+timestep 0.5e-3
+neighbor 1.0 bin
+neigh_modify once no every 1 delay 0 check yes
+
+# Run MD
+
+velocity all create 300.0 4928459 loop geom
+fix 1 all nve
+run             ${nsteps}
+
--- a/examples/mliap/in.mliap.pytorch.relu1hidden
+++ b/examples/mliap/in.mliap.pytorch.relu1hidden
@ -0,0 +1,53 @@
+# Demonstrate MLIAP/PyTorch interface to a neural network potential (one hidden layer, ReLU)
+
+# Initialize simulation
+
+variable nsteps index 100
+variable nrep equal 4
+variable a equal 3.316
+units           metal
+
+# generate the box and atom positions using a BCC lattice
+
+variable nx equal ${nrep}
+variable ny equal ${nrep}
+variable nz equal ${nrep}
+
+boundary        p p p
+
+lattice         bcc $a
+region          box block 0 ${nx} 0 ${ny} 0 ${nz}
+create_box      1 box
+create_atoms    1 box
+
+mass 1 180.88
+
+# choose potential
+
+include relu1hidden.mliap.pytorch
+
+# Setup output
+
+compute  eatom all pe/atom
+compute  energy all reduce sum c_eatom
+
+compute  satom all stress/atom NULL
+compute  str all reduce sum c_satom[1] c_satom[2] c_satom[3]
+variable press equal (c_str[1]+c_str[2]+c_str[3])/(3*vol)
+
+thermo_style    custom step temp epair c_energy etotal press v_press
+thermo          10
+thermo_modify norm yes
+
+# Set up NVE run
+
+timestep 0.5e-3
+neighbor 1.0 bin
+neigh_modify once no every 1 delay 0 check yes
+
+# Run MD
+
+velocity all create 300.0 4928459 loop geom
+fix 1 all nve
+run             ${nsteps}
+
--- a/examples/mliap/in.mliap.snap.Ta06A
+++ b/examples/mliap/in.mliap.snap.Ta06A
@ -1,4 +1,4 @@
-# Demonstrate MLIAP interface to kinear SNAP potential
+# Demonstrate MLIAP interface to linear SNAP potential

 # Initialize simulation

--- a/examples/mliap/log.04Dec20.mliap.pytorch.Ta06A.g++.1
+++ b/examples/mliap/log.04Dec20.mliap.pytorch.Ta06A.g++.1
@ -0,0 +1,157 @@
+LAMMPS (30 Nov 2020)
+  using 48 OpenMP thread(s) per MPI task
+# Demonstrate MLIAP/PyTorch interface to linear SNAP potential
+
+# Initialize simulation
+
+variable nsteps index 100
+variable nrep equal 4
+variable a equal 3.316
+units           metal
+
+# generate the box and atom positions using a BCC lattice
+
+variable nx equal ${nrep}
+variable nx equal 4
+variable ny equal ${nrep}
+variable ny equal 4
+variable nz equal ${nrep}
+variable nz equal 4
+
+boundary        p p p
+
+lattice         bcc $a
+lattice         bcc 3.316
+Lattice spacing in x,y,z = 3.3160000 3.3160000 3.3160000
+region          box block 0 ${nx} 0 ${ny} 0 ${nz}
+region          box block 0 4 0 ${ny} 0 ${nz}
+region          box block 0 4 0 4 0 ${nz}
+region          box block 0 4 0 4 0 4
+create_box      1 box
+Created orthogonal box = (0.0000000 0.0000000 0.0000000) to (13.264000 13.264000 13.264000)
+  1 by 1 by 1 MPI processor grid
+create_atoms    1 box
+Created 128 atoms
+  create_atoms CPU = 0.002 seconds
+
+mass 1 180.88
+
+# choose potential
+
+include Ta06A.mliap.pytorch
+# DATE: 2014-09-05 UNITS: metal CONTRIBUTOR: Aidan Thompson athomps@sandia.gov CITATION: Thompson, Swiler, Trott, Foiles and Tucker, arxiv.org, 1409.3880 (2014)
+
+# Definition of SNAP potential Ta_Cand06A
+# Assumes 1 LAMMPS atom type
+
+variable zblcutinner equal 4
+variable zblcutouter equal 4.8
+variable zblz equal 73
+
+# Specify hybrid with SNAP, ZBL
+
+pair_style hybrid/overlay zbl ${zblcutinner} ${zblcutouter} mliap model mliappy Ta06A.mliap.pytorch.model.pkl descriptor sna Ta06A.mliap.descriptor
+pair_style hybrid/overlay zbl 4 ${zblcutouter} mliap model mliappy Ta06A.mliap.pytorch.model.pkl descriptor sna Ta06A.mliap.descriptor
+pair_style hybrid/overlay zbl 4 4.8 mliap model mliappy Ta06A.mliap.pytorch.model.pkl descriptor sna Ta06A.mliap.descriptor
+Loading python model complete.
+Reading potential file Ta06A.mliap.descriptor with DATE: 2014-09-05
+SNAP keyword rcutfac 4.67637 
+SNAP keyword twojmax 6 
+SNAP keyword nelems 1 
+SNAP keyword elems Ta 
+SNAP keyword radelems 0.5 
+SNAP keyword welems 1 
+SNAP keyword rfac0 0.99363 
+SNAP keyword rmin0 0 
+SNAP keyword bzeroflag 0 
+pair_coeff 1 1 zbl ${zblz} ${zblz}
+pair_coeff 1 1 zbl 73 ${zblz}
+pair_coeff 1 1 zbl 73 73
+pair_coeff * * mliap Ta
+
+
+# Setup output
+
+compute  eatom all pe/atom
+compute  energy all reduce sum c_eatom
+
+compute  satom all stress/atom NULL
+compute  str all reduce sum c_satom[1] c_satom[2] c_satom[3]
+variable press equal (c_str[1]+c_str[2]+c_str[3])/(3*vol)
+
+thermo_style    custom step temp epair c_energy etotal press v_press
+thermo          10
+thermo_modify norm yes
+
+# Set up NVE run
+
+timestep 0.5e-3
+neighbor 1.0 bin
+neigh_modify once no every 1 delay 0 check yes
+
+# Run MD
+
+velocity all create 300.0 4928459 loop geom
+fix 1 all nve
+run             ${nsteps}
+run             100
+Neighbor list info ...
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 5.8
+  ghost atom cutoff = 5.8
+  binsize = 2.9, bins = 5 5 5
+  2 neighbor lists, perpetual/occasional/extra = 2 0 0
+  (1) pair zbl, perpetual, half/full from (2)
+      attributes: half, newton on
+      pair build: halffull/newton
+      stencil: none
+      bin: none
+  (2) pair mliap, perpetual
+      attributes: full, newton on
+      pair build: full/bin/atomonly
+      stencil: full/bin/3d
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 159.8 | 159.8 | 159.8 Mbytes
+Step Temp E_pair c_energy TotEng Press v_press 
+       0          300    -11.85157    -11.85157   -11.813095    2717.1661   -2717.1661 
+      10    296.01467   -11.851059   -11.851059   -11.813095    2697.4796   -2697.4796 
+      20    284.53666   -11.849587   -11.849587   -11.813095    2289.1527   -2289.1527 
+      30    266.51577   -11.847275   -11.847275   -11.813095    1851.7131   -1851.7131 
+      40    243.05007   -11.844266   -11.844266   -11.813095     1570.684    -1570.684 
+      50    215.51032   -11.840734   -11.840734   -11.813094    1468.1899   -1468.1899 
+      60    185.48331   -11.836883   -11.836883   -11.813094    1524.8757   -1524.8757 
+      70     154.6736   -11.832931   -11.832931   -11.813094    1698.3351   -1698.3351 
+      80    124.79303   -11.829099   -11.829099   -11.813094    1947.0715   -1947.0715 
+      90    97.448054   -11.825592   -11.825592   -11.813094    2231.9563   -2231.9563 
+     100    74.035418   -11.822589   -11.822589   -11.813094    2515.8526   -2515.8526 
+Loop time of 2.00236 on 48 procs for 100 steps with 128 atoms
+
+Performance: 2.157 ns/day, 11.124 hours/ns, 49.941 timesteps/s
+288.8% CPU use with 1 MPI tasks x 48 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 1.9998     | 1.9998     | 1.9998     |   0.0 | 99.87
+Neigh   | 0          | 0          | 0          |   0.0 |  0.00
+Comm    | 0.0011814  | 0.0011814  | 0.0011814  |   0.0 |  0.06
+Output  | 0.00059724 | 0.00059724 | 0.00059724 |   0.0 |  0.03
+Modify  | 0.00047352 | 0.00047352 | 0.00047352 |   0.0 |  0.02
+Other   |            | 0.0003468  |            |       |  0.02
+
+Nlocal:        128.000 ave         128 max         128 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost:        727.000 ave         727 max         727 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs:        3712.00 ave        3712 max        3712 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+FullNghs:      7424.00 ave        7424 max        7424 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 7424
+Ave neighs/atom = 58.000000
+Neighbor list builds = 0
+Dangerous builds = 0
+
+Total wall time: 0:00:03
--- a/examples/mliap/log.04Dec20.mliap.pytorch.Ta06A.g++.4
+++ b/examples/mliap/log.04Dec20.mliap.pytorch.Ta06A.g++.4
@ -0,0 +1,157 @@
+LAMMPS (30 Nov 2020)
+  using 48 OpenMP thread(s) per MPI task
+# Demonstrate MLIAP/PyTorch interface to linear SNAP potential
+
+# Initialize simulation
+
+variable nsteps index 100
+variable nrep equal 4
+variable a equal 3.316
+units           metal
+
+# generate the box and atom positions using a BCC lattice
+
+variable nx equal ${nrep}
+variable nx equal 4
+variable ny equal ${nrep}
+variable ny equal 4
+variable nz equal ${nrep}
+variable nz equal 4
+
+boundary        p p p
+
+lattice         bcc $a
+lattice         bcc 3.316
+Lattice spacing in x,y,z = 3.3160000 3.3160000 3.3160000
+region          box block 0 ${nx} 0 ${ny} 0 ${nz}
+region          box block 0 4 0 ${ny} 0 ${nz}
+region          box block 0 4 0 4 0 ${nz}
+region          box block 0 4 0 4 0 4
+create_box      1 box
+Created orthogonal box = (0.0000000 0.0000000 0.0000000) to (13.264000 13.264000 13.264000)
+  1 by 2 by 2 MPI processor grid
+create_atoms    1 box
+Created 128 atoms
+  create_atoms CPU = 0.002 seconds
+
+mass 1 180.88
+
+# choose potential
+
+include Ta06A.mliap.pytorch
+# DATE: 2014-09-05 UNITS: metal CONTRIBUTOR: Aidan Thompson athomps@sandia.gov CITATION: Thompson, Swiler, Trott, Foiles and Tucker, arxiv.org, 1409.3880 (2014)
+
+# Definition of SNAP potential Ta_Cand06A
+# Assumes 1 LAMMPS atom type
+
+variable zblcutinner equal 4
+variable zblcutouter equal 4.8
+variable zblz equal 73
+
+# Specify hybrid with SNAP, ZBL
+
+pair_style hybrid/overlay zbl ${zblcutinner} ${zblcutouter} mliap model mliappy Ta06A.mliap.pytorch.model.pkl descriptor sna Ta06A.mliap.descriptor
+pair_style hybrid/overlay zbl 4 ${zblcutouter} mliap model mliappy Ta06A.mliap.pytorch.model.pkl descriptor sna Ta06A.mliap.descriptor
+pair_style hybrid/overlay zbl 4 4.8 mliap model mliappy Ta06A.mliap.pytorch.model.pkl descriptor sna Ta06A.mliap.descriptor
+Loading python model complete.
+Reading potential file Ta06A.mliap.descriptor with DATE: 2014-09-05
+SNAP keyword rcutfac 4.67637 
+SNAP keyword twojmax 6 
+SNAP keyword nelems 1 
+SNAP keyword elems Ta 
+SNAP keyword radelems 0.5 
+SNAP keyword welems 1 
+SNAP keyword rfac0 0.99363 
+SNAP keyword rmin0 0 
+SNAP keyword bzeroflag 0 
+pair_coeff 1 1 zbl ${zblz} ${zblz}
+pair_coeff 1 1 zbl 73 ${zblz}
+pair_coeff 1 1 zbl 73 73
+pair_coeff * * mliap Ta
+
+
+# Setup output
+
+compute  eatom all pe/atom
+compute  energy all reduce sum c_eatom
+
+compute  satom all stress/atom NULL
+compute  str all reduce sum c_satom[1] c_satom[2] c_satom[3]
+variable press equal (c_str[1]+c_str[2]+c_str[3])/(3*vol)
+
+thermo_style    custom step temp epair c_energy etotal press v_press
+thermo          10
+thermo_modify norm yes
+
+# Set up NVE run
+
+timestep 0.5e-3
+neighbor 1.0 bin
+neigh_modify once no every 1 delay 0 check yes
+
+# Run MD
+
+velocity all create 300.0 4928459 loop geom
+fix 1 all nve
+run             ${nsteps}
+run             100
+Neighbor list info ...
+  update every 1 steps, delay 0 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 5.8
+  ghost atom cutoff = 5.8
+  binsize = 2.9, bins = 5 5 5
+  2 neighbor lists, perpetual/occasional/extra = 2 0 0
+  (1) pair zbl, perpetual, half/full from (2)
+      attributes: half, newton on
+      pair build: halffull/newton
+      stencil: none
+      bin: none
+  (2) pair mliap, perpetual
+      attributes: full, newton on
+      pair build: full/bin/atomonly
+      stencil: full/bin/3d
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 159.7 | 159.7 | 159.7 Mbytes
+Step Temp E_pair c_energy TotEng Press v_press 
+       0          300    -11.85157    -11.85157   -11.813095    2717.1661   -2717.1661 
+      10    296.01467   -11.851059   -11.851059   -11.813095    2697.4796   -2697.4796 
+      20    284.53666   -11.849587   -11.849587   -11.813095    2289.1527   -2289.1527 
+      30    266.51577   -11.847275   -11.847275   -11.813095    1851.7131   -1851.7131 
+      40    243.05007   -11.844266   -11.844266   -11.813095     1570.684    -1570.684 
+      50    215.51032   -11.840734   -11.840734   -11.813094    1468.1899   -1468.1899 
+      60    185.48331   -11.836883   -11.836883   -11.813094    1524.8757   -1524.8757 
+      70     154.6736   -11.832931   -11.832931   -11.813094    1698.3351   -1698.3351 
+      80    124.79303   -11.829099   -11.829099   -11.813094    1947.0715   -1947.0715 
+      90    97.448054   -11.825592   -11.825592   -11.813094    2231.9563   -2231.9563 
+     100    74.035418   -11.822589   -11.822589   -11.813094    2515.8526   -2515.8526 
+Loop time of 0.562802 on 192 procs for 100 steps with 128 atoms
+
+Performance: 7.676 ns/day, 3.127 hours/ns, 177.682 timesteps/s
+99.7% CPU use with 4 MPI tasks x 48 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0.53583    | 0.54622    | 0.55401    |   0.9 | 97.05
+Neigh   | 0          | 0          | 0          |   0.0 |  0.00
+Comm    | 0.0071442  | 0.01491    | 0.025289   |   5.4 |  2.65
+Output  | 0.00092525 | 0.00095771 | 0.0010166  |   0.0 |  0.17
+Modify  | 0.00014479 | 0.00015043 | 0.00015893 |   0.0 |  0.03
+Other   |            | 0.0005624  |            |       |  0.10
+
+Nlocal:        32.0000 ave          32 max          32 min
+Histogram: 4 0 0 0 0 0 0 0 0 0
+Nghost:        431.000 ave         431 max         431 min
+Histogram: 4 0 0 0 0 0 0 0 0 0
+Neighs:        928.000 ave         928 max         928 min
+Histogram: 4 0 0 0 0 0 0 0 0 0
+FullNghs:      1856.00 ave        1856 max        1856 min
+Histogram: 4 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 7424
+Ave neighs/atom = 58.000000
+Neighbor list builds = 0
+Dangerous builds = 0
+
+Total wall time: 0:00:02
--- a/examples/mliap/mliap_pytorch_Ta06A.py
+++ b/examples/mliap/mliap_pytorch_Ta06A.py
@ -0,0 +1,104 @@
+# Demonstrate how to load a model from the python side.
+# This is essentially the same as in.mliap.pytorch.Ta06A
+# except that python is the driving program, and lammps
+# is in library mode.
+
+before_loading =\
+"""# Demonstrate MLIAP/PyTorch interface to linear SNAP potential
+
+# Initialize simulation
+
+variable nsteps index 100
+variable nrep equal 4
+variable a equal 3.316
+units           metal
+
+# generate the box and atom positions using a BCC lattice
+
+variable nx equal ${nrep}
+variable ny equal ${nrep}
+variable nz equal ${nrep}
+
+boundary        p p p
+
+lattice         bcc $a
+region          box block 0 ${nx} 0 ${ny} 0 ${nz}
+create_box      1 box
+create_atoms    1 box
+
+mass 1 180.88
+
+# choose potential
+
+# DATE: 2014-09-05 UNITS: metal CONTRIBUTOR: Aidan Thompson athomps@sandia.gov CITATION: Thompson, Swiler, Trott, Foiles and Tucker, arxiv.org, 1409.3880 (2014)
+
+# Definition of SNAP potential Ta_Cand06A
+# Assumes 1 LAMMPS atom type
+
+variable zblcutinner equal 4
+variable zblcutouter equal 4.8
+variable zblz equal 73
+
+# Specify hybrid with SNAP, ZBL
+
+pair_style hybrid/overlay &
+zbl ${zblcutinner} ${zblcutouter} &
+mliap model mliappy LATER &
+descriptor sna Ta06A.mliap.descriptor
+pair_coeff 1 1 zbl ${zblz} ${zblz}
+pair_coeff * * mliap Ta
+"""
+after_loading =\
+"""
+
+# Setup output
+
+compute  eatom all pe/atom
+compute  energy all reduce sum c_eatom
+
+compute  satom all stress/atom NULL
+compute  str all reduce sum c_satom[1] c_satom[2] c_satom[3]
+variable press equal (c_str[1]+c_str[2]+c_str[3])/(3*vol)
+
+thermo_style    custom step temp epair c_energy etotal press v_press
+thermo          10
+thermo_modify norm yes
+
+# Set up NVE run
+
+timestep 0.5e-3
+neighbor 1.0 bin
+neigh_modify once no every 1 delay 0 check yes
+
+# Run MD
+
+velocity all create 300.0 4928459 loop geom
+fix 1 all nve
+run             ${nsteps}
+"""
+
+import lammps
+
+# Create the LAMMPS instance; '-echo both' is passed as a command-line argument.
+lmp = lammps.lammps(cmdargs=['-echo','both'])
+
+# Before defining the pair style, one must do the following:
+import lammps.mliap
+lammps.mliap.activate_mliappy(lmp)
+# Otherwise, when running lammps in library mode,
+# you will get an error:
+# "ERROR: Loading MLIAPPY coupling module failure."
+
+# Setup the simulation and declare an empty model
+# by specifying model filename as "LATER"
+lmp.commands_string(before_loading)
+
+# Define the model however you like. In this example
+# we load it from disk:
+# NOTE: torch.load unpickles the file, which can execute arbitrary code.
+# Only load model files that come from a trusted source.
+import torch
+model = torch.load('Ta06A.mliap.pytorch.model.pt')
+
+# Connect the PyTorch model to the mliap pair style.
+lammps.mliap.load_model(model)
+  
+# run the simulation with the mliap pair style
+lmp.commands_string(after_loading)
--- a/examples/mliap/relu1hidden.mliap.pytorch
+++ b/examples/mliap/relu1hidden.mliap.pytorch
@ -0,0 +1,18 @@
+# DATE: 2014-09-05 UNITS: metal CONTRIBUTOR: Aidan Thompson athomps@sandia.gov CITATION: Thompson, Swiler, Trott, Foiles and Tucker, arxiv.org, 1409.3880 (2014) 
+
+# Definition of PyTorch neural network potential (one hidden layer, ReLU) on SNAP descriptors
+# Assumes 1 LAMMPS atom type
+ 
+variable zblcutinner equal 4
+variable zblcutouter equal 4.8
+variable zblz equal 73
+
+# Specify hybrid with SNAP, ZBL
+
+pair_style hybrid/overlay &
+zbl ${zblcutinner} ${zblcutouter} &
+mliap model mliappy relu1hidden.mliap.pytorch.model.pt &
+descriptor sna Ta06A.mliap.descriptor
+pair_coeff 1 1 zbl ${zblz} ${zblz}
+pair_coeff * * mliap Ta
+
--- a/examples/mliap/relu1hidden.mliap.pytorch.model.pt
+++ b/examples/mliap/relu1hidden.mliap.pytorch.model.pt
--- a/lib/python/Makefile.mliap_python
+++ b/lib/python/Makefile.mliap_python
@ -0,0 +1,3 @@
+# Regenerate the coupling source from its Cython definition; cythonize takes the .pyx as input.
+../mliap_model_python_couple.cpp: ../mliap_model_python_couple.pyx
+	cythonize -3 ../mliap_model_python_couple.pyx
--- a/python/install.py
+++ b/python/install.py
@ -98,19 +98,23 @@ os.chdir(os.path.dirname(args.package))
 from distutils.core import setup
 from distutils.sysconfig import get_python_lib
 import site
-tryuser=False

+#Arguments common to global or user install -- everything but data_files
+setup_kwargs= dict(name="lammps",
+        version=verstr,
+        author="Steve Plimpton",
+        author_email="sjplimp@sandia.gov",
+        url="https://lammps.sandia.gov",
+        description="LAMMPS Molecular Dynamics Python package",
+        license="GPL",
+        packages=["lammps","lammps.mliap"],
+        )
+
+tryuser=False
 try:
  sys.argv = ["setup.py","install"]    # as if had run "python setup.py install"
-  setup(name = "lammps",
-        version = verstr,
-        author = "Steve Plimpton",
-        author_email = "sjplimp@sandia.gov",
-        url = "https://lammps.sandia.gov",
-        description = "LAMMPS Molecular Dynamics Python package",
-        license = "GPL",
-        packages=['lammps'],
-        data_files = [(os.path.join(get_python_lib(), 'lammps'), [args.lib])])
+  setup_kwargs['data_files']=[(os.path.join(get_python_lib(), 'lammps'), [args.lib])]
+  setup(**setup_kwargs)
 except:
  tryuser=True
  print ("Installation into global site-packages folder failed.\nTrying user folder %s now." % site.USER_SITE)
@ -118,14 +122,7 @@ except:
 if tryuser:
  try:
    sys.argv = ["setup.py","install","--user"]    # as if had run "python setup.py install --user"
-    setup(name = "lammps",
-          version = verstr,
-          author = "Steve Plimpton",
-          author_email = "sjplimp@sandia.gov",
-          url = "https://lammps.sandia.gov",
-          description = "LAMMPS Molecular Dynamics Python package",
-          license = "GPL",
-          packages=['lammps'],
-          data_files = [(os.path.join(site.USER_SITE, 'lammps'), [args.lib])])
+    setup_kwargs['data_files']=[(os.path.join(site.USER_SITE, 'lammps'), [args.lib])]
+    setup(**setup_kwargs)
  except:
    print("Installation into user site package folder failed.")
--- a/python/lammps/mliap/init.py
+++ b/python/lammps/mliap/init.py
@ -0,0 +1,13 @@
+
+# Check compatibility of this build with the python shared library.
+# If this fails, lammps will segfault because its library will
+# try to improperly start up a new interpreter.
+import sysconfig
+import ctypes
+# INSTSONAME is the build-configuration entry naming the Python shared library.
+library = sysconfig.get_config_vars('INSTSONAME')[0]
+# Open that library and ask it whether its interpreter has been initialized.
+pylib = ctypes.CDLL(library)
+if not pylib.Py_IsInitialized():
+    raise RuntimeError("This interpreter is not compatible with python-based mliap for LAMMPS.")
+# Drop the helper names so they do not remain in the package namespace.
+del sysconfig, ctypes, library, pylib
+
+from .loader import load_model, activate_mliappy
--- a/python/lammps/mliap/loader.py
+++ b/python/lammps/mliap/loader.py
@ -0,0 +1,52 @@
+# ----------------------------------------------------------------------
+#   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+#   http://lammps.sandia.gov, Sandia National Laboratories
+#   Steve Plimpton, sjplimp@sandia.gov
+#
+#   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+#   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+#   certain rights in this software.  This software is distributed under
+#   the GNU General Public License.
+#
+#   See the README file in the top-level LAMMPS directory.
+# -------------------------------------------------------------------------
+
+# ----------------------------------------------------------------------
+#   Contributing author: Nicholas Lubbers (LANL)
+# -------------------------------------------------------------------------
+
+
+import sys
+import importlib.util
+import importlib.machinery
+
+# Load the 'mliap_model_python_couple' extension module from the library
+# file behind lmp.lib and register it in sys.modules.
+# Any failure is re-raised as ImportError, chained to the original exception.
+def activate_mliappy(lmp):
+    try:
+        # Begin Importlib magic to find the embedded python module
+        # This is needed because the filename for liblammps does not
+        # match the spec for normal python modules, wherein
+        # file names match with PyInit function names.
+        # Also, python normally doesn't look for extensions besides '.so'
+        # We fix both of these problems by providing an explicit
+        # path to the extension module 'mliap_model_python_couple' in
+        # the library file named by lmp.lib._name (presumably liblammps).
+
+        path = lmp.lib._name
+        loader = importlib.machinery.ExtensionFileLoader('mliap_model_python_couple', path)
+        spec = importlib.util.spec_from_loader('mliap_model_python_couple', loader)
+        module = importlib.util.module_from_spec(spec)
+        # Register the module before executing it so later imports find it.
+        sys.modules['mliap_model_python_couple'] = module
+        spec.loader.exec_module(module)
+        # End Importlib magic to find the embedded python module
+
+    except Exception as ee:
+        raise ImportError("Could not load MLIAP python coupling module.") from ee
+
+# Pass an in-memory Python model object to the MLIAP coupling module.
+# The module 'mliap_model_python_couple' must already be importable
+# (see activate_mliappy); otherwise an ImportError with instructions is raised.
+def load_model(model):
+    try:
+        import mliap_model_python_couple
+    except ImportError as ie:
+        raise ImportError("MLIAP python module must be activated before loading\n"
+                          "the pair style. Call lammps.mliap.activate_mliappy(lmp)."
+                          ) from ie
+    mliap_model_python_couple.load_from_python(model)
+
--- a/python/lammps/mliap/pytorch.py
+++ b/python/lammps/mliap/pytorch.py
@ -0,0 +1,65 @@
+# ----------------------------------------------------------------------
+#   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+#   http://lammps.sandia.gov, Sandia National Laboratories
+#   Steve Plimpton, sjplimp@sandia.gov
+#
+#   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+#   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+#   certain rights in this software.  This software is distributed under
+#   the GNU General Public License.
+#
+#   See the README file in the top-level LAMMPS directory.
+# -------------------------------------------------------------------------
+
+# ----------------------------------------------------------------------
+#   Contributing author: Nicholas Lubbers (LANL)
+# -------------------------------------------------------------------------
+
+import numpy as np
+import torch
+
+# Return the total number of elements summed over all parameters of `model`.
+def calc_n_params(model):
+    return sum(p.nelement() for p in model.parameters())
+
+# Wraps a torch model so that forward() takes numpy arrays, evaluates the
+# model, and writes energies and their gradient with respect to the
+# descriptors back into caller-provided numpy arrays.
+class TorchWrapper(torch.nn.Module):
+    # model:         module called as model(bispectrum, elems)
+    # n_descriptors: stored as an attribute
+    # n_elements:    stored as an attribute
+    # n_params:      stored as an attribute; counted from the model when None
+    # device, dtype: where and in which precision the wrapper is placed
+    def __init__(self, model,n_descriptors,n_elements,n_params=None,device=None,dtype=torch.float64):
+        super().__init__()
+
+        self.model = model
+        self.device = device
+        self.dtype = dtype
+
+        # Put model on device and convert to dtype
+        self.to(self.dtype)
+        self.to(self.device)
+
+        if n_params is None:
+            n_params = calc_n_params(model)
+
+        self.n_params = n_params
+        self.n_descriptors = n_descriptors
+        self.n_elements = n_elements
+
+    # elems, bispectrum: numpy input arrays; beta, energy: numpy output
+    # arrays that are overwritten in place. Nothing is returned.
+    def forward(self, elems, bispectrum, beta, energy):
+
+        # Track gradients on the descriptors so d(energy)/d(descriptor) can be taken.
+        bispectrum = torch.from_numpy(bispectrum).to(dtype=self.dtype, device=self.device).requires_grad_(True)
+        # Shift by one -- presumably from 1-based types to 0-based indices; confirm with caller.
+        elems = torch.from_numpy(elems).to(dtype=torch.long, device=self.device) - 1
+
+        with torch.autograd.enable_grad():
+
+            energy_nn = self.model(bispectrum, elems)
+            if energy_nn.ndim > 1:
+                energy_nn = energy_nn.flatten()
+
+            # Gradient of the summed energy with respect to the descriptors.
+            beta_nn = torch.autograd.grad(energy_nn.sum(), bispectrum)[0]
+
+        # Copy results into the output arrays as float64 numpy data.
+        beta[:] = beta_nn.detach().cpu().numpy().astype(np.float64)
+        energy[:] = energy_nn.detach().cpu().numpy().astype(np.float64)
+
+# Adapter module: accepts (bispectrum, elems) but forwards only the
+# bispectrum to the wrapped sub-network, discarding elems.
+class IgnoreElems(torch.nn.Module):
+    def __init__(self,subnet):
+        super().__init__()
+        self.subnet = subnet
+
+    # elems is accepted for interface compatibility and not used.
+    def forward(self,bispectrum,elems):
+        return self.subnet(bispectrum)
--- a/python/setup.py
+++ b/python/setup.py
@ -22,5 +22,5 @@ setup(
    url = "https://lammps.sandia.gov",
    description = "LAMMPS Molecular Dynamics Python package",
    license = "GPL",
-    packages=["lammps"]
+    packages=["lammps","lammps.mliap"],
 )
--- a/src/COLLOID/pair_brownian.cpp
+++ b/src/COLLOID/pair_brownian.cpp
@ -99,20 +99,20 @@ void PairBrownian::compute(int eflag, int vflag)

  double dims[3], wallcoord;
  if (flagVF) // Flag for volume fraction corrections
-    if (flagdeform || flagwall == 2){ // Possible changes in volume fraction
+    if (flagdeform || flagwall == 2) { // Possible changes in volume fraction
      if (flagdeform && !flagwall)
        for (j = 0; j < 3; j++)
          dims[j] = domain->prd[j];
-      else if (flagwall == 2 || (flagdeform && flagwall == 1)){
+      else if (flagwall == 2 || (flagdeform && flagwall == 1)) {
        double wallhi[3], walllo[3];
-        for (int j = 0; j < 3; j++){
+        for (int j = 0; j < 3; j++) {
          wallhi[j] = domain->prd[j];
          walllo[j] = 0;
        }
-        for (int m = 0; m < wallfix->nwall; m++){
+        for (int m = 0; m < wallfix->nwall; m++) {
          int dim = wallfix->wallwhich[m] / 2;
          int side = wallfix->wallwhich[m] % 2;
-          if (wallfix->xstyle[m] == VARIABLE){
+          if (wallfix->xstyle[m] == VARIABLE) {
            wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
          }
          else wallcoord = wallfix->coord0[m];
@ -494,7 +494,7 @@ void PairBrownian::init_style()
  // are re-calculated at every step.

  flagdeform = flagwall = 0;
-  for (int i = 0; i < modify->nfix; i++){
+  for (int i = 0; i < modify->nfix; i++) {
    if (strcmp(modify->fix[i]->style,"deform") == 0)
      flagdeform = 1;
    else if (strstr(modify->fix[i]->style,"wall") != nullptr) {
@ -514,14 +514,14 @@ void PairBrownian::init_style()
  if (!flagwall) vol_T = domain->xprd*domain->yprd*domain->zprd;
  else {
    double wallhi[3], walllo[3];
-    for (int j = 0; j < 3; j++){
+    for (int j = 0; j < 3; j++) {
      wallhi[j] = domain->prd[j];
      walllo[j] = 0;
    }
-    for (int m = 0; m < wallfix->nwall; m++){
+    for (int m = 0; m < wallfix->nwall; m++) {
      int dim = wallfix->wallwhich[m] / 2;
      int side = wallfix->wallwhich[m] % 2;
-      if (wallfix->xstyle[m] == VARIABLE){
+      if (wallfix->xstyle[m] == VARIABLE) {
        wallfix->xindex[m] = input->variable->find(wallfix->xstr[m]);
        // Since fix->wall->init happens after pair->init_style
        wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
--- a/src/COLLOID/pair_brownian_poly.cpp
+++ b/src/COLLOID/pair_brownian_poly.cpp
@ -82,20 +82,20 @@ void PairBrownianPoly::compute(int eflag, int vflag)

  double dims[3], wallcoord;
  if (flagVF) // Flag for volume fraction corrections
-    if (flagdeform || flagwall == 2){ // Possible changes in volume fraction
+    if (flagdeform || flagwall == 2) { // Possible changes in volume fraction
      if (flagdeform && !flagwall)
        for (j = 0; j < 3; j++)
          dims[j] = domain->prd[j];
-      else if (flagwall == 2 || (flagdeform && flagwall == 1)){
+      else if (flagwall == 2 || (flagdeform && flagwall == 1)) {
        double wallhi[3], walllo[3];
-        for (j = 0; j < 3; j++){
+        for (j = 0; j < 3; j++) {
          wallhi[j] = domain->prd[j];
          walllo[j] = 0;
        }
-        for (int m = 0; m < wallfix->nwall; m++){
+        for (int m = 0; m < wallfix->nwall; m++) {
          int dim = wallfix->wallwhich[m] / 2;
          int side = wallfix->wallwhich[m] % 2;
-          if (wallfix->xstyle[m] == VARIABLE){
+          if (wallfix->xstyle[m] == VARIABLE) {
            wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
          }
          else wallcoord = wallfix->coord0[m];
@ -355,7 +355,7 @@ void PairBrownianPoly::init_style()
  // are re-calculated at every step.

  flagdeform = flagwall = 0;
-  for (int i = 0; i < modify->nfix; i++){
+  for (int i = 0; i < modify->nfix; i++) {
    if (strcmp(modify->fix[i]->style,"deform") == 0)
      flagdeform = 1;
    else if (strstr(modify->fix[i]->style,"wall") != nullptr) {
@ -375,14 +375,14 @@ void PairBrownianPoly::init_style()
  if (!flagwall) vol_T = domain->xprd*domain->yprd*domain->zprd;
  else {
    double wallhi[3], walllo[3];
-    for (int j = 0; j < 3; j++){
+    for (int j = 0; j < 3; j++) {
      wallhi[j] = domain->prd[j];
      walllo[j] = 0;
    }
-    for (int m = 0; m < wallfix->nwall; m++){
+    for (int m = 0; m < wallfix->nwall; m++) {
      int dim = wallfix->wallwhich[m] / 2;
      int side = wallfix->wallwhich[m] % 2;
-      if (wallfix->xstyle[m] == VARIABLE){
+      if (wallfix->xstyle[m] == VARIABLE) {
        wallfix->xindex[m] = input->variable->find(wallfix->xstr[m]);
        // Since fix->wall->init happens after pair->init_style
        wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
--- a/src/COLLOID/pair_lubricate.cpp
+++ b/src/COLLOID/pair_lubricate.cpp
@ -155,20 +155,20 @@ void PairLubricate::compute(int eflag, int vflag)

  double dims[3], wallcoord;
  if (flagVF) // Flag for volume fraction corrections
-    if (flagdeform || flagwall == 2){ // Possible changes in volume fraction
+    if (flagdeform || flagwall == 2) { // Possible changes in volume fraction
      if (flagdeform && !flagwall)
        for (j = 0; j < 3; j++)
          dims[j] = domain->prd[j];
-      else if (flagwall == 2 || (flagdeform && flagwall == 1)){
+      else if (flagwall == 2 || (flagdeform && flagwall == 1)) {
         double wallhi[3], walllo[3];
-         for (int j = 0; j < 3; j++){
+         for (int j = 0; j < 3; j++) {
           wallhi[j] = domain->prd[j];
           walllo[j] = 0;
         }
-         for (int m = 0; m < wallfix->nwall; m++){
+         for (int m = 0; m < wallfix->nwall; m++) {
           int dim = wallfix->wallwhich[m] / 2;
           int side = wallfix->wallwhich[m] % 2;
-           if (wallfix->xstyle[m] == VARIABLE){
+           if (wallfix->xstyle[m] == VARIABLE) {
             wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
           }
           else wallcoord = wallfix->coord0[m];
@ -559,7 +559,7 @@ void PairLubricate::init_style()
  // are re-calculated at every step.

  shearing = flagdeform = flagwall = 0;
-  for (int i = 0; i < modify->nfix; i++){
+  for (int i = 0; i < modify->nfix; i++) {
    if (strcmp(modify->fix[i]->style,"deform") == 0) {
      shearing = flagdeform = 1;
      if (((FixDeform *) modify->fix[i])->remapflag != Domain::V_REMAP)
@ -584,15 +584,15 @@ void PairLubricate::init_style()
  if (!flagwall) vol_T = domain->xprd*domain->yprd*domain->zprd;
  else {
    double wallhi[3], walllo[3];
-    for (int j = 0; j < 3; j++){
+    for (int j = 0; j < 3; j++) {
      wallhi[j] = domain->prd[j];
      walllo[j] = 0;
    }

-    for (int m = 0; m < wallfix->nwall; m++){
+    for (int m = 0; m < wallfix->nwall; m++) {
      int dim = wallfix->wallwhich[m] / 2;
      int side = wallfix->wallwhich[m] % 2;
-      if (wallfix->xstyle[m] == VARIABLE){
+      if (wallfix->xstyle[m] == VARIABLE) {
        wallfix->xindex[m] = input->variable->find(wallfix->xstr[m]);
        //Since fix->wall->init happens after pair->init_style
        wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
--- a/src/COLLOID/pair_lubricateU.cpp
+++ b/src/COLLOID/pair_lubricateU.cpp
@ -158,8 +158,8 @@ void PairLubricateU::compute(int eflag, int vflag)

  // store back the saved forces and torques in original arrays

-  for(i=0;i<nlocal+nghost;i++) {
-    for(j=0;j<3;j++) {
+  for (i=0;i<nlocal+nghost;i++) {
+    for (j=0;j<3;j++) {
      f[i][j] = fl[i][j];
      torque[i][j] = Tl[i][j];
    }
@ -223,7 +223,7 @@ void PairLubricateU::stage_one()
  // Find the right hand side= -ve of all forces/torques
  // b = 6*Npart in overall size

-  for(ii = 0; ii < inum; ii++) {
+  for (ii = 0; ii < inum; ii++) {
    i = ilist[ii];
    for (j = 0; j < 3; j++) {
      bcg[6*ii+j] = -f[i][j];
@ -407,7 +407,7 @@ void PairLubricateU::stage_two(double **x)
  // Find the right hand side= -ve of all forces/torques
  // b = 6*Npart in overall size

-  for(ii = 0; ii < inum; ii++) {
+  for (ii = 0; ii < inum; ii++) {
    i = ilist[ii];
    for (j = 0; j < 3; j++) {
      bcg[6*ii+j] = -f[i][j];
@ -581,20 +581,20 @@ void PairLubricateU::compute_Fh(double **x)

  double dims[3], wallcoord;
  if (flagVF) // Flag for volume fraction corrections
-    if (flagdeform || flagwall == 2){ // Possible changes in volume fraction
+    if (flagdeform || flagwall == 2) { // Possible changes in volume fraction
      if (flagdeform && !flagwall)
        for (j = 0; j < 3; j++)
          dims[j] = domain->prd[j];
-      else if (flagwall == 2 || (flagdeform && flagwall == 1)){
+      else if (flagwall == 2 || (flagdeform && flagwall == 1)) {
         double wallhi[3], walllo[3];
-         for (int j = 0; j < 3; j++){
+         for (int j = 0; j < 3; j++) {
           wallhi[j] = domain->prd[j];
           walllo[j] = 0;
         }
-         for (int m = 0; m < wallfix->nwall; m++){
+         for (int m = 0; m < wallfix->nwall; m++) {
           int dim = wallfix->wallwhich[m] / 2;
           int side = wallfix->wallwhich[m] % 2;
-           if (wallfix->xstyle[m] == VARIABLE){
+           if (wallfix->xstyle[m] == VARIABLE) {
             wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
           }
           else wallcoord = wallfix->coord0[m];
@ -813,20 +813,20 @@ void PairLubricateU::compute_RU()

  double dims[3], wallcoord;
  if (flagVF) // Flag for volume fraction corrections
-    if (flagdeform || flagwall == 2){ // Possible changes in volume fraction
+    if (flagdeform || flagwall == 2) { // Possible changes in volume fraction
      if (flagdeform && !flagwall)
        for (j = 0; j < 3; j++)
          dims[j] = domain->prd[j];
-      else if (flagwall == 2 || (flagdeform && flagwall == 1)){
+      else if (flagwall == 2 || (flagdeform && flagwall == 1)) {
         double wallhi[3], walllo[3];
-         for (int j = 0; j < 3; j++){
+         for (int j = 0; j < 3; j++) {
           wallhi[j] = domain->prd[j];
           walllo[j] = 0;
         }
-         for (int m = 0; m < wallfix->nwall; m++){
+         for (int m = 0; m < wallfix->nwall; m++) {
           int dim = wallfix->wallwhich[m] / 2;
           int side = wallfix->wallwhich[m] % 2;
-           if (wallfix->xstyle[m] == VARIABLE){
+           if (wallfix->xstyle[m] == VARIABLE) {
             wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
           }
           else wallcoord = wallfix->coord0[m];
@ -1013,7 +1013,7 @@ void PairLubricateU::compute_RU()
          torque[i][1] -= vxmu2f*ty;
          torque[i][2] -= vxmu2f*tz;

-          if(newton_pair || j < nlocal) {
+          if (newton_pair || j < nlocal) {
            torque[j][0] -= vxmu2f*tx;
            torque[j][1] -= vxmu2f*ty;
            torque[j][2] -= vxmu2f*tz;
@ -1084,20 +1084,20 @@ void PairLubricateU::compute_RU(double **x)

  double dims[3], wallcoord;
  if (flagVF) // Flag for volume fraction corrections
-    if (flagdeform || flagwall == 2){ // Possible changes in volume fraction
+    if (flagdeform || flagwall == 2) { // Possible changes in volume fraction
      if (flagdeform && !flagwall)
        for (j = 0; j < 3; j++)
          dims[j] = domain->prd[j];
-      else if (flagwall == 2 || (flagdeform && flagwall == 1)){
+      else if (flagwall == 2 || (flagdeform && flagwall == 1)) {
         double wallhi[3], walllo[3];
-         for (int j = 0; j < 3; j++){
+         for (int j = 0; j < 3; j++) {
           wallhi[j] = domain->prd[j];
           walllo[j] = 0;
         }
-         for (int m = 0; m < wallfix->nwall; m++){
+         for (int m = 0; m < wallfix->nwall; m++) {
           int dim = wallfix->wallwhich[m] / 2;
           int side = wallfix->wallwhich[m] % 2;
-           if (wallfix->xstyle[m] == VARIABLE){
+           if (wallfix->xstyle[m] == VARIABLE) {
             wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
           }
           else wallcoord = wallfix->coord0[m];
@ -1284,7 +1284,7 @@ void PairLubricateU::compute_RU(double **x)
          torque[i][1] -= vxmu2f*ty;
          torque[i][2] -= vxmu2f*tz;

-          if(newton_pair || j < nlocal) {
+          if (newton_pair || j < nlocal) {
            torque[j][0] -= vxmu2f*tx;
            torque[j][1] -= vxmu2f*ty;
            torque[j][2] -= vxmu2f*tz;
@ -1791,7 +1791,7 @@ void PairLubricateU::init_style()
  // are re-calculated at every step.

  flagdeform = flagwall = 0;
-  for (int i = 0; i < modify->nfix; i++){
+  for (int i = 0; i < modify->nfix; i++) {
    if (strcmp(modify->fix[i]->style,"deform") == 0)
      flagdeform = 1;
    else if (strstr(modify->fix[i]->style,"wall") != nullptr) {
@ -1811,14 +1811,14 @@ void PairLubricateU::init_style()
    if (!flagwall) vol_T = domain->xprd*domain->yprd*domain->zprd;
  else {
    double wallhi[3], walllo[3];
-    for (int j = 0; j < 3; j++){
+    for (int j = 0; j < 3; j++) {
      wallhi[j] = domain->prd[j];
      walllo[j] = 0;
    }
-    for (int m = 0; m < wallfix->nwall; m++){
+    for (int m = 0; m < wallfix->nwall; m++) {
      int dim = wallfix->wallwhich[m] / 2;
      int side = wallfix->wallwhich[m] % 2;
-      if (wallfix->xstyle[m] == VARIABLE){
+      if (wallfix->xstyle[m] == VARIABLE) {
        wallfix->xindex[m] = input->variable->find(wallfix->xstr[m]);
        //Since fix->wall->init happens after pair->init_style
        wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
--- a/src/COLLOID/pair_lubricateU_poly.cpp
+++ b/src/COLLOID/pair_lubricateU_poly.cpp
@ -126,8 +126,8 @@ void PairLubricateUPoly::compute(int eflag, int vflag)

  // Store back the saved forces and torques in original arrays

-  for(i=0;i<nlocal+nghost;i++) {
-    for(j=0;j<3;j++) {
+  for (i=0;i<nlocal+nghost;i++) {
+    for (j=0;j<3;j++) {
      f[i][j] = fl[i][j];
      torque[i][j] = Tl[i][j];
    }
@ -172,7 +172,7 @@ void PairLubricateUPoly::iterate(double **x, int stage)
  // Find the right hand side= -ve of all forces/torques
  // b = 6*Npart in overall size

-  for(ii = 0; ii < inum; ii++) {
+  for (ii = 0; ii < inum; ii++) {
    i = ilist[ii];
    for (j = 0; j < 3; j++) {
      bcg[6*ii+j] = -f[i][j];
@ -351,20 +351,20 @@ void PairLubricateUPoly::compute_Fh(double **x)

  double dims[3], wallcoord;
  if (flagVF) // Flag for volume fraction corrections
-    if (flagdeform || flagwall == 2){ // Possible changes in volume fraction
+    if (flagdeform || flagwall == 2) { // Possible changes in volume fraction
      if (flagdeform && !flagwall)
        for (j = 0; j < 3; j++)
          dims[j] = domain->prd[j];
-      else if (flagwall == 2 || (flagdeform && flagwall == 1)){
+      else if (flagwall == 2 || (flagdeform && flagwall == 1)) {
         double wallhi[3], walllo[3];
-         for (int j = 0; j < 3; j++){
+         for (int j = 0; j < 3; j++) {
           wallhi[j] = domain->prd[j];
           walllo[j] = 0;
         }
-         for (int m = 0; m < wallfix->nwall; m++){
+         for (int m = 0; m < wallfix->nwall; m++) {
           int dim = wallfix->wallwhich[m] / 2;
           int side = wallfix->wallwhich[m] % 2;
-           if (wallfix->xstyle[m] == VARIABLE){
+           if (wallfix->xstyle[m] == VARIABLE) {
             wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
           }
           else wallcoord = wallfix->coord0[m];
@ -626,20 +626,20 @@ void PairLubricateUPoly::compute_RU(double **x)

  double dims[3], wallcoord;
  if (flagVF) // Flag for volume fraction corrections
-    if (flagdeform || flagwall == 2){ // Possible changes in volume fraction
+    if (flagdeform || flagwall == 2) { // Possible changes in volume fraction
      if (flagdeform && !flagwall)
        for (j = 0; j < 3; j++)
          dims[j] = domain->prd[j];
-      else if (flagwall == 2 || (flagdeform && flagwall == 1)){
+      else if (flagwall == 2 || (flagdeform && flagwall == 1)) {
         double wallhi[3], walllo[3];
-         for (j = 0; j < 3; j++){
+         for (j = 0; j < 3; j++) {
           wallhi[j] = domain->prd[j];
           walllo[j] = 0;
         }
-         for (int m = 0; m < wallfix->nwall; m++){
+         for (int m = 0; m < wallfix->nwall; m++) {
           int dim = wallfix->wallwhich[m] / 2;
           int side = wallfix->wallwhich[m] % 2;
-           if (wallfix->xstyle[m] == VARIABLE){
+           if (wallfix->xstyle[m] == VARIABLE) {
             wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
           }
           else wallcoord = wallfix->coord0[m];
@ -1155,10 +1155,10 @@ void PairLubricateUPoly::init_style()
  // are re-calculated at every step.

  flagdeform = flagwall = 0;
-  for (int i = 0; i < modify->nfix; i++){
+  for (int i = 0; i < modify->nfix; i++) {
    if (strcmp(modify->fix[i]->style,"deform") == 0)
      flagdeform = 1;
-    else if (strstr(modify->fix[i]->style,"wall") != nullptr){
+    else if (strstr(modify->fix[i]->style,"wall") != nullptr) {
      if (flagwall)
        error->all(FLERR,
                   "Cannot use multiple fix wall commands with "
@ -1176,14 +1176,14 @@ void PairLubricateUPoly::init_style()
    if (!flagwall) vol_T = domain->xprd*domain->yprd*domain->zprd;
  else {
    double wallhi[3], walllo[3];
-    for (int j = 0; j < 3; j++){
+    for (int j = 0; j < 3; j++) {
      wallhi[j] = domain->prd[j];
      walllo[j] = 0;
    }
-    for (int m = 0; m < wallfix->nwall; m++){
+    for (int m = 0; m < wallfix->nwall; m++) {
      int dim = wallfix->wallwhich[m] / 2;
      int side = wallfix->wallwhich[m] % 2;
-      if (wallfix->xstyle[m] == VARIABLE){
+      if (wallfix->xstyle[m] == VARIABLE) {
        wallfix->xindex[m] = input->variable->find(wallfix->xstr[m]);
        //Since fix->wall->init happens after pair->init_style
        wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
@ -1214,7 +1214,7 @@ void PairLubricateUPoly::init_style()
  if (!flagVF) vol_f = 0;

  if (!comm->me) {
-    if(logfile)
+    if (logfile)
      fprintf(logfile, "lubricateU: vol_f = %g, vol_p = %g, vol_T = %g\n",
          vol_f,vol_P,vol_T);
    if (screen)
--- a/src/COLLOID/pair_lubricate_poly.cpp
+++ b/src/COLLOID/pair_lubricate_poly.cpp
@ -137,20 +137,20 @@ void PairLubricatePoly::compute(int eflag, int vflag)

  double dims[3], wallcoord;
  if (flagVF) // Flag for volume fraction corrections
-    if (flagdeform || flagwall == 2){ // Possible changes in volume fraction
+    if (flagdeform || flagwall == 2) { // Possible changes in volume fraction
      if (flagdeform && !flagwall)
        for (j = 0; j < 3; j++)
          dims[j] = domain->prd[j];
-      else if (flagwall == 2 || (flagdeform && flagwall == 1)){
+      else if (flagwall == 2 || (flagdeform && flagwall == 1)) {
         double wallhi[3], walllo[3];
-         for (int j = 0; j < 3; j++){
+         for (int j = 0; j < 3; j++) {
           wallhi[j] = domain->prd[j];
           walllo[j] = 0;
         }
-         for (int m = 0; m < wallfix->nwall; m++){
+         for (int m = 0; m < wallfix->nwall; m++) {
           int dim = wallfix->wallwhich[m] / 2;
           int side = wallfix->wallwhich[m] % 2;
-           if (wallfix->xstyle[m] == VARIABLE){
+           if (wallfix->xstyle[m] == VARIABLE) {
             wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
           }
           else wallcoord = wallfix->coord0[m];
@ -461,7 +461,7 @@ void PairLubricatePoly::init_style()
  // are re-calculated at every step.

  shearing = flagdeform = flagwall = 0;
-  for (int i = 0; i < modify->nfix; i++){
+  for (int i = 0; i < modify->nfix; i++) {
    if (strcmp(modify->fix[i]->style,"deform") == 0) {
      shearing = flagdeform = 1;
      if (((FixDeform *) modify->fix[i])->remapflag != Domain::V_REMAP)
@ -478,9 +478,9 @@ void PairLubricatePoly::init_style()
      if (wallfix->xflag) flagwall = 2; // Moving walls exist
    }

-    if (strstr(modify->fix[i]->style,"wall") != nullptr){
+    if (strstr(modify->fix[i]->style,"wall") != nullptr) {
      flagwall = 1; // Walls exist
-      if (((FixWall *) modify->fix[i])->xflag ) {
+      if (((FixWall *) modify->fix[i])->xflag) {
        flagwall = 2; // Moving walls exist
        wallfix = (FixWall *) modify->fix[i];
      }
@ -492,14 +492,14 @@ void PairLubricatePoly::init_style()
  if (!flagwall) vol_T = domain->xprd*domain->yprd*domain->zprd;
  else {
    double wallhi[3], walllo[3];
-    for (int j = 0; j < 3; j++){
+    for (int j = 0; j < 3; j++) {
      wallhi[j] = domain->prd[j];
      walllo[j] = 0;
    }
-    for (int m = 0; m < wallfix->nwall; m++){
+    for (int m = 0; m < wallfix->nwall; m++) {
      int dim = wallfix->wallwhich[m] / 2;
      int side = wallfix->wallwhich[m] % 2;
-      if (wallfix->xstyle[m] == VARIABLE){
+      if (wallfix->xstyle[m] == VARIABLE) {
        wallfix->xindex[m] = input->variable->find(wallfix->xstr[m]);
        //Since fix->wall->init happens after pair->init_style
        wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
--- a/src/COMPRESS/dump_atom_gz.cpp
+++ b/src/COMPRESS/dump_atom_gz.cpp
@ -168,7 +168,7 @@ void DumpAtomGZ::write()
 int DumpAtomGZ::modify_param(int narg, char **arg)
 {
  int consumed = DumpAtom::modify_param(narg, arg);
-  if(consumed == 0) {
+  if (consumed == 0) {
    if (strcmp(arg[0],"compression_level") == 0) {
      if (narg < 2) error->all(FLERR,"Illegal dump_modify command");
      int min_level = Z_DEFAULT_COMPRESSION;
--- a/src/COMPRESS/dump_atom_zstd.cpp
+++ b/src/COMPRESS/dump_atom_zstd.cpp
@ -171,7 +171,7 @@ void DumpAtomZstd::write()
 int DumpAtomZstd::modify_param(int narg, char **arg)
 {
  int consumed = DumpAtom::modify_param(narg, arg);
-  if(consumed == 0) {
+  if (consumed == 0) {
    try {
      if (strcmp(arg[0],"checksum") == 0) {
        if (narg < 2) error->all(FLERR,"Illegal dump_modify command");
--- a/src/COMPRESS/dump_cfg_gz.cpp
+++ b/src/COMPRESS/dump_cfg_gz.cpp
@ -176,7 +176,7 @@ void DumpCFGGZ::write()
 int DumpCFGGZ::modify_param(int narg, char **arg)
 {
  int consumed = DumpCFG::modify_param(narg, arg);
-  if(consumed == 0) {
+  if (consumed == 0) {
    if (strcmp(arg[0],"compression_level") == 0) {
      if (narg < 2) error->all(FLERR,"Illegal dump_modify command");
      int min_level = Z_DEFAULT_COMPRESSION;
--- a/src/COMPRESS/dump_cfg_zstd.cpp
+++ b/src/COMPRESS/dump_cfg_zstd.cpp
@ -173,7 +173,7 @@ void DumpCFGZstd::write()
 int DumpCFGZstd::modify_param(int narg, char **arg)
 {
  int consumed = DumpCFG::modify_param(narg, arg);
-  if(consumed == 0) {
+  if (consumed == 0) {
    try {
      if (strcmp(arg[0],"checksum") == 0) {
        if (narg < 2) error->all(FLERR,"Illegal dump_modify command");
--- a/src/COMPRESS/dump_custom_gz.cpp
+++ b/src/COMPRESS/dump_custom_gz.cpp
@ -168,7 +168,7 @@ void DumpCustomGZ::write()
 int DumpCustomGZ::modify_param(int narg, char **arg)
 {
  int consumed = DumpCustom::modify_param(narg, arg);
-  if(consumed == 0) {
+  if (consumed == 0) {
    if (strcmp(arg[0],"compression_level") == 0) {
      if (narg < 2) error->all(FLERR,"Illegal dump_modify command");
      int min_level = Z_DEFAULT_COMPRESSION;
--- a/src/COMPRESS/dump_custom_zstd.cpp
+++ b/src/COMPRESS/dump_custom_zstd.cpp
@ -171,7 +171,7 @@ void DumpCustomZstd::write()
 int DumpCustomZstd::modify_param(int narg, char **arg)
 {
  int consumed = DumpCustom::modify_param(narg, arg);
-  if(consumed == 0) {
+  if (consumed == 0) {
    try {
      if (strcmp(arg[0],"checksum") == 0) {
        if (narg < 2) error->all(FLERR,"Illegal dump_modify command");
--- a/src/COMPRESS/dump_local_gz.cpp
+++ b/src/COMPRESS/dump_local_gz.cpp
@ -183,7 +183,7 @@ void DumpLocalGZ::write()
 int DumpLocalGZ::modify_param(int narg, char **arg)
 {
  int consumed = DumpLocal::modify_param(narg, arg);
-  if(consumed == 0) {
+  if (consumed == 0) {
    if (strcmp(arg[0],"compression_level") == 0) {
      if (narg < 2) error->all(FLERR,"Illegal dump_modify command");
      int min_level = Z_DEFAULT_COMPRESSION;
--- a/src/COMPRESS/dump_local_zstd.cpp
+++ b/src/COMPRESS/dump_local_zstd.cpp
@ -171,7 +171,7 @@ void DumpLocalZstd::write()
 int DumpLocalZstd::modify_param(int narg, char **arg)
 {
  int consumed = DumpLocal::modify_param(narg, arg);
-  if(consumed == 0) {
+  if (consumed == 0) {
    try {
      if (strcmp(arg[0],"checksum") == 0) {
        if (narg < 2) error->all(FLERR,"Illegal dump_modify command");
--- a/src/COMPRESS/dump_xyz_gz.cpp
+++ b/src/COMPRESS/dump_xyz_gz.cpp
@ -147,7 +147,7 @@ void DumpXYZGZ::write()
 int DumpXYZGZ::modify_param(int narg, char **arg)
 {
  int consumed = DumpXYZ::modify_param(narg, arg);
-  if(consumed == 0) {
+  if (consumed == 0) {
    if (strcmp(arg[0],"compression_level") == 0) {
      if (narg < 2) error->all(FLERR,"Illegal dump_modify command");
      int min_level = Z_DEFAULT_COMPRESSION;
--- a/src/COMPRESS/dump_xyz_zstd.cpp
+++ b/src/COMPRESS/dump_xyz_zstd.cpp
@ -145,7 +145,7 @@ void DumpXYZZstd::write()
 int DumpXYZZstd::modify_param(int narg, char **arg)
 {
  int consumed = DumpXYZ::modify_param(narg, arg);
-  if(consumed == 0) {
+  if (consumed == 0) {
    try {
      if (strcmp(arg[0],"checksum") == 0) {
        if (narg < 2) error->all(FLERR,"Illegal dump_modify command");
--- a/src/COMPRESS/zstd_file_writer.cpp
+++ b/src/COMPRESS/zstd_file_writer.cpp
@ -48,7 +48,7 @@ ZstdFileWriter::~ZstdFileWriter()

 void ZstdFileWriter::open(const std::string &path)
 {
-    if(isopen()) return;
+    if (isopen()) return;

    fp = fopen(path.c_str(), "wb");

@ -72,7 +72,7 @@ void ZstdFileWriter::open(const std::string &path)

 size_t ZstdFileWriter::write(const void * buffer, size_t length)
 {
-  if(!isopen()) return 0;
+  if (!isopen()) return 0;

  ZSTD_inBuffer input = { buffer, length, 0 };
  ZSTD_EndDirective mode = ZSTD_e_continue;
@ -81,7 +81,7 @@ size_t ZstdFileWriter::write(const void * buffer, size_t length)
    ZSTD_outBuffer output = { out_buffer, out_buffer_size, 0 };
    ZSTD_compressStream2(cctx, &output, &input, mode);
    fwrite(out_buffer, sizeof(char), output.pos, fp);
-  } while(input.pos < input.size);
+  } while (input.pos < input.size);

  return length;
 }
@ -90,7 +90,7 @@ size_t ZstdFileWriter::write(const void * buffer, size_t length)

 void ZstdFileWriter::flush()
 {
-  if(!isopen()) return;
+  if (!isopen()) return;

  size_t remaining;
  ZSTD_inBuffer input = { nullptr, 0, 0 };
@ -100,7 +100,7 @@ void ZstdFileWriter::flush()
    ZSTD_outBuffer output = { out_buffer, out_buffer_size, 0 };
    remaining = ZSTD_compressStream2(cctx, &output, &input, mode);
    fwrite(out_buffer, sizeof(char), output.pos, fp);
-  } while(remaining);
+  } while (remaining);

  fflush(fp);
 }
@ -109,7 +109,7 @@ void ZstdFileWriter::flush()

 void ZstdFileWriter::close()
 {
-  if(!isopen()) return;
+  if (!isopen()) return;

  size_t remaining;
  ZSTD_inBuffer input = { nullptr, 0, 0 };
@ -119,7 +119,7 @@ void ZstdFileWriter::close()
    ZSTD_outBuffer output = { out_buffer, out_buffer_size, 0 };
    remaining = ZSTD_compressStream2(cctx, &output, &input, mode);
    fwrite(out_buffer, sizeof(char), output.pos, fp);
-  } while(remaining);
+  } while (remaining);

  ZSTD_freeCCtx(cctx);
  cctx = nullptr;
@ -144,7 +144,7 @@ void ZstdFileWriter::setCompressionLevel(int level)
  const int min_level = ZSTD_minCLevel();
  const int max_level = ZSTD_maxCLevel();

-  if(level < min_level || level > max_level)
+  if (level < min_level || level > max_level)
    throw FileWriterException(fmt::format("Compression level must in the range of [{}, {}]", min_level, max_level));

  compression_level = level;
--- a/src/CORESHELL/compute_temp_cs.cpp
+++ b/src/CORESHELL/compute_temp_cs.cpp
@ -232,7 +232,7 @@ double ComputeTempCS::compute_scalar()

  double t = 0.0;

-  for (int i = 0; i < nlocal; i++){
+  for (int i = 0; i < nlocal; i++) {
    if (mask[i] & groupbit) {
      vthermal[0] = v[i][0] - vint[i][0];
      vthermal[1] = v[i][1] - vint[i][1];
@ -271,7 +271,7 @@ void ComputeTempCS::compute_vector()
  double t[6];
  for (int i = 0; i < 6; i++) t[i] = 0.0;

-  for (int i = 0; i < nlocal; i++){
+  for (int i = 0; i < nlocal; i++) {
    if (mask[i] & groupbit) {
      if (rmass) massone = rmass[i];
      else massone = mass[type[i]];
--- a/src/Depend.sh
+++ b/src/Depend.sh
@ -106,6 +106,10 @@ if (test $1 = "PERI") then
  depend USER-OMP
 fi

+if (test $1 = "PYTHON") then
+  depend MLIAP
+fi
+
 if (test $1 = "RIGID") then
  depend KOKKOS
  depend USER-OMP
@ -114,6 +118,7 @@ fi

 if (test $1 = "SNAP") then
  depend KOKKOS
+  depend MLIAP
 fi

 if (test $1 = "USER-CGSDK") then
--- a/src/GPU/pair_eam_alloy_gpu.cpp
+++ b/src/GPU/pair_eam_alloy_gpu.cpp
@ -370,7 +370,7 @@ void PairEAMAlloyGPU::read_file(char *filename)
  Setfl *file = setfl;

  // read potential file
-  if(comm->me == 0) {
+  if (comm->me == 0) {
    PotentialFileReader reader(PairEAM::lmp, filename,
                               "eam/alloy", unit_convert_flag);

--- a/src/GPU/pair_eam_fs_gpu.cpp
+++ b/src/GPU/pair_eam_fs_gpu.cpp
@ -370,7 +370,7 @@ void PairEAMFSGPU::read_file(char *filename)
  Fs *file = fs;

  // read potential file
-  if(comm->me == 0) {
+  if (comm->me == 0) {
    PotentialFileReader reader(PairEAM::lmp, filename, "eam/fs",
                               unit_convert_flag);

--- a/src/GPU/pair_vashishta_gpu.cpp
+++ b/src/GPU/pair_vashishta_gpu.cpp
@ -141,7 +141,7 @@ void PairVashishtaGPU::compute(int eflag, int vflag)

 void PairVashishtaGPU::allocate()
 {
-  if(!allocated) {
+  if (!allocated) {
    PairVashishta::allocate();
  }
  int n = atom->ntypes;
@ -260,7 +260,7 @@ void PairVashishtaGPU::init_style()

 double PairVashishtaGPU::init_one(int i, int j)
 {
-  if(!gpu_allocated) {
+  if (!gpu_allocated) {
    allocate();
  }
  if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");
--- a/src/GRANULAR/fix_wall_gran.cpp
+++ b/src/GRANULAR/fix_wall_gran.cpp
@ -1303,7 +1303,7 @@ void FixWallGran::granular(double rsq, double dx, double dy, double dz,
    relrot2 = omega[1];
    relrot3 = omega[2];
  }
-  if (roll_model != ROLL_NONE){
+  if (roll_model != ROLL_NONE) {

    // rolling velocity, see eq. 31 of Wang et al, Particuology v 23, p 49 (2015)
    // This is different from the Marshall papers,
--- a/src/GRANULAR/pair_granular.cpp
+++ b/src/GRANULAR/pair_granular.cpp
@ -536,7 +536,7 @@ void PairGranular::compute(int eflag, int vflag)
        }

        if (roll_model[itype][jtype] != ROLL_NONE ||
-            twist_model[itype][jtype] != TWIST_NONE){
+            twist_model[itype][jtype] != TWIST_NONE) {
          relrot1 = omega[i][0] - omega[j][0];
          relrot2 = omega[i][1] - omega[j][1];
          relrot3 = omega[i][2] - omega[j][2];
--- a/src/KIM/kim_query.cpp
+++ b/src/KIM/kim_query.cpp
@ -252,7 +252,7 @@ char *do_query(char *qfunction, char * model_name, int narg, char **arg,
          }
        } else {
          query += fmt::format("&{}=[", key);
-          while (n != std::string::npos){
+          while (n != std::string::npos) {
            std::string sval = val.substr(0, n);
            if (utils::is_integer(sval) ||
                utils::is_double(sval) ||
--- a/src/KOKKOS/atom_kokkos.cpp
+++ b/src/KOKKOS/atom_kokkos.cpp
@ -230,9 +230,9 @@ void AtomKokkos::sort()
   reallocate memory to the pointer selected by the mask
 ------------------------------------------------------------------------- */

-void AtomKokkos::grow(unsigned int mask){
+void AtomKokkos::grow(unsigned int mask) {

-  if (mask & SPECIAL_MASK){
+  if (mask & SPECIAL_MASK) {
    memoryKK->destroy_kokkos(k_special, special);
    sync(Device, mask);
    modified(Device, mask);
--- a/src/KOKKOS/atom_kokkos.h
+++ b/src/KOKKOS/atom_kokkos.h
@ -83,16 +83,16 @@ struct SortFunctor {
  ViewType source;
  Kokkos::View<typename ViewType::non_const_data_type,typename ViewType::array_type,device_type> dest;
  IndexView index;
-  SortFunctor(ViewType src, typename std::enable_if<ViewType::dynamic_rank==1,IndexView>::type ind):source(src),index(ind){
+  SortFunctor(ViewType src, typename std::enable_if<ViewType::dynamic_rank==1,IndexView>::type ind):source(src),index(ind) {
    dest = Kokkos::View<typename ViewType::non_const_data_type,typename ViewType::array_type,device_type>("",src.extent(0));
  }
-  SortFunctor(ViewType src, typename std::enable_if<ViewType::dynamic_rank==2,IndexView>::type ind):source(src),index(ind){
+  SortFunctor(ViewType src, typename std::enable_if<ViewType::dynamic_rank==2,IndexView>::type ind):source(src),index(ind) {
    dest = Kokkos::View<typename ViewType::non_const_data_type,typename ViewType::array_type,device_type>("",src.extent(0),src.extent(1));
  }
-  SortFunctor(ViewType src, typename std::enable_if<ViewType::dynamic_rank==3,IndexView>::type ind):source(src),index(ind){
+  SortFunctor(ViewType src, typename std::enable_if<ViewType::dynamic_rank==3,IndexView>::type ind):source(src),index(ind) {
    dest = Kokkos::View<typename ViewType::non_const_data_type,typename ViewType::array_type,device_type>("",src.extent(0),src.extent(1),src.extent(2));
  }
-  SortFunctor(ViewType src, typename std::enable_if<ViewType::dynamic_rank==4,IndexView>::type ind):source(src),index(ind){
+  SortFunctor(ViewType src, typename std::enable_if<ViewType::dynamic_rank==4,IndexView>::type ind):source(src),index(ind) {
    dest = Kokkos::View<typename ViewType::non_const_data_type,typename ViewType::array_type,device_type>("",src.extent(0),src.extent(1),src.extent(2),src.extent(3));
  }
  KOKKOS_INLINE_FUNCTION
@ -100,18 +100,18 @@ struct SortFunctor {
    dest(i) = source(index(i));
  }
  void operator()(const typename std::enable_if<ViewType::rank==2, int>::type& i) {
-    for(int j=0; j < (int)source.extent(1); j++)
+    for (int j=0; j < (int)source.extent(1); j++)
      dest(i,j) = source(index(i),j);
  }
  void operator()(const typename std::enable_if<ViewType::rank==3, int>::type& i) {
-    for(int j=0; j < (int)source.extent(1); j++)
-      for(int k=0; k < (int)source.extent(2); k++)
+    for (int j=0; j < (int)source.extent(1); j++)
+      for (int k=0; k < (int)source.extent(2); k++)
        dest(i,j,k) = source(index(i),j,k);
  }
  void operator()(const typename std::enable_if<ViewType::rank==4, int>::type& i) {
-    for(int j=0; j < (int)source.extent(1); j++)
-      for(int k=0; k < (int)source.extent(2); k++)
-        for(int l=0; l < (int)source.extent(3); l++)
+    for (int j=0; j < (int)source.extent(1); j++)
+      for (int k=0; k < (int)source.extent(2); k++)
+        for (int l=0; l < (int)source.extent(3); l++)
          dest(i,j,k,l) = source(index(i),j,k,l);
  }
 };
--- a/src/KOKKOS/atom_vec_angle_kokkos.cpp
+++ b/src/KOKKOS/atom_vec_angle_kokkos.cpp
@ -281,10 +281,10 @@ int AtomVecAngleKokkos::pack_comm_kokkos(const int &n,
  // Check whether to always run forward communication on the host
  // Choose correct forward PackComm kernel

-  if(commKK->forward_comm_on_host) {
+  if (commKK->forward_comm_on_host) {
    atomKK->sync(Host,X_MASK);
-    if(pbc_flag) {
-      if(domain->triclinic) {
+    if (pbc_flag) {
+      if (domain->triclinic) {
        struct AtomVecAngleKokkos_PackComm<LMPHostType,1,1> f(atomKK->k_x,buf,list,iswap,
          domain->xprd,domain->yprd,domain->zprd,
          domain->xy,domain->xz,domain->yz,pbc);
@ -296,7 +296,7 @@ int AtomVecAngleKokkos::pack_comm_kokkos(const int &n,
        Kokkos::parallel_for(n,f);
      }
    } else {
-      if(domain->triclinic) {
+      if (domain->triclinic) {
        struct AtomVecAngleKokkos_PackComm<LMPHostType,0,1> f(atomKK->k_x,buf,list,iswap,
          domain->xprd,domain->yprd,domain->zprd,
          domain->xy,domain->xz,domain->yz,pbc);
@ -310,8 +310,8 @@ int AtomVecAngleKokkos::pack_comm_kokkos(const int &n,
    }
  } else {
    atomKK->sync(Device,X_MASK);
-    if(pbc_flag) {
-      if(domain->triclinic) {
+    if (pbc_flag) {
+      if (domain->triclinic) {
        struct AtomVecAngleKokkos_PackComm<LMPDeviceType,1,1> f(atomKK->k_x,buf,list,iswap,
          domain->xprd,domain->yprd,domain->zprd,
          domain->xy,domain->xz,domain->yz,pbc);
@ -323,7 +323,7 @@ int AtomVecAngleKokkos::pack_comm_kokkos(const int &n,
        Kokkos::parallel_for(n,f);
      }
    } else {
-      if(domain->triclinic) {
+      if (domain->triclinic) {
        struct AtomVecAngleKokkos_PackComm<LMPDeviceType,0,1> f(atomKK->k_x,buf,list,iswap,
          domain->xprd,domain->yprd,domain->zprd,
          domain->xy,domain->xz,domain->yz,pbc);
@ -396,11 +396,11 @@ int AtomVecAngleKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &li
                                       const int & iswap,
                                       const int nfirst, const int &pbc_flag,
                                       const int* const pbc) {
-  if(commKK->forward_comm_on_host) {
+  if (commKK->forward_comm_on_host) {
    atomKK->sync(Host,X_MASK);
    atomKK->modified(Host,X_MASK);
-    if(pbc_flag) {
-      if(domain->triclinic) {
+    if (pbc_flag) {
+      if (domain->triclinic) {
      struct AtomVecAngleKokkos_PackCommSelf<LMPHostType,1,1>
        f(atomKK->k_x,nfirst,list,iswap,
          domain->xprd,domain->yprd,domain->zprd,
@ -414,7 +414,7 @@ int AtomVecAngleKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &li
      Kokkos::parallel_for(n,f);
      }
    } else {
-      if(domain->triclinic) {
+      if (domain->triclinic) {
      struct AtomVecAngleKokkos_PackCommSelf<LMPHostType,0,1>
        f(atomKK->k_x,nfirst,list,iswap,
          domain->xprd,domain->yprd,domain->zprd,
@ -431,8 +431,8 @@ int AtomVecAngleKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &li
  } else {
    atomKK->sync(Device,X_MASK);
    atomKK->modified(Device,X_MASK);
-    if(pbc_flag) {
-      if(domain->triclinic) {
+    if (pbc_flag) {
+      if (domain->triclinic) {
      struct AtomVecAngleKokkos_PackCommSelf<LMPDeviceType,1,1>
        f(atomKK->k_x,nfirst,list,iswap,
          domain->xprd,domain->yprd,domain->zprd,
@ -446,7 +446,7 @@ int AtomVecAngleKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &li
      Kokkos::parallel_for(n,f);
      }
    } else {
-      if(domain->triclinic) {
+      if (domain->triclinic) {
      struct AtomVecAngleKokkos_PackCommSelf<LMPDeviceType,0,1>
        f(atomKK->k_x,nfirst,list,iswap,
          domain->xprd,domain->yprd,domain->zprd,
@ -491,8 +491,8 @@ struct AtomVecAngleKokkos_UnpackComm {
 /* ---------------------------------------------------------------------- */

 void AtomVecAngleKokkos::unpack_comm_kokkos(const int &n, const int &first,
-    const DAT::tdual_xfloat_2d &buf ) {
-  if(commKK->forward_comm_on_host) {
+    const DAT::tdual_xfloat_2d &buf) {
+  if (commKK->forward_comm_on_host) {
    atomKK->sync(Host,X_MASK);
    atomKK->modified(Host,X_MASK);
    struct AtomVecAngleKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first);
@ -641,7 +641,7 @@ void AtomVecAngleKokkos::unpack_comm_vel(int n, int first, double *buf)

 int AtomVecAngleKokkos::pack_reverse(int n, int first, double *buf)
 {
-  if(n > 0)
+  if (n > 0)
    atomKK->sync(Host,F_MASK);

  int m = 0;
@ -658,7 +658,7 @@ int AtomVecAngleKokkos::pack_reverse(int n, int first, double *buf)

 void AtomVecAngleKokkos::unpack_reverse(int n, int *list, double *buf)
 {
-  if(n > 0)
+  if (n > 0)
    atomKK->modified(Host,F_MASK);

  int m = 0;
@ -742,7 +742,7 @@ int AtomVecAngleKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
      dy = pbc[1];
      dz = pbc[2];
    }
-    if(space==Host) {
+    if (space==Host) {
      AtomVecAngleKokkos_PackBorder<LMPHostType,1> f(
        buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
        iswap,h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz);
@ -756,7 +756,7 @@ int AtomVecAngleKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,

  } else {
    dx = dy = dz = 0;
-    if(space==Host) {
+    if (space==Host) {
      AtomVecAngleKokkos_PackBorder<LMPHostType,0> f(
        buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
        iswap,h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz);
@ -939,7 +939,7 @@ struct AtomVecAngleKokkos_UnpackBorder {
      typename AT::t_tagint_1d &molecule,
      const int& first):
    _buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_molecule(molecule),
-    _first(first){
+    _first(first) {
  };

  KOKKOS_INLINE_FUNCTION
@ -963,7 +963,7 @@ void AtomVecAngleKokkos::unpack_border_kokkos(const int &n, const int &first,
  atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
  while (first+n >= nmax) grow(0);
  atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
-  if(space==Host) {
+  if (space==Host) {
    struct AtomVecAngleKokkos_UnpackBorder<LMPHostType>
      f(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,h_molecule,first);
    Kokkos::parallel_for(n,f);
@ -1129,7 +1129,7 @@ struct AtomVecAngleKokkos_PackExchangeFunctor {
    _sendlist(sendlist.template view<DeviceType>()),
    _copylist(copylist.template view<DeviceType>()),
    _nlocal(nlocal),_dim(dim),
-    _lo(lo),_hi(hi){
+    _lo(lo),_hi(hi) {
    // 3 comp of x, 3 comp of v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial,
    // maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom,
    // 1 num_angle, angle_per_atom angle_type, angle_per_atom angle_atom1, angle_atom2,
@ -1178,7 +1178,7 @@ struct AtomVecAngleKokkos_PackExchangeFunctor {

    const int j = _copylist(mysend);

-    if(j>-1) {
+    if (j>-1) {
    _xw(i,0) = _x(j,0);
    _xw(i,1) = _x(j,1);
    _xw(i,2) = _x(j,2);
@ -1220,12 +1220,12 @@ int AtomVecAngleKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_
                                            X_FLOAT hi )
 {
  const int elements = 17+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom;
-  if(nsend > (int) (k_buf.view<LMPHostType>().extent(0)*
+  if (nsend > (int) (k_buf.view<LMPHostType>().extent(0)*
              k_buf.view<LMPHostType>().extent(1))/elements) {
    int newsize = nsend*elements/k_buf.view<LMPHostType>().extent(1)+1;
    k_buf.resize(newsize,k_buf.view<LMPHostType>().extent(1));
  }
-  if(space == Host) {
+  if (space == Host) {
    AtomVecAngleKokkos_PackExchangeFunctor<LMPHostType>
      f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
    Kokkos::parallel_for(nsend,f);
@ -1333,7 +1333,7 @@ struct AtomVecAngleKokkos_UnpackExchangeFunctor {
    _angle_atom2(atom->k_angle_atom2.view<DeviceType>()),
    _angle_atom3(atom->k_angle_atom3.view<DeviceType>()),
    _nlocal(nlocal.template view<DeviceType>()),_dim(dim),
-    _lo(lo),_hi(hi){
+    _lo(lo),_hi(hi) {
    elements =17+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom;
    const int maxsendlist = (buf.template view<DeviceType>().extent(0)*
                             buf.template view<DeviceType>().extent(1))/elements;
@ -1386,7 +1386,7 @@ int AtomVecAngleKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int n
                                              int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,
                                              ExecutionSpace space) {
  const size_t elements = 17+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom;
-  if(space == Host) {
+  if (space == Host) {
    k_count.h_view(0) = nlocal;
    AtomVecAngleKokkos_UnpackExchangeFunctor<LMPHostType>
      f(atomKK,k_buf,k_count,dim,lo,hi);
--- a/src/KOKKOS/atom_vec_atomic_kokkos.cpp
+++ b/src/KOKKOS/atom_vec_atomic_kokkos.cpp
@ -200,7 +200,7 @@ int AtomVecAtomicKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
      dy = pbc[1];
      dz = pbc[2];
    }
-    if(space==Host) {
+    if (space==Host) {
      AtomVecAtomicKokkos_PackBorder<LMPHostType,1> f(
        buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
        iswap,h_x,h_tag,h_type,h_mask,dx,dy,dz);
@ -214,7 +214,7 @@ int AtomVecAtomicKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,

  } else {
    dx = dy = dz = 0;
-    if(space==Host) {
+    if (space==Host) {
      AtomVecAtomicKokkos_PackBorder<LMPHostType,0> f(
        buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
        iswap,h_x,h_tag,h_type,h_mask,dx,dy,dz);
@ -374,7 +374,7 @@ struct AtomVecAtomicKokkos_UnpackBorder {
      typename ArrayTypes<DeviceType>::t_int_1d &type,
      typename ArrayTypes<DeviceType>::t_int_1d &mask,
      const int& first):
-      _buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_first(first){
+      _buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_first(first) {
  };

  KOKKOS_INLINE_FUNCTION
@ -396,7 +396,7 @@ void AtomVecAtomicKokkos::unpack_border_kokkos(const int &n, const int &first,
  atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK);
  while (first+n >= nmax) grow(0);
  atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK);
-  if(space==Host) {
+  if (space==Host) {
    struct AtomVecAtomicKokkos_UnpackBorder<LMPHostType> f(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,first);
    Kokkos::parallel_for(n,f);
  } else {
@ -504,7 +504,7 @@ struct AtomVecAtomicKokkos_PackExchangeFunctor {
                _sendlist(sendlist.template view<DeviceType>()),
                _copylist(copylist.template view<DeviceType>()),
                _nlocal(nlocal),_dim(dim),
-                _lo(lo),_hi(hi){
+                _lo(lo),_hi(hi) {
    const size_t elements = 11;
    const int maxsendlist = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/elements;

@ -527,7 +527,7 @@ struct AtomVecAtomicKokkos_PackExchangeFunctor {
    _buf(mysend,10) = d_ubuf(_image[i]).d;
    const int j = _copylist(mysend);

-    if(j>-1) {
+    if (j>-1) {
    _xw(i,0) = _x(j,0);
    _xw(i,1) = _x(j,1);
    _xw(i,2) = _x(j,2);
@ -546,11 +546,11 @@ struct AtomVecAtomicKokkos_PackExchangeFunctor {

 int AtomVecAtomicKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d k_sendlist,DAT::tdual_int_1d k_copylist,ExecutionSpace space,int dim,X_FLOAT lo,X_FLOAT hi )
 {
-  if(nsend > (int) (k_buf.view<LMPHostType>().extent(0)*k_buf.view<LMPHostType>().extent(1))/11) {
+  if (nsend > (int) (k_buf.view<LMPHostType>().extent(0)*k_buf.view<LMPHostType>().extent(1))/11) {
    int newsize = nsend*11/k_buf.view<LMPHostType>().extent(1)+1;
    k_buf.resize(newsize,k_buf.view<LMPHostType>().extent(1));
  }
-  if(space == Host) {
+  if (space == Host) {
    AtomVecAtomicKokkos_PackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
    Kokkos::parallel_for(nsend,f);
    return nsend*11;
@ -615,7 +615,7 @@ struct AtomVecAtomicKokkos_UnpackExchangeFunctor {
                _mask(atom->k_mask.view<DeviceType>()),
                _image(atom->k_image.view<DeviceType>()),
                _nlocal(nlocal.template view<DeviceType>()),_dim(dim),
-                _lo(lo),_hi(hi){
+                _lo(lo),_hi(hi) {
    const size_t elements = 11;
    const int maxsendlist = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/elements;

@ -644,7 +644,7 @@ struct AtomVecAtomicKokkos_UnpackExchangeFunctor {
 /* ---------------------------------------------------------------------- */

 int AtomVecAtomicKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,ExecutionSpace space) {
-  if(space == Host) {
+  if (space == Host) {
    k_count.h_view(0) = nlocal;
    AtomVecAtomicKokkos_UnpackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_count,dim,lo,hi);
    Kokkos::parallel_for(nrecv/11,f);
--- a/src/KOKKOS/atom_vec_bond_kokkos.cpp
+++ b/src/KOKKOS/atom_vec_bond_kokkos.cpp
@ -250,7 +250,7 @@ int AtomVecBondKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
      dy = pbc[1];
      dz = pbc[2];
    }
-    if(space==Host) {
+    if (space==Host) {
      AtomVecBondKokkos_PackBorder<LMPHostType,1> f(
        buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
        iswap,h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz);
@ -264,7 +264,7 @@ int AtomVecBondKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,

  } else {
    dx = dy = dz = 0;
-    if(space==Host) {
+    if (space==Host) {
      AtomVecBondKokkos_PackBorder<LMPHostType,0> f(
        buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
        iswap,h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz);
@ -447,7 +447,7 @@ struct AtomVecBondKokkos_UnpackBorder {
      typename AT::t_tagint_1d &molecule,
      const int& first):
    _buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_molecule(molecule),
-    _first(first){
+    _first(first) {
  };

  KOKKOS_INLINE_FUNCTION
@ -471,7 +471,7 @@ void AtomVecBondKokkos::unpack_border_kokkos(const int &n, const int &first,
  atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
  while (first+n >= nmax) grow(0);
  atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
-  if(space==Host) {
+  if (space==Host) {
    struct AtomVecBondKokkos_UnpackBorder<LMPHostType>
      f(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,h_molecule,first);
    Kokkos::parallel_for(n,f);
@ -621,7 +621,7 @@ struct AtomVecBondKokkos_PackExchangeFunctor {
    _sendlist(sendlist.template view<DeviceType>()),
    _copylist(copylist.template view<DeviceType>()),
    _nlocal(nlocal),_dim(dim),
-    _lo(lo),_hi(hi){
+    _lo(lo),_hi(hi) {
    // 3 comp of x, 3 comp of v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial,
    // maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom,
    // 1 to store buffer length
@ -661,7 +661,7 @@ struct AtomVecBondKokkos_PackExchangeFunctor {

    const int j = _copylist(mysend);

-    if(j>-1) {
+    if (j>-1) {
    _xw(i,0) = _x(j,0);
    _xw(i,1) = _x(j,1);
    _xw(i,2) = _x(j,2);
@ -696,12 +696,12 @@ int AtomVecBondKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2
                                            X_FLOAT hi )
 {
  const int elements = 16+atomKK->maxspecial+atomKK->bond_per_atom+atomKK->bond_per_atom;
-  if(nsend > (int) (k_buf.view<LMPHostType>().extent(0)*
+  if (nsend > (int) (k_buf.view<LMPHostType>().extent(0)*
              k_buf.view<LMPHostType>().extent(1))/elements) {
    int newsize = nsend*elements/k_buf.view<LMPHostType>().extent(1)+1;
    k_buf.resize(newsize,k_buf.view<LMPHostType>().extent(1));
  }
-  if(space == Host) {
+  if (space == Host) {
    AtomVecBondKokkos_PackExchangeFunctor<LMPHostType>
      f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
    Kokkos::parallel_for(nsend,f);
@ -794,7 +794,7 @@ struct AtomVecBondKokkos_UnpackExchangeFunctor {
                _bond_type(atom->k_bond_type.view<DeviceType>()),
                _bond_atom(atom->k_bond_atom.view<DeviceType>()),
    _nlocal(nlocal.template view<DeviceType>()),_dim(dim),
-    _lo(lo),_hi(hi){
+    _lo(lo),_hi(hi) {
    elements = 16+atom->maxspecial+atom->bond_per_atom+atom->bond_per_atom;
    const int maxsendlist = (buf.template view<DeviceType>().extent(0)*
                             buf.template view<DeviceType>().extent(1))/elements;
@ -840,7 +840,7 @@ int AtomVecBondKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nr
                                              int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,
                                              ExecutionSpace space) {
  const size_t elements = 16+atomKK->maxspecial+atomKK->bond_per_atom+atomKK->bond_per_atom;
-  if(space == Host) {
+  if (space == Host) {
    k_count.h_view(0) = nlocal;
    AtomVecBondKokkos_UnpackExchangeFunctor<LMPHostType>
      f(atomKK,k_buf,k_count,dim,lo,hi);
--- a/src/KOKKOS/atom_vec_charge_kokkos.cpp
+++ b/src/KOKKOS/atom_vec_charge_kokkos.cpp
@ -267,7 +267,7 @@ int AtomVecChargeKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
      dy = pbc[1];
      dz = pbc[2];
    }
-    if(space==Host) {
+    if (space==Host) {
      AtomVecChargeKokkos_PackBorder<LMPHostType,1> f(
        buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
        iswap,h_x,h_tag,h_type,h_mask,h_q,dx,dy,dz);
@ -281,7 +281,7 @@ int AtomVecChargeKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,

  } else {
    dx = dy = dz = 0;
-    if(space==Host) {
+    if (space==Host) {
      AtomVecChargeKokkos_PackBorder<LMPHostType,0> f(
        buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
        iswap,h_x,h_tag,h_type,h_mask,h_q,dx,dy,dz);
@ -463,7 +463,7 @@ struct AtomVecChargeKokkos_UnpackBorder {
      typename ArrayTypes<DeviceType>::t_int_1d &mask,
      typename ArrayTypes<DeviceType>::t_float_1d &q,
      const int& first):
-    _buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_q(q),_first(first){
+    _buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_q(q),_first(first) {
  };

  KOKKOS_INLINE_FUNCTION
@ -485,7 +485,7 @@ void AtomVecChargeKokkos::unpack_border_kokkos(const int &n, const int &first,
  if (first+n >= nmax) {
    grow(first+n+100);
  }
-  if(space==Host) {
+  if (space==Host) {
    struct AtomVecChargeKokkos_UnpackBorder<LMPHostType>
      f(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,h_q,first);
    Kokkos::parallel_for(n,f);
@ -618,7 +618,7 @@ struct AtomVecChargeKokkos_PackExchangeFunctor {
    _sendlist(sendlist.template view<DeviceType>()),
    _copylist(copylist.template view<DeviceType>()),
    _nlocal(nlocal),_dim(dim),
-    _lo(lo),_hi(hi){
+    _lo(lo),_hi(hi) {
    const size_t elements = 12;
    const int maxsendlist = (buf.template view<DeviceType>().extent(0)*
                             buf.template view<DeviceType>().extent(1))/elements;
@ -643,7 +643,7 @@ struct AtomVecChargeKokkos_PackExchangeFunctor {
    _buf(mysend,11) = _q[i];
    const int j = _copylist(mysend);

-    if(j>-1) {
+    if (j>-1) {
    _xw(i,0) = _x(j,0);
    _xw(i,1) = _x(j,1);
    _xw(i,2) = _x(j,2);
@ -667,11 +667,11 @@ int AtomVecChargeKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat
                                              ExecutionSpace space,int dim,
                                              X_FLOAT lo,X_FLOAT hi )
 {
-  if(nsend > (int) (k_buf.view<LMPHostType>().extent(0)*k_buf.view<LMPHostType>().extent(1))/12) {
+  if (nsend > (int) (k_buf.view<LMPHostType>().extent(0)*k_buf.view<LMPHostType>().extent(1))/12) {
    int newsize = nsend*12/k_buf.view<LMPHostType>().extent(1)+1;
    k_buf.resize(newsize,k_buf.view<LMPHostType>().extent(1));
  }
-  if(space == Host) {
+  if (space == Host) {
    AtomVecChargeKokkos_PackExchangeFunctor<LMPHostType>
      f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
    Kokkos::parallel_for(nsend,f);
@ -740,7 +740,7 @@ struct AtomVecChargeKokkos_UnpackExchangeFunctor {
    _image(atom->k_image.view<DeviceType>()),
    _q(atom->k_q.view<DeviceType>()),
    _nlocal(nlocal.template view<DeviceType>()),_dim(dim),
-    _lo(lo),_hi(hi){
+    _lo(lo),_hi(hi) {
    const size_t elements = 12;
    const int maxsendlist = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/elements;

@ -772,7 +772,7 @@ struct AtomVecChargeKokkos_UnpackExchangeFunctor {
 int AtomVecChargeKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,
                                                int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,
                                                ExecutionSpace space) {
-  if(space == Host) {
+  if (space == Host) {
    k_count.h_view(0) = nlocal;
    AtomVecChargeKokkos_UnpackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_count,dim,lo,hi);
    Kokkos::parallel_for(nrecv/12,f);
@ -1131,7 +1131,7 @@ void AtomVecChargeKokkos::sync_overlapping_device(ExecutionSpace space, unsigned
      perform_async_copy<DAT::tdual_int_1d>(atomKK->k_mask,space);
    if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync<LMPDeviceType>())
      perform_async_copy<DAT::tdual_imageint_1d>(atomKK->k_image,space);
-    if ((mask & MOLECULE_MASK) && atomKK->k_q.need_sync<LMPDeviceType>())
+    if ((mask & Q_MASK) && atomKK->k_q.need_sync<LMPDeviceType>())
      perform_async_copy<DAT::tdual_float_1d>(atomKK->k_q,space);
  } else {
    if ((mask & X_MASK) && atomKK->k_x.need_sync<LMPHostType>())
@ -1148,7 +1148,7 @@ void AtomVecChargeKokkos::sync_overlapping_device(ExecutionSpace space, unsigned
      perform_async_copy<DAT::tdual_int_1d>(atomKK->k_mask,space);
    if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync<LMPHostType>())
      perform_async_copy<DAT::tdual_imageint_1d>(atomKK->k_image,space);
-    if ((mask & MOLECULE_MASK) && atomKK->k_q.need_sync<LMPHostType>())
+    if ((mask & Q_MASK) && atomKK->k_q.need_sync<LMPHostType>())
      perform_async_copy<DAT::tdual_float_1d>(atomKK->k_q,space);
  }
 }
--- a/src/KOKKOS/atom_vec_dpd_kokkos.cpp
+++ b/src/KOKKOS/atom_vec_dpd_kokkos.cpp
@ -267,10 +267,10 @@ int AtomVecDPDKokkos::pack_comm_kokkos(const int &n,
  // Check whether to always run forward communication on the host
  // Choose correct forward PackComm kernel

-  if(commKK->forward_comm_on_host) {
+  if (commKK->forward_comm_on_host) {
    atomKK->sync(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
-    if(pbc_flag) {
-      if(domain->triclinic) {
+    if (pbc_flag) {
+      if (domain->triclinic) {
        struct AtomVecDPDKokkos_PackComm<LMPHostType,1,1> f(atomKK->k_x,
          atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem,
          buf,list,iswap,
@ -286,7 +286,7 @@ int AtomVecDPDKokkos::pack_comm_kokkos(const int &n,
        Kokkos::parallel_for(n,f);
      }
    } else {
-      if(domain->triclinic) {
+      if (domain->triclinic) {
        struct AtomVecDPDKokkos_PackComm<LMPHostType,0,1> f(atomKK->k_x,
          atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem,
          buf,list,iswap,
@ -304,8 +304,8 @@ int AtomVecDPDKokkos::pack_comm_kokkos(const int &n,
    }
  } else {
    atomKK->sync(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
-    if(pbc_flag) {
-      if(domain->triclinic) {
+    if (pbc_flag) {
+      if (domain->triclinic) {
        struct AtomVecDPDKokkos_PackComm<LMPDeviceType,1,1> f(atomKK->k_x,
          atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem,
          buf,list,iswap,
@ -321,7 +321,7 @@ int AtomVecDPDKokkos::pack_comm_kokkos(const int &n,
        Kokkos::parallel_for(n,f);
      }
    } else {
-      if(domain->triclinic) {
+      if (domain->triclinic) {
        struct AtomVecDPDKokkos_PackComm<LMPDeviceType,0,1> f(atomKK->k_x,
          atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem,
          buf,list,iswap,
@ -409,11 +409,11 @@ struct AtomVecDPDKokkos_PackCommSelf {

 int AtomVecDPDKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, const int & iswap,
                                                                                const int nfirst, const int &pbc_flag, const int* const pbc) {
-  if(commKK->forward_comm_on_host) {
+  if (commKK->forward_comm_on_host) {
    atomKK->sync(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
    atomKK->modified(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
-    if(pbc_flag) {
-      if(domain->triclinic) {
+    if (pbc_flag) {
+      if (domain->triclinic) {
      struct AtomVecDPDKokkos_PackCommSelf<LMPHostType,1,1> f(atomKK->k_x,
          atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem,
          nfirst,list,iswap,
@ -429,7 +429,7 @@ int AtomVecDPDKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list
      Kokkos::parallel_for(n,f);
      }
    } else {
-      if(domain->triclinic) {
+      if (domain->triclinic) {
      struct AtomVecDPDKokkos_PackCommSelf<LMPHostType,0,1> f(atomKK->k_x,
          atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem,
          nfirst,list,iswap,
@ -448,8 +448,8 @@ int AtomVecDPDKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list
  } else {
    atomKK->sync(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
    atomKK->modified(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
-    if(pbc_flag) {
-      if(domain->triclinic) {
+    if (pbc_flag) {
+      if (domain->triclinic) {
      struct AtomVecDPDKokkos_PackCommSelf<LMPDeviceType,1,1> f(atomKK->k_x,
          atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem,
          nfirst,list,iswap,
@ -465,7 +465,7 @@ int AtomVecDPDKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list
      Kokkos::parallel_for(n,f);
      }
    } else {
-      if(domain->triclinic) {
+      if (domain->triclinic) {
      struct AtomVecDPDKokkos_PackCommSelf<LMPDeviceType,0,1> f(atomKK->k_x,
          atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem,
          nfirst,list,iswap,
@ -526,8 +526,8 @@ struct AtomVecDPDKokkos_UnpackComm {
 /* ---------------------------------------------------------------------- */

 void AtomVecDPDKokkos::unpack_comm_kokkos(const int &n, const int &first,
-    const DAT::tdual_xfloat_2d &buf ) {
-  if(commKK->forward_comm_on_host) {
+    const DAT::tdual_xfloat_2d &buf) {
+  if (commKK->forward_comm_on_host) {
    atomKK->sync(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
    atomKK->modified(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
    struct AtomVecDPDKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,
@ -716,7 +716,7 @@ void AtomVecDPDKokkos::unpack_comm_vel(int n, int first, double *buf)

 int AtomVecDPDKokkos::pack_reverse(int n, int first, double *buf)
 {
-  if(n > 0)
+  if (n > 0)
    atomKK->sync(Host,F_MASK);

  int m = 0;
@ -733,7 +733,7 @@ int AtomVecDPDKokkos::pack_reverse(int n, int first, double *buf)

 void AtomVecDPDKokkos::unpack_reverse(int n, int *list, double *buf)
 {
-  if(n > 0) {
+  if (n > 0) {
    atomKK->sync(Host,F_MASK);
    atomKK->modified(Host,F_MASK);
  }
@ -831,7 +831,7 @@ int AtomVecDPDKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, DA
      dy = pbc[1];
      dz = pbc[2];
    }
-    if(space==Host) {
+    if (space==Host) {
      AtomVecDPDKokkos_PackBorder<LMPHostType,1> f(
        buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
        iswap,h_x,h_tag,h_type,h_mask,
@ -849,7 +849,7 @@ int AtomVecDPDKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, DA

  } else {
    dx = dy = dz = 0;
-    if(space==Host) {
+    if (space==Host) {
      AtomVecDPDKokkos_PackBorder<LMPHostType,0> f(
        buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
        iswap,h_x,h_tag,h_type,h_mask,
@ -1134,7 +1134,7 @@ void AtomVecDPDKokkos::unpack_border_kokkos(const int &n, const int &first,
  atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|
                 DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK|
                 UCG_MASK|UCGNEW_MASK|DVECTOR_MASK);
-  if(space==Host) {
+  if (space==Host) {
    struct AtomVecDPDKokkos_UnpackBorder<LMPHostType> f(buf.view<LMPHostType>(),
      h_x,h_tag,h_type,h_mask,
      h_dpdTheta,h_uCond,h_uMech,h_uChem,h_uCG,h_uCGnew,
@ -1326,7 +1326,7 @@ struct AtomVecDPDKokkos_PackExchangeFunctor {
                _sendlist(sendlist.template view<DeviceType>()),
                _copylist(copylist.template view<DeviceType>()),
                _nlocal(nlocal),_dim(dim),
-                _lo(lo),_hi(hi){
+                _lo(lo),_hi(hi) {
    const size_t elements = 17;
    const int maxsendlist = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/elements;

@ -1355,7 +1355,7 @@ struct AtomVecDPDKokkos_PackExchangeFunctor {
    _buf(mysend,16) = _uCGnew[i];
    const int j = _copylist(mysend);

-    if(j>-1) {
+    if (j>-1) {
    _xw(i,0) = _x(j,0);
    _xw(i,1) = _x(j,1);
    _xw(i,2) = _x(j,2);
@ -1380,7 +1380,7 @@ struct AtomVecDPDKokkos_PackExchangeFunctor {

 int AtomVecDPDKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d k_sendlist,DAT::tdual_int_1d k_copylist,ExecutionSpace space,int dim,X_FLOAT lo,X_FLOAT hi )
 {
-  if(nsend > (int) (k_buf.view<LMPHostType>().extent(0)*k_buf.view<LMPHostType>().extent(1))/17) {
+  if (nsend > (int) (k_buf.view<LMPHostType>().extent(0)*k_buf.view<LMPHostType>().extent(1))/17) {
    int newsize = nsend*17/k_buf.view<LMPHostType>().extent(1)+1;
    k_buf.resize(newsize,k_buf.view<LMPHostType>().extent(1));
  }
@ -1388,7 +1388,7 @@ int AtomVecDPDKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d
             MASK_MASK | IMAGE_MASK| DPDTHETA_MASK | UCOND_MASK |
             UMECH_MASK | UCHEM_MASK | UCG_MASK | UCGNEW_MASK |
             DVECTOR_MASK);
-  if(space == Host) {
+  if (space == Host) {
    AtomVecDPDKokkos_PackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
    Kokkos::parallel_for(nsend,f);
  } else {
@ -1469,7 +1469,7 @@ struct AtomVecDPDKokkos_UnpackExchangeFunctor {
                _mask(atom->k_mask.view<DeviceType>()),
                _image(atom->k_image.view<DeviceType>()),
                _nlocal(nlocal.template view<DeviceType>()),_dim(dim),
-                _lo(lo),_hi(hi){
+                _lo(lo),_hi(hi) {
    const size_t elements = 17;
    const int maxsendlist = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/elements;

@ -1504,7 +1504,7 @@ struct AtomVecDPDKokkos_UnpackExchangeFunctor {
 /* ---------------------------------------------------------------------- */

 int AtomVecDPDKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,ExecutionSpace space) {
-  if(space == Host) {
+  if (space == Host) {
    k_count.h_view(0) = nlocal;
    AtomVecDPDKokkos_UnpackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_count,dim,lo,hi);
    Kokkos::parallel_for(nrecv/17,f);
--- a/src/KOKKOS/atom_vec_full_kokkos.cpp
+++ b/src/KOKKOS/atom_vec_full_kokkos.cpp
@ -381,7 +381,7 @@ int AtomVecFullKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
      dy = pbc[1];
      dz = pbc[2];
    }
-    if(space==Host) {
+    if (space==Host) {
      AtomVecFullKokkos_PackBorder<LMPHostType,1> f(
        buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
        iswap,h_x,h_tag,h_type,h_mask,h_q,h_molecule,dx,dy,dz);
@ -395,7 +395,7 @@ int AtomVecFullKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,

  } else {
    dx = dy = dz = 0;
-    if(space==Host) {
+    if (space==Host) {
      AtomVecFullKokkos_PackBorder<LMPHostType,0> f(
        buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
        iswap,h_x,h_tag,h_type,h_mask,h_q,h_molecule,dx,dy,dz);
@ -586,7 +586,7 @@ struct AtomVecFullKokkos_UnpackBorder {
      typename AT::t_tagint_1d &molecule,
      const int& first):
    _buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_q(q),_molecule(molecule),
-    _first(first){
+    _first(first) {
  };

  KOKKOS_INLINE_FUNCTION
@ -611,7 +611,7 @@ void AtomVecFullKokkos::unpack_border_kokkos(const int &n, const int &first,
  atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK);
  while (first+n >= nmax) grow(0);
  atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK);
-  if(space==Host) {
+  if (space==Host) {
    struct AtomVecFullKokkos_UnpackBorder<LMPHostType>
      f(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,h_q,h_molecule,first);
    Kokkos::parallel_for(n,f);
@ -824,7 +824,7 @@ struct AtomVecFullKokkos_PackExchangeFunctor {
    _sendlist(sendlist.template view<DeviceType>()),
    _copylist(copylist.template view<DeviceType>()),
    _nlocal(nlocal),_dim(dim),
-    _lo(lo),_hi(hi){
+    _lo(lo),_hi(hi) {
    // 3 comp of x, 3 comp of v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial,
    // maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom,
    // 1 num_angle, angle_per_atom angle_type, angle_per_atom angle_atom1, angle_atom2,
@ -895,7 +895,7 @@ struct AtomVecFullKokkos_PackExchangeFunctor {

    const int j = _copylist(mysend);

-    if(j>-1) {
+    if (j>-1) {
    _xw(i,0) = _x(j,0);
    _xw(i,1) = _x(j,1);
    _xw(i,2) = _x(j,2);
@ -955,12 +955,12 @@ int AtomVecFullKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2
 {
  const int elements = 20+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+
      5*atom->dihedral_per_atom + 5*atom->improper_per_atom;
-  if(nsend > (int) (k_buf.view<LMPHostType>().extent(0)*
+  if (nsend > (int) (k_buf.view<LMPHostType>().extent(0)*
              k_buf.view<LMPHostType>().extent(1))/elements) {
    int newsize = nsend*elements/k_buf.view<LMPHostType>().extent(1)+1;
    k_buf.resize(newsize,k_buf.view<LMPHostType>().extent(1));
  }
-  if(space == Host) {
+  if (space == Host) {
    AtomVecFullKokkos_PackExchangeFunctor<LMPHostType>
      f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
    Kokkos::parallel_for(nsend,f);
@ -1106,7 +1106,7 @@ struct AtomVecFullKokkos_UnpackExchangeFunctor {
    _improper_atom3(atom->k_improper_atom3.view<DeviceType>()),
    _improper_atom4(atom->k_improper_atom4.view<DeviceType>()),
    _nlocal(nlocal.template view<DeviceType>()),_dim(dim),
-    _lo(lo),_hi(hi){
+    _lo(lo),_hi(hi) {

    elements = 20+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+
      5*atom->dihedral_per_atom + 5*atom->improper_per_atom;
@ -1178,7 +1178,7 @@ int AtomVecFullKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nr
                                              ExecutionSpace space) {
  const size_t elements = 20+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+
    5*atom->dihedral_per_atom + 5*atom->improper_per_atom;
-  if(space == Host) {
+  if (space == Host) {
    k_count.h_view(0) = nlocal;
    AtomVecFullKokkos_UnpackExchangeFunctor<LMPHostType>
      f(atomKK,k_buf,k_count,dim,lo,hi);
--- a/src/KOKKOS/atom_vec_kokkos.cpp
+++ b/src/KOKKOS/atom_vec_kokkos.cpp
@ -112,10 +112,10 @@ int AtomVecKokkos::pack_comm_kokkos(const int &n,
  // Check whether to always run forward communication on the host
  // Choose correct forward PackComm kernel

-  if(commKK->forward_comm_on_host) {
+  if (commKK->forward_comm_on_host) {
    sync(Host,X_MASK);
-    if(pbc_flag) {
-      if(domain->triclinic) {
+    if (pbc_flag) {
+      if (domain->triclinic) {
        struct AtomVecKokkos_PackComm<LMPHostType,1,1> f(atomKK->k_x,buf,list,iswap,
          domain->xprd,domain->yprd,domain->zprd,
          domain->xy,domain->xz,domain->yz,pbc);
@ -127,7 +127,7 @@ int AtomVecKokkos::pack_comm_kokkos(const int &n,
        Kokkos::parallel_for(n,f);
      }
    } else {
-      if(domain->triclinic) {
+      if (domain->triclinic) {
        struct AtomVecKokkos_PackComm<LMPHostType,0,1> f(atomKK->k_x,buf,list,iswap,
          domain->xprd,domain->yprd,domain->zprd,
          domain->xy,domain->xz,domain->yz,pbc);
@ -141,8 +141,8 @@ int AtomVecKokkos::pack_comm_kokkos(const int &n,
    }
  } else {
    sync(Device,X_MASK);
-    if(pbc_flag) {
-      if(domain->triclinic) {
+    if (pbc_flag) {
+      if (domain->triclinic) {
        struct AtomVecKokkos_PackComm<LMPDeviceType,1,1> f(atomKK->k_x,buf,list,iswap,
          domain->xprd,domain->yprd,domain->zprd,
          domain->xy,domain->xz,domain->yz,pbc);
@ -154,7 +154,7 @@ int AtomVecKokkos::pack_comm_kokkos(const int &n,
        Kokkos::parallel_for(n,f);
      }
    } else {
-      if(domain->triclinic) {
+      if (domain->triclinic) {
        struct AtomVecKokkos_PackComm<LMPDeviceType,0,1> f(atomKK->k_x,buf,list,iswap,
          domain->xprd,domain->yprd,domain->zprd,
          domain->xy,domain->xz,domain->yz,pbc);
@ -225,11 +225,11 @@ struct AtomVecKokkos_PackCommSelf {

 int AtomVecKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, const int & iswap,
                                        const int nfirst, const int &pbc_flag, const int* const pbc) {
-  if(commKK->forward_comm_on_host) {
+  if (commKK->forward_comm_on_host) {
    sync(Host,X_MASK);
    modified(Host,X_MASK);
-    if(pbc_flag) {
-      if(domain->triclinic) {
+    if (pbc_flag) {
+      if (domain->triclinic) {
      struct AtomVecKokkos_PackCommSelf<LMPHostType,1,1> f(atomKK->k_x,nfirst,list,iswap,
          domain->xprd,domain->yprd,domain->zprd,
          domain->xy,domain->xz,domain->yz,pbc);
@ -241,7 +241,7 @@ int AtomVecKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, c
      Kokkos::parallel_for(n,f);
      }
    } else {
-      if(domain->triclinic) {
+      if (domain->triclinic) {
      struct AtomVecKokkos_PackCommSelf<LMPHostType,0,1> f(atomKK->k_x,nfirst,list,iswap,
          domain->xprd,domain->yprd,domain->zprd,
          domain->xy,domain->xz,domain->yz,pbc);
@ -256,8 +256,8 @@ int AtomVecKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, c
  } else {
    sync(Device,X_MASK);
    modified(Device,X_MASK);
-    if(pbc_flag) {
-      if(domain->triclinic) {
+    if (pbc_flag) {
+      if (domain->triclinic) {
      struct AtomVecKokkos_PackCommSelf<LMPDeviceType,1,1> f(atomKK->k_x,nfirst,list,iswap,
          domain->xprd,domain->yprd,domain->zprd,
          domain->xy,domain->xz,domain->yz,pbc);
@ -269,7 +269,7 @@ int AtomVecKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, c
      Kokkos::parallel_for(n,f);
      }
    } else {
-      if(domain->triclinic) {
+      if (domain->triclinic) {
      struct AtomVecKokkos_PackCommSelf<LMPDeviceType,0,1> f(atomKK->k_x,nfirst,list,iswap,
          domain->xprd,domain->yprd,domain->zprd,
          domain->xy,domain->xz,domain->yz,pbc);
@ -361,10 +361,10 @@ struct AtomVecKokkos_PackCommSelfFused {
 int AtomVecKokkos::pack_comm_self_fused(const int &n, const DAT::tdual_int_2d &list, const DAT::tdual_int_1d &sendnum_scan,
                                         const DAT::tdual_int_1d &firstrecv, const DAT::tdual_int_1d &pbc_flag, const DAT::tdual_int_2d &pbc,
                                         const DAT::tdual_int_1d &g2l) {
-  if(commKK->forward_comm_on_host) {
+  if (commKK->forward_comm_on_host) {
    sync(Host,X_MASK);
    modified(Host,X_MASK);
-    if(domain->triclinic) {
+    if (domain->triclinic) {
    struct AtomVecKokkos_PackCommSelfFused<LMPHostType,1> f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l,
        domain->xprd,domain->yprd,domain->zprd,
        domain->xy,domain->xz,domain->yz);
@ -378,7 +378,7 @@ int AtomVecKokkos::pack_comm_self_fused(const int &n, const DAT::tdual_int_2d &l
  } else {
    sync(Device,X_MASK);
    modified(Device,X_MASK);
-    if(domain->triclinic) {
+    if (domain->triclinic) {
    struct AtomVecKokkos_PackCommSelfFused<LMPDeviceType,1> f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l,
        domain->xprd,domain->yprd,domain->zprd,
        domain->xy,domain->xz,domain->yz);
@ -420,8 +420,8 @@ struct AtomVecKokkos_UnpackComm {
 /* ---------------------------------------------------------------------- */

 void AtomVecKokkos::unpack_comm_kokkos(const int &n, const int &first,
-    const DAT::tdual_xfloat_2d &buf ) {
-  if(commKK->forward_comm_on_host) {
+    const DAT::tdual_xfloat_2d &buf) {
+  if (commKK->forward_comm_on_host) {
    sync(Host,X_MASK);
    modified(Host,X_MASK);
    struct AtomVecKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first);
@ -530,7 +530,7 @@ int AtomVecKokkos::pack_comm_vel_kokkos(
  const int &pbc_flag,
  const int* const pbc)
 {
-  if(commKK->forward_comm_on_host) {
+  if (commKK->forward_comm_on_host) {
    sync(Host,X_MASK|V_MASK);
    if (pbc_flag) {
      if (deform_vremap) {
@ -552,7 +552,7 @@ int AtomVecKokkos::pack_comm_vel_kokkos(
          Kokkos::parallel_for(n,f);
        }
      } else {
-        if(domain->triclinic) {
+        if (domain->triclinic) {
          struct AtomVecKokkos_PackCommVel<LMPHostType,1,1,0> f(
            atomKK->k_x,atomKK->k_mask,
            atomKK->k_v,
@ -571,7 +571,7 @@ int AtomVecKokkos::pack_comm_vel_kokkos(
        }
      }
    } else {
-      if(domain->triclinic) {
+      if (domain->triclinic) {
        struct AtomVecKokkos_PackCommVel<LMPHostType,0,1,0> f(
          atomKK->k_x,atomKK->k_mask,
          atomKK->k_v,
@ -591,9 +591,9 @@ int AtomVecKokkos::pack_comm_vel_kokkos(
    }
  } else {
    sync(Device,X_MASK|V_MASK);
-    if(pbc_flag) {
-      if(deform_vremap) {
-        if(domain->triclinic) {
+    if (pbc_flag) {
+      if (deform_vremap) {
+        if (domain->triclinic) {
          struct AtomVecKokkos_PackCommVel<LMPDeviceType,1,1,1> f(
            atomKK->k_x,atomKK->k_mask,
            atomKK->k_v,
@ -611,7 +611,7 @@ int AtomVecKokkos::pack_comm_vel_kokkos(
          Kokkos::parallel_for(n,f);
        }
      } else {
-        if(domain->triclinic) {
+        if (domain->triclinic) {
          struct AtomVecKokkos_PackCommVel<LMPDeviceType,1,1,0> f(
            atomKK->k_x,atomKK->k_mask,
            atomKK->k_v,
@ -630,7 +630,7 @@ int AtomVecKokkos::pack_comm_vel_kokkos(
        }
      }
    } else {
-      if(domain->triclinic) {
+      if (domain->triclinic) {
        struct AtomVecKokkos_PackCommVel<LMPDeviceType,0,1,0> f(
          atomKK->k_x,atomKK->k_mask,
          atomKK->k_v,
@ -691,8 +691,8 @@ struct AtomVecKokkos_UnpackCommVel {
 /* ---------------------------------------------------------------------- */

 void AtomVecKokkos::unpack_comm_vel_kokkos(const int &n, const int &first,
-    const DAT::tdual_xfloat_2d &buf ) {
-  if(commKK->forward_comm_on_host) {
+    const DAT::tdual_xfloat_2d &buf) {
+  if (commKK->forward_comm_on_host) {
    sync(Host,X_MASK|V_MASK);
    modified(Host,X_MASK|V_MASK);
    struct AtomVecKokkos_UnpackCommVel<LMPHostType> f(atomKK->k_x,atomKK->k_v,buf,first);
@ -864,8 +864,8 @@ struct AtomVecKokkos_PackReverse {
 /* ---------------------------------------------------------------------- */

 int AtomVecKokkos::pack_reverse_kokkos(const int &n, const int &first,
-    const DAT::tdual_ffloat_2d &buf ) {
-  if(commKK->reverse_comm_on_host) {
+    const DAT::tdual_ffloat_2d &buf) {
+  if (commKK->reverse_comm_on_host) {
    sync(Host,F_MASK);
    struct AtomVecKokkos_PackReverse<LMPHostType> f(atomKK->k_f,buf,first);
    Kokkos::parallel_for(n,f);
@ -911,7 +911,7 @@ struct AtomVecKokkos_UnPackReverseSelf {

 int AtomVecKokkos::unpack_reverse_self(const int &n, const DAT::tdual_int_2d &list, const int & iswap,
                                        const int nfirst) {
-  if(commKK->reverse_comm_on_host) {
+  if (commKK->reverse_comm_on_host) {
    sync(Host,F_MASK);
    struct AtomVecKokkos_UnPackReverseSelf<LMPHostType> f(atomKK->k_f,nfirst,list,iswap);
    Kokkos::parallel_for(n,f);
@ -966,7 +966,7 @@ void AtomVecKokkos::unpack_reverse_kokkos(const int &n,
  // Check whether to always run reverse communication on the host
  // Choose correct reverse UnPackReverse kernel

-  if(commKK->reverse_comm_on_host) {
+  if (commKK->reverse_comm_on_host) {
    struct AtomVecKokkos_UnPackReverse<LMPHostType> f(atomKK->k_f,buf,list,iswap);
    Kokkos::parallel_for(n,f);
    modified(Host,F_MASK);
@ -981,7 +981,7 @@ void AtomVecKokkos::unpack_reverse_kokkos(const int &n,

 int AtomVecKokkos::pack_reverse(int n, int first, double *buf)
 {
-  if(n > 0)
+  if (n > 0)
    sync(Host,F_MASK);

  int m = 0;
@ -1007,7 +1007,7 @@ void AtomVecKokkos::unpack_reverse(int n, int *list, double *buf)
    h_f(j,2) += buf[m++];
  }

-  if(n > 0)
+  if (n > 0)
    modified(Host,F_MASK);
 }

--- a/src/KOKKOS/atom_vec_kokkos.h
+++ b/src/KOKKOS/atom_vec_kokkos.h
@ -178,7 +178,7 @@ class AtomVecKokkos : public AtomVec {
    }
    mirror_type tmp_view((typename ViewType::value_type*)buffer, src.d_view.layout());

-    if(space == Device) {
+    if (space == Device) {
      Kokkos::deep_copy(LMPHostType(),tmp_view,src.h_view),
      Kokkos::deep_copy(LMPHostType(),src.d_view,tmp_view);
      src.clear_sync_state();
@ -191,7 +191,7 @@ class AtomVecKokkos : public AtomVec {
  #else
  template<class ViewType>
  void perform_async_copy(ViewType& src, unsigned int space) {
-    if(space == Device)
+    if (space == Device)
      src.template sync<LMPDeviceType>();
    else
      src.template sync<LMPHostType>();
--- a/src/KOKKOS/atom_vec_molecular_kokkos.cpp
+++ b/src/KOKKOS/atom_vec_molecular_kokkos.cpp
@ -360,10 +360,10 @@ int AtomVecMolecularKokkos::pack_comm_kokkos(const int &n,
  // Check whether to always run forward communication on the host
  // Choose correct forward PackComm kernel

-  if(commKK->forward_comm_on_host) {
+  if (commKK->forward_comm_on_host) {
    atomKK->sync(Host,X_MASK);
-    if(pbc_flag) {
-      if(domain->triclinic) {
+    if (pbc_flag) {
+      if (domain->triclinic) {
        struct AtomVecMolecularKokkos_PackComm<LMPHostType,1,1>
          f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
            domain->xy,domain->xz,domain->yz,pbc);
@ -375,7 +375,7 @@ int AtomVecMolecularKokkos::pack_comm_kokkos(const int &n,
        Kokkos::parallel_for(n,f);
      }
    } else {
-      if(domain->triclinic) {
+      if (domain->triclinic) {
        struct AtomVecMolecularKokkos_PackComm<LMPHostType,0,1>
          f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
            domain->xy,domain->xz,domain->yz,pbc);
@ -389,8 +389,8 @@ int AtomVecMolecularKokkos::pack_comm_kokkos(const int &n,
    }
  } else {
    atomKK->sync(Device,X_MASK);
-    if(pbc_flag) {
-      if(domain->triclinic) {
+    if (pbc_flag) {
+      if (domain->triclinic) {
        struct AtomVecMolecularKokkos_PackComm<LMPDeviceType,1,1>
          f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
            domain->xy,domain->xz,domain->yz,pbc);
@ -402,7 +402,7 @@ int AtomVecMolecularKokkos::pack_comm_kokkos(const int &n,
        Kokkos::parallel_for(n,f);
      }
    } else {
-      if(domain->triclinic) {
+      if (domain->triclinic) {
        struct AtomVecMolecularKokkos_PackComm<LMPDeviceType,0,1>
          f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
            domain->xy,domain->xz,domain->yz,pbc);
@ -476,11 +476,11 @@ int AtomVecMolecularKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d
                                           const int & iswap,
                                           const int nfirst, const int &pbc_flag,
                                           const int* const pbc) {
-  if(commKK->forward_comm_on_host) {
+  if (commKK->forward_comm_on_host) {
    atomKK->sync(Host,X_MASK);
    atomKK->modified(Host,X_MASK);
-    if(pbc_flag) {
-      if(domain->triclinic) {
+    if (pbc_flag) {
+      if (domain->triclinic) {
      struct AtomVecMolecularKokkos_PackCommSelf<LMPHostType,1,1>
        f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
          domain->xy,domain->xz,domain->yz,pbc);
@ -492,7 +492,7 @@ int AtomVecMolecularKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d
      Kokkos::parallel_for(n,f);
      }
    } else {
-      if(domain->triclinic) {
+      if (domain->triclinic) {
      struct AtomVecMolecularKokkos_PackCommSelf<LMPHostType,0,1>
        f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
          domain->xy,domain->xz,domain->yz,pbc);
@ -507,8 +507,8 @@ int AtomVecMolecularKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d
  } else {
    atomKK->sync(Device,X_MASK);
    atomKK->modified(Device,X_MASK);
-    if(pbc_flag) {
-      if(domain->triclinic) {
+    if (pbc_flag) {
+      if (domain->triclinic) {
      struct AtomVecMolecularKokkos_PackCommSelf<LMPDeviceType,1,1>
        f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
          domain->xy,domain->xz,domain->yz,pbc);
@ -520,7 +520,7 @@ int AtomVecMolecularKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d
      Kokkos::parallel_for(n,f);
      }
    } else {
-      if(domain->triclinic) {
+      if (domain->triclinic) {
      struct AtomVecMolecularKokkos_PackCommSelf<LMPDeviceType,0,1>
        f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
          domain->xy,domain->xz,domain->yz,pbc);
@ -563,8 +563,8 @@ struct AtomVecMolecularKokkos_UnpackComm {
 /* ---------------------------------------------------------------------- */

 void AtomVecMolecularKokkos::unpack_comm_kokkos(const int &n, const int &first,
-    const DAT::tdual_xfloat_2d &buf ) {
-  if(commKK->forward_comm_on_host) {
+    const DAT::tdual_xfloat_2d &buf) {
+  if (commKK->forward_comm_on_host) {
    atomKK->sync(Host,X_MASK);
    atomKK->modified(Host,X_MASK);
    struct AtomVecMolecularKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first);
@ -713,7 +713,7 @@ void AtomVecMolecularKokkos::unpack_comm_vel(int n, int first, double *buf)

 int AtomVecMolecularKokkos::pack_reverse(int n, int first, double *buf)
 {
-  if(n > 0)
+  if (n > 0)
    atomKK->sync(Host,F_MASK);

  int m = 0;
@ -730,7 +730,7 @@ int AtomVecMolecularKokkos::pack_reverse(int n, int first, double *buf)

 void AtomVecMolecularKokkos::unpack_reverse(int n, int *list, double *buf)
 {
-  if(n > 0)
+  if (n > 0)
    atomKK->modified(Host,F_MASK);

  int m = 0;
@ -814,7 +814,7 @@ int AtomVecMolecularKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendli
      dy = pbc[1];
      dz = pbc[2];
    }
-    if(space==Host) {
+    if (space==Host) {
      AtomVecMolecularKokkos_PackBorder<LMPHostType,1> f(
        buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
        iswap,h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz);
@ -828,7 +828,7 @@ int AtomVecMolecularKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendli

  } else {
    dx = dy = dz = 0;
-    if(space==Host) {
+    if (space==Host) {
      AtomVecMolecularKokkos_PackBorder<LMPHostType,0> f(
        buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
        iswap,h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz);
@ -1011,7 +1011,7 @@ struct AtomVecMolecularKokkos_UnpackBorder {
      typename AT::t_tagint_1d &molecule,
      const int& first):
    _buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_molecule(molecule),
-    _first(first){
+    _first(first) {
  };

  KOKKOS_INLINE_FUNCTION
@ -1035,7 +1035,7 @@ void AtomVecMolecularKokkos::unpack_border_kokkos(const int &n, const int &first
  atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
  while (first+n >= nmax) grow(0);
  atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
-  if(space==Host) {
+  if (space==Host) {
    struct AtomVecMolecularKokkos_UnpackBorder<LMPHostType>
      f(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,h_molecule,first);
    Kokkos::parallel_for(n,f);
@ -1240,7 +1240,7 @@ struct AtomVecMolecularKokkos_PackExchangeFunctor {
    _sendlist(sendlist.template view<DeviceType>()),
    _copylist(copylist.template view<DeviceType>()),
    _nlocal(nlocal),_dim(dim),
-    _lo(lo),_hi(hi){
+    _lo(lo),_hi(hi) {
    // 3 comp of x, 3 comp of v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial,
    // maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom,
    // 1 num_angle, angle_per_atom angle_type, angle_per_atom angle_atom1, angle_atom2,
@ -1309,7 +1309,7 @@ struct AtomVecMolecularKokkos_PackExchangeFunctor {

    const int j = _copylist(mysend);

-    if(j>-1) {
+    if (j>-1) {
    _xw(i,0) = _x(j,0);
    _xw(i,1) = _x(j,1);
    _xw(i,2) = _x(j,2);
@ -1368,12 +1368,12 @@ int AtomVecMolecularKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfl
 {
  const int elements = 19+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+
      5*atom->dihedral_per_atom + 5*atom->improper_per_atom;
-  if(nsend > (int) (k_buf.view<LMPHostType>().extent(0)*
+  if (nsend > (int) (k_buf.view<LMPHostType>().extent(0)*
              k_buf.view<LMPHostType>().extent(1))/elements) {
    int newsize = nsend*elements/k_buf.view<LMPHostType>().extent(1)+1;
    k_buf.resize(newsize,k_buf.view<LMPHostType>().extent(1));
  }
-  if(space == Host) {
+  if (space == Host) {
    AtomVecMolecularKokkos_PackExchangeFunctor<LMPHostType>
      f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
    Kokkos::parallel_for(nsend,f);
@ -1517,7 +1517,7 @@ struct AtomVecMolecularKokkos_UnpackExchangeFunctor {
    _improper_atom3(atom->k_improper_atom3.view<DeviceType>()),
    _improper_atom4(atom->k_improper_atom4.view<DeviceType>()),
    _nlocal(nlocal.template view<DeviceType>()),_dim(dim),
-    _lo(lo),_hi(hi){
+    _lo(lo),_hi(hi) {

    elements = 19+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+
      5*atom->dihedral_per_atom + 5*atom->improper_per_atom;
@ -1589,7 +1589,7 @@ int AtomVecMolecularKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,i
                                                   ExecutionSpace space) {
  const size_t elements = 19+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+
    5*atom->dihedral_per_atom + 5*atom->improper_per_atom;
-  if(space == Host) {
+  if (space == Host) {
    k_count.h_view(0) = nlocal;
    AtomVecMolecularKokkos_UnpackExchangeFunctor<LMPHostType>
      f(atomKK,k_buf,k_count,dim,lo,hi);
--- a/src/KOKKOS/atom_vec_sphere_kokkos.cpp
+++ b/src/KOKKOS/atom_vec_sphere_kokkos.cpp
@ -274,10 +274,10 @@ int AtomVecSphereKokkos::pack_comm_kokkos(
    return AtomVecKokkos::pack_comm_kokkos(n,list,iswap,buf,pbc_flag,pbc);
  // Check whether to always run forward communication on the host
  // Choose correct forward PackComm kernel
-  if(commKK->forward_comm_on_host) {
+  if (commKK->forward_comm_on_host) {
    atomKK->sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK);
-    if(pbc_flag) {
-      if(domain->triclinic) {
+    if (pbc_flag) {
+      if (domain->triclinic) {
        struct AtomVecSphereKokkos_PackComm<LMPHostType,1,1> f(
          atomKK->k_x,
          atomKK->k_radius,atomKK->k_rmass,
@ -295,7 +295,7 @@ int AtomVecSphereKokkos::pack_comm_kokkos(
        Kokkos::parallel_for(n,f);
      }
    } else {
-      if(domain->triclinic) {
+      if (domain->triclinic) {
        struct AtomVecSphereKokkos_PackComm<LMPHostType,0,1> f(
          atomKK->k_x,
          atomKK->k_radius,atomKK->k_rmass,
@ -315,8 +315,8 @@ int AtomVecSphereKokkos::pack_comm_kokkos(
    }
  } else {
    atomKK->sync(Device,X_MASK|RADIUS_MASK|RMASS_MASK);
-    if(pbc_flag) {
-      if(domain->triclinic) {
+    if (pbc_flag) {
+      if (domain->triclinic) {
        struct AtomVecSphereKokkos_PackComm<LMPDeviceType,1,1> f(
          atomKK->k_x,
          atomKK->k_radius,atomKK->k_rmass,
@ -334,7 +334,7 @@ int AtomVecSphereKokkos::pack_comm_kokkos(
        Kokkos::parallel_for(n,f);
      }
    } else {
-      if(domain->triclinic) {
+      if (domain->triclinic) {
        struct AtomVecSphereKokkos_PackComm<LMPDeviceType,0,1> f(
          atomKK->k_x,
          atomKK->k_radius,atomKK->k_rmass,
@ -461,11 +461,11 @@ int AtomVecSphereKokkos::pack_comm_vel_kokkos(
  const int &pbc_flag,
  const int* const pbc)
 {
-  if(commKK->forward_comm_on_host) {
+  if (commKK->forward_comm_on_host) {
    atomKK->sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
-    if(pbc_flag) {
-      if(deform_vremap) {
-        if(domain->triclinic) {
+    if (pbc_flag) {
+      if (deform_vremap) {
+        if (domain->triclinic) {
          if (radvary == 0) {
            struct AtomVecSphereKokkos_PackCommVel<LMPHostType,0,1,1,1> f(
              atomKK->k_x,atomKK->k_mask,
@ -507,7 +507,7 @@ int AtomVecSphereKokkos::pack_comm_vel_kokkos(
          }
        }
      } else {
-        if(domain->triclinic) {
+        if (domain->triclinic) {
          if (radvary == 0) {
            struct AtomVecSphereKokkos_PackCommVel<LMPHostType,0,1,1,0> f(
              atomKK->k_x,atomKK->k_mask,
@ -550,7 +550,7 @@ int AtomVecSphereKokkos::pack_comm_vel_kokkos(
        }
      }
    } else {
-      if(domain->triclinic) {
+      if (domain->triclinic) {
        if (radvary == 0) {
          struct AtomVecSphereKokkos_PackCommVel<LMPHostType,0,0,1,0> f(
            atomKK->k_x,atomKK->k_mask,
@ -594,9 +594,9 @@ int AtomVecSphereKokkos::pack_comm_vel_kokkos(
    }
  } else {
    atomKK->sync(Device,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
-    if(pbc_flag) {
-      if(deform_vremap) {
-        if(domain->triclinic) {
+    if (pbc_flag) {
+      if (deform_vremap) {
+        if (domain->triclinic) {
          if (radvary == 0) {
            struct AtomVecSphereKokkos_PackCommVel<LMPDeviceType,0,1,1,1> f(
              atomKK->k_x,atomKK->k_mask,
@ -638,7 +638,7 @@ int AtomVecSphereKokkos::pack_comm_vel_kokkos(
          }
        }
      } else {
-        if(domain->triclinic) {
+        if (domain->triclinic) {
          if (radvary == 0) {
            struct AtomVecSphereKokkos_PackCommVel<LMPDeviceType,0,1,1,0> f(
              atomKK->k_x,atomKK->k_mask,
@ -681,7 +681,7 @@ int AtomVecSphereKokkos::pack_comm_vel_kokkos(
        }
      }
    } else {
-      if(domain->triclinic) {
+      if (domain->triclinic) {
        if (radvary == 0) {
          struct AtomVecSphereKokkos_PackCommVel<LMPDeviceType,0,0,1,0> f(
            atomKK->k_x,atomKK->k_mask,
@ -792,11 +792,11 @@ int AtomVecSphereKokkos::pack_comm_self(
  // Fallback to AtomVecKokkos if radvary == 0
  if (radvary == 0)
    return AtomVecKokkos::pack_comm_self(n,list,iswap,nfirst,pbc_flag,pbc);
-  if(commKK->forward_comm_on_host) {
+  if (commKK->forward_comm_on_host) {
    atomKK->sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK);
    atomKK->modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK);
-    if(pbc_flag) {
-      if(domain->triclinic) {
+    if (pbc_flag) {
+      if (domain->triclinic) {
        struct AtomVecSphereKokkos_PackCommSelf<LMPHostType,1,1> f(
          atomKK->k_x,
          atomKK->k_radius,atomKK->k_rmass,
@ -814,7 +814,7 @@ int AtomVecSphereKokkos::pack_comm_self(
        Kokkos::parallel_for(n,f);
      }
    } else {
-      if(domain->triclinic) {
+      if (domain->triclinic) {
        struct AtomVecSphereKokkos_PackCommSelf<LMPHostType,0,1> f(
          atomKK->k_x,
          atomKK->k_radius,atomKK->k_rmass,
@ -835,8 +835,8 @@ int AtomVecSphereKokkos::pack_comm_self(
  } else {
    atomKK->sync(Device,X_MASK|RADIUS_MASK|RMASS_MASK);
    atomKK->modified(Device,X_MASK|RADIUS_MASK|RMASS_MASK);
-    if(pbc_flag) {
-      if(domain->triclinic) {
+    if (pbc_flag) {
+      if (domain->triclinic) {
        struct AtomVecSphereKokkos_PackCommSelf<LMPDeviceType,1,1> f(
          atomKK->k_x,
          atomKK->k_radius,atomKK->k_rmass,
@ -854,7 +854,7 @@ int AtomVecSphereKokkos::pack_comm_self(
        Kokkos::parallel_for(n,f);
      }
    } else {
-      if(domain->triclinic) {
+      if (domain->triclinic) {
        struct AtomVecSphereKokkos_PackCommSelf<LMPDeviceType,0,1> f(
          atomKK->k_x,
          atomKK->k_radius,atomKK->k_rmass,
@ -917,13 +917,13 @@ struct AtomVecSphereKokkos_UnpackComm {

 void AtomVecSphereKokkos::unpack_comm_kokkos(
  const int &n, const int &first,
-  const DAT::tdual_xfloat_2d &buf ) {
+  const DAT::tdual_xfloat_2d &buf) {
  // Fallback to AtomVecKokkos if radvary == 0
  if (radvary == 0) {
    AtomVecKokkos::unpack_comm_kokkos(n,first,buf);
    return;
  }
-  if(commKK->forward_comm_on_host) {
+  if (commKK->forward_comm_on_host) {
    atomKK->modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK);
    struct AtomVecSphereKokkos_UnpackComm<LMPHostType> f(
      atomKK->k_x,
@ -994,8 +994,8 @@ struct AtomVecSphereKokkos_UnpackCommVel {

 void AtomVecSphereKokkos::unpack_comm_vel_kokkos(
  const int &n, const int &first,
-  const DAT::tdual_xfloat_2d &buf ) {
-  if(commKK->forward_comm_on_host) {
+  const DAT::tdual_xfloat_2d &buf) {
+  if (commKK->forward_comm_on_host) {
    atomKK->modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
    if (radvary == 0) {
      struct AtomVecSphereKokkos_UnpackCommVel<LMPHostType,0> f(
@ -1352,7 +1352,7 @@ int AtomVecSphereKokkos::unpack_comm_hybrid(int n, int first, double *buf)

 int AtomVecSphereKokkos::pack_reverse(int n, int first, double *buf)
 {
-  if(n > 0)
+  if (n > 0)
    atomKK->sync(Host,F_MASK|TORQUE_MASK);

  int m = 0;
@ -1372,7 +1372,7 @@ int AtomVecSphereKokkos::pack_reverse(int n, int first, double *buf)

 int AtomVecSphereKokkos::pack_reverse_hybrid(int n, int first, double *buf)
 {
-  if(n > 0)
+  if (n > 0)
    atomKK->sync(Host,TORQUE_MASK);

  int m = 0;
@ -1389,7 +1389,7 @@ int AtomVecSphereKokkos::pack_reverse_hybrid(int n, int first, double *buf)

 void AtomVecSphereKokkos::unpack_reverse(int n, int *list, double *buf)
 {
-  if(n > 0) {
+  if (n > 0) {
    atomKK->modified(Host,F_MASK|TORQUE_MASK);
  }

@ -1409,7 +1409,7 @@ void AtomVecSphereKokkos::unpack_reverse(int n, int *list, double *buf)

 int AtomVecSphereKokkos::unpack_reverse_hybrid(int n, int *list, double *buf)
 {
-  if(n > 0) {
+  if (n > 0) {
    atomKK->modified(Host,TORQUE_MASK);
  }

@ -1502,7 +1502,7 @@ int AtomVecSphereKokkos::pack_border_kokkos(
      dy = pbc[1];
      dz = pbc[2];
    }
-    if(space==Host) {
+    if (space==Host) {
      AtomVecSphereKokkos_PackBorder<LMPHostType,1> f(
        buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
        iswap,h_x,h_tag,h_type,h_mask,
@ -1519,7 +1519,7 @@ int AtomVecSphereKokkos::pack_border_kokkos(
    }
  } else {
    dx = dy = dz = 0;
-    if(space==Host) {
+    if (space==Host) {
      AtomVecSphereKokkos_PackBorder<LMPHostType,0> f(
        buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
        iswap,h_x,h_tag,h_type,h_mask,
@ -1697,7 +1697,7 @@ int AtomVecSphereKokkos::pack_border_vel_kokkos(
      dz = pbc[2];
    }
    if (!deform_vremap) {
-      if(space==Host) {
+      if (space==Host) {
        AtomVecSphereKokkos_PackBorderVel<LMPHostType,1,0> f(
          buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
          iswap,h_x,h_tag,h_type,h_mask,
@ -1721,7 +1721,7 @@ int AtomVecSphereKokkos::pack_border_vel_kokkos(
      dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4];
      dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3];
      dvz = pbc[2]*h_rate[2];
-      if(space==Host) {
+      if (space==Host) {
        AtomVecSphereKokkos_PackBorderVel<LMPHostType,1,1> f(
          buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
          iswap,h_x,h_tag,h_type,h_mask,
@ -1742,7 +1742,7 @@ int AtomVecSphereKokkos::pack_border_vel_kokkos(
      }
    }
  } else {
-    if(space==Host) {
+    if (space==Host) {
      AtomVecSphereKokkos_PackBorderVel<LMPHostType,0,0> f(
        buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
        iswap,h_x,h_tag,h_type,h_mask,
@ -1926,7 +1926,7 @@ struct AtomVecSphereKokkos_UnpackBorder {
 void AtomVecSphereKokkos::unpack_border_kokkos(const int &n, const int &first,
                                               const DAT::tdual_xfloat_2d &buf,ExecutionSpace space) {
  while (first+n >= nmax) grow(0);
-  if(space==Host) {
+  if (space==Host) {
    struct AtomVecSphereKokkos_UnpackBorder<LMPHostType> f(buf.view<LMPHostType>(),
      h_x,h_tag,h_type,h_mask,
      h_radius,h_rmass,
@ -2034,7 +2034,7 @@ void AtomVecSphereKokkos::unpack_border_vel_kokkos(
  const int &n, const int &first,
  const DAT::tdual_xfloat_2d &buf,ExecutionSpace space) {
  while (first+n >= nmax) grow(0);
-  if(space==Host) {
+  if (space==Host) {
    struct AtomVecSphereKokkos_UnpackBorderVel<LMPHostType> f(buf.view<LMPHostType>(),
      h_x,h_tag,h_type,h_mask,
      h_radius,h_rmass,
@ -2212,7 +2212,7 @@ int AtomVecSphereKokkos::pack_exchange_kokkos(
  DAT::tdual_int_1d k_copylist,
  ExecutionSpace space,int dim,X_FLOAT lo,X_FLOAT hi)
 {
-  if(nsend > (int) (k_buf.view<LMPHostType>().extent(0)*k_buf.view<LMPHostType>().extent(1))/16) {
+  if (nsend > (int) (k_buf.view<LMPHostType>().extent(0)*k_buf.view<LMPHostType>().extent(1))/16) {
    int newsize = nsend*17/k_buf.view<LMPHostType>().extent(1)+1;
    k_buf.resize(newsize,k_buf.view<LMPHostType>().extent(1));
  }
@ -2220,7 +2220,7 @@ int AtomVecSphereKokkos::pack_exchange_kokkos(
             MASK_MASK | IMAGE_MASK| RADIUS_MASK | RMASS_MASK |
             OMEGA_MASK);

-  if(space == Host) {
+  if (space == Host) {
    AtomVecSphereKokkos_PackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
    Kokkos::parallel_for(nsend,f);
  } else {
@ -2338,7 +2338,7 @@ struct AtomVecSphereKokkos_UnpackExchangeFunctor {
 /* ---------------------------------------------------------------------- */

 int AtomVecSphereKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,ExecutionSpace space) {
-  if(space == Host) {
+  if (space == Host) {
    k_count.h_view(0) = nlocal;
    AtomVecSphereKokkos_UnpackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_count,dim,lo,hi);
    Kokkos::parallel_for(nrecv/16,f);
--- a/src/KOKKOS/comm_kokkos.cpp
+++ b/src/KOKKOS/comm_kokkos.cpp
@ -107,6 +107,8 @@ void CommKokkos::init()
  atomKK = (AtomKokkos *) atom;
  exchange_comm_classic = lmp->kokkos->exchange_comm_classic;
  forward_comm_classic = lmp->kokkos->forward_comm_classic;
+  forward_pair_comm_classic = lmp->kokkos->forward_pair_comm_classic;
+  forward_fix_comm_classic = lmp->kokkos->forward_fix_comm_classic;
  reverse_comm_classic = lmp->kokkos->reverse_comm_classic;
  exchange_comm_on_host = lmp->kokkos->exchange_comm_on_host;
  forward_comm_on_host = lmp->kokkos->forward_comm_on_host;
@ -361,12 +363,12 @@ void CommKokkos::reverse_comm_device()

 void CommKokkos::forward_comm_fix(Fix *fix, int size)
 {
-  if (fix->execution_space == Device && fix->forward_comm_device) {
-    k_sendlist.sync<LMPDeviceType>();
-    forward_comm_fix_device<LMPDeviceType>(fix,size);
-  } else {
+  if (fix->execution_space == Host || !fix->forward_comm_device || forward_fix_comm_classic) {
    k_sendlist.sync<LMPHostType>();
    CommBrick::forward_comm_fix(fix,size);
+  } else {
+    k_sendlist.sync<LMPDeviceType>();
+    forward_comm_fix_device<LMPDeviceType>(fix);
  }
 }

@ -456,10 +458,10 @@ void CommKokkos::reverse_comm_compute(Compute *compute)

 void CommKokkos::forward_comm_pair(Pair *pair)
 {
-  if (pair->execution_space == Host) {
+  if (pair->execution_space == Host || forward_pair_comm_classic) {
    k_sendlist.sync<LMPHostType>();
    CommBrick::forward_comm_pair(pair);
-  } else if (pair->execution_space == Device) {
+  } else {
    k_sendlist.sync<LMPDeviceType>();
    forward_comm_pair_device<LMPDeviceType>(pair);
  }
@ -571,10 +573,10 @@ void CommKokkos::reverse_comm_dump(Dump *dump)

 void CommKokkos::exchange()
 {
-  if(atom->nextra_grow + atom->nextra_border) {
-    if(!exchange_comm_classic) {
+  if (atom->nextra_grow + atom->nextra_border) {
+    if (!exchange_comm_classic) {
      static int print = 1;
-      if(print && comm->me==0) {
+      if (print && comm->me==0) {
        error->warning(FLERR,"Fixes cannot yet send exchange data in Kokkos communication, "
                      "switching to classic exchange/border communication");
      }
@ -625,7 +627,7 @@ struct BuildExchangeListFunctor {
  void operator() (int i) const {
    if (_x(i,_dim) < _lo || _x(i,_dim) >= _hi) {
      const int mysend=Kokkos::atomic_fetch_add(&_nsend(),1);
-      if(mysend < (int)_sendlist.extent(0)) {
+      if (mysend < (int)_sendlist.extent(0)) {
        _sendlist(mysend) = i;
        _sendflag(i) = 1;
      }
@ -713,7 +715,7 @@ void CommKokkos::exchange_device()

        int sendpos = nlocal-1;
        nlocal -= k_count.h_view();
-        for(int i = 0; i < k_count.h_view(); i++) {
+        for (int i = 0; i < k_count.h_view(); i++) {
          if (k_exchange_sendlist.h_view(i)<nlocal) {
            while (k_sendflag.h_view(sendpos)) sendpos--;
            k_exchange_copylist.h_view(i) = sendpos;
@ -887,7 +889,7 @@ struct BuildBorderListFunctor {

    if (my_store_pos+mysend < maxsendlist) {
    mysend = my_store_pos;
-      for(int i=teamstart + dev.team_rank(); i<teamend; i+=dev.team_size()){
+      for (int i=teamstart + dev.team_rank(); i<teamend; i+=dev.team_size()) {
        if (x(i,dim) >= lo && x(i,dim) <= hi) {
          sendlist(iswap,mysend++) = i;
        }
@ -979,7 +981,7 @@ void CommKokkos::borders_device() {

            k_sendlist.modify<DeviceType>();

-            if(k_total_send.h_view() >= maxsendlist[iswap]) {
+            if (k_total_send.h_view() >= maxsendlist[iswap]) {
              grow_list(iswap,k_total_send.h_view());

              k_total_send.h_view() = 0;
@ -1227,7 +1229,7 @@ void CommKokkos::grow_send_kokkos(int n, int flag, ExecutionSpace space)
  maxsend = static_cast<int> (BUFFACTOR * n);
  int maxsend_border = (maxsend+BUFEXTRA+5)/atom->avec->size_border + 2;
  if (flag) {
-    if(space == Device)
+    if (space == Device)
      k_buf_send.modify<LMPDeviceType>();
    else
      k_buf_send.modify<LMPHostType>();
@ -1280,7 +1282,7 @@ void CommKokkos::grow_list(int /*iswap*/, int n)

  memoryKK->grow_kokkos(k_sendlist,sendlist,maxswap,size,"comm:sendlist");

-  for(int i=0;i<maxswap;i++) {
+  for (int i=0;i<maxswap;i++) {
    maxsendlist[i]=size; sendlist[i]=&k_sendlist.view<LMPHostType>()(i,0);
  }
 }
--- a/src/KOKKOS/comm_kokkos.h
+++ b/src/KOKKOS/comm_kokkos.h
@ -25,6 +25,8 @@ class CommKokkos : public CommBrick {

  bool exchange_comm_classic;
  bool forward_comm_classic;
+  bool forward_pair_comm_classic;
+  bool forward_fix_comm_classic;
  bool reverse_comm_classic;
  bool exchange_comm_on_host;
  bool forward_comm_on_host;
--- a/src/KOKKOS/compute_orientorder_atom_kokkos.cpp
+++ b/src/KOKKOS/compute_orientorder_atom_kokkos.cpp
@ -241,7 +241,7 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::operator() (TagComputeOrientOrder
    int ncount = 0;
    Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team,jnum),
        [&] (const int jj, int& count) {
-      Kokkos::single(Kokkos::PerThread(team), [&] (){
+      Kokkos::single(Kokkos::PerThread(team), [&] () {
        int j = d_neighbors(i,jj);
        j &= NEIGHMASK;
        const F_FLOAT delx = x(j,0) - xtmp;
@ -341,17 +341,17 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::operator() (TagComputeOrientOrder

 #define SWAP(view,i,j) do {       \
    tmp = view(i); view(i) = view(j); view(j) = tmp; \
-  } while(0)
+  } while (0)

 #define ISWAP(view,i,j) do {        \
    itmp = view(i); view(i) = view(j); view(j) = itmp; \
-  } while(0)
+  } while (0)

 #define SWAP3(view,i,j) do {                  \
    tmp = view(i,0); view(i,0) = view(j,0); view(j,0) = tmp; \
    tmp = view(i,1); view(i,1) = view(j,1); view(j,1) = tmp; \
    tmp = view(i,2); view(i,2) = view(j,2); view(j,2) = tmp; \
-  } while(0)
+  } while (0)

 /* ---------------------------------------------------------------------- */

@ -439,14 +439,14 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::calc_boop1(int /*ncount*/, int ii
  const double r1 = d_rlist(ii,ineigh,1);
  const double r2 = d_rlist(ii,ineigh,2);
  const double rmag = sqrt(r0*r0 + r1*r1 + r2*r2);
-  if(rmag <= MY_EPSILON) {
+  if (rmag <= MY_EPSILON) {
    return;
  }

  const double costheta = r2 / rmag;
  SNAcomplex expphi = {r0,r1};
  const double rxymag = sqrt(expphi.re*expphi.re+expphi.im*expphi.im);
-  if(rxymag <= MY_EPSILON) {
+  if (rxymag <= MY_EPSILON) {
    expphi.re = 1.0;
    expphi.im = 0.0;
  } else {
@ -466,14 +466,14 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::calc_boop1(int /*ncount*/, int ii
    const double polar_pf = polar_prefactor(l, 0, costheta);
    Kokkos::atomic_add(&(d_qnm(ii,il,l).re), polar_pf);
    SNAcomplex expphim = {expphi.re,expphi.im};
-    for(int m = 1; m <= +l; m++) {
+    for (int m = 1; m <= +l; m++) {
      const double prefactor = polar_prefactor(l, m, costheta);
      SNAcomplex ylm = {prefactor * expphim.re, prefactor * expphim.im};
      //d_qnm(ii,il,m+l).re += ylm.re;
      //d_qnm(ii,il,m+l).im += ylm.im;
      Kokkos::atomic_add(&(d_qnm(ii,il,m+l).re), ylm.re);
      Kokkos::atomic_add(&(d_qnm(ii,il,m+l).im), ylm.im);
-      if(m & 1) {
+      if (m & 1) {
        //d_qnm(ii,il,-m+l).re -= ylm.re;
        //d_qnm(ii,il,-m+l).im += ylm.im;
        Kokkos::atomic_add(&(d_qnm(ii,il,-m+l).re), -ylm.re);
@ -508,7 +508,7 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::calc_boop2(int ncount, int ii) co
  double facn = 1.0 / ncount;
  for (int il = 0; il < nqlist; il++) {
    int l = d_qlist[il];
-    for(int m = 0; m < 2*l+1; m++) {
+    for (int m = 0; m < 2*l+1; m++) {
      d_qnm(ii,il,m).re *= facn;
      d_qnm(ii,il,m).im *= facn;
    }
@ -522,7 +522,7 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::calc_boop2(int ncount, int ii) co
    int l = d_qlist[il];
    double qnormfac = sqrt(MY_4PI/(2*l+1));
    double qm_sum = 0.0;
-    for(int m = 0; m < 2*l+1; m++)
+    for (int m = 0; m < 2*l+1; m++)
      qm_sum += d_qnm(ii,il,m).re*d_qnm(ii,il,m).re + d_qnm(ii,il,m).im*d_qnm(ii,il,m).im;
    d_qnarray(i,jj++) = qnormfac * sqrt(qm_sum);
  }
@ -534,8 +534,8 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::calc_boop2(int ncount, int ii) co
    for (int il = 0; il < nqlist; il++) {
      int l = d_qlist[il];
      double wlsum = 0.0;
-      for(int m1 = 0; m1 < 2*l+1; m1++) {
-        for(int m2 = MAX(0,l-m1); m2 < MIN(2*l+1,3*l-m1+1); m2++) {
+      for (int m1 = 0; m1 < 2*l+1; m1++) {
+        for (int m2 = MAX(0,l-m1); m2 < MIN(2*l+1,3*l-m1+1); m2++) {
          int m = m1 + m2 - l;
          SNAcomplex qm1qm2;
          qm1qm2.re = d_qnm(ii,il,m1).re*d_qnm(ii,il,m2).re - d_qnm(ii,il,m1).im*d_qnm(ii,il,m2).im;
@ -555,8 +555,8 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::calc_boop2(int ncount, int ii) co
    for (int il = 0; il < nqlist; il++) {
      int l = d_qlist[il];
      double wlsum = 0.0;
-      for(int m1 = 0; m1 < 2*l+1; m1++) {
-        for(int m2 = MAX(0,l-m1); m2 < MIN(2*l+1,3*l-m1+1); m2++) {
+      for (int m1 = 0; m1 < 2*l+1; m1++) {
+        for (int m2 = MAX(0,l-m1); m2 < MIN(2*l+1,3*l-m1+1); m2++) {
          const int m = m1 + m2 - l;
          SNAcomplex qm1qm2;
          qm1qm2.re = d_qnm(ii,il,m1).re*d_qnm(ii,il,m2).re - d_qnm(ii,il,m1).im*d_qnm(ii,il,m2).im;
@ -581,14 +581,14 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::calc_boop2(int ncount, int ii) co
    const int il = iqlcomp;
    const int l = qlcomp;
    if (d_qnarray(i,il) < QEPSILON)
-      for(int m = 0; m < 2*l+1; m++) {
+      for (int m = 0; m < 2*l+1; m++) {
        d_qnarray(i,jj++) = 0.0;
        d_qnarray(i,jj++) = 0.0;
      }
    else {
      const double qnormfac = sqrt(MY_4PI/(2*l+1));
      const double qnfac = qnormfac/d_qnarray(i,il);
-      for(int m = 0; m < 2*l+1; m++) {
+      for (int m = 0; m < 2*l+1; m++) {
        d_qnarray(i,jj++) = d_qnm(ii,il,m).re * qnfac;
        d_qnarray(i,jj++) = d_qnm(ii,il,m).im * qnfac;
      }
@ -665,8 +665,8 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::init_clebsch_gordan()
  idxcg_count = 0;
  for (int il = 0; il < nqlist; il++) {
    int l = qlist[il];
-    for(int m1 = 0; m1 < 2*l+1; m1++)
-      for(int m2 = MAX(0,l-m1); m2 < MIN(2*l+1,3*l-m1+1); m2++)
+    for (int m1 = 0; m1 < 2*l+1; m1++)
+      for (int m2 = MAX(0,l-m1); m2 < MIN(2*l+1,3*l-m1+1); m2++)
        idxcg_count++;
  }
  idxcg_max = idxcg_count;
@ -676,9 +676,9 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::init_clebsch_gordan()
  idxcg_count = 0;
  for (int il = 0; il < nqlist; il++) {
    int l = qlist[il];
-    for(int m1 = 0; m1 < 2*l+1; m1++) {
+    for (int m1 = 0; m1 < 2*l+1; m1++) {
        aa2 = m1 - l;
-        for(int m2 = MAX(0,l-m1); m2 < MIN(2*l+1,3*l-m1+1); m2++) {
+        for (int m2 = MAX(0,l-m1); m2 < MIN(2*l+1,3*l-m1+1); m2++) {
          bb2 = m2 - l;
          m = aa2 + bb2 + l;

@ -727,7 +727,7 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::check_team_size_for(int inum, int

  team_size_max = Kokkos::TeamPolicy<DeviceType,TagStyle>(inum,Kokkos::AUTO).team_size_max(*this,Kokkos::ParallelForTag());

-  if(team_size*vector_length > team_size_max)
+  if (team_size*vector_length > team_size_max)
    team_size = team_size_max/vector_length;
 }

--- a/src/KOKKOS/compute_temp_kokkos.h
+++ b/src/KOKKOS/compute_temp_kokkos.h
@ -34,7 +34,7 @@ namespace LAMMPS_NS {
      t0 = t1 = t2 = t3 = t4 = t5 = 0.0;
    }
    KOKKOS_INLINE_FUNCTION
-    s_CTEMP& operator+=(const s_CTEMP &rhs){
+    s_CTEMP& operator+=(const s_CTEMP &rhs) {
      t0 += rhs.t0;
      t1 += rhs.t1;
      t2 += rhs.t2;
--- a/src/KOKKOS/domain_kokkos.cpp
+++ b/src/KOKKOS/domain_kokkos.cpp
@ -235,7 +235,7 @@ struct DomainPBCFunctor {
    x(_x.view<DeviceType>()), v(_v.view<DeviceType>()),
    mask(_mask.view<DeviceType>()), image(_image.view<DeviceType>()),
    deform_groupbit(_deform_groupbit),
-    xperiodic(_xperiodic), yperiodic(_yperiodic), zperiodic(_zperiodic){
+    xperiodic(_xperiodic), yperiodic(_yperiodic), zperiodic(_zperiodic) {
    lo[0]=_lo[0]; lo[1]=_lo[1]; lo[2]=_lo[2];
    hi[0]=_hi[0]; hi[1]=_hi[1]; hi[2]=_hi[2];
    period[0]=_period[0]; period[1]=_period[1]; period[2]=_period[2];
--- a/src/KOKKOS/fft3d_kokkos.cpp
+++ b/src/KOKKOS/fft3d_kokkos.cpp
@ -127,7 +127,7 @@ void FFT3dKokkos<DeviceType>::timing1d(typename FFT_AT::t_FFT_SCALAR_1d d_in, in
   in           starting address of input data on this proc
   out          starting address of where output data for this proc
                  will be placed (can be same as in)
-   flag         1 for forward FFT, -1 for inverse FFT
+   flag         1 for forward FFT, -1 for backward FFT
   plan         plan returned by previous call to fft_3d_create_plan
 ------------------------------------------------------------------------- */

@ -215,22 +215,22 @@ void FFT3dKokkos<DeviceType>::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in,
  length = plan->length1;

  #if defined(FFT_MKL)
-    if (flag == -1)
+    if (flag == 1)
      DftiComputeForward(plan->handle_fast,d_data.data());
    else
      DftiComputeBackward(plan->handle_fast,d_data.data());
  #elif defined(FFT_FFTW3)
-    if (flag == -1)
+    if (flag == 1)
      FFTW_API(execute_dft)(plan->plan_fast_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data());
    else
      FFTW_API(execute_dft)(plan->plan_fast_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data());
  #elif defined(FFT_CUFFT)
-    cufftExec(plan->plan_fast,d_data.data(),d_data.data(),flag);
+    cufftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag);
  #else
    typename FFT_AT::t_FFT_DATA_1d d_tmp =
     typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0));
    kiss_fft_functor<DeviceType> f;
-    if (flag == -1)
+    if (flag == 1)
      f = kiss_fft_functor<DeviceType>(d_data,d_tmp,plan->cfg_fast_forward,length);
    else
      f = kiss_fft_functor<DeviceType>(d_data,d_tmp,plan->cfg_fast_backward,length);
@ -238,7 +238,6 @@ void FFT3dKokkos<DeviceType>::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in,
    d_data = d_tmp;
  #endif

-
  // 1st mid-remap to prepare for 2nd FFTs
  // copy = loc for remap result

@ -260,20 +259,20 @@ void FFT3dKokkos<DeviceType>::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in,
  length = plan->length2;

  #if defined(FFT_MKL)
-    if (flag == -1)
+    if (flag == 1)
      DftiComputeForward(plan->handle_mid,d_data.data());
    else
      DftiComputeBackward(plan->handle_mid,d_data.data());
  #elif defined(FFT_FFTW3)
-    if (flag == -1)
+    if (flag == 1)
      FFTW_API(execute_dft)(plan->plan_mid_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data());
    else
      FFTW_API(execute_dft)(plan->plan_mid_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data());
  #elif defined(FFT_CUFFT)
-    cufftExec(plan->plan_mid,d_data.data(),d_data.data(),flag);
+    cufftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag);
  #else
    d_tmp = typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0));
-    if (flag == -1)
+    if (flag == 1)
      f = kiss_fft_functor<DeviceType>(d_data,d_tmp,plan->cfg_mid_forward,length);
    else
      f = kiss_fft_functor<DeviceType>(d_data,d_tmp,plan->cfg_mid_backward,length);
@ -302,20 +301,20 @@ void FFT3dKokkos<DeviceType>::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in,
  length = plan->length3;

  #if defined(FFT_MKL)
-    if (flag == -1)
+    if (flag == 1)
      DftiComputeForward(plan->handle_slow,d_data.data());
    else
      DftiComputeBackward(plan->handle_slow,d_data.data());
  #elif defined(FFT_FFTW3)
-    if (flag == -1)
+    if (flag == 1)
      FFTW_API(execute_dft)(plan->plan_slow_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data());
    else
      FFTW_API(execute_dft)(plan->plan_slow_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data());
  #elif defined(FFT_CUFFT)
-    cufftExec(plan->plan_slow,d_data.data(),d_data.data(),flag);
+    cufftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag);
  #else
    d_tmp = typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0));
-    if (flag == -1)
+    if (flag == 1)
      f = kiss_fft_functor<DeviceType>(d_data,d_tmp,plan->cfg_slow_forward,length);
    else
      f = kiss_fft_functor<DeviceType>(d_data,d_tmp,plan->cfg_slow_backward,length);
@ -323,7 +322,6 @@ void FFT3dKokkos<DeviceType>::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in,
    d_data = d_tmp;
  #endif

-
  // post-remap to put data in output format if needed
  // destination is always out

@ -338,7 +336,7 @@ void FFT3dKokkos<DeviceType>::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in,

  // scaling if required

-  if (flag == 1 && plan->scaled) {
+  if (flag == -1 && plan->scaled) {
    FFT_SCALAR norm = plan->norm;
    int num = plan->normnum;

@ -807,7 +805,7 @@ void FFT3dKokkos<DeviceType>::bifactor(int n, int *factor1, int *factor2)
   Arguments:
   in           starting address of input data on this proc, all set to 0.0
   nsize        size of in
-   flag         1 for forward FFT, -1 for inverse FFT
+   flag         1 for forward FFT, -1 for backward FFT
   plan         plan returned by previous call to fft_3d_create_plan
 ------------------------------------------------------------------------- */

@ -861,9 +859,9 @@ void FFT3dKokkos<DeviceType>::fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_DATA_
    FFTW_API(execute_dft)(plan->plan_slow_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data());
  }
 #elif defined(FFT_CUFFT)
-  cufftExec(plan->plan_fast,d_data.data(),d_data.data(),flag);
-  cufftExec(plan->plan_mid,d_data.data(),d_data.data(),flag);
-  cufftExec(plan->plan_slow,d_data.data(),d_data.data(),flag);
+  cufftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag);
+  cufftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag);
+  cufftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag);
 #else
  kiss_fft_functor<DeviceType> f;
    typename FFT_AT::t_FFT_DATA_1d d_tmp =
--- a/src/KOKKOS/fft3d_kokkos.h
+++ b/src/KOKKOS/fft3d_kokkos.h
@ -72,6 +72,7 @@ struct fft_plan_3d_kokkos {
 template<class DeviceType>
 class FFT3dKokkos : protected Pointers {
 public:
+  enum{FORWARD=1,BACKWARD=-1};
  typedef DeviceType device_type;
  typedef FFTArrayTypes<DeviceType> FFT_AT;

--- a/src/KOKKOS/fix_enforce2d_kokkos.cpp
+++ b/src/KOKKOS/fix_enforce2d_kokkos.cpp
@ -77,7 +77,7 @@ void FixEnforce2DKokkos<DeviceType>::post_force(int /*vflag*/)
  if (atomKK->torque_flag) flag_mask |= 4;

  copymode = 1;
-  switch( flag_mask ){
+  switch (flag_mask) {
    case 0:{
      FixEnforce2DKokkosPostForceFunctor<DeviceType,0,0,0> functor(this);
      Kokkos::parallel_for(nlocal,functor);
@ -139,21 +139,21 @@ template <int omega_flag, int angmom_flag, int torque_flag>
 KOKKOS_INLINE_FUNCTION
 void FixEnforce2DKokkos<DeviceType>::post_force_item( int i ) const
 {
-  if (mask[i] & groupbit){
+  if (mask[i] & groupbit) {
    v(i,2) = 0.0;
    f(i,2) = 0.0;

-    if(omega_flag){
+    if (omega_flag) {
      omega(i,0) = 0.0;
      omega(i,1) = 0.0;
    }

-    if(angmom_flag){
+    if (angmom_flag) {
      angmom(i,0) = 0.0;
      angmom(i,1) = 0.0;
    }

-    if(torque_flag){
+    if (torque_flag) {
      torque(i,0) = 0.0;
      torque(i,1) = 0.0;
    }
--- a/src/KOKKOS/fix_eos_table_rx_kokkos.cpp
+++ b/src/KOKKOS/fix_eos_table_rx_kokkos.cpp
@ -193,7 +193,7 @@ KOKKOS_INLINE_FUNCTION
 void FixEOStableRXKokkos<DeviceType>::operator()(TagFixEOStableRXInit, const int &i) const {
  double tmp;
  if (mask[i] & groupbit) {
-    if(dpdTheta[i] <= 0.0)
+    if (dpdTheta[i] <= 0.0)
      k_error_flag.template view<DeviceType>()() = 1;
    energy_lookup(i,dpdTheta[i],tmp);
    uCond[i] = 0.0;
@ -233,7 +233,7 @@ void FixEOStableRXKokkos<DeviceType>::post_integrate()
 template<class DeviceType>
 KOKKOS_INLINE_FUNCTION
 void FixEOStableRXKokkos<DeviceType>::operator()(TagFixEOStableRXTemperatureLookup2, const int &i) const {
-  if (mask[i] & groupbit){
+  if (mask[i] & groupbit) {
    temperature_lookup(i,uCond[i]+uMech[i]+uChem[i],dpdTheta[i]);
    if (dpdTheta[i] <= 0.0)
      k_error_flag.template view<DeviceType>()() = 1;
@ -303,7 +303,7 @@ void FixEOStableRXKokkos<DeviceType>::energy_lookup(int id, double thetai, doubl
  nPG = 0;

  if (rx_flag) {
-    for (int ispecies = 0; ispecies < nspecies; ispecies++ ) {
+    for (int ispecies = 0; ispecies < nspecies; ispecies++) {
      nTotal += dvector(ispecies,id);
      if (fabs(d_moleculeCorrCoeff[ispecies]) > tolerance) {
        nPG++;
@ -314,7 +314,7 @@ void FixEOStableRXKokkos<DeviceType>::energy_lookup(int id, double thetai, doubl
    nTotal = 1.0;
  }

-  for(int ispecies=0;ispecies<nspecies;ispecies++){
+  for (int ispecies=0;ispecies<nspecies;ispecies++) {
    //Table *tb = &tables[ispecies];
    //thetai = MAX(thetai,tb->lo);
    thetai = MAX(thetai,d_table_const.lo(ispecies));
@ -364,7 +364,7 @@ void FixEOStableRXKokkos<DeviceType>::temperature_lookup(int id, double ui, doub
  // Store the current thetai in t1
  t1 = MAX(thetai,lo);
  t1 = MIN(t1,hi);
-  if(t1==hi) delta = -delta;
+  if (t1==hi) delta = -delta;

  // Compute u1 at thetai
  energy_lookup(id,t1,u1);
@ -382,9 +382,9 @@ void FixEOStableRXKokkos<DeviceType>::temperature_lookup(int id, double ui, doub
  f2 = u2 - ui;

  // Apply the Secant Method
-  for(it=0; it<maxit; it++){
-    if(fabs(f2-f1) < MY_EPSILON){
-      if(std::isnan(f1) || std::isnan(f2)) k_error_flag.template view<DeviceType>()() = 2;
+  for (it=0; it<maxit; it++) {
+    if (fabs(f2-f1) < MY_EPSILON) {
+      if (std::isnan(f1) || std::isnan(f2)) k_error_flag.template view<DeviceType>()() = 2;
      temp = t1;
      temp = MAX(temp,lo);
      temp = MIN(temp,hi);
@ -392,15 +392,15 @@ void FixEOStableRXKokkos<DeviceType>::temperature_lookup(int id, double ui, doub
      break;
    }
    temp = t2 - f2*(t2-t1)/(f2-f1);
-    if(fabs(temp-t2) < tolerance) break;
+    if (fabs(temp-t2) < tolerance) break;
    f1 = f2;
    t1 = t2;
    t2 = temp;
    energy_lookup(id,t2,u2);
    f2 = u2 - ui;
  }
-  if(it==maxit){
-    if(std::isnan(f1) || std::isnan(f2) || std::isnan(ui) || std::isnan(thetai) || std::isnan(t1) || std::isnan(t2))
+  if (it==maxit) {
+    if (std::isnan(f1) || std::isnan(f2) || std::isnan(ui) || std::isnan(thetai) || std::isnan(t1) || std::isnan(t2))
      k_error_flag.template view<DeviceType>()() = 2;
    else
      k_error_flag.template view<DeviceType>()() = 3;
@ -440,7 +440,7 @@ void FixEOStableRXKokkos<DeviceType>::unpack_forward_comm(int n, int first, doub

  m = 0;
  last = first + n ;
-  for (ii = first; ii < last; ii++){
+  for (ii = first; ii < last; ii++) {
    h_uChem[ii]  = buf[m++];
    h_uCG[ii]    = buf[m++];
    h_uCGnew[ii] = buf[m++];
@ -518,24 +518,24 @@ void FixEOStableRXKokkos<DeviceType>::create_kokkos_tables()
  memoryKK->create_kokkos(d_table->hi,h_table->hi,ntables,"Table::hi");
  memoryKK->create_kokkos(d_table->invdelta,h_table->invdelta,ntables,"Table::invdelta");

-  if(tabstyle == LINEAR) {
+  if (tabstyle == LINEAR) {
    memoryKK->create_kokkos(d_table->r,h_table->r,ntables,tablength,"Table::r");
    memoryKK->create_kokkos(d_table->e,h_table->e,ntables,tablength,"Table::e");
    memoryKK->create_kokkos(d_table->de,h_table->de,ntables,tlm1,"Table::de");
  }

-  for(int i=0; i < ntables; i++) {
+  for (int i=0; i < ntables; i++) {
    Table* tb = &tables[i];

    h_table->lo[i] = tb->lo;
    h_table->hi[i] = tb->hi;
    h_table->invdelta[i] = tb->invdelta;

-    for(int j = 0; j<h_table->r.extent(1); j++)
+    for (int j = 0; j<h_table->r.extent(1); j++)
      h_table->r(i,j) = tb->r[j];
-    for(int j = 0; j<h_table->e.extent(1); j++)
+    for (int j = 0; j<h_table->e.extent(1); j++)
      h_table->e(i,j) = tb->e[j];
-    for(int j = 0; j<h_table->de.extent(1); j++)
+    for (int j = 0; j<h_table->de.extent(1); j++)
      h_table->de(i,j) = tb->de[j];
  }

--- a/src/KOKKOS/fix_langevin_kokkos.cpp
+++ b/src/KOKKOS/fix_langevin_kokkos.cpp
@ -61,7 +61,7 @@ FixLangevinKokkos<DeviceType>::FixLangevinKokkos(LAMMPS *lmp, int narg, char **a
  for (int i = 1; i <= ntypes; i++) ratio[i] = 1.0;
  k_ratio.template modify<LMPHostType>();

-  if(gjfflag){
+  if (gjfflag) {
    grow_arrays(atomKK->nmax);
    atom->add_callback(Atom::GROW);
    // initialize franprev to zero
@ -76,7 +76,7 @@ FixLangevinKokkos<DeviceType>::FixLangevinKokkos(LAMMPS *lmp, int narg, char **a
    k_franprev.template modify<LMPHostType>();
    k_lv.template modify<LMPHostType>();
  }
-  if(zeroflag){
+  if (zeroflag) {
    k_fsumall = tdual_double_1d_3n("langevin:fsumall");
    h_fsumall = k_fsumall.template view<LMPHostType>();
    d_fsumall = k_fsumall.template view<DeviceType>();
@ -97,8 +97,8 @@ FixLangevinKokkos<DeviceType>::~FixLangevinKokkos()
  memoryKK->destroy_kokkos(k_gfactor2,gfactor2);
  memoryKK->destroy_kokkos(k_ratio,ratio);
  memoryKK->destroy_kokkos(k_flangevin,flangevin);
-  if(gjfflag) memoryKK->destroy_kokkos(k_franprev,franprev);
-  if(gjfflag) memoryKK->destroy_kokkos(k_lv,lv);
+  if (gjfflag) memoryKK->destroy_kokkos(k_franprev,franprev);
+  if (gjfflag) memoryKK->destroy_kokkos(k_lv,lv);
  memoryKK->destroy_kokkos(k_tforce,tforce);
 }

@ -108,13 +108,13 @@ template<class DeviceType>
 void FixLangevinKokkos<DeviceType>::init()
 {
   FixLangevin::init();
-  if(oflag)
+  if (oflag)
     error->all(FLERR,"Fix langevin omega is not yet implemented with kokkos");
-  if(ascale)
+  if (ascale)
     error->all(FLERR,"Fix langevin angmom is not yet implemented with kokkos");
-  if(gjfflag && tbiasflag)
+  if (gjfflag && tbiasflag)
     error->all(FLERR,"Fix langevin gjf + tbias is not yet implemented with kokkos");
-  if(gjfflag && tbiasflag)
+  if (gjfflag)  // was a copy of the gjf+tbias test above, which error->all already rejects, so this warning never fired
     error->warning(FLERR,"Fix langevin gjf + kokkos is not implemented with random gaussians");

  // prefactors are modified in the init
@ -182,8 +182,8 @@ void FixLangevinKokkos<DeviceType>::post_force(int /*vflag*/)
  k_gfactor1.template sync<DeviceType>();
  k_gfactor2.template sync<DeviceType>();
  k_ratio.template sync<DeviceType>();
-  if(gjfflag) k_franprev.template sync<DeviceType>();
-  if(gjfflag) k_lv.template sync<DeviceType>();
+  if (gjfflag) k_franprev.template sync<DeviceType>();
+  if (gjfflag) k_lv.template sync<DeviceType>();

  boltz = force->boltz;
  dt = update->dt;
@ -217,7 +217,7 @@ void FixLangevinKokkos<DeviceType>::post_force(int /*vflag*/)
  }

  // account for bias velocity
-  if(tbiasflag == BIAS){
+  if (tbiasflag == BIAS) {
    atomKK->sync(temperature->execution_space,temperature->datamask_read);
    temperature->compute_scalar();
    temperature->remove_bias_all(); // modifies velocities
@ -516,7 +516,7 @@ void FixLangevinKokkos<DeviceType>::post_force(int /*vflag*/)
            }


-  if(tbiasflag == BIAS){
+  if (tbiasflag == BIAS) {
    atomKK->sync(temperature->execution_space,temperature->datamask_read);
    temperature->restore_bias_all(); // modifies velocities
    atomKK->modified(temperature->execution_space,temperature->datamask_modify);
@ -566,8 +566,8 @@ FSUM FixLangevinKokkos<DeviceType>::post_force_item(int i) const

  if (mask[i] & groupbit) {
    rand_type rand_gen = rand_pool.get_state();
-    if(Tp_TSTYLEATOM) tsqrt_t = sqrt(d_tforce[i]);
-    if(Tp_RMASS){
+    if (Tp_TSTYLEATOM) tsqrt_t = sqrt(d_tforce[i]);
+    if (Tp_RMASS) {
      gamma1 = -rmass[i] / t_period / ftm2v;
      gamma2 = sqrt(rmass[i]) * fran_prop_const / ftm2v;
      gamma1 *= 1.0/d_ratio[type[i]];
@ -581,7 +581,7 @@ FSUM FixLangevinKokkos<DeviceType>::post_force_item(int i) const
    fran[1] = gamma2 * (rand_gen.drand() - 0.5); //(random->uniform()-0.5);
    fran[2] = gamma2 * (rand_gen.drand() - 0.5); //(random->uniform()-0.5);

-    if(Tp_BIAS){
+    if (Tp_BIAS) {
      fdrag[0] = gamma1*v(i,0);
      fdrag[1] = gamma1*v(i,1);
      fdrag[2] = gamma1*v(i,2);
@ -625,7 +625,7 @@ FSUM FixLangevinKokkos<DeviceType>::post_force_item(int i) const
    f(i,2) += fdrag[2] + fran[2];

    if (Tp_TALLY) {
-      if (Tp_GJF){
+      if (Tp_GJF) {
        fdrag[0] = gamma1*d_lv(i,0)/gjfsib/gjfsib;
        fdrag[1] = gamma1*d_lv(i,1)/gjfsib/gjfsib;
        fdrag[2] = gamma1*d_lv(i,2)/gjfsib/gjfsib;
@ -794,7 +794,7 @@ void FixLangevinKokkos<DeviceType>::end_of_step()
  FixLangevinKokkosTallyEnergyFunctor<DeviceType> tally_functor(this);
  Kokkos::parallel_reduce(nlocal,tally_functor,energy_onestep);

-  if (gjfflag){
+  if (gjfflag) {
    if (rmass.data()) {
      FixLangevinKokkosEndOfStepFunctor<DeviceType,1> functor(this);
      Kokkos::parallel_for(nlocal,functor);
@ -817,7 +817,7 @@ void FixLangevinKokkos<DeviceType>::end_of_step_item(int i) const {
    tmp[0] = v(i,0);
    tmp[1] = v(i,1);
    tmp[2] = v(i,2);
-    if (!osflag){
+    if (!osflag) {
      v(i,0) = d_lv(i,0);
      v(i,1) = d_lv(i,1);
      v(i,2) = d_lv(i,2);
@ -848,7 +848,7 @@ void FixLangevinKokkos<DeviceType>::end_of_step_rmass_item(int i) const
    tmp[0] = v(i,0);
    tmp[1] = v(i,1);
    tmp[2] = v(i,2);
-    if (!osflag){
+    if (!osflag) {
      v(i,0) = d_lv(i,0);
      v(i,1) = d_lv(i,1);
      v(i,2) = d_lv(i,2);
--- a/src/KOKKOS/fix_langevin_kokkos.h
+++ b/src/KOKKOS/fix_langevin_kokkos.h
@ -36,7 +36,7 @@ namespace LAMMPS_NS {
      fx = fy = fz = 0.0;
    }
    KOKKOS_INLINE_FUNCTION
-    s_FSUM& operator+=(const s_FSUM &rhs){
+    s_FSUM& operator+=(const s_FSUM &rhs) {
      fx += rhs.fx;
      fy += rhs.fy;
      fz += rhs.fz;
@ -175,7 +175,7 @@ namespace LAMMPS_NS {

    FixLangevinKokkosPostForceFunctor(FixLangevinKokkos<DeviceType>* c_ptr):
      c(*c_ptr) {}
-      ~FixLangevinKokkosPostForceFunctor(){c.cleanup_copy();}
+      ~FixLangevinKokkosPostForceFunctor() {c.cleanup_copy();}

      KOKKOS_INLINE_FUNCTION
      void operator()(const int i) const {
--- a/src/KOKKOS/fix_neigh_history_kokkos.cpp
+++ b/src/KOKKOS/fix_neigh_history_kokkos.cpp
@ -43,11 +43,7 @@ FixNeighHistoryKokkos<DeviceType>::FixNeighHistoryKokkos(LAMMPS *lmp, int narg,
  grow_arrays(atom->nmax);

  d_resize = typename ArrayTypes<DeviceType>::t_int_scalar("FixNeighHistoryKokkos::resize");
-#ifndef KOKKOS_USE_CUDA_UVM
  h_resize = Kokkos::create_mirror_view(d_resize);
-#else
-  h_resize = d_resize;
-#endif
  h_resize() = 1;
 }

--- a/src/KOKKOS/fix_qeq_reax_kokkos.cpp
+++ b/src/KOKKOS/fix_qeq_reax_kokkos.cpp
@ -51,8 +51,8 @@ FixQEqReaxKokkos(LAMMPS *lmp, int narg, char **arg) :
  atomKK = (AtomKokkos *) atom;
  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;

-  datamask_read = X_MASK | V_MASK | F_MASK | MASK_MASK | Q_MASK | TYPE_MASK | TAG_MASK;
-  datamask_modify = Q_MASK | X_MASK;
+  datamask_read = X_MASK | V_MASK | F_MASK | Q_MASK | MASK_MASK | TYPE_MASK | TAG_MASK;
+  datamask_modify = X_MASK;

  nmax = m_cap = 0;
  allocated_flag = 0;
@ -81,8 +81,7 @@ FixQEqReaxKokkos<DeviceType>::~FixQEqReaxKokkos()
 template<class DeviceType>
 void FixQEqReaxKokkos<DeviceType>::init()
 {
-  atomKK->k_q.modify<LMPHostType>();
-  atomKK->k_q.sync<DeviceType>();
+  atomKK->sync(execution_space,Q_MASK);

  FixQEqReax::init();

@ -139,8 +138,8 @@ void FixQEqReaxKokkos<DeviceType>::init_shielding_k()
  k_shield = DAT::tdual_ffloat_2d("qeq/kk:shield",ntypes+1,ntypes+1);
  d_shield = k_shield.template view<DeviceType>();

-  for( i = 1; i <= ntypes; ++i )
-    for( j = 1; j <= ntypes; ++j )
+  for (i = 1; i <= ntypes; ++i)
+    for (j = 1; j <= ntypes; ++j)
      k_shield.h_view(i,j) = pow( gamma[i] * gamma[j], -1.5 );

  k_shield.template modify<LMPHostType>();
@ -263,15 +262,15 @@ void FixQEqReaxKokkos<DeviceType>::pre_force(int /*vflag*/)

  // comm->forward_comm_fix(this); //Dist_vector( s );
  pack_flag = 2;
-  k_s.template sync<DeviceType>();
-  comm->forward_comm_fix(this);
  k_s.template modify<DeviceType>();
+  comm->forward_comm_fix(this);
+  k_s.template sync<DeviceType>();

  // comm->forward_comm_fix(this); //Dist_vector( t );
  pack_flag = 3;
-  k_t.template sync<DeviceType>();
-  comm->forward_comm_fix(this);
  k_t.template modify<DeviceType>();
+  comm->forward_comm_fix(this);
+  k_t.template sync<DeviceType>();

  need_dup = lmp->kokkos->need_dup<DeviceType>();

@ -752,9 +751,7 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve1()

  if (neighflag != FULL) {
    k_o.template modify<DeviceType>();
-    k_o.template sync<LMPHostType>();
    comm->reverse_comm_fix(this); //Coll_vector( q );
-    k_o.template modify<LMPHostType>();
    k_o.template sync<DeviceType>();
  }

@ -781,9 +778,9 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve1()

    // comm->forward_comm_fix(this); //Dist_vector( d );
    pack_flag = 1;
-    k_d.template sync<DeviceType>();
-    comm->forward_comm_fix(this);
    k_d.template modify<DeviceType>();
+    comm->forward_comm_fix(this);
+    k_d.template sync<DeviceType>();

    // sparse_matvec( &H, d, q );
    FixQEqReaxKokkosSparse22Functor<DeviceType> sparse22_functor(this);
@ -807,9 +804,7 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve1()

    if (neighflag != FULL) {
      k_o.template modify<DeviceType>();
-      k_o.template sync<LMPHostType>();
      comm->reverse_comm_fix(this); //Coll_vector( q );
-      k_o.template modify<LMPHostType>();
      k_o.template sync<DeviceType>();
    }

@ -888,9 +883,7 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve2()

  if (neighflag != FULL) {
    k_o.template modify<DeviceType>();
-    k_o.template sync<LMPHostType>();
    comm->reverse_comm_fix(this); //Coll_vector( q );
-    k_o.template modify<LMPHostType>();
    k_o.template sync<DeviceType>();
  }

@ -917,9 +910,9 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve2()

    // comm->forward_comm_fix(this); //Dist_vector( d );
    pack_flag = 1;
-    k_d.template sync<DeviceType>();
-    comm->forward_comm_fix(this);
    k_d.template modify<DeviceType>();
+    comm->forward_comm_fix(this);
+    k_d.template sync<DeviceType>();

    // sparse_matvec( &H, d, q );
    FixQEqReaxKokkosSparse22Functor<DeviceType> sparse22_functor(this);
@ -943,9 +936,7 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve2()

    if (neighflag != FULL) {
      k_o.template modify<DeviceType>();
-      k_o.template sync<LMPHostType>();
      comm->reverse_comm_fix(this); //Coll_vector( q );
-      k_o.template modify<LMPHostType>();
      k_o.template sync<DeviceType>();
    }

@ -1017,13 +1008,11 @@ void FixQEqReaxKokkos<DeviceType>::calculate_q()
  // q[i] = s[i] - u * t[i];
  FixQEqReaxKokkosCalculateQFunctor<DeviceType> calculateQ_functor(this);
  Kokkos::parallel_for(inum,calculateQ_functor);
+  atomKK->modified(execution_space,Q_MASK);

  pack_flag = 4;
  //comm->forward_comm_fix( this ); //Dist_vector( atom->q );
-  atomKK->k_q.sync<DeviceType>();
  comm->forward_comm_fix(this);
-  atomKK->k_q.modify<DeviceType>();
-
 }

 /* ---------------------------------------------------------------------- */
@ -1053,7 +1042,7 @@ void FixQEqReaxKokkos<DeviceType>::sparse13_item(int ii) const
  const int i = d_ilist[ii];
  if (mask[i] & groupbit) {
    F_FLOAT tmp = 0.0;
-    for(int jj = d_firstnbr[i]; jj < d_firstnbr[i] + d_numnbrs[i]; jj++) {
+    for (int jj = d_firstnbr[i]; jj < d_firstnbr[i] + d_numnbrs[i]; jj++) {
      const int j = d_jlist(jj);
      tmp += d_val(jj) * d_s[j];
      a_o[j] += d_val(jj) * d_s[i];
@ -1106,7 +1095,7 @@ void FixQEqReaxKokkos<DeviceType>::sparse23_item(int ii) const
  const int i = d_ilist[ii];
  if (mask[i] & groupbit) {
    F_FLOAT tmp = 0.0;
-    for(int jj = d_firstnbr[i]; jj < d_firstnbr[i] + d_numnbrs[i]; jj++) {
+    for (int jj = d_firstnbr[i]; jj < d_firstnbr[i] + d_numnbrs[i]; jj++) {
      const int j = d_jlist(jj);
      tmp += d_val(jj) * d_d[j];
      a_o[j] += d_val(jj) * d_d[i];
@ -1166,7 +1155,7 @@ void FixQEqReaxKokkos<DeviceType>::sparse33_item(int ii) const
  const int i = d_ilist[ii];
  if (mask[i] & groupbit) {
    F_FLOAT tmp = 0.0;
-    for(int jj = d_firstnbr[i]; jj < d_firstnbr[i] + d_numnbrs[i]; jj++) {
+    for (int jj = d_firstnbr[i]; jj < d_firstnbr[i] + d_numnbrs[i]; jj++) {
      const int j = d_jlist(jj);
      tmp += d_val(jj) * d_t[j];
      a_o[j] += d_val(jj) * d_t[i];
@ -1371,11 +1360,11 @@ void FixQEqReaxKokkos<DeviceType>::operator()(TagFixQEqReaxPackForwardComm, cons

  if (pack_flag == 1)
    d_buf[i] = d_d[j];
-  else if( pack_flag == 2 )
+  else if (pack_flag == 2)
    d_buf[i] = d_s[j];
-  else if( pack_flag == 3 )
+  else if (pack_flag == 3)
    d_buf[i] = d_t[j];
-  else if( pack_flag == 4 )
+  else if (pack_flag == 4)
    d_buf[i] = q[j];
 }

@ -1387,6 +1376,9 @@ void FixQEqReaxKokkos<DeviceType>::unpack_forward_comm_fix_kokkos(int n, int fir
  first = first_in;
  d_buf = buf.view<DeviceType>();
  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixQEqReaxUnpackForwardComm>(0,n),*this);
+
+  if (pack_flag == 4)
+    atomKK->modified(execution_space,Q_MASK); // needed for auto_sync
 }

 template<class DeviceType>
@ -1394,11 +1386,11 @@ KOKKOS_INLINE_FUNCTION
 void FixQEqReaxKokkos<DeviceType>::operator()(TagFixQEqReaxUnpackForwardComm, const int &i) const {
   if (pack_flag == 1)
     d_d[i + first] = d_buf[i];
-  else if( pack_flag == 2)
+  else if (pack_flag == 2)  // s vector
     d_s[i + first] = d_buf[i];
-  else if( pack_flag == 3)
+  else if (pack_flag == 3)  // t vector
     d_t[i + first] = d_buf[i];
-  else if( pack_flag == 4)
+  else if (pack_flag == 4)  // charges
     q[i + first] = d_buf[i];

 }
@ -1411,14 +1403,19 @@ int FixQEqReaxKokkos<DeviceType>::pack_forward_comm(int n, int *list, double *bu
 {
  int m;

-  if (pack_flag == 1)
-    for(m = 0; m < n; m++) buf[m] = h_d[list[m]];
-  else if( pack_flag == 2 )
-    for(m = 0; m < n; m++) buf[m] = h_s[list[m]];
-  else if( pack_flag == 3 )
-    for(m = 0; m < n; m++) buf[m] = h_t[list[m]];
-  else if( pack_flag == 4 )
-    for(m = 0; m < n; m++) buf[m] = atom->q[list[m]];
+  if (pack_flag == 1) {
+    k_d.sync_host();
+    for (m = 0; m < n; m++) buf[m] = h_d[list[m]];
+  } else if (pack_flag == 2) {
+    k_s.sync_host();
+    for (m = 0; m < n; m++) buf[m] = h_s[list[m]];
+  } else if (pack_flag == 3) {
+    k_t.sync_host();
+    for (m = 0; m < n; m++) buf[m] = h_t[list[m]];
+  } else if (pack_flag == 4) {
+    atomKK->sync(Host,Q_MASK);
+    for (m = 0; m < n; m++) buf[m] = atom->q[list[m]];
+  }

  return n;
 }
@ -1430,14 +1427,23 @@ void FixQEqReaxKokkos<DeviceType>::unpack_forward_comm(int n, int first, double
 {
  int i, m;

-  if (pack_flag == 1)
-    for(m = 0, i = first; m < n; m++, i++) h_d[i] = buf[m];
-  else if( pack_flag == 2)
-    for(m = 0, i = first; m < n; m++, i++) h_s[i] = buf[m];
-  else if( pack_flag == 3)
-    for(m = 0, i = first; m < n; m++, i++) h_t[i] = buf[m];
-  else if( pack_flag == 4)
-    for(m = 0, i = first; m < n; m++, i++) atom->q[i] = buf[m];
+  if (pack_flag == 1) {
+    k_d.sync_host();
+    for (m = 0, i = first; m < n; m++, i++) h_d[i] = buf[m];
+    k_d.modify_host();
+  } else if (pack_flag == 2) {
+    k_s.sync_host();
+    for (m = 0, i = first; m < n; m++, i++) h_s[i] = buf[m];
+    k_s.modify_host();
+  } else if (pack_flag == 3) {
+    k_t.sync_host();
+    for (m = 0, i = first; m < n; m++, i++) h_t[i] = buf[m];
+    k_t.modify_host();
+  } else if (pack_flag == 4) {
+    atomKK->sync(Host,Q_MASK);
+    for (m = 0, i = first; m < n; m++, i++) atom->q[i] = buf[m];
+    atomKK->modified(Host,Q_MASK);
+  }
 }

 /* ---------------------------------------------------------------------- */
@ -1445,8 +1451,10 @@ void FixQEqReaxKokkos<DeviceType>::unpack_forward_comm(int n, int first, double
 template<class DeviceType>
 int FixQEqReaxKokkos<DeviceType>::pack_reverse_comm(int n, int first, double *buf)
 {
+  k_o.sync_host();
+
  int i, m;
-  for(m = 0, i = first; m < n; m++, i++) {
+  for (m = 0, i = first; m < n; m++, i++) {
    buf[m] = h_o[i];
  }
  return n;
@ -1457,9 +1465,13 @@ int FixQEqReaxKokkos<DeviceType>::pack_reverse_comm(int n, int first, double *bu
 template<class DeviceType>
 void FixQEqReaxKokkos<DeviceType>::unpack_reverse_comm(int n, int *list, double *buf)
 {
+  k_o.sync_host();  // refresh host data before accumulating into h_o (assumes h_o is the host view of k_o -- confirm)
+
-  for(int m = 0; m < n; m++) {
+  for (int m = 0; m < n; m++) {
     h_o[list[m]] += buf[m];
   }
+
+  k_o.modify_host();  // flag the host side as modified so the callers' later device sync sees the sums
 }

 /* ---------------------------------------------------------------------- */
--- a/src/KOKKOS/fix_qeq_reax_kokkos.h
+++ b/src/KOKKOS/fix_qeq_reax_kokkos.h
@ -147,9 +147,9 @@ class FixQEqReaxKokkos : public FixQEqReax, public KokkosBase {

   struct params_qeq{
     KOKKOS_INLINE_FUNCTION
-    params_qeq(){chi=0;eta=0;gamma=0;};
+    params_qeq() {chi=0;eta=0;gamma=0;};  // default: all three parameters start at zero
     KOKKOS_INLINE_FUNCTION
-    params_qeq(int /*i*/){chi=0;eta=0;gamma=0;};
+    params_qeq(int /*i*/) {chi=0;eta=0;gamma=0;};  // int argument is ignored; same zero initialization
     F_FLOAT chi, eta, gamma;
   };

--- a/src/KOKKOS/fix_rx_kokkos.cpp
+++ b/src/KOKKOS/fix_rx_kokkos.cpp
@ -130,7 +130,7 @@ void FixRxKokkos<DeviceType>::init()
  bool eos_flag = false;
  for (int i = 0; i < modify->nfix; i++)
    if (utils::strmatch(modify->fix[i]->style,"^eos/table/rx")) eos_flag = true;
-  if(!eos_flag) error->all(FLERR,"fix rx requires fix eos/table/rx to be specified");
+  if (!eos_flag) error->all(FLERR,"fix rx requires fix eos/table/rx to be specified");

  if (update_kinetics_data)
    create_kinetics_data();
@ -322,7 +322,7 @@ void FixRxKokkos<DeviceType>::k_rkf45_step (const int neq, const double h, Vecto
   // 1)
   k_rhs (0.0, y, f1, userData);

-   for (int k = 0; k < neq; k++){
+   for (int k = 0; k < neq; k++) {
      f1[k] *= h;
      ytmp[k] = y[k] + c21 * f1[k];
   }
@ -330,7 +330,7 @@ void FixRxKokkos<DeviceType>::k_rkf45_step (const int neq, const double h, Vecto
   // 2)
   k_rhs(0.0, ytmp, f2, userData);

-   for (int k = 0; k < neq; k++){
+   for (int k = 0; k < neq; k++) {
      f2[k] *= h;
      ytmp[k] = y[k] + c31 * f1[k] + c32 * f2[k];
   }
@ -417,7 +417,7 @@ int FixRxKokkos<DeviceType>::k_rkf45_h0 (const int neq, const double t, const do
   // compute ydot at t=t0
   k_rhs (t, y, ydot, userData);

-   while(1)
+   while (1)
   {
      // Estimate y'' with finite-difference ...

@ -429,7 +429,7 @@ int FixRxKokkos<DeviceType>::k_rkf45_h0 (const int neq, const double t, const do

      // Compute WRMS norm of y''
      double yddnrm = 0.0;
-      for (int k = 0; k < neq; k++){
+      for (int k = 0; k < neq; k++) {
         double ydd = (ydot1[k] - ydot[k]) / hg;
         double wterr = ydd / (relTol * fabs( y[k] ) + absTol);
         yddnrm += wterr * wterr;
@ -441,7 +441,7 @@ int FixRxKokkos<DeviceType>::k_rkf45_h0 (const int neq, const double t, const do
      //std::cout << "ydot " << ydot[neq-1] << std::endl;

      // should we accept this?
-      if (hnew_is_ok || iter == max_iters){
+      if (hnew_is_ok || iter == max_iters) {
         hnew = hg;
         //if (iter == max_iters)
         //   fprintf(stderr, "ERROR_HIN_MAX_ITERS\n");
@ -455,11 +455,11 @@ int FixRxKokkos<DeviceType>::k_rkf45_h0 (const int neq, const double t, const do
      double hrat = hnew / hg;

      // Accept this value ... the bias factor should bring it within range.
-      if ( (hrat > 0.5) && (hrat < 2.0) )
+      if ((hrat > 0.5) && (hrat < 2.0))
         hnew_is_ok = true;

      // If y'' is still bad after a few iterations, just accept h and give up.
-      if ( (iter > 1) && hrat > 2.0 ) {
+      if ((iter > 1) && hrat > 2.0) {
         hnew = hg;
         hnew_is_ok = true;
      }
@ -510,7 +510,7 @@ void FixRxKokkos<DeviceType>::k_rkf45(const int neq, const double t_stop, Vector

  double t = 0.0;

-  if (h < h_min){
+  if (h < h_min) {
    //fprintf(stderr,"hin not implemented yet\n");
    //exit(-1);
    nfe = k_rkf45_h0 (neq, t, t_stop, h_min, h_max, h, y, rwork, userData);
@ -530,7 +530,7 @@ void FixRxKokkos<DeviceType>::k_rkf45(const int neq, const double t_stop, Vector
    // Estimate the solution error.
      // ... weighted 2-norm of the error.
      double err2 = 0.0;
-      for (int k = 0; k < neq; k++){
+      for (int k = 0; k < neq; k++) {
        const double wterr = eout[k] / (relTol * fabs( y[k] ) + absTol);
        err2 += wterr * wterr;
      }
@ -538,7 +538,7 @@ void FixRxKokkos<DeviceType>::k_rkf45(const int neq, const double t_stop, Vector
    double err = fmax( uround, sqrt( err2 / double(nspecies) ));

    // Accept the solution?
-    if (err <= 1.0 || h <= h_min){
+    if (err <= 1.0 || h <= h_min) {
      t += h;
      nst++;

@ -571,7 +571,7 @@ void FixRxKokkos<DeviceType>::k_rkf45(const int neq, const double t_stop, Vector
    nit++;
    nfe += 6;

-    if (maxIters && nit > maxIters){
+    if (maxIters && nit > maxIters) {
      //fprintf(stderr,"atom[%d] took too many iterations in rkf45 %d %e %e\n", id, nit, t, t_stop);
      counter.nFails ++;
      break;
@ -643,7 +643,7 @@ void FixRxKokkos<DeviceType>::rkf45_step (const int neq, const double h, double
   // 1)
   rhs (0.0, y, f1, v_param);

-   for (int k = 0; k < neq; k++){
+   for (int k = 0; k < neq; k++) {
      f1[k] *= h;
      ytmp[k] = y[k] + c21 * f1[k];
   }
@ -651,7 +651,7 @@ void FixRxKokkos<DeviceType>::rkf45_step (const int neq, const double h, double
   // 2)
   rhs(0.0, ytmp, f2, v_param);

-   for (int k = 0; k < neq; k++){
+   for (int k = 0; k < neq; k++) {
      f2[k] *= h;
      ytmp[k] = y[k] + c31 * f1[k] + c32 * f2[k];
   }
@ -736,7 +736,7 @@ int FixRxKokkos<DeviceType>::rkf45_h0(const int neq, const double t, const doubl
   // compute ydot at t=t0
   rhs (t, y, ydot, v_params);

-   while(1)
+   while (1)
   {
      // Estimate y'' with finite-difference ...

@ -748,7 +748,7 @@ int FixRxKokkos<DeviceType>::rkf45_h0(const int neq, const double t, const doubl

      // Compute WRMS norm of y''
      double yddnrm = 0.0;
-      for (int k = 0; k < neq; k++){
+      for (int k = 0; k < neq; k++) {
         double ydd = (ydot1[k] - ydot[k]) / hg;
         double wterr = ydd / (relTol * fabs( y[k] ) + absTol);
         yddnrm += wterr * wterr;
@ -760,7 +760,7 @@ int FixRxKokkos<DeviceType>::rkf45_h0(const int neq, const double t, const doubl
      //std::cout << "ydot " << ydot[neq-1] << std::endl;

      // should we accept this?
-      if (hnew_is_ok || iter == max_iters){
+      if (hnew_is_ok || iter == max_iters) {
         hnew = hg;
         if (iter == max_iters)
            fprintf(stderr, "ERROR_HIN_MAX_ITERS\n");
@ -774,11 +774,11 @@ int FixRxKokkos<DeviceType>::rkf45_h0(const int neq, const double t, const doubl
      double hrat = hnew / hg;

      // Accept this value ... the bias factor should bring it within range.
-      if ( (hrat > 0.5) && (hrat < 2.0) )
+      if ((hrat > 0.5) && (hrat < 2.0))
         hnew_is_ok = true;

      // If y'' is still bad after a few iterations, just accept h and give up.
-      if ( (iter > 1) && hrat > 2.0 ) {
+      if ((iter > 1) && hrat > 2.0) {
         hnew = hg;
         hnew_is_ok = true;
      }
@ -827,7 +827,7 @@ void FixRxKokkos<DeviceType>::rkf45(const int neq, const double t_stop, double *

  double t = 0.0;

-  if (h < h_min){
+  if (h < h_min) {
    //fprintf(stderr,"hin not implemented yet\n");
    //exit(-1);
    nfe = rkf45_h0 (neq, t, t_stop, h_min, h_max, h, y, rwork, v_param);
@ -836,7 +836,7 @@ void FixRxKokkos<DeviceType>::rkf45(const int neq, const double t_stop, double *
  //printf("t= %e t_stop= %e h= %e\n", t, t_stop, h);

  // Integrate until we reach the end time.
-  while (fabs(t - t_stop) > tround){
+  while (fabs(t - t_stop) > tround) {
    double *yout = rwork;
    double *eout = yout + neq;

@ -846,7 +846,7 @@ void FixRxKokkos<DeviceType>::rkf45(const int neq, const double t_stop, double *
    // Estimate the solution error.
      // ... weighted 2-norm of the error.
      double err2 = 0.0;
-      for (int k = 0; k < neq; k++){
+      for (int k = 0; k < neq; k++) {
        const double wterr = eout[k] / (relTol * fabs( y[k] ) + absTol);
        err2 += wterr * wterr;
      }
@ -854,7 +854,7 @@ void FixRxKokkos<DeviceType>::rkf45(const int neq, const double t_stop, double *
    double err = fmax( uround, sqrt( err2 / double(nspecies) ));

    // Accept the solution?
-    if (err <= 1.0 || h <= h_min){
+    if (err <= 1.0 || h <= h_min) {
      t += h;
      nst++;

@ -887,7 +887,7 @@ void FixRxKokkos<DeviceType>::rkf45(const int neq, const double t_stop, double *
    nit++;
    nfe += 6;

-    if (maxIters && nit > maxIters){
+    if (maxIters && nit > maxIters) {
      //fprintf(stderr,"atom[%d] took too many iterations in rkf45 %d %e %e\n", id, nit, t, t_stop);
      counter.nFails ++;
      break;
@ -928,14 +928,14 @@ int FixRxKokkos<DeviceType>::rhs_dense(double /*t*/, const double *y, double *dy
  //const double VDPD = domain->xprd * domain->yprd * domain->zprd / atom->natoms;
  //const int nspecies = atom->nspecies_dpd;

-  for(int ispecies=0; ispecies<nspecies; ispecies++)
+  for (int ispecies=0; ispecies<nspecies; ispecies++)
    dydt[ispecies] = 0.0;

  // Construct the reaction rate laws
-  for(int jrxn=0; jrxn<nreactions; jrxn++){
+  for (int jrxn=0; jrxn<nreactions; jrxn++) {
    double rxnRateLawForward = kFor[jrxn];

-    for(int ispecies=0; ispecies<nspecies; ispecies++){
+    for (int ispecies=0; ispecies<nspecies; ispecies++) {
      const double concentration = y[ispecies]/VDPD;
      rxnRateLawForward *= pow( concentration, d_kineticsData.stoichReactants(jrxn,ispecies) );
      //rxnRateLawForward *= pow(concentration,stoichReactants[jrxn][ispecies]);
@ -944,8 +944,8 @@ int FixRxKokkos<DeviceType>::rhs_dense(double /*t*/, const double *y, double *dy
  }

  // Construct the reaction rates for each species
-  for(int ispecies=0; ispecies<nspecies; ispecies++)
-    for(int jrxn=0; jrxn<nreactions; jrxn++)
+  for (int ispecies=0; ispecies<nspecies; ispecies++)
+    for (int jrxn=0; jrxn<nreactions; jrxn++)
    {
      dydt[ispecies] += d_kineticsData.stoich(jrxn,ispecies) *VDPD*rxnRateLaw[jrxn];
      //dydt[ispecies] += stoich[jrxn][ispecies]*VDPD*rxnRateLaw[jrxn];
@ -982,9 +982,9 @@ int FixRxKokkos<DeviceType>::rhs_sparse(double /*t*/, const double *y, double *d
   for (int i = 0; i < nreactions; ++i)
   {
      double rxnRateLawForward;
-      if (isIntegral(i)){
+      if (isIntegral(i)) {
         rxnRateLawForward = kFor[i] * powint( conc[ nuk(i,0) ], inu(i,0) );
-         for (int kk = 1; kk < maxReactants; ++kk){
+         for (int kk = 1; kk < maxReactants; ++kk) {
            const int k = nuk(i,kk);
            if (k == SparseKinetics_invalidIndex) break;
            //if (k != SparseKinetics_invalidIndex)
@ -992,7 +992,7 @@ int FixRxKokkos<DeviceType>::rhs_sparse(double /*t*/, const double *y, double *d
         }
      } else {
         rxnRateLawForward = kFor[i] * pow( conc[ nuk(i,0) ], nu(i,0) );
-         for (int kk = 1; kk < maxReactants; ++kk){
+         for (int kk = 1; kk < maxReactants; ++kk) {
            const int k = nuk(i,kk);
            if (k == SparseKinetics_invalidIndex) break;
            //if (k != SparseKinetics_invalidIndex)
@ -1008,10 +1008,10 @@ int FixRxKokkos<DeviceType>::rhs_sparse(double /*t*/, const double *y, double *d
   for (int k = 0; k < nspecies; ++k)
      dydt[k] = 0.0;

-   for (int i = 0; i < nreactions; ++i){
+   for (int i = 0; i < nreactions; ++i) {
      // Reactants ...
      dydt[ nuk(i,0) ] -= nu(i,0) * rxnRateLaw[i];
-      for (int kk = 1; kk < maxReactants; ++kk){
+      for (int kk = 1; kk < maxReactants; ++kk) {
         const int k = nuk(i,kk);
         if (k == SparseKinetics_invalidIndex) break;
         //if (k != SparseKinetics_invalidIndex)
@ -1020,7 +1020,7 @@ int FixRxKokkos<DeviceType>::rhs_sparse(double /*t*/, const double *y, double *d

      // Products ...
      dydt[ nuk(i,maxReactants) ] += nu(i,maxReactants) * rxnRateLaw[i];
-      for (int kk = maxReactants+1; kk < maxSpecies; ++kk){
+      for (int kk = maxReactants+1; kk < maxSpecies; ++kk) {
         const int k = nuk(i,kk);
         if (k == SparseKinetics_invalidIndex) break;
         //if (k != SparseKinetics_invalidIndex)
@ -1074,14 +1074,14 @@ int FixRxKokkos<DeviceType>::k_rhs_dense(double /*t*/, const VectorType& y, Vect
  //const double VDPD = domain->xprd * domain->yprd * domain->zprd / atom->natoms;
  //const int nspecies = atom->nspecies_dpd;

-  for(int ispecies=0; ispecies<nspecies; ispecies++)
+  for (int ispecies=0; ispecies<nspecies; ispecies++)
    dydt[ispecies] = 0.0;

  // Construct the reaction rate laws
-  for(int jrxn=0; jrxn<nreactions; jrxn++){
+  for (int jrxn=0; jrxn<nreactions; jrxn++) {
    double rxnRateLawForward = kFor[jrxn];

-    for(int ispecies=0; ispecies<nspecies; ispecies++){
+    for (int ispecies=0; ispecies<nspecies; ispecies++) {
      const double concentration = y[ispecies]/VDPD;
      rxnRateLawForward *= pow( concentration, d_kineticsData.stoichReactants(jrxn,ispecies) );
      //rxnRateLawForward *= pow(concentration,stoichReactants[jrxn][ispecies]);
@ -1090,8 +1090,8 @@ int FixRxKokkos<DeviceType>::k_rhs_dense(double /*t*/, const VectorType& y, Vect
  }

  // Construct the reaction rates for each species
-  for(int ispecies=0; ispecies<nspecies; ispecies++)
-    for(int jrxn=0; jrxn<nreactions; jrxn++)
+  for (int ispecies=0; ispecies<nspecies; ispecies++)
+    for (int jrxn=0; jrxn<nreactions; jrxn++)
    {
      dydt[ispecies] += d_kineticsData.stoich(jrxn,ispecies) *VDPD*rxnRateLaw[jrxn];
      //dydt[ispecies] += stoich[jrxn][ispecies]*VDPD*rxnRateLaw[jrxn];
@ -1129,9 +1129,9 @@ int FixRxKokkos<DeviceType>::k_rhs_sparse(double /*t*/, const VectorType& y, Vec
   for (int i = 0; i < nreactions; ++i)
   {
      double rxnRateLawForward;
-      if (isIntegral(i)){
+      if (isIntegral(i)) {
         rxnRateLawForward = kFor[i] * powint( conc[ nuk(i,0) ], inu(i,0) );
-         for (int kk = 1; kk < maxReactants; ++kk){
+         for (int kk = 1; kk < maxReactants; ++kk) {
            const int k = nuk(i,kk);
            if (k == SparseKinetics_invalidIndex) break;
            //if (k != SparseKinetics_invalidIndex)
@ -1139,7 +1139,7 @@ int FixRxKokkos<DeviceType>::k_rhs_sparse(double /*t*/, const VectorType& y, Vec
         }
      } else {
         rxnRateLawForward = kFor[i] * pow( conc[ nuk(i,0) ], nu(i,0) );
-         for (int kk = 1; kk < maxReactants; ++kk){
+         for (int kk = 1; kk < maxReactants; ++kk) {
            const int k = nuk(i,kk);
            if (k == SparseKinetics_invalidIndex) break;
            //if (k != SparseKinetics_invalidIndex)
@ -1155,10 +1155,10 @@ int FixRxKokkos<DeviceType>::k_rhs_sparse(double /*t*/, const VectorType& y, Vec
   for (int k = 0; k < nspecies; ++k)
      dydt[k] = 0.0;

-   for (int i = 0; i < nreactions; ++i){
+   for (int i = 0; i < nreactions; ++i) {
      // Reactants ...
      dydt[ nuk(i,0) ] -= nu(i,0) * rxnRateLaw[i];
-      for (int kk = 1; kk < maxReactants; ++kk){
+      for (int kk = 1; kk < maxReactants; ++kk) {
         const int k = nuk(i,kk);
         if (k == SparseKinetics_invalidIndex) break;
         //if (k != SparseKinetics_invalidIndex)
@ -1167,7 +1167,7 @@ int FixRxKokkos<DeviceType>::k_rhs_sparse(double /*t*/, const VectorType& y, Vec

      // Products ...
      dydt[ nuk(i,maxReactants) ] += nu(i,maxReactants) * rxnRateLaw[i];
-      for (int kk = maxReactants+1; kk < maxSpecies; ++kk){
+      for (int kk = maxReactants+1; kk < maxSpecies; ++kk) {
         const int k = nuk(i,kk);
         if (k == SparseKinetics_invalidIndex) break;
         //if (k != SparseKinetics_invalidIndex)
@ -1686,7 +1686,7 @@ void FixRxKokkos<DeviceType>::solve_reactions(const int /*vflag*/, const bool is
  //                       getElapsedTime(timer_ODE, timer_stop), nlocal, TotalCounters.nFuncs, TotalCounters.nSteps);

  // Warn the user if a failure was detected in the ODE solver.
-  if (TotalCounters.nFails > 0){
+  if (TotalCounters.nFails > 0) {
    char sbuf[128];
    sprintf(sbuf,"in FixRX::pre_force, ODE solver failed for %d atoms.", TotalCounters.nFails);
    error->warning(FLERR, sbuf);
@ -1752,7 +1752,7 @@ void FixRxKokkos<DeviceType>::odeDiagnostics(void)
  double min_per_proc[numCounters];

  // Compute counters per dpd time-step.
-  for (int i = 0; i < numCounters; ++i){
+  for (int i = 0; i < numCounters; ++i) {
    my_vals[i] = this->diagnosticCounter[i] / nTimes;
    //printf("my sum[%d] = %f %d\n", i, my_vals[i], comm->me);
  }
@ -1767,7 +1767,7 @@ void FixRxKokkos<DeviceType>::odeDiagnostics(void)
  double avg_per_atom[numCounters], avg_per_proc[numCounters];

  // Averages per-ODE and per-proc per time-step.
-  for (int i = 0; i < numCounters; ++i){
+  for (int i = 0; i < numCounters; ++i) {
    avg_per_atom[i] = sums[i] / nODEs;
    avg_per_proc[i] = sums[i] / comm->nprocs;
  }
@ -1775,7 +1775,7 @@ void FixRxKokkos<DeviceType>::odeDiagnostics(void)
  // Sum up the differences from each task.
  double sum_sq[2*numCounters];
  double my_sum_sq[2*numCounters];
-  for (int i = 0; i < numCounters; ++i){
+  for (int i = 0; i < numCounters; ++i) {
    double diff_i = my_vals[i] - avg_per_proc[i];
    my_sum_sq[i] = diff_i * diff_i;
  }
@ -1835,7 +1835,7 @@ void FixRxKokkos<DeviceType>::odeDiagnostics(void)
  TimerType timer_stop = getTimeStamp();
  double time_local = getElapsedTime( timer_start, timer_stop );

-  if (comm->me == 0){
+  if (comm->me == 0) {
    char smesg[128];

 #define print_mesg(smesg) {\
@ -1849,7 +1849,7 @@ void FixRxKokkos<DeviceType>::odeDiagnostics(void)
    print_mesg(smesg);

    // only valid for single time-step!
-    if (diagnosticFrequency == 1){
+    if (diagnosticFrequency == 1) {
      double rms_per_ODE[numCounters];
      for (int i = 0; i < numCounters; ++i)
        rms_per_ODE[i] = sqrt( sum_sq[i+numCounters] / nODEs );
@ -1867,7 +1867,7 @@ void FixRxKokkos<DeviceType>::odeDiagnostics(void)
    sprintf(smesg, "         AVG per Proc : %-12.5g | %-12.5g | %-12.5g | %-12.5g", avg_per_proc[StepSum], avg_per_proc[FuncSum], avg_per_proc[TimeSum], avg_per_proc[AtomSum]);
    print_mesg(smesg);

-    if (comm->nprocs > 1){
+    if (comm->nprocs > 1) {
      double rms_per_proc[numCounters];
      for (int i = 0; i < numCounters; ++i)
        rms_per_proc[i] = sqrt( sum_sq[i] / comm->nprocs );
@ -2206,7 +2206,7 @@ int FixRxKokkos<DeviceType>::pack_forward_comm(int n, int *list, double *buf, in
  int m = 0;
  for (int ii = 0; ii < n; ii++) {
    const int jj = list[ii];
-    for(int ispecies = 0; ispecies < nspecies; ispecies++){
+    for (int ispecies = 0; ispecies < nspecies; ispecies++) {
      buf[m++] = h_dvector(ispecies,jj);
      buf[m++] = h_dvector(ispecies+nspecies,jj);
    }
@ -2228,8 +2228,8 @@ void FixRxKokkos<DeviceType>::unpack_forward_comm(int n, int first, double *buf)

  const int last = first + n ;
  int m = 0;
-  for (int ii = first; ii < last; ii++){
-    for (int ispecies = 0; ispecies < nspecies; ispecies++){
+  for (int ii = first; ii < last; ii++) {
+    for (int ispecies = 0; ispecies < nspecies; ispecies++) {
      h_dvector(ispecies,ii) = buf[m++];
      h_dvector(ispecies+nspecies,ii) = buf[m++];
    }
--- a/src/KOKKOS/fix_setforce_kokkos.h
+++ b/src/KOKKOS/fix_setforce_kokkos.h
@ -34,7 +34,7 @@ struct s_double_3 {
    d0 = d1 = d2 = 0.0;
  }
  KOKKOS_INLINE_FUNCTION
-  s_double_3& operator+=(const s_double_3 &rhs){
+  s_double_3& operator+=(const s_double_3 &rhs) {
    d0 += rhs.d0;
    d1 += rhs.d1;
    d2 += rhs.d2;
--- a/src/KOKKOS/fix_shardlow_kokkos.cpp
+++ b/src/KOKKOS/fix_shardlow_kokkos.cpp
@ -79,7 +79,7 @@ FixShardlowKokkos<DeviceType>::FixShardlowKokkos(LAMMPS *lmp, int narg, char **a
 //  k_pairDPD = (PairDPDfdtKokkos *) force->pair_match("dpd/fdt",1);
  k_pairDPDE = dynamic_cast<PairDPDfdtEnergyKokkos<DeviceType> *>(force->pair_match("dpd/fdt/energy",0));

-//   if(k_pairDPDE){
+//   if (k_pairDPDE) {
    comm_forward = 3;
    comm_reverse = 5;
 //   } else {
@ -88,19 +88,14 @@ FixShardlowKokkos<DeviceType>::FixShardlowKokkos(LAMMPS *lmp, int narg, char **a
 //   }


-  if(/* k_pairDPD == nullptr &&*/ k_pairDPDE == nullptr)
+  if (/* k_pairDPD == nullptr &&*/ k_pairDPDE == nullptr)
    error->all(FLERR,"Must use pair_style "/*"dpd/fdt/kk or "*/"dpd/fdt/energy/kk with fix shardlow/kk");

 #ifdef DEBUG_SSA_PAIR_CT
  d_counters = typename AT::t_int_2d("FixShardlowKokkos::d_counters", 2, 3);
  d_hist = typename AT::t_int_1d("FixShardlowKokkos::d_hist", 32);
-#ifndef KOKKOS_USE_CUDA_UVM
  h_counters = Kokkos::create_mirror_view(d_counters);
  h_hist = Kokkos::create_mirror_view(d_hist);
-#else
-  h_counters = d_counters;
-  h_hist = d_hist;
-#endif
 #endif
 }

@ -160,7 +155,7 @@ void FixShardlowKokkos<DeviceType>::init()

      k_params.h_view(j,i) = k_params.h_view(i,j);

-      if(i<MAX_TYPES_STACKPARAMS+1 && j<MAX_TYPES_STACKPARAMS+1) {
+      if (i<MAX_TYPES_STACKPARAMS+1 && j<MAX_TYPES_STACKPARAMS+1) {
        m_params[i][j] = m_params[j][i] = k_params.h_view(i,j);
        m_cutsq[j][i] = m_cutsq[i][j] = k_pairDPDE->k_cutsq.h_view(i,j);
      }
@ -196,7 +191,7 @@ void FixShardlowKokkos<DeviceType>::pre_neighbor()
  if (domain->triclinic)
    error->all(FLERR,"Fix shardlow does not yet support triclinic geometries");

-  if(rcut >= bbx || rcut >= bby || rcut>= bbz )
+  if (rcut >= bbx || rcut >= bby || rcut>= bbz )
  {
    char fmt[] = {"Shardlow algorithm requires sub-domain length > 2*(rcut+skin). Either reduce the number of processors requested, or change the cutoff/skin: rcut= %e bbx= %e bby= %e bbz= %e\n"};
    char *msg = (char *) malloc(sizeof(fmt) + 4*15);
@ -231,7 +226,7 @@ void FixShardlowKokkos<DeviceType>::pre_neighbor()
    massPerI = false;
    masses = atomKK->k_mass.view<DeviceType>();
  }
-//   if(k_pairDPDE){
+//   if (k_pairDPDE) {
  dpdTheta = atomKK->k_dpdTheta.view<DeviceType>();

 //} else {
@ -632,7 +627,7 @@ void FixShardlowKokkos<DeviceType>::initial_integrate(int /*vflag*/)
  for (workPhase = 0; workPhase < ssa_phaseCt; ++workPhase) {
    int workItemCt = h_ssa_phaseLen[workPhase];

-    if(atom->ntypes > MAX_TYPES_STACKPARAMS)
+    if (atom->ntypes > MAX_TYPES_STACKPARAMS)
      Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagFixShardlowSSAUpdateDPDE<false> >(0,workItemCt),*this);
    else
      Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagFixShardlowSSAUpdateDPDE<true> >(0,workItemCt),*this);
@ -649,7 +644,7 @@ void FixShardlowKokkos<DeviceType>::initial_integrate(int /*vflag*/)
    comm->forward_comm_fix(this);
    atomKK->modified(Host,V_MASK);

-    if(k_pairDPDE){
+    if (k_pairDPDE) {
      // Zero out the ghosts' uCond & uMech to be used as delta accumulators
 //      memset(&(atom->uCond[nlocal]), 0, sizeof(double)*nghost);
 //      memset(&(atom->uMech[nlocal]), 0, sizeof(double)*nghost);
@ -667,7 +662,7 @@ void FixShardlowKokkos<DeviceType>::initial_integrate(int /*vflag*/)

    // process neighbors in this AIR
    atomKK->sync(execution_space,X_MASK | V_MASK | TYPE_MASK | RMASS_MASK | UCOND_MASK | UMECH_MASK | DPDTHETA_MASK);
-    if(atom->ntypes > MAX_TYPES_STACKPARAMS)
+    if (atom->ntypes > MAX_TYPES_STACKPARAMS)
      Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagFixShardlowSSAUpdateDPDEGhost<false> >(0,workItemCt),*this);
    else
      Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagFixShardlowSSAUpdateDPDEGhost<true> >(0,workItemCt),*this);
@ -759,7 +754,7 @@ int FixShardlowKokkos<DeviceType>::pack_reverse_comm(int n, int first, double *b
    buf[m++] = h_v(i, 0) - h_v_t0(i - nlocal, 0);
    buf[m++] = h_v(i, 1) - h_v_t0(i - nlocal, 1);
    buf[m++] = h_v(i, 2) - h_v_t0(i - nlocal, 2);
-    if(k_pairDPDE){
+    if (k_pairDPDE) {
      buf[m++] = h_uCond(i); // for ghosts, this is an accumulated delta
      buf[m++] = h_uMech(i); // for ghosts, this is an accumulated delta
    }
@ -781,7 +776,7 @@ void FixShardlowKokkos<DeviceType>::unpack_reverse_comm(int n, int *list, double
    h_v(j, 0) += buf[m++];
    h_v(j, 1) += buf[m++];
    h_v(j, 2) += buf[m++];
-    if(k_pairDPDE){
+    if (k_pairDPDE) {
      h_uCond(j) += buf[m++]; // add in the accumulated delta
      h_uMech(j) += buf[m++]; // add in the accumulated delta
    }
--- a/src/KOKKOS/fix_shardlow_kokkos.h
+++ b/src/KOKKOS/fix_shardlow_kokkos.h
@ -62,9 +62,9 @@ class FixShardlowKokkos : public FixShardlow {

  struct params_ssa {
    KOKKOS_INLINE_FUNCTION
-    params_ssa(){cutinv=FLT_MAX;halfsigma=0;kappa=0;alpha=0;};
+    params_ssa() {cutinv=FLT_MAX;halfsigma=0;kappa=0;alpha=0;};
    KOKKOS_INLINE_FUNCTION
-    params_ssa(int /*i*/){cutinv=FLT_MAX;halfsigma=0;kappa=0;alpha=0;};
+    params_ssa(int /*i*/) {cutinv=FLT_MAX;halfsigma=0;kappa=0;alpha=0;};
    F_FLOAT cutinv,halfsigma,kappa,alpha;
  };

--- a/src/KOKKOS/kokkos.cpp
+++ b/src/KOKKOS/kokkos.cpp
@ -39,7 +39,7 @@
 GPU_AWARE_UNKNOWN
 #elif defined(KOKKOS_ENABLE_CUDA)

-// OpenMPI supports detecting CUDA-aware MPI as of version 2.0.0
+// OpenMPI supports detecting GPU-aware MPI as of version 2.0.0

 #if (OPEN_MPI)
 #if (OMPI_MAJOR_VERSION >= 2)
@ -77,6 +77,8 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)

  exchange_comm_changed = 0;
  forward_comm_changed = 0;
+  forward_pair_comm_changed = 0;
+  forward_fix_comm_changed = 0;
  reverse_comm_changed = 0;

  delete memory;
@ -147,7 +149,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)

      if (ngpus > 1 && !set_flag)
        error->all(FLERR,"Could not determine local MPI rank for multiple "
-                           "GPUs with Kokkos CUDA because MPI library not recognized");
+                           "GPUs with Kokkos CUDA or HIP because MPI library not recognized");

    } else if (strcmp(arg[iarg],"t") == 0 ||
               strcmp(arg[iarg],"threads") == 0) {
@ -203,7 +205,10 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
    neighflag = FULL;
    neighflag_qeq = FULL;
    newtonflag = 0;
+
    exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 0;
+    forward_pair_comm_classic = forward_fix_comm_classic = 0;
+
    exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0;
  } else {
    if (nthreads > 1) {
@ -214,14 +219,17 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
      neighflag_qeq = HALF;
    }
    newtonflag = 1;
+
    exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 1;
+    forward_pair_comm_classic = forward_fix_comm_classic = 1;
+
    exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0;
  }

 #ifdef LMP_KOKKOS_GPU

-  // check and warn about CUDA-aware MPI availability when using multiple MPI tasks
-  // change default only if we can safely detect that CUDA-aware MPI is not available
+  // check and warn about GPU-aware MPI availability when using multiple MPI tasks
+  // change default only if we can safely detect that GPU-aware MPI is not available

  int nmpi = 0;
  MPI_Comm_size(world,&nmpi);
@ -237,21 +245,21 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
      gpu_aware_flag = 0;
      char* str;
      if ((str = getenv("OMPI_MCA_pml_pami_enable_cuda")))
-        if((strcmp(str,"1") == 0)) {
+        if ((strcmp(str,"1") == 0)) {
          have_gpu_aware = 1;
          gpu_aware_flag = 1;
        }

      if (!gpu_aware_flag)
        if (me == 0)
-          error->warning(FLERR,"The Spectrum MPI '-gpu' flag is not set. Disabling CUDA-aware MPI");
+          error->warning(FLERR,"The Spectrum MPI '-gpu' flag is not set. Disabling GPU-aware MPI");
    }
 #endif

    if (gpu_aware_flag == 1 && have_gpu_aware == 0) {
      if (me == 0)
-        error->warning(FLERR,"Turning off CUDA-aware MPI since it is not detected, "
-                       "use '-pk kokkos cuda/aware on' to override");
+        error->warning(FLERR,"Turning off GPU-aware MPI since it is not detected, "
+                       "use '-pk kokkos gpu/aware on' to override");
      gpu_aware_flag = 0;
    } else if (have_gpu_aware == -1) { // maybe we are dealing with MPICH, MVAPICH2 or some derivative?
    // MVAPICH2
@ -264,17 +272,17 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)

      if (!gpu_aware_flag)
        if (me == 0)
-          error->warning(FLERR,"MVAPICH2 'MV2_USE_CUDA' environment variable is not set. Disabling CUDA-aware MPI");
+          error->warning(FLERR,"MVAPICH2 'MV2_USE_CUDA' environment variable is not set. Disabling GPU-aware MPI");
    // pure MPICH or some unsupported MPICH derivative
 #elif defined(MPICH) && !defined(MVAPICH2_VERSION)
      if (me == 0)
-        error->warning(FLERR,"Detected MPICH. Disabling CUDA-aware MPI");
+        error->warning(FLERR,"Detected MPICH. Disabling GPU-aware MPI");
      gpu_aware_flag = 0;
 #else
  if (me == 0)
-    error->warning(FLERR,"Kokkos with CUDA assumes CUDA-aware MPI is available,"
+    error->warning(FLERR,"Kokkos with CUDA or HIP assumes GPU-aware MPI is available,"
                   " but cannot determine if this is the case\n         try"
-                   " '-pk kokkos cuda/aware off' if getting segmentation faults");
+                   " '-pk kokkos gpu/aware off' if getting segmentation faults");

 #endif
    } // if (-1 == have_gpu_aware)
@ -340,12 +348,18 @@ void KokkosLMP::accelerator(int narg, char **arg)
      if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
      if (strcmp(arg[iarg+1],"no") == 0) {
        exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 1;
+        forward_pair_comm_classic = forward_fix_comm_classic = 1;
+
        exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0;
      } else if (strcmp(arg[iarg+1],"host") == 0) {
        exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 0;
+        forward_pair_comm_classic = forward_fix_comm_classic = 1;
+
        exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 1;
      } else if (strcmp(arg[iarg+1],"device") == 0) {
        exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 0;
+        forward_pair_comm_classic = forward_fix_comm_classic = 0;
+
        exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0;
      } else error->all(FLERR,"Illegal package kokkos command");
      iarg += 2;
@ -373,9 +387,25 @@ void KokkosLMP::accelerator(int narg, char **arg)
      } else error->all(FLERR,"Illegal package kokkos command");
      forward_comm_changed = 0;
      iarg += 2;
+    } else if (strcmp(arg[iarg],"comm/pair/forward") == 0) {
+      if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
+      if (strcmp(arg[iarg+1],"no") == 0) forward_pair_comm_classic = 1;
+      else if (strcmp(arg[iarg+1],"host") == 0) forward_pair_comm_classic = 1;
+      else if (strcmp(arg[iarg+1],"device") == 0) forward_pair_comm_classic = 0;
+      else error->all(FLERR,"Illegal package kokkos command");
+      forward_pair_comm_changed = 0;
+      iarg += 2;
+    } else if (strcmp(arg[iarg],"comm/fix/forward") == 0) {
+      if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
+      if (strcmp(arg[iarg+1],"no") == 0) forward_fix_comm_classic = 1;
+      else if (strcmp(arg[iarg+1],"host") == 0) forward_fix_comm_classic = 1;
+      else if (strcmp(arg[iarg+1],"device") == 0) forward_fix_comm_classic = 0;
+      else error->all(FLERR,"Illegal package kokkos command");
+      forward_fix_comm_changed = 0;
+      iarg += 2;
    } else if (strcmp(arg[iarg],"comm/reverse") == 0) {
      if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
-      if (strcmp(arg[iarg+1],"no") == 0) reverse_comm_classic = 1;
+      else if (strcmp(arg[iarg+1],"no") == 0) reverse_comm_classic = 1;
      else if (strcmp(arg[iarg+1],"host") == 0) {
        reverse_comm_classic = 0;
        reverse_comm_on_host = 1;
@ -385,7 +415,8 @@ void KokkosLMP::accelerator(int narg, char **arg)
      } else error->all(FLERR,"Illegal package kokkos command");
      reverse_comm_changed = 0;
      iarg += 2;
-    } else if (strcmp(arg[iarg],"cuda/aware") == 0) {
+    } else if ((strcmp(arg[iarg],"gpu/aware") == 0)
+               || (strcmp(arg[iarg],"cuda/aware") == 0)) {
      if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
      if (strcmp(arg[iarg+1],"off") == 0) gpu_aware_flag = 0;
      else if (strcmp(arg[iarg+1],"on") == 0) gpu_aware_flag = 1;
@ -425,7 +456,7 @@ void KokkosLMP::accelerator(int narg, char **arg)
  int nmpi = 0;
  MPI_Comm_size(world,&nmpi);

-  // if "cuda/aware off" or "pair/only on", and "comm device", change to "comm no"
+  // if "gpu/aware off" or "pair/only on", and "comm device", change to "comm no"

  if ((!gpu_aware_flag && nmpi > 1) || pair_only_flag) {
    if (exchange_comm_classic == 0 && exchange_comm_on_host == 0) {
@ -436,13 +467,21 @@ void KokkosLMP::accelerator(int narg, char **arg)
      forward_comm_classic = 1;
      forward_comm_changed = 1;
    }
+    if (forward_pair_comm_classic == 0) {
+      forward_pair_comm_classic = 1;
+      forward_pair_comm_changed = 1;
+    }
+    if (forward_fix_comm_classic == 0) {
+      forward_fix_comm_classic = 1;
+      forward_fix_comm_changed = 1;
+    }
    if (reverse_comm_classic == 0 && reverse_comm_on_host == 0) {
      reverse_comm_classic = 1;
      reverse_comm_changed = 1;
    }
  }

-  // if "cuda/aware on" and "pair/only off", and comm flags were changed previously, change them back
+  // if "gpu/aware on" and "pair/only off", and comm flags were changed previously, change them back

  if (gpu_aware_flag && !pair_only_flag) {
    if (exchange_comm_changed) {
@ -453,6 +492,14 @@ void KokkosLMP::accelerator(int narg, char **arg)
      forward_comm_classic = 0;
      forward_comm_changed = 0;
    }
+    if (forward_pair_comm_changed) {
+      forward_pair_comm_classic = 0;
+      forward_pair_comm_changed = 0;
+    }
+    if (forward_fix_comm_changed) {
+      forward_fix_comm_classic = 0;
+      forward_fix_comm_changed = 0;
+    }
    if (reverse_comm_changed) {
      reverse_comm_classic = 0;
      reverse_comm_changed = 0;
@ -490,25 +537,15 @@ int KokkosLMP::neigh_count(int m)
  if (nk->lists[m]->execution_space == Host) {
    NeighListKokkos<LMPHostType>* nlistKK = (NeighListKokkos<LMPHostType>*) nk->lists[m];
    inum = nlistKK->inum;
-#ifndef KOKKOS_USE_CUDA_UVM
    h_ilist = Kokkos::create_mirror_view(nlistKK->d_ilist);
    h_numneigh = Kokkos::create_mirror_view(nlistKK->d_numneigh);
-#else
-    h_ilist = nlistKK->d_ilist;
-    h_numneigh = nlistKK->d_numneigh;
-#endif
    Kokkos::deep_copy(h_ilist,nlistKK->d_ilist);
    Kokkos::deep_copy(h_numneigh,nlistKK->d_numneigh);
  } else if (nk->lists[m]->execution_space == Device) {
    NeighListKokkos<LMPDeviceType>* nlistKK = (NeighListKokkos<LMPDeviceType>*) nk->lists[m];
    inum = nlistKK->inum;
-#ifndef KOKKOS_USE_CUDA_UVM
    h_ilist = Kokkos::create_mirror_view(nlistKK->d_ilist);
    h_numneigh = Kokkos::create_mirror_view(nlistKK->d_numneigh);
-#else
-    h_ilist = nlistKK->d_ilist;
-    h_numneigh = nlistKK->d_numneigh;
-#endif
    Kokkos::deep_copy(h_ilist,nlistKK->d_ilist);
    Kokkos::deep_copy(h_numneigh,nlistKK->d_numneigh);
  }
--- a/src/KOKKOS/kokkos.h
+++ b/src/KOKKOS/kokkos.h
@ -28,12 +28,16 @@ class KokkosLMP : protected Pointers {
  int neighflag_qeq_set;
  int exchange_comm_classic;
  int forward_comm_classic;
+  int forward_pair_comm_classic;
+  int forward_fix_comm_classic;
  int reverse_comm_classic;
  int exchange_comm_on_host;
  int forward_comm_on_host;
  int reverse_comm_on_host;
  int exchange_comm_changed;
  int forward_comm_changed;
+  int forward_pair_comm_changed;
+  int forward_fix_comm_changed;
  int reverse_comm_changed;
  int nthreads,ngpus;
  int numa;
--- a/src/KOKKOS/kokkos_type.h
+++ b/src/KOKKOS/kokkos_type.h
@ -1068,28 +1068,42 @@ void memset_kokkos (ViewType &view) {

 struct params_lj_coul {
  KOKKOS_INLINE_FUNCTION
-  params_lj_coul(){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;};
+  params_lj_coul() {cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;};
  KOKKOS_INLINE_FUNCTION
-  params_lj_coul(int /*i*/){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;};
+  params_lj_coul(int /*i*/) {cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;};
  F_FLOAT cut_ljsq,cut_coulsq,lj1,lj2,lj3,lj4,offset;
 };

 // Pair SNAP

+#define SNAP_KOKKOS_REAL double
+#define SNAP_KOKKOS_HOST_VECLEN 1
+
+#ifdef LMP_KOKKOS_GPU
+#define SNAP_KOKKOS_DEVICE_VECLEN 32
+#else
+#define SNAP_KOKKOS_DEVICE_VECLEN 1
+#endif
+
+
+// intentional: SNAreal/complex gets reused beyond SNAP
 typedef double SNAreal;

 //typedef struct { SNAreal re, im; } SNAcomplex;
-template <typename real>
-struct alignas(2*sizeof(real)) SNAComplex
+template <typename real_type_>
+struct alignas(2*sizeof(real_type_)) SNAComplex
 {
-  real re,im;
+  using real_type = real_type_;
+  using complex = SNAComplex<real_type>;
+  real_type re,im;

-  SNAComplex() = default;
+  KOKKOS_FORCEINLINE_FUNCTION SNAComplex()
+   : re(static_cast<real_type>(0.)), im(static_cast<real_type>(0.)) { ; }

-  KOKKOS_FORCEINLINE_FUNCTION SNAComplex(real re)
-   : re(re), im(static_cast<real>(0.)) { ; }
+  KOKKOS_FORCEINLINE_FUNCTION SNAComplex(real_type re)
+   : re(re), im(static_cast<real_type>(0.)) { ; }

-  KOKKOS_FORCEINLINE_FUNCTION SNAComplex(real re, real im)
+  KOKKOS_FORCEINLINE_FUNCTION SNAComplex(real_type re, real_type im)
   : re(re), im(im) { ; }

  KOKKOS_FORCEINLINE_FUNCTION SNAComplex(const SNAComplex& other)
@ -1117,27 +1131,24 @@ struct alignas(2*sizeof(real)) SNAComplex
    return *this;
  }

+  KOKKOS_INLINE_FUNCTION
+  static constexpr complex zero() { return complex(static_cast<real_type>(0.), static_cast<real_type>(0.)); }
+
+  KOKKOS_INLINE_FUNCTION
+  static constexpr complex one() { return complex(static_cast<real_type>(1.), static_cast<real_type>(0.)); }
+
+  KOKKOS_INLINE_FUNCTION
+  const complex conj() { return complex(re, -im); }
+
 };

-template <typename real>
-KOKKOS_FORCEINLINE_FUNCTION SNAComplex<real> operator*(const real& r, const SNAComplex<real>& self) {
-  return SNAComplex<real>(r*self.re, r*self.im);
+template <typename real_type>
+KOKKOS_FORCEINLINE_FUNCTION SNAComplex<real_type> operator*(const real_type& r, const SNAComplex<real_type>& self) {
+  return SNAComplex<real_type>(r*self.re, r*self.im);
 }

 typedef SNAComplex<SNAreal> SNAcomplex;

-// Cayley-Klein pack
-// Can guarantee it's aligned to 2 complex
-struct alignas(32) CayleyKleinPack {
-
-  SNAcomplex a, b;
-  SNAcomplex da[3], db[3];
-  SNAreal sfac;
-  SNAreal dsfacu[3];
-
-};
-
-
 #if defined(KOKKOS_ENABLE_CXX11)
 #undef ISFINITE
 #define ISFINITE(x) std::isfinite(x)
--- a/src/KOKKOS/memory_kokkos.h
+++ b/src/KOKKOS/memory_kokkos.h
@ -46,11 +46,7 @@ template <typename TYPE, typename HTYPE>
                     const char *name)
 {
  data = TYPE(std::string(name),n1);
-#ifndef KOKKOS_USE_CUDA_UVM
  h_data = Kokkos::create_mirror_view(data);
-#else
-  h_data = data;
-#endif
  array = h_data.data();
  return data;
 }
@ -61,11 +57,7 @@ template <typename TYPE, typename HTYPE>
                     int n1, const char *name)
 {
  data = TYPE(std::string(name),n1);
-#ifndef KOKKOS_USE_CUDA_UVM
  h_data = Kokkos::create_mirror_view(data);
-#else
-  h_data = data;
-#endif
  return data;
 }

@ -100,7 +92,7 @@ void destroy_kokkos(TYPE data, typename TYPE::value_type* &array)
 template <typename TYPE>
 TYPE destroy_kokkos(TYPE &data)
 {
-  /*if(data.data()!=nullptr)
+  /*if (data.data()!=nullptr)
    free(data.data());*/
  data = TYPE();
  return data;
@ -167,11 +159,7 @@ template <typename TYPE, typename HTYPE>
                     const char *name)
 {
  data = TYPE(std::string(name),n1,n2);
-#ifndef KOKKOS_USE_CUDA_UVM
  h_data = Kokkos::create_mirror_view(data);
-#else
-  h_data = data;
-#endif
  return data;
 }

@ -185,7 +173,7 @@ TYPE create_kokkos(TYPE &data, typename TYPE::value_type **&array,

  bigint n = 0;
  for (int i = 0; i < n1; i++) {
-    if(n2==0)
+    if (n2==0)
      array[i] = nullptr;
    else
      array[i] = &data.h_view(i,0);
@ -200,17 +188,13 @@ template <typename TYPE, typename HTYPE>
                     const char *name)
 {
  data = TYPE(std::string(name),n1,n2);
-#ifndef KOKKOS_USE_CUDA_UVM
  h_data = Kokkos::create_mirror_view(data);
-#else
-  h_data = data;
-#endif
  bigint nbytes = ((bigint) sizeof(typename TYPE::value_type *)) * n1;
  array = (typename TYPE::value_type **) smalloc(nbytes,name);

  bigint n = 0;
  for (int i = 0; i < n1; i++) {
-    if(n2==0)
+    if (n2==0)
      array[i] = nullptr;
    else
      array[i] = &h_data(i,0);
@ -234,7 +218,7 @@ TYPE grow_kokkos(TYPE &data, typename TYPE::value_type **&array,
  array = (typename TYPE::value_type**) srealloc(array,nbytes,name);

  for (int i = 0; i < n1; i++)
-    if(n2==0)
+    if (n2==0)
      array[i] = nullptr;
    else
      array[i] = &data.h_view(i,0);
@ -251,7 +235,7 @@ TYPE create_kokkos(TYPE &data, typename TYPE::value_type **&array,
  array = (typename TYPE::value_type **) smalloc(nbytes,name);

  for (int i = 0; i < n1; i++)
-    if(data.h_view.extent(1)==0)
+    if (data.h_view.extent(1)==0)
      array[i] = nullptr;
    else
      array[i] = &data.h_view(i,0);
@ -271,7 +255,7 @@ TYPE grow_kokkos(TYPE &data, typename TYPE::value_type **&array,
  array = (typename TYPE::value_type **) smalloc(nbytes,name);

  for (int i = 0; i < n1; i++)
-    if(data.h_view.extent(1)==0)
+    if (data.h_view.extent(1)==0)
      array[i] = nullptr;
    else
      array[i] = &data.h_view(i,0);
--- a/src/KOKKOS/min_linesearch_kokkos.h
+++ b/src/KOKKOS/min_linesearch_kokkos.h
@ -25,7 +25,7 @@ namespace LAMMPS_NS {
      d0 = d1 = 0.0;
    }
    KOKKOS_INLINE_FUNCTION
-    s_double2& operator+=(const s_double2 &rhs){
+    s_double2& operator+=(const s_double2 &rhs) {
      d0 += rhs.d0;
      d1 += rhs.d1;
      return *this;
--- a/src/KOKKOS/nbin_kokkos.cpp
+++ b/src/KOKKOS/nbin_kokkos.cpp
@ -30,11 +30,7 @@ NBinKokkos<DeviceType>::NBinKokkos(LAMMPS *lmp) : NBinStandard(lmp) {
  atoms_per_bin = 16;

  d_resize = typename AT::t_int_scalar("NeighborKokkosFunctor::resize");
-#ifndef KOKKOS_USE_CUDA_UVM
  h_resize = Kokkos::create_mirror_view(d_resize);
-#else
-  h_resize = d_resize;
-#endif
  h_resize() = 1;

  kokkos = 1;
@ -92,7 +88,7 @@ void NBinKokkos<DeviceType>::bin_atoms()

  h_resize() = 1;

-  while(h_resize() > 0) {
+  while (h_resize() > 0) {
    h_resize() = 0;
    deep_copy(d_resize, h_resize);

@ -111,7 +107,7 @@ void NBinKokkos<DeviceType>::bin_atoms()
    Kokkos::parallel_for(atom->nlocal+atom->nghost, f);

    deep_copy(h_resize, d_resize);
-    if(h_resize()) {
+    if (h_resize()) {

      atoms_per_bin += 16;
      k_bins = DAT::tdual_int_2d("bins", mbins, atoms_per_bin);
@ -135,7 +131,7 @@ void NBinKokkos<DeviceType>::binatomsItem(const int &i) const

  atom2bin(i) = ibin;
  const int ac = Kokkos::atomic_fetch_add(&bincount[ibin], (int)1);
-  if(ac < (int)bins.extent(1)) {
+  if (ac < (int)bins.extent(1)) {
    bins(ibin, ac) = i;
  } else {
    d_resize() = 1;
--- a/src/KOKKOS/nbin_ssa_kokkos.cpp
+++ b/src/KOKKOS/nbin_ssa_kokkos.cpp
@ -41,7 +41,6 @@ NBinSSAKokkos<DeviceType>::NBinSSAKokkos(LAMMPS *lmp) : NBinStandard(lmp)
  d_lbinxhi = typename AT::t_int_scalar("NBinSSAKokkos::d_lbinxhi");
  d_lbinyhi = typename AT::t_int_scalar("NBinSSAKokkos::d_lbinyhi");
  d_lbinzhi = typename AT::t_int_scalar("NBinSSAKokkos::d_lbinzhi");
-#ifndef KOKKOS_USE_CUDA_UVM
  h_resize = Kokkos::create_mirror_view(d_resize);
  h_lbinxlo = Kokkos::create_mirror_view(d_lbinxlo);
  h_lbinylo = Kokkos::create_mirror_view(d_lbinylo);
@ -49,15 +48,6 @@ NBinSSAKokkos<DeviceType>::NBinSSAKokkos(LAMMPS *lmp) : NBinStandard(lmp)
  h_lbinxhi = Kokkos::create_mirror_view(d_lbinxhi);
  h_lbinyhi = Kokkos::create_mirror_view(d_lbinyhi);
  h_lbinzhi = Kokkos::create_mirror_view(d_lbinzhi);
-#else
-  h_resize = d_resize;
-  h_lbinxlo = d_lbinxlo;
-  h_lbinylo = d_lbinylo;
-  h_lbinzlo = d_lbinzlo;
-  h_lbinxhi = d_lbinxhi;
-  h_lbinyhi = d_lbinyhi;
-  h_lbinzhi = d_lbinzhi;
-#endif
  h_resize() = 1;

  k_gbincount = DAT::tdual_int_1d("NBinSSAKokkos::gbincount",8);
@ -156,7 +146,7 @@ void NBinSSAKokkos<DeviceType>::bin_atoms()

  // actually bin the ghost atoms
  {
-    if(ghosts_per_gbin > (int) gbins.extent(1)) {
+    if (ghosts_per_gbin > (int) gbins.extent(1)) {
      k_gbins = DAT::tdual_int_2d("gbins", 8, ghosts_per_gbin);
      gbins = k_gbins.view<DeviceType>();
    }
@ -293,7 +283,7 @@ void NBinSSAKokkos<DeviceType>::sortBin(
      child = parent*2+1; /* Find the next child */
    }
    gbins(ibin, parent) = t; /* We save t in the heap */
-  } while(1);
+  } while (1);
 }

 namespace LAMMPS_NS {
--- a/src/KOKKOS/nbin_ssa_kokkos.h
+++ b/src/KOKKOS/nbin_ssa_kokkos.h
@ -108,20 +108,20 @@ class NBinSSAKokkos : public NBinStandard {
    if (y >= subhi_[1]) iy = 1;
    if (x < sublo_[0]) ix = -1;
    if (x >= subhi_[0]) ix = 1;
-    if(iz < 0){
+    if (iz < 0) {
      return -1;
-    } else if(iz == 0){
-      if( iy<0 ) return -1; // bottom left/middle/right
-      if( (iy==0) && (ix<0)  ) return -1; // left atoms
-      if( (iy==0) && (ix==0) ) return 0; // Locally owned atoms
-      if( (iy==0) && (ix>0)  ) return 2; // Right atoms
-      if( (iy>0)  && (ix==0) ) return 1; // Top-middle atoms
-      if( (iy>0)  && (ix!=0) ) return 3; // Top-right and top-left atoms
+    } else if (iz == 0) {
+      if (iy<0) return -1; // bottom left/middle/right
+      if ((iy==0) && (ix<0) ) return -1; // left atoms
+      if ((iy==0) && (ix==0)) return 0; // Locally owned atoms
+      if ((iy==0) && (ix>0) ) return 2; // Right atoms
+      if ((iy>0)  && (ix==0)) return 1; // Top-middle atoms
+      if ((iy>0)  && (ix!=0)) return 3; // Top-right and top-left atoms
    } else { // iz > 0
-      if((ix==0) && (iy==0)) return 4; // Back atoms
-      if((ix==0) && (iy!=0)) return 5; // Top-back and bottom-back atoms
-      if((ix!=0) && (iy==0)) return 6; // Left-back and right-back atoms
-      if((ix!=0) && (iy!=0)) return 7; // Back corner atoms
+      if ((ix==0) && (iy==0)) return 4; // Back atoms
+      if ((ix==0) && (iy!=0)) return 5; // Top-back and bottom-back atoms
+      if ((ix!=0) && (iy==0)) return 6; // Left-back and right-back atoms
+      if ((ix!=0) && (iy!=0)) return 7; // Back corner atoms
    }
    return -2;
  }
--- a/src/KOKKOS/neighbor_kokkos.cpp
+++ b/src/KOKKOS/neighbor_kokkos.cpp
@ -329,7 +329,7 @@ void NeighborKokkos::operator()(TagNeighborXhold<DeviceType>, const int &i) cons

 /* ---------------------------------------------------------------------- */

-void NeighborKokkos::modify_ex_type_grow_kokkos(){
+void NeighborKokkos::modify_ex_type_grow_kokkos() {
  memoryKK->grow_kokkos(k_ex1_type,ex1_type,maxex_type,"neigh:ex1_type");
  k_ex1_type.modify<LMPHostType>();
  memoryKK->grow_kokkos(k_ex2_type,ex2_type,maxex_type,"neigh:ex2_type");
@ -337,7 +337,7 @@ void NeighborKokkos::modify_ex_type_grow_kokkos(){
 }

 /* ---------------------------------------------------------------------- */
-void NeighborKokkos::modify_ex_group_grow_kokkos(){
+void NeighborKokkos::modify_ex_group_grow_kokkos() {
  memoryKK->grow_kokkos(k_ex1_group,ex1_group,maxex_group,"neigh:ex1_group");
  k_ex1_group.modify<LMPHostType>();
  memoryKK->grow_kokkos(k_ex2_group,ex2_group,maxex_group,"neigh:ex2_group");
@ -345,13 +345,13 @@ void NeighborKokkos::modify_ex_group_grow_kokkos(){
 }

 /* ---------------------------------------------------------------------- */
-void NeighborKokkos::modify_mol_group_grow_kokkos(){
+void NeighborKokkos::modify_mol_group_grow_kokkos() {
  memoryKK->grow_kokkos(k_ex_mol_group,ex_mol_group,maxex_mol,"neigh:ex_mol_group");
  k_ex_mol_group.modify<LMPHostType>();
 }

 /* ---------------------------------------------------------------------- */
-void NeighborKokkos::modify_mol_intra_grow_kokkos(){
+void NeighborKokkos::modify_mol_intra_grow_kokkos() {
  memoryKK->grow_kokkos(k_ex_mol_intra,ex_mol_intra,maxex_mol,"neigh:ex_mol_intra");
  k_ex_mol_intra.modify<LMPHostType>();
 }
--- a/src/KOKKOS/npair_kokkos.cpp
+++ b/src/KOKKOS/npair_kokkos.cpp
@ -207,7 +207,7 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI,SIZE>::build(NeighList *list_)
  data.special_flag[3] = special_flag[3];

  data.h_resize()=1;
-  while(data.h_resize()) {
+  while (data.h_resize()) {
    data.h_new_maxneighs() = list->maxneighs;
    data.h_resize() = 0;

@ -303,7 +303,7 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI,SIZE>::build(NeighList *list_)
    }
    Kokkos::deep_copy(h_scalars, d_scalars);

-    if(data.h_resize()) {
+    if (data.h_resize()) {
      list->maxneighs = data.h_new_maxneighs() * 1.2;
      list->d_neighbors = typename AT::t_neighbors_2d(Kokkos::NoInit("neighbors"), list->d_neighbors.extent(0), list->maxneighs);
      data.neigh_list.d_neighbors = list->d_neighbors;
@ -410,24 +410,24 @@ void NeighborKokkosExecute<DeviceType>::
    = d_stencil;

  // loop over all bins in neighborhood (includes ibin)
-  if(HalfNeigh)
-  for(int m = 0; m < c_bincount(ibin); m++) {
+  if (HalfNeigh)
+  for (int m = 0; m < c_bincount(ibin); m++) {
    const int j = c_bins(ibin,m);
    const int jtype = type(j);

    //for same bin as atom i skip j if i==j and skip atoms "below and to the left" if using HalfNeighborlists
-    if((j == i) || (HalfNeigh && !Newton && (j < i))  ||
+    if ((j == i) || (HalfNeigh && !Newton && (j < i))  ||
        (HalfNeigh && Newton && ((j < i) || ((j >= nlocal) &&
                                       ((x(j, 2) < ztmp) || (x(j, 2) == ztmp && x(j, 1) < ytmp) ||
                                        (x(j, 2) == ztmp && x(j, 1)  == ytmp && x(j, 0) < xtmp)))))
      ) continue;
-    if(exclude && exclusion(i,j,itype,jtype)) continue;
+    if (exclude && exclusion(i,j,itype,jtype)) continue;

    const X_FLOAT delx = xtmp - x(j, 0);
    const X_FLOAT dely = ytmp - x(j, 1);
    const X_FLOAT delz = ztmp - x(j, 2);
    const X_FLOAT rsq = delx * delx + dely * dely + delz * delz;
-    if(rsq <= cutneighsq(itype,jtype)) {
+    if (rsq <= cutneighsq(itype,jtype)) {
      if (molecular != Atom::ATOMIC) {
        if (!moltemplate)
          which = find_special(i,j);
@ -436,38 +436,38 @@ void NeighborKokkosExecute<DeviceType>::
            /*                        onemols[imol]->nspecial[iatom], */
            /*                        tag[j]-tagprev); */
            /* else which = 0; */
-        if (which == 0){
-          if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
+        if (which == 0) {
+          if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
          else n++;
-        } else if (minimum_image_check(delx,dely,delz)){
-          if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
+        } else if (minimum_image_check(delx,dely,delz)) {
+          if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
          else n++;
        }
        else if (which > 0) {
-          if(n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
+          if (n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
          else n++;
        }
      } else {
-        if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
+        if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
        else n++;
      }
    }
  }

-  for(int k = 0; k < nstencil; k++) {
+  for (int k = 0; k < nstencil; k++) {
    const int jbin = ibin + stencil[k];

    // get subview of jbin
-    if(HalfNeigh && (ibin==jbin)) continue;
+    if (HalfNeigh && (ibin==jbin)) continue;
    //const ArrayTypes<DeviceType>::t_int_1d_const_um =Kokkos::subview<t_int_1d_const_um>(bins,jbin,ALL);
-      for(int m = 0; m < c_bincount(jbin); m++) {
+      for (int m = 0; m < c_bincount(jbin); m++) {

        const int j = c_bins(jbin,m);
        const int jtype = type(j);

-        if(HalfNeigh && !Newton && (j < i)) continue;
-        if(!HalfNeigh && j==i) continue;
-        if(Tri) {
+        if (HalfNeigh && !Newton && (j < i)) continue;
+        if (!HalfNeigh && j==i) continue;
+        if (Tri) {
          if (x(j,2) < ztmp) continue;
          if (x(j,2) == ztmp) {
            if (x(j,1) < ytmp) continue;
@ -477,14 +477,14 @@ void NeighborKokkosExecute<DeviceType>::
            }
          }
        }
-        if(exclude && exclusion(i,j,itype,jtype)) continue;
+        if (exclude && exclusion(i,j,itype,jtype)) continue;

        const X_FLOAT delx = xtmp - x(j, 0);
        const X_FLOAT dely = ytmp - x(j, 1);
        const X_FLOAT delz = ztmp - x(j, 2);
        const X_FLOAT rsq = delx * delx + dely * dely + delz * delz;

-        if(rsq <= cutneighsq(itype,jtype)) {
+        if (rsq <= cutneighsq(itype,jtype)) {
          if (molecular != Atom::ATOMIC) {
            if (!moltemplate)
              which = NeighborKokkosExecute<DeviceType>::find_special(i,j);
@ -493,19 +493,19 @@ void NeighborKokkosExecute<DeviceType>::
            /*                        onemols[imol]->nspecial[iatom], */
            /*                        tag[j]-tagprev); */
            /* else which = 0; */
-            if (which == 0){
-              if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
+            if (which == 0) {
+              if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
              else n++;
-            } else if (minimum_image_check(delx,dely,delz)){
-              if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
+            } else if (minimum_image_check(delx,dely,delz)) {
+              if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
              else n++;
            }
            else if (which > 0) {
-              if(n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
+              if (n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
              else n++;
            }
          } else {
-            if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
+            if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
            else n++;
          }
        }
@ -515,10 +515,10 @@ void NeighborKokkosExecute<DeviceType>::

  neigh_list.d_numneigh(i) = n;

-  if(n > neigh_list.maxneighs) {
+  if (n > neigh_list.maxneighs) {
    resize() = 1;

-    if(n > new_maxneighs()) new_maxneighs() = n; // avoid atomics, safe because in while loop
+    if (n > new_maxneighs()) new_maxneighs() = n; // avoid atomics, safe because in while loop
  }

  neigh_list.d_ilist(i) = i;
@ -562,7 +562,7 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli

  const int ibin = dev.league_rank()*BINS_PER_TEAM+MY_BIN;

-  if(ibin >= mbins) return;
+  if (ibin >= mbins) return;
  X_FLOAT* other_x = sharedmem;
  other_x = other_x + 5*atoms_per_bin*MY_BIN;

@ -570,7 +570,7 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli

  int bincount_current = c_bincount[ibin];

-  for(int kk = 0; kk < TEAMS_PER_BIN; kk++) {
+  for (int kk = 0; kk < TEAMS_PER_BIN; kk++) {
    const int MY_II = dev.team_rank()%atoms_per_bin+kk*dev.team_size();
  const int i = MY_II < bincount_current ? c_bins(ibin, MY_II) : -1;
  /* if necessary, goto next page and add pages */
@ -583,7 +583,7 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
  int itype;
  const AtomNeighbors neighbors_i = neigh_list.get_neighbors((i>=0&&i<nlocal)?i:0);

-  if(i >= 0) {
+  if (i >= 0) {
    xtmp = x(i, 0);
    ytmp = x(i, 1);
    ztmp = x(i, 2);
@ -596,23 +596,23 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
  other_id[MY_II] = i;
  int test = (__syncthreads_count(i >= 0 && i <= nlocal) == 0);

-  if(test) return;
+  if (test) return;

-  if(i >= 0 && i < nlocal) {
+  if (i >= 0 && i < nlocal) {
    #pragma unroll 4
-    for(int m = 0; m < bincount_current; m++) {
+    for (int m = 0; m < bincount_current; m++) {
      int j = other_id[m];
      const int jtype = other_x[m + 3 * atoms_per_bin];

      //for same bin as atom i skip j if i==j and skip atoms "below and to the left" if using halfneighborlists
-      if((j == i) ||
+      if ((j == i) ||
         (HalfNeigh && !Newton && (j < i))  ||
         (HalfNeigh && Newton &&
            ((j < i) ||
            ((j >= nlocal) && ((x(j, 2) < ztmp) || (x(j, 2) == ztmp && x(j, 1) < ytmp) ||
              (x(j, 2) == ztmp && x(j, 1)  == ytmp && x(j, 0) < xtmp)))))
        ) continue;
-        if(Tri) {
+        if (Tri) {
          if (x(j,2) < ztmp) continue;
          if (x(j,2) == ztmp) {
            if (x(j,1) < ytmp) continue;
@ -622,13 +622,13 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
            }
          }
        }
-      if(exclude && exclusion(i,j,itype,jtype)) continue;
+      if (exclude && exclusion(i,j,itype,jtype)) continue;
      const X_FLOAT delx = xtmp - other_x[m];
      const X_FLOAT dely = ytmp - other_x[m + atoms_per_bin];
      const X_FLOAT delz = ztmp - other_x[m + 2 * atoms_per_bin];
      const X_FLOAT rsq = delx * delx + dely * dely + delz * delz;

-      if(rsq <= cutneighsq(itype,jtype)) {
+      if (rsq <= cutneighsq(itype,jtype)) {
        if (molecular != Atom::ATOMIC) {
          int which = 0;
          if (!moltemplate)
@ -638,19 +638,19 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
          /*                        onemols[imol]->nspecial[iatom], */
          /*                        tag[j]-tagprev); */
          /* else which = 0; */
-          if (which == 0){
-            if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
+          if (which == 0) {
+            if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
            else n++;
-          } else if (minimum_image_check(delx,dely,delz)){
-            if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
+          } else if (minimum_image_check(delx,dely,delz)) {
+            if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
            else n++;
          }
          else if (which > 0) {
-            if(n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
+            if (n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
            else n++;
          }
        } else {
-          if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
+          if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
          else n++;
        }
      }
@ -661,15 +661,15 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli

  const typename ArrayTypes<DeviceType>::t_int_1d_const_um stencil
    = d_stencil;
-  for(int k = 0; k < nstencil; k++) {
+  for (int k = 0; k < nstencil; k++) {
    const int jbin = ibin + stencil[k];

-    if(ibin == jbin) continue;
+    if (ibin == jbin) continue;

    bincount_current = c_bincount[jbin];
    int j = MY_II < bincount_current ? c_bins(jbin, MY_II) : -1;

-    if(j >= 0) {
+    if (j >= 0) {
      other_x[MY_II] = x(j, 0);
      other_x[MY_II + atoms_per_bin] = x(j, 1);
      other_x[MY_II + 2 * atoms_per_bin] = x(j, 2);
@ -680,16 +680,16 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli

    __syncthreads();

-    if(i >= 0 && i < nlocal) {
+    if (i >= 0 && i < nlocal) {
      #pragma unroll 8
-      for(int m = 0; m < bincount_current; m++) {
+      for (int m = 0; m < bincount_current; m++) {
        const int j = other_id[m];
        const int jtype = other_x[m + 3 * atoms_per_bin];

        //if(HalfNeigh && (j < i))  continue;
-        if(HalfNeigh && !Newton && (j < i)) continue;
-        if(!HalfNeigh && j==i) continue;
-        if(Tri) {
+        if (HalfNeigh && !Newton && (j < i)) continue;
+        if (!HalfNeigh && j==i) continue;
+        if (Tri) {
          if (x(j,2) < ztmp) continue;
          if (x(j,2) == ztmp) {
            if (x(j,1) < ytmp) continue;
@ -699,14 +699,14 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
            }
          }
        }
-        if(exclude && exclusion(i,j,itype,jtype)) continue;
+        if (exclude && exclusion(i,j,itype,jtype)) continue;

        const X_FLOAT delx = xtmp - other_x[m];
        const X_FLOAT dely = ytmp - other_x[m + atoms_per_bin];
        const X_FLOAT delz = ztmp - other_x[m + 2 * atoms_per_bin];
        const X_FLOAT rsq = delx * delx + dely * dely + delz * delz;

-        if(rsq <= cutneighsq(itype,jtype)) {
+        if (rsq <= cutneighsq(itype,jtype)) {
          if (molecular != Atom::ATOMIC) {
            int which = 0;
            if (!moltemplate)
@ -716,19 +716,19 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
            /*                        onemols[imol]->nspecial[iatom], */
            /*                        tag[j]-tagprev); */
            /* else which = 0; */
-            if (which == 0){
-              if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
+            if (which == 0) {
+              if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
              else n++;
-            } else if (minimum_image_check(delx,dely,delz)){
-              if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
+            } else if (minimum_image_check(delx,dely,delz)) {
+              if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
              else n++;
            }
            else if (which > 0) {
-              if(n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
+              if (n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
              else n++;
            }
          } else {
-            if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
+            if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
            else n++;
          }
        }
@ -738,15 +738,15 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
    __syncthreads();
  }

-  if(i >= 0 && i < nlocal) {
+  if (i >= 0 && i < nlocal) {
    neigh_list.d_numneigh(i) = n;
    neigh_list.d_ilist(i) = i;
  }

-  if(n > neigh_list.maxneighs) {
+  if (n > neigh_list.maxneighs) {
    resize() = 1;

-    if(n > new_maxneighs()) new_maxneighs() = n; // avoid atomics, safe because in while loop
+    if (n > new_maxneighs()) new_maxneighs() = n; // avoid atomics, safe because in while loop
  }
  }
 }
@ -787,14 +787,14 @@ void NeighborKokkosExecute<DeviceType>::
    const int ibin = c_atom2bin(i);
    for (int k = 0; k < nstencil; k++) {
      const int jbin = ibin + stencil[k];
-      for(int m = 0; m < c_bincount(jbin); m++) {
+      for (int m = 0; m < c_bincount(jbin); m++) {
        const int j = c_bins(jbin,m);

        if (HalfNeigh && j <= i) continue;
        else if (j == i) continue;

        const int jtype = type[j];
-        if(exclude && exclusion(i,j,itype,jtype)) continue;
+        if (exclude && exclusion(i,j,itype,jtype)) continue;

        const X_FLOAT delx = xtmp - x(j,0);
        const X_FLOAT dely = ytmp - x(j,1);
@ -810,19 +810,19 @@ void NeighborKokkosExecute<DeviceType>::
            /*                        onemols[imol]->nspecial[iatom], */
            /*                        tag[j]-tagprev); */
            /* else which = 0; */
-            if (which == 0){
-              if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
+            if (which == 0) {
+              if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
              else n++;
-            } else if (minimum_image_check(delx,dely,delz)){
-              if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
+            } else if (minimum_image_check(delx,dely,delz)) {
+              if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
              else n++;
            }
            else if (which > 0) {
-              if(n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
+              if (n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
              else n++;
            }
          } else {
-            if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
+            if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
            else n++;
          }
        }
@ -843,14 +843,14 @@ void NeighborKokkosExecute<DeviceType>::
          ybin2 < 0 || ybin2 >= mbiny ||
          zbin2 < 0 || zbin2 >= mbinz) continue;
      const int jbin = ibin + stencil[k];
-      for(int m = 0; m < c_bincount(jbin); m++) {
+      for (int m = 0; m < c_bincount(jbin); m++) {
        const int j = c_bins(jbin,m);

        if (HalfNeigh && j <= i) continue;
        else if (j == i) continue;

        const int jtype = type[j];
-        if(exclude && exclusion(i,j,itype,jtype)) continue;
+        if (exclude && exclusion(i,j,itype,jtype)) continue;

        const X_FLOAT delx = xtmp - x(j,0);
        const X_FLOAT dely = ytmp - x(j,1);
@ -858,7 +858,7 @@ void NeighborKokkosExecute<DeviceType>::
        const X_FLOAT rsq = delx*delx + dely*dely + delz*delz;

        if (rsq <= cutneighsq(itype,jtype)) {
-          if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
+          if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
          else n++;
        }
      }
@ -867,10 +867,10 @@ void NeighborKokkosExecute<DeviceType>::

  neigh_list.d_numneigh(i) = n;

-  if(n > neigh_list.maxneighs) {
+  if (n > neigh_list.maxneighs) {
    resize() = 1;

-    if(n > new_maxneighs()) new_maxneighs() = n; // avoid atomics, safe because in while loop
+    if (n > new_maxneighs()) new_maxneighs() = n; // avoid atomics, safe because in while loop
  }
  neigh_list.d_ilist(i) = i;
 }
@ -902,18 +902,18 @@ void NeighborKokkosExecute<DeviceType>::
  const int mask_history = 3 << SBBITS;

  // loop over all bins in neighborhood (includes ibin)
-  if(HalfNeigh)
-  for(int m = 0; m < c_bincount(ibin); m++) {
+  if (HalfNeigh)
+  for (int m = 0; m < c_bincount(ibin); m++) {
    const int j = c_bins(ibin,m);
    const int jtype = type(j);

    //for same bin as atom i skip j if i==j and skip atoms "below and to the left" if using HalfNeighborlists
-    if((j == i) || (HalfNeigh && !Newton && (j < i))  ||
+    if ((j == i) || (HalfNeigh && !Newton && (j < i))  ||
        (HalfNeigh && Newton && ((j < i) || ((j >= nlocal) &&
                                       ((x(j, 2) < ztmp) || (x(j, 2) == ztmp && x(j, 1) < ytmp) ||
                                        (x(j, 2) == ztmp && x(j, 1)  == ytmp && x(j, 0) < xtmp)))))
      ) continue;
-    if(exclude && exclusion(i,j,itype,jtype)) continue;
+    if (exclude && exclusion(i,j,itype,jtype)) continue;

    const X_FLOAT delx = xtmp - x(j, 0);
    const X_FLOAT dely = ytmp - x(j, 1);
@ -922,29 +922,29 @@ void NeighborKokkosExecute<DeviceType>::
    const X_FLOAT radsum = radi + radius(j);
    const X_FLOAT cutsq = (radsum + skin) * (radsum + skin);

-    if(rsq <= cutsq) {
-      if(n<neigh_list.maxneighs) {
-        if(neigh_list.history && rsq < radsum*radsum) neighbors_i(n++) = j ^ mask_history;
+    if (rsq <= cutsq) {
+      if (n<neigh_list.maxneighs) {
+        if (neigh_list.history && rsq < radsum*radsum) neighbors_i(n++) = j ^ mask_history;
        else neighbors_i(n++) = j;
      }
      else n++;
    }
  }

-  for(int k = 0; k < nstencil; k++) {
+  for (int k = 0; k < nstencil; k++) {
    const int jbin = ibin + stencil[k];

    // get subview of jbin
-    if(HalfNeigh && (ibin==jbin)) continue;
+    if (HalfNeigh && (ibin==jbin)) continue;
    //const ArrayTypes<DeviceType>::t_int_1d_const_um =Kokkos::subview<t_int_1d_const_um>(bins,jbin,ALL);
-    for(int m = 0; m < c_bincount(jbin); m++) {
+    for (int m = 0; m < c_bincount(jbin); m++) {

      const int j = c_bins(jbin,m);
      const int jtype = type(j);

-      if(HalfNeigh && !Newton && (j < i)) continue;
-      if(!HalfNeigh && j==i) continue;
-      if(Tri) {
+      if (HalfNeigh && !Newton && (j < i)) continue;
+      if (!HalfNeigh && j==i) continue;
+      if (Tri) {
        if (x(j,2) < ztmp) continue;
        if (x(j,2) == ztmp) {
          if (x(j,1) < ytmp) continue;
@ -954,7 +954,7 @@ void NeighborKokkosExecute<DeviceType>::
          }
        }
      }
-      if(exclude && exclusion(i,j,itype,jtype)) continue;
+      if (exclude && exclusion(i,j,itype,jtype)) continue;

      const X_FLOAT delx = xtmp - x(j, 0);
      const X_FLOAT dely = ytmp - x(j, 1);
@ -963,9 +963,9 @@ void NeighborKokkosExecute<DeviceType>::
      const X_FLOAT radsum = radi + radius(j);
      const X_FLOAT cutsq = (radsum + skin) * (radsum + skin);

-      if(rsq <= cutsq) {
-        if(n<neigh_list.maxneighs) {
-          if(neigh_list.history && rsq < radsum*radsum) neighbors_i(n++) = j ^ mask_history;
+      if (rsq <= cutsq) {
+        if (n<neigh_list.maxneighs) {
+          if (neigh_list.history && rsq < radsum*radsum) neighbors_i(n++) = j ^ mask_history;
          else neighbors_i(n++) = j;
        }
        else n++;
@ -975,10 +975,10 @@ void NeighborKokkosExecute<DeviceType>::

  neigh_list.d_numneigh(i) = n;

-  if(n > neigh_list.maxneighs) {
+  if (n > neigh_list.maxneighs) {
    resize() = 1;

-    if(n > new_maxneighs()) new_maxneighs() = n; // avoid atomics, safe because in while loop
+    if (n > new_maxneighs()) new_maxneighs() = n; // avoid atomics, safe because in while loop
  }

  neigh_list.d_ilist(i) = i;
@ -1005,7 +1005,7 @@ void NeighborKokkosExecute<DeviceType>::build_ItemSizeCuda(typename Kokkos::Team

  const int ibin = dev.league_rank()*BINS_PER_TEAM+MY_BIN;

-  if(ibin >= mbins) return;
+  if (ibin >= mbins) return;
  X_FLOAT* other_x = sharedmem;
  other_x = other_x + 6*atoms_per_bin*MY_BIN;

@ -1013,7 +1013,7 @@ void NeighborKokkosExecute<DeviceType>::build_ItemSizeCuda(typename Kokkos::Team

  int bincount_current = c_bincount[ibin];

-  for(int kk = 0; kk < TEAMS_PER_BIN; kk++) {
+  for (int kk = 0; kk < TEAMS_PER_BIN; kk++) {
    const int MY_II = dev.team_rank()%atoms_per_bin+kk*dev.team_size();
    const int i = MY_II < bincount_current ? c_bins(ibin, MY_II) : -1;
    /* if necessary, goto next page and add pages */
@ -1028,7 +1028,7 @@ void NeighborKokkosExecute<DeviceType>::build_ItemSizeCuda(typename Kokkos::Team
    const AtomNeighbors neighbors_i = neigh_list.get_neighbors((i>=0&&i<nlocal)?i:0);
    const int mask_history = 3 << SBBITS;

-    if(i >= 0) {
+    if (i >= 0) {
      xtmp = x(i, 0);
      ytmp = x(i, 1);
      ztmp = x(i, 2);
@ -1043,23 +1043,23 @@ void NeighborKokkosExecute<DeviceType>::build_ItemSizeCuda(typename Kokkos::Team
    other_id[MY_II] = i;
    int test = (__syncthreads_count(i >= 0 && i <= nlocal) == 0);

-    if(test) return;
+    if (test) return;

-    if(i >= 0 && i < nlocal) {
+    if (i >= 0 && i < nlocal) {
      #pragma unroll 4
-      for(int m = 0; m < bincount_current; m++) {
+      for (int m = 0; m < bincount_current; m++) {
        int j = other_id[m];
        const int jtype = other_x[m + 3 * atoms_per_bin];

        //for same bin as atom i skip j if i==j and skip atoms "below and to the left" if using halfneighborlists
-        if((j == i) ||
+        if ((j == i) ||
           (HalfNeigh && !Newton && (j < i))  ||
           (HalfNeigh && Newton &&
            ((j < i) ||
             ((j >= nlocal) && ((x(j, 2) < ztmp) || (x(j, 2) == ztmp && x(j, 1) < ytmp) ||
                                (x(j, 2) == ztmp && x(j, 1)  == ytmp && x(j, 0) < xtmp)))))
           ) continue;
-        if(Tri) {
+        if (Tri) {
          if (x(j,2) < ztmp) continue;
          if (x(j,2) == ztmp) {
            if (x(j,1) < ytmp) continue;
@ -1069,7 +1069,7 @@ void NeighborKokkosExecute<DeviceType>::build_ItemSizeCuda(typename Kokkos::Team
            }
          }
        }
-        if(exclude && exclusion(i,j,itype,jtype)) continue;
+        if (exclude && exclusion(i,j,itype,jtype)) continue;
        const X_FLOAT delx = xtmp - other_x[m];
        const X_FLOAT dely = ytmp - other_x[m + atoms_per_bin];
        const X_FLOAT delz = ztmp - other_x[m + 2 * atoms_per_bin];
@ -1077,8 +1077,8 @@ void NeighborKokkosExecute<DeviceType>::build_ItemSizeCuda(typename Kokkos::Team
        const X_FLOAT radsum = radi + other_x[m + 4 * atoms_per_bin];
        const X_FLOAT cutsq = (radsum + skin) * (radsum + skin);

-        if(rsq <= cutsq) {
-          if(n<neigh_list.maxneighs) {
+        if (rsq <= cutsq) {
+          if (n<neigh_list.maxneighs) {
            if (neigh_list.history && rsq < radsum*radsum) neighbors_i(n++) = j ^ mask_history;
            else neighbors_i(n++) = j;
          }
@ -1090,15 +1090,15 @@ void NeighborKokkosExecute<DeviceType>::build_ItemSizeCuda(typename Kokkos::Team

    const typename ArrayTypes<DeviceType>::t_int_1d_const_um stencil
      = d_stencil;
-    for(int k = 0; k < nstencil; k++) {
+    for (int k = 0; k < nstencil; k++) {
      const int jbin = ibin + stencil[k];

-      if(ibin == jbin) continue;
+      if (ibin == jbin) continue;

      bincount_current = c_bincount[jbin];
      int j = MY_II < bincount_current ? c_bins(jbin, MY_II) : -1;

-      if(j >= 0) {
+      if (j >= 0) {
        other_x[MY_II] = x(j, 0);
        other_x[MY_II + atoms_per_bin] = x(j, 1);
        other_x[MY_II + 2 * atoms_per_bin] = x(j, 2);
@ -1110,16 +1110,16 @@ void NeighborKokkosExecute<DeviceType>::build_ItemSizeCuda(typename Kokkos::Team

      __syncthreads();

-      if(i >= 0 && i < nlocal) {
+      if (i >= 0 && i < nlocal) {
        #pragma unroll 8
-        for(int m = 0; m < bincount_current; m++) {
+        for (int m = 0; m < bincount_current; m++) {
          const int j = other_id[m];
          const int jtype = other_x[m + 3 * atoms_per_bin];

-          if(HalfNeigh && (j < i))  continue;
-          if(HalfNeigh && !Newton && (j < i)) continue;
-          if(!HalfNeigh && j==i) continue;
-          if(Tri) {
+          if (HalfNeigh && (j < i))  continue;
+          if (HalfNeigh && !Newton && (j < i)) continue;
+          if (!HalfNeigh && j==i) continue;
+          if (Tri) {
            if (x(j,2) < ztmp) continue;
            if (x(j,2) == ztmp) {
              if (x(j,1) < ytmp) continue;
@ -1129,7 +1129,7 @@ void NeighborKokkosExecute<DeviceType>::build_ItemSizeCuda(typename Kokkos::Team
              }
            }
          }
-          if(exclude && exclusion(i,j,itype,jtype)) continue;
+          if (exclude && exclusion(i,j,itype,jtype)) continue;

          const X_FLOAT delx = xtmp - other_x[m];
          const X_FLOAT dely = ytmp - other_x[m + atoms_per_bin];
@ -1138,8 +1138,8 @@ void NeighborKokkosExecute<DeviceType>::build_ItemSizeCuda(typename Kokkos::Team
          const X_FLOAT radsum = radi + other_x[m + 4 * atoms_per_bin];
          const X_FLOAT cutsq = (radsum + skin) * (radsum + skin);

-          if(rsq <= cutsq) {
-            if(n<neigh_list.maxneighs) {
+          if (rsq <= cutsq) {
+            if (n<neigh_list.maxneighs) {
              if (neigh_list.history && rsq < radsum*radsum) neighbors_i(n++) = j ^ mask_history;
              else neighbors_i(n++) = j;
            }
@ -1150,15 +1150,15 @@ void NeighborKokkosExecute<DeviceType>::build_ItemSizeCuda(typename Kokkos::Team
      __syncthreads();
    }

-    if(i >= 0 && i < nlocal) {
+    if (i >= 0 && i < nlocal) {
      neigh_list.d_numneigh(i) = n;
      neigh_list.d_ilist(i) = i;
    }

-    if(n > neigh_list.maxneighs) {
+    if (n > neigh_list.maxneighs) {
      resize() = 1;

-      if(n > new_maxneighs()) new_maxneighs() = n; // avoid atomics, safe because in while loop
+      if (n > new_maxneighs()) new_maxneighs() = n; // avoid atomics, safe because in while loop
    }
  }
 }
--- a/src/KOKKOS/npair_ssa_kokkos.cpp
+++ b/src/KOKKOS/npair_ssa_kokkos.cpp
@ -452,7 +452,7 @@ fprintf(stdout, "tota%03d total %3d could use %6d inums, expected %6d inums. inu

  bool firstTry = true;
  data.h_resize()=1;
-  while(data.h_resize()) {
+  while (data.h_resize()) {
    data.h_new_maxneighs() = list->maxneighs;
    data.h_resize() = 0;

@ -489,7 +489,7 @@ fprintf(stdout, "tota%03d total %3d could use %6d inums, expected %6d inums. inu

    deep_copy(data.h_resize, data.resize);

-    if(data.h_resize()) {
+    if (data.h_resize()) {
      deep_copy(data.h_new_maxneighs, data.new_maxneighs);
      list->maxneighs = data.h_new_maxneighs() * 1.2;
      list->d_neighbors = typename ArrayTypes<DeviceType>::t_neighbors_2d("neighbors", list->d_neighbors.extent(0), list->maxneighs);
@ -571,13 +571,13 @@ void NPairSSAKokkosExecute<DeviceType>::build_locals_onePhase(const bool firstTr
          for (; jl < c_bincount(jbin); ++jl) {
            const int j = c_bins(jbin, jl);
            const int jtype = type(j);
-            if(exclude && exclusion(i,j,itype,jtype)) continue;
+            if (exclude && exclusion(i,j,itype,jtype)) continue;

            const X_FLOAT delx = xtmp - x(j, 0);
            const X_FLOAT dely = ytmp - x(j, 1);
            const X_FLOAT delz = ztmp - x(j, 2);
            const X_FLOAT rsq = delx*delx + dely*dely + delz*delz;
-            if(rsq <= cutneighsq(itype,jtype)) {
+            if (rsq <= cutneighsq(itype,jtype)) {
              if (molecular != Atom::ATOMIC) {
                if (!moltemplate)
                  which = find_special(i,j);
@ -586,19 +586,19 @@ void NPairSSAKokkosExecute<DeviceType>::build_locals_onePhase(const bool firstTr
                    /*                        onemols[imol]->nspecial[iatom], */
                    /*                        tag[j]-tagprev); */
                    /* else which = 0; */
-                if (which == 0){
-                  if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
+                if (which == 0) {
+                  if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
                  else n++;
-                } else if (minimum_image_check(delx,dely,delz)){
-                  if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
+                } else if (minimum_image_check(delx,dely,delz)) {
+                  if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
                  else n++;
                }
                else if (which > 0) {
-                  if(n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
+                  if (n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
                  else n++;
                }
              } else {
-                if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
+                if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
                else n++;
              }
            }
@ -608,9 +608,9 @@ void NPairSSAKokkosExecute<DeviceType>::build_locals_onePhase(const bool firstTr
        if (n > 0) {
          neigh_list.d_numneigh(inum) = n;
          neigh_list.d_ilist(inum++) = i;
-          if(n > neigh_list.maxneighs) {
+          if (n > neigh_list.maxneighs) {
            resize() = 1;
-            if(n > new_maxneighs()) Kokkos::atomic_fetch_max(&new_maxneighs(),n);
+            if (n > new_maxneighs()) Kokkos::atomic_fetch_max(&new_maxneighs(),n);
          }
        }
      }
@ -699,13 +699,13 @@ void NPairSSAKokkosExecute<DeviceType>::build_ghosts_onePhase(int workPhase) con
          for (int jl = 0; jl < c_bincount(jbin); ++jl) {
            const int j = c_bins(jbin, jl);
            const int jtype = type(j);
-            if(exclude && exclusion(i,j,itype,jtype)) continue;
+            if (exclude && exclusion(i,j,itype,jtype)) continue;

            const X_FLOAT delx = xtmp - x(j, 0);
            const X_FLOAT dely = ytmp - x(j, 1);
            const X_FLOAT delz = ztmp - x(j, 2);
            const X_FLOAT rsq = delx*delx + dely*dely + delz*delz;
-            if(rsq <= cutneighsq(itype,jtype)) {
+            if (rsq <= cutneighsq(itype,jtype)) {
              if (molecular != Atom::ATOMIC) {
                if (!moltemplate)
                  which = find_special(j,i);
@ -714,19 +714,19 @@ void NPairSSAKokkosExecute<DeviceType>::build_ghosts_onePhase(int workPhase) con
                    /*                        onemols[jmol]->nspecial[jatom], */
                    /*                        tag[i]-jtagprev); */
                    /* else which = 0; */
-                if (which == 0){
-                  if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
+                if (which == 0) {
+                  if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
                  else n++;
-                } else if (minimum_image_check(delx,dely,delz)){
-                  if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
+                } else if (minimum_image_check(delx,dely,delz)) {
+                  if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
                  else n++;
                }
                else if (which > 0) {
-                  if(n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
+                  if (n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
                  else n++;
                }
              } else {
-                if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
+                if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
                else n++;
              }
            }
@ -736,9 +736,9 @@ void NPairSSAKokkosExecute<DeviceType>::build_ghosts_onePhase(int workPhase) con
        if (n > 0) {
          neigh_list.d_numneigh(gNdx) = n;
          neigh_list.d_ilist(gNdx++) = i;
-          if(n > neigh_list.maxneighs) {
+          if (n > neigh_list.maxneighs) {
            resize() = 1;
-            if(n > new_maxneighs()) Kokkos::atomic_fetch_max(&new_maxneighs(),n);
+            if (n > new_maxneighs()) Kokkos::atomic_fetch_max(&new_maxneighs(),n);
          }
        }
      }
--- a/Show More
+++ b/Show More